Allow passing of override scrubber options.

2013-05-22 14:44:38 +01:00 · 2013-05-22 14:44:38 +01:00 · c82ee9c8bb
commit c82ee9c8bb
parent 8c70807c73
2 changed files with 23 additions and 15 deletions
--- a/Webperl/Block.pm
+++ b/Webperl/Block.pm
@ -378,7 +378,7 @@ sub validate_htmlarea {
    # Now we get to the actual validation and stuff. Begin by scrubbing any tags
    # and other crap we don't want out completely. As far as I can tell, this should
    # always generate a result of some kind...
-    $text = scrub_html($text);
+    $text = scrub_html($text, $settings -> {"allow_tags"}, $settings -> {"tag_rules"}, $settings -> {"scrub_defaults"});

    # ... but check, just in case
    return ("",  $self -> {"template"} -> replace_langvar("BLOCK_VALIDATE_SCRUBFAIL", {"***field***" => $settings -> {"nicename"}}))
--- a/Webperl/HTMLValidator.pm
+++ b/Webperl/HTMLValidator.pm
@ -38,13 +38,15 @@ our @EXPORT    = qw(scrub_html tidy_html check_xhtml);
 # discussion on  http://wiki.moxiecode.com/index.php/TinyMCE:Security
 # Several tags removed to make xhtml conformance easier and to remove
 # deprecated and eyestabbery.
-my @allow = ("a", "b", "blockquote", "br", "caption", "col", "colgroup", "comment",
+my $default_allow = [
+    "a", "b", "blockquote", "br", "caption", "col", "colgroup", "comment",
    "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "img", "li", "ol", "p",
    "pre", "small", "span", "strong", "sub", "sup", "table", "tbody", "td",
-             "tfoot", "th", "thead", "tr", "tt", "ul");
+    "tfoot", "th", "thead", "tr", "tt", "ul"
+];

 # Explicit rules for allowed tags, required to provide per-tag tweaks to the filter.
-my @rules = (
+my $default_rules = {
    img => {
        src    => qr{^(?:http|https)://}i,
        alt    => 1,
@ -88,10 +90,10 @@ my @rules = (
        title => 1,
        '*'   => 0,
    },
-);
+};

 # Default ruleset applied when no explicit rule is found for a tag.
-my @default = (
+my $default_default = {
    0   =>    # default rule, deny all tags
    {
        'href'  => qr{^(?:http|https)://[-\w]+(?:\.[-\w]+)/}i, # Force basic URL forms
@ -100,10 +102,10 @@ my @default = (
        'name'  => 1,
        '*'     => 0, # default rule, deny all attributes
    }
-);
+};


-## @fn $ scrub_html($html)
+## @fn $ scrub_html($html, $allow, $rules, $default)
 # Remove dangerous/unwanted elements and attributes from a html document. This will
 # use HTML::Scrubber to remove the elements and attributes from the specified html
 # that could be used maliciously. There is still the potential for a clever attacker
@ -111,18 +113,24 @@ my @default = (
 # html input is permitted...
 #
 # @param html    The string containing the html to clean up
+# @param allow   An optional reference to an array of allowed tags to pass to HTML::Scrubber -> new()
+# @param rules   An optional reference to a hash of rules to pass to HTML::Scrubber -> new()
+# @param default An optional reference to a hash of defaults to pass to HTML::Scrubber -> new()
 # @return A string containing the scrubbed html.
 sub scrub_html {
    my $html    = shift;
+    my $allow   = shift || $default_allow;
+    my $rules   = shift || $default_rules;
+    my $default = shift || $default_default;

    # Die immediately if there's a nul character in the string, that should never, ever be there.
    die_log("HACK ATTEMPT", "Hack attempt detected. Sod off.")
        if($html =~ /\0/);

    # First, a new scrubber
-    my $scrubber = HTML::Scrubber -> new(allow   => \@allow,
-                                         rules   => \@rules,
-                                         default => \@default,
+    my $scrubber = HTML::Scrubber -> new(allow   => $allow,
+                                         rules   => $rules,
+                                         default => $default,
                                         comment => 0,
                                         process => 0);