Allow passing of override scrubber options.

This commit is contained in:
Chris 2013-05-22 14:44:38 +01:00
parent 8c70807c73
commit c82ee9c8bb
2 changed files with 23 additions and 15 deletions

View File

@ -378,7 +378,7 @@ sub validate_htmlarea {
# Now we get to the actual validation and stuff. Begin by scrubbing any tags # Now we get to the actual validation and stuff. Begin by scrubbing any tags
# and other crap we don't want out completely. As far as I can tell, this should # and other crap we don't want out completely. As far as I can tell, this should
# always generate a result of some kind... # always generate a result of some kind...
$text = scrub_html($text); $text = scrub_html($text, $settings -> {"allow_tags"}, $settings -> {"tag_rules"}, $settings -> {"scrub_defaults"});
# ... but check, just in case # ... but check, just in case
return ("", $self -> {"template"} -> replace_langvar("BLOCK_VALIDATE_SCRUBFAIL", {"***field***" => $settings -> {"nicename"}})) return ("", $self -> {"template"} -> replace_langvar("BLOCK_VALIDATE_SCRUBFAIL", {"***field***" => $settings -> {"nicename"}}))

View File

@ -38,13 +38,15 @@ our @EXPORT = qw(scrub_html tidy_html check_xhtml);
# discussion on http://wiki.moxiecode.com/index.php/TinyMCE:Security # discussion on http://wiki.moxiecode.com/index.php/TinyMCE:Security
# Several tags removed to make xhtml conformance easier and to remove # Several tags removed to make xhtml conformance easier and to remove
# deprecated and eyestabbery. # deprecated and eyestabbery.
my @allow = ("a", "b", "blockquote", "br", "caption", "col", "colgroup", "comment", my $default_allow = [
"em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "img", "li", "ol", "p", "a", "b", "blockquote", "br", "caption", "col", "colgroup", "comment",
"pre", "small", "span", "strong", "sub", "sup", "table", "tbody", "td", "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "img", "li", "ol", "p",
"tfoot", "th", "thead", "tr", "tt", "ul"); "pre", "small", "span", "strong", "sub", "sup", "table", "tbody", "td",
"tfoot", "th", "thead", "tr", "tt", "ul"
];
# Explicit rules for allowed tags, required to provide per-tag tweaks to the filter. # Explicit rules for allowed tags, required to provide per-tag tweaks to the filter.
my @rules = ( my $default_rules = {
img => { img => {
src => qr{^(?:http|https)://}i, src => qr{^(?:http|https)://}i,
alt => 1, alt => 1,
@ -88,10 +90,10 @@ my @rules = (
title => 1, title => 1,
'*' => 0, '*' => 0,
}, },
); };
# Default ruleset applied when no explicit rule is found for a tag. # Default ruleset applied when no explicit rule is found for a tag.
my @default = ( my $default_default = {
0 => # default rule, deny all tags 0 => # default rule, deny all tags
{ {
'href' => qr{^(?:http|https)://[-\w]+(?:\.[-\w]+)/}i, # Force basic URL forms 'href' => qr{^(?:http|https)://[-\w]+(?:\.[-\w]+)/}i, # Force basic URL forms
@ -100,29 +102,35 @@ my @default = (
'name' => 1, 'name' => 1,
'*' => 0, # default rule, deny all attributes '*' => 0, # default rule, deny all attributes
} }
); };
## @fn $ scrub_html($html) ## @fn $ scrub_html($html, $allow, $rules, $default)
# Remove dangerous/unwanted elements and attributes from a html document. This will # Remove dangerous/unwanted elements and attributes from a html document. This will
# use HTML::Scrubber to remove the elements and attributes from the specified html # use HTML::Scrubber to remove the elements and attributes from the specified html
# that could be used maliciously. There is still the potential for a clever attacker # that could be used maliciously. There is still the potential for a clever attacker
# to craft a page that bypasses this, but that exists pretty much regardless once # to craft a page that bypasses this, but that exists pretty much regardless once
# html input is permitted... # html input is permitted...
# #
# @param html The string containing the html to clean up # @param html The string containing the html to clean up
# @param allow An optional reference to an array of allowed tags to pass to HTML::Scrubber -> new()
# @param rules An optional reference to a hash of rules to pass to HTML::Scrubber -> new()
# @param default An optional reference to a hash of defaults to pass to HTML::Scrubber -> new()
# @return A string containing the scrubbed html. # @return A string containing the scrubbed html.
sub scrub_html { sub scrub_html {
my $html = shift; my $html = shift;
my $allow = shift || $default_allow;
my $rules = shift || $default_rules;
my $default = shift || $default_default;
# Die immediately if there's a nul character in the string, that should never, ever be there. # Die immediately if there's a nul character in the string, that should never, ever be there.
die_log("HACK ATTEMPT", "Hack attempt detected. Sod off.") die_log("HACK ATTEMPT", "Hack attempt detected. Sod off.")
if($html =~ /\0/); if($html =~ /\0/);
# First, a new scrubber # First, a new scrubber
my $scrubber = HTML::Scrubber -> new(allow => \@allow, my $scrubber = HTML::Scrubber -> new(allow => $allow,
rules => \@rules, rules => $rules,
default => \@default, default => $default,
comment => 0, comment => 0,
process => 0); process => 0);