Allow passing of override scrubber options.

This commit is contained in:
Chris 2013-05-22 14:44:38 +01:00
parent 8c70807c73
commit c82ee9c8bb
2 changed files with 23 additions and 15 deletions

View File

@ -378,7 +378,7 @@ sub validate_htmlarea {
# Now we get to the actual validation and stuff. Begin by scrubbing any tags
# and other crap we don't want out completely. As far as I can tell, this should
# always generate a result of some kind...
$text = scrub_html($text);
$text = scrub_html($text, $settings -> {"allow_tags"}, $settings -> {"tag_rules"}, $settings -> {"scrub_defaults"});
# ... but check, just in case
return ("", $self -> {"template"} -> replace_langvar("BLOCK_VALIDATE_SCRUBFAIL", {"***field***" => $settings -> {"nicename"}}))

View File

@ -38,13 +38,15 @@ our @EXPORT = qw(scrub_html tidy_html check_xhtml);
# discussion on http://wiki.moxiecode.com/index.php/TinyMCE:Security
# Several tags removed to make xhtml conformance easier and to remove
# deprecated tags and eyestabbery.
my @allow = ("a", "b", "blockquote", "br", "caption", "col", "colgroup", "comment",
my $default_allow = [
"a", "b", "blockquote", "br", "caption", "col", "colgroup", "comment",
"em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "img", "li", "ol", "p",
"pre", "small", "span", "strong", "sub", "sup", "table", "tbody", "td",
"tfoot", "th", "thead", "tr", "tt", "ul");
"tfoot", "th", "thead", "tr", "tt", "ul"
];
# Explicit rules for allowed tags, required to provide per-tag tweaks to the filter.
my @rules = (
my $default_rules = {
img => {
src => qr{^(?:http|https)://}i,
alt => 1,
@ -88,10 +90,10 @@ my @rules = (
title => 1,
'*' => 0,
},
);
};
# Default ruleset applied when no explicit rule is found for a tag.
my @default = (
my $default_default = {
0 => # default rule, deny all tags
{
'href' => qr{^(?:http|https)://[-\w]+(?:\.[-\w]+)/}i, # Force basic URL forms
@ -100,10 +102,10 @@ my @default = (
'name' => 1,
'*' => 0, # default rule, deny all attributes
}
);
};
## @fn $ scrub_html($html)
## @fn $ scrub_html($html, $allow, $rules, $default)
# Remove dangerous/unwanted elements and attributes from a html document. This will
# use HTML::Scrubber to remove the elements and attributes from the specified html
# that could be used maliciously. There is still the potential for a clever attacker
@ -111,18 +113,24 @@ my @default = (
# html input is permitted...
#
# @param html The string containing the html to clean up
# @param allow An optional reference to an array of allowed tags to pass to HTML::Scrubber -> new()
# @param rules An optional reference to a hash of rules to pass to HTML::Scrubber -> new()
# @param default An optional reference to a hash of defaults to pass to HTML::Scrubber -> new()
# @return A string containing the scrubbed html.
sub scrub_html {
my $html = shift;
my $allow = shift || $default_allow;
my $rules = shift || $default_rules;
my $default = shift || $default_default;
# Die immediately if there's a nul character in the string, that should never, ever be there.
die_log("HACK ATTEMPT", "Hack attempt detected. Sod off.")
if($html =~ /\0/);
# First, a new scrubber
my $scrubber = HTML::Scrubber -> new(allow => \@allow,
rules => \@rules,
default => \@default,
my $scrubber = HTML::Scrubber -> new(allow => $allow,
rules => $rules,
default => $default,
comment => 0,
process => 0);