Compare commits

...

2 Commits

Author SHA1 Message Date
11af6d0511 test: add XSS regression tests 2026-02-19 17:50:44 +01:00
5b90ade52a fix(parser): normalize decoded URLs before scheme checks 2026-02-19 17:50:35 +01:00
2 changed files with 62 additions and 7 deletions

View File

@@ -379,16 +379,14 @@ s/\x01F$i\x02/<$part->{tag}>@{[escape_html($part->{content})]}<\/$part->{tag}>/;
sub is_safe_url {
my ($url) = @_;
my $normalized = $url // '';
my $normalized = decode_url_escapes($url);
$normalized =~ s/^\s+//;
$normalized =~ s/\s+$//;
$normalized =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg
while $normalized =~ /%[0-9A-Fa-f]{2}/;
$normalized =~ s/&#x([0-9A-Fa-f]+);?/chr(hex($1))/eg;
$normalized =~ s/&#(\d+);?/chr($1)/eg;
my $scheme_check = $normalized;
$scheme_check =~ s/[\x00-\x20\x7f]+//g;
if ( $normalized =~ /^([a-z][a-z0-9+\-.]*):/i ) {
if ( $scheme_check =~ /^([a-z][a-z0-9+\-.]*):/i ) {
my $scheme = lc $1;
return 1
if $scheme eq 'http' || $scheme eq 'https' || $scheme eq 'mailto';
@@ -398,6 +396,28 @@ sub is_safe_url {
return 1;
}
# Repeatedly decode percent-escapes and numeric HTML character references
# in $value until the string stops changing (at most 8 passes), so nested
# encodings such as "%2561" are unwrapped before the caller inspects the
# URL scheme.
#
# Parameters:
#   $value - string to decode; undef is treated as the empty string.
# Returns:
#   The decoded string. Numeric references above U+10FFFF are removed.
#
# NOTE(review): named references (e.g. "&colon;", "&Tab;", "&NewLine;")
# are not decoded here -- confirm whether callers need them handled.
sub decode_url_escapes {
    my ($value) = @_;
    my $decoded = $value // '';

    # Bounded loop: every pass peels one layer of encoding, and the cap
    # keeps deeply nested input from being processed indefinitely.
    for ( 1 .. 8 ) {
        my $before = $decoded;

        $decoded =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;

        # Hex references: accept both "&#x" and "&#X". Leading zeros are
        # consumed outside the capture so the captured length reflects
        # magnitude; more than six significant hex digits always exceeds
        # U+10FFFF, so such references are dropped without calling hex(),
        # which would emit overflow / non-portable warnings.
        $decoded =~ s{&#[xX](?=[0-9A-Fa-f])0*([0-9A-Fa-f]*);?}
                     {length($1) > 6 ? '' : _safe_chr( hex($1) )}eg;

        # [0-9] rather than \d: \d also matches non-ASCII Unicode digits,
        # which are not valid in a character reference and are not numeric
        # to Perl's chr().
        $decoded =~ s/&#([0-9]+);?/_safe_chr($1)/eg;

        last if $decoded eq $before;
    }
    return $decoded;
}
# Convert a numeric code point to its character, returning the empty
# string for anything that is not a valid code point.
#
# Parameters:
#   $codepoint - non-negative integer (number or string of ASCII digits).
# Returns:
#   chr($codepoint), or '' when $codepoint is undef, not a plain
#   non-negative integer, or greater than 0x10FFFF.
sub _safe_chr {
    my ($codepoint) = @_;
    return '' if !defined $codepoint;

    # Reject negatives, empty strings, and non-integer values up front:
    # chr() would otherwise warn and yield U+FFFD or NUL for them.
    return '' if $codepoint !~ /\A[0-9]+\z/;

    return '' if $codepoint > 0x10FFFF;
    return chr($codepoint);
}
sub escape_html {
my ($text) = @_;
$text =~ s/&/&amp;/g;

View File

@@ -2,7 +2,7 @@
use strict;
use warnings;
use Test::More tests => 10;
use Test::More tests => 17;
use MarkdownParser;
my $parser = MarkdownParser->new();
@@ -48,3 +48,38 @@ is( $parser->parse("![Image](file:///etc/passwd)"),
"<p>Image</p>\n", "File protocol blocked in images" );
is( $parser->parse("![Image](javascript:%2f%2falert('XSS'))"),
"<p>Image</p>\n", "Encoded JavaScript protocol blocked in images" );
# URL-scheme regression cases: each entry is
# [ markdown input, expected HTML, test description ].
# The first four expect the link/image markup to be dropped; the last
# three expect the link to be rendered.
{
    my @scheme_cases = (
        [
            "[Click me](javascript&#10;:alert('XSS'))",
            "<p>Click me</p>\n",
            "JavaScript protocol with numeric newline entity blocked",
        ],
        [
            "[Click me](java&#x0D;script:alert('XSS'))",
            "<p>Click me</p>\n",
            "JavaScript protocol with hex carriage return entity blocked",
        ],
        [
            "[Click me](javascr&#x69;pt%3Aalert('XSS'))",
            "<p>Click me</p>\n",
            "Mixed encoded JavaScript protocol blocked",
        ],
        [
            "![Image](java&#9;script:alert('XSS'))",
            "<p>Image</p>\n",
            "JavaScript protocol with tab entity blocked in images",
        ],
        [
            "[email](mailto:user\@example.com)",
            "<p><a href=\"mailto:user\@example.com\">email</a></p>\n",
            "Mailto protocol remains allowed",
        ],
        [
            "[safe](%68%74%74%70%73://example.com/path)",
            "<p><a href=\"%68%74%74%70%73://example.com/path\">safe</a></p>\n",
            "Percent-encoded https scheme remains allowed",
        ],
        [
            "[relative](/docs/java script:guide)",
            "<p><a href=\"/docs/java script:guide\">relative</a></p>\n",
            "Relative URL with colon in path remains allowed",
        ],
    );

    for my $case (@scheme_cases) {
        my ( $markdown, $expected_html, $description ) = @{$case};
        is( $parser->parse($markdown), $expected_html, $description );
    }
}