Compare commits
1 Commits
main
...
feat/use-g
| Author | SHA1 | Date | |
|---|---|---|---|
| 277ca8be24 |
@@ -13,7 +13,6 @@ A lightweight, pure Perl markdown to HTML converter that uses a state machine fo
|
|||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
- Perl 5.42 or higher
|
- Perl 5.42 or higher
|
||||||
- Getopt::Long (but it's already installed on most Perl installations)
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
@@ -30,7 +29,7 @@ chmod +x m2h.pl
|
|||||||
Convert a markdown file to HTML:
|
Convert a markdown file to HTML:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
perl -Ilib m2h.pl input.md > output.html
|
perl -Ilibm2h.pl input.md > output.html
|
||||||
```
|
```
|
||||||
|
|
||||||
Or read from stdin:
|
Or read from stdin:
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ my %CLOSING_TAGS = (
|
|||||||
olist => "</ol>",
|
olist => "</ol>",
|
||||||
blockquote => "</blockquote>",
|
blockquote => "</blockquote>",
|
||||||
table => "</table>",
|
table => "</table>",
|
||||||
code_block => "</code></pre>",
|
|
||||||
);
|
);
|
||||||
|
|
||||||
sub new {
|
sub new {
|
||||||
@@ -167,6 +166,7 @@ sub handle_code_block_line {
|
|||||||
my ( $self, $line ) = @_;
|
my ( $self, $line ) = @_;
|
||||||
|
|
||||||
if ( $line =~ /^```/ ) {
|
if ( $line =~ /^```/ ) {
|
||||||
|
$self->{output} .= "</code></pre>\n";
|
||||||
$self->transition_to_state('paragraph');
|
$self->transition_to_state('paragraph');
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -288,8 +288,8 @@ sub parse_inline {
|
|||||||
push @bold_parts, { type => 'bold', content => $1 };
|
push @bold_parts, { type => 'bold', content => $1 };
|
||||||
return "\x01B$idx\x02";
|
return "\x01B$idx\x02";
|
||||||
}->()/ge;
|
}->()/ge;
|
||||||
$text =~ s/(?<!\w)___((?:[^_]|_(?!_))+?)___(?!\w)/<strong>$1<\/strong>/g;
|
$text =~ s/___((?:[^_]|_(?!_))+?)___/<strong>$1<\/strong>/g;
|
||||||
$text =~ s/(?<!\w)__((?:[^_]|_(?!_))+?)__(?!\w)/<strong>$1<\/strong>/g;
|
$text =~ s/__((?:[^_]|_(?!_))+?)__/<strong>$1<\/strong>/g;
|
||||||
|
|
||||||
my @italic_parts;
|
my @italic_parts;
|
||||||
my $italic_idx = 0;
|
my $italic_idx = 0;
|
||||||
@@ -298,7 +298,7 @@ sub parse_inline {
|
|||||||
push @italic_parts, { type => 'italic', content => $1 };
|
push @italic_parts, { type => 'italic', content => $1 };
|
||||||
return "\x01I$idx\x02";
|
return "\x01I$idx\x02";
|
||||||
}->()/ge;
|
}->()/ge;
|
||||||
$text =~ s/(?<!\w)_((?:[^_]|_(?!_))+?)_(?!\w)/sub {
|
$text =~ s/_([^_]+)_/sub {
|
||||||
my $idx = $italic_idx++;
|
my $idx = $italic_idx++;
|
||||||
push @italic_parts, { type => 'italic', content => $1 };
|
push @italic_parts, { type => 'italic', content => $1 };
|
||||||
return "\x01I$idx\x02";
|
return "\x01I$idx\x02";
|
||||||
@@ -312,7 +312,7 @@ sub parse_inline {
|
|||||||
push @italic_parts, { type => 'italic', content => $1 };
|
push @italic_parts, { type => 'italic', content => $1 };
|
||||||
return "\x01I$idx\x02";
|
return "\x01I$idx\x02";
|
||||||
}->()/ge;
|
}->()/ge;
|
||||||
$content =~ s/(?<!\w)_((?:[^_]|_(?!_))+?)_(?!\w)/sub {
|
$content =~ s/_([^_]+)_/sub {
|
||||||
my $idx = $italic_idx++;
|
my $idx = $italic_idx++;
|
||||||
push @italic_parts, { type => 'italic', content => $1 };
|
push @italic_parts, { type => 'italic', content => $1 };
|
||||||
return "\x01I$idx\x02";
|
return "\x01I$idx\x02";
|
||||||
@@ -379,45 +379,13 @@ s/\x01F$i\x02/<$part->{tag}>@{[escape_html($part->{content})]}<\/$part->{tag}>/;
|
|||||||
|
|
||||||
sub is_safe_url {
|
sub is_safe_url {
|
||||||
my ($url) = @_;
|
my ($url) = @_;
|
||||||
my $normalized = decode_url_escapes($url);
|
return 0 if $url =~ /^\s*javascript:/i;
|
||||||
|
return 0 if $url =~ /^\s*data:/i;
|
||||||
$normalized =~ s/^\s+//;
|
return 0 if $url =~ /^\s*vbscript:/i;
|
||||||
$normalized =~ s/\s+$//;
|
return 0 if $url =~ /^\s*file:/i;
|
||||||
my $scheme_check = $normalized;
|
|
||||||
$scheme_check =~ s/[\x00-\x20\x7f]+//g;
|
|
||||||
|
|
||||||
if ( $scheme_check =~ /^([a-z][a-z0-9+\-.]*):/i ) {
|
|
||||||
my $scheme = lc $1;
|
|
||||||
return 1
|
|
||||||
if $scheme eq 'http' || $scheme eq 'https' || $scheme eq 'mailto';
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub decode_url_escapes {
|
|
||||||
my ($value) = @_;
|
|
||||||
my $decoded = $value // '';
|
|
||||||
|
|
||||||
for ( 1 .. 8 ) {
|
|
||||||
my $before = $decoded;
|
|
||||||
$decoded =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
|
|
||||||
$decoded =~ s/&#x([0-9A-Fa-f]+);?/_safe_chr(hex($1))/eg;
|
|
||||||
$decoded =~ s/&#(\d+);?/_safe_chr($1)/eg;
|
|
||||||
last if $decoded eq $before;
|
|
||||||
}
|
|
||||||
|
|
||||||
return $decoded;
|
|
||||||
}
|
|
||||||
|
|
||||||
sub _safe_chr {
|
|
||||||
my ($codepoint) = @_;
|
|
||||||
return '' if !defined $codepoint;
|
|
||||||
return '' if $codepoint > 0x10FFFF;
|
|
||||||
return chr($codepoint);
|
|
||||||
}
|
|
||||||
|
|
||||||
sub escape_html {
|
sub escape_html {
|
||||||
my ($text) = @_;
|
my ($text) = @_;
|
||||||
$text =~ s/&/&amp;/g;
|
$text =~ s/&/&amp;/g;
|
||||||
@@ -429,3 +397,4 @@ sub escape_html {
|
|||||||
}
|
}
|
||||||
|
|
||||||
1;
|
1;
|
||||||
|
|
||||||
|
|||||||
8
m2h.pl
8
m2h.pl
@@ -1,8 +1,7 @@
|
|||||||
#!/usr/bin/env perl -w
|
#!perl -w
|
||||||
use strict;
|
use strict;
|
||||||
use Getopt::Long;
|
use Getopt::Long;
|
||||||
use MarkdownParser;
|
use MarkdownParser;
|
||||||
use open qw(:std :encoding(UTF-8));
|
|
||||||
|
|
||||||
sub show_help {
|
sub show_help {
|
||||||
print <<"EOF";
|
print <<"EOF";
|
||||||
@@ -31,7 +30,6 @@ sub read_input {
|
|||||||
if ($file) {
|
if ($file) {
|
||||||
open my $fh, '<', $file
|
open my $fh, '<', $file
|
||||||
or die "Error: Cannot open file: $file\n";
|
or die "Error: Cannot open file: $file\n";
|
||||||
binmode $fh, ':encoding(UTF-8)';
|
|
||||||
my $content = <$fh>;
|
my $content = <$fh>;
|
||||||
close $fh;
|
close $fh;
|
||||||
return $content;
|
return $content;
|
||||||
@@ -54,16 +52,12 @@ show_version() if $version;
|
|||||||
|
|
||||||
my $input_file = shift @ARGV;
|
my $input_file = shift @ARGV;
|
||||||
|
|
||||||
binmode STDIN, ':encoding(UTF-8)';
|
|
||||||
binmode STDOUT, ':encoding(UTF-8)';
|
|
||||||
|
|
||||||
my $input = read_input($input_file);
|
my $input = read_input($input_file);
|
||||||
|
|
||||||
my $output;
|
my $output;
|
||||||
if ($output_file) {
|
if ($output_file) {
|
||||||
open $output, '>', $output_file
|
open $output, '>', $output_file
|
||||||
or die "Error: Cannot write to file: $output_file\n";
|
or die "Error: Cannot write to file: $output_file\n";
|
||||||
binmode $output, ':encoding(UTF-8)';
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$output = \*STDOUT;
|
$output = \*STDOUT;
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
use strict;
|
use strict;
|
||||||
use warnings;
|
use warnings;
|
||||||
|
|
||||||
use Test::More tests => 13;
|
use Test::More tests => 11;
|
||||||
use MarkdownParser;
|
use MarkdownParser;
|
||||||
|
|
||||||
my $parser = MarkdownParser->new();
|
my $parser = MarkdownParser->new();
|
||||||
@@ -62,7 +62,4 @@ is(
|
|||||||
"<p><strong>bold text</strong></p>\n",
|
"<p><strong>bold text</strong></p>\n",
|
||||||
"Bold with ___"
|
"Bold with ___"
|
||||||
);
|
);
|
||||||
is( $parser->parse("my_variable"),
|
|
||||||
"<p>my_variable</p>\n", "Underscore inside word unchanged" );
|
|
||||||
is( $parser->parse("CONST__VALUE"),
|
|
||||||
"<p>CONST__VALUE</p>\n", "Double underscores inside word unchanged" );
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
use strict;
|
use strict;
|
||||||
use warnings;
|
use warnings;
|
||||||
|
|
||||||
use Test::More tests => 17;
|
use Test::More tests => 8;
|
||||||
use MarkdownParser;
|
use MarkdownParser;
|
||||||
|
|
||||||
my $parser = MarkdownParser->new();
|
my $parser = MarkdownParser->new();
|
||||||
@@ -37,49 +37,8 @@ is(
|
|||||||
"<p>Click me</p>\n",
|
"<p>Click me</p>\n",
|
||||||
"Data protocol blocked in links"
|
"Data protocol blocked in links"
|
||||||
);
|
);
|
||||||
is(
|
|
||||||
$parser->parse("[Click me](javascript:alert('XSS'))"),
|
|
||||||
"<p>Click me</p>\n",
|
|
||||||
"Encoded JavaScript protocol blocked in links"
|
|
||||||
);
|
|
||||||
is( $parser->parse(")"),
|
is( $parser->parse(")"),
|
||||||
"<p>Image</p>\n", "JavaScript protocol blocked in images" );
|
"<p>Image</p>\n", "JavaScript protocol blocked in images" );
|
||||||
is( $parser->parse(""),
|
is( $parser->parse(""),
|
||||||
"<p>Image</p>\n", "File protocol blocked in images" );
|
"<p>Image</p>\n", "File protocol blocked in images" );
|
||||||
is( $parser->parse(")"),
|
|
||||||
"<p>Image</p>\n", "Encoded JavaScript protocol blocked in images" );
|
|
||||||
is(
|
|
||||||
$parser->parse("[Click me](javascript :alert('XSS'))"),
|
|
||||||
"<p>Click me</p>\n",
|
|
||||||
"JavaScript protocol with numeric newline entity blocked"
|
|
||||||
);
|
|
||||||
is(
|
|
||||||
$parser->parse("[Click me](java
script:alert('XSS'))"),
|
|
||||||
"<p>Click me</p>\n",
|
|
||||||
"JavaScript protocol with hex carriage return entity blocked"
|
|
||||||
);
|
|
||||||
is(
|
|
||||||
$parser->parse("[Click me](javascript%3Aalert('XSS'))"),
|
|
||||||
"<p>Click me</p>\n",
|
|
||||||
"Mixed encoded JavaScript protocol blocked"
|
|
||||||
);
|
|
||||||
is(
|
|
||||||
$parser->parse(")"),
|
|
||||||
"<p>Image</p>\n",
|
|
||||||
"JavaScript protocol with tab entity blocked in images"
|
|
||||||
);
|
|
||||||
is(
|
|
||||||
$parser->parse("[email](mailto:user\@example.com)"),
|
|
||||||
"<p><a href=\"mailto:user\@example.com\">email</a></p>\n",
|
|
||||||
"Mailto protocol remains allowed"
|
|
||||||
);
|
|
||||||
is(
|
|
||||||
$parser->parse("[safe](%68%74%74%70%73://example.com/path)"),
|
|
||||||
"<p><a href=\"%68%74%74%70%73://example.com/path\">safe</a></p>\n",
|
|
||||||
"Percent-encoded https scheme remains allowed"
|
|
||||||
);
|
|
||||||
is(
|
|
||||||
$parser->parse("[relative](/docs/java script:guide)"),
|
|
||||||
"<p><a href=\"/docs/java script:guide\">relative</a></p>\n",
|
|
||||||
"Relative URL with colon in path remains allowed"
|
|
||||||
);
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
use strict;
|
use strict;
|
||||||
use warnings;
|
use warnings;
|
||||||
|
|
||||||
use Test::More tests => 6;
|
use Test::More tests => 5;
|
||||||
use MarkdownParser;
|
use MarkdownParser;
|
||||||
|
|
||||||
my $parser = MarkdownParser->new();
|
my $parser = MarkdownParser->new();
|
||||||
@@ -32,8 +32,4 @@ is(
|
|||||||
"<pre><code>\n</code></pre>\n",
|
"<pre><code>\n</code></pre>\n",
|
||||||
"Empty code block"
|
"Empty code block"
|
||||||
);
|
);
|
||||||
is(
|
|
||||||
$parser->parse("```\nunterminated"),
|
|
||||||
"<pre><code>\nunterminated\n</code></pre>\n",
|
|
||||||
"Unclosed code block is closed at EOF"
|
|
||||||
);
|
|
||||||
|
|||||||
Reference in New Issue
Block a user