Compare commits
9 Commits
2a7daa13fd
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| da24af38e3 | |||
| 6670e81640 | |||
| 3459e91645 | |||
| 9bd98b4fb9 | |||
| cf77dd5cf2 | |||
| cee5bc89fa | |||
| c43f718495 | |||
| c80e7d8e6b | |||
| caaecb1661 |
@@ -1,4 +1,4 @@
|
||||
# m2h - Markdown to HTML Converter
|
||||
# m2h
|
||||
|
||||
A lightweight, pure Perl markdown to HTML converter that uses a state machine for parsing.
|
||||
|
||||
@@ -13,6 +13,7 @@ A lightweight, pure Perl markdown to HTML converter that uses a state machine fo
|
||||
## Requirements
|
||||
|
||||
- Perl 5.42 or higher
|
||||
- Getopt::Long (but it's already installed on most Perl installations)
|
||||
|
||||
## Installation
|
||||
|
||||
|
||||
@@ -288,8 +288,8 @@ sub parse_inline {
|
||||
push @bold_parts, { type => 'bold', content => $1 };
|
||||
return "\x01B$idx\x02";
|
||||
}->()/ge;
|
||||
$text =~ s/___((?:[^_]|_(?!_))+?)___/<strong>$1<\/strong>/g;
|
||||
$text =~ s/__((?:[^_]|_(?!_))+?)__/<strong>$1<\/strong>/g;
|
||||
$text =~ s/(?<!\w)___((?:[^_]|_(?!_))+?)___(?!\w)/<strong>$1<\/strong>/g;
|
||||
$text =~ s/(?<!\w)__((?:[^_]|_(?!_))+?)__(?!\w)/<strong>$1<\/strong>/g;
|
||||
|
||||
my @italic_parts;
|
||||
my $italic_idx = 0;
|
||||
@@ -298,7 +298,7 @@ sub parse_inline {
|
||||
push @italic_parts, { type => 'italic', content => $1 };
|
||||
return "\x01I$idx\x02";
|
||||
}->()/ge;
|
||||
$text =~ s/_([^_]+)_/sub {
|
||||
$text =~ s/(?<!\w)_((?:[^_]|_(?!_))+?)_(?!\w)/sub {
|
||||
my $idx = $italic_idx++;
|
||||
push @italic_parts, { type => 'italic', content => $1 };
|
||||
return "\x01I$idx\x02";
|
||||
@@ -312,7 +312,7 @@ sub parse_inline {
|
||||
push @italic_parts, { type => 'italic', content => $1 };
|
||||
return "\x01I$idx\x02";
|
||||
}->()/ge;
|
||||
$content =~ s/_([^_]+)_/sub {
|
||||
$content =~ s/(?<!\w)_((?:[^_]|_(?!_))+?)_(?!\w)/sub {
|
||||
my $idx = $italic_idx++;
|
||||
push @italic_parts, { type => 'italic', content => $1 };
|
||||
return "\x01I$idx\x02";
|
||||
@@ -379,10 +379,22 @@ s/\x01F$i\x02/<$part->{tag}>@{[escape_html($part->{content})]}<\/$part->{tag}>/;
|
||||
|
||||
sub is_safe_url {
|
||||
my ($url) = @_;
|
||||
return 0 if $url =~ /^\s*javascript:/i;
|
||||
return 0 if $url =~ /^\s*data:/i;
|
||||
return 0 if $url =~ /^\s*vbscript:/i;
|
||||
return 0 if $url =~ /^\s*file:/i;
|
||||
my $normalized = $url // '';
|
||||
|
||||
$normalized =~ s/^\s+//;
|
||||
$normalized =~ s/\s+$//;
|
||||
$normalized =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg
|
||||
while $normalized =~ /%[0-9A-Fa-f]{2}/;
|
||||
$normalized =~ s/&#x([0-9A-Fa-f]+);?/chr(hex($1))/eg;
|
||||
$normalized =~ s/&#(\d+);?/chr($1)/eg;
|
||||
|
||||
if ( $normalized =~ /^([a-z][a-z0-9+\-.]*):/i ) {
|
||||
my $scheme = lc $1;
|
||||
return 1
|
||||
if $scheme eq 'http' || $scheme eq 'https' || $scheme eq 'mailto';
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -397,4 +409,3 @@ sub escape_html {
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
|
||||
65
m2h.pl
65
m2h.pl
@@ -1,6 +1,8 @@
|
||||
#!perl -w
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
use MarkdownParser;
|
||||
use open qw(:std :encoding(UTF-8));
|
||||
|
||||
sub show_help {
|
||||
print <<"EOF";
|
||||
@@ -23,46 +25,45 @@ sub show_version {
|
||||
exit 0;
|
||||
}
|
||||
|
||||
my $output_file;
|
||||
my $input_file;
|
||||
|
||||
for ( my $i = 0 ; $i < @ARGV ; $i++ ) {
|
||||
my $arg = $ARGV[$i];
|
||||
if ( $arg eq '-h' || $arg eq '--help' ) {
|
||||
show_help();
|
||||
}
|
||||
elsif ( $arg eq '-v' || $arg eq '--version' ) {
|
||||
show_version();
|
||||
}
|
||||
elsif ( $arg eq '-o' || $arg eq '--output' ) {
|
||||
$output_file = $ARGV[ ++$i ]
|
||||
or die "Error: -o requires a filename\n";
|
||||
}
|
||||
elsif ( $arg =~ /^-/ ) {
|
||||
die "Error: Unknown option: $arg\n";
|
||||
}
|
||||
else {
|
||||
$input_file = $arg;
|
||||
}
|
||||
}
|
||||
|
||||
my $input;
|
||||
if ($input_file) {
|
||||
open my $fh, '<', $input_file
|
||||
or die "Error: Cannot open file: $input_file\n";
|
||||
sub read_input {
|
||||
my ($file) = @_;
|
||||
local $/;
|
||||
$input = <$fh>;
|
||||
if ($file) {
|
||||
open my $fh, '<', $file
|
||||
or die "Error: Cannot open file: $file\n";
|
||||
binmode $fh, ':encoding(UTF-8)';
|
||||
my $content = <$fh>;
|
||||
close $fh;
|
||||
return $content;
|
||||
}
|
||||
return <STDIN>;
|
||||
}
|
||||
else {
|
||||
local $/;
|
||||
$input = <STDIN>;
|
||||
}
|
||||
|
||||
my $output_file;
|
||||
my $help = 0;
|
||||
my $version = 0;
|
||||
|
||||
GetOptions(
|
||||
'help|h' => \$help,
|
||||
'version|v' => \$version,
|
||||
'output|o=s' => \$output_file,
|
||||
) or show_help();
|
||||
|
||||
show_help() if $help;
|
||||
show_version() if $version;
|
||||
|
||||
my $input_file = shift @ARGV;
|
||||
|
||||
binmode STDIN, ':encoding(UTF-8)';
|
||||
binmode STDOUT, ':encoding(UTF-8)';
|
||||
|
||||
my $input = read_input($input_file);
|
||||
|
||||
my $output;
|
||||
if ($output_file) {
|
||||
open $output, '>', $output_file
|
||||
or die "Error: Cannot write to file: $output_file\n";
|
||||
binmode $output, ':encoding(UTF-8)';
|
||||
}
|
||||
else {
|
||||
$output = \*STDOUT;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Test::More tests => 11;
|
||||
use Test::More tests => 13;
|
||||
use MarkdownParser;
|
||||
|
||||
my $parser = MarkdownParser->new();
|
||||
@@ -62,4 +62,7 @@ is(
|
||||
"<p><strong>bold text</strong></p>\n",
|
||||
"Bold with ___"
|
||||
);
|
||||
|
||||
is( $parser->parse("my_variable"),
|
||||
"<p>my_variable</p>\n", "Underscore inside word unchanged" );
|
||||
is( $parser->parse("CONST__VALUE"),
|
||||
"<p>CONST__VALUE</p>\n", "Double underscores inside word unchanged" );
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Test::More tests => 8;
|
||||
use Test::More tests => 10;
|
||||
use MarkdownParser;
|
||||
|
||||
my $parser = MarkdownParser->new();
|
||||
@@ -37,8 +37,14 @@ is(
|
||||
"<p>Click me</p>\n",
|
||||
"Data protocol blocked in links"
|
||||
);
|
||||
is(
|
||||
$parser->parse("[Click me](javascript:alert('XSS'))"),
|
||||
"<p>Click me</p>\n",
|
||||
"Encoded JavaScript protocol blocked in links"
|
||||
);
|
||||
is( $parser->parse(")"),
|
||||
"<p>Image</p>\n", "JavaScript protocol blocked in images" );
|
||||
is( $parser->parse(""),
|
||||
"<p>Image</p>\n", "File protocol blocked in images" );
|
||||
|
||||
is( $parser->parse(")"),
|
||||
"<p>Image</p>\n", "Encoded JavaScript protocol blocked in images" );
|
||||
|
||||
Reference in New Issue
Block a user