Compare commits

...

14 Commits

16 changed files with 1151 additions and 0 deletions

36
Makefile Normal file
View File

@@ -0,0 +1,36 @@
# Install, test and maintenance targets for m2h (pure Perl, nothing to build).
#
# Variables:
#   PREFIX   installation root (override on the command line)
#   BINDIR   directory receiving the m2h executable
#   PERLLIB  directory receiving MarkdownParser.pm
#   SCRIPT   source script name
#   TARGET   installed executable name
PREFIX ?= /usr/local
BINDIR = $(PREFIX)/bin
PERLLIB = $(PREFIX)/lib/perl5/site_perl
SCRIPT = m2h.pl
TARGET = m2h

# Every target here is a command, not a file; install-bin and install-lib
# are listed too so a stray file of the same name cannot mask them.
.PHONY: all install install-bin install-lib uninstall test clean tidy help

all:

install: install-bin install-lib

install-bin:
	@mkdir -p $(BINDIR)
	@cp $(SCRIPT) $(BINDIR)/$(TARGET)
	@chmod +x $(BINDIR)/$(TARGET)

install-lib:
	@mkdir -p $(PERLLIB)
	@cp lib/MarkdownParser.pm $(PERLLIB)/

uninstall:
	@rm -f $(BINDIR)/$(TARGET)
	@rm -f $(PERLLIB)/MarkdownParser.pm

test:
	@prove -lr t

clean:
	@rm -f *.html
	@find . -name \*.bak -exec rm -fv {} \;

tidy:
	@perltidy -b $(SCRIPT) lib/MarkdownParser.pm t/*

# Declared in .PHONY; without a rule `make help` fails with "No rule to make target".
help:
	@echo "Targets: install install-bin install-lib uninstall test clean tidy help"
	@echo "Variables: PREFIX=$(PREFIX) BINDIR=$(BINDIR) PERLLIB=$(PERLLIB)"

110
README.md Normal file
View File

@@ -0,0 +1,110 @@
# m2h
A lightweight, pure Perl markdown to HTML converter that uses a state machine for parsing.
## Features
- Pure Perl implementation - no external dependencies
- State machine-based parsing for efficient and maintainable code
- Converts standard markdown syntax to HTML
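- Secure HTML output: text is HTML-escaped, and links or images using unsafe URL schemes (e.g. `javascript:`, `data:`, `file:`) are not emitted, to guard against XSS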
- Fast and lightweight
## Requirements
- Perl 5.42 or higher
- Getopt::Long (but it's already installed on most Perl installations)
## Installation
No installation required. Simply download the script and make it executable:
```bash
chmod +x m2h.pl
```
## Usage
### Basic Usage
Convert a markdown file to HTML:
```bash
perl -Ilib m2h.pl input.md > output.html
```
Or read from stdin:
```bash
cat input.md | perl -Ilib m2h.pl > output.html
```
### Command Line Options
```
m2h [options] [file]
Options:
-h, --help Show this help message
-v, --version Show version information
-o, --output Specify output file (default: stdout)
```
Install the script using:
```bash
make install
```
Run the test suite using:
```bash
make test
```
## Supported Markdown Features
- Headers (H1-H6)
- Paragraphs
- Bold and italic text
- Links
- Images
- Lists (ordered and unordered)
- Code blocks and inline code
- Blockquotes
- Horizontal rules
- Tables
## How It Works
m2h uses a state machine to parse markdown text. The parser transitions between different states (e.g., paragraph, code block, list) based on the input it encounters, allowing for efficient and accurate parsing of markdown syntax.
## Example
### Input (markdown)
```markdown
# Hello World
This is a **bold** and _italic_ example.
- Item 1
- Item 2
- Item 3
```
### Output (HTML)
```html
<h1>Hello World</h1>
<p>This is a <strong>bold</strong> and <em>italic</em> example.</p>
<ul>
<li>Item 1</li>
<li>Item 2</li>
<li>Item 3</li>
</ul>
```
## License
MIT License, see [LICENSE](LICENSE) for details.

411
lib/MarkdownParser.pm Normal file
View File

@@ -0,0 +1,411 @@
package MarkdownParser;
use strict;
use warnings;
# Module version; m2h.pl prints it for --version.
our $VERSION = '1.0';
# Closing tag emitted by finish_state() when the parser leaves one of these
# block states. States not listed here get no closing tag from finish_state().
my %CLOSING_TAGS = (
ulist => "</ul>",
olist => "</ol>",
blockquote => "</blockquote>",
table => "</table>",
);
# Constructor. Returns a parser object in its initial condition:
# 'paragraph' state, empty output string and an empty list of input lines.
sub new {
    my ($class) = @_;

    my %fields = (
        state  => 'paragraph',
        output => '',
        lines  => [],
    );

    my $self = bless \%fields, $class;
    return $self;
}
# Installs the line-type dispatch table in $self->{handlers}.
#
# Keys are the `type` values produced by detect_line_type(); each value is a
# code ref that process_line() invokes as $handler->( $parser, $line_type ).
# There is deliberately no 'paragraph' entry: process_line() falls back to
# handle_paragraph_line() when no handler is found.
sub _init_handlers {
    my $self = shift;

    $self->{handlers} = {

        # Opening fence: close whatever block was open and start <pre><code>.
        code_block_start => sub {
            my ( $parser, $line_type ) = @_;
            $parser->transition_to_state('code_block');
            $parser->{output} .= "<pre><code>\n";
        },

        # ATX heading. finish_state() is used instead of
        # transition_to_state('paragraph') because the latter does nothing
        # when the parser is already in 'paragraph' state, which left a
        # buffered paragraph to be emitted *after* the heading.
        header => sub {
            my ( $parser, $line_type ) = @_;
            $parser->finish_state();
            my $level = length( $line_type->{match} );
            $parser->{output} .=
                "<h$level>"
              . $parser->parse_inline( $line_type->{text} )
              . "</h$level>\n";
        },

        blockquote => sub {
            my ( $parser, $line_type ) = @_;
            $parser->handle_list_or_blockquote( 'blockquote', '<blockquote>',
                $line_type->{text} );
        },

        ulist => sub {
            my ( $parser, $line_type ) = @_;
            $parser->handle_list_or_blockquote( 'ulist', '<ul>',
                $line_type->{text} );
        },

        olist => sub {
            my ( $parser, $line_type ) = @_;
            $parser->handle_list_or_blockquote( 'olist', '<ol>',
                $line_type->{text} );
        },

        # Same reasoning as for headings: flush a pending paragraph first.
        horizontal_rule => sub {
            my ( $parser, $line_type ) = @_;
            $parser->finish_state();
            $parser->{output} .= "<hr>\n";
        },

        # First row of a table opens <table> and is rendered with <th> cells
        # until a separator row clears table_is_header.
        table_row => sub {
            my ( $parser, $line_type ) = @_;
            if ( $parser->{state} ne 'table' ) {
                $parser->transition_to_state('table');
                $parser->{output} .= "<table>\n";
                $parser->{table_is_header} = 1;
            }
            $parser->handle_table_row( $line_type->{text} );
        },

        # A separator row produces no output; inside a table it ends the
        # header section.
        table_separator => sub {
            my ( $parser, $line_type ) = @_;
            if ( $parser->{state} eq 'table' ) {
                $parser->{table_is_header} = 0;
            }
        },

        # A blank line terminates the current block.
        blank => sub {
            my ( $parser, $line_type ) = @_;
            $parser->finish_state();
        },
    };
}
# Converts a complete markdown document to HTML.
#
#   $text  markdown source; undef is treated as an empty document
#
# Resets all per-document state, feeds the input to process_line() one line
# at a time (LF or CRLF terminated) and returns the accumulated HTML string.
sub parse {
    my ( $self, $text ) = @_;
    $text //= '';

    $self->{output} = '';
    $self->{lines}  = [ split /\r?\n/, $text ];
    $self->{state}  = 'paragraph';
    delete $self->{paragraph_buffer};
    $self->_init_handlers();

    for my $line ( @{ $self->{lines} } ) {
        $self->process_line($line);
    }

    # A fence that is never closed would otherwise leave <pre><code> open:
    # finish_state() has no closing tag for the 'code_block' state.
    if ( $self->{state} eq 'code_block' ) {
        $self->{output} .= "</code></pre>\n";
    }

    $self->finish_state();
    return $self->{output};
}
# Routes one input line to the code responsible for it.
#
# While inside a code block or a table the line goes straight to that state's
# own handler. Otherwise the line is classified with detect_line_type() and
# dispatched through $self->{handlers}; unclassified lines are treated as
# paragraph text.
sub process_line {
    my ( $self, $line ) = @_;

    my %method_for_state = (
        code_block => 'handle_code_block_line',
        table      => 'handle_table_line',
    );

    if ( my $method = $method_for_state{ $self->{state} } ) {
        $self->$method($line);
        return;
    }

    my $line_type = $self->detect_line_type($line);
    my $handler   = $self->{handlers}->{ $line_type->{type} };

    return $handler->( $self, $line_type ) if $handler;
    return $self->handle_paragraph_line($line);
}
# Classifies a single line of markdown.
#
# Returns a hash ref with a `type` key (code_block_start, header, blockquote,
# ulist, olist, horizontal_rule, blank, table_separator, table_row or
# paragraph). Headers also carry `match` (the run of '#') and `text`;
# blockquotes, list items and table rows carry `text`.
#
# The checks are ordered: list markers are tested before horizontal rules,
# and the first matching rule wins.
sub detect_line_type {
    my ( $self, $line ) = @_;

    return { type => 'code_block_start' } if $line =~ /^```/;

    if ( $line =~ /^(#{1,6})\s+(.+)/ ) {
        return { type => 'header', match => $1, text => $2 };
    }
    if ( $line =~ /^>\s+(.+)/ ) {
        return { type => 'blockquote', text => $1 };
    }
    if ( $line =~ /^[-*+]\s+(.+)/ ) {
        return { type => 'ulist', text => $1 };
    }
    if ( $line =~ /^\d+\.\s+(.+)/ ) {
        return { type => 'olist', text => $1 };
    }

    return { type => 'horizontal_rule' } if $line =~ /^[-*_]{3,}$/;
    return { type => 'blank' }           if $line =~ /^\s*$/;

    if ( $line =~ /^\|.+\|/ ) {

        # A separator row consists solely of pipes, dashes, colons and
        # whitespace and contains at least one dash. Checking the whole
        # line (not just the first cell) keeps data rows such as
        # "|  | value |" from being mistaken for a separator and dropped.
        if ( $line =~ /^\|[\s:|-]+$/ && $line =~ /-/ ) {
            return { type => 'table_separator' };
        }
        return { type => 'table_row', text => $line };
    }

    return { type => 'paragraph' };
}
# Switches the parser to $new_state. When the state actually changes, the
# current block is closed via finish_state() first; asking for the state the
# parser is already in does nothing.
sub transition_to_state {
    my ( $self, $new_state ) = @_;

    my $is_changing = $self->{state} ne $new_state;
    if ($is_changing) {
        $self->finish_state();
        $self->{state} = $new_state;
    }
}
# Handles a line while inside a fenced code block: a line starting with ```
# closes the block and returns to paragraph state, anything else is appended
# to the output HTML-escaped, followed by a newline.
sub handle_code_block_line {
    my ( $self, $line ) = @_;

    my $is_closing_fence = $line =~ /^```/;

    if ( !$is_closing_fence ) {
        $self->{output} .= escape_html($line) . "\n";
    }
    else {
        $self->{output} .= "</code></pre>\n";
        $self->transition_to_state('paragraph');
    }
}
# Emits one item of a list or blockquote.
#
#   $target_state  'ulist', 'olist' or 'blockquote'
#   $open_tag      container tag written when the block is first entered
#   $text          raw item text, rendered through parse_inline()
#
# Blockquote lines are wrapped in <p>, list items in <li>.
sub handle_list_or_blockquote {
    my ( $self, $target_state, $open_tag, $text ) = @_;

    unless ( $self->{state} eq $target_state ) {
        $self->transition_to_state($target_state);
        $self->{output} .= $open_tag . "\n";
    }

    my $item_tag = 'li';
    $item_tag = 'p' if $target_state eq 'blockquote';

    my $rendered = $self->parse_inline($text);
    $self->{output} .= "<$item_tag>$rendered</$item_tag>\n";
}
# Appends a line of plain text to the pending paragraph.
#
# Consecutive lines are joined with a single space in
# $self->{paragraph_buffer}; the paragraph is emitted later by finish_state().
sub handle_paragraph_line {
    my ( $self, $line ) = @_;

    if ( $self->{state} ne 'paragraph' ) {
        $self->transition_to_state('paragraph');
    }

    my $buffer = $self->{paragraph_buffer} // '';

    # Test the length, not the truth value: a buffer holding just "0" is
    # false in Perl and would otherwise be glued to the next line.
    $buffer .= ' ' if length $buffer;

    $self->{paragraph_buffer} = $buffer . $line;
}
# Handles a line while inside a table.
#
# A separator row ends the header section, a table row is rendered, and any
# other line closes the table and is then processed again from the top as an
# ordinary line.
sub handle_table_line {
    my ( $self, $line ) = @_;

    my $classified = $self->detect_line_type($line);
    my $kind       = $classified->{type};

    if ( $kind eq 'table_separator' ) {
        $self->{table_is_header} = 0;
        return;
    }
    elsif ( $kind eq 'table_row' ) {
        $self->handle_table_row( $classified->{text} );
        return;
    }

    # Not part of the table any more.
    $self->finish_state();
    $self->process_line($line);
}
# Renders one table row as <tr> with a <th> or <td> per cell.
#
#   $row  the raw line, including its leading and trailing pipes
#
# Cells are <th> while $self->{table_is_header} is true, <td> afterwards.
# Cell text is trimmed and passed through parse_inline().
sub handle_table_row {
    my ( $self, $row ) = @_;

    # Drop the outer pipes. Whitespace after the closing pipe is tolerated
    # so it does not turn into a spurious extra cell.
    $row =~ s/^\|\s*//;
    $row =~ s/\s*\|\s*$//;

    # Limit -1 keeps trailing empty cells, which split would otherwise drop.
    my @cells = split /\|/, $row, -1;
    for my $cell (@cells) {
        $cell =~ s/^\s+//;
        $cell =~ s/\s+$//;
    }

    my $tag = $self->{table_is_header} ? 'th' : 'td';

    $self->{output} .= "<tr>\n";
    for my $cell (@cells) {
        $self->{output} .= "<$tag>" . $self->parse_inline($cell) . "</$tag>\n";
    }
    $self->{output} .= "</tr>\n";
}
# Closes the block the parser is currently in and returns to 'paragraph'
# state.
#
# In paragraph state a buffered paragraph containing non-whitespace is
# emitted as <p>...</p> and the buffer is discarded. In any state listed in
# %CLOSING_TAGS the matching closing tag is written. Other states produce no
# output.
sub finish_state {
    my ($self) = @_;

    my $state = $self->{state};

    my $has_pending_paragraph =
         $state eq 'paragraph'
      && exists $self->{paragraph_buffer}
      && $self->{paragraph_buffer} =~ /\S/;

    if ($has_pending_paragraph) {
        my $rendered = $self->parse_inline( $self->{paragraph_buffer} );
        $self->{output} .= "<p>" . $rendered . "</p>\n";
        delete $self->{paragraph_buffer};
    }
    elsif ( exists $CLOSING_TAGS{$state} ) {
        my $closing_tag = $CLOSING_TAGS{$state};
        $self->{output} .= $closing_tag . "\n";
    }

    $self->{state} = 'paragraph';
}
# Converts inline markdown (code spans, images, links, bold, italic) in $text
# to HTML and returns the result. $self is accepted but not used in the body.
#
# The routine works by swapping recognised constructs for placeholders of the
# form \x01<id>\x02, escaping what is left with escape_html(), and then
# substituting the rendered HTML back in. The order of the passes matters.
sub parse_inline {
my ( $self, $text ) = @_;
# Code spans, images and links share one placeholder list; ids are plain
# numbers (\x01N\x02). They are extracted first so the emphasis passes below
# never see their contents.
my @placeholders;
my $placeholder_idx = 0;
$text =~ s/`([^`]+)`/sub {
my $idx = $placeholder_idx++;
push @placeholders, { type => 'code', content => $1 };
return "\x01$idx\x02";
}->()/ge;
# Images are matched before links because their syntax is a link preceded by
# '!'. The URL part allows one level of nested parentheses.
$text =~ s/!\[([^\]]*)\]\(((?:[^()]|\([^()]*\))+)\)/sub {
my $idx = $placeholder_idx++;
push @placeholders, { type => 'image', alt => $1, url => $2 };
return "\x01$idx\x02";
}->()/ge;
$text =~ s/\[([^\]]+)\]\(((?:[^()]|\([^()]*\))+)\)/sub {
my $idx = $placeholder_idx++;
push @placeholders, { type => 'link', text => $1, url => $2 };
return "\x01$idx\x02";
}->()/ge;
# Asterisk bold: ***x*** and **x** are both stored as 'bold' and replaced by
# \x01B<N>\x02 placeholders.
my @bold_parts;
my $bold_idx = 0;
$text =~ s/\*\*\*((?:[^*]|\*(?!\*))+)\*\*\*/sub {
my $idx = $bold_idx++;
push @bold_parts, { type => 'bold', content => $1 };
return "\x01B$idx\x02";
}->()/ge;
$text =~ s/\*\*((?:[^*]|\*(?!\*))+)\*\*/sub {
my $idx = $bold_idx++;
push @bold_parts, { type => 'bold', content => $1 };
return "\x01B$idx\x02";
}->()/ge;
# Underscore bold is turned directly into <strong> tags. The lookarounds
# require that the markers are not adjacent to a word character, so
# identifiers such as CONST__VALUE are left alone.
$text =~ s/(?<!\w)___((?:[^_]|_(?!_))+?)___(?!\w)/<strong>$1<\/strong>/g;
$text =~ s/(?<!\w)__((?:[^_]|_(?!_))+?)__(?!\w)/<strong>$1<\/strong>/g;
# Italics (*x* and _x_) become \x01I<N>\x02 placeholders.
my @italic_parts;
my $italic_idx = 0;
$text =~ s/\*([^*]+)\*/sub {
my $idx = $italic_idx++;
push @italic_parts, { type => 'italic', content => $1 };
return "\x01I$idx\x02";
}->()/ge;
$text =~ s/(?<!\w)_((?:[^_]|_(?!_))+?)_(?!\w)/sub {
my $idx = $italic_idx++;
push @italic_parts, { type => 'italic', content => $1 };
return "\x01I$idx\x02";
}->()/ge;
# Expand each bold placeholder into <strong>...</strong>. Italics nested
# inside the bold content are first swapped for italic placeholders so that
# "**bold *italic* bold**" nests correctly.
for ( my $i = 0 ; $i < @bold_parts ; $i++ ) {
my $part = $bold_parts[$i];
my $content = $part->{content};
$content =~ s/\*([^*]+)\*/sub {
my $idx = $italic_idx++;
push @italic_parts, { type => 'italic', content => $1 };
return "\x01I$idx\x02";
}->()/ge;
$content =~ s/(?<!\w)_((?:[^_]|_(?!_))+?)_(?!\w)/sub {
my $idx = $italic_idx++;
push @italic_parts, { type => 'italic', content => $1 };
return "\x01I$idx\x02";
}->()/ge;
$text =~ s/\x01B$i\x02/<strong>$content<\/strong>/;
}
# Pull every <strong>/<em> span out of the text (as \x01F<N>\x02) so that the
# escape_html() call below does not escape the tags themselves.
# NOTE(review): this pattern also matches <strong>/<em> tags typed literally
# in the input, and the closing tag name is not required to equal the opening
# one - confirm that is intended.
my @format_parts;
my $format_idx = 0;
$text =~ s/<(strong|em)>(.*?)<\/(strong|em)>/sub {
my $idx = $format_idx++;
push @format_parts, { tag => $1, content => $2 };
return "\x01F$idx\x02";
}->()/gse;
# Everything still in $text at this point is plain text: escape it.
$text = escape_html($text);
# Restore formatting spans, escaping their content on the way back in.
for ( my $i = 0 ; $i < @format_parts ; $i++ ) {
my $part = $format_parts[$i];
$text =~
s/\x01F$i\x02/<$part->{tag}>@{[escape_html($part->{content})]}<\/$part->{tag}>/;
}
# Restore italics after the formatting spans, because an italic placeholder
# may sit inside restored <strong> content.
for ( my $i = 0 ; $i < @italic_parts ; $i++ ) {
my $part = $italic_parts[$i];
$text =~ s/\x01I$i\x02/<em>@{[escape_html($part->{content})]}<\/em>/;
}
# Restore code spans, images and links. Image and link URLs are checked with
# is_safe_url(); when the check fails only the escaped alt text / link text
# is emitted, with no tag.
# NOTE(review): placeholders are restored in ascending index order, so a code
# span or image captured inside link text (a lower index nested in a higher
# one) looks like it is never restored - confirm and handle with care, since
# restoring an image placeholder inside an href attribute would be unsafe.
for ( my $i = 0 ; $i < @placeholders ; $i++ ) {
my $part = $placeholders[$i];
my $replacement;
if ( $part->{type} eq 'code' ) {
$replacement =
"<code>" . escape_html( $part->{content} ) . "</code>";
}
elsif ( $part->{type} eq 'image' ) {
if ( is_safe_url( $part->{url} ) ) {
$replacement =
"<img src=\""
. escape_html( $part->{url} )
. "\" alt=\""
. escape_html( $part->{alt} ) . "\">";
}
else {
$replacement = escape_html( $part->{alt} );
}
}
elsif ( $part->{type} eq 'link' ) {
if ( is_safe_url( $part->{url} ) ) {
$replacement =
"<a href=\""
. escape_html( $part->{url} ) . "\">"
. escape_html( $part->{text} ) . "</a>";
}
else {
$replacement = escape_html( $part->{text} );
}
}
$text =~ s/\x01$i\x02/$replacement/;
}
return $text;
}
# Decides whether a link or image URL may be emitted.
#
#   $url  raw URL text from the markdown source (undef is treated as '')
#
# Returns 1 for URLs without a scheme (relative URLs) and for the http,
# https and mailto schemes; returns 0 for every other scheme.
#
# The check is made on a normalised copy of the URL: percent-escapes and
# numeric character references are decoded, and the characters a browser
# ignores when it looks for the scheme are removed, so that obfuscated
# spellings of "javascript:" are recognised.
sub is_safe_url {
    my ($url) = @_;
    my $normalized = $url // '';

    # Percent-decode repeatedly so that multiply-encoded input ("%253A")
    # is reduced all the way down.
    1 while $normalized =~ s/%([0-9A-Fa-f]{2})/chr( hex $1 )/eg;

    # Numeric character references. Leading zeros are skipped and the digit
    # count is bounded so an oversized reference can neither overflow
    # hex() nor hand chr() an out-of-range code point.
    my $to_char = sub {
        my ($code_point) = @_;
        return $code_point <= 0x10FFFF ? chr $code_point : "\x{FFFD}";
    };
    $normalized =~ s/&#x0*([0-9A-Fa-f]{1,6});?/$to_char->( hex $1 )/eg;
    $normalized =~ s/&#0*([0-9]{1,7});?/$to_char->($1)/eg;

    # Browsers strip leading/trailing control characters and spaces and
    # drop tabs and newlines anywhere in a URL before parsing the scheme.
    $normalized =~ s/\A[\s\x00-\x20]+//;
    $normalized =~ s/[\s\x00-\x20]+\z//;
    $normalized =~ tr/\t\n\r//d;

    if ( $normalized =~ /\A([a-z][a-z0-9+\-.]*):/i ) {
        my $scheme = lc $1;
        return 1
          if $scheme eq 'http' || $scheme eq 'https' || $scheme eq 'mailto';
        return 0;
    }

    # No scheme: a relative URL.
    return 1;
}
# Returns $text with the five HTML-significant characters (& < > " ')
# replaced by their entities.
sub escape_html {
    my ($text) = @_;

    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );

    # One pass over the string: every special character is looked up once,
    # so the ampersands introduced by the entities are never re-escaped.
    $text =~ s/([&<>"'])/$entity_for{$1}/g;

    return $text;
}
1;

75
m2h.pl Executable file
View File

@@ -0,0 +1,75 @@
#!perl -w
use strict;
use Getopt::Long;
use MarkdownParser;
use open qw(:std :encoding(UTF-8));
# Prints the usage text to standard output and terminates the program with
# exit status 0.
sub show_help {
    my $usage = <<"EOF";
m2h - Markdown to HTML Converter
Usage: $0 [options] [file]
Options:
-h, --help Show this help message
-v, --version Show version information
-o, --output Specify output file (default: stdout)
If no file is specified, input is read from stdin.
EOF

    print $usage;
    exit 0;
}
# Prints the program version (taken from $MarkdownParser::VERSION) to
# standard output and terminates the program with exit status 0.
sub show_version {
    my $banner = sprintf "m2h version %s\n", $MarkdownParser::VERSION;
    print $banner;
    exit 0;
}
# Reads the whole markdown input and returns it as one string.
#
#   $file  path of the file to read; when undef or empty, standard input
#          is read instead
#
# Files are decoded as UTF-8. Dies with a message that includes the system
# error when the file cannot be opened.
sub read_input {
    my ($file) = @_;

    local $/;    # slurp mode: one read returns everything

    # Test for definedness and length rather than truth, so that a file
    # literally named "0" is opened instead of falling through to STDIN.
    if ( defined $file && length $file ) {

        # The layer is given in the open mode, so exactly one UTF-8 layer is
        # applied whether or not a `use open` default is in effect.
        open my $fh, '<:encoding(UTF-8)', $file
          or die "Error: Cannot open file: $file: $!\n";
        my $content = <$fh>;
        close $fh;
        return $content;
    }

    return scalar <STDIN>;
}
# --- Command-line handling -------------------------------------------------
my $output_file;
my $help    = 0;
my $version = 0;

# GetOptions has already reported the offending option on STDERR; finish
# with a non-zero exit status so callers can detect the usage error.
GetOptions(
    'help|h'     => \$help,
    'version|v'  => \$version,
    'output|o=s' => \$output_file,
) or die "Try '$0 --help' for more information.\n";

show_help()    if $help;
show_version() if $version;

# --- Input -----------------------------------------------------------------
# STDIN and STDOUT already carry a UTF-8 layer from
# `use open qw(:std :encoding(UTF-8))` at the top of the script, so no
# further binmode is applied to them here.
my $input_file = shift @ARGV;
my $input      = read_input($input_file);

# --- Output ----------------------------------------------------------------
# Definedness/length test so that an output file named "0" is honoured.
my $write_to_file = defined $output_file && length $output_file;

my $output;
if ($write_to_file) {
    open $output, '>:encoding(UTF-8)', $output_file
      or die "Error: Cannot write to file: $output_file: $!\n";
}
else {
    $output = \*STDOUT;
}

my $parser = MarkdownParser->new();
print {$output} $parser->parse($input);

# Buffered write errors (full disk, ...) only surface at close time.
if ($write_to_file) {
    close $output
      or die "Error: Cannot write to file: $output_file: $!\n";
}

9
t/00-load.t Executable file
View File

@@ -0,0 +1,9 @@
#!/usr/bin/env perl
# Smoke test: the module loads and exposes its public entry points.
use strict;
use warnings;
use Test::More tests => 2;

# require_ok reports a load failure as a failing test instead of aborting
# the script before any test has run; the previous ok(1) asserted nothing.
require_ok('MarkdownParser');
can_ok( 'MarkdownParser', qw(new parse) );

16
t/01-headers.t Executable file
View File

@@ -0,0 +1,16 @@
#!/usr/bin/env perl
# Heading levels 1 through 6 are rendered as <h1> .. <h6>.
use strict;
use warnings;
use Test::More tests => 6;
use MarkdownParser;

my $parser = MarkdownParser->new();

for my $level ( 1 .. 6 ) {
    my $marker = '#' x $level;
    is(
        $parser->parse("$marker Header $level"),
        "<h$level>Header $level</h$level>\n",
        "H$level header"
    );
}

25
t/02-paragraphs.t Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env perl
# Paragraph handling: single paragraphs, blank-line separation and joining
# of consecutive lines.
use strict;
use warnings;
use Test::More tests => 3;
use MarkdownParser;

my $parser = MarkdownParser->new();

# Each case: [ markdown input, expected HTML, test name ]
my @cases = (
    [
        "Simple paragraph",
        "<p>Simple paragraph</p>\n",
        "Single paragraph",
    ],
    [
        "First paragraph\n\nSecond paragraph",
        "<p>First paragraph</p>\n<p>Second paragraph</p>\n",
        "Multiple paragraphs",
    ],
    [
        "Paragraph with\nmultiple lines",
        "<p>Paragraph with multiple lines</p>\n",
        "Multi-line paragraph",
    ],
);

for my $case (@cases) {
    my ( $markdown, $html, $name ) = @{$case};
    is( $parser->parse($markdown), $html, $name );
}

68
t/03-formatting.t Executable file
View File

@@ -0,0 +1,68 @@
#!/usr/bin/env perl
# Inline emphasis: bold and italic with asterisks and underscores, nesting,
# and underscores inside words that must be left untouched.
use strict;
use warnings;
use Test::More tests => 13;
use MarkdownParser;

my $parser = MarkdownParser->new();

# Each case: [ markdown input, expected HTML, test name ]
my @cases = (
    [
        "**bold text**",
        "<p><strong>bold text</strong></p>\n",
        "Bold with **",
    ],
    [
        "__bold text__",
        "<p><strong>bold text</strong></p>\n",
        "Bold with __",
    ],
    [
        "*italic text*",
        "<p><em>italic text</em></p>\n",
        "Italic with *",
    ],
    [
        "_italic text_",
        "<p><em>italic text</em></p>\n",
        "Italic with _",
    ],
    [
        "**bold** and *italic*",
        "<p><strong>bold</strong> and <em>italic</em></p>\n",
        "Bold and italic together",
    ],
    [
        "Text with **bold** in middle",
        "<p>Text with <strong>bold</strong> in middle</p>\n",
        "Bold in middle of text",
    ],
    [
        "Text with *italic* in middle",
        "<p>Text with <em>italic</em> in middle</p>\n",
        "Italic in middle of text",
    ],
    [
        "**bold** *italic* **bold again**",
        "<p><strong>bold</strong> <em>italic</em> <strong>bold again</strong></p>\n",
        "Multiple formatting",
    ],
    [
        "***bold text***",
        "<p><strong>bold text</strong></p>\n",
        "Bold with ***",
    ],
    [
        "**bold *italic* bold**",
        "<p><strong>bold <em>italic</em> bold</strong></p>\n",
        "Nested formatting",
    ],
    [
        "___bold text___",
        "<p><strong>bold text</strong></p>\n",
        "Bold with ___",
    ],
    [
        "my_variable",
        "<p>my_variable</p>\n",
        "Underscore inside word unchanged",
    ],
    [
        "CONST__VALUE",
        "<p>CONST__VALUE</p>\n",
        "Double underscores inside word unchanged",
    ],
);

for my $case (@cases) {
    my ( $markdown, $html, $name ) = @{$case};
    is( $parser->parse($markdown), $html, $name );
}

50
t/04-links-images.t Executable file
View File

@@ -0,0 +1,50 @@
#!/usr/bin/env perl
# Links and images, including the URL-scheme filter: unsafe schemes must
# degrade to plain link text / alt text.
use strict;
use warnings;
use Test::More tests => 10;
use MarkdownParser;

my $parser = MarkdownParser->new();

# Each case: [ markdown input, expected HTML, test name ]
my @cases = (
    [
        "[link text](http://example.com)",
        "<p><a href=\"http://example.com\">link text</a></p>\n",
        "Simple link",
    ],
    [
        "[link with spaces](https://example.com/path)",
        "<p><a href=\"https://example.com/path\">link with spaces</a></p>\n",
        "Link with path",
    ],
    [
        "![alt text](image.png)",
        "<p><img src=\"image.png\" alt=\"alt text\"></p>\n",
        "Simple image",
    ],
    [
        "![alt with spaces](http://example.com/image.jpg)",
        "<p><img src=\"http://example.com/image.jpg\" alt=\"alt with spaces\"></p>\n",
        "Image with URL",
    ],
    [
        "[Click me](javascript:alert('XSS'))",
        "<p>Click me</p>\n",
        "JavaScript protocol blocked in links",
    ],
    [
        "[Click me](data:text/html,<script>alert('XSS')</script>)",
        "<p>Click me</p>\n",
        "Data protocol blocked in links",
    ],
    [
        "[Click me](javascript&#x3A;alert('XSS'))",
        "<p>Click me</p>\n",
        "Encoded JavaScript protocol blocked in links",
    ],
    [
        "![Image](javascript:alert('XSS'))",
        "<p>Image</p>\n",
        "JavaScript protocol blocked in images",
    ],
    [
        "![Image](file:///etc/passwd)",
        "<p>Image</p>\n",
        "File protocol blocked in images",
    ],
    [
        "![Image](javascript:%2f%2falert('XSS'))",
        "<p>Image</p>\n",
        "Encoded JavaScript protocol blocked in images",
    ],
);

for my $case (@cases) {
    my ( $markdown, $html, $name ) = @{$case};
    is( $parser->parse($markdown), $html, $name );
}

40
t/05-lists.t Executable file
View File

@@ -0,0 +1,40 @@
#!/usr/bin/env perl
# Ordered and unordered lists, list separation by blank lines and inline
# formatting inside list items.
use strict;
use warnings;
use Test::More tests => 6;
use MarkdownParser;

my $parser = MarkdownParser->new();

# Each case: [ markdown input, expected HTML, test name ]
my @cases = (
    [
        "- Item 1\n- Item 2\n- Item 3",
        "<ul>\n<li>Item 1</li>\n<li>Item 2</li>\n<li>Item 3</li>\n</ul>\n",
        "Unordered list with -",
    ],
    [
        "* Item 1\n* Item 2",
        "<ul>\n<li>Item 1</li>\n<li>Item 2</li>\n</ul>\n",
        "Unordered list with *",
    ],
    [
        "+ Item 1\n+ Item 2",
        "<ul>\n<li>Item 1</li>\n<li>Item 2</li>\n</ul>\n",
        "Unordered list with +",
    ],
    [
        "1. First item\n2. Second item\n3. Third item",
        "<ol>\n<li>First item</li>\n<li>Second item</li>\n<li>Third item</li>\n</ol>\n",
        "Ordered list",
    ],
    [
        "- Item 1\n\n- Item 2",
        "<ul>\n<li>Item 1</li>\n</ul>\n<ul>\n<li>Item 2</li>\n</ul>\n",
        "Multiple list blocks",
    ],
    [
        "- **Bold item**\n- *Italic item*",
        "<ul>\n<li><strong>Bold item</strong></li>\n<li><em>Italic item</em></li>\n</ul>\n",
        "List items with formatting",
    ],
);

for my $case (@cases) {
    my ( $markdown, $html, $name ) = @{$case};
    is( $parser->parse($markdown), $html, $name );
}

35
t/06-code.t Executable file
View File

@@ -0,0 +1,35 @@
#!/usr/bin/env perl
# Inline code spans and fenced code blocks.
use strict;
use warnings;
use Test::More tests => 5;
use MarkdownParser;

my $parser = MarkdownParser->new();

# Each case: [ markdown input, expected HTML, test name ]
my @cases = (
    [
        "`inline code`",
        "<p><code>inline code</code></p>\n",
        "Inline code",
    ],
    [
        "Text with `code` in it",
        "<p>Text with <code>code</code> in it</p>\n",
        "Inline code in text",
    ],
    [
        "```\ncode block\n```",
        "<pre><code>\ncode block\n</code></pre>\n",
        "Code block",
    ],
    [
        "```\nline 1\nline 2\nline 3\n```",
        "<pre><code>\nline 1\nline 2\nline 3\n</code></pre>\n",
        "Multi-line code block",
    ],
    [
        "```\n```",
        "<pre><code>\n</code></pre>\n",
        "Empty code block",
    ],
);

for my $case (@cases) {
    my ( $markdown, $html, $name ) = @{$case};
    is( $parser->parse($markdown), $html, $name );
}

25
t/07-blockquotes.t Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env perl
# Blockquotes: single line, several lines and inline formatting.
use strict;
use warnings;
use Test::More tests => 3;
use MarkdownParser;

my $parser = MarkdownParser->new();

# Each case: [ markdown input, expected HTML, test name ]
my @cases = (
    [
        "> Quote text",
        "<blockquote>\n<p>Quote text</p>\n</blockquote>\n",
        "Simple blockquote",
    ],
    [
        "> First line\n> Second line",
        "<blockquote>\n<p>First line</p>\n<p>Second line</p>\n</blockquote>\n",
        "Multi-line blockquote",
    ],
    [
        "> Quote with **bold**",
        "<blockquote>\n<p>Quote with <strong>bold</strong></p>\n</blockquote>\n",
        "Blockquote with formatting",
    ],
);

for my $case (@cases) {
    my ( $markdown, $html, $name ) = @{$case};
    is( $parser->parse($markdown), $html, $name );
}

13
t/08-horizontal-rules.t Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env perl
# The three horizontal-rule spellings all render as <hr>.
use strict;
use warnings;
use Test::More tests => 3;
use MarkdownParser;

my $parser = MarkdownParser->new();

for my $rule ( '---', '***', '___' ) {
    is( $parser->parse($rule), "<hr>\n", "Horizontal rule with $rule" );
}

97
t/09-complex.t Executable file
View File

@@ -0,0 +1,97 @@
#!/usr/bin/env perl
# End-to-end tests: whole documents mixing several block and inline
# constructs are parsed and compared against the complete expected HTML.
# NOTE(review): the heredoc fixtures below are byte-sensitive; as shown here
# they contain no blank lines between blocks - confirm against the
# repository copy before editing them.
use strict;
use warnings;
use Test::More tests => 4;
use MarkdownParser;
# One parser instance is reused for every document.
my $parser = MarkdownParser->new();
# Document 1: heading, paragraph with emphasis, unordered list, link.
my $input = <<'EOF';
# Title
This is a paragraph with **bold** and *italic* text.
- List item 1
- List item 2
[Link](http://example.com)
EOF
my $expected = <<'EOF';
<h1>Title</h1>
<p>This is a paragraph with <strong>bold</strong> and <em>italic</em> text.</p>
<ul>
<li>List item 1</li>
<li>List item 2</li>
</ul>
<p><a href="http://example.com">Link</a></p>
EOF
is( $parser->parse($input), $expected, "Complex document" );
# Document 2: a blockquote between two paragraphs.
$input = <<'EOF';
## Section
Paragraph one.
> Blockquote here
Another paragraph.
EOF
$expected = <<'EOF';
<h2>Section</h2>
<p>Paragraph one.</p>
<blockquote>
<p>Blockquote here</p>
</blockquote>
<p>Another paragraph.</p>
EOF
is( $parser->parse($input), $expected, "Document with blockquote" );
# Document 3: inline code followed by a fenced code block.
$input = <<'EOF';
# Code Example
Here is some `inline code` and a code block:
```
function test() {
return true;
}
```
EOF
$expected = <<'EOF';
<h1>Code Example</h1>
<p>Here is some <code>inline code</code> and a code block:</p>
<pre><code>
function test() {
return true;
}
</code></pre>
EOF
is( $parser->parse($input), $expected, "Document with code" );
# Document 4: link and image in one paragraph, a horizontal rule and a
# second heading.
$input = <<'EOF';
# Header
Paragraph with [link](url) and ![image](img.png).
---
## Another Header
EOF
$expected = <<'EOF';
<h1>Header</h1>
<p>Paragraph with <a href="url">link</a> and <img src="img.png" alt="image">.</p>
<hr>
<h2>Another Header</h2>
EOF
is( $parser->parse($input),
$expected, "Document with links, images, and horizontal rule" );

30
t/10-html-escape.t Executable file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env perl
# HTML-significant characters in plain text are escaped in the output.
use strict;
use warnings;
use Test::More tests => 4;
use MarkdownParser;

my $parser = MarkdownParser->new();

# Each case: [ markdown input, expected HTML, test name ]
my @cases = (
    [
        "Text with <tag>",
        "<p>Text with &lt;tag&gt;</p>\n",
        "HTML tags escaped",
    ],
    [
        "Text with & symbol",
        "<p>Text with &amp; symbol</p>\n",
        "Ampersand escaped",
    ],
    [
        'Text with "quotes"',
        "<p>Text with &quot;quotes&quot;</p>\n",
        "Quotes escaped",
    ],
    [
        "Text with 'apostrophe'",
        "<p>Text with &#39;apostrophe&#39;</p>\n",
        "Apostrophe escaped",
    ],
);

for my $case (@cases) {
    my ( $markdown, $html, $name ) = @{$case};
    is( $parser->parse($markdown), $html, $name );
}

111
t/11-tables.t Normal file
View File

@@ -0,0 +1,111 @@
#!/usr/bin/env perl
# Table tests: the first row is rendered with <th> cells, rows after the
# separator line with <td> cells, and the separator itself produces no output.
# NOTE(review): the heredoc fixtures below are byte-sensitive - confirm
# against the repository copy before editing them.
use strict;
use warnings;
use Test::More tests => 4;
use MarkdownParser;
# One parser instance is reused for every table.
my $parser = MarkdownParser->new();
# Table 1: header row plus two data rows.
my $input = <<'EOF';
| Header 1 | Header 2 |
|----------|----------|
| Cell 1 | Cell 2 |
| Cell 3 | Cell 4 |
EOF
my $expected = <<'EOF';
<table>
<tr>
<th>Header 1</th>
<th>Header 2</th>
</tr>
<tr>
<td>Cell 1</td>
<td>Cell 2</td>
</tr>
<tr>
<td>Cell 3</td>
<td>Cell 4</td>
</tr>
</table>
EOF
is( $parser->parse($input), $expected, "Basic table" );
# Table 2: same shape with different cell content.
$input = <<'EOF';
| Name | Age |
|------|-----|
| John | 25 |
| Jane | 30 |
EOF
$expected = <<'EOF';
<table>
<tr>
<th>Name</th>
<th>Age</th>
</tr>
<tr>
<td>John</td>
<td>25</td>
</tr>
<tr>
<td>Jane</td>
<td>30</td>
</tr>
</table>
EOF
is( $parser->parse($input), $expected, "Table with different content" );
# Table 3: a table between a heading and a paragraph; the line after the
# table must close it and start a paragraph.
$input = <<'EOF';
# Title
| Col1 | Col2 |
|------|------|
| Data | Info |
More text.
EOF
$expected = <<'EOF';
<h1>Title</h1>
<table>
<tr>
<th>Col1</th>
<th>Col2</th>
</tr>
<tr>
<td>Data</td>
<td>Info</td>
</tr>
</table>
<p>More text.</p>
EOF
is( $parser->parse($input), $expected, "Table with surrounding content" );
# Table 4: inline formatting (bold, italic, link) inside header cells.
$input = <<'EOF';
| **Bold** | *Italic* | [Link](url) |
|----------|----------|-------------|
| Text | More | Info |
EOF
$expected = <<'EOF';
<table>
<tr>
<th><strong>Bold</strong></th>
<th><em>Italic</em></th>
<th><a href="url">Link</a></th>
</tr>
<tr>
<td>Text</td>
<td>More</td>
<td>Info</td>
</tr>
</table>
EOF
is( $parser->parse($input), $expected, "Table with inline formatting" );