Compare commits

..

3 Commits

Author SHA1 Message Date
4730c577fa test: cover head/get fallback 2025-12-29 15:25:24 +01:00
940f60e471 test: add UTF-8 path case 2025-12-29 15:25:16 +01:00
407289cd2a fix: head/get reachability fallback 2025-12-29 15:25:08 +01:00
3 changed files with 116 additions and 12 deletions

View File

@@ -5,7 +5,8 @@ use Mojo::URL;
use Mojo::UserAgent; use Mojo::UserAgent;
use Mojo::Promise; use Mojo::Promise;
use Urupam::Utils qw(sanitize_url); use Urupam::Utils qw(sanitize_url);
use Socket qw(getaddrinfo getnameinfo NI_NUMERICHOST NI_NUMERICSERV AF_INET AF_INET6 SOCK_STREAM); use Socket
qw(getaddrinfo getnameinfo NI_NUMERICHOST NI_NUMERICSERV AF_INET AF_INET6 SOCK_STREAM);
my $MAX_URL_LENGTH = 2048; my $MAX_URL_LENGTH = 2048;
my $CONNECT_TIMEOUT = 10; my $CONNECT_TIMEOUT = 10;
@@ -158,7 +159,7 @@ sub _resolve_host {
my ( $err, @results ) = my ( $err, @results ) =
getaddrinfo( $host, undef, { socktype => SOCK_STREAM } ); getaddrinfo( $host, undef, { socktype => SOCK_STREAM } );
return Mojo::Promise->resolve([]) if $err; return Mojo::Promise->resolve( [] ) if $err;
my @addresses; my @addresses;
for my $res (@results) { for my $res (@results) {
@@ -231,6 +232,24 @@ sub check_url_reachable {
my $code = $tx->result->code; my $code = $tx->result->code;
return 1 if $code >= 200 && $code < 400; return 1 if $code >= 200 && $code < 400;
if ( $code == 403 || $code == 404 || $code == 405 ) {
return $self->ua->get_p($url)->then(
sub {
my $get_tx = shift;
my $get_code = $get_tx->result->code;
return 1 if $get_code >= 200 && $get_code < 400;
return 1
if $get_code == 403
|| $get_code == 404
|| $get_code == 405;
return Mojo::Promise->reject(
"URL returned $get_code error")
if $get_code >= 400;
return Mojo::Promise->reject(
"URL returned unexpected status: $get_code");
}
);
}
return Mojo::Promise->reject("URL returned $code error") return Mojo::Promise->reject("URL returned $code error")
if $code >= 400; if $code >= 400;
return Mojo::Promise->reject( return Mojo::Promise->reject(
@@ -295,11 +314,13 @@ sub validate_url_with_checks {
unless $parsed->scheme && $parsed->scheme =~ /^https?$/i; unless $parsed->scheme && $parsed->scheme =~ /^https?$/i;
return Mojo::Promise->reject('Invalid URL format') unless $parsed->host; return Mojo::Promise->reject('Invalid URL format') unless $parsed->host;
my $normalized = $parsed->to_string;
return Mojo::Promise->reject( return Mojo::Promise->reject(
"URL exceeds maximum length of $MAX_URL_LENGTH characters") "URL exceeds maximum length of $MAX_URL_LENGTH characters")
unless $self->is_valid_url_length($sanitized); unless $self->is_valid_url_length($normalized);
return $self->is_blocked_url($sanitized)->then( return $self->is_blocked_url($normalized)->then(
sub { sub {
my $blocked = shift; my $blocked = shift;
return Mojo::Promise->reject( return Mojo::Promise->reject(
@@ -308,12 +329,12 @@ sub validate_url_with_checks {
my $ssl_check = my $ssl_check =
$parsed->scheme eq 'https' $parsed->scheme eq 'https'
? $self->check_ssl_certificate($sanitized) ? $self->check_ssl_certificate($normalized)
: Mojo::Promise->resolve(1); : Mojo::Promise->resolve(1);
return $ssl_check->then( return $ssl_check->then(
sub { return $self->check_url_reachable($sanitized); } ) sub { return $self->check_url_reachable($normalized); } )
->then( sub { return $sanitized; } ); ->then( sub { return $normalized; } );
} }
); );
} }

View File

@@ -106,6 +106,11 @@ subtest 'sanitize_url' => sub {
'https://example.com/~user/docs', 'https://example.com/~user/docs',
'unescapes multiple percent-encoded segments' 'unescapes multiple percent-encoded segments'
], ],
[
'https://fr.wikipedia.org/wiki/Pic_L%C3%A9nine',
'https://fr.wikipedia.org/wiki/Pic_L%C3%A9nine',
'preserves UTF-8 percent-encoded path'
],
[ [
'https://example.com?q=hello%20world', 'https://example.com?q=hello%20world',
'https://example.com?q=hello%20world', 'https://example.com?q=hello%20world',

View File

@@ -17,19 +17,29 @@ sub wait_promise {
} }
# mock_ua_with_code($code): test helper that builds a Test::MockObject user
# agent whose responses always report the given HTTP status $code.
# NOTE(review): this is a side-by-side diff rendering - lines that appear
# twice show the old and new columns; lines that appear once look like
# additions made by the commit (the separate GET transaction and get_p mock).
# Returns the mock user agent object.
sub mock_ua_with_code { sub mock_ua_with_code {
my ($code) = @_; my ($code) = @_;
my $mock_ua = Test::MockObject->new; my $mock_ua = Test::MockObject->new;
my $mock_tx = Test::MockObject->new; my $mock_tx = Test::MockObject->new;
my $mock_result = Test::MockObject->new; my $mock_result = Test::MockObject->new;
my $mock_get_tx = Test::MockObject->new;
my $mock_get_result = Test::MockObject->new;
# Both the HEAD-side and GET-side result objects report the same $code.
$mock_result->mock( 'code', sub { $code } ); $mock_result->mock( 'code', sub { $code } );
$mock_tx->mock( 'result', sub { $mock_result } ); $mock_tx->mock( 'result', sub { $mock_result } );
$mock_get_result->mock( 'code', sub { $code } );
$mock_get_tx->mock( 'result', sub { $mock_get_result } );
# head_p resolves immediately with the mocked HEAD transaction.
$mock_ua->mock( $mock_ua->mock(
'head_p', 'head_p',
sub { sub {
return Mojo::Promise->resolve($mock_tx); return Mojo::Promise->resolve($mock_tx);
} }
); );
# get_p resolves immediately with the mocked GET transaction.
$mock_ua->mock(
'get_p',
sub {
return Mojo::Promise->resolve($mock_get_tx);
}
);
return $mock_ua; return $mock_ua;
} }
@@ -274,7 +284,7 @@ subtest 'check_url_reachable - success codes' => sub {
subtest 'check_url_reachable - error codes' => sub { subtest 'check_url_reachable - error codes' => sub {
my @cases = ( my @cases = (
[ 404, qr/URL returned 404 error/, '4xx status returns error' ], [ 410, qr/URL returned 410 error/, '4xx status returns error' ],
[ 500, qr/URL returned 500 error/, '5xx status returns error' ], [ 500, qr/URL returned 500 error/, '5xx status returns error' ],
[ 100, qr/unexpected status/, 'unexpected status returns error' ], [ 100, qr/unexpected status/, 'unexpected status returns error' ],
); );
@@ -288,6 +298,74 @@ subtest 'check_url_reachable - error codes' => sub {
} }
}; };
# Exercises the HEAD-to-GET fallback of check_url_reachable: the mocked HEAD
# response reports 404 while the mocked GET response reports 200, and the
# promise is expected to yield 1 with no error.
subtest 'check_url_reachable - HEAD fallback to GET' => sub {
my $mock_ua = Test::MockObject->new;
my $head_tx = Test::MockObject->new;
my $head_result = Test::MockObject->new;
my $get_tx = Test::MockObject->new;
my $get_result = Test::MockObject->new;
# HEAD transaction: result->code is 404.
$head_result->mock( 'code', sub { 404 } );
$head_tx->mock( 'result', sub { $head_result } );
# GET transaction: result->code is 200.
$get_result->mock( 'code', sub { 200 } );
$get_tx->mock( 'result', sub { $get_result } );
# Each request method resolves immediately with its own mocked transaction.
$mock_ua->mock(
'head_p',
sub {
return Mojo::Promise->resolve($head_tx);
}
);
$mock_ua->mock(
'get_p',
sub {
return Mojo::Promise->resolve($get_tx);
}
);
# $validator is created outside this hunk; swap in the mocked user agent.
$validator->ua($mock_ua);
# wait_promise is defined earlier in the test file (not fully visible here);
# presumably it returns the resolved value and the rejection reason - confirm.
my ( $result, $error ) =
wait_promise( $validator->check_url_reachable('http://example.com') );
is( $result, 1, 'GET fallback returns success' );
is( $error, undef, 'GET fallback has no error' );
};
# Exercises the failing side of the HEAD-to-GET fallback: the mocked HEAD
# response reports 405 and the mocked GET response reports 500, and the
# promise is expected to fail with a "URL returned 500 error" message.
subtest 'check_url_reachable - HEAD fallback error' => sub {
my $mock_ua = Test::MockObject->new;
my $head_tx = Test::MockObject->new;
my $head_result = Test::MockObject->new;
my $get_tx = Test::MockObject->new;
my $get_result = Test::MockObject->new;
# HEAD transaction: result->code is 405.
$head_result->mock( 'code', sub { 405 } );
$head_tx->mock( 'result', sub { $head_result } );
# GET transaction: result->code is 500.
$get_result->mock( 'code', sub { 500 } );
$get_tx->mock( 'result', sub { $get_result } );
# Each request method resolves immediately with its own mocked transaction.
$mock_ua->mock(
'head_p',
sub {
return Mojo::Promise->resolve($head_tx);
}
);
$mock_ua->mock(
'get_p',
sub {
return Mojo::Promise->resolve($get_tx);
}
);
# $validator is created outside this hunk; swap in the mocked user agent.
$validator->ua($mock_ua);
# wait_promise is defined earlier in the test file (not fully visible here);
# presumably it returns the resolved value and the rejection reason - confirm.
my ( $result, $error ) =
wait_promise( $validator->check_url_reachable('http://example.com') );
is( $result, undef, 'GET fallback error has no result' );
like( $error, qr/URL returned 500 error/, 'GET fallback error reported' );
};
subtest 'check_url_reachable - classified errors' => sub { subtest 'check_url_reachable - classified errors' => sub {
my @cases = ( my @cases = (
[ [