Compare commits

..

3 Commits

Author SHA1 Message Date
4730c577fa test: cover head/get fallback 2025-12-29 15:25:24 +01:00
940f60e471 test: add UTF-8 path case 2025-12-29 15:25:16 +01:00
407289cd2a fix: head/get reachability fallback 2025-12-29 15:25:08 +01:00
3 changed files with 116 additions and 12 deletions

View File

@@ -5,7 +5,8 @@ use Mojo::URL;
use Mojo::UserAgent;
use Mojo::Promise;
use Urupam::Utils qw(sanitize_url);
use Socket qw(getaddrinfo getnameinfo NI_NUMERICHOST NI_NUMERICSERV AF_INET AF_INET6 SOCK_STREAM);
use Socket
qw(getaddrinfo getnameinfo NI_NUMERICHOST NI_NUMERICSERV AF_INET AF_INET6 SOCK_STREAM);
my $MAX_URL_LENGTH = 2048;
my $CONNECT_TIMEOUT = 10;
@@ -158,7 +159,7 @@ sub _resolve_host {
my ( $err, @results ) =
getaddrinfo( $host, undef, { socktype => SOCK_STREAM } );
return Mojo::Promise->resolve([]) if $err;
return Mojo::Promise->resolve( [] ) if $err;
my @addresses;
for my $res (@results) {
@@ -231,6 +232,24 @@ sub check_url_reachable {
my $code = $tx->result->code;
return 1 if $code >= 200 && $code < 400;
if ( $code == 403 || $code == 404 || $code == 405 ) {
return $self->ua->get_p($url)->then(
sub {
my $get_tx = shift;
my $get_code = $get_tx->result->code;
return 1 if $get_code >= 200 && $get_code < 400;
return 1
if $get_code == 403
|| $get_code == 404
|| $get_code == 405;
return Mojo::Promise->reject(
"URL returned $get_code error")
if $get_code >= 400;
return Mojo::Promise->reject(
"URL returned unexpected status: $get_code");
}
);
}
return Mojo::Promise->reject("URL returned $code error")
if $code >= 400;
return Mojo::Promise->reject(
@@ -295,11 +314,13 @@ sub validate_url_with_checks {
unless $parsed->scheme && $parsed->scheme =~ /^https?$/i;
return Mojo::Promise->reject('Invalid URL format') unless $parsed->host;
my $normalized = $parsed->to_string;
return Mojo::Promise->reject(
"URL exceeds maximum length of $MAX_URL_LENGTH characters")
unless $self->is_valid_url_length($sanitized);
unless $self->is_valid_url_length($normalized);
return $self->is_blocked_url($sanitized)->then(
return $self->is_blocked_url($normalized)->then(
sub {
my $blocked = shift;
return Mojo::Promise->reject(
@@ -308,12 +329,12 @@ sub validate_url_with_checks {
my $ssl_check =
$parsed->scheme eq 'https'
? $self->check_ssl_certificate($sanitized)
? $self->check_ssl_certificate($normalized)
: Mojo::Promise->resolve(1);
return $ssl_check->then(
sub { return $self->check_url_reachable($sanitized); } )
->then( sub { return $sanitized; } );
sub { return $self->check_url_reachable($normalized); } )
->then( sub { return $normalized; } );
}
);
}

View File

@@ -106,6 +106,11 @@ subtest 'sanitize_url' => sub {
'https://example.com/~user/docs',
'unescapes multiple percent-encoded segments'
],
[
'https://fr.wikipedia.org/wiki/Pic_L%C3%A9nine',
'https://fr.wikipedia.org/wiki/Pic_L%C3%A9nine',
'preserves UTF-8 percent-encoded path'
],
[
'https://example.com?q=hello%20world',
'https://example.com?q=hello%20world',

View File

@@ -21,15 +21,25 @@ sub mock_ua_with_code {
my $mock_ua = Test::MockObject->new;
my $mock_tx = Test::MockObject->new;
my $mock_result = Test::MockObject->new;
my $mock_get_tx = Test::MockObject->new;
my $mock_get_result = Test::MockObject->new;
$mock_result->mock( 'code', sub { $code } );
$mock_tx->mock( 'result', sub { $mock_result } );
$mock_get_result->mock( 'code', sub { $code } );
$mock_get_tx->mock( 'result', sub { $mock_get_result } );
$mock_ua->mock(
'head_p',
sub {
return Mojo::Promise->resolve($mock_tx);
}
);
$mock_ua->mock(
'get_p',
sub {
return Mojo::Promise->resolve($mock_get_tx);
}
);
return $mock_ua;
}
@@ -274,7 +284,7 @@ subtest 'check_url_reachable - success codes' => sub {
subtest 'check_url_reachable - error codes' => sub {
my @cases = (
[ 404, qr/URL returned 404 error/, '4xx status returns error' ],
[ 410, qr/URL returned 410 error/, '4xx status returns error' ],
[ 500, qr/URL returned 500 error/, '5xx status returns error' ],
[ 100, qr/unexpected status/, 'unexpected status returns error' ],
);
@@ -288,6 +298,74 @@ subtest 'check_url_reachable - error codes' => sub {
}
};
subtest 'check_url_reachable - HEAD fallback to GET' => sub {

    # Scenario: the HEAD probe answers 404 while the GET probe answers 200.
    # The reachability check is expected to resolve to 1 with no error.

    # Build a fake transaction whose ->result->code reports $status.
    my $fake_tx = sub {
        my ($status) = @_;
        my $response = Test::MockObject->new;
        $response->mock( 'code', sub { $status } );
        my $transaction = Test::MockObject->new;
        $transaction->mock( 'result', sub { $response } );
        return $transaction;
    };

    my $head_reply = $fake_tx->(404);
    my $get_reply  = $fake_tx->(200);

    # User agent double: each request method resolves with its canned reply.
    my $ua = Test::MockObject->new;
    $ua->mock( 'head_p', sub { return Mojo::Promise->resolve($head_reply); } );
    $ua->mock( 'get_p',  sub { return Mojo::Promise->resolve($get_reply); } );
    $validator->ua($ua);

    my $promise = $validator->check_url_reachable('http://example.com');
    my ( $result, $error ) = wait_promise($promise);

    is( $result, 1,     'GET fallback returns success' );
    is( $error,  undef, 'GET fallback has no error' );
};
subtest 'check_url_reachable - HEAD fallback error' => sub {

    # Scenario: the HEAD probe answers 405 and the GET probe answers 500.
    # The check is expected to reject with the GET status in the message.
    my %status_for = (
        head_p => 405,
        get_p  => 500,
    );

    # Wire one canned transaction per request method onto a user agent double.
    my $ua = Test::MockObject->new;
    for my $method ( sort keys %status_for ) {
        my $status = $status_for{$method};

        my $response = Test::MockObject->new;
        $response->mock( 'code', sub { $status } );

        my $transaction = Test::MockObject->new;
        $transaction->mock( 'result', sub { $response } );

        $ua->mock( $method,
            sub { return Mojo::Promise->resolve($transaction); } );
    }
    $validator->ua($ua);

    my $promise = $validator->check_url_reachable('http://example.com');
    my ( $result, $error ) = wait_promise($promise);

    is( $result, undef, 'GET fallback error has no result' );
    like( $error, qr/URL returned 500 error/, 'GET fallback error reported' );
};
subtest 'check_url_reachable - classified errors' => sub {
my @cases = (
[