Compare commits
3 Commits
b5ab00ef93
...
4730c577fa
| Author | SHA1 | Date | |
|---|---|---|---|
| | 4730c577fa | | |
| | 940f60e471 | | |
| | 407289cd2a | | |
@@ -5,7 +5,8 @@ use Mojo::URL;
|
||||
use Mojo::UserAgent;
|
||||
use Mojo::Promise;
|
||||
use Urupam::Utils qw(sanitize_url);
|
||||
use Socket qw(getaddrinfo getnameinfo NI_NUMERICHOST NI_NUMERICSERV AF_INET AF_INET6 SOCK_STREAM);
|
||||
use Socket
|
||||
qw(getaddrinfo getnameinfo NI_NUMERICHOST NI_NUMERICSERV AF_INET AF_INET6 SOCK_STREAM);
|
||||
|
||||
my $MAX_URL_LENGTH = 2048;
|
||||
my $CONNECT_TIMEOUT = 10;
|
||||
@@ -231,6 +232,24 @@ sub check_url_reachable {
|
||||
my $code = $tx->result->code;
|
||||
|
||||
return 1 if $code >= 200 && $code < 400;
|
||||
if ( $code == 403 || $code == 404 || $code == 405 ) {
|
||||
return $self->ua->get_p($url)->then(
|
||||
sub {
|
||||
my $get_tx = shift;
|
||||
my $get_code = $get_tx->result->code;
|
||||
return 1 if $get_code >= 200 && $get_code < 400;
|
||||
return 1
|
||||
if $get_code == 403
|
||||
|| $get_code == 404
|
||||
|| $get_code == 405;
|
||||
return Mojo::Promise->reject(
|
||||
"URL returned $get_code error")
|
||||
if $get_code >= 400;
|
||||
return Mojo::Promise->reject(
|
||||
"URL returned unexpected status: $get_code");
|
||||
}
|
||||
);
|
||||
}
|
||||
return Mojo::Promise->reject("URL returned $code error")
|
||||
if $code >= 400;
|
||||
return Mojo::Promise->reject(
|
||||
@@ -295,11 +314,13 @@ sub validate_url_with_checks {
|
||||
unless $parsed->scheme && $parsed->scheme =~ /^https?$/i;
|
||||
return Mojo::Promise->reject('Invalid URL format') unless $parsed->host;
|
||||
|
||||
my $normalized = $parsed->to_string;
|
||||
|
||||
return Mojo::Promise->reject(
|
||||
"URL exceeds maximum length of $MAX_URL_LENGTH characters")
|
||||
unless $self->is_valid_url_length($sanitized);
|
||||
unless $self->is_valid_url_length($normalized);
|
||||
|
||||
return $self->is_blocked_url($sanitized)->then(
|
||||
return $self->is_blocked_url($normalized)->then(
|
||||
sub {
|
||||
my $blocked = shift;
|
||||
return Mojo::Promise->reject(
|
||||
@@ -308,12 +329,12 @@ sub validate_url_with_checks {
|
||||
|
||||
my $ssl_check =
|
||||
$parsed->scheme eq 'https'
|
||||
? $self->check_ssl_certificate($sanitized)
|
||||
? $self->check_ssl_certificate($normalized)
|
||||
: Mojo::Promise->resolve(1);
|
||||
|
||||
return $ssl_check->then(
|
||||
sub { return $self->check_url_reachable($sanitized); } )
|
||||
->then( sub { return $sanitized; } );
|
||||
sub { return $self->check_url_reachable($normalized); } )
|
||||
->then( sub { return $normalized; } );
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
@@ -106,6 +106,11 @@ subtest 'sanitize_url' => sub {
|
||||
'https://example.com/~user/docs',
|
||||
'unescapes multiple percent-encoded segments'
|
||||
],
|
||||
[
|
||||
'https://fr.wikipedia.org/wiki/Pic_L%C3%A9nine',
|
||||
'https://fr.wikipedia.org/wiki/Pic_L%C3%A9nine',
|
||||
'preserves UTF-8 percent-encoded path'
|
||||
],
|
||||
[
|
||||
'https://example.com?q=hello%20world',
|
||||
'https://example.com?q=hello%20world',
|
||||
|
||||
@@ -21,15 +21,25 @@ sub mock_ua_with_code {
|
||||
my $mock_ua = Test::MockObject->new;
|
||||
my $mock_tx = Test::MockObject->new;
|
||||
my $mock_result = Test::MockObject->new;
|
||||
my $mock_get_tx = Test::MockObject->new;
|
||||
my $mock_get_result = Test::MockObject->new;
|
||||
|
||||
$mock_result->mock( 'code', sub { $code } );
|
||||
$mock_tx->mock( 'result', sub { $mock_result } );
|
||||
$mock_get_result->mock( 'code', sub { $code } );
|
||||
$mock_get_tx->mock( 'result', sub { $mock_get_result } );
|
||||
$mock_ua->mock(
|
||||
'head_p',
|
||||
sub {
|
||||
return Mojo::Promise->resolve($mock_tx);
|
||||
}
|
||||
);
|
||||
$mock_ua->mock(
|
||||
'get_p',
|
||||
sub {
|
||||
return Mojo::Promise->resolve($mock_get_tx);
|
||||
}
|
||||
);
|
||||
|
||||
return $mock_ua;
|
||||
}
|
||||
@@ -274,7 +284,7 @@ subtest 'check_url_reachable - success codes' => sub {
|
||||
|
||||
subtest 'check_url_reachable - error codes' => sub {
|
||||
my @cases = (
|
||||
[ 404, qr/URL returned 404 error/, '4xx status returns error' ],
|
||||
[ 410, qr/URL returned 410 error/, '4xx status returns error' ],
|
||||
[ 500, qr/URL returned 500 error/, '5xx status returns error' ],
|
||||
[ 100, qr/unexpected status/, 'unexpected status returns error' ],
|
||||
);
|
||||
@@ -288,6 +298,74 @@ subtest 'check_url_reachable - error codes' => sub {
|
||||
}
|
||||
};
|
||||
|
||||
# Scenario: the HEAD request is answered with 404 while the GET request is
# answered with 200. The check is expected to resolve to 1 with no error.
subtest 'check_url_reachable - HEAD fallback to GET' => sub {

    # Build a stand-in transaction whose ->result->code is a fixed status.
    my $fake_tx_with = sub {
        my ($status) = @_;

        my $response    = Test::MockObject->new;
        my $transaction = Test::MockObject->new;
        $response->mock( 'code', sub { $status } );
        $transaction->mock( 'result', sub { $response } );

        return $transaction;
    };

    # Each user-agent method resolves immediately with its canned transaction.
    my @canned = (
        [ 'head_p', $fake_tx_with->(404) ],
        [ 'get_p',  $fake_tx_with->(200) ],
    );

    my $fake_ua = Test::MockObject->new;
    for my $pair (@canned) {
        my ( $method, $transaction ) = @{$pair};
        $fake_ua->mock( $method,
            sub { return Mojo::Promise->resolve($transaction); } );
    }

    $validator->ua($fake_ua);

    my ( $outcome, $failure ) =
      wait_promise( $validator->check_url_reachable('http://example.com') );

    is( $outcome, 1,     'GET fallback returns success' );
    is( $failure, undef, 'GET fallback has no error' );
};
|
||||
|
||||
# Scenario: the HEAD request is answered with 405 and the GET request with
# 500. The check is expected to fail, reporting the 500 status from GET.
subtest 'check_url_reachable - HEAD fallback error' => sub {

    # Build a stand-in transaction whose ->result->code is a fixed status.
    my $fake_tx_with = sub {
        my ($status) = @_;

        my $response    = Test::MockObject->new;
        my $transaction = Test::MockObject->new;
        $response->mock( 'code', sub { $status } );
        $transaction->mock( 'result', sub { $response } );

        return $transaction;
    };

    # Each user-agent method resolves immediately with its canned transaction.
    my @canned = (
        [ 'head_p', $fake_tx_with->(405) ],
        [ 'get_p',  $fake_tx_with->(500) ],
    );

    my $fake_ua = Test::MockObject->new;
    for my $pair (@canned) {
        my ( $method, $transaction ) = @{$pair};
        $fake_ua->mock( $method,
            sub { return Mojo::Promise->resolve($transaction); } );
    }

    $validator->ua($fake_ua);

    my ( $outcome, $failure ) =
      wait_promise( $validator->check_url_reachable('http://example.com') );

    is( $outcome, undef, 'GET fallback error has no result' );
    like( $failure, qr/URL returned 500 error/, 'GET fallback error reported' );
};
|
||||
|
||||
subtest 'check_url_reachable - classified errors' => sub {
|
||||
my @cases = (
|
||||
[
|
||||
|
||||
Reference in New Issue
Block a user