Compare commits
3 Commits
b5ab00ef93
...
4730c577fa
| Author | SHA1 | Date | |
|---|---|---|---|
| 4730c577fa | |||
| 940f60e471 | |||
| 407289cd2a |
@@ -5,7 +5,8 @@ use Mojo::URL;
|
|||||||
use Mojo::UserAgent;
|
use Mojo::UserAgent;
|
||||||
use Mojo::Promise;
|
use Mojo::Promise;
|
||||||
use Urupam::Utils qw(sanitize_url);
|
use Urupam::Utils qw(sanitize_url);
|
||||||
use Socket qw(getaddrinfo getnameinfo NI_NUMERICHOST NI_NUMERICSERV AF_INET AF_INET6 SOCK_STREAM);
|
use Socket
|
||||||
|
qw(getaddrinfo getnameinfo NI_NUMERICHOST NI_NUMERICSERV AF_INET AF_INET6 SOCK_STREAM);
|
||||||
|
|
||||||
my $MAX_URL_LENGTH = 2048;
|
my $MAX_URL_LENGTH = 2048;
|
||||||
my $CONNECT_TIMEOUT = 10;
|
my $CONNECT_TIMEOUT = 10;
|
||||||
@@ -158,7 +159,7 @@ sub _resolve_host {
|
|||||||
|
|
||||||
my ( $err, @results ) =
|
my ( $err, @results ) =
|
||||||
getaddrinfo( $host, undef, { socktype => SOCK_STREAM } );
|
getaddrinfo( $host, undef, { socktype => SOCK_STREAM } );
|
||||||
return Mojo::Promise->resolve([]) if $err;
|
return Mojo::Promise->resolve( [] ) if $err;
|
||||||
|
|
||||||
my @addresses;
|
my @addresses;
|
||||||
for my $res (@results) {
|
for my $res (@results) {
|
||||||
@@ -231,6 +232,24 @@ sub check_url_reachable {
|
|||||||
my $code = $tx->result->code;
|
my $code = $tx->result->code;
|
||||||
|
|
||||||
return 1 if $code >= 200 && $code < 400;
|
return 1 if $code >= 200 && $code < 400;
|
||||||
|
if ( $code == 403 || $code == 404 || $code == 405 ) {
|
||||||
|
return $self->ua->get_p($url)->then(
|
||||||
|
sub {
|
||||||
|
my $get_tx = shift;
|
||||||
|
my $get_code = $get_tx->result->code;
|
||||||
|
return 1 if $get_code >= 200 && $get_code < 400;
|
||||||
|
return 1
|
||||||
|
if $get_code == 403
|
||||||
|
|| $get_code == 404
|
||||||
|
|| $get_code == 405;
|
||||||
|
return Mojo::Promise->reject(
|
||||||
|
"URL returned $get_code error")
|
||||||
|
if $get_code >= 400;
|
||||||
|
return Mojo::Promise->reject(
|
||||||
|
"URL returned unexpected status: $get_code");
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
return Mojo::Promise->reject("URL returned $code error")
|
return Mojo::Promise->reject("URL returned $code error")
|
||||||
if $code >= 400;
|
if $code >= 400;
|
||||||
return Mojo::Promise->reject(
|
return Mojo::Promise->reject(
|
||||||
@@ -295,11 +314,13 @@ sub validate_url_with_checks {
|
|||||||
unless $parsed->scheme && $parsed->scheme =~ /^https?$/i;
|
unless $parsed->scheme && $parsed->scheme =~ /^https?$/i;
|
||||||
return Mojo::Promise->reject('Invalid URL format') unless $parsed->host;
|
return Mojo::Promise->reject('Invalid URL format') unless $parsed->host;
|
||||||
|
|
||||||
|
my $normalized = $parsed->to_string;
|
||||||
|
|
||||||
return Mojo::Promise->reject(
|
return Mojo::Promise->reject(
|
||||||
"URL exceeds maximum length of $MAX_URL_LENGTH characters")
|
"URL exceeds maximum length of $MAX_URL_LENGTH characters")
|
||||||
unless $self->is_valid_url_length($sanitized);
|
unless $self->is_valid_url_length($normalized);
|
||||||
|
|
||||||
return $self->is_blocked_url($sanitized)->then(
|
return $self->is_blocked_url($normalized)->then(
|
||||||
sub {
|
sub {
|
||||||
my $blocked = shift;
|
my $blocked = shift;
|
||||||
return Mojo::Promise->reject(
|
return Mojo::Promise->reject(
|
||||||
@@ -308,12 +329,12 @@ sub validate_url_with_checks {
|
|||||||
|
|
||||||
my $ssl_check =
|
my $ssl_check =
|
||||||
$parsed->scheme eq 'https'
|
$parsed->scheme eq 'https'
|
||||||
? $self->check_ssl_certificate($sanitized)
|
? $self->check_ssl_certificate($normalized)
|
||||||
: Mojo::Promise->resolve(1);
|
: Mojo::Promise->resolve(1);
|
||||||
|
|
||||||
return $ssl_check->then(
|
return $ssl_check->then(
|
||||||
sub { return $self->check_url_reachable($sanitized); } )
|
sub { return $self->check_url_reachable($normalized); } )
|
||||||
->then( sub { return $sanitized; } );
|
->then( sub { return $normalized; } );
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -106,6 +106,11 @@ subtest 'sanitize_url' => sub {
|
|||||||
'https://example.com/~user/docs',
|
'https://example.com/~user/docs',
|
||||||
'unescapes multiple percent-encoded segments'
|
'unescapes multiple percent-encoded segments'
|
||||||
],
|
],
|
||||||
|
[
|
||||||
|
'https://fr.wikipedia.org/wiki/Pic_L%C3%A9nine',
|
||||||
|
'https://fr.wikipedia.org/wiki/Pic_L%C3%A9nine',
|
||||||
|
'preserves UTF-8 percent-encoded path'
|
||||||
|
],
|
||||||
[
|
[
|
||||||
'https://example.com?q=hello%20world',
|
'https://example.com?q=hello%20world',
|
||||||
'https://example.com?q=hello%20world',
|
'https://example.com?q=hello%20world',
|
||||||
|
|||||||
@@ -21,15 +21,25 @@ sub mock_ua_with_code {
|
|||||||
my $mock_ua = Test::MockObject->new;
|
my $mock_ua = Test::MockObject->new;
|
||||||
my $mock_tx = Test::MockObject->new;
|
my $mock_tx = Test::MockObject->new;
|
||||||
my $mock_result = Test::MockObject->new;
|
my $mock_result = Test::MockObject->new;
|
||||||
|
my $mock_get_tx = Test::MockObject->new;
|
||||||
|
my $mock_get_result = Test::MockObject->new;
|
||||||
|
|
||||||
$mock_result->mock( 'code', sub { $code } );
|
$mock_result->mock( 'code', sub { $code } );
|
||||||
$mock_tx->mock( 'result', sub { $mock_result } );
|
$mock_tx->mock( 'result', sub { $mock_result } );
|
||||||
|
$mock_get_result->mock( 'code', sub { $code } );
|
||||||
|
$mock_get_tx->mock( 'result', sub { $mock_get_result } );
|
||||||
$mock_ua->mock(
|
$mock_ua->mock(
|
||||||
'head_p',
|
'head_p',
|
||||||
sub {
|
sub {
|
||||||
return Mojo::Promise->resolve($mock_tx);
|
return Mojo::Promise->resolve($mock_tx);
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
$mock_ua->mock(
|
||||||
|
'get_p',
|
||||||
|
sub {
|
||||||
|
return Mojo::Promise->resolve($mock_get_tx);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
return $mock_ua;
|
return $mock_ua;
|
||||||
}
|
}
|
||||||
@@ -274,7 +284,7 @@ subtest 'check_url_reachable - success codes' => sub {
|
|||||||
|
|
||||||
subtest 'check_url_reachable - error codes' => sub {
|
subtest 'check_url_reachable - error codes' => sub {
|
||||||
my @cases = (
|
my @cases = (
|
||||||
[ 404, qr/URL returned 404 error/, '4xx status returns error' ],
|
[ 410, qr/URL returned 410 error/, '4xx status returns error' ],
|
||||||
[ 500, qr/URL returned 500 error/, '5xx status returns error' ],
|
[ 500, qr/URL returned 500 error/, '5xx status returns error' ],
|
||||||
[ 100, qr/unexpected status/, 'unexpected status returns error' ],
|
[ 100, qr/unexpected status/, 'unexpected status returns error' ],
|
||||||
);
|
);
|
||||||
@@ -288,6 +298,74 @@ subtest 'check_url_reachable - error codes' => sub {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
subtest 'check_url_reachable - HEAD fallback to GET' => sub {
|
||||||
|
my $mock_ua = Test::MockObject->new;
|
||||||
|
my $head_tx = Test::MockObject->new;
|
||||||
|
my $head_result = Test::MockObject->new;
|
||||||
|
my $get_tx = Test::MockObject->new;
|
||||||
|
my $get_result = Test::MockObject->new;
|
||||||
|
|
||||||
|
$head_result->mock( 'code', sub { 404 } );
|
||||||
|
$head_tx->mock( 'result', sub { $head_result } );
|
||||||
|
|
||||||
|
$get_result->mock( 'code', sub { 200 } );
|
||||||
|
$get_tx->mock( 'result', sub { $get_result } );
|
||||||
|
|
||||||
|
$mock_ua->mock(
|
||||||
|
'head_p',
|
||||||
|
sub {
|
||||||
|
return Mojo::Promise->resolve($head_tx);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
$mock_ua->mock(
|
||||||
|
'get_p',
|
||||||
|
sub {
|
||||||
|
return Mojo::Promise->resolve($get_tx);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
$validator->ua($mock_ua);
|
||||||
|
my ( $result, $error ) =
|
||||||
|
wait_promise( $validator->check_url_reachable('http://example.com') );
|
||||||
|
|
||||||
|
is( $result, 1, 'GET fallback returns success' );
|
||||||
|
is( $error, undef, 'GET fallback has no error' );
|
||||||
|
};
|
||||||
|
|
||||||
|
subtest 'check_url_reachable - HEAD fallback error' => sub {
|
||||||
|
my $mock_ua = Test::MockObject->new;
|
||||||
|
my $head_tx = Test::MockObject->new;
|
||||||
|
my $head_result = Test::MockObject->new;
|
||||||
|
my $get_tx = Test::MockObject->new;
|
||||||
|
my $get_result = Test::MockObject->new;
|
||||||
|
|
||||||
|
$head_result->mock( 'code', sub { 405 } );
|
||||||
|
$head_tx->mock( 'result', sub { $head_result } );
|
||||||
|
|
||||||
|
$get_result->mock( 'code', sub { 500 } );
|
||||||
|
$get_tx->mock( 'result', sub { $get_result } );
|
||||||
|
|
||||||
|
$mock_ua->mock(
|
||||||
|
'head_p',
|
||||||
|
sub {
|
||||||
|
return Mojo::Promise->resolve($head_tx);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
$mock_ua->mock(
|
||||||
|
'get_p',
|
||||||
|
sub {
|
||||||
|
return Mojo::Promise->resolve($get_tx);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
$validator->ua($mock_ua);
|
||||||
|
my ( $result, $error ) =
|
||||||
|
wait_promise( $validator->check_url_reachable('http://example.com') );
|
||||||
|
|
||||||
|
is( $result, undef, 'GET fallback error has no result' );
|
||||||
|
like( $error, qr/URL returned 500 error/, 'GET fallback error reported' );
|
||||||
|
};
|
||||||
|
|
||||||
subtest 'check_url_reachable - classified errors' => sub {
|
subtest 'check_url_reachable - classified errors' => sub {
|
||||||
my @cases = (
|
my @cases = (
|
||||||
[
|
[
|
||||||
|
|||||||
Reference in New Issue
Block a user