Improve URL checks to reduce false-negatives
This commit improves the URL health checking mechanism to reduce false negatives. - Treat all 2XX status codes as successful, addressing issues with codes like `204`. - Improve URL matching to exclude URLs within Markdown inline code blocks and to support URLs containing parentheses. - Add `forceHttpGetForUrlPatterns` to customize the HTTP method per URL, allowing verification of URLs behind CDNs/WAFs that do not respond to HTTP HEAD. - Send the Host header for improved handling of webpages behind proxies. - Improve formatting and context for output messages. - Fix the defaulting of options for redirects and cookie handling. - Update the user agent pool to modern browsers and platforms. - Add support for randomizing the TLS fingerprint to better mimic various clients, improving the effectiveness of checks. However, this is not fully supported by Node.js's HTTP client; see nodejs/undici#1983 for more details. - Use `AbortSignal` instead of `AbortController` as a more modern and simpler way to handle timeouts.
This commit is contained in:
@@ -1,19 +1,22 @@
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import { fetchWithTimeout } from './FetchWithTimeout';
|
||||
import { getDomainFromUrl } from './UrlDomainProcessing';
|
||||
|
||||
export function fetchFollow(
|
||||
url: string,
|
||||
timeoutInMs: number,
|
||||
fetchOptions: RequestInit,
|
||||
followOptions: IFollowOptions | undefined,
|
||||
fetchOptions?: Partial<RequestInit>,
|
||||
followOptions?: Partial<FollowOptions>,
|
||||
): Promise<Response> {
|
||||
const defaultedFollowOptions = {
|
||||
const defaultedFollowOptions: Required<FollowOptions> = {
|
||||
...DefaultFollowOptions,
|
||||
...followOptions,
|
||||
};
|
||||
if (followRedirects(defaultedFollowOptions)) {
|
||||
console.log(indentText(`Follow options: ${JSON.stringify(defaultedFollowOptions)}`));
|
||||
if (!followRedirects(defaultedFollowOptions)) {
|
||||
return fetchWithTimeout(url, timeoutInMs, fetchOptions);
|
||||
}
|
||||
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */ };
|
||||
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */, mode: 'cors' };
|
||||
const cookies = new CookieStorage(defaultedFollowOptions.enableCookies);
|
||||
return followRecursivelyWithCookies(
|
||||
url,
|
||||
@@ -24,13 +27,13 @@ export function fetchFollow(
|
||||
);
|
||||
}
|
||||
|
||||
export interface IFollowOptions {
|
||||
followRedirects?: boolean;
|
||||
maximumRedirectFollowDepth?: number;
|
||||
enableCookies?: boolean;
|
||||
export interface FollowOptions {
|
||||
readonly followRedirects?: boolean;
|
||||
readonly maximumRedirectFollowDepth?: number;
|
||||
readonly enableCookies?: boolean;
|
||||
}
|
||||
|
||||
export const DefaultFollowOptions: Required<IFollowOptions> = {
|
||||
const DefaultFollowOptions: Required<FollowOptions> = {
|
||||
followRedirects: true,
|
||||
maximumRedirectFollowDepth: 20,
|
||||
enableCookies: true,
|
||||
@@ -64,6 +67,10 @@ async function followRecursivelyWithCookies(
|
||||
if (cookieHeader) {
|
||||
cookies.addHeader(cookieHeader);
|
||||
}
|
||||
options.headers = {
|
||||
...options.headers,
|
||||
Host: getDomainFromUrl(nextUrl),
|
||||
};
|
||||
return followRecursivelyWithCookies(nextUrl, timeoutInMs, options, newFollowDepth, cookies);
|
||||
}
|
||||
|
||||
@@ -77,7 +84,7 @@ class CookieStorage {
|
||||
constructor(private readonly enabled: boolean) {
|
||||
}
|
||||
|
||||
public hasAny() {
|
||||
public hasAny(): boolean {
|
||||
return this.enabled && this.cookies.length > 0;
|
||||
}
|
||||
|
||||
@@ -88,17 +95,17 @@ class CookieStorage {
|
||||
this.cookies.push(header);
|
||||
}
|
||||
|
||||
public getHeader() {
|
||||
public getHeader(): string {
|
||||
return this.cookies.join(' ; ');
|
||||
}
|
||||
}
|
||||
|
||||
function followRedirects(options: IFollowOptions) {
|
||||
if (!options.followRedirects) {
|
||||
function followRedirects(options: FollowOptions): boolean {
|
||||
if (options.followRedirects !== true) {
|
||||
return false;
|
||||
}
|
||||
if (options.maximumRedirectFollowDepth === 0) {
|
||||
return false;
|
||||
if (options.maximumRedirectFollowDepth === undefined || options.maximumRedirectFollowDepth <= 0) {
|
||||
throw new Error('Invalid followRedirects configuration: maximumRedirectFollowDepth must be a positive integer');
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user