Improve URL checks to reduce false-negatives
This commit improves the URL health checking mechanism to reduce false negatives. - Treat all 2XX status codes as successful, addressing issues with codes like `204`. - Exclude URLs within Markdown inline code blocks. - Send the Host header for improved handling of webpages behind proxies. - Improve formatting and context for output messages. - Fix the defaulting of options for redirects and cookie handling. - Add URL exclusion support for non-responsive URLs. - Update the user agent pool to modern browsers and platforms. - Improve CI/CD workflow to respond to modifications in the `tests/checks/external-urls` directory, offering immediate feedback on potential impacts to the external URL test. - Add support for randomizing the TLS fingerprint to mimic various clients better, improving the effectiveness of checks. However, this is not fully supported by Node.js's HTTP client; see nodejs/undici#1983 for more details. - Use `AbortSignal` instead of `AbortController` as a more modern and simpler way to handle timeouts.
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import { splitTextIntoLines, indentText } from '../utils/text';
|
||||
import { indentText, splitTextIntoLines } from '@tests/shared/Text';
|
||||
import { log, die } from '../utils/log';
|
||||
import { readAppLogFile } from './app-logs';
|
||||
import { STDERR_IGNORE_PATTERNS } from './error-ignore-patterns';
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { filterEmpty } from '@tests/shared/Text';
|
||||
import { runCommand } from '../../utils/run-command';
|
||||
import { log, LogLevel } from '../../utils/log';
|
||||
import { SupportedPlatform, CURRENT_PLATFORM } from '../../utils/platform';
|
||||
import { filterEmpty } from '../../utils/text';
|
||||
|
||||
export async function captureWindowTitles(processId: number) {
|
||||
if (!processId) { throw new Error('Missing process ID.'); }
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import { logCurrentArgs, CommandLineFlag, hasCommandLineFlag } from './cli-args';
|
||||
import { log, die } from './utils/log';
|
||||
import { ensureNpmProjectDir, npmInstall, npmBuild } from './utils/npm';
|
||||
@@ -15,7 +16,6 @@ import {
|
||||
APP_EXECUTION_DURATION_IN_SECONDS,
|
||||
SCREENSHOT_PATH,
|
||||
} from './config';
|
||||
import { indentText } from './utils/text';
|
||||
import type { ExtractionResult } from './app/extractors/common/extraction-result';
|
||||
|
||||
export async function main(): Promise<void> {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { exec, type ExecOptions, type ExecException } from 'node:child_process';
|
||||
import { indentText } from './text';
|
||||
import { exec } from 'child_process';
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import type { ExecOptions, ExecException } from 'child_process';
|
||||
|
||||
const TIMEOUT_IN_SECONDS = 180;
|
||||
const MAX_OUTPUT_BUFFER_SIZE = 1024 * 1024; // 1 MB
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
import { isString } from '@/TypeHelpers';
|
||||
|
||||
/**
 * Indents a block of text with tab characters.
 *
 * The text is broken into lines with `splitTextIntoLines`, every non-empty line is prefixed
 * with `indentLevel` tabs, and the lines are joined back together with `\n`.
 *
 * @param text - Text to indent; validated to be a string.
 * @param indentLevel - Number of tab characters to prepend to each non-empty line.
 * @returns The indented text.
 * @throws If `text` fails validation.
 */
export function indentText(
  text: string,
  indentLevel = 1,
): string {
  validateText(text);
  const prefix = '\t'.repeat(indentLevel);
  const indentedLines: string[] = [];
  for (const line of splitTextIntoLines(text)) {
    // Empty lines are kept as they are so no whitespace-only lines are produced.
    indentedLines.push(line.length > 0 ? `${prefix}${line}` : line);
  }
  return indentedLines.join('\n');
}
|
||||
|
||||
/**
 * Breaks text into lines.
 *
 * Any run of one or more carriage-return / line-feed characters counts as a single separator,
 * so blank lines between content are not preserved as separate entries.
 *
 * @param text - Text to split; validated to be a string.
 * @returns The pieces of `text` found between line-break runs.
 * @throws If `text` fails validation.
 */
export function splitTextIntoLines(text: string): string[] {
  validateText(text);
  const lineBreakRun = /[\r\n]+/;
  const lines = text.split(lineBreakRun);
  return lines;
}
|
||||
|
||||
/**
 * Drops every falsy entry (`undefined`, `null` and the empty string) from a list of texts.
 *
 * @param texts - Candidate texts, possibly containing missing or empty values.
 * @returns A new array with only the non-empty strings, in their original order.
 */
export function filterEmpty(texts: readonly (string | undefined | null)[]): string[] {
  const nonEmptyTexts: string[] = [];
  for (const candidate of texts) {
    if (candidate) {
      nonEmptyTexts.push(candidate);
    }
  }
  return nonEmptyTexts;
}
|
||||
|
||||
/**
 * Guards against non-string values reaching the text helpers at runtime.
 *
 * @param text - Value expected to be a string.
 * @throws Error describing the actual type and value when `isString` rejects `text`.
 */
function validateText(text: string): void {
  if (isString(text)) {
    return;
  }
  throw new Error(`text is not a string. It is: ${typeof text}\n${text}`);
}
|
||||
@@ -1,64 +1,62 @@
|
||||
import { sleep } from '@/infrastructure/Threading/AsyncSleep';
|
||||
import { getUrlStatus, type IRequestOptions } from './Requestor';
|
||||
import { groupUrlsByDomain } from './UrlPerDomainGrouper';
|
||||
import type { IUrlStatus } from './IUrlStatus';
|
||||
import { getUrlStatus, type RequestOptions } from './Requestor';
|
||||
import { groupUrlsByDomain } from './UrlDomainProcessing';
|
||||
import type { FollowOptions } from './FetchFollow';
|
||||
import type { UrlStatus } from './UrlStatus';
|
||||
|
||||
export async function getUrlStatusesInParallel(
|
||||
urls: string[],
|
||||
options?: IBatchRequestOptions,
|
||||
): Promise<IUrlStatus[]> {
|
||||
// urls = [ 'https://privacy.sexy' ]; // Here to comment out when testing
|
||||
options?: BatchRequestOptions,
|
||||
): Promise<UrlStatus[]> {
|
||||
// urls = ['https://privacy.sexy']; // Uncomment this line to use a hardcoded URL for testing.
|
||||
const uniqueUrls = Array.from(new Set(urls));
|
||||
const defaultedOptions = { ...DefaultOptions, ...options };
|
||||
console.log('Options: ', defaultedOptions);
|
||||
const results = await request(uniqueUrls, defaultedOptions);
|
||||
const defaultedDomainOptions = { ...DefaultDomainOptions, ...options?.domainOptions };
|
||||
console.log('Batch request options applied:', defaultedDomainOptions);
|
||||
const results = await request(uniqueUrls, defaultedDomainOptions, options);
|
||||
return results;
|
||||
}
|
||||
|
||||
export interface IBatchRequestOptions {
|
||||
domainOptions?: IDomainOptions;
|
||||
requestOptions?: IRequestOptions;
|
||||
export interface BatchRequestOptions {
|
||||
readonly domainOptions?: Partial<DomainOptions>;
|
||||
readonly requestOptions?: Partial<RequestOptions>;
|
||||
readonly followOptions?: Partial<FollowOptions>;
|
||||
}
|
||||
|
||||
interface IDomainOptions {
|
||||
sameDomainParallelize?: boolean;
|
||||
sameDomainDelayInMs?: number;
|
||||
interface DomainOptions {
|
||||
readonly sameDomainParallelize?: boolean;
|
||||
readonly sameDomainDelayInMs?: number;
|
||||
}
|
||||
|
||||
const DefaultOptions: Required<IBatchRequestOptions> = {
|
||||
domainOptions: {
|
||||
sameDomainParallelize: false,
|
||||
sameDomainDelayInMs: 3 /* sec */ * 1000,
|
||||
},
|
||||
requestOptions: {
|
||||
retryExponentialBaseInMs: 5 /* sec */ * 1000,
|
||||
requestTimeoutInMs: 60 /* sec */ * 1000,
|
||||
additionalHeaders: {},
|
||||
},
|
||||
const DefaultDomainOptions: Required<DomainOptions> = {
|
||||
sameDomainParallelize: false,
|
||||
sameDomainDelayInMs: 3 /* sec */ * 1000,
|
||||
};
|
||||
|
||||
function request(
|
||||
urls: string[],
|
||||
options: Required<IBatchRequestOptions>,
|
||||
): Promise<IUrlStatus[]> {
|
||||
if (!options.domainOptions.sameDomainParallelize) {
|
||||
domainOptions: Required<DomainOptions>,
|
||||
options?: BatchRequestOptions,
|
||||
): Promise<UrlStatus[]> {
|
||||
if (!domainOptions.sameDomainParallelize) {
|
||||
return runOnEachDomainWithDelay(
|
||||
urls,
|
||||
(url) => getUrlStatus(url, options.requestOptions),
|
||||
options.domainOptions.sameDomainDelayInMs,
|
||||
(url) => getUrlStatus(url, options?.requestOptions, options?.followOptions),
|
||||
domainOptions.sameDomainDelayInMs,
|
||||
);
|
||||
}
|
||||
return Promise.all(urls.map((url) => getUrlStatus(url, options.requestOptions)));
|
||||
return Promise.all(
|
||||
urls.map((url) => getUrlStatus(url, options?.requestOptions, options?.followOptions)),
|
||||
);
|
||||
}
|
||||
|
||||
async function runOnEachDomainWithDelay(
|
||||
urls: string[],
|
||||
action: (url: string) => Promise<IUrlStatus>,
|
||||
action: (url: string) => Promise<UrlStatus>,
|
||||
delayInMs: number | undefined,
|
||||
): Promise<IUrlStatus[]> {
|
||||
): Promise<UrlStatus[]> {
|
||||
const grouped = groupUrlsByDomain(urls);
|
||||
const tasks = grouped.map(async (group) => {
|
||||
const results = new Array<IUrlStatus>();
|
||||
const results = new Array<UrlStatus>();
|
||||
/* eslint-disable no-await-in-loop */
|
||||
for (const url of group) {
|
||||
const status = await action(url);
|
||||
|
||||
@@ -1,27 +1,33 @@
|
||||
import { sleep } from '@/infrastructure/Threading/AsyncSleep';
|
||||
import type { IUrlStatus } from './IUrlStatus';
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import { type UrlStatus, formatUrlStatus } from './UrlStatus';
|
||||
|
||||
const DefaultBaseRetryIntervalInMs = 5 /* sec */ * 1000;
|
||||
|
||||
export async function retryWithExponentialBackOff(
|
||||
action: () => Promise<IUrlStatus>,
|
||||
action: () => Promise<UrlStatus>,
|
||||
baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs,
|
||||
currentRetry = 1,
|
||||
): Promise<IUrlStatus> {
|
||||
): Promise<UrlStatus> {
|
||||
const maxTries = 3;
|
||||
const status = await action();
|
||||
if (shouldRetry(status)) {
|
||||
if (currentRetry <= maxTries) {
|
||||
const exponentialBackOffInMs = getRetryTimeoutInMs(currentRetry, baseRetryIntervalInMs);
|
||||
console.log(`Retrying (${currentRetry}) in ${exponentialBackOffInMs / 1000} seconds`, status);
|
||||
console.log([
|
||||
`Attempt ${currentRetry}: Retrying in ${exponentialBackOffInMs / 1000} seconds.`,
|
||||
'Details:',
|
||||
indentText(formatUrlStatus(status)),
|
||||
].join('\n'));
|
||||
await sleep(exponentialBackOffInMs);
|
||||
return retryWithExponentialBackOff(action, baseRetryIntervalInMs, currentRetry + 1);
|
||||
}
|
||||
console.warn('💀 All retry attempts failed. Final failure to retrieve URL:', indentText(formatUrlStatus(status)));
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
function shouldRetry(status: IUrlStatus) {
|
||||
function shouldRetry(status: UrlStatus): boolean {
|
||||
if (status.error) {
|
||||
return true;
|
||||
}
|
||||
@@ -32,14 +38,14 @@ function shouldRetry(status: IUrlStatus) {
|
||||
|| status.code === 429; // Too Many Requests
|
||||
}
|
||||
|
||||
function isTransientError(statusCode: number) {
|
||||
function isTransientError(statusCode: number): boolean {
|
||||
return statusCode >= 500 && statusCode <= 599;
|
||||
}
|
||||
|
||||
function getRetryTimeoutInMs(
|
||||
currentRetry: number,
|
||||
baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs,
|
||||
) {
|
||||
): number {
|
||||
const retryRandomFactor = 0.5; // Retry intervals are between 50% and 150%
|
||||
// of the exponentially increasing base amount
|
||||
const minRandom = 1 - retryRandomFactor;
|
||||
|
||||
@@ -1,19 +1,17 @@
|
||||
import { fetchWithTimeout } from './FetchWithTimeout';
|
||||
import { getDomainFromUrl } from './UrlDomainProcessing';
|
||||
|
||||
export function fetchFollow(
|
||||
url: string,
|
||||
timeoutInMs: number,
|
||||
fetchOptions: RequestInit,
|
||||
followOptions: IFollowOptions | undefined,
|
||||
fetchOptions?: Partial<RequestInit>,
|
||||
followOptions?: Partial<FollowOptions>,
|
||||
): Promise<Response> {
|
||||
const defaultedFollowOptions = {
|
||||
...DefaultFollowOptions,
|
||||
...followOptions,
|
||||
};
|
||||
const defaultedFollowOptions = { ...DefaultFollowOptions, ...followOptions };
|
||||
if (followRedirects(defaultedFollowOptions)) {
|
||||
return fetchWithTimeout(url, timeoutInMs, fetchOptions);
|
||||
}
|
||||
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */ };
|
||||
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */, mode: 'cors' };
|
||||
const cookies = new CookieStorage(defaultedFollowOptions.enableCookies);
|
||||
return followRecursivelyWithCookies(
|
||||
url,
|
||||
@@ -24,13 +22,15 @@ export function fetchFollow(
|
||||
);
|
||||
}
|
||||
|
||||
export interface IFollowOptions {
|
||||
followRedirects?: boolean;
|
||||
maximumRedirectFollowDepth?: number;
|
||||
enableCookies?: boolean;
|
||||
// "cors" | "navigate" | "no-cors" | "same-origin";
|
||||
|
||||
export interface FollowOptions {
|
||||
readonly followRedirects?: boolean;
|
||||
readonly maximumRedirectFollowDepth?: number;
|
||||
readonly enableCookies?: boolean;
|
||||
}
|
||||
|
||||
export const DefaultFollowOptions: Required<IFollowOptions> = {
|
||||
const DefaultFollowOptions: Required<FollowOptions> = {
|
||||
followRedirects: true,
|
||||
maximumRedirectFollowDepth: 20,
|
||||
enableCookies: true,
|
||||
@@ -64,6 +64,10 @@ async function followRecursivelyWithCookies(
|
||||
if (cookieHeader) {
|
||||
cookies.addHeader(cookieHeader);
|
||||
}
|
||||
options.headers = {
|
||||
...options.headers,
|
||||
Host: getDomainFromUrl(nextUrl),
|
||||
};
|
||||
return followRecursivelyWithCookies(nextUrl, timeoutInMs, options, newFollowDepth, cookies);
|
||||
}
|
||||
|
||||
@@ -77,7 +81,7 @@ class CookieStorage {
|
||||
constructor(private readonly enabled: boolean) {
|
||||
}
|
||||
|
||||
public hasAny() {
|
||||
public hasAny(): boolean {
|
||||
return this.enabled && this.cookies.length > 0;
|
||||
}
|
||||
|
||||
@@ -88,12 +92,12 @@ class CookieStorage {
|
||||
this.cookies.push(header);
|
||||
}
|
||||
|
||||
public getHeader() {
|
||||
public getHeader(): string {
|
||||
return this.cookies.join(' ; ');
|
||||
}
|
||||
}
|
||||
|
||||
function followRedirects(options: IFollowOptions) {
|
||||
function followRedirects(options: FollowOptions): boolean {
|
||||
if (!options.followRedirects) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -2,13 +2,13 @@ export async function fetchWithTimeout(
|
||||
url: string,
|
||||
timeoutInMs: number,
|
||||
init?: RequestInit,
|
||||
): Promise<Response> {
|
||||
const controller = new AbortController();
|
||||
): ReturnType<typeof fetch> {
|
||||
const options: RequestInit = {
|
||||
...(init ?? {}),
|
||||
signal: controller.signal,
|
||||
signal: AbortSignal.timeout(timeoutInMs),
|
||||
};
|
||||
const promise = fetch(url, options);
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutInMs);
|
||||
return promise.finally(() => clearTimeout(timeout));
|
||||
return fetch(
|
||||
url,
|
||||
options,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
/** Outcome of checking a single URL: either a response status code or an error description. */
export interface IUrlStatus {
  /** The URL that was requested. */
  url: string;
  /** Status code of the response, when one was received. */
  code?: number;
  /** Serialized error details, when the request failed. */
  error?: string;
}
|
||||
@@ -13,7 +13,10 @@ A CLI and SDK for checking the availability of external URLs.
|
||||
- 😇 **Rate Limiting**: Queues requests by domain to be polite.
|
||||
- 🔁 **Retries**: Implements retry pattern with exponential back-off.
|
||||
- ⌚ **Timeouts**: Configurable timeout for each request.
|
||||
- 🎭️ **User-Agent Rotation**: Change user agents for each request.
|
||||
- 🎭️ **Impersonation**: Impersonate different browsers for each request.
|
||||
- **🌐 User-Agent Rotation**: Change user agents.
|
||||
- **🔑 TLS Handshakes**: Perform TLS and HTTP handshakes that resemble those of a real browser (Node.js cannot mimic them exactly).
|
||||
- 🫙 **Cookie jar**: Preserve cookies during redirects to mimic real browser.
|
||||
|
||||
## CLI
|
||||
|
||||
@@ -54,6 +57,7 @@ const statuses = await getUrlStatusesInParallel([ 'https://privacy.sexy', /* ...
|
||||
- **`sameDomainDelayInMs`** (*number*), default: `3000` (3 seconds)
|
||||
- Sets the delay between requests to the same domain.
|
||||
- `requestOptions` (*object*): See [request options](#request-options).
|
||||
- `followOptions` (*object*): See [follow options](#follow-options).
|
||||
|
||||
### `getUrlStatus`
|
||||
|
||||
@@ -72,7 +76,6 @@ console.log(`Status code: ${status.code}`);
|
||||
- The longer the base time, the greater the intervals between retries.
|
||||
- **`additionalHeaders`** (*object*), default: `{}`
|
||||
- Additional HTTP headers to send along with the default headers. Overrides default headers if specified.
|
||||
- **`followOptions`** (*object*): See [follow options](#follow-options).
|
||||
- **`requestTimeoutInMs`** (*number*), default: `60000` (60 seconds)
|
||||
- Time limit to abort the request if no response is received within the specified time frame.
|
||||
|
||||
@@ -83,19 +86,7 @@ Follows `3XX` redirects while preserving cookies.
|
||||
Uses the same options as the fetch API, with an additional timeout parameter and a final parameter that specifies [follow options](#follow-options); `redirect: 'follow' | 'manual' | 'error'` is discarded in favor of the follow options.
|
||||
|
||||
```js
|
||||
const status = await fetchFollow('https://privacy.sexy', {
|
||||
// First argument is same options as fetch API, except `redirect` options
|
||||
// that's discarded in favor of next argument follow options
|
||||
headers: {
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
},
|
||||
}, {
|
||||
// Second argument sets the redirect behavior
|
||||
followRedirects: true,
|
||||
maximumRedirectFollowDepth: 20,
|
||||
enableCookies: true,
|
||||
}
|
||||
);
|
||||
const response = await fetchFollow('https://privacy.sexy', 1000 /* timeout in milliseconds */);
|
||||
console.log(`Status code: ${response.status}`);
|
||||
```
|
||||
|
||||
|
||||
@@ -1,70 +1,108 @@
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import { retryWithExponentialBackOff } from './ExponentialBackOffRetryHandler';
|
||||
import { fetchFollow, type IFollowOptions, DefaultFollowOptions } from './FetchFollow';
|
||||
import { fetchFollow, type FollowOptions } from './FetchFollow';
|
||||
import { getRandomUserAgent } from './UserAgents';
|
||||
import type { IUrlStatus } from './IUrlStatus';
|
||||
import { getDomainFromUrl } from './UrlDomainProcessing';
|
||||
import { randomizeTlsFingerprint, getTlsContextInfo } from './TlsFingerprintRandomizer';
|
||||
import type { UrlStatus } from './UrlStatus';
|
||||
|
||||
export function getUrlStatus(
|
||||
url: string,
|
||||
options: IRequestOptions = DefaultOptions,
|
||||
): Promise<IUrlStatus> {
|
||||
const defaultedOptions = { ...DefaultOptions, ...options };
|
||||
const fetchOptions = getFetchOptions(url, defaultedOptions);
|
||||
return retryWithExponentialBackOff(async () => {
|
||||
console.log('Requesting', url);
|
||||
let result: IUrlStatus;
|
||||
try {
|
||||
const response = await fetchFollow(
|
||||
url,
|
||||
defaultedOptions.requestTimeoutInMs,
|
||||
fetchOptions,
|
||||
defaultedOptions.followOptions,
|
||||
);
|
||||
result = { url, code: response.status };
|
||||
} catch (err) {
|
||||
result = { url, error: JSON.stringify(err, null, '\t') };
|
||||
}
|
||||
return result;
|
||||
}, defaultedOptions.retryExponentialBaseInMs);
|
||||
requestOptions?: Partial<RequestOptions>,
|
||||
followOptions?: Partial<FollowOptions>,
|
||||
): Promise<UrlStatus> {
|
||||
const defaultedOptions = getDefaultedRequestOptions(requestOptions);
|
||||
if (defaultedOptions.randomizeTlsFingerprint) {
|
||||
randomizeTlsFingerprint();
|
||||
}
|
||||
return fetchUrlStatusWithRetry(url, defaultedOptions, followOptions);
|
||||
}
|
||||
|
||||
export interface IRequestOptions {
|
||||
export interface RequestOptions {
|
||||
readonly retryExponentialBaseInMs?: number;
|
||||
readonly additionalHeaders?: Record<string, string>;
|
||||
readonly additionalHeadersUrlIgnore?: string[];
|
||||
readonly followOptions?: IFollowOptions;
|
||||
readonly requestTimeoutInMs: number;
|
||||
readonly randomizeTlsFingerprint: boolean;
|
||||
}
|
||||
|
||||
const DefaultOptions: Required<IRequestOptions> = {
|
||||
retryExponentialBaseInMs: 5000,
|
||||
const DefaultOptions: Required<RequestOptions> = {
|
||||
retryExponentialBaseInMs: 5 /* sec */ * 1000,
|
||||
additionalHeaders: {},
|
||||
additionalHeadersUrlIgnore: [],
|
||||
requestTimeoutInMs: 60 /* seconds */ * 1000,
|
||||
followOptions: DefaultFollowOptions,
|
||||
randomizeTlsFingerprint: true,
|
||||
};
|
||||
|
||||
function getFetchOptions(url: string, options: Required<IRequestOptions>): RequestInit {
|
||||
/**
 * Requests `url` through `fetchFollow` and reports the outcome, retrying through
 * `retryWithExponentialBackOff`.
 *
 * A received response is reported by its status code. A thrown error is reported as a
 * multi-line description containing the error itself, the fetch options that were used and
 * the current TLS context, so that failures can be diagnosed from the log output alone.
 *
 * @param url - URL to check.
 * @param requestOptions - Fully defaulted request options (timeout, retry base, headers).
 * @param followOptions - Optional redirect/cookie behavior forwarded to `fetchFollow`.
 * @returns The status of the URL after the retry handler has finished.
 */
function fetchUrlStatusWithRetry(
  url: string,
  requestOptions: Required<RequestOptions>,
  followOptions?: Partial<FollowOptions>,
): Promise<UrlStatus> {
  const fetchOptions = getFetchOptions(url, requestOptions);
  return retryWithExponentialBackOff(async () => {
    console.log(`Initiating request for URL: ${url}`);
    try {
      const response = await fetchFollow(
        url,
        requestOptions.requestTimeoutInMs,
        fetchOptions,
        followOptions,
      );
      return { url, code: response.status };
    } catch (err: unknown) {
      return {
        url,
        error: [
          'Error:', indentText(formatRequestError(err)),
          'Options:', indentText(JSON.stringify(fetchOptions, null, '\t')),
          'TLS:', indentText(getTlsContextInfo()),
        ].join('\n'),
      };
    }
  }, requestOptions.retryExponentialBaseInMs);
}

/**
 * Converts a caught value into readable text.
 *
 * `Error` instances are formatted from their name and message (plus their `cause`, if any)
 * because `JSON.stringify` serializes them as `{}` and would hide the failure reason.
 * Other values are JSON-serialized, falling back to `String()` when that yields nothing.
 */
function formatRequestError(err: unknown): string {
  if (err instanceof Error) {
    const lines = [`${err.name}: ${err.message}`];
    if ('cause' in err && err.cause !== undefined) {
      lines.push(`Cause: ${formatRequestError(err.cause)}`);
    }
    return lines.join('\n');
  }
  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(err, null, '\t');
  } catch {
    // Not serializable (e.g. circular structure); fall through to String().
    serialized = undefined;
  }
  return serialized || String(err);
}
|
||||
|
||||
function getFetchOptions(url: string, options: Required<RequestOptions>): RequestInit {
|
||||
const additionalHeaders = options.additionalHeadersUrlIgnore
|
||||
.some((ignorePattern) => url.startsWith(ignorePattern))
|
||||
? {}
|
||||
: options.additionalHeaders;
|
||||
return {
|
||||
method: 'HEAD',
|
||||
method: 'GET', // Fetch only headers without the full response body for better speed
|
||||
headers: {
|
||||
...getDefaultHeaders(),
|
||||
...getDefaultHeaders(url),
|
||||
...additionalHeaders,
|
||||
},
|
||||
redirect: 'manual', // Redirects are handled manually, automatic redirects do not work with Host header
|
||||
};
|
||||
}
|
||||
|
||||
function getDefaultHeaders(): Record<string, string> {
|
||||
function getDefaultHeaders(url: string): Record<string, string> {
|
||||
return {
|
||||
'user-agent': getRandomUserAgent(),
|
||||
'upgrade-insecure-requests': '1',
|
||||
connection: 'keep-alive',
|
||||
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
|
||||
'accept-encoding': 'gzip, deflate, br',
|
||||
'cache-control': 'max-age=0',
|
||||
'accept-language': 'en-US,en;q=0.9',
|
||||
// Needed for websites that filter out non-browser user agents.
|
||||
'User-Agent': getRandomUserAgent(),
|
||||
|
||||
// Required for some websites, especially those behind proxies, to correctly handle the request.
|
||||
Host: getDomainFromUrl(url),
|
||||
|
||||
// The following mimic a real browser request to improve compatibility with most web servers.
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
Connection: 'keep-alive',
|
||||
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Cache-Control': 'max-age=0',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Completes partially specified request options with the module defaults.
 *
 * @param options - Caller-provided overrides; may be omitted entirely.
 * @returns `DefaultOptions` with every property present in `options` taking precedence.
 */
function getDefaultedRequestOptions(
  options?: Partial<RequestOptions>,
): Required<RequestOptions> {
  const defaulted: Required<RequestOptions> = Object.assign({}, DefaultOptions, options);
  return defaulted;
}
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
/**
|
||||
* Modifies the TLS fingerprint of Node.js HTTP client to circumvent TLS fingerprinting blocks.
|
||||
* TLS fingerprinting is a technique used to identify clients based on the unencrypted data sent
|
||||
* during the TLS handshake, used for blocking or identifying non-browser clients like debugging
|
||||
* proxies or automated scripts.
|
||||
*
|
||||
* However, Node.js's HTTP client does not fully support all methods required for impersonating a
|
||||
* browser's TLS fingerprint, as reported in https://github.com/nodejs/undici/issues/1983.
|
||||
* While this implementation can alter the TLS fingerprint by randomizing the cipher suite order,
|
||||
* it may not perfectly mimic specific browser fingerprints due to limitations in the TLS
|
||||
* implementation of Node.js.
|
||||
*
|
||||
* For more detailed information, visit:
|
||||
* - https://archive.today/2024.03.13-102042/https://httptoolkit.com/blog/tls-fingerprinting-node-js/
|
||||
* - https://check.ja3.zone/ (To check your tool's or browser's fingerprint)
|
||||
* - https://github.com/lwthiker/curl-impersonate (A solution for curl)
|
||||
* - https://github.com/depicts/got-tls (Cipher manipulation support for Node.js)
|
||||
*/
|
||||
|
||||
import { constants } from 'crypto';
|
||||
import tls from 'tls';
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
|
||||
/**
 * Replaces the process-wide default TLS cipher list with a shuffled one and logs the
 * original cipher list together with the resulting TLS context.
 */
export function randomizeTlsFingerprint() {
  const shuffledCipherList = getShuffledCiphers().join(':');
  tls.DEFAULT_CIPHERS = shuffledCipherList;
  const report = [
    'Original ciphers:',
    indentText(constants.defaultCipherList),
    'Current context',
    indentText(getTlsContextInfo()),
  ];
  console.log(report.join('\n'));
}
|
||||
|
||||
/**
 * Describes the TLS defaults currently in effect.
 *
 * @returns Three lines: the active default cipher list, the minimum TLS protocol version, and
 * whether the cipher list still equals Node's core default ("Visible") or not ("Masked").
 */
export function getTlsContextInfo(): string {
  const activeCiphers = tls.DEFAULT_CIPHERS;
  const usesNodeCoreDefaults = constants.defaultCoreCipherList === activeCiphers;
  const fingerprintVisibility = usesNodeCoreDefaults ? 'Visible' : 'Masked';
  const ciphersLine = `Ciphers: ${activeCiphers}`;
  const versionLine = `Minimum TLS protocol version: ${tls.DEFAULT_MIN_VERSION}`;
  const fingerprintLine = `Node fingerprint: ${fingerprintVisibility}`;
  return `${ciphersLine}\n${versionLine}\n${fingerprintLine}`;
}
|
||||
|
||||
/**
 * Produces a randomly reordered copy of Node's core default cipher list.
 *
 * The first three ciphers keep their position so the preferred ciphers stay preferred; every
 * remaining cipher is assigned a random sort key and ordered by it. Changing the cipher order
 * changes the data sent during the TLS handshake, which yields a fingerprint that differs from
 * the well-known static fingerprint of a Node.js client.
 *
 * For more details, refer to:
 * - https://archive.today/2024.03.13-102234/https://getsetfetch.org/blog/tls-fingerprint.html
 *
 * @returns All core default ciphers: the top three in original order, then the rest shuffled.
 */
export function getShuffledCiphers(): readonly string[] {
  const keptAtTopCount = 3;
  const coreCiphers = constants.defaultCoreCipherList.split(':');
  // The leading ciphers stay untouched.
  const head = coreCiphers.slice(0, keptAtTopCount);
  // The remainder is decorated with random keys, sorted by them, then undecorated.
  const keyedTail = coreCiphers
    .slice(keptAtTopCount)
    .map((cipher) => ({ cipher, sort: Math.random() }));
  keyedTail.sort((left, right) => left.sort - right.sort);
  return head.concat(keyedTail.map((entry) => entry.cipher));
}
|
||||
@@ -2,18 +2,18 @@ export function groupUrlsByDomain(urls: string[]): string[][] {
|
||||
const domains = new Set<string>();
|
||||
const urlsWithDomain = urls.map((url) => ({
|
||||
url,
|
||||
domain: extractDomain(url),
|
||||
domain: getDomainFromUrl(url),
|
||||
}));
|
||||
for (const url of urlsWithDomain) {
|
||||
domains.add(url.domain);
|
||||
}
|
||||
return Array.from(domains).map((domain) => {
|
||||
return urlsWithDomain
|
||||
.filter((url) => url.domain === domain)
|
||||
.filter((url) => url.domain.toLowerCase() === domain.toLowerCase())
|
||||
.map((url) => url.url);
|
||||
});
|
||||
}
|
||||
|
||||
function extractDomain(url: string): string {
|
||||
return url.split('://')[1].split('/')[0].toLowerCase();
|
||||
export function getDomainFromUrl(url: string): string {
|
||||
return new URL(url).host;
|
||||
}
|
||||
19
tests/checks/external-urls/StatusChecker/UrlStatus.ts
Normal file
19
tests/checks/external-urls/StatusChecker/UrlStatus.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
|
||||
/** Immutable outcome of checking a single URL. */
export interface UrlStatus {
  /** The URL that was requested. */
  readonly url: string;
  /** Status code of the response, when one was received. */
  readonly code?: number;
  /** Description of the failure, when the request failed. */
  readonly error?: string;
}
|
||||
|
||||
/**
 * Renders a URL status as human-readable, multi-line text.
 *
 * @param status - Status to describe.
 * @returns The URL line, followed by the response code line when a code is set, followed by
 * the indented error text when a non-empty error is set.
 */
export function formatUrlStatus(status: UrlStatus): string {
  const lines = [`URL: ${status.url}`];
  if (status.code !== undefined) {
    lines.push(`Response code: ${status.code}`);
  }
  if (status.error) {
    lines.push(`Error:\n${indentText(status.error)}`);
  }
  return lines.join('\n');
}
|
||||
@@ -3,73 +3,28 @@ export function getRandomUserAgent(): string {
|
||||
}
|
||||
|
||||
/**
 * Pool of browser `User-Agent` header values.
 * NOTE(review): presumably sampled by `getRandomUserAgent`, whose body is outside this view.
 * Entries must be exact header values: no trailing separators or other stray characters.
 */
const UserAgents = [
  // Chrome
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537',

  // Firefox
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Safari/605.1.15',

  // Safari
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/604.1',

  // Internet Explorer
  'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',

  // Edge
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3 Edge/15.0',

  // Opera
  'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',

  // iOS Devices
  'Mozilla/5.0 (iPhone; CPU iPhone OS 12_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/18.2b11866 Mobile/16B91 Safari/605.1.15',
  'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1',

  // Android Devices
  'Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.3',

  // Other Devices/Browsers
  'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.3',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15',
  'Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.3 Edge/15.0',
  'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
  'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
  'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
  'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.3',
  'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.3',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.3',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
  'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
  'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.3 OPR/53.0.2907.99',
  'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2)',
  'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20120121 Firefox/46.0',
  'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; Tablet PC 2.0)',
  'Mozilla/5.0 (Windows NT 5.1; rv:36.0) Gecko/20100101 Firefox/36.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
  'Mozilla/5.0 (X11; Linux i686; rv:30.0) Gecko/20100101 Firefox/30.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:28.0) Gecko/20100101 Firefox/28.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.3',
  'Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.3',
  'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.3',
  'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/20161202 Firefox/21.0.1',
  'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0',
  'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0',
  'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0',
  'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0',
  'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.3',
  'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
  'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.3',
  'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.3',
  'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.3',
  'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.3',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.3',
  'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
  'Mozilla/5.0 (X11; CrOS x86_64 4319.74.0) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
  'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
  'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
  // Safari 17.1 - macOS and iPad
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
  // Safari - iOS 17 - iPhone
  'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
  // Safari - iOS 17 - iPad mini
  'Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
  // Edge - macOS
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51',
  // Edge - Windows
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58',
  // Edge - Android
  'Mozilla/5.0 (Linux; Android 10; HD1913) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.43 Mobile Safari/537.36 EdgA/119.0.2151.92',
  // Chrome - macOS
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
  // Chrome - Windows
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
  // Chrome - Android (Phone)
  'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36',
  // Firefox - macOS
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0',
  // Firefox - Windows (fixed: the literal previously ended with a stray comma inside the quotes)
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
  // Firefox - Android (Phone)
  'Mozilla/5.0 (Android 14; Mobile; rv:109.0) Gecko/120.0 Firefox/120.0',
];
|
||||
|
||||
@@ -1,50 +1,82 @@
|
||||
import { test, expect } from 'vitest';
|
||||
import { parseApplication } from '@/application/Parser/ApplicationParser';
|
||||
import type { IApplication } from '@/domain/IApplication';
|
||||
import { getUrlStatusesInParallel, type IBatchRequestOptions } from './StatusChecker/BatchStatusChecker';
|
||||
import type { IUrlStatus } from './StatusChecker/IUrlStatus';
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import { formatAssertionMessage } from '@tests/shared/FormatAssertionMessage';
|
||||
import { type UrlStatus, formatUrlStatus } from './StatusChecker/UrlStatus';
|
||||
import { getUrlStatusesInParallel, type BatchRequestOptions } from './StatusChecker/BatchStatusChecker';
|
||||
|
||||
// arrange
const app = parseApplication();
// Unique documentation URLs of the application, minus the excluded patterns.
const urls = collectUniqueUrls({
  application: app,
  excludePatterns: [
    /^https:\/\/archive\.ph/, // Drops HEAD/GET requests via fetch/curl, responding to Postman/Chromium.
  ],
});
const requestOptions: BatchRequestOptions = {
  domainOptions: {
    sameDomainParallelize: false, // be nice to our third-party servers
    sameDomainDelayInMs: 5 /* sec */ * 1000,
  },
  requestOptions: {
    retryExponentialBaseInMs: 3 /* sec */ * 1000,
    requestTimeoutInMs: 60 /* sec */ * 1000,
    additionalHeaders: { referer: app.projectDetails.homepage },
    randomizeTlsFingerprint: true,
  },
  followOptions: {
    followRedirects: true,
    enableCookies: true,
  },
};
// The overall test timeout scales with the number of URLs (60 seconds each).
const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000;

test(`all URLs (${urls.length}) should be alive`, async () => {
  // act
  const results = await getUrlStatusesInParallel(urls, requestOptions);
  // assert
  // A URL is dead when no status code was obtained or the code is outside 2XX.
  const deadUrls = results.filter((r) => r.code === undefined || !isOkStatusCode(r.code));
  expect(deadUrls).to.have.lengthOf(0, formatAssertionMessage([formatUrlStatusReport(deadUrls)]));
}, testTimeoutInMs);
|
||||
|
||||
function collectUniqueUrls(application: IApplication): string[] {
|
||||
/**
 * Tells whether an HTTP status code is in the successful (2XX) range.
 *
 * @param statusCode - HTTP response status code.
 * @returns `true` for codes from 200 (inclusive) up to 300 (exclusive), otherwise `false`.
 */
function isOkStatusCode(statusCode: number): boolean {
  return statusCode >= 200 && statusCode < 300;
}
|
||||
|
||||
/**
 * Collects the distinct URLs found in the documentation of every category
 * and script of the given application.
 *
 * @param options.application - Application whose collections are scanned.
 * @param options.excludePatterns - URLs matching any of these patterns are
 *   dropped from the result; defaults to no exclusions.
 * @returns Unique URLs in order of first appearance, without excluded ones.
 */
function collectUniqueUrls(
  options: {
    readonly application: IApplication,
    readonly excludePatterns?: readonly RegExp[],
  },
): string[] {
  const excludePatterns = options.excludePatterns ?? [];
  const documentedUrls = [ // Get all nodes
    ...options.application.collections.flatMap((c) => c.getAllCategories()),
    ...options.application.collections.flatMap((c) => c.getAllScripts()),
  ]
    // Get all docs
    .flatMap((documentable) => documentable.docs)
    // Parse all URLs
    .flatMap((docString) => extractUrls(docString));
  // Remove duplicates; a Set keeps the first occurrence of each URL in order
  return [...new Set(documentedUrls)]
    // Exclude certain URLs based on patterns
    .filter((url) => !shouldExcludeUrl(url, excludePatterns));
}
|
||||
|
||||
function printUrls(statuses: IUrlStatus[]): string {
|
||||
/* eslint-disable prefer-template */
|
||||
return '\n'
|
||||
+ statuses.map((status) => `- ${status.url}\n`
|
||||
+ (status.code ? `\tResponse code: ${status.code}` : '')
|
||||
+ (status.error ? `\tError: ${status.error}` : ''))
|
||||
.join('\n')
|
||||
+ '\n';
|
||||
/* eslint-enable prefer-template */
|
||||
/**
 * Tells whether a URL matches at least one exclusion pattern.
 *
 * @param url - URL to check.
 * @param patterns - Exclusion patterns; an empty list excludes nothing.
 * @returns `true` when any pattern matches the URL.
 */
function shouldExcludeUrl(url: string, patterns: readonly RegExp[]): boolean {
  // `search` always scans from index 0, unlike `RegExp.prototype.test`, which
  // resumes from `lastIndex` for global/sticky patterns and so may return
  // different answers for the same URL on consecutive calls.
  return patterns.some((pattern) => url.search(pattern) !== -1);
}
|
||||
|
||||
/**
 * Builds the report text listing the given dead URL statuses.
 *
 * Each status is formatted with `formatUrlStatus` and indented with
 * `indentText`; entries are separated by a `---` line and the whole report
 * is wrapped in a leading and a trailing newline.
 *
 * @param deadUrlStatuses - Statuses of the URLs that failed the check.
 * @returns The report text.
 */
function formatUrlStatusReport(deadUrlStatuses: readonly UrlStatus[]): string {
  return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`;
}
|
||||
|
||||
/**
 * Extracts HTTP(S) URLs from documentation text, skipping URLs that appear
 * inside Markdown inline code.
 *
 * Inline code blocks contain URLs not intended for user interaction and not
 * guaranteed to support expected HTTP methods, leading to false-negatives.
 *
 * @param textWithInlineCode - Text that may contain inline code spans.
 * @returns Matched URLs in order of appearance; duplicates are kept.
 */
function extractUrls(textWithInlineCode: string): string[] {
  // A backtick-delimited span on a single line; spans never cross a line break.
  const inlineCodeSpanRegex = /`[^`\n]*`/g;
  // The lookbehind also rejects a URL that directly follows a backtick,
  // which covers a code span that is never closed.
  const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+)/g;
  const codeSpans = Array.from(textWithInlineCode.matchAll(inlineCodeSpanRegex), (span) => {
    const start = span.index ?? 0;
    return { start, end: start + span[0].length };
  });
  return Array.from(textWithInlineCode.matchAll(nonCodeBlockUrlRegex))
    .filter((urlMatch) => {
      // A URL cannot contain a backtick, so checking where it starts is
      // enough to know whether it lies inside a code span.
      const position = urlMatch.index ?? 0;
      return !codeSpans.some((span) => position >= span.start && position < span.end);
    })
    .map((urlMatch) => urlMatch[0]);
}
|
||||
|
||||
Reference in New Issue
Block a user