Compare commits
2 Commits
0.13.4
...
dead-urls-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6720a46d2e | ||
|
|
287b8e61a0 |
5
.github/workflows/checks.external-urls.yaml
vendored
5
.github/workflows/checks.external-urls.yaml
vendored
@@ -1,6 +1,7 @@
|
||||
name: checks.external-urls
|
||||
|
||||
on:
|
||||
push:
|
||||
schedule:
|
||||
- cron: '0 0 * * 0' # at 00:00 on every Sunday
|
||||
|
||||
@@ -20,3 +21,7 @@ jobs:
|
||||
-
|
||||
name: Test
|
||||
run: npm run check:external-urls
|
||||
env:
|
||||
RANDOMIZED_URL_CHECK_LIMIT: "${{ github.event_name == 'push' && '10' || '' }}"
|
||||
# - Scheduled checks have no limits, ensuring thorough testing.
|
||||
# - For push events, triggered by code changes, the number of URLs is limited to provide quick feedback.
|
||||
|
||||
12
src/application/Common/Shuffle.ts
Normal file
12
src/application/Common/Shuffle.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
/**
 * Returns a shuffled copy of the given array; the input array is never modified.
 *
 * Works for elements of any type. Uses the Fisher-Yates (Durstenfeld) algorithm:
 * walking from the end of the copy towards the start, each position is swapped
 * with a randomly chosen position at or before it.
 *
 * @param array Items to shuffle.
 * @returns A new array holding the same items in random order.
 */
export function shuffle<T>(array: readonly T[]): T[] {
  const result = Array.from(array);
  // `remaining` is the size of the not-yet-fixed prefix; its last slot is filled each round.
  let remaining = result.length;
  while (remaining > 1) {
    const lastIndex = remaining - 1;
    const pickedIndex = Math.floor(Math.random() * remaining);
    const picked = result[pickedIndex];
    result[pickedIndex] = result[lastIndex];
    result[lastIndex] = picked;
    remaining -= 1;
  }
  return result;
}
|
||||
@@ -1,7 +1,10 @@
|
||||
export type SchedulerCallbackType = (...args: unknown[]) => void;
|
||||
export type SchedulerType = (callback: SchedulerCallbackType, ms: number) => void;
|
||||
|
||||
export function sleep(time: number, scheduler: SchedulerType = setTimeout) {
|
||||
export function sleep(
|
||||
time: number,
|
||||
scheduler: SchedulerType = setTimeout,
|
||||
): Promise<void> {
|
||||
return new Promise((resolve) => {
|
||||
scheduler(() => resolve(undefined), time);
|
||||
});
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { splitTextIntoLines, indentText } from '../utils/text';
|
||||
import { indentText, splitTextIntoLines } from '@tests/shared/Text';
|
||||
import { log, die } from '../utils/log';
|
||||
import { readAppLogFile } from './app-logs';
|
||||
import { STDERR_IGNORE_PATTERNS } from './error-ignore-patterns';
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { filterEmpty } from '@tests/shared/Text';
|
||||
import { runCommand } from '../../utils/run-command';
|
||||
import { log, LogLevel } from '../../utils/log';
|
||||
import { SupportedPlatform, CURRENT_PLATFORM } from '../../utils/platform';
|
||||
import { filterEmpty } from '../../utils/text';
|
||||
|
||||
export async function captureWindowTitles(processId: number) {
|
||||
if (!processId) { throw new Error('Missing process ID.'); }
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import { logCurrentArgs, CommandLineFlag, hasCommandLineFlag } from './cli-args';
|
||||
import { log, die } from './utils/log';
|
||||
import { ensureNpmProjectDir, npmInstall, npmBuild } from './utils/npm';
|
||||
@@ -15,7 +16,6 @@ import {
|
||||
APP_EXECUTION_DURATION_IN_SECONDS,
|
||||
SCREENSHOT_PATH,
|
||||
} from './config';
|
||||
import { indentText } from './utils/text';
|
||||
import type { ExtractionResult } from './app/extractors/common/extraction-result';
|
||||
|
||||
export async function main(): Promise<void> {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { exec, type ExecOptions, type ExecException } from 'node:child_process';
|
||||
import { indentText } from './text';
|
||||
import { exec } from 'child_process';
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import type { ExecOptions, ExecException } from 'child_process';
|
||||
|
||||
const TIMEOUT_IN_SECONDS = 180;
|
||||
const MAX_OUTPUT_BUFFER_SIZE = 1024 * 1024; // 1 MB
|
||||
|
||||
69
tests/checks/external-urls/DocumentationUrlExtractor.ts
Normal file
69
tests/checks/external-urls/DocumentationUrlExtractor.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
import type { IApplication } from '@/domain/IApplication';
|
||||
import type { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';
|
||||
|
||||
/** Everything `extractDocumentationUrls` needs to collect and report URLs. */
interface UrlExtractionContext {
  /** Receives a labeled summary line for each extraction step. */
  readonly logger: TestExecutionDetailsLogger;
  /** Application whose collections are scanned for documentation URLs. */
  readonly application: IApplication;
  /** URLs matching any of these patterns are left out of the result. */
  readonly urlExclusionPatterns: readonly RegExp[];
}
|
||||
|
||||
/**
 * Collects the documentation URLs of the application that should be tested.
 *
 * Steps, each reported through the context logger:
 * 1. Extract every URL found in the application.
 * 2. Remove duplicates.
 * 3. Drop URLs that match any of the exclusion patterns.
 *
 * @param context Application to scan, exclusion patterns and the logger to report to.
 * @returns Unique URLs that match none of the exclusion patterns.
 */
export function extractDocumentationUrls(
  context: UrlExtractionContext,
): string[] {
  const { logger, application, urlExclusionPatterns } = context;

  const allUrls = extractUrlsFromApplication(application);
  logger.logLabeledInformation('Extracted URLs from application', allUrls.length.toString());

  const uniqueUrls = filterDuplicateUrls(allUrls);
  logger.logLabeledInformation(
    'Unique URLs after deduplication',
    `${uniqueUrls.length} (duplicates removed)`,
  );

  let patternSummary = 'None (all URLs included)';
  if (urlExclusionPatterns.length !== 0) {
    patternSummary = urlExclusionPatterns
      .map((pattern, index) => `${index + 1}) ${pattern.toString()}`)
      .join('\n');
  }
  logger.logLabeledInformation('Exclusion patterns for URLs', patternSummary);

  const includedUrls = filterUrlsExcludingPatterns(uniqueUrls, urlExclusionPatterns);
  const ignoredCount = uniqueUrls.length - includedUrls.length;
  logger.logLabeledInformation(
    'URLs extracted for testing',
    `${includedUrls.length} (after applying exclusion patterns; ${ignoredCount} URLs ignored)`,
  );
  return includedUrls;
}
|
||||
|
||||
/**
 * Gathers every URL mentioned in the documentation of all categories and scripts
 * of all collections of the application. Duplicates are kept.
 */
function extractUrlsFromApplication(application: IApplication): string[] {
  // Categories come first, then scripts, across all collections.
  const categories = application.collections.flatMap(
    (collection) => collection.getAllCategories(),
  );
  const scripts = application.collections.flatMap(
    (collection) => collection.getAllScripts(),
  );
  const docStrings = [...categories, ...scripts].flatMap((documentable) => documentable.docs);

  const urls: string[] = [];
  for (const docString of docStrings) {
    urls.push(...extractUrlsExcludingCodeBlocks(docString));
  }
  return urls;
}
|
||||
|
||||
/**
 * Removes duplicate URLs while keeping the first occurrence of each URL in its
 * original position.
 *
 * Relies on `Set` preserving insertion order, which avoids the quadratic cost of
 * looking up every element with `indexOf`.
 *
 * @param urls URLs that may contain duplicates (compared by exact string equality).
 * @returns A new array with each distinct URL exactly once.
 */
function filterDuplicateUrls(urls: readonly string[]): string[] {
  return [...new Set(urls)];
}
|
||||
|
||||
/**
 * Keeps only the URLs that match none of the given exclusion patterns.
 *
 * Matching uses `String.prototype.search`, which ignores and leaves untouched the
 * pattern's `lastIndex`. `RegExp.prototype.test` would be stateful for patterns
 * carrying the `g` or `y` flag, making the same pattern alternately match and miss
 * when reused across URLs.
 *
 * @param urls Candidate URLs.
 * @param patterns Exclusion patterns; an empty list keeps every URL.
 * @returns A new array with the URLs that are not excluded, in their original order.
 */
function filterUrlsExcludingPatterns(
  urls: readonly string[],
  patterns: readonly RegExp[],
): string[] {
  return urls.filter((url) => patterns.every((pattern) => url.search(pattern) === -1));
}
|
||||
|
||||
/**
 * Finds the `http://` and `https://` URLs in a text, skipping inline code.
 *
 * - A URL directly preceded by a backtick is skipped: inline code may contain URLs
 *   that are not meant for user interaction and are not guaranteed to support the
 *   expected HTTP methods, which would produce misleading results.
 * - A URL may contain one parenthesized segment, e.g. `https://host/Foo_(bar)`.
 * - A URL ends at whitespace, a backtick, a double quote, an angle bracket or an
 *   unbalanced parenthesis.
 *
 * @param textWithInlineCode Documentation text, possibly containing inline code.
 * @returns All matched URLs in order of appearance; empty when there are none.
 */
function extractUrlsExcludingCodeBlocks(textWithInlineCode: string): string[] {
  const urlOutsideInlineCode = /(?<!`)(https?:\/\/[^\s`"<>()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g;
  const matches = textWithInlineCode.match(urlOutsideInlineCode);
  if (matches === null) {
    return [];
  }
  return matches;
}
|
||||
@@ -1,64 +1,65 @@
|
||||
import { sleep } from '@/infrastructure/Threading/AsyncSleep';
|
||||
import { getUrlStatus, type IRequestOptions } from './Requestor';
|
||||
import { groupUrlsByDomain } from './UrlPerDomainGrouper';
|
||||
import type { IUrlStatus } from './IUrlStatus';
|
||||
import { getUrlStatus, type RequestOptions } from './Requestor';
|
||||
import { groupUrlsByDomain } from './UrlDomainProcessing';
|
||||
import type { FollowOptions } from './FetchFollow';
|
||||
import type { UrlStatus } from './UrlStatus';
|
||||
|
||||
export async function getUrlStatusesInParallel(
|
||||
urls: string[],
|
||||
options?: IBatchRequestOptions,
|
||||
): Promise<IUrlStatus[]> {
|
||||
// urls = [ 'https://privacy.sexy' ]; // Here to comment out when testing
|
||||
options?: BatchRequestOptions,
|
||||
): Promise<UrlStatus[]> {
|
||||
// urls = ['https://privacy.sexy']; // Uncomment this line to use a hardcoded URL for testing.
|
||||
const uniqueUrls = Array.from(new Set(urls));
|
||||
const defaultedOptions = { ...DefaultOptions, ...options };
|
||||
console.log('Options: ', defaultedOptions);
|
||||
const results = await request(uniqueUrls, defaultedOptions);
|
||||
const defaultedDomainOptions: Required<DomainOptions> = {
|
||||
...DefaultDomainOptions,
|
||||
...options?.domainOptions,
|
||||
};
|
||||
console.log('Batch request options applied:', defaultedDomainOptions);
|
||||
const results = await request(uniqueUrls, defaultedDomainOptions, options);
|
||||
return results;
|
||||
}
|
||||
|
||||
export interface IBatchRequestOptions {
|
||||
domainOptions?: IDomainOptions;
|
||||
requestOptions?: IRequestOptions;
|
||||
export interface BatchRequestOptions {
|
||||
readonly domainOptions?: Partial<DomainOptions>;
|
||||
readonly requestOptions?: Partial<RequestOptions>;
|
||||
readonly followOptions?: Partial<FollowOptions>;
|
||||
}
|
||||
|
||||
interface IDomainOptions {
|
||||
sameDomainParallelize?: boolean;
|
||||
sameDomainDelayInMs?: number;
|
||||
interface DomainOptions {
|
||||
readonly sameDomainParallelize?: boolean;
|
||||
readonly sameDomainDelayInMs?: number;
|
||||
}
|
||||
|
||||
const DefaultOptions: Required<IBatchRequestOptions> = {
|
||||
domainOptions: {
|
||||
sameDomainParallelize: false,
|
||||
sameDomainDelayInMs: 3 /* sec */ * 1000,
|
||||
},
|
||||
requestOptions: {
|
||||
retryExponentialBaseInMs: 5 /* sec */ * 1000,
|
||||
requestTimeoutInMs: 60 /* sec */ * 1000,
|
||||
additionalHeaders: {},
|
||||
},
|
||||
const DefaultDomainOptions: Required<DomainOptions> = {
|
||||
sameDomainParallelize: false,
|
||||
sameDomainDelayInMs: 3 /* sec */ * 1000,
|
||||
};
|
||||
|
||||
function request(
|
||||
urls: string[],
|
||||
options: Required<IBatchRequestOptions>,
|
||||
): Promise<IUrlStatus[]> {
|
||||
if (!options.domainOptions.sameDomainParallelize) {
|
||||
domainOptions: Required<DomainOptions>,
|
||||
options?: BatchRequestOptions,
|
||||
): Promise<UrlStatus[]> {
|
||||
if (!domainOptions.sameDomainParallelize) {
|
||||
return runOnEachDomainWithDelay(
|
||||
urls,
|
||||
(url) => getUrlStatus(url, options.requestOptions),
|
||||
options.domainOptions.sameDomainDelayInMs,
|
||||
(url) => getUrlStatus(url, options?.requestOptions, options?.followOptions),
|
||||
domainOptions.sameDomainDelayInMs,
|
||||
);
|
||||
}
|
||||
return Promise.all(urls.map((url) => getUrlStatus(url, options.requestOptions)));
|
||||
return Promise.all(
|
||||
urls.map((url) => getUrlStatus(url, options?.requestOptions, options?.followOptions)),
|
||||
);
|
||||
}
|
||||
|
||||
async function runOnEachDomainWithDelay(
|
||||
urls: string[],
|
||||
action: (url: string) => Promise<IUrlStatus>,
|
||||
action: (url: string) => Promise<UrlStatus>,
|
||||
delayInMs: number | undefined,
|
||||
): Promise<IUrlStatus[]> {
|
||||
): Promise<UrlStatus[]> {
|
||||
const grouped = groupUrlsByDomain(urls);
|
||||
const tasks = grouped.map(async (group) => {
|
||||
const results = new Array<IUrlStatus>();
|
||||
const results = new Array<UrlStatus>();
|
||||
/* eslint-disable no-await-in-loop */
|
||||
for (const url of group) {
|
||||
const status = await action(url);
|
||||
|
||||
@@ -1,27 +1,33 @@
|
||||
import { sleep } from '@/infrastructure/Threading/AsyncSleep';
|
||||
import type { IUrlStatus } from './IUrlStatus';
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import { type UrlStatus, formatUrlStatus } from './UrlStatus';
|
||||
|
||||
const DefaultBaseRetryIntervalInMs = 5 /* sec */ * 1000;
|
||||
|
||||
export async function retryWithExponentialBackOff(
|
||||
action: () => Promise<IUrlStatus>,
|
||||
action: () => Promise<UrlStatus>,
|
||||
baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs,
|
||||
currentRetry = 1,
|
||||
): Promise<IUrlStatus> {
|
||||
): Promise<UrlStatus> {
|
||||
const maxTries = 3;
|
||||
const status = await action();
|
||||
if (shouldRetry(status)) {
|
||||
if (currentRetry <= maxTries) {
|
||||
const exponentialBackOffInMs = getRetryTimeoutInMs(currentRetry, baseRetryIntervalInMs);
|
||||
console.log(`Retrying (${currentRetry}) in ${exponentialBackOffInMs / 1000} seconds`, status);
|
||||
console.log([
|
||||
`Attempt ${currentRetry}: Retrying in ${exponentialBackOffInMs / 1000} seconds.`,
|
||||
'Details:',
|
||||
indentText(formatUrlStatus(status)),
|
||||
].join('\n'));
|
||||
await sleep(exponentialBackOffInMs);
|
||||
return retryWithExponentialBackOff(action, baseRetryIntervalInMs, currentRetry + 1);
|
||||
}
|
||||
console.warn('💀 All retry attempts failed. Final failure to retrieve URL:', indentText(formatUrlStatus(status)));
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
function shouldRetry(status: IUrlStatus) {
|
||||
function shouldRetry(status: UrlStatus): boolean {
|
||||
if (status.error) {
|
||||
return true;
|
||||
}
|
||||
@@ -32,14 +38,14 @@ function shouldRetry(status: IUrlStatus) {
|
||||
|| status.code === 429; // Too Many Requests
|
||||
}
|
||||
|
||||
function isTransientError(statusCode: number) {
|
||||
function isTransientError(statusCode: number): boolean {
|
||||
return statusCode >= 500 && statusCode <= 599;
|
||||
}
|
||||
|
||||
function getRetryTimeoutInMs(
|
||||
currentRetry: number,
|
||||
baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs,
|
||||
) {
|
||||
): number {
|
||||
const retryRandomFactor = 0.5; // Retry intervals are between 50% and 150%
|
||||
// of the exponentially increasing base amount
|
||||
const minRandom = 1 - retryRandomFactor;
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import { fetchWithTimeout } from './FetchWithTimeout';
|
||||
import { getDomainFromUrl } from './UrlDomainProcessing';
|
||||
|
||||
export function fetchFollow(
|
||||
url: string,
|
||||
timeoutInMs: number,
|
||||
fetchOptions: RequestInit,
|
||||
followOptions: IFollowOptions | undefined,
|
||||
fetchOptions?: Partial<RequestInit>,
|
||||
followOptions?: Partial<FollowOptions>,
|
||||
): Promise<Response> {
|
||||
const defaultedFollowOptions = {
|
||||
const defaultedFollowOptions: Required<FollowOptions> = {
|
||||
...DefaultFollowOptions,
|
||||
...followOptions,
|
||||
};
|
||||
if (followRedirects(defaultedFollowOptions)) {
|
||||
console.log(indentText(`Follow options: ${JSON.stringify(defaultedFollowOptions)}`));
|
||||
if (!followRedirects(defaultedFollowOptions)) {
|
||||
return fetchWithTimeout(url, timeoutInMs, fetchOptions);
|
||||
}
|
||||
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */ };
|
||||
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */, mode: 'cors' };
|
||||
const cookies = new CookieStorage(defaultedFollowOptions.enableCookies);
|
||||
return followRecursivelyWithCookies(
|
||||
url,
|
||||
@@ -24,13 +27,13 @@ export function fetchFollow(
|
||||
);
|
||||
}
|
||||
|
||||
export interface IFollowOptions {
|
||||
followRedirects?: boolean;
|
||||
maximumRedirectFollowDepth?: number;
|
||||
enableCookies?: boolean;
|
||||
export interface FollowOptions {
|
||||
readonly followRedirects?: boolean;
|
||||
readonly maximumRedirectFollowDepth?: number;
|
||||
readonly enableCookies?: boolean;
|
||||
}
|
||||
|
||||
export const DefaultFollowOptions: Required<IFollowOptions> = {
|
||||
const DefaultFollowOptions: Required<FollowOptions> = {
|
||||
followRedirects: true,
|
||||
maximumRedirectFollowDepth: 20,
|
||||
enableCookies: true,
|
||||
@@ -64,6 +67,10 @@ async function followRecursivelyWithCookies(
|
||||
if (cookieHeader) {
|
||||
cookies.addHeader(cookieHeader);
|
||||
}
|
||||
options.headers = {
|
||||
...options.headers,
|
||||
Host: getDomainFromUrl(nextUrl),
|
||||
};
|
||||
return followRecursivelyWithCookies(nextUrl, timeoutInMs, options, newFollowDepth, cookies);
|
||||
}
|
||||
|
||||
@@ -77,7 +84,7 @@ class CookieStorage {
|
||||
constructor(private readonly enabled: boolean) {
|
||||
}
|
||||
|
||||
public hasAny() {
|
||||
public hasAny(): boolean {
|
||||
return this.enabled && this.cookies.length > 0;
|
||||
}
|
||||
|
||||
@@ -88,17 +95,17 @@ class CookieStorage {
|
||||
this.cookies.push(header);
|
||||
}
|
||||
|
||||
public getHeader() {
|
||||
public getHeader(): string {
|
||||
return this.cookies.join(' ; ');
|
||||
}
|
||||
}
|
||||
|
||||
function followRedirects(options: IFollowOptions) {
|
||||
if (!options.followRedirects) {
|
||||
function followRedirects(options: FollowOptions): boolean {
|
||||
if (options.followRedirects !== true) {
|
||||
return false;
|
||||
}
|
||||
if (options.maximumRedirectFollowDepth === 0) {
|
||||
return false;
|
||||
if (options.maximumRedirectFollowDepth === undefined || options.maximumRedirectFollowDepth <= 0) {
|
||||
throw new Error('Invalid followRedirects configuration: maximumRedirectFollowDepth must be a positive integer');
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -2,13 +2,13 @@ export async function fetchWithTimeout(
|
||||
url: string,
|
||||
timeoutInMs: number,
|
||||
init?: RequestInit,
|
||||
): Promise<Response> {
|
||||
const controller = new AbortController();
|
||||
): ReturnType<typeof fetch> {
|
||||
const options: RequestInit = {
|
||||
...(init ?? {}),
|
||||
signal: controller.signal,
|
||||
signal: AbortSignal.timeout(timeoutInMs),
|
||||
};
|
||||
const promise = fetch(url, options);
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutInMs);
|
||||
return promise.finally(() => clearTimeout(timeout));
|
||||
return fetch(
|
||||
url,
|
||||
options,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
export interface IUrlStatus {
|
||||
url: string;
|
||||
error?: string;
|
||||
code?: number;
|
||||
}
|
||||
@@ -13,7 +13,10 @@ A CLI and SDK for checking the availability of external URLs.
|
||||
- 😇 **Rate Limiting**: Queues requests by domain to be polite.
|
||||
- 🔁 **Retries**: Implements retry pattern with exponential back-off.
|
||||
- ⌚ **Timeouts**: Configurable timeout for each request.
|
||||
- 🎭️ **User-Agent Rotation**: Change user agents for each request.
|
||||
- 🎭️ **Impersonation**: Impersonate different browsers for each request.
|
||||
- **🌐 User-Agent Rotation**: Change user agents.
|
||||
- **🔑 TLS Handshakes**: Perform TLS and HTTP handshakes that are identical to those of a real browser.
|
||||
- 🫙 **Cookie jar**: Preserve cookies during redirects to mimic real browser.
|
||||
|
||||
## CLI
|
||||
|
||||
@@ -54,6 +57,7 @@ const statuses = await getUrlStatusesInParallel([ 'https://privacy.sexy', /* ...
|
||||
- **`sameDomainDelayInMs`** (*number*), default: `3000` (3 seconds)
|
||||
- Sets the delay between requests to the same domain.
|
||||
- `requestOptions` (*object*): See [request options](#request-options).
|
||||
- `followOptions` (*object*): See [follow options](#follow-options).
|
||||
|
||||
### `getUrlStatus`
|
||||
|
||||
@@ -72,7 +76,6 @@ console.log(`Status code: ${status.code}`);
|
||||
- The longer the base time, the greater the intervals between retries.
|
||||
- **`additionalHeaders`** (*object*), default: `{}`
|
||||
- Additional HTTP headers to send along with the default headers. Overrides default headers if specified.
|
||||
- **`followOptions`** (*object*): See [follow options](#follow-options).
|
||||
- **`requestTimeoutInMs`** (*number*), default: `60000` (60 seconds)
|
||||
- Time limit to abort the request if no response is received within the specified time frame.
|
||||
|
||||
@@ -83,19 +86,7 @@ Follows `3XX` redirects while preserving cookies.
|
||||
Uses the same API as `fetch`, except for an additional parameter that specifies [follow options](#follow-options); the `redirect: 'follow' | 'manual' | 'error'` fetch option is discarded in favor of that parameter.
|
||||
|
||||
```js
|
||||
const status = await fetchFollow('https://privacy.sexy', {
|
||||
// First argument is same options as fetch API, except `redirect` options
|
||||
// that's discarded in favor of next argument follow options
|
||||
headers: {
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
},
|
||||
}, {
|
||||
// Second argument sets the redirect behavior
|
||||
followRedirects: true,
|
||||
maximumRedirectFollowDepth: 20,
|
||||
enableCookies: true,
|
||||
}
|
||||
);
|
||||
const status = await fetchFollow('https://privacy.sexy', 1000 /* timeout in milliseconds */);
|
||||
console.log(`Status code: ${status.code}`);
|
||||
```
|
||||
|
||||
@@ -109,3 +100,10 @@ console.log(`Status code: ${status.code}`);
|
||||
- **`enableCookies`** (*boolean*), default: `true`
|
||||
- Enables cookie storage to facilitate seamless navigation through login or other authentication challenges.
|
||||
- 💡 Helps to overcome sign-in challenges with callbacks.
|
||||
- **`forceHttpGetForUrlPatterns`** (*array*), default: `[]`
|
||||
- Specifies URL patterns that should always use an HTTP GET request instead of the default HTTP HEAD.
|
||||
- This is useful for websites that do not respond to HEAD requests, such as those behind certain CDN or web application firewalls.
|
||||
- Provide patterns as regular expressions (`RegExp`), allowing them to match any part of a URL.
|
||||
- Examples:
|
||||
- To match any URL starting with "https://example.com/api": `/^https:\/\/example\.com\/api/`
|
||||
- To match any domain ending with "cloudflare.com": `/^https:\/\/.*\.cloudflare\.com\//`
|
||||
|
||||
@@ -1,70 +1,123 @@
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import { retryWithExponentialBackOff } from './ExponentialBackOffRetryHandler';
|
||||
import { fetchFollow, type IFollowOptions, DefaultFollowOptions } from './FetchFollow';
|
||||
import { fetchFollow, type FollowOptions } from './FetchFollow';
|
||||
import { getRandomUserAgent } from './UserAgents';
|
||||
import type { IUrlStatus } from './IUrlStatus';
|
||||
import { getDomainFromUrl } from './UrlDomainProcessing';
|
||||
import { randomizeTlsFingerprint, getTlsContextInfo } from './TlsFingerprintRandomizer';
|
||||
import type { UrlStatus } from './UrlStatus';
|
||||
|
||||
export function getUrlStatus(
|
||||
url: string,
|
||||
options: IRequestOptions = DefaultOptions,
|
||||
): Promise<IUrlStatus> {
|
||||
const defaultedOptions = { ...DefaultOptions, ...options };
|
||||
const fetchOptions = getFetchOptions(url, defaultedOptions);
|
||||
return retryWithExponentialBackOff(async () => {
|
||||
console.log('Requesting', url);
|
||||
let result: IUrlStatus;
|
||||
try {
|
||||
const response = await fetchFollow(
|
||||
url,
|
||||
defaultedOptions.requestTimeoutInMs,
|
||||
fetchOptions,
|
||||
defaultedOptions.followOptions,
|
||||
);
|
||||
result = { url, code: response.status };
|
||||
} catch (err) {
|
||||
result = { url, error: JSON.stringify(err, null, '\t') };
|
||||
}
|
||||
return result;
|
||||
}, defaultedOptions.retryExponentialBaseInMs);
|
||||
requestOptions?: Partial<RequestOptions>,
|
||||
followOptions?: Partial<FollowOptions>,
|
||||
): Promise<UrlStatus> {
|
||||
const defaultedOptions = getDefaultedRequestOptions(requestOptions);
|
||||
if (defaultedOptions.randomizeTlsFingerprint) {
|
||||
randomizeTlsFingerprint();
|
||||
}
|
||||
return fetchUrlStatusWithRetry(url, defaultedOptions, followOptions);
|
||||
}
|
||||
|
||||
export interface IRequestOptions {
|
||||
export interface RequestOptions {
|
||||
readonly retryExponentialBaseInMs?: number;
|
||||
readonly additionalHeaders?: Record<string, string>;
|
||||
readonly additionalHeadersUrlIgnore?: string[];
|
||||
readonly followOptions?: IFollowOptions;
|
||||
readonly requestTimeoutInMs: number;
|
||||
readonly randomizeTlsFingerprint: boolean;
|
||||
readonly forceHttpGetForUrlPatterns: RegExp[];
|
||||
}
|
||||
|
||||
const DefaultOptions: Required<IRequestOptions> = {
|
||||
retryExponentialBaseInMs: 5000,
|
||||
const DefaultOptions: Required<RequestOptions> = {
|
||||
retryExponentialBaseInMs: 5 /* sec */ * 1000,
|
||||
additionalHeaders: {},
|
||||
additionalHeadersUrlIgnore: [],
|
||||
requestTimeoutInMs: 60 /* seconds */ * 1000,
|
||||
followOptions: DefaultFollowOptions,
|
||||
randomizeTlsFingerprint: true,
|
||||
forceHttpGetForUrlPatterns: [],
|
||||
};
|
||||
|
||||
function getFetchOptions(url: string, options: Required<IRequestOptions>): RequestInit {
|
||||
/**
 * Requests the URL (following redirects as configured) and converts the outcome
 * into a `UrlStatus`, retrying with exponential back-off.
 *
 * A response of any status code results in `{ url, code }`. A thrown error results
 * in `{ url, error }`, where `error` is a multi-line report holding the error
 * details, the fetch options, the request options and the TLS context.
 *
 * @param url URL to request.
 * @param requestOptions Fully defaulted request options.
 * @param followOptions Optional redirect/cookie options forwarded to `fetchFollow`.
 */
function fetchUrlStatusWithRetry(
  url: string,
  requestOptions: Required<RequestOptions>,
  followOptions?: Partial<FollowOptions>,
): Promise<UrlStatus> {
  const fetchOptions = getFetchOptions(url, requestOptions);
  return retryWithExponentialBackOff(async () => {
    console.log(`🚀 Initiating request for URL: ${url}`);
    console.log(indentText([
      `HTTP method: ${fetchOptions.method}`,
      `Request options: ${stringifyForLog(requestOptions)}`,
    ].join('\n')));
    let result: UrlStatus;
    try {
      const response = await fetchFollow(
        url,
        requestOptions.requestTimeoutInMs,
        fetchOptions,
        followOptions,
      );
      result = { url, code: response.status };
    } catch (err: unknown) {
      result = {
        url,
        error: [
          'Error:', indentText(describeRequestError(err)),
          'Fetch options:', indentText(JSON.stringify(fetchOptions, null, '\t')),
          'Request options:', indentText(stringifyForLog(requestOptions, '\t')),
          'TLS:', indentText(getTlsContextInfo()),
        ].join('\n'),
      };
    }
    return result;
  }, requestOptions.retryExponentialBaseInMs);
}

/**
 * Produces a readable description of a thrown value.
 *
 * `Error` objects have no enumerable own properties, so `JSON.stringify` renders
 * them as `{}`; their stack (or name and message) is used instead, followed by the
 * `cause` chain when present. Other values are serialized as JSON, falling back to
 * `String()` when JSON serialization yields nothing.
 */
function describeRequestError(err: unknown): string {
  if (err instanceof Error) {
    const lines = [err.stack ?? `${err.name}: ${err.message}`];
    const { cause } = err as Error & { cause?: unknown };
    if (cause !== undefined) {
      lines.push(`Caused by: ${describeRequestError(cause)}`);
    }
    return lines.join('\n');
  }
  const serialized: string | undefined = JSON.stringify(err, null, '\t');
  return serialized || String(err);
}

/** Serializes options as JSON, rendering regular expressions as `/source/flags` instead of `{}`. */
function stringifyForLog(value: unknown, indent?: string): string {
  return JSON.stringify(
    value,
    (_key, item: unknown) => (item instanceof RegExp ? item.toString() : item),
    indent,
  );
}
|
||||
|
||||
function getFetchOptions(url: string, options: Required<RequestOptions>): RequestInit {
|
||||
const additionalHeaders = options.additionalHeadersUrlIgnore
|
||||
.some((ignorePattern) => url.startsWith(ignorePattern))
|
||||
? {}
|
||||
: options.additionalHeaders;
|
||||
return {
|
||||
method: 'HEAD',
|
||||
method: getHttpMethod(url, options),
|
||||
headers: {
|
||||
...getDefaultHeaders(),
|
||||
...getDefaultHeaders(url),
|
||||
...additionalHeaders,
|
||||
},
|
||||
redirect: 'manual', // Redirects are handled manually, automatic redirects do not work with Host header
|
||||
};
|
||||
}
|
||||
|
||||
function getDefaultHeaders(): Record<string, string> {
|
||||
/**
 * Chooses the HTTP method for requesting the URL.
 *
 * @returns `'GET'` when the URL matches any of `options.forceHttpGetForUrlPatterns`;
 * otherwise `'HEAD'`, which fetches only headers without the full response body
 * for better speed.
 */
function getHttpMethod(url: string, options: Required<RequestOptions>): 'HEAD' | 'GET' {
  for (const pattern of options.forceHttpGetForUrlPatterns) {
    if (url.match(pattern)) {
      return 'GET';
    }
  }
  return 'HEAD';
}
|
||||
|
||||
function getDefaultHeaders(url: string): Record<string, string> {
|
||||
return {
|
||||
'user-agent': getRandomUserAgent(),
|
||||
'upgrade-insecure-requests': '1',
|
||||
connection: 'keep-alive',
|
||||
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
|
||||
'accept-encoding': 'gzip, deflate, br',
|
||||
'cache-control': 'max-age=0',
|
||||
'accept-language': 'en-US,en;q=0.9',
|
||||
// Needed for websites that filter out non-browser user agents.
|
||||
'User-Agent': getRandomUserAgent(),
|
||||
|
||||
// Required for some websites, especially those behind proxies, to correctly handle the request.
|
||||
Host: getDomainFromUrl(url),
|
||||
|
||||
// The following mimic a real browser request to improve compatibility with most web servers.
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
Connection: 'keep-alive',
|
||||
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Cache-Control': 'max-age=0',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Merges the given partial request options over `DefaultOptions`.
 *
 * Options whose value is explicitly `undefined` are ignored so that they cannot
 * overwrite a default; otherwise the result would break the `Required<>` contract
 * (e.g. an `undefined` request timeout).
 *
 * @param options Caller-provided overrides; may be omitted entirely.
 * @returns Request options with every field defined.
 */
function getDefaultedRequestOptions(
  options?: Partial<RequestOptions>,
): Required<RequestOptions> {
  const definedOverrides = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined),
  ) as Partial<RequestOptions>;
  return {
    ...DefaultOptions,
    ...definedOverrides,
  };
}
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
/**
|
||||
* Modifies the TLS fingerprint of Node.js HTTP client to circumvent TLS fingerprinting blocks.
|
||||
* TLS fingerprinting is a technique used to identify clients based on the unencrypted data sent
|
||||
* during the TLS handshake, used for blocking or identifying non-browser clients like debugging
|
||||
* proxies or automated scripts.
|
||||
*
|
||||
* However, Node.js's HTTP client does not fully support all methods required for impersonating a
|
||||
* browser's TLS fingerprint, as reported in https://github.com/nodejs/undici/issues/1983.
|
||||
* While this implementation can alter the TLS fingerprint by randomizing the cipher suite order,
|
||||
* it may not perfectly mimic specific browser fingerprints due to limitations in the TLS
|
||||
* implementation of Node.js.
|
||||
*
|
||||
* For more detailed information, visit:
|
||||
* - https://archive.today/2024.03.13-102042/https://httptoolkit.com/blog/tls-fingerprinting-node-js/
|
||||
* - https://check.ja3.zone/ (To check your tool's or browser's fingerprint)
|
||||
* - https://github.com/lwthiker/curl-impersonate (A solution for curl)
|
||||
* - https://github.com/depicts/got-tls (Cipher manipulation support for Node.js)
|
||||
*/
|
||||
|
||||
import { constants } from 'crypto';
|
||||
import tls from 'tls';
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
|
||||
/**
 * Replaces `tls.DEFAULT_CIPHERS` with a freshly shuffled cipher list and logs the
 * original cipher list next to the resulting TLS context.
 */
export function randomizeTlsFingerprint() {
  const shuffledCiphers = getShuffledCiphers();
  tls.DEFAULT_CIPHERS = shuffledCiphers.join(':');

  const contextDetails = [
    'Original ciphers:', indentText(constants.defaultCipherList),
    'Current ciphers:', indentText(getTlsContextInfo()),
  ].join('\n');
  const report = `TLS context:\n${indentText(contextDetails)}`;
  console.log(indentText(report));
}
|
||||
|
||||
/**
 * Describes the TLS defaults currently in effect as three lines of text:
 * the default cipher list, the minimum TLS protocol version, and whether the
 * cipher list still equals Node's core default (`Visible`) or not (`Masked`).
 */
export function getTlsContextInfo(): string {
  const usesCoreCipherList = constants.defaultCoreCipherList === tls.DEFAULT_CIPHERS;
  const fingerprintState = usesCoreCipherList ? 'Visible' : 'Masked';
  const lines = [
    `Ciphers: ${tls.DEFAULT_CIPHERS}`,
    `Minimum TLS protocol version: ${tls.DEFAULT_MIN_VERSION}`,
    `Node fingerprint: ${fingerprintState}`,
  ];
  return lines.join('\n');
}
|
||||
|
||||
/**
 * Builds a cipher list from Node's core default list in which the first three
 * ciphers keep their position (to maintain security preferences) and all
 * remaining ciphers are placed in random order.
 *
 * Changing the cipher suite order alters a parameter of the TLS handshake, which
 * yields a TLS fingerprint that differs from the static, well-known fingerprint
 * of a Node.js client and so helps to circumvent fingerprint-based detection.
 *
 * For more details, refer to:
 * - https://archive.today/2024.03.13-102234/https://getsetfetch.org/blog/tls-fingerprint.html
 *
 * @returns The reordered cipher names; the set of ciphers is unchanged.
 */
export function getShuffledCiphers(): readonly string[] {
  const totalTopCiphersToKeep = 3;
  const allCiphers = constants.defaultCoreCipherList.split(':');
  const pinnedCiphers = allCiphers.slice(0, totalTopCiphersToKeep);
  const remainingCiphers = allCiphers.slice(totalTopCiphersToKeep);

  // Assign each remaining cipher a random key, then order the ciphers by their key.
  const randomKeys = remainingCiphers.map(() => Math.random());
  const reorderedCiphers = remainingCiphers
    .map((_cipher, position) => position)
    .sort((left, right) => randomKeys[left] - randomKeys[right])
    .map((position) => remainingCiphers[position]);

  return pinnedCiphers.concat(reorderedCiphers);
}
|
||||
@@ -2,18 +2,18 @@ export function groupUrlsByDomain(urls: string[]): string[][] {
|
||||
const domains = new Set<string>();
|
||||
const urlsWithDomain = urls.map((url) => ({
|
||||
url,
|
||||
domain: extractDomain(url),
|
||||
domain: getDomainFromUrl(url),
|
||||
}));
|
||||
for (const url of urlsWithDomain) {
|
||||
domains.add(url.domain);
|
||||
}
|
||||
return Array.from(domains).map((domain) => {
|
||||
return urlsWithDomain
|
||||
.filter((url) => url.domain === domain)
|
||||
.filter((url) => url.domain.toLowerCase() === domain.toLowerCase())
|
||||
.map((url) => url.url);
|
||||
});
|
||||
}
|
||||
|
||||
function extractDomain(url: string): string {
|
||||
return url.split('://')[1].split('/')[0].toLowerCase();
|
||||
export function getDomainFromUrl(url: string): string {
|
||||
return new URL(url).host;
|
||||
}
|
||||
19
tests/checks/external-urls/StatusChecker/UrlStatus.ts
Normal file
19
tests/checks/external-urls/StatusChecker/UrlStatus.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
|
||||
/**
 * Result recorded for a single checked URL.
 */
export interface UrlStatus {
  /** The URL this status refers to. */
  readonly url: string;
  /** Error text, when present; printed under an "Error:" label by `formatUrlStatus`. */
  readonly error?: string;
  /** Response code, when present; printed as "Response code: …" by `formatUrlStatus`. */
  readonly code?: number;
}
|
||||
|
||||
/**
 * Renders a URL status as a multi-line, human-readable text.
 *
 * The first line is always the URL. A "Response code" line follows when a
 * code is set (including `0`), and an "Error" section follows when the error
 * text is non-empty.
 *
 * @param status - The status to render.
 * @returns The rendered lines joined with newlines.
 */
export function formatUrlStatus(status: UrlStatus): string {
  const lines = [`URL: ${status.url}`];
  if (status.code !== undefined) {
    lines.push(`Response code: ${status.code}`);
  }
  if (status.error) {
    // The error text goes on its own line(s), passed through `indentText`.
    lines.push(`Error:\n${indentText(status.error)}`);
  }
  return lines.join('\n');
}
|
||||
@@ -3,73 +3,28 @@ export function getRandomUserAgent(): string {
|
||||
}
|
||||
|
||||
const UserAgents = [
|
||||
// Chrome
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537',
|
||||
|
||||
// Firefox
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Safari/605.1.15',
|
||||
|
||||
// Safari
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/604.1',
|
||||
|
||||
// Internet Explorer
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
|
||||
|
||||
// Edge
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3 Edge/15.0',
|
||||
|
||||
// Opera
|
||||
'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
|
||||
|
||||
// iOS Devices
|
||||
'Mozilla/5.0 (iPhone; CPU iPhone OS 12_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/18.2b11866 Mobile/16B91 Safari/605.1.15',
|
||||
'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1',
|
||||
|
||||
// Android Devices
|
||||
'Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.3',
|
||||
|
||||
// Other Devices/Browsers
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.3',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.3 Edge/15.0',
|
||||
'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
|
||||
'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.3',
|
||||
'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.3',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.3',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.3 OPR/53.0.2907.99',
|
||||
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2)',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20120121 Firefox/46.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; Tablet PC 2.0)',
|
||||
'Mozilla/5.0 (Windows NT 5.1; rv:36.0) Gecko/20100101 Firefox/36.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
|
||||
'Mozilla/5.0 (X11; Linux i686; rv:30.0) Gecko/20100101 Firefox/30.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:28.0) Gecko/20100101 Firefox/28.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/20161202 Firefox/21.0.1',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0',
|
||||
'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0',
|
||||
'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0',
|
||||
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.3',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
|
||||
'Mozilla/5.0 (X11; CrOS x86_64 4319.74.0) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
|
||||
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
|
||||
// Safari 17.1 - macOS and iPad
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
|
||||
// Safari - iOS 17 - iPhone
|
||||
'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
|
||||
// Safari - iOS 17 - iPad mini
|
||||
'Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
|
||||
// Edge - macOS
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51',
|
||||
// Edge - Windows
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58',
|
||||
// Edge - Android
|
||||
'Mozilla/5.0 (Linux; Android 10; HD1913) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.43 Mobile Safari/537.36 EdgA/119.0.2151.92',
|
||||
// Chrome - macOS
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
||||
// Chrome - Windows
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
||||
// Chrome - Android (Phone)
|
||||
'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36',
|
||||
// Firefox - macOS
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0',
|
||||
// Firefox - Windows
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0',
|
||||
// Firefox - Android (Phone)
|
||||
'Mozilla/5.0 (Android 14; Mobile; rv:109.0) Gecko/120.0 Firefox/120.0',
|
||||
];
|
||||
|
||||
26
tests/checks/external-urls/TestExecutionDetailsLogger.ts
Normal file
26
tests/checks/external-urls/TestExecutionDetailsLogger.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
|
||||
/**
 * Writes details about a test run to the console, framed by delimiter lines.
 */
export class TestExecutionDetailsLogger {
  /** Prints the delimiter line that opens a details section. */
  public logTestSectionStartDelimiter(): void {
    this.logSectionDelimiterLine();
  }

  /** Prints the delimiter line that closes a details section. */
  public logTestSectionEndDelimiter(): void {
    this.logSectionDelimiterLine();
  }

  /**
   * Prints `label:` followed, on the next line, by the given information
   * after it has been passed through `indentText`.
   *
   * @param label - Short title of the information.
   * @param detailedInformation - Text printed beneath the label.
   */
  public logLabeledInformation(
    label: string,
    detailedInformation: string,
  ): void {
    const message = `${label}:\n${indentText(detailedInformation)}`;
    console.log(message);
  }

  /** Prints a horizontal rule made of 40 box-drawing characters. */
  private logSectionDelimiterLine(): void {
    console.log('─'.repeat(40));
  }
}
|
||||
@@ -1,50 +1,101 @@
|
||||
import { test, expect } from 'vitest';
|
||||
import { parseApplication } from '@/application/Parser/ApplicationParser';
|
||||
import type { IApplication } from '@/domain/IApplication';
|
||||
import { getUrlStatusesInParallel, type IBatchRequestOptions } from './StatusChecker/BatchStatusChecker';
|
||||
import type { IUrlStatus } from './StatusChecker/IUrlStatus';
|
||||
import { indentText } from '@tests/shared/Text';
|
||||
import { formatAssertionMessage } from '@tests/shared/FormatAssertionMessage';
|
||||
import { shuffle } from '@/application/Common/Shuffle';
|
||||
import { type UrlStatus, formatUrlStatus } from './StatusChecker/UrlStatus';
|
||||
import { getUrlStatusesInParallel, type BatchRequestOptions } from './StatusChecker/BatchStatusChecker';
|
||||
import { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';
|
||||
import { extractDocumentationUrls } from './DocumentationUrlExtractor';
|
||||
|
||||
// arrange
|
||||
const logger = new TestExecutionDetailsLogger();
|
||||
logger.logTestSectionStartDelimiter();
|
||||
const app = parseApplication();
|
||||
const urls = collectUniqueUrls(app);
|
||||
const requestOptions: IBatchRequestOptions = {
|
||||
let urls = extractDocumentationUrls({
|
||||
logger,
|
||||
urlExclusionPatterns: [
|
||||
/^https:\/\/archive\.ph/, // Drops HEAD/GET requests via fetch/curl, responding to Postman/Chromium.
|
||||
],
|
||||
application: app,
|
||||
});
|
||||
urls = filterUrlsToEnvironmentCheckLimit(urls);
|
||||
logger.logLabeledInformation('URLs submitted for testing', urls.length.toString());
|
||||
const requestOptions: BatchRequestOptions = {
|
||||
domainOptions: {
|
||||
sameDomainParallelize: false, // be nice to our external servers
|
||||
sameDomainParallelize: false, // be nice to our third-party servers
|
||||
sameDomainDelayInMs: 5 /* sec */ * 1000,
|
||||
},
|
||||
requestOptions: {
|
||||
retryExponentialBaseInMs: 3 /* sec */ * 1000,
|
||||
requestTimeoutInMs: 60 /* sec */ * 1000,
|
||||
additionalHeaders: { referer: app.projectDetails.homepage },
|
||||
randomizeTlsFingerprint: true,
|
||||
},
|
||||
followOptions: {
|
||||
followRedirects: true,
|
||||
enableCookies: true,
|
||||
},
|
||||
};
|
||||
logger.logLabeledInformation('HTTP request options', JSON.stringify(requestOptions, null, 2));
|
||||
const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000;
|
||||
|
||||
logger.logLabeledInformation('Scheduled test duration', convertMillisecondsToHumanReadableFormat(testTimeoutInMs));
|
||||
logger.logTestSectionEndDelimiter();
|
||||
test(`all URLs (${urls.length}) should be alive`, async () => {
|
||||
// act
|
||||
console.log('URLS', urls); // TODO: Delete
|
||||
const results = await getUrlStatusesInParallel(urls, requestOptions);
|
||||
const deadUrls = results.filter((r) => r.code !== 200);
|
||||
expect(deadUrls).to.have.lengthOf(0, printUrls(deadUrls));
|
||||
// assert
|
||||
const deadUrls = results.filter((r) => r.code === undefined || !isOkStatusCode(r.code));
|
||||
expect(deadUrls).to.have.lengthOf(
|
||||
0,
|
||||
formatAssertionMessage([createReportForDeadUrlStatuses(deadUrls)]),
|
||||
);
|
||||
}, testTimeoutInMs);
|
||||
|
||||
function collectUniqueUrls(application: IApplication): string[] {
|
||||
return [ // Get all nodes
|
||||
...application.collections.flatMap((c) => c.getAllCategories()),
|
||||
...application.collections.flatMap((c) => c.getAllScripts()),
|
||||
]
|
||||
// Get all docs
|
||||
.flatMap((documentable) => documentable.docs)
|
||||
// Parse all URLs
|
||||
.flatMap((docString) => docString.match(/(https?:\/\/[^\s`"<>()]+)/g) || [])
|
||||
// Remove duplicates
|
||||
.filter((url, index, array) => array.indexOf(url) === index);
|
||||
function isOkStatusCode(statusCode: number): boolean {
|
||||
return statusCode >= 200 && statusCode < 300;
|
||||
}
|
||||
|
||||
function printUrls(statuses: IUrlStatus[]): string {
|
||||
/* eslint-disable prefer-template */
|
||||
return '\n'
|
||||
+ statuses.map((status) => `- ${status.url}\n`
|
||||
+ (status.code ? `\tResponse code: ${status.code}` : '')
|
||||
+ (status.error ? `\tError: ${status.error}` : ''))
|
||||
.join('\n')
|
||||
+ '\n';
|
||||
/* eslint-enable prefer-template */
|
||||
function createReportForDeadUrlStatuses(deadUrlStatuses: readonly UrlStatus[]): string {
|
||||
return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`;
|
||||
}
|
||||
|
||||
/**
 * Limits the URLs under test according to the `RANDOMIZED_URL_CHECK_LIMIT`
 * environment variable.
 *
 * - Unset or empty: every URL is returned unchanged.
 * - A non-negative integer smaller than the URL count: that many URLs are
 *   picked at random.
 * - Anything else (negative numbers, fractions, trailing garbage such as
 *   `10abc`): an error is thrown instead of silently testing the wrong amount.
 *
 * @param originalUrls - All URLs that are candidates for testing.
 * @returns The URLs to test.
 * @throws Error when the environment variable is not a non-negative integer.
 */
function filterUrlsToEnvironmentCheckLimit(originalUrls: string[]): string[] {
  const { RANDOMIZED_URL_CHECK_LIMIT } = process.env;
  logger.logLabeledInformation('URL check limit', RANDOMIZED_URL_CHECK_LIMIT || 'Unlimited');
  if (RANDOMIZED_URL_CHECK_LIMIT === undefined || RANDOMIZED_URL_CHECK_LIMIT === '') {
    return originalUrls;
  }
  // `parseInt` alone would accept "10abc" as 10 and "-5" as -5; the latter
  // would make `slice(0, -5)` return almost every URL instead of a small sample.
  if (!/^\d+$/.test(RANDOMIZED_URL_CHECK_LIMIT.trim())) {
    throw new Error(`Invalid URL limit: ${RANDOMIZED_URL_CHECK_LIMIT}`);
  }
  const maxUrlsInTest = parseInt(RANDOMIZED_URL_CHECK_LIMIT, 10);
  if (maxUrlsInTest >= originalUrls.length) {
    return originalUrls;
  }
  return shuffle(originalUrls).slice(0, maxUrlsInTest);
}
|
||||
|
||||
/**
 * Formats a duration as text such as `1 hours, 5 minutes, 30 seconds`.
 *
 * Units whose amount is zero are left out. Fractions of a second are dropped.
 * When nothing remains (duration below one second), `0 seconds` is returned so
 * that the result is never an empty string.
 *
 * @param milliseconds - Duration in milliseconds.
 * @returns The comma-separated, human-readable duration.
 */
function convertMillisecondsToHumanReadableFormat(milliseconds: number): string {
  // Work on whole seconds with integer division/modulo. Deriving minutes and
  // seconds from fractional hours can lose a unit to floating-point rounding.
  const totalSeconds = Math.floor(milliseconds / 1000);
  const units: ReadonlyArray<readonly [number, string]> = [
    [Math.floor(totalSeconds / 3600), 'hours'],
    [Math.floor((totalSeconds % 3600) / 60), 'minutes'],
    [totalSeconds % 60, 'seconds'],
  ];
  const timeParts = units
    .filter(([amount]) => amount !== 0)
    .map(([amount, label]) => `${amount} ${label}`);
  return timeParts.length > 0 ? timeParts.join(', ') : '0 seconds';
}
|
||||
|
||||
52
tests/unit/application/Common/Shuffle.spec.ts
Normal file
52
tests/unit/application/Common/Shuffle.spec.ts
Normal file
@@ -0,0 +1,52 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { shuffle } from '@/application/Common/Shuffle';
|
||||
|
||||
// Unit tests for `shuffle`: the result must be a fresh array holding exactly
// the input's elements, and the input itself must stay untouched.
describe('Shuffle', () => {
  describe('shuffle', () => {
    it('returns a new array', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result).not.to.equal(inputArray);
    });

    it('returns an array of the same length', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result.length).toBe(inputArray.length);
    });

    it('contains the same elements', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result).to.have.members(inputArray);
    });

    it('does not modify the input array', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      const inputArrayCopy = [...inputArray];
      // act
      shuffle(inputArray);
      // assert
      expect(inputArray).to.deep.equal(inputArrayCopy);
    });

    it('handles an empty array correctly', () => {
      // arrange
      const inputArray: string[] = [];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result).to.have.lengthOf(0);
    });
  });
});
|
||||
Reference in New Issue
Block a user