Improve URL checks to reduce false-negatives

This commit improves the URL health checking mechanism to reduce false
negatives.

- Treat all 2XX status codes as successful, addressing issues with codes
  like `204`.
- Improve URL matching to exclude URLs within Markdown inline code block
  and support URLs containing parentheses.
- Add `forceHttpGetForUrlPatterns` to customize HTTP method per URL to
  allow verifying URLs behind CDN/WAFs that do not respond to HTTP HEAD.
- Send the Host header for improved handling of webpages behind proxies.
- Improve formatting and context for output messages.
- Fix the defaulting options for redirects and cookie handling.
- Update the user agent pool to modern browsers and platforms.
- Add support for randomizing TLS fingerprint to mimic various clients
  better, improving the effectiveness of checks. However, this is not
  fully supported by Node.js's HTTP client; see nodejs/undici#1983 for
  more details.
- Use `AbortSignal` instead of `AbortController` as more modern and
  simpler way to handle timeouts.
This commit is contained in:
undergroundwires
2024-03-13 11:34:19 +01:00
parent e7218850ba
commit 287b8e61a0
18 changed files with 363 additions and 222 deletions

View File

@@ -1,5 +1,6 @@
import { exec, type ExecOptions, type ExecException } from 'node:child_process';
import { indentText } from './text';
import { exec } from 'child_process';
import { indentText } from '@tests/shared/Text';
import type { ExecOptions, ExecException } from 'child_process';
const TIMEOUT_IN_SECONDS = 180;
const MAX_OUTPUT_BUFFER_SIZE = 1024 * 1024; // 1 MB

View File

@@ -1,29 +0,0 @@
import { isString } from '@/TypeHelpers';
export function indentText(
text: string,
indentLevel = 1,
): string {
validateText(text);
const indentation = '\t'.repeat(indentLevel);
return splitTextIntoLines(text)
.map((line) => (line ? `${indentation}${line}` : line))
.join('\n');
}
export function splitTextIntoLines(text: string): string[] {
validateText(text);
return text
.split(/[\r\n]+/);
}
export function filterEmpty(texts: readonly (string | undefined | null)[]): string[] {
return texts
.filter((title): title is string => Boolean(title));
}
function validateText(text: string): void {
if (!isString(text)) {
throw new Error(`text is not a string. It is: ${typeof text}\n${text}`);
}
}