This commit improves the URL health checking mechanism to reduce false negatives. - Treat all 2XX status codes as successful, addressing issues with codes like `204`. - Improve URL matching to exclude URLs within Markdown inline code blocks and to support URLs containing parentheses. - Add `forceHttpGetForUrlPatterns` to customize the HTTP method per URL, allowing verification of URLs behind CDNs/WAFs that do not respond to HTTP HEAD. - Send the Host header for improved handling of webpages behind proxies. - Improve formatting and context for output messages. - Fix the defaulting of options for redirects and cookie handling. - Update the user agent pool to modern browsers and platforms. - Add support for randomizing the TLS fingerprint to better mimic various clients, improving the effectiveness of checks. However, this is not fully supported by Node.js's HTTP client; see nodejs/undici#1983 for more details. - Use `AbortSignal` instead of `AbortController` as a more modern and simpler way to handle timeouts.
20 lines
541 B
TypeScript
20 lines
541 B
TypeScript
/**
 * Groups URLs by the host returned from `getDomainFromUrl`.
 *
 * Hosts are compared case-insensitively. Groups are returned in the order in
 * which each host first appears in `urls`, and the URLs inside a group keep
 * their original relative order (duplicate URLs are preserved).
 *
 * @param urls - URLs to group; each one is passed to `getDomainFromUrl`.
 * @returns One array of URLs per distinct host.
 * @throws Propagates whatever `getDomainFromUrl` throws for a URL it cannot parse.
 */
export function groupUrlsByDomain(urls: string[]): string[][] {
  // Keyed by the lower-cased host so that hosts differing only by case land in
  // a single group. A Map keeps insertion order, so groups come out in
  // first-appearance order, and the single pass avoids rescanning every URL
  // once per domain.
  const urlsByDomain = new Map<string, string[]>();
  for (const url of urls) {
    const domainKey = getDomainFromUrl(url).toLowerCase();
    const group = urlsByDomain.get(domainKey);
    if (group === undefined) {
      urlsByDomain.set(domainKey, [url]);
    } else {
      group.push(url);
    }
  }
  return Array.from(urlsByDomain.values());
}
|
|
|
|
/**
 * Extracts the host component of a URL.
 *
 * The value is the `host` property of the parsed URL, so it includes the port
 * when the URL specifies a non-default one (e.g. `example.com:8080`).
 *
 * @param url - URL string to parse; it is parsed without a base URL.
 * @returns The `host` of the parsed URL.
 * @throws If `url` cannot be parsed by the `URL` constructor.
 */
export function getDomainFromUrl(url: string): string {
  const { host } = new URL(url);
  return host;
}
|