Compare commits

...

2 Commits

Author SHA1 Message Date
undergroundwires
b042b36aea Add expansion/collapse animations for cards
Key changes:

- Add animation for card opening/collapse.

Other supporting changes:

- Remove card expansion panel to its own component for easier
  maintainability and better separation of concerns.
- Use real DOM element instead of &:before pseudo class for showing
  expansion arrow. This increases by maintainability by separating its
  code and concerns.

- TODO: When one card is expanded and others is also expanded then the
  transition sucks.
2024-03-31 20:07:09 +02:00
undergroundwires
be7a886225 Improve URL checks to reduce false-negatives
This commit improves the URL health checking mechanism to reduce false
negatives.

- Treat all 2XX status codes as successful, addressing issues with codes
  like `204`.
- Exclude URLs within Markdown inline code blocks.
- Send the Host header for improved handling of webpages behind proxies.
- Improve formatting and context for output messages.
- Fix the defaulting options for redirects and cookie handling.
- Add URL exclusion support for non-responsive URLs.
- Update the user agent pool to modern browsers and platforms.
- Improve CI/CD workflow to respond to modifications in the
  `test/checks/external-urls` directory, offering immediate feedback on
  potential impacts to the external URL test.
- Add support for randomizing TLS fingerprint to mimic various clients
  better, improving the effectiveness of checks. However, this is not
  fully supported by Node.js's HTTP client; see nodejs/undici#1983 for
  more details.
- Use `AbortSignal` instead of `AbortController` as more modern and
  simpler way to handle timeouts.
2024-03-13 18:26:16 +01:00
24 changed files with 521 additions and 325 deletions

View File

@@ -3,6 +3,9 @@ name: checks.external-urls
on:
schedule:
- cron: '0 0 * * 0' # at 00:00 on every Sunday
push:
paths:
- tests/checks/external-urls/**
jobs:
run-check:

View File

@@ -1,7 +1,10 @@
export type SchedulerCallbackType = (...args: unknown[]) => void;
export type SchedulerType = (callback: SchedulerCallbackType, ms: number) => void;
export function sleep(time: number, scheduler: SchedulerType = setTimeout) {
export function sleep(
time: number,
scheduler: SchedulerType = setTimeout,
): Promise<void> {
return new Promise((resolve) => {
scheduler(() => resolve(undefined), time);
});

View File

@@ -0,0 +1,90 @@
<template>
<div class="card__expander">
<div class="card__expander__close-button">
<FlatButton
icon="xmark"
@click="collapse()"
/>
</div>
<div class="card__expander__content">
<ScriptsTree
:category-id="categoryId"
:has-top-padding="false"
/>
</div>
</div>
</template>
<script lang="ts">
import {
defineComponent,
} from 'vue';
import FlatButton from '@/presentation/components/Shared/FlatButton.vue';
import ScriptsTree from '@/presentation/components/Scripts/View/Tree/ScriptsTree.vue';
export default defineComponent({
components: {
ScriptsTree,
FlatButton,
},
props: {
categoryId: {
type: Number,
required: true,
},
},
emits: {
/* eslint-disable @typescript-eslint/no-unused-vars */
onCollapse: () => true,
/* eslint-enable @typescript-eslint/no-unused-vars */
},
setup(_, { emit }) {
function collapse() {
emit('onCollapse');
}
return {
collapse,
};
},
});
</script>
<style scoped lang="scss">
@use "@/presentation/assets/styles/main" as *;
$expanded-margin-top : 30px;
.card__expander {
transition: all 0.2s ease-in-out;
position: relative;
background-color: $color-primary-darker;
color: $color-on-primary;
display: flex;
align-items: center;
flex-direction: column;
margin-top: $expanded-margin-top;
.card__expander__content {
display: flex;
justify-content: center;
word-break: break-word;
max-width: 100%; // Prevents horizontal expansion of inner content (e.g., when a code block is shown)
width: 100%; // Expands the container to fill available horizontal space, enabling alignment of child items.
}
.card__expander__close-button {
font-size: $font-size-absolute-large;
align-self: flex-end;
margin-right: 0.25em;
@include clickable;
color: $color-primary-light;
@include hover-or-touch {
color: $color-primary;
}
}
}
</style>

View File

@@ -0,0 +1,24 @@
<template>
<div class="arrow-container">
<div class="arrow" />
</div>
</template>
<style scoped lang="scss">
@use "@/presentation/assets/styles/main" as *;
$arrow-size : 15px;
.arrow-container {
position: relative;
.arrow {
position: absolute;
left: calc(50% - $arrow-size * 1.5);
top: calc(1.5 * $arrow-size);
border: solid $color-primary-darker;
border-width: 0 $arrow-size $arrow-size 0;
padding: $arrow-size;
transform: rotate(-135deg);
}
}
</style>

View File

@@ -19,6 +19,7 @@
v-for="categoryId of categoryIds"
:key="categoryId"
class="card"
:total-cards-per-row="cardsPerRow"
:class="{
'small-screen': width <= 500,
'medium-screen': width > 500 && width < 750,
@@ -62,6 +63,19 @@ export default defineComponent({
);
const activeCategoryId = ref<number | undefined>(undefined);
const cardsPerRow = computed<number>(() => {
if (width.value === undefined) {
throw new Error('Unknown width, total cards should not be calculated');
}
if (width.value <= 500) {
return 1;
}
if (width.value < 750) {
return 2;
}
return 3;
});
function onSelected(categoryId: number, isExpanded: boolean) {
activeCategoryId.value = isExpanded ? categoryId : undefined;
}
@@ -108,6 +122,7 @@ export default defineComponent({
width,
categoryIds,
activeCategoryId,
cardsPerRow,
onSelected,
};
},

View File

@@ -29,20 +29,15 @@
:category-id="categoryId"
/>
</div>
<div class="card__expander" @click.stop>
<div class="card__expander__close-button">
<FlatButton
icon="xmark"
@click="collapse()"
/>
</div>
<div class="card__expander__content">
<ScriptsTree
:category-id="categoryId"
:has-top-padding="false"
/>
</div>
</div>
<CardExpansionPanelArrow v-show="isExpanded" />
<ExpandCollapseTransition>
<CardExpansionPanel
v-show="isExpanded"
:category-id="categoryId"
@on-collapse="collapse"
@click.stop
/>
</ExpandCollapseTransition>
</div>
</template>
@@ -51,24 +46,30 @@ import {
defineComponent, computed, shallowRef,
} from 'vue';
import AppIcon from '@/presentation/components/Shared/Icon/AppIcon.vue';
import FlatButton from '@/presentation/components/Shared/FlatButton.vue';
import ExpandCollapseTransition from '@/presentation/components/Shared/ExpandCollapse/ExpandCollapseTransition.vue';
import { injectKey } from '@/presentation/injectionSymbols';
import ScriptsTree from '@/presentation/components/Scripts/View/Tree/ScriptsTree.vue';
import { sleep } from '@/infrastructure/Threading/AsyncSleep';
import CardSelectionIndicator from './CardSelectionIndicator.vue';
import CardExpansionPanel from './CardExpansionPanel.vue';
import CardExpansionPanelArrow from './CardExpansionPanelArrow.vue';
export default defineComponent({
components: {
ScriptsTree,
AppIcon,
CardSelectionIndicator,
FlatButton,
CardExpansionPanel,
ExpandCollapseTransition,
CardExpansionPanelArrow,
},
props: {
categoryId: {
type: Number,
required: true,
},
totalCardsPerRow: {
type: Number,
required: true,
},
activeCategoryId: {
type: Number,
default: undefined,
@@ -94,6 +95,14 @@ export default defineComponent({
},
});
const cardWidth = computed<string>(() => {
const totalTimesGapIsUsedInRow = props.totalCardsPerRow - 1;
const totalGapWidthInRow = `calc(${totalTimesGapIsUsedInRow} * 15px)`; // TODO: 15px is hardcoded, $card-gap variable should be used
const availableRowWidthForCards = `calc(100% - (${totalGapWidthInRow}))`;
const availableWidthPerCard = `calc((${availableRowWidthForCards}) / ${totalTimesGapIsUsedInRow})`;
return availableWidthPerCard;
});
const cardElement = shallowRef<HTMLElement>();
const cardTitle = computed<string>(() => {
@@ -118,6 +127,7 @@ export default defineComponent({
cardTitle,
isExpanded,
cardElement,
cardWidth,
collapse,
};
},
@@ -131,11 +141,22 @@ export default defineComponent({
$card-inner-padding : 30px;
$arrow-size : 15px;
$expanded-margin-top : 30px;
$card-horizontal-gap : $card-gap;
.expansion__arrow {
position: relative;
.expansion__arrow__inner {
position: absolute;
left: calc(50% - $arrow-size * 1.5);
top: calc(1.5 * $arrow-size);
border: solid $color-primary-darker;
border-width: 0 $arrow-size $arrow-size 0;
padding: $arrow-size;
transform: rotate(-135deg);
}
}
.card {
transition: all 0.2s ease-in-out;
width: v-bind(cardWidth);
&__inner {
padding-top: $card-inner-padding;
padding-right: $card-inner-padding;
@@ -160,9 +181,6 @@ $card-horizontal-gap : $card-gap;
color: $color-on-secondary;
transform: scale(1.05);
}
&:after {
transition: all 0.3s ease-in-out;
}
.card__inner__title {
display: flex;
flex-direction: column;
@@ -184,73 +202,12 @@ $card-horizontal-gap : $card-gap;
font-size: $font-size-absolute-normal;
}
}
.card__expander {
transition: all 0.2s ease-in-out;
position: relative;
background-color: $color-primary-darker;
color: $color-on-primary;
display: flex;
align-items: center;
flex-direction: column;
.card__expander__content {
display: flex;
justify-content: center;
word-break: break-word;
max-width: 100%; // Prevents horizontal expansion of inner content (e.g., when a code block is shown)
width: 100%; // Expands the container to fill available horizontal space, enabling alignment of child items.
}
.card__expander__close-button {
font-size: $font-size-absolute-large;
align-self: flex-end;
margin-right: 0.25em;
@include clickable;
color: $color-primary-light;
@include hover-or-touch {
color: $color-primary;
}
}
}
&.is-collapsed {
.card__inner {
&:after {
content: "";
opacity: 0;
}
}
.card__expander {
max-height: 0;
min-height: 0;
overflow: hidden;
opacity: 0;
}
}
&.is-expanded {
.card__inner {
height: auto;
background-color: $color-secondary;
color: $color-on-secondary;
&:after { // arrow
content: "";
display: block;
position: absolute;
bottom: calc(-1 * #{$expanded-margin-top});
left: calc(50% - #{$arrow-size});
border-left: #{$arrow-size} solid transparent;
border-right: #{$arrow-size} solid transparent;
border-bottom: #{$arrow-size} solid $color-primary-darker;
}
}
.card__expander {
min-height: 200px;
margin-top: $expanded-margin-top;
opacity: 1;
}
@include hover-or-touch {
@@ -277,26 +234,26 @@ $card-horizontal-gap : $card-gap;
}
}
@mixin adaptive-card($cards-in-row) {
&.card {
.card {
$total-times-gap-is-used-in-row: $cards-in-row - 1;
$total-gap-width-in-row: $total-times-gap-is-used-in-row * $card-horizontal-gap;
$available-row-width-for-cards: calc(100% - #{$total-gap-width-in-row});
$available-width-per-card: calc(#{$available-row-width-for-cards} / #{$cards-in-row});
width:$available-width-per-card;
.card__expander {
$all-cards-width: 100% * $cards-in-row;
$additional-padding-width: $card-horizontal-gap * ($cards-in-row - 1);
width: calc(#{$all-cards-width} + #{$additional-padding-width});
}
@for $nth-card from 2 through $cards-in-row { // From second card to rest
&:nth-of-type(#{$cards-in-row}n+#{$nth-card}) {
.card__expander {
$card-left: -100% * ($nth-card - 1);
$additional-space: $card-horizontal-gap * ($nth-card - 1);
margin-left: calc(#{$card-left} - #{$additional-space});
}
}
}
// .card__expander {
// $all-cards-width: 100% * $cards-in-row;
// $additional-padding-width: $card-horizontal-gap * ($cards-in-row - 1);
// width: calc(#{$all-cards-width} + #{$additional-padding-width});
// }
// @for $nth-card from 2 through $cards-in-row { // From second card to rest
// &:nth-of-type(#{$cards-in-row}n+#{$nth-card}) {
// .card__expander {
// $card-left: -100% * ($nth-card - 1);
// $additional-space: $card-horizontal-gap * ($nth-card - 1);
// margin-left: calc(#{$card-left} - #{$additional-space});
// }
// }
// }
// Ensure new line after last row
$card-after-last: $cards-in-row + 1;
&:nth-of-type(#{$cards-in-row}n+#{$card-after-last}) {
@@ -304,8 +261,4 @@ $card-horizontal-gap : $card-gap;
}
}
}
.big-screen { @include adaptive-card(3); }
.medium-screen { @include adaptive-card(2); }
.small-screen { @include adaptive-card(1); }
</style>

View File

@@ -1,4 +1,4 @@
import { splitTextIntoLines, indentText } from '../utils/text';
import { indentText, splitTextIntoLines } from '@tests/shared/Text';
import { log, die } from '../utils/log';
import { readAppLogFile } from './app-logs';
import { STDERR_IGNORE_PATTERNS } from './error-ignore-patterns';

View File

@@ -1,7 +1,7 @@
import { filterEmpty } from '@tests/shared/Text';
import { runCommand } from '../../utils/run-command';
import { log, LogLevel } from '../../utils/log';
import { SupportedPlatform, CURRENT_PLATFORM } from '../../utils/platform';
import { filterEmpty } from '../../utils/text';
export async function captureWindowTitles(processId: number) {
if (!processId) { throw new Error('Missing process ID.'); }

View File

@@ -1,3 +1,4 @@
import { indentText } from '@tests/shared/Text';
import { logCurrentArgs, CommandLineFlag, hasCommandLineFlag } from './cli-args';
import { log, die } from './utils/log';
import { ensureNpmProjectDir, npmInstall, npmBuild } from './utils/npm';
@@ -15,7 +16,6 @@ import {
APP_EXECUTION_DURATION_IN_SECONDS,
SCREENSHOT_PATH,
} from './config';
import { indentText } from './utils/text';
import type { ExtractionResult } from './app/extractors/common/extraction-result';
export async function main(): Promise<void> {

View File

@@ -1,5 +1,6 @@
import { exec, type ExecOptions, type ExecException } from 'node:child_process';
import { indentText } from './text';
import { exec } from 'child_process';
import { indentText } from '@tests/shared/Text';
import type { ExecOptions, ExecException } from 'child_process';
const TIMEOUT_IN_SECONDS = 180;
const MAX_OUTPUT_BUFFER_SIZE = 1024 * 1024; // 1 MB

View File

@@ -1,64 +1,62 @@
import { sleep } from '@/infrastructure/Threading/AsyncSleep';
import { getUrlStatus, type IRequestOptions } from './Requestor';
import { groupUrlsByDomain } from './UrlPerDomainGrouper';
import type { IUrlStatus } from './IUrlStatus';
import { getUrlStatus, type RequestOptions } from './Requestor';
import { groupUrlsByDomain } from './UrlDomainProcessing';
import type { FollowOptions } from './FetchFollow';
import type { UrlStatus } from './UrlStatus';
export async function getUrlStatusesInParallel(
urls: string[],
options?: IBatchRequestOptions,
): Promise<IUrlStatus[]> {
// urls = [ 'https://privacy.sexy' ]; // Here to comment out when testing
options?: BatchRequestOptions,
): Promise<UrlStatus[]> {
// urls = ['https://privacy.sexy']; // Comment out this line to use a hardcoded URL for testing.
const uniqueUrls = Array.from(new Set(urls));
const defaultedOptions = { ...DefaultOptions, ...options };
console.log('Options: ', defaultedOptions);
const results = await request(uniqueUrls, defaultedOptions);
const defaultedDomainOptions = { ...DefaultDomainOptions, ...options?.domainOptions };
console.log('Batch request options applied:', defaultedDomainOptions);
const results = await request(uniqueUrls, defaultedDomainOptions, options);
return results;
}
export interface IBatchRequestOptions {
domainOptions?: IDomainOptions;
requestOptions?: IRequestOptions;
export interface BatchRequestOptions {
readonly domainOptions?: Partial<DomainOptions>;
readonly requestOptions?: Partial<RequestOptions>;
readonly followOptions?: Partial<FollowOptions>;
}
interface IDomainOptions {
sameDomainParallelize?: boolean;
sameDomainDelayInMs?: number;
interface DomainOptions {
readonly sameDomainParallelize?: boolean;
readonly sameDomainDelayInMs?: number;
}
const DefaultOptions: Required<IBatchRequestOptions> = {
domainOptions: {
sameDomainParallelize: false,
sameDomainDelayInMs: 3 /* sec */ * 1000,
},
requestOptions: {
retryExponentialBaseInMs: 5 /* sec */ * 1000,
requestTimeoutInMs: 60 /* sec */ * 1000,
additionalHeaders: {},
},
const DefaultDomainOptions: Required<DomainOptions> = {
sameDomainParallelize: false,
sameDomainDelayInMs: 3 /* sec */ * 1000,
};
function request(
urls: string[],
options: Required<IBatchRequestOptions>,
): Promise<IUrlStatus[]> {
if (!options.domainOptions.sameDomainParallelize) {
domainOptions: Required<DomainOptions>,
options?: BatchRequestOptions,
): Promise<UrlStatus[]> {
if (!domainOptions.sameDomainParallelize) {
return runOnEachDomainWithDelay(
urls,
(url) => getUrlStatus(url, options.requestOptions),
options.domainOptions.sameDomainDelayInMs,
(url) => getUrlStatus(url, options?.requestOptions, options?.followOptions),
domainOptions.sameDomainDelayInMs,
);
}
return Promise.all(urls.map((url) => getUrlStatus(url, options.requestOptions)));
return Promise.all(
urls.map((url) => getUrlStatus(url, options?.requestOptions, options?.followOptions)),
);
}
async function runOnEachDomainWithDelay(
urls: string[],
action: (url: string) => Promise<IUrlStatus>,
action: (url: string) => Promise<UrlStatus>,
delayInMs: number | undefined,
): Promise<IUrlStatus[]> {
): Promise<UrlStatus[]> {
const grouped = groupUrlsByDomain(urls);
const tasks = grouped.map(async (group) => {
const results = new Array<IUrlStatus>();
const results = new Array<UrlStatus>();
/* eslint-disable no-await-in-loop */
for (const url of group) {
const status = await action(url);

View File

@@ -1,27 +1,33 @@
import { sleep } from '@/infrastructure/Threading/AsyncSleep';
import type { IUrlStatus } from './IUrlStatus';
import { indentText } from '@tests/shared/Text';
import { type UrlStatus, formatUrlStatus } from './UrlStatus';
const DefaultBaseRetryIntervalInMs = 5 /* sec */ * 1000;
export async function retryWithExponentialBackOff(
action: () => Promise<IUrlStatus>,
action: () => Promise<UrlStatus>,
baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs,
currentRetry = 1,
): Promise<IUrlStatus> {
): Promise<UrlStatus> {
const maxTries = 3;
const status = await action();
if (shouldRetry(status)) {
if (currentRetry <= maxTries) {
const exponentialBackOffInMs = getRetryTimeoutInMs(currentRetry, baseRetryIntervalInMs);
console.log(`Retrying (${currentRetry}) in ${exponentialBackOffInMs / 1000} seconds`, status);
console.log([
`Attempt ${currentRetry}: Retrying in ${exponentialBackOffInMs / 1000} seconds.`,
'Details:',
indentText(formatUrlStatus(status)),
].join('\n'));
await sleep(exponentialBackOffInMs);
return retryWithExponentialBackOff(action, baseRetryIntervalInMs, currentRetry + 1);
}
console.warn('💀 All retry attempts failed. Final failure to retrieve URL:', indentText(formatUrlStatus(status)));
}
return status;
}
function shouldRetry(status: IUrlStatus) {
function shouldRetry(status: UrlStatus): boolean {
if (status.error) {
return true;
}
@@ -32,14 +38,14 @@ function shouldRetry(status: IUrlStatus) {
|| status.code === 429; // Too Many Requests
}
function isTransientError(statusCode: number) {
function isTransientError(statusCode: number): boolean {
return statusCode >= 500 && statusCode <= 599;
}
function getRetryTimeoutInMs(
currentRetry: number,
baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs,
) {
): number {
const retryRandomFactor = 0.5; // Retry intervals are between 50% and 150%
// of the exponentially increasing base amount
const minRandom = 1 - retryRandomFactor;

View File

@@ -1,19 +1,17 @@
import { fetchWithTimeout } from './FetchWithTimeout';
import { getDomainFromUrl } from './UrlDomainProcessing';
export function fetchFollow(
url: string,
timeoutInMs: number,
fetchOptions: RequestInit,
followOptions: IFollowOptions | undefined,
fetchOptions?: Partial<RequestInit>,
followOptions?: Partial<FollowOptions>,
): Promise<Response> {
const defaultedFollowOptions = {
...DefaultFollowOptions,
...followOptions,
};
const defaultedFollowOptions = { ...DefaultFollowOptions, ...followOptions };
if (followRedirects(defaultedFollowOptions)) {
return fetchWithTimeout(url, timeoutInMs, fetchOptions);
}
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */ };
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */, mode: 'cors' };
const cookies = new CookieStorage(defaultedFollowOptions.enableCookies);
return followRecursivelyWithCookies(
url,
@@ -24,13 +22,15 @@ export function fetchFollow(
);
}
export interface IFollowOptions {
followRedirects?: boolean;
maximumRedirectFollowDepth?: number;
enableCookies?: boolean;
// "cors" | "navigate" | "no-cors" | "same-origin";
export interface FollowOptions {
readonly followRedirects?: boolean;
readonly maximumRedirectFollowDepth?: number;
readonly enableCookies?: boolean;
}
export const DefaultFollowOptions: Required<IFollowOptions> = {
const DefaultFollowOptions: Required<FollowOptions> = {
followRedirects: true,
maximumRedirectFollowDepth: 20,
enableCookies: true,
@@ -64,6 +64,10 @@ async function followRecursivelyWithCookies(
if (cookieHeader) {
cookies.addHeader(cookieHeader);
}
options.headers = {
...options.headers,
Host: getDomainFromUrl(nextUrl),
};
return followRecursivelyWithCookies(nextUrl, timeoutInMs, options, newFollowDepth, cookies);
}
@@ -77,7 +81,7 @@ class CookieStorage {
constructor(private readonly enabled: boolean) {
}
public hasAny() {
public hasAny(): boolean {
return this.enabled && this.cookies.length > 0;
}
@@ -88,12 +92,12 @@ class CookieStorage {
this.cookies.push(header);
}
public getHeader() {
public getHeader(): string {
return this.cookies.join(' ; ');
}
}
function followRedirects(options: IFollowOptions) {
function followRedirects(options: FollowOptions): boolean {
if (!options.followRedirects) {
return false;
}

View File

@@ -2,13 +2,13 @@ export async function fetchWithTimeout(
url: string,
timeoutInMs: number,
init?: RequestInit,
): Promise<Response> {
const controller = new AbortController();
): ReturnType<typeof fetch> {
const options: RequestInit = {
...(init ?? {}),
signal: controller.signal,
signal: AbortSignal.timeout(timeoutInMs),
};
const promise = fetch(url, options);
const timeout = setTimeout(() => controller.abort(), timeoutInMs);
return promise.finally(() => clearTimeout(timeout));
return fetch(
url,
options,
);
}

View File

@@ -1,5 +0,0 @@
export interface IUrlStatus {
url: string;
error?: string;
code?: number;
}

View File

@@ -13,7 +13,10 @@ A CLI and SDK for checking the availability of external URLs.
- 😇 **Rate Limiting**: Queues requests by domain to be polite.
- 🔁 **Retries**: Implements retry pattern with exponential back-off.
-**Timeouts**: Configurable timeout for each request.
- 🎭️ **User-Agent Rotation**: Change user agents for each request.
- 🎭️ **Impersonation**: Impersonate different browsers for each request.
- **🌐 User-Agent Rotation**: Change user agents.
- **🔑 TLS Handshakes**: Perform TLS and HTTP handshakes that are identical to that of a real browser.
- 🫙 **Cookie jar**: Preserve cookies during redirects to mimic real browser.
## CLI
@@ -54,6 +57,7 @@ const statuses = await getUrlStatusesInParallel([ 'https://privacy.sexy', /* ...
- **`sameDomainDelayInMs`** (*number*), default: `3000` (3 seconds)
- Sets the delay between requests to the same domain.
- `requestOptions` (*object*): See [request options](#request-options).
- `followOptions` (*object*): See [follow options](#follow-options).
### `getUrlStatus`
@@ -72,7 +76,6 @@ console.log(`Status code: ${status.code}`);
- The longer the base time, the greater the intervals between retries.
- **`additionalHeaders`** (*object*), default: `false`
- Additional HTTP headers to send along with the default headers. Overrides default headers if specified.
- **`followOptions`** (*object*): See [follow options](#follow-options).
- **`requestTimeoutInMs`** (*number*), default: `60000` (60 seconds)
- Time limit to abort the request if no response is received within the specified time frame.
@@ -83,19 +86,7 @@ Follows `3XX` redirects while preserving cookies.
Same fetch API except third parameter that specifies [follow options](#follow-options), `redirect: 'follow' | 'manual' | 'error'` is discarded in favor of the third parameter.
```js
const status = await fetchFollow('https://privacy.sexy', {
// First argument is same options as fetch API, except `redirect` options
// that's discarded in favor of next argument follow options
headers: {
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'
},
}, {
// Second argument sets the redirect behavior
followRedirects: true,
maximumRedirectFollowDepth: 20,
enableCookies: true,
}
);
const status = await fetchFollow('https://privacy.sexy', 1000 /* timeout in milliseconds */);
console.log(`Status code: ${status.code}`);
```

View File

@@ -1,70 +1,108 @@
import { indentText } from '@tests/shared/Text';
import { retryWithExponentialBackOff } from './ExponentialBackOffRetryHandler';
import { fetchFollow, type IFollowOptions, DefaultFollowOptions } from './FetchFollow';
import { fetchFollow, type FollowOptions } from './FetchFollow';
import { getRandomUserAgent } from './UserAgents';
import type { IUrlStatus } from './IUrlStatus';
import { getDomainFromUrl } from './UrlDomainProcessing';
import { randomizeTlsFingerprint, getTlsContextInfo } from './TlsFingerprintRandomizer';
import type { UrlStatus } from './UrlStatus';
export function getUrlStatus(
url: string,
options: IRequestOptions = DefaultOptions,
): Promise<IUrlStatus> {
const defaultedOptions = { ...DefaultOptions, ...options };
const fetchOptions = getFetchOptions(url, defaultedOptions);
return retryWithExponentialBackOff(async () => {
console.log('Requesting', url);
let result: IUrlStatus;
try {
const response = await fetchFollow(
url,
defaultedOptions.requestTimeoutInMs,
fetchOptions,
defaultedOptions.followOptions,
);
result = { url, code: response.status };
} catch (err) {
result = { url, error: JSON.stringify(err, null, '\t') };
}
return result;
}, defaultedOptions.retryExponentialBaseInMs);
requestOptions?: Partial<RequestOptions>,
followOptions?: Partial<FollowOptions>,
): Promise<UrlStatus> {
const defaultedOptions = getDefaultedRequestOptions(requestOptions);
if (defaultedOptions.randomizeTlsFingerprint) {
randomizeTlsFingerprint();
}
return fetchUrlStatusWithRetry(url, defaultedOptions, followOptions);
}
export interface IRequestOptions {
export interface RequestOptions {
readonly retryExponentialBaseInMs?: number;
readonly additionalHeaders?: Record<string, string>;
readonly additionalHeadersUrlIgnore?: string[];
readonly followOptions?: IFollowOptions;
readonly requestTimeoutInMs: number;
readonly randomizeTlsFingerprint: boolean;
}
const DefaultOptions: Required<IRequestOptions> = {
retryExponentialBaseInMs: 5000,
const DefaultOptions: Required<RequestOptions> = {
retryExponentialBaseInMs: 5 /* sec */ * 1000,
additionalHeaders: {},
additionalHeadersUrlIgnore: [],
requestTimeoutInMs: 60 /* seconds */ * 1000,
followOptions: DefaultFollowOptions,
randomizeTlsFingerprint: true,
};
function getFetchOptions(url: string, options: Required<IRequestOptions>): RequestInit {
function fetchUrlStatusWithRetry(
url: string,
requestOptions: Required<RequestOptions>,
followOptions?: Partial<FollowOptions>,
): Promise<UrlStatus> {
const fetchOptions = getFetchOptions(url, requestOptions);
return retryWithExponentialBackOff(async () => {
console.log(`Initiating request for URL: ${url}`);
let result: UrlStatus;
try {
const response = await fetchFollow(
url,
requestOptions.requestTimeoutInMs,
fetchOptions,
followOptions,
);
result = { url, code: response.status };
} catch (err) {
result = {
url,
error: [
'Error:', indentText(JSON.stringify(err, null, '\t') || err.toString()),
'Options:', indentText(JSON.stringify(fetchOptions, null, '\t')),
'TLS:', indentText(getTlsContextInfo()),
].join('\n'),
};
}
return result;
}, requestOptions.retryExponentialBaseInMs);
}
function getFetchOptions(url: string, options: Required<RequestOptions>): RequestInit {
const additionalHeaders = options.additionalHeadersUrlIgnore
.some((ignorePattern) => url.startsWith(ignorePattern))
? {}
: options.additionalHeaders;
return {
method: 'HEAD',
method: 'GET', // Fetch only headers without the full response body for better speed
headers: {
...getDefaultHeaders(),
...getDefaultHeaders(url),
...additionalHeaders,
},
redirect: 'manual', // Redirects are handled manually, automatic redirects do not work with Host header
};
}
function getDefaultHeaders(): Record<string, string> {
function getDefaultHeaders(url: string): Record<string, string> {
return {
'user-agent': getRandomUserAgent(),
'upgrade-insecure-requests': '1',
connection: 'keep-alive',
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'accept-encoding': 'gzip, deflate, br',
'cache-control': 'max-age=0',
'accept-language': 'en-US,en;q=0.9',
// Needed for websites that filter out non-browser user agents.
'User-Agent': getRandomUserAgent(),
// Required for some websites, especially those behind proxies, to correctly handle the request.
Host: getDomainFromUrl(url),
// The following mimic a real browser request to improve compatibility with most web servers.
'Upgrade-Insecure-Requests': '1',
Connection: 'keep-alive',
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Cache-Control': 'max-age=0',
'Accept-Language': 'en-US,en;q=0.9',
};
}
function getDefaultedRequestOptions(
options?: Partial<RequestOptions>,
): Required<RequestOptions> {
return {
...DefaultOptions,
...options,
};
}

View File

@@ -0,0 +1,69 @@
/**
* Modifies the TLS fingerprint of Node.js HTTP client to circumvent TLS fingerprinting blocks.
* TLS fingerprinting is a technique used to identify clients based on the unencrypted data sent
* during the TLS handshake, used for blocking or identifying non-browser clients like debugging
* proxies or automated scripts.
*
* However, Node.js's HTTP client does not fully support all methods required for impersonating a
* browser's TLS fingerprint, as reported in https://github.com/nodejs/undici/issues/1983.
* While this implementation can alter the TLS fingerprint by randomizing the cipher suite order,
* it may not perfectly mimic specific browser fingerprints due to limitations in the TLS
* implementation of Node.js.
*
* For more detailed information, visit:
* - https://archive.today/2024.03.13-102042/https://httptoolkit.com/blog/tls-fingerprinting-node-js/
* - https://check.ja3.zone/ (To check your tool's or browser's fingerprint)
* - https://github.com/lwthiker/curl-impersonate (A solution for curl)
* - https://github.com/depicts/got-tls (Cipher manipulation support for Node.js)
*/
import { constants } from 'crypto';
import tls from 'tls';
import { indentText } from '@tests/shared/Text';
export function randomizeTlsFingerprint() {
tls.DEFAULT_CIPHERS = getShuffledCiphers().join(':');
console.log(
[
'Original ciphers:', indentText(constants.defaultCipherList),
'Current context', indentText(getTlsContextInfo()),
].join('\n'),
);
}
export function getTlsContextInfo(): string {
return [
`Ciphers: ${tls.DEFAULT_CIPHERS}`,
`Minimum TLS protocol version: ${tls.DEFAULT_MIN_VERSION}`,
`Node fingerprint: ${constants.defaultCoreCipherList === tls.DEFAULT_CIPHERS ? 'Visible' : 'Masked'}`,
].join('\n');
}
/**
* Shuffles the order of TLS ciphers, excluding the top 3 most important ciphers to maintain
* security preferences. This approach modifies the default cipher list of Node.js to create a
* unique TLS fingerprint, thus helping to circumvent detection mechanisms based on static
* fingerprinting. It leverages randomness in the cipher order as a simple method to generate a
* new, unique TLS fingerprint which is not easily identifiable. The technique is based on altering
* parameters used in the TLS handshake process, particularly the cipher suite order, to avoid
* matching known fingerprints that could identify the client as a Node.js application.
*
* For more details, refer to:
* - https://archive.today/2024.03.13-102234/https://getsetfetch.org/blog/tls-fingerprint.html
*/
export function getShuffledCiphers(): readonly string[] {
const nodeOrderedCipherList = constants.defaultCoreCipherList.split(':');
const totalTopCiphersToKeep = 3;
// Keep the most important ciphers in the same order
const fixedCiphers = nodeOrderedCipherList.slice(0, totalTopCiphersToKeep);
// Shuffle the rest
const shuffledCiphers = nodeOrderedCipherList.slice(totalTopCiphersToKeep)
.map((cipher) => ({ cipher, sort: Math.random() }))
.sort((a, b) => a.sort - b.sort)
.map(({ cipher }) => cipher);
const ciphers = [
...fixedCiphers,
...shuffledCiphers,
];
return ciphers;
}

View File

@@ -2,18 +2,18 @@ export function groupUrlsByDomain(urls: string[]): string[][] {
const domains = new Set<string>();
const urlsWithDomain = urls.map((url) => ({
url,
domain: extractDomain(url),
domain: getDomainFromUrl(url),
}));
for (const url of urlsWithDomain) {
domains.add(url.domain);
}
return Array.from(domains).map((domain) => {
return urlsWithDomain
.filter((url) => url.domain === domain)
.filter((url) => url.domain.toLowerCase() === domain.toLowerCase())
.map((url) => url.url);
});
}
function extractDomain(url: string): string {
return url.split('://')[1].split('/')[0].toLowerCase();
export function getDomainFromUrl(url: string): string {
return new URL(url).host;
}

View File

@@ -0,0 +1,19 @@
import { indentText } from '@tests/shared/Text';
export interface UrlStatus {
readonly url: string;
readonly error?: string;
readonly code?: number;
}
export function formatUrlStatus(status: UrlStatus): string {
return [
`URL: ${status.url}`,
...status.code !== undefined ? [
`Response code: ${status.code}`,
] : [],
...status.error ? [
`Error:\n${indentText(status.error)}`,
] : [],
].join('\n');
}

View File

@@ -3,73 +3,28 @@ export function getRandomUserAgent(): string {
}
const UserAgents = [
// Chrome
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537',
// Firefox
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Safari/605.1.15',
// Safari
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/604.1',
// Internet Explorer
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
// Edge
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3 Edge/15.0',
// Opera
'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
// iOS Devices
'Mozilla/5.0 (iPhone; CPU iPhone OS 12_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/18.2b11866 Mobile/16B91 Safari/605.1.15',
'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1',
// Android Devices
'Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.3',
// Other Devices/Browsers
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.3',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15',
'Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.3 Edge/15.0',
'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.3',
'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.3',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.3',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.3 OPR/53.0.2907.99',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2)',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20120121 Firefox/46.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; Tablet PC 2.0)',
'Mozilla/5.0 (Windows NT 5.1; rv:36.0) Gecko/20100101 Firefox/36.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
'Mozilla/5.0 (X11; Linux i686; rv:30.0) Gecko/20100101 Firefox/30.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:28.0) Gecko/20100101 Firefox/28.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/20161202 Firefox/21.0.1',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0',
'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0',
'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.3',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.3',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (X11; CrOS x86_64 4319.74.0) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
// Safari 17.1 - macOS and iPad
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
// Safari - iOS 17 - iPhone
'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
// Safari - iOS 17 - iPad mini
'Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
// Edge - macOS
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51',
// Edge - Windows
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58',
// Edge - Android
'Mozilla/5.0 (Linux; Android 10; HD1913) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.43 Mobile Safari/537.36 EdgA/119.0.2151.92',
// Chrome - macOS
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
// Chrome - Windows
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
// Chrome - Android (Phone)
'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36',
// Firefox - macOS
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0',
// Firefox - Windows
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0,',
// Firefox - Android (Phone)
'Mozilla/5.0 (Android 14; Mobile; rv:109.0) Gecko/120.0 Firefox/120.0',
];

View File

@@ -1,50 +1,82 @@
import { test, expect } from 'vitest';
import { parseApplication } from '@/application/Parser/ApplicationParser';
import type { IApplication } from '@/domain/IApplication';
import { getUrlStatusesInParallel, type IBatchRequestOptions } from './StatusChecker/BatchStatusChecker';
import type { IUrlStatus } from './StatusChecker/IUrlStatus';
import { indentText } from '@tests/shared/Text';
import { formatAssertionMessage } from '@tests/shared/FormatAssertionMessage';
import { type UrlStatus, formatUrlStatus } from './StatusChecker/UrlStatus';
import { getUrlStatusesInParallel, type BatchRequestOptions } from './StatusChecker/BatchStatusChecker';
// arrange
const app = parseApplication();
const urls = collectUniqueUrls(app);
const requestOptions: IBatchRequestOptions = {
const urls = collectUniqueUrls({
application: app,
excludePatterns: [
/^https:\/\/archive\.ph/, // Drops HEAD/GET requests via fetch/curl, responding to Postman/Chromium.
],
});
const requestOptions: BatchRequestOptions = {
domainOptions: {
sameDomainParallelize: false, // be nice to our external servers
sameDomainParallelize: false, // be nice to our third-party servers
sameDomainDelayInMs: 5 /* sec */ * 1000,
},
requestOptions: {
retryExponentialBaseInMs: 3 /* sec */ * 1000,
requestTimeoutInMs: 60 /* sec */ * 1000,
additionalHeaders: { referer: app.projectDetails.homepage },
randomizeTlsFingerprint: true,
},
followOptions: {
followRedirects: true,
enableCookies: true,
},
};
const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000;
test(`all URLs (${urls.length}) should be alive`, async () => {
// act
const results = await getUrlStatusesInParallel(urls, requestOptions);
const deadUrls = results.filter((r) => r.code !== 200);
expect(deadUrls).to.have.lengthOf(0, printUrls(deadUrls));
// assert
const deadUrls = results.filter((r) => r.code === undefined || !isOkStatusCode(r.code));
expect(deadUrls).to.have.lengthOf(0, formatAssertionMessage([formatUrlStatusReport(deadUrls)]));
}, testTimeoutInMs);
function collectUniqueUrls(application: IApplication): string[] {
function isOkStatusCode(statusCode: number): boolean {
return statusCode >= 200 && statusCode < 300;
}
function collectUniqueUrls(
options: {
readonly application: IApplication,
readonly excludePatterns?: readonly RegExp[],
},
): string[] {
return [ // Get all nodes
...application.collections.flatMap((c) => c.getAllCategories()),
...application.collections.flatMap((c) => c.getAllScripts()),
...options.application.collections.flatMap((c) => c.getAllCategories()),
...options.application.collections.flatMap((c) => c.getAllScripts()),
]
// Get all docs
.flatMap((documentable) => documentable.docs)
// Parse all URLs
.flatMap((docString) => docString.match(/(https?:\/\/[^\s`"<>()]+)/g) || [])
.flatMap((docString) => extractUrls(docString))
// Remove duplicates
.filter((url, index, array) => array.indexOf(url) === index);
.filter((url, index, array) => array.indexOf(url) === index)
// Exclude certain URLs based on patterns
.filter((url) => !shouldExcludeUrl(url, options.excludePatterns ?? []));
}
function printUrls(statuses: IUrlStatus[]): string {
/* eslint-disable prefer-template */
return '\n'
+ statuses.map((status) => `- ${status.url}\n`
+ (status.code ? `\tResponse code: ${status.code}` : '')
+ (status.error ? `\tError: ${status.error}` : ''))
.join('\n')
+ '\n';
/* eslint-enable prefer-template */
function shouldExcludeUrl(url: string, patterns: readonly RegExp[]): boolean {
return patterns.some((pattern) => pattern.test(url));
}
function formatUrlStatusReport(deadUrlStatuses: readonly UrlStatus[]): string {
return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`;
}
function extractUrls(textWithInlineCode: string): string[] {
/*
Matches all URLs.
Inline code blocks contain URLs not intended for user interaction and not
guaranteed to support expected HTTP methods, leading to false-negatives.
*/
const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+)/g;
return textWithInlineCode.match(nonCodeBlockUrlRegex) || [];
}