Compare commits

...

2 Commits

Author SHA1 Message Date
undergroundwires
b042b36aea Add expansion/collapse animations for cards
Key changes:

- Add animation for card opening/collapse.

Other supporting changes:

- Move card expansion panel to its own component for easier
  maintainability and better separation of concerns.
- Use real DOM element instead of &:before pseudo-element for showing
  expansion arrow. This increases maintainability by separating its
  code and concerns.

- TODO: When one card is expanded and another one is also expanded, the
  transition looks bad.
2024-03-31 20:07:09 +02:00
undergroundwires
be7a886225 Improve URL checks to reduce false-negatives
This commit improves the URL health checking mechanism to reduce false
negatives.

- Treat all 2XX status codes as successful, addressing issues with codes
  like `204`.
- Exclude URLs within Markdown inline code blocks.
- Send the Host header for improved handling of webpages behind proxies.
- Improve formatting and context for output messages.
- Fix the defaulting options for redirects and cookie handling.
- Add URL exclusion support for non-responsive URLs.
- Update the user agent pool to modern browsers and platforms.
- Improve CI/CD workflow to respond to modifications in the
  `test/checks/external-urls` directory, offering immediate feedback on
  potential impacts to the external URL test.
- Add support for randomizing TLS fingerprint to mimic various clients
  better, improving the effectiveness of checks. However, this is not
  fully supported by Node.js's HTTP client; see nodejs/undici#1983 for
  more details.
- Use `AbortSignal` instead of `AbortController` as more modern and
  simpler way to handle timeouts.
2024-03-13 18:26:16 +01:00
24 changed files with 521 additions and 325 deletions

View File

@@ -3,6 +3,9 @@ name: checks.external-urls
on: on:
schedule: schedule:
- cron: '0 0 * * 0' # at 00:00 on every Sunday - cron: '0 0 * * 0' # at 00:00 on every Sunday
push:
paths:
- tests/checks/external-urls/**
jobs: jobs:
run-check: run-check:

View File

@@ -1,7 +1,10 @@
export type SchedulerCallbackType = (...args: unknown[]) => void; export type SchedulerCallbackType = (...args: unknown[]) => void;
export type SchedulerType = (callback: SchedulerCallbackType, ms: number) => void; export type SchedulerType = (callback: SchedulerCallbackType, ms: number) => void;
export function sleep(time: number, scheduler: SchedulerType = setTimeout) { export function sleep(
time: number,
scheduler: SchedulerType = setTimeout,
): Promise<void> {
return new Promise((resolve) => { return new Promise((resolve) => {
scheduler(() => resolve(undefined), time); scheduler(() => resolve(undefined), time);
}); });

View File

@@ -0,0 +1,90 @@
<template>
<div class="card__expander">
<div class="card__expander__close-button">
<FlatButton
icon="xmark"
@click="collapse()"
/>
</div>
<div class="card__expander__content">
<ScriptsTree
:category-id="categoryId"
:has-top-padding="false"
/>
</div>
</div>
</template>
<script lang="ts">
import {
defineComponent,
} from 'vue';
import FlatButton from '@/presentation/components/Shared/FlatButton.vue';
import ScriptsTree from '@/presentation/components/Scripts/View/Tree/ScriptsTree.vue';
/**
 * Expansion panel of a card.
 *
 * Renders the scripts tree of the category given by `categoryId` together
 * with a close button. The panel does not hide itself: clicking the close
 * button only emits `onCollapse`, leaving the decision to the parent.
 */
export default defineComponent({
  components: {
    FlatButton,
    ScriptsTree,
  },
  props: {
    /** Identifier of the category whose scripts tree is rendered. */
    categoryId: {
      required: true,
      type: Number,
    },
  },
  emits: {
    // Payload-less event; the validator accepts every emission.
    onCollapse: () => true,
  },
  setup(_, { emit }) {
    /** Asks the parent to collapse this panel. */
    const collapse = (): void => {
      emit('onCollapse');
    };

    return { collapse };
  },
});
</script>
<style scoped lang="scss">
@use "@/presentation/assets/styles/main" as *;
// Vertical space between the top of the expansion panel and whatever precedes it.
$expanded-margin-top : 30px;
// Root of the expansion panel: a vertical flex column holding the close
// button followed by the panel content.
.card__expander {
transition: all 0.2s ease-in-out;
position: relative;
background-color: $color-primary-darker;
color: $color-on-primary;
display: flex;
align-items: center;
flex-direction: column;
margin-top: $expanded-margin-top;
// Wrapper around the panel content; centers its child horizontally.
.card__expander__content {
display: flex;
justify-content: center;
word-break: break-word;
max-width: 100%; // Prevents horizontal expansion of inner content (e.g., when a code block is shown)
width: 100%; // Expands the container to fill available horizontal space, enabling alignment of child items.
}
// Close button; `align-self: flex-end` overrides the column's centered
// alignment so the button sits on the end (right in LTR) edge.
.card__expander__close-button {
font-size: $font-size-absolute-large;
align-self: flex-end;
margin-right: 0.25em;
// NOTE(review): `clickable` and `hover-or-touch` are mixins brought in by the
// `main` styles import; their exact output is not visible here.
@include clickable;
color: $color-primary-light;
@include hover-or-touch {
color: $color-primary;
}
}
}
</style>

View File

@@ -0,0 +1,24 @@
<template>
<div class="arrow-container">
<div class="arrow" />
</div>
</template>
<style scoped lang="scss">
@use "@/presentation/assets/styles/main" as *;
// Base length of the arrow; reused below for border thickness, padding and offsets.
$arrow-size : 15px;
// Positioning context for the absolutely positioned arrow.
.arrow-container {
position: relative;
// The arrow is drawn purely with borders: only the right and bottom borders
// are visible and the box is rotated so that their shared corner points upwards.
.arrow {
position: absolute;
// Presumably centers the arrow horizontally: with an empty content box the
// element is 3 * $arrow-size wide (left/right padding plus the right border),
// so half of that is subtracted from 50%.
left: calc(50% - $arrow-size * 1.5);
top: calc(1.5 * $arrow-size);
border: solid $color-primary-darker;
border-width: 0 $arrow-size $arrow-size 0; // top right bottom left: only right and bottom are drawn
padding: $arrow-size;
transform: rotate(-135deg); // turns the bottom-right corner to point up
}
}
</style>

View File

@@ -19,6 +19,7 @@
v-for="categoryId of categoryIds" v-for="categoryId of categoryIds"
:key="categoryId" :key="categoryId"
class="card" class="card"
:total-cards-per-row="cardsPerRow"
:class="{ :class="{
'small-screen': width <= 500, 'small-screen': width <= 500,
'medium-screen': width > 500 && width < 750, 'medium-screen': width > 500 && width < 750,
@@ -62,6 +63,19 @@ export default defineComponent({
); );
const activeCategoryId = ref<number | undefined>(undefined); const activeCategoryId = ref<number | undefined>(undefined);
/**
 * Number of cards placed side by side in a single row, derived from the
 * current width. The breakpoints mirror the screen-size classes bound in the
 * template (`<= 500`: one card, `< 750`: two cards, otherwise three).
 *
 * Throws when the width is not yet known.
 */
const cardsPerRow = computed<number>(() => {
  const currentWidth = width.value;
  if (currentWidth === undefined) {
    throw new Error('Unknown width, total cards should not be calculated');
  }
  let totalCards = 3;
  if (currentWidth <= 500) {
    totalCards = 1;
  } else if (currentWidth < 750) {
    totalCards = 2;
  }
  return totalCards;
});
function onSelected(categoryId: number, isExpanded: boolean) { function onSelected(categoryId: number, isExpanded: boolean) {
activeCategoryId.value = isExpanded ? categoryId : undefined; activeCategoryId.value = isExpanded ? categoryId : undefined;
} }
@@ -108,6 +122,7 @@ export default defineComponent({
width, width,
categoryIds, categoryIds,
activeCategoryId, activeCategoryId,
cardsPerRow,
onSelected, onSelected,
}; };
}, },

View File

@@ -29,20 +29,15 @@
:category-id="categoryId" :category-id="categoryId"
/> />
</div> </div>
<div class="card__expander" @click.stop> <CardExpansionPanelArrow v-show="isExpanded" />
<div class="card__expander__close-button"> <ExpandCollapseTransition>
<FlatButton <CardExpansionPanel
icon="xmark" v-show="isExpanded"
@click="collapse()" :category-id="categoryId"
/> @on-collapse="collapse"
</div> @click.stop
<div class="card__expander__content"> />
<ScriptsTree </ExpandCollapseTransition>
:category-id="categoryId"
:has-top-padding="false"
/>
</div>
</div>
</div> </div>
</template> </template>
@@ -51,24 +46,30 @@ import {
defineComponent, computed, shallowRef, defineComponent, computed, shallowRef,
} from 'vue'; } from 'vue';
import AppIcon from '@/presentation/components/Shared/Icon/AppIcon.vue'; import AppIcon from '@/presentation/components/Shared/Icon/AppIcon.vue';
import FlatButton from '@/presentation/components/Shared/FlatButton.vue'; import ExpandCollapseTransition from '@/presentation/components/Shared/ExpandCollapse/ExpandCollapseTransition.vue';
import { injectKey } from '@/presentation/injectionSymbols'; import { injectKey } from '@/presentation/injectionSymbols';
import ScriptsTree from '@/presentation/components/Scripts/View/Tree/ScriptsTree.vue';
import { sleep } from '@/infrastructure/Threading/AsyncSleep'; import { sleep } from '@/infrastructure/Threading/AsyncSleep';
import CardSelectionIndicator from './CardSelectionIndicator.vue'; import CardSelectionIndicator from './CardSelectionIndicator.vue';
import CardExpansionPanel from './CardExpansionPanel.vue';
import CardExpansionPanelArrow from './CardExpansionPanelArrow.vue';
export default defineComponent({ export default defineComponent({
components: { components: {
ScriptsTree,
AppIcon, AppIcon,
CardSelectionIndicator, CardSelectionIndicator,
FlatButton, CardExpansionPanel,
ExpandCollapseTransition,
CardExpansionPanelArrow,
}, },
props: { props: {
categoryId: { categoryId: {
type: Number, type: Number,
required: true, required: true,
}, },
totalCardsPerRow: {
type: Number,
required: true,
},
activeCategoryId: { activeCategoryId: {
type: Number, type: Number,
default: undefined, default: undefined,
@@ -94,6 +95,14 @@ export default defineComponent({
}, },
}); });
/**
 * CSS width expression for a single card.
 *
 * A row with N cards contains N - 1 gaps. The width left after subtracting
 * those gaps from the full row (100%) is shared equally between the N cards.
 *
 * The divisor must be the number of cards, not the number of gaps: dividing
 * by the gap count yields `calc(... / 0)` (invalid CSS) for a single card per
 * row and cards that are too wide for every other layout.
 */
const cardWidth = computed<string>(() => {
  const totalCardsInRow = props.totalCardsPerRow;
  const totalTimesGapIsUsedInRow = totalCardsInRow - 1;
  const totalGapWidthInRow = `calc(${totalTimesGapIsUsedInRow} * 15px)`; // TODO: 15px is hardcoded, $card-gap variable should be used
  const availableRowWidthForCards = `calc(100% - (${totalGapWidthInRow}))`;
  const availableWidthPerCard = `calc((${availableRowWidthForCards}) / ${totalCardsInRow})`;
  return availableWidthPerCard;
});
const cardElement = shallowRef<HTMLElement>(); const cardElement = shallowRef<HTMLElement>();
const cardTitle = computed<string>(() => { const cardTitle = computed<string>(() => {
@@ -118,6 +127,7 @@ export default defineComponent({
cardTitle, cardTitle,
isExpanded, isExpanded,
cardElement, cardElement,
cardWidth,
collapse, collapse,
}; };
}, },
@@ -131,11 +141,22 @@ export default defineComponent({
$card-inner-padding : 30px; $card-inner-padding : 30px;
$arrow-size : 15px; $arrow-size : 15px;
$expanded-margin-top : 30px; $expanded-margin-top : 30px;
$card-horizontal-gap : $card-gap;
.expansion__arrow {
position: relative;
.expansion__arrow__inner {
position: absolute;
left: calc(50% - $arrow-size * 1.5);
top: calc(1.5 * $arrow-size);
border: solid $color-primary-darker;
border-width: 0 $arrow-size $arrow-size 0;
padding: $arrow-size;
transform: rotate(-135deg);
}
}
.card { .card {
transition: all 0.2s ease-in-out; width: v-bind(cardWidth);
&__inner { &__inner {
padding-top: $card-inner-padding; padding-top: $card-inner-padding;
padding-right: $card-inner-padding; padding-right: $card-inner-padding;
@@ -160,9 +181,6 @@ $card-horizontal-gap : $card-gap;
color: $color-on-secondary; color: $color-on-secondary;
transform: scale(1.05); transform: scale(1.05);
} }
&:after {
transition: all 0.3s ease-in-out;
}
.card__inner__title { .card__inner__title {
display: flex; display: flex;
flex-direction: column; flex-direction: column;
@@ -184,73 +202,12 @@ $card-horizontal-gap : $card-gap;
font-size: $font-size-absolute-normal; font-size: $font-size-absolute-normal;
} }
} }
.card__expander {
transition: all 0.2s ease-in-out;
position: relative;
background-color: $color-primary-darker;
color: $color-on-primary;
display: flex;
align-items: center;
flex-direction: column;
.card__expander__content {
display: flex;
justify-content: center;
word-break: break-word;
max-width: 100%; // Prevents horizontal expansion of inner content (e.g., when a code block is shown)
width: 100%; // Expands the container to fill available horizontal space, enabling alignment of child items.
}
.card__expander__close-button {
font-size: $font-size-absolute-large;
align-self: flex-end;
margin-right: 0.25em;
@include clickable;
color: $color-primary-light;
@include hover-or-touch {
color: $color-primary;
}
}
}
&.is-collapsed {
.card__inner {
&:after {
content: "";
opacity: 0;
}
}
.card__expander {
max-height: 0;
min-height: 0;
overflow: hidden;
opacity: 0;
}
}
&.is-expanded { &.is-expanded {
.card__inner { .card__inner {
height: auto; height: auto;
background-color: $color-secondary; background-color: $color-secondary;
color: $color-on-secondary; color: $color-on-secondary;
&:after { // arrow
content: "";
display: block;
position: absolute;
bottom: calc(-1 * #{$expanded-margin-top});
left: calc(50% - #{$arrow-size});
border-left: #{$arrow-size} solid transparent;
border-right: #{$arrow-size} solid transparent;
border-bottom: #{$arrow-size} solid $color-primary-darker;
}
}
.card__expander {
min-height: 200px;
margin-top: $expanded-margin-top;
opacity: 1;
} }
@include hover-or-touch { @include hover-or-touch {
@@ -277,26 +234,26 @@ $card-horizontal-gap : $card-gap;
} }
} }
@mixin adaptive-card($cards-in-row) { @mixin adaptive-card($cards-in-row) {
&.card { .card {
$total-times-gap-is-used-in-row: $cards-in-row - 1; $total-times-gap-is-used-in-row: $cards-in-row - 1;
$total-gap-width-in-row: $total-times-gap-is-used-in-row * $card-horizontal-gap; $total-gap-width-in-row: $total-times-gap-is-used-in-row * $card-horizontal-gap;
$available-row-width-for-cards: calc(100% - #{$total-gap-width-in-row}); $available-row-width-for-cards: calc(100% - #{$total-gap-width-in-row});
$available-width-per-card: calc(#{$available-row-width-for-cards} / #{$cards-in-row}); $available-width-per-card: calc(#{$available-row-width-for-cards} / #{$cards-in-row});
width:$available-width-per-card; width:$available-width-per-card;
.card__expander { // .card__expander {
$all-cards-width: 100% * $cards-in-row; // $all-cards-width: 100% * $cards-in-row;
$additional-padding-width: $card-horizontal-gap * ($cards-in-row - 1); // $additional-padding-width: $card-horizontal-gap * ($cards-in-row - 1);
width: calc(#{$all-cards-width} + #{$additional-padding-width}); // width: calc(#{$all-cards-width} + #{$additional-padding-width});
} // }
@for $nth-card from 2 through $cards-in-row { // From second card to rest // @for $nth-card from 2 through $cards-in-row { // From second card to rest
&:nth-of-type(#{$cards-in-row}n+#{$nth-card}) { // &:nth-of-type(#{$cards-in-row}n+#{$nth-card}) {
.card__expander { // .card__expander {
$card-left: -100% * ($nth-card - 1); // $card-left: -100% * ($nth-card - 1);
$additional-space: $card-horizontal-gap * ($nth-card - 1); // $additional-space: $card-horizontal-gap * ($nth-card - 1);
margin-left: calc(#{$card-left} - #{$additional-space}); // margin-left: calc(#{$card-left} - #{$additional-space});
} // }
} // }
} // }
// Ensure new line after last row // Ensure new line after last row
$card-after-last: $cards-in-row + 1; $card-after-last: $cards-in-row + 1;
&:nth-of-type(#{$cards-in-row}n+#{$card-after-last}) { &:nth-of-type(#{$cards-in-row}n+#{$card-after-last}) {
@@ -304,8 +261,4 @@ $card-horizontal-gap : $card-gap;
} }
} }
} }
.big-screen { @include adaptive-card(3); }
.medium-screen { @include adaptive-card(2); }
.small-screen { @include adaptive-card(1); }
</style> </style>

View File

@@ -1,4 +1,4 @@
import { splitTextIntoLines, indentText } from '../utils/text'; import { indentText, splitTextIntoLines } from '@tests/shared/Text';
import { log, die } from '../utils/log'; import { log, die } from '../utils/log';
import { readAppLogFile } from './app-logs'; import { readAppLogFile } from './app-logs';
import { STDERR_IGNORE_PATTERNS } from './error-ignore-patterns'; import { STDERR_IGNORE_PATTERNS } from './error-ignore-patterns';

View File

@@ -1,7 +1,7 @@
import { filterEmpty } from '@tests/shared/Text';
import { runCommand } from '../../utils/run-command'; import { runCommand } from '../../utils/run-command';
import { log, LogLevel } from '../../utils/log'; import { log, LogLevel } from '../../utils/log';
import { SupportedPlatform, CURRENT_PLATFORM } from '../../utils/platform'; import { SupportedPlatform, CURRENT_PLATFORM } from '../../utils/platform';
import { filterEmpty } from '../../utils/text';
export async function captureWindowTitles(processId: number) { export async function captureWindowTitles(processId: number) {
if (!processId) { throw new Error('Missing process ID.'); } if (!processId) { throw new Error('Missing process ID.'); }

View File

@@ -1,3 +1,4 @@
import { indentText } from '@tests/shared/Text';
import { logCurrentArgs, CommandLineFlag, hasCommandLineFlag } from './cli-args'; import { logCurrentArgs, CommandLineFlag, hasCommandLineFlag } from './cli-args';
import { log, die } from './utils/log'; import { log, die } from './utils/log';
import { ensureNpmProjectDir, npmInstall, npmBuild } from './utils/npm'; import { ensureNpmProjectDir, npmInstall, npmBuild } from './utils/npm';
@@ -15,7 +16,6 @@ import {
APP_EXECUTION_DURATION_IN_SECONDS, APP_EXECUTION_DURATION_IN_SECONDS,
SCREENSHOT_PATH, SCREENSHOT_PATH,
} from './config'; } from './config';
import { indentText } from './utils/text';
import type { ExtractionResult } from './app/extractors/common/extraction-result'; import type { ExtractionResult } from './app/extractors/common/extraction-result';
export async function main(): Promise<void> { export async function main(): Promise<void> {

View File

@@ -1,5 +1,6 @@
import { exec, type ExecOptions, type ExecException } from 'node:child_process'; import { exec } from 'child_process';
import { indentText } from './text'; import { indentText } from '@tests/shared/Text';
import type { ExecOptions, ExecException } from 'child_process';
const TIMEOUT_IN_SECONDS = 180; const TIMEOUT_IN_SECONDS = 180;
const MAX_OUTPUT_BUFFER_SIZE = 1024 * 1024; // 1 MB const MAX_OUTPUT_BUFFER_SIZE = 1024 * 1024; // 1 MB

View File

@@ -1,64 +1,62 @@
import { sleep } from '@/infrastructure/Threading/AsyncSleep'; import { sleep } from '@/infrastructure/Threading/AsyncSleep';
import { getUrlStatus, type IRequestOptions } from './Requestor'; import { getUrlStatus, type RequestOptions } from './Requestor';
import { groupUrlsByDomain } from './UrlPerDomainGrouper'; import { groupUrlsByDomain } from './UrlDomainProcessing';
import type { IUrlStatus } from './IUrlStatus'; import type { FollowOptions } from './FetchFollow';
import type { UrlStatus } from './UrlStatus';
export async function getUrlStatusesInParallel( export async function getUrlStatusesInParallel(
urls: string[], urls: string[],
options?: IBatchRequestOptions, options?: BatchRequestOptions,
): Promise<IUrlStatus[]> { ): Promise<UrlStatus[]> {
// urls = [ 'https://privacy.sexy' ]; // Here to comment out when testing // urls = ['https://privacy.sexy']; // Comment out this line to use a hardcoded URL for testing.
const uniqueUrls = Array.from(new Set(urls)); const uniqueUrls = Array.from(new Set(urls));
const defaultedOptions = { ...DefaultOptions, ...options }; const defaultedDomainOptions = { ...DefaultDomainOptions, ...options?.domainOptions };
console.log('Options: ', defaultedOptions); console.log('Batch request options applied:', defaultedDomainOptions);
const results = await request(uniqueUrls, defaultedOptions); const results = await request(uniqueUrls, defaultedDomainOptions, options);
return results; return results;
} }
export interface IBatchRequestOptions { export interface BatchRequestOptions {
domainOptions?: IDomainOptions; readonly domainOptions?: Partial<DomainOptions>;
requestOptions?: IRequestOptions; readonly requestOptions?: Partial<RequestOptions>;
readonly followOptions?: Partial<FollowOptions>;
} }
interface IDomainOptions { interface DomainOptions {
sameDomainParallelize?: boolean; readonly sameDomainParallelize?: boolean;
sameDomainDelayInMs?: number; readonly sameDomainDelayInMs?: number;
} }
const DefaultOptions: Required<IBatchRequestOptions> = { const DefaultDomainOptions: Required<DomainOptions> = {
domainOptions: { sameDomainParallelize: false,
sameDomainParallelize: false, sameDomainDelayInMs: 3 /* sec */ * 1000,
sameDomainDelayInMs: 3 /* sec */ * 1000,
},
requestOptions: {
retryExponentialBaseInMs: 5 /* sec */ * 1000,
requestTimeoutInMs: 60 /* sec */ * 1000,
additionalHeaders: {},
},
}; };
function request( function request(
urls: string[], urls: string[],
options: Required<IBatchRequestOptions>, domainOptions: Required<DomainOptions>,
): Promise<IUrlStatus[]> { options?: BatchRequestOptions,
if (!options.domainOptions.sameDomainParallelize) { ): Promise<UrlStatus[]> {
if (!domainOptions.sameDomainParallelize) {
return runOnEachDomainWithDelay( return runOnEachDomainWithDelay(
urls, urls,
(url) => getUrlStatus(url, options.requestOptions), (url) => getUrlStatus(url, options?.requestOptions, options?.followOptions),
options.domainOptions.sameDomainDelayInMs, domainOptions.sameDomainDelayInMs,
); );
} }
return Promise.all(urls.map((url) => getUrlStatus(url, options.requestOptions))); return Promise.all(
urls.map((url) => getUrlStatus(url, options?.requestOptions, options?.followOptions)),
);
} }
async function runOnEachDomainWithDelay( async function runOnEachDomainWithDelay(
urls: string[], urls: string[],
action: (url: string) => Promise<IUrlStatus>, action: (url: string) => Promise<UrlStatus>,
delayInMs: number | undefined, delayInMs: number | undefined,
): Promise<IUrlStatus[]> { ): Promise<UrlStatus[]> {
const grouped = groupUrlsByDomain(urls); const grouped = groupUrlsByDomain(urls);
const tasks = grouped.map(async (group) => { const tasks = grouped.map(async (group) => {
const results = new Array<IUrlStatus>(); const results = new Array<UrlStatus>();
/* eslint-disable no-await-in-loop */ /* eslint-disable no-await-in-loop */
for (const url of group) { for (const url of group) {
const status = await action(url); const status = await action(url);

View File

@@ -1,27 +1,33 @@
import { sleep } from '@/infrastructure/Threading/AsyncSleep'; import { sleep } from '@/infrastructure/Threading/AsyncSleep';
import type { IUrlStatus } from './IUrlStatus'; import { indentText } from '@tests/shared/Text';
import { type UrlStatus, formatUrlStatus } from './UrlStatus';
const DefaultBaseRetryIntervalInMs = 5 /* sec */ * 1000; const DefaultBaseRetryIntervalInMs = 5 /* sec */ * 1000;
export async function retryWithExponentialBackOff( export async function retryWithExponentialBackOff(
action: () => Promise<IUrlStatus>, action: () => Promise<UrlStatus>,
baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs, baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs,
currentRetry = 1, currentRetry = 1,
): Promise<IUrlStatus> { ): Promise<UrlStatus> {
const maxTries = 3; const maxTries = 3;
const status = await action(); const status = await action();
if (shouldRetry(status)) { if (shouldRetry(status)) {
if (currentRetry <= maxTries) { if (currentRetry <= maxTries) {
const exponentialBackOffInMs = getRetryTimeoutInMs(currentRetry, baseRetryIntervalInMs); const exponentialBackOffInMs = getRetryTimeoutInMs(currentRetry, baseRetryIntervalInMs);
console.log(`Retrying (${currentRetry}) in ${exponentialBackOffInMs / 1000} seconds`, status); console.log([
`Attempt ${currentRetry}: Retrying in ${exponentialBackOffInMs / 1000} seconds.`,
'Details:',
indentText(formatUrlStatus(status)),
].join('\n'));
await sleep(exponentialBackOffInMs); await sleep(exponentialBackOffInMs);
return retryWithExponentialBackOff(action, baseRetryIntervalInMs, currentRetry + 1); return retryWithExponentialBackOff(action, baseRetryIntervalInMs, currentRetry + 1);
} }
console.warn('💀 All retry attempts failed. Final failure to retrieve URL:', indentText(formatUrlStatus(status)));
} }
return status; return status;
} }
function shouldRetry(status: IUrlStatus) { function shouldRetry(status: UrlStatus): boolean {
if (status.error) { if (status.error) {
return true; return true;
} }
@@ -32,14 +38,14 @@ function shouldRetry(status: IUrlStatus) {
|| status.code === 429; // Too Many Requests || status.code === 429; // Too Many Requests
} }
function isTransientError(statusCode: number) { function isTransientError(statusCode: number): boolean {
return statusCode >= 500 && statusCode <= 599; return statusCode >= 500 && statusCode <= 599;
} }
function getRetryTimeoutInMs( function getRetryTimeoutInMs(
currentRetry: number, currentRetry: number,
baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs, baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs,
) { ): number {
const retryRandomFactor = 0.5; // Retry intervals are between 50% and 150% const retryRandomFactor = 0.5; // Retry intervals are between 50% and 150%
// of the exponentially increasing base amount // of the exponentially increasing base amount
const minRandom = 1 - retryRandomFactor; const minRandom = 1 - retryRandomFactor;

View File

@@ -1,19 +1,17 @@
import { fetchWithTimeout } from './FetchWithTimeout'; import { fetchWithTimeout } from './FetchWithTimeout';
import { getDomainFromUrl } from './UrlDomainProcessing';
export function fetchFollow( export function fetchFollow(
url: string, url: string,
timeoutInMs: number, timeoutInMs: number,
fetchOptions: RequestInit, fetchOptions?: Partial<RequestInit>,
followOptions: IFollowOptions | undefined, followOptions?: Partial<FollowOptions>,
): Promise<Response> { ): Promise<Response> {
const defaultedFollowOptions = { const defaultedFollowOptions = { ...DefaultFollowOptions, ...followOptions };
...DefaultFollowOptions,
...followOptions,
};
if (followRedirects(defaultedFollowOptions)) { if (followRedirects(defaultedFollowOptions)) {
return fetchWithTimeout(url, timeoutInMs, fetchOptions); return fetchWithTimeout(url, timeoutInMs, fetchOptions);
} }
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */ }; fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */, mode: 'cors' };
const cookies = new CookieStorage(defaultedFollowOptions.enableCookies); const cookies = new CookieStorage(defaultedFollowOptions.enableCookies);
return followRecursivelyWithCookies( return followRecursivelyWithCookies(
url, url,
@@ -24,13 +22,15 @@ export function fetchFollow(
); );
} }
export interface IFollowOptions { // "cors" | "navigate" | "no-cors" | "same-origin";
followRedirects?: boolean;
maximumRedirectFollowDepth?: number; export interface FollowOptions {
enableCookies?: boolean; readonly followRedirects?: boolean;
readonly maximumRedirectFollowDepth?: number;
readonly enableCookies?: boolean;
} }
export const DefaultFollowOptions: Required<IFollowOptions> = { const DefaultFollowOptions: Required<FollowOptions> = {
followRedirects: true, followRedirects: true,
maximumRedirectFollowDepth: 20, maximumRedirectFollowDepth: 20,
enableCookies: true, enableCookies: true,
@@ -64,6 +64,10 @@ async function followRecursivelyWithCookies(
if (cookieHeader) { if (cookieHeader) {
cookies.addHeader(cookieHeader); cookies.addHeader(cookieHeader);
} }
options.headers = {
...options.headers,
Host: getDomainFromUrl(nextUrl),
};
return followRecursivelyWithCookies(nextUrl, timeoutInMs, options, newFollowDepth, cookies); return followRecursivelyWithCookies(nextUrl, timeoutInMs, options, newFollowDepth, cookies);
} }
@@ -77,7 +81,7 @@ class CookieStorage {
constructor(private readonly enabled: boolean) { constructor(private readonly enabled: boolean) {
} }
public hasAny() { public hasAny(): boolean {
return this.enabled && this.cookies.length > 0; return this.enabled && this.cookies.length > 0;
} }
@@ -88,12 +92,12 @@ class CookieStorage {
this.cookies.push(header); this.cookies.push(header);
} }
public getHeader() { public getHeader(): string {
return this.cookies.join(' ; '); return this.cookies.join(' ; ');
} }
} }
function followRedirects(options: IFollowOptions) { function followRedirects(options: FollowOptions): boolean {
if (!options.followRedirects) { if (!options.followRedirects) {
return false; return false;
} }

View File

@@ -2,13 +2,13 @@ export async function fetchWithTimeout(
url: string, url: string,
timeoutInMs: number, timeoutInMs: number,
init?: RequestInit, init?: RequestInit,
): Promise<Response> { ): ReturnType<typeof fetch> {
const controller = new AbortController();
const options: RequestInit = { const options: RequestInit = {
...(init ?? {}), ...(init ?? {}),
signal: controller.signal, signal: AbortSignal.timeout(timeoutInMs),
}; };
const promise = fetch(url, options); return fetch(
const timeout = setTimeout(() => controller.abort(), timeoutInMs); url,
return promise.finally(() => clearTimeout(timeout)); options,
);
} }

View File

@@ -1,5 +0,0 @@
export interface IUrlStatus {
url: string;
error?: string;
code?: number;
}

View File

@@ -13,7 +13,10 @@ A CLI and SDK for checking the availability of external URLs.
- 😇 **Rate Limiting**: Queues requests by domain to be polite. - 😇 **Rate Limiting**: Queues requests by domain to be polite.
- 🔁 **Retries**: Implements retry pattern with exponential back-off. - 🔁 **Retries**: Implements retry pattern with exponential back-off.
-**Timeouts**: Configurable timeout for each request. -**Timeouts**: Configurable timeout for each request.
- 🎭️ **User-Agent Rotation**: Change user agents for each request. - 🎭️ **Impersonation**: Impersonate different browsers for each request.
- **🌐 User-Agent Rotation**: Change user agents.
- **🔑 TLS Handshakes**: Perform TLS and HTTP handshakes that are identical to that of a real browser.
- 🫙 **Cookie jar**: Preserve cookies during redirects to mimic real browser.
## CLI ## CLI
@@ -54,6 +57,7 @@ const statuses = await getUrlStatusesInParallel([ 'https://privacy.sexy', /* ...
- **`sameDomainDelayInMs`** (*number*), default: `3000` (3 seconds) - **`sameDomainDelayInMs`** (*number*), default: `3000` (3 seconds)
- Sets the delay between requests to the same domain. - Sets the delay between requests to the same domain.
- `requestOptions` (*object*): See [request options](#request-options). - `requestOptions` (*object*): See [request options](#request-options).
- `followOptions` (*object*): See [follow options](#follow-options).
### `getUrlStatus` ### `getUrlStatus`
@@ -72,7 +76,6 @@ console.log(`Status code: ${status.code}`);
- The longer the base time, the greater the intervals between retries. - The longer the base time, the greater the intervals between retries.
- **`additionalHeaders`** (*object*), default: `false` - **`additionalHeaders`** (*object*), default: `false`
- Additional HTTP headers to send along with the default headers. Overrides default headers if specified. - Additional HTTP headers to send along with the default headers. Overrides default headers if specified.
- **`followOptions`** (*object*): See [follow options](#follow-options).
- **`requestTimeoutInMs`** (*number*), default: `60000` (60 seconds) - **`requestTimeoutInMs`** (*number*), default: `60000` (60 seconds)
- Time limit to abort the request if no response is received within the specified time frame. - Time limit to abort the request if no response is received within the specified time frame.
@@ -83,19 +86,7 @@ Follows `3XX` redirects while preserving cookies.
Same fetch API except third parameter that specifies [follow options](#follow-options), `redirect: 'follow' | 'manual' | 'error'` is discarded in favor of the third parameter. Same fetch API except third parameter that specifies [follow options](#follow-options), `redirect: 'follow' | 'manual' | 'error'` is discarded in favor of the third parameter.
```js ```js
const status = await fetchFollow('https://privacy.sexy', { const status = await fetchFollow('https://privacy.sexy', 1000 /* timeout in milliseconds */);
// First argument is same options as fetch API, except `redirect` options
// that's discarded in favor of next argument follow options
headers: {
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'
},
}, {
// Second argument sets the redirect behavior
followRedirects: true,
maximumRedirectFollowDepth: 20,
enableCookies: true,
}
);
console.log(`Status code: ${status.code}`); console.log(`Status code: ${status.code}`);
``` ```

View File

@@ -1,70 +1,108 @@
import { indentText } from '@tests/shared/Text';
import { retryWithExponentialBackOff } from './ExponentialBackOffRetryHandler'; import { retryWithExponentialBackOff } from './ExponentialBackOffRetryHandler';
import { fetchFollow, type IFollowOptions, DefaultFollowOptions } from './FetchFollow'; import { fetchFollow, type FollowOptions } from './FetchFollow';
import { getRandomUserAgent } from './UserAgents'; import { getRandomUserAgent } from './UserAgents';
import type { IUrlStatus } from './IUrlStatus'; import { getDomainFromUrl } from './UrlDomainProcessing';
import { randomizeTlsFingerprint, getTlsContextInfo } from './TlsFingerprintRandomizer';
import type { UrlStatus } from './UrlStatus';
export function getUrlStatus( export function getUrlStatus(
url: string, url: string,
options: IRequestOptions = DefaultOptions, requestOptions?: Partial<RequestOptions>,
): Promise<IUrlStatus> { followOptions?: Partial<FollowOptions>,
const defaultedOptions = { ...DefaultOptions, ...options }; ): Promise<UrlStatus> {
const fetchOptions = getFetchOptions(url, defaultedOptions); const defaultedOptions = getDefaultedRequestOptions(requestOptions);
return retryWithExponentialBackOff(async () => { if (defaultedOptions.randomizeTlsFingerprint) {
console.log('Requesting', url); randomizeTlsFingerprint();
let result: IUrlStatus; }
try { return fetchUrlStatusWithRetry(url, defaultedOptions, followOptions);
const response = await fetchFollow(
url,
defaultedOptions.requestTimeoutInMs,
fetchOptions,
defaultedOptions.followOptions,
);
result = { url, code: response.status };
} catch (err) {
result = { url, error: JSON.stringify(err, null, '\t') };
}
return result;
}, defaultedOptions.retryExponentialBaseInMs);
} }
export interface IRequestOptions { export interface RequestOptions {
readonly retryExponentialBaseInMs?: number; readonly retryExponentialBaseInMs?: number;
readonly additionalHeaders?: Record<string, string>; readonly additionalHeaders?: Record<string, string>;
readonly additionalHeadersUrlIgnore?: string[]; readonly additionalHeadersUrlIgnore?: string[];
readonly followOptions?: IFollowOptions;
readonly requestTimeoutInMs: number; readonly requestTimeoutInMs: number;
readonly randomizeTlsFingerprint: boolean;
} }
const DefaultOptions: Required<IRequestOptions> = { const DefaultOptions: Required<RequestOptions> = {
retryExponentialBaseInMs: 5000, retryExponentialBaseInMs: 5 /* sec */ * 1000,
additionalHeaders: {}, additionalHeaders: {},
additionalHeadersUrlIgnore: [], additionalHeadersUrlIgnore: [],
requestTimeoutInMs: 60 /* seconds */ * 1000, requestTimeoutInMs: 60 /* seconds */ * 1000,
followOptions: DefaultFollowOptions, randomizeTlsFingerprint: true,
}; };
function getFetchOptions(url: string, options: Required<IRequestOptions>): RequestInit { function fetchUrlStatusWithRetry(
url: string,
requestOptions: Required<RequestOptions>,
followOptions?: Partial<FollowOptions>,
): Promise<UrlStatus> {
const fetchOptions = getFetchOptions(url, requestOptions);
return retryWithExponentialBackOff(async () => {
console.log(`Initiating request for URL: ${url}`);
let result: UrlStatus;
try {
const response = await fetchFollow(
url,
requestOptions.requestTimeoutInMs,
fetchOptions,
followOptions,
);
result = { url, code: response.status };
} catch (err) {
result = {
url,
error: [
'Error:', indentText(JSON.stringify(err, null, '\t') || err.toString()),
'Options:', indentText(JSON.stringify(fetchOptions, null, '\t')),
'TLS:', indentText(getTlsContextInfo()),
].join('\n'),
};
}
return result;
}, requestOptions.retryExponentialBaseInMs);
}
function getFetchOptions(url: string, options: Required<RequestOptions>): RequestInit {
const additionalHeaders = options.additionalHeadersUrlIgnore const additionalHeaders = options.additionalHeadersUrlIgnore
.some((ignorePattern) => url.startsWith(ignorePattern)) .some((ignorePattern) => url.startsWith(ignorePattern))
? {} ? {}
: options.additionalHeaders; : options.additionalHeaders;
return { return {
method: 'HEAD', method: 'GET', // Fetch only headers without the full response body for better speed
headers: { headers: {
...getDefaultHeaders(), ...getDefaultHeaders(url),
...additionalHeaders, ...additionalHeaders,
}, },
redirect: 'manual', // Redirects are handled manually, automatic redirects do not work with Host header
}; };
} }
function getDefaultHeaders(): Record<string, string> { function getDefaultHeaders(url: string): Record<string, string> {
return { return {
'user-agent': getRandomUserAgent(), // Needed for websites that filter out non-browser user agents.
'upgrade-insecure-requests': '1', 'User-Agent': getRandomUserAgent(),
connection: 'keep-alive',
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', // Required for some websites, especially those behind proxies, to correctly handle the request.
'accept-encoding': 'gzip, deflate, br', Host: getDomainFromUrl(url),
'cache-control': 'max-age=0',
'accept-language': 'en-US,en;q=0.9', // The following mimic a real browser request to improve compatibility with most web servers.
'Upgrade-Insecure-Requests': '1',
Connection: 'keep-alive',
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Cache-Control': 'max-age=0',
'Accept-Language': 'en-US,en;q=0.9',
};
}
function getDefaultedRequestOptions(
options?: Partial<RequestOptions>,
): Required<RequestOptions> {
return {
...DefaultOptions,
...options,
}; };
} }

View File

@@ -0,0 +1,69 @@
/**
* Modifies the TLS fingerprint of Node.js HTTP client to circumvent TLS fingerprinting blocks.
* TLS fingerprinting is a technique used to identify clients based on the unencrypted data sent
* during the TLS handshake, used for blocking or identifying non-browser clients like debugging
* proxies or automated scripts.
*
* However, Node.js's HTTP client does not fully support all methods required for impersonating a
* browser's TLS fingerprint, as reported in https://github.com/nodejs/undici/issues/1983.
* While this implementation can alter the TLS fingerprint by randomizing the cipher suite order,
* it may not perfectly mimic specific browser fingerprints due to limitations in the TLS
* implementation of Node.js.
*
* For more detailed information, visit:
* - https://archive.today/2024.03.13-102042/https://httptoolkit.com/blog/tls-fingerprinting-node-js/
* - https://check.ja3.zone/ (To check your tool's or browser's fingerprint)
* - https://github.com/lwthiker/curl-impersonate (A solution for curl)
* - https://github.com/depicts/got-tls (Cipher manipulation support for Node.js)
*/
import { constants } from 'crypto';
import tls from 'tls';
import { indentText } from '@tests/shared/Text';
/**
 * Replaces the process-wide default TLS cipher list with a shuffled variant
 * and logs the original cipher list next to the resulting TLS context.
 *
 * The change is applied through `tls.DEFAULT_CIPHERS`, so it is a global side
 * effect rather than a per-request setting.
 */
export function randomizeTlsFingerprint() {
  const shuffledCipherList = getShuffledCiphers().join(':');
  tls.DEFAULT_CIPHERS = shuffledCipherList;
  // Build the report only after the assignment so that the logged context
  // reflects the cipher list that is now in effect.
  const reportLines = [
    'Original ciphers:',
    indentText(constants.defaultCipherList),
    'Current context',
    indentText(getTlsContextInfo()),
  ];
  console.log(reportLines.join('\n'));
}
/**
 * Describes the current default TLS settings as a three-line, human-readable
 * string: the active cipher list, the minimum TLS protocol version, and
 * whether the cipher list still equals Node.js's core default ("Visible") or
 * has been changed ("Masked").
 *
 * @returns The three lines joined with newline characters.
 */
export function getTlsContextInfo(): string {
  const activeCiphers = tls.DEFAULT_CIPHERS;
  // The fingerprint counts as visible while the cipher list is untouched.
  const isUnmodified = constants.defaultCoreCipherList === activeCiphers;
  const fingerprintState = isUnmodified ? 'Visible' : 'Masked';
  const lines = [
    `Ciphers: ${activeCiphers}`,
    `Minimum TLS protocol version: ${tls.DEFAULT_MIN_VERSION}`,
    `Node fingerprint: ${fingerprintState}`,
  ];
  return lines.join('\n');
}
/**
 * Produces a randomized ordering of Node.js's core default TLS cipher list.
 *
 * The first three ciphers keep their original positions so that the most
 * preferred ciphers stay preferred; every remaining cipher is placed in a
 * random order after them. Changing the cipher suite order changes the data
 * sent during the TLS handshake, which is what TLS fingerprinting inspects,
 * so each call yields a fingerprint that differs from the stock Node.js one.
 *
 * For more details, refer to:
 * - https://archive.today/2024.03.13-102234/https://getsetfetch.org/blog/tls-fingerprint.html
 *
 * @returns All core default ciphers: the leading three unchanged, the rest shuffled.
 */
export function getShuffledCiphers(): readonly string[] {
  const preservedCount = 3;
  const allCiphers = constants.defaultCoreCipherList.split(':');
  const preserved = allCiphers.slice(0, preservedCount);
  // Pair each remaining cipher with a random key, order by that key, then
  // drop the key again.
  const keyed = allCiphers
    .slice(preservedCount)
    .map((name) => ({ key: Math.random(), name }));
  keyed.sort((left, right) => left.key - right.key);
  const randomized = keyed.map((entry) => entry.name);
  return [...preserved, ...randomized];
}

View File

@@ -2,18 +2,18 @@ export function groupUrlsByDomain(urls: string[]): string[][] {
const domains = new Set<string>(); const domains = new Set<string>();
const urlsWithDomain = urls.map((url) => ({ const urlsWithDomain = urls.map((url) => ({
url, url,
domain: extractDomain(url), domain: getDomainFromUrl(url),
})); }));
for (const url of urlsWithDomain) { for (const url of urlsWithDomain) {
domains.add(url.domain); domains.add(url.domain);
} }
return Array.from(domains).map((domain) => { return Array.from(domains).map((domain) => {
return urlsWithDomain return urlsWithDomain
.filter((url) => url.domain === domain) .filter((url) => url.domain.toLowerCase() === domain.toLowerCase())
.map((url) => url.url); .map((url) => url.url);
}); });
} }
function extractDomain(url: string): string { export function getDomainFromUrl(url: string): string {
return url.split('://')[1].split('/')[0].toLowerCase(); return new URL(url).host;
} }

View File

@@ -0,0 +1,19 @@
import { indentText } from '@tests/shared/Text';
/**
 * Outcome of checking a single URL.
 * Both `error` and `code` are optional, so either, both or neither may be set.
 */
export interface UrlStatus {
/** The URL that this status describes. */
readonly url: string;
/** Error details as text, when present. */
readonly error?: string;
/** Numeric code, reported as "Response code" by `formatUrlStatus`. */
readonly code?: number;
}
/**
 * Renders a URL status as multi-line, human-readable text.
 *
 * The output always starts with the URL. A "Response code" line follows when
 * `code` is defined (including `0`), and an indented "Error" section follows
 * when `error` is a non-empty string.
 *
 * @param status - The status to render.
 * @returns The lines joined with newline characters.
 */
export function formatUrlStatus(status: UrlStatus): string {
  const lines: string[] = [`URL: ${status.url}`];
  if (status.code !== undefined) {
    lines.push(`Response code: ${status.code}`);
  }
  if (status.error) {
    lines.push(`Error:\n${indentText(status.error)}`);
  }
  return lines.join('\n');
}

View File

@@ -3,73 +3,28 @@ export function getRandomUserAgent(): string {
} }
const UserAgents = [ const UserAgents = [
// Chrome // Safari 17.1 - macOS and iPad
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537', // Safari - iOS 17 - iPhone
'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
// Firefox // Safari - iOS 17 - iPad mini
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0', 'Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Safari/605.1.15', // Edge - macOS
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51',
// Safari // Edge - Windows
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/604.1', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58',
// Edge - Android
// Internet Explorer 'Mozilla/5.0 (Linux; Android 10; HD1913) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.43 Mobile Safari/537.36 EdgA/119.0.2151.92',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko', // Chrome - macOS
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
// Edge // Chrome - Windows
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3 Edge/15.0', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
// Chrome - Android (Phone)
// Opera 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36',
'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14', // Firefox - macOS
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0',
// iOS Devices // Firefox - Windows
'Mozilla/5.0 (iPhone; CPU iPhone OS 12_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/18.2b11866 Mobile/16B91 Safari/605.1.15', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0,',
'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1', // Firefox - Android (Phone)
'Mozilla/5.0 (Android 14; Mobile; rv:109.0) Gecko/120.0 Firefox/120.0',
// Android Devices
'Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.3',
// Other Devices/Browsers
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.3',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15',
'Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.3 Edge/15.0',
'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.3',
'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.3',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.3',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.3 OPR/53.0.2907.99',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2)',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20120121 Firefox/46.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; Tablet PC 2.0)',
'Mozilla/5.0 (Windows NT 5.1; rv:36.0) Gecko/20100101 Firefox/36.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
'Mozilla/5.0 (X11; Linux i686; rv:30.0) Gecko/20100101 Firefox/30.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:28.0) Gecko/20100101 Firefox/28.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/20161202 Firefox/21.0.1',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0',
'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0',
'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.3',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.3',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (X11; CrOS x86_64 4319.74.0) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3',
]; ];

View File

@@ -1,50 +1,82 @@
import { test, expect } from 'vitest'; import { test, expect } from 'vitest';
import { parseApplication } from '@/application/Parser/ApplicationParser'; import { parseApplication } from '@/application/Parser/ApplicationParser';
import type { IApplication } from '@/domain/IApplication'; import type { IApplication } from '@/domain/IApplication';
import { getUrlStatusesInParallel, type IBatchRequestOptions } from './StatusChecker/BatchStatusChecker'; import { indentText } from '@tests/shared/Text';
import type { IUrlStatus } from './StatusChecker/IUrlStatus'; import { formatAssertionMessage } from '@tests/shared/FormatAssertionMessage';
import { type UrlStatus, formatUrlStatus } from './StatusChecker/UrlStatus';
import { getUrlStatusesInParallel, type BatchRequestOptions } from './StatusChecker/BatchStatusChecker';
// arrange
const app = parseApplication(); const app = parseApplication();
const urls = collectUniqueUrls(app); const urls = collectUniqueUrls({
const requestOptions: IBatchRequestOptions = { application: app,
excludePatterns: [
/^https:\/\/archive\.ph/, // Drops HEAD/GET requests via fetch/curl, responding to Postman/Chromium.
],
});
const requestOptions: BatchRequestOptions = {
domainOptions: { domainOptions: {
sameDomainParallelize: false, // be nice to our external servers sameDomainParallelize: false, // be nice to our third-party servers
sameDomainDelayInMs: 5 /* sec */ * 1000, sameDomainDelayInMs: 5 /* sec */ * 1000,
}, },
requestOptions: { requestOptions: {
retryExponentialBaseInMs: 3 /* sec */ * 1000, retryExponentialBaseInMs: 3 /* sec */ * 1000,
requestTimeoutInMs: 60 /* sec */ * 1000, requestTimeoutInMs: 60 /* sec */ * 1000,
additionalHeaders: { referer: app.projectDetails.homepage }, additionalHeaders: { referer: app.projectDetails.homepage },
randomizeTlsFingerprint: true,
},
followOptions: {
followRedirects: true,
enableCookies: true,
}, },
}; };
const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000; const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000;
test(`all URLs (${urls.length}) should be alive`, async () => { test(`all URLs (${urls.length}) should be alive`, async () => {
// act
const results = await getUrlStatusesInParallel(urls, requestOptions); const results = await getUrlStatusesInParallel(urls, requestOptions);
const deadUrls = results.filter((r) => r.code !== 200); // assert
expect(deadUrls).to.have.lengthOf(0, printUrls(deadUrls)); const deadUrls = results.filter((r) => r.code === undefined || !isOkStatusCode(r.code));
expect(deadUrls).to.have.lengthOf(0, formatAssertionMessage([formatUrlStatusReport(deadUrls)]));
}, testTimeoutInMs); }, testTimeoutInMs);
function collectUniqueUrls(application: IApplication): string[] { function isOkStatusCode(statusCode: number): boolean {
return statusCode >= 200 && statusCode < 300;
}
function collectUniqueUrls(
options: {
readonly application: IApplication,
readonly excludePatterns?: readonly RegExp[],
},
): string[] {
return [ // Get all nodes return [ // Get all nodes
...application.collections.flatMap((c) => c.getAllCategories()), ...options.application.collections.flatMap((c) => c.getAllCategories()),
...application.collections.flatMap((c) => c.getAllScripts()), ...options.application.collections.flatMap((c) => c.getAllScripts()),
] ]
// Get all docs // Get all docs
.flatMap((documentable) => documentable.docs) .flatMap((documentable) => documentable.docs)
// Parse all URLs // Parse all URLs
.flatMap((docString) => docString.match(/(https?:\/\/[^\s`"<>()]+)/g) || []) .flatMap((docString) => extractUrls(docString))
// Remove duplicates // Remove duplicates
.filter((url, index, array) => array.indexOf(url) === index); .filter((url, index, array) => array.indexOf(url) === index)
// Exclude certain URLs based on patterns
.filter((url) => !shouldExcludeUrl(url, options.excludePatterns ?? []));
} }
function printUrls(statuses: IUrlStatus[]): string { function shouldExcludeUrl(url: string, patterns: readonly RegExp[]): boolean {
/* eslint-disable prefer-template */ return patterns.some((pattern) => pattern.test(url));
return '\n' }
+ statuses.map((status) => `- ${status.url}\n`
+ (status.code ? `\tResponse code: ${status.code}` : '') function formatUrlStatusReport(deadUrlStatuses: readonly UrlStatus[]): string {
+ (status.error ? `\tError: ${status.error}` : '')) return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`;
.join('\n') }
+ '\n';
/* eslint-enable prefer-template */ function extractUrls(textWithInlineCode: string): string[] {
/*
Matches all URLs.
Inline code blocks contain URLs not intended for user interaction and not
guaranteed to support expected HTTP methods, leading to false-negatives.
*/
const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+)/g;
return textWithInlineCode.match(nonCodeBlockUrlRegex) || [];
} }