Files
privacy.sexy/tests/integration/application/collections/StatusChecker/BatchStatusChecker.ts
undergroundwires 799fb091b8 Fix failing URL status checking integration tests
Implement following redirects over `fetch` supporting cookies.
`node-fetch` does not support sending cookies during redirect. However,
this is needed to not end-up in a redirect loop for a sign-in callback.

Fix integration tests failing due to redirects and 403 errors:
  - Many redirects from `answers.microsoft.com` was throwing: throwing
    `FetchError: maximum redirect reached` error. It was caused by not
    having cookies when following redirects therefore having an infinite
    sign-in callback for the webpage.
  - Fixes integration tests failing due to additional referer header being
    sent by the application. It adds support for making exceptions to
    additional header sending through a list of regexes.

Add in-depth documentation for URL status checking.
2021-10-30 16:19:10 +01:00

69 lines
2.3 KiB
TypeScript

import { sleepAsync } from '@/infrastructure/Threading/AsyncSleep';
import { IUrlStatus } from './IUrlStatus';
import { getUrlStatusAsync, IRequestOptions } from './Requestor';
import { groupUrlsByDomain } from './UrlPerDomainGrouper';
export async function getUrlStatusesInParallelAsync(
urls: string[],
options?: IBatchRequestOptions): Promise<IUrlStatus[]> {
// urls = [ 'https://privacy.sexy' ]; // Here to comment out when testing
const uniqueUrls = Array.from(new Set(urls));
options = { ...DefaultOptions, ...options };
console.log('Options: ', options); // tslint:disable-line: no-console
const results = await requestAsync(uniqueUrls, options);
return results;
}
export interface IBatchRequestOptions {
domainOptions?: IDomainOptions;
requestOptions?: IRequestOptions;
}
interface IDomainOptions {
sameDomainParallelize?: boolean;
sameDomainDelayInMs?: number;
}
const DefaultOptions: IBatchRequestOptions = {
domainOptions: {
sameDomainParallelize: false,
sameDomainDelayInMs: 3 /* sec */ * 1000,
},
requestOptions: {
retryExponentialBaseInMs: 5 /* sec */ * 1000,
additionalHeaders: {},
},
};
function requestAsync(urls: string[], options: IBatchRequestOptions): Promise<IUrlStatus[]> {
if (!options.domainOptions.sameDomainParallelize) {
return runOnEachDomainWithDelayAsync(
urls,
(url) => getUrlStatusAsync(url, options.requestOptions),
options.domainOptions.sameDomainDelayInMs);
} else {
return Promise.all(
urls.map((url) => getUrlStatusAsync(url, options.requestOptions)));
}
}
async function runOnEachDomainWithDelayAsync(
urls: string[],
action: (url: string) => Promise<IUrlStatus>,
delayInMs: number): Promise<IUrlStatus[]> {
const grouped = groupUrlsByDomain(urls);
const tasks = grouped.map(async (group) => {
const results = new Array<IUrlStatus>();
for (const url of group) {
const status = await action(url);
results.push(status);
if (results.length !== group.length) {
await sleepAsync(delayInMs);
}
}
return results;
});
const r = await Promise.all(tasks);
return r.flat();
}