fix broken URLs and automate broken URL checks #70

This commit:
- Fixes broken URLs using archive.org or other references.
- Replaces tenforums.com URLs with better documentation, as they tend to return HTTP status code 403 to tests and are also a low-quality source.
- Changes all insecure http sources to https alternatives
- Adds integration tests to check for broken URLs
  - There's logic implemented for adding a delay between requests sent to the same domain; however, it's not used, as the sources can respond to fully parallelized requests.
- Runs the test pipeline weekly to get notified about broken URLs even when there are no commits.
This commit is contained in:
undergroundwires
2021-05-05 23:52:06 +02:00
parent 36f0805590
commit db62ed7f3a
14 changed files with 307 additions and 27 deletions

View File

@@ -0,0 +1,67 @@
import { sleepAsync } from '@/infrastructure/Threading/AsyncSleep';
import { IUrlStatus } from './IUrlStatus';
import { getUrlStatusAsync, IRequestOptions } from './Requestor';
import { groupUrlsByDomain } from './UrlPerDomainGrouper';
/**
 * Checks the status of every distinct URL in `urls`.
 *
 * Duplicate URLs are requested only once. Caller-supplied options are merged
 * over `DefaultOptions` one option group at a time, so overriding a single
 * setting (for example only `sameDomainParallelize`) keeps the defaults of
 * the other settings in the same group.
 *
 * @param urls - URLs to check; duplicates are collapsed before requesting.
 * @param options - Optional overrides for the default batch behavior.
 * @returns The statuses produced for the distinct URLs.
 */
export async function getUrlStatusesInParallelAsync(
    urls: string[],
    options?: IBatchRequestOptions): Promise<IUrlStatus[]> {
    const uniqueUrls = Array.from(new Set(urls));
    const overrides = options || {};
    // Merge each option group on its own: a single shallow spread would replace
    // a whole group with the caller's partial one and silently drop defaults
    // such as `sameDomainDelayInMs`, leaving them undefined.
    const mergedOptions: IBatchRequestOptions = {
        ...DefaultOptions,
        ...overrides,
        domainOptions: { ...DefaultOptions.domainOptions, ...overrides.domainOptions },
        requestOptions: { ...DefaultOptions.requestOptions, ...overrides.requestOptions },
    };
    console.log('Options: ', mergedOptions); // tslint:disable-line: no-console
    return requestAsync(uniqueUrls, mergedOptions);
}
/**
 * Options accepted by `getUrlStatusesInParallelAsync`.
 * Both groups are optional; omitted groups fall back to `DefaultOptions`.
 */
export interface IBatchRequestOptions {
// Controls whether all URLs are requested at once or per-domain groups are processed sequentially with a delay.
domainOptions?: IDomainOptions;
// Handed unchanged to `getUrlStatusAsync` for every requested URL.
requestOptions?: IRequestOptions;
}
/** Scheduling options that decide how the batch of URLs is requested. */
interface IDomainOptions {
// When truthy, every URL is requested at once via Promise.all; otherwise each group
// returned by `groupUrlsByDomain` is processed one URL after another.
sameDomainParallelize?: boolean;
// Pause in milliseconds between consecutive requests within one group;
// only read when `sameDomainParallelize` is falsy.
sameDomainDelayInMs?: number;
}
/**
 * Baseline settings used when the caller does not supply its own options.
 * All durations are expressed in milliseconds.
 */
const DefaultOptions: IBatchRequestOptions = {
    domainOptions: {
        // Sequential per-domain processing is the default.
        sameDomainParallelize: false,
        sameDomainDelayInMs: 3000, // 3 seconds
    },
    requestOptions: {
        retryExponentialBaseInMs: 5000, // 5 seconds
        additionalHeaders: {},
    },
};
/**
 * Dispatches the status checks for `urls` according to `options.domainOptions`.
 *
 * When `sameDomainParallelize` is truthy, all URLs are requested at once.
 * Otherwise the URLs are handed to `runOnEachDomainWithDelayAsync` together
 * with the configured `sameDomainDelayInMs`.
 *
 * @param urls - URLs to check.
 * @param options - Batch options; `domainOptions` must be present.
 * @returns A promise resolving to the collected URL statuses.
 */
function requestAsync(urls: string[], options: IBatchRequestOptions): Promise<IUrlStatus[]> {
    // Single per-URL check shared by both strategies; request options are read at call time.
    const checkUrl = (url: string) => getUrlStatusAsync(url, options.requestOptions);
    if (options.domainOptions.sameDomainParallelize) {
        return Promise.all(urls.map((url) => checkUrl(url)));
    }
    return runOnEachDomainWithDelayAsync(
        urls,
        checkUrl,
        options.domainOptions.sameDomainDelayInMs);
}
/**
 * Runs `action` for every URL, processing the groups returned by
 * `groupUrlsByDomain` concurrently while handling the URLs inside one group
 * strictly one after another.
 *
 * Within a group, `sleepAsync(delayInMs)` is awaited after each URL except
 * the last one.
 *
 * @param urls - URLs to process.
 * @param action - Asynchronous operation producing a status for a single URL.
 * @param delayInMs - Value passed to `sleepAsync` between URLs of one group.
 * @returns All statuses, flattened group by group.
 */
async function runOnEachDomainWithDelayAsync(
    urls: string[],
    action: (url: string) => Promise<IUrlStatus>,
    delayInMs: number): Promise<IUrlStatus[]> {
    const perDomainTasks = groupUrlsByDomain(urls).map(async (domainUrls) => {
        const statuses: IUrlStatus[] = [];
        for (const url of domainUrls) {
            statuses.push(await action(url));
            // No need to wait once the final URL of this group has been handled.
            const isLastUrl = statuses.length === domainUrls.length;
            if (!isLastUrl) {
                await sleepAsync(delayInMs);
            }
        }
        return statuses;
    });
    const statusesPerDomain = await Promise.all(perDomainTasks);
    return statusesPerDomain.flat();
}