fix broken URLs and automate broken URL checks #70

This commit:
- Fixes broken URLs using archive.org or other references.
- Replaces tenforums.com URLs with better documentation as they tend to return HTTP status code 403 to tests and also are low quality source.
- Changes all insecure http sources to https alternatives
- Adds integration tests to check for broken URLs
  - There's logic implemented for having a delay inbetween when sending requests to same domains, however it's not used as the sources can respond to totally parallelized requests.
- Run test pipeline weekly to get notified about broken URls without commits
This commit is contained in:
undergroundwires
2021-05-05 23:52:06 +02:00
parent 36f0805590
commit db62ed7f3a
14 changed files with 307 additions and 27 deletions

View File

@@ -5,7 +5,7 @@ on:
pull_request:
paths: [ '/package.json', '/package-lock.json' ] # Allow PRs to be green if they do not introduce dependency change
schedule:
- cron: '0 0 * * 0'
- cron: '0 0 * * 0' # at 00:00 on every Sunday
jobs:
npm-audit:

View File

@@ -1,6 +1,10 @@
name: Test
on: [ push, pull_request ]
on:
push:
pull_request:
schedule: # for integration tests
- cron: '0 0 * * 0' # at 00:00 on every Sunday
jobs:
run-tests:

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 483 KiB

After

Width:  |  Height:  |  Size: 579 KiB

30
package-lock.json generated
View File

@@ -16,6 +16,7 @@
"@juggle/resize-observer": "^3.3.0",
"ace-builds": "^1.4.12",
"core-js": "^3.9.1",
"cross-fetch": "^3.1.4",
"file-saver": "^2.0.5",
"inversify": "^5.0.5",
"liquor-tree": "^0.2.70",
@@ -5772,6 +5773,14 @@
"sha.js": "^2.4.8"
}
},
"node_modules/cross-fetch": {
"version": "3.1.4",
"resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.4.tgz",
"integrity": "sha512-1eAtFWdIubi6T4XPy6ei9iUFoKpUkIF971QLN8lIvvvwueI65+Nw5haMNKUwfJxabqlIIDODJKGrQ66gxC0PbQ==",
"dependencies": {
"node-fetch": "2.6.1"
}
},
"node_modules/cross-spawn": {
"version": "6.0.5",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz",
@@ -12658,6 +12667,14 @@
"semver": "bin/semver"
}
},
"node_modules/node-fetch": {
"version": "2.6.1",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.1.tgz",
"integrity": "sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw==",
"engines": {
"node": "4.x || >=6.0.0"
}
},
"node_modules/node-forge": {
"version": "0.10.0",
"resolved": "https://registry.npmjs.org/node-forge/-/node-forge-0.10.0.tgz",
@@ -25705,6 +25722,14 @@
"sha.js": "^2.4.8"
}
},
"cross-fetch": {
"version": "3.1.4",
"resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.4.tgz",
"integrity": "sha512-1eAtFWdIubi6T4XPy6ei9iUFoKpUkIF971QLN8lIvvvwueI65+Nw5haMNKUwfJxabqlIIDODJKGrQ66gxC0PbQ==",
"requires": {
"node-fetch": "2.6.1"
}
},
"cross-spawn": {
"version": "6.0.5",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz",
@@ -31409,6 +31434,11 @@
}
}
},
"node-fetch": {
"version": "2.6.1",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.1.tgz",
"integrity": "sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw=="
},
"node-forge": {
"version": "0.10.0",
"resolved": "https://registry.npmjs.org/node-forge/-/node-forge-0.10.0.tgz",

View File

@@ -30,6 +30,7 @@
"@juggle/resize-observer": "^3.3.0",
"ace-builds": "^1.4.12",
"core-js": "^3.9.1",
"cross-fetch": "^3.1.4",
"file-saver": "^2.0.5",
"inversify": "^5.0.5",
"liquor-tree": "^0.2.70",

View File

@@ -185,7 +185,7 @@ actions:
rm -fv ~/Library/Application\ Support/Firefox/Profiles/*/sessionstore-backups/upgrade.js*-20*
-
name: Delete Firefox passwords
docs: http://kb.mozillazine.org/Password_Manager
docs: https://web.archive.org/web/20210425202923/http://kb.mozillazine.org/Password_Manager
code: |-
rm -fv ~/Library/Application\ Support/Firefox/Profiles/*/signons.txt
rm -fv ~/Library/Application\ Support/Firefox/Profiles/*/signons2.txt
@@ -437,7 +437,7 @@ actions:
-
name: Disable PowerShell Core telemetry
recommend: standard
docs: https://github.com/PowerShell/PowerShell/tree/release/v7.1.1#telemetry
docs: https://github.com/PowerShell/PowerShell/blob/v7.1.0/README.md#telemetry
call:
-
function: PersistUserEnvironmentConfiguration

View File

@@ -613,7 +613,7 @@ actions:
-
name: Disable cloud speech recognition
recommend: standard
docs: https://www.tenforums.com/tutorials/101902-turn-off-online-speech-recognition-windows-10-a.html
docs: https://docs.microsoft.com/en-us/windows/privacy/manage-connections-from-windows-operating-system-components-to-microsoft-services#186-speech
code: reg add "HKCU\Software\Microsoft\Speech_OneCore\Settings\OnlineSpeechPrivacy" /v "HasAccepted" /t "REG_DWORD" /d 0 /f
revertCode: reg add "HKCU\Software\Microsoft\Speech_OneCore\Settings\OnlineSpeechPrivacy" /v "HasAccepted" /t "REG_DWORD" /d 1 /f
-
@@ -629,7 +629,7 @@ actions:
-
name: Disable Windows feedback
recommend: standard
docs: https://www.tenforums.com/tutorials/2441-change-feedback-frequency-windows-10-a.html
docs: https://docs.microsoft.com/en-us/windows/privacy/manage-connections-from-windows-operating-system-components-to-microsoft-services#1816-feedback--diagnostics
code: |-
reg add "HKCU\SOFTWARE\Microsoft\Siuf\Rules" /v "NumberOfSIUFInPeriod" /t REG_DWORD /d 0 /f
reg delete "HKCU\SOFTWARE\Microsoft\Siuf\Rules" /v "PeriodInNanoSeconds" /f
@@ -656,8 +656,9 @@ actions:
name: Deny app access to location
recommend: standard
docs:
- https://r-pufky.github.io/docs/operating-systems/windows/10/securing-installation/privacy/location.html
- https://docs.microsoft.com/en-us/windows/client-management/mdm/policy-csp-privacy#privacy-letappsaccesslocation
- https://docs.microsoft.com/en-us/windows/client-management/mdm/policy-csp-privacy#privacy-letappsaccesslocation # LetAppsAccessLocation
- https://www.joseespitia.com/2019/07/24/registry-keys-for-windows-10-application-privacy-settings/ # ConsentStore\location
- https://social.technet.microsoft.com/Forums/en-US/63904312-04af-41e5-8b57-1dd446ea45c5/ # lfsvc\Service\Configuration
code: |-
reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\CapabilityAccessManager\ConsentStore\location" /v "Value" /d "Deny" /f
:: For older Windows (before 1903)
@@ -1039,7 +1040,6 @@ actions:
revertCode: reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\CapabilityAccessManager\ConsentStore\bluetoothSync" /v "Value" /d "Allow" /t REG_SZ /f
-
category: Disable app access to voice activation
docs: https://www.tenforums.com/tutorials/130122-allow-deny-apps-access-use-voice-activation-windows-10-a.html
children:
-
name: Disable apps and Cortana to activate with voice
@@ -1284,7 +1284,9 @@ actions:
-
name: Turn Off Suggested Content in Settings app
recommend: standard
docs: https://www.tenforums.com/tutorials/100541-turn-off-suggested-content-settings-app-windows-10-a.html
docs:
- https://docs.microsoft.com/en-us/windows-server/remote/remote-desktop-services/rds-vdi-recommendations-2004
- https://news.softpedia.com/news/how-to-block-banners-in-windows-10-version-1809-settings-app-523331.shtml
code: |-
reg add "HKCU\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContent-338393Enabled" /d "0" /t REG_DWORD /f
reg add "HKCU\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContent-353694Enabled" /d "0" /t REG_DWORD /f
@@ -1311,9 +1313,7 @@ actions:
-
name: Do not start Windows Biometric Service
recommend: strict
docs:
- http://batcmd.com/windows/10/services/wbiosrvc/
- http://revertservice.com/10/wbiosrvc/
docs: https://docs.microsoft.com/en-us/windows-server/security/windows-services/security-guidelines-for-disabling-system-services-in-windows-server#windows-biometric-service
code: |-
reg add "HKLM\SYSTEM\CurrentControlSet\Services\WbioSrvc" /v "Start" /t REG_DWORD /d 4 /f
sc stop "WbioSrvc" & sc config "WbioSrvc" start=disabled
@@ -1341,7 +1341,7 @@ actions:
-
name: Disable Website Access of Language List
recommend: standard
docs: https://www.tenforums.com/tutorials/82980-turn-off-website-access-language-list-windows-10-a.html
docs: https://docs.microsoft.com/en-us/windows/privacy/manage-connections-from-windows-operating-system-components-to-microsoft-services#181-general
code: reg add "HKCU\Control Panel\International\User Profile" /v "HttpAcceptLanguageOptOut" /t REG_DWORD /d 1 /f
revertCode: reg add "HKCU\Control Panel\International\User Profile" /v "HttpAcceptLanguageOptOut" /t REG_DWORD /d 0 /f
-
@@ -1380,7 +1380,7 @@ actions:
children:
-
name: Disable Windows Insider Service
docs: http://revertservice.com/10/wisvc/
docs: https://docs.microsoft.com/en-us/windows-server/security/windows-services/security-guidelines-for-disabling-system-services-in-windows-server#windows-insider-service
recommend: standard
code: sc stop "wisvc" & sc config "wisvc" start=disabled
revertCode: sc config "wisvc" start=demand
@@ -1404,7 +1404,7 @@ actions:
revertCode: reg delete "HKLM\SOFTWARE\Policies\Microsoft\Windows\PreviewBuilds" /v "AllowBuildPreview" /f
-
name: Remove "Windows Insider Program" from Settings
docs: https://www.askvg.com/windows-10-tip-remove-windows-insider-program-section-from-settings-page/
docs: https://winaero.com/how-to-hide-the-windows-insider-program-page-from-the-settings-app-in-windows-10/
code: reg add "HKLM\SOFTWARE\Microsoft\WindowsSelfHost\UI\Visibility" /v "HideInsiderPage" /t "REG_DWORD" /d "1" /f
revertCode: reg delete "HKLM\SOFTWARE\Microsoft\WindowsSelfHost\UI\Visibility" /v "HideInsiderPage" /f
-
@@ -1472,7 +1472,10 @@ actions:
-
name: Disable Language Setting Sync
recommend: standard
docs: https://www.tenforums.com/tutorials/4077-turn-off-sync-settings-microsoft-account-windows-10-a.html
docs:
- https://winaero.com/turn-on-off-sync-settings-windows-10/
- https://www.thewindowsclub.com/how-to-configure-windows-10-sync-settings-using-registry-editor
- https://tuxicoman.jesuislibre.net/blog/wp-content/uploads/Windows10_Telemetrie_1709.pdf # from guide on confidentiality and privacy with Windows 10 distributed to the French police, previous version of guide: https://www.pmenier.net/dotclear/docext/win10/.Windows10-Presentation.pdf
code: reg add "HKCU\SOFTWARE\Microsoft\Windows\CurrentVersion\SettingSync\Groups\Language" /t REG_DWORD /v "Enabled" /d 0 /f
revertCode: reg add "HKCU\SOFTWARE\Microsoft\Windows\CurrentVersion\SettingSync\Groups\Language" /t REG_DWORD /v "Enabled" /d 1 /f
-
@@ -1832,7 +1835,9 @@ actions:
revertCode: reg delete "HKLM\SOFTWARE\Policies\Microsoft\Edge" /v "SendSiteInfoToImproveServices" /f
-
name: Disable Automatic Installation of Microsoft Edge Chromium
docs: https://docs.microsoft.com/en-us/deployedge/microsoft-edge-blocker-toolkit
docs:
- https://admx.help/?Category=EdgeChromium_Blocker&Policy=Microsoft.Policies.EdgeUpdate::NoUpdate
- https://web.archive.org/web/20210118230052/https://docs.microsoft.com/en-us/deployedge/microsoft-edge-blocker-toolkit
code: reg add "HKLM\SOFTWARE\Microsoft\EdgeUpdate" /v "DoNotUpdateToEdgeWithChromium" /t REG_DWORD /d 1 /f
revertCode: reg delete "HKLM\SOFTWARE\Microsoft\EdgeUpdate" /v "DoNotUpdateToEdgeWithChromium" /f
-
@@ -2686,7 +2691,10 @@ actions:
-
name: Do not show recently used files in Quick Access
recommend: strict
docs: https://www.tenforums.com/tutorials/2713-add-remove-recent-files-quick-access-windows-10-a.html
docs:
- https://matthewhill.uk/windows/group-policy-disable-recent-files-frequent-folder-explorer/ # ShowRecent
- https://www.howto-connect.com/delete-recent-frequent-from-file-explorer-on-windows-10/ # 3134ef9c-6b18-4996-ad04-ed5912e00eb5
- https://docs.microsoft.com/en-us/windows/win32/sysinfo/32-bit-and-64-bit-application-data-in-the-registry # Wow6432Node
code: |-
if %PROCESSOR_ARCHITECTURE%==x86 ( REM is 32 bit?
reg add "HKCU\Software\Microsoft\Windows\CurrentVersion\Explorer" /v "ShowRecent" /d 0 /t REG_DWORD /f
@@ -2707,12 +2715,15 @@ actions:
revertCode: reg add "HKCU\SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Advanced" /v "ShowSyncProviderNotifications" /d 1 /t REG_DWORD /f
-
name: Turn hibernate off to disable sleep for quick start
docs: http://www.windows10windows7.com/w10/win10zs/100102504.html
docs: https://docs.microsoft.com/en-us/windows-hardware/manufacture/desktop/compact-os#ram-pagefilesys-and-hiberfilsys
code: powercfg -h off
revertCode: powercfg -h on
-
name: Enable camera on/off OSD notifications
docs: https://www.tenforums.com/tutorials/166065-how-enable-disable-camera-off-osd-notifications-windows-10-a.html
docs:
- https://docs.microsoft.com/en-us/windows-hardware/customize/desktop/unattend/microsoft-windows-coremmres-nophysicalcameraled
- https://www.reddit.com/r/Surface/comments/88nyln/the_webcamled_took_anyone_it_apart/dwm64p5
- https://answers.microsoft.com/en-us/windows/forum/all/enable-osd-notification-for-webcam/caf1fff4-78d3-4b93-905b-ef657097a44e
code: reg add "HKLM\SOFTWARE\Microsoft\OEM\Device\Capture" /v "NoPhysicalCameraLED" /d 1 /t REG_DWORD /f
revertCode: reg delete "HKLM\Software\Microsoft\OEM\Device\Capture" /v "NoPhysicalCameraLED" /f
-
@@ -2861,7 +2872,9 @@ actions:
revertCode: sc config "VSS" start=demand
-
name: Disable NetBios for all interfaces
docs: https://10dsecurity.com/saying-goodbye-netbios/
docs:
- https://bobcares.com/blog/disable-netbios-and-llmnr-protocols-in-windows-using-gpo/
- https://social.technet.microsoft.com/Forums/windowsserver/en-US/c5f3c095-1ad2-4963-b075-787f800b81f2/
call:
function: RunPowerShell
parameters:
@@ -3483,7 +3496,7 @@ actions:
packageName: Microsoft.ECApp
-
name: Lock app (shows lock screen)
docs: https://www.dashtech.org/can-you-disable-lockapp-exe-on-windows-10/
docs: https://www.getwox.com/what-is-lockapp-exe/
call:
function: UninstallSystemApp
parameters:
@@ -4351,10 +4364,12 @@ actions:
w32tm /config /update
w32tm /resync
-
name: Disable Reserved Storage for updates
name: Disable Reserved Storage for updates # since 19H1 (1903)
docs:
- https://techcommunity.microsoft.com/t5/storage-at-microsoft/windows-10-and-reserved-storage/ba-p/428327
- https://www.tenforums.com/tutorials/124858-enable-disable-reserved-storage-windows-10-a.html
- https://techcommunity.microsoft.com/t5/storage-at-microsoft/windows-10-and-reserved-storage/ba-p/428327 # Announcement
- https://techcommunity.microsoft.com/t5/windows-it-pro-blog/managing-reserved-storage-in-windows-10-environments/ba-p/1297070#toc-hId--8696946 # Set-ReservedStorageState
- https://www.howtogeek.com/425563/how-to-disable-reserved-storage-on-windows-10/ # ShippedWithReserves
- https://techcommunity.microsoft.com/t5/windows-servicing/reserve-manager-enabled-with-low-disk-space-block/m-p/2073132 # PassedPolicy
code: |-
dism /online /Set-ReservedStorageState /State:Disabled /NoRestart
reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\ReserveManager" /v "MiscPolicyInfo" /t REG_DWORD /d "2" /f

View File

@@ -0,0 +1,48 @@
import 'mocha';
import { expect } from 'chai';
import { parseApplication } from '@/application/Parser/ApplicationParser';
import { IApplication } from '@/domain/IApplication';
import { IUrlStatus } from './StatusChecker/IUrlStatus';
import { getUrlStatusesInParallelAsync, IBatchRequestOptions } from './StatusChecker/BatchStatusChecker';
describe('collections', () => {
// arrange
const app = parseApplication();
const urls = collectUniqueUrls(app);
const options: IBatchRequestOptions = {
domainOptions: {
sameDomainParallelize: true, // no need to be so nice until sources start failing
// sameDomainDelayInMs: 2 /* sec */ * 1000,
},
requestOptions: {
retryExponentialBaseInMs: 3 /* sec */ * 1000,
additionalHeaders: { referer: app.info.homepage },
},
};
const testTimeoutInMs = urls.length * 60000 /* 1 minute */;
it('have no dead urls', async () => {
// act
const results = await getUrlStatusesInParallelAsync(urls, options);
// assert
const deadUrls = results.filter((r) => r.statusCode !== 200);
expect(deadUrls).to.have.lengthOf(0, printUrls(deadUrls));
}).timeout(testTimeoutInMs);
});
function collectUniqueUrls(app: IApplication): string[] {
return app
.collections
.flatMap((a) => a.getAllScripts())
.flatMap((script) => script.documentationUrls)
.filter((url, index, array) => array.indexOf(url) === index);
}
function printUrls(statuses: IUrlStatus[]): string {
return '\n' +
statuses.map((status) =>
`- ${status.url}\n` +
(status.statusCode ? `\tResponse code: ${status.statusCode}` : '') +
(status.error ? `\tException: ${JSON.stringify(status.error, null, '\t')}` : ''))
.join(`\n`)
+ '\n';
}

View File

@@ -0,0 +1,67 @@
import { sleepAsync } from '@/infrastructure/Threading/AsyncSleep';
import { IUrlStatus } from './IUrlStatus';
import { getUrlStatusAsync, IRequestOptions } from './Requestor';
import { groupUrlsByDomain } from './UrlPerDomainGrouper';
export async function getUrlStatusesInParallelAsync(
urls: string[],
options?: IBatchRequestOptions): Promise<IUrlStatus[]> {
const uniqueUrls = Array.from(new Set(urls));
options = { ...DefaultOptions, ...options };
console.log('Options: ', options); // tslint:disable-line: no-console
const results = await requestAsync(uniqueUrls, options);
return results;
}
export interface IBatchRequestOptions {
domainOptions?: IDomainOptions;
requestOptions?: IRequestOptions;
}
interface IDomainOptions {
sameDomainParallelize?: boolean;
sameDomainDelayInMs?: number;
}
const DefaultOptions: IBatchRequestOptions = {
domainOptions: {
sameDomainParallelize: false,
sameDomainDelayInMs: 3 /* sec */ * 1000,
},
requestOptions: {
retryExponentialBaseInMs: 5 /* sec */ * 1000,
additionalHeaders: {},
},
};
function requestAsync(urls: string[], options: IBatchRequestOptions): Promise<IUrlStatus[]> {
if (!options.domainOptions.sameDomainParallelize) {
return runOnEachDomainWithDelayAsync(
urls,
(url) => getUrlStatusAsync(url, options.requestOptions),
options.domainOptions.sameDomainDelayInMs);
} else {
return Promise.all(
urls.map((url) => getUrlStatusAsync(url, options.requestOptions)));
}
}
async function runOnEachDomainWithDelayAsync(
urls: string[],
action: (url: string) => Promise<IUrlStatus>,
delayInMs: number): Promise<IUrlStatus[]> {
const grouped = groupUrlsByDomain(urls);
const tasks = grouped.map(async (group) => {
const results = new Array<IUrlStatus>();
for (const url of group) {
const status = await action(url);
results.push(status);
if (results.length !== group.length) {
await sleepAsync(delayInMs);
}
}
return results;
});
const r = await Promise.all(tasks);
return r.flat();
}

View File

@@ -0,0 +1,44 @@
import { sleepAsync } from '@/infrastructure/Threading/AsyncSleep';
import { IUrlStatus } from './IUrlStatus';
const DefaultBaseRetryIntervalInMs = 5 /* sec */ * 1000;
export async function retryWithExponentialBackOffAsync(
action: () => Promise<IUrlStatus>,
baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs,
currentRetry = 1): Promise<IUrlStatus> {
const maxTries: number = 3;
const status = await action();
if (shouldRetry(status)) {
if (currentRetry <= maxTries) {
const exponentialBackOffInMs = getRetryTimeoutInMs(currentRetry, baseRetryIntervalInMs);
// tslint:disable-next-line: no-console
console.log(`Retrying (${currentRetry}) in ${exponentialBackOffInMs / 1000} seconds`, status);
await sleepAsync(exponentialBackOffInMs);
return retryWithExponentialBackOffAsync(action, baseRetryIntervalInMs, currentRetry + 1);
}
}
return status;
}
function shouldRetry(status: IUrlStatus) {
if (status.error) {
return true;
}
return isTransientError(status.statusCode)
|| status.statusCode === 429; // Too Many Requests
}
function isTransientError(statusCode: number) {
return statusCode >= 500 && statusCode <= 599;
}
function getRetryTimeoutInMs(currentRetry: number, baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs) {
const retryRandomFactor = 0.5; // Retry intervals are between 50% and 150%
// of the exponentially increasing base amount
const minRandom = 1 - retryRandomFactor;
const maxRandom = 1 + retryRandomFactor;
const randomization = (Math.random() * (maxRandom - minRandom)) + maxRandom;
const exponential = Math.pow(2, currentRetry - 1);
return Math.ceil(exponential * baseRetryIntervalInMs * randomization);
}

View File

@@ -0,0 +1,5 @@
export interface IUrlStatus {
url: string;
error?: any;
statusCode?: number;
}

View File

@@ -0,0 +1,47 @@
import { retryWithExponentialBackOffAsync } from './ExponentialBackOffRetryHandler';
import { IUrlStatus } from './IUrlStatus';
import fetch from 'cross-fetch';
export interface IRequestOptions {
retryExponentialBaseInMs?: number;
additionalHeaders?: Record<string, string>;
}
export async function getUrlStatusAsync(
url: string,
options: IRequestOptions = DefaultOptions): Promise<IUrlStatus> {
options = { ...DefaultOptions, ...options };
const fetchOptions = getFetchOptions(options);
return retryWithExponentialBackOffAsync(async () => {
console.log('Requesting', url); // tslint:disable-line: no-console
try {
const response = await fetch(url, fetchOptions);
return { url, statusCode: response.status};
} catch (err) {
return { url, error: err};
}
}, options.retryExponentialBaseInMs);
}
const DefaultOptions: IRequestOptions = {
retryExponentialBaseInMs: 5000,
additionalHeaders: {},
};
function getFetchOptions(options: IRequestOptions) {
return {
method: 'GET',
headers: { ...DefaultHeaders, ...options.additionalHeaders },
};
}
const DefaultHeaders: Record<string, string> = {
/* Chrome on macOS */
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
'upgrade-insecure-requests': '1',
'connection': 'keep-alive',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'accept-encoding': 'gzip, deflate, br',
'cache-control': 'max-age=0',
'accept-language': 'en-US,en;q=0.9',
};

View File

@@ -0,0 +1,19 @@
export function groupUrlsByDomain(urls: string[]): string[][] {
const domains = new Set<string>();
const urlsWithDomain = urls.map((url) => ({
url,
domain: extractDomain(url),
}));
for (const url of urlsWithDomain) {
domains.add(url.domain);
}
return Array.from(domains).map((domain) => {
return urlsWithDomain
.filter((url) => url.domain === domain)
.map((url) => url.url);
});
}
function extractDomain(url: string): string {
return url.split('://')[1].split('/')[0].toLowerCase();
}