fix broken URLs and automate broken URL checks #70
This commit: - Fixes broken URLs using archive.org or other references. - Replaces tenforums.com URLs with better documentation, as they tend to return HTTP status code 403 to tests and are also a low-quality source. - Changes all insecure http sources to https alternatives. - Adds integration tests to check for broken URLs. - Implements logic for adding a delay between requests sent to the same domain; however, it is not used, as the sources can respond to fully parallelized requests. - Runs the test pipeline weekly to get notified about broken URLs without commits.
This commit is contained in:
2
.github/workflows/security-checks.yaml
vendored
2
.github/workflows/security-checks.yaml
vendored
@@ -5,7 +5,7 @@ on:
|
||||
pull_request:
|
||||
paths: [ '/package.json', '/package-lock.json' ] # Allow PRs to be green if they do not introduce dependency change
|
||||
schedule:
|
||||
- cron: '0 0 * * 0'
|
||||
- cron: '0 0 * * 0' # at 00:00 on every Sunday
|
||||
|
||||
jobs:
|
||||
npm-audit:
|
||||
|
||||
6
.github/workflows/test.yaml
vendored
6
.github/workflows/test.yaml
vendored
@@ -1,6 +1,10 @@
|
||||
name: Test
|
||||
|
||||
on: [ push, pull_request ]
|
||||
on:
|
||||
push:
|
||||
pull_request:
|
||||
schedule: # for integration tests
|
||||
- cron: '0 0 * * 0' # at 00:00 on every Sunday
|
||||
|
||||
jobs:
|
||||
run-tests:
|
||||
|
||||
File diff suppressed because one or more lines are too long
Binary file not shown.
|
Before Width: | Height: | Size: 483 KiB After Width: | Height: | Size: 579 KiB |
30
package-lock.json
generated
30
package-lock.json
generated
@@ -16,6 +16,7 @@
|
||||
"@juggle/resize-observer": "^3.3.0",
|
||||
"ace-builds": "^1.4.12",
|
||||
"core-js": "^3.9.1",
|
||||
"cross-fetch": "^3.1.4",
|
||||
"file-saver": "^2.0.5",
|
||||
"inversify": "^5.0.5",
|
||||
"liquor-tree": "^0.2.70",
|
||||
@@ -5772,6 +5773,14 @@
|
||||
"sha.js": "^2.4.8"
|
||||
}
|
||||
},
|
||||
"node_modules/cross-fetch": {
|
||||
"version": "3.1.4",
|
||||
"resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.4.tgz",
|
||||
"integrity": "sha512-1eAtFWdIubi6T4XPy6ei9iUFoKpUkIF971QLN8lIvvvwueI65+Nw5haMNKUwfJxabqlIIDODJKGrQ66gxC0PbQ==",
|
||||
"dependencies": {
|
||||
"node-fetch": "2.6.1"
|
||||
}
|
||||
},
|
||||
"node_modules/cross-spawn": {
|
||||
"version": "6.0.5",
|
||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz",
|
||||
@@ -12658,6 +12667,14 @@
|
||||
"semver": "bin/semver"
|
||||
}
|
||||
},
|
||||
"node_modules/node-fetch": {
|
||||
"version": "2.6.1",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.1.tgz",
|
||||
"integrity": "sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw==",
|
||||
"engines": {
|
||||
"node": "4.x || >=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/node-forge": {
|
||||
"version": "0.10.0",
|
||||
"resolved": "https://registry.npmjs.org/node-forge/-/node-forge-0.10.0.tgz",
|
||||
@@ -25705,6 +25722,14 @@
|
||||
"sha.js": "^2.4.8"
|
||||
}
|
||||
},
|
||||
"cross-fetch": {
|
||||
"version": "3.1.4",
|
||||
"resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.4.tgz",
|
||||
"integrity": "sha512-1eAtFWdIubi6T4XPy6ei9iUFoKpUkIF971QLN8lIvvvwueI65+Nw5haMNKUwfJxabqlIIDODJKGrQ66gxC0PbQ==",
|
||||
"requires": {
|
||||
"node-fetch": "2.6.1"
|
||||
}
|
||||
},
|
||||
"cross-spawn": {
|
||||
"version": "6.0.5",
|
||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz",
|
||||
@@ -31409,6 +31434,11 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node-fetch": {
|
||||
"version": "2.6.1",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.1.tgz",
|
||||
"integrity": "sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw=="
|
||||
},
|
||||
"node-forge": {
|
||||
"version": "0.10.0",
|
||||
"resolved": "https://registry.npmjs.org/node-forge/-/node-forge-0.10.0.tgz",
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
"@juggle/resize-observer": "^3.3.0",
|
||||
"ace-builds": "^1.4.12",
|
||||
"core-js": "^3.9.1",
|
||||
"cross-fetch": "^3.1.4",
|
||||
"file-saver": "^2.0.5",
|
||||
"inversify": "^5.0.5",
|
||||
"liquor-tree": "^0.2.70",
|
||||
|
||||
@@ -185,7 +185,7 @@ actions:
|
||||
rm -fv ~/Library/Application\ Support/Firefox/Profiles/*/sessionstore-backups/upgrade.js*-20*
|
||||
-
|
||||
name: Delete Firefox passwords
|
||||
docs: http://kb.mozillazine.org/Password_Manager
|
||||
docs: https://web.archive.org/web/20210425202923/http://kb.mozillazine.org/Password_Manager
|
||||
code: |-
|
||||
rm -fv ~/Library/Application\ Support/Firefox/Profiles/*/signons.txt
|
||||
rm -fv ~/Library/Application\ Support/Firefox/Profiles/*/signons2.txt
|
||||
@@ -437,7 +437,7 @@ actions:
|
||||
-
|
||||
name: Disable PowerShell Core telemetry
|
||||
recommend: standard
|
||||
docs: https://github.com/PowerShell/PowerShell/tree/release/v7.1.1#telemetry
|
||||
docs: https://github.com/PowerShell/PowerShell/blob/v7.1.0/README.md#telemetry
|
||||
call:
|
||||
-
|
||||
function: PersistUserEnvironmentConfiguration
|
||||
|
||||
@@ -613,7 +613,7 @@ actions:
|
||||
-
|
||||
name: Disable cloud speech recognition
|
||||
recommend: standard
|
||||
docs: https://www.tenforums.com/tutorials/101902-turn-off-online-speech-recognition-windows-10-a.html
|
||||
docs: https://docs.microsoft.com/en-us/windows/privacy/manage-connections-from-windows-operating-system-components-to-microsoft-services#186-speech
|
||||
code: reg add "HKCU\Software\Microsoft\Speech_OneCore\Settings\OnlineSpeechPrivacy" /v "HasAccepted" /t "REG_DWORD" /d 0 /f
|
||||
revertCode: reg add "HKCU\Software\Microsoft\Speech_OneCore\Settings\OnlineSpeechPrivacy" /v "HasAccepted" /t "REG_DWORD" /d 1 /f
|
||||
-
|
||||
@@ -629,7 +629,7 @@ actions:
|
||||
-
|
||||
name: Disable Windows feedback
|
||||
recommend: standard
|
||||
docs: https://www.tenforums.com/tutorials/2441-change-feedback-frequency-windows-10-a.html
|
||||
docs: https://docs.microsoft.com/en-us/windows/privacy/manage-connections-from-windows-operating-system-components-to-microsoft-services#1816-feedback--diagnostics
|
||||
code: |-
|
||||
reg add "HKCU\SOFTWARE\Microsoft\Siuf\Rules" /v "NumberOfSIUFInPeriod" /t REG_DWORD /d 0 /f
|
||||
reg delete "HKCU\SOFTWARE\Microsoft\Siuf\Rules" /v "PeriodInNanoSeconds" /f
|
||||
@@ -656,8 +656,9 @@ actions:
|
||||
name: Deny app access to location
|
||||
recommend: standard
|
||||
docs:
|
||||
- https://r-pufky.github.io/docs/operating-systems/windows/10/securing-installation/privacy/location.html
|
||||
- https://docs.microsoft.com/en-us/windows/client-management/mdm/policy-csp-privacy#privacy-letappsaccesslocation
|
||||
- https://docs.microsoft.com/en-us/windows/client-management/mdm/policy-csp-privacy#privacy-letappsaccesslocation # LetAppsAccessLocation
|
||||
- https://www.joseespitia.com/2019/07/24/registry-keys-for-windows-10-application-privacy-settings/ # ConsentStore\location
|
||||
- https://social.technet.microsoft.com/Forums/en-US/63904312-04af-41e5-8b57-1dd446ea45c5/ # lfsvc\Service\Configuration
|
||||
code: |-
|
||||
reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\CapabilityAccessManager\ConsentStore\location" /v "Value" /d "Deny" /f
|
||||
:: For older Windows (before 1903)
|
||||
@@ -1039,7 +1040,6 @@ actions:
|
||||
revertCode: reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\CapabilityAccessManager\ConsentStore\bluetoothSync" /v "Value" /d "Allow" /t REG_SZ /f
|
||||
-
|
||||
category: Disable app access to voice activation
|
||||
docs: https://www.tenforums.com/tutorials/130122-allow-deny-apps-access-use-voice-activation-windows-10-a.html
|
||||
children:
|
||||
-
|
||||
name: Disable apps and Cortana to activate with voice
|
||||
@@ -1284,7 +1284,9 @@ actions:
|
||||
-
|
||||
name: Turn Off Suggested Content in Settings app
|
||||
recommend: standard
|
||||
docs: https://www.tenforums.com/tutorials/100541-turn-off-suggested-content-settings-app-windows-10-a.html
|
||||
docs:
|
||||
- https://docs.microsoft.com/en-us/windows-server/remote/remote-desktop-services/rds-vdi-recommendations-2004
|
||||
- https://news.softpedia.com/news/how-to-block-banners-in-windows-10-version-1809-settings-app-523331.shtml
|
||||
code: |-
|
||||
reg add "HKCU\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContent-338393Enabled" /d "0" /t REG_DWORD /f
|
||||
reg add "HKCU\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v "SubscribedContent-353694Enabled" /d "0" /t REG_DWORD /f
|
||||
@@ -1311,9 +1313,7 @@ actions:
|
||||
-
|
||||
name: Do not start Windows Biometric Service
|
||||
recommend: strict
|
||||
docs:
|
||||
- http://batcmd.com/windows/10/services/wbiosrvc/
|
||||
- http://revertservice.com/10/wbiosrvc/
|
||||
docs: https://docs.microsoft.com/en-us/windows-server/security/windows-services/security-guidelines-for-disabling-system-services-in-windows-server#windows-biometric-service
|
||||
code: |-
|
||||
reg add "HKLM\SYSTEM\CurrentControlSet\Services\WbioSrvc" /v "Start" /t REG_DWORD /d 4 /f
|
||||
sc stop "WbioSrvc" & sc config "WbioSrvc" start=disabled
|
||||
@@ -1341,7 +1341,7 @@ actions:
|
||||
-
|
||||
name: Disable Website Access of Language List
|
||||
recommend: standard
|
||||
docs: https://www.tenforums.com/tutorials/82980-turn-off-website-access-language-list-windows-10-a.html
|
||||
docs: https://docs.microsoft.com/en-us/windows/privacy/manage-connections-from-windows-operating-system-components-to-microsoft-services#181-general
|
||||
code: reg add "HKCU\Control Panel\International\User Profile" /v "HttpAcceptLanguageOptOut" /t REG_DWORD /d 1 /f
|
||||
revertCode: reg add "HKCU\Control Panel\International\User Profile" /v "HttpAcceptLanguageOptOut" /t REG_DWORD /d 0 /f
|
||||
-
|
||||
@@ -1380,7 +1380,7 @@ actions:
|
||||
children:
|
||||
-
|
||||
name: Disable Windows Insider Service
|
||||
docs: http://revertservice.com/10/wisvc/
|
||||
docs: https://docs.microsoft.com/en-us/windows-server/security/windows-services/security-guidelines-for-disabling-system-services-in-windows-server#windows-insider-service
|
||||
recommend: standard
|
||||
code: sc stop "wisvc" & sc config "wisvc" start=disabled
|
||||
revertCode: sc config "wisvc" start=demand
|
||||
@@ -1404,7 +1404,7 @@ actions:
|
||||
revertCode: reg delete "HKLM\SOFTWARE\Policies\Microsoft\Windows\PreviewBuilds" /v "AllowBuildPreview" /f
|
||||
-
|
||||
name: Remove "Windows Insider Program" from Settings
|
||||
docs: https://www.askvg.com/windows-10-tip-remove-windows-insider-program-section-from-settings-page/
|
||||
docs: https://winaero.com/how-to-hide-the-windows-insider-program-page-from-the-settings-app-in-windows-10/
|
||||
code: reg add "HKLM\SOFTWARE\Microsoft\WindowsSelfHost\UI\Visibility" /v "HideInsiderPage" /t "REG_DWORD" /d "1" /f
|
||||
revertCode: reg delete "HKLM\SOFTWARE\Microsoft\WindowsSelfHost\UI\Visibility" /v "HideInsiderPage" /f
|
||||
-
|
||||
@@ -1472,7 +1472,10 @@ actions:
|
||||
-
|
||||
name: Disable Language Setting Sync
|
||||
recommend: standard
|
||||
docs: https://www.tenforums.com/tutorials/4077-turn-off-sync-settings-microsoft-account-windows-10-a.html
|
||||
docs:
|
||||
- https://winaero.com/turn-on-off-sync-settings-windows-10/
|
||||
- https://www.thewindowsclub.com/how-to-configure-windows-10-sync-settings-using-registry-editor
|
||||
- https://tuxicoman.jesuislibre.net/blog/wp-content/uploads/Windows10_Telemetrie_1709.pdf # from guide on confidentiality and privacy with Windows 10 distributed to the French police, previous version of guide: https://www.pmenier.net/dotclear/docext/win10/.Windows10-Presentation.pdf
|
||||
code: reg add "HKCU\SOFTWARE\Microsoft\Windows\CurrentVersion\SettingSync\Groups\Language" /t REG_DWORD /v "Enabled" /d 0 /f
|
||||
revertCode: reg add "HKCU\SOFTWARE\Microsoft\Windows\CurrentVersion\SettingSync\Groups\Language" /t REG_DWORD /v "Enabled" /d 1 /f
|
||||
-
|
||||
@@ -1832,7 +1835,9 @@ actions:
|
||||
revertCode: reg delete "HKLM\SOFTWARE\Policies\Microsoft\Edge" /v "SendSiteInfoToImproveServices" /f
|
||||
-
|
||||
name: Disable Automatic Installation of Microsoft Edge Chromium
|
||||
docs: https://docs.microsoft.com/en-us/deployedge/microsoft-edge-blocker-toolkit
|
||||
docs:
|
||||
- https://admx.help/?Category=EdgeChromium_Blocker&Policy=Microsoft.Policies.EdgeUpdate::NoUpdate
|
||||
- https://web.archive.org/web/20210118230052/https://docs.microsoft.com/en-us/deployedge/microsoft-edge-blocker-toolkit
|
||||
code: reg add "HKLM\SOFTWARE\Microsoft\EdgeUpdate" /v "DoNotUpdateToEdgeWithChromium" /t REG_DWORD /d 1 /f
|
||||
revertCode: reg delete "HKLM\SOFTWARE\Microsoft\EdgeUpdate" /v "DoNotUpdateToEdgeWithChromium" /f
|
||||
-
|
||||
@@ -2686,7 +2691,10 @@ actions:
|
||||
-
|
||||
name: Do not show recently used files in Quick Access
|
||||
recommend: strict
|
||||
docs: https://www.tenforums.com/tutorials/2713-add-remove-recent-files-quick-access-windows-10-a.html
|
||||
docs:
|
||||
- https://matthewhill.uk/windows/group-policy-disable-recent-files-frequent-folder-explorer/ # ShowRecent
|
||||
- https://www.howto-connect.com/delete-recent-frequent-from-file-explorer-on-windows-10/ # 3134ef9c-6b18-4996-ad04-ed5912e00eb5
|
||||
- https://docs.microsoft.com/en-us/windows/win32/sysinfo/32-bit-and-64-bit-application-data-in-the-registry # Wow6432Node
|
||||
code: |-
|
||||
if %PROCESSOR_ARCHITECTURE%==x86 ( REM is 32 bit?
|
||||
reg add "HKCU\Software\Microsoft\Windows\CurrentVersion\Explorer" /v "ShowRecent" /d 0 /t REG_DWORD /f
|
||||
@@ -2707,12 +2715,15 @@ actions:
|
||||
revertCode: reg add "HKCU\SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Advanced" /v "ShowSyncProviderNotifications" /d 1 /t REG_DWORD /f
|
||||
-
|
||||
name: Turn hibernate off to disable sleep for quick start
|
||||
docs: http://www.windows10windows7.com/w10/win10zs/100102504.html
|
||||
docs: https://docs.microsoft.com/en-us/windows-hardware/manufacture/desktop/compact-os#ram-pagefilesys-and-hiberfilsys
|
||||
code: powercfg -h off
|
||||
revertCode: powercfg -h on
|
||||
-
|
||||
name: Enable camera on/off OSD notifications
|
||||
docs: https://www.tenforums.com/tutorials/166065-how-enable-disable-camera-off-osd-notifications-windows-10-a.html
|
||||
docs:
|
||||
- https://docs.microsoft.com/en-us/windows-hardware/customize/desktop/unattend/microsoft-windows-coremmres-nophysicalcameraled
|
||||
- https://www.reddit.com/r/Surface/comments/88nyln/the_webcamled_took_anyone_it_apart/dwm64p5
|
||||
- https://answers.microsoft.com/en-us/windows/forum/all/enable-osd-notification-for-webcam/caf1fff4-78d3-4b93-905b-ef657097a44e
|
||||
code: reg add "HKLM\SOFTWARE\Microsoft\OEM\Device\Capture" /v "NoPhysicalCameraLED" /d 1 /t REG_DWORD /f
|
||||
revertCode: reg delete "HKLM\Software\Microsoft\OEM\Device\Capture" /v "NoPhysicalCameraLED" /f
|
||||
-
|
||||
@@ -2861,7 +2872,9 @@ actions:
|
||||
revertCode: sc config "VSS" start=demand
|
||||
-
|
||||
name: Disable NetBios for all interfaces
|
||||
docs: https://10dsecurity.com/saying-goodbye-netbios/
|
||||
docs:
|
||||
- https://bobcares.com/blog/disable-netbios-and-llmnr-protocols-in-windows-using-gpo/
|
||||
- https://social.technet.microsoft.com/Forums/windowsserver/en-US/c5f3c095-1ad2-4963-b075-787f800b81f2/
|
||||
call:
|
||||
function: RunPowerShell
|
||||
parameters:
|
||||
@@ -3483,7 +3496,7 @@ actions:
|
||||
packageName: Microsoft.ECApp
|
||||
-
|
||||
name: Lock app (shows lock screen)
|
||||
docs: https://www.dashtech.org/can-you-disable-lockapp-exe-on-windows-10/
|
||||
docs: https://www.getwox.com/what-is-lockapp-exe/
|
||||
call:
|
||||
function: UninstallSystemApp
|
||||
parameters:
|
||||
@@ -4351,10 +4364,12 @@ actions:
|
||||
w32tm /config /update
|
||||
w32tm /resync
|
||||
-
|
||||
name: Disable Reserved Storage for updates
|
||||
name: Disable Reserved Storage for updates # since 19H1 (1903)
|
||||
docs:
|
||||
- https://techcommunity.microsoft.com/t5/storage-at-microsoft/windows-10-and-reserved-storage/ba-p/428327
|
||||
- https://www.tenforums.com/tutorials/124858-enable-disable-reserved-storage-windows-10-a.html
|
||||
- https://techcommunity.microsoft.com/t5/storage-at-microsoft/windows-10-and-reserved-storage/ba-p/428327 # Announcement
|
||||
- https://techcommunity.microsoft.com/t5/windows-it-pro-blog/managing-reserved-storage-in-windows-10-environments/ba-p/1297070#toc-hId--8696946 # Set-ReservedStorageState
|
||||
- https://www.howtogeek.com/425563/how-to-disable-reserved-storage-on-windows-10/ # ShippedWithReserves
|
||||
- https://techcommunity.microsoft.com/t5/windows-servicing/reserve-manager-enabled-with-low-disk-space-block/m-p/2073132 # PassedPolicy
|
||||
code: |-
|
||||
dism /online /Set-ReservedStorageState /State:Disabled /NoRestart
|
||||
reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\ReserveManager" /v "MiscPolicyInfo" /t REG_DWORD /d "2" /f
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
import 'mocha';
|
||||
import { expect } from 'chai';
|
||||
import { parseApplication } from '@/application/Parser/ApplicationParser';
|
||||
import { IApplication } from '@/domain/IApplication';
|
||||
import { IUrlStatus } from './StatusChecker/IUrlStatus';
|
||||
import { getUrlStatusesInParallelAsync, IBatchRequestOptions } from './StatusChecker/BatchStatusChecker';
|
||||
|
||||
// Integration test: requests every documentation URL referenced by the parsed
// collections and fails when any of them does not answer with HTTP 200.
describe('collections', () => {
  // arrange
  const application = parseApplication();
  const documentationUrls = collectUniqueUrls(application);
  const batchOptions: IBatchRequestOptions = {
    domainOptions: {
      // No need to be so nice until sources start failing;
      // alternative when they do: sameDomainDelayInMs: 2 /* sec */ * 1000
      sameDomainParallelize: true,
    },
    requestOptions: {
      retryExponentialBaseInMs: 3 /* sec */ * 1000,
      additionalHeaders: { referer: application.info.homepage },
    },
  };
  const oneMinuteInMs = 60000;
  // Allow one minute per URL before mocha aborts the test
  const testTimeoutInMs = documentationUrls.length * oneMinuteInMs;
  it('have no dead urls', async () => {
    // act
    const statuses = await getUrlStatusesInParallelAsync(documentationUrls, batchOptions);
    // assert
    const deadUrls = statuses.filter((status) => status.statusCode !== 200);
    expect(deadUrls).to.have.lengthOf(0, printUrls(deadUrls));
  }).timeout(testTimeoutInMs);
});
|
||||
|
||||
/**
 * Gathers the documentation URLs of every script in every collection of the application.
 *
 * @param app Parsed application whose collections are scanned.
 * @returns Each URL exactly once, in order of first appearance.
 */
function collectUniqueUrls(app: IApplication): string[] {
  const allUrls = app
    .collections
    .flatMap((collection) => collection.getAllScripts())
    .flatMap((script) => script.documentationUrls);
  // A Set keeps insertion order, so this de-duplicates in linear time
  // (instead of an indexOf scan per element) while preserving ordering.
  return Array.from(new Set(allUrls));
}
|
||||
|
||||
/**
 * Formats URL statuses as a bullet list for use in an assertion message.
 *
 * Each entry starts with `- <url>` on its own line, followed by the response
 * code (when truthy) and the JSON-serialized error (when truthy).
 * The whole list is wrapped in a leading and a trailing newline.
 */
function printUrls(statuses: IUrlStatus[]): string {
  const entries = statuses.map((status) => {
    let entry = `- ${status.url}\n`;
    if (status.statusCode) {
      entry += `\tResponse code: ${status.statusCode}`;
    }
    if (status.error) {
      entry += `\tException: ${JSON.stringify(status.error, null, '\t')}`;
    }
    return entry;
  });
  return `\n${entries.join('\n')}\n`;
}
|
||||
@@ -0,0 +1,67 @@
|
||||
import { sleepAsync } from '@/infrastructure/Threading/AsyncSleep';
|
||||
import { IUrlStatus } from './IUrlStatus';
|
||||
import { getUrlStatusAsync, IRequestOptions } from './Requestor';
|
||||
import { groupUrlsByDomain } from './UrlPerDomainGrouper';
|
||||
|
||||
/**
 * Requests the given URLs and reports the status of each one.
 *
 * Duplicate URLs are requested only once. Caller options are merged with the
 * defaults section by section, so a partially specified `domainOptions` or
 * `requestOptions` keeps the default values of the fields it leaves out.
 *
 * @param urls URLs to check.
 * @param options Optional overrides for domain throttling and request behavior.
 * @returns One status per unique URL.
 */
export async function getUrlStatusesInParallelAsync(
  urls: string[],
  options?: IBatchRequestOptions): Promise<IUrlStatus[]> {
  const uniqueUrls = Array.from(new Set(urls));
  const overrides: IBatchRequestOptions = options || {};
  // A single top-level spread would replace a whole nested section and silently
  // drop its defaults (or leave it undefined), so each section is merged on its own.
  const mergedOptions: IBatchRequestOptions = {
    domainOptions: { ...DefaultOptions.domainOptions, ...overrides.domainOptions },
    requestOptions: { ...DefaultOptions.requestOptions, ...overrides.requestOptions },
  };
  console.log('Options: ', mergedOptions); // tslint:disable-line: no-console
  return requestAsync(uniqueUrls, mergedOptions);
}
|
||||
|
||||
/** Options accepted when checking a batch of URLs. */
export interface IBatchRequestOptions {
  /** How requests targeting the same domain are scheduled. */
  domainOptions?: IDomainOptions;
  /** Options forwarded to every single URL request. */
  requestOptions?: IRequestOptions;
}
|
||||
|
||||
/** Scheduling of requests that target the same domain. */
interface IDomainOptions {
  /**
   * When truthy, all URLs are requested at once regardless of their domain;
   * otherwise URLs sharing a domain are requested one after another.
   */
  sameDomainParallelize?: boolean;
  /** Pause between two consecutive requests to the same domain when not parallelized. */
  sameDomainDelayInMs?: number;
}
|
||||
|
||||
/** Batch options applied when the caller does not provide any. */
const DefaultOptions: IBatchRequestOptions = {
  domainOptions: {
    sameDomainParallelize: false,
    sameDomainDelayInMs: 3000, // 3 seconds
  },
  requestOptions: {
    retryExponentialBaseInMs: 5000, // 5 seconds
    additionalHeaders: {},
  },
};
|
||||
|
||||
/**
 * Dispatches the requests either all at once or throttled per domain,
 * depending on `options.domainOptions.sameDomainParallelize`.
 */
function requestAsync(urls: string[], options: IBatchRequestOptions): Promise<IUrlStatus[]> {
  const requestUrl = (url: string) => getUrlStatusAsync(url, options.requestOptions);
  if (options.domainOptions.sameDomainParallelize) {
    // Fire every request immediately, regardless of domain
    return Promise.all(urls.map((url) => requestUrl(url)));
  }
  return runOnEachDomainWithDelayAsync(
    urls,
    requestUrl,
    options.domainOptions.sameDomainDelayInMs);
}
|
||||
|
||||
/**
 * Runs `action` for every URL: different domains are processed concurrently,
 * while URLs of the same domain are processed one by one with a pause in between.
 *
 * @param urls URLs to process.
 * @param action Operation producing the status of a single URL.
 * @param delayInMs Pause between two consecutive URLs of the same domain.
 * @returns Statuses of all URLs, grouped by domain.
 */
async function runOnEachDomainWithDelayAsync(
  urls: string[],
  action: (url: string) => Promise<IUrlStatus>,
  delayInMs: number): Promise<IUrlStatus[]> {
  const processDomainSequentially = async (domainUrls: string[]): Promise<IUrlStatus[]> => {
    const statuses: IUrlStatus[] = [];
    let isFirstRequest = true;
    for (const url of domainUrls) {
      if (!isFirstRequest) {
        // Pause only between requests, never before the first or after the last one
        await sleepAsync(delayInMs);
      }
      isFirstRequest = false;
      statuses.push(await action(url));
    }
    return statuses;
  };
  const domainGroups = groupUrlsByDomain(urls);
  const statusesPerDomain = await Promise.all(
    domainGroups.map((domainUrls) => processDomainSequentially(domainUrls)));
  return statusesPerDomain.flat();
}
|
||||
@@ -0,0 +1,44 @@
|
||||
import { sleepAsync } from '@/infrastructure/Threading/AsyncSleep';
|
||||
import { IUrlStatus } from './IUrlStatus';
|
||||
|
||||
/** Base retry interval used when the caller does not supply one. */
const DefaultBaseRetryIntervalInMs = 5000; // 5 seconds
|
||||
|
||||
/**
 * Runs `action` and repeats it while its result is considered retryable,
 * sleeping for a back-off interval before every repetition.
 *
 * With the default `currentRetry` of 1 the action is executed at most four
 * times: the initial attempt plus up to three retries.
 *
 * @param action Operation to run; reports failures through the returned status.
 * @param baseRetryIntervalInMs Base interval used to compute the back-off.
 * @param currentRetry Number of the next retry (1-based).
 * @returns The status of the last executed attempt.
 */
export async function retryWithExponentialBackOffAsync(
  action: () => Promise<IUrlStatus>,
  baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs,
  currentRetry = 1): Promise<IUrlStatus> {
  const maxRetries = 3;
  let status = await action();
  for (let retry = currentRetry; shouldRetry(status) && retry <= maxRetries; retry++) {
    const exponentialBackOffInMs = getRetryTimeoutInMs(retry, baseRetryIntervalInMs);
    // tslint:disable-next-line: no-console
    console.log(`Retrying (${retry}) in ${exponentialBackOffInMs / 1000} seconds`, status);
    await sleepAsync(exponentialBackOffInMs);
    status = await action();
  }
  return status;
}
|
||||
|
||||
/**
 * Tells whether a request is worth repeating: it failed with an error,
 * or the server answered with a 5xx code or with 429.
 */
function shouldRetry(status: IUrlStatus) {
  const tooManyRequests = 429;
  const hasFailed = Boolean(status.error);
  return hasFailed
    || isTransientError(status.statusCode)
    || status.statusCode === tooManyRequests;
}
|
||||
|
||||
/** Returns true for HTTP status codes in the server-error range (500 to 599, inclusive). */
function isTransientError(statusCode: number) {
  const firstServerErrorCode = 500;
  const lastServerErrorCode = 599;
  return firstServerErrorCode <= statusCode && statusCode <= lastServerErrorCode;
}
|
||||
|
||||
/**
 * Computes how long to wait before the given retry.
 *
 * The wait doubles with every retry (base, 2x base, 4x base, ...) and is
 * multiplied by a random factor so that simultaneous clients do not retry in lockstep.
 *
 * @param currentRetry 1-based number of the retry about to be made.
 * @param baseRetryIntervalInMs Wait for the first retry before randomization.
 * @returns Wait time in milliseconds, rounded up.
 */
function getRetryTimeoutInMs(currentRetry: number, baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs) {
  const retryRandomFactor = 0.5; // Retry intervals are between 50% and 150%
                                 // of the exponentially increasing base amount
  const minRandom = 1 - retryRandomFactor;
  const maxRandom = 1 + retryRandomFactor;
  // Scale [0, 1) onto [minRandom, maxRandom); the offset has to be the lower bound,
  // otherwise the factor lands between 150% and 250%.
  const randomization = (Math.random() * (maxRandom - minRandom)) + minRandom;
  const exponential = Math.pow(2, currentRetry - 1);
  return Math.ceil(exponential * baseRetryIntervalInMs * randomization);
}
|
||||
@@ -0,0 +1,5 @@
|
||||
/** Outcome of requesting a single URL. */
export interface IUrlStatus {
  /** The URL that was requested. */
  url: string;
  /** Whatever was thrown while requesting, when the request itself failed. */
  error?: any;
  /** HTTP status code of the response, when one was received. */
  statusCode?: number;
}
|
||||
@@ -0,0 +1,47 @@
|
||||
import { retryWithExponentialBackOffAsync } from './ExponentialBackOffRetryHandler';
|
||||
import { IUrlStatus } from './IUrlStatus';
|
||||
import fetch from 'cross-fetch';
|
||||
|
||||
/** Options of a single URL request. */
export interface IRequestOptions {
  /** Base interval handed to the retry handler for computing back-off waits. */
  retryExponentialBaseInMs?: number;
  /** Headers sent in addition to the default ones; same-named defaults are overridden. */
  additionalHeaders?: Record<string, string>;
}
|
||||
|
||||
/**
 * Sends a GET request to the URL and reports the outcome, retrying through
 * the exponential back-off handler when the outcome is retryable.
 *
 * Failures never reject: a thrown error is captured in the returned status.
 *
 * @param url URL to request.
 * @param options Request options; missing fields fall back to the defaults.
 * @returns Status holding either the response code or the caught error.
 */
export async function getUrlStatusAsync(
  url: string,
  options: IRequestOptions = DefaultOptions): Promise<IUrlStatus> {
  const effectiveOptions = { ...DefaultOptions, ...options };
  const fetchOptions = getFetchOptions(effectiveOptions);
  const requestOnce = async (): Promise<IUrlStatus> => {
    console.log('Requesting', url); // tslint:disable-line: no-console
    try {
      const { status } = await fetch(url, fetchOptions);
      return { url, statusCode: status };
    } catch (err) {
      return { url, error: err };
    }
  };
  return retryWithExponentialBackOffAsync(requestOnce, effectiveOptions.retryExponentialBaseInMs);
}
|
||||
|
||||
/** Request options applied when the caller leaves them out. */
const DefaultOptions: IRequestOptions = {
  retryExponentialBaseInMs: 5 /* sec */ * 1000,
  additionalHeaders: {},
};
|
||||
|
||||
/**
 * Builds the options object passed to `fetch`: a GET request carrying the
 * default headers, overridden and extended by `options.additionalHeaders`.
 */
function getFetchOptions(options: IRequestOptions) {
  const headers = { ...DefaultHeaders, ...options.additionalHeaders };
  return { method: 'GET', headers };
}
|
||||
|
||||
/**
 * Headers sent with every request. They imitate a desktop browser
 * (Chrome on macOS); presumably so that sources treat the checker
 * like a regular visitor.
 */
const DefaultHeaders: Record<string, string> = {
  // Browser identity: Chrome on macOS
  'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
  'upgrade-insecure-requests': '1',
  'connection': 'keep-alive',
  // Content negotiation
  'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
  'accept-encoding': 'gzip, deflate, br',
  'cache-control': 'max-age=0',
  'accept-language': 'en-US,en;q=0.9',
};
|
||||
@@ -0,0 +1,19 @@
|
||||
/**
 * Partitions URLs into groups sharing the same domain.
 *
 * Groups appear in the order their domain is first encountered, and URLs
 * keep their relative input order inside each group.
 *
 * @param urls URLs to partition.
 * @returns One array of URLs per distinct domain.
 */
export function groupUrlsByDomain(urls: string[]): string[][] {
  const urlsPerDomain = new Map<string, string[]>();
  for (const url of urls) {
    const domain = extractDomain(url);
    const domainUrls = urlsPerDomain.get(domain);
    if (domainUrls === undefined) {
      urlsPerDomain.set(domain, [url]);
    } else {
      domainUrls.push(url);
    }
  }
  return Array.from(urlsPerDomain.values());
}
|
||||
|
||||
/**
 * Returns the lower-cased host name of an absolute URL.
 *
 * Parsing is delegated to the standard URL parser so that the port, user
 * information, query string and fragment never end up in the result
 * (e.g. `https://example.com?x=1` yields `example.com`).
 *
 * @param url Absolute URL including its scheme.
 * @throws TypeError when `url` is not a valid absolute URL.
 */
function extractDomain(url: string): string {
  return new URL(url).hostname.toLowerCase();
}
|
||||
Reference in New Issue
Block a user