Files
privacy.sexy/tests/integration/application/collections/StatusChecker/UrlPerDomainGrouper.ts
undergroundwires db62ed7f3a fix broken URLs and automate broken URL checks #70
This commit:
- Fixes broken URLs using archive.org or other references.
- Replaces tenforums.com URLs with better documentation, as they tend to return HTTP status code 403 to tests and are also a low-quality source.
- Changes all insecure http sources to https alternatives
- Adds integration tests to check for broken URLs
  - There's logic implemented for adding a delay in between requests sent to the same domain; however, it's not used, as the sources can respond to fully parallelized requests.
- Runs the test pipeline weekly to get notified about broken URLs even when there are no commits
2021-05-05 23:57:41 +02:00

20 lines
574 B
TypeScript

/**
 * Groups URLs by the domain they point to.
 *
 * The domain of each URL is determined by `extractDomain`. Groups are returned
 * in the order in which each domain first appears in `urls`, and the URLs
 * inside a group keep their relative input order. Duplicate URLs are kept.
 *
 * @param urls - URLs to group.
 * @returns One array of URLs per distinct domain; empty when `urls` is empty.
 */
export function groupUrlsByDomain(urls: string[]): string[][] {
  // A Map iterates in insertion order, so the groups come out in
  // first-occurrence order without re-scanning the whole list per domain.
  const urlsByDomain = new Map<string, string[]>();
  for (const url of urls) {
    const domain = extractDomain(url);
    const group = urlsByDomain.get(domain);
    if (group === undefined) {
      urlsByDomain.set(domain, [url]);
    } else {
      group.push(url);
    }
  }
  return Array.from(urlsByDomain.values());
}
/**
 * Extracts the lower-cased authority part of an absolute URL, i.e. the text
 * between the first `://` and the first following `/`, `?` or `#`.
 *
 * A port, if present, is kept as part of the result.
 *
 * @param url - Absolute URL including its scheme, e.g. `https://example.com/page`.
 * @returns The lower-cased authority, e.g. `example.com`.
 * @throws TypeError when `url` contains no `://` scheme separator.
 */
function extractDomain(url: string): string {
  const schemeSeparator = '://';
  const separatorIndex = url.indexOf(schemeSeparator);
  if (separatorIndex === -1) {
    throw new TypeError(`Cannot extract domain, URL has no scheme: "${url}"`);
  }
  const afterScheme = url.slice(separatorIndex + schemeSeparator.length);
  // The authority ends at the path, query or fragment, whichever comes first;
  // stopping only at "/" would leak "?query" or "#fragment" into the domain.
  const authorityEnd = afterScheme.search(/[/?#]/);
  const authority = authorityEnd === -1
    ? afterScheme
    : afterScheme.slice(0, authorityEnd);
  return authority.toLowerCase();
}