fix broken URLs and automate broken URL checks #70
This commit: - Fixes broken URLs using archive.org or other references. - Replaces tenforums.com URLs with better documentation, as they tend to return HTTP status code 403 to tests and are also a low-quality source. - Changes all insecure http sources to https alternatives. - Adds integration tests to check for broken URLs. - Implements logic for adding a delay between requests sent to the same domain; however, it is not used, as the sources can handle fully parallelized requests. - Runs the test pipeline weekly to get notified about broken URLs even when there are no commits.
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
/**
 * Partitions URLs into groups that share the same domain.
 *
 * The domain of each URL is whatever `extractDomain` returns for it.
 * Groups are ordered by the first appearance of their domain in `urls`,
 * and the URLs inside each group keep their relative input order.
 *
 * @param urls - URLs to partition; the array itself is not modified.
 * @returns One array of URLs per distinct domain; empty when `urls` is empty.
 */
export function groupUrlsByDomain(urls: string[]): string[][] {
  // A Map iterates in insertion order, so a single pass yields the same
  // grouping as collecting the domains first and then re-filtering the
  // whole list once per domain, without the repeated scans.
  const urlsByDomain = new Map<string, string[]>();
  for (const url of urls) {
    const domain = extractDomain(url);
    const group = urlsByDomain.get(domain);
    if (group === undefined) {
      urlsByDomain.set(domain, [url]);
    } else {
      group.push(url);
    }
  }
  return Array.from(urlsByDomain.values());
}
|
||||
|
||||
/**
 * Extracts the lower-cased authority part (host plus optional port) of a URL.
 *
 * A leading `scheme://` prefix is optional: input without one is treated as
 * starting directly with the authority instead of causing an exception.
 * The authority ends at the first `/`, `?` or `#`, so URLs that have a query
 * string or fragment but no path still resolve to the bare host.
 *
 * @param url - Absolute URL, or a scheme-less `host/path` string.
 * @returns The authority in lower case; an empty string for empty input.
 */
function extractDomain(url: string): string {
  // Only strip "://" when it precedes every path/query/fragment delimiter;
  // otherwise a URL embedded in a query string (e.g. "?next=http://other")
  // would be mistaken for the scheme separator.
  const withoutScheme = url.replace(/^[^/?#]*:\/\//, '');
  const authorityEnd = withoutScheme.search(/[/?#]/);
  const authority =
    authorityEnd === -1 ? withoutScheme : withoutScheme.slice(0, authorityEnd);
  return authority.toLowerCase();
}
|
||||
Reference in New Issue
Block a user