This commit: - Fixes broken URLs using archive.org or other references. - Replaces tenforums.com URLs with better documentation, as they tend to return HTTP status code 403 to tests and are also a low-quality source. - Changes all insecure http sources to https alternatives. - Adds integration tests to check for broken URLs. - Implements logic for adding a delay in between requests sent to the same domain; however, it is not used, as the sources can respond to fully parallelized requests. - Runs the test pipeline weekly to get notified about broken URLs even when there are no commits.
20 lines
574 B
TypeScript
20 lines
574 B
TypeScript
/**
 * Groups URLs so that every inner array holds the URLs sharing one domain.
 *
 * The domain of each URL is obtained from `extractDomain`. Groups are returned
 * in the order in which their domain first appears in `urls`, and URLs inside
 * a group keep their relative input order. Duplicate URLs are kept.
 *
 * @param urls - URLs to group; the array is not modified.
 * @returns One array of URLs per distinct domain; empty when `urls` is empty.
 * @throws Propagates any error thrown by `extractDomain` for a given URL.
 */
export function groupUrlsByDomain(urls: string[]): string[][] {
  // A Map iterates in insertion order, so a single pass yields groups ordered
  // by first appearance without re-scanning every URL once per domain.
  const urlsByDomain = new Map<string, string[]>();
  for (const url of urls) {
    const domain = extractDomain(url);
    const group = urlsByDomain.get(domain);
    if (group === undefined) {
      urlsByDomain.set(domain, [url]);
    } else {
      group.push(url);
    }
  }
  return Array.from(urlsByDomain.values());
}
|
|
|
|
/**
 * Extracts the lower-cased authority part of a URL, i.e. the text between the
 * first `://` and the first following `/`, `?` or `#` (or the end of the
 * string). A port or user-info segment, if present, stays part of the result.
 *
 * @param url - Absolute URL that contains a `://` scheme separator.
 * @returns The lower-cased authority, e.g. `example.com` for
 *   `https://Example.com/page?x=1`.
 * @throws TypeError when `url` contains no `://` separator.
 */
function extractDomain(url: string): string {
  const schemeSeparator = '://';
  const separatorIndex = url.indexOf(schemeSeparator);
  if (separatorIndex === -1) {
    // Fail with a message that names the offending URL instead of an opaque
    // "cannot read properties of undefined" error.
    throw new TypeError(`Cannot extract domain: "${url}" has no "://" scheme separator`);
  }
  const afterScheme = url.slice(separatorIndex + schemeSeparator.length);
  // The authority ends at the path, query or fragment, whichever comes first,
  // so "https://example.com?x=1" and "https://example.com/" share one domain.
  const authorityEnd = afterScheme.search(/[\/?#]/);
  const authority = authorityEnd === -1 ? afterScheme : afterScheme.slice(0, authorityEnd);
  return authority.toLowerCase();
}
|