ci/cd: trigger URL checks more often and limit their number

Fix all URL checks failing in GitHub runners due to:

- Missing Happy Eyeballs support in Node.js:
  nodejs/undici#1531
  nodejs/node#41625
- Missing IPv6 support in GitHub runners:
  actions/runner#3138
  actions/runner-images#668

Tried (did not work):

1) Forcing IPv4-first DNS resolution:

```
import dns from 'dns';
dns.setDefaultResultOrder('ipv4first');
```

2) Bumping node to v20.

3) TODO: Try autoSelectFamily (see the sketch below).

- Or is it due to too many max connections? Test this.
  Mentioned in a comment on nodejs/node#41625.
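
A possible starting point for (3), sketched but untested here; it assumes the checks run on Node.js 19.4 or newer, where `node:net` exposes the default auto-selection switches, and it is unverified whether the HTTP client used by the status checker honors these defaults:

```
// Untested sketch: opt in to Happy Eyeballs-style (RFC 8305) address
// selection before any HTTP request is issued.
import net from 'node:net';

// Available since Node.js v19.4.0: try IPv6 and IPv4 candidates
// concurrently instead of failing on the first unreachable family.
net.setDefaultAutoSelectFamily(true);

// Optional: per-attempt timeout in milliseconds (slow CI runners may need more).
net.setDefaultAutoSelectFamilyAttemptTimeout(500);
```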

Key changes:

- Run URL checks on every push instead of only on the weekly schedule.
- Introduce an environment variable that randomly selects and limits the
  URLs tested, so the tests provide quicker feedback on code changes.

Other supporting changes:

- Log more information about the test before running it to make
  troubleshooting easier.
- Move the array shuffle function into shared code for reusability and add
  the missing tests.

Author: undergroundwires
Date: 2024-03-19 20:20:41 +01:00
Parent: 287b8e61a0
Commit: 6720a46d2e
6 changed files with 221 additions and 40 deletions

Changed file: GitHub Actions workflow "checks.external-urls"

```
@@ -1,6 +1,7 @@
 name: checks.external-urls

 on:
+  push:
   schedule:
     - cron: '0 0 * * 0' # at 00:00 on every Sunday
@@ -20,3 +21,7 @@ jobs:
       -
         name: Test
         run: npm run check:external-urls
+        env:
+          RANDOMIZED_URL_CHECK_LIMIT: "${{ github.event_name == 'push' && '10' || '' }}"
+          # - Scheduled checks have no limit, ensuring thorough testing.
+          # - Push events, triggered by code changes, limit the number of URLs to provide quick feedback.
```

New file: Shuffle (shared array shuffle utility, imported as '@/application/Common/Shuffle')

@@ -0,0 +1,12 @@
```
/*
  Shuffles an array, returning a new array with the elements in random order.
  Uses the Fisher-Yates (or Durstenfeld) algorithm.
*/
export function shuffle<T>(array: readonly T[]): T[] {
  const shuffledArray = [...array];
  for (let i = array.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [shuffledArray[i], shuffledArray[j]] = [shuffledArray[j], shuffledArray[i]];
  }
  return shuffledArray;
}
```
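
For context, this shuffle is what the URL check uses for random sampling; a minimal usage sketch with made-up URLs and a hypothetical limit:

```
import { shuffle } from '@/application/Common/Shuffle';

// Take a random sample without mutating the source list,
// mirroring how the randomized URL check limit is applied.
const allUrls = ['https://a.example', 'https://b.example', 'https://c.example'];
const maxUrls = 2; // hypothetical limit
const sampledUrls = shuffle(allUrls).slice(0, maxUrls);
console.log(sampledUrls); // e.g. ['https://c.example', 'https://a.example']
```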

New file: DocumentationUrlExtractor (test helper)

@@ -0,0 +1,69 @@
```
import type { IApplication } from '@/domain/IApplication';
import type { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';

interface UrlExtractionContext {
  readonly logger: TestExecutionDetailsLogger;
  readonly application: IApplication;
  readonly urlExclusionPatterns: readonly RegExp[];
}

export function extractDocumentationUrls(
  context: UrlExtractionContext,
): string[] {
  const urlsInApplication = extractUrlsFromApplication(context.application);
  context.logger.logLabeledInformation(
    'Extracted URLs from application',
    urlsInApplication.length.toString(),
  );
  const uniqueUrls = filterDuplicateUrls(urlsInApplication);
  context.logger.logLabeledInformation(
    'Unique URLs after deduplication',
    `${uniqueUrls.length} (duplicates removed)`,
  );
  context.logger.logLabeledInformation(
    'Exclusion patterns for URLs',
    context.urlExclusionPatterns.length === 0
      ? 'None (all URLs included)'
      : context.urlExclusionPatterns.map((pattern, index) => `${index + 1}) ${pattern.toString()}`).join('\n'),
  );
  const includedUrls = filterUrlsExcludingPatterns(uniqueUrls, context.urlExclusionPatterns);
  context.logger.logLabeledInformation(
    'URLs extracted for testing',
    `${includedUrls.length} (after applying exclusion patterns; ${uniqueUrls.length - includedUrls.length} URLs ignored)`,
  );
  return includedUrls;
}

function extractUrlsFromApplication(application: IApplication): string[] {
  return [ // Get all executables
    ...application.collections.flatMap((c) => c.getAllCategories()),
    ...application.collections.flatMap((c) => c.getAllScripts()),
  ]
    // Get all docs
    .flatMap((documentable) => documentable.docs)
    // Parse all URLs
    .flatMap((docString) => extractUrlsExcludingCodeBlocks(docString));
}

function filterDuplicateUrls(urls: readonly string[]): string[] {
  return urls.filter((url, index, array) => array.indexOf(url) === index);
}

function filterUrlsExcludingPatterns(
  urls: readonly string[],
  patterns: readonly RegExp[],
): string[] {
  return urls.filter((url) => !patterns.some((pattern) => pattern.test(url)));
}

function extractUrlsExcludingCodeBlocks(textWithInlineCode: string): string[] {
  /*
    Matches URLs:
    - Excludes inline code blocks as they may contain URLs not intended for user interaction
      and not guaranteed to support expected HTTP methods, leading to false-negatives.
    - Supports URLs containing parentheses, avoiding matches within code that might not represent
      actual links.
  */
  const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g;
  return textWithInlineCode.match(nonCodeBlockUrlRegex) || [];
}
```
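
The URL-matching behavior is easiest to see with a couple of made-up inputs (illustrative only; these are not part of the committed tests, and the function is module-private):

```
// A URL in prose is captured, including a parenthesized path segment:
extractUrlsExcludingCodeBlocks(
  'See https://en.wikipedia.org/wiki/URL_(disambiguation) for details.',
);
// => ['https://en.wikipedia.org/wiki/URL_(disambiguation)']

// A URL wrapped as inline code (immediately after a backtick) is skipped
// thanks to the negative lookbehind:
extractUrlsExcludingCodeBlocks('Query `https://internal.example/api` manually.');
// => []
```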

New file: TestExecutionDetailsLogger (test helper)

@@ -0,0 +1,26 @@
```
import { indentText } from '@tests/shared/Text';

export class TestExecutionDetailsLogger {
  public logTestSectionStartDelimiter(): void {
    this.logSectionDelimiterLine();
  }

  public logTestSectionEndDelimiter(): void {
    this.logSectionDelimiterLine();
  }

  public logLabeledInformation(
    label: string,
    detailedInformation: string,
  ): void {
    console.log([
      `${label}:`,
      indentText(detailedInformation),
    ].join('\n'));
  }

  private logSectionDelimiterLine(): void {
    const horizontalLine = '─'.repeat(40);
    console.log(horizontalLine);
  }
}
```
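
Roughly how the logger output looks (the exact indentation comes from `indentText`, so the two spaces below are illustrative):

```
const logger = new TestExecutionDetailsLogger();
logger.logTestSectionStartDelimiter();
logger.logLabeledInformation('URL check limit', '10');
logger.logTestSectionEndDelimiter();
// Console output:
// ────────────────────────────────────────
// URL check limit:
//   10
// ────────────────────────────────────────
```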

Changed file: external URL check test

```
@@ -1,19 +1,26 @@
 import { test, expect } from 'vitest';
 import { parseApplication } from '@/application/Parser/ApplicationParser';
-import type { IApplication } from '@/domain/IApplication';
 import { indentText } from '@tests/shared/Text';
 import { formatAssertionMessage } from '@tests/shared/FormatAssertionMessage';
+import { shuffle } from '@/application/Common/Shuffle';
 import { type UrlStatus, formatUrlStatus } from './StatusChecker/UrlStatus';
 import { getUrlStatusesInParallel, type BatchRequestOptions } from './StatusChecker/BatchStatusChecker';
+import { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';
+import { extractDocumentationUrls } from './DocumentationUrlExtractor';

 // arrange
+const logger = new TestExecutionDetailsLogger();
+logger.logTestSectionStartDelimiter();
 const app = parseApplication();
-const urls = collectUniqueUrls({
-  application: app,
-  excludePatterns: [
+let urls = extractDocumentationUrls({
+  logger,
+  urlExclusionPatterns: [
     /^https:\/\/archive\.ph/, // Drops HEAD/GET requests via fetch/curl, responding to Postman/Chromium.
   ],
+  application: app,
 });
+urls = filterUrlsToEnvironmentCheckLimit(urls);
+logger.logLabeledInformation('URLs submitted for testing', urls.length.toString());
 const requestOptions: BatchRequestOptions = {
   domainOptions: {
     sameDomainParallelize: false, // be nice to our third-party servers
@@ -30,55 +37,65 @@ const requestOptions: BatchRequestOptions = {
     enableCookies: true,
   },
 };
+logger.logLabeledInformation('HTTP request options', JSON.stringify(requestOptions, null, 2));
 const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000;
+logger.logLabeledInformation('Scheduled test duration', convertMillisecondsToHumanReadableFormat(testTimeoutInMs));
+logger.logTestSectionEndDelimiter();

 test(`all URLs (${urls.length}) should be alive`, async () => {
   // act
+  console.log('URLS', urls); // TODO: Delete
   const results = await getUrlStatusesInParallel(urls, requestOptions);

   // assert
   const deadUrls = results.filter((r) => r.code === undefined || !isOkStatusCode(r.code));
-  expect(deadUrls).to.have.lengthOf(0, formatAssertionMessage([formatUrlStatusReport(deadUrls)]));
+  expect(deadUrls).to.have.lengthOf(
+    0,
+    formatAssertionMessage([createReportForDeadUrlStatuses(deadUrls)]),
+  );
 }, testTimeoutInMs);

 function isOkStatusCode(statusCode: number): boolean {
   return statusCode >= 200 && statusCode < 300;
 }

-function collectUniqueUrls(
-  options: {
-    readonly application: IApplication,
-    readonly excludePatterns?: readonly RegExp[],
-  },
-): string[] {
-  return [ // Get all nodes
-    ...options.application.collections.flatMap((c) => c.getAllCategories()),
-    ...options.application.collections.flatMap((c) => c.getAllScripts()),
-  ]
-    // Get all docs
-    .flatMap((documentable) => documentable.docs)
-    // Parse all URLs
-    .flatMap((docString) => extractUrls(docString))
-    // Remove duplicates
-    .filter((url, index, array) => array.indexOf(url) === index)
-    // Exclude certain URLs based on patterns
-    .filter((url) => !shouldExcludeUrl(url, options.excludePatterns ?? []));
-}
-
-function shouldExcludeUrl(url: string, patterns: readonly RegExp[]): boolean {
-  return patterns.some((pattern) => pattern.test(url));
-}
-
-function formatUrlStatusReport(deadUrlStatuses: readonly UrlStatus[]): string {
+function createReportForDeadUrlStatuses(deadUrlStatuses: readonly UrlStatus[]): string {
   return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`;
 }

-function extractUrls(textWithInlineCode: string): string[] {
-  /*
-    Matches URLs:
-    - Excludes inline code blocks as they may contain URLs not intended for user interaction
-      and not guaranteed to support expected HTTP methods, leading to false-negatives.
-    - Supports URLs containing parentheses, avoiding matches within code that might not represent
-      actual links.
-  */
-  const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g;
-  return textWithInlineCode.match(nonCodeBlockUrlRegex) || [];
+function filterUrlsToEnvironmentCheckLimit(originalUrls: string[]): string[] {
+  const { RANDOMIZED_URL_CHECK_LIMIT } = process.env;
+  logger.logLabeledInformation('URL check limit', RANDOMIZED_URL_CHECK_LIMIT || 'Unlimited');
+  if (RANDOMIZED_URL_CHECK_LIMIT !== undefined && RANDOMIZED_URL_CHECK_LIMIT !== '') {
+    const maxUrlsInTest = parseInt(RANDOMIZED_URL_CHECK_LIMIT, 10);
+    if (Number.isNaN(maxUrlsInTest)) {
+      throw new Error(`Invalid URL limit: ${RANDOMIZED_URL_CHECK_LIMIT}`);
+    }
+    if (maxUrlsInTest < originalUrls.length) {
+      return shuffle(originalUrls).slice(0, maxUrlsInTest);
+    }
+  }
+  return originalUrls;
+}
+
+function convertMillisecondsToHumanReadableFormat(milliseconds: number): string {
+  const timeParts: string[] = [];
+  const addTimePart = (amount: number, label: string) => {
+    if (amount === 0) {
+      return;
+    }
+    timeParts.push(`${amount} ${label}`);
+  };
+  const hours = milliseconds / (1000 * 60 * 60);
+  const absoluteHours = Math.floor(hours);
+  addTimePart(absoluteHours, 'hours');
+  const minutes = (hours - absoluteHours) * 60;
+  const absoluteMinutes = Math.floor(minutes);
+  addTimePart(absoluteMinutes, 'minutes');
+  const seconds = (minutes - absoluteMinutes) * 60;
+  const absoluteSeconds = Math.floor(seconds);
+  addTimePart(absoluteSeconds, 'seconds');
+  return timeParts.join(', ');
 }
```
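
A quick worked example of the timeout budget under the push-time limit (assuming the workflow's `RANDOMIZED_URL_CHECK_LIMIT` of 10 and the 60-seconds-per-URL budget above):

```
// 10 URLs × 60 s × 1000 ms = 600,000 ms for the whole test run.
const testTimeoutInMs = 10 * 60 * 1000;
convertMillisecondsToHumanReadableFormat(testTimeoutInMs);
// => '10 minutes'
```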

New file: Shuffle unit tests

@@ -0,0 +1,52 @@
```
import { describe, it, expect } from 'vitest';
import { shuffle } from '@/application/Common/Shuffle';

describe('Shuffle', () => {
  describe('shuffle', () => {
    it('returns a new array', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result).not.to.equal(inputArray);
    });

    it('returns an array of the same length', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result.length).toBe(inputArray.length);
    });

    it('contains the same elements', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result).to.have.members(inputArray);
    });

    it('does not modify the input array', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      const inputArrayCopy = [...inputArray];
      // act
      shuffle(inputArray);
      // assert
      expect(inputArray).to.deep.equal(inputArrayCopy);
    });

    it('handles an empty array correctly', () => {
      // arrange
      const inputArray: string[] = [];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result).have.lengthOf(0);
    });
  });
});
```