ci/cd: trigger URL checks more often, and limit the number of URLs checked

Fix all URL checks failing in GitHub runner due to:

- Missing Happy Eyeballs in Node.js: nodejs/undici#1531, nodejs/node#41625
- Missing IPv6 support in GitHub runners: actions/runner#3138, actions/runner-images#668

Tried (did not work):

1) ```
   import dns from 'dns';
   dns.setDefaultResultOrder('ipv4first');
   ```
2) Bumping node to v20.
3) TODO: Try autoSelectFamily.

- Or is it due to too many max connections? Test this. Mentioned in a comment on nodejs/node#41625.

Key changes:

- Run URL checks more frequently, on every change.
- Introduce an environment variable to randomly select and limit the URLs tested, so that the tests provide quicker feedback on code changes.

Other supporting changes:

- Log more information about the test before running it to enable easier troubleshooting.
- Move the shuffle function for arrays for reusability, and add its missing tests.
2024-03-19 20:20:41 +01:00
parent 287b8e61a0
commit 6720a46d2e
6 changed files with 221 additions and 40 deletions
@@ -0,0 +1,69 @@
+import type { IApplication } from '@/domain/IApplication';
+import type { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';
+
+/** Inputs needed to extract the documentation URLs of an application. */
+interface UrlExtractionContext {
+  /** Application whose categories and scripts are scanned for documentation URLs. */
+  readonly application: IApplication;
+  /** URLs matching any of these patterns are left out of the extracted URLs. */
+  readonly urlExclusionPatterns: ReadonlyArray<RegExp>;
+  /** Receives labeled information about each extraction step. */
+  readonly logger: TestExecutionDetailsLogger;
+}
+
+/**
+ * Extracts the documentation URLs of an application that should be tested.
+ *
+ * Collects all URLs, removes duplicates, then removes URLs matching any exclusion
+ * pattern, and logs the outcome of every step using the logger of the context.
+ *
+ * @param context Application to read, exclusion patterns to apply and logger to use.
+ * @returns Unique URLs that match none of the exclusion patterns.
+ */
+export function extractDocumentationUrls(
+  context: UrlExtractionContext,
+): string[] {
+  const { logger, application, urlExclusionPatterns } = context;
+
+  const allUrls = extractUrlsFromApplication(application);
+  logger.logLabeledInformation('Extracted URLs from application', `${allUrls.length}`);
+
+  const distinctUrls = filterDuplicateUrls(allUrls);
+  logger.logLabeledInformation(
+    'Unique URLs after deduplication',
+    `${distinctUrls.length} (duplicates removed)`,
+  );
+
+  let patternSummary = 'None (all URLs included)';
+  if (urlExclusionPatterns.length !== 0) {
+    patternSummary = urlExclusionPatterns
+      .map((pattern, index) => `${index + 1}) ${pattern.toString()}`)
+      .join('\n');
+  }
+  logger.logLabeledInformation('Exclusion patterns for URLs', patternSummary);
+
+  const testableUrls = filterUrlsExcludingPatterns(distinctUrls, urlExclusionPatterns);
+  const ignoredUrlCount = distinctUrls.length - testableUrls.length;
+  logger.logLabeledInformation(
+    'URLs extracted for testing',
+    `${testableUrls.length} (after applying exclusion patterns; ${ignoredUrlCount} URLs ignored)`,
+  );
+  return testableUrls;
+}
+
+/**
+ * Gathers every URL found in the docs of all categories and scripts
+ * of all collections of the application (duplicates included).
+ */
+function extractUrlsFromApplication(application: IApplication): string[] {
+  const { collections } = application;
+  const categories = collections.flatMap((collection) => collection.getAllCategories());
+  const scripts = collections.flatMap((collection) => collection.getAllScripts());
+  // Categories come first, then scripts, so the URL order stays stable
+  const docs = [...categories, ...scripts].flatMap((executable) => executable.docs);
+  return docs.flatMap((doc) => extractUrlsExcludingCodeBlocks(doc));
+}
+
+/**
+ * Removes repeated URLs, keeping the first occurrence of each URL in its original order.
+ *
+ * @param urls URLs that may contain duplicates.
+ * @returns A new array holding each distinct URL once.
+ */
+function filterDuplicateUrls(urls: readonly string[]): string[] {
+  // A Set keeps insertion order and deduplicates in linear time,
+  // unlike scanning the array with `indexOf` for every element.
+  return [...new Set(urls)];
+}
+
+/**
+ * Keeps only the URLs that match none of the given patterns.
+ *
+ * @param urls URLs to filter.
+ * @param patterns Exclusion patterns; a URL matching any of them is dropped.
+ * @returns A new array with the URLs that are not excluded, in their original order.
+ */
+function filterUrlsExcludingPatterns(
+  urls: readonly string[],
+  patterns: readonly RegExp[],
+): string[] {
+  // `String.prototype.search` always searches from the start and leaves `lastIndex`
+  // untouched, whereas `RegExp.prototype.test` resumes from `lastIndex` for global or
+  // sticky patterns and would therefore miss every other matching URL.
+  const isExcluded = (url: string) => patterns.some((pattern) => url.search(pattern) !== -1);
+  return urls.filter((url) => !isExcluded(url));
+}
+
+/**
+ * Finds the HTTP(S) URLs in a text.
+ *
+ * - A URL directly preceded by a backtick is skipped: inline code may contain URLs
+ *   that are not intended for user interaction and are not guaranteed to support the
+ *   expected HTTP methods, which would lead to false-negatives.
+ * - A URL ends at whitespace, a backtick, a double quote, an angle bracket or a
+ *   parenthesis, except that a single parenthesized part is accepted inside the URL.
+ *
+ * @returns The matched URLs in order of appearance; empty when there is none.
+ */
+function extractUrlsExcludingCodeBlocks(textWithInlineCode: string): string[] {
+  const urlOutsideInlineCodePattern = /(?<!`)(https?:\/\/[^\s`"<>()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g;
+  const matches = textWithInlineCode.matchAll(urlOutsideInlineCodePattern);
+  return Array.from(matches, (match) => match[0]);
+}
@@ -0,0 +1,26 @@
+import { indentText } from '@tests/shared/Text';
+
+/** Prints details about a test run to the console to ease troubleshooting. */
+export class TestExecutionDetailsLogger {
+  /** Prints the line that opens a section of test details. */
+  public logTestSectionStartDelimiter(): void {
+    this.logSectionDelimiterLine();
+  }
+
+  /** Prints the line that closes a section of test details. */
+  public logTestSectionEndDelimiter(): void {
+    this.logSectionDelimiterLine();
+  }
+
+  /**
+   * Prints a label followed by its details on the next line(s).
+   *
+   * @param label Title of the information; a colon is appended to it.
+   * @param detailedInformation Text printed below the label after being indented.
+   */
+  public logLabeledInformation(
+    label: string,
+    detailedInformation: string,
+  ): void {
+    const heading = `${label}:`;
+    const details = indentText(detailedInformation);
+    console.log(`${heading}\n${details}`);
+  }
+
+  /** Prints a horizontal line of 40 box-drawing characters. */
+  private logSectionDelimiterLine(): void {
+    console.log('─'.repeat(40));
+  }
+}
@@ -1,19 +1,26 @@
 import { test, expect } from 'vitest';
 import { parseApplication } from '@/application/Parser/ApplicationParser';
-import type { IApplication } from '@/domain/IApplication';
 import { indentText } from '@tests/shared/Text';
 import { formatAssertionMessage } from '@tests/shared/FormatAssertionMessage';
+import { shuffle } from '@/application/Common/Shuffle';
 import { type UrlStatus, formatUrlStatus } from './StatusChecker/UrlStatus';
 import { getUrlStatusesInParallel, type BatchRequestOptions } from './StatusChecker/BatchStatusChecker';
+import { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';
+import { extractDocumentationUrls } from './DocumentationUrlExtractor';

 // arrange
+const logger = new TestExecutionDetailsLogger();
+logger.logTestSectionStartDelimiter();
 const app = parseApplication();
-const urls = collectUniqueUrls({
-  application: app,
-  excludePatterns: [
+let urls = extractDocumentationUrls({
+  logger,
+  urlExclusionPatterns: [
    /^https:\/\/archive\.ph/, // Drops HEAD/GET requests via fetch/curl, responding to Postman/Chromium.
  ],
+  application: app,
 });
+urls = filterUrlsToEnvironmentCheckLimit(urls);
+logger.logLabeledInformation('URLs submitted for testing', urls.length.toString());
 const requestOptions: BatchRequestOptions = {
  domainOptions: {
    sameDomainParallelize: false, // be nice to our third-party servers
@@ -30,55 +37,65 @@ const requestOptions: BatchRequestOptions = {
    enableCookies: true,
  },
 };
+logger.logLabeledInformation('HTTP request options', JSON.stringify(requestOptions, null, 2));
 const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000;
+logger.logLabeledInformation('Scheduled test duration', convertMillisecondsToHumanReadableFormat(testTimeoutInMs));
+logger.logTestSectionEndDelimiter();
 test(`all URLs (${urls.length}) should be alive`, async () => {
  // act
+  console.log('URLS', urls); // TODO: Delete
  const results = await getUrlStatusesInParallel(urls, requestOptions);
  // assert
  const deadUrls = results.filter((r) => r.code === undefined || !isOkStatusCode(r.code));
-  expect(deadUrls).to.have.lengthOf(0, formatAssertionMessage([formatUrlStatusReport(deadUrls)]));
+  expect(deadUrls).to.have.lengthOf(
+    0,
+    formatAssertionMessage([createReportForDeadUrlStatuses(deadUrls)]),
+  );
 }, testTimeoutInMs);

 /** Tells whether an HTTP status code is a successful one, i.e. from 200 up to (excluding) 300. */
 function isOkStatusCode(statusCode: number): boolean {
  const lowestOkCode = 200;
  const firstNonOkCode = 300;
  return lowestOkCode <= statusCode && statusCode < firstNonOkCode;
 }

-function collectUniqueUrls(
-  options: {
-    readonly application: IApplication,
-    readonly excludePatterns?: readonly RegExp[],
-  },
-): string[] {
-  return [ // Get all nodes
-    ...options.application.collections.flatMap((c) => c.getAllCategories()),
-    ...options.application.collections.flatMap((c) => c.getAllScripts()),
-  ]
-    // Get all docs
-    .flatMap((documentable) => documentable.docs)
-    // Parse all URLs
-    .flatMap((docString) => extractUrls(docString))
-    // Remove duplicates
-    .filter((url, index, array) => array.indexOf(url) === index)
-    // Exclude certain URLs based on patterns
-    .filter((url) => !shouldExcludeUrl(url, options.excludePatterns ?? []));
-}
-
-function shouldExcludeUrl(url: string, patterns: readonly RegExp[]): boolean {
-  return patterns.some((pattern) => pattern.test(url));
-}
-
-function formatUrlStatusReport(deadUrlStatuses: readonly UrlStatus[]): string {
+/**
+ * Builds the text listing the given URL statuses for the assertion message.
+ * Each status is formatted and indented; statuses are separated by a `---` line
+ * and the whole report is wrapped in line breaks.
+ */
+function createReportForDeadUrlStatuses(deadUrlStatuses: readonly UrlStatus[]): string {
  const formattedStatuses = deadUrlStatuses.map((status) => indentText(formatUrlStatus(status)));
  return `\n${formattedStatuses.join('\n---\n')}\n`;
 }

-function extractUrls(textWithInlineCode: string): string[] {
-  /*
-    Matches URLs:
-    - Excludes inline code blocks as they may contain URLs not intended for user interaction
-      and not guaranteed to support expected HTTP methods, leading to false-negatives.
-    - Supports URLs containing parentheses, avoiding matches within code that might not represent
-      actual links.
-  */
-  const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g;
-  return textWithInlineCode.match(nonCodeBlockUrlRegex) || [];
+/**
+ * Limits the URLs to test to a random selection when the
+ * `RANDOMIZED_URL_CHECK_LIMIT` environment variable is set.
+ *
+ * @param originalUrls All URLs that are candidates for testing.
+ * @returns The given URLs when the variable is unset, empty, or not lower than the
+ * number of URLs; otherwise a shuffled selection holding "limit" URLs.
+ * @throws Error when the variable is set to anything but a positive whole number.
+ */
+function filterUrlsToEnvironmentCheckLimit(originalUrls: string[]): string[] {
+  const { RANDOMIZED_URL_CHECK_LIMIT } = process.env;
+  logger.logLabeledInformation('URL check limit', RANDOMIZED_URL_CHECK_LIMIT || 'Unlimited');
+  if (RANDOMIZED_URL_CHECK_LIMIT === undefined || RANDOMIZED_URL_CHECK_LIMIT === '') {
+    return originalUrls;
+  }
+  // `parseInt` alone accepts values such as "10abc" or "1.5", and a negative limit
+  // would make `slice(0, limit)` drop URLs from the end instead of capping the amount.
+  // A limit of zero is rejected too, as testing no URLs would always pass.
+  const isWholeNumber = /^\d+$/.test(RANDOMIZED_URL_CHECK_LIMIT.trim());
+  const maxUrlsInTest = isWholeNumber ? parseInt(RANDOMIZED_URL_CHECK_LIMIT, 10) : NaN;
+  if (!Number.isInteger(maxUrlsInTest) || maxUrlsInTest <= 0) {
+    throw new Error(`Invalid URL limit: ${RANDOMIZED_URL_CHECK_LIMIT}`);
+  }
+  if (maxUrlsInTest >= originalUrls.length) {
+    return originalUrls;
+  }
+  return shuffle(originalUrls).slice(0, maxUrlsInTest);
+}
+
+/**
+ * Formats a duration as a comma-separated list of its hours, minutes and seconds,
+ * e.g. `1 hours, 30 minutes`. Parts that are zero are left out.
+ *
+ * @param milliseconds Duration to format; fractions of a second are discarded.
+ * @returns The formatted duration, or `0 seconds` when it is shorter than a second.
+ */
+function convertMillisecondsToHumanReadableFormat(milliseconds: number): string {
+  // Split whole seconds with integer division and remainders. Deriving each unit
+  // from the fractional rest of the previous one accumulates floating-point error
+  // and can floor a unit one too low.
+  const totalSeconds = Math.floor(milliseconds / 1000);
+  const timeParts = [
+    { amount: Math.floor(totalSeconds / 3600), label: 'hours' },
+    { amount: Math.floor((totalSeconds % 3600) / 60), label: 'minutes' },
+    { amount: totalSeconds % 60, label: 'seconds' },
+  ]
+    .filter((part) => part.amount !== 0)
+    .map((part) => `${part.amount} ${part.label}`);
+
+  if (timeParts.length === 0) {
+    return '0 seconds'; // Avoids logging an empty duration
+  }
+  return timeParts.join(', ');
 }
@@ -0,0 +1,52 @@
+import { describe, it, expect } from 'vitest';
+import { shuffle } from '@/application/Common/Shuffle';
+
+// Verifies the contract of `shuffle` that does not depend on the random order:
+// it returns a new array with the same elements and leaves its input untouched.
+describe('Shuffle', () => {
+  describe('shuffle', () => {
+    // Creates a fresh input for every test so that no test can affect another
+    const createInput = (): string[] => ['a', 'b', 'c', 'd'];
+
+    it('returns a new array', () => {
+      // arrange
+      const input = createInput();
+      // act
+      const shuffled = shuffle(input);
+      // assert
+      expect(shuffled).not.to.equal(input);
+    });
+
+    it('returns an array of the same length', () => {
+      // arrange
+      const input = createInput();
+      // act
+      const shuffled = shuffle(input);
+      // assert
+      expect(shuffled.length).toBe(input.length);
+    });
+
+    it('contains the same elements', () => {
+      // arrange
+      const input = createInput();
+      // act
+      const shuffled = shuffle(input);
+      // assert
+      expect(shuffled).to.have.members(input);
+    });
+
+    it('does not modify the input array', () => {
+      // arrange
+      const input = createInput();
+      const snapshot = [...input];
+      // act
+      shuffle(input);
+      // assert
+      expect(input).to.deep.equal(snapshot);
+    });
+
+    it('handles an empty array correctly', () => {
+      // arrange
+      const emptyInput: string[] = [];
+      // act
+      const shuffled = shuffle(emptyInput);
+      // assert
+      expect(shuffled).have.lengthOf(0);
+    });
+  });
+});