ci/cd: trigger URL checks more often and limit their number

Fix all URL checks failing in GitHub runners due to:

- Missing Happy Eyeballs support in Node.js:
  nodejs/undici#1531
  nodejs/node#41625
- Missing IPv6 support in GitHub runners:
  actions/runner#3138
  actions/runner-images#668

Tried (did not work):

1) Forcing IPv4-first DNS resolution:

```
import dns from 'dns';
dns.setDefaultResultOrder('ipv4first');
```

2) Bumping node to v20.

3) TODO: Try autoSelectFamily (see the sketch below).

- Or is it due to too many max connections? Test this.
  Mentioned in a comment on nodejs/node#41625.
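
A possible starting point for (3), sketched but untested here; it assumes the checks run on Node.js 19.4 or newer, where `node:net` exposes the default auto-selection switches, and it is unverified whether the HTTP client used by the status checker honors these defaults:

```
// Untested sketch: opt in to Happy Eyeballs-style (RFC 8305) address
// selection before any HTTP request is issued.
import net from 'node:net';

// Available since Node.js v19.4.0: try IPv6 and IPv4 candidates
// concurrently instead of failing on the first unreachable family.
net.setDefaultAutoSelectFamily(true);

// Optional: per-attempt timeout in milliseconds (slow CI runners may need more).
net.setDefaultAutoSelectFamilyAttemptTimeout(500);
```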

Key changes:

- Run URL checks on every push instead of only on the weekly schedule.
- Introduce an environment variable that randomly selects and limits the
  URLs tested, so the tests provide quicker feedback on code changes.

Other supporting changes:

- Log more information about the test before running it to make
  troubleshooting easier.
- Move the array shuffle function into shared code for reusability and add
  the missing tests.

Author: undergroundwires
Date: 2024-03-19 20:20:41 +01:00
Parent: 287b8e61a0
Commit: 6720a46d2e
6 changed files with 221 additions and 40 deletions

Changed file: GitHub Actions workflow "checks.external-urls"

```
@@ -1,6 +1,7 @@
 name: checks.external-urls

 on:
+  push:
   schedule:
     - cron: '0 0 * * 0' # at 00:00 on every Sunday
@@ -20,3 +21,7 @@ jobs:
       -
         name: Test
         run: npm run check:external-urls
+        env:
+          RANDOMIZED_URL_CHECK_LIMIT: "${{ github.event_name == 'push' && '10' || '' }}"
+          # - Scheduled checks have no limit, ensuring thorough testing.
+          # - Push events, triggered by code changes, limit the number of URLs to provide quick feedback.
```

New file: Shuffle (shared array shuffle utility, imported as '@/application/Common/Shuffle')

@@ -0,0 +1,12 @@
```
/*
  Shuffles an array, returning a new array with the elements in random order.
  Uses the Fisher-Yates (or Durstenfeld) algorithm.
*/
export function shuffle<T>(array: readonly T[]): T[] {
  const shuffledArray = [...array];
  for (let i = array.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [shuffledArray[i], shuffledArray[j]] = [shuffledArray[j], shuffledArray[i]];
  }
  return shuffledArray;
}
```
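
For context, this shuffle is what the URL check uses for random sampling; a minimal usage sketch with made-up URLs and a hypothetical limit:

```
import { shuffle } from '@/application/Common/Shuffle';

// Take a random sample without mutating the source list,
// mirroring how the randomized URL check limit is applied.
const allUrls = ['https://a.example', 'https://b.example', 'https://c.example'];
const maxUrls = 2; // hypothetical limit
const sampledUrls = shuffle(allUrls).slice(0, maxUrls);
console.log(sampledUrls); // e.g. ['https://c.example', 'https://a.example']
```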

New file: DocumentationUrlExtractor (test helper)

@@ -0,0 +1,69 @@
```
import type { IApplication } from '@/domain/IApplication';
import type { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';

interface UrlExtractionContext {
  readonly logger: TestExecutionDetailsLogger;
  readonly application: IApplication;
  readonly urlExclusionPatterns: readonly RegExp[];
}

export function extractDocumentationUrls(
  context: UrlExtractionContext,
): string[] {
  const urlsInApplication = extractUrlsFromApplication(context.application);
  context.logger.logLabeledInformation(
    'Extracted URLs from application',
    urlsInApplication.length.toString(),
  );
  const uniqueUrls = filterDuplicateUrls(urlsInApplication);
  context.logger.logLabeledInformation(
    'Unique URLs after deduplication',
    `${uniqueUrls.length} (duplicates removed)`,
  );
  context.logger.logLabeledInformation(
    'Exclusion patterns for URLs',
    context.urlExclusionPatterns.length === 0
      ? 'None (all URLs included)'
      : context.urlExclusionPatterns.map((pattern, index) => `${index + 1}) ${pattern.toString()}`).join('\n'),
  );
  const includedUrls = filterUrlsExcludingPatterns(uniqueUrls, context.urlExclusionPatterns);
  context.logger.logLabeledInformation(
    'URLs extracted for testing',
    `${includedUrls.length} (after applying exclusion patterns; ${uniqueUrls.length - includedUrls.length} URLs ignored)`,
  );
  return includedUrls;
}

function extractUrlsFromApplication(application: IApplication): string[] {
  return [ // Get all executables
    ...application.collections.flatMap((c) => c.getAllCategories()),
    ...application.collections.flatMap((c) => c.getAllScripts()),
  ]
    // Get all docs
    .flatMap((documentable) => documentable.docs)
    // Parse all URLs
    .flatMap((docString) => extractUrlsExcludingCodeBlocks(docString));
}

function filterDuplicateUrls(urls: readonly string[]): string[] {
  return urls.filter((url, index, array) => array.indexOf(url) === index);
}

function filterUrlsExcludingPatterns(
  urls: readonly string[],
  patterns: readonly RegExp[],
): string[] {
  return urls.filter((url) => !patterns.some((pattern) => pattern.test(url)));
}

function extractUrlsExcludingCodeBlocks(textWithInlineCode: string): string[] {
  /*
    Matches URLs:
    - Excludes inline code blocks as they may contain URLs not intended for user interaction
      and not guaranteed to support expected HTTP methods, leading to false-negatives.
    - Supports URLs containing parentheses, avoiding matches within code that might not represent
      actual links.
  */
  const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g;
  return textWithInlineCode.match(nonCodeBlockUrlRegex) || [];
}
```
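
The URL-matching behavior is easiest to see with a couple of made-up inputs (illustrative only; these are not part of the committed tests, and the function is module-private):

```
// A URL in prose is captured, including a parenthesized path segment:
extractUrlsExcludingCodeBlocks(
  'See https://en.wikipedia.org/wiki/URL_(disambiguation) for details.',
);
// => ['https://en.wikipedia.org/wiki/URL_(disambiguation)']

// A URL wrapped as inline code (immediately after a backtick) is skipped
// thanks to the negative lookbehind:
extractUrlsExcludingCodeBlocks('Query `https://internal.example/api` manually.');
// => []
```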

New file: TestExecutionDetailsLogger (test helper)

@@ -0,0 +1,26 @@
```
import { indentText } from '@tests/shared/Text';

export class TestExecutionDetailsLogger {
  public logTestSectionStartDelimiter(): void {
    this.logSectionDelimiterLine();
  }

  public logTestSectionEndDelimiter(): void {
    this.logSectionDelimiterLine();
  }

  public logLabeledInformation(
    label: string,
    detailedInformation: string,
  ): void {
    console.log([
      `${label}:`,
      indentText(detailedInformation),
    ].join('\n'));
  }

  private logSectionDelimiterLine(): void {
    const horizontalLine = '─'.repeat(40);
    console.log(horizontalLine);
  }
}
```
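
Roughly how the logger output looks (the exact indentation comes from `indentText`, so the two spaces below are illustrative):

```
const logger = new TestExecutionDetailsLogger();
logger.logTestSectionStartDelimiter();
logger.logLabeledInformation('URL check limit', '10');
logger.logTestSectionEndDelimiter();
// Console output:
// ────────────────────────────────────────
// URL check limit:
//   10
// ────────────────────────────────────────
```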

Changed file: external URL check test

```
@@ -1,19 +1,26 @@
 import { test, expect } from 'vitest';
 import { parseApplication } from '@/application/Parser/ApplicationParser';
-import type { IApplication } from '@/domain/IApplication';
 import { indentText } from '@tests/shared/Text';
 import { formatAssertionMessage } from '@tests/shared/FormatAssertionMessage';
+import { shuffle } from '@/application/Common/Shuffle';
 import { type UrlStatus, formatUrlStatus } from './StatusChecker/UrlStatus';
 import { getUrlStatusesInParallel, type BatchRequestOptions } from './StatusChecker/BatchStatusChecker';
+import { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';
+import { extractDocumentationUrls } from './DocumentationUrlExtractor';

 // arrange
+const logger = new TestExecutionDetailsLogger();
+logger.logTestSectionStartDelimiter();
 const app = parseApplication();
-const urls = collectUniqueUrls({
-  application: app,
-  excludePatterns: [
+let urls = extractDocumentationUrls({
+  logger,
+  urlExclusionPatterns: [
     /^https:\/\/archive\.ph/, // Drops HEAD/GET requests via fetch/curl, responding to Postman/Chromium.
   ],
+  application: app,
 });
+urls = filterUrlsToEnvironmentCheckLimit(urls);
+logger.logLabeledInformation('URLs submitted for testing', urls.length.toString());
 const requestOptions: BatchRequestOptions = {
   domainOptions: {
     sameDomainParallelize: false, // be nice to our third-party servers
@@ -30,55 +37,65 @@ const requestOptions: BatchRequestOptions = {
     enableCookies: true,
   },
 };
+logger.logLabeledInformation('HTTP request options', JSON.stringify(requestOptions, null, 2));
 const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000;
+logger.logLabeledInformation('Scheduled test duration', convertMillisecondsToHumanReadableFormat(testTimeoutInMs));
+logger.logTestSectionEndDelimiter();

 test(`all URLs (${urls.length}) should be alive`, async () => {
   // act
+  console.log('URLS', urls); // TODO: Delete
   const results = await getUrlStatusesInParallel(urls, requestOptions);

   // assert
   const deadUrls = results.filter((r) => r.code === undefined || !isOkStatusCode(r.code));
-  expect(deadUrls).to.have.lengthOf(0, formatAssertionMessage([formatUrlStatusReport(deadUrls)]));
+  expect(deadUrls).to.have.lengthOf(
+    0,
+    formatAssertionMessage([createReportForDeadUrlStatuses(deadUrls)]),
+  );
 }, testTimeoutInMs);

 function isOkStatusCode(statusCode: number): boolean {
   return statusCode >= 200 && statusCode < 300;
 }

-function collectUniqueUrls(
-  options: {
-    readonly application: IApplication,
-    readonly excludePatterns?: readonly RegExp[],
-  },
-): string[] {
-  return [ // Get all nodes
-    ...options.application.collections.flatMap((c) => c.getAllCategories()),
-    ...options.application.collections.flatMap((c) => c.getAllScripts()),
-  ]
-    // Get all docs
-    .flatMap((documentable) => documentable.docs)
-    // Parse all URLs
-    .flatMap((docString) => extractUrls(docString))
-    // Remove duplicates
-    .filter((url, index, array) => array.indexOf(url) === index)
-    // Exclude certain URLs based on patterns
-    .filter((url) => !shouldExcludeUrl(url, options.excludePatterns ?? []));
-}
-
-function shouldExcludeUrl(url: string, patterns: readonly RegExp[]): boolean {
-  return patterns.some((pattern) => pattern.test(url));
-}
-
-function formatUrlStatusReport(deadUrlStatuses: readonly UrlStatus[]): string {
+function createReportForDeadUrlStatuses(deadUrlStatuses: readonly UrlStatus[]): string {
   return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`;
 }

-function extractUrls(textWithInlineCode: string): string[] {
-  /*
-    Matches URLs:
-    - Excludes inline code blocks as they may contain URLs not intended for user interaction
-      and not guaranteed to support expected HTTP methods, leading to false-negatives.
-    - Supports URLs containing parentheses, avoiding matches within code that might not represent
-      actual links.
-  */
-  const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g;
-  return textWithInlineCode.match(nonCodeBlockUrlRegex) || [];
+function filterUrlsToEnvironmentCheckLimit(originalUrls: string[]): string[] {
+  const { RANDOMIZED_URL_CHECK_LIMIT } = process.env;
+  logger.logLabeledInformation('URL check limit', RANDOMIZED_URL_CHECK_LIMIT || 'Unlimited');
+  if (RANDOMIZED_URL_CHECK_LIMIT !== undefined && RANDOMIZED_URL_CHECK_LIMIT !== '') {
+    const maxUrlsInTest = parseInt(RANDOMIZED_URL_CHECK_LIMIT, 10);
+    if (Number.isNaN(maxUrlsInTest)) {
+      throw new Error(`Invalid URL limit: ${RANDOMIZED_URL_CHECK_LIMIT}`);
+    }
+    if (maxUrlsInTest < originalUrls.length) {
+      return shuffle(originalUrls).slice(0, maxUrlsInTest);
+    }
+  }
+  return originalUrls;
+}
+
+function convertMillisecondsToHumanReadableFormat(milliseconds: number): string {
+  const timeParts: string[] = [];
+  const addTimePart = (amount: number, label: string) => {
+    if (amount === 0) {
+      return;
+    }
+    timeParts.push(`${amount} ${label}`);
+  };
+  const hours = milliseconds / (1000 * 60 * 60);
+  const absoluteHours = Math.floor(hours);
+  addTimePart(absoluteHours, 'hours');
+  const minutes = (hours - absoluteHours) * 60;
+  const absoluteMinutes = Math.floor(minutes);
+  addTimePart(absoluteMinutes, 'minutes');
+  const seconds = (minutes - absoluteMinutes) * 60;
+  const absoluteSeconds = Math.floor(seconds);
+  addTimePart(absoluteSeconds, 'seconds');
+  return timeParts.join(', ');
 }
```
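
A quick worked example of the timeout budget under the push-time limit (assuming the workflow's `RANDOMIZED_URL_CHECK_LIMIT` of 10 and the 60-seconds-per-URL budget above):

```
// 10 URLs × 60 s × 1000 ms = 600,000 ms for the whole test run.
const testTimeoutInMs = 10 * 60 * 1000;
convertMillisecondsToHumanReadableFormat(testTimeoutInMs);
// => '10 minutes'
```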

New file: Shuffle unit tests

@@ -0,0 +1,52 @@
```
import { describe, it, expect } from 'vitest';
import { shuffle } from '@/application/Common/Shuffle';

describe('Shuffle', () => {
  describe('shuffle', () => {
    it('returns a new array', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result).not.to.equal(inputArray);
    });

    it('returns an array of the same length', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result.length).toBe(inputArray.length);
    });

    it('contains the same elements', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result).to.have.members(inputArray);
    });

    it('does not modify the input array', () => {
      // arrange
      const inputArray = ['a', 'b', 'c', 'd'];
      const inputArrayCopy = [...inputArray];
      // act
      shuffle(inputArray);
      // assert
      expect(inputArray).to.deep.equal(inputArrayCopy);
    });

    it('handles an empty array correctly', () => {
      // arrange
      const inputArray: string[] = [];
      // act
      const result = shuffle(inputArray);
      // assert
      expect(result).have.lengthOf(0);
    });
  });
});
```