Refactor to enforce strictNullChecks

This commit applies `strictNullChecks` to the entire codebase to improve
maintainability and type safety. Key changes include:

- Remove explicit null-checks that are unnecessary.
- Add necessary null-checks.
- Refactor static factory functions for a more functional approach.
- Improve some test names and contexts for better debugging.
- Add unit tests for any additional logic introduced.
- Extract `createPositionFromRegexFullMatch` into its own function, as
  the logic is reused.
- Prefer the `find` prefix for functions that may return `undefined` and
  the `get` prefix for those that always return a value.
This commit is contained in:
undergroundwires
2023-11-12 22:54:00 +01:00
parent 7ab16ecccb
commit 949fac1a7c
294 changed files with 2477 additions and 2738 deletions

View File

@@ -9,9 +9,9 @@ export async function getUrlStatusesInParallel(
): Promise<IUrlStatus[]> {
// urls = [ 'https://privacy.sexy' ]; // Here to comment out when testing
const uniqueUrls = Array.from(new Set(urls));
options = { ...DefaultOptions, ...options };
console.log('Options: ', options);
const results = await request(uniqueUrls, options);
const defaultedOptions = { ...DefaultOptions, ...options };
console.log('Options: ', defaultedOptions);
const results = await request(uniqueUrls, defaultedOptions);
return results;
}
@@ -25,7 +25,7 @@ interface IDomainOptions {
sameDomainDelayInMs?: number;
}
const DefaultOptions: IBatchRequestOptions = {
const DefaultOptions: Required<IBatchRequestOptions> = {
domainOptions: {
sameDomainParallelize: false,
sameDomainDelayInMs: 3 /* sec */ * 1000,
@@ -39,7 +39,7 @@ const DefaultOptions: IBatchRequestOptions = {
function request(
urls: string[],
options: IBatchRequestOptions,
options: Required<IBatchRequestOptions>,
): Promise<IUrlStatus[]> {
if (!options.domainOptions.sameDomainParallelize) {
return runOnEachDomainWithDelay(
@@ -54,7 +54,7 @@ function request(
async function runOnEachDomainWithDelay(
urls: string[],
action: (url: string) => Promise<IUrlStatus>,
delayInMs: number,
delayInMs: number | undefined,
): Promise<IUrlStatus[]> {
const grouped = groupUrlsByDomain(urls);
const tasks = grouped.map(async (group) => {
@@ -64,7 +64,9 @@ async function runOnEachDomainWithDelay(
const status = await action(url);
results.push(status);
if (results.length !== group.length) {
await sleep(delayInMs);
if (delayInMs !== undefined) {
await sleep(delayInMs);
}
}
}
/* eslint-enable no-await-in-loop */

View File

@@ -26,6 +26,9 @@ function shouldRetry(status: IUrlStatus) {
if (status.error) {
return true;
}
if (status.code === undefined) {
return true;
}
return isTransientError(status.code)
|| status.code === 429; // Too Many Requests
}

View File

@@ -4,19 +4,22 @@ export function fetchFollow(
url: string,
timeoutInMs: number,
fetchOptions: RequestInit,
followOptions: IFollowOptions,
followOptions: IFollowOptions | undefined,
): Promise<Response> {
followOptions = { ...DefaultOptions, ...followOptions };
if (followRedirects(followOptions)) {
const defaultedFollowOptions = {
...DefaultFollowOptions,
...followOptions,
};
if (followRedirects(defaultedFollowOptions)) {
return fetchWithTimeout(url, timeoutInMs, fetchOptions);
}
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */ };
const cookies = new CookieStorage(followOptions.enableCookies);
const cookies = new CookieStorage(defaultedFollowOptions.enableCookies);
return followRecursivelyWithCookies(
url,
timeoutInMs,
fetchOptions,
followOptions.maximumRedirectFollowDepth,
defaultedFollowOptions.maximumRedirectFollowDepth,
cookies,
);
}
@@ -27,7 +30,7 @@ export interface IFollowOptions {
enableCookies?: boolean;
}
const DefaultOptions: IFollowOptions = {
export const DefaultFollowOptions: Required<IFollowOptions> = {
followRedirects: true,
maximumRedirectFollowDepth: 20,
enableCookies: true,
@@ -53,9 +56,14 @@ async function followRecursivelyWithCookies(
if (newFollowDepth < 0) {
throw new Error(`[max-redirect] maximum redirect reached at: ${url}`);
}
const cookieHeader = response.headers.get('set-cookie');
cookies.addHeader(cookieHeader);
const nextUrl = response.headers.get('location');
if (!nextUrl) {
return response;
}
const cookieHeader = response.headers.get('set-cookie');
if (cookieHeader) {
cookies.addHeader(cookieHeader);
}
return followRecursivelyWithCookies(nextUrl, timeoutInMs, options, newFollowDepth, cookies);
}

View File

@@ -1,48 +1,49 @@
import { retryWithExponentialBackOff } from './ExponentialBackOffRetryHandler';
import { IUrlStatus } from './IUrlStatus';
import { fetchFollow, IFollowOptions } from './FetchFollow';
import { fetchFollow, IFollowOptions, DefaultFollowOptions } from './FetchFollow';
import { getRandomUserAgent } from './UserAgents';
export function getUrlStatus(
url: string,
options: IRequestOptions = DefaultOptions,
): Promise<IUrlStatus> {
options = { ...DefaultOptions, ...options };
const fetchOptions = getFetchOptions(url, options);
const defaultedOptions = { ...DefaultOptions, ...options };
const fetchOptions = getFetchOptions(url, defaultedOptions);
return retryWithExponentialBackOff(async () => {
console.log('Requesting', url);
let result: IUrlStatus;
try {
const response = await fetchFollow(
url,
options.requestTimeoutInMs,
defaultedOptions.requestTimeoutInMs,
fetchOptions,
options.followOptions,
defaultedOptions.followOptions,
);
result = { url, code: response.status };
} catch (err) {
result = { url, error: JSON.stringify(err, null, '\t') };
}
return result;
}, options.retryExponentialBaseInMs);
}, defaultedOptions.retryExponentialBaseInMs);
}
export interface IRequestOptions {
retryExponentialBaseInMs?: number;
additionalHeaders?: Record<string, string>;
additionalHeadersUrlIgnore?: string[];
followOptions?: IFollowOptions;
requestTimeoutInMs: number;
readonly retryExponentialBaseInMs?: number;
readonly additionalHeaders?: Record<string, string>;
readonly additionalHeadersUrlIgnore?: string[];
readonly followOptions?: IFollowOptions;
readonly requestTimeoutInMs: number;
}
const DefaultOptions: IRequestOptions = {
const DefaultOptions: Required<IRequestOptions> = {
retryExponentialBaseInMs: 5000,
additionalHeaders: {},
additionalHeadersUrlIgnore: [],
requestTimeoutInMs: 60 /* seconds */ * 1000,
followOptions: DefaultFollowOptions,
};
function getFetchOptions(url: string, options: IRequestOptions): RequestInit {
function getFetchOptions(url: string, options: Required<IRequestOptions>): RequestInit {
const additionalHeaders = options.additionalHeadersUrlIgnore
.some((ignorePattern) => url.startsWith(ignorePattern))
? {}