Refactor to enforce strictNullChecks
This commit applies `strictNullChecks` to the entire codebase to improve maintainability and type safety. Key changes include:
- Remove some explicit null-checks where unnecessary.
- Add necessary null-checks.
- Refactor static factory functions for a more functional approach.
- Improve some test names and contexts for better debugging.
- Add unit tests for any additional logic introduced.
- Refactor `createPositionFromRegexFullMatch` to its own function as the logic is reused.
- Prefer the `find` prefix for functions that may return `undefined` and the `get` prefix for those that always return a value.
This commit is contained in:
@@ -9,9 +9,9 @@ export async function getUrlStatusesInParallel(
|
||||
): Promise<IUrlStatus[]> {
|
||||
// urls = [ 'https://privacy.sexy' ]; // Here to comment out when testing
|
||||
const uniqueUrls = Array.from(new Set(urls));
|
||||
options = { ...DefaultOptions, ...options };
|
||||
console.log('Options: ', options);
|
||||
const results = await request(uniqueUrls, options);
|
||||
const defaultedOptions = { ...DefaultOptions, ...options };
|
||||
console.log('Options: ', defaultedOptions);
|
||||
const results = await request(uniqueUrls, defaultedOptions);
|
||||
return results;
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ interface IDomainOptions {
|
||||
sameDomainDelayInMs?: number;
|
||||
}
|
||||
|
||||
const DefaultOptions: IBatchRequestOptions = {
|
||||
const DefaultOptions: Required<IBatchRequestOptions> = {
|
||||
domainOptions: {
|
||||
sameDomainParallelize: false,
|
||||
sameDomainDelayInMs: 3 /* sec */ * 1000,
|
||||
@@ -39,7 +39,7 @@ const DefaultOptions: IBatchRequestOptions = {
|
||||
|
||||
function request(
|
||||
urls: string[],
|
||||
options: IBatchRequestOptions,
|
||||
options: Required<IBatchRequestOptions>,
|
||||
): Promise<IUrlStatus[]> {
|
||||
if (!options.domainOptions.sameDomainParallelize) {
|
||||
return runOnEachDomainWithDelay(
|
||||
@@ -54,7 +54,7 @@ function request(
|
||||
async function runOnEachDomainWithDelay(
|
||||
urls: string[],
|
||||
action: (url: string) => Promise<IUrlStatus>,
|
||||
delayInMs: number,
|
||||
delayInMs: number | undefined,
|
||||
): Promise<IUrlStatus[]> {
|
||||
const grouped = groupUrlsByDomain(urls);
|
||||
const tasks = grouped.map(async (group) => {
|
||||
@@ -64,7 +64,9 @@ async function runOnEachDomainWithDelay(
|
||||
const status = await action(url);
|
||||
results.push(status);
|
||||
if (results.length !== group.length) {
|
||||
await sleep(delayInMs);
|
||||
if (delayInMs !== undefined) {
|
||||
await sleep(delayInMs);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* eslint-enable no-await-in-loop */
|
||||
|
||||
@@ -26,6 +26,9 @@ function shouldRetry(status: IUrlStatus) {
|
||||
if (status.error) {
|
||||
return true;
|
||||
}
|
||||
if (status.code === undefined) {
|
||||
return true;
|
||||
}
|
||||
return isTransientError(status.code)
|
||||
|| status.code === 429; // Too Many Requests
|
||||
}
|
||||
|
||||
@@ -4,19 +4,22 @@ export function fetchFollow(
|
||||
url: string,
|
||||
timeoutInMs: number,
|
||||
fetchOptions: RequestInit,
|
||||
followOptions: IFollowOptions,
|
||||
followOptions: IFollowOptions | undefined,
|
||||
): Promise<Response> {
|
||||
followOptions = { ...DefaultOptions, ...followOptions };
|
||||
if (followRedirects(followOptions)) {
|
||||
const defaultedFollowOptions = {
|
||||
...DefaultFollowOptions,
|
||||
...followOptions,
|
||||
};
|
||||
if (followRedirects(defaultedFollowOptions)) {
|
||||
return fetchWithTimeout(url, timeoutInMs, fetchOptions);
|
||||
}
|
||||
fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */ };
|
||||
const cookies = new CookieStorage(followOptions.enableCookies);
|
||||
const cookies = new CookieStorage(defaultedFollowOptions.enableCookies);
|
||||
return followRecursivelyWithCookies(
|
||||
url,
|
||||
timeoutInMs,
|
||||
fetchOptions,
|
||||
followOptions.maximumRedirectFollowDepth,
|
||||
defaultedFollowOptions.maximumRedirectFollowDepth,
|
||||
cookies,
|
||||
);
|
||||
}
|
||||
@@ -27,7 +30,7 @@ export interface IFollowOptions {
|
||||
enableCookies?: boolean;
|
||||
}
|
||||
|
||||
const DefaultOptions: IFollowOptions = {
|
||||
export const DefaultFollowOptions: Required<IFollowOptions> = {
|
||||
followRedirects: true,
|
||||
maximumRedirectFollowDepth: 20,
|
||||
enableCookies: true,
|
||||
@@ -53,9 +56,14 @@ async function followRecursivelyWithCookies(
|
||||
if (newFollowDepth < 0) {
|
||||
throw new Error(`[max-redirect] maximum redirect reached at: ${url}`);
|
||||
}
|
||||
const cookieHeader = response.headers.get('set-cookie');
|
||||
cookies.addHeader(cookieHeader);
|
||||
const nextUrl = response.headers.get('location');
|
||||
if (!nextUrl) {
|
||||
return response;
|
||||
}
|
||||
const cookieHeader = response.headers.get('set-cookie');
|
||||
if (cookieHeader) {
|
||||
cookies.addHeader(cookieHeader);
|
||||
}
|
||||
return followRecursivelyWithCookies(nextUrl, timeoutInMs, options, newFollowDepth, cookies);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,48 +1,49 @@
|
||||
import { retryWithExponentialBackOff } from './ExponentialBackOffRetryHandler';
|
||||
import { IUrlStatus } from './IUrlStatus';
|
||||
import { fetchFollow, IFollowOptions } from './FetchFollow';
|
||||
import { fetchFollow, IFollowOptions, DefaultFollowOptions } from './FetchFollow';
|
||||
import { getRandomUserAgent } from './UserAgents';
|
||||
|
||||
export function getUrlStatus(
|
||||
url: string,
|
||||
options: IRequestOptions = DefaultOptions,
|
||||
): Promise<IUrlStatus> {
|
||||
options = { ...DefaultOptions, ...options };
|
||||
const fetchOptions = getFetchOptions(url, options);
|
||||
const defaultedOptions = { ...DefaultOptions, ...options };
|
||||
const fetchOptions = getFetchOptions(url, defaultedOptions);
|
||||
return retryWithExponentialBackOff(async () => {
|
||||
console.log('Requesting', url);
|
||||
let result: IUrlStatus;
|
||||
try {
|
||||
const response = await fetchFollow(
|
||||
url,
|
||||
options.requestTimeoutInMs,
|
||||
defaultedOptions.requestTimeoutInMs,
|
||||
fetchOptions,
|
||||
options.followOptions,
|
||||
defaultedOptions.followOptions,
|
||||
);
|
||||
result = { url, code: response.status };
|
||||
} catch (err) {
|
||||
result = { url, error: JSON.stringify(err, null, '\t') };
|
||||
}
|
||||
return result;
|
||||
}, options.retryExponentialBaseInMs);
|
||||
}, defaultedOptions.retryExponentialBaseInMs);
|
||||
}
|
||||
|
||||
export interface IRequestOptions {
|
||||
retryExponentialBaseInMs?: number;
|
||||
additionalHeaders?: Record<string, string>;
|
||||
additionalHeadersUrlIgnore?: string[];
|
||||
followOptions?: IFollowOptions;
|
||||
requestTimeoutInMs: number;
|
||||
readonly retryExponentialBaseInMs?: number;
|
||||
readonly additionalHeaders?: Record<string, string>;
|
||||
readonly additionalHeadersUrlIgnore?: string[];
|
||||
readonly followOptions?: IFollowOptions;
|
||||
readonly requestTimeoutInMs: number;
|
||||
}
|
||||
|
||||
const DefaultOptions: IRequestOptions = {
|
||||
const DefaultOptions: Required<IRequestOptions> = {
|
||||
retryExponentialBaseInMs: 5000,
|
||||
additionalHeaders: {},
|
||||
additionalHeadersUrlIgnore: [],
|
||||
requestTimeoutInMs: 60 /* seconds */ * 1000,
|
||||
followOptions: DefaultFollowOptions,
|
||||
};
|
||||
|
||||
function getFetchOptions(url: string, options: IRequestOptions): RequestInit {
|
||||
function getFetchOptions(url: string, options: Required<IRequestOptions>): RequestInit {
|
||||
const additionalHeaders = options.additionalHeadersUrlIgnore
|
||||
.some((ignorePattern) => url.startsWith(ignorePattern))
|
||||
? {}
|
||||
|
||||
Reference in New Issue
Block a user