Fix compiler failing with nested with expression

The previous implementation of `WithParser` used regex, which struggles
with parsing nested structures correctly. This commit improves
`WithParser` to track and parse all nested `with` expressions.

Other improvements:

- Throw meaningful errors when syntax is wrong, replacing the prior
  behavior of silently ignoring such issues.
- Remove `I` prefix from related interfaces to align with newer code
  conventions.
- Add more unit tests for `with` expression.
- Improve documentation for templating.
- `ExpressionRegexBuilder`:
  - Use words `capture` and `match` correctly.
  - Fix minor issues revealed by new and improved tests:
     - Change regex for matching anything except surrounding
       whitespaces. The new regex ensures that it works even without
       having any preceding text.
     - Change regex for capturing pipelines. The old regex was only
       matching (non-greedy) first character of the pipeline in tests,
       new regex matches the full pipeline.
- `ExpressionRegexBuilder.spec.ts`:
  - Ensure consistent way to define `describe` and `it` blocks.
  - Replace `expectRegex` tests; regex expectations test the internal
    behavior of the class, not its external behavior.
  - Simplify tests by eliminating the need for UUID suffixes/prefixes.
This commit is contained in:
undergroundwires
2023-10-25 19:39:12 +02:00
parent dfd4451561
commit 80821fca07
7 changed files with 976 additions and 421 deletions

View File

@@ -1,126 +1,295 @@
import { randomUUID } from 'crypto';
import { describe, it, expect } from 'vitest';
import { ExpressionRegexBuilder } from '@/application/Parser/Script/Compiler/Expressions/Parser/Regex/ExpressionRegexBuilder';
const AllWhitespaceCharacters = ' \t\n\r\v\f\u00A0';
describe('ExpressionRegexBuilder', () => {
describe('expectCharacters', () => {
describe('escape single as expected', () => {
const charactersToEscape = ['.', '$'];
for (const character of charactersToEscape) {
it(character, () => {
expectRegex(
// act
describe('expectCharacters', () => {
describe('escapes single character as expected', () => {
const charactersToEscape = ['.', '$'];
for (const character of charactersToEscape) {
it(`escapes ${character} as expected`, () => expectMatch(
character,
(act) => act.expectCharacters(character),
// assert
`\\${character}`,
);
});
}
});
it('escapes multiple as expected', () => {
expectRegex(
// act
`${character}`,
));
}
});
it('escapes multiple characters as expected', () => expectMatch(
'.I have no $$.',
(act) => act.expectCharacters('.I have no $$.'),
// assert
'\\.I have no \\$\\$\\.',
);
});
it('adds as expected', () => {
expectRegex(
// act
(act) => act.expectCharacters('return as it is'),
// assert
'.I have no $$.',
));
it('adds characters as expected', () => expectMatch(
'return as it is',
);
(act) => act.expectCharacters('return as it is'),
'return as it is',
));
});
});
it('expectOneOrMoreWhitespaces', () => {
expectRegex(
// act
describe('expectOneOrMoreWhitespaces', () => {
it('matches one whitespace', () => expectMatch(
' ',
(act) => act.expectOneOrMoreWhitespaces(),
// assert
'\\s+',
);
' ',
));
it('matches multiple whitespaces', () => expectMatch(
AllWhitespaceCharacters,
(act) => act.expectOneOrMoreWhitespaces(),
AllWhitespaceCharacters,
));
it('matches whitespaces inside text', () => expectMatch(
`start${AllWhitespaceCharacters}end`,
(act) => act.expectOneOrMoreWhitespaces(),
AllWhitespaceCharacters,
));
it('does not match non-whitespace characters', () => expectNonMatch(
'a',
(act) => act.expectOneOrMoreWhitespaces(),
));
});
it('matchPipeline', () => {
expectRegex(
// act
(act) => act.matchPipeline(),
// assert
'\\s*(\\|\\s*.+?)?',
);
// Verifies the optional pipeline part of a pattern: what is captured starts at the
// pipe character (`|`), and text or whitespace in front of the pipe is left out.
describe('captureOptionalPipeline', () => {
  it('does not capture when no pipe is present', () => expectNonMatch(
    'noPipeHere',
    (act) => act.captureOptionalPipeline(),
  ));
  it('captures when input starts with pipe', () => expectCapture(
    '| afterPipe',
    (act) => act.captureOptionalPipeline(),
    '| afterPipe',
  ));
  // This case used to be registered twice with an identical title and body;
  // it is kept once so that a failure is reported a single time.
  it('ignores text before the pipe', () => expectCapture(
    'stuff before | afterPipe',
    (act) => act.captureOptionalPipeline(),
    '| afterPipe',
  ));
  it('ignores whitespaces before the pipe', () => expectCapture(
    ' | afterPipe',
    (act) => act.captureOptionalPipeline(),
    '| afterPipe',
  ));
  it('ignores text after whitespace', () => expectCapture(
    '| first Pipe',
    (act) => act.captureOptionalPipeline(),
    '| first ',
  ));
  describe('non-greedy matching', () => { // so the rest of the pattern can work
    it('non-letter character in pipe', () => expectCapture(
      '| firstPipe | sec0ndpipe',
      (act) => act.captureOptionalPipeline(),
      '| firstPipe ',
    ));
  });
});
it('matchUntilFirstWhitespace', () => {
expectRegex(
// act
(act) => act.matchUntilFirstWhitespace(),
// assert
'([^|\\s]+)',
);
it('matches until first whitespace', () => expectMatch(
describe('captureUntilWhitespaceOrPipe', () => {
it('captures until first whitespace', () => expectCapture(
// arrange
'first second',
'first ',
// act
(act) => act.matchUntilFirstWhitespace(),
(act) => act.captureUntilWhitespaceOrPipe(),
// assert
'first',
));
});
describe('matchMultilineAnythingExceptSurroundingWhitespaces', () => {
it('returns expected regex', () => expectRegex(
// act
(act) => act.matchMultilineAnythingExceptSurroundingWhitespaces(),
// assert
'\\s*([\\S\\s]+?)\\s*',
));
it('matches single line', () => expectMatch(
it('captures until first pipe', () => expectCapture(
// arrange
'single line',
'first|',
// act
(act) => act.matchMultilineAnythingExceptSurroundingWhitespaces(),
(act) => act.captureUntilWhitespaceOrPipe(),
// assert
'single line',
'first',
));
it('matches single line without surrounding whitespaces', () => expectMatch(
it('captures all without whitespace or pipe', () => expectCapture(
// arrange
' single line\t',
'all',
// act
(act) => act.matchMultilineAnythingExceptSurroundingWhitespaces(),
(act) => act.captureUntilWhitespaceOrPipe(),
// assert
'single line',
));
it('matches multiple lines', () => expectMatch(
// arrange
'first line\nsecond line',
// act
(act) => act.matchMultilineAnythingExceptSurroundingWhitespaces(),
// assert
'first line\nsecond line',
));
it('matches multiple lines without surrounding whitespaces', () => expectMatch(
// arrange
' first line\nsecond line\t',
// act
(act) => act.matchMultilineAnythingExceptSurroundingWhitespaces(),
// assert
'first line\nsecond line',
'all',
));
});
it('expectExpressionStart', () => {
expectRegex(
// act
// Verifies the multiline capture: the captured text keeps its inner whitespace and
// line breaks, while whitespace surrounding the text is excluded from the capture.
describe('captureMultilineAnythingExceptSurroundingWhitespaces', () => {
  type CaptureCase = {
    readonly title: string;
    readonly input: string;
    readonly expectedCapture: string;
  };
  // Every case exercises the same single-part pattern.
  const buildPattern = (
    act: ExpressionRegexBuilder,
  ) => act.captureMultilineAnythingExceptSurroundingWhitespaces();
  // Registers one test per table row, in table order.
  const registerCaptureTests = (testCases: readonly CaptureCase[]): void => {
    for (const { title, input, expectedCapture } of testCases) {
      it(title, () => expectCapture(input, buildPattern, expectedCapture));
    }
  };
  describe('single line', () => {
    registerCaptureTests([
      {
        title: 'captures a line without surrounding whitespaces',
        input: 'line',
        expectedCapture: 'line',
      },
      {
        title: 'captures a line with internal whitespaces intact',
        input: `start${AllWhitespaceCharacters}end`,
        expectedCapture: `start${AllWhitespaceCharacters}end`,
      },
      {
        title: 'excludes surrounding whitespaces',
        input: `${AllWhitespaceCharacters}single line\t`,
        expectedCapture: 'single line',
      },
    ]);
  });
  describe('multiple lines', () => {
    registerCaptureTests([
      {
        title: 'captures text across multiple lines',
        input: 'first line\nsecond line\r\nthird-line',
        expectedCapture: 'first line\nsecond line\r\nthird-line',
      },
      {
        title: 'captures text with empty lines in between',
        input: 'start\n\nend',
        expectedCapture: 'start\n\nend',
      },
      {
        title: 'excludes surrounding whitespaces from multiline text',
        input: ` first line\nsecond line${AllWhitespaceCharacters}`,
        expectedCapture: 'first line\nsecond line',
      },
    ]);
  });
  describe('edge cases', () => {
    it('does not capture for input with only whitespaces', () => expectNonCapture(
      AllWhitespaceCharacters,
      buildPattern,
    ));
  });
});
describe('expectExpressionStart', () => {
it('matches expression start without trailing whitespaces', () => expectMatch(
'{{expression',
(act) => act.expectExpressionStart(),
// assert
'{{\\s*',
);
'{{',
));
it('matches expression start with trailing whitespaces', () => expectMatch(
`{{${AllWhitespaceCharacters}expression`,
(act) => act.expectExpressionStart(),
`{{${AllWhitespaceCharacters}`,
));
it('does not match whitespaces not directly after expression start', () => expectMatch(
' {{expression',
(act) => act.expectExpressionStart(),
'{{',
));
it('does not match if expression start is not present', () => expectNonMatch(
'noExpressionStartHere',
(act) => act.expectExpressionStart(),
));
});
it('expectExpressionEnd', () => {
expectRegex(
// act
// Verifies the expression end part: `}}` is matched together with any whitespace
// directly in front of it, and nothing after the closing braces is included.
describe('expectExpressionEnd', () => {
  it('matches expression end without preceding whitespaces', () => expectMatch(
    'expression}}',
    (act) => act.expectExpressionEnd(),
    '}}',
  ));
  it('matches expression end with preceding whitespaces', () => expectMatch(
    `expression${AllWhitespaceCharacters}}}`,
    (act) => act.expectExpressionEnd(),
    `${AllWhitespaceCharacters}}}`,
  ));
  // Titled "match" (not "capture") because the assertion inspects the full match,
  // mirroring the equivalent test for the expression start.
  it('does not match whitespaces not directly before expression end', () => expectMatch(
    'expression}} ',
    (act) => act.expectExpressionEnd(),
    '}}',
  ));
  it('does not match if expression end is not present', () => expectNonMatch(
    'noExpressionEndHere',
    (act) => act.expectExpressionEnd(),
  ));
});
// Verifies the optional-whitespace part against each kind of whitespace character,
// and checks that inputs made only of non-whitespace characters yield no match.
describe('expectOptionalWhitespaces', () => {
  // Every case exercises the same single-part pattern.
  const buildPattern = (act: ExpressionRegexBuilder) => act.expectOptionalWhitespaces();
  describe('matching', () => {
    const matchingCases: ReadonlyArray<{
      readonly title: string;
      readonly input: string;
      readonly expectedMatch: string;
    }> = [
      { title: 'matches multiple Unix lines', input: '\n\n', expectedMatch: '\n\n' },
      { title: 'matches multiple Windows lines', input: '\r\n', expectedMatch: '\r\n' },
      { title: 'matches multiple spaces', input: ' ', expectedMatch: ' ' },
      { title: 'matches horizontal and vertical tabs', input: '\t\v', expectedMatch: '\t\v' },
      { title: 'matches form feed character', input: '\f', expectedMatch: '\f' },
      { title: 'matches a non-breaking space character', input: '\u00A0', expectedMatch: '\u00A0' },
      {
        title: 'matches a combination of whitespace characters',
        input: AllWhitespaceCharacters,
        expectedMatch: AllWhitespaceCharacters,
      },
      {
        title: 'matches whitespace characters on different positions',
        input: '\ta\nb\rc\v',
        expectedMatch: '\t\n\r\v',
      },
    ];
    for (const { title, input, expectedMatch } of matchingCases) {
      it(title, () => expectMatch(input, buildPattern, expectedMatch));
    }
  });
  describe('non-matching', () => {
    const nonMatchingCases: ReadonlyArray<{
      readonly title: string;
      readonly input: string;
    }> = [
      { title: 'a non-whitespace character', input: 'a' },
      { title: 'multiple non-whitespace characters', input: 'abc' },
    ];
    for (const { title, input } of nonMatchingCases) {
      it(title, () => expectNonMatch(input, buildPattern));
    }
  });
});
describe('buildRegExp', () => {
it('sets global flag', () => {
@@ -134,84 +303,126 @@ describe('ExpressionRegexBuilder', () => {
expect(actual).to.equal(expected);
});
describe('can combine multiple parts', () => {
it('with', () => {
expectRegex(
(sut) => sut
// act
// {{ with $variable }}
.expectExpressionStart()
.expectCharacters('with')
.expectOneOrMoreWhitespaces()
.expectCharacters('$')
.matchUntilFirstWhitespace()
.expectExpressionEnd()
// scope
.matchMultilineAnythingExceptSurroundingWhitespaces()
// {{ end }}
.expectExpressionStart()
.expectCharacters('end')
.expectExpressionEnd(),
// assert
'{{\\s*with\\s+\\$([^|\\s]+)\\s*}}\\s*([\\S\\s]+?)\\s*{{\\s*end\\s*}}',
);
});
it('scoped substitution', () => {
expectRegex(
(sut) => sut
// act
.expectExpressionStart().expectCharacters('.')
.matchPipeline()
.expectExpressionEnd(),
// assert
'{{\\s*\\.\\s*(\\|\\s*.+?)?\\s*}}',
);
});
it('parameter substitution', () => {
expectRegex(
(sut) => sut
// act
.expectExpressionStart().expectCharacters('$')
.matchUntilFirstWhitespace()
.matchPipeline()
.expectExpressionEnd(),
// assert
'{{\\s*\\$([^|\\s]+)\\s*(\\|\\s*.+?)?\\s*}}',
);
});
it('combines character and whitespace expectations', () => expectMatch(
'abc def',
(act) => act
.expectCharacters('abc')
.expectOneOrMoreWhitespaces()
.expectCharacters('def'),
'abc def',
));
it('captures optional pipeline and text after it', () => expectCapture(
'abc | def',
(act) => act
.expectCharacters('abc ')
.captureOptionalPipeline(),
'| def',
));
it('combines multiline capture with optional whitespaces', () => expectCapture(
'\n abc \n',
(act) => act
.expectOptionalWhitespaces()
.captureMultilineAnythingExceptSurroundingWhitespaces()
.expectOptionalWhitespaces(),
'abc',
));
it('combines expression start, optional whitespaces, and character expectation', () => expectMatch(
'{{ abc',
(act) => act
.expectExpressionStart()
.expectOptionalWhitespaces()
.expectCharacters('abc'),
'{{ abc',
));
it('combines character expectation, optional whitespaces, and expression end', () => expectMatch(
'abc }}',
(act) => act
.expectCharacters('abc')
.expectOptionalWhitespaces()
.expectExpressionEnd(),
'abc }}',
));
});
});
});
function expectRegex(
act: (sut: ExpressionRegexBuilder) => ExpressionRegexBuilder,
expected: string,
) {
/**
 * Position inside a single regex match result that a test helper inspects.
 * The helpers below use it to index each entry produced by `String.prototype.matchAll`.
 */
enum MatchGroupIndex {
// Index 0 of a match result holds the whole text matched by the pattern.
FullMatch = 0,
// Index 1 of a match result holds the text of the first capturing group.
FirstCapturingGroup = 1,
}
/**
 * Asserts what the first capturing group of the built regex captures in `input`.
 * Delegates to `expectMatch`, selecting `MatchGroupIndex.FirstCapturingGroup`
 * instead of the full match.
 *
 * NOTE(review): this diff view interleaves removed and added lines; the statements
 * using `sut`, `actual` and `expected` appear to be leftovers of the removed
 * `expectRegex` helper rather than part of this function — confirm against the
 * committed file.
 *
 * @param input - Text the built regex is run against.
 * @param act - Adds the pattern parts under test to the builder it receives.
 * @param expectedCombinedCaptures - Expected concatenation of the captured texts,
 *   or `undefined` when nothing is expected to be captured.
 */
function expectCapture(
input: string,
act: (regexBuilder: ExpressionRegexBuilder) => ExpressionRegexBuilder,
expectedCombinedCaptures: string | undefined,
): void {
// arrange
const sut = new ExpressionRegexBuilder();
const matchGroupIndex = MatchGroupIndex.FirstCapturingGroup;
// act
const actual = act(sut).buildRegExp().source;
// assert
expect(actual).to.equal(expected);
expectMatch(input, act, expectedCombinedCaptures, matchGroupIndex);
}
/**
 * Asserts that the regex built by `act` yields nothing for `input` at the
 * inspected match position.
 *
 * Implemented as `expectMatch` with an expected value of `undefined`, which is the
 * value `expectMatch` computes when no non-empty match text is found.
 *
 * @param input - Text the built regex is run against.
 * @param act - Adds the pattern parts under test to the builder it receives.
 * @param matchGroupIndex - Which entry of each match to inspect; defaults to the full match.
 */
function expectNonMatch(
  input: string,
  act: (sut: ExpressionRegexBuilder) => ExpressionRegexBuilder,
  matchGroupIndex = MatchGroupIndex.FullMatch,
): void {
  const nothingExpected = undefined;
  expectMatch(input, act, nothingExpected, matchGroupIndex);
}
/**
 * Asserts that the first capturing group of the regex built by `act` captures
 * nothing in `input`.
 *
 * Same check as `expectNonMatch`, but inspecting the first capturing group rather
 * than the full match.
 *
 * @param input - Text the built regex is run against.
 * @param act - Adds the pattern parts under test to the builder it receives.
 */
function expectNonCapture(
  input: string,
  act: (sut: ExpressionRegexBuilder) => ExpressionRegexBuilder,
): void {
  const inspectedGroup = MatchGroupIndex.FirstCapturingGroup;
  expectNonMatch(input, act, inspectedGroup);
}
/**
 * Asserts what the regex produced by `act` matches (or captures) in `input`.
 *
 * The regex is built on a fresh `ExpressionRegexBuilder` and run over `input` with
 * `matchAll`. The entry at `matchGroupIndex` of every match is collected, falsy
 * entries are dropped, and the remaining texts are concatenated into one string
 * that is compared with `expectedCombinedMatches`. When nothing remains, the
 * combined value is `undefined`, which is how callers assert "no match".
 *
 * NOTE(review): this diff view interleaves removed and added lines; the
 * UUID-marker statements (`startMarker`, `markedInput`, `markedRegex`, `match`)
 * and the `expectedMatch` parameter appear to belong to the previous
 * implementation — confirm against the committed file.
 *
 * @param input - Text the built regex is run against.
 * @param act - Adds the pattern parts under test to the builder it receives.
 * @param expectedCombinedMatches - Expected concatenation of the matched texts,
 *   or `undefined` to expect that nothing is matched.
 * @param matchGroupIndex - Which entry of each match to inspect; defaults to the full match.
 */
function expectMatch(
input: string,
act: (sut: ExpressionRegexBuilder) => ExpressionRegexBuilder,
expectedMatch: string,
) {
act: (regexBuilder: ExpressionRegexBuilder) => ExpressionRegexBuilder,
expectedCombinedMatches: string | undefined,
matchGroupIndex = MatchGroupIndex.FullMatch,
): void {
// arrange
const [startMarker, endMarker] = [randomUUID(), randomUUID()];
const markedInput = `${startMarker}${input}${endMarker}`;
const builder = new ExpressionRegexBuilder()
.expectCharacters(startMarker);
act(builder);
const markedRegex = builder.expectCharacters(endMarker).buildRegExp();
const regexBuilder = new ExpressionRegexBuilder();
act(regexBuilder);
const regex = regexBuilder.buildRegExp();
// act
const match = Array.from(markedInput.matchAll(markedRegex))
.filter((matches) => matches.length > 1)
.map((matches) => matches[1])
.filter(Boolean)
.join();
const allMatchGroups = Array.from(input.matchAll(regex));
// assert
expect(match).to.equal(expectedMatch);
// Keep only matches that have an entry at the inspected position, then take that entry.
const actualMatches = allMatchGroups
.filter((matches) => matches.length > matchGroupIndex)
.map((matches) => matches[matchGroupIndex])
.filter(Boolean) // drops empty full matches (`""`) and capture groups that did not participate (`undefined`)
.flat();
// An empty result is reported as `undefined` (not `''`) so it can be compared with "no match" expectations.
const actualCombinedMatches = actualMatches.length ? actualMatches.join('') : undefined;
expect(actualCombinedMatches).equal(
expectedCombinedMatches,
// Failure message listing every value involved in the comparison to ease debugging.
[
'\n\n---',
'Expected combined matches:',
getTestDataText(expectedCombinedMatches),
'Actual combined matches:',
getTestDataText(actualCombinedMatches),
'Input:',
getTestDataText(input),
'Regex:',
getTestDataText(regex.toString()),
'All match groups:',
getTestDataText(JSON.stringify(allMatchGroups)),
`Match index in group: ${matchGroupIndex}`,
'---\n\n',
].join('\n'),
);
}
/**
 * Renders a piece of test data as one indented line of an assertion failure message.
 *
 * A string is shown between backticks in its JSON-escaped form, so characters such
 * as line breaks and tabs appear as visible escape sequences (`\n`, `\t`).
 *
 * @param data - Text to render, or `undefined` when there is no value to show.
 * @returns The rendered line, prefixed with a tab and `> `.
 */
function getTestDataText(data: string | undefined): string {
  const linePrefix = '\t> ';
  const renderedValue = data === undefined
    ? 'undefined (no matches)'
    // `JSON.stringify` wraps the text in double quotes; strip them and keep the escaped content.
    : `\`${JSON.stringify(data).slice(1, -1)}\``;
  return linePrefix + renderedValue;
}