Add schema validation for collection files #369

This commit improves collection file editing and error detection
directly in the IDE. It adds YAML schema, IDE configuration and
automatic tests to validate it.

- Introduce a YAML schema for collection files.
- Use `yaml-language-server` for enhanced YAML support in VSCode.
- Add telemetry disabling in `configure_vscode.py` to respect user
  privacy.
- Add automated checks to validate YAML file structure against the
  schema.
- Remove unused properties and disallow them in the compiler.
This commit is contained in:
undergroundwires
2024-06-17 14:01:07 +02:00
parent e9a52859f6
commit dc03bff324
19 changed files with 383 additions and 11 deletions

View File

@@ -74,3 +74,28 @@ jobs:
-
name: Analyzing the code with pylint
run: npm run lint:pylint
validate-collection-files:
runs-on: ${{ matrix.os }}-latest
strategy:
matrix:
os: [ macos, ubuntu, windows ]
fail-fast: false # Still interested to see results from other combinations
steps:
-
name: Checkout
uses: actions/checkout@v4
-
name: Setup node
uses: ./.github/actions/setup-node
-
name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'
-
name: Install dependencies
run: python3 -m pip install -r ./scripts/validate-collections-yaml/requirements.txt
-
name: Validate
run: python3 ./scripts/validate-collections-yaml

4
.gitignore vendored
View File

@@ -14,3 +14,7 @@ node_modules
# macOS
.DS_Store
# Python
__pycache__
.venv

View File

@@ -5,8 +5,10 @@
"wengerk.highlight-bad-chars", // Highlights bad chars.
"wayou.vscode-todo-highlight", // Highlights TODO.
"wix.vscode-import-cost", // Shows in KB how much a require include in code.
// Documentation
// Markdown
"davidanson.vscode-markdownlint", // Lints markdown.
// YAML
"redhat.vscode-yaml", // Lints YAML files, validates against schema.
// TypeScript / JavaScript
"dbaeumer.vscode-eslint", // Lints JavaScript/TypeScript.
"pmneo.tsimporter", // Provides better auto-complete for TypeScripts imports.

View File

@@ -1,11 +1,11 @@
# Collection files
privacy.sexy is a data-driven application that reads YAML files.
This document details the structure and syntax of the YAML files located in [`application/collections`](./../src/application/collections/), which form the backbone of the application's data model.
This document details the structure and syntax of the YAML files located in [`application/collections`](./../src/application/collections/), which form the backbone of the application's data model. The YAML schema [`.schema.yaml`](./../src/application/collections/.schema.yaml) improves IDE support and is used in automated validations.
Related documentation:
- 📖 [`collection.yaml.d.ts`](./../src/application/collections/collection.yaml.d.ts) outlines code types.
- 📖 [`Collections README`](./../src/application/collections/README.md) includes references to code as documentation.
- 📖 [Script Guidelines](./script-guidelines.md) provide guidance on script creation including best-practices.
## Objects

View File

@@ -80,8 +80,10 @@ See [ci-cd.md](./ci-cd.md) for more information.
- [**`npm run install-deps [-- <options>]`**](../scripts/npm-install.js):
- Manages NPM dependency installation; it offers capabilities like doing a fresh install, retries on network errors, and other features.
- For example, you can run `npm run install-deps -- --fresh` to do a clean installation of dependencies.
- [**`python ./scripts/configure_vscode.py`**](../scripts/configure_vscode.py):
- [**`python3 ./scripts/configure_vscode.py`**](../scripts/configure_vscode.py):
- Optimizes Visual Studio Code settings and installs essential extensions, enhancing the development environment.
- [**`python3 ./scripts/validate-collections-yaml`**](../scripts/validate-collections-yaml/README.md):
- Validates the syntax and structure of collection YAML files.
#### Automation scripts

View File

@@ -58,6 +58,10 @@ def add_or_update_settings() -> None:
# Details: # pylint: disable-next=line-too-long
# - https://archive.ph/2024.01.06-003914/https://github.com/microsoft/vscode/issues/179274, https://web.archive.org/web/20240106003915/https://github.com/microsoft/vscode/issues/179274
# Disable telemetry
configure_setting_key('redhat.telemetry.enabled', False)
configure_setting_key('gitlens.telemetry.enabled', False)
def configure_setting_key(configuration_key: str, desired_value: Any) -> None:
try:
with open(VSCODE_SETTINGS_JSON_FILE, 'r+', encoding='utf-8') as file:

View File

@@ -0,0 +1,51 @@
# validate-collections-yaml
This script validates YAML collection files against a predefined schema to ensure their integrity.
## Prerequisites
- Python 3.x installed on your system.
## Running in a Virtual Environment (Recommended)
Using a virtual environment isolates dependencies and prevents conflicts.
1. **Create a virtual environment:**
```bash
python3 -m venv ./scripts/validate-collections-yaml/.venv
```
2. **Activate the virtual environment:**
```bash
source ./scripts/validate-collections-yaml/.venv/bin/activate
```
3. **Install dependencies:**
```bash
python3 -m pip install -r ./scripts/validate-collections-yaml/requirements.txt
```
4. **Run the script:**
```bash
python3 ./scripts/validate-collections-yaml
```
## Running Globally
Running the script globally is not recommended because its dependencies may conflict with other Python packages installed on your system.
1. **Install dependencies:**
```bash
python3 -m pip install -r ./scripts/validate-collections-yaml/requirements.txt
```
2. **Run the script:**
```bash
python3 ./scripts/validate-collections-yaml
```

View File

@@ -0,0 +1,62 @@
"""
Description:
This script validates collection YAML files against the expected schema.
Usage:
python3 ./scripts/validate-collections-yaml
Notes:
This script requires the `jsonschema` and `pyyaml` packages (see requirements.txt).
"""
# pylint: disable=missing-function-docstring
from os import path
import sys
from glob import glob
from typing import List
from jsonschema import exceptions, validate # pylint: disable=import-error
import yaml # pylint: disable=import-error
# Schema file that every collection file is validated against.
# NOTE(review): both paths are relative to the current working directory; presumably the
# script must be started from the repository root (as in the usage example) — confirm.
SCHEMA_FILE_PATH = './src/application/collections/.schema.yaml'
# Glob pattern used to locate the collection files to validate.
COLLECTIONS_GLOB_PATTERN = './src/application/collections/*.yaml'
def main() -> None:
    """
    Validate every collection YAML file against the collection schema.

    Loads the schema from `SCHEMA_FILE_PATH`, locates collection files using
    `COLLECTIONS_GLOB_PATTERN` and validates them one by one. Every file is
    checked even if an earlier one fails, so all invalid files are reported
    in a single run.

    Exit codes:
        0: All collection files are valid.
        1: No collection file was found, or at least one file is invalid.
    """
    schema_yaml = read_file(SCHEMA_FILE_PATH)
    schema_json = convert_yaml_to_json(schema_yaml)
    collection_file_paths = find_collection_files(COLLECTIONS_GLOB_PATTERN)
    if not collection_file_paths:
        # Validating nothing must not be reported as success; otherwise a moved
        # or renamed collections directory would silently pass the check.
        print(
            f'Error: No collection files found matching "{COLLECTIONS_GLOB_PATTERN}".',
            file=sys.stderr,
        )
        sys.exit(1)
    print(f'Found {len(collection_file_paths)} YAML files to validate.')
    total_invalid_files = 0
    for collection_file_path in collection_file_paths:
        file_name = path.basename(collection_file_path)
        print(f'Validating {file_name}...')
        collection_yaml = read_file(collection_file_path)
        collection_json = convert_yaml_to_json(collection_yaml)
        try:
            validate(instance=collection_json, schema=schema_json)
            print(f'Success: {file_name} is valid.')
        except exceptions.ValidationError as err:
            # Keep going so that the remaining files are validated as well.
            print(f'Error: Validation failed for {file_name}.', file=sys.stderr)
            print(str(err), file=sys.stderr)
            total_invalid_files += 1
    if total_invalid_files > 0:
        print(f'Validation complete with {total_invalid_files} invalid files.', file=sys.stderr)
        sys.exit(1)
    print('Validation complete. All files are valid.')
    sys.exit(0)
def read_file(file_path: str) -> str:
    """Return the whole content of the file at `file_path`, decoded as UTF-8."""
    with open(file_path, mode='r', encoding='utf-8') as text_stream:
        content = text_stream.read()
    return content
def find_collection_files(glob_pattern: str) -> List[str]:
    """
    Return the paths matching `glob_pattern`, excluding files whose base name
    starts with a dot (for example `.schema.yaml`).
    """
    visible_files = []
    for candidate in glob(glob_pattern):
        if path.basename(candidate).startswith('.'):
            continue  # Dotfiles are not collection files
        visible_files.append(candidate)
    return visible_files
def convert_yaml_to_json(yaml_content: str) -> dict:
    """
    Parse YAML text with the safe loader and return the resulting Python
    object, i.e. whatever `yaml.safe_load` produces for `yaml_content`.
    """
    parsed_content = yaml.safe_load(yaml_content)
    return parsed_content
# Entry point: run the validation only when this file is executed as a script.
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,6 @@
attrs==23.2.0
jsonschema==4.22.0
jsonschema-specifications==2023.12.1
PyYAML==6.0.1
referencing==0.35.1
rpds-py==0.18.1

View File

@@ -0,0 +1,192 @@
# Schema Definition for Collection Files
# Purpose:
# - Defines the structure and data types for collection YAML files.
# - Enhances IDE support with features like auto-completion and error checking.
# - Used for automated validation of YAML files to ensure data integrity.
$schema: 'https://json-schema.org/draft/2020-12/schema'
$ref: '#/definitions/Collection'
definitions:
Collection:
type: object
additionalProperties: false
properties:
os:
type: string
enum: [windows, macos, linux]
scripting:
$ref: '#/definitions/ScriptingDefinition'
actions:
type: array
items:
$ref: '#/definitions/Category'
functions:
type: array
items:
$ref: '#/definitions/Function'
ScriptingDefinition:
type: object
additionalProperties: false
required: [language, startCode, endCode]
properties:
language:
type: string
startCode:
type: string
endCode:
type: string
Category:
type: object
allOf:
- $ref: '#/definitions/ExecutableDefinition'
unevaluatedProperties: false
required: [children, category]
properties:
children:
type: array
items:
$ref: '#/definitions/Executable'
category:
type: string
Executable:
oneOf:
- $ref: '#/definitions/Category'
- $ref: '#/definitions/Script'
ExecutableDefinition:
allOf:
- $ref: '#/definitions/Documentable'
Script:
type: object
unevaluatedProperties: false
anyOf:
- $ref: '#/definitions/CodeScript'
- $ref: '#/definitions/CallScript'
ScriptDefinition:
type: object
allOf:
- $ref: '#/definitions/ExecutableDefinition'
required: [name]
properties:
name:
type: string
recommend:
type: string
enum: [standard, strict]
CodeScript:
  # A script that carries its own code inline.
  # `allOf` (not `anyOf`): the instance must satisfy BOTH the common script
  # definition (required `name`) and the code instruction (required `code`),
  # mirroring how `CodeFunction` is composed. With `anyOf`, a script that
  # has only `name` or only `code` would be accepted.
  type: object
  unevaluatedProperties: false
  allOf:
    - $ref: '#/definitions/ScriptDefinition'
    - $ref: '#/definitions/CodeInstruction'
CallScript:
  # A script that delegates to one or more functions through `call`.
  # `allOf` (not `anyOf`): the instance must satisfy BOTH the common script
  # definition (required `name`) and the call instruction (required `call`),
  # mirroring how `CallFunction` is composed. With `anyOf`, a script that
  # has only `name` or only `call` would be accepted.
  type: object
  unevaluatedProperties: false
  allOf:
    - $ref: '#/definitions/ScriptDefinition'
    - $ref: '#/definitions/CallInstruction'
Documentable:
type: object
properties:
docs:
$ref: '#/definitions/Documentation'
Documentation:
unevaluatedProperties: false
oneOf:
- type: string
- type: array
items:
type: string
Function:
unevaluatedProperties: false
oneOf:
- $ref: '#/definitions/CodeFunction'
- $ref: '#/definitions/CallFunction'
FunctionDefinition:
type: object
required: [name]
properties:
name:
type: string
parameters:
type: array
items:
$ref: '#/definitions/ParameterDefinition'
docs:
type: string
ParameterDefinition:
required: [name]
unevaluatedProperties: false
properties:
name:
type: string
optional:
type: boolean
CodeFunction:
type: object
unevaluatedProperties: false
allOf:
- $ref: '#/definitions/FunctionDefinition'
- $ref: '#/definitions/CodeInstruction'
CallFunction:
type: object
unevaluatedProperties: false
allOf:
- $ref: '#/definitions/FunctionDefinition'
- $ref: '#/definitions/CallInstruction'
CodeInstruction:
type: object
required: [code]
properties:
code:
type: string
revertCode:
type: string
CallInstruction:
type: object
required: [call]
properties:
call:
$ref: '#/definitions/FunctionCalls'
FunctionCalls:
unevaluatedProperties: false
oneOf:
- $ref: '#/definitions/FunctionCall'
- type: array
items:
$ref: '#/definitions/FunctionCall'
FunctionCall:
type: object
required: [function]
unevaluatedProperties: false
properties:
function:
type: string
parameters:
$ref: '#/definitions/FunctionCallParameters'
FunctionCallParameters:
type: object
unevaluatedProperties: true
additionalProperties:
type: string

View File

@@ -0,0 +1,13 @@
# Collections
This directory contains the **collection files**, which are the central source of truth for scripts and categories within privacy.sexy.
In addition to the collection files, this folder contains two special files:
- [`.schema.yaml`](./.schema.yaml): Provides the schema definition for collection files.
- [`collection.yaml.d.ts`](./collection.yaml.d.ts): Defines TypeScript typings for the collection files.
## Additional documentation
- Refer to [`collection-files.md`](./../../../docs/collection-files.md) for details on the structure of these files.
- To validate these files, use the `validate-collections-yaml` script. For instructions, see its [`README.md`](./../../../scripts/validate-collections-yaml/README.md).

View File

@@ -16,7 +16,7 @@ declare module '@/application/collections/*' {
}
export type ExecutableData = CategoryData | ScriptData;
export type DocumentationData = ReadonlyArray<string> | string | undefined;
export type DocumentationData = ReadonlyArray<string> | string;
export interface DocumentableData {
readonly docs?: DocumentationData;
@@ -73,7 +73,6 @@ declare module '@/application/collections/*' {
export interface ScriptingDefinitionData {
readonly language: string;
readonly fileExtension: string;
readonly startCode: string;
readonly endCode: string;
}

View File

@@ -1,4 +1,8 @@
# yaml-language-server: $schema=./.schema.yaml
# ↑ Adds schema support in VS Code for auto-completion and validation.
# Structure is documented in "docs/collection-files.md"
os: linux
scripting:
language: shellscript

View File

@@ -1,4 +1,8 @@
# yaml-language-server: $schema=./.schema.yaml
# ↑ Adds schema support in VS Code for auto-completion and validation.
# Structure is documented in "docs/collection-files.md"
os: macos
scripting:
language: shellscript

View File

@@ -1,4 +1,8 @@
# yaml-language-server: $schema=./.schema.yaml
# ↑ Adds schema support in VS Code for auto-completion and validation.
# Structure is documented in "docs/collection-files.md"
os: windows
scripting:
language: batchfile

View File

@@ -21,7 +21,7 @@ describe('ScriptingDefinitionParser', () => {
const expectedAssertion: ObjectAssertion<ScriptingDefinitionData> = {
value: data,
valueName: 'scripting definition',
allowedProperties: ['language', 'fileExtension', 'startCode', 'endCode'],
allowedProperties: ['language', 'startCode', 'endCode'],
};
const validatorStub = new TypeValidatorStub();
const context = new TestContext()

View File

@@ -72,6 +72,7 @@ function createTestCases(collectionsDirFromRoot: string): ITestCase[] {
throw new Error(`Could not find any collection in ${collectionsDir}`);
}
const collectionFilePaths = fileNames
.filter((name) => !name.startsWith('.'))
.filter((name) => name.endsWith('.yaml'))
.map((name) => join(collectionsDir, name));
return collectionFilePaths.map((path) => ({

View File

@@ -47,7 +47,6 @@ export function getCategoryStub(scriptPrefix = 'testScript'): CategoryData {
function getTestDefinitionStub(): ScriptingDefinitionData {
return {
fileExtension: '.bat',
language: ScriptingLanguage[ScriptingLanguage.batchfile],
startCode: 'start',
endCode: 'end',

View File

@@ -19,7 +19,7 @@ export function createScriptDataWithCall(
} else {
instance = instance.withMockCall();
}
return instance;
return instance as ScriptDataStub & CallScriptData;
}
export function createScriptDataWithoutCallOrCodes(): ScriptDataStub {
@@ -50,12 +50,12 @@ class ScriptDataStub implements CallScriptData, CodeScriptData {
return this;
}
public withCode(code: string): this {
public withCode(code: string): this & CodeScriptData {
this.code = code;
return this;
}
public withRevertCode(revertCode: string | undefined): this {
public withRevertCode(revertCode: string | undefined): this & CodeScriptData {
this.revertCode = revertCode;
return this;
}