Add ruleset packs, linter, fixtures, and JSON schema

This commit is contained in:
2025-12-31 22:21:43 -05:00
parent dddac108fe
commit 21bb7cae5a
16 changed files with 475 additions and 174 deletions

View File

@@ -0,0 +1,113 @@
use std::collections::{HashMap, HashSet};
use regex::Regex;
use super::model::{FixTier, Rule};
/// Accumulated findings produced by linting a set of rules.
///
/// A freshly defaulted report has no errors and no warnings.
#[derive(Debug, Default, Clone)]
pub struct LintReport {
    /// Messages for findings recorded as errors.
    pub errors: Vec<String>,
    /// Messages for findings recorded as warnings.
    pub warnings: Vec<String>,
}

impl LintReport {
    /// Returns `true` when at least one error message has been recorded.
    ///
    /// Warnings are not taken into account.
    pub fn has_errors(&self) -> bool {
        let Self { errors, .. } = self;
        !errors.is_empty()
    }
}
/// Checks every rule in `rules` and collects all findings into a [`LintReport`].
///
/// Linting never stops early: each rule is run through every check so the
/// report lists all problems at once.
///
/// Recorded as errors:
/// - an id that is empty after trimming whitespace,
/// - an id already used by an earlier rule (compared untrimmed),
/// - a domain that is empty after trimming whitespace,
/// - an empty pattern list,
/// - a confidence outside `0.0..=1.0`,
/// - `stop_scan` set while `fix_tier` is not `FixTier::Reencode`,
/// - a pattern that does not compile as a regular expression.
///
/// Recorded as warnings:
/// - a `faststart` action (ASCII case-insensitive) combined with
///   `FixTier::Reencode`,
/// - a pattern string that was first declared by a rule with a different id.
pub fn lint_rules(rules: &[Rule]) -> LintReport {
    let mut report = LintReport::default();
    let mut seen_ids = HashSet::new();
    // Pattern text -> id of the first rule that declared it.
    let mut first_owner: HashMap<String, String> = HashMap::new();

    for rule in rules {
        let id = &rule.id;

        if id.trim().is_empty() {
            report.errors.push(String::from("Rule id is empty"));
        }

        // `insert` returns false when the id was already present.
        let newly_seen = seen_ids.insert(id.clone());
        if !newly_seen {
            report.errors.push(format!("Duplicate rule id: {}", id));
        }

        if rule.domain.trim().is_empty() {
            report.errors.push(format!("Rule {} has empty domain", id));
        }

        if rule.patterns.is_empty() {
            report.errors.push(format!("Rule {} has no patterns", id));
        }

        let confidence_in_range = (0.0..=1.0).contains(&rule.confidence);
        if !confidence_in_range {
            report.errors.push(format!(
                "Rule {} has invalid confidence {}",
                id, rule.confidence
            ));
        }

        let reencodes = rule.fix_tier == FixTier::Reencode;

        if rule.stop_scan && !reencodes {
            report.errors.push(format!(
                "Rule {} has stop_scan=true but fix_tier is {:?}",
                id, rule.fix_tier
            ));
        }

        if let Some(action) = &rule.action {
            if reencodes && action.eq_ignore_ascii_case("faststart") {
                report.warnings.push(format!(
                    "Rule {} uses faststart action but fix_tier is reencode",
                    id
                ));
            }
        }

        for pattern in &rule.patterns {
            if let Some(err) = Regex::new(pattern).err() {
                report.errors.push(format!(
                    "Rule {} has invalid regex '{}': {}",
                    id, pattern, err
                ));
            }

            // The pattern is tracked even when it failed to compile, so
            // duplicates of an invalid pattern are still reported.
            match first_owner.get(pattern) {
                Some(owner) if owner != id => {
                    report.warnings.push(format!(
                        "Pattern '{}' appears in rules {} and {}",
                        pattern, owner, id
                    ));
                }
                // Same rule id repeating its own pattern: not reported.
                Some(_) => {}
                None => {
                    first_owner.insert(pattern.clone(), id.clone());
                }
            }
        }
    }

    report
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rules::model::{FixTier, Rule, Severity};

    /// Linting the same rule twice must report exactly one duplicate-id error.
    ///
    /// The fixture passes every other check on its own (non-empty id and
    /// domain, one valid pattern, confidence in range, `stop_scan` off, no
    /// action), so the duplicate id is the only finding expected. The exact
    /// message is asserted so that an unrelated error cannot make this test
    /// pass by accident.
    #[test]
    fn detects_duplicate_ids() {
        let rule = Rule {
            id: "DUP".to_string(),
            domain: "test".to_string(),
            severity: Severity::Low,
            confidence: 0.5,
            fix_tier: FixTier::None,
            stop_scan: false,
            patterns: vec!["foo".to_string()],
            notes: None,
            action: None,
            requires: vec![],
            excludes: vec![],
        };

        let report = lint_rules(&[rule.clone(), rule]);

        assert!(report.has_errors());
        assert_eq!(report.errors, vec!["Duplicate rule id: DUP".to_string()]);
        // Both copies share one id, so the repeated pattern is not flagged.
        assert!(report.warnings.is_empty());
    }
}

View File

@@ -5,9 +5,13 @@ use anyhow::Result;
use crate::scan::ProbeData;
mod loader;
mod lint;
mod matcher;
mod model;
use model::Rule;
pub use lint::{lint_rules, LintReport};
pub use matcher::{RuleContext, RuleMatch};
pub use model::{FixTier, Severity};
@@ -41,6 +45,12 @@ impl RuleSet {
Ok(Self { rules: Vec::new() })
}
/// Builds a rule set from the rules found in `dir`.
///
/// The rules are read with `loader::load_rules_from_dir` and then passed
/// through `loader::compile_rules`; the compiled result becomes the set's
/// rules.
///
/// # Errors
///
/// Returns the error from either loader step if it fails.
pub fn load_from_dir(dir: &std::path::Path) -> Result<Self> {
    let raw = loader::load_rules_from_dir(dir)?;
    Ok(Self {
        rules: loader::compile_rules(raw)?,
    })
}
pub fn match_lines(&self, lines: &[String], context: &RuleContext) -> Vec<RuleMatch> {
let mut matches = Vec::new();
for rule in &self.rules {
@@ -61,6 +71,29 @@ impl RuleSet {
}
}
/// Loads uncompiled rules from the first `rulesets` directory that yields any.
///
/// Candidates are tried in this order:
/// 1. `rulesets` under the current working directory,
/// 2. `rulesets` next to the running executable.
///
/// A location that cannot be determined (for example, the current directory
/// is unavailable) is skipped, and so is a candidate that is not an existing
/// directory. If no candidate yields rules, an empty vector is returned.
///
/// # Errors
///
/// Returns the error from `loader::load_rules_from_dir` when loading from a
/// candidate that exists as a directory fails.
pub fn load_raw_rules() -> Result<Vec<Rule>> {
    let cwd_candidate = std::env::current_dir()
        .ok()
        .map(|cwd| cwd.join("rulesets"));
    let exe_candidate = std::env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(|parent| parent.join("rulesets")));

    for dir in cwd_candidate.into_iter().chain(exe_candidate) {
        // Skip candidates that are not present so that a missing `rulesets`
        // in the working directory cannot abort the search before the
        // executable-relative location is tried.
        // NOTE(review): the loader's behavior on a missing directory is not
        // visible from here; this guard makes the fallback independent of it.
        if !dir.is_dir() {
            continue;
        }

        let rules = loader::load_rules_from_dir(&dir)?;
        if !rules.is_empty() {
            return Ok(rules);
        }
    }

    Ok(Vec::new())
}
pub fn build_context(probe: &ProbeData) -> RuleContext {
let mut context = RuleContext::default();