Initial vid-repair scaffold

This commit is contained in:
2025-12-31 22:07:42 -05:00
commit dddac108fe
30 changed files with 3220 additions and 0 deletions

View File

@@ -0,0 +1,78 @@
use std::fs;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use regex::Regex;
use super::matcher::CompiledRule;
use super::model::{Rule, RuleFile};
/// Loads every rule declared in the `.toml` files directly inside `dir`.
///
/// Files are read in sorted path order, so the order of the returned rules is
/// deterministic. If `dir` is missing or is not a directory, an empty list is
/// returned so callers can probe several candidate locations.
///
/// # Errors
///
/// Returns an error if the directory or one of its entries cannot be read, or
/// if a `.toml` file cannot be read or parsed as a [`RuleFile`].
pub fn load_rules_from_dir(dir: &Path) -> Result<Vec<Rule>> {
    // `is_dir` rather than `exists`: a stray regular file that happens to have
    // the ruleset directory's name is treated like a missing directory instead
    // of making `read_dir` fail.
    if !dir.is_dir() {
        return Ok(Vec::new());
    }

    let listing = fs::read_dir(dir)
        .with_context(|| format!("Failed to read ruleset dir {}", dir.display()))?;

    let mut entries: Vec<PathBuf> = Vec::new();
    for entry in listing {
        // Surface per-entry I/O errors: silently skipping them could drop rule
        // files without any indication to the user.
        let path = entry
            .with_context(|| format!("Failed to read entry in ruleset dir {}", dir.display()))?
            .path();
        if path.extension().map(|ext| ext == "toml").unwrap_or(false) {
            entries.push(path);
        }
    }
    // `read_dir` order is platform-dependent; sort for a stable rule order.
    entries.sort();

    let mut rules = Vec::new();
    for path in entries {
        let raw = fs::read_to_string(&path)
            .with_context(|| format!("Failed to read ruleset {}", path.display()))?;
        let file: RuleFile = toml::from_str(&raw)
            .with_context(|| format!("Failed to parse ruleset {}", path.display()))?;
        rules.extend(file.rules);
    }
    Ok(rules)
}
/// Compiles the regex patterns of every rule, preserving the input order.
///
/// # Errors
///
/// Stops at the first pattern that is not a valid regex and returns that
/// error, annotated with the id of the rule it belongs to.
pub fn compile_rules(rules: Vec<Rule>) -> Result<Vec<CompiledRule>> {
    rules
        .into_iter()
        .map(|rule| -> Result<CompiledRule> {
            let mut patterns = Vec::with_capacity(rule.patterns.len());
            for source in &rule.patterns {
                let regex = Regex::new(source)
                    .with_context(|| format!("Invalid regex in rule {}", rule.id))?;
                patterns.push(regex);
            }
            Ok(CompiledRule { rule, patterns })
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A one-rule TOML document deserializes into a `RuleFile` and its
    /// pattern compiles.
    #[test]
    fn parses_rule_file() {
        let source = r#"
[[rule]]
id = "TEST_RULE"
domain = "test"
severity = "low"
confidence = 0.8
fix_tier = "none"
stop_scan = false
patterns = ["(?i)foo"]
notes = "test"
"#;
        let parsed: RuleFile = toml::from_str(source).expect("rule file parse");
        assert_eq!(parsed.rules.len(), 1);

        let compiled_rules = compile_rules(parsed.rules).expect("compile rules");
        assert_eq!(compiled_rules.len(), 1);
        assert_eq!(compiled_rules[0].rule.id, "TEST_RULE");
    }
}

View File

@@ -0,0 +1,78 @@
use std::collections::HashSet;
use regex::Regex;
use super::model::{FixTier, Rule, Severity};
/// A [`Rule`] bundled with compiled regexes used to test input lines.
#[derive(Debug, Clone)]
pub struct CompiledRule {
/// The rule definition; its metadata is copied into any resulting match.
pub rule: Rule,
/// Compiled regexes; a line counts as a hit when any one of them matches it.
pub patterns: Vec<Regex>,
}
/// Outcome of a rule matching some input: a copy of the rule's metadata plus
/// the lines that triggered it.
#[derive(Debug, Clone)]
pub struct RuleMatch {
/// `id` of the rule that matched.
pub rule_id: String,
/// `domain` copied from the rule.
pub domain: String,
/// `severity` copied from the rule.
pub severity: Severity,
/// `confidence` copied from the rule.
pub confidence: f32,
/// `fix_tier` copied from the rule.
pub fix_tier: FixTier,
/// `stop_scan` flag copied from the rule.
pub stop_scan: bool,
/// Optional `notes` copied from the rule.
pub notes: Option<String>,
/// Optional `action` copied from the rule.
pub action: Option<String>,
/// Input lines that matched one of the rule's patterns (at most three, in
/// input order).
pub evidence: Vec<String>,
}
/// Set of tags describing the input being examined.
///
/// Rules consult these tags through their `requires` / `excludes` lists.
#[derive(Debug, Default, Clone)]
pub struct RuleContext {
    /// Tags currently attached to the context; duplicates collapse because
    /// this is a set.
    pub tags: HashSet<String>,
}

impl RuleContext {
    /// Returns the context with `tag` added, builder-style.
    ///
    /// The context is consumed and handed back, so the result must be kept:
    /// discarding it loses the tag, hence `#[must_use]`.
    #[must_use]
    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
        self.tags.insert(tag.into());
        self
    }
}
impl CompiledRule {
    /// Evaluates this rule against `lines` within the given `context`.
    ///
    /// The rule is skipped (returns `None`) when any tag in `rule.requires` is
    /// absent from `context.tags`, or when any tag in `rule.excludes` is
    /// present. Otherwise the first few lines that match at least one pattern
    /// are collected as evidence; `None` is returned if no line matches.
    ///
    /// On a hit, the returned [`RuleMatch`] carries a copy of the rule's
    /// metadata and up to three evidence lines in input order.
    pub fn matches(&self, lines: &[String], context: &RuleContext) -> Option<RuleMatch> {
        /// Maximum number of matching lines kept as evidence.
        const MAX_EVIDENCE: usize = 3;

        // `all` over an empty list is true and `any` over an empty list is
        // false, so no separate emptiness checks are needed.
        let requirements_met = self
            .rule
            .requires
            .iter()
            .all(|req| context.tags.contains(req));
        if !requirements_met {
            return None;
        }
        let excluded = self
            .rule
            .excludes
            .iter()
            .any(|ex| context.tags.contains(ex));
        if excluded {
            return None;
        }

        // `take` stops the scan once the evidence cap is reached, so the
        // remaining lines are not run through the regexes needlessly.
        let evidence: Vec<String> = lines
            .iter()
            .filter(|line| self.patterns.iter().any(|re| re.is_match(line.as_str())))
            .take(MAX_EVIDENCE)
            .cloned()
            .collect();
        if evidence.is_empty() {
            return None;
        }

        Some(RuleMatch {
            rule_id: self.rule.id.clone(),
            domain: self.rule.domain.clone(),
            severity: self.rule.severity,
            confidence: self.rule.confidence,
            fix_tier: self.rule.fix_tier,
            stop_scan: self.rule.stop_scan,
            notes: self.rule.notes.clone(),
            action: self.rule.action.clone(),
            evidence,
        })
    }
}

View File

@@ -0,0 +1,109 @@
use std::path::PathBuf;
use anyhow::Result;
use crate::scan::ProbeData;
mod loader;
mod matcher;
mod model;
pub use matcher::{RuleContext, RuleMatch};
pub use model::{FixTier, Severity};
/// A collection of compiled rules that can be matched against input lines.
#[derive(Debug, Clone)]
pub struct RuleSet {
/// The compiled rules; matching evaluates them in this order.
pub rules: Vec<matcher::CompiledRule>,
}
impl RuleSet {
    /// Loads and compiles rules from the first candidate directory that
    /// provides any.
    ///
    /// Candidates are, in order: `rulesets` under the current working
    /// directory, then `rulesets` next to the current executable. A candidate
    /// whose location cannot be determined is left out. If no candidate yields
    /// rules, an empty rule set is returned.
    ///
    /// # Errors
    ///
    /// Propagates any error from loading or compiling the rules of the
    /// candidate being examined.
    pub fn load() -> Result<Self> {
        let beside_cwd = std::env::current_dir()
            .ok()
            .map(|cwd| cwd.join("rulesets"));
        let beside_exe = std::env::current_exe()
            .ok()
            .and_then(|exe| exe.parent().map(|parent| parent.join("rulesets")));

        for dir in beside_cwd.into_iter().chain(beside_exe) {
            let loaded = loader::load_rules_from_dir(&dir)?;
            if loaded.is_empty() {
                continue;
            }
            return Ok(Self {
                rules: loader::compile_rules(loaded)?,
            });
        }
        Ok(Self { rules: Vec::new() })
    }

    /// Runs every rule against `lines` and returns the hits in rule order.
    pub fn match_lines(&self, lines: &[String], context: &RuleContext) -> Vec<RuleMatch> {
        self.rules
            .iter()
            .filter_map(|rule| rule.matches(lines, context))
            .collect()
    }

    /// Picks the match with the highest severity rank, using the higher
    /// confidence as tie-breaker.
    ///
    /// Confidences that cannot be ordered (e.g. NaN) compare as equal. When
    /// several matches are equally good, the last one in `matches` is
    /// returned. Returns `None` for an empty slice.
    pub fn best_match<'a>(&self, matches: &'a [RuleMatch]) -> Option<&'a RuleMatch> {
        matches.iter().max_by(|lhs, rhs| {
            match lhs.severity.rank().cmp(&rhs.severity.rank()) {
                std::cmp::Ordering::Equal => lhs
                    .confidence
                    .partial_cmp(&rhs.confidence)
                    .unwrap_or(std::cmp::Ordering::Equal),
                by_severity => by_severity,
            }
        })
    }
}
/// Derives a [`RuleContext`] from probe data.
///
/// Adds a `container:<format>` tag when a format name is present, and for each
/// stream a `stream:<codec_type>` and/or `codec:<codec_name>` tag when those
/// fields are present. All tag values are lowercased.
pub fn build_context(probe: &ProbeData) -> RuleContext {
    let mut tags = Vec::new();

    if let Some(container) = &probe.format_name {
        tags.push(format!("container:{}", container.to_lowercase()));
    }
    for stream in &probe.streams {
        if let Some(kind) = &stream.codec_type {
            tags.push(format!("stream:{}", kind.to_lowercase()));
        }
        if let Some(name) = &stream.codec_name {
            tags.push(format!("codec:{}", name.to_lowercase()));
        }
    }

    tags.into_iter()
        .fold(RuleContext::default(), |context, tag| context.with_tag(tag))
}
/// Chooses a ruleset directory path to show to the user.
///
/// Prefers `rulesets` under the current working directory when it exists.
/// Otherwise falls back to `rulesets` next to the current executable, without
/// checking that it exists.
///
/// # Errors
///
/// Fails when neither location can be determined.
pub fn ruleset_dir_for_display() -> Result<PathBuf> {
    let existing_in_cwd = std::env::current_dir()
        .ok()
        .map(|cwd| cwd.join("rulesets"))
        .filter(|dir| dir.exists());
    if let Some(dir) = existing_in_cwd {
        return Ok(dir);
    }

    std::env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(|parent| parent.join("rulesets")))
        .ok_or_else(|| anyhow::anyhow!("No ruleset directory found"))
}
/// Verifies that `ruleset` contains at least one rule.
///
/// # Errors
///
/// Returns an error naming the directory where TOML rule files were expected
/// when the rule set is empty. If that directory cannot be determined, the
/// relative path `rulesets` is shown instead.
pub fn ensure_ruleset_loaded(ruleset: &RuleSet) -> Result<()> {
    if !ruleset.rules.is_empty() {
        return Ok(());
    }

    let expected_dir = match ruleset_dir_for_display() {
        Ok(dir) => dir,
        Err(_) => PathBuf::from("rulesets"),
    };
    Err(anyhow::anyhow!(
        "No rulesets found. Expected TOML files in {}",
        expected_dir.display()
    ))
}

View File

@@ -0,0 +1,72 @@
use serde::Deserialize;
/// Severity level attached to a rule.
///
/// Serialized and deserialized using lowercase variant names (for example
/// `"low"`). Variants are declared in ascending order of `rank()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, serde::Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
// Lowest rank (0).
Info,
Low,
Medium,
High,
// Highest rank (4).
Severe,
}
impl Severity {
pub fn rank(self) -> u8 {
match self {
Severity::Info => 0,
Severity::Low => 1,
Severity::Medium => 2,
Severity::High => 3,
Severity::Severe => 4,
}
}
}
/// Fix tier attached to a rule.
///
/// Serialized and deserialized using lowercase variant names (for example
/// `"none"`). Variants are declared in ascending order of `rank()`.
// NOTE(review): the variant names suggest "no fix", "remux" and "re-encode"
// repairs — confirm the exact semantics against the code that applies fixes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, serde::Serialize)]
#[serde(rename_all = "lowercase")]
pub enum FixTier {
None,
Remux,
Reencode,
}
impl FixTier {
pub fn rank(self) -> u8 {
match self {
FixTier::None => 0,
FixTier::Remux => 1,
FixTier::Reencode => 2,
}
}
}
/// A single rule as deserialized from a ruleset file.
///
/// `id`, `domain`, `severity`, `fix_tier` and `patterns` are mandatory; the
/// remaining fields fall back to defaults when omitted.
#[derive(Debug, Clone, Deserialize)]
pub struct Rule {
/// Identifier of the rule; reported as `rule_id` on matches and in regex
/// compilation errors.
pub id: String,
/// Domain label, copied verbatim onto matches.
pub domain: String,
/// Severity level of the rule.
pub severity: Severity,
/// Confidence value; defaults to `default_confidence()` (0.5) when omitted.
#[serde(default = "default_confidence")]
pub confidence: f32,
/// Fix tier of the rule.
pub fix_tier: FixTier,
/// Flag copied onto matches; defaults to `false` when omitted.
// NOTE(review): presumably tells the scanner to stop after this rule hits —
// confirm against the scan loop.
#[serde(default)]
pub stop_scan: bool,
/// Regex pattern sources; a line matches the rule when any pattern matches.
pub patterns: Vec<String>,
/// Optional free-form notes, copied onto matches.
#[serde(default)]
pub notes: Option<String>,
/// Optional action text, copied onto matches.
#[serde(default)]
pub action: Option<String>,
/// Context tags that must all be present for the rule to be evaluated.
#[serde(default)]
pub requires: Vec<String>,
/// Context tags that disable the rule when any one of them is present.
#[serde(default)]
pub excludes: Vec<String>,
}
/// Serde default for `Rule::confidence` when a rule does not specify one.
fn default_confidence() -> f32 {
    const DEFAULT_CONFIDENCE: f32 = 0.5;
    DEFAULT_CONFIDENCE
}
/// Top-level shape of a ruleset file: a list of rules.
#[derive(Debug, Clone, Deserialize)]
pub struct RuleFile {
/// Rules read from the file's `rule` key (written as repeated `[[rule]]`
/// tables in TOML). The key is required; there is no serde default.
#[serde(rename = "rule")]
pub rules: Vec<Rule>,
}