Initial vid-repair scaffold
This commit is contained in:
78
vid-repair-core/src/rules/loader.rs
Normal file
78
vid-repair-core/src/rules/loader.rs
Normal file
@@ -0,0 +1,78 @@
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use regex::Regex;
|
||||
|
||||
use super::matcher::CompiledRule;
|
||||
use super::model::{Rule, RuleFile};
|
||||
|
||||
pub fn load_rules_from_dir(dir: &Path) -> Result<Vec<Rule>> {
|
||||
if !dir.exists() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let mut entries: Vec<PathBuf> = fs::read_dir(dir)
|
||||
.with_context(|| format!("Failed to read ruleset dir {}", dir.display()))?
|
||||
.filter_map(|entry| entry.ok())
|
||||
.map(|entry| entry.path())
|
||||
.filter(|path| path.extension().map(|ext| ext == "toml").unwrap_or(false))
|
||||
.collect();
|
||||
|
||||
entries.sort();
|
||||
|
||||
let mut rules = Vec::new();
|
||||
|
||||
for path in entries {
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("Failed to read ruleset {}", path.display()))?;
|
||||
let file: RuleFile = toml::from_str(&raw)
|
||||
.with_context(|| format!("Failed to parse ruleset {}", path.display()))?;
|
||||
rules.extend(file.rules);
|
||||
}
|
||||
|
||||
Ok(rules)
|
||||
}
|
||||
|
||||
pub fn compile_rules(rules: Vec<Rule>) -> Result<Vec<CompiledRule>> {
|
||||
let mut compiled = Vec::new();
|
||||
|
||||
for rule in rules {
|
||||
let patterns = rule
|
||||
.patterns
|
||||
.iter()
|
||||
.map(|pattern| Regex::new(pattern))
|
||||
.collect::<std::result::Result<Vec<_>, _>>()
|
||||
.with_context(|| format!("Invalid regex in rule {}", rule.id))?;
|
||||
|
||||
compiled.push(CompiledRule { rule, patterns });
|
||||
}
|
||||
|
||||
Ok(compiled)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Ruleset document containing exactly one rule.
    const SINGLE_RULE_DOCUMENT: &str = r#"
[[rule]]
id = "TEST_RULE"
domain = "test"
severity = "low"
confidence = 0.8
fix_tier = "none"
stop_scan = false
patterns = ["(?i)foo"]
notes = "test"
"#;

    /// A ruleset document parses into a `RuleFile` and its rules compile.
    #[test]
    fn parses_rule_file() {
        let parsed: RuleFile = toml::from_str(SINGLE_RULE_DOCUMENT).expect("rule file parse");
        assert_eq!(parsed.rules.len(), 1);

        let compiled_rules = compile_rules(parsed.rules).expect("compile rules");
        assert_eq!(compiled_rules.len(), 1);
        assert_eq!(compiled_rules[0].rule.id, "TEST_RULE");
    }
}
|
||||
78
vid-repair-core/src/rules/matcher.rs
Normal file
78
vid-repair-core/src/rules/matcher.rs
Normal file
@@ -0,0 +1,78 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use super::model::{FixTier, Rule, Severity};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CompiledRule {
|
||||
pub rule: Rule,
|
||||
pub patterns: Vec<Regex>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RuleMatch {
|
||||
pub rule_id: String,
|
||||
pub domain: String,
|
||||
pub severity: Severity,
|
||||
pub confidence: f32,
|
||||
pub fix_tier: FixTier,
|
||||
pub stop_scan: bool,
|
||||
pub notes: Option<String>,
|
||||
pub action: Option<String>,
|
||||
pub evidence: Vec<String>,
|
||||
}
|
||||
|
||||
/// The set of tags that a rule's `requires` / `excludes` lists are checked
/// against.
#[derive(Debug, Default, Clone)]
pub struct RuleContext {
    /// Tags currently in effect.
    pub tags: HashSet<String>,
}
|
||||
|
||||
impl RuleContext {
|
||||
pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
|
||||
self.tags.insert(tag.into());
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl CompiledRule {
|
||||
pub fn matches(&self, lines: &[String], context: &RuleContext) -> Option<RuleMatch> {
|
||||
if !self.rule.requires.is_empty()
|
||||
&& !self.rule.requires.iter().all(|req| context.tags.contains(req))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
if !self.rule.excludes.is_empty()
|
||||
&& self.rule.excludes.iter().any(|ex| context.tags.contains(ex))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut evidence = Vec::new();
|
||||
|
||||
for line in lines {
|
||||
if self.patterns.iter().any(|re| re.is_match(line)) {
|
||||
if evidence.len() < 3 {
|
||||
evidence.push(line.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if evidence.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(RuleMatch {
|
||||
rule_id: self.rule.id.clone(),
|
||||
domain: self.rule.domain.clone(),
|
||||
severity: self.rule.severity,
|
||||
confidence: self.rule.confidence,
|
||||
fix_tier: self.rule.fix_tier,
|
||||
stop_scan: self.rule.stop_scan,
|
||||
notes: self.rule.notes.clone(),
|
||||
action: self.rule.action.clone(),
|
||||
evidence,
|
||||
})
|
||||
}
|
||||
}
|
||||
109
vid-repair-core/src/rules/mod.rs
Normal file
109
vid-repair-core/src/rules/mod.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use crate::scan::ProbeData;
|
||||
|
||||
mod loader;
|
||||
mod matcher;
|
||||
mod model;
|
||||
|
||||
pub use matcher::{RuleContext, RuleMatch};
|
||||
pub use model::{FixTier, Severity};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RuleSet {
|
||||
pub rules: Vec<matcher::CompiledRule>,
|
||||
}
|
||||
|
||||
impl RuleSet {
|
||||
pub fn load() -> Result<Self> {
|
||||
let mut candidates = Vec::new();
|
||||
|
||||
if let Ok(current) = std::env::current_dir() {
|
||||
candidates.push(current.join("rulesets"));
|
||||
}
|
||||
|
||||
if let Ok(exe) = std::env::current_exe() {
|
||||
if let Some(parent) = exe.parent() {
|
||||
candidates.push(parent.join("rulesets"));
|
||||
}
|
||||
}
|
||||
|
||||
for dir in candidates {
|
||||
let rules = loader::load_rules_from_dir(&dir)?;
|
||||
if !rules.is_empty() {
|
||||
let compiled = loader::compile_rules(rules)?;
|
||||
return Ok(Self { rules: compiled });
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self { rules: Vec::new() })
|
||||
}
|
||||
|
||||
pub fn match_lines(&self, lines: &[String], context: &RuleContext) -> Vec<RuleMatch> {
|
||||
let mut matches = Vec::new();
|
||||
for rule in &self.rules {
|
||||
if let Some(hit) = rule.matches(lines, context) {
|
||||
matches.push(hit);
|
||||
}
|
||||
}
|
||||
matches
|
||||
}
|
||||
|
||||
pub fn best_match<'a>(&self, matches: &'a [RuleMatch]) -> Option<&'a RuleMatch> {
|
||||
matches.iter().max_by(|a, b| {
|
||||
a.severity
|
||||
.rank()
|
||||
.cmp(&b.severity.rank())
|
||||
.then_with(|| a.confidence.partial_cmp(&b.confidence).unwrap_or(std::cmp::Ordering::Equal))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_context(probe: &ProbeData) -> RuleContext {
|
||||
let mut context = RuleContext::default();
|
||||
|
||||
if let Some(format) = &probe.format_name {
|
||||
context = context.with_tag(format!("container:{}", format.to_lowercase()));
|
||||
}
|
||||
|
||||
for stream in &probe.streams {
|
||||
if let Some(codec_type) = &stream.codec_type {
|
||||
context = context.with_tag(format!("stream:{}", codec_type.to_lowercase()));
|
||||
}
|
||||
if let Some(codec) = &stream.codec_name {
|
||||
context = context.with_tag(format!("codec:{}", codec.to_lowercase()));
|
||||
}
|
||||
}
|
||||
|
||||
context
|
||||
}
|
||||
|
||||
pub fn ruleset_dir_for_display() -> Result<PathBuf> {
|
||||
if let Ok(current) = std::env::current_dir() {
|
||||
let dir = current.join("rulesets");
|
||||
if dir.exists() {
|
||||
return Ok(dir);
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(exe) = std::env::current_exe() {
|
||||
if let Some(parent) = exe.parent() {
|
||||
return Ok(parent.join("rulesets"));
|
||||
}
|
||||
}
|
||||
|
||||
Err(anyhow::anyhow!("No ruleset directory found"))
|
||||
}
|
||||
|
||||
pub fn ensure_ruleset_loaded(ruleset: &RuleSet) -> Result<()> {
|
||||
if ruleset.rules.is_empty() {
|
||||
let dir = ruleset_dir_for_display().unwrap_or_else(|_| PathBuf::from("rulesets"));
|
||||
return Err(anyhow::anyhow!(
|
||||
"No rulesets found. Expected TOML files in {}",
|
||||
dir.display()
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
72
vid-repair-core/src/rules/model.rs
Normal file
72
vid-repair-core/src/rules/model.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
use serde::Deserialize;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, serde::Serialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Severity {
|
||||
Info,
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
Severe,
|
||||
}
|
||||
|
||||
impl Severity {
|
||||
pub fn rank(self) -> u8 {
|
||||
match self {
|
||||
Severity::Info => 0,
|
||||
Severity::Low => 1,
|
||||
Severity::Medium => 2,
|
||||
Severity::High => 3,
|
||||
Severity::Severe => 4,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, serde::Serialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum FixTier {
|
||||
None,
|
||||
Remux,
|
||||
Reencode,
|
||||
}
|
||||
|
||||
impl FixTier {
|
||||
pub fn rank(self) -> u8 {
|
||||
match self {
|
||||
FixTier::None => 0,
|
||||
FixTier::Remux => 1,
|
||||
FixTier::Reencode => 2,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Rule {
|
||||
pub id: String,
|
||||
pub domain: String,
|
||||
pub severity: Severity,
|
||||
#[serde(default = "default_confidence")]
|
||||
pub confidence: f32,
|
||||
pub fix_tier: FixTier,
|
||||
#[serde(default)]
|
||||
pub stop_scan: bool,
|
||||
pub patterns: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub notes: Option<String>,
|
||||
#[serde(default)]
|
||||
pub action: Option<String>,
|
||||
#[serde(default)]
|
||||
pub requires: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub excludes: Vec<String>,
|
||||
}
|
||||
|
||||
/// Confidence used for a rule that does not specify one.
fn default_confidence() -> f32 {
    const FALLBACK_CONFIDENCE: f32 = 0.5;
    FALLBACK_CONFIDENCE
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct RuleFile {
|
||||
#[serde(rename = "rule")]
|
||||
pub rules: Vec<Rule>,
|
||||
}
|
||||
Reference in New Issue
Block a user