Initial vid-repair scaffold

This commit is contained in:
2025-12-31 22:07:42 -05:00
commit dddac108fe
30 changed files with 3220 additions and 0 deletions

View File

@@ -0,0 +1,47 @@
/// Default configuration written verbatim to disk by `init_config_at` when
/// no config file exists yet. The inline `#` comments document each key for
/// end users editing the file.
///
/// NOTE(review): these TOML values and the Rust `Default` impls in the
/// config module are maintained by hand — keep them in sync (the
/// `default_config_parses` test checks a subset of them).
pub const DEFAULT_CONFIG: &str = r#"# Vid-Repair configuration
# Lines beginning with '#' are comments.
# Values here are defaults; CLI flags override config.
ffmpeg_path = "ffmpeg"
ffprobe_path = "ffprobe"
[scan]
# quick|standard|deep - deep runs full decode (slowest, most thorough)
depth = "deep"
# If quick/standard find suspicious signals, auto-escalate to deep.
auto_escalate = true
# Follow symlinks while scanning directories.
follow_symlinks = false
# Recurse into subdirectories when scanning directories.
recursive = true
# Extensions to include (lowercase, no dot). Add/remove as needed.
include_ext = ["mp4","m4v","mov","mkv","avi","wmv","flv","webm","mpg","mpeg","m2ts","mts","ts","3gp","3g2","ogv","vob","f4v"]
# Glob patterns to exclude.
exclude = ["**/.git/**", "**/node_modules/**"]
[repair]
# safe|aggressive - aggressive allows re-encode when corruption is found.
policy = "safe"
# Empty means in-place replacement with temp + atomic rename.
output_dir = ""
# If true (and in-place), rename original to *.original.* after successful fix.
keep_original = false
[report]
# Emit JSON instead of human-readable text.
json = false
# Pretty-print JSON output.
pretty = true
[performance]
# 0 = auto (num CPU threads). Higher values spawn more concurrent scans.
jobs = 0
[watch]
# Enable watch mode (monitor and process files as they settle).
enabled = false
# Wait for no changes for this many seconds before processing.
settle_seconds = 10
# Ignore these extensions during watch (lowercase, no dot).
ignore_ext = ["part","crdownload","partial","tmp","download"]
"#;

View File

@@ -0,0 +1,299 @@
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use directories::ProjectDirs;
use fs_err as fs;
use serde::{Deserialize, Serialize};
mod defaults;
pub use defaults::DEFAULT_CONFIG;
/// Top-level application configuration, deserialized from the TOML config file.
///
/// The container-level `#[serde(default)]` fills every missing key (and every
/// missing section) from `Config::default()`, so a partially written config
/// file still gets the documented defaults — e.g. an omitted `ffmpeg_path`
/// stays `"ffmpeg"`. The previous field-level `#[serde(default)]` used each
/// *type's* default instead, turning omitted tool paths into empty strings
/// and silently breaking ffmpeg/ffprobe invocation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct Config {
    /// Path or command name for the ffmpeg binary.
    pub ffmpeg_path: String,
    /// Path or command name for the ffprobe binary.
    pub ffprobe_path: String,
    /// `[scan]` section.
    pub scan: ScanConfig,
    /// `[repair]` section.
    pub repair: RepairConfig,
    /// `[report]` section.
    pub report: ReportConfig,
    /// `[performance]` section.
    pub performance: PerformanceConfig,
    /// `[watch]` section.
    pub watch: WatchConfig,
}
impl Default for Config {
fn default() -> Self {
Self {
ffmpeg_path: "ffmpeg".to_string(),
ffprobe_path: "ffprobe".to_string(),
scan: ScanConfig::default(),
repair: RepairConfig::default(),
report: ReportConfig::default(),
performance: PerformanceConfig::default(),
watch: WatchConfig::default(),
}
}
}
/// Scan settings (`[scan]` section).
///
/// Container-level `#[serde(default)]` backfills omitted keys from
/// `ScanConfig::default()`. With the previous field-level defaults, omitting
/// `auto_escalate` or `recursive` yielded `false` (the `bool` default) and
/// omitting `include_ext` yielded an empty list — contradicting the defaults
/// documented in `DEFAULT_CONFIG`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ScanConfig {
    /// quick | standard | deep — deep runs a full decode.
    pub depth: ScanDepth,
    /// Escalate quick/standard scans to deep on suspicious signals.
    pub auto_escalate: bool,
    /// Follow symlinks while walking directories.
    pub follow_symlinks: bool,
    /// Recurse into subdirectories when scanning directories.
    pub recursive: bool,
    /// Extensions to include (lowercase, no dot — see `Config::normalize`).
    pub include_ext: Vec<String>,
    /// Glob patterns to exclude from scanning.
    pub exclude: Vec<String>,
}
impl Default for ScanConfig {
fn default() -> Self {
Self {
depth: ScanDepth::Deep,
auto_escalate: true,
follow_symlinks: false,
recursive: true,
include_ext: vec![
"mp4", "m4v", "mov", "mkv", "avi", "wmv", "flv", "webm", "mpg",
"mpeg", "m2ts", "mts", "ts", "3gp", "3g2", "ogv", "vob", "f4v",
]
.into_iter()
.map(|s| s.to_string())
.collect(),
exclude: vec!["**/.git/**".to_string(), "**/node_modules/**".to_string()],
}
}
}
/// How thoroughly a file is inspected, serialized lowercase
/// ("quick" | "standard" | "deep") to match the config's `depth` key.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ScanDepth {
    /// Fastest, least thorough (per the DEFAULT_CONFIG comment).
    Quick,
    /// Intermediate depth.
    Standard,
    /// Full decode — slowest, most thorough.
    Deep,
}
impl Default for ScanDepth {
fn default() -> Self {
ScanDepth::Deep
}
}
/// Repair settings (`[repair]` section).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepairConfig {
    /// safe | aggressive — aggressive allows re-encode.
    #[serde(default)]
    pub policy: FixPolicy,
    /// Destination directory for repaired files; empty means in-place
    /// replacement via temp file + rename (see the fix executor).
    #[serde(default)]
    pub output_dir: String,
    /// When repairing in place, keep the original under a *.original.* name.
    #[serde(default)]
    pub keep_original: bool,
}
impl Default for RepairConfig {
fn default() -> Self {
Self {
policy: FixPolicy::Safe,
output_dir: String::new(),
keep_original: false,
}
}
}
/// Repair policy, serialized lowercase ("safe" | "aggressive").
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum FixPolicy {
    /// Never re-encode; lossless fixes only.
    Safe,
    /// Permit lossy re-encoding when corruption is found.
    Aggressive,
}
impl Default for FixPolicy {
fn default() -> Self {
FixPolicy::Safe
}
}
/// Report output settings (`[report]` section).
///
/// Container-level `#[serde(default)]` backfills omitted keys from
/// `ReportConfig::default()`. Previously the field-level default made an
/// omitted `pretty` key deserialize as `false`, while the documented default
/// in `DEFAULT_CONFIG` is `true`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ReportConfig {
    /// Emit JSON instead of human-readable text.
    pub json: bool,
    /// Pretty-print JSON output.
    pub pretty: bool,
}
impl Default for ReportConfig {
fn default() -> Self {
Self {
json: false,
pretty: true,
}
}
}
/// Performance settings (`[performance]` section).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceConfig {
    /// Concurrent scan jobs; 0 = auto (per DEFAULT_CONFIG, the CPU thread count).
    #[serde(default)]
    pub jobs: usize,
}
impl Default for PerformanceConfig {
fn default() -> Self {
Self { jobs: 0 }
}
}
/// Watch-mode settings (`[watch]` section).
///
/// Container-level `#[serde(default)]` backfills omitted keys from
/// `WatchConfig::default()`. Previously an omitted `ignore_ext` deserialized
/// to an empty list (dropping the documented part/tmp/download filters), and
/// an omitted `settle_seconds` became 0, relying on `Config::normalize` to
/// patch it back to 10.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct WatchConfig {
    /// Enable watch mode (monitor and process files as they settle).
    pub enabled: bool,
    /// Seconds of quiescence before a watched file is processed.
    pub settle_seconds: u64,
    /// Extensions to skip while watching (lowercase, no dot).
    pub ignore_ext: Vec<String>,
}
impl Default for WatchConfig {
fn default() -> Self {
Self {
enabled: false,
settle_seconds: 10,
ignore_ext: vec!["part", "crdownload", "partial", "tmp", "download"]
.into_iter()
.map(|s| s.to_string())
.collect(),
}
}
}
/// CLI-sourced override values applied on top of the file config.
/// Each `Some` field overwrites the corresponding `Config` field in
/// `Config::apply_overrides`; `None` leaves the loaded value untouched.
#[derive(Debug, Default, Clone)]
pub struct ConfigOverrides {
    pub ffmpeg_path: Option<String>,
    pub ffprobe_path: Option<String>,
    pub scan_depth: Option<ScanDepth>,
    pub scan_recursive: Option<bool>,
    pub policy: Option<FixPolicy>,
    pub output_dir: Option<String>,
    pub keep_original: Option<bool>,
    pub json: Option<bool>,
    pub jobs: Option<usize>,
    pub watch: Option<bool>,
}
impl Config {
    /// Overlay CLI-provided values onto this config.
    ///
    /// Only `Some(..)` overrides take effect; every `None` leaves the
    /// corresponding value exactly as loaded from disk.
    pub fn apply_overrides(&mut self, overrides: &ConfigOverrides) {
        if let Some(path) = overrides.ffmpeg_path.as_ref() {
            self.ffmpeg_path = path.clone();
        }
        if let Some(path) = overrides.ffprobe_path.as_ref() {
            self.ffprobe_path = path.clone();
        }
        if let Some(depth) = overrides.scan_depth {
            self.scan.depth = depth;
        }
        if let Some(recursive) = overrides.scan_recursive {
            self.scan.recursive = recursive;
        }
        if let Some(policy) = overrides.policy {
            self.repair.policy = policy;
        }
        if let Some(dir) = overrides.output_dir.as_ref() {
            self.repair.output_dir = dir.clone();
        }
        if let Some(keep) = overrides.keep_original {
            self.repair.keep_original = keep;
        }
        if let Some(json) = overrides.json {
            self.report.json = json;
        }
        if let Some(jobs) = overrides.jobs {
            self.performance.jobs = jobs;
        }
        if let Some(enabled) = overrides.watch {
            self.watch.enabled = enabled;
        }
    }
    /// Load the config at `path` (or the platform default location when
    /// `None`), first writing `DEFAULT_CONFIG` if the file does not exist.
    /// Returns the normalized config together with the path actually used.
    pub fn load_or_init(path: Option<PathBuf>) -> Result<(Self, PathBuf)> {
        let path = if let Some(explicit) = path {
            explicit
        } else {
            default_config_path()?
        };
        if !path.exists() {
            init_config_at(&path, false)?;
        }
        let raw = fs::read_to_string(&path)
            .with_context(|| format!("Failed to read config file at {}", path.display()))?;
        let mut config: Config = toml::from_str(&raw)
            .with_context(|| format!("Failed to parse config file at {}", path.display()))?;
        config.normalize();
        Ok((config, path))
    }
    /// Canonicalize user-supplied values: extension lists are lowercased with
    /// any leading dot stripped, and a zero settle time falls back to 10s.
    pub fn normalize(&mut self) {
        // Shared cleanup for both extension lists.
        fn clean_exts(exts: &[String]) -> Vec<String> {
            exts.iter()
                .map(|ext| ext.trim_start_matches('.').to_lowercase())
                .collect()
        }
        self.scan.include_ext = clean_exts(&self.scan.include_ext);
        self.watch.ignore_ext = clean_exts(&self.watch.ignore_ext);
        if self.watch.settle_seconds == 0 {
            self.watch.settle_seconds = 10;
        }
    }
}
/// Platform-appropriate config file location (via `directories::ProjectDirs`),
/// e.g. `<config_dir>/config.toml` for the cc/44r0n/vid-repair project triple.
pub fn default_config_path() -> Result<PathBuf> {
    ProjectDirs::from("cc", "44r0n", "vid-repair")
        .map(|dirs| dirs.config_dir().join("config.toml"))
        .context("Unable to resolve default config directory")
}
/// Write `DEFAULT_CONFIG` to `path`, creating parent directories as needed.
/// An existing file is left untouched unless `force` is set.
pub fn init_config_at(path: &Path, force: bool) -> Result<()> {
    // Respect an existing file unless the caller forces a rewrite.
    if !force && path.exists() {
        return Ok(());
    }
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)
            .with_context(|| format!("Failed to create config directory {}", parent.display()))?;
    }
    fs::write(path, DEFAULT_CONFIG)
        .with_context(|| format!("Failed to write default config to {}", path.display()))?;
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    // Guards against DEFAULT_CONFIG drifting out of sync with the serde model:
    // the shipped TOML must parse and carry the documented scan defaults.
    #[test]
    fn default_config_parses() {
        let config: Config = toml::from_str(DEFAULT_CONFIG).expect("default config parse");
        assert_eq!(config.scan.depth, ScanDepth::Deep);
        assert!(!config.scan.include_ext.is_empty());
    }
}

View File

@@ -0,0 +1,202 @@
use std::path::{Path, PathBuf};
use std::process::Command;
use anyhow::{Context, Result};
use fs_err as fs;
use tempfile::NamedTempFile;
use crate::config::Config;
use crate::fix::{FixKind, FixOutcome, FixPlan};
use crate::scan::scan_file;
use crate::rules::RuleSet;
/// Execute the first action of `plan` on `path`, verify the result by
/// re-scanning the repaired temp file, and move it into place on success.
///
/// Flow: prepare temp/final paths → run the ffmpeg fix into the temp file →
/// re-scan the temp file → if no High/Severe issues remain, rename it into
/// its final location.
///
/// NOTE(review): only `plan.actions[0]` is executed; any further actions are
/// ignored — confirm this is intentional if plans ever carry more than one.
/// On verification failure the temp file is deliberately left on disk and
/// reported via `output_path` so it can be inspected.
pub fn apply_fix(path: &Path, plan: &FixPlan, config: &Config, ruleset: &RuleSet) -> Result<FixOutcome> {
    // Nothing planned: report the planner's blocked_reason (or a generic
    // message) without touching the file.
    if plan.actions.is_empty() {
        return Ok(FixOutcome {
            plan: plan.clone(),
            applied: false,
            success: false,
            message: plan
                .blocked_reason
                .clone()
                .unwrap_or_else(|| "No fix actions available".to_string()),
            output_path: None,
            re_scan_required: false,
        });
    }
    let action = &plan.actions[0];
    let output = prepare_output_path(path, config)?;
    run_ffmpeg_fix(path, &output.temp_path, action.kind, config)?;
    // Re-scan the repaired file before committing it anywhere.
    let verification = scan_file(&output.temp_path, config, ruleset)
        .with_context(|| format!("Failed to verify output {}", output.temp_path.display()))?;
    if has_severe_issues(&verification) {
        // Keep the temp file for inspection; do not replace the original.
        return Ok(FixOutcome {
            plan: plan.clone(),
            applied: true,
            success: false,
            message: "Verification failed; repaired file still has severe issues".to_string(),
            output_path: Some(output.temp_path.display().to_string()),
            re_scan_required: true,
        });
    }
    finalize_output(path, &output, config)?;
    Ok(FixOutcome {
        plan: plan.clone(),
        applied: true,
        success: true,
        message: "Fix applied successfully".to_string(),
        output_path: Some(output.final_path.display().to_string()),
        re_scan_required: false,
    })
}
/// Run one ffmpeg repair pass: `ffmpeg -y -v error -i <path> [strategy args] <output>`.
/// Fails with ffmpeg's trimmed stderr when the process exits non-zero.
fn run_ffmpeg_fix(path: &Path, output: &Path, kind: FixKind, config: &Config) -> Result<()> {
    let mut cmd = Command::new(&config.ffmpeg_path);
    cmd.args(["-y", "-v", "error"]).arg("-i").arg(path);
    // Strategy-specific codec/container flags.
    match kind {
        FixKind::Remux => {
            cmd.args(["-c", "copy"]);
        }
        FixKind::Faststart => {
            cmd.args(["-c", "copy", "-movflags", "+faststart"]);
        }
        FixKind::Reencode => {
            cmd.args(["-c:v", "libx264", "-c:a", "aac", "-movflags", "+faststart"]);
        }
    }
    cmd.arg(output);
    // `result` (not `output`) to avoid shadowing the output-path parameter.
    let result = cmd
        .output()
        .with_context(|| format!("Failed to run ffmpeg fix for {}", path.display()))?;
    if result.status.success() {
        return Ok(());
    }
    let stderr = String::from_utf8_lossy(&result.stderr);
    anyhow::bail!("ffmpeg fix failed: {}", stderr.trim())
}
fn has_severe_issues(report: &crate::scan::ScanOutcome) -> bool {
report
.issues
.iter()
.any(|issue| matches!(issue.severity, crate::rules::Severity::High | crate::rules::Severity::Severe))
}
/// Where a repair writes: ffmpeg writes into `temp_path`, which is renamed
/// to `final_path` by `finalize_output` after verification.
struct OutputPaths {
    // Temp file the ffmpeg fix writes into (same directory as final_path).
    temp_path: PathBuf,
    // Destination the temp file is renamed to after verification.
    final_path: PathBuf,
}
fn prepare_output_path(path: &Path, config: &Config) -> Result<OutputPaths> {
if config.repair.output_dir.is_empty() {
let parent = path
.parent()
.context("Input file has no parent directory")?;
let temp = NamedTempFile::new_in(parent)
.with_context(|| format!("Failed to create temp file in {}", parent.display()))?;
let temp_path = temp.path().to_path_buf();
temp.keep()?;
Ok(OutputPaths {
temp_path,
final_path: path.to_path_buf(),
})
} else {
let output_dir = PathBuf::from(&config.repair.output_dir);
fs::create_dir_all(&output_dir).with_context(|| {
format!("Failed to create output directory {}", output_dir.display())
})?;
let file_name = path
.file_name()
.context("Input file has no filename")?
.to_os_string();
let final_path = output_dir.join(file_name);
let temp = NamedTempFile::new_in(&output_dir).with_context(|| {
format!("Failed to create temp file in {}", output_dir.display())
})?;
let temp_path = temp.path().to_path_buf();
temp.keep()?;
Ok(OutputPaths {
temp_path,
final_path,
})
}
}
/// Move the verified temp file into its final location.
///
/// For in-place repairs, optionally renames the original to a `*.original.*`
/// backup first (when `keep_original` is set), then renames the temp file
/// over the input. Renames within one directory keep the move atomic.
fn finalize_output(input: &Path, output: &OutputPaths, config: &Config) -> Result<()> {
    // Degenerate case: the temp file already is the destination.
    if output.temp_path == output.final_path {
        return Ok(());
    }
    let in_place = output.final_path == input;
    if in_place {
        if config.repair.keep_original {
            // Preserve the original under the next free *.original.* name.
            let backup = next_original_path(input)?;
            fs::rename(input, &backup).with_context(|| {
                format!("Failed to rename original {}", input.display())
            })?;
        }
        fs::rename(&output.temp_path, input).with_context(|| {
            format!("Failed to move repaired file into place for {}", input.display())
        })?;
        return Ok(());
    }
    fs::rename(&output.temp_path, &output.final_path).with_context(|| {
        format!(
            "Failed to move repaired file to {}",
            output.final_path.display()
        )
    })?;
    Ok(())
}
/// Find a free backup name next to `path`: `name.original.ext` first, then
/// `name.original.1.ext`, `name.original.2.ext`, … up to 1000 attempts.
fn next_original_path(path: &Path) -> Result<PathBuf> {
    let parent = path
        .parent()
        .context("Input file has no parent directory")?;
    let stem = path
        .file_stem()
        .context("Input file has no stem")?
        .to_string_lossy();
    let ext = path.extension().map(|ext| ext.to_string_lossy());
    for idx in 0..1000 {
        let name = match (&ext, idx) {
            (Some(ext), 0) => format!("{}.original.{}", stem, ext),
            (None, 0) => format!("{}.original", stem),
            (Some(ext), n) => format!("{}.original.{}.{}", stem, n, ext),
            (None, n) => format!("{}.original.{}", stem, n),
        };
        let candidate = parent.join(name);
        if !candidate.exists() {
            return Ok(candidate);
        }
    }
    anyhow::bail!("Unable to find available .original name for {}", path.display());
}

View File

@@ -0,0 +1,48 @@
use serde::{Deserialize, Serialize};
use crate::config::FixPolicy;
use crate::rules::FixTier;
pub mod executor;
pub mod planner;
/// One concrete repair step the executor can run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FixAction {
    /// Which ffmpeg strategy to apply.
    pub kind: FixKind,
    /// Reserved for an explicit command line; the planner currently leaves it empty.
    pub command: Vec<String>,
    /// Whether applying this action rewrites the media file.
    pub destructive: bool,
}
/// Repair strategies, serialized snake_case for JSON reports.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum FixKind {
    /// Stream-copy into a fresh container (`-c copy`).
    Remux,
    /// Stream-copy plus `-movflags +faststart`.
    Faststart,
    /// Full re-encode (libx264 video, aac audio) — lossy.
    Reencode,
}
/// What the planner decided to do for one file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FixPlan {
    /// Policy in force when the plan was made.
    pub policy: FixPolicy,
    /// Strategy the planner would choose, even if policy blocks it.
    pub recommended: Option<FixKind>,
    /// Actions the executor may run (empty when blocked or nothing to do).
    pub actions: Vec<FixAction>,
    /// Why no action was planned, when applicable (e.g. safe policy vs re-encode).
    pub blocked_reason: Option<String>,
}
/// Result of attempting (or declining) to execute a fix plan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FixOutcome {
    /// The plan this outcome belongs to.
    pub plan: FixPlan,
    /// Whether ffmpeg was actually run.
    pub applied: bool,
    /// Whether the repaired file passed verification and was finalized.
    pub success: bool,
    /// Human-readable status line.
    pub message: String,
    /// Where the (possibly unverified) output ended up, when any was produced.
    pub output_path: Option<String>,
    /// Set when the produced output failed verification and needs another look.
    pub re_scan_required: bool,
}
/// The most invasive tier present in `tiers` (by `FixTier::rank`),
/// or `FixTier::None` for an empty slice.
pub fn highest_fix_tier(tiers: &[FixTier]) -> FixTier {
    tiers.iter().fold(FixTier::None, |best, &tier| {
        if tier.rank() > best.rank() { tier } else { best }
    })
}

View File

@@ -0,0 +1,67 @@
use crate::config::FixPolicy;
use crate::fix::{FixAction, FixKind, FixOutcome, FixPlan};
use crate::rules::FixTier;
use crate::scan::Issue;
/// Derive a fix plan from scan issues under the given policy.
///
/// Recommendation priority: re-encode > faststart > remux. Previously a
/// faststart hint shadowed a required re-encode, so files with decode-level
/// corruption were "fixed" with a stream copy that cannot remove the
/// corruption (the executor's verification pass would then fail). Faststart
/// still outranks a plain remux because its `-c copy -movflags +faststart`
/// pass is itself a remux.
///
/// Under `FixPolicy::Safe`, a re-encode recommendation produces no action and
/// a `blocked_reason` instead.
pub fn plan_fix(issues: &[Issue], policy: FixPolicy) -> FixPlan {
    let mut has_faststart = false;
    let mut has_remux = false;
    let mut has_reencode = false;
    for issue in issues {
        // "faststart" action hints come from the ruleset's `action` field.
        if let Some(action) = &issue.action {
            if action.eq_ignore_ascii_case("faststart") {
                has_faststart = true;
            }
        }
        match issue.fix_tier {
            FixTier::Remux => has_remux = true,
            FixTier::Reencode => has_reencode = true,
            FixTier::None => {}
        }
    }
    // Highest-impact fix wins; faststart covers the plain-remux case.
    let recommended = if has_reencode {
        Some(FixKind::Reencode)
    } else if has_faststart {
        Some(FixKind::Faststart)
    } else if has_remux {
        Some(FixKind::Remux)
    } else {
        None
    };
    let mut actions = Vec::new();
    let mut blocked_reason = None;
    if let Some(kind) = recommended {
        if kind == FixKind::Reencode && policy == FixPolicy::Safe {
            // Safe policy never re-encodes; surface why nothing was planned.
            blocked_reason = Some("Re-encode required but policy is safe".to_string());
        } else {
            actions.push(FixAction {
                kind,
                command: Vec::new(),
                destructive: true,
            });
        }
    }
    FixPlan {
        policy,
        recommended,
        actions,
        blocked_reason,
    }
}
/// Wrap a plan in a dry-run outcome: nothing applied, nothing executed.
pub fn plan_outcome(plan: FixPlan) -> FixOutcome {
    let message = "Fix plan generated".to_string();
    FixOutcome {
        plan,
        applied: false,
        success: false,
        message,
        output_path: None,
        re_scan_required: false,
    }
}

View File

@@ -0,0 +1,69 @@
use std::path::{Path, PathBuf};
use anyhow::Result;
use globset::{Glob, GlobSetBuilder};
use walkdir::WalkDir;
use crate::config::Config;
/// Expand input paths (files and directories) into the list of scannable
/// files, honoring the include-extension list and exclusion globs.
///
/// Directories are walked recursively when `scan.recursive` is set (following
/// symlinks per `scan.follow_symlinks`), otherwise only their immediate
/// children are listed. Invalid exclude patterns and unreadable directory
/// entries are skipped silently.
pub fn collect_files(paths: &[PathBuf], config: &Config) -> Result<Vec<PathBuf>> {
    // Compile the exclusion globs once up front.
    let mut builder = GlobSetBuilder::new();
    for pattern in &config.scan.exclude {
        if let Ok(glob) = Glob::new(pattern) {
            builder.add(glob);
        }
    }
    let exclude_set = builder.build()?;
    let mut files = Vec::new();
    for input in paths {
        if input.is_file() {
            if should_include(input, config, &exclude_set) {
                files.push(input.clone());
            }
        } else if input.is_dir() && config.scan.recursive {
            // Deep walk; errors on individual entries are ignored.
            let walker = WalkDir::new(input).follow_links(config.scan.follow_symlinks);
            for entry in walker.into_iter().filter_map(Result::ok) {
                if !entry.file_type().is_file() {
                    continue;
                }
                let path = entry.path();
                if should_include(path, config, &exclude_set) {
                    files.push(path.to_path_buf());
                }
            }
        } else if input.is_dir() {
            // Shallow listing of the directory's immediate children.
            for entry in std::fs::read_dir(input)? {
                let path = entry?.path();
                if path.is_file() && should_include(&path, config, &exclude_set) {
                    files.push(path);
                }
            }
        }
    }
    Ok(files)
}
/// A path is scannable when it is not excluded by any glob and carries one
/// of the configured include extensions (compared case-insensitively).
fn should_include(path: &Path, config: &Config, exclude_set: &globset::GlobSet) -> bool {
    // Exclusion globs win over everything else.
    if exclude_set.is_match(path) {
        return false;
    }
    let ext = path
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();
    // Files without an extension are never scanned.
    !ext.is_empty() && config.scan.include_ext.iter().any(|allowed| *allowed == ext)
}

View File

@@ -0,0 +1,3 @@
mod collect;
pub use collect::collect_files;

View File

@@ -0,0 +1,14 @@
pub mod config;
pub mod fix;
pub mod fs;
pub mod report;
pub mod rules;
pub mod scan;
pub mod watch;
pub use config::{Config, ConfigOverrides};
pub use config::FixPolicy;
pub use fix::{FixOutcome, FixPlan};
pub use report::{Report, ScanReport};
pub use rules::{RuleSet, Severity};
pub use scan::{ScanOutcome, ScanRequest};

View File

@@ -0,0 +1,10 @@
use anyhow::Result;
use serde::Serialize;
/// Serialize `value` to a JSON string, pretty-printed when requested.
pub fn render_json<T: Serialize>(value: &T, pretty: bool) -> Result<String> {
    let rendered = if pretty {
        serde_json::to_string_pretty(value)
    } else {
        serde_json::to_string(value)
    };
    Ok(rendered?)
}

View File

@@ -0,0 +1,7 @@
mod json;
mod text;
mod types;
pub use json::render_json;
pub use text::{render_fix_line, render_scan_line, render_summary};
pub use types::{Report, ScanReport};

View File

@@ -0,0 +1,54 @@
use crate::fix::FixOutcome;
use crate::rules::Severity;
use crate::scan::ScanOutcome;
/// One-line human-readable scan status: `[OK] path` when clean, otherwise
/// `[ISSUES] path (N issues, max Severity)`.
pub fn render_scan_line(scan: &ScanOutcome) -> String {
    match scan.issues.len() {
        0 => format!("[OK] {}", scan.path.display()),
        count => format!(
            "[ISSUES] {} ({} issues, max {:?})",
            scan.path.display(),
            count,
            max_severity(scan)
        ),
    }
}
/// One-line fix status: FIXED on success, FAILED when a fix ran but did not
/// verify, SKIPPED when nothing was applied.
pub fn render_fix_line(scan: &ScanOutcome, fix: &FixOutcome) -> String {
    let path = scan.path.display();
    match (fix.applied, fix.success) {
        (_, true) => format!("[FIXED] {}", path),
        (true, false) => format!("[FAILED] {} - {}", path, fix.message),
        (false, false) => format!("[SKIPPED] {} - {}", path, fix.message),
    }
}
/// Aggregate summary line: file/issue counts, plus fixed/failed/skipped
/// counts when fix outcomes are available.
pub fn render_summary(scans: &[ScanOutcome], fixes: Option<&[FixOutcome]>) -> String {
    let with_issues = scans.iter().filter(|scan| !scan.issues.is_empty()).count();
    let mut line = format!("Summary: {} files, {} with issues", scans.len(), with_issues);
    if let Some(fixes) = fixes {
        let fixed = fixes.iter().filter(|fix| fix.success).count();
        let failed = fixes.iter().filter(|fix| fix.applied && !fix.success).count();
        // skipped = outcomes where no fix was applied at all.
        let skipped = fixes.len() - fixes.iter().filter(|fix| fix.applied).count();
        line.push_str(&format!(
            ", {} fixed, {} failed, {} skipped",
            fixed, failed, skipped
        ));
    }
    line
}
/// Highest severity among a scan's issues; Info when there are none.
fn max_severity(scan: &ScanOutcome) -> Severity {
    let mut highest = Severity::Info;
    for issue in &scan.issues {
        if issue.severity.rank() >= highest.rank() {
            highest = issue.severity;
        }
    }
    highest
}

View File

@@ -0,0 +1,15 @@
use serde::{Deserialize, Serialize};
use crate::fix::FixOutcome;
use crate::scan::ScanOutcome;
/// JSON-serializable wrapper for a scan-only run (no fix attempted).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanReport {
    pub scan: ScanOutcome,
}
/// JSON-serializable per-file report: the scan plus an optional fix outcome.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Report {
    pub scan: ScanOutcome,
    /// Present only when a fix was planned/attempted for this file.
    pub fix: Option<FixOutcome>,
}

View File

@@ -0,0 +1,78 @@
use std::fs;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use regex::Regex;
use super::matcher::CompiledRule;
use super::model::{Rule, RuleFile};
/// Load every `*.toml` ruleset in `dir`, concatenating their `[[rule]]` tables.
///
/// A missing directory is not an error (returns no rules). Entries are sorted
/// by path so rule ordering is deterministic across platforms. The extension
/// check is now case-insensitive so `FOO.TOML` (common after copies from
/// case-insensitive filesystems) is no longer silently skipped.
pub fn load_rules_from_dir(dir: &Path) -> Result<Vec<Rule>> {
    if !dir.exists() {
        return Ok(Vec::new());
    }
    let mut entries: Vec<PathBuf> = fs::read_dir(dir)
        .with_context(|| format!("Failed to read ruleset dir {}", dir.display()))?
        .filter_map(|entry| entry.ok())
        .map(|entry| entry.path())
        .filter(|path| {
            path.extension()
                .and_then(|ext| ext.to_str())
                .map(|ext| ext.eq_ignore_ascii_case("toml"))
                .unwrap_or(false)
        })
        .collect();
    entries.sort();
    let mut rules = Vec::new();
    for path in entries {
        let raw = fs::read_to_string(&path)
            .with_context(|| format!("Failed to read ruleset {}", path.display()))?;
        let file: RuleFile = toml::from_str(&raw)
            .with_context(|| format!("Failed to parse ruleset {}", path.display()))?;
        rules.extend(file.rules);
    }
    Ok(rules)
}
/// Compile every rule's regex patterns, failing on the first invalid pattern
/// with the offending rule id in the error context.
pub fn compile_rules(rules: Vec<Rule>) -> Result<Vec<CompiledRule>> {
    rules
        .into_iter()
        .map(|rule| {
            let patterns = rule
                .patterns
                .iter()
                .map(|pattern| Regex::new(pattern))
                .collect::<std::result::Result<Vec<_>, _>>()
                .with_context(|| format!("Invalid regex in rule {}", rule.id))?;
            Ok(CompiledRule { rule, patterns })
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    // End-to-end check of the ruleset format: a [[rule]] table must parse
    // into RuleFile and its patterns must compile.
    #[test]
    fn parses_rule_file() {
        let toml = r#"
[[rule]]
id = "TEST_RULE"
domain = "test"
severity = "low"
confidence = 0.8
fix_tier = "none"
stop_scan = false
patterns = ["(?i)foo"]
notes = "test"
"#;
        let file: RuleFile = toml::from_str(toml).expect("rule file parse");
        assert_eq!(file.rules.len(), 1);
        let compiled = compile_rules(file.rules).expect("compile rules");
        assert_eq!(compiled.len(), 1);
        assert_eq!(compiled[0].rule.id, "TEST_RULE");
    }
}

View File

@@ -0,0 +1,78 @@
use std::collections::HashSet;
use regex::Regex;
use super::model::{FixTier, Rule, Severity};
/// A rule paired with its pre-compiled regex patterns.
#[derive(Debug, Clone)]
pub struct CompiledRule {
    pub rule: Rule,
    // Compiled from rule.patterns by the loader, in the same order.
    pub patterns: Vec<Regex>,
}
/// A rule hit: the rule's metadata copied out, plus matching evidence lines.
#[derive(Debug, Clone)]
pub struct RuleMatch {
    pub rule_id: String,
    pub domain: String,
    pub severity: Severity,
    pub confidence: f32,
    pub fix_tier: FixTier,
    pub stop_scan: bool,
    pub notes: Option<String>,
    pub action: Option<String>,
    /// Up to three input lines that matched one of the rule's patterns.
    pub evidence: Vec<String>,
}
/// Tags describing the file being scanned (e.g. "container:mp4",
/// "codec:h264"), checked against rule `requires`/`excludes` lists.
#[derive(Debug, Default, Clone)]
pub struct RuleContext {
    pub tags: HashSet<String>,
}
impl RuleContext {
    /// Builder-style helper: add one tag and return the updated context.
    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
        let tag = tag.into();
        self.tags.insert(tag);
        self
    }
}
impl CompiledRule {
    /// Evaluate this rule against decoder output `lines` within `context`.
    ///
    /// Returns `None` when the context tags fail the rule's
    /// `requires`/`excludes` constraints or when no pattern matches any line;
    /// otherwise a `RuleMatch` carrying up to three matching lines as evidence.
    pub fn matches(&self, lines: &[String], context: &RuleContext) -> Option<RuleMatch> {
        // Every `requires` tag must be present in the context…
        let requires_met = self
            .rule
            .requires
            .iter()
            .all(|req| context.tags.contains(req));
        if !self.rule.requires.is_empty() && !requires_met {
            return None;
        }
        // …and no `excludes` tag may be present.
        let excluded = self
            .rule
            .excludes
            .iter()
            .any(|ex| context.tags.contains(ex));
        if !self.rule.excludes.is_empty() && excluded {
            return None;
        }
        // Keep at most three matching lines as human-readable evidence.
        let evidence: Vec<String> = lines
            .iter()
            .filter(|line| self.patterns.iter().any(|re| re.is_match(line)))
            .take(3)
            .cloned()
            .collect();
        if evidence.is_empty() {
            return None;
        }
        Some(RuleMatch {
            rule_id: self.rule.id.clone(),
            domain: self.rule.domain.clone(),
            severity: self.rule.severity,
            confidence: self.rule.confidence,
            fix_tier: self.rule.fix_tier,
            stop_scan: self.rule.stop_scan,
            notes: self.rule.notes.clone(),
            action: self.rule.action.clone(),
            evidence,
        })
    }
}

View File

@@ -0,0 +1,109 @@
use std::path::PathBuf;
use anyhow::Result;
use crate::scan::ProbeData;
mod loader;
mod matcher;
mod model;
pub use matcher::{RuleContext, RuleMatch};
pub use model::{FixTier, Severity};
/// All compiled detection rules loaded from the ruleset TOML directory.
#[derive(Debug, Clone)]
pub struct RuleSet {
    pub rules: Vec<matcher::CompiledRule>,
}
impl RuleSet {
    /// Load rulesets from the first candidate directory that yields rules:
    /// `./rulesets` (current dir) first, then `rulesets/` beside the
    /// executable. Returns an empty `RuleSet` when neither location has any.
    pub fn load() -> Result<Self> {
        let mut candidates = Vec::new();
        if let Ok(cwd) = std::env::current_dir() {
            candidates.push(cwd.join("rulesets"));
        }
        if let Ok(exe) = std::env::current_exe() {
            if let Some(exe_dir) = exe.parent() {
                candidates.push(exe_dir.join("rulesets"));
            }
        }
        for candidate in candidates {
            let rules = loader::load_rules_from_dir(&candidate)?;
            if rules.is_empty() {
                continue;
            }
            return Ok(Self {
                rules: loader::compile_rules(rules)?,
            });
        }
        Ok(Self { rules: Vec::new() })
    }
    /// Run every compiled rule over `lines`, collecting all hits.
    pub fn match_lines(&self, lines: &[String], context: &RuleContext) -> Vec<RuleMatch> {
        self.rules
            .iter()
            .filter_map(|rule| rule.matches(lines, context))
            .collect()
    }
    /// The hit with the highest severity, breaking ties by confidence.
    /// Incomparable confidences (NaN) compare as equal.
    pub fn best_match<'a>(&self, matches: &'a [RuleMatch]) -> Option<&'a RuleMatch> {
        matches.iter().max_by(|a, b| {
            let by_severity = a.severity.rank().cmp(&b.severity.rank());
            by_severity.then_with(|| {
                a.confidence
                    .partial_cmp(&b.confidence)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
        })
    }
}
/// Build rule-matching tags from probe data: one `container:<format>` tag
/// plus `stream:<type>` / `codec:<name>` tags per stream, all lowercased.
pub fn build_context(probe: &ProbeData) -> RuleContext {
    let mut context = RuleContext::default();
    if let Some(name) = probe.format_name.as_deref() {
        context = context.with_tag(format!("container:{}", name.to_lowercase()));
    }
    for stream in &probe.streams {
        if let Some(kind) = stream.codec_type.as_deref() {
            context = context.with_tag(format!("stream:{}", kind.to_lowercase()));
        }
        if let Some(codec) = stream.codec_name.as_deref() {
            context = context.with_tag(format!("codec:{}", codec.to_lowercase()));
        }
    }
    context
}
/// Ruleset directory to show in diagnostics: `./rulesets` if it exists,
/// otherwise `rulesets/` beside the executable (whether or not it exists).
pub fn ruleset_dir_for_display() -> Result<PathBuf> {
    let cwd_candidate = std::env::current_dir()
        .ok()
        .map(|cwd| cwd.join("rulesets"))
        .filter(|dir| dir.exists());
    if let Some(dir) = cwd_candidate {
        return Ok(dir);
    }
    std::env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(|dir| dir.join("rulesets")))
        .ok_or_else(|| anyhow::anyhow!("No ruleset directory found"))
}
/// Error out with a helpful location hint when no rules were loaded.
pub fn ensure_ruleset_loaded(ruleset: &RuleSet) -> Result<()> {
    if !ruleset.rules.is_empty() {
        return Ok(());
    }
    let dir = ruleset_dir_for_display().unwrap_or_else(|_| PathBuf::from("rulesets"));
    Err(anyhow::anyhow!(
        "No rulesets found. Expected TOML files in {}",
        dir.display()
    ))
}

View File

@@ -0,0 +1,72 @@
use serde::Deserialize;
/// Issue severity, serialized lowercase in rulesets and reports.
/// Ordered Info < Low < Medium < High < Severe via `rank()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, serde::Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    Info,
    Low,
    Medium,
    High,
    Severe,
}
impl Severity {
pub fn rank(self) -> u8 {
match self {
Severity::Info => 0,
Severity::Low => 1,
Severity::Medium => 2,
Severity::High => 3,
Severity::Severe => 4,
}
}
}
/// How invasive a repair an issue calls for, serialized lowercase.
/// Ordered None < Remux < Reencode via `rank()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, serde::Serialize)]
#[serde(rename_all = "lowercase")]
pub enum FixTier {
    /// No fix needed or possible for this issue.
    None,
    /// Lossless container rewrite suffices.
    Remux,
    /// Lossy re-encode required.
    Reencode,
}
impl FixTier {
pub fn rank(self) -> u8 {
match self {
FixTier::None => 0,
FixTier::Remux => 1,
FixTier::Reencode => 2,
}
}
}
#[derive(Debug, Clone, Deserialize)]
pub struct Rule {
pub id: String,
pub domain: String,
pub severity: Severity,
#[serde(default = "default_confidence")]
pub confidence: f32,
pub fix_tier: FixTier,
#[serde(default)]
pub stop_scan: bool,
pub patterns: Vec<String>,
#[serde(default)]
pub notes: Option<String>,
#[serde(default)]
pub action: Option<String>,
#[serde(default)]
pub requires: Vec<String>,
#[serde(default)]
pub excludes: Vec<String>,
}
// Serde default for `Rule::confidence` when a ruleset omits the key.
fn default_confidence() -> f32 {
    0.5
}
/// One ruleset TOML file: a sequence of `[[rule]]` tables.
#[derive(Debug, Clone, Deserialize)]
pub struct RuleFile {
    #[serde(rename = "rule")]
    pub rules: Vec<Rule>,
}

View File

@@ -0,0 +1,72 @@
use std::io::{BufRead, BufReader};
use std::path::Path;
use std::process::{Command, Stdio};
use std::sync::{Arc, atomic::{AtomicBool, Ordering}};
use anyhow::{Context, Result};
use crate::rules::RuleSet;
/// Result of a full-decode pass: collected ffmpeg stderr error lines, plus
/// whether the decode was cut short by a stop_scan rule.
#[derive(Debug)]
pub struct DecodeOutput {
    pub lines: Vec<String>,
    /// True when a stop_scan rule matched and ffmpeg was killed early.
    pub early_stop: bool,
}
/// Run a full null-muxed decode (`ffmpeg -v error -i <path> -f null -`) and
/// collect the error lines ffmpeg prints on stderr.
///
/// Stderr is read sequentially on this thread, so a plain `bool` tracks
/// early stopping — the previous `Arc<AtomicBool>` added atomic bookkeeping
/// with no second thread to share it with.
///
/// When a line matches any `stop_scan` rule, ffmpeg is killed immediately
/// and `early_stop` is reported so callers know the output is truncated.
pub fn run_decode(path: &Path, ffmpeg_path: &str, ruleset: &RuleSet) -> Result<DecodeOutput> {
    let mut child = Command::new(ffmpeg_path)
        .arg("-v")
        .arg("error")
        .arg("-i")
        .arg(path)
        .arg("-f")
        .arg("null")
        .arg("-")
        .stderr(Stdio::piped())
        .stdout(Stdio::null())
        .spawn()
        .with_context(|| format!("Failed to run ffmpeg decode for {}", path.display()))?;
    let stderr = child.stderr.take().context("Failed to capture ffmpeg stderr")?;
    let reader = BufReader::new(stderr);
    let mut lines = Vec::new();
    let mut early_stop = false;
    for line in reader.lines() {
        // Best-effort read: a failed line read is treated as empty and skipped.
        let line = line.unwrap_or_default();
        if line.is_empty() {
            continue;
        }
        lines.push(line.clone());
        if should_stop(&line, ruleset) {
            early_stop = true;
            // Fatal signal matched — stop decoding the rest of the file.
            let _ = child.kill();
            break;
        }
    }
    // Reap the child to avoid leaving a zombie process.
    let _ = child.wait();
    Ok(DecodeOutput { lines, early_stop })
}
/// True when `line` matches any pattern of a rule flagged `stop_scan`.
fn should_stop(line: &str, ruleset: &RuleSet) -> bool {
    ruleset.rules.iter().any(|compiled| {
        compiled.rule.stop_scan && compiled.patterns.iter().any(|re| re.is_match(line))
    })
}

View File

@@ -0,0 +1,83 @@
use std::path::Path;
use std::process::Command;
use anyhow::{Context, Result};
use serde_json::Value;
use super::types::{ProbeData, StreamInfo};
pub fn run_ffprobe(path: &Path, ffprobe_path: &str) -> Result<ProbeData> {
let output = Command::new(ffprobe_path)
.arg("-v")
.arg("error")
.arg("-print_format")
.arg("json")
.arg("-show_format")
.arg("-show_streams")
.arg(path)
.output()
.with_context(|| format!("Failed to run ffprobe on {}", path.display()))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!(
"ffprobe failed for {}: {}",
path.display(),
stderr.trim()
);
}
let raw: Value = serde_json::from_slice(&output.stdout)
.with_context(|| format!("Failed to parse ffprobe output for {}", path.display()))?;
let format_name = raw
.get("format")
.and_then(|fmt| fmt.get("format_name"))
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let duration = raw
.get("format")
.and_then(|fmt| fmt.get("duration"))
.and_then(|v| v.as_str())
.and_then(|s| s.parse::<f64>().ok());
let streams = raw
.get("streams")
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter()
.map(|stream| StreamInfo {
codec_type: stream
.get("codec_type")
.and_then(|v| v.as_str())
.map(|s| s.to_string()),
codec_name: stream
.get("codec_name")
.and_then(|v| v.as_str())
.map(|s| s.to_string()),
width: stream.get("width").and_then(|v| v.as_u64()).map(|v| v as u32),
height: stream
.get("height")
.and_then(|v| v.as_u64())
.map(|v| v as u32),
sample_rate: stream
.get("sample_rate")
.and_then(|v| v.as_str())
.map(|s| s.to_string()),
channels: stream
.get("channels")
.and_then(|v| v.as_u64())
.map(|v| v as u32),
})
.collect::<Vec<_>>()
})
.unwrap_or_default();
Ok(ProbeData {
format_name,
duration,
streams,
raw,
})
}

View File

@@ -0,0 +1,48 @@
use std::path::Path;
use anyhow::Result;
use crate::config::Config;
use crate::rules::{build_context, RuleMatch, RuleSet};
mod decode;
mod ffprobe;
mod types;
pub use types::{Issue, ProbeData, ScanOutcome, ScanRequest};
pub fn scan_file(path: &Path, config: &Config, ruleset: &RuleSet) -> Result<ScanOutcome> {
let probe = ffprobe::run_ffprobe(path, &config.ffprobe_path)?;
let decode = decode::run_decode(path, &config.ffmpeg_path, ruleset)?;
let context = build_context(&probe);
let matches = ruleset.match_lines(&decode.lines, &context);
let issues = matches
.iter()
.map(|hit| issue_from_match(hit))
.collect::<Vec<_>>();
Ok(ScanOutcome {
path: path.to_path_buf(),
probe,
issues,
decode_errors: decode.lines,
early_stop: decode.early_stop,
})
}
/// Convert a rule hit into a report-friendly `Issue`, falling back to the
/// rule id as the message when the rule carries no notes.
fn issue_from_match(hit: &RuleMatch) -> Issue {
    let message = match &hit.notes {
        Some(notes) => notes.clone(),
        None => hit.rule_id.clone(),
    };
    Issue {
        code: hit.rule_id.clone(),
        severity: hit.severity,
        fix_tier: hit.fix_tier,
        message,
        evidence: hit.evidence.clone(),
        action: hit.action.clone(),
    }
}

View File

@@ -0,0 +1,47 @@
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use crate::rules::{FixTier, Severity};
/// Per-stream subset of ffprobe output; every field is optional because
/// ffprobe omits keys that don't apply to a stream type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamInfo {
    /// e.g. "video" / "audio", as reported by ffprobe's codec_type.
    pub codec_type: Option<String>,
    pub codec_name: Option<String>,
    pub width: Option<u32>,
    pub height: Option<u32>,
    /// Kept as a string — ffprobe reports sample_rate as a JSON string.
    pub sample_rate: Option<String>,
    pub channels: Option<u32>,
}
/// Distilled ffprobe result for one file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProbeData {
    /// Container format name(s) from ffprobe's format.format_name.
    pub format_name: Option<String>,
    /// Duration in seconds, parsed from ffprobe's string value.
    pub duration: Option<f64>,
    pub streams: Vec<StreamInfo>,
    /// Complete raw ffprobe JSON, preserved for downstream consumers.
    pub raw: serde_json::Value,
}
/// One detected problem, derived from a rule match during scanning.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Issue {
    /// Rule id that produced this issue.
    pub code: String,
    pub severity: Severity,
    /// Repair invasiveness the rule calls for; drives fix planning.
    pub fix_tier: FixTier,
    /// Rule notes, or the rule id when no notes exist.
    pub message: String,
    /// Up to three decoder output lines that matched.
    pub evidence: Vec<String>,
    /// Optional planner hint (e.g. "faststart").
    pub action: Option<String>,
}
/// Complete scan result for one file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanOutcome {
    pub path: PathBuf,
    pub probe: ProbeData,
    pub issues: Vec<Issue>,
    /// Raw ffmpeg stderr error lines collected during the decode pass.
    pub decode_errors: Vec<String>,
    /// True when the decode was aborted early by a stop_scan rule.
    pub early_stop: bool,
}
/// A queued request to scan one file.
#[derive(Debug, Clone)]
pub struct ScanRequest {
    pub path: PathBuf,
}

View File

@@ -0,0 +1,109 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::mpsc::{self, RecvTimeoutError};
use std::time::{Duration, Instant};
use anyhow::Result;
use notify::{RecommendedWatcher, RecursiveMode, Watcher, EventKind};
use crate::config::Config;
/// Snapshot of a watched file taken at its most recent filesystem event,
/// used to decide when the file has "settled" (size/mtime unchanged).
#[derive(Debug)]
struct WatchEntry {
    // When the last Create/Modify event for this path arrived.
    last_event: Instant,
    // File size at that moment.
    size: u64,
    // Modification time at that moment.
    mtime: std::time::SystemTime,
}
/// Watch `paths` for filesystem changes and invoke `handler` for every file
/// that has settled: no further events, and size/mtime unchanged, for
/// `config.watch.settle_seconds` seconds.
///
/// Runs until the notify event channel disconnects; `handler` is called
/// synchronously on this thread. Files matching `should_ignore` are skipped.
pub fn watch_paths<F>(paths: &[PathBuf], config: &Config, mut handler: F) -> Result<()>
where
    F: FnMut(PathBuf),
{
    let (tx, rx) = mpsc::channel();
    let mut watcher: RecommendedWatcher = Watcher::new(tx, notify::Config::default())?;
    for path in paths {
        // Directories are watched recursively; individual files are not.
        let mode = if path.is_dir() {
            RecursiveMode::Recursive
        } else {
            RecursiveMode::NonRecursive
        };
        watcher.watch(path, mode)?;
    }
    let mut entries: HashMap<PathBuf, WatchEntry> = HashMap::new();
    let settle = Duration::from_secs(config.watch.settle_seconds);
    loop {
        match rx.recv_timeout(Duration::from_secs(1)) {
            Ok(event) => {
                // Watcher-level errors inside the channel are silently dropped.
                if let Ok(event) = event {
                    // Only creations and modifications can make a file "new".
                    if matches!(event.kind, EventKind::Modify(_) | EventKind::Create(_)) {
                        for path in event.paths {
                            if should_ignore(&path, config) {
                                continue;
                            }
                            if let Ok(metadata) = std::fs::metadata(&path) {
                                if !metadata.is_file() {
                                    continue;
                                }
                                // Record/refresh the snapshot used by the settle check.
                                let entry = WatchEntry {
                                    last_event: Instant::now(),
                                    size: metadata.len(),
                                    mtime: metadata.modified().unwrap_or_else(|_| std::time::SystemTime::UNIX_EPOCH),
                                };
                                entries.insert(path, entry);
                            }
                        }
                    }
                }
            }
            Err(RecvTimeoutError::Timeout) => {
                // tick — fall through to the settle check below once a second.
            }
            Err(RecvTimeoutError::Disconnected) => break,
        }
        // A file is ready when the settle window has elapsed AND its current
        // size/mtime still match the snapshot from its last event.
        let ready: Vec<PathBuf> = entries
            .iter()
            .filter_map(|(path, entry)| {
                if entry.last_event.elapsed() < settle {
                    return None;
                }
                let metadata = std::fs::metadata(path).ok()?;
                if !metadata.is_file() {
                    return None;
                }
                let size = metadata.len();
                let mtime = metadata.modified().ok()?;
                if size == entry.size && mtime == entry.mtime {
                    Some(path.clone())
                } else {
                    None
                }
            })
            .collect();
        for path in ready {
            entries.remove(&path);
            handler(path);
        }
    }
    Ok(())
}
/// A watched path is ignored when it has no usable extension, its extension
/// is on the watch ignore list, or it is not a scannable video extension.
fn should_ignore(path: &Path, config: &Config) -> bool {
    let ext = match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext.to_lowercase(),
        None => return true,
    };
    if ext.is_empty() {
        return true;
    }
    let on_ignore_list = config.watch.ignore_ext.iter().any(|e| *e == ext);
    let scannable = config.scan.include_ext.iter().any(|e| *e == ext);
    on_ignore_list || !scannable
}