Files
mov-renamarr/src/pipeline.rs

580 lines
18 KiB
Rust

use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::io;
use anyhow::{anyhow, Context, Result};
use rayon::prelude::*;
use walkdir::WalkDir;
use crate::config::Settings;
use crate::fsops::{self, CollisionPolicy, OpMode};
use crate::llm::{LlmClient, LlmHints};
use crate::media;
use crate::metadata::{MatchOutcome, MetadataClient, MovieMetadata, Provider, ScoredCandidate};
use crate::output::{Output, StatusKind};
use crate::parse::{parse_filename, FileHints};
use crate::report::{summarize_candidates, Report, ReportEntry};
use crate::utils::{sanitize_filename, Semaphore};
/// Runs the whole rename pipeline and returns the aggregated [`Report`].
///
/// Steps, in order: check that `ffprobe` can be executed, build the shared [`Output`], apply the
/// interactive-mode overrides, discover video files under `settings.input`, build the optional
/// metadata and LLM clients, then process every file on a dedicated rayon thread pool sized by
/// `settings.jobs`.
///
/// A file whose processing returns an error does not abort the run: it is recorded as a
/// `"failed"` entry whose reason is the error text.
///
/// # Errors
///
/// Returns an error when `ffprobe` is unavailable, file discovery fails, the metadata client
/// cannot be created or validated, the LLM client cannot be created, or the thread pool cannot
/// be built.
pub fn run(mut settings: Settings) -> Result<Report> {
    ensure_ffprobe()?;
    let output = Arc::new(Output::new(
        &settings.color,
        settings.verbose,
        settings.dry_run_summary,
    ));
    if settings.no_lookup {
        output.warn("No-lookup mode enabled: using filename/LLM only (no external providers).");
    }
    // Interactive mode forces a single worker (presumably because prompts share the terminal)
    // and at least one network job. This is applied BEFORE the verbose summary below so the
    // logged numbers are the ones the run actually uses.
    if settings.interactive {
        settings.jobs = 1;
        settings.net_jobs = settings.net_jobs.max(1);
    }
    if settings.verbose {
        output.info(&format!(
            "jobs: {} | net-jobs: {} | report format: {:?}",
            settings.jobs, settings.net_jobs, settings.report_format
        ));
    }
    let files = discover_files(&settings.input, &settings.output)?;
    let total = files.len();
    if total == 0 {
        output.warn("no video files found");
        return Ok(Report::default());
    }
    // From here on the settings are read-only and shared with the worker threads.
    let settings = Arc::new(settings);
    let net_sem = Arc::new(Semaphore::new(settings.net_jobs));
    let metadata = if settings.no_lookup {
        None
    } else {
        let client = Arc::new(MetadataClient::new(settings.clone(), net_sem)?);
        client.validate()?;
        Some(client)
    };
    let llm = build_llm_client(&settings, &output)?;
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(settings.jobs)
        .build()
        .context("failed to build thread pool")?;
    let results: Vec<ReportEntry> = pool.install(|| {
        files
            .par_iter()
            .enumerate()
            .map(|(idx, path)| {
                process_file(
                    idx + 1, // status lines are numbered from 1
                    total,
                    path,
                    settings.clone(),
                    metadata.clone(),
                    llm.clone(),
                    output.clone(),
                )
                // An error from one file becomes a "failed" entry instead of ending the run.
                .unwrap_or_else(|err| ReportEntry {
                    input: path.display().to_string(),
                    status: "failed".to_string(),
                    provider: None,
                    result: None,
                    output: None,
                    reason: Some(err.to_string()),
                    candidates: Vec::new(),
                })
            })
            .collect()
    });
    let mut report = Report::default();
    for entry in results {
        report.record(entry);
    }
    Ok(report)
}
/// Checks that `ffprobe -version` can be spawned and exits successfully.
///
/// # Errors
///
/// Returns the same "ffprobe not found" error whether the process could not be started or it
/// exited with a non-success status.
fn ensure_ffprobe() -> Result<()> {
    let probe_works = std::process::Command::new("ffprobe")
        .arg("-version")
        .output()
        .map(|run| run.status.success())
        .unwrap_or(false);
    if probe_works {
        Ok(())
    } else {
        Err(anyhow!(
            "ffprobe not found. Please install ffmpeg/ffprobe and ensure it is in PATH."
        ))
    }
}
/// Recursively collects every video file below `input`, following symbolic links.
///
/// When `output` differs from `input`, entries whose path starts with `output` are left out.
/// Only regular files accepted by `is_video_file` are returned.
///
/// # Errors
///
/// Stops at, and returns, the first error reported by the directory walker.
fn discover_files(input: &Path, output: &Path) -> Result<Vec<PathBuf>> {
    let output_is_separate = output != input;
    let mut found = Vec::new();
    for item in WalkDir::new(input).follow_links(true) {
        let item = item?;
        let candidate = item.path();
        let inside_output = output_is_separate && candidate.starts_with(output);
        if item.file_type().is_file() && !inside_output && is_video_file(candidate) {
            found.push(candidate.to_path_buf());
        }
    }
    Ok(found)
}
/// Reports whether `path` has one of the recognised video extensions.
///
/// The comparison ignores ASCII case. Paths without an extension, or whose extension is not
/// valid UTF-8, are not video files.
fn is_video_file(path: &Path) -> bool {
    const VIDEO_EXTENSIONS: [&str; 11] = [
        "mkv", "mp4", "avi", "mov", "m4v", "mpg", "mpeg", "wmv", "webm", "ts", "m2ts",
    ];
    path.extension()
        .and_then(|raw| raw.to_str())
        .map_or(false, |ext| {
            VIDEO_EXTENSIONS
                .iter()
                .any(|&known| ext.eq_ignore_ascii_case(known))
        })
}
/// Processes one discovered file end to end and returns its report entry.
///
/// Flow: parse hints from the file name (optionally merged with LLM hints), probe the file,
/// find a metadata match (offline or through the metadata client), optionally let the user
/// choose interactively, build the destination path, then either report a dry run or execute
/// the copy/move/rename.
///
/// `index` and `total` are only forwarded to `Output::status_line`.
///
/// # Errors
///
/// Probe failures and metadata lookup failures are NOT errors: they are returned as
/// `Ok` entries with status `"failed"`. `Err` is returned when the metadata client is missing
/// although lookups are enabled, or when `match_offline`, `interactive_choice`,
/// `write_sidecar_note` or `fsops::execute` fail.
fn process_file(
index: usize,
total: usize,
path: &Path,
settings: Arc<Settings>,
metadata: Option<Arc<MetadataClient>>,
llm: Option<Arc<LlmClient>>,
output: Arc<Output>,
) -> Result<ReportEntry> {
// Lossy UTF-8 file name for display and LLM parsing; empty when the path has no file name.
let filename = path.file_name().unwrap_or_default().to_string_lossy().to_string();
let mut hints = parse_filename(path);
// Merge LLM hints only when a client exists and the mode is not Off; an LLM failure is
// logged as a warning and the heuristic hints are kept.
if let Some(llm) = &llm {
if settings.llm.mode != crate::cli::LlmMode::Off {
if let Ok(llm_hints) = llm.parse_filename(&filename) {
merge_llm_hints(&mut hints, llm_hints, settings.llm.mode.clone());
} else {
output.warn(&format!("LLM parse failed for {filename}, using heuristic parse"));
}
}
}
// A probe failure ends processing of this file with an Ok "failed" entry.
let media = match media::probe(path) {
Ok(info) => info,
Err(err) => {
output.status_line(
index,
total,
StatusKind::Failed,
&filename,
None,
"ffprobe failed",
None,
);
return Ok(ReportEntry {
input: path.display().to_string(),
status: "failed".to_string(),
provider: None,
result: None,
output: None,
reason: Some(err.to_string()),
candidates: Vec::new(),
});
}
};
// Duration in seconds converted to minutes, rounded to the nearest whole minute.
let runtime_minutes = media
.duration_seconds
.map(|seconds| (seconds / 60.0).round() as u32);
// No-lookup mode matches from the hints alone (no candidates); otherwise ask the metadata
// client. A failed lookup is reported as an Ok "failed" entry.
let outcome = if settings.no_lookup {
MatchOutcome {
best: match_offline(&hints, settings.interactive)?,
candidates: Vec::new(),
}
} else {
match metadata
.as_ref()
.ok_or_else(|| anyhow!("metadata client unavailable"))?
.match_movie(&hints, runtime_minutes)
{
Ok(outcome) => outcome,
Err(err) => {
output.status_line(
index,
total,
StatusKind::Failed,
&filename,
None,
"metadata lookup failed",
None,
);
return Ok(ReportEntry {
input: path.display().to_string(),
status: "failed".to_string(),
provider: None,
result: None,
output: None,
reason: Some(err.to_string()),
candidates: Vec::new(),
});
}
}
};
// Start from the best match; with lookups enabled, interactive mode replaces it with
// whatever `interactive_choice` returns.
let mut chosen = outcome.best.clone();
if settings.interactive && !settings.no_lookup {
let client = metadata.as_ref().ok_or_else(|| anyhow!("metadata client unavailable"))?;
chosen = interactive_choice(&outcome, path, client)?;
}
// Nothing chosen: record the file as skipped, with a reason that depends on the mode.
if chosen.is_none() {
let reason = if settings.no_lookup {
if hints.title.is_none() || hints.year.is_none() {
"no-lookup missing title/year".to_string()
} else {
"no-lookup skipped".to_string()
}
} else {
"no match above threshold".to_string()
};
output.status_line(
index,
total,
StatusKind::Skipped,
&filename,
None,
"no match",
None,
);
// With `explain`, print a candidate summary (second argument 5; presumably a limit —
// confirm against `summarize_candidates`).
if settings.explain && !outcome.candidates.is_empty() {
output.detail(" Candidates:");
for candidate in summarize_candidates(&outcome.candidates, 5) {
output.detail(&format!(
" - {} ({}) [{}] score {:.1}",
candidate.title,
candidate.year.map(|y| y.to_string()).unwrap_or_else(|| "?".into()),
candidate.provider,
candidate.score * 100.0
));
}
}
let entry = ReportEntry {
input: path.display().to_string(),
status: "skipped".to_string(),
provider: None,
result: None,
output: None,
reason: Some(reason),
candidates: summarize_candidates(&outcome.candidates, 3),
};
if settings.sidecar_notes {
write_sidecar_note(path, &entry)?;
}
return Ok(entry);
}
// Cannot panic: the `None` case returned above. Note that `metadata` now shadows the
// client parameter and holds the chosen movie.
let metadata = chosen.unwrap();
let quality = media::quality_tag(&media, &settings.quality_tags);
let output_path = build_output_path(&metadata, &settings, path, quality.as_deref());
// Dry run: report the planned destination with status "renamed" and reason "dry-run";
// no file operation is executed.
if settings.dry_run {
output.status_line(
index,
total,
StatusKind::Renamed,
&filename,
Some(metadata.provider.as_str()),
"dry-run",
Some(&output_path.display().to_string()),
);
return Ok(ReportEntry {
input: path.display().to_string(),
status: "renamed".to_string(),
provider: Some(metadata.provider.as_str().to_string()),
result: Some(format!("{} ({})", metadata.title, metadata.year)),
output: Some(output_path.display().to_string()),
reason: Some("dry-run".to_string()),
candidates: Vec::new(),
});
}
// Operation precedence: move_files, then rename_in_place, otherwise copy.
let op_mode = if settings.move_files {
OpMode::Move
} else if settings.rename_in_place {
OpMode::RenameInPlace
} else {
OpMode::Copy
};
// Collision precedence: overwrite, then suffix, otherwise skip.
let policy = if settings.overwrite {
CollisionPolicy::Overwrite
} else if settings.suffix {
CollisionPolicy::Suffix
} else {
CollisionPolicy::Skip
};
// `outcome` is rebound here: from this point it is the file-operation result, not the
// match outcome. Errors from the operation propagate to the caller.
let outcome = fsops::execute(path, &output_path, op_mode, policy, settings.sidecars)?;
// No final path: the operation was skipped. The status line says "destination exists";
// the report reason is whatever `fsops::execute` returned in `skipped_reason`.
if outcome.final_path.is_none() {
output.status_line(
index,
total,
StatusKind::Skipped,
&filename,
Some(metadata.provider.as_str()),
"destination exists",
None,
);
let entry = ReportEntry {
input: path.display().to_string(),
status: "skipped".to_string(),
provider: Some(metadata.provider.as_str().to_string()),
result: Some(format!("{} ({})", metadata.title, metadata.year)),
output: None,
reason: outcome.skipped_reason,
candidates: Vec::new(),
};
if settings.sidecar_notes {
write_sidecar_note(path, &entry)?;
}
return Ok(entry);
}
// Cannot panic: the `None` case returned above.
let final_path = outcome.final_path.unwrap();
output.status_line(
index,
total,
StatusKind::Renamed,
&filename,
Some(metadata.provider.as_str()),
"renamed",
Some(&final_path.display().to_string()),
);
Ok(ReportEntry {
input: path.display().to_string(),
status: "renamed".to_string(),
provider: Some(metadata.provider.as_str().to_string()),
result: Some(format!("{} ({})", metadata.title, metadata.year)),
output: Some(final_path.display().to_string()),
reason: None,
candidates: Vec::new(),
})
}
/// Folds hints produced by the LLM into the heuristic `hints`.
///
/// * Title: adopted (together with a freshly normalized title) when `hints` has no title or
///   `mode` is `LlmMode::Parse`; otherwise, if it differs from the current title, it is appended
///   to `alt_titles`.
/// * Year: adopted when `hints` has no year or `mode` is `LlmMode::Parse`.
/// * Alternative titles from the LLM are always appended.
fn merge_llm_hints(hints: &mut FileHints, llm_hints: LlmHints, mode: crate::cli::LlmMode) {
    let llm_overrides = mode == crate::cli::LlmMode::Parse;

    if let Some(llm_title) = llm_hints.title {
        let adopt = hints.title.is_none() || llm_overrides;
        if adopt {
            hints.normalized_title = Some(crate::utils::normalize_title(&llm_title));
            hints.title = Some(llm_title);
        } else if hints.title.as_deref() != Some(llm_title.as_str()) {
            hints.alt_titles.push(llm_title);
        }
    }

    match llm_hints.year {
        Some(llm_year) if hints.year.is_none() || llm_overrides => {
            hints.year = Some(llm_year);
        }
        _ => {}
    }

    // Extending with an empty list is a no-op, so no emptiness check is needed.
    hints.alt_titles.extend(llm_hints.alt_titles);
}
/// Builds metadata without contacting any provider.
///
/// When `hints` carries both a title and a year, they are returned as `Provider::Parsed`.
/// Otherwise, in interactive mode, the user is prompted for a title and a year; the result is
/// `Provider::Manual` metadata, or `None` if the year is not a valid integer. Outside
/// interactive mode the result is `None`.
///
/// # Errors
///
/// Propagates terminal I/O errors from `prompt`.
fn match_offline(hints: &FileHints, interactive: bool) -> Result<Option<MovieMetadata>> {
    if let Some((parsed_title, parsed_year)) = hints.title.as_ref().zip(hints.year) {
        return Ok(Some(MovieMetadata {
            title: parsed_title.clone(),
            year: parsed_year,
            tmdb_id: None,
            imdb_id: None,
            provider: Provider::Parsed,
            runtime_minutes: None,
        }));
    }
    if !interactive {
        return Ok(None);
    }
    let title = prompt("Title")?;
    let year_text = prompt("Year")?;
    let manual = year_text.parse::<i32>().ok().map(|year| MovieMetadata {
        title,
        year,
        tmdb_id: None,
        imdb_id: None,
        provider: Provider::Manual,
        runtime_minutes: None,
    });
    Ok(manual)
}
/// Computes the destination path for a matched movie.
///
/// Layout: `<settings.output>/<Title (Year)>/<Title (Year)>[ [quality]][ [id]][.ext]`.
/// The `Title (Year)` part is passed through `sanitize_filename`; the quality tag, the id tag
/// (only when `settings.include_id` is set and `id_tag` yields one) and the source extension
/// are appended as given.
fn build_output_path(
    metadata: &MovieMetadata,
    settings: &Settings,
    source: &Path,
    quality: Option<&str>,
) -> PathBuf {
    let folder = sanitize_filename(&format!("{} ({})", metadata.title, metadata.year));

    let mut filename = folder.clone();
    if let Some(tag) = quality {
        filename.push_str(" [");
        filename.push_str(tag);
        filename.push(']');
    }
    if settings.include_id {
        if let Some(tag) = id_tag(metadata) {
            filename.push_str(" [");
            filename.push_str(&tag);
            filename.push(']');
        }
    }
    // Keep the source extension; a missing, non-UTF-8 or empty extension adds nothing.
    match source.extension().and_then(|raw| raw.to_str()) {
        Some(ext) if !ext.is_empty() => {
            filename.push('.');
            filename.push_str(ext);
        }
        _ => {}
    }

    settings.output.join(folder).join(filename)
}
/// Returns the provider-specific id tag for a movie, if it has one.
///
/// TMDb matches yield `tmdb-<id>`, OMDb matches yield `imdb-<id>`; parsed and manual matches
/// never have a tag. `None` is also returned when the corresponding id is absent.
fn id_tag(metadata: &MovieMetadata) -> Option<String> {
    match metadata.provider {
        Provider::Parsed | Provider::Manual => None,
        Provider::Omdb => {
            let imdb = metadata.imdb_id.as_ref()?;
            Some(format!("imdb-{imdb}"))
        }
        Provider::Tmdb => {
            let tmdb = metadata.tmdb_id?;
            Some(format!("tmdb-{tmdb}"))
        }
    }
}
/// Lets the user resolve an ambiguous match on the terminal.
///
/// Returns `outcome.best` unchanged when there are no candidates, or when the candidates are
/// not ambiguous (see `is_ambiguous`) and a best match exists. Otherwise the first
/// `MAX_LISTED` candidates are printed together with `s` (skip) and `m` (manual) options, and
/// one line is read from stdin:
///
/// * a number in the listed range selects that candidate, resolved through
///   `MetadataClient::resolve_candidate`; if resolving fails, metadata is built from the
///   candidate's own title and year (year `0` when unknown);
/// * `s` returns `None`;
/// * `m` prompts for title and year and returns manual metadata, or `None` if the year is not
///   a valid integer;
/// * anything else, including `0` or a number beyond the listed range, falls back to
///   `outcome.best`.
///
/// # Errors
///
/// Returns an error when flushing stdout or reading stdin fails.
fn interactive_choice(
    outcome: &MatchOutcome,
    path: &Path,
    metadata: &MetadataClient,
) -> Result<Option<MovieMetadata>> {
    /// How many candidates are shown to, and selectable by, the user.
    const MAX_LISTED: usize = 3;

    if outcome.candidates.is_empty() {
        return Ok(outcome.best.clone());
    }
    let ambiguous = is_ambiguous(&outcome.candidates);
    if !ambiguous && outcome.best.is_some() {
        return Ok(outcome.best.clone());
    }

    // Only the candidates that are actually printed may be selected by number.
    let listed = &outcome.candidates[..outcome.candidates.len().min(MAX_LISTED)];

    let filename = path.file_name().unwrap_or_default().to_string_lossy();
    println!("Ambiguous match for {filename}");
    for (idx, candidate) in listed.iter().enumerate() {
        let label = format!(
            " {}) {} ({}) [{}] score {:.1}",
            idx + 1,
            candidate.candidate.title,
            candidate.candidate.year.unwrap_or(0),
            candidate.candidate.provider.as_str(),
            candidate.score * 100.0
        );
        println!("{label}");
    }
    println!(" s) skip");
    println!(" m) manual title/year");
    print!("Choose: ");
    io::Write::flush(&mut std::io::stdout())?;

    let mut choice = String::new();
    std::io::stdin().read_line(&mut choice)?;
    let choice = choice.trim();

    if choice.eq_ignore_ascii_case("s") {
        return Ok(None);
    }
    if choice.eq_ignore_ascii_case("m") {
        let title = prompt("Title")?;
        let year = prompt("Year")?;
        if let Ok(year) = year.parse::<i32>() {
            return Ok(Some(MovieMetadata {
                title,
                year,
                tmdb_id: None,
                imdb_id: None,
                provider: Provider::Manual,
                runtime_minutes: None,
            }));
        }
        return Ok(None);
    }

    // Menu numbers are 1-based. `checked_sub` turns an input of "0" into "no selection"
    // instead of underflowing, and `listed.get` rejects numbers that were never displayed.
    let selected = choice
        .parse::<usize>()
        .ok()
        .and_then(|number| number.checked_sub(1))
        .and_then(|idx| listed.get(idx));
    if let Some(candidate) = selected {
        if let Ok(details) = metadata.resolve_candidate(&candidate.candidate) {
            return Ok(Some(details));
        }
        // Resolving failed: fall back to what the candidate itself carries.
        return Ok(Some(MovieMetadata {
            title: candidate.candidate.title.clone(),
            year: candidate.candidate.year.unwrap_or(0),
            tmdb_id: None,
            imdb_id: None,
            provider: candidate.candidate.provider.clone(),
            runtime_minutes: None,
        }));
    }
    Ok(outcome.best.clone())
}
/// Reports whether the first two candidates score within 0.02 of each other.
///
/// Fewer than two candidates are never ambiguous.
fn is_ambiguous(candidates: &[ScoredCandidate]) -> bool {
    match candidates {
        [first, second, ..] => (first.score - second.score).abs() < 0.02,
        _ => false,
    }
}
/// Prints `label` followed by `": "`, then reads one line from stdin and returns it with
/// surrounding whitespace removed.
///
/// # Errors
///
/// Returns an error when flushing stdout or reading stdin fails.
fn prompt(label: &str) -> Result<String> {
    print!("{label}: ");
    // The prompt has no trailing newline, so flush to make it visible before blocking on input.
    let mut stdout = io::stdout();
    io::Write::flush(&mut stdout)?;

    let mut line = String::new();
    io::stdin().read_line(&mut line)?;
    Ok(line.trim().to_owned())
}
/// Creates the shared LLM client, if LLM support is usable.
///
/// Returns `None` when the LLM mode is `Off`, or (after a warning) when no model is configured.
///
/// # Errors
///
/// Propagates the error from `LlmClient::new`.
fn build_llm_client(settings: &Settings, output: &Output) -> Result<Option<Arc<LlmClient>>> {
    let llm = &settings.llm;
    if llm.mode == crate::cli::LlmMode::Off {
        return Ok(None);
    }
    let model = if let Some(name) = &llm.model {
        name.clone()
    } else {
        output.warn("LLM mode enabled but no model provided; disabling LLM");
        return Ok(None);
    };
    let client = LlmClient::new(
        llm.endpoint.clone(),
        model,
        llm.timeout_seconds,
        llm.max_tokens,
    )?;
    Ok(Some(Arc::new(client)))
}
/// Writes a plain-text note describing `entry` next to the source file.
///
/// The note path is `path` with its extension replaced by `mov-renamarr.txt`. The note lists
/// the status, the reason (if any) and the candidates (if any), one per line.
///
/// # Errors
///
/// Returns an error, with the note path as context, when the file cannot be written.
fn write_sidecar_note(path: &Path, entry: &ReportEntry) -> Result<()> {
    let note_path = path.with_extension("mov-renamarr.txt");

    let mut lines = vec![format!("Status: {}\n", entry.status)];
    if let Some(reason) = entry.reason.as_ref() {
        lines.push(format!("Reason: {}\n", reason));
    }
    if !entry.candidates.is_empty() {
        lines.push("Candidates:\n".to_string());
        lines.extend(entry.candidates.iter().map(|candidate| {
            format!(
                " - {} ({}) [{}] {:.1}\n",
                candidate.title,
                candidate.year.unwrap_or(0),
                candidate.provider,
                candidate.score * 100.0
            )
        }));
    }

    std::fs::write(&note_path, lines.concat())
        .with_context(|| format!("failed to write sidecar note: {}", note_path.display()))
}