Compare commits

...

15 Commits

Author SHA1 Message Date
18010798bb Fix arm64 cross-linker in release workflow
All checks were successful
Release binaries / build (push) Successful in 4m25s
2025-12-30 12:33:50 -05:00
41dc4c0532 Use rustls-only reqwest to avoid OpenSSL
Some checks failed
Release binaries / build (push) Failing after 4m21s
2025-12-30 12:27:56 -05:00
6370f1bdf4 Fix release workflow YAML
Some checks failed
Release binaries / build (push) Failing after 3m32s
2025-12-30 12:04:28 -05:00
5d96fcab11 Use RELEASE_TOKEN secret for releases 2025-12-30 11:58:09 -05:00
7f685c4216 Add release workflow and docs 2025-12-30 11:47:41 -05:00
62dd897cd8 Remove release section from README 2025-12-30 11:40:25 -05:00
03ab8b5abd Add release metadata and changelog 2025-12-30 11:37:13 -05:00
c5b20c252f Optimize filename parsing 2025-12-30 11:30:56 -05:00
32fa0e8539 Performance pass and remove plan 2025-12-30 11:30:45 -05:00
86c4396c5e Update config docs and remove local script from repo 2025-12-30 11:19:39 -05:00
3b0b5b9cde Document recommended LLM models 2025-12-30 11:13:13 -05:00
d8e328d33a Link config reference in README 2025-12-30 11:06:56 -05:00
a096e93723 Add usage section and config reference 2025-12-30 11:02:24 -05:00
5b9aa55e81 Update .gitignore 2025-12-30 10:55:12 -05:00
3c0c022c79 Initial commit 2025-12-30 10:52:59 -05:00
25 changed files with 6722 additions and 1 deletions

View File

@@ -0,0 +1,81 @@
name: Release binaries

# Build Linux release binaries (x86_64 + arm64) when a v* tag is pushed and
# attach the packaged tarballs to the matching release via the Gitea API.
on:
  push:
    tags:
      - "v*"

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          targets: aarch64-unknown-linux-gnu
      - name: Install cross compiler
        run: sudo apt-get update && sudo apt-get install -y gcc-aarch64-linux-gnu
      - name: Build x86_64
        run: cargo build --release
      - name: Build arm64
        env:
          CC_aarch64_unknown_linux_gnu: aarch64-linux-gnu-gcc
          AR_aarch64_unknown_linux_gnu: aarch64-linux-gnu-ar
          CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER: aarch64-linux-gnu-gcc
          CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_AR: aarch64-linux-gnu-ar
        run: cargo build --release --target aarch64-unknown-linux-gnu
      - name: Package artifacts
        run: |
          set -euo pipefail
          TAG="${GITHUB_REF_NAME:-${GITEA_REF_NAME:-unknown}}"
          mkdir -p dist/pkg-x86_64 dist/pkg-arm64
          cp target/release/mov-renamarr dist/pkg-x86_64/
          cp README.md LICENSE CHANGELOG.md dist/pkg-x86_64/
          tar -C dist/pkg-x86_64 -czf "dist/mov-renamarr-${TAG}-x86_64-linux-gnu.tar.gz" .
          cp target/aarch64-unknown-linux-gnu/release/mov-renamarr dist/pkg-arm64/
          cp README.md LICENSE CHANGELOG.md dist/pkg-arm64/
          tar -C dist/pkg-arm64 -czf "dist/mov-renamarr-${TAG}-arm64-linux-gnu.tar.gz" .
      - name: Upload release assets
        env:
          RELEASE_TOKEN: ${{ secrets.RELEASE_TOKEN }}
        run: |
          set -euo pipefail
          if [ -z "${RELEASE_TOKEN:-}" ]; then
            echo "RELEASE_TOKEN secret is required to upload release assets."
            exit 1
          fi
          TAG="${GITHUB_REF_NAME:-${GITEA_REF_NAME:-unknown}}"
          # BUGFIX: TAG must be exported — the python3 helper below reads it via
          # os.environ, and an unexported shell variable is invisible to child
          # processes, so release creation would fail with a KeyError.
          export TAG
          API_URL="${GITHUB_API_URL:-${GITEA_API_URL:-}}"
          if [ -z "$API_URL" ]; then
            API_URL="${GITHUB_SERVER_URL}/api/v1"
          fi
          REPO="${GITHUB_REPOSITORY}"
          # Look up an existing release for this tag; a 404 body has no "id"
          # key, which leaves release_id empty and triggers creation below.
          release_json=$(curl -sS -H "Authorization: token ${RELEASE_TOKEN}" \
            "${API_URL}/repos/${REPO}/releases/tags/${TAG}")
          release_id=$(python3 -c 'import json,sys; print(json.load(sys.stdin).get("id",""))' <<<"$release_json")
          if [ -z "$release_id" ]; then
            create_payload=$(python3 -c 'import json,os; print(json.dumps({"tag_name": os.environ["TAG"], "name": os.environ["TAG"], "body": ""}))')
            release_json=$(curl -sS -H "Authorization: token ${RELEASE_TOKEN}" \
              -H "Content-Type: application/json" \
              -d "$create_payload" \
              "${API_URL}/repos/${REPO}/releases")
            release_id=$(python3 -c 'import json,sys; print(json.load(sys.stdin).get("id",""))' <<<"$release_json")
          fi
          # Abort instead of uploading assets to an empty release id.
          if [ -z "$release_id" ]; then
            echo "Failed to resolve release id for tag ${TAG}" >&2
            exit 1
          fi
          for file in dist/*.tar.gz; do
            name=$(basename "$file")
            curl -sS -H "Authorization: token ${RELEASE_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              --data-binary @"$file" \
              "${API_URL}/repos/${REPO}/releases/${release_id}/assets?name=${name}"
          done

15
.gitignore vendored
View File

@@ -10,6 +10,21 @@ target/
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
# Editor/IDE
.idea/
.vscode/
*.iml
# OS noise
.DS_Store
Thumbs.db
# Env/config
.env
.env.*
.direnv/
.envrc
# RustRover
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore

10
CHANGELOG.md Normal file
View File

@@ -0,0 +1,10 @@
# Changelog
## 0.1.0 - 2025-12-30
Initial release.
- Radarr-style naming for movies
- TMDb/OMDb providers with caching
- Optional Ollama LLM integration
- Safe defaults (copy, skip on collision)
- Reports (text/json/csv) + sidecars support

2815
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

38
Cargo.toml Normal file
View File

@@ -0,0 +1,38 @@
[package]
name = "mov-renamarr"
version = "0.1.0"
edition = "2024"
license = "MIT"
description = "Fast, safe CLI to rename movie files into Radarr-compatible names."
readme = "README.md"
repository = "https://git.44r0n.cc/44r0n7/mov-renamarr.git"
keywords = ["media", "movies", "rename", "radarr", "cli"]
categories = ["command-line-utilities", "filesystem"]
[dependencies]
anyhow = "1.0"
chrono = "0.4"
clap = { version = "4.5", features = ["derive"] }
csv = "1.3"
directories = "5.0"
is-terminal = "0.4"
libc = "0.2"
num_cpus = "1.16"
owo-colors = "4.1"
once_cell = "1.19"
rayon = "1.10"
regex = "1.10"
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
rusqlite = { version = "0.31", features = ["bundled"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
strsim = "0.11"
thiserror = "1.0"
toml = "0.8"
walkdir = "2.5"
[dev-dependencies]
assert_cmd = "2.0"
httpmock = "0.7"
predicates = "3.1"
tempfile = "3.10"

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 44r0n7
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

124
README.md
View File

@@ -1,2 +1,124 @@
# mov-renamarr
# mov-renamarr :clapper:
Fast, safe CLI to rename movie files into Radarr-compatible folders and filenames on Linux. It uses `ffprobe` for media details, filename parsing for hints, and optional online metadata (TMDb/OMDb) with caching. Default action is copy; move/rename-in-place are opt-in.
## Features :sparkles:
- Radarr-style output: `Title (Year)/Title (Year) [quality] [id].ext`
- Safe defaults: copy by default, skip on collision (opt-in overwrite/suffix)
- Metadata providers: TMDb, OMDb, or both (auto picks TMDb if available)
- Optional local LLM (Ollama) for filename parsing and lookup assist
- SQLite cache to reduce repeated lookups
- Reports in text/json/csv (stdout by default)
- Concurrency controls with sensible defaults
## Requirements :clipboard:
- Linux
- `ffprobe` in `PATH` (install via ffmpeg)
## Install :package:
From source:
```bash
cargo build --release
```
Binary will be at `target/release/mov-renamarr`.
Install with Cargo:
```bash
# From a git repo
cargo install --git <repo-url> --locked
# From a local checkout
cargo install --path . --locked
```
Download a prebuilt binary from the Releases page.
## Quick start :rocket:
Create a default config (with comments) and see the config path:
```bash
mov-renamarr
```
Dry-run with TMDb:
```bash
mov-renamarr --input /path/to/in --output /path/to/out --dry-run --provider tmdb
```
Rename in place (no network lookups):
```bash
mov-renamarr --input /path/to/in --rename-in-place --no-lookup
```
## Usage :keyboard:
```
mov-renamarr --input <dir> --output <dir> [flags]
```
Common flags:
- `--move` or `--rename-in-place` (default is copy)
- `--provider auto|tmdb|omdb|both`
- `--no-lookup` (skip external lookups)
- `--report [path]` and `--report-format text|json|csv`
- `--sidecars` (copy/move subtitle/nfo/etc files)
- `--quality-tags resolution,codec,source`
- `--min-score 0-100` (match threshold)
- `--jobs auto|N` and `--net-jobs auto|N`
## Configuration :gear:
Default config location:
`$XDG_CONFIG_HOME/mov-renamarr/config.toml` (fallback `~/.config/mov-renamarr/config.toml`)
Cache location:
`$XDG_CACHE_HOME/mov-renamarr/cache.db` (fallback `~/.cache/mov-renamarr/cache.db`)
The app creates a commented default config on first run and prints the path.
Key options (TOML):
- `provider = "auto"|"tmdb"|"omdb"|"both"`
- `tmdb.api_key` or `tmdb.bearer_token` (TMDb read access token supported)
- `omdb.api_key`
- `quality_tags = ["resolution"]` (or add `codec`, `source`)
- `llm.mode = "off"|"parse"|"assist"`
- `llm.endpoint = "http://localhost:11434"`
- `llm.model = "Qwen2.5:latest"` (recommended for accuracy: `Qwen2.5:14b`)
- `jobs = "auto"|N`, `net_jobs = "auto"|N`
- `sidecars = false` (copy/move sidecars when true)
Precedence (lowest to highest): built-in defaults, config file, environment variables, CLI flags — so env vars override the config file, and CLI flags override both.
Full reference: [docs/CONFIG.md](docs/CONFIG.md)
## Providers :globe_with_meridians:
- **TMDb**: preferred when available. Supports API key or read-access bearer token.
- **OMDb**: optional, API key required.
- **Auto**: uses TMDb if configured, else OMDb.
- **No-lookup**: `--no-lookup` (or `--offline`) uses filename/LLM only.
## LLM (optional) :robot:
If enabled, Ollama is used for:
- filename parsing (`llm.mode = "parse"`)
- lookup assistance (`llm.mode = "assist"`)
LLM output is treated as hints; provider results (when enabled) remain the source of truth.
## Reports :memo:
By default, output is printed to stdout.
To write a report file:
```bash
mov-renamarr --input ... --output ... --report
```
This creates `mov-renamarr-report-YYYYMMDD-HHMMSS.txt` in the current directory.
Formats: `--report-format text|json|csv`
## Safety and collisions :shield:
Default is **skip** if the destination exists. Options:
- `--overwrite` to overwrite
- `--suffix` to append ` (1)`, ` (2)`, ...
## Testing :test_tube:
```bash
cargo test
```
## License :scroll:
MIT (see `LICENSE`).

108
docs/CONFIG.md Normal file
View File

@@ -0,0 +1,108 @@
# Configuration Reference
This document describes all supported options in `config.toml`.
## Locations
- Config: `$XDG_CONFIG_HOME/mov-renamarr/config.toml` (fallback `~/.config/mov-renamarr/config.toml`)
- Cache: `$XDG_CACHE_HOME/mov-renamarr/cache.db` (fallback `~/.cache/mov-renamarr/cache.db`)
The app creates a commented default config on first run if none exists.
## Precedence
1) Built-in defaults
2) Config file
3) Environment variables
4) CLI flags
## Minimal example
```toml
provider = "auto"
api_key_tmdb = "YOUR_TMDB_KEY_OR_READ_ACCESS_TOKEN"
quality_tags = ["resolution"]
[llm]
mode = "off"
```
## Providers
```toml
provider = "auto" # auto|tmdb|omdb|both
api_key_tmdb = "..." # TMDb API key or Read Access Token (Bearer)
api_key_omdb = "..." # OMDb API key
```
## Cache
```toml
cache_path = "/home/user/.cache/mov-renamarr/cache.db"
cache_ttl_days = 30
refresh_cache = false
```
## Reporting and sidecars
```toml
report_format = "text" # text|json|csv
sidecar_notes = false # write per-file notes for skipped/failed
sidecars = false # move/copy sidecar files (srt, nfo, etc)
```
## Matching and naming
```toml
min_score = 80 # 0-100 match threshold
include_id = false # include tmdb/imdb id in filenames
no_lookup = false # skip external providers (filename/LLM only)
```
## Quality tags
```toml
quality_tags = ["resolution"] # list
# or
quality_tags = "resolution,codec" # comma-separated string
```
Supported tags: `resolution`, `codec`, `source`, `all`, `none`.
## Output safety
```toml
overwrite = false # replace existing files
suffix = false # add " (1)", " (2)" on collision
```
## Console
```toml
color = "auto" # auto|always|never
```
## Concurrency
```toml
jobs = "auto" # auto or integer
net_jobs = "auto" # auto or integer
```
## LLM (Ollama)
Install Ollama: [ollama.com](https://ollama.com)
```toml
[llm]
mode = "off" # off|parse|assist
endpoint = "http://localhost:11434"
model = "Qwen2.5:latest"
timeout_seconds = 30
max_tokens = 256
```
Recommended models:
- `Qwen2.5:latest` (safe default)
- `Qwen2.5:14b` (higher accuracy, more RAM/VRAM)
Alternatives:
- `llama3.1:8b` (strong general-purpose)
- `gemma2:9b` (fast, efficient)
- `mistral-nemo:12b` (mid-size, long context)
- `phi3:14b` (compact, good structure)
## Provider base URLs (for testing)
```toml
tmdb_base_url = "https://api.themoviedb.org/3"
omdb_base_url = "https://www.omdbapi.com"
```
## Environment variables
Most config settings can be overridden with environment variables using the
`MOV_RENAMARR_` prefix. Note that variable names do not always mirror the TOML
key exactly — for example, API keys use `MOV_RENAMARR_TMDB_API_KEY` and
`MOV_RENAMARR_OMDB_API_KEY` rather than `MOV_RENAMARR_API_KEY_TMDB`.

21
docs/RELEASING.md Normal file
View File

@@ -0,0 +1,21 @@
# Releasing
This repo includes a Gitea Actions workflow that builds Linux binaries for
`x86_64` and `arm64` and uploads them to the release.
## One-time setup (Gitea Actions)
1) Create a personal access token with repo write access.
2) Add it to the repo secrets as `RELEASE_TOKEN`.
## Release steps
1) Update `CHANGELOG.md`.
2) Create and push a tag:
```bash
git tag -a vX.Y.Z -m "vX.Y.Z"
git push origin vX.Y.Z
```
3) The workflow builds and uploads binaries to the release.
## Artifacts
- `mov-renamarr-<tag>-x86_64-linux-gnu.tar.gz`
- `mov-renamarr-<tag>-arm64-linux-gnu.tar.gz`

157
src/cli.rs Normal file
View File

@@ -0,0 +1,157 @@
use std::path::PathBuf;
use std::str::FromStr;
use clap::{Parser, ValueEnum};
use serde::Deserialize;
// Command-line arguments for mov-renamarr, parsed with clap's derive API.
// Field comments are deliberately plain `//` rather than `///`: clap turns
// doc comments into --help text, and a documentation-only edit must not
// change the rendered help output.
#[derive(Parser, Debug)]
#[command(name = "mov-renamarr", version, about = "Rename movie files into Radarr-compatible naming")]
pub struct Cli {
    // Source directory to scan (required).
    #[arg(long, value_name = "DIR")]
    pub input: PathBuf,
    // Destination directory; optional because --rename-in-place implies input.
    #[arg(long, value_name = "DIR")]
    pub output: Option<PathBuf>,
    // Explicit config file path (overrides the default XDG location).
    #[arg(long, value_name = "PATH")]
    pub config: Option<PathBuf>,
    // Metadata provider selection (auto/omdb/tmdb/both).
    #[arg(long, value_enum)]
    pub provider: Option<ProviderChoice>,
    // Provider API keys; override values from the config file.
    #[arg(long = "api-key-omdb")]
    pub api_key_omdb: Option<String>,
    #[arg(long = "api-key-tmdb")]
    pub api_key_tmdb: Option<String>,
    // Cache database path override.
    #[arg(long, value_name = "PATH")]
    pub cache: Option<PathBuf>,
    // Force fresh provider lookups, ignoring cached results.
    #[arg(long)]
    pub refresh_cache: bool,
    // Plan only; perform no filesystem changes.
    #[arg(long)]
    pub dry_run: bool,
    // Transfer modes; mutually exclusive (default action is copy).
    #[arg(long = "move", conflicts_with = "rename_in_place")]
    pub move_files: bool,
    #[arg(long = "rename-in-place", conflicts_with = "move_files")]
    pub rename_in_place: bool,
    // Prompt before acting on each file.
    #[arg(long)]
    pub interactive: bool,
    // --report may be passed with or without a path; the sentinel value marks
    // "flag present, no explicit path" so a default filename can be chosen
    // later by the report-path resolver.
    #[arg(
        long,
        value_name = "PATH",
        num_args = 0..=1,
        default_missing_value = "__DEFAULT__"
    )]
    pub report: Option<PathBuf>,
    // Report output format (text/json/csv).
    #[arg(long, value_enum)]
    pub report_format: Option<ReportFormat>,
    // Write per-file notes for skipped/failed items.
    #[arg(long)]
    pub sidecar_notes: bool,
    // Copy/move sidecar files (subtitles, nfo, ...) along with the movie.
    #[arg(long)]
    pub sidecars: bool,
    // Collision handling: overwrite the destination, or append a " (n)" suffix.
    #[arg(long)]
    pub overwrite: bool,
    #[arg(long)]
    pub suffix: bool,
    // Minimum match confidence, 0-100.
    #[arg(long)]
    pub min_score: Option<u8>,
    // Include a tmdb/imdb id in the generated filename.
    #[arg(long)]
    pub include_id: bool,
    // Comma-separated quality-tag list (resolution, codec, source, ...).
    #[arg(long, value_name = "LIST")]
    pub quality_tags: Option<String>,
    // Console color mode.
    #[arg(long, value_enum)]
    pub color: Option<ColorMode>,
    // Optional local-LLM (Ollama) settings.
    #[arg(long, value_enum)]
    pub llm_mode: Option<LlmMode>,
    #[arg(long, value_name = "URL")]
    pub llm_endpoint: Option<String>,
    #[arg(long, value_name = "NAME")]
    pub llm_model: Option<String>,
    #[arg(long, value_name = "SECONDS")]
    pub llm_timeout: Option<u64>,
    #[arg(long, value_name = "N")]
    pub llm_max_tokens: Option<u32>,
    // Worker-thread counts: "auto" or a positive integer (see parse_jobs_arg).
    #[arg(long, value_parser = parse_jobs_arg)]
    pub jobs: Option<JobsArg>,
    #[arg(long, value_parser = parse_jobs_arg)]
    pub net_jobs: Option<JobsArg>,
    // Skip external metadata lookups entirely (alias: --offline).
    #[arg(long, alias = "offline")]
    pub no_lookup: bool,
    // Verbose logging.
    #[arg(long)]
    pub verbose: bool,
}
// CLI/config enums. Comments are plain `//` on purpose: clap promotes `///`
// doc comments into --help value descriptions, which a doc-only edit must
// not change. serde's rename_all = "lowercase" matches the config file's
// lowercase spellings.

// Metadata provider selection.
#[derive(Clone, Debug, ValueEnum, Eq, PartialEq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ProviderChoice {
    Auto, // pick based on configured API keys (prefers TMDb when both set)
    Omdb,
    Tmdb,
    Both, // query both providers and choose the best match
}

// Output format for run reports.
#[derive(Clone, Debug, ValueEnum, Eq, PartialEq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ReportFormat {
    Text,
    Json,
    Csv,
}

// Console color behavior (auto-detect, force on, force off).
#[derive(Clone, Debug, ValueEnum, Eq, PartialEq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ColorMode {
    Auto,
    Always,
    Never,
}

// How the optional local LLM participates in matching.
#[derive(Clone, Debug, ValueEnum, Eq, PartialEq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum LlmMode {
    Off,
    Parse,  // LLM may replace filename-parsing hints
    Assist, // LLM adds alternate hints; providers remain the source of truth
}
/// Thread-count argument: automatic sizing, or an explicit positive count.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum JobsArg {
    Auto,
    Fixed(usize),
}

/// Clap value parser for `--jobs` / `--net-jobs`.
///
/// Accepts the literal "auto" (case-insensitive) or a positive integer.
/// Zero and non-numeric input are rejected with a user-facing message.
fn parse_jobs_arg(value: &str) -> Result<JobsArg, String> {
    if value.eq_ignore_ascii_case("auto") {
        return Ok(JobsArg::Auto);
    }
    match value.parse::<usize>() {
        Ok(0) => Err("jobs must be >= 1".to_string()),
        Ok(count) => Ok(JobsArg::Fixed(count)),
        Err(_) => Err("jobs must be an integer or 'auto'".to_string()),
    }
}

774
src/config.rs Normal file
View File

@@ -0,0 +1,774 @@
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use anyhow::{anyhow, Context, Result};
use directories::BaseDirs;
use serde::Deserialize;
use crate::cli::{Cli, ColorMode, JobsArg, LlmMode, ProviderChoice, ReportFormat};
/// Fully-resolved runtime configuration for a single run, produced by
/// `build_settings` from built-in defaults, the config file, environment
/// variables, and CLI flags (in that precedence order).
#[derive(Clone, Debug)]
pub struct Settings {
    /// Source directory to scan.
    pub input: PathBuf,
    /// Destination directory (equals `input` when renaming in place).
    pub output: PathBuf,
    /// Selected metadata provider strategy.
    pub provider: ProviderChoice,
    /// Provider credentials; `None` means the provider is unconfigured.
    pub api_key_omdb: Option<String>,
    pub api_key_tmdb: Option<String>,
    /// Lookup-cache location and freshness policy.
    pub cache_path: PathBuf,
    pub cache_ttl_days: u32,
    pub refresh_cache: bool,
    /// Report format and optional output path (`None` = stdout).
    pub report_format: ReportFormat,
    pub report_path: Option<PathBuf>,
    pub sidecar_notes: bool,
    pub sidecars: bool,
    /// Collision policy flags; mutually exclusive (checked in validation).
    pub overwrite: bool,
    pub suffix: bool,
    /// Minimum match confidence, 0-100.
    pub min_score: u8,
    pub include_id: bool,
    pub quality_tags: QualityTags,
    pub color: ColorMode,
    pub llm: LlmSettings,
    /// Worker counts for file processing and concurrent network calls.
    pub jobs: usize,
    pub net_jobs: usize,
    pub no_lookup: bool,
    pub dry_run: bool,
    pub move_files: bool,
    pub rename_in_place: bool,
    pub interactive: bool,
    pub verbose: bool,
    /// Provider base URLs (overridable for testing against mock servers).
    pub omdb_base_url: String,
    pub tmdb_base_url: String,
}
/// Which quality tags are appended to generated filenames.
#[derive(Clone, Debug)]
pub struct QualityTags {
    pub resolution: bool,
    pub codec: bool,
    pub source: bool,
}

impl Default for QualityTags {
    /// Default tag set: resolution only — matches the shipped config
    /// template's `quality_tags = ["resolution"]`.
    fn default() -> Self {
        QualityTags { resolution: true, codec: false, source: false }
    }
}
/// Resolved LLM (Ollama) options.
#[derive(Clone, Debug)]
pub struct LlmSettings {
    pub mode: LlmMode,
    pub endpoint: String,
    pub model: Option<String>,
    pub timeout_seconds: u64,
    pub max_tokens: Option<u32>,
}

impl Default for LlmSettings {
    /// LLM disabled, pointing at the standard local Ollama endpoint with a
    /// 30-second request timeout and no token cap.
    fn default() -> Self {
        LlmSettings {
            mode: LlmMode::Off,
            endpoint: String::from("http://localhost:11434"),
            model: None,
            timeout_seconds: 30,
            max_tokens: None,
        }
    }
}
/// Raw, partially-specified values as deserialized from `config.toml`.
/// Every field is optional so that only keys the user actually set
/// override the built-in defaults.
#[derive(Debug, Deserialize, Default)]
struct FileConfig {
    provider: Option<ProviderChoice>,
    api_key_omdb: Option<String>,
    api_key_tmdb: Option<String>,
    cache_path: Option<PathBuf>,
    cache_ttl_days: Option<u32>,
    refresh_cache: Option<bool>,
    report_format: Option<ReportFormat>,
    sidecar_notes: Option<bool>,
    sidecars: Option<bool>,
    overwrite: Option<bool>,
    suffix: Option<bool>,
    min_score: Option<u8>,
    include_id: Option<bool>,
    // Accepts a TOML list or a comma-separated string (see QualityTagsValue).
    quality_tags: Option<QualityTagsValue>,
    color: Option<ColorMode>,
    // "auto" (string) or a bare number (see JobValue).
    jobs: Option<JobValue>,
    net_jobs: Option<JobValue>,
    // Optional `[llm]` table.
    llm: Option<FileLlmConfig>,
    // Base-URL overrides, useful for pointing at mock servers in tests.
    omdb_base_url: Option<String>,
    tmdb_base_url: Option<String>,
    no_lookup: Option<bool>,
}
/// Raw `[llm]` table from the config file; all keys optional so unset
/// values fall back to `LlmSettings::default()`.
#[derive(Debug, Deserialize, Default)]
struct FileLlmConfig {
    mode: Option<LlmMode>,
    endpoint: Option<String>,
    model: Option<String>,
    timeout_seconds: Option<u64>,
    max_tokens: Option<u32>,
}
/// Accepts `quality_tags` as either a TOML list of strings or a single
/// comma-separated string; `untagged` lets serde try both shapes.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum QualityTagsValue {
    List(Vec<String>),
    Single(String),
}
/// Accepts `jobs`/`net_jobs` as either a string ("auto" or digits) or a
/// bare TOML integer; `untagged` lets serde try both shapes.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum JobValue {
    String(String),
    Number(u64),
}
/// Resolve the final `Settings` for this run.
///
/// Layering order (later layers win): built-in defaults, config file,
/// environment variables, CLI flags; `validate_settings` then checks the
/// combined result. Failure to create a missing default config file is
/// only a warning — the run continues with built-in defaults.
pub fn build_settings(cli: &Cli) -> Result<Settings> {
    let config_path = resolve_config_path(cli.config.as_deref())?;
    if let Err(err) = ensure_default_config(&config_path) {
        eprintln!(
            "Warning: failed to create default config at {}: {}",
            config_path.display(),
            err
        );
    }
    let file_config = load_config_file(&config_path)?;
    let input = cli.input.clone();
    let output = resolve_output(cli)?;
    // Built-in defaults; everything below may be overridden by later layers.
    let mut settings = Settings {
        input,
        output,
        provider: ProviderChoice::Auto,
        api_key_omdb: None,
        api_key_tmdb: None,
        cache_path: default_cache_path()?,
        cache_ttl_days: 30,
        refresh_cache: false,
        report_format: ReportFormat::Text,
        report_path: resolve_report_path(cli)?,
        sidecar_notes: false,
        sidecars: false,
        overwrite: false,
        suffix: false,
        min_score: 80,
        include_id: false,
        quality_tags: QualityTags::default(),
        color: ColorMode::Auto,
        llm: LlmSettings::default(),
        jobs: default_jobs(),
        net_jobs: default_net_jobs(default_jobs()),
        no_lookup: false,
        dry_run: cli.dry_run,
        move_files: cli.move_files,
        rename_in_place: cli.rename_in_place,
        interactive: cli.interactive,
        verbose: cli.verbose,
        omdb_base_url: "https://www.omdbapi.com".to_string(),
        tmdb_base_url: "https://api.themoviedb.org/3".to_string(),
    };
    apply_file_config(&mut settings, &file_config)?;
    apply_env_overrides(&mut settings)?;
    apply_cli_overrides(&mut settings, cli)?;
    validate_settings(&mut settings)?;
    Ok(settings)
}
/// Create the commented default config file if it does not exist yet and
/// return the path it lives at.
pub fn init_default_config() -> Result<PathBuf> {
    let path = resolve_config_path(None)?;
    ensure_default_config(&path)?;
    Ok(path)
}
/// Determine the output directory from the CLI flags.
///
/// With `--rename-in-place`, the output must be omitted or identical to the
/// input (it resolves to the input directory). Otherwise an output that is
/// distinct from the input is required.
fn resolve_output(cli: &Cli) -> Result<PathBuf> {
    if cli.rename_in_place {
        match cli.output.as_ref() {
            None => Ok(cli.input.clone()),
            Some(out) if out == &cli.input => Ok(out.clone()),
            Some(_) => Err(anyhow!(
                "--rename-in-place requires output to be omitted or the same as input"
            )),
        }
    } else {
        match cli.output.as_ref() {
            Some(out) if out == &cli.input => Err(anyhow!(
                "output directory must be different from input unless --rename-in-place is set"
            )),
            Some(out) => Ok(out.clone()),
            None => Err(anyhow!("--output is required unless --rename-in-place is set")),
        }
    }
}
/// Path to the config file: an explicit CLI path wins; otherwise the XDG
/// config directory (`<config_dir>/mov-renamarr/config.toml`).
fn resolve_config_path(cli_path: Option<&Path>) -> Result<PathBuf> {
    match cli_path {
        Some(explicit) => Ok(explicit.to_path_buf()),
        None => {
            let base = BaseDirs::new()
                .ok_or_else(|| anyhow!("unable to resolve XDG config directory"))?;
            Ok(base.config_dir().join("mov-renamarr").join("config.toml"))
        }
    }
}
/// Default cache database location: `<cache_dir>/mov-renamarr/cache.db`.
fn default_cache_path() -> Result<PathBuf> {
    let base = BaseDirs::new().ok_or_else(|| anyhow!("unable to resolve XDG cache directory"))?;
    let mut path = base.cache_dir().to_path_buf();
    path.push("mov-renamarr");
    path.push("cache.db");
    Ok(path)
}
fn load_config_file(path: &Path) -> Result<FileConfig> {
if !path.exists() {
return Ok(FileConfig::default());
}
let raw = fs::read_to_string(path)
.with_context(|| format!("failed to read config file: {}", path.display()))?;
let cfg: FileConfig = toml::from_str(&raw)
.with_context(|| format!("failed to parse config TOML: {}", path.display()))?;
Ok(cfg)
}
/// Write the commented default config if `path` does not exist yet.
///
/// Returns `Ok(true)` when a new file was created and `Ok(false)` when one
/// was already present. Parent directories are created as needed, and a
/// notice is printed to stderr after creation.
fn ensure_default_config(path: &Path) -> Result<bool> {
    if path.exists() {
        return Ok(false);
    }
    if let Some(dir) = path.parent() {
        fs::create_dir_all(dir)
            .with_context(|| format!("failed to create config dir: {}", dir.display()))?;
    }
    fs::write(path, default_config_template())
        .with_context(|| format!("failed to write default config: {}", path.display()))?;
    eprintln!(
        "Created default config at {}. You can edit it to set API keys and preferences.",
        path.display()
    );
    Ok(true)
}
/// The commented `config.toml` written on first run.
///
/// Kept as a line array joined with "\n" so the template stays easy to
/// diff and edit; the trailing empty entry gives the file a final newline.
fn default_config_template() -> String {
    [
        "# Mov Renamarr configuration (TOML)",
        "# Edit this file to set API keys and defaults.",
        "# Values here override built-in defaults and can be overridden by env/CLI.",
        "",
        "# Provider selection:",
        "# - auto: pick based on available API keys (prefers TMDb if both set).",
        "# - tmdb / omdb: force a single provider.",
        "# - both: query both and choose best match.",
        "provider = \"auto\"",
        "",
        "# API keys (set at least one).",
        "# TMDb accepts either v3 API key or v4 Read Access Token (Bearer).",
        "# api_key_tmdb = \"YOUR_TMDB_KEY_OR_READ_ACCESS_TOKEN\"",
        "# api_key_omdb = \"YOUR_OMDB_KEY\"",
        "",
        "# Cache settings",
        "# cache_path lets you override the default XDG cache location.",
        "# cache_path = \"/home/user/.cache/mov-renamarr/cache.db\"",
        "# cache_ttl_days controls how long cached API results are reused.",
        "cache_ttl_days = 30",
        "# refresh_cache forces new lookups on next run.",
        "refresh_cache = false",
        "",
        "# Output and reporting",
        "# report_format: text (default), json, or csv.",
        "report_format = \"text\"",
        "# sidecar_notes writes a per-file note when a file is skipped/failed.",
        "sidecar_notes = false",
        "# sidecars copies/moves subtitle/nfo/etc files with the movie file.",
        "sidecars = false",
        "# overwrite replaces existing files; suffix adds \" (1)\", \" (2)\", etc.",
        "overwrite = false",
        "suffix = false",
        "# Disable external lookups (use filename/LLM only).",
        "# When true, provider selection is ignored.",
        "no_lookup = false",
        "# min_score is 0-100 (match confidence threshold).",
        "min_score = 80",
        "# include_id adds tmdb-XXXX or imdb-ttXXXX in the filename.",
        "include_id = false",
        "",
        "# Quality tags: list or comma-separated string.",
        "# Supported tags: resolution, codec, source, all, none.",
        "quality_tags = [\"resolution\"]",
        "",
        "# Console colors: auto, always, never",
        "color = \"auto\"",
        "",
        "# Concurrency: auto or a number",
        "# jobs controls file processing threads.",
        "# net_jobs controls concurrent API calls.",
        "jobs = \"auto\"",
        "net_jobs = \"auto\"",
        "",
        "# Optional: override provider base URLs (useful for testing).",
        "# tmdb_base_url = \"https://api.themoviedb.org/3\"",
        "# omdb_base_url = \"https://www.omdbapi.com\"",
        "",
        "[llm]",
        "# LLM usage:",
        "# - off: no LLM usage",
        "# - parse: LLM can replace filename parsing hints",
        "# - assist: LLM adds alternate hints but still verifies via providers",
        "# Ollama expected at endpoint.",
        "mode = \"off\"",
        "endpoint = \"http://localhost:11434\"",
        "model = \"Qwen2.5:latest\"",
        "# For higher accuracy (more RAM/VRAM): \"Qwen2.5:14b\"",
        "# timeout_seconds limits LLM request time.",
        "timeout_seconds = 30",
        "# max_tokens caps response length.",
        "# max_tokens = 256",
        "",
    ]
    .join("\n")
}
/// Overlay values from the config file onto the default `Settings`.
///
/// Only keys present in the file are applied. Errors can arise from
/// quality-tag or jobs parsing. Note that the `net_jobs` branch reads
/// `settings.jobs` for its fallback, so it must run after the `jobs`
/// branch — the order of these two blocks matters.
fn apply_file_config(settings: &mut Settings, file: &FileConfig) -> Result<()> {
    if let Some(provider) = &file.provider {
        settings.provider = provider.clone();
    }
    if let Some(key) = &file.api_key_omdb {
        settings.api_key_omdb = Some(key.clone());
    }
    if let Some(key) = &file.api_key_tmdb {
        settings.api_key_tmdb = Some(key.clone());
    }
    if let Some(path) = &file.cache_path {
        settings.cache_path = path.clone();
    }
    if let Some(ttl) = file.cache_ttl_days {
        settings.cache_ttl_days = ttl;
    }
    if let Some(refresh) = file.refresh_cache {
        settings.refresh_cache = refresh;
    }
    if let Some(format) = &file.report_format {
        settings.report_format = format.clone();
    }
    if let Some(sidecar_notes) = file.sidecar_notes {
        settings.sidecar_notes = sidecar_notes;
    }
    if let Some(sidecars) = file.sidecars {
        settings.sidecars = sidecars;
    }
    if let Some(overwrite) = file.overwrite {
        settings.overwrite = overwrite;
    }
    if let Some(suffix) = file.suffix {
        settings.suffix = suffix;
    }
    if let Some(min_score) = file.min_score {
        settings.min_score = min_score;
    }
    if let Some(include_id) = file.include_id {
        settings.include_id = include_id;
    }
    if let Some(tags) = &file.quality_tags {
        // Normalize both accepted shapes (list / comma-separated string)
        // into a single string list before parsing.
        let values = match tags {
            QualityTagsValue::List(list) => list.clone(),
            QualityTagsValue::Single(value) => split_list(value),
        };
        settings.quality_tags = parse_quality_tags(&values)?;
    }
    if let Some(color) = &file.color {
        settings.color = color.clone();
    }
    if let Some(raw) = &file.jobs {
        settings.jobs = parse_jobs_setting_value(raw, default_jobs())?;
    }
    if let Some(raw) = &file.net_jobs {
        settings.net_jobs = parse_jobs_setting_value(raw, default_net_jobs(settings.jobs))?;
    }
    if let Some(no_lookup) = file.no_lookup {
        settings.no_lookup = no_lookup;
    }
    if let Some(llm) = &file.llm {
        apply_file_llm(settings, llm);
    }
    if let Some(url) = &file.omdb_base_url {
        settings.omdb_base_url = url.clone();
    }
    if let Some(url) = &file.tmdb_base_url {
        settings.tmdb_base_url = url.clone();
    }
    Ok(())
}
/// Merge the config file's `[llm]` table into `settings`; keys that are
/// absent from the file leave the current values untouched.
fn apply_file_llm(settings: &mut Settings, llm: &FileLlmConfig) {
    let target = &mut settings.llm;
    if let Some(mode) = llm.mode.as_ref() {
        target.mode = mode.clone();
    }
    if let Some(endpoint) = llm.endpoint.as_ref() {
        target.endpoint = endpoint.clone();
    }
    if let Some(model) = llm.model.as_ref() {
        target.model = Some(model.clone());
    }
    if let Some(timeout) = llm.timeout_seconds {
        target.timeout_seconds = timeout;
    }
    if let Some(tokens) = llm.max_tokens {
        target.max_tokens = Some(tokens);
    }
}
/// Overlay supported `MOV_RENAMARR_*` environment variables onto `settings`.
///
/// Values that fail to parse (unknown enum name, non-numeric count, bad
/// boolean) are silently ignored rather than aborting the run; blank values
/// are treated as unset (see `apply_env_string`). As in `apply_file_config`,
/// the NET_JOBS branch reads `settings.jobs` for its fallback, so it must
/// stay after the JOBS branch.
fn apply_env_overrides(settings: &mut Settings) -> Result<()> {
    apply_env_string("MOV_RENAMARR_PROVIDER", |value| {
        if let Ok(provider) = ProviderChoice::from_str(&value.to_ascii_lowercase()) {
            settings.provider = provider;
        }
    });
    apply_env_string("MOV_RENAMARR_OMDB_API_KEY", |value| {
        settings.api_key_omdb = Some(value);
    });
    apply_env_string("MOV_RENAMARR_TMDB_API_KEY", |value| {
        settings.api_key_tmdb = Some(value);
    });
    apply_env_string("MOV_RENAMARR_CACHE", |value| {
        settings.cache_path = PathBuf::from(value);
    });
    apply_env_string("MOV_RENAMARR_REPORT_FORMAT", |value| {
        if let Ok(format) = ReportFormat::from_str(&value.to_ascii_lowercase()) {
            settings.report_format = format;
        }
    });
    apply_env_string("MOV_RENAMARR_JOBS", |value| {
        if let Ok(jobs) = parse_jobs_setting(&value, default_jobs()) {
            settings.jobs = jobs;
        }
    });
    apply_env_string("MOV_RENAMARR_NET_JOBS", |value| {
        if let Ok(jobs) = parse_jobs_setting(&value, default_net_jobs(settings.jobs)) {
            settings.net_jobs = jobs;
        }
    });
    apply_env_string("MOV_RENAMARR_MIN_SCORE", |value| {
        if let Ok(min_score) = value.parse::<u8>() {
            settings.min_score = min_score;
        }
    });
    apply_env_bool("MOV_RENAMARR_INCLUDE_ID", |value| settings.include_id = value);
    apply_env_bool("MOV_RENAMARR_SIDECARS", |value| settings.sidecars = value);
    apply_env_bool("MOV_RENAMARR_SIDECAR_NOTES", |value| settings.sidecar_notes = value);
    apply_env_bool("MOV_RENAMARR_OVERWRITE", |value| settings.overwrite = value);
    apply_env_bool("MOV_RENAMARR_SUFFIX", |value| settings.suffix = value);
    apply_env_bool("MOV_RENAMARR_NO_LOOKUP", |value| settings.no_lookup = value);
    apply_env_string("MOV_RENAMARR_QUALITY_TAGS", |value| {
        if let Ok(tags) = parse_quality_tags(&split_list(&value)) {
            settings.quality_tags = tags;
        }
    });
    apply_env_string("MOV_RENAMARR_COLOR", |value| {
        if let Ok(mode) = ColorMode::from_str(&value.to_ascii_lowercase()) {
            settings.color = mode;
        }
    });
    apply_env_string("MOV_RENAMARR_LLM_MODE", |value| {
        if let Ok(mode) = LlmMode::from_str(&value.to_ascii_lowercase()) {
            settings.llm.mode = mode;
        }
    });
    apply_env_string("MOV_RENAMARR_LLM_ENDPOINT", |value| settings.llm.endpoint = value);
    apply_env_string("MOV_RENAMARR_LLM_MODEL", |value| settings.llm.model = Some(value));
    apply_env_string("MOV_RENAMARR_LLM_TIMEOUT", |value| {
        if let Ok(timeout) = value.parse::<u64>() {
            settings.llm.timeout_seconds = timeout;
        }
    });
    apply_env_string("MOV_RENAMARR_LLM_MAX_TOKENS", |value| {
        if let Ok(max_tokens) = value.parse::<u32>() {
            settings.llm.max_tokens = Some(max_tokens);
        }
    });
    apply_env_string("MOV_RENAMARR_OMDB_BASE_URL", |value| settings.omdb_base_url = value);
    apply_env_string("MOV_RENAMARR_TMDB_BASE_URL", |value| settings.tmdb_base_url = value);
    Ok(())
}
/// Invoke `setter` with the value of environment variable `key`, but only
/// when the variable is set and not blank; unset, non-UTF-8, or
/// whitespace-only values are ignored.
fn apply_env_string<F: FnMut(String)>(key: &str, mut setter: F) {
    match env::var(key) {
        Ok(value) if !value.trim().is_empty() => setter(value),
        _ => {}
    }
}
/// Invoke `setter` with the parsed boolean value of environment variable
/// `key`; unset variables and unparseable values are ignored silently.
fn apply_env_bool<F: FnMut(bool)>(key: &str, mut setter: F) {
    let Ok(raw) = env::var(key) else { return };
    if let Ok(flag) = parse_bool(&raw) {
        setter(flag);
    }
}
/// Parse a human-friendly boolean: "1"/"true"/"yes"/"on" and
/// "0"/"false"/"no"/"off", case-insensitively, ignoring surrounding
/// whitespace. Anything else is an error naming the offending value.
fn parse_bool(value: &str) -> Result<bool> {
    let normalized = value.trim().to_ascii_lowercase();
    if matches!(normalized.as_str(), "1" | "true" | "yes" | "on") {
        Ok(true)
    } else if matches!(normalized.as_str(), "0" | "false" | "no" | "off") {
        Ok(false)
    } else {
        Err(anyhow!("invalid boolean value: {value}"))
    }
}
/// Applies command-line flags on top of `settings`; CLI values take
/// precedence over config-file and environment values. Boolean flags only
/// ever turn features on (`|=`), and option-valued flags are applied only
/// when present. Ordering matters for `jobs`/`net_jobs`: the network-job
/// fallback is derived from the (possibly just-updated) `jobs` value.
fn apply_cli_overrides(settings: &mut Settings, cli: &Cli) -> Result<()> {
    if let Some(value) = cli.provider.clone() {
        settings.provider = value;
    }
    if let Some(value) = cli.api_key_omdb.clone() {
        settings.api_key_omdb = Some(value);
    }
    if let Some(value) = cli.api_key_tmdb.clone() {
        settings.api_key_tmdb = Some(value);
    }
    if let Some(value) = cli.cache.clone() {
        settings.cache_path = value;
    }
    settings.refresh_cache |= cli.refresh_cache;
    if let Some(value) = cli.report_format.clone() {
        settings.report_format = value;
    }
    settings.sidecar_notes |= cli.sidecar_notes;
    settings.sidecars |= cli.sidecars;
    settings.overwrite |= cli.overwrite;
    settings.suffix |= cli.suffix;
    if let Some(value) = cli.min_score {
        settings.min_score = value;
    }
    settings.include_id |= cli.include_id;
    if let Some(raw) = cli.quality_tags.as_deref() {
        settings.quality_tags = parse_quality_tags(&split_list(raw))?;
    }
    if let Some(value) = cli.color.clone() {
        settings.color = value;
    }
    if let Some(arg) = cli.jobs.as_ref() {
        settings.jobs = resolve_jobs_arg(arg, default_jobs());
    }
    if let Some(arg) = cli.net_jobs.as_ref() {
        settings.net_jobs = resolve_jobs_arg(arg, default_net_jobs(settings.jobs));
    }
    settings.no_lookup |= cli.no_lookup;
    if let Some(value) = cli.llm_mode.clone() {
        settings.llm.mode = value;
    }
    if let Some(value) = cli.llm_endpoint.clone() {
        settings.llm.endpoint = value;
    }
    if let Some(value) = cli.llm_model.clone() {
        settings.llm.model = Some(value);
    }
    if let Some(value) = cli.llm_timeout {
        settings.llm.timeout_seconds = value;
    }
    if let Some(value) = cli.llm_max_tokens {
        settings.llm.max_tokens = Some(value);
    }
    settings.verbose |= cli.verbose;
    Ok(())
}
/// Final sanity pass over merged settings: rejects contradictory flags and
/// out-of-range values, then clamps `net_jobs` to at least 1 and at most
/// `jobs` (in that order, matching the original two-step adjustment).
fn validate_settings(settings: &mut Settings) -> Result<()> {
    if settings.overwrite && settings.suffix {
        return Err(anyhow!("--overwrite and --suffix cannot both be set"));
    }
    if settings.min_score > 100 {
        return Err(anyhow!("min-score must be between 0 and 100"));
    }
    settings.net_jobs = settings.net_jobs.max(1).min(settings.jobs);
    Ok(())
}
/// Default worker-thread count: half the logical cores, clamped to 1..=4.
///
/// The previous `if limit == 0` branch was unreachable — `max(1, cores / 2)`
/// is always >= 1, so `min(4, half)` is too. `clamp(1, 4)` expresses the
/// same `min(4, max(1, cores / 2))` computation directly.
pub fn default_jobs() -> usize {
    (num_cpus::get() / 2).clamp(1, 4)
}
/// Default network-request concurrency derived from the worker count:
/// at most 2 and at least 1 (so a zero `jobs` still yields 1).
pub fn default_net_jobs(jobs: usize) -> usize {
    jobs.clamp(1, 2)
}
/// Parses a jobs setting: the literal "auto" (any case) maps to `fallback`,
/// otherwise the value must parse as a positive integer.
fn parse_jobs_setting(raw: &str, fallback: usize) -> Result<usize> {
    if raw.eq_ignore_ascii_case("auto") {
        return Ok(fallback);
    }
    match raw.parse::<usize>().context("invalid jobs value")? {
        0 => Err(anyhow!("jobs must be >= 1")),
        n => Ok(n),
    }
}
/// Parses a jobs value that may arrive from config as either a string
/// ("auto" or a number) or a bare number.
fn parse_jobs_setting_value(raw: &JobValue, fallback: usize) -> Result<usize> {
    match raw {
        JobValue::String(text) => parse_jobs_setting(text, fallback),
        JobValue::Number(value) => {
            if *value == 0 {
                Err(anyhow!("jobs must be >= 1"))
            } else {
                // NOTE(review): the `as usize` cast assumes the numeric
                // type fits — confirm JobValue's inner width.
                Ok(*value as usize)
            }
        }
    }
}
/// Resolves a CLI jobs argument: `Auto` takes the computed fallback,
/// `Fixed` takes the user's value.
fn resolve_jobs_arg(arg: &JobsArg, fallback: usize) -> usize {
    if let JobsArg::Fixed(value) = arg {
        *value
    } else {
        fallback
    }
}
fn parse_quality_tags(values: &[String]) -> Result<QualityTags> {
let mut tags = QualityTags::default();
tags.resolution = false;
for value in values {
let token = value.trim().to_ascii_lowercase();
match token.as_str() {
"resolution" => tags.resolution = true,
"codec" => tags.codec = true,
"source" => tags.source = true,
"all" => {
tags.resolution = true;
tags.codec = true;
tags.source = true;
}
"none" => {
tags.resolution = false;
tags.codec = false;
tags.source = false;
}
_ if token.is_empty() => {}
_ => return Err(anyhow!("unknown quality tag: {token}")),
}
}
Ok(tags)
}
/// Splits a raw list on commas, semicolons, or spaces, trimming each entry
/// and dropping empties.
fn split_list(raw: &str) -> Vec<String> {
    raw.split([',', ';', ' '])
        .map(|token| token.trim())
        .filter(|token| !token.is_empty())
        .map(|token| token.to_string())
        .collect()
}
/// Maps the CLI `--report` flag to an output path: absent -> no report,
/// the "__DEFAULT__" sentinel -> timestamped default filename, anything
/// else -> the user-supplied path.
fn resolve_report_path(cli: &Cli) -> Result<Option<PathBuf>> {
    Ok(cli.report.as_ref().map(|path| {
        if path.as_os_str() == "__DEFAULT__" {
            PathBuf::from(default_report_filename())
        } else {
            path.clone()
        }
    }))
}
/// Builds the default report filename from the current local time, e.g.
/// "mov-renamarr-report-20251230-113000.txt".
fn default_report_filename() -> String {
    let stamp = chrono::Local::now().format("%Y%m%d-%H%M%S");
    format!("mov-renamarr-report-{stamp}.txt")
}
// Needed for ValueEnum parsing from env string
impl FromStr for ProviderChoice {
    type Err = anyhow::Error;
    /// Parses a provider choice from its lowercase name.
    ///
    /// The error now echoes the offending value, matching `parse_bool`'s
    /// "invalid boolean value: {value}" diagnostics.
    fn from_str(s: &str) -> Result<Self> {
        match s {
            "auto" => Ok(ProviderChoice::Auto),
            "omdb" => Ok(ProviderChoice::Omdb),
            "tmdb" => Ok(ProviderChoice::Tmdb),
            "both" => Ok(ProviderChoice::Both),
            _ => Err(anyhow!("invalid provider choice: {s}")),
        }
    }
}
impl FromStr for ReportFormat {
    type Err = anyhow::Error;
    /// Parses a report format from its lowercase name.
    ///
    /// The error now echoes the offending value, matching `parse_bool`'s
    /// diagnostics style.
    fn from_str(s: &str) -> Result<Self> {
        match s {
            "text" => Ok(ReportFormat::Text),
            "json" => Ok(ReportFormat::Json),
            "csv" => Ok(ReportFormat::Csv),
            _ => Err(anyhow!("invalid report format: {s}")),
        }
    }
}
impl FromStr for ColorMode {
    type Err = anyhow::Error;
    /// Parses a color mode from its lowercase name.
    ///
    /// The error now echoes the offending value, matching `parse_bool`'s
    /// diagnostics style.
    fn from_str(s: &str) -> Result<Self> {
        match s {
            "auto" => Ok(ColorMode::Auto),
            "always" => Ok(ColorMode::Always),
            "never" => Ok(ColorMode::Never),
            _ => Err(anyhow!("invalid color mode: {s}")),
        }
    }
}
impl FromStr for LlmMode {
    type Err = anyhow::Error;
    /// Parses an LLM mode from its lowercase name.
    ///
    /// The error now echoes the offending value, matching `parse_bool`'s
    /// diagnostics style.
    fn from_str(s: &str) -> Result<Self> {
        match s {
            "off" => Ok(LlmMode::Off),
            "parse" => Ok(LlmMode::Parse),
            "assist" => Ok(LlmMode::Assist),
            _ => Err(anyhow!("invalid LLM mode: {s}")),
        }
    }
}

161
src/fsops.rs Normal file
View File

@@ -0,0 +1,161 @@
use std::fs;
use std::path::{Path, PathBuf};
use anyhow::{anyhow, Context, Result};
/// How a matched file should be materialized at its destination.
#[derive(Clone, Copy, Debug)]
pub enum OpMode {
    Copy,
    Move,
    RenameInPlace,
}
/// What to do when the destination path already exists.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CollisionPolicy {
    Skip,
    Overwrite,
    Suffix,
}
/// Result of a single file operation: exactly one of the fields is `Some` —
/// the path actually written, or the reason the operation was skipped.
#[derive(Debug)]
pub struct OperationOutcome {
    pub final_path: Option<PathBuf>,
    pub skipped_reason: Option<String>,
}
/// Performs a single copy/move/rename of `src` to `dest`, honoring the
/// collision `policy` and optionally carrying matching sidecar files along.
///
/// Returns the final destination path on success, or a skip reason when the
/// collision policy declined the operation.
pub fn execute(
    src: &Path,
    dest: &Path,
    mode: OpMode,
    policy: CollisionPolicy,
    sidecars: bool,
) -> Result<OperationOutcome> {
    // Single match replaces the previous is_none()/unwrap() pair.
    let dest = match resolve_collision(dest, policy)? {
        Some(path) => path,
        None => {
            return Ok(OperationOutcome {
                final_path: None,
                skipped_reason: Some("destination exists".to_string()),
            });
        }
    };
    if let Some(parent) = dest.parent() {
        fs::create_dir_all(parent)
            .with_context(|| format!("failed to create output dir: {}", parent.display()))?;
    }
    match mode {
        OpMode::Copy => copy_file(src, &dest)?,
        OpMode::Move | OpMode::RenameInPlace => move_file(src, &dest)?,
    }
    if sidecars {
        process_sidecars(src, &dest, mode, policy)?;
    }
    Ok(OperationOutcome {
        final_path: Some(dest),
        skipped_reason: None,
    })
}
/// Decides the effective destination given the collision policy: `None`
/// means skip, otherwise the (possibly suffixed) path to write.
fn resolve_collision(dest: &Path, policy: CollisionPolicy) -> Result<Option<PathBuf>> {
    if dest.exists() {
        match policy {
            CollisionPolicy::Skip => Ok(None),
            CollisionPolicy::Overwrite => Ok(Some(dest.to_path_buf())),
            CollisionPolicy::Suffix => append_suffix(dest).map(Some),
        }
    } else {
        Ok(Some(dest.to_path_buf()))
    }
}
/// Finds a free "name (N).ext" variant of `dest`, trying N = 1..=999.
///
/// Non-UTF-8 stems/extensions are converted lossily, so the candidate name
/// may differ from the original bytes on exotic filenames.
fn append_suffix(dest: &Path) -> Result<PathBuf> {
    let parent = dest.parent().ok_or_else(|| anyhow!("invalid destination path"))?;
    let stem = dest
        .file_stem()
        .ok_or_else(|| anyhow!("invalid destination filename"))?
        .to_string_lossy();
    let ext = dest.extension().map(|e| e.to_string_lossy());
    for idx in 1..=999 {
        // Extension is re-attached manually since file_stem() stripped it.
        let candidate_name = if let Some(ext) = ext.as_ref() {
            format!("{} ({}).{}", stem, idx, ext)
        } else {
            format!("{} ({})", stem, idx)
        };
        let candidate = parent.join(candidate_name);
        if !candidate.exists() {
            return Ok(candidate);
        }
    }
    Err(anyhow!("unable to find available suffix for {}", dest.display()))
}
fn copy_file(src: &Path, dest: &Path) -> Result<()> {
fs::copy(src, dest)
.with_context(|| format!("failed to copy {} -> {}", src.display(), dest.display()))?;
Ok(())
}
/// Moves `src` to `dest`, falling back to copy-then-delete when the rename
/// crosses a filesystem boundary.
fn move_file(src: &Path, dest: &Path) -> Result<()> {
    match fs::rename(src, dest) {
        Ok(()) => Ok(()),
        // rename(2) cannot cross mount points (EXDEV); emulate the move.
        Err(err) if err.raw_os_error() == Some(libc::EXDEV) => {
            copy_file(src, dest)?;
            // If this removal fails, dest already holds a full copy; the
            // error surfaces so the caller knows src was left behind.
            fs::remove_file(src)
                .with_context(|| format!("failed to remove source after copy: {}", src.display()))?;
            Ok(())
        }
        Err(err) => Err(anyhow!("failed to move {} -> {}: {}", src.display(), dest.display(), err)),
    }
}
/// Moves/copies files sitting next to `src` whose `file_stem` exactly
/// equals `src`'s (e.g. "Movie.srt" next to "Movie.mkv"), renaming them to
/// match `dest`'s stem while keeping each extension. Returns the number of
/// sidecars processed.
///
/// NOTE(review): the exact-stem match means language-tagged sidecars such
/// as "Movie.en.srt" (stem "Movie.en") are NOT carried along — confirm
/// whether that is intended.
fn process_sidecars(src: &Path, dest: &Path, mode: OpMode, policy: CollisionPolicy) -> Result<usize> {
    let src_dir = src.parent().ok_or_else(|| anyhow!("source has no parent"))?;
    let src_stem = src.file_stem().ok_or_else(|| anyhow!("source has no stem"))?;
    let dest_dir = dest.parent().ok_or_else(|| anyhow!("destination has no parent"))?;
    let dest_stem = dest.file_stem().ok_or_else(|| anyhow!("destination has no stem"))?;
    let mut processed = 0;
    for entry in fs::read_dir(src_dir)? {
        let entry = entry?;
        let path = entry.path();
        // Skip the media file itself and any directories.
        if path == src {
            continue;
        }
        if path.is_dir() {
            continue;
        }
        let stem = match path.file_stem() {
            Some(stem) => stem,
            None => continue,
        };
        if stem != src_stem {
            continue;
        }
        // Rebuild the sidecar name from the destination stem plus the
        // sidecar's own extension.
        let ext = path.extension().map(|e| e.to_string_lossy().to_string());
        let mut dest_name = dest_stem.to_string_lossy().to_string();
        if let Some(ext) = ext {
            dest_name.push('.');
            dest_name.push_str(&ext);
        }
        let dest_path = dest_dir.join(dest_name);
        // Sidecars honor the same collision policy as the main file.
        let dest_path = resolve_collision(&dest_path, policy)?;
        if let Some(dest_path) = dest_path {
            if let Some(parent) = dest_path.parent() {
                fs::create_dir_all(parent).with_context(|| {
                    format!("failed to create sidecar output dir: {}", parent.display())
                })?;
            }
            match mode {
                OpMode::Copy => copy_file(&path, &dest_path)?,
                OpMode::Move | OpMode::RenameInPlace => move_file(&path, &dest_path)?,
            }
            processed += 1;
        }
    }
    Ok(processed)
}

118
src/llm.rs Normal file
View File

@@ -0,0 +1,118 @@
use anyhow::{Context, Result};
use reqwest::blocking::Client;
use serde::{Deserialize, Serialize};
/// Title/year hints extracted from a filename by the LLM.
#[derive(Debug, Clone, Default)]
pub struct LlmHints {
    pub title: Option<String>,
    pub year: Option<i32>,
    pub alt_titles: Vec<String>,
}
/// Blocking client for an Ollama-compatible `/api/generate` endpoint.
#[derive(Clone)]
pub struct LlmClient {
    endpoint: String,
    model: String,
    max_tokens: Option<u32>,
    client: Client,
}
impl LlmClient {
    /// Builds a client with a hard request timeout so a hung local model
    /// cannot stall the pipeline.
    pub fn new(endpoint: String, model: String, timeout_seconds: u64, max_tokens: Option<u32>) -> Result<Self> {
        let client = Client::builder()
            .timeout(std::time::Duration::from_secs(timeout_seconds))
            .build()
            .context("failed to build HTTP client for LLM")?;
        Ok(Self {
            endpoint,
            model,
            max_tokens,
            client,
        })
    }
    /// Asks the model to extract title/year hints from a raw filename via
    /// the non-streaming `/api/generate` endpoint.
    ///
    /// Malformed model output is tolerated: any response text that does not
    /// decode into the expected JSON shape yields default (empty) hints
    /// rather than an error.
    pub fn parse_filename(&self, raw: &str) -> Result<LlmHints> {
        let prompt = build_prompt(raw);
        let request = OllamaRequest {
            model: self.model.clone(),
            prompt,
            stream: false,
            format: Some("json".to_string()),
            // Temperature 0 keeps the extraction deterministic.
            options: Some(OllamaOptions {
                num_predict: self.max_tokens,
                temperature: 0.0,
            }),
        };
        let url = format!("{}/api/generate", self.endpoint.trim_end_matches('/'));
        let response = self
            .client
            .post(url)
            .json(&request)
            .send()
            .context("LLM request failed")?;
        let status = response.status();
        if !status.is_success() {
            return Err(anyhow::anyhow!("LLM returned HTTP {status}"));
        }
        let body: OllamaResponse = response.json().context("failed to parse LLM response")?;
        let hints = parse_hints(&body.response).unwrap_or_default();
        Ok(hints)
    }
}
/// Builds the deterministic title/year extraction prompt for one filename.
fn build_prompt(raw: &str) -> String {
    format!(
        "You are a strict parser. Extract the full movie title and year from the filename below.\n\nRules:\n- Output JSON only.\n- Title must include all words of the movie name in order (no partial tokens).\n- Strip release metadata (resolution, codec, source, group tags).\n- Year must be a 4-digit number if present.\n- If unsure, use null for fields and empty array for alt_titles.\n- Do NOT invent data.\n\nReturn JSON with keys: title, year, alt_titles.\n\nFilename: {}\n",
        raw
    )
}
/// Request body for the `/api/generate` endpoint.
#[derive(Serialize)]
struct OllamaRequest {
    model: String,
    prompt: String,
    stream: bool,
    format: Option<String>,
    options: Option<OllamaOptions>,
}
/// Generation options forwarded to the model.
#[derive(Serialize)]
struct OllamaOptions {
    #[serde(skip_serializing_if = "Option::is_none")]
    num_predict: Option<u32>,
    temperature: f32,
}
/// The only field read from the generate response.
#[derive(Deserialize)]
struct OllamaResponse {
    response: String,
}
/// Loosely-typed mirror of the JSON the model is asked to emit.
#[derive(Deserialize, Default)]
struct LlmHintsRaw {
    title: Option<String>,
    year: Option<YearValue>,
    alt_titles: Option<Vec<String>>,
}
/// Models sometimes return the year as a string; accept both shapes.
#[derive(Deserialize)]
#[serde(untagged)]
enum YearValue {
    Number(i32),
    String(String),
}
/// Best-effort decode of the model's response text; returns `None` when
/// the payload is not valid JSON for the expected shape. String years are
/// reduced to their digits before parsing.
fn parse_hints(raw: &str) -> Option<LlmHints> {
    let parsed: LlmHintsRaw = serde_json::from_str(raw).ok()?;
    let year = match parsed.year {
        Some(YearValue::Number(num)) => Some(num),
        Some(YearValue::String(text)) => {
            let digits: String = text.chars().filter(char::is_ascii_digit).collect();
            digits.parse().ok()
        }
        None => None,
    };
    Some(LlmHints {
        title: parsed.title,
        year,
        alt_titles: parsed.alt_titles.unwrap_or_default(),
    })
}

34
src/main.rs Normal file
View File

@@ -0,0 +1,34 @@
mod cli;
mod config;
mod fsops;
mod llm;
mod media;
mod metadata;
mod output;
mod parse;
mod pipeline;
mod report;
mod utils;
use anyhow::Result;
use clap::Parser;
use crate::cli::Cli;
/// Entry point. Running with no arguments is the "first run" path: create
/// the default config file, tell the user where it is, and exit.
fn main() -> Result<()> {
    if std::env::args_os().len() == 1 {
        let path = config::init_default_config()?;
        eprintln!("Config file: {} (edit to set API keys and defaults)", path.display());
        return Ok(());
    }
    let cli = Cli::parse();
    let settings = config::build_settings(&cli)?;
    // Report parameters are captured before `settings` is moved into the
    // pipeline.
    let report_format = settings.report_format.clone();
    let report_path = settings.report_path.clone();
    let report = pipeline::run(settings)?;
    report.write(&report_format, report_path.as_deref())?;
    Ok(())
}

154
src/media.rs Normal file
View File

@@ -0,0 +1,154 @@
use std::path::Path;
use std::process::Command;
use anyhow::{anyhow, Context, Result};
use serde::Deserialize;
use crate::config::QualityTags;
/// Technical attributes extracted from a media file via ffprobe.
#[derive(Debug, Clone)]
pub struct MediaInfo {
    pub duration_seconds: Option<f64>,
    pub height: Option<u32>,
    pub codec: Option<String>,
}
/// Top-level shape of `ffprobe -print_format json` output (subset).
#[derive(Debug, Deserialize)]
struct FfprobeOutput {
    format: Option<FfprobeFormat>,
    streams: Option<Vec<FfprobeStream>>,
}
#[derive(Debug, Deserialize)]
struct FfprobeFormat {
    // ffprobe reports duration as a decimal string, parsed downstream.
    duration: Option<String>,
}
#[derive(Debug, Deserialize)]
struct FfprobeStream {
    codec_type: Option<String>,
    codec_name: Option<String>,
    height: Option<u32>,
}
/// Runs `ffprobe` on `path` and extracts container duration plus the first
/// video stream's height and codec. Requires `ffprobe` on PATH; a non-zero
/// exit becomes an error carrying ffprobe's stderr.
pub fn probe(path: &Path) -> Result<MediaInfo> {
    let output = Command::new("ffprobe")
        .arg("-v")
        .arg("error")
        .arg("-print_format")
        .arg("json")
        .arg("-show_format")
        .arg("-show_streams")
        .arg(path)
        .output()
        .with_context(|| format!("failed to run ffprobe on {}", path.display()))?;
    if !output.status.success() {
        return Err(anyhow!(
            "ffprobe failed for {}: {}",
            path.display(),
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    let parsed: FfprobeOutput = serde_json::from_slice(&output.stdout)
        .with_context(|| "failed to parse ffprobe JSON")?;
    // Container-level duration comes back as a decimal string.
    let duration_seconds = parsed
        .format
        .and_then(|fmt| fmt.duration)
        .and_then(|dur| dur.parse::<f64>().ok());
    // Only the first video stream is considered.
    let video_stream = parsed
        .streams
        .unwrap_or_default()
        .into_iter()
        .find(|stream| stream.codec_type.as_deref() == Some("video"));
    let (height, codec) = if let Some(stream) = video_stream {
        (stream.height, stream.codec_name)
    } else {
        (None, None)
    };
    Ok(MediaInfo {
        duration_seconds,
        height,
        codec,
    })
}
/// Renders the enabled quality tags (e.g. "1080p x265") for a probed file;
/// returns `None` when nothing applies.
pub fn quality_tag(info: &MediaInfo, tags: &QualityTags) -> Option<String> {
    let mut parts: Vec<String> = Vec::new();
    if tags.resolution {
        parts.extend(resolution_tag(info.height));
    }
    if tags.codec {
        parts.extend(codec_tag(info.codec.as_deref()));
    }
    if tags.source {
        // Source tagging not implemented yet; placeholder for future expansion.
    }
    if parts.is_empty() {
        None
    } else {
        Some(parts.join(" "))
    }
}
/// Maps a video height in rows to the conventional resolution label.
/// Heights below 480 all collapse to "360p".
pub fn resolution_tag(height: Option<u32>) -> Option<String> {
    let rows = height?;
    let label = if rows < 480 {
        "360p"
    } else if rows < 720 {
        "480p"
    } else if rows < 1080 {
        "720p"
    } else if rows < 2160 {
        "1080p"
    } else {
        "2160p"
    };
    Some(label.to_string())
}
/// Normalizes an ffprobe codec name to a filename tag. HEVC markers are
/// checked before AVC ones, mirroring the original precedence; unknown
/// codecs yield `None`.
pub fn codec_tag(codec: Option<&str>) -> Option<String> {
    let name = codec?.to_ascii_lowercase();
    let rules: &[(&[&str], &str)] = &[
        (&["hevc", "h265", "x265"], "x265"),
        (&["h264", "x264"], "x264"),
        (&["av1"], "av1"),
    ];
    rules
        .iter()
        .find(|(needles, _)| needles.iter().any(|n| name.contains(*n)))
        .map(|(_, tag)| tag.to_string())
}
#[cfg(test)]
mod tests {
    use super::{codec_tag, resolution_tag};
    /// Covers every label plus bucket boundaries and the absent-height case,
    /// which the original tests missed.
    #[test]
    fn resolution_tags() {
        assert_eq!(resolution_tag(Some(2160)).as_deref(), Some("2160p"));
        assert_eq!(resolution_tag(Some(1080)).as_deref(), Some("1080p"));
        assert_eq!(resolution_tag(Some(720)).as_deref(), Some("720p"));
        assert_eq!(resolution_tag(Some(480)).as_deref(), Some("480p"));
        assert_eq!(resolution_tag(Some(360)).as_deref(), Some("360p"));
        // Boundary and absent-height coverage.
        assert_eq!(resolution_tag(Some(2159)).as_deref(), Some("1080p"));
        assert_eq!(resolution_tag(Some(479)).as_deref(), Some("360p"));
        assert_eq!(resolution_tag(None), None);
    }
    /// Covers the known codec mappings plus case-insensitivity and the
    /// absent-codec case, which the original tests missed.
    #[test]
    fn codec_tags() {
        assert_eq!(codec_tag(Some("h264")).as_deref(), Some("x264"));
        assert_eq!(codec_tag(Some("hevc")).as_deref(), Some("x265"));
        assert_eq!(codec_tag(Some("av1")).as_deref(), Some("av1"));
        assert_eq!(codec_tag(Some("vp9")), None);
        // Case-insensitivity and absent-codec coverage.
        assert_eq!(codec_tag(Some("HEVC")).as_deref(), Some("x265"));
        assert_eq!(codec_tag(None), None);
    }
}

80
src/metadata/cache.rs Normal file
View File

@@ -0,0 +1,80 @@
use std::path::PathBuf;
use anyhow::{Context, Result};
use rusqlite::{params, Connection};
/// TTL'd key/value cache backed by a SQLite file; `refresh` forces every
/// read to miss so entries get refetched and overwritten.
pub struct Cache {
    path: PathBuf,
    ttl_days: u32,
    refresh: bool,
}
impl Cache {
    /// `ttl_days` bounds entry age; `refresh` forces every read to miss.
    pub fn new(path: PathBuf, ttl_days: u32, refresh: bool) -> Self {
        Self {
            path,
            ttl_days,
            refresh,
        }
    }
    /// Returns the cached value for (namespace, key), or `None` on miss,
    /// expiry, or when refresh mode is active.
    pub fn get(&self, namespace: &str, key: &str) -> Result<Option<String>> {
        if self.refresh {
            return Ok(None);
        }
        let conn = self.open()?;
        let mut stmt = conn.prepare(
            "SELECT value, fetched_at FROM cache WHERE namespace = ?1 AND key = ?2 LIMIT 1",
        )?;
        let row = stmt.query_row(params![namespace, key], |row| {
            let value: String = row.get(0)?;
            let fetched_at: i64 = row.get(1)?;
            Ok((value, fetched_at))
        });
        let (value, fetched_at) = match row {
            Ok(row) => row,
            Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
            Err(err) => return Err(err.into()),
        };
        // TTL comparison in fractional days; expired rows are treated as
        // misses and left in place (the next set overwrites them).
        let now = current_timestamp();
        let age_days = (now - fetched_at) as f64 / 86_400.0;
        if age_days > self.ttl_days as f64 {
            return Ok(None);
        }
        Ok(Some(value))
    }
    /// Upserts a value, stamping it with the current time.
    pub fn set(&self, namespace: &str, key: &str, value: &str) -> Result<()> {
        let conn = self.open()?;
        conn.execute(
            "INSERT INTO cache (namespace, key, value, fetched_at) VALUES (?1, ?2, ?3, ?4)
ON CONFLICT(namespace, key) DO UPDATE SET value = excluded.value, fetched_at = excluded.fetched_at",
            params![namespace, key, value, current_timestamp()],
        )?;
        Ok(())
    }
    /// Opens the SQLite database, creating the parent directory and the
    /// cache table as needed.
    ///
    /// NOTE(review): each get/set opens a fresh connection and re-runs the
    /// CREATE TABLE — fine at current call volumes, but a held Connection
    /// would be cheaper if profiling flags this.
    fn open(&self) -> Result<Connection> {
        if let Some(parent) = self.path.parent() {
            std::fs::create_dir_all(parent)
                .with_context(|| format!("failed to create cache dir: {}", parent.display()))?;
        }
        let conn = Connection::open(&self.path)
            .with_context(|| format!("failed to open cache db: {}", self.path.display()))?;
        conn.execute(
            "CREATE TABLE IF NOT EXISTS cache (
namespace TEXT NOT NULL,
key TEXT NOT NULL,
value TEXT NOT NULL,
fetched_at INTEGER NOT NULL,
PRIMARY KEY(namespace, key)
)",
            [],
        )?;
        Ok(conn)
    }
}
/// Seconds since the Unix epoch.
///
/// Uses the standard library clock instead of pulling in chrono for a plain
/// epoch timestamp; a clock set before the epoch (which should not occur in
/// practice) maps to 0 rather than panicking.
fn current_timestamp() -> i64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_secs() as i64)
        .unwrap_or(0)
}

336
src/metadata/mod.rs Normal file
View File

@@ -0,0 +1,336 @@
use std::collections::HashMap;
use std::sync::Arc;
use anyhow::{anyhow, Result};
use reqwest::blocking::Client;
use crate::config::Settings;
use crate::metadata::cache::Cache;
use crate::parse::FileHints;
use crate::utils::{normalize_title, Semaphore};
mod cache;
mod omdb;
mod tmdb;
/// Where a match candidate came from.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum Provider {
    Omdb,
    Tmdb,
    Parsed,
    Manual,
}
impl Provider {
    /// Stable lowercase name (used e.g. as the candidate dedupe-key prefix).
    pub fn as_str(&self) -> &'static str {
        match self {
            Provider::Omdb => "omdb",
            Provider::Tmdb => "tmdb",
            Provider::Parsed => "parsed",
            Provider::Manual => "manual",
        }
    }
}
/// A single provider search hit, prior to scoring.
#[derive(Clone, Debug)]
pub struct Candidate {
    pub provider: Provider,
    pub id: String,
    pub title: String,
    pub year: Option<i32>,
    pub runtime_minutes: Option<u32>,
}
/// A candidate paired with its 0..=1 match score.
#[derive(Clone, Debug)]
pub struct ScoredCandidate {
    pub candidate: Candidate,
    pub score: f64,
}
/// Fully-resolved movie metadata for a matched title.
#[derive(Clone, Debug)]
pub struct MovieMetadata {
    pub title: String,
    pub year: i32,
    pub tmdb_id: Option<u32>,
    pub imdb_id: Option<String>,
    pub provider: Provider,
    pub runtime_minutes: Option<u32>,
}
/// Result of matching one file: the accepted best match (if any score
/// cleared the threshold) plus every scored candidate.
#[derive(Clone, Debug)]
pub struct MatchOutcome {
    pub best: Option<MovieMetadata>,
    pub candidates: Vec<ScoredCandidate>,
}
/// Shared state for provider lookups: settings, response cache, HTTP
/// client, and the network-concurrency semaphore.
pub struct MetadataClient {
    settings: Arc<Settings>,
    cache: Arc<Cache>,
    client: Client,
    net_sem: Arc<Semaphore>,
}
impl MetadataClient {
    /// Builds the shared HTTP client and cache from settings; `net_sem`
    /// bounds concurrent network requests.
    pub fn new(settings: Arc<Settings>, net_sem: Arc<Semaphore>) -> Result<Self> {
        let client = Client::builder().build()?;
        let cache = Arc::new(Cache::new(
            settings.cache_path.clone(),
            settings.cache_ttl_days,
            settings.refresh_cache,
        ));
        Ok(Self {
            settings,
            cache,
            client,
            net_sem,
        })
    }
    /// Fails early when the configured provider selection has no usable
    /// API keys.
    pub fn validate(&self) -> Result<()> {
        self.selected_providers().map(|_| ())
    }
    /// Searches the selected providers with every query derived from the
    /// filename hints, scores and ranks the merged candidates, and — when
    /// the top score clears `min_score` — resolves it to full metadata.
    pub fn match_movie(&self, hints: &FileHints, runtime_minutes: Option<u32>) -> Result<MatchOutcome> {
        let providers = self.selected_providers()?;
        let queries = build_queries(hints);
        let mut candidates = Vec::new();
        for provider in providers {
            for query in &queries {
                let mut results = match provider {
                    Provider::Omdb => omdb::search(
                        &self.client,
                        &self.settings.omdb_base_url,
                        self.settings.api_key_omdb.as_deref().ok_or_else(|| anyhow!("OMDb API key missing"))?,
                        query,
                        &self.cache,
                        &self.net_sem,
                    )?,
                    Provider::Tmdb => tmdb::search(
                        &self.client,
                        &self.settings.tmdb_base_url,
                        self.settings.api_key_tmdb.as_deref().ok_or_else(|| anyhow!("TMDb API key missing"))?,
                        query,
                        &self.cache,
                        &self.net_sem,
                    )?,
                    Provider::Parsed | Provider::Manual => Vec::new(),
                };
                candidates.append(&mut results);
            }
        }
        let candidates = dedupe_candidates(candidates);
        let mut scored = score_candidates(hints, runtime_minutes, candidates);
        scored.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
        // When a local runtime is known, backfill runtimes for the leading
        // candidates and re-score so runtime agreement can reorder them.
        if runtime_minutes.is_some() && !scored.is_empty() {
            self.enrich_runtime(&mut scored)?;
            for entry in &mut scored {
                entry.score = score_candidate(hints, runtime_minutes, &entry.candidate);
            }
            scored.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
        }
        // Scores are 0..=1; min_score is expressed as a percentage.
        let best = if let Some(best) = scored.first() {
            if best.score * 100.0 >= self.settings.min_score as f64 {
                Some(self.fetch_details(&best.candidate)?)
            } else {
                None
            }
        } else {
            None
        };
        Ok(MatchOutcome { best, candidates: scored })
    }
    /// Fetches full metadata for an explicitly chosen candidate.
    pub fn resolve_candidate(&self, candidate: &Candidate) -> Result<MovieMetadata> {
        self.fetch_details(candidate)
    }
    /// Provider-specific details lookup; parsed/manual candidates have no
    /// backing service.
    fn fetch_details(&self, candidate: &Candidate) -> Result<MovieMetadata> {
        match candidate.provider {
            Provider::Omdb => {
                let key = self.settings.api_key_omdb.as_deref().ok_or_else(|| anyhow!("OMDb API key missing"))?;
                omdb::details(
                    &self.client,
                    &self.settings.omdb_base_url,
                    key,
                    &candidate.id,
                    &self.cache,
                    &self.net_sem,
                )
            }
            Provider::Tmdb => {
                let key = self.settings.api_key_tmdb.as_deref().ok_or_else(|| anyhow!("TMDb API key missing"))?;
                tmdb::details(
                    &self.client,
                    &self.settings.tmdb_base_url,
                    key,
                    &candidate.id,
                    &self.cache,
                    &self.net_sem,
                )
            }
            Provider::Parsed | Provider::Manual => {
                Err(anyhow!("parsed/manual provider has no metadata lookup"))
            }
        }
    }
    /// Backfills runtime (and year) for up to the top 3 candidates so
    /// runtime can participate in scoring; individual lookup failures are
    /// deliberately ignored.
    fn enrich_runtime(&self, candidates: &mut [ScoredCandidate]) -> Result<()> {
        let top_n = 3.min(candidates.len());
        for entry in candidates.iter_mut().take(top_n) {
            if entry.candidate.runtime_minutes.is_some() {
                continue;
            }
            if let Ok(details) = self.fetch_details(&entry.candidate) {
                entry.candidate.runtime_minutes = details.runtime_minutes;
                entry.candidate.year = Some(details.year);
            }
        }
        Ok(())
    }
    /// Maps the configured provider choice to the concrete provider list,
    /// verifying the necessary API keys exist. `Auto` prefers TMDb.
    fn selected_providers(&self) -> Result<Vec<Provider>> {
        use crate::cli::ProviderChoice;
        match self.settings.provider {
            ProviderChoice::Auto => {
                if self.settings.api_key_tmdb.is_some() {
                    Ok(vec![Provider::Tmdb])
                } else if self.settings.api_key_omdb.is_some() {
                    Ok(vec![Provider::Omdb])
                } else {
                    Err(anyhow!("no API keys available for provider selection"))
                }
            }
            ProviderChoice::Omdb => {
                if self.settings.api_key_omdb.is_none() {
                    Err(anyhow!("OMDb provider selected but API key missing"))
                } else {
                    Ok(vec![Provider::Omdb])
                }
            }
            ProviderChoice::Tmdb => {
                if self.settings.api_key_tmdb.is_none() {
                    Err(anyhow!("TMDb provider selected but API key missing"))
                } else {
                    Ok(vec![Provider::Tmdb])
                }
            }
            ProviderChoice::Both => {
                if self.settings.api_key_tmdb.is_none() || self.settings.api_key_omdb.is_none() {
                    Err(anyhow!("both providers requested but one or more API keys missing"))
                } else {
                    Ok(vec![Provider::Tmdb, Provider::Omdb])
                }
            }
        }
    }
}
/// A single provider search request derived from filename hints.
#[derive(Clone, Debug)]
pub(crate) struct SearchQuery {
    title: String,
    year: Option<i32>,
}
/// Expands filename hints into provider search queries — the primary title
/// plus every alternate, each paired with the hinted year — deduplicated
/// by normalized title + year.
fn build_queries(hints: &FileHints) -> Vec<SearchQuery> {
    let queries = hints
        .title
        .iter()
        .chain(hints.alt_titles.iter())
        .map(|title| SearchQuery {
            title: title.clone(),
            year: hints.year,
        })
        .collect();
    dedupe_queries(queries)
}
/// Removes duplicate queries, keeping the first occurrence. Keyed on the
/// normalized title plus year (0 stands in for "no year").
///
/// Uses a HashSet instead of the previous HashMap with dummy `true`
/// values; `insert` returns whether the key was new.
fn dedupe_queries(queries: Vec<SearchQuery>) -> Vec<SearchQuery> {
    let mut seen = std::collections::HashSet::new();
    let mut out = Vec::new();
    for query in queries {
        let key = format!("{}:{}", normalize_title(&query.title), query.year.unwrap_or(0));
        if seen.insert(key) {
            out.push(query);
        }
    }
    out
}
/// Scores every candidate against the hints; input order is preserved.
fn score_candidates(
    hints: &FileHints,
    runtime_minutes: Option<u32>,
    candidates: Vec<Candidate>,
) -> Vec<ScoredCandidate> {
    candidates
        .into_iter()
        .map(|candidate| {
            let score = score_candidate(hints, runtime_minutes, &candidate);
            ScoredCandidate { candidate, score }
        })
        .collect()
}
fn dedupe_candidates(candidates: Vec<Candidate>) -> Vec<Candidate> {
let mut seen = HashMap::new();
let mut out = Vec::new();
for candidate in candidates {
let key = format!("{}:{}", candidate.provider.as_str(), candidate.id);
if seen.insert(key, true).is_none() {
out.push(candidate);
}
}
out
}
/// Blends title similarity with year and runtime agreement into a 0..=1
/// score: exact year +0.10, off-by-one +0.05, otherwise -0.05; runtime
/// within 2 minutes +0.05, within 5 minutes +0.02.
fn score_candidate(hints: &FileHints, runtime_minutes: Option<u32>, candidate: &Candidate) -> f64 {
    let mut score = best_title_score(hints, &candidate.title);
    if let (Some(want), Some(have)) = (hints.year, candidate.year) {
        score += match (want - have).abs() {
            0 => 0.10,
            1 => 0.05,
            _ => -0.05,
        };
    }
    if let (Some(want), Some(have)) = (runtime_minutes, candidate.runtime_minutes) {
        let diff = want.abs_diff(have);
        if diff <= 2 {
            score += 0.05;
        } else if diff <= 5 {
            score += 0.02;
        }
    }
    score.clamp(0.0, 1.0)
}
/// Highest Jaro-Winkler similarity between the candidate's normalized
/// title and any hint title (primary or alternate); 0.0 when there are no
/// hint titles.
fn best_title_score(hints: &FileHints, candidate_title: &str) -> f64 {
    let target = normalize_title(candidate_title);
    hints
        .title
        .iter()
        .chain(hints.alt_titles.iter())
        .map(|title| strsim::jaro_winkler(&normalize_title(title), &target))
        .fold(0.0, f64::max)
}

161
src/metadata/omdb.rs Normal file
View File

@@ -0,0 +1,161 @@
use anyhow::{anyhow, Context, Result};
use reqwest::blocking::Client;
use serde::Deserialize;
use crate::metadata::{Candidate, MovieMetadata, Provider};
use crate::metadata::cache::Cache;
use crate::metadata::SearchQuery;
use crate::utils::{normalize_title, Semaphore};
/// OMDb search envelope; `Response: "False"` signals no results.
#[derive(Debug, Deserialize)]
struct OmdbSearchResponse {
    #[serde(rename = "Search")]
    search: Option<Vec<OmdbSearchItem>>,
    #[serde(rename = "Response")]
    response: Option<String>,
}
/// One OMDb search hit.
#[derive(Debug, Deserialize)]
struct OmdbSearchItem {
    #[serde(rename = "Title")]
    title: String,
    #[serde(rename = "Year")]
    year: String,
    #[serde(rename = "imdbID")]
    imdb_id: String,
}
/// OMDb details payload; on failure `Response` is "False" and `Error`
/// carries the message.
#[derive(Debug, Deserialize)]
struct OmdbDetailResponse {
    #[serde(rename = "Title")]
    title: Option<String>,
    #[serde(rename = "Year")]
    year: Option<String>,
    #[serde(rename = "imdbID")]
    imdb_id: Option<String>,
    #[serde(rename = "Runtime")]
    runtime: Option<String>,
    #[serde(rename = "Response")]
    response: Option<String>,
    #[serde(rename = "Error")]
    error: Option<String>,
}
/// Searches OMDb for movies matching `query`, consulting the cache first.
/// Raw response bodies are cached keyed on normalized title + year.
pub fn search(
    client: &Client,
    base_url: &str,
    api_key: &str,
    query: &SearchQuery,
    cache: &Cache,
    net_sem: &Semaphore,
) -> Result<Vec<Candidate>> {
    let key = format!("{}:{}", normalize_title(&query.title), query.year.unwrap_or(0));
    if let Some(cached) = cache.get("omdb_search", &key)? {
        return parse_search(&cached);
    }
    // Bound concurrent network calls; the permit is held for the request.
    let _permit = net_sem.acquire();
    let mut req = client
        .get(base_url)
        .query(&[("apikey", api_key), ("s", &query.title), ("type", "movie")]);
    if let Some(year) = query.year {
        req = req.query(&[("y", year.to_string())]);
    }
    let resp = req.send().context("OMDb search request failed")?;
    let status = resp.status();
    if !status.is_success() {
        return Err(anyhow!("OMDb search failed with HTTP {status}"));
    }
    let text = resp.text().context("failed to read OMDb response")?;
    cache.set("omdb_search", &key, &text)?;
    parse_search(&text)
}
/// Decodes an OMDb search payload into candidates; OMDb signals "no
/// results" via `Response == "False"` rather than an empty list.
fn parse_search(raw: &str) -> Result<Vec<Candidate>> {
    let parsed: OmdbSearchResponse = serde_json::from_str(raw)
        .with_context(|| "failed to parse OMDb search JSON")?;
    if parsed.response.as_deref() == Some("False") {
        return Ok(Vec::new());
    }
    let candidates = parsed
        .search
        .unwrap_or_default()
        .into_iter()
        .map(|item| Candidate {
            provider: Provider::Omdb,
            year: parse_year(&item.year),
            id: item.imdb_id,
            title: item.title,
            runtime_minutes: None,
        })
        .collect();
    Ok(candidates)
}
/// Fetches full OMDb details for an IMDb id, consulting the cache first.
/// Raw response bodies are cached keyed on the IMDb id.
pub fn details(
    client: &Client,
    base_url: &str,
    api_key: &str,
    imdb_id: &str,
    cache: &Cache,
    net_sem: &Semaphore,
) -> Result<MovieMetadata> {
    if let Some(cached) = cache.get("omdb_details", imdb_id)? {
        return parse_details(&cached);
    }
    // Bound concurrent network calls.
    let _permit = net_sem.acquire();
    let resp = client
        .get(base_url)
        .query(&[("apikey", api_key), ("i", imdb_id), ("plot", "short")])
        .send()
        .context("OMDb details request failed")?;
    let status = resp.status();
    if !status.is_success() {
        return Err(anyhow!("OMDb details failed with HTTP {status}"));
    }
    let text = resp.text().context("failed to read OMDb details")?;
    cache.set("omdb_details", imdb_id, &text)?;
    parse_details(&text)
}
fn parse_details(raw: &str) -> Result<MovieMetadata> {
let parsed: OmdbDetailResponse = serde_json::from_str(raw)
.with_context(|| "failed to parse OMDb details JSON")?;
if parsed.response.as_deref() == Some("False") {
let msg = parsed.error.unwrap_or_else(|| "OMDb details not found".to_string());
return Err(anyhow!(msg));
}
let title = parsed.title.unwrap_or_else(|| "Unknown Title".to_string());
let year = parsed
.year
.and_then(|y| parse_year(&y))
.unwrap_or(0);
let imdb_id = parsed.imdb_id;
let runtime_minutes = parsed.runtime.as_deref().and_then(parse_runtime);
Ok(MovieMetadata {
title,
year,
tmdb_id: None,
imdb_id,
provider: Provider::Omdb,
runtime_minutes,
})
}
/// Extracts a 4-digit year from a messy OMDb year string ("1994",
/// "1994–1995", ...): the first four ASCII digits found anywhere, or
/// `None` when fewer than four exist.
fn parse_year(raw: &str) -> Option<i32> {
    let digits: String = raw
        .chars()
        .filter(|c| c.is_ascii_digit())
        .take(4)
        .collect();
    if digits.len() == 4 {
        digits.parse().ok()
    } else {
        None
    }
}
/// Parses OMDb's "Runtime" field ("142 min") into minutes; `None` when
/// the string does not start with digits.
fn parse_runtime(raw: &str) -> Option<u32> {
    let end = raw
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(raw.len());
    raw[..end].parse().ok()
}

142
src/metadata/tmdb.rs Normal file
View File

@@ -0,0 +1,142 @@
use anyhow::{anyhow, Context, Result};
use reqwest::blocking::{Client, RequestBuilder};
use serde::Deserialize;
use crate::metadata::{Candidate, MovieMetadata, Provider};
use crate::metadata::cache::Cache;
use crate::metadata::SearchQuery;
use crate::utils::{normalize_title, Semaphore};
/// TMDb /search/movie envelope.
#[derive(Debug, Deserialize)]
struct TmdbSearchResponse {
    results: Option<Vec<TmdbSearchItem>>,
}
/// One TMDb search hit; the year is derived from `release_date`.
#[derive(Debug, Deserialize)]
struct TmdbSearchItem {
    id: u32,
    title: String,
    release_date: Option<String>,
}
/// TMDb /movie/{id} payload (subset of fields used here).
#[derive(Debug, Deserialize)]
struct TmdbDetailResponse {
    id: u32,
    title: Option<String>,
    release_date: Option<String>,
    runtime: Option<u32>,
    imdb_id: Option<String>,
}
/// Searches TMDb for movies matching `query`, consulting the cache first.
/// Raw response bodies are cached keyed on normalized title + year.
pub fn search(
    client: &Client,
    base_url: &str,
    api_key: &str,
    query: &SearchQuery,
    cache: &Cache,
    net_sem: &Semaphore,
) -> Result<Vec<Candidate>> {
    let key = format!("{}:{}", normalize_title(&query.title), query.year.unwrap_or(0));
    if let Some(cached) = cache.get("tmdb_search", &key)? {
        return parse_search(&cached);
    }
    // Bound concurrent network calls; the permit is held for the request.
    let _permit = net_sem.acquire();
    let url = format!("{}/search/movie", base_url.trim_end_matches('/'));
    let mut req = apply_auth(client.get(url), api_key)
        .query(&[("query", &query.title)]);
    if let Some(year) = query.year {
        req = req.query(&[("year", year.to_string())]);
    }
    let resp = req.send().context("TMDb search request failed")?;
    let status = resp.status();
    if !status.is_success() {
        return Err(anyhow!("TMDb search failed with HTTP {status}"));
    }
    let text = resp.text().context("failed to read TMDb response")?;
    cache.set("tmdb_search", &key, &text)?;
    parse_search(&text)
}
/// Decode a raw TMDb search response body into provider-agnostic candidates.
/// A missing `results` array yields an empty vector.
fn parse_search(raw: &str) -> Result<Vec<Candidate>> {
    let parsed: TmdbSearchResponse = serde_json::from_str(raw)
        .with_context(|| "failed to parse TMDb search JSON")?;
    let items = parsed.results.unwrap_or_default();
    let candidates = items
        .into_iter()
        .map(|item| Candidate {
            provider: Provider::Tmdb,
            id: item.id.to_string(),
            year: item.release_date.as_deref().and_then(parse_year),
            title: item.title,
            runtime_minutes: None,
        })
        .collect();
    Ok(candidates)
}
/// Fetch full movie details from TMDb's `/movie/{id}` endpoint, reading from
/// and writing to the cache exactly like `search` does.
pub fn details(
    client: &Client,
    base_url: &str,
    api_key: &str,
    id: &str,
    cache: &Cache,
    net_sem: &Semaphore,
) -> Result<MovieMetadata> {
    // The TMDb numeric id string is the cache key.
    if let Some(cached) = cache.get("tmdb_details", id)? {
        return parse_details(&cached);
    }
    let _permit = net_sem.acquire();
    let url = format!("{}/movie/{}", base_url.trim_end_matches('/'), id);
    let resp = apply_auth(client.get(url), api_key).send()
        .context("TMDb details request failed")?;
    let status = resp.status();
    if !status.is_success() {
        return Err(anyhow!("TMDb details failed with HTTP {status}"));
    }
    let text = resp.text().context("failed to read TMDb details")?;
    // Only successful responses are cached.
    cache.set("tmdb_details", id, &text)?;
    parse_details(&text)
}
/// Decode a raw TMDb detail response into the common `MovieMetadata` shape.
/// A missing title becomes "Unknown Title" and a missing year becomes 0.
fn parse_details(raw: &str) -> Result<MovieMetadata> {
    let parsed: TmdbDetailResponse = serde_json::from_str(raw)
        .with_context(|| "failed to parse TMDb details JSON")?;
    let year = parsed
        .release_date
        .as_deref()
        .and_then(parse_year)
        .unwrap_or(0);
    Ok(MovieMetadata {
        title: parsed.title.unwrap_or_else(|| "Unknown Title".to_string()),
        year,
        tmdb_id: Some(parsed.id),
        imdb_id: parsed.imdb_id,
        provider: Provider::Tmdb,
        runtime_minutes: parsed.runtime,
    })
}
/// Attach TMDb credentials to a request: v4 tokens (JWT-looking strings) go
/// in a Bearer header, anything else is sent as the legacy `api_key` query
/// parameter.
fn apply_auth(req: RequestBuilder, api_key: &str) -> RequestBuilder {
    if looks_like_bearer(api_key) {
        req.bearer_auth(api_key)
    } else {
        req.query(&[("api_key", api_key)])
    }
}
/// Heuristic for TMDb v4 read-access tokens: JWTs contain dots and are much
/// longer than v3 api keys.
fn looks_like_bearer(value: &str) -> bool {
    let long_enough = value.len() > 30;
    long_enough && value.contains('.')
}
/// Parse the leading "YYYY" out of a TMDb release date ("YYYY-MM-DD").
fn parse_year(raw: &str) -> Option<i32> {
    let head = raw.get(0..4)?;
    head.parse::<i32>().ok()
}

95
src/output.rs Normal file
View File

@@ -0,0 +1,95 @@
use std::io;
use std::sync::Mutex;
use is_terminal::IsTerminal;
use owo_colors::OwoColorize;
use crate::cli::ColorMode;
/// Per-file outcome shown on a progress line; also selects the status color.
#[derive(Clone, Copy, Debug)]
pub enum StatusKind {
    Renamed,
    Skipped,
    Failed,
}
/// Console output sink shared across worker threads.
pub struct Output {
    // Resolved from ColorMode at construction (Auto probes the stdout TTY).
    use_color: bool,
    // When false, `info` messages are suppressed.
    verbose: bool,
    // Serializes whole lines so concurrent workers do not interleave output.
    lock: Mutex<()>,
}
impl Output {
    /// Build an output sink; color usage is resolved from `color_mode`
    /// (`Auto` enables color only when stdout is a terminal).
    pub fn new(color_mode: &ColorMode, verbose: bool) -> Self {
        let use_color = match color_mode {
            ColorMode::Always => true,
            ColorMode::Never => false,
            ColorMode::Auto => io::stdout().is_terminal(),
        };
        Self {
            use_color,
            verbose,
            lock: Mutex::new(()),
        }
    }
    /// Print one per-file progress line:
    /// `[i/total] status filename | provider | result -> output`.
    pub fn status_line(
        &self,
        index: usize,
        total: usize,
        status: StatusKind,
        filename: &str,
        provider: Option<&str>,
        result: &str,
        output_name: Option<&str>,
    ) {
        let _guard = self.lock.lock().unwrap();
        let prefix = format!("[{}/{}]", index, total);
        let status_label = match status {
            StatusKind::Renamed => "renamed",
            StatusKind::Skipped => "skipped",
            StatusKind::Failed => "failed",
        };
        let status_label = self.colorize_status(status_label, status);
        // BUG FIX: `filename` was previously unused and the line printed a
        // literal "(unknown)" placeholder; show the actual file instead.
        let mut line = format!("{prefix} {status_label} {filename}");
        if let Some(provider) = provider {
            line.push_str(&format!(" | {provider}"));
        }
        line.push_str(&format!(" | {result}"));
        if let Some(output_name) = output_name {
            line.push_str(&format!(" -> {output_name}"));
        }
        println!("{line}");
    }
    /// Print a warning to stderr (yellow when color is enabled).
    pub fn warn(&self, message: &str) {
        let _guard = self.lock.lock().unwrap();
        let msg = if self.use_color {
            message.yellow().to_string()
        } else {
            message.to_string()
        };
        eprintln!("{msg}");
    }
    /// Print an informational line, but only in verbose mode.
    pub fn info(&self, message: &str) {
        if self.verbose {
            let _guard = self.lock.lock().unwrap();
            println!("{message}");
        }
    }
    /// Apply the status color (green/yellow/red) when color is enabled.
    fn colorize_status(&self, text: &str, status: StatusKind) -> String {
        if !self.use_color {
            return text.to_string();
        }
        match status {
            StatusKind::Renamed => text.green().to_string(),
            StatusKind::Skipped => text.yellow().to_string(),
            StatusKind::Failed => text.red().to_string(),
        }
    }
}

153
src/parse.rs Normal file
View File

@@ -0,0 +1,153 @@
use std::collections::HashSet;
use std::path::Path;
use once_cell::sync::Lazy;
use regex::Regex;
use crate::utils::{collapse_whitespace, normalize_title};
/// Title/year hints extracted from a filename, before any provider lookup.
#[derive(Debug, Clone)]
pub struct FileHints {
    // Best-guess display title (None when nothing usable survived cleanup).
    pub title: Option<String>,
    // `title` passed through `normalize_title`, for fuzzy matching.
    pub normalized_title: Option<String>,
    pub year: Option<i32>,
    // Additional titles to try, e.g. the left half of "A - B" names.
    pub alt_titles: Vec<String>,
}
// Matches 4-digit years from 1900 through 2099.
static YEAR_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(19|20)\d{2}").expect("year regex"));
// Matches [bracketed] release-tag segments for removal.
static BRACKET_SQUARE_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\[[^\]]*\]").expect("square bracket regex"));
// Matches (parenthesized) segments for removal.
static BRACKET_ROUND_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\([^\)]*\)").expect("round bracket regex"));
// Lowercased release-name tokens (quality, codec, source, group tags) that
// are never part of a movie title and are dropped during tokenization.
static STOPWORDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
    [
        "1080p", "720p", "2160p", "480p", "360p", "4k", "uhd", "hdr", "dvdrip",
        "bdrip", "brrip", "bluray", "blu", "webdl", "web-dl", "webrip", "hdrip",
        "remux", "x264", "x265", "h264", "h265", "hevc", "aac", "dts", "ac3",
        "proper", "repack", "limited", "extended", "uncut", "remastered", "subbed",
        "subs", "multi", "dubbed", "dub", "yts", "yify", "rarbg", "web", "hd",
        "hq", "cam", "ts", "dvdscr", "r5", "r6",
    ]
    .into_iter()
    .collect()
});
/// Heuristically derive title/year hints from a file's stem: strip bracketed
/// tags, pull out the year, collect alternate titles, and tokenize what is
/// left while dropping release-name noise.
pub fn parse_filename(path: &Path) -> FileHints {
    let stem = path
        .file_stem()
        .map(|s| s.to_string_lossy().to_string())
        .unwrap_or_default();
    let year = extract_year(&stem);
    let cleaned = strip_bracketed(&stem);
    let alt_titles = extract_alt_titles(&cleaned, year);
    let tokens = tokenize(&cleaned, year);
    let title = if tokens.is_empty() {
        // Nothing survived token filtering: fall back to the cleaned stem
        // with the year removed.
        let mut fallback = cleaned.clone();
        if let Some(y) = year {
            fallback = fallback.replace(&y.to_string(), "");
        }
        Some(collapse_whitespace(&fallback)).filter(|s| !s.is_empty())
    } else {
        Some(collapse_whitespace(&tokens.join(" ")))
    };
    FileHints {
        normalized_title: title.as_deref().map(normalize_title),
        title,
        year,
        alt_titles,
    }
}
/// Find the year in a raw filename stem; when several 4-digit years appear
/// (e.g. "2001.A.Space.Odyssey.1968"), the last one wins.
fn extract_year(raw: &str) -> Option<i32> {
    YEAR_RE
        .find_iter(raw)
        .filter_map(|m| m.as_str().parse::<i32>().ok())
        .last()
}
/// Replace every `[...]` and `(...)` segment with a space.
fn strip_bracketed(raw: &str) -> String {
    let step_one = BRACKET_SQUARE_RE.replace_all(raw, " ");
    BRACKET_ROUND_RE.replace_all(&step_one, " ").into_owned()
}
/// For "Left - Right" names, record the left half as an alternate title
/// (only when both halves are non-empty after cleanup).
fn extract_alt_titles(raw: &str, year: Option<i32>) -> Vec<String> {
    let mut out = Vec::new();
    if let Some((before, after)) = raw.split_once(" - ") {
        let primary = clean_title_fragment(before, year);
        let remainder = collapse_whitespace(after);
        if !primary.is_empty() && !remainder.is_empty() {
            out.push(primary);
        }
    }
    out
}
/// Drop the year (if known) from a title fragment and tidy the whitespace.
fn clean_title_fragment(fragment: &str, year: Option<i32>) -> String {
    let stripped = match year {
        Some(y) => fragment.replace(&y.to_string(), " "),
        None => fragment.to_string(),
    };
    collapse_whitespace(&stripped)
}
/// Split a cleaned stem on non-alphanumerics and drop tokens that are not
/// part of the title: the year itself, release stopwords, and short
/// all-caps tokens (presumably release-group tags — TODO confirm).
fn tokenize(raw: &str, year: Option<i32>) -> Vec<String> {
    // PERF FIX: the year's string form was previously re-allocated via
    // `year.to_string()` once per token; build it once before the loop.
    let year_str = year.map(|y| y.to_string());
    let mut tokens = Vec::new();
    for token in raw.split(|c: char| !c.is_alphanumeric()) {
        if token.is_empty() {
            continue;
        }
        let lower = token.to_ascii_lowercase();
        if year_str.as_deref() == Some(lower.as_str()) {
            continue;
        }
        if STOPWORDS.contains(lower.as_str()) {
            continue;
        }
        if token.chars().all(|c| c.is_ascii_uppercase()) && token.len() <= 8 {
            continue;
        }
        tokens.push(token.to_string());
    }
    tokens
}
#[cfg(test)]
mod tests {
    use super::parse_filename;
    use std::path::Path;
    // Dotted scene-style name: stopwords and group tag are stripped.
    #[test]
    fn parses_basic_title_and_year() {
        let path = Path::new("Some.Movie.2020.1080p.BluRay.x264-GROUP.mkv");
        let hints = parse_filename(path);
        assert_eq!(hints.title.as_deref(), Some("Some Movie"));
        assert_eq!(hints.year, Some(2020));
    }
    // Bracketed tags and parenthesized year are removed before tokenizing.
    #[test]
    fn handles_brackets_and_stopwords() {
        let path = Path::new("[YTS] The.Matrix.(1999).1080p.BluRay.mkv");
        let hints = parse_filename(path);
        assert_eq!(hints.title.as_deref(), Some("The Matrix"));
        assert_eq!(hints.year, Some(1999));
    }
    // "A - B" names keep the full string as title and add "A" as an alt.
    #[test]
    fn adds_alt_title_for_dash_suffix() {
        let path = Path::new("Zootopia - Vlix.mp4");
        let hints = parse_filename(path);
        assert_eq!(hints.title.as_deref(), Some("Zootopia Vlix"));
        assert!(hints.alt_titles.iter().any(|t| t == "Zootopia"));
    }
}

563
src/pipeline.rs Normal file
View File

@@ -0,0 +1,563 @@
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::io;
use anyhow::{anyhow, Context, Result};
use rayon::prelude::*;
use walkdir::WalkDir;
use crate::config::Settings;
use crate::fsops::{self, CollisionPolicy, OpMode};
use crate::llm::{LlmClient, LlmHints};
use crate::media;
use crate::metadata::{MatchOutcome, MetadataClient, MovieMetadata, Provider, ScoredCandidate};
use crate::output::{Output, StatusKind};
use crate::parse::{parse_filename, FileHints};
use crate::report::{summarize_candidates, Report, ReportEntry};
use crate::utils::{sanitize_filename, Semaphore};
/// Top-level pipeline: discover video files under `settings.input`, process
/// them on a rayon thread pool, and aggregate per-file results into a Report.
pub fn run(mut settings: Settings) -> Result<Report> {
    ensure_ffprobe()?;
    let output = Arc::new(Output::new(&settings.color, settings.verbose));
    if settings.no_lookup {
        output.warn("No-lookup mode enabled: using filename/LLM only (no external providers).");
    }
    if settings.verbose {
        output.info(&format!(
            "jobs: {} | net-jobs: {} | report format: {:?}",
            settings.jobs, settings.net_jobs, settings.report_format
        ));
    }
    // Interactive mode forces a single worker so prompts don't interleave.
    if settings.interactive {
        settings.jobs = 1;
        settings.net_jobs = settings.net_jobs.max(1);
    }
    let files = discover_files(&settings.input, &settings.output)?;
    let total = files.len();
    if total == 0 {
        output.warn("no video files found");
        return Ok(Report::default());
    }
    let settings = Arc::new(settings);
    // One shared semaphore bounds concurrent provider requests across workers.
    let net_sem = Arc::new(Semaphore::new(settings.net_jobs));
    let metadata = if settings.no_lookup {
        None
    } else {
        let client = Arc::new(MetadataClient::new(settings.clone(), net_sem)?);
        client.validate()?;
        Some(client)
    };
    let llm = build_llm_client(&settings, &output)?;
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(settings.jobs)
        .build()
        .context("failed to build thread pool")?;
    let results: Vec<ReportEntry> = pool.install(|| {
        files
            .par_iter()
            .enumerate()
            .map(|(idx, path)| {
                process_file(
                    idx + 1,
                    total,
                    path,
                    settings.clone(),
                    metadata.clone(),
                    llm.clone(),
                    output.clone(),
                )
                // A hard error from process_file still becomes a "failed"
                // entry rather than aborting the whole run.
                .unwrap_or_else(|err| ReportEntry {
                    input: path.display().to_string(),
                    status: "failed".to_string(),
                    provider: None,
                    result: None,
                    output: None,
                    reason: Some(err.to_string()),
                    candidates: Vec::new(),
                })
            })
            .collect()
    });
    let mut report = Report::default();
    for entry in results {
        report.record(entry);
    }
    Ok(report)
}
/// Fail fast when `ffprobe -version` cannot be executed successfully — every
/// file is probed, so a missing ffprobe would fail each file individually.
fn ensure_ffprobe() -> Result<()> {
    let probe = std::process::Command::new("ffprobe")
        .arg("-version")
        .output();
    if matches!(probe, Ok(ref out) if out.status.success()) {
        Ok(())
    } else {
        Err(anyhow!(
            "ffprobe not found. Please install ffmpeg/ffprobe and ensure it is in PATH."
        ))
    }
}
/// Walk `input` (following symlinks) and collect video files, skipping
/// anything already under the output tree when it differs from the input.
fn discover_files(input: &Path, output: &Path) -> Result<Vec<PathBuf>> {
    let mut videos = Vec::new();
    for entry in WalkDir::new(input).follow_links(true) {
        let entry = entry?;
        if !entry.file_type().is_file() {
            continue;
        }
        let path = entry.path();
        // Avoid re-processing files already placed under the output tree.
        let inside_output = output != input && path.starts_with(output);
        if !inside_output && is_video_file(path) {
            videos.push(path.to_path_buf());
        }
    }
    Ok(videos)
}
/// True when the path's extension (case-insensitive) is a known video
/// container format.
fn is_video_file(path: &Path) -> bool {
    path.extension()
        .and_then(|e| e.to_str())
        .map(|ext| ext.to_ascii_lowercase())
        .map_or(false, |ext| {
            matches!(
                ext.as_str(),
                "mkv" | "mp4" | "avi" | "mov" | "m4v" | "mpg" | "mpeg" | "wmv" | "webm" | "ts"
                    | "m2ts"
            )
        })
}
/// Process one input file end-to-end: parse hints from the filename,
/// optionally refine them with the LLM, probe the media with ffprobe, match
/// against a metadata provider (or operate offline), then copy/move/rename.
///
/// Per-file failures (ffprobe or lookup errors) are reported via the
/// returned `ReportEntry` rather than propagated as `Err`.
fn process_file(
    index: usize,
    total: usize,
    path: &Path,
    settings: Arc<Settings>,
    metadata: Option<Arc<MetadataClient>>,
    llm: Option<Arc<LlmClient>>,
    output: Arc<Output>,
) -> Result<ReportEntry> {
    let filename = path.file_name().unwrap_or_default().to_string_lossy().to_string();
    let mut hints = parse_filename(path);
    if let Some(llm) = &llm {
        if settings.llm.mode != crate::cli::LlmMode::Off {
            if let Ok(llm_hints) = llm.parse_filename(&filename) {
                merge_llm_hints(&mut hints, llm_hints, settings.llm.mode.clone());
            } else {
                // BUG FIX: previously printed a literal "(unknown)" instead
                // of the file actually being processed.
                output.warn(&format!("LLM parse failed for {filename}, using heuristic parse"));
            }
        }
    }
    let media = match media::probe(path) {
        Ok(info) => info,
        Err(err) => {
            output.status_line(
                index,
                total,
                StatusKind::Failed,
                &filename,
                None,
                "ffprobe failed",
                None,
            );
            return Ok(ReportEntry {
                input: path.display().to_string(),
                status: "failed".to_string(),
                provider: None,
                result: None,
                output: None,
                reason: Some(err.to_string()),
                candidates: Vec::new(),
            });
        }
    };
    // Runtime in whole minutes is used as a matching signal by providers.
    let runtime_minutes = media
        .duration_seconds
        .map(|seconds| (seconds / 60.0).round() as u32);
    let outcome = if settings.no_lookup {
        MatchOutcome {
            best: match_offline(&hints, settings.interactive)?,
            candidates: Vec::new(),
        }
    } else {
        match metadata
            .as_ref()
            .ok_or_else(|| anyhow!("metadata client unavailable"))?
            .match_movie(&hints, runtime_minutes)
        {
            Ok(outcome) => outcome,
            Err(err) => {
                output.status_line(
                    index,
                    total,
                    StatusKind::Failed,
                    &filename,
                    None,
                    "metadata lookup failed",
                    None,
                );
                return Ok(ReportEntry {
                    input: path.display().to_string(),
                    status: "failed".to_string(),
                    provider: None,
                    result: None,
                    output: None,
                    reason: Some(err.to_string()),
                    candidates: Vec::new(),
                });
            }
        }
    };
    let mut chosen = outcome.best.clone();
    // Interactive disambiguation may override or reject the automatic pick.
    if settings.interactive && !settings.no_lookup {
        let client = metadata.as_ref().ok_or_else(|| anyhow!("metadata client unavailable"))?;
        chosen = interactive_choice(&outcome, path, client)?;
    }
    if chosen.is_none() {
        let reason = if settings.no_lookup {
            if hints.title.is_none() || hints.year.is_none() {
                "no-lookup missing title/year".to_string()
            } else {
                "no-lookup skipped".to_string()
            }
        } else {
            "no match above threshold".to_string()
        };
        output.status_line(
            index,
            total,
            StatusKind::Skipped,
            &filename,
            None,
            "no match",
            None,
        );
        let entry = ReportEntry {
            input: path.display().to_string(),
            status: "skipped".to_string(),
            provider: None,
            result: None,
            output: None,
            reason: Some(reason),
            candidates: summarize_candidates(&outcome.candidates, 3),
        };
        if settings.sidecar_notes {
            write_sidecar_note(path, &entry)?;
        }
        return Ok(entry);
    }
    let metadata = chosen.unwrap();
    let quality = media::quality_tag(&media, &settings.quality_tags);
    let output_path = build_output_path(&metadata, &settings, path, quality.as_deref());
    // Dry-run reports the would-be rename without touching the filesystem.
    if settings.dry_run {
        output.status_line(
            index,
            total,
            StatusKind::Renamed,
            &filename,
            Some(metadata.provider.as_str()),
            "dry-run",
            Some(&output_path.display().to_string()),
        );
        return Ok(ReportEntry {
            input: path.display().to_string(),
            status: "renamed".to_string(),
            provider: Some(metadata.provider.as_str().to_string()),
            result: Some(format!("{} ({})", metadata.title, metadata.year)),
            output: Some(output_path.display().to_string()),
            reason: Some("dry-run".to_string()),
            candidates: Vec::new(),
        });
    }
    let op_mode = if settings.move_files {
        OpMode::Move
    } else if settings.rename_in_place {
        OpMode::RenameInPlace
    } else {
        OpMode::Copy
    };
    let policy = if settings.overwrite {
        CollisionPolicy::Overwrite
    } else if settings.suffix {
        CollisionPolicy::Suffix
    } else {
        CollisionPolicy::Skip
    };
    let outcome = fsops::execute(path, &output_path, op_mode, policy, settings.sidecars)?;
    // `final_path == None` means the collision policy decided to skip.
    if outcome.final_path.is_none() {
        output.status_line(
            index,
            total,
            StatusKind::Skipped,
            &filename,
            Some(metadata.provider.as_str()),
            "destination exists",
            None,
        );
        let entry = ReportEntry {
            input: path.display().to_string(),
            status: "skipped".to_string(),
            provider: Some(metadata.provider.as_str().to_string()),
            result: Some(format!("{} ({})", metadata.title, metadata.year)),
            output: None,
            reason: outcome.skipped_reason,
            candidates: Vec::new(),
        };
        if settings.sidecar_notes {
            write_sidecar_note(path, &entry)?;
        }
        return Ok(entry);
    }
    let final_path = outcome.final_path.unwrap();
    output.status_line(
        index,
        total,
        StatusKind::Renamed,
        &filename,
        Some(metadata.provider.as_str()),
        "renamed",
        Some(&final_path.display().to_string()),
    );
    Ok(ReportEntry {
        input: path.display().to_string(),
        status: "renamed".to_string(),
        provider: Some(metadata.provider.as_str().to_string()),
        result: Some(format!("{} ({})", metadata.title, metadata.year)),
        output: Some(final_path.display().to_string()),
        reason: None,
        candidates: Vec::new(),
    })
}
/// Fold LLM-derived hints into the heuristic ones. In Parse mode the LLM
/// always wins; otherwise it only fills gaps, and a differing LLM title is
/// kept as an alternate.
fn merge_llm_hints(hints: &mut FileHints, llm_hints: LlmHints, mode: crate::cli::LlmMode) {
    let force = mode == crate::cli::LlmMode::Parse;
    if let Some(title) = llm_hints.title {
        if hints.title.is_none() || force {
            hints.normalized_title = Some(crate::utils::normalize_title(&title));
            hints.title = Some(title);
        } else if hints.title.as_deref() != Some(title.as_str()) {
            hints.alt_titles.push(title);
        }
    }
    if let Some(year) = llm_hints.year {
        if hints.year.is_none() || force {
            hints.year = Some(year);
        }
    }
    hints.alt_titles.extend(llm_hints.alt_titles);
}
/// Produce metadata without any provider lookup: use the parsed title/year
/// when both are present, otherwise (in interactive mode) prompt the user.
/// Returns Ok(None) when no usable metadata can be assembled.
fn match_offline(hints: &FileHints, interactive: bool) -> Result<Option<MovieMetadata>> {
    if let (Some(title), Some(year)) = (&hints.title, hints.year) {
        return Ok(Some(MovieMetadata {
            title: title.clone(),
            year,
            tmdb_id: None,
            imdb_id: None,
            provider: Provider::Parsed,
            runtime_minutes: None,
        }));
    }
    if !interactive {
        return Ok(None);
    }
    let title = prompt("Title")?;
    match prompt("Year")?.parse::<i32>() {
        Ok(year) => Ok(Some(MovieMetadata {
            title,
            year,
            tmdb_id: None,
            imdb_id: None,
            provider: Provider::Manual,
            runtime_minutes: None,
        })),
        // A non-numeric year answer means "skip this file".
        Err(_) => Ok(None),
    }
}
/// Build the destination path `output/Title (Year)/Title (Year) [tags].ext`,
/// sanitizing filesystem-hostile characters and optionally appending quality
/// and provider-id tags.
fn build_output_path(
    metadata: &MovieMetadata,
    settings: &Settings,
    source: &Path,
    quality: Option<&str>,
) -> PathBuf {
    let folder = sanitize_filename(&format!("{} ({})", metadata.title, metadata.year));
    let mut filename = folder.clone();
    if let Some(tag) = quality {
        filename.push_str(&format!(" [{tag}]"));
    }
    if settings.include_id {
        if let Some(id) = id_tag(metadata) {
            filename.push_str(&format!(" [{id}]"));
        }
    }
    // Preserve the source extension when there is a non-empty one.
    if let Some(ext) = source
        .extension()
        .and_then(|e| e.to_str())
        .filter(|e| !e.is_empty())
    {
        filename.push('.');
        filename.push_str(ext);
    }
    settings.output.join(folder).join(filename)
}
/// Provider-specific id tag for the filename ("tmdb-123" / "imdb-tt..."),
/// or None when the provider has no stable id.
fn id_tag(metadata: &MovieMetadata) -> Option<String> {
    match metadata.provider {
        Provider::Tmdb => Some(format!("tmdb-{}", metadata.tmdb_id?)),
        Provider::Omdb => Some(format!("imdb-{}", metadata.imdb_id.as_ref()?)),
        Provider::Parsed | Provider::Manual => None,
    }
}
/// Let the user pick between close-scoring candidates. Returns the chosen
/// metadata, `None` to skip the file, or falls back to the automatic best
/// match on unrecognized input.
fn interactive_choice(
    outcome: &MatchOutcome,
    path: &Path,
    metadata: &MetadataClient,
) -> Result<Option<MovieMetadata>> {
    if outcome.candidates.is_empty() {
        return Ok(outcome.best.clone());
    }
    let ambiguous = is_ambiguous(&outcome.candidates);
    if !ambiguous && outcome.best.is_some() {
        return Ok(outcome.best.clone());
    }
    let filename = path.file_name().unwrap_or_default().to_string_lossy();
    // BUG FIX: `filename` was bound but unused and a literal "(unknown)"
    // was printed; show the file actually being disambiguated.
    println!("Ambiguous match for {filename}");
    for (idx, candidate) in outcome.candidates.iter().take(3).enumerate() {
        let label = format!(
            " {}) {} ({}) [{}] score {:.1}",
            idx + 1,
            candidate.candidate.title,
            candidate.candidate.year.unwrap_or(0),
            candidate.candidate.provider.as_str(),
            candidate.score * 100.0
        );
        println!("{label}");
    }
    println!(" s) skip");
    println!(" m) manual title/year");
    print!("Choose: ");
    io::Write::flush(&mut std::io::stdout())?;
    let mut choice = String::new();
    std::io::stdin().read_line(&mut choice)?;
    let choice = choice.trim();
    if choice.eq_ignore_ascii_case("s") {
        return Ok(None);
    }
    if choice.eq_ignore_ascii_case("m") {
        let title = prompt("Title")?;
        let year = prompt("Year")?;
        if let Ok(year) = year.parse::<i32>() {
            return Ok(Some(MovieMetadata {
                title,
                year,
                tmdb_id: None,
                imdb_id: None,
                provider: Provider::Manual,
                runtime_minutes: None,
            }));
        }
        return Ok(None);
    }
    if let Ok(index) = choice.parse::<usize>() {
        // BUG FIX: `index - 1` underflowed (debug panic) when the user typed
        // "0"; checked_sub guards the 1-based -> 0-based conversion.
        if let Some(candidate) = index.checked_sub(1).and_then(|i| outcome.candidates.get(i)) {
            if let Ok(details) = metadata.resolve_candidate(&candidate.candidate) {
                return Ok(Some(details));
            }
            // Detail lookup failed: fall back to the search-result fields.
            return Ok(Some(MovieMetadata {
                title: candidate.candidate.title.clone(),
                year: candidate.candidate.year.unwrap_or(0),
                tmdb_id: None,
                imdb_id: None,
                provider: candidate.candidate.provider.clone(),
                runtime_minutes: None,
            }));
        }
    }
    Ok(outcome.best.clone())
}
/// A match is ambiguous when the top two candidate scores are within 0.02.
fn is_ambiguous(candidates: &[ScoredCandidate]) -> bool {
    match candidates {
        [first, second, ..] => (first.score - second.score).abs() < 0.02,
        _ => false,
    }
}
/// Print `label: ` and read one trimmed line from stdin.
fn prompt(label: &str) -> Result<String> {
    print!("{label}: ");
    io::Write::flush(&mut std::io::stdout())?;
    let mut line = String::new();
    std::io::stdin().read_line(&mut line)?;
    Ok(line.trim().to_owned())
}
/// Construct the optional LLM client. Returns Ok(None) when LLM mode is off
/// or when it is enabled without a configured model (with a warning).
fn build_llm_client(settings: &Settings, output: &Output) -> Result<Option<Arc<LlmClient>>> {
    if settings.llm.mode == crate::cli::LlmMode::Off {
        return Ok(None);
    }
    let model = if let Some(model) = settings.llm.model.as_ref() {
        model.clone()
    } else {
        output.warn("LLM mode enabled but no model provided; disabling LLM");
        return Ok(None);
    };
    let client = LlmClient::new(
        settings.llm.endpoint.clone(),
        model,
        settings.llm.timeout_seconds,
        settings.llm.max_tokens,
    )?;
    Ok(Some(Arc::new(client)))
}
/// Write a `<input>.mov-renamarr.txt` note next to the source file recording
/// why it was skipped/failed, plus the top candidate matches if any.
fn write_sidecar_note(path: &Path, entry: &ReportEntry) -> Result<()> {
    let note_path = path.with_extension("mov-renamarr.txt");
    let mut note = String::new();
    note.push_str(&format!("Status: {}\n", entry.status));
    if let Some(reason) = &entry.reason {
        note.push_str(&format!("Reason: {}\n", reason));
    }
    if !entry.candidates.is_empty() {
        note.push_str("Candidates:\n");
        for candidate in &entry.candidates {
            // Score is stored 0..1 and rendered as a percentage.
            note.push_str(&format!(
                " - {} ({}) [{}] {:.1}\n",
                candidate.title,
                candidate.year.unwrap_or(0),
                candidate.provider,
                candidate.score * 100.0
            ));
        }
    }
    std::fs::write(&note_path, note)
        .with_context(|| format!("failed to write sidecar note: {}", note_path.display()))?;
    Ok(())
}

165
src/report.rs Normal file
View File

@@ -0,0 +1,165 @@
use std::fs::File;
use std::io::{self, Write};
use std::path::Path;
use anyhow::{Context, Result};
use serde::Serialize;
use crate::cli::ReportFormat;
use crate::metadata::ScoredCandidate;
/// Aggregated run results: counters per status plus every per-file entry.
#[derive(Debug, Default, Serialize)]
pub struct Report {
    pub processed: usize,
    pub renamed: usize,
    pub skipped: usize,
    pub failed: usize,
    pub entries: Vec<ReportEntry>,
}
/// Outcome of processing one input file.
#[derive(Debug, Serialize)]
pub struct ReportEntry {
    pub input: String,
    // One of "renamed" / "skipped" / "failed" (see Report::record).
    pub status: String,
    pub provider: Option<String>,
    pub result: Option<String>,
    pub output: Option<String>,
    pub reason: Option<String>,
    // Top candidate matches, kept for skipped/ambiguous files.
    pub candidates: Vec<CandidateSummary>,
}
/// Compact view of one scored candidate for reports and sidecar notes.
#[derive(Debug, Serialize)]
pub struct CandidateSummary {
    pub title: String,
    pub year: Option<i32>,
    pub provider: String,
    // Match score in the 0..1 range; rendered as a percentage in output.
    pub score: f64,
}
impl Report {
    /// Fold one file's outcome into the counters and keep the entry.
    pub fn record(&mut self, entry: ReportEntry) {
        self.processed += 1;
        match entry.status.as_str() {
            "renamed" => self.renamed += 1,
            "skipped" => self.skipped += 1,
            "failed" => self.failed += 1,
            // Unknown statuses still count as processed.
            _ => {}
        }
        self.entries.push(entry);
    }
    /// Write the report in the requested format, to `path` or to stdout.
    pub fn write(&self, format: &ReportFormat, path: Option<&Path>) -> Result<()> {
        match format {
            ReportFormat::Text => self.write_text(path),
            ReportFormat::Json => self.write_json(path),
            ReportFormat::Csv => self.write_csv(path),
        }
    }
    /// Human-readable report: a one-line summary, then a block per entry.
    fn write_text(&self, path: Option<&Path>) -> Result<()> {
        let mut writer = open_writer(path)?;
        writeln!(
            writer,
            "Processed: {} | Renamed: {} | Skipped: {} | Failed: {}",
            self.processed, self.renamed, self.skipped, self.failed
        )?;
        for entry in &self.entries {
            writeln!(writer, "\n[{}] {}", entry.status, entry.input)?;
            if let Some(provider) = &entry.provider {
                writeln!(writer, " Provider: {}", provider)?;
            }
            if let Some(result) = &entry.result {
                writeln!(writer, " Result: {}", result)?;
            }
            if let Some(output) = &entry.output {
                writeln!(writer, " Output: {}", output)?;
            }
            if let Some(reason) = &entry.reason {
                writeln!(writer, " Reason: {}", reason)?;
            }
            if !entry.candidates.is_empty() {
                writeln!(writer, " Candidates:")?;
                for candidate in &entry.candidates {
                    writeln!(
                        writer,
                        " - {} ({}) [{}] score {:.1}",
                        candidate.title,
                        candidate.year.map(|y| y.to_string()).unwrap_or_else(|| "?".into()),
                        candidate.provider,
                        candidate.score * 100.0
                    )?;
                }
            }
        }
        Ok(())
    }
    /// Pretty-printed JSON of the whole Report via serde.
    fn write_json(&self, path: Option<&Path>) -> Result<()> {
        let writer = open_writer(path)?;
        serde_json::to_writer_pretty(writer, self).context("failed to write JSON report")?;
        Ok(())
    }
    /// CSV with one row per entry; candidates are flattened into one column
    /// joined with " | ".
    fn write_csv(&self, path: Option<&Path>) -> Result<()> {
        let mut writer = csv::Writer::from_writer(open_writer(path)?);
        writer.write_record([
            "input",
            "status",
            "provider",
            "result",
            "output",
            "reason",
            "candidates",
        ])?;
        for entry in &self.entries {
            let candidates = entry
                .candidates
                .iter()
                .map(|c| {
                    format!(
                        "{} ({}) [{}] {:.1}",
                        c.title,
                        c.year.map(|y| y.to_string()).unwrap_or_else(|| "?".into()),
                        c.provider,
                        c.score * 100.0
                    )
                })
                .collect::<Vec<_>>()
                .join(" | ");
            writer.write_record([
                &entry.input,
                &entry.status,
                entry.provider.as_deref().unwrap_or(""),
                entry.result.as_deref().unwrap_or(""),
                entry.output.as_deref().unwrap_or(""),
                entry.reason.as_deref().unwrap_or(""),
                &candidates,
            ])?;
        }
        writer.flush()?;
        Ok(())
    }
}
/// Open the report destination: create the file when a path is given,
/// otherwise hand back stdout.
fn open_writer(path: Option<&Path>) -> Result<Box<dyn Write>> {
    match path {
        Some(path) => {
            let file = File::create(path)
                .with_context(|| format!("failed to create report: {}", path.display()))?;
            Ok(Box::new(file))
        }
        None => Ok(Box::new(io::stdout())),
    }
}
/// Condense at most `limit` scored candidates into report-friendly summaries.
pub fn summarize_candidates(candidates: &[ScoredCandidate], limit: usize) -> Vec<CandidateSummary> {
    let mut summaries = Vec::with_capacity(limit.min(candidates.len()));
    for scored in candidates.iter().take(limit) {
        summaries.push(CandidateSummary {
            title: scored.candidate.title.clone(),
            year: scored.candidate.year,
            provider: scored.candidate.provider.as_str().to_string(),
            score: scored.score,
        });
    }
    summaries
}

99
src/utils.rs Normal file
View File

@@ -0,0 +1,99 @@
use std::sync::{Condvar, Mutex};
/// Lower-case a title and reduce it to ASCII alphanumerics separated by
/// single spaces, for fuzzy comparison between filenames and providers.
pub fn normalize_title(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    for ch in input.chars() {
        if ch.is_ascii_alphanumeric() {
            out.push(ch.to_ascii_lowercase());
        } else {
            // FIX: the whitespace arm and the catch-all arm were identical
            // duplicate branches; punctuation and whitespace alike become a
            // separator.
            out.push(' ');
        }
    }
    collapse_whitespace(&out)
}
/// Replace characters that are invalid in common filesystems with spaces,
/// then collapse the resulting whitespace.
pub fn sanitize_filename(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    for ch in input.chars() {
        if matches!(ch, '<' | '>' | ':' | '"' | '/' | '\\' | '|' | '?' | '*') {
            out.push(' ');
        } else {
            out.push(ch);
        }
    }
    collapse_whitespace(&out)
}
/// Collapse runs of whitespace to single spaces and trim both ends.
pub fn collapse_whitespace(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut last_space = false;
    for ch in input.chars() {
        if ch.is_whitespace() {
            if !last_space {
                out.push(' ');
                last_space = true;
            }
        } else {
            last_space = false;
            out.push(ch);
        }
    }
    out.trim().to_string()
}
/// Counting semaphore built on Mutex + Condvar; permits are released via the
/// RAII guard returned by `acquire`.
pub struct Semaphore {
    // Number of currently available permits.
    state: Mutex<usize>,
    cvar: Condvar,
}
impl Semaphore {
    /// Create a semaphore holding `count` permits.
    pub fn new(count: usize) -> Self {
        Self {
            state: Mutex::new(count),
            cvar: Condvar::new(),
        }
    }
    /// Block until a permit is free, take it, and return a guard that gives
    /// it back on drop.
    pub fn acquire(&self) -> SemaphoreGuard<'_> {
        let mut permits = self
            .cvar
            .wait_while(self.state.lock().unwrap(), |available| *available == 0)
            .unwrap();
        *permits -= 1;
        SemaphoreGuard { sem: self }
    }
}
/// RAII permit: releases one semaphore slot when dropped.
pub struct SemaphoreGuard<'a> {
    sem: &'a Semaphore,
}
impl Drop for SemaphoreGuard<'_> {
    fn drop(&mut self) {
        let mut permits = self.sem.state.lock().unwrap();
        *permits += 1;
        self.sem.cvar.notify_one();
    }
}
#[cfg(test)]
mod tests {
    use super::{collapse_whitespace, normalize_title, sanitize_filename};
    // Punctuation becomes separators and case is folded.
    #[test]
    fn normalizes_title() {
        assert_eq!(normalize_title("The.Matrix!!"), "the matrix");
    }
    // Filesystem-hostile characters are replaced with spaces.
    #[test]
    fn sanitizes_filename() {
        assert_eq!(sanitize_filename("Bad:Name/Here"), "Bad Name Here");
    }
    // Runs of spaces collapse to one.
    #[test]
    fn collapses_whitespace() {
        assert_eq!(collapse_whitespace("a b c"), "a b c");
    }
}

298
tests/integration.rs Normal file
View File

@@ -0,0 +1,298 @@
use std::fs;
use std::path::Path;
use assert_cmd::Command;
use httpmock::Method::{GET, POST};
use httpmock::MockServer;
use predicates::str::contains;
use tempfile::TempDir;
/// Write an executable `ffprobe` shell stub under `dir/bin` that prints a
/// fixed probe JSON (h264, 1080p, 7200s), and return its path. Tests prepend
/// `dir/bin` to PATH so the real ffprobe is never needed.
fn make_ffprobe_stub(dir: &Path) -> std::path::PathBuf {
    let bin_dir = dir.join("bin");
    fs::create_dir_all(&bin_dir).unwrap();
    let script_path = bin_dir.join("ffprobe");
    let script = r#"#!/usr/bin/env sh
echo '{"format":{"duration":"7200"},"streams":[{"codec_type":"video","codec_name":"h264","height":1080}]}'
"#;
    fs::write(&script_path, script).unwrap();
    // FIX: `perms` was previously bound outside the cfg block, producing an
    // unused-variable warning on non-unix targets; the whole permission
    // dance now lives inside #[cfg(unix)].
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = fs::metadata(&script_path).unwrap().permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&script_path, perms).unwrap();
    }
    script_path
}
/// Return a PATH value with `path` prepended to the current PATH (empty when
/// PATH is unset).
fn prepend_path(path: &Path) -> String {
    match std::env::var("PATH") {
        Ok(existing) => format!("{}:{}", path.display(), existing),
        Err(_) => format!("{}:", path.display()),
    }
}
// End-to-end dry run against a mocked TMDb: search + details must each be
// hit exactly once and the file reported as renamed.
#[test]
fn tmdb_flow_dry_run_with_mock_server() {
    let server = MockServer::start();
    let search_mock = server.mock(|when, then| {
        when.method(GET)
            .path("/search/movie")
            .query_param("api_key", "test")
            .query_param("query", "Some Movie")
            .query_param("year", "2020");
        then.status(200)
            .header("content-type", "application/json")
            .body(r#"{"results":[{"id":123,"title":"Some Movie","release_date":"2020-01-02"}]}"#);
    });
    let details_mock = server.mock(|when, then| {
        when.method(GET)
            .path("/movie/123")
            .query_param("api_key", "test");
        then.status(200)
            .header("content-type", "application/json")
            .body(r#"{"id":123,"title":"Some Movie","release_date":"2020-01-02","runtime":120,"imdb_id":"tt123"}"#);
    });
    let temp = TempDir::new().unwrap();
    let input = temp.path().join("input");
    let output = temp.path().join("output");
    fs::create_dir_all(&input).unwrap();
    fs::create_dir_all(&output).unwrap();
    fs::write(input.join("Some.Movie.2020.mkv"), b"stub").unwrap();
    let ffprobe = make_ffprobe_stub(temp.path());
    // Isolate config/cache under the temp dir and point the binary at the
    // mock server via env overrides.
    let mut cmd = Command::new(assert_cmd::cargo_bin!("mov-renamarr"));
    cmd.arg("--input").arg(&input)
        .arg("--output").arg(&output)
        .arg("--dry-run")
        .env("MOV_RENAMARR_PROVIDER", "tmdb")
        .env("MOV_RENAMARR_TMDB_API_KEY", "test")
        .env("MOV_RENAMARR_TMDB_BASE_URL", server.url(""))
        .env("XDG_CONFIG_HOME", temp.path().join("config"))
        .env("XDG_CACHE_HOME", temp.path().join("cache"))
        .env("PATH", prepend_path(ffprobe.parent().unwrap()));
    cmd.assert().success().stdout(contains("renamed"));
    search_mock.assert_hits(1);
    details_mock.assert_hits(1);
}
// Same end-to-end dry run as the TMDb test, but exercising the OMDb
// provider's query parameters (`s`/`i`/`apikey`) and response shapes.
#[test]
fn omdb_flow_dry_run_with_mock_server() {
    let server = MockServer::start();
    let search_mock = server.mock(|when, then| {
        when.method(GET)
            .path("/")
            .query_param("apikey", "test")
            .query_param("s", "Another Movie")
            .query_param("type", "movie")
            .query_param("y", "2019");
        then.status(200)
            .header("content-type", "application/json")
            .body(r#"{"Search":[{"Title":"Another Movie","Year":"2019","imdbID":"tt999"}],"Response":"True"}"#);
    });
    let details_mock = server.mock(|when, then| {
        when.method(GET)
            .path("/")
            .query_param("apikey", "test")
            .query_param("i", "tt999")
            .query_param("plot", "short");
        then.status(200)
            .header("content-type", "application/json")
            .body(r#"{"Title":"Another Movie","Year":"2019","imdbID":"tt999","Runtime":"95 min","Response":"True"}"#);
    });
    let temp = TempDir::new().unwrap();
    let input = temp.path().join("input");
    let output = temp.path().join("output");
    fs::create_dir_all(&input).unwrap();
    fs::create_dir_all(&output).unwrap();
    fs::write(input.join("Another.Movie.2019.mkv"), b"stub").unwrap();
    let ffprobe = make_ffprobe_stub(temp.path());
    let mut cmd = Command::new(assert_cmd::cargo_bin!("mov-renamarr"));
    cmd.arg("--input").arg(&input)
        .arg("--output").arg(&output)
        .arg("--dry-run")
        .env("MOV_RENAMARR_PROVIDER", "omdb")
        .env("MOV_RENAMARR_OMDB_API_KEY", "test")
        .env("MOV_RENAMARR_OMDB_BASE_URL", server.url(""))
        .env("XDG_CONFIG_HOME", temp.path().join("config"))
        .env("XDG_CACHE_HOME", temp.path().join("cache"))
        .env("PATH", prepend_path(ffprobe.parent().unwrap()));
    cmd.assert().success().stdout(contains("renamed"));
    search_mock.assert_hits(1);
    details_mock.assert_hits(1);
}
// Running with no arguments should create a default config under
// XDG_CONFIG_HOME and mention its location on stderr.
#[test]
fn creates_default_config_on_no_args() {
    let temp = TempDir::new().unwrap();
    let config_home = temp.path().join("config");
    let mut cmd = Command::new(assert_cmd::cargo_bin!("mov-renamarr"));
    cmd.env("XDG_CONFIG_HOME", &config_home);
    cmd.assert().success().stderr(contains("Config file:"));
    let config_path = config_home.join("mov-renamarr").join("config.toml");
    assert!(config_path.exists());
    let contents = fs::read_to_string(config_path).unwrap();
    // The generated config defaults to automatic provider selection.
    assert!(contents.contains("provider = \"auto\""));
}
// With --no-lookup, a filename containing both title and year should be
// handled purely from the parsed hints (provider shown as "parsed").
#[test]
fn no_lookup_uses_parsed_title_and_year() {
    let temp = TempDir::new().unwrap();
    let input = temp.path().join("input");
    let output = temp.path().join("output");
    fs::create_dir_all(&input).unwrap();
    fs::create_dir_all(&output).unwrap();
    fs::write(input.join("Test.Movie.2021.mkv"), b"stub").unwrap();
    let ffprobe = make_ffprobe_stub(temp.path());
    let mut cmd = Command::new(assert_cmd::cargo_bin!("mov-renamarr"));
    cmd.arg("--input").arg(&input)
        .arg("--output").arg(&output)
        .arg("--dry-run")
        .arg("--no-lookup")
        .env("XDG_CONFIG_HOME", temp.path().join("config"))
        .env("XDG_CACHE_HOME", temp.path().join("cache"))
        .env("PATH", prepend_path(ffprobe.parent().unwrap()));
    cmd.assert().success().stdout(contains("parsed"));
}
#[test]
fn no_lookup_with_llm_parse_renames_missing_year() {
    // A filename without a year should fall back to the LLM parser
    // (--llm-mode parse) even when provider lookup is disabled; the model's
    // response supplies the missing year.
    let server = MockServer::start();
    let llm_mock = server.mock(|when, then| {
        when.method(POST).path("/api/generate");
        then.status(200)
            .header("content-type", "application/json")
            .body(r#"{"response":"{\"title\":\"Mystery Movie\",\"year\":\"2011\",\"alt_titles\":[]}"}"#);
    });
    let workspace = TempDir::new().unwrap();
    let src_dir = workspace.path().join("input");
    let dst_dir = workspace.path().join("output");
    for dir in [&src_dir, &dst_dir] {
        fs::create_dir_all(dir).unwrap();
    }
    fs::write(src_dir.join("Mystery.Movie.mkv"), b"stub").unwrap();
    let ffprobe = make_ffprobe_stub(workspace.path());
    let mut run = Command::new(assert_cmd::cargo_bin!("mov-renamarr"));
    run.arg("--input")
        .arg(&src_dir)
        .arg("--output")
        .arg(&dst_dir)
        .arg("--dry-run")
        .arg("--no-lookup")
        .arg("--llm-mode")
        .arg("parse")
        .arg("--llm-endpoint")
        .arg(server.url(""))
        .arg("--llm-model")
        .arg("qwen")
        .env("XDG_CONFIG_HOME", workspace.path().join("config"))
        .env("XDG_CACHE_HOME", workspace.path().join("cache"))
        .env("PATH", prepend_path(ffprobe.parent().unwrap()));
    // The plan should use the LLM-supplied title/year and still be "parsed".
    run.assert()
        .success()
        .stdout(contains("Mystery Movie (2011)"))
        .stdout(contains("parsed"));
    // Exactly one generate call proves the LLM path was taken once.
    llm_mock.assert_hits(1);
}
#[test]
fn collision_policy_skips_existing_destination() {
    // When the computed destination already exists, the default collision
    // policy must skip the move: both files stay where they are.
    let workspace = TempDir::new().unwrap();
    let src_dir = workspace.path().join("input");
    let dst_dir = workspace.path().join("output");
    for dir in [&src_dir, &dst_dir] {
        fs::create_dir_all(dir).unwrap();
    }
    fs::write(src_dir.join("Some.Movie.2020.mkv"), b"stub").unwrap();
    let ffprobe = make_ffprobe_stub(workspace.path());
    // Pre-create destination to trigger collision skip.
    let collision_dir = dst_dir.join("Some Movie (2020)");
    fs::create_dir_all(&collision_dir).unwrap();
    let collision_file = collision_dir.join("Some Movie (2020) [1080p].mkv");
    fs::write(&collision_file, b"existing").unwrap();
    let mut run = Command::new(assert_cmd::cargo_bin!("mov-renamarr"));
    run.arg("--input")
        .arg(&src_dir)
        .arg("--output")
        .arg(&dst_dir)
        .arg("--no-lookup")
        .env("XDG_CONFIG_HOME", workspace.path().join("config"))
        .env("XDG_CACHE_HOME", workspace.path().join("cache"))
        .env("PATH", prepend_path(ffprobe.parent().unwrap()));
    run.assert().success().stdout(contains("destination exists"));
    // Neither side of the collision may have been touched.
    assert!(collision_file.exists());
    assert!(src_dir.join("Some.Movie.2020.mkv").exists());
}
#[test]
fn sidecars_are_copied_when_enabled() {
    // --sidecars should carry companion files (.srt, .nfo) along with the
    // video, renamed to match the new base name.
    let workspace = TempDir::new().unwrap();
    let src_dir = workspace.path().join("input");
    let dst_dir = workspace.path().join("output");
    for dir in [&src_dir, &dst_dir] {
        fs::create_dir_all(dir).unwrap();
    }
    fs::write(src_dir.join("Film.2020.mkv"), b"stub").unwrap();
    fs::write(src_dir.join("Film.2020.srt"), b"sub").unwrap();
    fs::write(src_dir.join("Film.2020.nfo"), b"nfo").unwrap();
    let ffprobe = make_ffprobe_stub(workspace.path());
    let mut run = Command::new(assert_cmd::cargo_bin!("mov-renamarr"));
    run.arg("--input")
        .arg(&src_dir)
        .arg("--output")
        .arg(&dst_dir)
        .arg("--no-lookup")
        .arg("--sidecars")
        .env("XDG_CONFIG_HOME", workspace.path().join("config"))
        .env("XDG_CACHE_HOME", workspace.path().join("cache"))
        .env("PATH", prepend_path(ffprobe.parent().unwrap()));
    run.assert().success();
    // Video and both sidecars must land under the movie folder with the
    // shared renamed base name.
    let movie_dir = dst_dir.join("Film (2020)");
    for ext in ["mkv", "srt", "nfo"] {
        assert!(movie_dir.join(format!("Film (2020) [1080p].{ext}")).exists());
    }
}
#[test]
fn rename_in_place_uses_input_as_output() {
    // --rename-in-place makes the input directory double as the output root:
    // the file is moved into a movie folder under the same tree.
    let workspace = TempDir::new().unwrap();
    let src_dir = workspace.path().join("input");
    fs::create_dir_all(&src_dir).unwrap();
    fs::write(src_dir.join("Alien.1979.1080p.mkv"), b"stub").unwrap();
    let ffprobe = make_ffprobe_stub(workspace.path());
    let mut run = Command::new(assert_cmd::cargo_bin!("mov-renamarr"));
    run.arg("--input")
        .arg(&src_dir)
        .arg("--rename-in-place")
        .arg("--no-lookup")
        .env("XDG_CONFIG_HOME", workspace.path().join("config"))
        .env("XDG_CACHE_HOME", workspace.path().join("cache"))
        .env("PATH", prepend_path(ffprobe.parent().unwrap()));
    run.assert().success().stdout(contains("renamed"));
    // The original flat file is gone; the renamed layout exists in-place.
    let relocated = src_dir.join("Alien (1979)").join("Alien (1979) [1080p].mkv");
    assert!(relocated.exists());
    assert!(!src_dir.join("Alien.1979.1080p.mkv").exists());
}