Initial commit

This commit is contained in:
2025-12-30 10:51:50 -05:00
parent 12315c4925
commit 3c0c022c79
21 changed files with 6689 additions and 1 deletions

80
src/metadata/cache.rs Normal file
View File

@@ -0,0 +1,80 @@
use std::path::PathBuf;
use anyhow::{Context, Result};
use rusqlite::{params, Connection};
/// Handle to the on-disk SQLite key/value cache.
///
/// Holds configuration only: a database connection is opened on every
/// `get`/`set` call rather than being stored here.
pub struct Cache {
/// Location of the SQLite database file.
path: PathBuf,
/// Maximum age, in days, after which `get` treats a stored entry as expired.
ttl_days: u32,
/// When true, `get` always reports a miss (stored entries are ignored on read).
refresh: bool,
}
impl Cache {
    /// Builds a cache handle for the database at `path`.
    ///
    /// Nothing is opened or created here; the database file is touched lazily
    /// by `get` and `set`.
    pub fn new(path: PathBuf, ttl_days: u32, refresh: bool) -> Self {
        Self { path, ttl_days, refresh }
    }

    /// Looks up the value stored under (`namespace`, `key`).
    ///
    /// Returns `Ok(None)` when refresh mode is on, when no row exists, or when
    /// the row is older than `ttl_days`. Database failures are returned as
    /// errors.
    pub fn get(&self, namespace: &str, key: &str) -> Result<Option<String>> {
        // Refresh mode bypasses reads entirely; the database is not even opened.
        if self.refresh {
            return Ok(None);
        }

        let conn = self.open()?;
        let lookup = conn.query_row(
            "SELECT value, fetched_at FROM cache WHERE namespace = ?1 AND key = ?2 LIMIT 1",
            params![namespace, key],
            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
        );
        let (stored, fetched_at) = match lookup {
            Ok(found) => found,
            // A missing row is an ordinary miss, not a failure.
            Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
            Err(other) => return Err(other.into()),
        };

        // Age is measured in fractional days (86 400 seconds per day).
        let age_days = (current_timestamp() - fetched_at) as f64 / 86_400.0;
        let expired = age_days > self.ttl_days as f64;
        Ok(if expired { None } else { Some(stored) })
    }

    /// Stores `value` under (`namespace`, `key`), replacing any existing row
    /// and stamping it with the current time.
    pub fn set(&self, namespace: &str, key: &str, value: &str) -> Result<()> {
        let conn = self.open()?;
        let fetched_at = current_timestamp();
        conn.execute(
            "INSERT INTO cache (namespace, key, value, fetched_at) VALUES (?1, ?2, ?3, ?4)
ON CONFLICT(namespace, key) DO UPDATE SET value = excluded.value, fetched_at = excluded.fetched_at",
            params![namespace, key, value, fetched_at],
        )?;
        Ok(())
    }

    /// Opens the database, creating the parent directory and the `cache`
    /// table first if they do not exist yet.
    fn open(&self) -> Result<Connection> {
        let db_path = &self.path;
        if let Some(dir) = db_path.parent() {
            std::fs::create_dir_all(dir)
                .with_context(|| format!("failed to create cache dir: {}", dir.display()))?;
        }

        let conn = Connection::open(db_path)
            .with_context(|| format!("failed to open cache db: {}", db_path.display()))?;
        conn.execute(
            "CREATE TABLE IF NOT EXISTS cache (
namespace TEXT NOT NULL,
key TEXT NOT NULL,
value TEXT NOT NULL,
fetched_at INTEGER NOT NULL,
PRIMARY KEY(namespace, key)
)",
            [],
        )?;
        Ok(conn)
    }
}
/// Returns the current UTC time as a Unix timestamp in whole seconds.
fn current_timestamp() -> i64 {
    let now = chrono::Utc::now();
    now.timestamp()
}

336
src/metadata/mod.rs Normal file
View File

@@ -0,0 +1,336 @@
use std::collections::HashMap;
use std::sync::Arc;
use anyhow::{anyhow, Result};
use reqwest::blocking::Client;
use crate::config::Settings;
use crate::metadata::cache::Cache;
use crate::parse::FileHints;
use crate::utils::{normalize_title, Semaphore};
mod cache;
mod omdb;
mod tmdb;
/// Origin of a candidate or of resolved metadata.
///
/// Only `Omdb` and `Tmdb` are backed by a remote lookup in this module;
/// `Parsed` and `Manual` yield no search results and cannot be resolved to
/// details.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum Provider {
Omdb,
Tmdb,
Parsed,
Manual,
}
impl Provider {
pub fn as_str(&self) -> &'static str {
match self {
Provider::Omdb => "omdb",
Provider::Tmdb => "tmdb",
Provider::Parsed => "parsed",
Provider::Manual => "manual",
}
}
}
/// A single search hit returned by a provider, before details are fetched.
#[derive(Clone, Debug)]
pub struct Candidate {
/// Provider that produced this hit.
pub provider: Provider,
/// Provider-specific identifier: the IMDb ID for OMDb hits, the numeric
/// TMDb id rendered as a string for TMDb hits.
pub id: String,
/// Title as reported by the provider.
pub title: String,
/// Release year, when the provider supplied a parseable one.
pub year: Option<i32>,
/// Runtime in minutes; search results leave this `None` and it is filled in
/// later from a details lookup for the top-ranked candidates.
pub runtime_minutes: Option<u32>,
}
/// A candidate paired with its match score.
#[derive(Clone, Debug)]
pub struct ScoredCandidate {
/// The search hit being scored.
pub candidate: Candidate,
/// Match score as produced by `score_candidate`, clamped to `0.0..=1.0`.
pub score: f64,
}
/// Resolved metadata for one movie, built from a provider's details response.
#[derive(Clone, Debug)]
pub struct MovieMetadata {
/// Movie title; the provider parsers substitute "Unknown Title" when absent.
pub title: String,
/// Release year; the provider parsers store 0 when no year could be parsed.
pub year: i32,
/// TMDb numeric id; `None` for metadata that came from OMDb.
pub tmdb_id: Option<u32>,
/// IMDb id, when the provider response contained one.
pub imdb_id: Option<String>,
/// Provider the metadata was fetched from.
pub provider: Provider,
/// Runtime in minutes, when the provider response contained one.
pub runtime_minutes: Option<u32>,
}
/// Result of `MetadataClient::match_movie`.
#[derive(Clone, Debug)]
pub struct MatchOutcome {
/// Details for the top-ranked candidate, present only when its score
/// (times 100) reaches the configured `min_score`.
pub best: Option<MovieMetadata>,
/// Every deduplicated candidate with its score, sorted best first.
pub candidates: Vec<ScoredCandidate>,
}
/// Front end for searching OMDb/TMDb and resolving candidates to metadata.
pub struct MetadataClient {
/// Shared application settings (API keys, base URLs, provider choice, thresholds).
settings: Arc<Settings>,
/// Response cache handed to every provider call.
cache: Arc<Cache>,
/// Blocking HTTP client reused for all requests.
client: Client,
/// Semaphore handed to every provider call, which acquires a permit before
/// sending a request.
net_sem: Arc<Semaphore>,
}
impl MetadataClient {
pub fn new(settings: Arc<Settings>, net_sem: Arc<Semaphore>) -> Result<Self> {
let client = Client::builder().build()?;
let cache = Arc::new(Cache::new(
settings.cache_path.clone(),
settings.cache_ttl_days,
settings.refresh_cache,
));
Ok(Self {
settings,
cache,
client,
net_sem,
})
}
pub fn validate(&self) -> Result<()> {
self.selected_providers().map(|_| ())
}
pub fn match_movie(&self, hints: &FileHints, runtime_minutes: Option<u32>) -> Result<MatchOutcome> {
let providers = self.selected_providers()?;
let queries = build_queries(hints);
let mut candidates = Vec::new();
for provider in providers {
for query in &queries {
let mut results = match provider {
Provider::Omdb => omdb::search(
&self.client,
&self.settings.omdb_base_url,
self.settings.api_key_omdb.as_deref().ok_or_else(|| anyhow!("OMDb API key missing"))?,
query,
&self.cache,
&self.net_sem,
)?,
Provider::Tmdb => tmdb::search(
&self.client,
&self.settings.tmdb_base_url,
self.settings.api_key_tmdb.as_deref().ok_or_else(|| anyhow!("TMDb API key missing"))?,
query,
&self.cache,
&self.net_sem,
)?,
Provider::Parsed | Provider::Manual => Vec::new(),
};
candidates.append(&mut results);
}
}
let candidates = dedupe_candidates(candidates);
let mut scored = score_candidates(hints, runtime_minutes, candidates);
scored.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
if runtime_minutes.is_some() && !scored.is_empty() {
self.enrich_runtime(&mut scored)?;
for entry in &mut scored {
entry.score = score_candidate(hints, runtime_minutes, &entry.candidate);
}
scored.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
}
let best = if let Some(best) = scored.first() {
if best.score * 100.0 >= self.settings.min_score as f64 {
Some(self.fetch_details(&best.candidate)?)
} else {
None
}
} else {
None
};
Ok(MatchOutcome { best, candidates: scored })
}
pub fn resolve_candidate(&self, candidate: &Candidate) -> Result<MovieMetadata> {
self.fetch_details(candidate)
}
fn fetch_details(&self, candidate: &Candidate) -> Result<MovieMetadata> {
match candidate.provider {
Provider::Omdb => {
let key = self.settings.api_key_omdb.as_deref().ok_or_else(|| anyhow!("OMDb API key missing"))?;
omdb::details(
&self.client,
&self.settings.omdb_base_url,
key,
&candidate.id,
&self.cache,
&self.net_sem,
)
}
Provider::Tmdb => {
let key = self.settings.api_key_tmdb.as_deref().ok_or_else(|| anyhow!("TMDb API key missing"))?;
tmdb::details(
&self.client,
&self.settings.tmdb_base_url,
key,
&candidate.id,
&self.cache,
&self.net_sem,
)
}
Provider::Parsed | Provider::Manual => {
Err(anyhow!("parsed/manual provider has no metadata lookup"))
}
}
}
fn enrich_runtime(&self, candidates: &mut [ScoredCandidate]) -> Result<()> {
let top_n = 3.min(candidates.len());
for entry in candidates.iter_mut().take(top_n) {
if entry.candidate.runtime_minutes.is_some() {
continue;
}
if let Ok(details) = self.fetch_details(&entry.candidate) {
entry.candidate.runtime_minutes = details.runtime_minutes;
entry.candidate.year = Some(details.year);
}
}
Ok(())
}
fn selected_providers(&self) -> Result<Vec<Provider>> {
use crate::cli::ProviderChoice;
match self.settings.provider {
ProviderChoice::Auto => {
if self.settings.api_key_tmdb.is_some() {
Ok(vec![Provider::Tmdb])
} else if self.settings.api_key_omdb.is_some() {
Ok(vec![Provider::Omdb])
} else {
Err(anyhow!("no API keys available for provider selection"))
}
}
ProviderChoice::Omdb => {
if self.settings.api_key_omdb.is_none() {
Err(anyhow!("OMDb provider selected but API key missing"))
} else {
Ok(vec![Provider::Omdb])
}
}
ProviderChoice::Tmdb => {
if self.settings.api_key_tmdb.is_none() {
Err(anyhow!("TMDb provider selected but API key missing"))
} else {
Ok(vec![Provider::Tmdb])
}
}
ProviderChoice::Both => {
if self.settings.api_key_tmdb.is_none() || self.settings.api_key_omdb.is_none() {
Err(anyhow!("both providers requested but one or more API keys missing"))
} else {
Ok(vec![Provider::Tmdb, Provider::Omdb])
}
}
}
}
}
/// One title/year combination to send to a provider's search endpoint.
#[derive(Clone, Debug)]
pub(crate) struct SearchQuery {
/// Title text to search for.
title: String,
/// Optional release year used to narrow the search.
year: Option<i32>,
}
/// Builds the search queries for `hints`: the primary title first (if any),
/// then each alternate title, all sharing the hinted year. Duplicates are
/// removed by `dedupe_queries`.
fn build_queries(hints: &FileHints) -> Vec<SearchQuery> {
    let queries = hints
        .title
        .iter()
        .chain(&hints.alt_titles)
        .map(|name| SearchQuery {
            title: name.clone(),
            year: hints.year,
        })
        .collect();
    dedupe_queries(queries)
}
fn dedupe_queries(queries: Vec<SearchQuery>) -> Vec<SearchQuery> {
let mut seen = HashMap::new();
let mut out = Vec::new();
for query in queries {
let key = format!("{}:{}", normalize_title(&query.title), query.year.unwrap_or(0));
if seen.insert(key, true).is_none() {
out.push(query);
}
}
out
}
/// Pairs every candidate with its score from `score_candidate`, preserving
/// the input order.
fn score_candidates(
    hints: &FileHints,
    runtime_minutes: Option<u32>,
    candidates: Vec<Candidate>,
) -> Vec<ScoredCandidate> {
    candidates
        .into_iter()
        .map(|candidate| {
            let score = score_candidate(hints, runtime_minutes, &candidate);
            ScoredCandidate { candidate, score }
        })
        .collect()
}
fn dedupe_candidates(candidates: Vec<Candidate>) -> Vec<Candidate> {
let mut seen = HashMap::new();
let mut out = Vec::new();
for candidate in candidates {
let key = format!("{}:{}", candidate.provider.as_str(), candidate.id);
if seen.insert(key, true).is_none() {
out.push(candidate);
}
}
out
}
/// Scores how well `candidate` matches the file hints, in `0.0..=1.0`.
///
/// Starts from the best title similarity, then adjusts:
/// * year: +0.10 for an exact match, +0.05 when off by one, -0.05 otherwise
///   (only when both years are known);
/// * runtime: +0.05 within 2 minutes, +0.02 within 5 minutes
///   (only when both runtimes are known).
fn score_candidate(hints: &FileHints, runtime_minutes: Option<u32>, candidate: &Candidate) -> f64 {
    let mut total = best_title_score(hints, &candidate.title);

    if let (Some(wanted), Some(found)) = (hints.year, candidate.year) {
        match (wanted - found).abs() {
            0 => total += 0.10,
            1 => total += 0.05,
            _ => total -= 0.05,
        }
    }

    if let (Some(wanted), Some(found)) = (runtime_minutes, candidate.runtime_minutes) {
        match wanted.abs_diff(found) {
            0..=2 => total += 0.05,
            3..=5 => total += 0.02,
            _ => {}
        }
    }

    total.clamp(0.0, 1.0)
}
/// Returns the highest Jaro-Winkler similarity between the normalized
/// candidate title and any normalized hinted title (primary or alternate).
/// Yields 0.0 when the hints contain no titles.
fn best_title_score(hints: &FileHints, candidate_title: &str) -> f64 {
    let candidate_norm = normalize_title(candidate_title);
    hints
        .title
        .iter()
        .chain(&hints.alt_titles)
        .map(|name| strsim::jaro_winkler(&normalize_title(name), &candidate_norm))
        .fold(0.0, |best, score| if score > best { score } else { best })
}

161
src/metadata/omdb.rs Normal file
View File

@@ -0,0 +1,161 @@
use anyhow::{anyhow, Context, Result};
use reqwest::blocking::Client;
use serde::Deserialize;
use crate::metadata::{Candidate, MovieMetadata, Provider};
use crate::metadata::cache::Cache;
use crate::metadata::SearchQuery;
use crate::utils::{normalize_title, Semaphore};
/// Top-level JSON body of an OMDb search response.
#[derive(Debug, Deserialize)]
struct OmdbSearchResponse {
/// Search hits, read from the `Search` key (optional).
#[serde(rename = "Search")]
search: Option<Vec<OmdbSearchItem>>,
/// Value of the `Response` key; `parse_search` treats the string "False"
/// as "no results".
#[serde(rename = "Response")]
response: Option<String>,
}
/// One entry of the `Search` array in an OMDb search response.
#[derive(Debug, Deserialize)]
struct OmdbSearchItem {
/// Movie title (`Title` key).
#[serde(rename = "Title")]
title: String,
/// Year text (`Year` key); kept as a string and parsed with `parse_year`.
#[serde(rename = "Year")]
year: String,
/// IMDb identifier (`imdbID` key); becomes the candidate id.
#[serde(rename = "imdbID")]
imdb_id: String,
}
/// JSON body of an OMDb details (lookup by IMDb id) response.
/// Every field is optional.
#[derive(Debug, Deserialize)]
struct OmdbDetailResponse {
/// Movie title (`Title` key).
#[serde(rename = "Title")]
title: Option<String>,
/// Year text (`Year` key); parsed with `parse_year`.
#[serde(rename = "Year")]
year: Option<String>,
/// IMDb identifier (`imdbID` key).
#[serde(rename = "imdbID")]
imdb_id: Option<String>,
/// Runtime text (`Runtime` key); parsed with `parse_runtime`.
#[serde(rename = "Runtime")]
runtime: Option<String>,
/// Value of the `Response` key; "False" makes `parse_details` return an error.
#[serde(rename = "Response")]
response: Option<String>,
/// Error message (`Error` key), used as the error text when `Response` is "False".
#[serde(rename = "Error")]
error: Option<String>,
}
/// Searches OMDb for movies matching `query`.
///
/// Results are cached in the `omdb_search` namespace under
/// `"<normalized title>:<year or 0>"`. On a cache miss a network permit is
/// acquired from `net_sem` and the request is sent to `base_url` with
/// `type=movie` (plus `y=<year>` when the query has one).
///
/// # Errors
/// Fails on cache errors, on a failed request, on a non-success HTTP status,
/// or when the response body cannot be parsed.
pub fn search(
    client: &Client,
    base_url: &str,
    api_key: &str,
    query: &SearchQuery,
    cache: &Cache,
    net_sem: &Semaphore,
) -> Result<Vec<Candidate>> {
    let key = format!("{}:{}", normalize_title(&query.title), query.year.unwrap_or(0));
    if let Some(cached) = cache.get("omdb_search", &key)? {
        return parse_search(&cached);
    }

    // Held until the function returns.
    let _permit = net_sem.acquire();
    let mut req = client
        .get(base_url)
        .query(&[("apikey", api_key), ("s", query.title.as_str()), ("type", "movie")]);
    if let Some(year) = query.year {
        req = req.query(&[("y", year.to_string())]);
    }

    let resp = req.send().context("OMDb search request failed")?;
    let status = resp.status();
    if !status.is_success() {
        return Err(anyhow!("OMDb search failed with HTTP {status}"));
    }
    let text = resp.text().context("failed to read OMDb response")?;

    // Parse before caching: a body that cannot be parsed must not be stored,
    // or every lookup would keep failing until the cache entry expires.
    let candidates = parse_search(&text)?;
    cache.set("omdb_search", &key, &text)?;
    Ok(candidates)
}
/// Parses a raw OMDb search body into candidates.
///
/// A body whose `Response` is "False", or that has no `Search` array, yields
/// an empty list. Runtime is never known at this stage.
fn parse_search(raw: &str) -> Result<Vec<Candidate>> {
    let body = serde_json::from_str::<OmdbSearchResponse>(raw)
        .context("failed to parse OMDb search JSON")?;
    if let Some("False") = body.response.as_deref() {
        return Ok(Vec::new());
    }

    let hits = body.search.unwrap_or_default();
    Ok(hits
        .into_iter()
        .map(|hit| Candidate {
            provider: Provider::Omdb,
            year: parse_year(&hit.year),
            id: hit.imdb_id,
            title: hit.title,
            runtime_minutes: None,
        })
        .collect())
}
/// Fetches OMDb details for the movie with the given IMDb id.
///
/// Responses are cached in the `omdb_details` namespace under `imdb_id`.
/// On a cache miss a network permit is acquired from `net_sem` and the
/// request is sent to `base_url` with `plot=short`.
///
/// # Errors
/// Fails on cache errors, on a failed request, on a non-success HTTP status,
/// when the body cannot be parsed, or when OMDb answers with
/// `Response: "False"`.
pub fn details(
    client: &Client,
    base_url: &str,
    api_key: &str,
    imdb_id: &str,
    cache: &Cache,
    net_sem: &Semaphore,
) -> Result<MovieMetadata> {
    if let Some(cached) = cache.get("omdb_details", imdb_id)? {
        return parse_details(&cached);
    }

    // Held until the function returns.
    let _permit = net_sem.acquire();
    let resp = client
        .get(base_url)
        .query(&[("apikey", api_key), ("i", imdb_id), ("plot", "short")])
        .send()
        .context("OMDb details request failed")?;
    let status = resp.status();
    if !status.is_success() {
        return Err(anyhow!("OMDb details failed with HTTP {status}"));
    }
    let text = resp.text().context("failed to read OMDb details")?;

    // Parse before caching: OMDb error bodies (`Response: "False"`) and
    // malformed bodies must not be stored, or the same error would be served
    // from the cache until the entry expires.
    let metadata = parse_details(&text)?;
    cache.set("omdb_details", imdb_id, &text)?;
    Ok(metadata)
}
/// Parses a raw OMDb details body into `MovieMetadata`.
///
/// A body whose `Response` is "False" becomes an error carrying OMDb's
/// `Error` text (or "OMDb details not found"). A missing title becomes
/// "Unknown Title" and a missing or unparseable year becomes 0.
fn parse_details(raw: &str) -> Result<MovieMetadata> {
    let body = serde_json::from_str::<OmdbDetailResponse>(raw)
        .context("failed to parse OMDb details JSON")?;
    if let Some("False") = body.response.as_deref() {
        let msg = body.error.unwrap_or_else(|| "OMDb details not found".to_string());
        return Err(anyhow!(msg));
    }

    let runtime_minutes = body.runtime.as_deref().and_then(parse_runtime);
    let year = match body.year {
        Some(text) => parse_year(&text).unwrap_or(0),
        None => 0,
    };
    let title = match body.title {
        Some(name) => name,
        None => "Unknown Title".to_string(),
    };

    Ok(MovieMetadata {
        title,
        year,
        tmdb_id: None,
        imdb_id: body.imdb_id,
        provider: Provider::Omdb,
        runtime_minutes,
    })
}
/// Extracts a year from OMDb year text such as "2010" or "2010–2015".
///
/// All non-digit characters are ignored and the first four ASCII digits are
/// read as the year. Returns `None` when fewer than four digits are present.
fn parse_year(raw: &str) -> Option<i32> {
    let leading: String = raw
        .chars()
        .filter(|ch| ch.is_ascii_digit())
        .take(4)
        .collect();
    if leading.len() < 4 {
        return None;
    }
    leading.parse().ok()
}
/// Parses the leading run of ASCII digits in OMDb runtime text
/// (e.g. "142 min") as minutes.
///
/// Returns `None` when the text does not start with a digit (such as "N/A")
/// or when the number does not fit in a `u32`.
fn parse_runtime(raw: &str) -> Option<u32> {
    // Byte offset of the first non-digit; the whole string if there is none.
    let end = raw
        .find(|ch: char| !ch.is_ascii_digit())
        .unwrap_or(raw.len());
    raw[..end].parse().ok()
}

142
src/metadata/tmdb.rs Normal file
View File

@@ -0,0 +1,142 @@
use anyhow::{anyhow, Context, Result};
use reqwest::blocking::{Client, RequestBuilder};
use serde::Deserialize;
use crate::metadata::{Candidate, MovieMetadata, Provider};
use crate::metadata::cache::Cache;
use crate::metadata::SearchQuery;
use crate::utils::{normalize_title, Semaphore};
/// Top-level JSON body of a TMDb movie search response.
#[derive(Debug, Deserialize)]
struct TmdbSearchResponse {
/// Search hits from the `results` key (optional).
results: Option<Vec<TmdbSearchItem>>,
}
/// One entry of the `results` array in a TMDb search response.
#[derive(Debug, Deserialize)]
struct TmdbSearchItem {
/// TMDb numeric id; rendered as a string to form the candidate id.
id: u32,
/// Movie title.
title: String,
/// Release date text; its first four characters are parsed as the year.
release_date: Option<String>,
}
/// JSON body of a TMDb movie details response.
#[derive(Debug, Deserialize)]
struct TmdbDetailResponse {
/// TMDb numeric id.
id: u32,
/// Movie title (optional).
title: Option<String>,
/// Release date text; its first four characters are parsed as the year.
release_date: Option<String>,
/// Runtime, copied unchanged into `MovieMetadata::runtime_minutes`.
runtime: Option<u32>,
/// IMDb identifier, when TMDb provides one.
imdb_id: Option<String>,
}
/// Searches TMDb for movies matching `query`.
///
/// Results are cached in the `tmdb_search` namespace under
/// `"<normalized title>:<year or 0>"`. On a cache miss a network permit is
/// acquired from `net_sem` and `<base_url>/search/movie` is requested (with
/// `year=<year>` when the query has one).
///
/// # Errors
/// Fails on cache errors, on a failed request, on a non-success HTTP status,
/// or when the response body cannot be parsed.
pub fn search(
    client: &Client,
    base_url: &str,
    api_key: &str,
    query: &SearchQuery,
    cache: &Cache,
    net_sem: &Semaphore,
) -> Result<Vec<Candidate>> {
    let key = format!("{}:{}", normalize_title(&query.title), query.year.unwrap_or(0));
    if let Some(cached) = cache.get("tmdb_search", &key)? {
        return parse_search(&cached);
    }

    // Held until the function returns.
    let _permit = net_sem.acquire();
    let url = format!("{}/search/movie", base_url.trim_end_matches('/'));
    let mut req = apply_auth(client.get(url), api_key)
        .query(&[("query", &query.title)]);
    if let Some(year) = query.year {
        req = req.query(&[("year", year.to_string())]);
    }

    let resp = req.send().context("TMDb search request failed")?;
    let status = resp.status();
    if !status.is_success() {
        return Err(anyhow!("TMDb search failed with HTTP {status}"));
    }
    let text = resp.text().context("failed to read TMDb response")?;

    // Parse before caching: a body that cannot be parsed must not be stored,
    // or every lookup would keep failing until the cache entry expires.
    let candidates = parse_search(&text)?;
    cache.set("tmdb_search", &key, &text)?;
    Ok(candidates)
}
/// Parses a raw TMDb search body into candidates.
///
/// A body without a `results` array yields an empty list. Runtime is never
/// known at this stage.
fn parse_search(raw: &str) -> Result<Vec<Candidate>> {
    let body = serde_json::from_str::<TmdbSearchResponse>(raw)
        .context("failed to parse TMDb search JSON")?;

    let hits = body.results.unwrap_or_default();
    Ok(hits
        .into_iter()
        .map(|hit| Candidate {
            provider: Provider::Tmdb,
            id: hit.id.to_string(),
            year: hit.release_date.as_deref().and_then(parse_year),
            title: hit.title,
            runtime_minutes: None,
        })
        .collect())
}
/// Fetches TMDb details for the movie with the given TMDb id.
///
/// Responses are cached in the `tmdb_details` namespace under `id`. On a
/// cache miss a network permit is acquired from `net_sem` and
/// `<base_url>/movie/<id>` is requested.
///
/// # Errors
/// Fails on cache errors, on a failed request, on a non-success HTTP status,
/// or when the response body cannot be parsed.
pub fn details(
    client: &Client,
    base_url: &str,
    api_key: &str,
    id: &str,
    cache: &Cache,
    net_sem: &Semaphore,
) -> Result<MovieMetadata> {
    if let Some(cached) = cache.get("tmdb_details", id)? {
        return parse_details(&cached);
    }

    // Held until the function returns.
    let _permit = net_sem.acquire();
    let url = format!("{}/movie/{}", base_url.trim_end_matches('/'), id);
    let resp = apply_auth(client.get(url), api_key)
        .send()
        .context("TMDb details request failed")?;
    let status = resp.status();
    if !status.is_success() {
        return Err(anyhow!("TMDb details failed with HTTP {status}"));
    }
    let text = resp.text().context("failed to read TMDb details")?;

    // Parse before caching: a body that cannot be parsed must not be stored,
    // or every lookup would keep failing until the cache entry expires.
    let metadata = parse_details(&text)?;
    cache.set("tmdb_details", id, &text)?;
    Ok(metadata)
}
fn parse_details(raw: &str) -> Result<MovieMetadata> {
let parsed: TmdbDetailResponse = serde_json::from_str(raw)
.with_context(|| "failed to parse TMDb details JSON")?;
let title = parsed.title.unwrap_or_else(|| "Unknown Title".to_string());
let year = parsed
.release_date
.as_deref()
.and_then(parse_year)
.unwrap_or(0);
let tmdb_id = Some(parsed.id);
Ok(MovieMetadata {
title,
year,
tmdb_id,
imdb_id: parsed.imdb_id,
provider: Provider::Tmdb,
runtime_minutes: parsed.runtime,
})
}
/// Attaches the TMDb credential to `req`: as a bearer token when the key
/// looks like one (see `looks_like_bearer`), otherwise as the `api_key`
/// query parameter.
fn apply_auth(req: RequestBuilder, api_key: &str) -> RequestBuilder {
    if !looks_like_bearer(api_key) {
        return req.query(&[("api_key", api_key)]);
    }
    req.bearer_auth(api_key)
}
/// Heuristic: a credential is treated as a bearer token when it is longer
/// than 30 bytes and contains at least one '.'.
fn looks_like_bearer(value: &str) -> bool {
    let long_enough = value.len() > 30;
    long_enough && value.contains('.')
}
/// Parses the first four bytes of a TMDb date string (e.g. "2010-07-16")
/// as the year.
///
/// Returns `None` when the text is shorter than four bytes, when byte 4 is
/// not a character boundary, or when the prefix is not an integer.
fn parse_year(raw: &str) -> Option<i32> {
    let prefix = raw.get(..4)?;
    prefix.parse::<i32>().ok()
}