use anyhow::{Context, Result};
use reqwest::blocking::Client;
use serde::{Deserialize, Serialize};

/// Title/year hints extracted from a media filename by the LLM.
#[derive(Debug, Clone, Default)]
pub struct LlmHints {
    pub title: Option<String>,
    pub year: Option<i32>,
    pub alt_titles: Vec<String>,
}

/// Blocking client for an Ollama-compatible `/api/generate` endpoint.
#[derive(Clone)]
pub struct LlmClient {
    endpoint: String,
    model: String,
    max_tokens: Option<u32>,
    client: Client,
}

impl LlmClient {
    pub fn new(
        endpoint: String,
        model: String,
        timeout_seconds: u64,
        max_tokens: Option<u32>,
    ) -> Result<Self> {
        let client = Client::builder()
            .timeout(std::time::Duration::from_secs(timeout_seconds))
            .build()
            .context("failed to build HTTP client for LLM")?;
        Ok(Self {
            endpoint,
            model,
            max_tokens,
            client,
        })
    }

    /// Asks the model to extract title/year hints from a raw filename.
    /// Falls back to empty hints if the model's JSON cannot be parsed.
    pub fn parse_filename(&self, raw: &str) -> Result<LlmHints> {
        let prompt = build_prompt(raw);
        let request = OllamaRequest {
            model: self.model.clone(),
            prompt,
            stream: false,
            format: Some("json".to_string()),
            options: Some(OllamaOptions {
                num_predict: self.max_tokens,
                temperature: 0.0,
            }),
        };

        let url = format!("{}/api/generate", self.endpoint.trim_end_matches('/'));
        let response = self
            .client
            .post(url)
            .json(&request)
            .send()
            .context("LLM request failed")?;

        let status = response.status();
        if !status.is_success() {
            return Err(anyhow::anyhow!("LLM returned HTTP {status}"));
        }

        let body: OllamaResponse = response.json().context("failed to parse LLM response")?;
        let hints = parse_hints(&body.response).unwrap_or_default();
        Ok(hints)
    }
}

fn build_prompt(raw: &str) -> String {
    format!(
        "You are a strict parser. Extract the full movie title and year from the filename below.\n\n\
         Rules:\n\
         - Output JSON only.\n\
         - Title must include all words of the movie name in order (no partial tokens).\n\
         - Strip release metadata (resolution, codec, source, group tags).\n\
         - Year must be a 4-digit number if present.\n\
         - If unsure, use null for fields and empty array for alt_titles.\n\
         - Do NOT invent data.\n\n\
         Return JSON with keys: title, year, alt_titles.\n\n\
         Filename: {raw}\n"
    )
}

/// Request body for Ollama's `/api/generate`.
#[derive(Serialize)]
struct OllamaRequest {
    model: String,
    prompt: String,
    stream: bool,
    format: Option<String>,
    options: Option<OllamaOptions>,
}

#[derive(Serialize)]
struct OllamaOptions {
    #[serde(skip_serializing_if = "Option::is_none")]
    num_predict: Option<u32>,
    temperature: f32,
}

#[derive(Deserialize)]
struct OllamaResponse {
    response: String,
}

/// Loose mirror of the JSON the model is asked to return; `year` may arrive
/// as either a number or a string, so it is normalized in `parse_hints`.
#[derive(Deserialize, Default)]
struct LlmHintsRaw {
    title: Option<String>,
    year: Option<YearValue>,
    alt_titles: Option<Vec<String>>,
}

#[derive(Deserialize)]
#[serde(untagged)]
enum YearValue {
    Number(i32),
    String(String),
}

fn parse_hints(raw: &str) -> Option<LlmHints> {
    let parsed: LlmHintsRaw = serde_json::from_str(raw).ok()?;
    // Accept the year as a bare number or as a string such as "1999" or "(1999)".
    let year = parsed.year.and_then(|value| match value {
        YearValue::Number(num) => Some(num),
        YearValue::String(s) => s
            .chars()
            .filter(|c| c.is_ascii_digit())
            .collect::<String>()
            .parse()
            .ok(),
    });
    Some(LlmHints {
        title: parsed.title,
        year,
        alt_titles: parsed.alt_titles.unwrap_or_default(),
    })
}
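
// ---------------------------------------------------------------------------
// Usage sketch. The second test below assumes an Ollama server is running at
// the default http://localhost:11434 with a model named "llama3.1" already
// pulled; both values are illustrative assumptions, not part of this module,
// so that test is #[ignore]d and only runs via `cargo test -- --ignored`.
#[cfg(test)]
mod tests {
    use super::*;

    // Offline check of the JSON-normalization path: the model may return the
    // year as a string, which parse_hints coerces to an i32.
    #[test]
    fn parse_hints_accepts_string_or_numeric_year() {
        let from_string =
            parse_hints(r#"{"title":"Blade Runner","year":"1982","alt_titles":[]}"#).unwrap();
        assert_eq!(from_string.year, Some(1982));
        assert_eq!(from_string.title.as_deref(), Some("Blade Runner"));

        let from_number = parse_hints(r#"{"title":"Heat","year":1995}"#).unwrap();
        assert_eq!(from_number.year, Some(1995));
        assert!(from_number.alt_titles.is_empty());
    }

    // End-to-end sketch against a live server; exact titles depend on the
    // model, so only the year is asserted.
    #[test]
    #[ignore = "requires a running Ollama instance with the model pulled"]
    fn parse_filename_against_local_ollama() {
        let client = LlmClient::new(
            "http://localhost:11434".to_string(), // assumed default endpoint
            "llama3.1".to_string(),               // assumed model name
            30,                                   // request timeout (seconds)
            Some(256),                            // token cap -> num_predict
        )
        .expect("failed to build LLM client");

        let hints = client
            .parse_filename("The.Matrix.1999.1080p.BluRay.x264-GROUP.mkv")
            .expect("LLM request failed");
        assert_eq!(hints.year, Some(1999));
    }
}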