use std::path::Path; use regex::Regex; use crate::utils::{collapse_whitespace, normalize_title}; #[derive(Debug, Clone)] pub struct FileHints { pub title: Option, pub normalized_title: Option, pub year: Option, pub alt_titles: Vec, } pub fn parse_filename(path: &Path) -> FileHints { let stem = path .file_stem() .map(|s| s.to_string_lossy().to_string()) .unwrap_or_default(); let year = extract_year(&stem); let cleaned = strip_bracketed(&stem); let alt_titles = extract_alt_titles(&cleaned, year); let tokens = tokenize(&cleaned, year); let title = if tokens.is_empty() { let mut fallback = cleaned.clone(); if let Some(year) = year { fallback = fallback.replace(&year.to_string(), ""); } let fallback = collapse_whitespace(&fallback); if fallback.is_empty() { None } else { Some(fallback) } } else { Some(collapse_whitespace(&tokens.join(" "))) }; let normalized_title = title.as_deref().map(normalize_title); FileHints { title, normalized_title, year, alt_titles, } } fn extract_year(raw: &str) -> Option { let re = Regex::new(r"(19|20)\d{2}").ok()?; let mut year: Option = None; for mat in re.find_iter(raw) { if let Ok(parsed) = mat.as_str().parse::() { year = Some(parsed); } } year } fn strip_bracketed(raw: &str) -> String { let re_square = Regex::new(r"\[[^\]]*\]").unwrap(); let re_round = Regex::new(r"\([^\)]*\)").unwrap(); let without_square = re_square.replace_all(raw, " "); let without_round = re_round.replace_all(&without_square, " "); without_round.to_string() } fn extract_alt_titles(raw: &str, year: Option) -> Vec { let mut alt_titles = Vec::new(); if let Some((left, right)) = raw.split_once(" - ") { let left = clean_title_fragment(left, year); let right = collapse_whitespace(right); if !left.is_empty() && !right.is_empty() { alt_titles.push(left); } } alt_titles } fn clean_title_fragment(fragment: &str, year: Option) -> String { let mut cleaned = fragment.to_string(); if let Some(year) = year { cleaned = cleaned.replace(&year.to_string(), " "); } collapse_whitespace(&cleaned) } fn tokenize(raw: &str, year: Option) -> Vec { let stopwords = stopwords(); let mut tokens = Vec::new(); for token in raw.split(|c: char| !c.is_alphanumeric()) { if token.is_empty() { continue; } let lower = token.to_ascii_lowercase(); if let Some(year) = year { if lower == year.to_string() { continue; } } if stopwords.contains(lower.as_str()) { continue; } if token.chars().all(|c| c.is_ascii_uppercase()) && token.len() <= 8 { continue; } tokens.push(token.to_string()); } tokens } fn stopwords() -> std::collections::HashSet<&'static str> { [ "1080p", "720p", "2160p", "480p", "360p", "4k", "uhd", "hdr", "dvdrip", "bdrip", "brrip", "bluray", "blu", "webdl", "web-dl", "webrip", "hdrip", "remux", "x264", "x265", "h264", "h265", "hevc", "aac", "dts", "ac3", "proper", "repack", "limited", "extended", "uncut", "remastered", "subbed", "subs", "multi", "dubbed", "dub", "yts", "yify", "rarbg", "web", "hd", "hq", "cam", "ts", "dvdscr", "r5", "r6", ] .into_iter() .collect() } #[cfg(test)] mod tests { use super::parse_filename; use std::path::Path; #[test] fn parses_basic_title_and_year() { let path = Path::new("Some.Movie.2020.1080p.BluRay.x264-GROUP.mkv"); let hints = parse_filename(path); assert_eq!(hints.title.as_deref(), Some("Some Movie")); assert_eq!(hints.year, Some(2020)); } #[test] fn handles_brackets_and_stopwords() { let path = Path::new("[YTS] The.Matrix.(1999).1080p.BluRay.mkv"); let hints = parse_filename(path); assert_eq!(hints.title.as_deref(), Some("The Matrix")); assert_eq!(hints.year, Some(1999)); } #[test] fn adds_alt_title_for_dash_suffix() { let path = Path::new("Zootopia - Vlix.mp4"); let hints = parse_filename(path); assert_eq!(hints.title.as_deref(), Some("Zootopia Vlix")); assert!(hints.alt_titles.iter().any(|t| t == "Zootopia")); } }