Compare commits
15 Commits
13bea20035
...
v0.1.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 18010798bb | |||
| 41dc4c0532 | |||
| 6370f1bdf4 | |||
| 5d96fcab11 | |||
| 7f685c4216 | |||
| 62dd897cd8 | |||
| 03ab8b5abd | |||
| c5b20c252f | |||
| 32fa0e8539 | |||
| 86c4396c5e | |||
| 3b0b5b9cde | |||
| d8e328d33a | |||
| a096e93723 | |||
| 5b9aa55e81 | |||
| 3c0c022c79 |
81
.gitea/workflows/release.yml
Normal file
81
.gitea/workflows/release.yml
Normal file
@@ -0,0 +1,81 @@
|
||||
name: Release binaries
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
targets: aarch64-unknown-linux-gnu
|
||||
|
||||
- name: Install cross compiler
|
||||
run: sudo apt-get update && sudo apt-get install -y gcc-aarch64-linux-gnu
|
||||
|
||||
- name: Build x86_64
|
||||
run: cargo build --release
|
||||
|
||||
- name: Build arm64
|
||||
env:
|
||||
CC_aarch64_unknown_linux_gnu: aarch64-linux-gnu-gcc
|
||||
AR_aarch64_unknown_linux_gnu: aarch64-linux-gnu-ar
|
||||
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER: aarch64-linux-gnu-gcc
|
||||
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_AR: aarch64-linux-gnu-ar
|
||||
run: cargo build --release --target aarch64-unknown-linux-gnu
|
||||
|
||||
- name: Package artifacts
|
||||
run: |
|
||||
set -euo pipefail
|
||||
TAG="${GITHUB_REF_NAME:-${GITEA_REF_NAME:-unknown}}"
|
||||
mkdir -p dist/pkg-x86_64 dist/pkg-arm64
|
||||
|
||||
cp target/release/mov-renamarr dist/pkg-x86_64/
|
||||
cp README.md LICENSE CHANGELOG.md dist/pkg-x86_64/
|
||||
tar -C dist/pkg-x86_64 -czf "dist/mov-renamarr-${TAG}-x86_64-linux-gnu.tar.gz" .
|
||||
|
||||
cp target/aarch64-unknown-linux-gnu/release/mov-renamarr dist/pkg-arm64/
|
||||
cp README.md LICENSE CHANGELOG.md dist/pkg-arm64/
|
||||
tar -C dist/pkg-arm64 -czf "dist/mov-renamarr-${TAG}-arm64-linux-gnu.tar.gz" .
|
||||
|
||||
- name: Upload release assets
  env:
    RELEASE_TOKEN: ${{ secrets.RELEASE_TOKEN }}
  run: |
    # Looks up the release for the pushed tag (creating it when absent) and
    # uploads every dist/*.tar.gz archive to it as a release asset.
    set -euo pipefail
    if [ -z "${RELEASE_TOKEN:-}" ]; then
      echo "RELEASE_TOKEN secret is required to upload release assets."
      exit 1
    fi
    TAG="${GITHUB_REF_NAME:-${GITEA_REF_NAME:-unknown}}"
    # The inline Python below reads TAG via os.environ; a plain shell
    # variable is not visible to child processes, so it must be exported.
    export TAG
    API_URL="${GITHUB_API_URL:-${GITEA_API_URL:-}}"
    if [ -z "$API_URL" ]; then
      API_URL="${GITHUB_SERVER_URL}/api/v1"
    fi
    REPO="${GITHUB_REPOSITORY}"

    # No --fail here: an error response for a missing release is expected,
    # and its JSON body simply yields an empty id.
    release_json=$(curl -sS -H "Authorization: token ${RELEASE_TOKEN}" \
      "${API_URL}/repos/${REPO}/releases/tags/${TAG}")
    release_id=$(python3 -c 'import json,sys; print(json.load(sys.stdin).get("id",""))' <<<"$release_json")

    if [ -z "$release_id" ]; then
      create_payload=$(python3 -c 'import json,os; print(json.dumps({"tag_name": os.environ["TAG"], "name": os.environ["TAG"], "body": ""}))')
      # --fail turns an HTTP error into a non-zero exit so set -e stops the job.
      release_json=$(curl -sS --fail -H "Authorization: token ${RELEASE_TOKEN}" \
        -H "Content-Type: application/json" \
        -d "$create_payload" \
        "${API_URL}/repos/${REPO}/releases")
      release_id=$(python3 -c 'import json,sys; print(json.load(sys.stdin).get("id",""))' <<<"$release_json")
    fi

    # Without an id the upload URL below would be malformed; stop with a clear message.
    if [ -z "$release_id" ]; then
      echo "Could not find or create a release for tag ${TAG}."
      exit 1
    fi

    for file in dist/*.tar.gz; do
      name=$(basename "$file")
      # --fail makes a rejected upload fail the step instead of passing silently.
      curl -sS --fail -H "Authorization: token ${RELEASE_TOKEN}" \
        -H "Content-Type: application/octet-stream" \
        --data-binary @"$file" \
        "${API_URL}/repos/${REPO}/releases/${release_id}/assets?name=${name}"
    done
|
||||
10
CHANGELOG.md
Normal file
10
CHANGELOG.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# Changelog
|
||||
|
||||
## 0.1.0 - 2025-12-30
|
||||
Initial release.
|
||||
|
||||
- Radarr-style naming for movies
|
||||
- TMDb/OMDb providers with caching
|
||||
- Optional Ollama LLM integration
|
||||
- Safe defaults (copy, skip on collision)
|
||||
- Reports (text/json/csv) + sidecars support
|
||||
135
Cargo.lock
generated
135
Cargo.lock
generated
@@ -718,21 +718,6 @@ version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
|
||||
dependencies = [
|
||||
"foreign-types-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types-shared"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.2.2"
|
||||
@@ -1005,19 +990,6 @@ dependencies = [
|
||||
"tokio-rustls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-tls"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"hyper",
|
||||
"native-tls",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.64"
|
||||
@@ -1348,6 +1320,7 @@ dependencies = [
|
||||
"is-terminal",
|
||||
"libc",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"owo-colors",
|
||||
"predicates",
|
||||
"rayon",
|
||||
@@ -1363,23 +1336,6 @@ dependencies = [
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "native-tls"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"openssl",
|
||||
"openssl-probe",
|
||||
"openssl-sys",
|
||||
"schannel",
|
||||
"security-framework",
|
||||
"security-framework-sys",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.6"
|
||||
@@ -1423,50 +1379,6 @@ version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "openssl"
|
||||
version = "0.10.75"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"cfg-if",
|
||||
"foreign-types",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"openssl-macros",
|
||||
"openssl-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-macros"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.111",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-probe"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
|
||||
|
||||
[[package]]
|
||||
name = "openssl-sys"
|
||||
version = "0.9.111"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "option-ext"
|
||||
version = "0.2.0"
|
||||
@@ -1736,12 +1648,10 @@ dependencies = [
|
||||
"http-body",
|
||||
"hyper",
|
||||
"hyper-rustls",
|
||||
"hyper-tls",
|
||||
"ipnet",
|
||||
"js-sys",
|
||||
"log",
|
||||
"mime",
|
||||
"native-tls",
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
@@ -1753,7 +1663,6 @@ dependencies = [
|
||||
"sync_wrapper",
|
||||
"system-configuration",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
"tokio-rustls",
|
||||
"tower-service",
|
||||
"url",
|
||||
@@ -1857,15 +1766,6 @@ dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "schannel"
|
||||
version = "0.1.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
|
||||
dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.2.0"
|
||||
@@ -1882,29 +1782,6 @@ dependencies = [
|
||||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "security-framework"
|
||||
version = "2.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"core-foundation",
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
"security-framework-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "security-framework-sys"
|
||||
version = "2.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0"
|
||||
dependencies = [
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.228"
|
||||
@@ -2219,16 +2096,6 @@ dependencies = [
|
||||
"syn 2.0.111",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-native-tls"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
|
||||
dependencies = [
|
||||
"native-tls",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-rustls"
|
||||
version = "0.24.1"
|
||||
|
||||
@@ -3,6 +3,11 @@ name = "mov-renamarr"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
description = "Fast, safe CLI to rename movie files into Radarr-compatible names."
|
||||
readme = "README.md"
|
||||
repository = "https://git.44r0n.cc/44r0n7/mov-renamarr.git"
|
||||
keywords = ["media", "movies", "rename", "radarr", "cli"]
|
||||
categories = ["command-line-utilities", "filesystem"]
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
@@ -14,9 +19,10 @@ is-terminal = "0.4"
|
||||
libc = "0.2"
|
||||
num_cpus = "1.16"
|
||||
owo-colors = "4.1"
|
||||
once_cell = "1.19"
|
||||
rayon = "1.10"
|
||||
regex = "1.10"
|
||||
reqwest = { version = "0.11", features = ["blocking", "json", "rustls-tls"] }
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
rusqlite = { version = "0.31", features = ["bundled"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
|
||||
96
PLAN.md
96
PLAN.md
@@ -1,96 +0,0 @@
|
||||
# Mov Renamarr CLI - Project Plan
|
||||
|
||||
## Goal
|
||||
Build a Linux CLI that scans a directory of movie files, queries online metadata (OMDb/TMDb), and writes Radarr-compatible folder and file names to an output directory. Project name: Mov Renamarr.
|
||||
|
||||
## Core Requirements
|
||||
- Input: directory tree containing video files.
|
||||
- Output: `Movie Title (Year)/Movie Title (Year) [Quality] [id].ext`.
|
||||
- Uses `ffprobe` for media info and filename parsing for hints.
|
||||
- Queries OMDb/TMDb, with caching to avoid repeat lookups.
|
||||
- Non-interactive by default: skip ambiguous/unmatched files, report them at end.
|
||||
- Optional `--interactive` to confirm matches.
|
||||
- Linux support only (for now).
|
||||
|
||||
## Non-Goals (for MVP)
|
||||
- No Radarr API integration.
|
||||
- No TV/series handling.
|
||||
- No transcoding or media repair.
|
||||
|
||||
## Decisions to Lock In
|
||||
- Default action: copy (safe default).
|
||||
- Optional flags: `--move` and `--rename-in-place`.
|
||||
- Config file support (XDG by default) + CLI overrides. Config format: TOML.
|
||||
- Provider selection: auto based on available API keys, with optional user preference. Default auto priority: TMDb.
|
||||
- Match scoring and minimum confidence threshold.
|
||||
- Cache storage format (SQLite vs JSON) + TTL + `--refresh-cache`.
|
||||
- Quality tags default to resolution only; configurable via CLI/config.
|
||||
- Optional local LLM integration (Ollama) for filename parsing and lookup assist, disabled by default.
|
||||
- Default report format: text.
|
||||
- Sidecar notes: off by default; opt-in only.
|
||||
- Include top-3 candidates in unresolved items by default.
|
||||
- Emphasize performance, broad Linux compatibility, and robust error handling.
|
||||
- UX: per-file status line (file/provider/result/new name), progress counts, color when TTY, `--verbose` for debug details.
|
||||
- Collision policy: default skip if destination exists; optional `--overwrite` or `--suffix` to avoid data loss.
|
||||
- Sidecar files: optionally move/copy all sidecar files with `--sidecars` flag (off by default).
|
||||
- Concurrency: default jobs = min(4, max(1, floor(cores/2))); default net-jobs = min(2, jobs); allow overrides.
|
||||
- ffprobe required (no native parsing fallback).
|
||||
- Reports: stdout by default; optional report file name pattern `mov-renamarr-report-YYYYMMDD-HHMMSS.txt` when `--report` is set without a path.
|
||||
- Config precedence: defaults -> config TOML -> env -> CLI flags.
|
||||
- Config path: `$XDG_CONFIG_HOME/mov-renamarr/config.toml` (fallback `~/.config/mov-renamarr/config.toml`).
|
||||
- Cache path: `$XDG_CACHE_HOME/mov-renamarr/cache.db` (fallback `~/.cache/mov-renamarr/cache.db`).
|
||||
- Report file default location: current working directory when `--report` is set without a path.
|
||||
- Provider base URLs configurable in config/env to support testing/mocking.
|
||||
- Create a commented default config file on first run and notify the user.
|
||||
|
||||
## Proposed CLI (Draft)
|
||||
- `mov-renamarr --input <dir> --output <dir>`
|
||||
- `--config <path>` (default: XDG config)
|
||||
- `--provider auto|omdb|tmdb|both`
|
||||
- `--api-key-omdb <key>` / `--api-key-tmdb <key>` (override config/env)
|
||||
- `--cache <path>` (default: `$XDG_CACHE_HOME/mov-renamarr/cache.db`, fallback `~/.cache/mov-renamarr/cache.db`)
|
||||
- `--refresh-cache` (bypass cache)
|
||||
- `--dry-run`
|
||||
- `--move` / `--rename-in-place`
|
||||
- `--interactive`
|
||||
- `--report <path>` + `--report-format text|json|csv`
|
||||
- `--sidecar-notes` (write per-file skip notes)
|
||||
- `--min-score <0-100>`
|
||||
- `--include-id` (tmdb/omdb/imdb if available)
|
||||
- `--quality-tags resolution|resolution,codec,source`
|
||||
- `--color auto|always|never`
|
||||
- `--jobs <n|auto>`
|
||||
- `--net-jobs <n|auto>`
|
||||
- `--no-lookup` (skip external providers; use filename/LLM only)
|
||||
- `--llm-mode off|parse|assist` (default: off)
|
||||
- `--llm-endpoint <url>` (Ollama, default `http://localhost:11434`)
|
||||
- `--llm-model <name>` (Ollama model name)
|
||||
- `--llm-timeout <seconds>` / `--llm-max-tokens <n>`
|
||||
|
||||
## Matching Heuristics (Draft)
|
||||
- Parse filename for title/year hints; strip extra release metadata.
|
||||
- Use `ffprobe` for duration and resolution.
|
||||
- Prefer exact year match; allow +/- 1 year when missing.
|
||||
- Use string similarity + runtime delta to choose best match.
|
||||
|
||||
## Pipeline (Draft)
|
||||
1. Load config (XDG) + merge CLI overrides.
|
||||
2. Discover files and filter by extension; skip output subtree when output != input to avoid reprocessing.
|
||||
3. Parse filename hints (title/year) and strip release metadata (optionally via LLM parse).
|
||||
4. Run `ffprobe` for duration/resolution/codec.
|
||||
5. Select provider(s) based on available API keys and user preference.
|
||||
6. Query provider(s) with hints (LLM assist may propose candidates but must be verified).
|
||||
7. Score and select match; if below threshold, mark as unresolved.
|
||||
8. Build Radarr-compatible output path.
|
||||
9. Copy/move/rename-in-place file to output directory.
|
||||
10. Write summary report of successes and unresolved items.
|
||||
|
||||
## Milestones
|
||||
- M0: Project scaffold and plan (done).
|
||||
- M1: CLI skeleton and config parsing.
|
||||
- M2: `ffprobe` integration and media metadata model.
|
||||
- M3: OMDb/TMDb client + caching.
|
||||
- M4: Matching, naming, and file move/copy.
|
||||
- M5: Reporting, tests, and polish.
|
||||
- M6: Automated test harness and fixtures.
|
||||
- M7: Performance pass and profiling.
|
||||
@@ -31,6 +31,8 @@ cargo install --git <repo-url> --locked
|
||||
cargo install --path . --locked
|
||||
```
|
||||
|
||||
Download a prebuilt binary from the Releases page.
|
||||
|
||||
## Quick start :rocket:
|
||||
Create a default config (with comments) and see the config path:
|
||||
```bash
|
||||
|
||||
21
docs/RELEASING.md
Normal file
21
docs/RELEASING.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# Releasing
|
||||
|
||||
This repo includes a Gitea Actions workflow that builds Linux binaries for
|
||||
`x86_64` and `arm64` and uploads them to the release.
|
||||
|
||||
## One-time setup (Gitea Actions)
|
||||
1) Create a personal access token with repo write access.
|
||||
2) Add it to the repo secrets as `RELEASE_TOKEN`.
|
||||
|
||||
## Release steps
|
||||
1) Update `CHANGELOG.md`.
|
||||
2) Create and push a tag:
|
||||
```bash
|
||||
git tag -a vX.Y.Z -m "vX.Y.Z"
|
||||
git push origin vX.Y.Z
|
||||
```
|
||||
3) The workflow builds and uploads binaries to the release.
|
||||
|
||||
## Artifacts
|
||||
- `mov-renamarr-<tag>-x86_64-linux-gnu.tar.gz`
|
||||
- `mov-renamarr-<tag>-arm64-linux-gnu.tar.gz`
|
||||
45
src/parse.rs
45
src/parse.rs
@@ -1,5 +1,7 @@
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
|
||||
use crate::utils::{collapse_whitespace, normalize_title};
|
||||
@@ -12,6 +14,24 @@ pub struct FileHints {
|
||||
pub alt_titles: Vec<String>,
|
||||
}
|
||||
|
||||
/// Matches a four-digit year that starts with `19` or `20`; compiled once, on first use.
/// NOTE(review): the pattern has no word boundaries, so it can also match four digits
/// inside a longer digit run — confirm that is intended.
static YEAR_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(19|20)\d{2}").expect("year regex"));
|
||||
/// Matches one `[...]` group: `[`, any run of characters other than `]`, then `]`.
static BRACKET_SQUARE_RE: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"\[[^\]]*\]").expect("square bracket regex"));
|
||||
/// Matches one `(...)` group: `(`, any run of characters other than `)`, then `)`.
static BRACKET_ROUND_RE: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"\([^\)]*\)").expect("round bracket regex"));
|
||||
/// Lowercase release-tag tokens (for example `1080p`, `bluray`, `x264`) that `tokenize`
/// skips; the lookup there is done with the lowercased token.
/// NOTE(review): `tokenize` splits on every non-alphanumeric character, so a token can
/// never contain `-` and the "web-dl" entry looks unreachable from that path — confirm.
static STOPWORDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
|
||||
[
|
||||
"1080p", "720p", "2160p", "480p", "360p", "4k", "uhd", "hdr", "dvdrip",
|
||||
"bdrip", "brrip", "bluray", "blu", "webdl", "web-dl", "webrip", "hdrip",
|
||||
"remux", "x264", "x265", "h264", "h265", "hevc", "aac", "dts", "ac3",
|
||||
"proper", "repack", "limited", "extended", "uncut", "remastered", "subbed",
|
||||
"subs", "multi", "dubbed", "dub", "yts", "yify", "rarbg", "web", "hd",
|
||||
"hq", "cam", "ts", "dvdscr", "r5", "r6",
|
||||
|
||||
]
|
||||
.into_iter()
|
||||
.collect()
|
||||
|
||||
});
|
||||
|
||||
pub fn parse_filename(path: &Path) -> FileHints {
|
||||
let stem = path
|
||||
.file_stem()
|
||||
@@ -44,9 +64,8 @@ pub fn parse_filename(path: &Path) -> FileHints {
|
||||
}
|
||||
|
||||
fn extract_year(raw: &str) -> Option<i32> {
|
||||
let re = Regex::new(r"(19|20)\d{2}").ok()?;
|
||||
let mut year: Option<i32> = None;
|
||||
for mat in re.find_iter(raw) {
|
||||
for mat in YEAR_RE.find_iter(raw) {
|
||||
if let Ok(parsed) = mat.as_str().parse::<i32>() {
|
||||
year = Some(parsed);
|
||||
}
|
||||
@@ -55,10 +74,8 @@ fn extract_year(raw: &str) -> Option<i32> {
|
||||
}
|
||||
|
||||
fn strip_bracketed(raw: &str) -> String {
|
||||
let re_square = Regex::new(r"\[[^\]]*\]").unwrap();
|
||||
let re_round = Regex::new(r"\([^\)]*\)").unwrap();
|
||||
let without_square = re_square.replace_all(raw, " ");
|
||||
let without_round = re_round.replace_all(&without_square, " ");
|
||||
let without_square = BRACKET_SQUARE_RE.replace_all(raw, " ");
|
||||
let without_round = BRACKET_ROUND_RE.replace_all(&without_square, " ");
|
||||
without_round.to_string()
|
||||
}
|
||||
|
||||
@@ -83,7 +100,6 @@ fn clean_title_fragment(fragment: &str, year: Option<i32>) -> String {
|
||||
}
|
||||
|
||||
fn tokenize(raw: &str, year: Option<i32>) -> Vec<String> {
|
||||
let stopwords = stopwords();
|
||||
let mut tokens = Vec::new();
|
||||
for token in raw.split(|c: char| !c.is_alphanumeric()) {
|
||||
if token.is_empty() {
|
||||
@@ -95,7 +111,7 @@ fn tokenize(raw: &str, year: Option<i32>) -> Vec<String> {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if stopwords.contains(lower.as_str()) {
|
||||
if STOPWORDS.contains(lower.as_str()) {
|
||||
continue;
|
||||
}
|
||||
if token.chars().all(|c| c.is_ascii_uppercase()) && token.len() <= 8 {
|
||||
@@ -106,19 +122,6 @@ fn tokenize(raw: &str, year: Option<i32>) -> Vec<String> {
|
||||
tokens
|
||||
}
|
||||
|
||||
fn stopwords() -> std::collections::HashSet<&'static str> {
|
||||
[
|
||||
"1080p", "720p", "2160p", "480p", "360p", "4k", "uhd", "hdr", "dvdrip",
|
||||
"bdrip", "brrip", "bluray", "blu", "webdl", "web-dl", "webrip", "hdrip",
|
||||
"remux", "x264", "x265", "h264", "h265", "hevc", "aac", "dts", "ac3",
|
||||
"proper", "repack", "limited", "extended", "uncut", "remastered", "subbed",
|
||||
"subs", "multi", "dubbed", "dub", "yts", "yify", "rarbg", "web", "hd",
|
||||
"hq", "cam", "ts", "dvdscr", "r5", "r6",
|
||||
]
|
||||
.into_iter()
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::parse_filename;
|
||||
|
||||
Reference in New Issue
Block a user