Files
arte-dl/tmdb.py
T
dev 0866a875ba
Docker / docker (push) Successful in 2m58s
feat: TMDB year fallback + PUID/PGID support
- tmdb.py: store release_date year in cache, expose as tmdb_year
- main.py + app.js: use tmdb_year when subtitle has no year
- Dockerfile: add gosu + abc user for PUID/PGID runtime privilege drop
- entrypoint.sh: new entrypoint handling PUID/PGID ownership of /app/data

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-05 18:02:18 +02:00

121 lines
4.0 KiB
Python

import difflib
import json
import logging
import os
import sqlite3
import urllib.parse
import urllib.request
from datetime import datetime, timedelta
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Cache rows older than this many days are ignored by lookup() and fetched again.
_CACHE_DAYS = 30
# Minimum similarity score _best_match() requires before accepting a TMDB result.
_THRESHOLD = 0.45
# Relative path of the SQLite database file that holds the tmdb_cache table.
_DB = "data/arte_dl.db"
# Base URL for TMDB images; a size segment and the image path are appended to it.
_IMG_BASE = "https://image.tmdb.org/t/p"
# TMDB movie-search endpoint queried by _search().
_SEARCH_URL = "https://api.themoviedb.org/3/search/movie"
# TMDB API key; empty until _get_key() fills it from the TMDB_API_KEY environment variable.
_key: str = ""
def _get_key() -> str:
    """Return the TMDB API key, reading ``TMDB_API_KEY`` from the environment if needed.

    A non-empty key is remembered in the module-level ``_key`` and returned
    directly on later calls. While the key is empty the environment is
    consulted again on every call, and ``""`` is returned if it is unset.
    """
    global _key
    if _key:
        return _key
    _key = os.environ.get("TMDB_API_KEY", "")
    return _key
def _init_db():
    """Create the ``tmdb_cache`` table if missing and add the ``year`` column to older schemas.

    Databases created before the ``year`` column existed are migrated with
    ``ALTER TABLE ... ADD COLUMN``. The column's presence is checked first so
    the expected "already migrated" case does not rely on swallowing an error.
    A failing migration is logged instead of being silently ignored, and does
    not raise, which keeps the original best-effort behaviour.
    """
    conn = sqlite3.connect(_DB)
    try:
        # ``with conn`` only scopes the transaction (commit / rollback); the
        # connection itself is closed in the ``finally`` clause below.
        with conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS tmdb_cache (
                    arte_id TEXT PRIMARY KEY,
                    tmdb_id INTEGER,
                    poster TEXT,
                    backdrop TEXT,
                    year INTEGER,
                    cached_at TEXT NOT NULL
                )
            """)
            # PRAGMA table_info yields one row per column; index 1 is the column name.
            columns = {info[1] for info in conn.execute("PRAGMA table_info(tmdb_cache)")}
            if "year" not in columns:
                try:
                    conn.execute("ALTER TABLE tmdb_cache ADD COLUMN year INTEGER")
                except sqlite3.OperationalError as e:
                    # e.g. another process added the column in the meantime.
                    logger.warning("Could not add 'year' column to tmdb_cache: %s", e)
    finally:
        conn.close()
def _search(query: str) -> list[dict]:
    """Search TMDB movies for *query* (French locale) and return the raw result list.

    Args:
        query: Free-text search string.

    Returns:
        The ``results`` list of the TMDB response. An empty list is returned
        when the request or JSON decoding fails, or when the response carries
        no usable ``results`` list; the function never raises.
    """
    # urlencode escapes every parameter, including the API key, which was
    # previously interpolated into the URL verbatim.
    params = urllib.parse.urlencode(
        {"api_key": _get_key(), "query": query, "language": "fr"}
    )
    url = f"{_SEARCH_URL}?{params}"
    try:
        with urllib.request.urlopen(urllib.request.Request(url), timeout=8) as r:
            results = json.loads(r.read()).get("results")
    except Exception as e:
        # Best-effort lookup: any network / HTTP / parsing problem is logged
        # and reported as "no results" rather than propagated to the caller.
        logger.debug("TMDB search failed for %r: %s", query, e)
        return []
    # Guard against a missing or null "results" so callers always get a list.
    return results if isinstance(results, list) else []
def _best_match(results: list[dict], title: str, subtitle: str) -> dict | None:
q = f"{title} {subtitle}".strip().lower().replace("-", " ").replace(":", "")
best, best_score = None, 0.0
for r in results[:10]:
rt = (r.get("title") or "").lower().replace("-", " ").replace(":", "")
score = difflib.SequenceMatcher(None, q, rt).ratio()
# bonus when both artist name and subtitle start appear in the TMDB title
if title.lower() in rt and (not subtitle or subtitle.lower()[:6] in rt):
score = min(score + 0.2, 1.0)
if score > best_score:
best_score, best = score, r
return best if best and best_score >= _THRESHOLD else None
def lookup(arte_id: str, title: str, subtitle: str) -> dict | None:
    """Return TMDB metadata for an item, using the SQLite cache when possible.

    Args:
        arte_id: Identifier used as the cache key.
        title: Main title used for the TMDB search and for scoring results.
        subtitle: Subtitle appended to the search query; may be empty.

    Returns:
        The dict produced by ``_build`` (``tmdb_id``, ``tmdb_poster``,
        ``tmdb_backdrop``, ``tmdb_year``) when a match is known, or ``None``
        when no API key is configured or no acceptable match exists.

    Both matches and "no match" outcomes are cached for ``_CACHE_DAYS`` days.
    """
    if not _get_key():
        return None
    _init_db()

    # Return cached result if fresh enough
    cutoff = (datetime.now() - timedelta(days=_CACHE_DAYS)).isoformat()
    conn = sqlite3.connect(_DB)
    try:
        row = conn.execute(
            "SELECT tmdb_id, poster, backdrop, year FROM tmdb_cache WHERE arte_id=? AND cached_at>?",
            (arte_id, cutoff),
        ).fetchone()
    finally:
        # sqlite3's context manager does not close the connection; do it here.
        conn.close()

    if row is not None:
        tmdb_id, poster, backdrop, year = row
        if tmdb_id is None:
            return None  # cached "no match"
        return _build(tmdb_id, poster, backdrop, year)

    # Query TMDB: full "title subtitle" first, then the title alone as fallback.
    # NOTE: _search() reports a failed request as an empty list, so a transient
    # failure is cached as "no match" just like a genuinely empty result.
    query = f"{title} {subtitle}".strip()
    results = _search(query) or (_search(title) if subtitle else []) or []
    match = _best_match(results, title, subtitle)

    if match is None:
        tmdb_id = poster = backdrop = year = None
    else:
        tmdb_id = match["id"]
        poster = match.get("poster_path")
        backdrop = match.get("backdrop_path")
        # release_date looks like "YYYY-MM-DD"; keep the year only when numeric.
        rd = (match.get("release_date") or "")[:4]
        year = int(rd) if rd.isdigit() else None

    conn = sqlite3.connect(_DB)
    try:
        with conn:  # commit on success, roll back on error
            # Columns are named explicitly: on databases migrated with
            # ALTER TABLE ADD COLUMN, "year" sits after "cached_at", so a
            # positional VALUES list would swap the two.
            conn.execute(
                "INSERT OR REPLACE INTO tmdb_cache "
                "(arte_id, tmdb_id, poster, backdrop, year, cached_at) "
                "VALUES (?,?,?,?,?,?)",
                (arte_id, tmdb_id, poster, backdrop, year, datetime.now().isoformat()),
            )
    finally:
        conn.close()

    if tmdb_id is None:
        return None
    return _build(tmdb_id, poster, backdrop, year)
def _build(tmdb_id: int, poster: str | None, backdrop: str | None, year: int | None = None) -> dict:
return {
"tmdb_id": tmdb_id,
"tmdb_poster": f"{_IMG_BASE}/w500{poster}" if poster else None,
"tmdb_backdrop": f"{_IMG_BASE}/w1280{backdrop}" if backdrop else None,
"tmdb_year": year,
}
def poster_url(path: str | None, size: str = "w500") -> str | None:
return f"{_IMG_BASE}/{size}{path}" if path else None