import difflib import json import logging import os import sqlite3 import urllib.parse import urllib.request from datetime import datetime, timedelta logger = logging.getLogger(__name__) _CACHE_DAYS = 30 _THRESHOLD = 0.45 _DB = "data/arte_dl.db" _IMG_BASE = "https://image.tmdb.org/t/p" _SEARCH_URL = "https://api.themoviedb.org/3/search/movie" _key: str = "" def _get_key() -> str: global _key if not _key: _key = os.environ.get("TMDB_API_KEY", "") return _key def _init_db(): with sqlite3.connect(_DB) as conn: conn.execute(""" CREATE TABLE IF NOT EXISTS tmdb_cache ( arte_id TEXT PRIMARY KEY, tmdb_id INTEGER, poster TEXT, backdrop TEXT, cached_at TEXT NOT NULL ) """) def _search(query: str) -> list[dict]: url = f"{_SEARCH_URL}?api_key={_get_key()}&query={urllib.parse.quote_plus(query)}&language=fr" try: with urllib.request.urlopen(urllib.request.Request(url), timeout=8) as r: return json.loads(r.read()).get("results", []) except Exception as e: logger.debug("TMDB search failed for %r: %s", query, e) return [] def _best_match(results: list[dict], title: str, subtitle: str) -> dict | None: q = f"{title} {subtitle}".strip().lower().replace("-", " ").replace(":", "") best, best_score = None, 0.0 for r in results[:10]: rt = (r.get("title") or "").lower().replace("-", " ").replace(":", "") score = difflib.SequenceMatcher(None, q, rt).ratio() # bonus when both artist name and subtitle start appear in the TMDB title if title.lower() in rt and (not subtitle or subtitle.lower()[:6] in rt): score = min(score + 0.2, 1.0) if score > best_score: best_score, best = score, r return best if best and best_score >= _THRESHOLD else None def lookup(arte_id: str, title: str, subtitle: str) -> dict | None: if not _get_key(): return None _init_db() # Return cached result if fresh enough cutoff = (datetime.now() - timedelta(days=_CACHE_DAYS)).isoformat() with sqlite3.connect(_DB) as conn: row = conn.execute( "SELECT tmdb_id, poster, backdrop FROM tmdb_cache WHERE arte_id=? AND cached_at>?", (arte_id, cutoff), ).fetchone() if row is not None: tmdb_id, poster, backdrop = row if tmdb_id is None: return None # cached "no match" return _build(tmdb_id, poster, backdrop) # Query TMDB query = f"{title} {subtitle}".strip() results = _search(query) or (_search(title) if subtitle else []) match = _best_match(results, title, subtitle) tmdb_id = match["id"] if match else None poster = match.get("poster_path") if match else None backdrop = match.get("backdrop_path") if match else None with sqlite3.connect(_DB) as conn: conn.execute( "INSERT OR REPLACE INTO tmdb_cache VALUES (?,?,?,?,?)", (arte_id, tmdb_id, poster, backdrop, datetime.now().isoformat()), ) return _build(tmdb_id, poster, backdrop) if tmdb_id else None def _build(tmdb_id: int, poster: str | None, backdrop: str | None) -> dict: return { "tmdb_id": tmdb_id, "tmdb_poster": f"{_IMG_BASE}/w500{poster}" if poster else None, "tmdb_backdrop": f"{_IMG_BASE}/w1280{backdrop}" if backdrop else None, } def poster_url(path: str | None, size: str = "w500") -> str | None: return f"{_IMG_BASE}/{size}{path}" if path else None