Files

121 lines
4.1 KiB
Python
Raw Permalink Normal View History

import contextlib
import difflib
import json
import logging
import os
import sqlite3
import urllib.parse
import urllib.request
from datetime import datetime, timedelta
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Age in days after which a cached lookup is ignored and TMDB is queried again.
_CACHE_DAYS = 30
# Minimum similarity score (0..1) a search result needs to be accepted by _best_match().
_THRESHOLD = 0.45
# Path of the SQLite database file that holds the tmdb_cache table.
_DB = "data/arte_dl.db"
# Base URL for TMDB images; a size segment (e.g. "w500") and the image path are appended.
_IMG_BASE = "https://image.tmdb.org/t/p"
# TMDB movie-search endpoint queried by _search().
_SEARCH_URL = "https://api.themoviedb.org/3/search/movie"
# API key cache; stays empty until _get_key() finds TMDB_API_KEY in the environment.
_key: str = ""
def _get_key() -> str:
    """Return the TMDB API key, or an empty string when none is configured.

    The key is read from the ``TMDB_API_KEY`` environment variable and kept in
    the module-level ``_key`` once a non-empty value has been seen. While the
    cached value is still empty, the environment is consulted again on every
    call.
    """
    global _key
    if _key:
        # Already resolved on an earlier call.
        return _key
    _key = os.environ.get("TMDB_API_KEY", "")
    return _key
def _init_db():
    """Create the ``tmdb_cache`` table if needed and ensure it has a ``year`` column.

    The function is idempotent: the table is created with ``IF NOT EXISTS`` and
    the ``year`` column is only added to tables that were created without it.
    """
    # sqlite3's own context manager only commits/rolls back; ``closing`` is what
    # actually releases the connection when the block ends.
    with contextlib.closing(sqlite3.connect(_DB)) as conn, conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS tmdb_cache (
                arte_id TEXT PRIMARY KEY,
                tmdb_id INTEGER,
                poster TEXT,
                backdrop TEXT,
                year INTEGER,
                cached_at TEXT NOT NULL
            )
        """)
        # Migrate tables that predate the ``year`` column. Inspecting the schema
        # (row[1] is the column name) avoids swallowing unrelated errors the way
        # a blanket ``except Exception: pass`` around ALTER TABLE would.
        columns = {row[1] for row in conn.execute("PRAGMA table_info(tmdb_cache)")}
        if "year" not in columns:
            conn.execute("ALTER TABLE tmdb_cache ADD COLUMN year INTEGER")
def _search(query: str) -> list[dict]:
    """Query the TMDB movie-search endpoint and return its ``results`` list.

    Args:
        query: Free-text search string.

    Returns:
        The list found under ``"results"`` in the JSON response. An empty list
        is returned when the query is blank, when the request or JSON decoding
        fails, or when the payload does not carry a list under ``"results"``.
    """
    if not query or not query.strip():
        # Nothing to search for; skip the network round-trip.
        return []
    # urlencode escapes every parameter, including the API key.
    params = urllib.parse.urlencode(
        {"api_key": _get_key(), "query": query, "language": "fr"}
    )
    url = f"{_SEARCH_URL}?{params}"
    try:
        with urllib.request.urlopen(urllib.request.Request(url), timeout=8) as r:
            payload = json.loads(r.read())
    except Exception as e:
        # Deliberately broad: the lookup is best-effort and must never raise.
        logger.debug("TMDB search failed for %r: %s", query, e)
        return []
    results = payload.get("results") if isinstance(payload, dict) else None
    # Guard against ``"results": null`` or other non-list payloads so callers
    # can always slice the return value.
    return results if isinstance(results, list) else []
def _best_match(results: list[dict], title: str, subtitle: str) -> dict | None:
q = f"{title} {subtitle}".strip().lower().replace("-", " ").replace(":", "")
best, best_score = None, 0.0
for r in results[:10]:
rt = (r.get("title") or "").lower().replace("-", " ").replace(":", "")
score = difflib.SequenceMatcher(None, q, rt).ratio()
# bonus when both artist name and subtitle start appear in the TMDB title
if title.lower() in rt and (not subtitle or subtitle.lower()[:6] in rt):
score = min(score + 0.2, 1.0)
if score > best_score:
best_score, best = score, r
return best if best and best_score >= _THRESHOLD else None
def lookup(arte_id: str, title: str, subtitle: str) -> dict | None:
    """Return TMDB artwork metadata for an item, using the SQLite cache when possible.

    Args:
        arte_id: Identifier used as the cache key.
        title: Main title used for the TMDB search.
        subtitle: Optional subtitle; when the combined search yields nothing,
            a second search with ``title`` alone is attempted.

    Returns:
        The dict produced by ``_build`` (keys ``tmdb_id``, ``tmdb_poster``,
        ``tmdb_backdrop``, ``tmdb_year``), or None when no API key is
        configured or no acceptable match exists.
    """
    if not _get_key():
        return None
    _init_db()

    # Return the cached result if it is fresh enough.
    cutoff = (datetime.now() - timedelta(days=_CACHE_DAYS)).isoformat()
    # ``closing`` releases the connection; sqlite3's own context manager would
    # only commit/rollback and leave it open.
    with contextlib.closing(sqlite3.connect(_DB)) as conn:
        row = conn.execute(
            "SELECT tmdb_id, poster, backdrop, year FROM tmdb_cache WHERE arte_id=? AND cached_at>?",
            (arte_id, cutoff),
        ).fetchone()
    if row is not None:
        tmdb_id, poster, backdrop, year = row
        if tmdb_id is None:
            return None  # cached "no match"
        return _build(tmdb_id, poster, backdrop, year)

    # Query TMDB: combined title first, bare title as a fallback.
    query = f"{title} {subtitle}".strip()
    results = _search(query) or (_search(title) if subtitle else [])
    match = _best_match(results, title, subtitle)

    tmdb_id = poster = backdrop = year = None
    if match:
        # ``.get`` so a result lacking "id" is stored as "no match" instead of raising.
        tmdb_id = match.get("id")
        poster = match.get("poster_path")
        backdrop = match.get("backdrop_path")
        release_year = (match.get("release_date") or "")[:4]
        year = int(release_year) if release_year.isdigit() else None

    # NOTE(review): _search() returns [] both for "no results" and for a failed
    # request, so a transient failure is cached as "no match" until the entry
    # expires. Consider distinguishing the two cases.
    with contextlib.closing(sqlite3.connect(_DB)) as conn, conn:
        conn.execute(
            "INSERT OR REPLACE INTO tmdb_cache (arte_id, tmdb_id, poster, backdrop, year, cached_at) VALUES (?,?,?,?,?,?)",
            (arte_id, tmdb_id, poster, backdrop, year, datetime.now().isoformat()),
        )
    # Same ``is None`` test as the cached path above, so both paths agree.
    return _build(tmdb_id, poster, backdrop, year) if tmdb_id is not None else None
def _build(tmdb_id: int, poster: str | None, backdrop: str | None, year: int | None = None) -> dict:
return {
"tmdb_id": tmdb_id,
"tmdb_poster": f"{_IMG_BASE}/w500{poster}" if poster else None,
"tmdb_backdrop": f"{_IMG_BASE}/w1280{backdrop}" if backdrop else None,
"tmdb_year": year,
}
def poster_url(path: str | None, size: str = "w500") -> str | None:
return f"{_IMG_BASE}/{size}{path}" if path else None