a4ffd6d63e
Docker / docker (push) Successful in 3m1s
- DB_PATH → data/arte_dl.db (mappé sur /mnt/user/appdata/arte-dl) - mkdir data/ au démarrage dans downloader.py et arte_api.py - asyncio.Lock sur get_all_concerts() : une seule scrape à la fois, les requêtes concurrentes attendent le résultat au lieu de relancer Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
113 lines
3.6 KiB
Python
113 lines
3.6 KiB
Python
import difflib
|
|
import json
|
|
import logging
|
|
import os
|
|
import sqlite3
|
|
import urllib.parse
|
|
import urllib.request
|
|
from datetime import datetime, timedelta
|
|
|
|
# Module-level logger, named after this module.
logger: logging.Logger = logging.getLogger(__name__)

# Cache rows older than this many days are ignored by lookup() and re-fetched.
_CACHE_DAYS: int = 30
# Minimum similarity score _best_match() requires before accepting a result.
_THRESHOLD: float = 0.45
# SQLite database file that holds the tmdb_cache table.
_DB: str = "data/arte_dl.db"
# Prefix for TMDB image URLs; a size segment and the image path are appended.
_IMG_BASE: str = "https://image.tmdb.org/t/p"
# TMDB movie-search endpoint queried by _search().
_SEARCH_URL: str = "https://api.themoviedb.org/3/search/movie"

# API key remembered by _get_key(); empty until a non-empty key has been read.
_key: str = ""
|
|
|
|
|
|
def _get_key() -> str:
    """Return the TMDB API key, reading ``TMDB_API_KEY`` on demand.

    A non-empty key is remembered in the module-level ``_key`` so later calls
    skip the environment lookup.  While the variable is unset or empty, an
    empty string is returned and the environment is consulted again on the
    next call.
    """
    global _key
    if _key:
        return _key
    _key = os.environ.get("TMDB_API_KEY", "")
    return _key
|
|
|
|
|
|
def _init_db():
    """Create the ``tmdb_cache`` table (and the database directory) if missing.

    Safe to call repeatedly: the directory is created with ``exist_ok`` and
    the table with ``IF NOT EXISTS``.

    Raises:
        sqlite3.Error: if the database cannot be opened or written.
        OSError: if the parent directory cannot be created.
    """
    # sqlite3 cannot create missing parent directories on its own.
    db_dir = os.path.dirname(_DB)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    conn = sqlite3.connect(_DB)
    try:
        # ``with conn`` only commits/rolls back the transaction; it does NOT
        # close the connection, hence the explicit close() below.
        with conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS tmdb_cache (
                    arte_id   TEXT PRIMARY KEY,
                    tmdb_id   INTEGER,
                    poster    TEXT,
                    backdrop  TEXT,
                    cached_at TEXT NOT NULL
                )
            """)
    finally:
        conn.close()
|
|
|
|
|
|
def _search(query: str) -> list[dict]:
    """Search TMDB movies for *query* (French locale) and return raw results.

    Args:
        query: Free-text search string.

    Returns:
        The ``results`` list of the TMDB response.  An empty list is returned
        when the request fails, the response cannot be parsed, or the payload
        carries no list under ``results``.
    """
    # urlencode escapes every parameter (the API key included), not just the
    # query text.
    params = urllib.parse.urlencode(
        {"api_key": _get_key(), "query": query, "language": "fr"}
    )
    url = f"{_SEARCH_URL}?{params}"
    try:
        with urllib.request.urlopen(urllib.request.Request(url), timeout=8) as r:
            results = json.loads(r.read()).get("results", [])
    except Exception as e:
        # Deliberate best-effort: artwork is optional, so any network or
        # parsing problem degrades to "no results" instead of propagating.
        logger.debug("TMDB search failed for %r: %s", query, e)
        return []
    # Honour the declared return type even if the payload has "results": null.
    return results if isinstance(results, list) else []
|
|
|
|
|
|
def _best_match(results: list[dict], title: str, subtitle: str) -> dict | None:
|
|
q = f"{title} {subtitle}".strip().lower().replace("-", " ").replace(":", "")
|
|
best, best_score = None, 0.0
|
|
for r in results[:10]:
|
|
rt = (r.get("title") or "").lower().replace("-", " ").replace(":", "")
|
|
score = difflib.SequenceMatcher(None, q, rt).ratio()
|
|
# bonus when both artist name and subtitle start appear in the TMDB title
|
|
if title.lower() in rt and (not subtitle or subtitle.lower()[:6] in rt):
|
|
score = min(score + 0.2, 1.0)
|
|
if score > best_score:
|
|
best_score, best = score, r
|
|
return best if best and best_score >= _THRESHOLD else None
|
|
|
|
|
|
def lookup(arte_id: str, title: str, subtitle: str) -> dict | None:
    """Return TMDB artwork for a programme, backed by a SQLite cache.

    Args:
        arte_id: Cache key identifying the programme.
        title: Main title; searched on TMDB together with ``subtitle``.
        subtitle: Secondary title; may be empty.

    Returns:
        The dict produced by ``_build`` (``tmdb_id``, ``tmdb_poster``,
        ``tmdb_backdrop``), or ``None`` when no API key is configured or no
        sufficiently similar TMDB result exists (including a cached
        "no match").

    Raises:
        sqlite3.Error: if the cache database cannot be read or written.
    """
    if not _get_key():
        return None

    _init_db()

    # Return cached result if fresh enough.  Timestamps are stored as ISO-8601
    # strings, so comparing them as text compares them chronologically.
    cutoff = (datetime.now() - timedelta(days=_CACHE_DAYS)).isoformat()
    conn = sqlite3.connect(_DB)
    try:
        row = conn.execute(
            "SELECT tmdb_id, poster, backdrop FROM tmdb_cache WHERE arte_id=? AND cached_at>?",
            (arte_id, cutoff),
        ).fetchone()
    finally:
        # sqlite3's connection context manager never closes the connection,
        # so close it explicitly to avoid leaking a handle per call.
        conn.close()

    if row is not None:
        tmdb_id, poster, backdrop = row
        if tmdb_id is None:
            return None  # cached "no match"
        return _build(tmdb_id, poster, backdrop)

    # Query TMDB: full "title subtitle" first, then the title alone.
    # NOTE: _search() also returns [] when the request fails, so a transient
    # failure is stored below as "no match" until the cache entry expires.
    query = f"{title} {subtitle}".strip()
    results = _search(query) or (_search(title) if subtitle else [])
    match = _best_match(results, title, subtitle)

    if match is None:
        tmdb_id = poster = backdrop = None
    else:
        tmdb_id = match["id"]
        poster = match.get("poster_path")
        backdrop = match.get("backdrop_path")

    conn = sqlite3.connect(_DB)
    try:
        with conn:  # commit on success, roll back on error
            conn.execute(
                "INSERT OR REPLACE INTO tmdb_cache VALUES (?,?,?,?,?)",
                (arte_id, tmdb_id, poster, backdrop, datetime.now().isoformat()),
            )
    finally:
        conn.close()

    # Same "no match" test as the cached path above.
    return _build(tmdb_id, poster, backdrop) if tmdb_id is not None else None
|
|
|
|
|
|
def _build(tmdb_id: int, poster: str | None, backdrop: str | None) -> dict:
|
|
return {
|
|
"tmdb_id": tmdb_id,
|
|
"tmdb_poster": f"{_IMG_BASE}/w500{poster}" if poster else None,
|
|
"tmdb_backdrop": f"{_IMG_BASE}/w1280{backdrop}" if backdrop else None,
|
|
}
|
|
|
|
|
|
def poster_url(path: str | None, size: str = "w500") -> str | None:
|
|
return f"{_IMG_BASE}/{size}{path}" if path else None
|