Quand la recherche ne trouve rien en local (ex: rock-pop chargé côté client), interroge l'API search d'Arte, récupère les IDs manquants et les résout via le player API en parallèle. Permet de trouver n'importe quel concert présent sur arte-concert.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+56
-30
@@ -5,6 +5,7 @@ import asyncio
|
|||||||
import urllib.request
|
import urllib.request
|
||||||
import json
|
import json
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from urllib.parse import quote_plus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -12,16 +13,17 @@ CACHE_TTL = 6 * 3600
|
|||||||
_cache: dict = {"data": [], "ts": 0}
|
_cache: dict = {"data": [], "ts": 0}
|
||||||
|
|
||||||
PLAYER_API = "https://api.arte.tv/api/player/v2/config/fr/{pid}"
|
PLAYER_API = "https://api.arte.tv/api/player/v2/config/fr/{pid}"
|
||||||
|
SEARCH_URL = "https://www.arte.tv/fr/search/?q={q}"
|
||||||
|
|
||||||
GENRE_PAGES = [
|
GENRE_PAGES = [
|
||||||
("classique", "https://www.arte.tv/fr/arte-concert/classique/"),
|
"https://www.arte.tv/fr/arte-concert/classique/",
|
||||||
("jazz", "https://www.arte.tv/fr/arte-concert/jazz/"),
|
"https://www.arte.tv/fr/arte-concert/jazz/",
|
||||||
("rock-pop", "https://www.arte.tv/fr/arte-concert/rock-pop/"),
|
"https://www.arte.tv/fr/arte-concert/rock-pop/",
|
||||||
("opéra", "https://www.arte.tv/fr/arte-concert/opera/"),
|
"https://www.arte.tv/fr/arte-concert/opera/",
|
||||||
("musique du monde","https://www.arte.tv/fr/arte-concert/musique-du-monde/"),
|
"https://www.arte.tv/fr/arte-concert/musique-du-monde/",
|
||||||
("électronique", "https://www.arte.tv/fr/arte-concert/electronica/"),
|
"https://www.arte.tv/fr/arte-concert/electronica/",
|
||||||
("agenda", "https://www.arte.tv/fr/arte-concert/agenda/"),
|
"https://www.arte.tv/fr/arte-concert/agenda/",
|
||||||
("", "https://www.arte.tv/fr/arte-concert/"),
|
"https://www.arte.tv/fr/arte-concert/",
|
||||||
]
|
]
|
||||||
|
|
||||||
_HEADERS = {
|
_HEADERS = {
|
||||||
@@ -43,8 +45,7 @@ def _fetch_url(url: str, headers: dict | None = None) -> str:
|
|||||||
|
|
||||||
def _prog_ids_from_page(url: str) -> set[str]:
    """Return the unique ``_PROG_RE`` matches found in the page at *url*.

    The page is downloaded with ``_fetch_url``. Any exception raised while
    fetching or scanning is logged as a warning and an empty set is
    returned, so one failing page never aborts the caller.
    """
    try:
        page = _fetch_url(url)
        matches = _PROG_RE.findall(page)
        unique = set(matches)
    except Exception as err:
        logger.warning("Failed to fetch %s: %s", url, err)
        return set()
    return unique
|
||||||
@@ -68,13 +69,13 @@ def _metadata_for_pid(pid: str) -> dict | None:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"id": pid,
|
"id": pid,
|
||||||
"title": meta.get("title", ""),
|
"title": meta.get("title") or "",
|
||||||
"subtitle": meta.get("subtitle", ""),
|
"subtitle": meta.get("subtitle") or "",
|
||||||
"url": url,
|
"url": url,
|
||||||
"thumbnail": thumbnail,
|
"thumbnail": thumbnail,
|
||||||
"duration": duration_s,
|
"duration": duration_s,
|
||||||
"description": meta.get("description", "") or "",
|
"description": meta.get("description") or "",
|
||||||
"expiry": rights.get("end", ""),
|
"expiry": rights.get("end") or "",
|
||||||
}
|
}
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
logger.debug("Failed to get metadata for %s: %s", pid, ex)
|
logger.debug("Failed to get metadata for %s: %s", pid, ex)
|
||||||
@@ -82,33 +83,41 @@ def _metadata_for_pid(pid: str) -> dict | None:
|
|||||||
|
|
||||||
|
|
||||||
def _fetch_all_sync() -> list[dict]:
    """Collect IDs from every page in ``GENRE_PAGES`` and resolve them.

    Blocking helper: each genre page is scanned in turn, the IDs are
    de-duplicated, and the set is handed to ``_resolve_ids``. The result is
    sorted on the ``expiry`` string in descending order, with a missing or
    empty expiry treated as ``""``.
    """

    def _expiry_key(concert: dict) -> str:
        return concert.get("expiry") or ""

    collected: set[str] = set()
    for page_url in GENRE_PAGES:
        page_ids = _prog_ids_from_page(page_url)
        # Log only the part after "/fr/" to keep the line short.
        section = page_url.split("/fr/")[1]
        logger.info(" %s → %d IDs", section, len(page_ids))
        collected.update(page_ids)

    logger.info("Total unique programme IDs: %d", len(collected))

    resolved = _resolve_ids(collected)
    resolved.sort(key=_expiry_key, reverse=True)
    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_ids(ids: set[str], exclude: set[str] | None = None) -> list[dict]:
|
||||||
|
to_fetch = ids - (exclude or set())
|
||||||
|
with ThreadPoolExecutor(max_workers=10) as pool:
|
||||||
|
results = list(pool.map(_metadata_for_pid, sorted(to_fetch)))
|
||||||
|
return [c for c in results if c and c.get("title")]
|
||||||
|
|
||||||
|
|
||||||
|
def _search_sync(query: str) -> set[str]:
|
||||||
|
url = SEARCH_URL.format(q=quote_plus(query))
|
||||||
|
try:
|
||||||
|
html = _fetch_url(url)
|
||||||
|
return set(_PROG_RE.findall(html))
|
||||||
|
except Exception as ex:
|
||||||
|
logger.warning("Search failed for %r: %s", query, ex)
|
||||||
|
return set()
|
||||||
|
|
||||||
|
|
||||||
|
# ── public API ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
async def get_all_concerts() -> list[dict]:
|
async def get_all_concerts() -> list[dict]:
|
||||||
now = time.time()
|
now = time.time()
|
||||||
if _cache["data"] and now - _cache["ts"] < CACHE_TTL:
|
if _cache["data"] and now - _cache["ts"] < CACHE_TTL:
|
||||||
return _cache["data"]
|
return _cache["data"]
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
data = await loop.run_in_executor(None, _fetch_all_sync)
|
data = await loop.run_in_executor(None, _fetch_all_sync)
|
||||||
if data:
|
if data:
|
||||||
@@ -119,15 +128,32 @@ async def get_all_concerts() -> list[dict]:
|
|||||||
|
|
||||||
async def fetch_concerts(page: int = 1, search: str = "", page_size: int = 24) -> dict:
|
async def fetch_concerts(page: int = 1, search: str = "", page_size: int = 24) -> dict:
|
||||||
all_c = await get_all_concerts()
|
all_c = await get_all_concerts()
|
||||||
filtered = all_c
|
cached_ids = {c["id"] for c in all_c}
|
||||||
|
|
||||||
if search:
|
if search:
|
||||||
q = search.lower()
|
q = search.lower()
|
||||||
filtered = [
|
# local filter
|
||||||
|
local = [
|
||||||
c for c in all_c
|
c for c in all_c
|
||||||
if q in (c.get("title") or "").lower()
|
if q in (c.get("title") or "").lower()
|
||||||
or q in (c.get("subtitle") or "").lower()
|
or q in (c.get("subtitle") or "").lower()
|
||||||
or q in (c.get("description") or "").lower()
|
or q in (c.get("description") or "").lower()
|
||||||
]
|
]
|
||||||
|
# Arte search for IDs not in cache
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
remote_ids = await loop.run_in_executor(None, _search_sync, search)
|
||||||
|
new_ids = remote_ids - cached_ids
|
||||||
|
if new_ids:
|
||||||
|
extra = await loop.run_in_executor(None, _resolve_ids, new_ids, None)
|
||||||
|
# merge: local results first, then extras not already present
|
||||||
|
local_ids = {c["id"] for c in local}
|
||||||
|
for c in extra:
|
||||||
|
if c["id"] not in local_ids:
|
||||||
|
local.append(c)
|
||||||
|
filtered = local
|
||||||
|
else:
|
||||||
|
filtered = all_c
|
||||||
|
|
||||||
start = (page - 1) * page_size
|
start = (page - 1) * page_size
|
||||||
return {
|
return {
|
||||||
"concerts": filtered[start : start + page_size],
|
"concerts": filtered[start : start + page_size],
|
||||||
|
|||||||
Reference in New Issue
Block a user