From e1a2dd16856c187de7dedddf540a6441aff0445c Mon Sep 17 00:00:00 2001 From: dev Date: Sun, 10 May 2026 12:01:48 +0200 Subject: [PATCH] feat: VOSTFR/VO/FRENCH detection and subtitle embedding Query Arte Player API before each download to determine available stream versions. Select lang tag (VOSTFR > VO, FRENCH if audio is fr). Embed French subtitles as default MKV track when VOSTFR. All output now .mkv. Co-Authored-By: Claude Sonnet 4.6 --- arte_api.py | 29 +++++++++++++++++++++++++++++ downloader.py | 41 +++++++++++++++++++++++++++++------------ 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/arte_api.py b/arte_api.py index 7f3dcc0..6321bcb 100644 --- a/arte_api.py +++ b/arte_api.py @@ -278,6 +278,35 @@ async def fetch_concerts(page: int = 1, search: str = "", page_size: int = 24, c } +def get_versions(pid: str) -> list[dict]: + """Fetch available stream versions from Arte Player API for a programme ID.""" + try: + raw = _fetch_url( + PLAYER_API.format(pid=pid), + headers={"User-Agent": _HEADERS["User-Agent"], "Accept": "application/json"}, + ) + data = json.loads(raw) + streams = data["data"]["attributes"].get("streams") or [] + return streams[0].get("versions") or [] if streams else [] + except Exception as ex: + logger.debug("Failed to get versions for %s: %s", pid, ex) + return [] + + +def select_lang_tag(versions: list[dict]) -> str: + """ + Determine UNFR language tag from stream versions. + FR audio → FRENCH, non-FR + FR subs → VOSTFR, otherwise → VO. + """ + if not versions: + return "VO" + if any(v.get("audioLanguage") == "fr" for v in versions): + return "FRENCH" + if any(v.get("subtitleLanguage") == "fr" for v in versions): + return "VOSTFR" + return "VO" + + async def invalidate_cache() -> int: _cache["ts"] = 0 try: diff --git a/downloader.py b/downloader.py index 0d4b899..63c6e8f 100644 --- a/downloader.py +++ b/downloader.py @@ -9,7 +9,10 @@ from pathlib import Path import yt_dlp +from arte_api import get_versions, select_lang_tag + OUTPUT_DIR = "/data/Arte" +_PID_RE = re.compile(r"\b(\d{6}-\d{3}-[A-Z])\b") DB_PATH = "data/arte_dl.db" Path("data").mkdir(exist_ok=True) @@ -39,12 +42,10 @@ def _slugify(s: str) -> str: return s.strip(".") -def build_release_name(title: str, subtitle: str, year: int | None, info: dict) -> str: +def build_release_name(title: str, subtitle: str, year: int | None, info: dict, lang_tag: str = "VO") -> str: + """Build a proper UNFR/scene release name. + Format: Title.Event.Year.LANG.Resolution.WEB-DL.x264.AAC-ReMoRa.mkv """ - Build a proper UNFR/scene release name. - Format: Title.Event.Year.FRENCH.Resolution.WEBRip.x264.AAC-ReMoRa.mp4 - """ - # Strip year from both title and subtitle to avoid duplication t = re.sub(r"\b" + str(year) + r"\b", "", title).strip() if year else title name = _slugify(t) @@ -57,7 +58,6 @@ def build_release_name(title: str, subtitle: str, year: int | None, info: dict) year_str = str(year) if year else "" - # Resolution from yt-dlp info height = info.get("height") or 0 if height >= 2160: res = "2160p" @@ -68,7 +68,6 @@ def build_release_name(title: str, subtitle: str, year: int | None, info: dict) else: res = f"{height}p" if height else "1080p" - # Video codec (avc1 = H.264, hev1/hvc1/hevc = H.265) vcodec = (info.get("vcodec") or "").lower() if "hevc" in vcodec or "h265" in vcodec or "hev1" in vcodec or "hvc1" in vcodec: vc = "HEVC" @@ -77,9 +76,9 @@ def build_release_name(title: str, subtitle: str, year: int | None, info: dict) else: vc = "x264" - parts = [name, year_str, res, "WEB-DL", vc, "AAC"] + parts = [name, year_str, lang_tag, res, "WEB-DL", vc, "AAC"] base = ".".join(p for p in parts if p) - return f"{base}-ReMoRa.mp4" + return f"{base}-ReMoRa.mkv" class DownloadManager: @@ -192,6 +191,13 @@ class DownloadManager: with _db() as conn: conn.execute("UPDATE downloads SET state='downloading' WHERE id=?", (dl_id,)) + # Determine language tag from Arte Player API before downloading + pid_m = _PID_RE.search(url) + lang_tag = "VO" + if pid_m: + versions = get_versions(pid_m.group(1)) + lang_tag = select_lang_tag(versions) + # For HLS, yt-dlp downloads video then audio separately. # After the first stream finishes, stay in "processing" to avoid # resetting progress to 0% when the audio stream starts. @@ -216,19 +222,30 @@ class DownloadManager: ydl_opts = { "outtmpl": f"{out_dir}/%(title)s.%(ext)s", "format": "bestvideo[vcodec^=avc1]+bestaudio/bestvideo+bestaudio/best", - "merge_output_format": "mp4", + "merge_output_format": "mkv", "progress_hooks": [hook], "quiet": True, "no_warnings": True, } + if lang_tag == "VOSTFR": + ydl_opts.update({ + "writesubtitles": True, + "subtitleslangs": ["fr"], + "embedsubtitles": True, + # Set first subtitle track as default in MKV + "postprocessor_args": {"ffmpeg_o": ["-disposition:s:0", "default"]}, + }) try: with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(url, download=True) orig_path = Path(ydl.prepare_filename(info)) - # Rename to proper release name - release_name = build_release_name(title, subtitle, year, info) + # yt-dlp renames to .mkv after merge; prepare_filename may return .mp4 + if not orig_path.exists(): + orig_path = orig_path.with_suffix(".mkv") + + release_name = build_release_name(title, subtitle, year, info, lang_tag) dest_path = orig_path.parent / release_name if orig_path.exists() and orig_path != dest_path: if dest_path.exists():