feat: VOSTFR/VO/FRENCH detection and subtitle embedding
Docker / docker (push) Successful in 1m32s

Query the Arte Player API before each download to determine the available
stream versions. Select the language tag: FRENCH if any version has French
audio, otherwise VOSTFR if French subtitles are available, otherwise VO.
Embed French subtitles as the default MKV track when the tag is VOSTFR.
All output is now .mkv.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
dev
2026-05-10 12:01:48 +02:00
parent 3f17203976
commit e1a2dd1685
2 changed files with 58 additions and 12 deletions
+29
View File
@@ -278,6 +278,35 @@ async def fetch_concerts(page: int = 1, search: str = "", page_size: int = 24, c
}
def get_versions(pid: str) -> list[dict]:
    """Return the ``versions`` list of the first stream for programme *pid*.

    The Player API URL is built from ``PLAYER_API`` and fetched with
    ``_fetch_url``; the response is parsed as JSON and the ``versions`` entry
    of the first element of ``data.attributes.streams`` is returned.

    This is a best-effort lookup: any exception (fetch failure, malformed
    JSON, unexpected payload shape) is logged at debug level and an empty
    list is returned. An empty list is also returned when the payload has no
    streams or the first stream has no versions.
    """
    try:
        endpoint = PLAYER_API.format(pid=pid)
        request_headers = {
            "User-Agent": _HEADERS["User-Agent"],
            "Accept": "application/json",
        }
        payload = json.loads(_fetch_url(endpoint, headers=request_headers))
        stream_list = payload["data"]["attributes"].get("streams")
        if not stream_list:
            return []
        # Only the first stream is inspected; a missing/empty value maps to [].
        first_versions = stream_list[0].get("versions")
        return first_versions if first_versions else []
    except Exception as ex:
        logger.debug("Failed to get versions for %s: %s", pid, ex)
        return []
def select_lang_tag(versions: list[dict]) -> str:
    """Pick the release language tag from a list of stream versions.

    Rules, checked in priority order over all versions:

    1. any version with ``audioLanguage == "fr"``    -> ``"FRENCH"``
    2. any version with ``subtitleLanguage == "fr"`` -> ``"VOSTFR"``
    3. otherwise (including empty/None input)         -> ``"VO"``
    """
    if not versions:
        return "VO"

    # The audio rule must be evaluated across every version before the
    # subtitle rule is considered, so each rule gets its own full pass.
    rules = (
        ("audioLanguage", "FRENCH"),
        ("subtitleLanguage", "VOSTFR"),
    )
    for field, tag in rules:
        for version in versions:
            if version.get(field) == "fr":
                return tag
    return "VO"
async def invalidate_cache() -> int:
_cache["ts"] = 0
try:
+29 -12
View File
@@ -9,7 +9,10 @@ from pathlib import Path
import yt_dlp
from arte_api import get_versions, select_lang_tag
OUTPUT_DIR = "/data/Arte"
_PID_RE = re.compile(r"\b(\d{6}-\d{3}-[A-Z])\b")
DB_PATH = "data/arte_dl.db"
Path("data").mkdir(exist_ok=True)
@@ -39,12 +42,10 @@ def _slugify(s: str) -> str:
return s.strip(".")
def build_release_name(title: str, subtitle: str, year: int | None, info: dict) -> str:
def build_release_name(title: str, subtitle: str, year: int | None, info: dict, lang_tag: str = "VO") -> str:
"""Build a proper UNFR/scene release name.
Format: Title.Event.Year.LANG.Resolution.WEB-DL.x264.AAC-ReMoRa.mkv
"""
Build a proper UNFR/scene release name.
Format: Title.Event.Year.FRENCH.Resolution.WEBRip.x264.AAC-ReMoRa.mp4
"""
# Strip year from both title and subtitle to avoid duplication
t = re.sub(r"\b" + str(year) + r"\b", "", title).strip() if year else title
name = _slugify(t)
@@ -57,7 +58,6 @@ def build_release_name(title: str, subtitle: str, year: int | None, info: dict)
year_str = str(year) if year else ""
# Resolution from yt-dlp info
height = info.get("height") or 0
if height >= 2160:
res = "2160p"
@@ -68,7 +68,6 @@ def build_release_name(title: str, subtitle: str, year: int | None, info: dict)
else:
res = f"{height}p" if height else "1080p"
# Video codec (avc1 = H.264, hev1/hvc1/hevc = H.265)
vcodec = (info.get("vcodec") or "").lower()
if "hevc" in vcodec or "h265" in vcodec or "hev1" in vcodec or "hvc1" in vcodec:
vc = "HEVC"
@@ -77,9 +76,9 @@ def build_release_name(title: str, subtitle: str, year: int | None, info: dict)
else:
vc = "x264"
parts = [name, year_str, res, "WEB-DL", vc, "AAC"]
parts = [name, year_str, lang_tag, res, "WEB-DL", vc, "AAC"]
base = ".".join(p for p in parts if p)
return f"{base}-ReMoRa.mp4"
return f"{base}-ReMoRa.mkv"
class DownloadManager:
@@ -192,6 +191,13 @@ class DownloadManager:
with _db() as conn:
conn.execute("UPDATE downloads SET state='downloading' WHERE id=?", (dl_id,))
# Determine language tag from Arte Player API before downloading
pid_m = _PID_RE.search(url)
lang_tag = "VO"
if pid_m:
versions = get_versions(pid_m.group(1))
lang_tag = select_lang_tag(versions)
# For HLS, yt-dlp downloads video then audio separately.
# After the first stream finishes, stay in "processing" to avoid
# resetting progress to 0% when the audio stream starts.
@@ -216,19 +222,30 @@ class DownloadManager:
ydl_opts = {
"outtmpl": f"{out_dir}/%(title)s.%(ext)s",
"format": "bestvideo[vcodec^=avc1]+bestaudio/bestvideo+bestaudio/best",
"merge_output_format": "mp4",
"merge_output_format": "mkv",
"progress_hooks": [hook],
"quiet": True,
"no_warnings": True,
}
if lang_tag == "VOSTFR":
ydl_opts.update({
"writesubtitles": True,
"subtitleslangs": ["fr"],
"embedsubtitles": True,
# Set first subtitle track as default in MKV
"postprocessor_args": {"ffmpeg_o": ["-disposition:s:0", "default"]},
})
try:
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(url, download=True)
orig_path = Path(ydl.prepare_filename(info))
# Rename to proper release name
release_name = build_release_name(title, subtitle, year, info)
# yt-dlp renames to .mkv after merge; prepare_filename may return .mp4
if not orig_path.exists():
orig_path = orig_path.with_suffix(".mkv")
release_name = build_release_name(title, subtitle, year, info, lang_tag)
dest_path = orig_path.parent / release_name
if orig_path.exists() and orig_path != dest_path:
if dest_path.exists():