2026-05-02 19:23:57 +02:00
|
|
|
|
import asyncio
|
2026-04-26 13:48:56 +02:00
|
|
|
|
import re
|
2026-04-25 18:36:00 +02:00
|
|
|
|
import sqlite3
|
|
|
|
|
|
import threading
|
2026-04-26 13:48:56 +02:00
|
|
|
|
import unicodedata
|
2026-04-25 18:36:00 +02:00
|
|
|
|
import uuid
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
import yt_dlp
|
|
|
|
|
|
|
2026-05-10 12:01:48 +02:00
|
|
|
|
from arte_api import get_versions, select_lang_tag
|
|
|
|
|
|
|
2026-04-25 18:36:00 +02:00
|
|
|
|
OUTPUT_DIR = "/data/Arte"
|
2026-05-10 12:01:48 +02:00
|
|
|
|
_PID_RE = re.compile(r"\b(\d{6}-\d{3}-[A-Z])\b")
|
2026-05-02 20:43:01 +02:00
|
|
|
|
DB_PATH = "data/arte_dl.db"
|
|
|
|
|
|
|
|
|
|
|
|
Path("data").mkdir(exist_ok=True)
|
2026-04-25 18:36:00 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _db():
    """Open a new SQLite connection to ``DB_PATH`` with name-addressable rows.

    Rows come back as ``sqlite3.Row`` so columns can be read by name.
    Note that using the returned connection as a context manager only
    commits or rolls back the transaction; it does not close the connection.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-26 13:48:56 +02:00
|
|
|
|
# ── Release naming ─────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
def _slugify(s: str) -> str:
|
|
|
|
|
|
"""Normalize a string to dot-separated scene-style slug."""
|
|
|
|
|
|
# Strip accents (NFD decompose then drop combining marks)
|
|
|
|
|
|
s = unicodedata.normalize("NFD", s)
|
|
|
|
|
|
s = "".join(c for c in s if unicodedata.category(c) != "Mn")
|
|
|
|
|
|
# Apostrophe before letter → .Letter (L'Amour → .L.Amour)
|
|
|
|
|
|
s = re.sub(r"['’]([A-Za-z])", lambda m: "." + m.group(1).upper(), s)
|
|
|
|
|
|
# Spaces / underscores → dot
|
|
|
|
|
|
s = re.sub(r"[\s_]+", ".", s)
|
|
|
|
|
|
# Keep only alphanumeric, dot, hyphen
|
|
|
|
|
|
s = re.sub(r"[^A-Za-z0-9.\-]", "", s)
|
|
|
|
|
|
# Collapse multiple dots
|
|
|
|
|
|
s = re.sub(r"\.{2,}", ".", s)
|
|
|
|
|
|
return s.strip(".")
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-10 12:01:48 +02:00
|
|
|
|
def build_release_name(title: str, subtitle: str, year: int | None, info: dict, lang_tag: str = "VO") -> str:
    """Build a proper UNFR/scene release name.

    Format: Title.Event.Year.LANG.Resolution.WEB-DL.x264.AAC-ReMoRa.mkv

    Args:
        title: Programme title; occurrences of ``year`` are removed from it.
        subtitle: Optional secondary title (may be empty or ``None``),
            cleaned the same way and appended after the title.
        year: Release year, or ``None``/0 to omit the year component.
        info: Metadata dict; only ``height`` and ``vcodec`` are read.
            ``None`` is treated as an empty dict.
        lang_tag: Language component; omitted from the name when empty.

    Returns:
        The file name (not a path), always ending in ``-ReMoRa.mkv``.
    """
    info = info or {}
    year_str = str(year) if year else ""

    def _clean(text):
        # Drop the year from the text (it gets its own component) and slugify.
        text = text or ""
        if year_str:
            text = re.sub(r"\b" + year_str + r"\b", "", text).strip()
        return _slugify(text)

    # Join only the non-empty slugs: a title that reduces to nothing (e.g. a
    # title that is just the year) must not leave a leading dot, which would
    # produce a hidden file.
    name = ".".join(slug for slug in (_clean(title), _clean(subtitle)) if slug)

    # Snap the height to the standard tiers; below 720 the raw height is
    # used, and an unknown height falls back to 1080p.
    height = info.get("height") or 0
    if height >= 2160:
        res = "2160p"
    elif height >= 1080:
        res = "1080p"
    elif height >= 720:
        res = "720p"
    else:
        res = f"{height}p" if height else "1080p"

    # Anything that is not recognisably HEVC is labelled x264.
    vcodec = (info.get("vcodec") or "").lower()
    is_hevc = any(tag in vcodec for tag in ("hevc", "h265", "hev1", "hvc1"))
    vc = "HEVC" if is_hevc else "x264"

    parts = [name, year_str, lang_tag, res, "WEB-DL", vc, "AAC"]
    base = ".".join(p for p in parts if p)
    return f"{base}-ReMoRa.mkv"
|
2026-04-26 13:48:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
2026-04-25 18:36:00 +02:00
|
|
|
|
class DownloadManager:
|
|
|
|
|
|
def __init__(self):
|
|
|
|
|
|
self._active: dict[str, dict] = {}
|
|
|
|
|
|
self._lock = threading.Lock()
|
2026-05-03 11:30:07 +02:00
|
|
|
|
self._queue: asyncio.Queue = asyncio.Queue()
|
2026-04-25 18:36:00 +02:00
|
|
|
|
self._init_db()
|
|
|
|
|
|
|
|
|
|
|
|
def _init_db(self):
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
conn.execute("""
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS downloads (
|
|
|
|
|
|
id TEXT PRIMARY KEY,
|
|
|
|
|
|
url TEXT NOT NULL,
|
|
|
|
|
|
title TEXT NOT NULL,
|
|
|
|
|
|
filename TEXT,
|
|
|
|
|
|
state TEXT NOT NULL DEFAULT 'queued',
|
|
|
|
|
|
progress REAL DEFAULT 0,
|
|
|
|
|
|
speed TEXT DEFAULT '',
|
|
|
|
|
|
eta INTEGER,
|
|
|
|
|
|
started_at TEXT,
|
|
|
|
|
|
finished_at TEXT,
|
|
|
|
|
|
error TEXT
|
|
|
|
|
|
)
|
|
|
|
|
|
""")
|
2026-05-02 19:23:57 +02:00
|
|
|
|
conn.execute("""
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS auto_dl_categories (
|
|
|
|
|
|
category TEXT PRIMARY KEY,
|
|
|
|
|
|
added_at TEXT NOT NULL
|
|
|
|
|
|
)
|
|
|
|
|
|
""")
|
2026-04-25 18:36:00 +02:00
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------------ public
|
|
|
|
|
|
|
2026-05-02 19:23:57 +02:00
|
|
|
|
def get_watched_categories(self) -> list[str]:
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
rows = conn.execute(
|
|
|
|
|
|
"SELECT category FROM auto_dl_categories ORDER BY added_at"
|
|
|
|
|
|
).fetchall()
|
|
|
|
|
|
return [r["category"] for r in rows]
|
|
|
|
|
|
|
|
|
|
|
|
def watch_category(self, category: str):
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
conn.execute(
|
|
|
|
|
|
"INSERT OR IGNORE INTO auto_dl_categories (category, added_at) VALUES (?,?)",
|
|
|
|
|
|
(category, datetime.now().isoformat()),
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def unwatch_category(self, category: str):
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
conn.execute("DELETE FROM auto_dl_categories WHERE category=?", (category,))
|
|
|
|
|
|
|
|
|
|
|
|
def already_enqueued(self, url: str) -> bool:
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
row = conn.execute(
|
|
|
|
|
|
"SELECT id FROM downloads WHERE url=? AND state != 'error' LIMIT 1", (url,)
|
|
|
|
|
|
).fetchone()
|
|
|
|
|
|
return row is not None
|
|
|
|
|
|
|
|
|
|
|
|
def already_downloaded(self, url: str) -> bool:
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
row = conn.execute(
|
|
|
|
|
|
"SELECT id FROM downloads WHERE url=? AND state='done' LIMIT 1", (url,)
|
|
|
|
|
|
).fetchone()
|
|
|
|
|
|
return row is not None
|
|
|
|
|
|
|
2026-05-03 11:30:07 +02:00
|
|
|
|
async def enqueue(self, url: str, title: str, subtitle: str,
|
|
|
|
|
|
year: int | None, category: str) -> str:
|
2026-04-25 18:36:00 +02:00
|
|
|
|
dl_id = str(uuid.uuid4())
|
|
|
|
|
|
now = datetime.now().isoformat()
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
conn.execute(
|
|
|
|
|
|
"INSERT INTO downloads (id, url, title, state, started_at) VALUES (?,?,?,'queued',?)",
|
|
|
|
|
|
(dl_id, url, title, now),
|
|
|
|
|
|
)
|
|
|
|
|
|
with self._lock:
|
|
|
|
|
|
self._active[dl_id] = {"state": "queued", "progress": 0, "title": title}
|
2026-05-03 11:30:07 +02:00
|
|
|
|
await self._queue.put((dl_id, url, title, subtitle, year, category))
|
2026-05-02 19:23:57 +02:00
|
|
|
|
return dl_id
|
|
|
|
|
|
|
2026-05-03 11:30:07 +02:00
|
|
|
|
async def start_worker(self):
|
2026-05-02 19:23:57 +02:00
|
|
|
|
loop = asyncio.get_running_loop()
|
2026-05-03 11:30:07 +02:00
|
|
|
|
while True:
|
|
|
|
|
|
job = await self._queue.get()
|
|
|
|
|
|
dl_id, url, title, subtitle, year, category = job
|
|
|
|
|
|
await loop.run_in_executor(None, self._run, dl_id, url, title, subtitle, year, category)
|
2026-05-02 19:23:57 +02:00
|
|
|
|
|
2026-04-25 18:36:00 +02:00
|
|
|
|
def status(self, dl_id: str) -> dict:
|
|
|
|
|
|
with self._lock:
|
|
|
|
|
|
return dict(self._active.get(dl_id, {"state": "unknown"}))
|
|
|
|
|
|
|
|
|
|
|
|
def history(self) -> list[dict]:
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
rows = conn.execute(
|
|
|
|
|
|
"SELECT * FROM downloads ORDER BY started_at DESC LIMIT 200"
|
|
|
|
|
|
).fetchall()
|
|
|
|
|
|
return [dict(r) for r in rows]
|
|
|
|
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------- private
|
|
|
|
|
|
|
|
|
|
|
|
def _set(self, dl_id: str, **kw):
|
|
|
|
|
|
with self._lock:
|
|
|
|
|
|
self._active.setdefault(dl_id, {}).update(kw)
|
|
|
|
|
|
|
2026-05-02 19:16:36 +02:00
|
|
|
|
def _run(self, dl_id: str, url: str, title: str, subtitle: str, year: int | None, category: str = ""):
|
|
|
|
|
|
out_dir = f"{OUTPUT_DIR}/{category}" if category else OUTPUT_DIR
|
|
|
|
|
|
Path(out_dir).mkdir(parents=True, exist_ok=True)
|
2026-04-25 18:36:00 +02:00
|
|
|
|
self._set(dl_id, state="downloading")
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
conn.execute("UPDATE downloads SET state='downloading' WHERE id=?", (dl_id,))
|
|
|
|
|
|
|
2026-05-10 12:01:48 +02:00
|
|
|
|
# Determine language tag from Arte Player API before downloading
|
|
|
|
|
|
pid_m = _PID_RE.search(url)
|
|
|
|
|
|
lang_tag = "VO"
|
|
|
|
|
|
if pid_m:
|
|
|
|
|
|
versions = get_versions(pid_m.group(1))
|
|
|
|
|
|
lang_tag = select_lang_tag(versions)
|
|
|
|
|
|
|
2026-04-26 12:48:29 +02:00
|
|
|
|
# For HLS, yt-dlp downloads video then audio separately.
|
2026-04-26 13:48:56 +02:00
|
|
|
|
# After the first stream finishes, stay in "processing" to avoid
|
|
|
|
|
|
# resetting progress to 0% when the audio stream starts.
|
2026-04-26 12:48:29 +02:00
|
|
|
|
finished_once = [False]
|
|
|
|
|
|
|
2026-04-25 18:36:00 +02:00
|
|
|
|
def hook(d):
|
2026-04-26 12:48:29 +02:00
|
|
|
|
if d["status"] == "downloading" and not finished_once[0]:
|
|
|
|
|
|
dl = d.get("downloaded_bytes") or 0
|
|
|
|
|
|
total = d.get("total_bytes") or d.get("total_bytes_estimate") or 0
|
|
|
|
|
|
pct = min(dl / total * 100, 99.0) if total > 0 else 0.0
|
2026-04-25 18:36:00 +02:00
|
|
|
|
self._set(
|
|
|
|
|
|
dl_id,
|
|
|
|
|
|
state="downloading",
|
2026-04-26 12:48:29 +02:00
|
|
|
|
progress=round(pct, 1),
|
2026-04-25 18:36:00 +02:00
|
|
|
|
speed=d.get("_speed_str", ""),
|
|
|
|
|
|
eta=d.get("eta"),
|
|
|
|
|
|
)
|
|
|
|
|
|
elif d["status"] == "finished":
|
2026-04-26 12:48:29 +02:00
|
|
|
|
finished_once[0] = True
|
2026-04-25 18:36:00 +02:00
|
|
|
|
self._set(dl_id, state="processing", progress=100)
|
|
|
|
|
|
|
|
|
|
|
|
ydl_opts = {
|
2026-05-02 19:16:36 +02:00
|
|
|
|
"outtmpl": f"{out_dir}/%(title)s.%(ext)s",
|
2026-04-26 12:48:29 +02:00
|
|
|
|
"format": "bestvideo[vcodec^=avc1]+bestaudio/bestvideo+bestaudio/best",
|
2026-05-10 12:01:48 +02:00
|
|
|
|
"merge_output_format": "mkv",
|
2026-04-25 18:36:00 +02:00
|
|
|
|
"progress_hooks": [hook],
|
|
|
|
|
|
"quiet": True,
|
|
|
|
|
|
"no_warnings": True,
|
|
|
|
|
|
}
|
2026-05-10 12:01:48 +02:00
|
|
|
|
if lang_tag == "VOSTFR":
|
|
|
|
|
|
ydl_opts.update({
|
|
|
|
|
|
"writesubtitles": True,
|
|
|
|
|
|
"subtitleslangs": ["fr"],
|
|
|
|
|
|
"embedsubtitles": True,
|
|
|
|
|
|
# Set first subtitle track as default in MKV
|
|
|
|
|
|
"postprocessor_args": {"ffmpeg_o": ["-disposition:s:0", "default"]},
|
|
|
|
|
|
})
|
2026-04-25 18:36:00 +02:00
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
|
|
|
|
|
info = ydl.extract_info(url, download=True)
|
2026-04-26 13:48:56 +02:00
|
|
|
|
orig_path = Path(ydl.prepare_filename(info))
|
|
|
|
|
|
|
2026-05-10 12:01:48 +02:00
|
|
|
|
# yt-dlp renames to .mkv after merge; prepare_filename may return .mp4
|
|
|
|
|
|
if not orig_path.exists():
|
|
|
|
|
|
orig_path = orig_path.with_suffix(".mkv")
|
|
|
|
|
|
|
|
|
|
|
|
release_name = build_release_name(title, subtitle, year, info, lang_tag)
|
2026-04-26 13:48:56 +02:00
|
|
|
|
dest_path = orig_path.parent / release_name
|
|
|
|
|
|
if orig_path.exists() and orig_path != dest_path:
|
|
|
|
|
|
if dest_path.exists():
|
|
|
|
|
|
dest_path.unlink()
|
|
|
|
|
|
orig_path.rename(dest_path)
|
|
|
|
|
|
filename = str(dest_path)
|
|
|
|
|
|
|
2026-04-25 18:36:00 +02:00
|
|
|
|
self._set(dl_id, state="done", progress=100)
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
conn.execute(
|
|
|
|
|
|
"UPDATE downloads SET state='done', progress=100, filename=?, finished_at=? WHERE id=?",
|
|
|
|
|
|
(filename, datetime.now().isoformat(), dl_id),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
self._set(dl_id, state="error", error=str(exc))
|
|
|
|
|
|
with _db() as conn:
|
|
|
|
|
|
conn.execute(
|
|
|
|
|
|
"UPDATE downloads SET state='error', error=?, finished_at=? WHERE id=?",
|
|
|
|
|
|
(str(exc), datetime.now().isoformat(), dl_id),
|
|
|
|
|
|
)
|