From a4ffd6d63ea264cf98ed5ddc0b41be22525ae9d5 Mon Sep 17 00:00:00 2001 From: dev Date: Sat, 2 May 2026 20:43:01 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20DB=20sur=20volume=20persistant=20+=20ver?= =?UTF-8?q?rou=20anti-scrapes=20parall=C3=A8les?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - DB_PATH → data/arte_dl.db (mappé sur /mnt/user/appdata/arte-dl) - mkdir data/ au démarrage dans downloader.py et arte_api.py - asyncio.Lock sur get_all_concerts() : une seule scrape à la fois, les requêtes concurrentes attendent le résultat au lieu de relancer Co-Authored-By: Claude Sonnet 4.6 --- arte_api.py | 34 ++++++++++++++++++++++++++-------- downloader.py | 4 +++- tmdb.py | 2 +- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/arte_api.py b/arte_api.py index 93a516d..7f3dcc0 100644 --- a/arte_api.py +++ b/arte_api.py @@ -12,8 +12,20 @@ import tmdb as _tmdb logger = logging.getLogger(__name__) CACHE_TTL = 6 * 3600 -DB_PATH = "arte_dl.db" +DB_PATH = "data/arte_dl.db" _cache: dict = {"data": [], "ts": 0} +_fetch_lock: asyncio.Lock | None = None + + +def _get_fetch_lock() -> asyncio.Lock: + global _fetch_lock + if _fetch_lock is None: + _fetch_lock = asyncio.Lock() + return _fetch_lock + + +import os as _os +_os.makedirs("data", exist_ok=True) def _db(): @@ -202,7 +214,7 @@ async def get_all_concerts() -> list[dict]: if _cache["data"] and now - _cache["ts"] < CACHE_TTL: return _cache["data"] - # Try SQLite cache before hitting the network + # Try SQLite cache before acquiring the lock db_data, db_ts = _load_db_cache() if db_data and now - db_ts < CACHE_TTL: logger.info("Concerts cache loaded from DB (%d concerts)", len(db_data)) @@ -210,12 +222,18 @@ async def get_all_concerts() -> list[dict]: _cache["ts"] = db_ts return _cache["data"] - loop = asyncio.get_event_loop() - data = await loop.run_in_executor(None, _fetch_all_sync) - if data: - _cache["data"] = data - _cache["ts"] = now - _save_db_cache(data, now) + # Lock prevents multiple concurrent scrapes (thundering herd) + async with _get_fetch_lock(): + # Re-check after acquiring lock — another coroutine may have populated the cache + if _cache["data"] and time.time() - _cache["ts"] < CACHE_TTL: + return _cache["data"] + loop = asyncio.get_event_loop() + data = await loop.run_in_executor(None, _fetch_all_sync) + if data: + ts = time.time() + _cache["data"] = data + _cache["ts"] = ts + _save_db_cache(data, ts) return _cache["data"] diff --git a/downloader.py b/downloader.py index ae2d399..6cb6fa3 100644 --- a/downloader.py +++ b/downloader.py @@ -11,7 +11,9 @@ import yt_dlp from fastapi import BackgroundTasks OUTPUT_DIR = "/data/Arte" -DB_PATH = "arte_dl.db" +DB_PATH = "data/arte_dl.db" + +Path("data").mkdir(exist_ok=True) def _db(): diff --git a/tmdb.py b/tmdb.py index defd74c..2ba44ab 100644 --- a/tmdb.py +++ b/tmdb.py @@ -11,7 +11,7 @@ logger = logging.getLogger(__name__) _CACHE_DAYS = 30 _THRESHOLD = 0.45 -_DB = "arte_dl.db" +_DB = "data/arte_dl.db" _IMG_BASE = "https://image.tmdb.org/t/p" _SEARCH_URL = "https://api.themoviedb.org/3/search/movie"