fix: DB sur volume persistant + verrou anti-scrapes parallèles
Docker / docker (push) Successful in 3m1s
Docker / docker (push) Successful in 3m1s
- DB_PATH → data/arte_dl.db (mappé sur /mnt/user/appdata/arte-dl)
- mkdir data/ au démarrage dans downloader.py et arte_api.py
- asyncio.Lock sur get_all_concerts() : une seule scrape à la fois, les requêtes concurrentes attendent le résultat au lieu d'en relancer une

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+22
-4
@@ -12,8 +12,20 @@ import tmdb as _tmdb
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
CACHE_TTL = 6 * 3600
|
CACHE_TTL = 6 * 3600
|
||||||
DB_PATH = "arte_dl.db"
|
DB_PATH = "data/arte_dl.db"
|
||||||
_cache: dict = {"data": [], "ts": 0}
|
_cache: dict = {"data": [], "ts": 0}
|
||||||
|
_fetch_lock: asyncio.Lock | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_fetch_lock() -> asyncio.Lock:
|
||||||
|
global _fetch_lock
|
||||||
|
if _fetch_lock is None:
|
||||||
|
_fetch_lock = asyncio.Lock()
|
||||||
|
return _fetch_lock
|
||||||
|
|
||||||
|
|
||||||
|
import os as _os
|
||||||
|
_os.makedirs("data", exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def _db():
|
def _db():
|
||||||
@@ -202,7 +214,7 @@ async def get_all_concerts() -> list[dict]:
|
|||||||
if _cache["data"] and now - _cache["ts"] < CACHE_TTL:
|
if _cache["data"] and now - _cache["ts"] < CACHE_TTL:
|
||||||
return _cache["data"]
|
return _cache["data"]
|
||||||
|
|
||||||
# Try SQLite cache before hitting the network
|
# Try SQLite cache before acquiring the lock
|
||||||
db_data, db_ts = _load_db_cache()
|
db_data, db_ts = _load_db_cache()
|
||||||
if db_data and now - db_ts < CACHE_TTL:
|
if db_data and now - db_ts < CACHE_TTL:
|
||||||
logger.info("Concerts cache loaded from DB (%d concerts)", len(db_data))
|
logger.info("Concerts cache loaded from DB (%d concerts)", len(db_data))
|
||||||
@@ -210,12 +222,18 @@ async def get_all_concerts() -> list[dict]:
|
|||||||
_cache["ts"] = db_ts
|
_cache["ts"] = db_ts
|
||||||
return _cache["data"]
|
return _cache["data"]
|
||||||
|
|
||||||
|
# Lock prevents multiple concurrent scrapes (thundering herd)
|
||||||
|
async with _get_fetch_lock():
|
||||||
|
# Re-check after acquiring lock — another coroutine may have populated the cache
|
||||||
|
if _cache["data"] and time.time() - _cache["ts"] < CACHE_TTL:
|
||||||
|
return _cache["data"]
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
data = await loop.run_in_executor(None, _fetch_all_sync)
|
data = await loop.run_in_executor(None, _fetch_all_sync)
|
||||||
if data:
|
if data:
|
||||||
|
ts = time.time()
|
||||||
_cache["data"] = data
|
_cache["data"] = data
|
||||||
_cache["ts"] = now
|
_cache["ts"] = ts
|
||||||
_save_db_cache(data, now)
|
_save_db_cache(data, ts)
|
||||||
return _cache["data"]
|
return _cache["data"]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+3
-1
@@ -11,7 +11,9 @@ import yt_dlp
|
|||||||
from fastapi import BackgroundTasks
|
from fastapi import BackgroundTasks
|
||||||
|
|
||||||
OUTPUT_DIR = "/data/Arte"
|
OUTPUT_DIR = "/data/Arte"
|
||||||
DB_PATH = "arte_dl.db"
|
DB_PATH = "data/arte_dl.db"
|
||||||
|
|
||||||
|
Path("data").mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def _db():
|
def _db():
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
_CACHE_DAYS = 30
|
_CACHE_DAYS = 30
|
||||||
_THRESHOLD = 0.45
|
_THRESHOLD = 0.45
|
||||||
_DB = "arte_dl.db"
|
_DB = "data/arte_dl.db"
|
||||||
_IMG_BASE = "https://image.tmdb.org/t/p"
|
_IMG_BASE = "https://image.tmdb.org/t/p"
|
||||||
_SEARCH_URL = "https://api.themoviedb.org/3/search/movie"
|
_SEARCH_URL = "https://api.themoviedb.org/3/search/movie"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user