From d3ce89f22849d262a76c1eecab02b4ffb9647bde Mon Sep 17 00:00:00 2001 From: dev Date: Fri, 15 May 2026 17:02:09 +0200 Subject: [PATCH] perf: stale-while-revalidate cache + startup pre-warm If the DB has any concert data (even expired), return it immediately and refresh it in the background. Start pre-warming at container startup so the scrape runs before the first user request. Co-Authored-By: Claude Sonnet 4.6 --- arte_api.py | 47 +++++++++++++++++++++++++++++++---------------- main.py | 3 ++- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/arte_api.py b/arte_api.py index 6321bcb..f43cedc 100644 --- a/arte_api.py +++ b/arte_api.py @@ -15,6 +15,7 @@ CACHE_TTL = 6 * 3600 DB_PATH = "data/arte_dl.db" _cache: dict = {"data": [], "ts": 0} _fetch_lock: asyncio.Lock | None = None +_refresh_task: asyncio.Task | None = None def _get_fetch_lock() -> asyncio.Lock: @@ -209,24 +210,12 @@ async def get_concerts_by_category(category: str) -> list[dict]: return [c for c in data if category in (c.get("categories") or [])] -async def get_all_concerts() -> list[dict]: - now = time.time() - if _cache["data"] and now - _cache["ts"] < CACHE_TTL: - return _cache["data"] - - # Try SQLite cache before acquiring the lock - db_data, db_ts = _load_db_cache() - if db_data and now - db_ts < CACHE_TTL: - logger.info("Concerts cache loaded from DB (%d concerts)", len(db_data)) - _cache["data"] = db_data - _cache["ts"] = db_ts - return _cache["data"] - - # Lock prevents multiple concurrent scrapes (thundering herd) +async def _do_refresh(): + """Full scrape under lock; updates in-memory + DB cache.""" + global _refresh_task async with _get_fetch_lock(): - # Re-check after acquiring lock — another coroutine may have populated the cache if _cache["data"] and time.time() - _cache["ts"] < CACHE_TTL: - return _cache["data"] + return loop = asyncio.get_event_loop() data = await loop.run_in_executor(None, _fetch_all_sync) if data: @@ -234,6 +223,32 @@ async def get_all_concerts() -> list[dict]: 
_cache["data"] = data _cache["ts"] = ts _save_db_cache(data, ts) + logger.info("Cache refreshed: %d concerts", len(data)) + _refresh_task = None + + +async def get_all_concerts() -> list[dict]: + global _refresh_task + now = time.time() + + # In-memory cache hit + if _cache["data"] and now - _cache["ts"] < CACHE_TTL: + return _cache["data"] + + # Try DB cache — return immediately even if stale, refresh in background + db_data, db_ts = _load_db_cache() + if db_data: + _cache["data"] = db_data + _cache["ts"] = db_ts + logger.info("Concerts loaded from DB cache (%d concerts)", len(db_data)) + if now - db_ts >= CACHE_TTL: + # Stale — serve now, refresh silently in background + if _refresh_task is None or _refresh_task.done(): + _refresh_task = asyncio.create_task(_do_refresh()) + return _cache["data"] + + # No cache at all — must scrape synchronously (first run or cleared DB) + await _do_refresh() return _cache["data"] diff --git a/main.py b/main.py index 559f251..7371862 100644 --- a/main.py +++ b/main.py @@ -11,7 +11,7 @@ from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates from pydantic import BaseModel -from arte_api import fetch_concerts, get_concerts_by_category, invalidate_cache, CATEGORIES +from arte_api import fetch_concerts, get_all_concerts, get_concerts_by_category, invalidate_cache, CATEGORIES from downloader import DownloadManager logging.basicConfig(level=logging.INFO) @@ -56,6 +56,7 @@ async def lifespan(app: FastAPI): tasks = [ asyncio.create_task(dm.start_worker()), asyncio.create_task(_auto_dl_loop()), + asyncio.create_task(get_all_concerts()), # pre-warm cache at startup ] yield for t in tasks: