perf: stale-while-revalidate cache + startup pre-warm
Docker / docker (push) Successful in 1m58s

If the DB holds any concert data (even older than the cache TTL), return it
immediately and refresh it in the background. Pre-warm the cache at container
startup so the scrape is already under way before the first user request.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
dev
2026-05-15 17:02:09 +02:00
parent 09457868e4
commit d3ce89f228
2 changed files with 33 additions and 17 deletions
+31 -16
View File
@@ -15,6 +15,7 @@ CACHE_TTL = 6 * 3600
DB_PATH = "data/arte_dl.db" DB_PATH = "data/arte_dl.db"
_cache: dict = {"data": [], "ts": 0} _cache: dict = {"data": [], "ts": 0}
_fetch_lock: asyncio.Lock | None = None _fetch_lock: asyncio.Lock | None = None
_refresh_task: asyncio.Task | None = None
def _get_fetch_lock() -> asyncio.Lock: def _get_fetch_lock() -> asyncio.Lock:
@@ -209,24 +210,12 @@ async def get_concerts_by_category(category: str) -> list[dict]:
return [c for c in data if category in (c.get("categories") or [])] return [c for c in data if category in (c.get("categories") or [])]
async def get_all_concerts() -> list[dict]: async def _do_refresh():
now = time.time() """Full scrape under lock; updates in-memory + DB cache."""
if _cache["data"] and now - _cache["ts"] < CACHE_TTL: global _refresh_task
return _cache["data"]
# Try SQLite cache before acquiring the lock
db_data, db_ts = _load_db_cache()
if db_data and now - db_ts < CACHE_TTL:
logger.info("Concerts cache loaded from DB (%d concerts)", len(db_data))
_cache["data"] = db_data
_cache["ts"] = db_ts
return _cache["data"]
# Lock prevents multiple concurrent scrapes (thundering herd)
async with _get_fetch_lock(): async with _get_fetch_lock():
# Re-check after acquiring lock — another coroutine may have populated the cache
if _cache["data"] and time.time() - _cache["ts"] < CACHE_TTL: if _cache["data"] and time.time() - _cache["ts"] < CACHE_TTL:
return _cache["data"] return
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
data = await loop.run_in_executor(None, _fetch_all_sync) data = await loop.run_in_executor(None, _fetch_all_sync)
if data: if data:
@@ -234,6 +223,32 @@ async def get_all_concerts() -> list[dict]:
_cache["data"] = data _cache["data"] = data
_cache["ts"] = ts _cache["ts"] = ts
_save_db_cache(data, ts) _save_db_cache(data, ts)
logger.info("Cache refreshed: %d concerts", len(data))
_refresh_task = None
async def get_all_concerts() -> list[dict]:
global _refresh_task
now = time.time()
# In-memory cache hit
if _cache["data"] and now - _cache["ts"] < CACHE_TTL:
return _cache["data"]
# Try DB cache — return immediately even if stale, refresh in background
db_data, db_ts = _load_db_cache()
if db_data:
_cache["data"] = db_data
_cache["ts"] = db_ts
logger.info("Concerts loaded from DB cache (%d concerts)", len(db_data))
if now - db_ts >= CACHE_TTL:
# Stale — serve now, refresh silently in background
if _refresh_task is None or _refresh_task.done():
_refresh_task = asyncio.create_task(_do_refresh())
return _cache["data"]
# No cache at all — must scrape synchronously (first run or cleared DB)
await _do_refresh()
return _cache["data"] return _cache["data"]
+2 -1
View File
@@ -11,7 +11,7 @@ from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates from fastapi.templating import Jinja2Templates
from pydantic import BaseModel from pydantic import BaseModel
from arte_api import fetch_concerts, get_concerts_by_category, invalidate_cache, CATEGORIES from arte_api import fetch_concerts, get_all_concerts, get_concerts_by_category, invalidate_cache, CATEGORIES
from downloader import DownloadManager from downloader import DownloadManager
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@@ -56,6 +56,7 @@ async def lifespan(app: FastAPI):
tasks = [ tasks = [
asyncio.create_task(dm.start_worker()), asyncio.create_task(dm.start_worker()),
asyncio.create_task(_auto_dl_loop()), asyncio.create_task(_auto_dl_loop()),
asyncio.create_task(get_all_concerts()), # pre-warm cache at startup
] ]
yield yield
for t in tasks: for t in tasks: