If DB has any concerts data (even expired), return it immediately and refresh in background. Start pre-warming at container startup so the scrape runs before the first user request. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+31
-16
@@ -15,6 +15,7 @@ CACHE_TTL = 6 * 3600
|
||||
DB_PATH = "data/arte_dl.db"
|
||||
_cache: dict = {"data": [], "ts": 0}
|
||||
_fetch_lock: asyncio.Lock | None = None
|
||||
_refresh_task: asyncio.Task | None = None
|
||||
|
||||
|
||||
def _get_fetch_lock() -> asyncio.Lock:
|
||||
@@ -209,24 +210,12 @@ async def get_concerts_by_category(category: str) -> list[dict]:
|
||||
return [c for c in data if category in (c.get("categories") or [])]
|
||||
|
||||
|
||||
async def get_all_concerts() -> list[dict]:
|
||||
now = time.time()
|
||||
if _cache["data"] and now - _cache["ts"] < CACHE_TTL:
|
||||
return _cache["data"]
|
||||
|
||||
# Try SQLite cache before acquiring the lock
|
||||
db_data, db_ts = _load_db_cache()
|
||||
if db_data and now - db_ts < CACHE_TTL:
|
||||
logger.info("Concerts cache loaded from DB (%d concerts)", len(db_data))
|
||||
_cache["data"] = db_data
|
||||
_cache["ts"] = db_ts
|
||||
return _cache["data"]
|
||||
|
||||
# Lock prevents multiple concurrent scrapes (thundering herd)
|
||||
async def _do_refresh():
|
||||
"""Full scrape under lock; updates in-memory + DB cache."""
|
||||
global _refresh_task
|
||||
async with _get_fetch_lock():
|
||||
# Re-check after acquiring lock — another coroutine may have populated the cache
|
||||
if _cache["data"] and time.time() - _cache["ts"] < CACHE_TTL:
|
||||
return _cache["data"]
|
||||
return
|
||||
loop = asyncio.get_event_loop()
|
||||
data = await loop.run_in_executor(None, _fetch_all_sync)
|
||||
if data:
|
||||
@@ -234,6 +223,32 @@ async def get_all_concerts() -> list[dict]:
|
||||
_cache["data"] = data
|
||||
_cache["ts"] = ts
|
||||
_save_db_cache(data, ts)
|
||||
logger.info("Cache refreshed: %d concerts", len(data))
|
||||
_refresh_task = None
|
||||
|
||||
|
||||
async def get_all_concerts() -> list[dict]:
    """Return the concert list, preferring any cached copy over a blocking scrape.

    Lookup order:

    1. The in-memory cache, when it is populated and younger than
       ``CACHE_TTL``.
    2. The DB cache, whatever its age. The rows are copied into the
       in-memory cache and returned right away; when they are older than
       ``CACHE_TTL`` a background ``_do_refresh()`` task is scheduled,
       unless one is already in flight.
    3. Nothing cached anywhere: ``_do_refresh()`` is awaited and whatever
       the in-memory cache then holds is returned.
    """
    global _refresh_task
    started = time.time()

    # Fast path: fresh data already held in memory.
    if _cache["data"] and started - _cache["ts"] < CACHE_TTL:
        return _cache["data"]

    stored, stored_ts = _load_db_cache()

    if not stored:
        # No cache at all (first run or cleared DB): the caller has to wait
        # for the refresh to finish.
        await _do_refresh()
        return _cache["data"]

    # Serve the DB copy immediately, even when it is past its TTL.
    _cache.update(data=stored, ts=stored_ts)
    logger.info("Concerts loaded from DB cache (%d concerts)", len(stored))

    is_stale = started - stored_ts >= CACHE_TTL
    if is_stale and (_refresh_task is None or _refresh_task.done()):
        # Refresh silently in the background; only one task at a time.
        _refresh_task = asyncio.create_task(_do_refresh())

    return _cache["data"]
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user