If the DB has any concert data (even expired), return it immediately and refresh it in the background. Start pre-warming the cache at container startup so the scrape runs before the first user request.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+31
-16
@@ -15,6 +15,7 @@ CACHE_TTL = 6 * 3600
|
|||||||
DB_PATH = "data/arte_dl.db"
|
DB_PATH = "data/arte_dl.db"
|
||||||
_cache: dict = {"data": [], "ts": 0}
|
_cache: dict = {"data": [], "ts": 0}
|
||||||
_fetch_lock: asyncio.Lock | None = None
|
_fetch_lock: asyncio.Lock | None = None
|
||||||
|
_refresh_task: asyncio.Task | None = None
|
||||||
|
|
||||||
|
|
||||||
def _get_fetch_lock() -> asyncio.Lock:
|
def _get_fetch_lock() -> asyncio.Lock:
|
||||||
@@ -209,24 +210,12 @@ async def get_concerts_by_category(category: str) -> list[dict]:
|
|||||||
return [c for c in data if category in (c.get("categories") or [])]
|
return [c for c in data if category in (c.get("categories") or [])]
|
||||||
|
|
||||||
|
|
||||||
async def get_all_concerts() -> list[dict]:
|
async def _do_refresh():
|
||||||
now = time.time()
|
"""Full scrape under lock; updates in-memory + DB cache."""
|
||||||
if _cache["data"] and now - _cache["ts"] < CACHE_TTL:
|
global _refresh_task
|
||||||
return _cache["data"]
|
|
||||||
|
|
||||||
# Try SQLite cache before acquiring the lock
|
|
||||||
db_data, db_ts = _load_db_cache()
|
|
||||||
if db_data and now - db_ts < CACHE_TTL:
|
|
||||||
logger.info("Concerts cache loaded from DB (%d concerts)", len(db_data))
|
|
||||||
_cache["data"] = db_data
|
|
||||||
_cache["ts"] = db_ts
|
|
||||||
return _cache["data"]
|
|
||||||
|
|
||||||
# Lock prevents multiple concurrent scrapes (thundering herd)
|
|
||||||
async with _get_fetch_lock():
|
async with _get_fetch_lock():
|
||||||
# Re-check after acquiring lock — another coroutine may have populated the cache
|
|
||||||
if _cache["data"] and time.time() - _cache["ts"] < CACHE_TTL:
|
if _cache["data"] and time.time() - _cache["ts"] < CACHE_TTL:
|
||||||
return _cache["data"]
|
return
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
data = await loop.run_in_executor(None, _fetch_all_sync)
|
data = await loop.run_in_executor(None, _fetch_all_sync)
|
||||||
if data:
|
if data:
|
||||||
@@ -234,6 +223,32 @@ async def get_all_concerts() -> list[dict]:
|
|||||||
_cache["data"] = data
|
_cache["data"] = data
|
||||||
_cache["ts"] = ts
|
_cache["ts"] = ts
|
||||||
_save_db_cache(data, ts)
|
_save_db_cache(data, ts)
|
||||||
|
logger.info("Cache refreshed: %d concerts", len(data))
|
||||||
|
_refresh_task = None
|
||||||
|
|
||||||
|
|
||||||
|
async def get_all_concerts() -> list[dict]:
|
||||||
|
global _refresh_task
|
||||||
|
now = time.time()
|
||||||
|
|
||||||
|
# In-memory cache hit
|
||||||
|
if _cache["data"] and now - _cache["ts"] < CACHE_TTL:
|
||||||
|
return _cache["data"]
|
||||||
|
|
||||||
|
# Try DB cache — return immediately even if stale, refresh in background
|
||||||
|
db_data, db_ts = _load_db_cache()
|
||||||
|
if db_data:
|
||||||
|
_cache["data"] = db_data
|
||||||
|
_cache["ts"] = db_ts
|
||||||
|
logger.info("Concerts loaded from DB cache (%d concerts)", len(db_data))
|
||||||
|
if now - db_ts >= CACHE_TTL:
|
||||||
|
# Stale — serve now, refresh silently in background
|
||||||
|
if _refresh_task is None or _refresh_task.done():
|
||||||
|
_refresh_task = asyncio.create_task(_do_refresh())
|
||||||
|
return _cache["data"]
|
||||||
|
|
||||||
|
# No cache at all — must scrape synchronously (first run or cleared DB)
|
||||||
|
await _do_refresh()
|
||||||
return _cache["data"]
|
return _cache["data"]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from fastapi.staticfiles import StaticFiles
|
|||||||
from fastapi.templating import Jinja2Templates
|
from fastapi.templating import Jinja2Templates
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from arte_api import fetch_concerts, get_concerts_by_category, invalidate_cache, CATEGORIES
|
from arte_api import fetch_concerts, get_all_concerts, get_concerts_by_category, invalidate_cache, CATEGORIES
|
||||||
from downloader import DownloadManager
|
from downloader import DownloadManager
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
@@ -56,6 +56,7 @@ async def lifespan(app: FastAPI):
|
|||||||
tasks = [
|
tasks = [
|
||||||
asyncio.create_task(dm.start_worker()),
|
asyncio.create_task(dm.start_worker()),
|
||||||
asyncio.create_task(_auto_dl_loop()),
|
asyncio.create_task(_auto_dl_loop()),
|
||||||
|
asyncio.create_task(get_all_concerts()), # pre-warm cache at startup
|
||||||
]
|
]
|
||||||
yield
|
yield
|
||||||
for t in tasks:
|
for t in tasks:
|
||||||
|
|||||||
Reference in New Issue
Block a user