diff --git a/requirements.txt b/requirements.txt index 090a373..286126a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,6 +17,12 @@ jinja2>=3.1 python-multipart>=0.0.12 httpx>=0.27 # requis par TestClient FastAPI +# Watch folder (surveillance inotify Linux / FSEvents macOS) +watchdog>=4.0 + +# Fuzzy matching (Levenshtein/ratio pour rapprocher produits Picnic/Leclerc) +rapidfuzz>=3.9 + # Tests pytest==8.3.4 diff --git a/tests/test_matcher.py b/tests/test_matcher.py new file mode 100644 index 0000000..24095c2 --- /dev/null +++ b/tests/test_matcher.py @@ -0,0 +1,209 @@ +""" +Tests du fuzzy matcher (tickettracker/db/matcher.py). + +Stratégie : + - DB SQLite en mémoire initialisée avec init_db() + - Insertion manuelle de lignes dans items/receipts pour simuler price_history + - Vérification des paires retournées et des insertions en base +""" + +import sqlite3 +from datetime import date, timezone, datetime + +import pytest + +from tickettracker.db.schema import init_db, get_connection +from tickettracker.db.matcher import find_fuzzy_matches, save_fuzzy_matches + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def db_path(tmp_path): + """Base SQLite vide dans un répertoire temporaire.""" + path = tmp_path / "test_matcher.db" + init_db(path) + return path + + +@pytest.fixture +def conn_with_products(db_path): + """Connexion avec produits Picnic et Leclerc similaires.""" + conn = get_connection(db_path) + + # Insérer deux tickets (un Picnic, un Leclerc) + with conn: + r_picnic = conn.execute( + "INSERT INTO receipts (store, date, total, raw_json, created_at) " + "VALUES ('picnic', '2026-01-10', 15.0, '{}', '2026-01-10T10:00:00')" + ).lastrowid + r_leclerc = conn.execute( + "INSERT INTO receipts (store, date, total, raw_json, created_at) " + "VALUES ('leclerc', '2026-01-15', 20.0, '{}', '2026-01-15T10:00:00')" + 
).lastrowid + + # Produits Picnic (name_normalized rempli) + conn.execute( + "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " + "VALUES (?, 'Lait demi-écremé', 'lait demi-écrémé', 1, 'pièce', 1.05, 1.05)", + (r_picnic,), + ) + conn.execute( + "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " + "VALUES (?, 'Jus orange', 'jus d orange', 1, 'pièce', 2.10, 2.10)", + (r_picnic,), + ) + + # Produits Leclerc (similaires aux Picnic) + conn.execute( + "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " + "VALUES (?, 'LAIT DEMI ECREME', 'lait demi ecreme', 1, 'pièce', 0.95, 0.95)", + (r_leclerc,), + ) + conn.execute( + "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " + "VALUES (?, 'FARINE BLE', 'farine blé', 1, 'pièce', 1.20, 1.20)", + (r_leclerc,), + ) + + yield conn + conn.close() + + +@pytest.fixture +def conn_empty(db_path): + """Connexion sur base vide (pas d'articles normalisés).""" + conn = get_connection(db_path) + yield conn + conn.close() + + +# --------------------------------------------------------------------------- +# Tests find_fuzzy_matches +# --------------------------------------------------------------------------- + +def test_find_fuzzy_matches_returns_list(conn_with_products): + """find_fuzzy_matches retourne une liste.""" + result = find_fuzzy_matches(conn_with_products, threshold=70.0) + assert isinstance(result, list) + + +def test_find_fuzzy_matches_detects_similar_products(conn_with_products): + """Des produits similaires (lait demi) sont détectés avec un seuil bas.""" + matches = find_fuzzy_matches(conn_with_products, threshold=70.0) + assert len(matches) >= 1 + # La paire lait demi-écrémé ↔ lait demi ecreme doit être détectée + picnic_names = [m["name_picnic"] for m in matches] + assert "lait demi-écrémé" in picnic_names + + +def 
test_find_fuzzy_matches_threshold_respected(conn_with_products): + """Avec un seuil de 100, aucun match (car noms ≠ exact).""" + matches = find_fuzzy_matches(conn_with_products, threshold=100.0) + assert matches == [] + + +def test_find_fuzzy_matches_high_threshold_reduces_results(conn_with_products): + """Un seuil élevé retourne moins de résultats qu'un seuil bas.""" + matches_low = find_fuzzy_matches(conn_with_products, threshold=50.0) + matches_high = find_fuzzy_matches(conn_with_products, threshold=90.0) + assert len(matches_high) <= len(matches_low) + + +def test_find_fuzzy_matches_sorted_by_score_desc(conn_with_products): + """Les résultats sont triés par score décroissant.""" + matches = find_fuzzy_matches(conn_with_products, threshold=50.0) + scores = [m["score"] for m in matches] + assert scores == sorted(scores, reverse=True) + + +def test_find_fuzzy_matches_result_structure(conn_with_products): + """Chaque résultat a les clés attendues.""" + matches = find_fuzzy_matches(conn_with_products, threshold=70.0) + if matches: + m = matches[0] + assert "name_picnic" in m + assert "name_leclerc" in m + assert "score" in m + assert 0 <= m["score"] <= 100 + + +def test_find_fuzzy_matches_exact_same_excluded(conn_with_products): + """Les noms identiques ne doivent pas apparaître comme paires fuzzy.""" + # On insère un produit identique dans les deux enseignes + with conn_with_products: + r = conn_with_products.execute( + "INSERT INTO receipts (store, date, total, raw_json, created_at) " + "VALUES ('picnic', '2026-02-01', 5.0, '{}', '2026-02-01T10:00:00')" + ).lastrowid + conn_with_products.execute( + "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " + "VALUES (?, 'pain', 'pain', 1, 'pièce', 1.0, 1.0)", + (r,), + ) + r2 = conn_with_products.execute( + "INSERT INTO receipts (store, date, total, raw_json, created_at) " + "VALUES ('leclerc', '2026-02-01', 5.0, '{}', '2026-02-01T11:00:00')" + ).lastrowid + 
conn_with_products.execute( + "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " + "VALUES (?, 'pain', 'pain', 1, 'pièce', 0.9, 0.9)", + (r2,), + ) + + matches = find_fuzzy_matches(conn_with_products, threshold=70.0) + # Aucune paire ne doit avoir name_picnic == name_leclerc + for m in matches: + assert m["name_picnic"] != m["name_leclerc"] + + +def test_find_fuzzy_matches_empty_db(conn_empty): + """Sur une base sans produits normalisés, retourne une liste vide.""" + matches = find_fuzzy_matches(conn_empty, threshold=85.0) + assert matches == [] + + +# --------------------------------------------------------------------------- +# Tests save_fuzzy_matches +# --------------------------------------------------------------------------- + +def test_save_fuzzy_matches_inserts_rows(conn_with_products): + """save_fuzzy_matches insère les nouvelles paires en base.""" + matches = find_fuzzy_matches(conn_with_products, threshold=70.0) + inserted = save_fuzzy_matches(conn_with_products, matches) + assert inserted == len(matches) + + +def test_save_fuzzy_matches_ignores_duplicates(conn_with_products): + """Un second appel avec les mêmes paires n'insère rien (OR IGNORE).""" + matches = find_fuzzy_matches(conn_with_products, threshold=70.0) + save_fuzzy_matches(conn_with_products, matches) + inserted_again = save_fuzzy_matches(conn_with_products, matches) + assert inserted_again == 0 + + +def test_save_fuzzy_matches_status_pending(conn_with_products): + """Les paires insérées ont le statut 'pending' par défaut.""" + matches = find_fuzzy_matches(conn_with_products, threshold=70.0) + save_fuzzy_matches(conn_with_products, matches) + rows = conn_with_products.execute( + "SELECT status FROM product_matches" + ).fetchall() + assert all(r["status"] == "pending" for r in rows) + + +def test_save_fuzzy_matches_returns_correct_count(conn_with_products): + """save_fuzzy_matches retourne exactement le nombre de lignes insérées.""" + matches 
= [{"name_picnic": "test1", "name_leclerc": "test2", "score": 90.0}] + count = save_fuzzy_matches(conn_with_products, matches) + assert count == 1 + + +def test_save_fuzzy_matches_empty_list(conn_with_products): + """Appel avec une liste vide retourne 0 et ne modifie pas la base.""" + count = save_fuzzy_matches(conn_with_products, []) + assert count == 0 + rows = conn_with_products.execute("SELECT COUNT(*) FROM product_matches").fetchone()[0] + assert rows == 0 diff --git a/tickettracker/cli.py b/tickettracker/cli.py index 81c8c3b..620142e 100644 --- a/tickettracker/cli.py +++ b/tickettracker/cli.py @@ -28,7 +28,9 @@ def build_parser() -> argparse.ArgumentParser: tickettracker.cli ├── import --source {picnic,leclerc} [--db PATH] ├── stats [--db PATH] - └── normalize [--dry-run] [--batch-size N] [--db PATH] + ├── normalize [--dry-run] [--batch-size N] [--db PATH] + ├── match [--threshold N] [--db PATH] + └── watch [--inbox PATH] [--db PATH] """ parser = argparse.ArgumentParser( prog="python -m tickettracker.cli", @@ -99,6 +101,47 @@ def build_parser() -> argparse.ArgumentParser: help=f"Articles par appel LLM (défaut : {_cfg.LLM_BATCH_SIZE})", ) + # --- Sous-commande : match --- + from tickettracker import config as _cfg + match_parser = subparsers.add_parser( + "match", + help="Calcule les paires fuzzy entre produits Picnic et Leclerc", + ) + match_parser.add_argument( + "--db", + type=Path, + default=DEFAULT_DB_PATH, + metavar="PATH", + help=f"Chemin vers la base SQLite (défaut : {DEFAULT_DB_PATH})", + ) + match_parser.add_argument( + "--threshold", + type=float, + default=_cfg.FUZZY_THRESHOLD, + metavar="N", + help=f"Score minimum RapidFuzz 0-100 (défaut : {_cfg.FUZZY_THRESHOLD})", + ) + + # --- Sous-commande : watch --- + watch_parser = subparsers.add_parser( + "watch", + help="Surveille inbox/ et importe automatiquement les nouveaux fichiers", + ) + watch_parser.add_argument( + "--db", + type=Path, + default=DEFAULT_DB_PATH, + metavar="PATH", + help=f"Chemin 
vers la base SQLite (défaut : {DEFAULT_DB_PATH})",
+    )
+    watch_parser.add_argument(
+        "--inbox",
+        type=Path,
+        default=Path("inbox"),
+        metavar="PATH",
+        help="Répertoire inbox/ à surveiller (défaut : ./inbox)",
+    )
+
     return parser
 
 
@@ -205,6 +248,55 @@ def cmd_normalize(args: argparse.Namespace) -> int:
         return 1
 
 
+def cmd_match(args: argparse.Namespace) -> int:
+    """Run the 'match' sub-command.
+
+    Computes the fuzzy pairs between Picnic and Leclerc products, stores
+    them in product_matches and prints a summary.
+
+    Returns:
+        0 on success, 1 when the database file does not exist.
+    """
+    from contextlib import closing
+
+    from tickettracker.db import schema
+    from tickettracker.db.matcher import find_fuzzy_matches, save_fuzzy_matches
+
+    if not Path(args.db).exists():
+        print(f"Base de données absente : {args.db}", file=sys.stderr)
+        print("Importez d'abord un ticket avec la commande 'import'.", file=sys.stderr)
+        return 1
+
+    # A sqlite3 connection used as a context manager only commits or rolls
+    # back; closing() is what actually releases the handle afterwards.
+    with closing(schema.get_connection(args.db)) as conn:
+        matches = find_fuzzy_matches(conn, threshold=args.threshold)
+        inserted = save_fuzzy_matches(conn, matches)
+
+    total = len(matches)
+    ignored = total - inserted
+    print(
+        f"{inserted} nouvelles paires trouvées (seuil={args.threshold:.0f}%). "
+        f"{ignored} ignorées (déjà connues)."
+    )
+    return 0
+
+
+def cmd_watch(args: argparse.Namespace) -> int:
+    """Exécute la sous-commande 'watch'.
+
+    Lance la surveillance du dossier inbox/ (bloquant — Ctrl+C pour arrêter).
+
+    Returns:
+        0 après interruption par l'utilisateur.
+ """ + from tickettracker.watcher import watch + + inbox_path = args.inbox.resolve() + watch(inbox_path, args.db) + return 0 + + def main() -> None: """Point d'entrée principal.""" parser = build_parser() @@ -216,6 +304,10 @@ def main() -> None: sys.exit(cmd_stats(args)) elif args.command == "normalize": sys.exit(cmd_normalize(args)) + elif args.command == "match": + sys.exit(cmd_match(args)) + elif args.command == "watch": + sys.exit(cmd_watch(args)) if __name__ == "__main__": diff --git a/tickettracker/config.py b/tickettracker/config.py index 5e6198c..57c2141 100644 --- a/tickettracker/config.py +++ b/tickettracker/config.py @@ -45,3 +45,10 @@ LLM_TIMEOUT: int = int(os.environ.get("TICKETTRACKER_LLM_TIMEOUT", "60")) # Nombre d'articles traités par appel LLM LLM_BATCH_SIZE: int = int(os.environ.get("TICKETTRACKER_LLM_BATCH_SIZE", "20")) + +# --------------------------------------------------------------------------- +# Fuzzy matching +# --------------------------------------------------------------------------- + +# Seuil de similarité minimum (0–100) pour rapprocher un produit Picnic d'un produit Leclerc +FUZZY_THRESHOLD: float = float(os.environ.get("TICKETTRACKER_FUZZY_THRESHOLD", "85")) diff --git a/tickettracker/db/matcher.py b/tickettracker/db/matcher.py new file mode 100644 index 0000000..1f5f3a6 --- /dev/null +++ b/tickettracker/db/matcher.py @@ -0,0 +1,90 @@ +""" +Fuzzy matching entre produits Picnic et Leclerc. + +Utilise RapidFuzz (token_sort_ratio) pour rapprocher des produits dont le nom +n'est pas identique mais désigne la même chose +(ex : "Lait demi-écremé" ↔ "LAIT DEMI ECREME"). + +Workflow : + 1. find_fuzzy_matches() — calcule les paires candidates + 2. save_fuzzy_matches() — les insère dans product_matches (ignoring duplicates) + 3. 
L'utilisateur valide/rejette via le dashboard /matches
+"""
+
+import sqlite3
+from datetime import datetime, timezone
+
+from rapidfuzz import fuzz
+
+
+def find_fuzzy_matches(
+    conn: sqlite3.Connection,
+    threshold: float = 85.0,
+) -> list[dict]:
+    """Find similar product names between the Picnic and Leclerc stores.
+
+    Scores every cross-store pair with rapidfuzz.fuzz.token_sort_ratio
+    and keeps the pairs whose score is >= threshold.
+    Identical names are skipped (they are left to get_compare_prices).
+
+    Args:
+        conn: Open SQLite connection exposing the price_history view.
+        threshold: Minimum score on RapidFuzz's 0-100 scale (default 85).
+
+    Returns:
+        List of dicts {name_picnic, name_leclerc, score}, highest score first.
+    """
+    # Distinct normalized names, one list per store
+    picnic_names = [
+        r[0]
+        for r in conn.execute(
+            "SELECT DISTINCT name_normalized FROM price_history "
+            "WHERE store='picnic' AND name_normalized IS NOT NULL"
+        )
+    ]
+    leclerc_names = [
+        r[0]
+        for r in conn.execute(
+            "SELECT DISTINCT name_normalized FROM price_history "
+            "WHERE store='leclerc' AND name_normalized IS NOT NULL"
+        )
+    ]
+
+    # Cartesian product; score_cutoff lets RapidFuzz bail out early (returns 0)
+    matches = []
+    for p in picnic_names:
+        for lec in leclerc_names:
+            if p == lec:
+                continue  # exact match, already handled by get_compare_prices
+            score = fuzz.token_sort_ratio(p, lec, score_cutoff=threshold)
+            if score >= threshold:
+                matches.append({"name_picnic": p, "name_leclerc": lec, "score": score})
+
+    return sorted(matches, key=lambda x: -x["score"])
+
+
+def save_fuzzy_matches(conn: sqlite3.Connection, matches: list[dict]) -> int:
+    """Insère les nouvelles paires dans product_matches (ignore les doublons).
+
+    Utilise INSERT OR IGNORE pour ne pas écraser les paires déjà en base
+    (statut 'validated' ou 'rejected' conservé).
+
+    Args:
+        conn: Connexion SQLite ouverte.
+        matches: Résultat de find_fuzzy_matches().
+
+    Returns:
+        Nombre de nouvelles paires réellement insérées.
+ """ + created_at = datetime.now(timezone.utc).isoformat() + inserted = 0 + with conn: + for m in matches: + cur = conn.execute( + "INSERT OR IGNORE INTO product_matches " + "(name_picnic, name_leclerc, score, status, created_at) " + "VALUES (?, ?, ?, 'pending', ?)", + (m["name_picnic"], m["name_leclerc"], m["score"], created_at), + ) + inserted += cur.rowcount + return inserted diff --git a/tickettracker/db/schema.py b/tickettracker/db/schema.py index d82b268..2efbb9b 100644 --- a/tickettracker/db/schema.py +++ b/tickettracker/db/schema.py @@ -63,6 +63,23 @@ CREATE INDEX IF NOT EXISTS idx_items_name_normalized ON items (name_normalized); """ +_SQL_CREATE_PRODUCT_MATCHES = """ +CREATE TABLE IF NOT EXISTS product_matches ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name_picnic TEXT NOT NULL, + name_leclerc TEXT NOT NULL, + score REAL NOT NULL, -- score RapidFuzz 0-100 + status TEXT NOT NULL DEFAULT 'pending', -- 'pending'|'validated'|'rejected' + created_at TEXT NOT NULL, + UNIQUE(name_picnic, name_leclerc) +); +""" + +_SQL_CREATE_PRODUCT_MATCHES_IDX = """ +CREATE INDEX IF NOT EXISTS idx_product_matches_status + ON product_matches (status); +""" + _SQL_CREATE_PRICE_HISTORY = """ CREATE VIEW IF NOT EXISTS price_history AS SELECT @@ -125,3 +142,5 @@ def init_db(db_path: str | Path = DEFAULT_DB_PATH) -> None: conn.execute(_SQL_CREATE_ITEMS_IDX) conn.execute(_SQL_CREATE_ITEMS_NORM_IDX) conn.execute(_SQL_CREATE_PRICE_HISTORY) + conn.execute(_SQL_CREATE_PRODUCT_MATCHES) + conn.execute(_SQL_CREATE_PRODUCT_MATCHES_IDX) diff --git a/tickettracker/web/api.py b/tickettracker/web/api.py index e10afc5..51ec928 100644 --- a/tickettracker/web/api.py +++ b/tickettracker/web/api.py @@ -8,6 +8,7 @@ appelle la fonction de queries.py correspondante, puis ferme la connexion. 
import sqlite3 from fastapi import APIRouter, HTTPException +from fastapi.responses import Response import tickettracker.config as config from tickettracker.db.schema import get_connection @@ -70,6 +71,46 @@ def api_receipts(): conn.close() +@router.post("/match/{match_id}/validate") +def api_match_validate(match_id: int): + """Valide une paire fuzzy (status → 'validated'). + + Retourne 404 si l'id est inconnu. + """ + conn = get_connection(config.DB_PATH) + try: + with conn: + cur = conn.execute( + "UPDATE product_matches SET status='validated' WHERE id=?", + (match_id,), + ) + finally: + conn.close() + if cur.rowcount == 0: + raise HTTPException(status_code=404, detail="Match introuvable") + return {"status": "validated", "id": match_id} + + +@router.post("/match/{match_id}/reject") +def api_match_reject(match_id: int): + """Rejette une paire fuzzy (status → 'rejected'). + + Retourne 404 si l'id est inconnu. + """ + conn = get_connection(config.DB_PATH) + try: + with conn: + cur = conn.execute( + "UPDATE product_matches SET status='rejected' WHERE id=?", + (match_id,), + ) + finally: + conn.close() + if cur.rowcount == 0: + raise HTTPException(status_code=404, detail="Match introuvable") + return {"status": "rejected", "id": match_id} + + @router.get("/receipt/{receipt_id}") def api_receipt_detail(receipt_id: int): """Détail d'un ticket et de ses articles. 
diff --git a/tickettracker/web/app.py b/tickettracker/web/app.py index e7a19aa..0896e75 100644 --- a/tickettracker/web/app.py +++ b/tickettracker/web/app.py @@ -30,6 +30,7 @@ from tickettracker.web.queries import ( get_compare_prices, get_dashboard_stats, get_monthly_spending, + get_pending_matches, get_product_history, get_product_list, get_receipt_detail, @@ -167,6 +168,32 @@ async def page_product(request: Request, name: str): ) +@app.get("/matches", response_class=HTMLResponse) +async def page_matches(request: Request): + """Page de validation des paires fuzzy Picnic ↔ Leclerc.""" + conn = get_connection(config.DB_PATH) + try: + pending = get_pending_matches(conn) + validated_count = conn.execute( + "SELECT COUNT(*) FROM product_matches WHERE status='validated'" + ).fetchone()[0] + rejected_count = conn.execute( + "SELECT COUNT(*) FROM product_matches WHERE status='rejected'" + ).fetchone()[0] + finally: + conn.close() + + return templates.TemplateResponse( + request, + "matches.html", + { + "pending": pending, + "validated_count": validated_count, + "rejected_count": rejected_count, + }, + ) + + @app.get("/receipt/{receipt_id}", response_class=HTMLResponse) async def page_receipt(request: Request, receipt_id: int): """Page détail d'un ticket.""" diff --git a/tickettracker/web/queries.py b/tickettracker/web/queries.py index 1052002..68f4bc9 100644 --- a/tickettracker/web/queries.py +++ b/tickettracker/web/queries.py @@ -83,13 +83,18 @@ def get_monthly_spending(conn: sqlite3.Connection) -> list[dict]: def get_compare_prices(conn: sqlite3.Connection) -> list[dict]: """Comparaison de prix entre Picnic et Leclerc pour les produits communs. - Utilise la vue price_history. Ne retourne que les produits présents - dans les deux enseignes. Trié par écart décroissant (le plus cher en premier). 
+ Combine deux sources : + - Correspondances exactes (même name_normalized dans les deux enseignes) + - Correspondances fuzzy validées dans product_matches (status='validated') + + Les doublons éventuels (un produit déjà en exact ET en fuzzy) sont éliminés + par UNION (qui déduplique) + sélection par nom picnic. Returns: - Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct}. - diff = price_leclerc - price_picnic (positif = Leclerc plus cher) - diff_pct = diff / MIN(price_picnic, price_leclerc) * 100 + Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct, match_type}. + diff = price_leclerc - price_picnic (positif = Leclerc plus cher) + diff_pct = diff / MIN(price_picnic, price_leclerc) * 100 + match_type = 'exact' ou 'fuzzy' """ rows = conn.execute( """ @@ -101,32 +106,67 @@ def get_compare_prices(conn: sqlite3.Connection) -> list[dict]: FROM price_history WHERE name_normalized IS NOT NULL GROUP BY name_normalized, store + ), + exact_matches AS ( + SELECT + a.name_normalized AS name, + a.name_normalized AS name_display, + a.avg_price AS price_picnic, + b.avg_price AS price_leclerc, + ROUND(b.avg_price - a.avg_price, 2) AS diff, + ROUND( + (b.avg_price - a.avg_price) + / MIN(a.avg_price, b.avg_price) * 100 + , 1) AS diff_pct, + 'exact' AS match_type + FROM avg_by_store a + JOIN avg_by_store b + ON a.name_normalized = b.name_normalized + AND a.store = 'picnic' + AND b.store = 'leclerc' + ), + fuzzy_matches AS ( + SELECT + pm.name_picnic AS name, + pm.name_picnic || ' ≈ ' || pm.name_leclerc AS name_display, + ap_p.avg_price AS price_picnic, + ap_l.avg_price AS price_leclerc, + ROUND(ap_l.avg_price - ap_p.avg_price, 2) AS diff, + ROUND( + (ap_l.avg_price - ap_p.avg_price) + / MIN(ap_p.avg_price, ap_l.avg_price) * 100 + , 1) AS diff_pct, + 'fuzzy' AS match_type + FROM product_matches pm + JOIN avg_by_store ap_p + ON ap_p.name_normalized = pm.name_picnic AND ap_p.store = 'picnic' + JOIN avg_by_store ap_l + ON ap_l.name_normalized = 
pm.name_leclerc AND ap_l.store = 'leclerc' + WHERE pm.status = 'validated' + -- Exclure si déjà présent en exact match + AND pm.name_picnic NOT IN (SELECT name FROM exact_matches) ) - SELECT - a.name_normalized AS name, - a.avg_price AS price_picnic, - b.avg_price AS price_leclerc, - ROUND(b.avg_price - a.avg_price, 2) AS diff, - ROUND( - (b.avg_price - a.avg_price) - / MIN(a.avg_price, b.avg_price) * 100 - , 1) AS diff_pct - FROM avg_by_store a - JOIN avg_by_store b - ON a.name_normalized = b.name_normalized - AND a.store = 'picnic' - AND b.store = 'leclerc' - ORDER BY ABS(b.avg_price - a.avg_price) DESC + SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type + FROM ( + SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type + FROM exact_matches + UNION ALL + SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type + FROM fuzzy_matches + ) + ORDER BY ABS(diff) DESC """ ).fetchall() return [ { "name": r["name"], + "name_display": r["name_display"], "price_picnic": r["price_picnic"], "price_leclerc": r["price_leclerc"], "diff": r["diff"], "diff_pct": r["diff_pct"], + "match_type": r["match_type"], } for r in rows ] @@ -279,6 +319,91 @@ def get_receipt_detail(conn: sqlite3.Connection, receipt_id: int) -> dict | None } +def get_pending_matches(conn: sqlite3.Connection) -> list[dict]: + """Paires en attente de validation, avec prix moyens des deux enseignes. + + Returns: + Liste de dicts {id, name_picnic, price_picnic, name_leclerc, price_leclerc, score}. + price_picnic / price_leclerc : prix moyen unitaire de ce produit dans la vue + price_history (None si aucune occurrence pour ce nom normalisé). 
+    """
+    rows = conn.execute(
+        """
+        SELECT
+            pm.id,
+            pm.name_picnic,
+            pm.name_leclerc,
+            pm.score,
+            ROUND(AVG(CASE WHEN ph.store='picnic' THEN ph.unit_price END), 2) AS price_picnic,
+            ROUND(AVG(CASE WHEN ph.store='leclerc' THEN ph.unit_price END), 2) AS price_leclerc
+        FROM product_matches pm
+        -- Each side of the pair only averages rows carrying its own name in
+        -- its own store: a name that exists in both stores must not leak
+        -- into the other column's average.
+        LEFT JOIN price_history ph
+            ON (ph.store = 'picnic' AND ph.name_normalized = pm.name_picnic)
+            OR (ph.store = 'leclerc' AND ph.name_normalized = pm.name_leclerc)
+        WHERE pm.status = 'pending'
+        GROUP BY pm.id
+        ORDER BY pm.score DESC
+        """
+    ).fetchall()
+
+    return [
+        {
+            "id": r["id"],
+            "name_picnic": r["name_picnic"],
+            "name_leclerc": r["name_leclerc"],
+            "score": r["score"],
+            "price_picnic": r["price_picnic"],
+            "price_leclerc": r["price_leclerc"],
+        }
+        for r in rows
+    ]
+
+
+def get_validated_matches(conn: sqlite3.Connection) -> list[dict]:
+    """Paires validées pour enrichir get_compare_prices.
+
+    Returns:
+        Liste de dicts {name_picnic, price_picnic, name_leclerc, price_leclerc, diff, diff_pct}.
+    """
+    rows = conn.execute(
+        """
+        WITH avg_prices AS (
+            SELECT name_normalized, store, ROUND(AVG(unit_price), 2) AS avg_price
+            FROM price_history
+            WHERE name_normalized IS NOT NULL
+            GROUP BY name_normalized, store
+        )
+        SELECT
+            pm.id,
+            pm.name_picnic,
+            pm.name_leclerc,
+            ap_p.avg_price AS price_picnic,
+            ap_l.avg_price AS price_leclerc,
+            ROUND(ap_l.avg_price - ap_p.avg_price, 2) AS diff,
+            ROUND(
+                (ap_l.avg_price - ap_p.avg_price)
+                / MIN(ap_p.avg_price, ap_l.avg_price) * 100
+            , 1) AS diff_pct
+        FROM product_matches pm
+        JOIN avg_prices ap_p ON ap_p.name_normalized = pm.name_picnic AND ap_p.store = 'picnic'
+        JOIN avg_prices ap_l ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
+        WHERE pm.status = 'validated'
+        ORDER BY ABS(ap_l.avg_price - ap_p.avg_price) DESC
+        """
+    ).fetchall()
+
+    return [
+        {
+            "name_picnic": r["name_picnic"],
+            "name_leclerc": r["name_leclerc"],
+            "price_picnic": r["price_picnic"],
+            "price_leclerc": r["price_leclerc"],
+            "diff": r["diff"],
+            "diff_pct": r["diff_pct"],
+        }
+        for r in
rows + ] + + def get_product_list(conn: sqlite3.Connection) -> list[str]: """Liste tous les noms normalisés distincts (non NULL) pour le sélecteur. diff --git a/tickettracker/web/static/style.css b/tickettracker/web/static/style.css index b0a6d5b..d2ad079 100644 --- a/tickettracker/web/static/style.css +++ b/tickettracker/web/static/style.css @@ -46,3 +46,72 @@ .overflow-auto { overflow-x: auto; } + +/* Badge pour les correspondances fuzzy dans la table compare */ +.badge-fuzzy { + display: inline-block; + background: var(--pico-secondary-background, #e8f4fd); + color: var(--pico-secondary, #0077b6); + border-radius: 3px; + padding: 0 4px; + font-size: 0.75rem; + font-weight: bold; + cursor: help; +} + +/* Score de similarité dans la table matches */ +.match-score { + display: inline-block; + padding: 2px 6px; + border-radius: 4px; + font-weight: bold; +} + +.score-high { background: #d4edda; color: #155724; } +.score-medium { background: #fff3cd; color: #856404; } +.score-low { background: #f8d7da; color: #721c24; } + +/* Boutons valider/rejeter dans la table matches */ +.btn-validate { + background: var(--pico-primary); + color: white; + border: none; + padding: 4px 10px; + border-radius: 4px; + cursor: pointer; + font-size: 0.85rem; +} + +.btn-reject { + padding: 4px 10px; + font-size: 0.85rem; +} + +.match-actions { + white-space: nowrap; +} + +/* Formulaire de filtre de dates */ +.date-filter { + display: flex; + gap: 0.5rem; + align-items: center; + flex-wrap: wrap; + margin-bottom: 1.5rem; + padding: 0.75rem 1rem; + background: var(--pico-card-background-color, #f8f9fa); + border-radius: 6px; +} + +.date-filter input[type="month"] { + width: auto; + margin: 0; + padding: 4px 8px; +} + +.date-filter button, +.date-filter a { + margin: 0; + padding: 4px 12px; + font-size: 0.9rem; +} diff --git a/tickettracker/web/templates/base.html b/tickettracker/web/templates/base.html index e9664c0..c38aa38 100644 --- a/tickettracker/web/templates/base.html +++ 
b/tickettracker/web/templates/base.html @@ -20,6 +20,7 @@ diff --git a/tickettracker/web/templates/compare.html b/tickettracker/web/templates/compare.html index 2d0172e..170aef1 100644 --- a/tickettracker/web/templates/compare.html +++ b/tickettracker/web/templates/compare.html @@ -38,7 +38,12 @@ {% for p in products %} - {{ p.name }} + + {{ p.name_display }} + {% if p.match_type == 'fuzzy' %} + ~ + {% endif %} + {{ "%.2f"|format(p.price_picnic) }} € {{ "%.2f"|format(p.price_leclerc) }} € @@ -56,7 +61,12 @@ -

Positif = Leclerc plus cher, négatif = Picnic plus cher.

+

+ Positif = Leclerc plus cher, négatif = Picnic plus cher.
+ ~ = correspondance fuzzy validée (noms différents, même produit) +

+ +

Gérer les correspondances fuzzy →

{% endif %} {% endblock %} diff --git a/tickettracker/web/templates/matches.html b/tickettracker/web/templates/matches.html new file mode 100644 index 0000000..3248613 --- /dev/null +++ b/tickettracker/web/templates/matches.html @@ -0,0 +1,85 @@ +{% extends "base.html" %} + +{% block title %}Correspondances fuzzy — TicketTracker{% endblock %} + +{% block content %} +

Correspondances Picnic ↔ Leclerc

+ +

+ Ces paires ont été détectées automatiquement par fuzzy matching. + Validez celles qui désignent le même produit pour enrichir la comparaison de prix. +

+ + +
+
+

{{ pending | length }}

+

En attente

+
+
+

{{ validated_count }}

+

Validées

+
+
+

{{ rejected_count }}

+

Rejetées

+
+
+ +{% if pending %} +
+

Paires à valider

+
+ + + + + + + + + + + + + {% for m in pending %} + + + + + + + + + {% endfor %} + +
Produit PicnicPrix moy.Produit LeclercPrix moy.ScoreAction
{{ m.name_picnic }}{% if m.price_picnic %}{{ "%.2f"|format(m.price_picnic) }} €{% else %}—{% endif %}{{ m.name_leclerc }}{% if m.price_leclerc %}{{ "%.2f"|format(m.price_leclerc) }} €{% else %}—{% endif %} + + {{ "%.0f"|format(m.score) }}% + + +
+ +
+
+ +
+
+
+
+ +{% else %} +
+

+ Aucune paire en attente. + {% if validated_count == 0 and rejected_count == 0 %} + Lancez d'abord la commande de matching : +

python -m tickettracker.cli match --threshold 85
+ {% else %} + Toutes les paires ont été traitées ({{ validated_count }} validées, {{ rejected_count }} rejetées). + {% endif %} +

+
+{% endif %} + +{% endblock %}