feat: fuzzy matching Picnic ↔ Leclerc + page /matches dans le dashboard

Nouvelle table product_matches (status: pending/validated/rejected).
Matching via RapidFuzz token_sort_ratio, seuil configurable (défaut 85%).

Workflow :
  1. python -m tickettracker.cli match [--threshold 85]
     → calcule et stocke les paires candidates
  2. http://localhost:8000/matches
     → l'utilisateur valide ou rejette chaque paire
  3. La comparaison de prix est enrichie avec les paires validées

Nouvelles dépendances : rapidfuzz, watchdog (requirements.txt).
10 tests ajoutés (test_matcher.py), tous passent.
Suite complète : 129 passent, 1 xfail, 0 échec.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-25 18:02:48 +01:00
parent f360332626
commit be4d4a7076
13 changed files with 804 additions and 23 deletions

View File

@@ -17,6 +17,12 @@ jinja2>=3.1
python-multipart>=0.0.12 python-multipart>=0.0.12
httpx>=0.27 # requis par TestClient FastAPI httpx>=0.27 # requis par TestClient FastAPI
# Watch folder (surveillance inotify Linux / FSEvents macOS)
watchdog>=4.0
# Fuzzy matching (Levenshtein/ratio pour rapprocher produits Picnic/Leclerc)
rapidfuzz>=3.9
# Tests # Tests
pytest==8.3.4 pytest==8.3.4

209
tests/test_matcher.py Normal file
View File

@@ -0,0 +1,209 @@
"""
Tests du fuzzy matcher (tickettracker/db/matcher.py).
Stratégie :
- DB SQLite en mémoire initialisée avec init_db()
- Insertion manuelle de lignes dans items/receipts pour simuler price_history
- Vérification des paires retournées et des insertions en base
"""
import sqlite3
from datetime import date, timezone, datetime
import pytest
from tickettracker.db.schema import init_db, get_connection
from tickettracker.db.matcher import find_fuzzy_matches, save_fuzzy_matches
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def db_path(tmp_path):
    """Return the path of a freshly initialised SQLite file under tmp_path."""
    database_file = tmp_path / "test_matcher.db"
    # init_db creates the schema in the file before any test touches it.
    init_db(database_file)
    return database_file
@pytest.fixture
def conn_with_products(db_path):
    """Yield a connection seeded with similar Picnic and Leclerc products.

    One receipt is inserted per store, then two items under each receipt,
    all with name_normalized filled in. The connection is closed once the
    test has finished.
    """
    insert_receipt = "INSERT INTO receipts (store, date, total, raw_json, created_at) "
    insert_item = (
        "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
    )

    connection = get_connection(db_path)
    with connection:
        # One receipt per store.
        picnic_id = connection.execute(
            insert_receipt
            + "VALUES ('picnic', '2026-01-10', 15.0, '{}', '2026-01-10T10:00:00')"
        ).lastrowid
        leclerc_id = connection.execute(
            insert_receipt
            + "VALUES ('leclerc', '2026-01-15', 20.0, '{}', '2026-01-15T10:00:00')"
        ).lastrowid

        # (receipt id, VALUES clause) in insertion order: Picnic items first,
        # then the Leclerc items that resemble them.
        seeded_items = [
            (picnic_id, "VALUES (?, 'Lait demi-écremé', 'lait demi-écrémé', 1, 'pièce', 1.05, 1.05)"),
            (picnic_id, "VALUES (?, 'Jus orange', 'jus d orange', 1, 'pièce', 2.10, 2.10)"),
            (leclerc_id, "VALUES (?, 'LAIT DEMI ECREME', 'lait demi ecreme', 1, 'pièce', 0.95, 0.95)"),
            (leclerc_id, "VALUES (?, 'FARINE BLE', 'farine blé', 1, 'pièce', 1.20, 1.20)"),
        ]
        for receipt_id, values_clause in seeded_items:
            connection.execute(insert_item + values_clause, (receipt_id,))

    yield connection
    connection.close()
@pytest.fixture
def conn_empty(db_path):
    """Yield a connection to an initialised database that holds no items."""
    connection = get_connection(db_path)
    yield connection
    connection.close()
# ---------------------------------------------------------------------------
# Tests find_fuzzy_matches
# ---------------------------------------------------------------------------
def test_find_fuzzy_matches_returns_list(conn_with_products):
    """find_fuzzy_matches returns a list object."""
    assert isinstance(find_fuzzy_matches(conn_with_products, threshold=70.0), list)
def test_find_fuzzy_matches_detects_similar_products(conn_with_products):
    """Similar products (the two 'lait demi' entries) are found at a low threshold."""
    pairs = find_fuzzy_matches(conn_with_products, threshold=70.0)
    assert len(pairs) >= 1
    # The Picnic side of the expected pair must appear among the results.
    assert "lait demi-écrémé" in [pair["name_picnic"] for pair in pairs]
def test_find_fuzzy_matches_threshold_respected(conn_with_products):
    """At threshold 100 the seeded data is expected to yield no pair."""
    assert find_fuzzy_matches(conn_with_products, threshold=100.0) == []
def test_find_fuzzy_matches_high_threshold_reduces_results(conn_with_products):
    """A higher threshold never returns more pairs than a lower one."""
    loose_count = len(find_fuzzy_matches(conn_with_products, threshold=50.0))
    strict_count = len(find_fuzzy_matches(conn_with_products, threshold=90.0))
    assert strict_count <= loose_count
def test_find_fuzzy_matches_sorted_by_score_desc(conn_with_products):
    """Results come back ordered by descending score."""
    observed = [
        pair["score"]
        for pair in find_fuzzy_matches(conn_with_products, threshold=50.0)
    ]
    assert observed == sorted(observed, reverse=True)
def test_find_fuzzy_matches_result_structure(conn_with_products):
    """Every returned pair has name_picnic, name_leclerc and a score in [0, 100]."""
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    # Fail loudly when nothing comes back: guarding the checks with
    # `if matches:` would let the test pass without asserting anything.
    assert matches, "expected at least one fuzzy pair at threshold 70"
    # Check every pair, not only the first one.
    for m in matches:
        assert "name_picnic" in m
        assert "name_leclerc" in m
        assert "score" in m
        assert 0 <= m["score"] <= 100
def test_find_fuzzy_matches_exact_same_excluded(conn_with_products):
    """Identical names in both stores must not be reported as a fuzzy pair."""
    db = conn_with_products
    receipt_sql = "INSERT INTO receipts (store, date, total, raw_json, created_at) "
    item_sql = (
        "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
    )

    # Seed the same normalized name ('pain') under one receipt per store.
    with db:
        picnic_receipt = db.execute(
            receipt_sql
            + "VALUES ('picnic', '2026-02-01', 5.0, '{}', '2026-02-01T10:00:00')"
        ).lastrowid
        db.execute(
            item_sql + "VALUES (?, 'pain', 'pain', 1, 'pièce', 1.0, 1.0)",
            (picnic_receipt,),
        )
        leclerc_receipt = db.execute(
            receipt_sql
            + "VALUES ('leclerc', '2026-02-01', 5.0, '{}', '2026-02-01T11:00:00')"
        ).lastrowid
        db.execute(
            item_sql + "VALUES (?, 'pain', 'pain', 1, 'pièce', 0.9, 0.9)",
            (leclerc_receipt,),
        )

    pairs = find_fuzzy_matches(db, threshold=70.0)
    # No pair may have the same name on both sides.
    assert all(pair["name_picnic"] != pair["name_leclerc"] for pair in pairs)
def test_find_fuzzy_matches_empty_db(conn_empty):
    """With no normalized products in the database the result is an empty list."""
    assert find_fuzzy_matches(conn_empty, threshold=85.0) == []
# ---------------------------------------------------------------------------
# Tests save_fuzzy_matches
# ---------------------------------------------------------------------------
def test_save_fuzzy_matches_inserts_rows(conn_with_products):
    """On a fresh table, save_fuzzy_matches reports one insert per pair."""
    pairs = find_fuzzy_matches(conn_with_products, threshold=70.0)
    assert save_fuzzy_matches(conn_with_products, pairs) == len(pairs)
def test_save_fuzzy_matches_ignores_duplicates(conn_with_products):
    """Saving the same pairs a second time inserts nothing."""
    pairs = find_fuzzy_matches(conn_with_products, threshold=70.0)
    save_fuzzy_matches(conn_with_products, pairs)
    # The second call hits rows that already exist.
    assert save_fuzzy_matches(conn_with_products, pairs) == 0
def test_save_fuzzy_matches_status_pending(conn_with_products):
    """Pairs written by save_fuzzy_matches carry the 'pending' status."""
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    save_fuzzy_matches(conn_with_products, matches)
    rows = conn_with_products.execute(
        "SELECT status FROM product_matches"
    ).fetchall()
    # all() over an empty result is True, so require at least one stored row
    # before checking the statuses.
    assert rows, "expected at least one stored pair at threshold 70"
    assert all(r["status"] == "pending" for r in rows)
def test_save_fuzzy_matches_returns_correct_count(conn_with_products):
    """Saving a single hand-built pair reports exactly one inserted row."""
    single_pair = {"name_picnic": "test1", "name_leclerc": "test2", "score": 90.0}
    assert save_fuzzy_matches(conn_with_products, [single_pair]) == 1
def test_save_fuzzy_matches_empty_list(conn_with_products):
    """An empty input returns 0 and leaves product_matches empty."""
    assert save_fuzzy_matches(conn_with_products, []) == 0
    stored = conn_with_products.execute(
        "SELECT COUNT(*) FROM product_matches"
    ).fetchone()[0]
    assert stored == 0

View File

@@ -28,7 +28,9 @@ def build_parser() -> argparse.ArgumentParser:
tickettracker.cli tickettracker.cli
├── import <file> --source {picnic,leclerc} [--db PATH] ├── import <file> --source {picnic,leclerc} [--db PATH]
├── stats [--db PATH] ├── stats [--db PATH]
── normalize [--dry-run] [--batch-size N] [--db PATH] ── normalize [--dry-run] [--batch-size N] [--db PATH]
├── match [--threshold N] [--db PATH]
└── watch [--inbox PATH] [--db PATH]
""" """
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog="python -m tickettracker.cli", prog="python -m tickettracker.cli",
@@ -99,6 +101,47 @@ def build_parser() -> argparse.ArgumentParser:
help=f"Articles par appel LLM (défaut : {_cfg.LLM_BATCH_SIZE})", help=f"Articles par appel LLM (défaut : {_cfg.LLM_BATCH_SIZE})",
) )
# --- Sous-commande : match ---
from tickettracker import config as _cfg
match_parser = subparsers.add_parser(
"match",
help="Calcule les paires fuzzy entre produits Picnic et Leclerc",
)
match_parser.add_argument(
"--db",
type=Path,
default=DEFAULT_DB_PATH,
metavar="PATH",
help=f"Chemin vers la base SQLite (défaut : {DEFAULT_DB_PATH})",
)
match_parser.add_argument(
"--threshold",
type=float,
default=_cfg.FUZZY_THRESHOLD,
metavar="N",
help=f"Score minimum RapidFuzz 0-100 (défaut : {_cfg.FUZZY_THRESHOLD})",
)
# --- Sous-commande : watch ---
watch_parser = subparsers.add_parser(
"watch",
help="Surveille inbox/ et importe automatiquement les nouveaux fichiers",
)
watch_parser.add_argument(
"--db",
type=Path,
default=DEFAULT_DB_PATH,
metavar="PATH",
help=f"Chemin vers la base SQLite (défaut : {DEFAULT_DB_PATH})",
)
watch_parser.add_argument(
"--inbox",
type=Path,
default=Path("inbox"),
metavar="PATH",
help="Répertoire inbox/ à surveiller (défaut : ./inbox)",
)
return parser return parser
@@ -205,6 +248,51 @@ def cmd_normalize(args: argparse.Namespace) -> int:
return 1 return 1
def cmd_match(args: argparse.Namespace) -> int:
    """Run the 'match' sub-command.

    Computes the fuzzy candidate pairs between Picnic and Leclerc products,
    stores them in product_matches and prints a one-line summary.

    Args:
        args: Parsed CLI arguments; args.db and args.threshold are read.

    Returns:
        0 on success, 1 when the database file does not exist.
    """
    from tickettracker.db import schema
    from tickettracker.db.matcher import find_fuzzy_matches, save_fuzzy_matches

    if not Path(args.db).exists():
        print(f"Base de données absente : {args.db}", file=sys.stderr)
        print("Importez d'abord un ticket avec la commande 'import'.", file=sys.stderr)
        return 1

    # Using a sqlite3 connection as a context manager only commits or rolls
    # back the transaction; it does not close the connection. Close it
    # explicitly so the handle is released even if matching raises.
    # NOTE(review): assumes get_connection returns a sqlite3.Connection-like
    # object with close(), as the other call sites in this project do.
    conn = schema.get_connection(args.db)
    try:
        with conn:
            matches = find_fuzzy_matches(conn, threshold=args.threshold)
            inserted = save_fuzzy_matches(conn, matches)
    finally:
        conn.close()

    total = len(matches)
    # Pairs that were found but already stored are counted as ignored.
    ignored = total - inserted
    print(
        f"{inserted} nouvelles paires trouvées (seuil={args.threshold:.0f}%). "
        f"{ignored} ignorées (déjà connues)."
    )
    return 0
def cmd_watch(args: argparse.Namespace) -> int:
    """Run the 'watch' sub-command.

    Resolves args.inbox to an absolute path and hands it, together with
    args.db, to tickettracker.watcher.watch.
    NOTE(review): watch() is presumably blocking until the user interrupts
    it (Ctrl+C); its implementation is not visible here.

    Returns:
        0 once watch() has returned.
    """
    from tickettracker.watcher import watch

    watch(args.inbox.resolve(), args.db)
    return 0
def main() -> None: def main() -> None:
"""Point d'entrée principal.""" """Point d'entrée principal."""
parser = build_parser() parser = build_parser()
@@ -216,6 +304,10 @@ def main() -> None:
sys.exit(cmd_stats(args)) sys.exit(cmd_stats(args))
elif args.command == "normalize": elif args.command == "normalize":
sys.exit(cmd_normalize(args)) sys.exit(cmd_normalize(args))
elif args.command == "match":
sys.exit(cmd_match(args))
elif args.command == "watch":
sys.exit(cmd_watch(args))
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -45,3 +45,10 @@ LLM_TIMEOUT: int = int(os.environ.get("TICKETTRACKER_LLM_TIMEOUT", "60"))
# Nombre d'articles traités par appel LLM # Nombre d'articles traités par appel LLM
LLM_BATCH_SIZE: int = int(os.environ.get("TICKETTRACKER_LLM_BATCH_SIZE", "20")) LLM_BATCH_SIZE: int = int(os.environ.get("TICKETTRACKER_LLM_BATCH_SIZE", "20"))
# ---------------------------------------------------------------------------
# Fuzzy matching
# ---------------------------------------------------------------------------
# Minimum similarity score (0–100) required to pair a Picnic product with a
# Leclerc product. Read from the TICKETTRACKER_FUZZY_THRESHOLD environment
# variable; falls back to 85 when the variable is not set.
FUZZY_THRESHOLD: float = float(os.environ.get("TICKETTRACKER_FUZZY_THRESHOLD", "85"))

View File

@@ -0,0 +1,90 @@
"""
Fuzzy matching entre produits Picnic et Leclerc.
Utilise RapidFuzz (token_sort_ratio) pour rapprocher des produits dont le nom
n'est pas identique mais désigne la même chose
(ex : "Lait demi-écremé" ↔ "LAIT DEMI ECREME").
Workflow :
1. find_fuzzy_matches() — calcule les paires candidates
2. save_fuzzy_matches() — les insère dans product_matches (ignoring duplicates)
3. L'utilisateur valide/rejette via le dashboard /matches
"""
import sqlite3
from datetime import datetime, timezone
from rapidfuzz import fuzz
def find_fuzzy_matches(
    conn: sqlite3.Connection,
    threshold: float = 85.0,
) -> list[dict]:
    """Find similar product names between Picnic and Leclerc.

    Every distinct Picnic name is scored against every distinct Leclerc name
    with rapidfuzz.fuzz.token_sort_ratio. Pairs whose two names are strictly
    equal are skipped; pairs scoring below the threshold are dropped.

    Args:
        conn: Open SQLite connection exposing price_history.
        threshold: Minimum score (0–100) for a pair to be kept (default 85).

    Returns:
        List of dicts {name_picnic, name_leclerc, score}, highest score first.
    """
    picnic_query = (
        "SELECT DISTINCT name_normalized FROM price_history "
        "WHERE store='picnic' AND name_normalized IS NOT NULL"
    )
    leclerc_query = (
        "SELECT DISTINCT name_normalized FROM price_history "
        "WHERE store='leclerc' AND name_normalized IS NOT NULL"
    )
    picnic_names = [row[0] for row in conn.execute(picnic_query)]
    leclerc_names = [row[0] for row in conn.execute(leclerc_query)]

    # Cartesian product of both name lists, filtered by the threshold.
    candidates = []
    for picnic_name in picnic_names:
        for leclerc_name in leclerc_names:
            if picnic_name != leclerc_name:  # identical names are not fuzzy pairs
                similarity = fuzz.token_sort_ratio(picnic_name, leclerc_name)
                if similarity >= threshold:
                    candidates.append(
                        {
                            "name_picnic": picnic_name,
                            "name_leclerc": leclerc_name,
                            "score": similarity,
                        }
                    )

    # Stable sort: pairs with equal scores keep their discovery order.
    candidates.sort(key=lambda pair: pair["score"], reverse=True)
    return candidates
def save_fuzzy_matches(conn: sqlite3.Connection, matches: list[dict]) -> int:
    """Store new candidate pairs in product_matches, skipping existing ones.

    INSERT OR IGNORE leaves rows that already exist untouched, so whatever
    status they currently hold is not overwritten.

    Args:
        conn: Open SQLite connection.
        matches: Pairs as returned by find_fuzzy_matches().

    Returns:
        Number of rows actually inserted.
    """
    insert_sql = (
        "INSERT OR IGNORE INTO product_matches "
        "(name_picnic, name_leclerc, score, status, created_at) "
        "VALUES (?, ?, ?, 'pending', ?)"
    )
    # One timestamp shared by every row written in this call.
    timestamp = datetime.now(timezone.utc).isoformat()

    with conn:
        # rowcount is 1 for an inserted row and 0 for an ignored duplicate.
        return sum(
            conn.execute(
                insert_sql,
                (pair["name_picnic"], pair["name_leclerc"], pair["score"], timestamp),
            ).rowcount
            for pair in matches
        )

View File

@@ -63,6 +63,23 @@ CREATE INDEX IF NOT EXISTS idx_items_name_normalized
ON items (name_normalized); ON items (name_normalized);
""" """
# DDL for the product_matches table: one row per (name_picnic, name_leclerc)
# candidate pair with its similarity score and review status. The UNIQUE
# constraint on the pair is what lets INSERT OR IGNORE skip known pairs.
_SQL_CREATE_PRODUCT_MATCHES = """
CREATE TABLE IF NOT EXISTS product_matches (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name_picnic TEXT NOT NULL,
name_leclerc TEXT NOT NULL,
score REAL NOT NULL, -- score RapidFuzz 0-100
status TEXT NOT NULL DEFAULT 'pending', -- 'pending'|'validated'|'rejected'
created_at TEXT NOT NULL,
UNIQUE(name_picnic, name_leclerc)
);
"""
# Index on product_matches.status, the column the dashboard queries filter on.
_SQL_CREATE_PRODUCT_MATCHES_IDX = """
CREATE INDEX IF NOT EXISTS idx_product_matches_status
ON product_matches (status);
"""
_SQL_CREATE_PRICE_HISTORY = """ _SQL_CREATE_PRICE_HISTORY = """
CREATE VIEW IF NOT EXISTS price_history AS CREATE VIEW IF NOT EXISTS price_history AS
SELECT SELECT
@@ -125,3 +142,5 @@ def init_db(db_path: str | Path = DEFAULT_DB_PATH) -> None:
conn.execute(_SQL_CREATE_ITEMS_IDX) conn.execute(_SQL_CREATE_ITEMS_IDX)
conn.execute(_SQL_CREATE_ITEMS_NORM_IDX) conn.execute(_SQL_CREATE_ITEMS_NORM_IDX)
conn.execute(_SQL_CREATE_PRICE_HISTORY) conn.execute(_SQL_CREATE_PRICE_HISTORY)
conn.execute(_SQL_CREATE_PRODUCT_MATCHES)
conn.execute(_SQL_CREATE_PRODUCT_MATCHES_IDX)

View File

@@ -8,6 +8,7 @@ appelle la fonction de queries.py correspondante, puis ferme la connexion.
import sqlite3 import sqlite3
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from fastapi.responses import Response
import tickettracker.config as config import tickettracker.config as config
from tickettracker.db.schema import get_connection from tickettracker.db.schema import get_connection
@@ -70,6 +71,46 @@ def api_receipts():
conn.close() conn.close()
@router.post("/match/{match_id}/validate")
def api_match_validate(match_id: int):
    """Mark a fuzzy pair as validated (status becomes 'validated').

    Raises:
        HTTPException: 404 when no product_matches row has this id.
    """
    conn = get_connection(config.DB_PATH)
    try:
        with conn:
            updated = conn.execute(
                "UPDATE product_matches SET status='validated' WHERE id=?",
                (match_id,),
            ).rowcount
    finally:
        conn.close()

    # No row touched means the id does not exist.
    if updated == 0:
        raise HTTPException(status_code=404, detail="Match introuvable")
    return {"status": "validated", "id": match_id}
@router.post("/match/{match_id}/reject")
def api_match_reject(match_id: int):
    """Mark a fuzzy pair as rejected (status becomes 'rejected').

    Raises:
        HTTPException: 404 when no product_matches row has this id.
    """
    conn = get_connection(config.DB_PATH)
    try:
        with conn:
            updated = conn.execute(
                "UPDATE product_matches SET status='rejected' WHERE id=?",
                (match_id,),
            ).rowcount
    finally:
        conn.close()

    # No row touched means the id does not exist.
    if updated == 0:
        raise HTTPException(status_code=404, detail="Match introuvable")
    return {"status": "rejected", "id": match_id}
@router.get("/receipt/{receipt_id}") @router.get("/receipt/{receipt_id}")
def api_receipt_detail(receipt_id: int): def api_receipt_detail(receipt_id: int):
"""Détail d'un ticket et de ses articles. """Détail d'un ticket et de ses articles.

View File

@@ -30,6 +30,7 @@ from tickettracker.web.queries import (
get_compare_prices, get_compare_prices,
get_dashboard_stats, get_dashboard_stats,
get_monthly_spending, get_monthly_spending,
get_pending_matches,
get_product_history, get_product_history,
get_product_list, get_product_list,
get_receipt_detail, get_receipt_detail,
@@ -167,6 +168,32 @@ async def page_product(request: Request, name: str):
) )
@app.get("/matches", response_class=HTMLResponse)
async def page_matches(request: Request):
    """Render the review page for Picnic ↔ Leclerc fuzzy pairs.

    The template receives the pending pairs plus the number of validated
    and rejected pairs.
    """
    conn = get_connection(config.DB_PATH)
    try:
        # Gather everything the template needs while the connection is open.
        context = {
            "pending": get_pending_matches(conn),
            "validated_count": conn.execute(
                "SELECT COUNT(*) FROM product_matches WHERE status='validated'"
            ).fetchone()[0],
            "rejected_count": conn.execute(
                "SELECT COUNT(*) FROM product_matches WHERE status='rejected'"
            ).fetchone()[0],
        }
    finally:
        conn.close()

    return templates.TemplateResponse(request, "matches.html", context)
@app.get("/receipt/{receipt_id}", response_class=HTMLResponse) @app.get("/receipt/{receipt_id}", response_class=HTMLResponse)
async def page_receipt(request: Request, receipt_id: int): async def page_receipt(request: Request, receipt_id: int):
"""Page détail d'un ticket.""" """Page détail d'un ticket."""

View File

@@ -83,13 +83,18 @@ def get_monthly_spending(conn: sqlite3.Connection) -> list[dict]:
def get_compare_prices(conn: sqlite3.Connection) -> list[dict]: def get_compare_prices(conn: sqlite3.Connection) -> list[dict]:
"""Comparaison de prix entre Picnic et Leclerc pour les produits communs. """Comparaison de prix entre Picnic et Leclerc pour les produits communs.
Utilise la vue price_history. Ne retourne que les produits présents Combine deux sources :
dans les deux enseignes. Trié par écart décroissant (le plus cher en premier). - Correspondances exactes (même name_normalized dans les deux enseignes)
- Correspondances fuzzy validées dans product_matches (status='validated')
Les doublons éventuels (un produit déjà en exact ET en fuzzy) sont éliminés
par UNION (qui déduplique) + sélection par nom picnic.
Returns: Returns:
Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct}. Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct, match_type}.
diff = price_leclerc - price_picnic (positif = Leclerc plus cher) diff = price_leclerc - price_picnic (positif = Leclerc plus cher)
diff_pct = diff / MIN(price_picnic, price_leclerc) * 100 diff_pct = diff / MIN(price_picnic, price_leclerc) * 100
match_type = 'exact' ou 'fuzzy'
""" """
rows = conn.execute( rows = conn.execute(
""" """
@@ -101,32 +106,67 @@ def get_compare_prices(conn: sqlite3.Connection) -> list[dict]:
FROM price_history FROM price_history
WHERE name_normalized IS NOT NULL WHERE name_normalized IS NOT NULL
GROUP BY name_normalized, store GROUP BY name_normalized, store
) ),
exact_matches AS (
SELECT SELECT
a.name_normalized AS name, a.name_normalized AS name,
a.name_normalized AS name_display,
a.avg_price AS price_picnic, a.avg_price AS price_picnic,
b.avg_price AS price_leclerc, b.avg_price AS price_leclerc,
ROUND(b.avg_price - a.avg_price, 2) AS diff, ROUND(b.avg_price - a.avg_price, 2) AS diff,
ROUND( ROUND(
(b.avg_price - a.avg_price) (b.avg_price - a.avg_price)
/ MIN(a.avg_price, b.avg_price) * 100 / MIN(a.avg_price, b.avg_price) * 100
, 1) AS diff_pct , 1) AS diff_pct,
'exact' AS match_type
FROM avg_by_store a FROM avg_by_store a
JOIN avg_by_store b JOIN avg_by_store b
ON a.name_normalized = b.name_normalized ON a.name_normalized = b.name_normalized
AND a.store = 'picnic' AND a.store = 'picnic'
AND b.store = 'leclerc' AND b.store = 'leclerc'
ORDER BY ABS(b.avg_price - a.avg_price) DESC ),
fuzzy_matches AS (
SELECT
pm.name_picnic AS name,
pm.name_picnic || '' || pm.name_leclerc AS name_display,
ap_p.avg_price AS price_picnic,
ap_l.avg_price AS price_leclerc,
ROUND(ap_l.avg_price - ap_p.avg_price, 2) AS diff,
ROUND(
(ap_l.avg_price - ap_p.avg_price)
/ MIN(ap_p.avg_price, ap_l.avg_price) * 100
, 1) AS diff_pct,
'fuzzy' AS match_type
FROM product_matches pm
JOIN avg_by_store ap_p
ON ap_p.name_normalized = pm.name_picnic AND ap_p.store = 'picnic'
JOIN avg_by_store ap_l
ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
WHERE pm.status = 'validated'
-- Exclure si déjà présent en exact match
AND pm.name_picnic NOT IN (SELECT name FROM exact_matches)
)
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
FROM (
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
FROM exact_matches
UNION ALL
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
FROM fuzzy_matches
)
ORDER BY ABS(diff) DESC
""" """
).fetchall() ).fetchall()
return [ return [
{ {
"name": r["name"], "name": r["name"],
"name_display": r["name_display"],
"price_picnic": r["price_picnic"], "price_picnic": r["price_picnic"],
"price_leclerc": r["price_leclerc"], "price_leclerc": r["price_leclerc"],
"diff": r["diff"], "diff": r["diff"],
"diff_pct": r["diff_pct"], "diff_pct": r["diff_pct"],
"match_type": r["match_type"],
} }
for r in rows for r in rows
] ]
@@ -279,6 +319,91 @@ def get_receipt_detail(conn: sqlite3.Connection, receipt_id: int) -> dict | None
} }
def get_pending_matches(conn: sqlite3.Connection) -> list[dict]:
    """Return the pairs awaiting review, with the average price on each side.

    The Picnic average only uses Picnic rows named name_picnic, and the
    Leclerc average only uses Leclerc rows named name_leclerc. Joining on
    "either name, any store" would mix in unrelated rows whenever one store
    also sells a product under the other store's name.

    Args:
        conn: Open SQLite connection whose rows support access by column
            name (e.g. row_factory = sqlite3.Row).

    Returns:
        List of dicts {id, name_picnic, name_leclerc, score, price_picnic,
        price_leclerc}, highest score first. A price is None when
        price_history has no row for that name in that store.
    """
    rows = conn.execute(
        """
        SELECT
            pm.id,
            pm.name_picnic,
            pm.name_leclerc,
            pm.score,
            ROUND(AVG(CASE WHEN ph.store='picnic' THEN ph.unit_price END), 2) AS price_picnic,
            ROUND(AVG(CASE WHEN ph.store='leclerc' THEN ph.unit_price END), 2) AS price_leclerc
        FROM product_matches pm
        LEFT JOIN price_history ph
            ON (ph.store = 'picnic' AND ph.name_normalized = pm.name_picnic)
            OR (ph.store = 'leclerc' AND ph.name_normalized = pm.name_leclerc)
        WHERE pm.status = 'pending'
        GROUP BY pm.id
        ORDER BY pm.score DESC
        """
    ).fetchall()

    return [
        {
            "id": r["id"],
            "name_picnic": r["name_picnic"],
            "name_leclerc": r["name_leclerc"],
            "score": r["score"],
            "price_picnic": r["price_picnic"],
            "price_leclerc": r["price_leclerc"],
        }
        for r in rows
    ]
def get_validated_matches(conn: sqlite3.Connection) -> list[dict]:
    """Return validated pairs with per-store average prices and their gap.

    Only pairs that have a price on both sides are returned (inner joins).
    diff is price_leclerc - price_picnic; diff_pct expresses diff as a
    percentage of the lower of the two prices.

    Args:
        conn: Open SQLite connection whose rows support access by column name.

    Returns:
        List of dicts {name_picnic, name_leclerc, price_picnic,
        price_leclerc, diff, diff_pct}, largest absolute gap first.
    """
    query = """
        WITH avg_prices AS (
            SELECT name_normalized, store, ROUND(AVG(unit_price), 2) AS avg_price
            FROM price_history
            WHERE name_normalized IS NOT NULL
            GROUP BY name_normalized, store
        )
        SELECT
            pm.id,
            pm.name_picnic,
            pm.name_leclerc,
            ap_p.avg_price AS price_picnic,
            ap_l.avg_price AS price_leclerc,
            ROUND(ap_l.avg_price - ap_p.avg_price, 2) AS diff,
            ROUND(
                (ap_l.avg_price - ap_p.avg_price)
                / MIN(ap_p.avg_price, ap_l.avg_price) * 100
            , 1) AS diff_pct
        FROM product_matches pm
        JOIN avg_prices ap_p ON ap_p.name_normalized = pm.name_picnic AND ap_p.store = 'picnic'
        JOIN avg_prices ap_l ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
        WHERE pm.status = 'validated'
        ORDER BY ABS(ap_l.avg_price - ap_p.avg_price) DESC
        """
    # Columns exposed to callers, in output order (pm.id is selected but not returned).
    exposed = (
        "name_picnic",
        "name_leclerc",
        "price_picnic",
        "price_leclerc",
        "diff",
        "diff_pct",
    )
    return [
        {column: record[column] for column in exposed}
        for record in conn.execute(query).fetchall()
    ]
def get_product_list(conn: sqlite3.Connection) -> list[str]: def get_product_list(conn: sqlite3.Connection) -> list[str]:
"""Liste tous les noms normalisés distincts (non NULL) pour le sélecteur. """Liste tous les noms normalisés distincts (non NULL) pour le sélecteur.

View File

@@ -46,3 +46,72 @@
.overflow-auto { .overflow-auto {
overflow-x: auto; overflow-x: auto;
} }
/* Badge marking fuzzy matches in the compare table */
.badge-fuzzy {
display: inline-block;
background: var(--pico-secondary-background, #e8f4fd);
color: var(--pico-secondary, #0077b6);
border-radius: 3px;
padding: 0 4px;
font-size: 0.75rem;
font-weight: bold;
cursor: help;
}
/* Similarity score pill in the matches table */
.match-score {
display: inline-block;
padding: 2px 6px;
border-radius: 4px;
font-weight: bold;
}
.score-high { background: #d4edda; color: #155724; }
.score-medium { background: #fff3cd; color: #856404; }
.score-low { background: #f8d7da; color: #721c24; }
/* Validate / reject buttons in the matches table */
.btn-validate {
background: var(--pico-primary);
color: white;
border: none;
padding: 4px 10px;
border-radius: 4px;
cursor: pointer;
font-size: 0.85rem;
}
.btn-reject {
padding: 4px 10px;
font-size: 0.85rem;
}
.match-actions {
white-space: nowrap;
}
/* Date filter form */
.date-filter {
display: flex;
gap: 0.5rem;
align-items: center;
flex-wrap: wrap;
margin-bottom: 1.5rem;
padding: 0.75rem 1rem;
background: var(--pico-card-background-color, #f8f9fa);
border-radius: 6px;
}
.date-filter input[type="month"] {
width: auto;
margin: 0;
padding: 4px 8px;
}
.date-filter button,
.date-filter a {
margin: 0;
padding: 4px 12px;
font-size: 0.9rem;
}

View File

@@ -20,6 +20,7 @@
<ul> <ul>
<li><a href="/">Accueil</a></li> <li><a href="/">Accueil</a></li>
<li><a href="/compare">Comparer</a></li> <li><a href="/compare">Comparer</a></li>
<li><a href="/matches">Correspondances</a></li>
<li><a href="/api/docs" target="_blank">API docs</a></li> <li><a href="/api/docs" target="_blank">API docs</a></li>
</ul> </ul>
</nav> </nav>

View File

@@ -38,7 +38,12 @@
<tbody> <tbody>
{% for p in products %} {% for p in products %}
<tr> <tr>
<td>{{ p.name }}</td> <td>
{{ p.name_display }}
{% if p.match_type == 'fuzzy' %}
<span class="badge-fuzzy" title="Correspondance fuzzy validée">~</span>
{% endif %}
</td>
<td>{{ "%.2f"|format(p.price_picnic) }} €</td> <td>{{ "%.2f"|format(p.price_picnic) }} €</td>
<td>{{ "%.2f"|format(p.price_leclerc) }} €</td> <td>{{ "%.2f"|format(p.price_leclerc) }} €</td>
<td class="{% if p.diff > 0 %}diff-positive{% elif p.diff < 0 %}diff-negative{% endif %}"> <td class="{% if p.diff > 0 %}diff-positive{% elif p.diff < 0 %}diff-negative{% endif %}">
@@ -56,7 +61,12 @@
</table> </table>
</div> </div>
<p><small>Positif = Leclerc plus cher, négatif = Picnic plus cher.</small></p> <p>
<small>Positif = Leclerc plus cher, négatif = Picnic plus cher.</small><br>
<small><span class="badge-fuzzy">~</span> = correspondance fuzzy validée (noms différents, même produit)</small>
</p>
<p><a href="/matches">Gérer les correspondances fuzzy →</a></p>
{% endif %} {% endif %}
{% endblock %} {% endblock %}

View File

@@ -0,0 +1,85 @@
{% extends "base.html" %}
{# Review page for fuzzy pairs. Context: pending (list of pairs),
   validated_count and rejected_count (integers). #}
{% block title %}Correspondances fuzzy — TicketTracker{% endblock %}
{% block content %}
<h1>Correspondances Picnic ↔ Leclerc</h1>
<p>
  Ces paires ont été détectées automatiquement par fuzzy matching.
  Validez celles qui désignent le même produit pour enrichir la comparaison de prix.
</p>
<!-- Résumé statistiques -->
<div class="stat-grid">
  <article class="stat-card">
    <h3>{{ pending | length }}</h3>
    <p>En attente</p>
  </article>
  <article class="stat-card">
    <h3>{{ validated_count }}</h3>
    <p>Validées</p>
  </article>
  <article class="stat-card">
    <h3>{{ rejected_count }}</h3>
    <p>Rejetées</p>
  </article>
</div>
{% if pending %}
<article>
  <h2>Paires à valider</h2>
  <div class="overflow-auto">
    <table>
      <thead>
        <tr>
          <th>Produit Picnic</th>
          <th>Prix moy.</th>
          <th>Produit Leclerc</th>
          <th>Prix moy.</th>
          <th>Score</th>
          <th>Action</th>
        </tr>
      </thead>
      <tbody>
        {% for m in pending %}
        <tr>
          <td>{{ m.name_picnic }}</td>
          {# Test against none so a real 0.00 price is shown, not the dash. #}
          <td>{% if m.price_picnic is not none %}{{ "%.2f"|format(m.price_picnic) }} €{% else %}—{% endif %}</td>
          <td>{{ m.name_leclerc }}</td>
          <td>{% if m.price_leclerc is not none %}{{ "%.2f"|format(m.price_leclerc) }} €{% else %}—{% endif %}</td>
          <td>
            <small class="match-score {% if m.score >= 95 %}score-high{% elif m.score >= 85 %}score-medium{% else %}score-low{% endif %}">
              {{ "%.0f"|format(m.score) }}%
            </small>
          </td>
          <td class="match-actions">
            <form method="post" action="/api/match/{{ m.id }}/validate" style="display:inline">
              <button type="submit" class="btn-validate">✓ Valider</button>
            </form>
            <form method="post" action="/api/match/{{ m.id }}/reject" style="display:inline">
              <button type="submit" class="btn-reject secondary outline">✗ Rejeter</button>
            </form>
          </td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
  </div>
</article>
{% else %}
<article>
  {# <pre> is block-level and may not sit inside <p>, so the command gets
     its own element after the paragraph. #}
  {% if validated_count == 0 and rejected_count == 0 %}
  <p>
    Aucune paire en attente.
    Lancez d'abord la commande de matching :
  </p>
  <pre><code>python -m tickettracker.cli match --threshold 85</code></pre>
  {% else %}
  <p>
    Aucune paire en attente.
    Toutes les paires ont été traitées ({{ validated_count }} validées, {{ rejected_count }} rejetées).
  </p>
  {% endif %}
</article>
{% endif %}
{% endblock %}