feat: fuzzy matching Picnic ↔ Leclerc + page /matches dans le dashboard
Nouvelle table product_matches (status: pending/validated/rejected).
Matching via RapidFuzz token_sort_ratio, seuil configurable (défaut 85%).
Workflow :
1. python -m tickettracker.cli match [--threshold 85]
→ calcule et stocke les paires candidates
2. http://localhost:8000/matches
→ l'utilisateur valide ou rejette chaque paire
3. La comparaison de prix est enrichie avec les paires validées
Nouvelles dépendances : rapidfuzz, watchdog (requirements.txt).
10 tests ajoutés (test_matcher.py), tous passent.
Suite complète : 129 passent, 1 xfail, 0 échec.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -83,13 +83,18 @@ def get_monthly_spending(conn: sqlite3.Connection) -> list[dict]:
|
||||
def get_compare_prices(conn: sqlite3.Connection) -> list[dict]:
|
||||
"""Comparaison de prix entre Picnic et Leclerc pour les produits communs.
|
||||
|
||||
Utilise la vue price_history. Ne retourne que les produits présents
|
||||
dans les deux enseignes. Trié par écart décroissant (le plus cher en premier).
|
||||
Combine deux sources :
|
||||
- Correspondances exactes (même name_normalized dans les deux enseignes)
|
||||
- Correspondances fuzzy validées dans product_matches (status='validated')
|
||||
|
||||
Les doublons éventuels (un produit déjà en exact ET en fuzzy) sont éliminés
|
||||
par UNION (qui déduplique) + sélection par nom picnic.
|
||||
|
||||
Returns:
|
||||
Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct}.
|
||||
diff = price_leclerc - price_picnic (positif = Leclerc plus cher)
|
||||
diff_pct = diff / MIN(price_picnic, price_leclerc) * 100
|
||||
Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct, match_type}.
|
||||
diff = price_leclerc - price_picnic (positif = Leclerc plus cher)
|
||||
diff_pct = diff / MIN(price_picnic, price_leclerc) * 100
|
||||
match_type = 'exact' ou 'fuzzy'
|
||||
"""
|
||||
rows = conn.execute(
|
||||
"""
|
||||
@@ -101,32 +106,67 @@ def get_compare_prices(conn: sqlite3.Connection) -> list[dict]:
|
||||
FROM price_history
|
||||
WHERE name_normalized IS NOT NULL
|
||||
GROUP BY name_normalized, store
|
||||
),
|
||||
exact_matches AS (
|
||||
SELECT
|
||||
a.name_normalized AS name,
|
||||
a.name_normalized AS name_display,
|
||||
a.avg_price AS price_picnic,
|
||||
b.avg_price AS price_leclerc,
|
||||
ROUND(b.avg_price - a.avg_price, 2) AS diff,
|
||||
ROUND(
|
||||
(b.avg_price - a.avg_price)
|
||||
/ MIN(a.avg_price, b.avg_price) * 100
|
||||
, 1) AS diff_pct,
|
||||
'exact' AS match_type
|
||||
FROM avg_by_store a
|
||||
JOIN avg_by_store b
|
||||
ON a.name_normalized = b.name_normalized
|
||||
AND a.store = 'picnic'
|
||||
AND b.store = 'leclerc'
|
||||
),
|
||||
fuzzy_matches AS (
|
||||
SELECT
|
||||
pm.name_picnic AS name,
|
||||
pm.name_picnic || ' ≈ ' || pm.name_leclerc AS name_display,
|
||||
ap_p.avg_price AS price_picnic,
|
||||
ap_l.avg_price AS price_leclerc,
|
||||
ROUND(ap_l.avg_price - ap_p.avg_price, 2) AS diff,
|
||||
ROUND(
|
||||
(ap_l.avg_price - ap_p.avg_price)
|
||||
/ MIN(ap_p.avg_price, ap_l.avg_price) * 100
|
||||
, 1) AS diff_pct,
|
||||
'fuzzy' AS match_type
|
||||
FROM product_matches pm
|
||||
JOIN avg_by_store ap_p
|
||||
ON ap_p.name_normalized = pm.name_picnic AND ap_p.store = 'picnic'
|
||||
JOIN avg_by_store ap_l
|
||||
ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
|
||||
WHERE pm.status = 'validated'
|
||||
-- Exclure si déjà présent en exact match
|
||||
AND pm.name_picnic NOT IN (SELECT name FROM exact_matches)
|
||||
)
|
||||
SELECT
|
||||
a.name_normalized AS name,
|
||||
a.avg_price AS price_picnic,
|
||||
b.avg_price AS price_leclerc,
|
||||
ROUND(b.avg_price - a.avg_price, 2) AS diff,
|
||||
ROUND(
|
||||
(b.avg_price - a.avg_price)
|
||||
/ MIN(a.avg_price, b.avg_price) * 100
|
||||
, 1) AS diff_pct
|
||||
FROM avg_by_store a
|
||||
JOIN avg_by_store b
|
||||
ON a.name_normalized = b.name_normalized
|
||||
AND a.store = 'picnic'
|
||||
AND b.store = 'leclerc'
|
||||
ORDER BY ABS(b.avg_price - a.avg_price) DESC
|
||||
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
|
||||
FROM (
|
||||
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
|
||||
FROM exact_matches
|
||||
UNION ALL
|
||||
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
|
||||
FROM fuzzy_matches
|
||||
)
|
||||
ORDER BY ABS(diff) DESC
|
||||
"""
|
||||
).fetchall()
|
||||
|
||||
return [
|
||||
{
|
||||
"name": r["name"],
|
||||
"name_display": r["name_display"],
|
||||
"price_picnic": r["price_picnic"],
|
||||
"price_leclerc": r["price_leclerc"],
|
||||
"diff": r["diff"],
|
||||
"diff_pct": r["diff_pct"],
|
||||
"match_type": r["match_type"],
|
||||
}
|
||||
for r in rows
|
||||
]
|
||||
@@ -279,6 +319,91 @@ def get_receipt_detail(conn: sqlite3.Connection, receipt_id: int) -> dict | None
|
||||
}
|
||||
|
||||
|
||||
def get_pending_matches(conn: sqlite3.Connection) -> list[dict]:
    """Return the candidate pairs still awaiting validation, with average prices.

    Each pending row of ``product_matches`` is joined against ``price_history``
    so that the reviewer sees the average unit price of both sides of the pair.

    The Picnic average only takes ``price_history`` rows whose store is
    'picnic' AND whose normalized name is the pair's ``name_picnic``; the
    Leclerc average is restricted the same way to ``name_leclerc``. Matching
    on "either name" and filtering by store alone would blend in prices of
    the *other* name whenever that name also exists in the same store.

    Args:
        conn: Open SQLite connection. Rows are read by column name, so the
            connection must use a name-addressable row factory such as
            ``sqlite3.Row``.

    Returns:
        List of dicts {id, name_picnic, name_leclerc, score, price_picnic,
        price_leclerc}, ordered by descending score. A price is None when
        ``price_history`` has no row for that name in that store.
    """
    rows = conn.execute(
        """
        SELECT
            pm.id,
            pm.name_picnic,
            pm.name_leclerc,
            pm.score,
            ROUND(AVG(CASE WHEN ph.store = 'picnic' THEN ph.unit_price END), 2) AS price_picnic,
            ROUND(AVG(CASE WHEN ph.store = 'leclerc' THEN ph.unit_price END), 2) AS price_leclerc
        FROM product_matches pm
        LEFT JOIN price_history ph
            ON (ph.store = 'picnic' AND ph.name_normalized = pm.name_picnic)
            OR (ph.store = 'leclerc' AND ph.name_normalized = pm.name_leclerc)
        WHERE pm.status = 'pending'
        GROUP BY pm.id
        ORDER BY pm.score DESC
        """
    ).fetchall()

    # LEFT JOIN keeps pairs without any price row: both averages are then NULL.
    return [
        {
            "id": r["id"],
            "name_picnic": r["name_picnic"],
            "name_leclerc": r["name_leclerc"],
            "score": r["score"],
            "price_picnic": r["price_picnic"],
            "price_leclerc": r["price_leclerc"],
        }
        for r in rows
    ]
|
||||
|
||||
|
||||
def get_validated_matches(conn: sqlite3.Connection) -> list[dict]:
    """Return the validated Picnic/Leclerc pairs with their price comparison.

    Average unit prices are computed per (normalized name, store) from
    ``price_history`` and rounded to 2 decimals; each validated row of
    ``product_matches`` is then inner-joined to its Picnic average and its
    Leclerc average, so a pair lacking a price on either side is left out.

    Args:
        conn: Open SQLite connection. Rows are read by column name, so the
            connection must use a name-addressable row factory such as
            ``sqlite3.Row``.

    Returns:
        List of dicts {name_picnic, name_leclerc, price_picnic, price_leclerc,
        diff, diff_pct}, largest absolute price gap first.
        diff = price_leclerc - price_picnic (positive = Leclerc is pricier).
        diff_pct = diff / MIN(price_picnic, price_leclerc) * 100.
    """
    query = """
        WITH avg_prices AS (
            SELECT name_normalized, store, ROUND(AVG(unit_price), 2) AS avg_price
            FROM price_history
            WHERE name_normalized IS NOT NULL
            GROUP BY name_normalized, store
        )
        SELECT
            pm.id,
            pm.name_picnic,
            pm.name_leclerc,
            ap_p.avg_price AS price_picnic,
            ap_l.avg_price AS price_leclerc,
            ROUND(ap_l.avg_price - ap_p.avg_price, 2) AS diff,
            ROUND(
                (ap_l.avg_price - ap_p.avg_price)
                / MIN(ap_p.avg_price, ap_l.avg_price) * 100
            , 1) AS diff_pct
        FROM product_matches pm
        JOIN avg_prices ap_p ON ap_p.name_normalized = pm.name_picnic AND ap_p.store = 'picnic'
        JOIN avg_prices ap_l ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
        WHERE pm.status = 'validated'
        ORDER BY ABS(ap_l.avg_price - ap_p.avg_price) DESC
        """
    # Columns exposed to callers, in output order (pm.id is selected but not returned).
    exposed = (
        "name_picnic",
        "name_leclerc",
        "price_picnic",
        "price_leclerc",
        "diff",
        "diff_pct",
    )
    records = conn.execute(query).fetchall()
    return [{key: record[key] for key in exposed} for record in records]
|
||||
|
||||
|
||||
def get_product_list(conn: sqlite3.Connection) -> list[str]:
|
||||
"""Liste tous les noms normalisés distincts (non NULL) pour le sélecteur.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user