feat: fuzzy matching Picnic ↔ Leclerc + page /matches dans le dashboard

Nouvelle table product_matches (status: pending/validated/rejected).
Matching via RapidFuzz token_sort_ratio, seuil configurable (défaut 85%).

Workflow :
  1. python -m tickettracker.cli match [--threshold 85]
     → calcule et stocke les paires candidates
  2. http://localhost:8000/matches
     → l'utilisateur valide ou rejette chaque paire
  3. La comparaison de prix est enrichie avec les paires validées

Nouvelles dépendances : rapidfuzz, watchdog (requirements.txt).
10 tests ajoutés (test_matcher.py), tous passent.
Suite complète : 129 passent, 1 xfail, 0 échec.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-25 18:02:48 +01:00
parent f360332626
commit be4d4a7076
13 changed files with 804 additions and 23 deletions

View File

@@ -8,6 +8,7 @@ appelle la fonction de queries.py correspondante, puis ferme la connexion.
import sqlite3
from fastapi import APIRouter, HTTPException
from fastapi.responses import Response
import tickettracker.config as config
from tickettracker.db.schema import get_connection
@@ -70,6 +71,46 @@ def api_receipts():
conn.close()
@router.post("/match/{match_id}/validate")
def api_match_validate(match_id: int):
    """Mark a fuzzy pair as validated (status -> 'validated').

    Args:
        match_id: Primary key of the product_matches row to update.

    Returns:
        ``{"status": "validated", "id": match_id}`` when a row was updated.

    Raises:
        HTTPException: 404 when no product_matches row has this id.
    """
    db = get_connection(config.DB_PATH)
    try:
        # The connection context manager wraps the UPDATE in a transaction.
        with db:
            updated_rows = db.execute(
                "UPDATE product_matches SET status='validated' WHERE id=?",
                (match_id,),
            ).rowcount
    finally:
        db.close()

    if updated_rows == 0:
        raise HTTPException(status_code=404, detail="Match introuvable")
    return {"status": "validated", "id": match_id}
@router.post("/match/{match_id}/reject")
def api_match_reject(match_id: int):
    """Mark a fuzzy pair as rejected (status -> 'rejected').

    Args:
        match_id: Primary key of the product_matches row to update.

    Returns:
        ``{"status": "rejected", "id": match_id}`` when a row was updated.

    Raises:
        HTTPException: 404 when no product_matches row has this id.
    """
    db = get_connection(config.DB_PATH)
    try:
        # The connection context manager wraps the UPDATE in a transaction.
        with db:
            result = db.execute(
                "UPDATE product_matches SET status='rejected' WHERE id=?",
                (match_id,),
            )
            touched = result.rowcount
    finally:
        db.close()

    if touched == 0:
        raise HTTPException(status_code=404, detail="Match introuvable")
    return {"status": "rejected", "id": match_id}
@router.get("/receipt/{receipt_id}")
def api_receipt_detail(receipt_id: int):
"""Détail d'un ticket et de ses articles.

View File

@@ -30,6 +30,7 @@ from tickettracker.web.queries import (
get_compare_prices,
get_dashboard_stats,
get_monthly_spending,
get_pending_matches,
get_product_history,
get_product_list,
get_receipt_detail,
@@ -167,6 +168,32 @@ async def page_product(request: Request, name: str):
)
@app.get("/matches", response_class=HTMLResponse)
async def page_matches(request: Request):
    """Render the review page for fuzzy Picnic <-> Leclerc pairs.

    Loads the pending pairs plus the number of validated and rejected pairs,
    then renders ``matches.html`` with those three values.
    """
    db = get_connection(config.DB_PATH)
    try:
        context = {"pending": get_pending_matches(db)}

        validated_row = db.execute(
            "SELECT COUNT(*) FROM product_matches WHERE status='validated'"
        ).fetchone()
        context["validated_count"] = validated_row[0]

        rejected_row = db.execute(
            "SELECT COUNT(*) FROM product_matches WHERE status='rejected'"
        ).fetchone()
        context["rejected_count"] = rejected_row[0]
    finally:
        # Always release the connection, even if a query fails.
        db.close()

    return templates.TemplateResponse(request, "matches.html", context)
@app.get("/receipt/{receipt_id}", response_class=HTMLResponse)
async def page_receipt(request: Request, receipt_id: int):
"""Page détail d'un ticket."""

View File

@@ -83,13 +83,18 @@ def get_monthly_spending(conn: sqlite3.Connection) -> list[dict]:
def get_compare_prices(conn: sqlite3.Connection) -> list[dict]:
"""Comparaison de prix entre Picnic et Leclerc pour les produits communs.
Utilise la vue price_history. Ne retourne que les produits présents
dans les deux enseignes. Trié par écart décroissant (le plus cher en premier).
Combine deux sources :
- Correspondances exactes (même name_normalized dans les deux enseignes)
- Correspondances fuzzy validées dans product_matches (status='validated')
Les doublons éventuels (un produit déjà en exact ET en fuzzy) sont éliminés
en excluant des correspondances fuzzy tout nom picnic déjà présent en exact
(NOT IN) ; les deux sources sont ensuite concaténées par UNION ALL.
Returns:
Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct}.
diff = price_leclerc - price_picnic (positif = Leclerc plus cher)
diff_pct = diff / MIN(price_picnic, price_leclerc) * 100
Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct, match_type}.
diff = price_leclerc - price_picnic (positif = Leclerc plus cher)
diff_pct = diff / MIN(price_picnic, price_leclerc) * 100
match_type = 'exact' ou 'fuzzy'
"""
rows = conn.execute(
"""
@@ -101,32 +106,67 @@ def get_compare_prices(conn: sqlite3.Connection) -> list[dict]:
FROM price_history
WHERE name_normalized IS NOT NULL
GROUP BY name_normalized, store
),
exact_matches AS (
SELECT
a.name_normalized AS name,
a.name_normalized AS name_display,
a.avg_price AS price_picnic,
b.avg_price AS price_leclerc,
ROUND(b.avg_price - a.avg_price, 2) AS diff,
ROUND(
(b.avg_price - a.avg_price)
/ MIN(a.avg_price, b.avg_price) * 100
, 1) AS diff_pct,
'exact' AS match_type
FROM avg_by_store a
JOIN avg_by_store b
ON a.name_normalized = b.name_normalized
AND a.store = 'picnic'
AND b.store = 'leclerc'
),
fuzzy_matches AS (
SELECT
pm.name_picnic AS name,
pm.name_picnic || '' || pm.name_leclerc AS name_display,
ap_p.avg_price AS price_picnic,
ap_l.avg_price AS price_leclerc,
ROUND(ap_l.avg_price - ap_p.avg_price, 2) AS diff,
ROUND(
(ap_l.avg_price - ap_p.avg_price)
/ MIN(ap_p.avg_price, ap_l.avg_price) * 100
, 1) AS diff_pct,
'fuzzy' AS match_type
FROM product_matches pm
JOIN avg_by_store ap_p
ON ap_p.name_normalized = pm.name_picnic AND ap_p.store = 'picnic'
JOIN avg_by_store ap_l
ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
WHERE pm.status = 'validated'
-- Exclure si déjà présent en exact match
AND pm.name_picnic NOT IN (SELECT name FROM exact_matches)
)
SELECT
a.name_normalized AS name,
a.avg_price AS price_picnic,
b.avg_price AS price_leclerc,
ROUND(b.avg_price - a.avg_price, 2) AS diff,
ROUND(
(b.avg_price - a.avg_price)
/ MIN(a.avg_price, b.avg_price) * 100
, 1) AS diff_pct
FROM avg_by_store a
JOIN avg_by_store b
ON a.name_normalized = b.name_normalized
AND a.store = 'picnic'
AND b.store = 'leclerc'
ORDER BY ABS(b.avg_price - a.avg_price) DESC
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
FROM (
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
FROM exact_matches
UNION ALL
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
FROM fuzzy_matches
)
ORDER BY ABS(diff) DESC
"""
).fetchall()
return [
{
"name": r["name"],
"name_display": r["name_display"],
"price_picnic": r["price_picnic"],
"price_leclerc": r["price_leclerc"],
"diff": r["diff"],
"diff_pct": r["diff_pct"],
"match_type": r["match_type"],
}
for r in rows
]
@@ -279,6 +319,91 @@ def get_receipt_detail(conn: sqlite3.Connection, receipt_id: int) -> dict | None
}
def get_pending_matches(conn: sqlite3.Connection) -> list[dict]:
    """Return pending fuzzy pairs with each store's average unit price.

    Each price is averaged only over price_history rows whose store AND
    normalized name match that side of the pair. This keeps a Leclerc-side
    name that also exists at Picnic (or the reverse) from leaking into the
    other store's average.

    Args:
        conn: Open SQLite connection whose rows support access by column
            name (e.g. ``row_factory = sqlite3.Row``).

    Returns:
        List of dicts {id, name_picnic, name_leclerc, score, price_picnic,
        price_leclerc}, ordered by score descending. A price is None when
        price_history has no row for that name in that store.
    """
    rows = conn.execute(
        """
        SELECT
            pm.id,
            pm.name_picnic,
            pm.name_leclerc,
            pm.score,
            ROUND(AVG(CASE WHEN ph.store='picnic' THEN ph.unit_price END), 2) AS price_picnic,
            ROUND(AVG(CASE WHEN ph.store='leclerc' THEN ph.unit_price END), 2) AS price_leclerc
        FROM product_matches pm
        LEFT JOIN price_history ph
            ON (ph.store = 'picnic' AND ph.name_normalized = pm.name_picnic)
            OR (ph.store = 'leclerc' AND ph.name_normalized = pm.name_leclerc)
        WHERE pm.status = 'pending'
        GROUP BY pm.id
        ORDER BY pm.score DESC
        """
    ).fetchall()

    columns = (
        "id",
        "name_picnic",
        "name_leclerc",
        "score",
        "price_picnic",
        "price_leclerc",
    )
    return [{col: row[col] for col in columns} for row in rows]
def get_validated_matches(conn: sqlite3.Connection) -> list[dict]:
    """Return validated pairs with per-store average prices and their gap.

    Only pairs that have a price_history average in both stores are returned
    (inner joins), ordered by absolute price difference, largest first.

    Args:
        conn: Open SQLite connection whose rows support access by column
            name (e.g. ``row_factory = sqlite3.Row``).

    Returns:
        List of dicts {name_picnic, name_leclerc, price_picnic,
        price_leclerc, diff, diff_pct}, where diff is price_leclerc minus
        price_picnic and diff_pct is diff relative to the lower price.
    """
    query = """
        WITH avg_prices AS (
        SELECT name_normalized, store, ROUND(AVG(unit_price), 2) AS avg_price
        FROM price_history
        WHERE name_normalized IS NOT NULL
        GROUP BY name_normalized, store
        )
        SELECT
        pm.id,
        pm.name_picnic,
        pm.name_leclerc,
        ap_p.avg_price AS price_picnic,
        ap_l.avg_price AS price_leclerc,
        ROUND(ap_l.avg_price - ap_p.avg_price, 2) AS diff,
        ROUND(
        (ap_l.avg_price - ap_p.avg_price)
        / MIN(ap_p.avg_price, ap_l.avg_price) * 100
        , 1) AS diff_pct
        FROM product_matches pm
        JOIN avg_prices ap_p ON ap_p.name_normalized = pm.name_picnic AND ap_p.store = 'picnic'
        JOIN avg_prices ap_l ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
        WHERE pm.status = 'validated'
        ORDER BY ABS(ap_l.avg_price - ap_p.avg_price) DESC
        """
    # pm.id is selected by the query but deliberately not exposed here.
    exposed = (
        "name_picnic",
        "name_leclerc",
        "price_picnic",
        "price_leclerc",
        "diff",
        "diff_pct",
    )
    matches = []
    for record in conn.execute(query).fetchall():
        matches.append({key: record[key] for key in exposed})
    return matches
def get_product_list(conn: sqlite3.Connection) -> list[str]:
"""Liste tous les noms normalisés distincts (non NULL) pour le sélecteur.

View File

@@ -46,3 +46,72 @@
.overflow-auto {
overflow-x: auto;
}
/* Badge marking fuzzy matches in the compare table */
.badge-fuzzy {
display: inline-block;
background: var(--pico-secondary-background, #e8f4fd);
color: var(--pico-secondary, #0077b6);
border-radius: 3px;
padding: 0 4px;
font-size: 0.75rem;
font-weight: bold;
cursor: help;
}
/* Similarity score in the matches table */
.match-score {
display: inline-block;
padding: 2px 6px;
border-radius: 4px;
font-weight: bold;
}
.score-high { background: #d4edda; color: #155724; }
.score-medium { background: #fff3cd; color: #856404; }
.score-low { background: #f8d7da; color: #721c24; }
/* Validate/reject buttons in the matches table */
.btn-validate {
background: var(--pico-primary);
color: white;
border: none;
padding: 4px 10px;
border-radius: 4px;
cursor: pointer;
font-size: 0.85rem;
}
.btn-reject {
padding: 4px 10px;
font-size: 0.85rem;
}
.match-actions {
white-space: nowrap;
}
/* Date filter form */
.date-filter {
display: flex;
gap: 0.5rem;
align-items: center;
flex-wrap: wrap;
margin-bottom: 1.5rem;
padding: 0.75rem 1rem;
background: var(--pico-card-background-color, #f8f9fa);
border-radius: 6px;
}
.date-filter input[type="month"] {
width: auto;
margin: 0;
padding: 4px 8px;
}
.date-filter button,
.date-filter a {
margin: 0;
padding: 4px 12px;
font-size: 0.9rem;
}

View File

@@ -20,6 +20,7 @@
<ul>
<li><a href="/">Accueil</a></li>
<li><a href="/compare">Comparer</a></li>
<li><a href="/matches">Correspondances</a></li>
<li><a href="/api/docs" target="_blank">API docs</a></li>
</ul>
</nav>

View File

@@ -38,7 +38,12 @@
<tbody>
{% for p in products %}
<tr>
<td>{{ p.name }}</td>
<td>
{{ p.name_display }}
{% if p.match_type == 'fuzzy' %}
<span class="badge-fuzzy" title="Correspondance fuzzy validée">~</span>
{% endif %}
</td>
<td>{{ "%.2f"|format(p.price_picnic) }} €</td>
<td>{{ "%.2f"|format(p.price_leclerc) }} €</td>
<td class="{% if p.diff > 0 %}diff-positive{% elif p.diff < 0 %}diff-negative{% endif %}">
@@ -56,7 +61,12 @@
</table>
</div>
<p><small>Positif = Leclerc plus cher, négatif = Picnic plus cher.</small></p>
<p>
<small>Positif = Leclerc plus cher, négatif = Picnic plus cher.</small><br>
<small><span class="badge-fuzzy">~</span> = correspondance fuzzy validée (noms différents, même produit)</small>
</p>
<p><a href="/matches">Gérer les correspondances fuzzy →</a></p>
{% endif %}
{% endblock %}

View File

@@ -0,0 +1,85 @@
{% extends "base.html" %}
{% block title %}Correspondances fuzzy — TicketTracker{% endblock %}
{% block content %}
{# Review page for fuzzy Picnic <-> Leclerc pairs.
   Context: pending (list of pairs), validated_count, rejected_count. #}
<h1>Correspondances Picnic ↔ Leclerc</h1>
<p>
  Ces paires ont été détectées automatiquement par fuzzy matching.
  Validez celles qui désignent le même produit pour enrichir la comparaison de prix.
</p>
<!-- Summary statistics -->
<div class="stat-grid">
  <article class="stat-card">
    <h3>{{ pending | length }}</h3>
    <p>En attente</p>
  </article>
  <article class="stat-card">
    <h3>{{ validated_count }}</h3>
    <p>Validées</p>
  </article>
  <article class="stat-card">
    <h3>{{ rejected_count }}</h3>
    <p>Rejetées</p>
  </article>
</div>
{% if pending %}
<article>
  <h2>Paires à valider</h2>
  <div class="overflow-auto">
    <table>
      <thead>
        <tr>
          <th>Produit Picnic</th>
          <th>Prix moy.</th>
          <th>Produit Leclerc</th>
          <th>Prix moy.</th>
          <th>Score</th>
          <th>Action</th>
        </tr>
      </thead>
      <tbody>
        {% for m in pending %}
        <tr>
          <td>{{ m.name_picnic }}</td>
          {# Test against none explicitly so a real 0.00 average is still displayed. #}
          <td>{% if m.price_picnic is not none %}{{ "%.2f"|format(m.price_picnic) }} €{% else %}—{% endif %}</td>
          <td>{{ m.name_leclerc }}</td>
          <td>{% if m.price_leclerc is not none %}{{ "%.2f"|format(m.price_leclerc) }} €{% else %}—{% endif %}</td>
          <td>
            <small class="match-score {% if m.score >= 95 %}score-high{% elif m.score >= 85 %}score-medium{% else %}score-low{% endif %}">
              {{ "%.0f"|format(m.score) }}%
            </small>
          </td>
          <td class="match-actions">
            <form method="post" action="/api/match/{{ m.id }}/validate" style="display:inline">
              <button type="submit" class="btn-validate">✓ Valider</button>
            </form>
            <form method="post" action="/api/match/{{ m.id }}/reject" style="display:inline">
              <button type="submit" class="btn-reject secondary outline">✗ Rejeter</button>
            </form>
          </td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
  </div>
</article>
{% else %}
<article>
  {# <pre> is not allowed inside <p>, so the command block sits after the paragraph. #}
  {% set nothing_processed = (validated_count == 0 and rejected_count == 0) %}
  <p>
    Aucune paire en attente.
    {% if nothing_processed %}
    Lancez d'abord la commande de matching :
    {% else %}
    Toutes les paires ont été traitées ({{ validated_count }} validées, {{ rejected_count }} rejetées).
    {% endif %}
  </p>
  {% if nothing_processed %}
  <pre><code>python -m tickettracker.cli match --threshold 85</code></pre>
  {% endif %}
</article>
{% endif %}
{% endblock %}