feat: fuzzy matching Picnic ↔ Leclerc + page /matches dans le dashboard

Nouvelle table product_matches (status: pending/validated/rejected). Matching via RapidFuzz token_sort_ratio, seuil configurable (défaut 85%).

Workflow :
1. python -m tickettracker.cli match [--threshold 85] → calcule et stocke les paires candidates
2. http://localhost:8000/matches → l'utilisateur valide ou rejette chaque paire
3. La comparaison de prix est enrichie avec les paires validées

Nouvelles dépendances : rapidfuzz, watchdog (requirements.txt).
10 tests ajoutés (test_matcher.py), tous passent. Suite complète : 129 passent, 1 xfail, 0 échec.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-25 18:02:48 +01:00
parent f360332626
commit be4d4a7076
13 changed files with 804 additions and 23 deletions
@@ -8,6 +8,7 @@ appelle la fonction de queries.py correspondante, puis ferme la connexion.
 import sqlite3

 from fastapi import APIRouter, HTTPException
+from fastapi.responses import Response

 import tickettracker.config as config
 from tickettracker.db.schema import get_connection
@@ -70,6 +71,46 @@ def api_receipts():
        conn.close()


+@router.post("/match/{match_id}/validate")
+def api_match_validate(match_id: int):
+    """Mark a fuzzy pair as validated (status becomes 'validated').
+
+    Args:
+        match_id: Value matched against product_matches.id.
+
+    Returns:
+        The dict {"status": "validated", "id": match_id}.
+
+    Raises:
+        HTTPException: 404 when the UPDATE touched no row (unknown id).
+    """
+    sql = "UPDATE product_matches SET status='validated' WHERE id=?"
+    db = get_connection(config.DB_PATH)
+    try:
+        # NOTE(review): assumes get_connection returns a sqlite3.Connection,
+        # whose context manager commits or rolls back but does not close;
+        # closing is done explicitly in the finally clause.
+        with db:
+            updated_rows = db.execute(sql, (match_id,)).rowcount
+    finally:
+        db.close()
+    if not updated_rows:
+        raise HTTPException(status_code=404, detail="Match introuvable")
+    return {"status": "validated", "id": match_id}
+
+
+@router.post("/match/{match_id}/reject")
+def api_match_reject(match_id: int):
+    """Mark a fuzzy pair as rejected (status becomes 'rejected').
+
+    Args:
+        match_id: Value matched against product_matches.id.
+
+    Returns:
+        The dict {"status": "rejected", "id": match_id}.
+
+    Raises:
+        HTTPException: 404 when the UPDATE touched no row (unknown id).
+    """
+    sql = "UPDATE product_matches SET status='rejected' WHERE id=?"
+    db = get_connection(config.DB_PATH)
+    try:
+        # NOTE(review): assumes get_connection returns a sqlite3.Connection,
+        # whose context manager commits or rolls back but does not close;
+        # closing is done explicitly in the finally clause.
+        with db:
+            updated_rows = db.execute(sql, (match_id,)).rowcount
+    finally:
+        db.close()
+    if not updated_rows:
+        raise HTTPException(status_code=404, detail="Match introuvable")
+    return {"status": "rejected", "id": match_id}
+
+
@router.get("/receipt/{receipt_id}")
 def api_receipt_detail(receipt_id: int):
    """Détail d'un ticket et de ses articles.
@@ -30,6 +30,7 @@ from tickettracker.web.queries import (
    get_compare_prices,
    get_dashboard_stats,
    get_monthly_spending,
+    get_pending_matches,
    get_product_history,
    get_product_list,
    get_receipt_detail,
@@ -167,6 +168,32 @@ async def page_product(request: Request, name: str):
    )


+@app.get("/matches", response_class=HTMLResponse)
+async def page_matches(request: Request):
+    """Render the review page for Picnic ↔ Leclerc fuzzy pairs.
+
+    Loads the pending pairs and the number of validated and rejected rows of
+    product_matches, then renders the matches.html template with them.
+    """
+    db = get_connection(config.DB_PATH)
+    try:
+        # Built while the connection is open; entries are evaluated in order.
+        context = {
+            "pending": get_pending_matches(db),
+            "validated_count": db.execute(
+                "SELECT COUNT(*) FROM product_matches WHERE status='validated'"
+            ).fetchone()[0],
+            "rejected_count": db.execute(
+                "SELECT COUNT(*) FROM product_matches WHERE status='rejected'"
+            ).fetchone()[0],
+        }
+    finally:
+        db.close()
+
+    return templates.TemplateResponse(request, "matches.html", context)
+
+
@app.get("/receipt/{receipt_id}", response_class=HTMLResponse)
 async def page_receipt(request: Request, receipt_id: int):
    """Page détail d'un ticket."""
@@ -83,13 +83,18 @@ def get_monthly_spending(conn: sqlite3.Connection) -> list[dict]:
 def get_compare_prices(conn: sqlite3.Connection) -> list[dict]:
    """Comparaison de prix entre Picnic et Leclerc pour les produits communs.

-    Utilise la vue price_history. Ne retourne que les produits présents
-    dans les deux enseignes. Trié par écart décroissant (le plus cher en premier).
+    Combine deux sources :
+      - Correspondances exactes (même name_normalized dans les deux enseignes)
+      - Correspondances fuzzy validées dans product_matches (status='validated')
+
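+    Les doublons éventuels (un produit déjà en exact ET en fuzzy) sont évités en
+    excluant des paires fuzzy tout nom Picnic déjà présent dans les
+    correspondances exactes (NOT IN) ; les deux sources sont ensuite
+    concaténées par UNION ALL, qui ne déduplique pas.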

    Returns:
-        Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct}.
-        diff    = price_leclerc - price_picnic  (positif = Leclerc plus cher)
-        diff_pct = diff / MIN(price_picnic, price_leclerc) * 100
+        Liste de dicts {name, name_display, price_picnic, price_leclerc, diff,
+        diff_pct, match_type}.
+        diff       = price_leclerc - price_picnic  (positif = Leclerc plus cher)
+        diff_pct   = diff / MIN(price_picnic, price_leclerc) * 100
+        match_type = 'exact' ou 'fuzzy'
    """
    rows = conn.execute(
        """
@@ -101,32 +106,67 @@ def get_compare_prices(conn: sqlite3.Connection) -> list[dict]:
            FROM price_history
            WHERE name_normalized IS NOT NULL
            GROUP BY name_normalized, store
+        ),
+        exact_matches AS (
+            SELECT
+                a.name_normalized                             AS name,
+                a.name_normalized                             AS name_display,
+                a.avg_price                                   AS price_picnic,
+                b.avg_price                                   AS price_leclerc,
+                ROUND(b.avg_price - a.avg_price, 2)           AS diff,
+                ROUND(
+                    (b.avg_price - a.avg_price)
+                    / MIN(a.avg_price, b.avg_price) * 100
+                , 1)                                          AS diff_pct,
+                'exact'                                       AS match_type
+            FROM avg_by_store a
+            JOIN avg_by_store b
+              ON a.name_normalized = b.name_normalized
+             AND a.store = 'picnic'
+             AND b.store = 'leclerc'
+        ),
+        fuzzy_matches AS (
+            SELECT
+                pm.name_picnic                                AS name,
+                pm.name_picnic || ' ≈ ' || pm.name_leclerc  AS name_display,
+                ap_p.avg_price                                AS price_picnic,
+                ap_l.avg_price                                AS price_leclerc,
+                ROUND(ap_l.avg_price - ap_p.avg_price, 2)    AS diff,
+                ROUND(
+                    (ap_l.avg_price - ap_p.avg_price)
+                    / MIN(ap_p.avg_price, ap_l.avg_price) * 100
+                , 1)                                          AS diff_pct,
+                'fuzzy'                                       AS match_type
+            FROM product_matches pm
+            JOIN avg_by_store ap_p
+              ON ap_p.name_normalized = pm.name_picnic  AND ap_p.store = 'picnic'
+            JOIN avg_by_store ap_l
+              ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
+            WHERE pm.status = 'validated'
+              -- Exclure si déjà présent en exact match
+              AND pm.name_picnic NOT IN (SELECT name FROM exact_matches)
        )
-        SELECT
-            a.name_normalized                             AS name,
-            a.avg_price                                   AS price_picnic,
-            b.avg_price                                   AS price_leclerc,
-            ROUND(b.avg_price - a.avg_price, 2)           AS diff,
-            ROUND(
-                (b.avg_price - a.avg_price)
-                / MIN(a.avg_price, b.avg_price) * 100
-            , 1)                                          AS diff_pct
-        FROM avg_by_store a
-        JOIN avg_by_store b
-          ON a.name_normalized = b.name_normalized
-         AND a.store = 'picnic'
-         AND b.store = 'leclerc'
-        ORDER BY ABS(b.avg_price - a.avg_price) DESC
+        SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
+        FROM (
+            SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
+            FROM exact_matches
+            UNION ALL
+            SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
+            FROM fuzzy_matches
+        )
+        ORDER BY ABS(diff) DESC
        """
    ).fetchall()

    return [
        {
            "name": r["name"],
+            "name_display": r["name_display"],
            "price_picnic": r["price_picnic"],
            "price_leclerc": r["price_leclerc"],
            "diff": r["diff"],
            "diff_pct": r["diff_pct"],
+            "match_type": r["match_type"],
        }
        for r in rows
    ]
@@ -279,6 +319,91 @@ def get_receipt_detail(conn: sqlite3.Connection, receipt_id: int) -> dict | None
    }


+def get_pending_matches(conn: sqlite3.Connection) -> list[dict]:
+    """Return the pairs awaiting review, with each store's average price.
+
+    Each side of a pair is priced only from price_history rows that carry its
+    own normalized name in its own store: price_picnic averages rows matching
+    (name_picnic, 'picnic') and price_leclerc averages rows matching
+    (name_leclerc, 'leclerc'). A Picnic row that happens to carry the
+    Leclerc-side name (or the reverse) is therefore never mixed into the
+    average, in line with how validated pairs are priced.
+
+    Args:
+        conn: Open SQLite connection whose rows can be read by column name.
+
+    Returns:
+        List of dicts {id, name_picnic, name_leclerc, score, price_picnic,
+        price_leclerc}, ordered by descending score. A price is None when
+        price_history has no row for that name in that store.
+    """
+    rows = conn.execute(
+        """
+        SELECT
+            pm.id,
+            pm.name_picnic,
+            pm.name_leclerc,
+            pm.score,
+            ROUND(AVG(CASE WHEN ph.store='picnic'  THEN ph.unit_price END), 2) AS price_picnic,
+            ROUND(AVG(CASE WHEN ph.store='leclerc' THEN ph.unit_price END), 2) AS price_leclerc
+        FROM product_matches pm
+        LEFT JOIN price_history ph
+               ON (ph.store = 'picnic'  AND ph.name_normalized = pm.name_picnic)
+               OR (ph.store = 'leclerc' AND ph.name_normalized = pm.name_leclerc)
+        WHERE pm.status = 'pending'
+        GROUP BY pm.id
+        ORDER BY pm.score DESC
+        """
+    ).fetchall()
+
+    # LEFT JOIN keeps pairs without any price row; their averages come back
+    # as NULL, i.e. None on the Python side.
+    return [
+        {
+            "id": r["id"],
+            "name_picnic": r["name_picnic"],
+            "name_leclerc": r["name_leclerc"],
+            "score": r["score"],
+            "price_picnic": r["price_picnic"],
+            "price_leclerc": r["price_leclerc"],
+        }
+        for r in rows
+    ]
+
+
+def get_validated_matches(conn: sqlite3.Connection) -> list[dict]:
+    """Return the validated pairs with both average prices and their gap.
+
+    Only pairs that have an average price in both stores are returned (inner
+    joins), ordered by descending absolute price difference.
+
+    Args:
+        conn: Open SQLite connection whose rows can be read by column name.
+
+    Returns:
+        List of dicts {name_picnic, name_leclerc, price_picnic, price_leclerc,
+        diff, diff_pct}, where diff = price_leclerc - price_picnic and
+        diff_pct is that difference relative to the lower of the two prices.
+    """
+    exported_columns = (
+        "name_picnic",
+        "name_leclerc",
+        "price_picnic",
+        "price_leclerc",
+        "diff",
+        "diff_pct",
+    )
+    cursor = conn.execute(
+        """
+        WITH avg_prices AS (
+            SELECT name_normalized, store, ROUND(AVG(unit_price), 2) AS avg_price
+            FROM price_history
+            WHERE name_normalized IS NOT NULL
+            GROUP BY name_normalized, store
+        )
+        SELECT
+            pm.id,
+            pm.name_picnic,
+            pm.name_leclerc,
+            ap_p.avg_price                              AS price_picnic,
+            ap_l.avg_price                              AS price_leclerc,
+            ROUND(ap_l.avg_price - ap_p.avg_price, 2)  AS diff,
+            ROUND(
+                (ap_l.avg_price - ap_p.avg_price)
+                / MIN(ap_p.avg_price, ap_l.avg_price) * 100
+            , 1)                                        AS diff_pct
+        FROM product_matches pm
+        JOIN avg_prices ap_p ON ap_p.name_normalized = pm.name_picnic  AND ap_p.store = 'picnic'
+        JOIN avg_prices ap_l ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
+        WHERE pm.status = 'validated'
+        ORDER BY ABS(ap_l.avg_price - ap_p.avg_price) DESC
+        """
+    )
+
+    # pm.id is selected by the query but is not part of the returned dicts.
+    return [
+        {column: record[column] for column in exported_columns}
+        for record in cursor.fetchall()
+    ]
+
+
 def get_product_list(conn: sqlite3.Connection) -> list[str]:
    """Liste tous les noms normalisés distincts (non NULL) pour le sélecteur.

@@ -46,3 +46,72 @@
 .overflow-auto {
    overflow-x: auto;
 }
+
+/* Badge for fuzzy matches in the compare table */
+.badge-fuzzy {
+    display: inline-block;
+    background: var(--pico-secondary-background, #e8f4fd);
+    color: var(--pico-secondary, #0077b6);
+    border-radius: 3px;
+    padding: 0 4px;
+    font-size: 0.75rem;
+    font-weight: bold;
+    cursor: help;
+}
+
+/* Similarity score in the matches table */
+.match-score {
+    display: inline-block;
+    padding: 2px 6px;
+    border-radius: 4px;
+    font-weight: bold;
+}
+
+/* Colour tiers for the score; the matches template picks the class */
+.score-high   { background: #d4edda; color: #155724; }
+.score-medium { background: #fff3cd; color: #856404; }
+.score-low    { background: #f8d7da; color: #721c24; }
+
+/* Validate/reject buttons in the matches table */
+.btn-validate {
+    background: var(--pico-primary);
+    color: white;
+    border: none;
+    padding: 4px 10px;
+    border-radius: 4px;
+    cursor: pointer;
+    font-size: 0.85rem;
+}
+
+.btn-reject {
+    padding: 4px 10px;
+    font-size: 0.85rem;
+}
+
+.match-actions {
+    white-space: nowrap;
+}
+
+/* Date filter form */
+.date-filter {
+    display: flex;
+    gap: 0.5rem;
+    align-items: center;
+    flex-wrap: wrap;
+    margin-bottom: 1.5rem;
+    padding: 0.75rem 1rem;
+    background: var(--pico-card-background-color, #f8f9fa);
+    border-radius: 6px;
+}
+
+.date-filter input[type="month"] {
+    width: auto;
+    margin: 0;
+    padding: 4px 8px;
+}
+
+.date-filter button,
+.date-filter a {
+    margin: 0;
+    padding: 4px 12px;
+    font-size: 0.9rem;
+}
@@ -20,6 +20,7 @@
            <ul>
                <li><a href="/">Accueil</a></li>
                <li><a href="/compare">Comparer</a></li>
+                <li><a href="/matches">Correspondances</a></li>
                <li><a href="/api/docs" target="_blank">API docs</a></li>
            </ul>
        </nav>
@@ -38,7 +38,12 @@
        <tbody>
            {% for p in products %}
            <tr>
-                <td>{{ p.name }}</td>
+                <td>
+                    {{ p.name_display }}
+                    {% if p.match_type == 'fuzzy' %}
+                    <span class="badge-fuzzy" title="Correspondance fuzzy validée">~</span>
+                    {% endif %}
+                </td>
                <td>{{ "%.2f"|format(p.price_picnic) }} €</td>
                <td>{{ "%.2f"|format(p.price_leclerc) }} €</td>
                <td class="{% if p.diff > 0 %}diff-positive{% elif p.diff < 0 %}diff-negative{% endif %}">
@@ -56,7 +61,12 @@
    </table>
 </div>

-<p><small>Positif = Leclerc plus cher, négatif = Picnic plus cher.</small></p>
+<p>
+    <small>Positif = Leclerc plus cher, négatif = Picnic plus cher.</small><br>
+    <small><span class="badge-fuzzy">~</span> = correspondance fuzzy validée (noms différents, même produit)</small>
+</p>
+
+<p><a href="/matches">Gérer les correspondances fuzzy →</a></p>

 {% endif %}
 {% endblock %}
@@ -0,0 +1,85 @@
+{% extends "base.html" %}
+
+{% block title %}Correspondances fuzzy — TicketTracker{% endblock %}
+
+{% block content %}
+<h1>Correspondances Picnic ↔ Leclerc</h1>
+
+<p>
+    Ces paires ont été détectées automatiquement par fuzzy matching.
+    Validez celles qui désignent le même produit pour enrichir la comparaison de prix.
+</p>
+
+<!-- Summary counters: pending / validated / rejected -->
+<div class="stat-grid">
+    <article class="stat-card">
+        <h3>{{ pending | length }}</h3>
+        <p>En attente</p>
+    </article>
+    <article class="stat-card">
+        <h3>{{ validated_count }}</h3>
+        <p>Validées</p>
+    </article>
+    <article class="stat-card">
+        <h3>{{ rejected_count }}</h3>
+        <p>Rejetées</p>
+    </article>
+</div>
+
+{% if pending %}
+<article>
+    <h2>Paires à valider</h2>
+    <div class="overflow-auto">
+        <table>
+            <thead>
+                <tr>
+                    <th>Produit Picnic</th>
+                    <th>Prix moy.</th>
+                    <th>Produit Leclerc</th>
+                    <th>Prix moy.</th>
+                    <th>Score</th>
+                    <th>Action</th>
+                </tr>
+            </thead>
+            <tbody>
+                {% for m in pending %}
+                <tr>
+                    <td>{{ m.name_picnic }}</td>
+                    {# "is not none" so that a 0.00 average is shown instead of the missing-price dash #}
+                    <td>{% if m.price_picnic is not none %}{{ "%.2f"|format(m.price_picnic) }} €{% else %}—{% endif %}</td>
+                    <td>{{ m.name_leclerc }}</td>
+                    <td>{% if m.price_leclerc is not none %}{{ "%.2f"|format(m.price_leclerc) }} €{% else %}—{% endif %}</td>
+                    <td>
+                        <small class="match-score {% if m.score >= 95 %}score-high{% elif m.score >= 85 %}score-medium{% else %}score-low{% endif %}">
+                            {{ "%.0f"|format(m.score) }}%
+                        </small>
+                    </td>
+                    <td class="match-actions">
+                        <form method="post" action="/api/match/{{ m.id }}/validate" style="display:inline">
+                            <button type="submit" class="btn-validate">✓ Valider</button>
+                        </form>
+                        <form method="post" action="/api/match/{{ m.id }}/reject" style="display:inline">
+                            <button type="submit" class="btn-reject secondary outline">✗ Rejeter</button>
+                        </form>
+                    </td>
+                </tr>
+                {% endfor %}
+            </tbody>
+        </table>
+    </div>
+</article>
+
+{% else %}
+<article>
+    {# <pre> is kept outside <p>: a block element inside a paragraph is invalid HTML #}
+    {% if validated_count == 0 and rejected_count == 0 %}
+    <p>
+        Aucune paire en attente.
+        Lancez d'abord la commande de matching :
+    </p>
+    <pre><code>python -m tickettracker.cli match --threshold 85</code></pre>
+    {% else %}
+    <p>
+        Aucune paire en attente.
+        Toutes les paires ont été traitées ({{ validated_count }} validées, {{ rejected_count }} rejetées).
+    </p>
+    {% endif %}
+</article>
+{% endif %}
+
+{% endblock %}