feat: fuzzy matching Picnic ↔ Leclerc + page /matches dans le dashboard
Nouvelle table product_matches (status: pending/validated/rejected).
Matching via RapidFuzz token_sort_ratio, seuil configurable (défaut 85%).
Workflow :
1. python -m tickettracker.cli match [--threshold 85]
→ calcule et stocke les paires candidates
2. http://localhost:8000/matches
→ l'utilisateur valide ou rejette chaque paire
3. La comparaison de prix enrichie avec les paires validées
Nouvelles dépendances : rapidfuzz, watchdog (requirements.txt).
10 tests ajoutés (test_matcher.py), tous passent.
Suite complète : 129 passent, 1 xfail, 0 échec.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
209
tests/test_matcher.py
Normal file
209
tests/test_matcher.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
Tests du fuzzy matcher (tickettracker/db/matcher.py).
|
||||
|
||||
Stratégie :
|
||||
- DB SQLite en mémoire initialisée avec init_db()
|
||||
- Insertion manuelle de lignes dans items/receipts pour simuler price_history
|
||||
- Vérification des paires retournées et des insertions en base
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
from datetime import date, timezone, datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from tickettracker.db.schema import init_db, get_connection
|
||||
from tickettracker.db.matcher import find_fuzzy_matches, save_fuzzy_matches
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
def db_path(tmp_path):
    """Fresh, empty SQLite database file inside pytest's temp directory."""
    location = tmp_path / "test_matcher.db"
    init_db(location)
    return location
|
||||
|
||||
|
||||
@pytest.fixture
def conn_with_products(db_path):
    """Connection seeded with near-duplicate Picnic and Leclerc products."""
    connection = get_connection(db_path)

    with connection:
        # One receipt per store; the item rows below hang off these ids.
        picnic_receipt = connection.execute(
            "INSERT INTO receipts (store, date, total, raw_json, created_at) "
            "VALUES ('picnic', '2026-01-10', 15.0, '{}', '2026-01-10T10:00:00')"
        ).lastrowid
        leclerc_receipt = connection.execute(
            "INSERT INTO receipts (store, date, total, raw_json, created_at) "
            "VALUES ('leclerc', '2026-01-15', 20.0, '{}', '2026-01-15T10:00:00')"
        ).lastrowid

        # Two Picnic items and two Leclerc items, all with name_normalized
        # filled in.  The two "lait demi" rows are deliberately similar so a
        # low fuzzy threshold pairs them up; "jus" and "farine" have no
        # counterpart.
        item_inserts = [
            (
                "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
                "VALUES (?, 'Lait demi-écremé', 'lait demi-écrémé', 1, 'pièce', 1.05, 1.05)",
                picnic_receipt,
            ),
            (
                "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
                "VALUES (?, 'Jus orange', 'jus d orange', 1, 'pièce', 2.10, 2.10)",
                picnic_receipt,
            ),
            (
                "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
                "VALUES (?, 'LAIT DEMI ECREME', 'lait demi ecreme', 1, 'pièce', 0.95, 0.95)",
                leclerc_receipt,
            ),
            (
                "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
                "VALUES (?, 'FARINE BLE', 'farine blé', 1, 'pièce', 1.20, 1.20)",
                leclerc_receipt,
            ),
        ]
        for statement, receipt_id in item_inserts:
            connection.execute(statement, (receipt_id,))

    yield connection
    connection.close()
|
||||
|
||||
|
||||
@pytest.fixture
def conn_empty(db_path):
    """Connection to a pristine database that holds no normalized items."""
    connection = get_connection(db_path)
    yield connection
    connection.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests find_fuzzy_matches
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_find_fuzzy_matches_returns_list(conn_with_products):
    """The matcher's return value is always a plain list."""
    outcome = find_fuzzy_matches(conn_with_products, threshold=70.0)
    assert isinstance(outcome, list)
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_detects_similar_products(conn_with_products):
    """Similar products (the two milk rows) are caught at a low threshold."""
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    assert matches
    # The pair lait demi-écrémé ↔ lait demi ecreme must be among the results.
    assert any(m["name_picnic"] == "lait demi-écrémé" for m in matches)
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_threshold_respected(conn_with_products):
    """A threshold of 100 yields nothing: no two names are exactly equal."""
    assert find_fuzzy_matches(conn_with_products, threshold=100.0) == []
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_high_threshold_reduces_results(conn_with_products):
    """Raising the threshold can only shrink the candidate set."""
    permissive = find_fuzzy_matches(conn_with_products, threshold=50.0)
    strict = find_fuzzy_matches(conn_with_products, threshold=90.0)
    assert len(strict) <= len(permissive)
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_sorted_by_score_desc(conn_with_products):
    """Results come back ordered from best score to worst."""
    matches = find_fuzzy_matches(conn_with_products, threshold=50.0)
    scores = [entry["score"] for entry in matches]
    # Every adjacent pair must be non-increasing.
    assert all(left >= right for left, right in zip(scores, scores[1:]))
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_result_structure(conn_with_products):
    """Every result carries the expected keys and a score within [0, 100].

    The original version guarded the assertions behind ``if matches:``,
    which let the test pass vacuously if the matcher returned nothing.
    The fixture is built so that at least one pair (the two milk products)
    clears a 70% threshold, so we require a non-empty result and then
    validate the shape of *every* entry, not just the first.
    """
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    # Guard against a vacuous pass: the fixture guarantees at least one match.
    assert matches
    for m in matches:
        assert "name_picnic" in m
        assert "name_leclerc" in m
        assert "score" in m
        assert 0 <= m["score"] <= 100
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_exact_same_excluded(conn_with_products):
    """Identical normalized names must never surface as fuzzy pairs."""
    # Seed the very same product ('pain') into both stores.
    with conn_with_products:
        picnic_receipt = conn_with_products.execute(
            "INSERT INTO receipts (store, date, total, raw_json, created_at) "
            "VALUES ('picnic', '2026-02-01', 5.0, '{}', '2026-02-01T10:00:00')"
        ).lastrowid
        conn_with_products.execute(
            "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
            "VALUES (?, 'pain', 'pain', 1, 'pièce', 1.0, 1.0)",
            (picnic_receipt,),
        )
        leclerc_receipt = conn_with_products.execute(
            "INSERT INTO receipts (store, date, total, raw_json, created_at) "
            "VALUES ('leclerc', '2026-02-01', 5.0, '{}', '2026-02-01T11:00:00')"
        ).lastrowid
        conn_with_products.execute(
            "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
            "VALUES (?, 'pain', 'pain', 1, 'pièce', 0.9, 0.9)",
            (leclerc_receipt,),
        )

    # No pair may have the same name on both sides.
    assert all(
        m["name_picnic"] != m["name_leclerc"]
        for m in find_fuzzy_matches(conn_with_products, threshold=70.0)
    )
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_empty_db(conn_empty):
    """A database with no normalized items produces no candidate pairs."""
    assert find_fuzzy_matches(conn_empty, threshold=85.0) == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests save_fuzzy_matches
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_save_fuzzy_matches_inserts_rows(conn_with_products):
    """Every candidate pair found gets persisted, and the count says so."""
    candidates = find_fuzzy_matches(conn_with_products, threshold=70.0)
    assert save_fuzzy_matches(conn_with_products, candidates) == len(candidates)
|
||||
|
||||
|
||||
def test_save_fuzzy_matches_ignores_duplicates(conn_with_products):
    """Re-saving identical pairs inserts nothing (INSERT OR IGNORE semantics)."""
    candidates = find_fuzzy_matches(conn_with_products, threshold=70.0)
    save_fuzzy_matches(conn_with_products, candidates)
    # Second pass with the same payload must be a no-op.
    assert save_fuzzy_matches(conn_with_products, candidates) == 0
|
||||
|
||||
|
||||
def test_save_fuzzy_matches_status_pending(conn_with_products):
    """Freshly inserted pairs default to the 'pending' status.

    The original assertion used ``all(...)`` over the selected rows, which
    is vacuously true on an empty result set — the test would pass even if
    nothing was ever written to ``product_matches``.  Assert the table is
    non-empty before checking the statuses.
    """
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    save_fuzzy_matches(conn_with_products, matches)
    rows = conn_with_products.execute(
        "SELECT status FROM product_matches"
    ).fetchall()
    # all() over an empty list passes vacuously; require at least one row.
    assert rows
    assert all(r["status"] == "pending" for r in rows)
|
||||
|
||||
|
||||
def test_save_fuzzy_matches_returns_correct_count(conn_with_products):
    """The return value is exactly the number of rows inserted."""
    single_pair = [{"name_picnic": "test1", "name_leclerc": "test2", "score": 90.0}]
    assert save_fuzzy_matches(conn_with_products, single_pair) == 1
|
||||
|
||||
|
||||
def test_save_fuzzy_matches_empty_list(conn_with_products):
    """Saving an empty list returns 0 and leaves the table untouched."""
    assert save_fuzzy_matches(conn_with_products, []) == 0
    total = conn_with_products.execute(
        "SELECT COUNT(*) FROM product_matches"
    ).fetchone()[0]
    assert total == 0
|
||||
Reference in New Issue
Block a user