feat: fuzzy matching Picnic ↔ Leclerc + page /matches dans le dashboard
Nouvelle table product_matches (status: pending/validated/rejected).
Matching via RapidFuzz token_sort_ratio, seuil configurable (défaut 85%).
Workflow :
1. python -m tickettracker.cli match [--threshold 85]
→ calcule et stocke les paires candidates
2. http://localhost:8000/matches
→ l'utilisateur valide ou rejette chaque paire
3. La comparaison de prix enrichie avec les paires validées
Nouvelles dépendances : rapidfuzz, watchdog (requirements.txt).
10 tests ajoutés (test_matcher.py), tous passent.
Suite complète : 129 passent, 1 xfail, 0 échec.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
209
tests/test_matcher.py
Normal file
209
tests/test_matcher.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
Tests du fuzzy matcher (tickettracker/db/matcher.py).
|
||||
|
||||
Stratégie :
|
||||
- DB SQLite en mémoire initialisée avec init_db()
|
||||
- Insertion manuelle de lignes dans items/receipts pour simuler price_history
|
||||
- Vérification des paires retournées et des insertions en base
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
from datetime import date, timezone, datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from tickettracker.db.schema import init_db, get_connection
|
||||
from tickettracker.db.matcher import find_fuzzy_matches, save_fuzzy_matches
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
def db_path(tmp_path):
    """Fresh, empty SQLite database file inside pytest's temp directory."""
    location = tmp_path / "test_matcher.db"
    init_db(location)
    return location
|
||||
|
||||
|
||||
@pytest.fixture
def conn_with_products(db_path):
    """Connection seeded with near-duplicate Picnic and Leclerc products."""
    connection = get_connection(db_path)

    with connection:
        # One receipt per store; the item rows below hang off these ids.
        picnic_receipt = connection.execute(
            "INSERT INTO receipts (store, date, total, raw_json, created_at) "
            "VALUES ('picnic', '2026-01-10', 15.0, '{}', '2026-01-10T10:00:00')"
        ).lastrowid
        leclerc_receipt = connection.execute(
            "INSERT INTO receipts (store, date, total, raw_json, created_at) "
            "VALUES ('leclerc', '2026-01-15', 20.0, '{}', '2026-01-15T10:00:00')"
        ).lastrowid

        # Two Picnic items and two Leclerc items, all with name_normalized
        # filled in.  The two "lait demi" rows are deliberately similar so a
        # low fuzzy threshold pairs them up; "jus" and "farine" have no
        # counterpart.
        item_inserts = [
            (
                "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
                "VALUES (?, 'Lait demi-écremé', 'lait demi-écrémé', 1, 'pièce', 1.05, 1.05)",
                picnic_receipt,
            ),
            (
                "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
                "VALUES (?, 'Jus orange', 'jus d orange', 1, 'pièce', 2.10, 2.10)",
                picnic_receipt,
            ),
            (
                "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
                "VALUES (?, 'LAIT DEMI ECREME', 'lait demi ecreme', 1, 'pièce', 0.95, 0.95)",
                leclerc_receipt,
            ),
            (
                "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
                "VALUES (?, 'FARINE BLE', 'farine blé', 1, 'pièce', 1.20, 1.20)",
                leclerc_receipt,
            ),
        ]
        for statement, receipt_id in item_inserts:
            connection.execute(statement, (receipt_id,))

    yield connection
    connection.close()
|
||||
|
||||
|
||||
@pytest.fixture
def conn_empty(db_path):
    """Connection to a pristine database that holds no normalized items."""
    connection = get_connection(db_path)
    yield connection
    connection.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests find_fuzzy_matches
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_find_fuzzy_matches_returns_list(conn_with_products):
    """The matcher's return value is always a plain list."""
    outcome = find_fuzzy_matches(conn_with_products, threshold=70.0)
    assert isinstance(outcome, list)
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_detects_similar_products(conn_with_products):
    """Similar products (the two milk rows) are caught at a low threshold."""
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    assert matches
    # The pair lait demi-écrémé ↔ lait demi ecreme must be among the results.
    assert any(m["name_picnic"] == "lait demi-écrémé" for m in matches)
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_threshold_respected(conn_with_products):
    """A threshold of 100 yields nothing: no two names are exactly equal."""
    assert find_fuzzy_matches(conn_with_products, threshold=100.0) == []
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_high_threshold_reduces_results(conn_with_products):
    """Raising the threshold can only shrink the candidate set."""
    permissive = find_fuzzy_matches(conn_with_products, threshold=50.0)
    strict = find_fuzzy_matches(conn_with_products, threshold=90.0)
    assert len(strict) <= len(permissive)
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_sorted_by_score_desc(conn_with_products):
    """Results come back ordered from best score to worst."""
    matches = find_fuzzy_matches(conn_with_products, threshold=50.0)
    scores = [entry["score"] for entry in matches]
    # Every adjacent pair must be non-increasing.
    assert all(left >= right for left, right in zip(scores, scores[1:]))
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_result_structure(conn_with_products):
    """Every result carries the expected keys and a score within [0, 100].

    The original version guarded the assertions behind ``if matches:``,
    which let the test pass vacuously if the matcher returned nothing.
    The fixture is built so that at least one pair (the two milk products)
    clears a 70% threshold, so we require a non-empty result and then
    validate the shape of *every* entry, not just the first.
    """
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    # Guard against a vacuous pass: the fixture guarantees at least one match.
    assert matches
    for m in matches:
        assert "name_picnic" in m
        assert "name_leclerc" in m
        assert "score" in m
        assert 0 <= m["score"] <= 100
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_exact_same_excluded(conn_with_products):
    """Identical normalized names must never surface as fuzzy pairs."""
    # Seed the very same product ('pain') into both stores.
    with conn_with_products:
        picnic_receipt = conn_with_products.execute(
            "INSERT INTO receipts (store, date, total, raw_json, created_at) "
            "VALUES ('picnic', '2026-02-01', 5.0, '{}', '2026-02-01T10:00:00')"
        ).lastrowid
        conn_with_products.execute(
            "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
            "VALUES (?, 'pain', 'pain', 1, 'pièce', 1.0, 1.0)",
            (picnic_receipt,),
        )
        leclerc_receipt = conn_with_products.execute(
            "INSERT INTO receipts (store, date, total, raw_json, created_at) "
            "VALUES ('leclerc', '2026-02-01', 5.0, '{}', '2026-02-01T11:00:00')"
        ).lastrowid
        conn_with_products.execute(
            "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
            "VALUES (?, 'pain', 'pain', 1, 'pièce', 0.9, 0.9)",
            (leclerc_receipt,),
        )

    # No pair may have the same name on both sides.
    assert all(
        m["name_picnic"] != m["name_leclerc"]
        for m in find_fuzzy_matches(conn_with_products, threshold=70.0)
    )
|
||||
|
||||
|
||||
def test_find_fuzzy_matches_empty_db(conn_empty):
    """A database with no normalized items produces no candidate pairs."""
    assert find_fuzzy_matches(conn_empty, threshold=85.0) == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests save_fuzzy_matches
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_save_fuzzy_matches_inserts_rows(conn_with_products):
    """Every candidate pair found gets persisted, and the count says so."""
    candidates = find_fuzzy_matches(conn_with_products, threshold=70.0)
    assert save_fuzzy_matches(conn_with_products, candidates) == len(candidates)
|
||||
|
||||
|
||||
def test_save_fuzzy_matches_ignores_duplicates(conn_with_products):
    """Re-saving identical pairs inserts nothing (INSERT OR IGNORE semantics)."""
    candidates = find_fuzzy_matches(conn_with_products, threshold=70.0)
    save_fuzzy_matches(conn_with_products, candidates)
    # Second pass with the same payload must be a no-op.
    assert save_fuzzy_matches(conn_with_products, candidates) == 0
|
||||
|
||||
|
||||
def test_save_fuzzy_matches_status_pending(conn_with_products):
    """Freshly inserted pairs default to the 'pending' status.

    The original assertion used ``all(...)`` over the selected rows, which
    is vacuously true on an empty result set — the test would pass even if
    nothing was ever written to ``product_matches``.  Assert the table is
    non-empty before checking the statuses.
    """
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    save_fuzzy_matches(conn_with_products, matches)
    rows = conn_with_products.execute(
        "SELECT status FROM product_matches"
    ).fetchall()
    # all() over an empty list passes vacuously; require at least one row.
    assert rows
    assert all(r["status"] == "pending" for r in rows)
|
||||
|
||||
|
||||
def test_save_fuzzy_matches_returns_correct_count(conn_with_products):
    """The return value is exactly the number of rows inserted."""
    single_pair = [{"name_picnic": "test1", "name_leclerc": "test2", "score": 90.0}]
    assert save_fuzzy_matches(conn_with_products, single_pair) == 1
|
||||
|
||||
|
||||
def test_save_fuzzy_matches_empty_list(conn_with_products):
    """Saving an empty list returns 0 and leaves the table untouched."""
    assert save_fuzzy_matches(conn_with_products, []) == 0
    total = conn_with_products.execute(
        "SELECT COUNT(*) FROM product_matches"
    ).fetchone()[0]
    assert total == 0
|
||||
Reference in New Issue
Block a user