Files
TicketTracker/tests/test_matcher.py
laurent be4d4a7076 feat: fuzzy matching Picnic ↔ Leclerc + page /matches dans le dashboard
Nouvelle table product_matches (status: pending/validated/rejected).
Matching via RapidFuzz token_sort_ratio, seuil configurable (défaut 85%).

Workflow :
  1. python -m tickettracker.cli match [--threshold 85]
     → calcule et stocke les paires candidates
  2. http://localhost:8000/matches
     → l'utilisateur valide ou rejette chaque paire
  3. La comparaison de prix enrichie avec les paires validées

Nouvelles dépendances : rapidfuzz, watchdog (requirements.txt).
10 tests ajoutés (test_matcher.py), tous passent.
Suite complète : 129 passent, 1 xfail, 0 échec.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-25 18:02:48 +01:00

210 lines
8.2 KiB
Python

"""
Tests du fuzzy matcher (tickettracker/db/matcher.py).
Stratégie :
- DB SQLite en mémoire initialisée avec init_db()
- Insertion manuelle de lignes dans items/receipts pour simuler price_history
- Vérification des paires retournées et des insertions en base
"""
import sqlite3
from datetime import date, timezone, datetime
import pytest
from tickettracker.db.schema import init_db, get_connection
from tickettracker.db.matcher import find_fuzzy_matches, save_fuzzy_matches
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def db_path(tmp_path):
    """Return the path of a SQLite file created under pytest's temp directory.

    ``init_db`` is run on the file before the path is handed to the test.
    """
    database_file = tmp_path / "test_matcher.db"
    init_db(database_file)
    return database_file
@pytest.fixture
def conn_with_products(db_path):
    """Yield a connection seeded with similar Picnic and Leclerc products.

    Two receipts are inserted (one per store) and two items are attached to
    each of them, with ``name_normalized`` filled in. The connection is closed
    on teardown, and also when seeding fails part-way, so a broken setup does
    not leave an open handle behind.
    """
    # Every seeded item shares quantity 1 and unit 'pièce'; only the receipt,
    # the names and the prices vary, so they are bound as parameters.
    item_sql = (
        "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
        "VALUES (?, ?, ?, 1, 'pièce', ?, ?)"
    )
    conn = get_connection(db_path)
    try:
        with conn:
            # One receipt per store.
            r_picnic = conn.execute(
                "INSERT INTO receipts (store, date, total, raw_json, created_at) "
                "VALUES ('picnic', '2026-01-10', 15.0, '{}', '2026-01-10T10:00:00')"
            ).lastrowid
            r_leclerc = conn.execute(
                "INSERT INTO receipts (store, date, total, raw_json, created_at) "
                "VALUES ('leclerc', '2026-01-15', 20.0, '{}', '2026-01-15T10:00:00')"
            ).lastrowid
            seed_items = [
                # Picnic products.
                (r_picnic, "Lait demi-écremé", "lait demi-écrémé", 1.05, 1.05),
                (r_picnic, "Jus orange", "jus d orange", 2.10, 2.10),
                # Leclerc products (the milk is close to the Picnic one).
                (r_leclerc, "LAIT DEMI ECREME", "lait demi ecreme", 0.95, 0.95),
                (r_leclerc, "FARINE BLE", "farine blé", 1.20, 1.20),
            ]
            for item in seed_items:
                conn.execute(item_sql, item)
        yield conn
    finally:
        conn.close()
@pytest.fixture
def conn_empty(db_path):
    """Yield a connection to the initialised database with no rows added by this fixture."""
    connection = get_connection(db_path)
    yield connection
    connection.close()
# ---------------------------------------------------------------------------
# Tests find_fuzzy_matches
# ---------------------------------------------------------------------------
def test_find_fuzzy_matches_returns_list(conn_with_products):
    """The value returned by find_fuzzy_matches is a list."""
    assert isinstance(find_fuzzy_matches(conn_with_products, threshold=70.0), list)
def test_find_fuzzy_matches_detects_similar_products(conn_with_products):
    """Similar products (the two semi-skimmed milks) are paired at a low threshold."""
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    assert len(matches) >= 1
    # Check the full pair, not just the Picnic side: the Picnic name appearing
    # next to an unrelated Leclerc product must not satisfy this test.
    pairs = {(m["name_picnic"], m["name_leclerc"]) for m in matches}
    assert ("lait demi-écrémé", "lait demi ecreme") in pairs
def test_find_fuzzy_matches_threshold_respected(conn_with_products):
    """At threshold 100 nothing is returned, since no two seeded names are identical."""
    strict_matches = find_fuzzy_matches(conn_with_products, threshold=100.0)
    assert strict_matches == []
def test_find_fuzzy_matches_high_threshold_reduces_results(conn_with_products):
    """A high threshold never yields more results than a low one."""
    # Evaluated in tuple order: the low threshold first, then the high one.
    result_counts = {
        threshold: len(find_fuzzy_matches(conn_with_products, threshold=threshold))
        for threshold in (50.0, 90.0)
    }
    assert result_counts[90.0] <= result_counts[50.0]
def test_find_fuzzy_matches_sorted_by_score_desc(conn_with_products):
    """Results come back ordered by descending score."""
    scores = [
        match["score"]
        for match in find_fuzzy_matches(conn_with_products, threshold=50.0)
    ]
    expected_order = sorted(scores, reverse=True)
    assert scores == expected_order
def test_find_fuzzy_matches_result_structure(conn_with_products):
    """Every result exposes the expected keys and a score within 0..100."""
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    # An empty result would make the checks below vacuous, so require at least
    # one match (the seeded milk products are expected to pair at this threshold).
    assert matches, "expected at least one fuzzy match at threshold 70"
    for m in matches:
        for key in ("name_picnic", "name_leclerc", "score"):
            assert key in m
        assert 0 <= m["score"] <= 100
def test_find_fuzzy_matches_exact_same_excluded(conn_with_products):
    """Identical names must not be reported as fuzzy pairs."""
    db = conn_with_products
    # Add the very same product name ('pain') under both stores.
    with db:
        picnic_receipt_id = db.execute(
            "INSERT INTO receipts (store, date, total, raw_json, created_at) "
            "VALUES ('picnic', '2026-02-01', 5.0, '{}', '2026-02-01T10:00:00')"
        ).lastrowid
        db.execute(
            "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
            "VALUES (?, 'pain', 'pain', 1, 'pièce', 1.0, 1.0)",
            (picnic_receipt_id,),
        )
        leclerc_receipt_id = db.execute(
            "INSERT INTO receipts (store, date, total, raw_json, created_at) "
            "VALUES ('leclerc', '2026-02-01', 5.0, '{}', '2026-02-01T11:00:00')"
        ).lastrowid
        db.execute(
            "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
            "VALUES (?, 'pain', 'pain', 1, 'pièce', 0.9, 0.9)",
            (leclerc_receipt_id,),
        )
    found = find_fuzzy_matches(db, threshold=70.0)
    # No returned pair may have the same name on both sides.
    assert all(pair["name_picnic"] != pair["name_leclerc"] for pair in found)
def test_find_fuzzy_matches_empty_db(conn_empty):
    """With no products in the database, the matcher returns an empty list."""
    assert find_fuzzy_matches(conn_empty, threshold=85.0) == []
# ---------------------------------------------------------------------------
# Tests save_fuzzy_matches
# ---------------------------------------------------------------------------
def test_save_fuzzy_matches_inserts_rows(conn_with_products):
    """save_fuzzy_matches stores the new pairs and reports how many it inserted."""
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    # Without at least one candidate pair the test would pass as 0 == 0.
    assert matches, "expected at least one fuzzy match at threshold 70"
    inserted = save_fuzzy_matches(conn_with_products, matches)
    assert inserted == len(matches)
    # Check the table itself, not only the reported count.
    stored = conn_with_products.execute(
        "SELECT COUNT(*) FROM product_matches"
    ).fetchone()[0]
    assert stored == len(matches)
def test_save_fuzzy_matches_ignores_duplicates(conn_with_products):
    """Saving the same pairs a second time inserts nothing."""
    candidate_pairs = find_fuzzy_matches(conn_with_products, threshold=70.0)
    # Two consecutive saves of the same list; only the second count is checked.
    insert_counts = [
        save_fuzzy_matches(conn_with_products, candidate_pairs) for _ in range(2)
    ]
    assert insert_counts[1] == 0
def test_save_fuzzy_matches_status_pending(conn_with_products):
    """Freshly saved pairs carry the 'pending' status."""
    matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
    save_fuzzy_matches(conn_with_products, matches)
    rows = conn_with_products.execute(
        "SELECT status FROM product_matches"
    ).fetchall()
    # all() over zero rows is vacuously true, so require at least one stored row.
    assert rows, "expected at least one row in product_matches"
    assert all(r["status"] == "pending" for r in rows)
def test_save_fuzzy_matches_returns_correct_count(conn_with_products):
    """Saving a single hand-built pair reports exactly one inserted row."""
    single_pair = {"name_picnic": "test1", "name_leclerc": "test2", "score": 90.0}
    assert save_fuzzy_matches(conn_with_products, [single_pair]) == 1
def test_save_fuzzy_matches_empty_list(conn_with_products):
    """Saving an empty list returns 0 and leaves product_matches empty."""
    assert save_fuzzy_matches(conn_with_products, []) == 0
    count_row = conn_with_products.execute(
        "SELECT COUNT(*) FROM product_matches"
    ).fetchone()
    assert count_row[0] == 0