""" Tests du fuzzy matcher (tickettracker/db/matcher.py). Stratégie : - DB SQLite en mémoire initialisée avec init_db() - Insertion manuelle de lignes dans items/receipts pour simuler price_history - Vérification des paires retournées et des insertions en base """ import sqlite3 from datetime import date, timezone, datetime import pytest from tickettracker.db.schema import init_db, get_connection from tickettracker.db.matcher import find_fuzzy_matches, save_fuzzy_matches # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @pytest.fixture def db_path(tmp_path): """Base SQLite vide dans un répertoire temporaire.""" path = tmp_path / "test_matcher.db" init_db(path) return path @pytest.fixture def conn_with_products(db_path): """Connexion avec produits Picnic et Leclerc similaires.""" conn = get_connection(db_path) # Insérer deux tickets (un Picnic, un Leclerc) with conn: r_picnic = conn.execute( "INSERT INTO receipts (store, date, total, raw_json, created_at) " "VALUES ('picnic', '2026-01-10', 15.0, '{}', '2026-01-10T10:00:00')" ).lastrowid r_leclerc = conn.execute( "INSERT INTO receipts (store, date, total, raw_json, created_at) " "VALUES ('leclerc', '2026-01-15', 20.0, '{}', '2026-01-15T10:00:00')" ).lastrowid # Produits Picnic (name_normalized rempli) conn.execute( "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " "VALUES (?, 'Lait demi-écremé', 'lait demi-écrémé', 1, 'pièce', 1.05, 1.05)", (r_picnic,), ) conn.execute( "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " "VALUES (?, 'Jus orange', 'jus d orange', 1, 'pièce', 2.10, 2.10)", (r_picnic,), ) # Produits Leclerc (similaires aux Picnic) conn.execute( "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " "VALUES (?, 'LAIT DEMI ECREME', 'lait demi ecreme', 1, 'pièce', 0.95, 0.95)", (r_leclerc,), ) conn.execute( "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " "VALUES (?, 'FARINE BLE', 'farine blé', 1, 'pièce', 1.20, 1.20)", (r_leclerc,), ) yield conn conn.close() @pytest.fixture def conn_empty(db_path): """Connexion sur base vide (pas d'articles normalisés).""" conn = get_connection(db_path) yield conn conn.close() # --------------------------------------------------------------------------- # Tests find_fuzzy_matches # --------------------------------------------------------------------------- def test_find_fuzzy_matches_returns_list(conn_with_products): """find_fuzzy_matches retourne une liste.""" result = find_fuzzy_matches(conn_with_products, threshold=70.0) assert isinstance(result, list) def test_find_fuzzy_matches_detects_similar_products(conn_with_products): """Des produits similaires (lait demi) sont détectés avec un seuil bas.""" matches = find_fuzzy_matches(conn_with_products, threshold=70.0) assert len(matches) >= 1 # La paire lait demi-écrémé ↔ lait demi ecreme doit être détectée picnic_names = [m["name_picnic"] for m in matches] assert "lait demi-écrémé" in picnic_names def test_find_fuzzy_matches_threshold_respected(conn_with_products): """Avec un seuil de 100, aucun match (car noms ≠ exact).""" matches = find_fuzzy_matches(conn_with_products, threshold=100.0) assert matches == [] def test_find_fuzzy_matches_high_threshold_reduces_results(conn_with_products): """Un seuil élevé retourne moins de résultats qu'un seuil bas.""" matches_low = find_fuzzy_matches(conn_with_products, threshold=50.0) matches_high = find_fuzzy_matches(conn_with_products, threshold=90.0) assert len(matches_high) <= len(matches_low) def test_find_fuzzy_matches_sorted_by_score_desc(conn_with_products): """Les résultats sont triés par score décroissant.""" matches = find_fuzzy_matches(conn_with_products, threshold=50.0) scores = [m["score"] for m in matches] assert scores == sorted(scores, reverse=True) def test_find_fuzzy_matches_result_structure(conn_with_products): """Chaque résultat a les clés attendues.""" matches = find_fuzzy_matches(conn_with_products, threshold=70.0) if matches: m = matches[0] assert "name_picnic" in m assert "name_leclerc" in m assert "score" in m assert 0 <= m["score"] <= 100 def test_find_fuzzy_matches_exact_same_excluded(conn_with_products): """Les noms identiques ne doivent pas apparaître comme paires fuzzy.""" # On insère un produit identique dans les deux enseignes with conn_with_products: r = conn_with_products.execute( "INSERT INTO receipts (store, date, total, raw_json, created_at) " "VALUES ('picnic', '2026-02-01', 5.0, '{}', '2026-02-01T10:00:00')" ).lastrowid conn_with_products.execute( "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " "VALUES (?, 'pain', 'pain', 1, 'pièce', 1.0, 1.0)", (r,), ) r2 = conn_with_products.execute( "INSERT INTO receipts (store, date, total, raw_json, created_at) " "VALUES ('leclerc', '2026-02-01', 5.0, '{}', '2026-02-01T11:00:00')" ).lastrowid conn_with_products.execute( "INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) " "VALUES (?, 'pain', 'pain', 1, 'pièce', 0.9, 0.9)", (r2,), ) matches = find_fuzzy_matches(conn_with_products, threshold=70.0) # Aucune paire ne doit avoir name_picnic == name_leclerc for m in matches: assert m["name_picnic"] != m["name_leclerc"] def test_find_fuzzy_matches_empty_db(conn_empty): """Sur une base sans produits normalisés, retourne une liste vide.""" matches = find_fuzzy_matches(conn_empty, threshold=85.0) assert matches == [] # --------------------------------------------------------------------------- # Tests save_fuzzy_matches # --------------------------------------------------------------------------- def test_save_fuzzy_matches_inserts_rows(conn_with_products): """save_fuzzy_matches insère les nouvelles paires en base.""" matches = find_fuzzy_matches(conn_with_products, threshold=70.0) inserted = save_fuzzy_matches(conn_with_products, matches) assert inserted == len(matches) def test_save_fuzzy_matches_ignores_duplicates(conn_with_products): """Un second appel avec les mêmes paires n'insère rien (OR IGNORE).""" matches = find_fuzzy_matches(conn_with_products, threshold=70.0) save_fuzzy_matches(conn_with_products, matches) inserted_again = save_fuzzy_matches(conn_with_products, matches) assert inserted_again == 0 def test_save_fuzzy_matches_status_pending(conn_with_products): """Les paires insérées ont le statut 'pending' par défaut.""" matches = find_fuzzy_matches(conn_with_products, threshold=70.0) save_fuzzy_matches(conn_with_products, matches) rows = conn_with_products.execute( "SELECT status FROM product_matches" ).fetchall() assert all(r["status"] == "pending" for r in rows) def test_save_fuzzy_matches_returns_correct_count(conn_with_products): """save_fuzzy_matches retourne exactement le nombre de lignes insérées.""" matches = [{"name_picnic": "test1", "name_leclerc": "test2", "score": 90.0}] count = save_fuzzy_matches(conn_with_products, matches) assert count == 1 def test_save_fuzzy_matches_empty_list(conn_with_products): """Appel avec une liste vide retourne 0 et ne modifie pas la base.""" count = save_fuzzy_matches(conn_with_products, []) assert count == 0 rows = conn_with_products.execute("SELECT COUNT(*) FROM product_matches").fetchone()[0] assert rows == 0