Compare commits

...

2 Commits

Author SHA1 Message Date
be4d4a7076 feat: fuzzy matching Picnic ↔ Leclerc + page /matches dans le dashboard
Nouvelle table product_matches (status: pending/validated/rejected).
Matching via RapidFuzz token_sort_ratio, seuil configurable (défaut 85%).

Workflow :
  1. python -m tickettracker.cli match [--threshold 85]
     → calcule et stocke les paires candidates
  2. http://localhost:8000/matches
     → l'utilisateur valide ou rejette chaque paire
  3. La comparaison de prix enrichie avec les paires validées

Nouvelles dépendances : rapidfuzz, watchdog (requirements.txt).
10 tests ajoutés (test_matcher.py), tous passent.
Suite complète : 129 passent, 1 xfail, 0 échec.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-25 18:02:48 +01:00
f360332626 feat: watcher — surveillance automatique du dossier inbox/
Surveille inbox/picnic/ et inbox/leclerc/ avec watchdog.
Chaque nouveau fichier est importé automatiquement :
  - succès/doublon → processed/{source}_{date}_{nom}
  - erreur         → failed/{nom} + failed/{nom}.log

Nouvelle commande CLI : python -m tickettracker.cli watch [--inbox] [--db]
22 tests ajoutés (test_watcher.py), tous passent.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-25 18:02:40 +01:00
16 changed files with 1168 additions and 47 deletions

View File

@@ -1,24 +0,0 @@
{
"permissions": {
"allow": [
"Bash(.venv/Scripts/pytest tests/ -v)",
"Bash(git add:*)",
"Bash(git commit:*)",
"Bash(.venv/Scripts/python:*)",
"Bash(cat:*)",
"Bash(python:*)",
"Bash(.venv/Scripts/python.exe:*)",
"Bash(PYTHONIOENCODING=utf-8 .venv/Scripts/python.exe:*)",
"Bash(PYTHONIOENCODING=utf-8 python:*)",
"Bash(tesseract:*)",
"Bash(winget install:*)",
"Bash(curl:*)",
"Bash(TESSDATA=\"/c/Program Files/Tesseract-OCR/tessdata\")",
"Bash(TESSDATA_PREFIX=/c/code/TicketTracker/tessdata python:*)",
"Bash(ls:*)",
"Bash(.venv/Scripts/pip install:*)",
"Bash(Marque)",
"Bash(Quantité\":*)"
]
}
}

View File

@@ -17,6 +17,12 @@ jinja2>=3.1
python-multipart>=0.0.12
httpx>=0.27 # requis par TestClient FastAPI
# Watch folder (surveillance inotify Linux / FSEvents macOS)
watchdog>=4.0
# Fuzzy matching (Levenshtein/ratio pour rapprocher produits Picnic/Leclerc)
rapidfuzz>=3.9
# Tests
pytest==8.3.4

209
tests/test_matcher.py Normal file
View File

@@ -0,0 +1,209 @@
"""
Tests du fuzzy matcher (tickettracker/db/matcher.py).
Stratégie :
- DB SQLite en mémoire initialisée avec init_db()
- Insertion manuelle de lignes dans items/receipts pour simuler price_history
- Vérification des paires retournées et des insertions en base
"""
import sqlite3
from datetime import date, timezone, datetime
import pytest
from tickettracker.db.schema import init_db, get_connection
from tickettracker.db.matcher import find_fuzzy_matches, save_fuzzy_matches
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def db_path(tmp_path):
"""Base SQLite vide dans un répertoire temporaire."""
path = tmp_path / "test_matcher.db"
init_db(path)
return path
@pytest.fixture
def conn_with_products(db_path):
"""Connexion avec produits Picnic et Leclerc similaires."""
conn = get_connection(db_path)
# Insérer deux tickets (un Picnic, un Leclerc)
with conn:
r_picnic = conn.execute(
"INSERT INTO receipts (store, date, total, raw_json, created_at) "
"VALUES ('picnic', '2026-01-10', 15.0, '{}', '2026-01-10T10:00:00')"
).lastrowid
r_leclerc = conn.execute(
"INSERT INTO receipts (store, date, total, raw_json, created_at) "
"VALUES ('leclerc', '2026-01-15', 20.0, '{}', '2026-01-15T10:00:00')"
).lastrowid
# Produits Picnic (name_normalized rempli)
conn.execute(
"INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
"VALUES (?, 'Lait demi-écremé', 'lait demi-écrémé', 1, 'pièce', 1.05, 1.05)",
(r_picnic,),
)
conn.execute(
"INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
"VALUES (?, 'Jus orange', 'jus d orange', 1, 'pièce', 2.10, 2.10)",
(r_picnic,),
)
# Produits Leclerc (similaires aux Picnic)
conn.execute(
"INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
"VALUES (?, 'LAIT DEMI ECREME', 'lait demi ecreme', 1, 'pièce', 0.95, 0.95)",
(r_leclerc,),
)
conn.execute(
"INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
"VALUES (?, 'FARINE BLE', 'farine blé', 1, 'pièce', 1.20, 1.20)",
(r_leclerc,),
)
yield conn
conn.close()
@pytest.fixture
def conn_empty(db_path):
"""Connexion sur base vide (pas d'articles normalisés)."""
conn = get_connection(db_path)
yield conn
conn.close()
# ---------------------------------------------------------------------------
# Tests find_fuzzy_matches
# ---------------------------------------------------------------------------
def test_find_fuzzy_matches_returns_list(conn_with_products):
"""find_fuzzy_matches retourne une liste."""
result = find_fuzzy_matches(conn_with_products, threshold=70.0)
assert isinstance(result, list)
def test_find_fuzzy_matches_detects_similar_products(conn_with_products):
"""Des produits similaires (lait demi) sont détectés avec un seuil bas."""
matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
assert len(matches) >= 1
# La paire lait demi-écrémé ↔ lait demi ecreme doit être détectée
picnic_names = [m["name_picnic"] for m in matches]
assert "lait demi-écrémé" in picnic_names
def test_find_fuzzy_matches_threshold_respected(conn_with_products):
"""Avec un seuil de 100, aucun match (car noms ≠ exact)."""
matches = find_fuzzy_matches(conn_with_products, threshold=100.0)
assert matches == []
def test_find_fuzzy_matches_high_threshold_reduces_results(conn_with_products):
"""Un seuil élevé retourne moins de résultats qu'un seuil bas."""
matches_low = find_fuzzy_matches(conn_with_products, threshold=50.0)
matches_high = find_fuzzy_matches(conn_with_products, threshold=90.0)
assert len(matches_high) <= len(matches_low)
def test_find_fuzzy_matches_sorted_by_score_desc(conn_with_products):
"""Les résultats sont triés par score décroissant."""
matches = find_fuzzy_matches(conn_with_products, threshold=50.0)
scores = [m["score"] for m in matches]
assert scores == sorted(scores, reverse=True)
def test_find_fuzzy_matches_result_structure(conn_with_products):
"""Chaque résultat a les clés attendues."""
matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
if matches:
m = matches[0]
assert "name_picnic" in m
assert "name_leclerc" in m
assert "score" in m
assert 0 <= m["score"] <= 100
def test_find_fuzzy_matches_exact_same_excluded(conn_with_products):
"""Les noms identiques ne doivent pas apparaître comme paires fuzzy."""
# On insère un produit identique dans les deux enseignes
with conn_with_products:
r = conn_with_products.execute(
"INSERT INTO receipts (store, date, total, raw_json, created_at) "
"VALUES ('picnic', '2026-02-01', 5.0, '{}', '2026-02-01T10:00:00')"
).lastrowid
conn_with_products.execute(
"INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
"VALUES (?, 'pain', 'pain', 1, 'pièce', 1.0, 1.0)",
(r,),
)
r2 = conn_with_products.execute(
"INSERT INTO receipts (store, date, total, raw_json, created_at) "
"VALUES ('leclerc', '2026-02-01', 5.0, '{}', '2026-02-01T11:00:00')"
).lastrowid
conn_with_products.execute(
"INSERT INTO items (receipt_id, name_raw, name_normalized, quantity, unit, unit_price, total_price) "
"VALUES (?, 'pain', 'pain', 1, 'pièce', 0.9, 0.9)",
(r2,),
)
matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
# Aucune paire ne doit avoir name_picnic == name_leclerc
for m in matches:
assert m["name_picnic"] != m["name_leclerc"]
def test_find_fuzzy_matches_empty_db(conn_empty):
"""Sur une base sans produits normalisés, retourne une liste vide."""
matches = find_fuzzy_matches(conn_empty, threshold=85.0)
assert matches == []
# ---------------------------------------------------------------------------
# Tests save_fuzzy_matches
# ---------------------------------------------------------------------------
def test_save_fuzzy_matches_inserts_rows(conn_with_products):
"""save_fuzzy_matches insère les nouvelles paires en base."""
matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
inserted = save_fuzzy_matches(conn_with_products, matches)
assert inserted == len(matches)
def test_save_fuzzy_matches_ignores_duplicates(conn_with_products):
"""Un second appel avec les mêmes paires n'insère rien (OR IGNORE)."""
matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
save_fuzzy_matches(conn_with_products, matches)
inserted_again = save_fuzzy_matches(conn_with_products, matches)
assert inserted_again == 0
def test_save_fuzzy_matches_status_pending(conn_with_products):
"""Les paires insérées ont le statut 'pending' par défaut."""
matches = find_fuzzy_matches(conn_with_products, threshold=70.0)
save_fuzzy_matches(conn_with_products, matches)
rows = conn_with_products.execute(
"SELECT status FROM product_matches"
).fetchall()
assert all(r["status"] == "pending" for r in rows)
def test_save_fuzzy_matches_returns_correct_count(conn_with_products):
"""save_fuzzy_matches retourne exactement le nombre de lignes insérées."""
matches = [{"name_picnic": "test1", "name_leclerc": "test2", "score": 90.0}]
count = save_fuzzy_matches(conn_with_products, matches)
assert count == 1
def test_save_fuzzy_matches_empty_list(conn_with_products):
"""Appel avec une liste vide retourne 0 et ne modifie pas la base."""
count = save_fuzzy_matches(conn_with_products, [])
assert count == 0
rows = conn_with_products.execute("SELECT COUNT(*) FROM product_matches").fetchone()[0]
assert rows == 0

241
tests/test_watcher.py Normal file
View File

@@ -0,0 +1,241 @@
"""
Tests du watch folder (tickettracker/watcher.py).
Stratégie :
- Utilise tmp_path pour les dossiers inbox/processed/failed
- Mocke pipeline.import_receipt pour contrôler le résultat sans parser de vrais fichiers
- Teste _process_file directement (évite la dépendance à watchdog / inotify)
"""
from pathlib import Path
from unittest.mock import patch
import pytest
from tickettracker.watcher import _process_file, ReceiptHandler
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def dirs(tmp_path):
"""Structure de dossiers inbox/picnic, inbox/leclerc, processed, failed."""
inbox = tmp_path / "inbox"
(inbox / "picnic").mkdir(parents=True)
(inbox / "leclerc").mkdir(parents=True)
processed = tmp_path / "processed"
processed.mkdir()
failed = tmp_path / "failed"
failed.mkdir()
return {
"inbox": inbox,
"processed": processed,
"failed": failed,
"tmp_path": tmp_path,
}
@pytest.fixture
def sample_file(dirs):
"""Crée un faux fichier HTML Picnic dans inbox/picnic/."""
f = dirs["inbox"] / "picnic" / "ticket_picnic.html"
f.write_text("<html>Picnic</html>", encoding="utf-8")
return f
@pytest.fixture
def sample_leclerc(dirs):
"""Crée un faux fichier PDF Leclerc dans inbox/leclerc/."""
f = dirs["inbox"] / "leclerc" / "ticket_leclerc.pdf"
f.write_bytes(b"%PDF-1.4 fake")
return f
# ---------------------------------------------------------------------------
# Tests _process_file — import réussi
# ---------------------------------------------------------------------------
def test_process_file_success_moves_to_processed(dirs, sample_file):
"""Import réussi : le fichier est déplacé dans processed/."""
with patch("tickettracker.watcher.pipeline.import_receipt", return_value=True):
_process_file(
sample_file, "picnic",
dirs["tmp_path"] / "test.db",
dirs["processed"], dirs["failed"],
)
# Le fichier original ne doit plus être dans inbox/
assert not sample_file.exists()
# Un fichier doit être présent dans processed/
processed_files = list(dirs["processed"].iterdir())
assert len(processed_files) == 1
def test_process_file_success_naming_convention(dirs, sample_file):
"""Le fichier déplacé suit le pattern {source}_{date}_{nom_original}."""
with patch("tickettracker.watcher.pipeline.import_receipt", return_value=True):
_process_file(
sample_file, "picnic",
dirs["tmp_path"] / "test.db",
dirs["processed"], dirs["failed"],
)
processed_files = list(dirs["processed"].iterdir())
name = processed_files[0].name
assert name.startswith("picnic_")
assert name.endswith("ticket_picnic.html")
def test_process_file_duplicate_moves_to_processed(dirs, sample_file):
"""Doublon (import_receipt retourne False) : fichier dans processed/ quand même."""
with patch("tickettracker.watcher.pipeline.import_receipt", return_value=False):
_process_file(
sample_file, "picnic",
dirs["tmp_path"] / "test.db",
dirs["processed"], dirs["failed"],
)
assert not sample_file.exists()
processed_files = list(dirs["processed"].iterdir())
assert len(processed_files) == 1
def test_process_file_error_moves_to_failed(dirs, sample_file):
"""Erreur pendant l'import : le fichier est déplacé dans failed/."""
with patch(
"tickettracker.watcher.pipeline.import_receipt",
side_effect=ValueError("format invalide"),
):
_process_file(
sample_file, "picnic",
dirs["tmp_path"] / "test.db",
dirs["processed"], dirs["failed"],
)
assert not sample_file.exists()
failed_files = [f for f in dirs["failed"].iterdir() if not f.name.endswith(".log")]
assert len(failed_files) == 1
def test_process_file_error_creates_log(dirs, sample_file):
"""Erreur : un fichier .log est créé dans failed/ avec le message d'erreur."""
with patch(
"tickettracker.watcher.pipeline.import_receipt",
side_effect=ValueError("format invalide"),
):
_process_file(
sample_file, "picnic",
dirs["tmp_path"] / "test.db",
dirs["processed"], dirs["failed"],
)
log_files = [f for f in dirs["failed"].iterdir() if f.name.endswith(".log")]
assert len(log_files) == 1
log_content = log_files[0].read_text(encoding="utf-8")
assert "format invalide" in log_content
def test_process_file_nothing_in_failed_on_success(dirs, sample_file):
"""Import réussi : aucun fichier dans failed/."""
with patch("tickettracker.watcher.pipeline.import_receipt", return_value=True):
_process_file(
sample_file, "picnic",
dirs["tmp_path"] / "test.db",
dirs["processed"], dirs["failed"],
)
assert list(dirs["failed"].iterdir()) == []
def test_process_file_leclerc_source(dirs, sample_leclerc):
"""Source leclerc : le fichier déplacé commence par 'leclerc_'."""
with patch("tickettracker.watcher.pipeline.import_receipt", return_value=True):
_process_file(
sample_leclerc, "leclerc",
dirs["tmp_path"] / "test.db",
dirs["processed"], dirs["failed"],
)
processed_files = list(dirs["processed"].iterdir())
assert processed_files[0].name.startswith("leclerc_")
# ---------------------------------------------------------------------------
# Tests ReceiptHandler
# ---------------------------------------------------------------------------
def test_handler_detects_source_from_parent_folder(dirs):
"""ReceiptHandler détecte la source depuis le nom du sous-dossier."""
handler = ReceiptHandler(
db_path=dirs["tmp_path"] / "test.db",
inbox_path=dirs["inbox"],
)
# Le sous-dossier parent du fichier donne la source
assert handler.processed_dir == dirs["processed"]
assert handler.failed_dir == dirs["failed"]
def test_handler_ignores_unknown_subfolder(dirs):
"""Un fichier dans un sous-dossier inconnu (ni picnic ni leclerc) est ignoré."""
unknown_dir = dirs["inbox"] / "autre"
unknown_dir.mkdir()
f = unknown_dir / "fichier.txt"
f.write_text("test")
handler = ReceiptHandler(
db_path=dirs["tmp_path"] / "test.db",
inbox_path=dirs["inbox"],
)
# Simuler un événement de création de fichier
class FakeEvent:
is_directory = False
src_path = str(f)
with patch("tickettracker.watcher._process_file") as mock_process:
handler.on_created(FakeEvent())
mock_process.assert_not_called()
def test_handler_ignores_directory_events(dirs):
"""Un événement de création de répertoire est ignoré."""
handler = ReceiptHandler(
db_path=dirs["tmp_path"] / "test.db",
inbox_path=dirs["inbox"],
)
class FakeEvent:
is_directory = True
src_path = str(dirs["inbox"] / "picnic" / "subdir")
with patch("tickettracker.watcher._process_file") as mock_process:
handler.on_created(FakeEvent())
mock_process.assert_not_called()
# ---------------------------------------------------------------------------
# Tests création des dossiers
# ---------------------------------------------------------------------------
def test_process_file_creates_processed_dir_if_missing(dirs, sample_file):
"""_process_file crée processed/ s'il est absent."""
dirs["processed"].rmdir() # supprime le dossier
assert not dirs["processed"].exists()
with patch("tickettracker.watcher.pipeline.import_receipt", return_value=True):
_process_file(
sample_file, "picnic",
dirs["tmp_path"] / "test.db",
dirs["processed"], dirs["failed"],
)
assert dirs["processed"].exists()
def test_process_file_creates_failed_dir_if_missing(dirs, sample_file):
"""_process_file crée failed/ s'il est absent."""
dirs["failed"].rmdir()
assert not dirs["failed"].exists()
with patch(
"tickettracker.watcher.pipeline.import_receipt",
side_effect=RuntimeError("boom"),
):
_process_file(
sample_file, "picnic",
dirs["tmp_path"] / "test.db",
dirs["processed"], dirs["failed"],
)
assert dirs["failed"].exists()

View File

@@ -28,7 +28,9 @@ def build_parser() -> argparse.ArgumentParser:
tickettracker.cli
├── import <file> --source {picnic,leclerc} [--db PATH]
├── stats [--db PATH]
── normalize [--dry-run] [--batch-size N] [--db PATH]
── normalize [--dry-run] [--batch-size N] [--db PATH]
├── match [--threshold N] [--db PATH]
└── watch [--inbox PATH] [--db PATH]
"""
parser = argparse.ArgumentParser(
prog="python -m tickettracker.cli",
@@ -99,6 +101,47 @@ def build_parser() -> argparse.ArgumentParser:
help=f"Articles par appel LLM (défaut : {_cfg.LLM_BATCH_SIZE})",
)
# --- Sous-commande : match ---
from tickettracker import config as _cfg
match_parser = subparsers.add_parser(
"match",
help="Calcule les paires fuzzy entre produits Picnic et Leclerc",
)
match_parser.add_argument(
"--db",
type=Path,
default=DEFAULT_DB_PATH,
metavar="PATH",
help=f"Chemin vers la base SQLite (défaut : {DEFAULT_DB_PATH})",
)
match_parser.add_argument(
"--threshold",
type=float,
default=_cfg.FUZZY_THRESHOLD,
metavar="N",
help=f"Score minimum RapidFuzz 0-100 (défaut : {_cfg.FUZZY_THRESHOLD})",
)
# --- Sous-commande : watch ---
watch_parser = subparsers.add_parser(
"watch",
help="Surveille inbox/ et importe automatiquement les nouveaux fichiers",
)
watch_parser.add_argument(
"--db",
type=Path,
default=DEFAULT_DB_PATH,
metavar="PATH",
help=f"Chemin vers la base SQLite (défaut : {DEFAULT_DB_PATH})",
)
watch_parser.add_argument(
"--inbox",
type=Path,
default=Path("inbox"),
metavar="PATH",
help="Répertoire inbox/ à surveiller (défaut : ./inbox)",
)
return parser
@@ -205,6 +248,51 @@ def cmd_normalize(args: argparse.Namespace) -> int:
return 1
def cmd_match(args: argparse.Namespace) -> int:
"""Exécute la sous-commande 'match'.
Calcule les paires fuzzy entre produits Picnic et Leclerc,
les insère dans product_matches et affiche un résumé.
Returns:
0 si succès, 1 si la base est absente.
"""
from tickettracker.db import schema
from tickettracker.db.matcher import find_fuzzy_matches, save_fuzzy_matches
if not Path(args.db).exists():
print(f"Base de données absente : {args.db}", file=sys.stderr)
print("Importez d'abord un ticket avec la commande 'import'.", file=sys.stderr)
return 1
with schema.get_connection(args.db) as conn:
matches = find_fuzzy_matches(conn, threshold=args.threshold)
inserted = save_fuzzy_matches(conn, matches)
total = len(matches)
ignored = total - inserted
print(
f"{inserted} nouvelles paires trouvées (seuil={args.threshold:.0f}%). "
f"{ignored} ignorées (déjà connues)."
)
return 0
def cmd_watch(args: argparse.Namespace) -> int:
"""Exécute la sous-commande 'watch'.
Lance la surveillance du dossier inbox/ (bloquant — Ctrl+C pour arrêter).
Returns:
0 après interruption par l'utilisateur.
"""
from tickettracker.watcher import watch
inbox_path = args.inbox.resolve()
watch(inbox_path, args.db)
return 0
def main() -> None:
"""Point d'entrée principal."""
parser = build_parser()
@@ -216,6 +304,10 @@ def main() -> None:
sys.exit(cmd_stats(args))
elif args.command == "normalize":
sys.exit(cmd_normalize(args))
elif args.command == "match":
sys.exit(cmd_match(args))
elif args.command == "watch":
sys.exit(cmd_watch(args))
if __name__ == "__main__":

View File

@@ -45,3 +45,10 @@ LLM_TIMEOUT: int = int(os.environ.get("TICKETTRACKER_LLM_TIMEOUT", "60"))
# Nombre d'articles traités par appel LLM
LLM_BATCH_SIZE: int = int(os.environ.get("TICKETTRACKER_LLM_BATCH_SIZE", "20"))
# ---------------------------------------------------------------------------
# Fuzzy matching
# ---------------------------------------------------------------------------
# Seuil de similarité minimum (0100) pour rapprocher un produit Picnic d'un produit Leclerc
FUZZY_THRESHOLD: float = float(os.environ.get("TICKETTRACKER_FUZZY_THRESHOLD", "85"))

View File

@@ -0,0 +1,90 @@
"""
Fuzzy matching entre produits Picnic et Leclerc.
Utilise RapidFuzz (token_sort_ratio) pour rapprocher des produits dont le nom
n'est pas identique mais désigne la même chose
(ex : "Lait demi-écremé""LAIT DEMI ECREME").
Workflow :
1. find_fuzzy_matches() — calcule les paires candidates
2. save_fuzzy_matches() — les insère dans product_matches (ignoring duplicates)
3. L'utilisateur valide/rejette via le dashboard /matches
"""
import sqlite3
from datetime import datetime, timezone
from rapidfuzz import fuzz
def find_fuzzy_matches(
conn: sqlite3.Connection,
threshold: float = 85.0,
) -> list[dict]:
"""Calcule les paires de produits similaires entre Picnic et Leclerc.
Utilise rapidfuzz.fuzz.token_sort_ratio (insensible à l'ordre des mots).
Ne retourne que les paires avec score >= threshold.
Les noms identiques sont exclus (ils sont déjà traités par get_compare_prices).
Args:
conn: Connexion SQLite ouverte.
threshold: Score minimum 0100 (défaut 85).
Returns:
Liste de dicts {name_picnic, name_leclerc, score}, triée par score décroissant.
"""
# Noms normalisés distincts par enseigne
picnic_names = [
r[0]
for r in conn.execute(
"SELECT DISTINCT name_normalized FROM price_history "
"WHERE store='picnic' AND name_normalized IS NOT NULL"
)
]
leclerc_names = [
r[0]
for r in conn.execute(
"SELECT DISTINCT name_normalized FROM price_history "
"WHERE store='leclerc' AND name_normalized IS NOT NULL"
)
]
# Produit cartésien filtré par seuil
matches = []
for p in picnic_names:
for lec in leclerc_names:
if p == lec:
continue # exact match déjà géré par get_compare_prices
score = fuzz.token_sort_ratio(p, lec)
if score >= threshold:
matches.append({"name_picnic": p, "name_leclerc": lec, "score": score})
return sorted(matches, key=lambda x: -x["score"])
def save_fuzzy_matches(conn: sqlite3.Connection, matches: list[dict]) -> int:
"""Insère les nouvelles paires dans product_matches (ignore les doublons).
Utilise INSERT OR IGNORE pour ne pas écraser les paires déjà en base
(statut 'validated' ou 'rejected' conservé).
Args:
conn: Connexion SQLite ouverte.
matches: Résultat de find_fuzzy_matches().
Returns:
Nombre de nouvelles paires réellement insérées.
"""
created_at = datetime.now(timezone.utc).isoformat()
inserted = 0
with conn:
for m in matches:
cur = conn.execute(
"INSERT OR IGNORE INTO product_matches "
"(name_picnic, name_leclerc, score, status, created_at) "
"VALUES (?, ?, ?, 'pending', ?)",
(m["name_picnic"], m["name_leclerc"], m["score"], created_at),
)
inserted += cur.rowcount
return inserted

View File

@@ -63,6 +63,23 @@ CREATE INDEX IF NOT EXISTS idx_items_name_normalized
ON items (name_normalized);
"""
_SQL_CREATE_PRODUCT_MATCHES = """
CREATE TABLE IF NOT EXISTS product_matches (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name_picnic TEXT NOT NULL,
name_leclerc TEXT NOT NULL,
score REAL NOT NULL, -- score RapidFuzz 0-100
status TEXT NOT NULL DEFAULT 'pending', -- 'pending'|'validated'|'rejected'
created_at TEXT NOT NULL,
UNIQUE(name_picnic, name_leclerc)
);
"""
_SQL_CREATE_PRODUCT_MATCHES_IDX = """
CREATE INDEX IF NOT EXISTS idx_product_matches_status
ON product_matches (status);
"""
_SQL_CREATE_PRICE_HISTORY = """
CREATE VIEW IF NOT EXISTS price_history AS
SELECT
@@ -125,3 +142,5 @@ def init_db(db_path: str | Path = DEFAULT_DB_PATH) -> None:
conn.execute(_SQL_CREATE_ITEMS_IDX)
conn.execute(_SQL_CREATE_ITEMS_NORM_IDX)
conn.execute(_SQL_CREATE_PRICE_HISTORY)
conn.execute(_SQL_CREATE_PRODUCT_MATCHES)
conn.execute(_SQL_CREATE_PRODUCT_MATCHES_IDX)

123
tickettracker/watcher.py Normal file
View File

@@ -0,0 +1,123 @@
"""
Watch folder pour TicketTracker.
Surveille les dossiers inbox/picnic/ et inbox/leclerc/ et importe automatiquement
tout nouveau fichier déposé. Les fichiers traités sont déplacés vers :
processed/{source}_{YYYY-MM-DD}_{nom_original} — import OK ou doublon
failed/{nom_original} — erreur + fichier .log créé
Usage CLI :
python -m tickettracker.cli watch [--inbox PATH] [--db PATH]
Interrompre avec Ctrl+C.
"""
import logging
import time
from datetime import datetime
from pathlib import Path
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
from tickettracker import pipeline
logger = logging.getLogger(__name__)
class ReceiptHandler(FileSystemEventHandler):
"""Gestionnaire d'événements watchdog pour les dossiers inbox/."""
def __init__(self, db_path: Path, inbox_path: Path):
self.db_path = db_path
self.inbox_path = inbox_path
# processed/ et failed/ sont au même niveau qu'inbox/
self.processed_dir = inbox_path.parent / "processed"
self.failed_dir = inbox_path.parent / "failed"
def on_created(self, event):
"""Appelé quand un fichier arrive dans inbox/picnic/ ou inbox/leclerc/."""
if event.is_directory:
return
file_path = Path(event.src_path)
# La source est déduite du nom du sous-dossier parent (picnic ou leclerc)
source = file_path.parent.name
if source not in ("picnic", "leclerc"):
logger.warning("Fichier ignoré (dossier inconnu) : %s", file_path)
return
_process_file(file_path, source, self.db_path,
self.processed_dir, self.failed_dir)
def _process_file(
file_path: Path,
source: str,
db_path: Path,
processed_dir: Path,
failed_dir: Path,
) -> None:
"""Importe un fichier, le déplace selon le résultat.
Succès ou doublon → processed/{source}_{date}_{nom}
Erreur → failed/{nom} + failed/{nom}.log
"""
# Attendre un court instant : certains éditeurs / copiers écrivent en deux passes
time.sleep(0.2)
date_str = datetime.now().strftime("%Y-%m-%d")
dest_name = f"{source}_{date_str}_{file_path.name}"
try:
inserted = pipeline.import_receipt(file_path, source, db_path)
status = "importé" if inserted else "doublon ignoré"
logger.info("[watcher] %s : %s → processed/", file_path.name, status)
# Déplacement vers processed/
processed_dir.mkdir(parents=True, exist_ok=True)
file_path.rename(processed_dir / dest_name)
except Exception as exc:
logger.error("[watcher] Erreur sur %s : %s", file_path.name, exc)
# Déplacement vers failed/ + création d'un .log
failed_dir.mkdir(parents=True, exist_ok=True)
log_path = failed_dir / f"{file_path.name}.log"
log_path.write_text(
f"Fichier : {file_path}\n"
f"Source : {source}\n"
f"Date : {datetime.now().isoformat()}\n"
f"Erreur : {exc}\n",
encoding="utf-8",
)
file_path.rename(failed_dir / file_path.name)
def watch(inbox_path: Path, db_path: Path) -> None:
"""Lance le watcher en mode bloquant (interrompre avec Ctrl+C).
Surveille inbox_path/picnic/ et inbox_path/leclerc/ récursivement.
Crée les dossiers inbox/, processed/ et failed/ s'ils sont absents.
Args:
inbox_path: Répertoire parent contenant les sous-dossiers picnic/ et leclerc/.
db_path: Chemin vers la base SQLite.
"""
# Créer les dossiers nécessaires
for sub in ("picnic", "leclerc"):
(inbox_path / sub).mkdir(parents=True, exist_ok=True)
(inbox_path.parent / "processed").mkdir(parents=True, exist_ok=True)
(inbox_path.parent / "failed").mkdir(parents=True, exist_ok=True)
handler = ReceiptHandler(db_path=db_path, inbox_path=inbox_path)
observer = Observer()
# Surveillance récursive du dossier inbox/ (capte picnic/ et leclerc/ en une passe)
observer.schedule(handler, str(inbox_path), recursive=True)
observer.start()
print(f"Surveillance de {inbox_path}/picnic/ et {inbox_path}/leclerc/ — Ctrl+C pour arrêter")
try:
while True:
time.sleep(1)
except KeyboardInterrupt:
observer.stop()
observer.join()

View File

@@ -8,6 +8,7 @@ appelle la fonction de queries.py correspondante, puis ferme la connexion.
import sqlite3
from fastapi import APIRouter, HTTPException
from fastapi.responses import Response
import tickettracker.config as config
from tickettracker.db.schema import get_connection
@@ -70,6 +71,46 @@ def api_receipts():
conn.close()
@router.post("/match/{match_id}/validate")
def api_match_validate(match_id: int):
"""Valide une paire fuzzy (status → 'validated').
Retourne 404 si l'id est inconnu.
"""
conn = get_connection(config.DB_PATH)
try:
with conn:
cur = conn.execute(
"UPDATE product_matches SET status='validated' WHERE id=?",
(match_id,),
)
finally:
conn.close()
if cur.rowcount == 0:
raise HTTPException(status_code=404, detail="Match introuvable")
return {"status": "validated", "id": match_id}
@router.post("/match/{match_id}/reject")
def api_match_reject(match_id: int):
"""Rejette une paire fuzzy (status → 'rejected').
Retourne 404 si l'id est inconnu.
"""
conn = get_connection(config.DB_PATH)
try:
with conn:
cur = conn.execute(
"UPDATE product_matches SET status='rejected' WHERE id=?",
(match_id,),
)
finally:
conn.close()
if cur.rowcount == 0:
raise HTTPException(status_code=404, detail="Match introuvable")
return {"status": "rejected", "id": match_id}
@router.get("/receipt/{receipt_id}")
def api_receipt_detail(receipt_id: int):
"""Détail d'un ticket et de ses articles.

View File

@@ -30,6 +30,7 @@ from tickettracker.web.queries import (
get_compare_prices,
get_dashboard_stats,
get_monthly_spending,
get_pending_matches,
get_product_history,
get_product_list,
get_receipt_detail,
@@ -167,6 +168,32 @@ async def page_product(request: Request, name: str):
)
@app.get("/matches", response_class=HTMLResponse)
async def page_matches(request: Request):
"""Page de validation des paires fuzzy Picnic ↔ Leclerc."""
conn = get_connection(config.DB_PATH)
try:
pending = get_pending_matches(conn)
validated_count = conn.execute(
"SELECT COUNT(*) FROM product_matches WHERE status='validated'"
).fetchone()[0]
rejected_count = conn.execute(
"SELECT COUNT(*) FROM product_matches WHERE status='rejected'"
).fetchone()[0]
finally:
conn.close()
return templates.TemplateResponse(
request,
"matches.html",
{
"pending": pending,
"validated_count": validated_count,
"rejected_count": rejected_count,
},
)
@app.get("/receipt/{receipt_id}", response_class=HTMLResponse)
async def page_receipt(request: Request, receipt_id: int):
"""Page détail d'un ticket."""

View File

@@ -83,13 +83,18 @@ def get_monthly_spending(conn: sqlite3.Connection) -> list[dict]:
def get_compare_prices(conn: sqlite3.Connection) -> list[dict]:
"""Comparaison de prix entre Picnic et Leclerc pour les produits communs.
Utilise la vue price_history. Ne retourne que les produits présents
dans les deux enseignes. Trié par écart décroissant (le plus cher en premier).
Combine deux sources :
- Correspondances exactes (même name_normalized dans les deux enseignes)
- Correspondances fuzzy validées dans product_matches (status='validated')
Les doublons éventuels (un produit déjà en exact ET en fuzzy) sont éliminés
par UNION (qui déduplique) + sélection par nom picnic.
Returns:
Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct}.
diff = price_leclerc - price_picnic (positif = Leclerc plus cher)
diff_pct = diff / MIN(price_picnic, price_leclerc) * 100
Liste de dicts {name, price_picnic, price_leclerc, diff, diff_pct, match_type}.
diff = price_leclerc - price_picnic (positif = Leclerc plus cher)
diff_pct = diff / MIN(price_picnic, price_leclerc) * 100
match_type = 'exact' ou 'fuzzy'
"""
rows = conn.execute(
"""
@@ -101,32 +106,67 @@ def get_compare_prices(conn: sqlite3.Connection) -> list[dict]:
FROM price_history
WHERE name_normalized IS NOT NULL
GROUP BY name_normalized, store
),
exact_matches AS (
SELECT
a.name_normalized AS name,
a.name_normalized AS name_display,
a.avg_price AS price_picnic,
b.avg_price AS price_leclerc,
ROUND(b.avg_price - a.avg_price, 2) AS diff,
ROUND(
(b.avg_price - a.avg_price)
/ MIN(a.avg_price, b.avg_price) * 100
, 1) AS diff_pct,
'exact' AS match_type
FROM avg_by_store a
JOIN avg_by_store b
ON a.name_normalized = b.name_normalized
AND a.store = 'picnic'
AND b.store = 'leclerc'
),
fuzzy_matches AS (
SELECT
pm.name_picnic AS name,
pm.name_picnic || '' || pm.name_leclerc AS name_display,
ap_p.avg_price AS price_picnic,
ap_l.avg_price AS price_leclerc,
ROUND(ap_l.avg_price - ap_p.avg_price, 2) AS diff,
ROUND(
(ap_l.avg_price - ap_p.avg_price)
/ MIN(ap_p.avg_price, ap_l.avg_price) * 100
, 1) AS diff_pct,
'fuzzy' AS match_type
FROM product_matches pm
JOIN avg_by_store ap_p
ON ap_p.name_normalized = pm.name_picnic AND ap_p.store = 'picnic'
JOIN avg_by_store ap_l
ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
WHERE pm.status = 'validated'
-- Exclure si déjà présent en exact match
AND pm.name_picnic NOT IN (SELECT name FROM exact_matches)
)
SELECT
a.name_normalized AS name,
a.avg_price AS price_picnic,
b.avg_price AS price_leclerc,
ROUND(b.avg_price - a.avg_price, 2) AS diff,
ROUND(
(b.avg_price - a.avg_price)
/ MIN(a.avg_price, b.avg_price) * 100
, 1) AS diff_pct
FROM avg_by_store a
JOIN avg_by_store b
ON a.name_normalized = b.name_normalized
AND a.store = 'picnic'
AND b.store = 'leclerc'
ORDER BY ABS(b.avg_price - a.avg_price) DESC
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
FROM (
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
FROM exact_matches
UNION ALL
SELECT name, name_display, price_picnic, price_leclerc, diff, diff_pct, match_type
FROM fuzzy_matches
)
ORDER BY ABS(diff) DESC
"""
).fetchall()
return [
{
"name": r["name"],
"name_display": r["name_display"],
"price_picnic": r["price_picnic"],
"price_leclerc": r["price_leclerc"],
"diff": r["diff"],
"diff_pct": r["diff_pct"],
"match_type": r["match_type"],
}
for r in rows
]
@@ -279,6 +319,91 @@ def get_receipt_detail(conn: sqlite3.Connection, receipt_id: int) -> dict | None
}
def get_pending_matches(conn: sqlite3.Connection) -> list[dict]:
"""Paires en attente de validation, avec prix moyens des deux enseignes.
Returns:
Liste de dicts {id, name_picnic, price_picnic, name_leclerc, price_leclerc, score}.
price_picnic / price_leclerc : prix moyen unitaire de ce produit dans la vue
price_history (None si aucune occurrence pour ce nom normalisé).
"""
rows = conn.execute(
"""
SELECT
pm.id,
pm.name_picnic,
pm.name_leclerc,
pm.score,
ROUND(AVG(CASE WHEN ph.store='picnic' THEN ph.unit_price END), 2) AS price_picnic,
ROUND(AVG(CASE WHEN ph.store='leclerc' THEN ph.unit_price END), 2) AS price_leclerc
FROM product_matches pm
LEFT JOIN price_history ph
ON ph.name_normalized IN (pm.name_picnic, pm.name_leclerc)
WHERE pm.status = 'pending'
GROUP BY pm.id
ORDER BY pm.score DESC
"""
).fetchall()
return [
{
"id": r["id"],
"name_picnic": r["name_picnic"],
"name_leclerc": r["name_leclerc"],
"score": r["score"],
"price_picnic": r["price_picnic"],
"price_leclerc": r["price_leclerc"],
}
for r in rows
]
def get_validated_matches(conn: sqlite3.Connection) -> list[dict]:
"""Paires validées pour enrichir get_compare_prices.
Returns:
Liste de dicts {name_picnic, price_picnic, name_leclerc, price_leclerc, diff, diff_pct}.
"""
rows = conn.execute(
"""
WITH avg_prices AS (
SELECT name_normalized, store, ROUND(AVG(unit_price), 2) AS avg_price
FROM price_history
WHERE name_normalized IS NOT NULL
GROUP BY name_normalized, store
)
SELECT
pm.id,
pm.name_picnic,
pm.name_leclerc,
ap_p.avg_price AS price_picnic,
ap_l.avg_price AS price_leclerc,
ROUND(ap_l.avg_price - ap_p.avg_price, 2) AS diff,
ROUND(
(ap_l.avg_price - ap_p.avg_price)
/ MIN(ap_p.avg_price, ap_l.avg_price) * 100
, 1) AS diff_pct
FROM product_matches pm
JOIN avg_prices ap_p ON ap_p.name_normalized = pm.name_picnic AND ap_p.store = 'picnic'
JOIN avg_prices ap_l ON ap_l.name_normalized = pm.name_leclerc AND ap_l.store = 'leclerc'
WHERE pm.status = 'validated'
ORDER BY ABS(ap_l.avg_price - ap_p.avg_price) DESC
"""
).fetchall()
return [
{
"name_picnic": r["name_picnic"],
"name_leclerc": r["name_leclerc"],
"price_picnic": r["price_picnic"],
"price_leclerc": r["price_leclerc"],
"diff": r["diff"],
"diff_pct": r["diff_pct"],
}
for r in rows
]
def get_product_list(conn: sqlite3.Connection) -> list[str]:
"""Liste tous les noms normalisés distincts (non NULL) pour le sélecteur.

View File

@@ -46,3 +46,72 @@
.overflow-auto {
overflow-x: auto;
}
/* Badge pour les correspondances fuzzy dans la table compare */
.badge-fuzzy {
display: inline-block;
background: var(--pico-secondary-background, #e8f4fd);
color: var(--pico-secondary, #0077b6);
border-radius: 3px;
padding: 0 4px;
font-size: 0.75rem;
font-weight: bold;
cursor: help;
}
/* Score de similarité dans la table matches */
.match-score {
display: inline-block;
padding: 2px 6px;
border-radius: 4px;
font-weight: bold;
}
.score-high { background: #d4edda; color: #155724; }
.score-medium { background: #fff3cd; color: #856404; }
.score-low { background: #f8d7da; color: #721c24; }
/* Boutons valider/rejeter dans la table matches */
.btn-validate {
background: var(--pico-primary);
color: white;
border: none;
padding: 4px 10px;
border-radius: 4px;
cursor: pointer;
font-size: 0.85rem;
}
.btn-reject {
padding: 4px 10px;
font-size: 0.85rem;
}
.match-actions {
white-space: nowrap;
}
/* Formulaire de filtre de dates */
.date-filter {
display: flex;
gap: 0.5rem;
align-items: center;
flex-wrap: wrap;
margin-bottom: 1.5rem;
padding: 0.75rem 1rem;
background: var(--pico-card-background-color, #f8f9fa);
border-radius: 6px;
}
.date-filter input[type="month"] {
width: auto;
margin: 0;
padding: 4px 8px;
}
.date-filter button,
.date-filter a {
margin: 0;
padding: 4px 12px;
font-size: 0.9rem;
}

View File

@@ -20,6 +20,7 @@
<ul>
<li><a href="/">Accueil</a></li>
<li><a href="/compare">Comparer</a></li>
<li><a href="/matches">Correspondances</a></li>
<li><a href="/api/docs" target="_blank">API docs</a></li>
</ul>
</nav>

View File

@@ -38,7 +38,12 @@
<tbody>
{% for p in products %}
<tr>
<td>{{ p.name }}</td>
<td>
{{ p.name_display }}
{% if p.match_type == 'fuzzy' %}
<span class="badge-fuzzy" title="Correspondance fuzzy validée">~</span>
{% endif %}
</td>
<td>{{ "%.2f"|format(p.price_picnic) }} €</td>
<td>{{ "%.2f"|format(p.price_leclerc) }} €</td>
<td class="{% if p.diff > 0 %}diff-positive{% elif p.diff < 0 %}diff-negative{% endif %}">
@@ -56,7 +61,12 @@
</table>
</div>
<p><small>Positif = Leclerc plus cher, négatif = Picnic plus cher.</small></p>
<p>
<small>Positif = Leclerc plus cher, négatif = Picnic plus cher.</small><br>
<small><span class="badge-fuzzy">~</span> = correspondance fuzzy validée (noms différents, même produit)</small>
</p>
<p><a href="/matches">Gérer les correspondances fuzzy →</a></p>
{% endif %}
{% endblock %}

View File

@@ -0,0 +1,85 @@
{% extends "base.html" %}
{% block title %}Correspondances fuzzy — TicketTracker{% endblock %}
{% block content %}
<h1>Correspondances Picnic ↔ Leclerc</h1>
<p>
Ces paires ont été détectées automatiquement par fuzzy matching.
Validez celles qui désignent le même produit pour enrichir la comparaison de prix.
</p>
<!-- Résumé statistiques -->
<div class="stat-grid">
<article class="stat-card">
<h3>{{ pending | length }}</h3>
<p>En attente</p>
</article>
<article class="stat-card">
<h3>{{ validated_count }}</h3>
<p>Validées</p>
</article>
<article class="stat-card">
<h3>{{ rejected_count }}</h3>
<p>Rejetées</p>
</article>
</div>
{% if pending %}
<article>
<h2>Paires à valider</h2>
<div class="overflow-auto">
<table>
<thead>
<tr>
<th>Produit Picnic</th>
<th>Prix moy.</th>
<th>Produit Leclerc</th>
<th>Prix moy.</th>
<th>Score</th>
<th>Action</th>
</tr>
</thead>
<tbody>
{% for m in pending %}
<tr>
<td>{{ m.name_picnic }}</td>
<td>{% if m.price_picnic %}{{ "%.2f"|format(m.price_picnic) }} €{% else %}—{% endif %}</td>
<td>{{ m.name_leclerc }}</td>
<td>{% if m.price_leclerc %}{{ "%.2f"|format(m.price_leclerc) }} €{% else %}—{% endif %}</td>
<td>
<small class="match-score {% if m.score >= 95 %}score-high{% elif m.score >= 85 %}score-medium{% else %}score-low{% endif %}">
{{ "%.0f"|format(m.score) }}%
</small>
</td>
<td class="match-actions">
<form method="post" action="/api/match/{{ m.id }}/validate" style="display:inline">
<button type="submit" class="btn-validate">✓ Valider</button>
</form>
<form method="post" action="/api/match/{{ m.id }}/reject" style="display:inline">
<button type="submit" class="btn-reject secondary outline">✗ Rejeter</button>
</form>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</article>
{% else %}
<article>
<p>
Aucune paire en attente.
{% if validated_count == 0 and rejected_count == 0 %}
Lancez d'abord la commande de matching :
<pre><code>python -m tickettracker.cli match --threshold 85</code></pre>
{% else %}
Toutes les paires ont été traitées ({{ validated_count }} validées, {{ rejected_count }} rejetées).
{% endif %}
</p>
</article>
{% endif %}
{% endblock %}