223 lines
7.3 KiB
Python
223 lines
7.3 KiB
Python
|
|
"""
|
|||
|
|
Tests pour le parser Leclerc.
|
|||
|
|
|
|||
|
|
Utilise le fichier samples/ticket_leclerc_10_20260208_190621.pdf —
|
|||
|
|
ticket réel du E.Leclerc Clichy-sous-Bois du 08 novembre 2025,
|
|||
|
|
45 articles, CB 139,25 €.
|
|||
|
|
|
|||
|
|
Ce ticket est un scan JPEG embarqué dans un PDF (pas de couche texte).
|
|||
|
|
Le parser utilise Tesseract OCR (fra+eng) pour extraire le texte.
|
|||
|
|
|
|||
|
|
Notes OCR connues :
|
|||
|
|
- 'G' final peut être lu '6' (ex: "220G" → "2206", "120G" → "1206")
|
|||
|
|
- Les 'G' initiaux (grammes) sont bien reconnus dans la plupart des cas
|
|||
|
|
- FILET POULET : prix OCR 10.46 au lieu de 10.40 (3 décimales OCR → "10.460")
|
|||
|
|
- Les tests utilisent des vérifications souples sur les noms (fragment)
|
|||
|
|
pour rester stables face aux variations d'OCR
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import shutil
|
|||
|
|
from datetime import date
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
import pytest
|
|||
|
|
|
|||
|
|
from tickettracker.parsers import leclerc
|
|||
|
|
from tickettracker.models.receipt import Receipt
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _tesseract_disponible() -> bool:
    """Report whether a Tesseract binary can be located on this machine.

    The executable search path is consulted first; if nothing is found
    there, the two standard Windows install locations are probed.

    Returns:
        True if a ``tesseract`` executable was found, False otherwise.
    """
    on_path = shutil.which("tesseract")
    if on_path:
        return True

    # Standard Windows locations (the original note says these are the ones
    # used by leclerc._configure_tesseract).
    for candidate in (
        r"C:/Program Files/Tesseract-OCR/tesseract.exe",
        r"C:/Program Files (x86)/Tesseract-OCR/tesseract.exe",
    ):
        if Path(candidate).is_file():
            return True
    return False
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Module-wide marker: every test here is skipped when no Tesseract binary
# can be located, since the parser under test relies on OCR.
_RAISON_SKIP = "Tesseract OCR non installé — Linux : apt install tesseract-ocr tesseract-ocr-fra"
pytestmark = pytest.mark.skipif(not _tesseract_disponible(), reason=_RAISON_SKIP)
|
|||
|
|
|
|||
|
|
# Directory holding this test module; the samples live one level above it.
_HERE = Path(__file__).parent
SAMPLE_DIR = _HERE.parent / "samples"
# Scanned receipt (PDF) that every test in this module parses.
LECLERC_PDF = SAMPLE_DIR.joinpath("ticket_leclerc_10_20260208_190621.pdf")
|
|||
|
|
|
|||
|
|
|
|||
|
|
@pytest.fixture(scope="module")
def receipt() -> Receipt:
    """Parse the sample PDF a single time and share it with every test in the module."""
    pdf_path = str(LECLERC_PDF)
    return leclerc.parse(pdf_path)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Structure générale
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def test_store(receipt):
    """The parsed receipt reports "leclerc" as its store."""
    attendu = "leclerc"
    assert receipt.store == attendu
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_date(receipt):
    """The receipt date is 8 November 2025."""
    # Header line on the receipt: "Caisse 018-0003 08 novembre 2025 12:46"
    attendu = date(year=2025, month=11, day=8)
    assert receipt.date == attendu
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_caisse_id(receipt):
    """The checkout identifier is exposed through ``order_id``."""
    attendu = "018-0003"
    assert receipt.order_id == attendu
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_total_cb(receipt):
    """The receipt total equals the amount paid by card."""
    # Card amount, after 3 discount vouchers: 0.60 + 0.30 + 0.30 = 1.20
    montant_cb = pytest.approx(139.25)
    assert receipt.total == montant_cb
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_nombre_lignes_articles(receipt):
    """The parser yields one item per product line."""
    # 42 product lines (the 45 physical articles are the sum of quantities)
    nb_lignes = len(receipt.items)
    assert nb_lignes == 42
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_somme_quantites(receipt):
    """Quantities, truncated to integers, add up to the article count."""
    # The receipt prints "Total 45 articles", i.e. the sum of quantities.
    cumul = 0
    for article in receipt.items:
        cumul += int(article.quantity)
    assert cumul == 45
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Catégories
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def test_categories_presentes(receipt):
    """A handful of expected category labels appear among the parsed items."""
    # Collect the distinct, non-empty categories carried by the items.
    vues = set()
    for article in receipt.items:
        if article.category:
            vues.add(article.category)

    # Checked in a fixed order so the first missing label is the one reported.
    for attendue in (
        "EPICERIE SALEE",
        "EPICERIE SUCREE",
        "BOUCHERIE LS",
        "LEGUMES",
        "CREMERIE LS",
        "ANIMALERIE",
    ):
        assert attendue in vues
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_categorie_de_chaque_article(receipt):
    """Every item must carry a category (i.e. ``category`` is not None)."""
    for article in receipt.items:
        a_une_categorie = article.category is not None
        assert a_une_categorie, f"{article.name!r} n'a pas de catégorie"
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Articles clés
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def _find(receipt, fragment: str):
    """Look up an item by a case-insensitive fragment of its name.

    Args:
        receipt: object exposing an ``items`` iterable whose elements have
            a ``name`` string attribute.
        fragment: text that must occur in the item name (case is ignored).

    Returns:
        The first item, in receipt order, whose name contains ``fragment``.

    Raises:
        AssertionError: if no item matches; the message lists every item
            name to ease diagnosing OCR variations.
    """
    needle = fragment.lower()
    hits = []
    # Scan the whole list (not just up to the first hit), as the original does.
    for article in receipt.items:
        if needle in article.name.lower():
            hits.append(article)
    assert hits, (
        f"Article contenant '{fragment}' introuvable. "
        f"Articles : {[a.name for a in receipt.items]}"
    )
    return hits[0]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_noix_cajou(receipt):
    """Cashew nuts: single unit, 5.12, filed under EPICERIE SALEE."""
    noix = _find(receipt, "NOIX CAJOU")
    assert noix.quantity == 1
    prix_attendu = pytest.approx(5.12)
    assert noix.total_price == prix_attendu
    assert noix.category == "EPICERIE SALEE"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_coca_cola(receipt):
    """Coca-Cola: single unit, 6.72, filed under the drinks category."""
    soda = _find(receipt, "COCA-COLA")
    assert soda.quantity == 1
    prix_attendu = pytest.approx(6.72)
    assert soda.total_price == prix_attendu
    assert soda.category == "EAUX, BIERES, JUS ET SIROP,CID"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_saucisse_multi_unites(receipt):
    """Multi-unit line: smoked sausages bought twice."""
    # "SAUCISSE FUMEES A CUIRE X 4" bought 2 times: 2 X 3.48€ = 6.96
    saucisse = _find(receipt, "SAUCISSE FUMEES")
    assert saucisse.quantity == 2
    prix_unitaire = pytest.approx(3.48)
    assert saucisse.unit_price == prix_unitaire
    prix_total = pytest.approx(6.96)
    assert saucisse.total_price == prix_total
    assert saucisse.category == "BOUCHERIE LS"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_jambon_multi_unites(receipt):
    """Multi-unit line: ham bought twice."""
    # "4 TR JAMBON SUP,-SEL CSN,140G" bought 2 times: 2 X 2.93€ = 5.86
    jambon = _find(receipt, "JAMBON")
    assert jambon.quantity == 2
    prix_unitaire = pytest.approx(2.93)
    assert jambon.unit_price == prix_unitaire
    prix_total = pytest.approx(5.86)
    assert jambon.total_price == prix_total
    assert jambon.category == "CHARCUTERIE LS"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_lait_multi_unites(receipt):
    """Multi-unit line: milk bought twice."""
    # "LAIT PAST.ENTIER,DELISSE,1L" bought 2 times: 2 X 1.33€ = 2.66
    lait = _find(receipt, "LAIT PAST")
    assert lait.quantity == 2
    prix_unitaire = pytest.approx(1.33)
    assert lait.unit_price == prix_unitaire
    prix_total = pytest.approx(2.66)
    assert lait.total_price == prix_total
    assert lait.category == "CREMERIE LS"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_litiere_tva20(receipt):
    """Cat litter: single unit, 3.10, filed under ANIMALERIE."""
    # Litter is taxed at 20% VAT (code 4 on the receipt); the VAT rate
    # itself is not asserted here.
    litiere = _find(receipt, "LITIERE")
    assert litiere.quantity == 1
    prix_attendu = pytest.approx(3.10)
    assert litiere.total_price == prix_attendu
    assert litiere.category == "ANIMALERIE"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_citron_vert(receipt):
    """Lime: single unit, 0.86, filed under FRUITS."""
    citron = _find(receipt, "CITRON VERT")
    assert citron.quantity == 1
    prix_attendu = pytest.approx(0.86)
    assert citron.total_price == prix_attendu
    assert citron.category == "FRUITS"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_deux_carottes(receipt):
    """Exactly two CAROTTE lines are parsed, each totalling 1.99."""
    # Two CAROTTE lines on the receipt (two distinct trays).
    lignes_carotte = []
    for article in receipt.items:
        if "CAROTTE" in article.name.upper():
            lignes_carotte.append(article)

    assert len(lignes_carotte) == 2
    for ligne in lignes_carotte:
        assert ligne.total_price == pytest.approx(1.99)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_lor_espresso(receipt):
    """The coffee capsules line totals 16.04."""
    # Most expensive item on the receipt (coffee capsules).
    capsules = _find(receipt, "LUNGPRO")
    prix_attendu = pytest.approx(16.04)
    assert capsules.total_price == prix_attendu
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Cohérence arithmétique
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
@pytest.mark.xfail(
    strict=False,
    reason=(
        "Précision OCR variable selon la plateforme Tesseract : "
        "ex. 'FARINE BLE PATISST45 1K' lu 6.69€ (Linux 5.3.4) au lieu de ~1.69€ (Windows). "
        "Solution cible : remplacer Tesseract par un LLM vision (Sprint suivant)."
    ),
)
def test_total_avant_remise(receipt):
    """The sum of item totals must be close to the receipt subtotal (140.45).

    The accepted absolute tolerance (0.15) covers the known OCR errors,
    e.g. FILET POULET read as 10.46 instead of 10.40, a 0.06 € gap.
    Marked as a non-strict expected failure because OCR accuracy varies
    with the Tesseract platform.
    """
    prix_lignes = [article.total_price for article in receipt.items]
    sous_total = sum(prix_lignes)
    assert sous_total == pytest.approx(140.45, abs=0.15)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_prix_unitaire_coherent(receipt):
    """For multi-unit items, unit_price × quantity ≈ total_price (1% relative)."""
    for article in receipt.items:
        # Only lines bought in more than one unit are checked.
        if not (article.quantity > 1):
            continue
        recalcule = article.unit_price * article.quantity
        attendu = pytest.approx(article.total_price, rel=0.01)
        assert recalcule == attendu, f"Prix incohérent pour {article.name!r}"
|