From 1d8f139c7c1efb87356d0e76c9110c67208b0f54 Mon Sep 17 00:00:00 2001
From: laurent <laurent@dilain.com>
Date: Wed, 25 Feb 2026 18:35:46 +0100
Subject: [PATCH] =?UTF-8?q?feat:=20inclure=20l'unit=C3=A9/poids=20dans=20l?=
 =?UTF-8?q?a=20normalisation=20LLM?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

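fetch_unnormalized() remonte maintenant la colonne `unit` (ex: "250 g",
"20 sachets"). Quand `unit` est renseignée, le normaliseur envoie au LLM
"name_raw unit" (séparés par une espace) au lieu de name_raw seul ; le LLM
peut ainsi placer le poids dans le champ format. Les articles sans unité
(None ou "") sont envoyés inchangés.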

Résultat : "Haribo dragibus" → "Dragibus | Haribo | 250g"
au lieu de   "Haribo dragibus" → "Dragibus | Haribo | -"

Améliore aussi la qualité du fuzzy matching Picnic ↔ Leclerc.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 tickettracker/db/repository.py  |  4 ++--
 tickettracker/llm/normalizer.py | 10 ++++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/tickettracker/db/repository.py b/tickettracker/db/repository.py
index 34ce784..9db0c18 100644
--- a/tickettracker/db/repository.py
+++ b/tickettracker/db/repository.py
@@ -140,7 +140,7 @@ def fetch_unnormalized(
 ) -> list[sqlite3.Row]:
     """Retourne les articles dont name_normalized est NULL.
 
-    Chaque Row expose les clés : id, name_raw, receipt_id.
+    Chaque Row expose les clés : id, name_raw, unit, receipt_id.
     Trié par id pour un traitement reproductible.
 
     Args:
@@ -150,7 +150,7 @@ def fetch_unnormalized(
     Returns:
         Liste de sqlite3.Row.
     """
-    sql = "SELECT id, name_raw, receipt_id FROM items WHERE name_normalized IS NULL ORDER BY id"
+    sql = "SELECT id, name_raw, unit, receipt_id FROM items WHERE name_normalized IS NULL ORDER BY id"
     if limit is not None:
         sql += f" LIMIT {int(limit)}"
     return conn.execute(sql).fetchall()
diff --git a/tickettracker/llm/normalizer.py b/tickettracker/llm/normalizer.py
index 41192bc..57a27e8 100644
--- a/tickettracker/llm/normalizer.py
+++ b/tickettracker/llm/normalizer.py
@@ -229,7 +229,13 @@ def normalize_all_in_db(
 
         for start in range(0, total, batch_size):
             batch = items[start: start + batch_size]
-            raw_names = [row["name_raw"] for row in batch]
+            # On inclut l'unité/poids (ex: "250 g", "20 sachets") dans le nom
+            # envoyé au LLM pour qu'il puisse le placer dans le champ format.
+            # Pour les articles sans unité (Leclerc OCR), unit est None ou "".
+            raw_names = [
+                f"{row['name_raw']} {row['unit']}".strip() if row["unit"] else row["name_raw"]
+                for row in batch
+            ]
 
             # --- Tentative batch ---
             try:
@@ -246,7 +252,7 @@ def normalize_all_in_db(
             # tente le fallback un par un
             if all(r is None for r in results):
                 logger.debug("Fallback unitaire pour le batch %d–%d.", start, start + len(batch))
-                results = [normalize_product_name(name) for name in raw_names]
+                results = [normalize_product_name(name) for name in raw_names]  # raw_names contient déjà l'unité
 
             # --- Mise à jour ou affichage ---
             for item, normalized in zip(batch, results):