223 lines
6.9 KiB
Python
223 lines
6.9 KiB
Python
|
|
"""
CLI entry point for TicketTracker.

Usage:
    python -m tickettracker.cli import fichier.html --source picnic
    python -m tickettracker.cli import fichier.pdf --source leclerc [--db /chemin/db]
    python -m tickettracker.cli stats
    python -m tickettracker.cli stats --db /chemin/db
    python -m tickettracker.cli normalize [--dry-run] [--batch-size N] [--db /chemin/db]
"""
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import logging
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
from tickettracker.db.schema import DEFAULT_DB_PATH
|
||
|
|
from tickettracker import pipeline
|
||
|
|
|
||
|
|
# Show INFO-level messages in the terminal (useful to see skipped duplicates).
# The format keeps only the message text, without level or logger name.
logging.basicConfig(level=logging.INFO, format="%(message)s")
|
||
|
|
|
||
|
|
|
||
|
|
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser.

    Layout:
        tickettracker.cli
        ├── import <file> --source {picnic,leclerc} [--db PATH]
        ├── stats [--db PATH]
        └── normalize [--dry-run] [--batch-size N] [--db PATH]

    Returns:
        The top-level parser. A sub-command is mandatory and its name is
        stored in ``args.command``.
    """

    def _add_db_option(sub: argparse.ArgumentParser) -> None:
        # Every sub-command accepts the same optional database path.
        sub.add_argument(
            "--db",
            type=Path,
            default=DEFAULT_DB_PATH,
            metavar="PATH",
            help=f"Chemin vers la base SQLite (défaut : {DEFAULT_DB_PATH})",
        )

    root = argparse.ArgumentParser(
        prog="python -m tickettracker.cli",
        description="TicketTracker — import et analyse de tickets de courses",
    )
    commands = root.add_subparsers(dest="command", required=True)

    # --- Sub-command: import ---
    import_cmd = commands.add_parser(
        "import",
        help="Parse et importe un ticket dans la base SQLite",
    )
    import_cmd.add_argument(
        "file",
        type=Path,
        help="Chemin vers le fichier à importer (.html pour Picnic, .pdf pour Leclerc)",
    )
    import_cmd.add_argument(
        "--source",
        required=True,
        choices=["picnic", "leclerc"],
        help="Format du fichier",
    )
    _add_db_option(import_cmd)

    # --- Sub-command: stats ---
    stats_cmd = commands.add_parser(
        "stats",
        help="Affiche un résumé de la base de données",
    )
    _add_db_option(stats_cmd)

    # --- Sub-command: normalize ---
    # Function-scope import: the default batch size is read from the project
    # configuration module.
    from tickettracker import config as _cfg

    normalize_cmd = commands.add_parser(
        "normalize",
        help="Normalise les noms de produits via le LLM",
    )
    _add_db_option(normalize_cmd)
    normalize_cmd.add_argument(
        "--dry-run",
        action="store_true",
        help="Calcule les normalisations sans écrire en base",
    )
    normalize_cmd.add_argument(
        "--batch-size",
        type=int,
        default=_cfg.LLM_BATCH_SIZE,
        metavar="N",
        help=f"Articles par appel LLM (défaut : {_cfg.LLM_BATCH_SIZE})",
    )

    return root
|
||
|
|
|
||
|
|
|
||
|
|
def cmd_import(args: argparse.Namespace) -> int:
    """Run the 'import' sub-command.

    Passes ``args.file``, ``args.source`` and ``args.db`` to
    ``pipeline.import_receipt`` and reports the outcome on the terminal.

    Args:
        args: Parsed command-line namespace of the 'import' sub-command.

    Returns:
        0 when ``import_receipt`` returns without raising (a truthy result is
        reported as imported, a falsy one as already present); 1 when any
        exception is raised.
    """
    try:
        inserted = pipeline.import_receipt(args.file, args.source, args.db)
        if inserted:
            print(f"OK Ticket importé depuis {args.file}")
        else:
            # Plain literal: this message has nothing to interpolate.
            print("[skip] Ticket déjà présent en base — import ignoré.")
        return 0
    except (FileNotFoundError, ValueError) as e:
        # Anticipated failures: reported with the bare error message.
        print(f"Erreur : {e}", file=sys.stderr)
        return 1
    except Exception as e:
        # Top-level CLI boundary: any other failure becomes exit code 1
        # instead of a traceback.
        print(f"Erreur inattendue : {e}", file=sys.stderr)
        return 1
|
||
|
|
|
||
|
|
|
||
|
|
def cmd_stats(args: argparse.Namespace) -> int:
    """Run the 'stats' sub-command and print a summary of the database.

    Args:
        args: Parsed command-line namespace; only ``args.db`` is read.

    Returns:
        1 when the database file does not exist, 0 otherwise, including
        when the database contains no receipt.
    """
    from tickettracker.db import schema, repository

    db_file = Path(args.db)
    if not db_file.exists():
        for line in (
            f"Base de données absente : {args.db}",
            "Importez d'abord un ticket avec la commande 'import'.",
        ):
            print(line, file=sys.stderr)
        return 1

    # NOTE(review): whether leaving this block also closes the connection
    # depends on what get_connection returns; confirm in db.schema.
    with schema.get_connection(args.db) as conn:
        summary = repository.get_stats(conn)

    receipt_total = sum(summary["receipts_by_store"].values())
    if receipt_total == 0:
        print("Aucun ticket en base.")
        return 0

    print("--- TicketTracker : résumé ---")
    print("Tickets par enseigne :")
    for store, count in sorted(summary["receipts_by_store"].items()):
        print(f" {store:<10}: {count} ticket(s)")
    print(f"Total dépensé : {summary['total_spent']:.2f} €")
    print(f"Nombre d'articles : {summary['total_items']} lignes")

    distinct_names = summary["distinct_normalized"]
    missing_names = summary["null_normalized"]
    item_count = summary["total_items"]
    print(f"Noms normalisés : {distinct_names} distincts / {item_count} articles")
    if missing_names > 0:
        # Point the user at the command that fills in the missing names.
        print(f" ({missing_names} articles sans nom normalisé)")
        print(" Lancez : python -m tickettracker.cli normalize")
    return 0
|
||
|
|
|
||
|
|
|
||
|
|
def cmd_normalize(args: argparse.Namespace) -> int:
    """Run the 'normalize' sub-command.

    Checks that ``config.LLM_API_KEY`` is set and that the database file
    exists, then delegates to ``normalizer.normalize_all_in_db`` with the
    database path, batch size and dry-run flag taken from ``args``.

    Args:
        args: Parsed command-line namespace; ``db``, ``batch_size`` and
            ``dry_run`` are read.

    Returns:
        0 when the normalizer reports zero errors; 1 when the API key is
        missing, the database file is absent, the normalizer reports at
        least one error, or any exception is raised.
    """
    from tickettracker import config
    from tickettracker.llm.client import LLMError, LLMUnavailable
    from tickettracker.llm import normalizer

    def _fail(*lines: str) -> int:
        # Print each line to stderr and hand back the failure exit code.
        for line in lines:
            print(line, file=sys.stderr)
        return 1

    # Precondition 1: an API key must be configured.
    if not config.LLM_API_KEY:
        return _fail(
            "Erreur : clé API LLM manquante.\n"
            "Définissez la variable d'environnement TICKETTRACKER_LLM_API_KEY."
        )

    # Precondition 2: the database file must already exist.
    if not Path(args.db).exists():
        return _fail(
            f"Base de données absente : {args.db}",
            "Importez d'abord un ticket avec la commande 'import'.",
        )

    try:
        # The first element of the result is not used by the CLI.
        _nb_ok, failures = normalizer.normalize_all_in_db(
            db_path=args.db,
            batch_size=args.batch_size,
            dry_run=args.dry_run,
        )
        if failures == 0:
            return 0
        return 1
    except LLMUnavailable as exc:
        return _fail(f"LLM injoignable : {exc}")
    except LLMError as exc:
        return _fail(f"Erreur LLM : {exc}")
    except Exception as exc:
        return _fail(f"Erreur inattendue : {exc}")
|
||
|
|
|
||
|
|
|
||
|
|
def main() -> None:
    """Parse the command line and run the selected sub-command.

    The process exits through ``sys.exit`` with the integer returned by the
    sub-command handler.
    """
    args = build_parser().parse_args()

    # Map each sub-command name to the function that implements it.
    handlers = {
        "import": cmd_import,
        "stats": cmd_stats,
        "normalize": cmd_normalize,
    }
    handler = handlers.get(args.command)
    if handler is not None:
        sys.exit(handler(args))
|
||
|
|
|
||
|
|
|
||
|
|
# Run the CLI only when this module is executed as a script
# (e.g. ``python -m tickettracker.cli``), not when it is imported.
if __name__ == "__main__":
    main()
|