# tmdb-radarr-tag/script.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Radarr-TMDB Spectacle Tagger
─────────────────────────────
Détecte les spectacles vivants (stand-up, théâtre, one-man/woman show, etc.)
dans ta bibliothèque Radarr grâce aux métadonnées TMDB, et leur colle un tag
"spectacle". Les concerts de musique sont explicitement exclus.
Parce qu'à 50 ans, on sait faire la différence entre Gad Elmaleh et Metallica.

Dry-run par défaut — on touche à rien sans --apply.

Auteur : Un dev qui aime le stand-up ET le code propre.
"""

import os
import sys
import csv
import time
import getpass
import argparse
import logging
from pathlib import Path

# ── Dépendances externes ──────────────────────────────────────────────
try:
    import yaml
    import requests
    from dotenv import dotenv_values
except ImportError as e:
    print(
        f"[ERREUR] Module manquant : {e.name}\n"
        "Lance : pip install -r requirements.txt\n"
        "Ou si t'es du genre impatient : pip install requests pyyaml python-dotenv"
    )
    sys.exit(1)


# ══════════════════════════════════════════════════════════════════════
# CONSTANTES & CHEMINS
# ══════════════════════════════════════════════════════════════════════

# Location of the secrets file: ONE LEVEL ABOVE the repo.
# If the repo lives in /home/moi/projets/spectacle-tagger/,
# then .env.global is expected at /home/moi/projets/.env.global
SCRIPT_DIR = Path(__file__).resolve().parent
ENV_GLOBAL_PATH = SCRIPT_DIR.parent / ".env.global"

# Optional YAML configuration file, looked up next to this script.
CONFIG_PATH = SCRIPT_DIR / "config.yaml"

# Variables that must be present (and non-empty) in .env.global
REQUIRED_ENV_VARS = ["RADARR_URL", "RADARR_APIKEY", "TMDB_APIKEY"]

# ── Default values (overridden by config.yaml, then by CLI args) ──
DEFAULTS = {
    "TAG_NAME": "spectacle",
    "DRY_RUN": True,
    "MIN_RUNTIME": 15,
    "MAX_RUNTIME": 240,
    "EXTRA_KEYWORDS": [
        # Keywords highly specific to stand-up / comedy
        "stand-up", "standup", "stand up comedy",
        "one man show", "one-man show", "one woman show", "one-woman show",
        "comedy special", "humoriste", "spectacle humoristique",
        "seul en scène", "seule en scène", "spectacle solo",
        # Theatre (phrased with context to avoid false positives)
        "pièce de théâtre", "théâtre filmé", "captation théâtre",
        "captation spectacle", "enregistrement spectacle",
        # Specific phrasings
        "sketch show", "spectacle d'humour", "spectacle comique",
        "one-woman", "one-man",  # For specific partial matches
        # Filmed musicals
        "comédie musicale", "musical live", "broadway musical",
        "west end musical", "theatre musical", "musical filmé",
        "musical recording", "filmed musical",
    ],
    "EXCLUDE_KEYWORDS": [
        "concert", "live concert", "music video",
        "festival", "musician", "band", "rock",
        "pop", "hip hop", "rap", "jazz", "classical",
        "symphony", "orchestra", "dj", "live album",
        "metal", "punk", "electronic", "techno",
        # NOTE: "music" was removed so that "musical" / "comédie musicale" can pass
        # NOTE: "tour" was removed because it matches inside "detour", "contour", etc.
    ],
    # Title patterns that indicate concerts / music shows
    "MUSIC_TITLE_PATTERNS": [
        "- live", " live at", "live in concert",
        "- the song remains", "- tour", " tour ",
        "last curtain call", "farewell tour",
        "unplugged", "mtv live", "live from",
        "in concert", "live performance",
    ],
    # Title patterns that indicate REAL shows (stand-up / filmed theatre)
    # Typical format: "Artist Name - Venue/Show title"
    "SPECTACLE_TITLE_PATTERNS": [
        " - ",  # E.g. "Gad Elmaleh - Papa est en haut", "Florence Foresti - Motherfucker"
    ],
    "LIMIT": 5,  # Max number of movies to process; fetch_movies() treats 0 as "all"
    "OUTPUT_CSV": "results_spectacle_dryrun.csv",
    "LOG_LEVEL": "INFO",
    "SENSITIVITY": "strict",  # "strict" or "loose"; selects the decision rule in detect_spectacle()
}

# Timeout / retry settings shared by the HTTP helpers (http_get/http_put/http_post)
HTTP_TIMEOUT = 15  # seconds
HTTP_RETRIES = 3  # total number of attempts per request
HTTP_RETRY_DELAY = 2  # seconds between retries

# ══════════════════════════════════════════════════════════════════════
# LOGGING — on configure ça proprement
# ══════════════════════════════════════════════════════════════════════

# Module-wide logger; its handler and level are installed by setup_logging().
logger = logging.getLogger("spectacle-tagger")


def setup_logging(level_str: str = "INFO"):
    """Configure the module logger with a readable console format.

    Args:
        level_str: Name of a standard logging level ("DEBUG", "INFO", ...),
            case-insensitive, or a numeric level. Anything that does not
            resolve to an integer level falls back to INFO.
    """
    if isinstance(level_str, int) and not isinstance(level_str, bool):
        # A numeric level (e.g. written as a number in config.yaml) is used as-is.
        level = level_str
    else:
        # getattr() may also return non-level attributes of the logging
        # module (format strings, classes, ...); only accept real int levels.
        level = getattr(logging, str(level_str).upper(), None)
        if not isinstance(level, int) or isinstance(level, bool):
            level = logging.INFO

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(level)
    fmt = logging.Formatter(
        "[%(asctime)s] %(levelname)-8s %(message)s",
        datefmt="%H:%M:%S",
    )
    handler.setFormatter(fmt)
    logger.setLevel(level)
    # Drop handlers left by a previous call so messages are not duplicated.
    logger.handlers.clear()
    logger.addHandler(handler)


# ══════════════════════════════════════════════════════════════════════
# GESTION DE ../.env.global — LECTURE / CRÉATION INTERACTIVE
# ══════════════════════════════════════════════════════════════════════

def create_env_global_interactive():
    """Interactively create the secrets file at ENV_GLOBAL_PATH.

    Prompts for RADARR_URL, RADARR_APIKEY and TMDB_APIKEY. The API keys are
    read without echo (getpass) so they do not show up on screen. The file is
    created with owner-only permissions (0o600).

    Exits the process with status 1 when a value is empty or when the file
    cannot be written.
    """
    print("=" * 60)
    print("  Création de ../.env.global")
    print("  (Tes secrets restent entre toi et ton serveur)")
    print("=" * 60)
    print(f"\n  Le fichier sera créé ici : {ENV_GLOBAL_PATH}\n")

    radarr_url = input("  RADARR_URL (ex: http://localhost:7878) : ").strip()
    if not radarr_url:
        logger.error("RADARR_URL ne peut pas être vide. On arrête là, chef.")
        sys.exit(1)

    radarr_apikey = _prompt_secret(
        "  RADARR_APIKEY (saisie masquée) : ", "  RADARR_APIKEY : "
    )
    if not radarr_apikey:
        logger.error("RADARR_APIKEY ne peut pas être vide.")
        sys.exit(1)

    tmdb_apikey = _prompt_secret(
        "  TMDB_APIKEY   (saisie masquée) : ", "  TMDB_APIKEY : "
    )
    if not tmdb_apikey:
        logger.error("TMDB_APIKEY ne peut pas être vide.")
        sys.exit(1)

    # Strip a trailing slash so later f"{radarr_url}/api/..." joins stay clean.
    radarr_url = radarr_url.rstrip("/")

    content = (
        f'RADARR_URL="{radarr_url}"\n'
        f'RADARR_APIKEY="{radarr_apikey}"\n'
        f'TMDB_APIKEY="{tmdb_apikey}"\n'
    )

    try:
        # Create the file with owner-only permissions from the very start, so
        # the secrets never sit on disk with the default umask permissions.
        fd = os.open(
            ENV_GLOBAL_PATH, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600
        )
        with os.fdopen(fd, "w", encoding="utf-8") as env_file:
            env_file.write(content)
        # os.open() leaves the mode of a pre-existing file untouched; tighten it too.
        ENV_GLOBAL_PATH.chmod(0o600)
        print(f"\n  ✅ Fichier créé : {ENV_GLOBAL_PATH}")
        print("  ⚠️  Ne le commite JAMAIS. Jamais. Nada. Que dalle.\n")
    except OSError as e:
        logger.error(f"Impossible de créer {ENV_GLOBAL_PATH} : {e}")
        sys.exit(1)


def _prompt_secret(masked_prompt: str, fallback_prompt: str) -> str:
    """Read one secret from the terminal and return it stripped.

    The value is read without echo via getpass; if that fails, a plain
    (echoing) input() with ``fallback_prompt`` is used instead.
    """
    print(masked_prompt, end="", flush=True)
    try:
        value = getpass.getpass(prompt="")
    except Exception:
        # Best-effort fallback: better an echoed prompt than no prompt at all.
        value = input(fallback_prompt)
    # Strip in both paths so a whitespace-only key is rejected by the caller.
    return value.strip()


def load_env_global() -> dict:
    """Load the secrets stored in the file at ENV_GLOBAL_PATH.

    When the file does not exist, the interactive creation helper is run
    first. Every name listed in REQUIRED_ENV_VARS must be present with a
    non-empty value, otherwise the process exits with status 1. Only the
    file path and the names of missing variables are logged, never values.

    Returns:
        The mapping produced by ``dotenv_values`` for the secrets file.
    """
    # Step 1: make sure the file exists, creating it interactively if needed.
    if not ENV_GLOBAL_PATH.exists():
        logger.warning(f"Fichier {ENV_GLOBAL_PATH} introuvable.")
        logger.info("On va le créer ensemble, t'inquiète.")
        create_env_global_interactive()

    # Step 2: parse the file.
    secrets = dotenv_values(ENV_GLOBAL_PATH)

    # Step 3: collect required variables that are absent or empty.
    absent = []
    for name in REQUIRED_ENV_VARS:
        if not secrets.get(name):
            absent.append(name)

    if not absent:
        logger.info(f"✅ Secrets chargés depuis {ENV_GLOBAL_PATH} (contenu non affiché, évidemment)")
        return secrets

    absent_names = ", ".join(absent)
    logger.error(
        f"Variables manquantes dans {ENV_GLOBAL_PATH} : {absent_names}\n"
        f"  → Ouvre le fichier {ENV_GLOBAL_PATH} et vérifie son contenu.\n"
        f"  → Ou supprime-le et relance le script pour le recréer."
    )
    sys.exit(1)


# ══════════════════════════════════════════════════════════════════════
# CHARGEMENT DE LA CONFIG (config.yaml + args CLI)
# ══════════════════════════════════════════════════════════════════════

def load_config_yaml() -> dict:
    """Load config.yaml when it exists; otherwise return an empty dict.

    Any read or parse error is logged as a warning and results in an empty
    dict, so that the caller falls back to the built-in defaults. A document
    whose top level is not a mapping (for instance a list or a bare string)
    is rejected the same way, because the caller looks options up by key.

    Returns:
        The parsed top-level mapping, or ``{}``.
    """
    if not CONFIG_PATH.exists():
        logger.info(f"Pas de {CONFIG_PATH} trouvé, on utilise les valeurs par défaut.")
        return {}

    try:
        with open(CONFIG_PATH, "r", encoding="utf-8") as f:
            # An empty file parses to None; treat it as an empty config.
            data = yaml.safe_load(f) or {}
    except Exception as e:
        # Deliberate best effort: a broken config must not stop the run.
        logger.warning(f"Erreur lecture {CONFIG_PATH} : {e} — on utilise les défauts.")
        return {}

    if not isinstance(data, dict):
        logger.warning(
            f"Erreur lecture {CONFIG_PATH} : contenu YAML inattendu "
            f"({type(data).__name__}, mapping attendu) — on utilise les défauts."
        )
        return {}

    logger.info(f"✅ Config chargée depuis {CONFIG_PATH}")
    return data


def parse_args():
    """Build the command-line parser and parse the process arguments.

    Returns:
        The ``argparse.Namespace`` with attributes ``apply``,
        ``apply_from_csv``, ``limit``, ``tag``, ``output``, ``sensitivity``,
        ``verbose`` and ``quiet``. Options that were not given are ``None``
        (valued options) or ``False`` (flags).
    """
    description = (
        "🎭 Radarr-TMDB Spectacle Tagger — Détecte les spectacles vivants et tag dans Radarr."
    )
    # Joined with newlines; the empty entries produce the blank line between
    # the two groups and the trailing newline.
    usage_examples = "\n".join([
        "Exemples :",
        "  # Workflow recommandé (2 étapes) :",
        "  python script.py --limit 100           # 1. Dry-run (génère CSV)",
        "  python script.py --apply-from-csv      # 2. Appliquer depuis CSV",
        "",
        "  # Autres exemples :",
        "  python script.py                       # dry-run, 5 films",
        "  python script.py --limit 0             # dry-run, TOUS les films",
        "  python script.py --limit 0 --apply     # scanner + appliquer direct",
        "  python script.py --verbose             # mode bavard",
        "",
    ])

    # One (flag, add_argument keyword arguments) entry per option, in help order.
    option_table = (
        ("--apply", {
            "action": "store_true",
            "help": "Appliquer réellement les tags (sinon dry-run)",
        }),
        ("--apply-from-csv", {
            "action": "store_true",
            "help": "Appliquer les tags depuis le CSV du dry-run (pas de requêtes TMDB)",
        }),
        ("--limit", {
            "type": int,
            "default": None,
            "help": "Nombre max de films à traiter (0 = tous, défaut : 5)",
        }),
        ("--tag", {
            "type": str,
            "default": None,
            "help": 'Nom du tag Radarr (défaut : "spectacle")',
        }),
        ("--output", {
            "type": str,
            "default": None,
            "help": "Chemin du CSV de résultats",
        }),
        ("--sensitivity", {
            "choices": ["strict", "loose"],
            "default": None,
            "help": "Sensibilité de détection (strict = keyword + runtime, loose = keyword OU runtime)",
        }),
        ("--verbose", {
            "action": "store_true",
            "help": "Logs détaillés (DEBUG)",
        }),
        ("--quiet", {
            "action": "store_true",
            "help": "Logs minimaux (WARNING)",
        }),
    )

    cli = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    return cli.parse_args()


def build_config(cli_args) -> dict:
    """Merge the three configuration layers into one dict.

    Precedence, lowest to highest: DEFAULTS, then config.yaml, then the
    command-line arguments. Only keys that exist in DEFAULTS are taken from
    config.yaml, and ``None`` values there are ignored. The result also
    carries an ``APPLY_FROM_CSV`` boolean derived from the CLI.

    Args:
        cli_args: Namespace returned by ``parse_args()``.

    Returns:
        The effective configuration.
    """
    merged = dict(DEFAULTS)

    # config.yaml layer: known keys only, explicit nulls skipped.
    file_values = load_config_yaml()
    merged.update({
        name: file_values[name]
        for name in DEFAULTS
        if name in file_values and file_values[name] is not None
    })

    # CLI layer: either apply flag turns dry-run off.
    if cli_args.apply or cli_args.apply_from_csv:
        merged["DRY_RUN"] = False
    merged["APPLY_FROM_CSV"] = True if cli_args.apply_from_csv else False

    # Valued options override only when they were actually given.
    valued_overrides = (
        ("LIMIT", cli_args.limit),
        ("TAG_NAME", cli_args.tag),
        ("OUTPUT_CSV", cli_args.output),
        ("SENSITIVITY", cli_args.sensitivity),
    )
    for key, given in valued_overrides:
        if given is not None:
            merged[key] = given

    # --quiet is applied last, so it wins when both flags are present.
    if cli_args.verbose:
        merged["LOG_LEVEL"] = "DEBUG"
    if cli_args.quiet:
        merged["LOG_LEVEL"] = "WARNING"

    return merged


# ══════════════════════════════════════════════════════════════════════
# REQUÊTES HTTP AVEC RETRY
# ══════════════════════════════════════════════════════════════════════

def http_get(url: str, headers: dict | None = None, params: dict | None = None) -> requests.Response:
    """Perform a GET request with retries and error handling.

    Up to HTTP_RETRIES attempts are made. An HTTP 429 response waits for the
    ``Retry-After`` delay before the next attempt; other failures wait
    HTTP_RETRY_DELAY seconds. Headers and query parameters are never logged,
    and the final error message is built from the status code or error kind
    only, because the text of the underlying ``requests`` exception can
    contain the full request URL, query string included.

    Args:
        url: Target URL, without secrets (pass those via headers/params).
        headers: Optional request headers.
        params: Optional query parameters.

    Returns:
        The successful response.

    Raises:
        ConnectionError: When every attempt failed.
    """
    last_error = None
    for attempt in range(1, HTTP_RETRIES + 1):
        try:
            resp = requests.get(
                url, headers=headers, params=params, timeout=HTTP_TIMEOUT
            )

            # Rate limit (429)
            if resp.status_code == 429:
                try:
                    retry_after = max(0, int(resp.headers.get("Retry-After", 2)))
                except (TypeError, ValueError):
                    # Retry-After may also be an HTTP date; use the default delay.
                    retry_after = HTTP_RETRY_DELAY
                logger.warning(
                    f"  ⏳ Rate limit TMDB — on patiente {retry_after}s "
                    f"(tentative {attempt}/{HTTP_RETRIES})"
                )
                last_error = "HTTP 429 (rate limit)"
                # No point in waiting when no attempt is left.
                if attempt < HTTP_RETRIES:
                    time.sleep(retry_after)
                continue

            resp.raise_for_status()
            return resp

        except requests.exceptions.HTTPError as e:
            # A Response is falsy for 4xx/5xx statuses, so compare with None
            # instead of relying on truthiness.
            status = e.response.status_code if e.response is not None else "unknown"
            logger.warning(f"  ❌ HTTP {status} sur {url} (tentative {attempt}/{HTTP_RETRIES})")
            last_error = f"HTTP {status}"
        except requests.exceptions.Timeout:
            logger.warning(f"  ⏱️  Timeout sur {url} (tentative {attempt}/{HTTP_RETRIES})")
            last_error = f"Timeout sur {url}"
        except requests.exceptions.ConnectionError as e:
            logger.warning(f"  🔌 Connexion échouée vers {url} (tentative {attempt}/{HTTP_RETRIES})")
            last_error = f"connexion échouée ({type(e).__name__})"

        if attempt < HTTP_RETRIES:
            time.sleep(HTTP_RETRY_DELAY)

    raise ConnectionError(
        f"Échec après {HTTP_RETRIES} tentatives sur {url} : {last_error}"
    )


def http_put(url: str, headers: dict, json_data: dict) -> requests.Response:
    """Send a PUT request with a JSON body, retrying on failure.

    Timeouts, connection errors and HTTP error statuses are retried up to
    HTTP_RETRIES attempts in total, with HTTP_RETRY_DELAY seconds between
    attempts.

    Returns:
        The successful response.

    Raises:
        ConnectionError: When every attempt failed.
    """
    retryable = (
        requests.exceptions.Timeout,
        requests.exceptions.ConnectionError,
        requests.exceptions.HTTPError,
    )
    failure = None
    attempt = 0
    while attempt < HTTP_RETRIES:
        attempt += 1
        try:
            response = requests.put(
                url, headers=headers, json=json_data, timeout=HTTP_TIMEOUT
            )
            response.raise_for_status()
        except retryable as exc:
            logger.warning(f"  PUT échoué sur {url} (tentative {attempt}/{HTTP_RETRIES})")
            failure = exc
            # Pause only when another attempt will follow.
            if attempt < HTTP_RETRIES:
                time.sleep(HTTP_RETRY_DELAY)
        else:
            return response

    raise ConnectionError(f"PUT échoué après {HTTP_RETRIES} tentatives : {failure}")


def http_post(url: str, headers: dict, json_data: dict) -> requests.Response:
    """Send a POST request with a JSON body, retrying on failure.

    Timeouts, connection errors and HTTP error statuses are retried up to
    HTTP_RETRIES attempts in total, with HTTP_RETRY_DELAY seconds between
    attempts.

    Returns:
        The successful response.

    Raises:
        ConnectionError: When every attempt failed.
    """
    retryable = (
        requests.exceptions.Timeout,
        requests.exceptions.ConnectionError,
        requests.exceptions.HTTPError,
    )
    failure = None
    attempt = 0
    while attempt < HTTP_RETRIES:
        attempt += 1
        try:
            response = requests.post(
                url, headers=headers, json=json_data, timeout=HTTP_TIMEOUT
            )
            response.raise_for_status()
        except retryable as exc:
            logger.warning(f"  POST échoué sur {url} (tentative {attempt}/{HTTP_RETRIES})")
            failure = exc
            # Pause only when another attempt will follow.
            if attempt < HTTP_RETRIES:
                time.sleep(HTTP_RETRY_DELAY)
        else:
            return response

    raise ConnectionError(f"POST échoué après {HTTP_RETRIES} tentatives : {failure}")


# ══════════════════════════════════════════════════════════════════════
# FONCTIONS RADARR
# ══════════════════════════════════════════════════════════════════════

def radarr_headers(apikey: str) -> dict:
    """Return the request headers for a Radarr API call.

    The dict carries the API key, so it must never be logged.
    """
    headers = {}
    headers["X-Api-Key"] = apikey
    headers["Content-Type"] = "application/json"
    return headers


def ensure_tag_exists(radarr_url: str, apikey: str, tag_name: str) -> int:
    """Return the id of the Radarr tag named ``tag_name``, creating it if absent.

    Existing tags are listed through ``/api/v3/tag`` and compared by label,
    case-insensitively. When none matches, the tag is created with a POST
    to the same endpoint.

    Returns:
        The ``id`` of the existing or newly created tag.
    """
    auth_headers = radarr_headers(apikey)
    endpoint = f"{radarr_url}/api/v3/tag"

    logger.info(f"🏷️  Vérification du tag '{tag_name}' dans Radarr...")

    known_tags = http_get(endpoint, headers=auth_headers).json()

    # First tag whose label equals the requested name, ignoring case.
    existing = next(
        (
            entry
            for entry in known_tags
            if entry.get("label", "").lower() == tag_name.lower()
        ),
        None,
    )
    if existing is not None:
        logger.info(f"  → Tag '{tag_name}' trouvé (id={existing['id']})")
        return existing["id"]

    # No such tag yet: create it.
    logger.info(f"  → Tag '{tag_name}' inexistant, création en cours...")
    created = http_post(endpoint, headers=auth_headers, json_data={"label": tag_name}).json()
    created_id = created["id"]
    logger.info(f"  → Tag '{tag_name}' créé (id={created_id}) 🎉")
    return created_id


def fetch_movies(radarr_url: str, apikey: str, limit: int = 0) -> list:
    """Fetch the movie list from Radarr (``/api/v3/movie``).

    Args:
        radarr_url: Base URL of the Radarr instance.
        apikey: Radarr API key.
        limit: Keep only the first ``limit`` movies; 0 (or any non-positive
            value) keeps them all.

    Returns:
        The list of movie dicts decoded from the JSON response.
    """
    auth_headers = radarr_headers(apikey)
    endpoint = f"{radarr_url}/api/v3/movie"

    logger.info("📥 Récupération des films depuis Radarr...")
    library = http_get(endpoint, headers=auth_headers).json()
    logger.info(f"  → {len(library)} films dans la bibliothèque Radarr")

    # No positive limit requested: hand back the full library.
    if not (limit and limit > 0):
        return library

    subset = library[:limit]
    logger.info(f"  → Limité à {len(subset)} films (--limit {limit})")
    return subset


def apply_tag_to_movie(radarr_url: str, apikey: str, movie: dict, tag_id: int) -> bool:
    """Add a tag to a movie through the Radarr API.

    The full movie object, with ``tag_id`` appended to its ``tags``, is sent
    with a PUT to ``/api/v3/movie/{id}``. The caller's ``movie`` dict is
    updated only after the request succeeded, so a failed request does not
    leave it looking as if the tag had been applied.

    Args:
        radarr_url: Base URL of the Radarr instance.
        apikey: Radarr API key.
        movie: Movie dict; must contain ``id``.
        tag_id: Id of the tag to add.

    Returns:
        True when the tag was added, False when it was already present.

    Raises:
        ConnectionError: Propagated from ``http_put`` when the update failed.
    """
    # "tags" may be missing or null; treat both as "no tags".
    current_tags = movie.get("tags") or []
    if tag_id in current_tags:
        logger.debug(f"  Tag déjà présent sur '{movie.get('title')}'")
        return False

    # Work on copies so that nothing is mutated before Radarr accepted the change.
    updated_tags = [*current_tags, tag_id]
    payload = {**movie, "tags": updated_tags}

    headers = radarr_headers(apikey)
    url = f"{radarr_url}/api/v3/movie/{movie['id']}"

    http_put(url, headers=headers, json_data=payload)

    movie["tags"] = updated_tags
    return True


# ══════════════════════════════════════════════════════════════════════
# FONCTIONS TMDB
# ══════════════════════════════════════════════════════════════════════

def tmdb_headers(apikey: str) -> dict:
    """Return bearer-token request headers for the TMDB API.

    The dict carries the credential, so it must never be logged.
    """
    bearer = f"Bearer {apikey}"
    headers = {}
    headers["Authorization"] = bearer
    headers["Accept"] = "application/json"
    return headers


def fetch_tmdb_movie(tmdb_id: int, tmdb_apikey: str) -> dict | None:
    """Fetch the details of a movie from TMDB (French localisation).

    Args:
        tmdb_id: TMDB id of the movie.
        tmdb_apikey: TMDB API key, sent as the ``api_key`` query parameter.

    Returns:
        The decoded JSON body, or None when the request or decoding failed
        (the failure is only logged at DEBUG level).
    """
    url = f"https://api.themoviedb.org/3/movie/{tmdb_id}"
    params = {"api_key": tmdb_apikey, "language": "fr-FR"}

    try:
        resp = http_get(url, params=params)
        return resp.json()
    except Exception as e:
        # The error text can embed the request URL, query string included;
        # mask the key so it never reaches the logs.
        detail = str(e)
        if tmdb_apikey:
            detail = detail.replace(str(tmdb_apikey), "***")
        logger.debug(f"  TMDB /movie/{tmdb_id} échoué : {detail}")
        return None


def fetch_tmdb_keywords(tmdb_id: int, tmdb_apikey: str) -> list:
    """Fetch the TMDB keywords of a movie.

    Args:
        tmdb_id: TMDB id of the movie.
        tmdb_apikey: TMDB API key, sent as the ``api_key`` query parameter.

    Returns:
        The ``keywords`` list of the response, or an empty list when it is
        missing, null, or when the request or decoding failed (the failure
        is only logged at DEBUG level).
    """
    url = f"https://api.themoviedb.org/3/movie/{tmdb_id}/keywords"
    params = {"api_key": tmdb_apikey}

    try:
        resp = http_get(url, params=params)
        data = resp.json()
        # A null "keywords" value must still yield a list for the caller.
        return data.get("keywords") or []
    except Exception as e:
        # The error text can embed the request URL, query string included;
        # mask the key so it never reaches the logs.
        detail = str(e)
        if tmdb_apikey:
            detail = detail.replace(str(tmdb_apikey), "***")
        logger.debug(f"  TMDB /movie/{tmdb_id}/keywords échoué : {detail}")
        return []


# ══════════════════════════════════════════════════════════════════════
# DÉTECTION "SPECTACLE" — LE CŒUR DU BAZAR
# ══════════════════════════════════════════════════════════════════════

def detect_spectacle(
    movie_radarr: dict,
    tmdb_data: dict | None,
    tmdb_keywords: list,
    config: dict,
) -> dict:
    """
    Heuristique de détection des spectacles vivants.

    Retourne un dict :
    {
        "is_spectacle": bool,
        "score": int,
        "reasons": [str],
        "excluded_by": str | None,
    }

    ── LOGIQUE AMÉLIORÉE ────────────────────────────────────────────
    1. On construit un "sac de texte" à partir de :
       - titre Radarr + titre TMDB
       - overview TMDB
       - keywords TMDB
       - genres TMDB
    2. EXCLUSIONS PRIORITAIRES (dans l'ordre) :
       a) Patterns de titres musicaux (ex: "- Live", "Tour", etc.)
       b) EXCLUDE_KEYWORDS dans le contenu
       c) Runtime invalide (0 ou hors fourchette)
    3. On cherche les EXTRA_KEYWORDS dans le sac de texte.
       → Chaque match = +2 au score.
    4. On vérifie le runtime (entre MIN et MAX).
       → Si dans la fourchette = +1 au score.
    5. Mode "strict" : keyword match ET runtime match.
       Mode "loose"  : keyword match ET runtime > 0.
    ─────────────────────────────────────────────────────────────────
    """
    result = {
        "is_spectacle": False,
        "score": 0,
        "reasons": [],
        "excluded_by": None,
    }

    extra_kw = [kw.lower() for kw in config.get("EXTRA_KEYWORDS", [])]
    excl_kw = [kw.lower() for kw in config.get("EXCLUDE_KEYWORDS", [])]
    music_patterns = [p.lower() for p in config.get("MUSIC_TITLE_PATTERNS", [])]
    min_rt = config.get("MIN_RUNTIME", 15)
    max_rt = config.get("MAX_RUNTIME", 240)
    sensitivity = config.get("SENSITIVITY", "loose")

    # ── Récupérer le path Radarr (peut indiquer si déjà dans dossier Spectacles) ──
    movie_path = movie_radarr.get("path", "").lower()

    # ── Construire le titre complet (pour détection patterns) ──
    title_full = " ".join([
        movie_radarr.get("title", "").lower(),
        movie_radarr.get("originalTitle", "").lower(),
        tmdb_data.get("title", "").lower() if tmdb_data else "",
        tmdb_data.get("original_title", "").lower() if tmdb_data else "",
    ])

    # ── Construire le sac de texte (pour keywords) ──
    texts = []
    texts.append(movie_radarr.get("title", "").lower())
    texts.append(movie_radarr.get("originalTitle", "").lower())

    if tmdb_data:
        texts.append(tmdb_data.get("title", "").lower())
        texts.append(tmdb_data.get("original_title", "").lower())
        texts.append(tmdb_data.get("overview", "").lower())
        texts.append(tmdb_data.get("tagline", "").lower())
        # Genres TMDB
        for genre in tmdb_data.get("genres", []):
            texts.append(genre.get("name", "").lower())

    # Keywords TMDB
    for kw in tmdb_keywords:
        texts.append(kw.get("name", "").lower())

    bag = " ".join(texts)

    # ── Étape 0 : DÉTECTION PRÉCOCE des comédies musicales ──
    # Si "musical" dans le titre/bag, c'est probablement un spectacle filmé
    musical_indicators = ["musical", "comédie musicale", "broadway", "west end"]
    is_musical = any(indicator in bag for indicator in musical_indicators)

    # ── Étape 1a : EXCLUSION par patterns de titres musicaux (sauf si musical détecté) ──
    if not is_musical:  # Ne pas exclure si c'est un musical
        for pattern in music_patterns:
            if pattern in title_full:
                result["excluded_by"] = f"music pattern '{pattern}'"
                result["reasons"].append(f"EXCLU par pattern musical '{pattern}' dans titre")
                logger.debug(f"    ❌ Exclusion : pattern musical '{pattern}' dans titre")
                return result

    # ── Étape 1b : EXCLUSION par genres TMDB (films de fiction) ──
    # Les spectacles filmés ne sont généralement PAS catégorisés comme Romance, Drama, etc.
    fiction_genres = ["romance", "drama", "thriller", "horror", "action", "adventure",
                      "science fiction", "fantasy", "animation", "mystery", "crime", "war"]
    if tmdb_data:
        for genre in tmdb_data.get("genres", []):
            genre_name = genre.get("name", "").lower()
            if genre_name in fiction_genres:
                result["excluded_by"] = f"genre '{genre_name}'"
                result["reasons"].append(f"EXCLU par genre fiction '{genre_name}'")
                logger.debug(f"    ❌ Exclusion : genre fiction '{genre_name}'")
                return result

    # ── Étape 1c : EXCLUSION par keywords (sauf si musical détecté) ──
    if not is_musical:  # Ne pas exclure si c'est un musical
        for ekw in excl_kw:
            if ekw in bag:
                result["excluded_by"] = ekw
                result["reasons"].append(f"EXCLU par keyword '{ekw}'")
                logger.debug(f"    ❌ Exclusion : '{ekw}' trouvé")
                return result

    # ── Étape 2 : vérification runtime ──
    runtime = 0
    if tmdb_data and tmdb_data.get("runtime"):
        runtime = tmdb_data["runtime"]
    elif movie_radarr.get("runtime"):
        runtime = movie_radarr["runtime"]

    # ── Étape 1c : EXCLUSION si runtime = 0 ou invalide ──
    # Un vrai spectacle a TOUJOURS un runtime connu
    if not runtime or runtime == 0:
        result["excluded_by"] = "runtime=0"
        result["reasons"].append("EXCLU : runtime invalide ou inconnu")
        logger.debug(f"    ❌ Exclusion : runtime invalide ({runtime})")
        return result

    runtime_match = False
    if min_rt <= runtime <= max_rt:
        result["score"] += 1
        result["reasons"].append(f"runtime {runtime}min (dans [{min_rt}-{max_rt}])")
        runtime_match = True
    else:
        # Runtime hors fourchette → exclusion
        result["excluded_by"] = f"runtime={runtime}"
        result["reasons"].append(f"EXCLU : runtime {runtime}min hors fourchette [{min_rt}-{max_rt}]")
        logger.debug(f"    ❌ Exclusion : runtime {runtime} hors fourchette")
        return result

    # ── Étape 3a : BONUS si path contient "spectacle" ──
    # Si le film est déjà dans un dossier "Spectacles" → fort indicateur
    path_bonus = False
    if movie_path and ("spectacle" in movie_path or "spectacles" in movie_path):
        result["score"] += 10  # Bonus très fort (l'utilisateur l'a déjà classé)
        result["reasons"].append("path contient 'spectacle' (+10)")
        path_bonus = True
        logger.debug(f"    ✅ BONUS PATH : 'spectacle' trouvé dans {movie_path}")

    # ── Étape 3a-bis : BONUS si musical détecté ──
    if is_musical:
        result["score"] += 3  # Bonus modéré pour musicals
        result["reasons"].append("musical détecté (+3)")
        logger.debug(f"    ✅ BONUS MUSICAL : indicateurs musical trouvés")

    # ── Étape 3b : BONUS si titre avec pattern spectacle (ex: "Nom - Titre") ──
    spectacle_patterns = config.get("SPECTACLE_TITLE_PATTERNS", [])
    title_bonus = False
    for pattern in spectacle_patterns:
        if pattern in title_full:
            # Pattern " - " dans le titre = fort indicateur de spectacle filmé
            result["score"] += 5
            result["reasons"].append(f"titre pattern spectacle '{pattern}'")
            title_bonus = True
            logger.debug(f"    ✅ Bonus titre : pattern '{pattern}' trouvé")
            break

    # ── Étape 4 : recherche EXTRA_KEYWORDS ──
    keyword_matches = []
    for ikw in extra_kw:
        if ikw in bag:
            # Bonus si keyword très spécifique (>= 10 caractères)
            points = 3 if len(ikw) >= 10 else 2
            result["score"] += points
            result["reasons"].append(f"keyword '{ikw}' (+{points})")
            keyword_matches.append(ikw)

    # ── Étape 5 : décision selon sensibilité + SEUIL MINIMUM ──
    # NOUVEAU : Seuil minimum de score pour éviter faux positifs
    MIN_SCORE_STRICT = 7   # Mode strict : besoin d'un bon match (pattern titre + keywords)
    MIN_SCORE_LOOSE = 10   # Mode loose : besoin d'un très bon match

    keyword_match = len(keyword_matches) > 0

    # ── CAS SPÉCIAL : Path bonus (film déjà dans dossier Spectacles) ──
    # Si path_bonus ET runtime valide → détection automatique (confiance utilisateur)
    if path_bonus and runtime_match:
        result["is_spectacle"] = True
        logger.debug(f"    ✅ Détection automatique : path bonus + runtime valide")
        return result

    # ── CAS NORMAL : Scoring classique ──
    if sensitivity == "strict":
        # Mode strict : keyword + runtime + score >= 5
        result["is_spectacle"] = (
            keyword_match
            and runtime_match
            and result["score"] >= MIN_SCORE_STRICT
        )
    else:
        # Mode loose : keyword + score >= 7 (ou pattern titre + keyword)
        result["is_spectacle"] = (
            keyword_match
            and (result["score"] >= MIN_SCORE_LOOSE or title_bonus)
        )

    return result


# ══════════════════════════════════════════════════════════════════════
# INSPECTION D'UN FILM
# ══════════════════════════════════════════════════════════════════════

def inspect_movie(movie: dict, tmdb_apikey: str, config: dict) -> dict | None:
    """Inspect one Radarr movie and report whether it looks like a live show.

    TMDB details and keywords are fetched for the movie, then the detection
    heuristic is run on them.

    Args:
        movie: Movie dict from Radarr.
        tmdb_apikey: TMDB API key.
        config: Effective configuration passed on to the heuristic.

    Returns:
        A result dict (ids, title, year, runtime, verdict, score, reasons,
        exclusion, plus the original movie under ``movie_data``), or None
        when the movie has no ``tmdbId``.
    """
    tmdb_id = movie.get("tmdbId")
    title = movie.get("title", "Inconnu")

    # Without a TMDB id there is nothing to look up.
    if not tmdb_id:
        logger.debug(f"  ⚠️  '{title}' — pas de tmdbId, on saute.")
        return None

    logger.debug(f"  🔍 Inspection de '{title}' (tmdbId={tmdb_id})...")

    details = fetch_tmdb_movie(tmdb_id, tmdb_apikey)
    keywords = fetch_tmdb_keywords(tmdb_id, tmdb_apikey)

    # Short pause so TMDB is not hammered.
    time.sleep(0.25)

    verdict = detect_spectacle(movie, details, keywords, config)

    # The reported runtime comes from TMDB only; empty when TMDB gave nothing.
    if details:
        reported_runtime = details.get("runtime", "")
    else:
        reported_runtime = ""

    report = {}
    report["radarr_id"] = movie.get("id")
    report["tmdb_id"] = tmdb_id
    report["title"] = title
    report["year"] = movie.get("year", "")
    report["runtime"] = reported_runtime
    report["is_spectacle"] = verdict["is_spectacle"]
    report["score"] = verdict["score"]
    report["reasons"] = "; ".join(verdict["reasons"])
    report["excluded_by"] = verdict["excluded_by"]
    report["movie_data"] = movie  # kept so the tag can be applied later
    return report


# ══════════════════════════════════════════════════════════════════════
# DRY-RUN : COLLECTE ET CSV
# ══════════════════════════════════════════════════════════════════════

def write_csv(matches: list, all_results: list, output_path: str):
    """Write every analysed movie to a CSV file.

    Only the columns listed below are written; any other key of a result
    dict (such as the embedded Radarr movie) is left out.

    Args:
        matches: Results flagged as shows; only its length is used, for the
            summary log line.
        all_results: Every result dict; one CSV row each.
        output_path: Destination path, joined onto SCRIPT_DIR.
    """
    columns = [
        "title", "year", "tmdb_id", "radarr_id",
        "is_spectacle", "score", "reasons", "excluded_by", "runtime",
    ]

    destination = SCRIPT_DIR / output_path

    with open(destination, "w", newline="", encoding="utf-8") as handle:
        sheet = csv.DictWriter(handle, fieldnames=columns, extrasaction="ignore")
        sheet.writeheader()
        sheet.writerows(
            {name: value for name, value in entry.items() if name in columns}
            for entry in all_results
        )

    logger.info(f"📄 CSV généré : {destination}")
    logger.info(f"   → {len(matches)} spectacle(s) détecté(s) sur {len(all_results)} films analysés")


# ══════════════════════════════════════════════════════════════════════
# LECTURE CSV POUR APPLICATION
# ══════════════════════════════════════════════════════════════════════

def load_spectacles_from_csv(csv_path: str, radarr_url: str, apikey: str) -> list:
    """
    Load the spectacles detected by a previous dry-run from its CSV.

    Only rows whose ``is_spectacle`` column reads "true" (case-insensitive)
    are kept. Each kept row is matched, through its ``radarr_id``, against the
    full movie list fetched from Radarr so the result carries the complete
    movie payload under ``movie_data``, as ``apply_tags()`` expects.

    Rows with a missing or non-integer ``radarr_id`` and rows whose movie is
    not found in Radarr are skipped with a warning instead of aborting the run
    (the CSV may have been edited by hand between dry-run and apply).

    Args:
        csv_path: CSV path, resolved against the script directory.
        radarr_url: Base URL of the Radarr instance (no trailing slash).
        apikey: Radarr API key.

    Returns:
        A list of dicts with keys ``radarr_id``, ``tmdb_id``, ``title``,
        ``year``, ``score``, ``reasons`` and ``movie_data``.

    Exits the process with status 1 when the CSV file does not exist.
    """
    import datetime

    csv_full_path = SCRIPT_DIR / csv_path

    if not csv_full_path.exists():
        logger.error(f"❌ CSV introuvable : {csv_full_path}")
        logger.error("   → Lance d'abord un dry-run : python script.py --limit 100")
        sys.exit(1)

    # Warn when the CSV is older than 24 hours: detection data may be stale.
    csv_age = datetime.datetime.now() - datetime.datetime.fromtimestamp(csv_full_path.stat().st_mtime)
    if csv_age.total_seconds() > 86400:
        logger.warning(f"⚠️  Le CSV a {csv_age.days} jour(s). Les données TMDB peuvent avoir changé.")
        logger.warning("   → Recommandé : relancer un dry-run d'abord.")

    logger.info(f"📥 Chargement du CSV : {csv_full_path}")

    # Fetch every Radarr movie once and index by id to get the full payloads.
    headers = radarr_headers(apikey)
    url = f"{radarr_url}/api/v3/movie"
    resp = http_get(url, headers=headers)
    radarr_movies = {m["id"]: m for m in resp.json()}

    spectacles = []
    # newline="" lets the csv module handle line endings itself, as its
    # documentation requires (quoted fields may contain embedded newlines).
    with open(csv_full_path, "r", newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            # Short rows yield None for missing columns, hence the `or ""`.
            if (row.get("is_spectacle") or "").strip().lower() != "true":
                continue

            title = row.get("title") or ""
            raw_id = row.get("radarr_id")
            try:
                radarr_id = int(raw_id)
            except (TypeError, ValueError):
                logger.warning(f"  ⚠️  Ligne CSV ignorée : radarr_id invalide ({raw_id!r}) pour '{title}'")
                continue

            movie = radarr_movies.get(radarr_id)
            if movie is None:
                logger.warning(f"  ⚠️  Film ID {radarr_id} ({title}) non trouvé dans Radarr")
                continue

            spectacles.append({
                "radarr_id": radarr_id,
                "tmdb_id": row.get("tmdb_id") or "",
                "title": title,
                "year": row.get("year") or "",
                "score": row.get("score") or "",
                "reasons": row.get("reasons") or "",
                "movie_data": movie,  # full Radarr payload, needed to apply the tag
            })

    logger.info(f"   → {len(spectacles)} spectacle(s) chargé(s) depuis le CSV")
    return spectacles


# ══════════════════════════════════════════════════════════════════════
# APPLICATION DES TAGS
# ══════════════════════════════════════════════════════════════════════

def apply_tags(
    matches: list, radarr_url: str, apikey: str, tag_id: int
):
    """
    Tag every detected movie in Radarr and log the outcome of each attempt.

    Each entry of ``matches`` must provide ``title`` and ``movie_data``. A
    failure on one movie is logged and counted, then processing moves on to
    the next one. A final tally (added / already tagged / errors) is logged.

    Args:
        matches: Result dicts of the movies to tag.
        radarr_url: Base URL of the Radarr instance.
        apikey: Radarr API key.
        tag_id: Identifier of the Radarr tag to apply.
    """
    logger.info(f"🏷️  Application du tag (id={tag_id}) à {len(matches)} film(s)...")

    tally = {"applied": 0, "skipped": 0, "errors": 0}

    for entry in matches:
        title = entry["title"]
        radarr_movie = entry["movie_data"]

        try:
            if apply_tag_to_movie(radarr_url, apikey, radarr_movie, tag_id):
                tally["applied"] += 1
                logger.info(f"  ✅ Tag ajouté : '{title}'")
            else:
                tally["skipped"] += 1
                logger.info(f"  ⏭️  Tag déjà présent : '{title}'")
        except Exception as exc:
            # One failing movie must not stop the remaining ones.
            tally["errors"] += 1
            logger.error(f"  ❌ Erreur tag '{title}' : {exc}")

    applied = tally["applied"]
    skipped = tally["skipped"]
    errors = tally["errors"]
    logger.info(f"\n📊 Bilan : {applied} ajouté(s), {skipped} déjà tagué(s), {errors} erreur(s)")


# ══════════════════════════════════════════════════════════════════════
# MAIN — C'EST PARTI MON KIKI
# ══════════════════════════════════════════════════════════════════════

def _print_run_summary(headline: str, list_label: str, matches: list) -> None:
    """Print the framed end-of-run summary, with one line per matched movie."""
    print()
    print("=" * 60)
    print(headline)
    if matches:
        print(list_label)
        for m in matches:
            print(f"    🎭 {m['title']} ({m['year']}) — score={m['score']}")
    print("=" * 60)
    print()


def main():
    """
    Command-line entry point.

    Steps: parse the CLI, build the configuration, load the secrets, print the
    effective settings (no secrets), make sure the tag exists in Radarr, then
    either:

    * apply-from-CSV mode: load the spectacles from the dry-run CSV and tag
      them, without any TMDB request; or
    * normal mode: fetch the Radarr movies, inspect each one, write the CSV,
      print a summary and, unless in dry-run, apply the tag.

    Exits with status 1 when Radarr cannot be reached or the CSV cannot be
    loaded, and with status 0 when Radarr returns no movie.
    """
    # ── 0. Parse CLI arguments (before logging, for --verbose/--quiet) ──
    cli_args = parse_args()

    # ── 1. Config: defaults ← yaml ← CLI ──
    # Temporary logging setup so the loading steps can report problems.
    setup_logging("INFO")
    config = build_config(cli_args)

    # The real log level is known only once the config is built.
    setup_logging(config["LOG_LEVEL"])

    # ── 2. Load secrets ──
    # ../.env.global is read (or created) here; see load_env_global().
    env = load_env_global()
    radarr_url = env["RADARR_URL"].rstrip("/")
    radarr_apikey = env["RADARR_APIKEY"]
    tmdb_apikey = env["TMDB_APIKEY"]

    # ── 3. Show the effective configuration (never the secrets) ──
    dry_run = config["DRY_RUN"]
    tag_name = config["TAG_NAME"]
    limit = config["LIMIT"]
    apply_from_csv = config.get("APPLY_FROM_CSV", False)

    # Apply-from-CSV always applies tags further down, so the banner must not
    # announce a dry-run in that mode even if DRY_RUN is still set.
    read_only = dry_run and not apply_from_csv

    print()
    print("🎭 " + "=" * 56)
    print("   RADARR-TMDB SPECTACLE TAGGER")
    print("   " + ("🔒 MODE DRY-RUN (on regarde mais on touche pas)"
                    if read_only
                    else "🔥 MODE APPLY (on tague pour de vrai !)"))
    print("=" * 60)
    print(f"   Tag          : {tag_name}")
    print(f"   Limite       : {limit if limit else 'tous les films'}")
    print(f"   Sensibilité  : {config['SENSITIVITY']}")
    print(f"   CSV sortie   : {config['OUTPUT_CSV']}")
    print(f"   Radarr       : {radarr_url}")
    print("=" * 60)
    print()

    # ── 4. Check/create the tag in Radarr ──
    # NOTE(review): this also runs in dry-run mode; if ensure_tag_exists()
    # creates a missing tag, a dry-run is not strictly read-only — confirm.
    try:
        tag_id = ensure_tag_exists(radarr_url, radarr_apikey, tag_name)
    except Exception as e:
        logger.error(
            f"❌ Impossible de vérifier/créer le tag dans Radarr : {e}\n"
            f"   Vérifie que Radarr est accessible à {radarr_url}"
        )
        sys.exit(1)

    # ── 5. APPLY-FROM-CSV MODE: reuse the CSV produced by the dry-run ──
    if apply_from_csv:
        logger.info("📂 MODE APPLY-FROM-CSV : Chargement depuis le CSV du dry-run")
        logger.info("   → Pas de requêtes TMDB, lecture du CSV uniquement\n")

        # Loading queries Radarr and parses the CSV: report failures the same
        # way as the other Radarr calls instead of dumping a traceback.
        # (sys.exit() raised inside the loader is not caught by `Exception`.)
        try:
            matches = load_spectacles_from_csv(config["OUTPUT_CSV"], radarr_url, radarr_apikey)
        except Exception as e:
            logger.error(
                f"❌ Impossible de charger les spectacles depuis le CSV : {e}\n"
                f"   Vérifie que Radarr est accessible à {radarr_url} et que le CSV est valide"
            )
            sys.exit(1)

        _print_run_summary(
            f"  📊 RÉSUMÉ : {len(matches)} spectacle(s) chargé(s) depuis le CSV",
            "  Spectacles à taguer :",
            matches,
        )

        if matches:
            apply_tags(matches, radarr_url, radarr_apikey, tag_id)
        else:
            logger.info("Aucun spectacle dans le CSV. Rien à faire.")

        print()
        logger.info("✅ Terminé. Tags appliqués depuis le CSV ! 🎭")
        return

    # ── 6. NORMAL MODE: fetch the movies and analyse them ──
    try:
        movies = fetch_movies(radarr_url, radarr_apikey, limit)
    except Exception as e:
        logger.error(
            f"❌ Impossible de récupérer les films Radarr : {e}\n"
            f"   Vérifie l'URL et la clé API dans {ENV_GLOBAL_PATH}"
        )
        sys.exit(1)

    if not movies:
        logger.warning("Aucun film trouvé dans Radarr. Ta bibliothèque est vide ou le LIMIT est à 0.")
        sys.exit(0)

    # ── 7. Inspect every movie ──
    all_results = []
    matches = []

    for i, movie in enumerate(movies, 1):
        title = movie.get("title", "???")
        logger.info(f"[{i}/{len(movies)}] 🎬 {title}")

        result = inspect_movie(movie, tmdb_apikey, config)
        if result is None:
            # Nothing to report for this movie; it is left out of the CSV.
            continue

        all_results.append(result)

        if result["is_spectacle"]:
            matches.append(result)
            logger.info(f"  → 🎭 SPECTACLE détecté ! (score={result['score']}) — {result['reasons']}")
        elif result["excluded_by"]:
            logger.info(f"  → 🎵 Exclu (concert/musique) : {result['excluded_by']}")
        else:
            logger.info(f"  → ➖ Pas un spectacle (score={result['score']})")

    # ── 8. Write the CSV ──
    write_csv(matches, all_results, config["OUTPUT_CSV"])

    # ── 9. Summary ──
    _print_run_summary(
        f"  📊 RÉSUMÉ : {len(matches)} spectacle(s) sur {len(all_results)} films analysés",
        "  Spectacles détectés :",
        matches,
    )

    # ── 10. Apply, or stop here in dry-run ──
    if dry_run:
        if matches:
            logger.info(
                "🔒 Dry-run terminé. Pour appliquer les tags :\n"
                "   OPTION 1 (RECOMMANDÉ) : Utiliser le CSV généré (pas de requêtes TMDB)\n"
                "     → python script.py --apply-from-csv\n"
                "   OPTION 2 : Re-scanner et appliquer (re-requête TMDB)\n"
                f"     → python script.py --limit {limit if limit else 0} --apply"
            )
        else:
            logger.info("🔒 Dry-run terminé. Aucun spectacle détecté.")
    else:
        if matches:
            apply_tags(matches, radarr_url, radarr_apikey, tag_id)
        else:
            logger.info("Aucun spectacle à taguer. Rien à faire. 🍺")

    print()
    logger.info("✅ Terminé. Bonne soirée, l'artiste ! 🎭")


# Run the tagger only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()