fix: use correct public API endpoint and fix data mapping

2026-02-22 16:45:41 +01:00
parent 09dc6c85b1
commit 0ce845fec1
2 changed files with 97 additions and 339 deletions
--- a/update_models.py
+++ b/update_models.py
@@ -6,15 +6,13 @@ from dotenv import load_dotenv

 load_dotenv("../.env.global")

-MAMMOUTH_APIKEY = os.getenv("MAMMOUTH_APIKEY")
 AIANALASYS_APIKEY = os.getenv("AIANALASYS_APIKEY")

 def get_mammouth_models():
-    # Retour au point d'accès OpenRouter compatible qui est plus stable
-    url = "https://openrouter.ai/api/v1/models"
-    headers = {"Authorization": f"Bearer {MAMMOUTH_APIKEY}"}
+    # URL correcte fournie par l'utilisateur
+    url = "https://api.mammouth.ai/public/models"
    try:
-        response = requests.get(url, headers=headers)
+        response = requests.get(url, verify=False) # verify=False au cas où il y a des soucis de certifs
        response.raise_for_status()
        return response.json().get('data', [])
    except Exception as e:
@@ -41,7 +39,7 @@ def generate_markdown(models_data):
        categories[cat].append(m)

    md = "# Table des Modèles Mammouth.ai\n\n"
-    md += "*Mise à jour automatique via Artificial Analysis & Mammouth API*\n\n"
+    md += "*Mise à jour automatique via Artificial Analysis & Mammouth Public API*\n\n"
    md += f"Dernière mise à jour : {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n"

    order = ['Coding', 'Agents', 'General']
@@ -49,7 +47,7 @@ def generate_markdown(models_data):

    for cat in sorted_cats:
        models = categories[cat]
-        # Tri par score (desc) puis prix (asc)
+        # Tri : Score (desc), puis Prix (asc)
        models.sort(key=lambda x: (x.get('score') or 0, -(x.get('price_in') or 999)), reverse=True)
        
        md += f"## {cat}\n\n"
@@ -65,35 +63,45 @@ def generate_markdown(models_data):
    return md

 def main():
-    print("Fetching models...")
+    print("Fetching Mammouth public models...")
    mammouth_models = get_mammouth_models()
+    print(f"Found {len(mammouth_models)} models from Mammouth.")
+    
+    print("Fetching Artificial Analysis data...")
    aa_raw = get_aa_data()
    
-    # Mapping AA : Priorité aux noms exacts
-    aa_map_exact = {m.get('model_name', '').lower(): m for m in aa_raw}
-    aa_map_id = {m.get('model_id', '').lower(): m for m in aa_raw}
+    # Mapping AA
+    aa_map = {}
+    for aa_m in aa_raw:
+        m_id = aa_m.get('model_id', '').lower()
+        m_name = aa_m.get('model_name', '').lower()
+        if m_id: aa_map[m_id] = aa_m
+        if m_name: aa_map[m_name] = aa_m

    enriched_models = []
    for m in mammouth_models:
        m_id = m.get('id', '')
-        m_name = m.get('name', '').lower()
-        short_id = m_id.split('/')[-1].lower()
+        info = m.get('model_info', {})
        
-        # Mapping plus strict pour éviter les scores identiques
-        aa_info = aa_map_id.get(m_id.lower()) or aa_map_exact.get(m_name) or aa_map_id.get(short_id)
-        
-        # Si toujours pas de match, on ne fait PAS de recherche par sous-chaîne floue
-        # pour éviter de polluer les données. On ne match que si le nom est très proche.
-        if not aa_info:
-            for name, info in aa_map_exact.items():
-                if name in m_name and len(name) > 0.8 * len(m_name):
-                    aa_info = info
-                    break
+        if not m_id: continue

-        pricing = m.get('pricing', {})
+        # Mapping intelligent
+        m_id_low = m_id.lower()
+        aa_info = aa_map.get(m_id_low)
+        
+        # Si pas de match exact, on cherche une correspondance partielle
+        if not aa_info:
+            for key in aa_map:
+                if key in m_id_low or m_id_low in key:
+                    # On vérifie que ce n'est pas un faux positif (ex: gpt-4 vs gpt-4-turbo)
+                    if abs(len(key) - len(m_id_low)) < 5:
+                        aa_info = aa_map[key]
+                        break
+
+        # Extraction des prix
        try:
-            price_in = float(pricing.get('prompt', 0)) * 1000000
-            price_out = float(pricing.get('completion', 0)) * 1000000
+            price_in = float(info.get('input_cost_per_token', 0)) * 1000000
+            price_out = float(info.get('output_cost_per_token', 0)) * 1000000
        except:
            price_in, price_out = 0, 0
            
@@ -104,15 +112,15 @@ def main():
            score = evals.get('artificial_analysis_intelligence_index')
            speed = aa_info.get('median_output_tokens_per_second')
            
+        # Catégorisation
        category = "General"
-        m_lower = (m_id + m_name).lower()
-        if any(x in m_lower for x in ['coding', 'code', 'starcoder', 'coder']):
+        if any(x in m_id_low for x in ['coding', 'code', 'starcoder', 'coder', 'codestral']):
            category = "Coding"
-        elif any(x in m_lower for x in ['agent', 'hermes', 'tool', 'function']):
+        elif any(x in m_id_low for x in ['agent', 'hermes', 'tool', 'function', 'sonar']):
            category = "Agents"

        enriched_models.append({
-            'name': m.get('name', m_id),
+            'name': m_id,
            'price_in': price_in,
            'price_out': price_out,
            'score': score,
@@ -120,13 +128,13 @@ def main():
            'category': category
        })

-    # Filtrer les modèles inutiles (prix nul et pas de score)
+    # Filtrer les modèles (on garde tout ce qui a un prix ou un score)
    final_list = [m for m in enriched_models if m['price_in'] > 0 or m['score'] is not None]
    
    with open("README.md", "w", encoding="utf-8") as f:
        f.write(generate_markdown(final_list))
    
-    print(f"Done! {len(final_list)} models processed.")
+    print(f"Done! README.md updated with {len(final_list)} models.")

 if __name__ == "__main__":
    main()