fix: implement robust model mapping using slugs and normalized names

This commit is contained in:
laurent
2026-02-22 16:47:56 +01:00
parent 0ce845fec1
commit 4f9459e558
2 changed files with 116 additions and 116 deletions

View File

@@ -2,17 +2,19 @@ import os
import requests
import json
import time
import re
from dotenv import load_dotenv
# Load .env.global
load_dotenv("../.env.global")
# NOTE(review): the name is spelled "AIANALASYS" both here and in the env lookup;
# presumably it must match the key stored in .env.global — confirm before renaming.
AIANALASYS_APIKEY = os.getenv("AIANALASYS_APIKEY")
def get_mammouth_models():
# URL correcte fournie par l'utilisateur
url = "https://api.mammouth.ai/public/models"
try:
response = requests.get(url, verify=False) # verify=False au cas où il y a des soucis de certifs
# Désactiver les warnings InsecureRequest car verify=False est utilisé
requests.packages.urllib3.disable_warnings()
response = requests.get(url, verify=False)
response.raise_for_status()
return response.json().get('data', [])
except Exception as e:
@@ -27,99 +29,97 @@ def get_aa_data():
response.raise_for_status()
return response.json().get('data', [])
except Exception as e:
print(f"Error fetching Artificial Analysis data: {e}")
print(f"Error fetching AA data: {e}")
return []
def clean_id(model_id):
    """Return a normalized model id used as a lookup key for benchmark mapping.

    The id is lowercased, version/date stamps are removed and common release
    suffixes are dropped, so that e.g. ``gpt-4o-2024-08-06`` and ``gpt-4o``
    normalize to the same key.

    Args:
        model_id: Raw model identifier (may contain a ``provider/`` prefix,
            which is left untouched).

    Returns:
        The normalized identifier, stripped of surrounding whitespace.
    """
    normalized = model_id.lower()
    # Dashed ISO dates (-YYYY-MM-DD) must be tried before the plain digit run:
    # otherwise only "-YYYY" is removed and "-MM-DD" is left behind.
    normalized = re.sub(r'-\d{4}-\d{2}-\d{2}|-\d{4,8}', '', normalized)
    # Release-channel suffixes carry no information for matching.
    for suffix in ('-latest', '-preview', '-instruct'):
        normalized = normalized.replace(suffix, '')
    return normalized.strip()
def generate_markdown(models_data):
# Build the README markdown: one table per category, listing each model's
# name, input/output price, score and speed.
# NOTE(review): this view interleaves the pre- and post-change lines of the
# diff, so several statements below appear twice in slightly different forms.
categories = {}
# Group models by their 'category' key (defaults to 'General').
for m in models_data:
cat = m.get('category', 'General')
if cat not in categories:
categories[cat] = []
if cat not in categories: categories[cat] = []
categories[cat].append(m)
md = "# Table des Modèles Mammouth.ai\n\n"
md += "*Mise à jour automatique via Artificial Analysis & Mammouth Public API*\n\n"
md += "*Généré automatiquement à partir des benchmarks d'Artificial Analysis et des tarifs Mammouth.ai.*\n\n"
md += f"Dernière mise à jour : {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
# Fixed display order; any category not listed here sorts last (key 99).
order = ['Coding', 'Agents', 'General']
sorted_cats = sorted(categories.keys(), key=lambda x: order.index(x) if x in order else 99)
for cat in sorted_cats:
models = categories[cat]
# Sort: score (descending), then input price (ascending).
# A missing or zero score counts as 0; a missing or zero price counts as 999.
models.sort(key=lambda x: (x.get('score') or 0, -(x.get('price_in') or 999)), reverse=True)
md += f"## {cat}\n\n"
md += "| Modèle | Prix (In / Out / 1M) | Intelligence Index | Vitesse (TPS) |\n"
md += "| Modèle | Prix (In / Out / 1M) | Score (Intelligence) | Vitesse (TPS) |\n"
md += "| :--- | :--- | :--- | :--- |\n"
models = categories[cat]
# Sort: score (descending), then name.
# NOTE(review): the key below uses the score only; no name ordering is applied.
models.sort(key=lambda x: (x.get('score') or 0), reverse=True)
for m in models:
p_in = f"${m['price_in']:.2f}"
p_out = f"${m['price_out']:.2f}"
# Truthiness test: a score or speed of 0 is rendered as "N/A", same as None.
score = f"**{m['score']:.1f}**" if m['score'] else "N/A"
speed = f"{m['speed']:.1f}" if m['speed'] else "N/A"
md += f"| {m['name']} | {p_in} / {p_out} | {score} | {speed} |\n"
score_str = f"**{m['score']:.1f}**" if m['score'] else "N/A"
speed_str = f"{m['speed']:.1f}" if m['speed'] else "N/A"
md += f"| {m['name']} | ${m['price_in']:.2f} / ${m['price_out']:.2f} | {score_str} | {speed_str} |\n"
md += "\n"
return md
def main():
# Fetch both data sources, match each Mammouth model to an Artificial Analysis
# entry, compute prices/category/score/speed, then write README.md.
# NOTE(review): this view interleaves the pre- and post-change lines of the
# diff and contains a hunk header, so some statements appear in two variants
# and some lines of the original file are not shown.
print("Fetching Mammouth public models...")
mammouth_models = get_mammouth_models()
print(f"Found {len(mammouth_models)} models from Mammouth.")
print("Fetching data from Mammouth and Artificial Analysis...")
m_models = get_mammouth_models()
aa_data = get_aa_data()
print("Fetching Artificial Analysis data...")
aa_raw = get_aa_data()
# AA mapping
# Mapping table (slug -> data)
aa_map = {}
for aa_m in aa_raw:
m_id = aa_m.get('model_id', '').lower()
m_name = aa_m.get('model_name', '').lower()
if m_id: aa_map[m_id] = aa_m
if m_name: aa_map[m_name] = aa_m
# Each entry is indexed under both its lowercased slug and its lowercased name.
for aa_m in aa_data:
slug = aa_m.get('slug', '').lower()
name = aa_m.get('name', '').lower()
if slug: aa_map[slug] = aa_m
if name: aa_map[name] = aa_m
enriched_models = []
for m in mammouth_models:
enriched = []
for m in m_models:
m_id = m.get('id', '')
info = m.get('model_info', {})
if not m_id: continue
# Smart mapping
m_id_low = m_id.lower()
aa_info = aa_map.get(m_id_low)
m_id_clean = clean_id(m_id)
# Last path segment of the cleaned id (text after the final '/').
short_id = m_id_clean.split('/')[-1]
# If there is no exact match, look for a partial match
# Match mapping
aa_info = aa_map.get(m_id_clean) or aa_map.get(short_id)
# Fuzzy search (e.g. claude-3-5-sonnet -> claude-3.5-sonnet)
if not aa_info:
for key in aa_map:
if key in m_id_low or m_id_low in key:
# Guard against false positives (e.g. gpt-4 vs gpt-4-turbo)
if abs(len(key) - len(m_id_low)) < 5:
aa_info = aa_map[key]
break
# Compare ids with '-' and '.' removed from both sides.
normalized_m_id = m_id_clean.replace('-', '').replace('.', '')
for key, val in aa_map.items():
if key.replace('-', '').replace('.', '') == normalized_m_id:
aa_info = val
break
# Per-token cost scaled by 1,000,000, i.e. price per million tokens.
# NOTE(review): no error handling in this variant; float() raises if the
# stored value is None or non-numeric.
price_in = float(info.get('input_cost_per_token', 0)) * 1000000
price_out = float(info.get('output_cost_per_token', 0)) * 1000000
# Category is derived from keywords found in the cleaned id.
category = "General"
if any(x in m_id_clean for x in ['coding', 'code', 'starcoder', 'codestral', 'coder']):
category = "Coding"
elif any(x in m_id_clean for x in ['agent', 'hermes', 'tool', 'function', 'sonar']):
category = "Agents"
# Price extraction
try:
price_in = float(info.get('input_cost_per_token', 0)) * 1000000
price_out = float(info.get('output_cost_per_token', 0)) * 1000000
# NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit.
except:
price_in, price_out = 0, 0
score = None
speed = None
if aa_info:
evals = aa_info.get('evaluations', {})
score = evals.get('artificial_analysis_intelligence_index')
speed = aa_info.get('median_output_tokens_per_second')
# Use the coding index for the Coding category, otherwise the intelligence index
score = evals.get('artificial_analysis_coding_index') if category == "Coding" else None
# Falls back to the intelligence index when the coding index is missing or falsy (including 0).
if not score:
score = evals.get('artificial_analysis_intelligence_index')
# Categorization
category = "General"
if any(x in m_id_low for x in ['coding', 'code', 'starcoder', 'coder', 'codestral']):
category = "Coding"
elif any(x in m_id_low for x in ['agent', 'hermes', 'tool', 'function', 'sonar']):
category = "Agents"
speed = aa_info.get('median_output_tokens_per_second')
enriched_models.append({
enriched.append({
'name': m_id,
'price_in': price_in,
'price_out': price_out,
@@ -128,13 +128,13 @@ def main():
'category': category
})
# Filter models (keep anything that has a price or a score)
final_list = [m for m in enriched_models if m['price_in'] > 0 or m['score'] is not None]
# Keep only models with a price > 0
final = [x for x in enriched if x['price_in'] > 0]
# Overwrites README.md in the current working directory.
with open("README.md", "w", encoding="utf-8") as f:
f.write(generate_markdown(final_list))
f.write(generate_markdown(final))
print(f"Done! README.md updated with {len(final_list)} models.")
print(f"Success! {len(final)} models processed.")
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
main()