fix: improve mapping accuracy and restore stable API
This commit is contained in:
@@ -4,21 +4,21 @@ import json
|
||||
import time
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Charger .env.global depuis le répertoire parent
|
||||
load_dotenv("../.env.global")
|
||||
|
||||
MAMMOUTH_APIKEY = os.getenv("MAMMOUTH_APIKEY")
|
||||
AIANALASYS_APIKEY = os.getenv("AIANALASYS_APIKEY")
|
||||
|
||||
def get_mammouth_models():
|
||||
# Utilisation de l'endpoint public LiteLLM de Mammouth
|
||||
url = "https://mammouth.ai/public/models"
|
||||
# Retour au point d'accès OpenRouter compatible qui est plus stable
|
||||
url = "https://openrouter.ai/api/v1/models"
|
||||
headers = {"Authorization": f"Bearer {MAMMOUTH_APIKEY}"}
|
||||
try:
|
||||
response = requests.get(url)
|
||||
response = requests.get(url, headers=headers)
|
||||
response.raise_for_status()
|
||||
# Le format retourné est {'data': [ {id, model_info: {input_cost_per_token, ...}} ]}
|
||||
return response.json().get('data', [])
|
||||
except Exception as e:
|
||||
print(f"Error fetching Mammouth public models: {e}")
|
||||
print(f"Error fetching Mammouth models: {e}")
|
||||
return []
|
||||
|
||||
def get_aa_data():
|
||||
@@ -41,19 +41,20 @@ def generate_markdown(models_data):
|
||||
categories[cat].append(m)
|
||||
|
||||
md = "# Table des Modèles Mammouth.ai\n\n"
|
||||
md += "*Mise à jour automatique via Artificial Analysis & Mammouth Public API*\n\n"
|
||||
md += "Dernière mise à jour : " + time.strftime("%Y-%m-%d %H:%M:%S") + "\n\n"
|
||||
md += "*Mise à jour automatique via Artificial Analysis & Mammouth API*\n\n"
|
||||
md += f"Dernière mise à jour : {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||
|
||||
order = ['Coding', 'Agents', 'General']
|
||||
sorted_cats = sorted(categories.keys(), key=lambda x: order.index(x) if x in order else 99)
|
||||
|
||||
for cat in sorted_cats:
|
||||
models = categories[cat]
|
||||
# Tri par score (desc) puis prix (asc)
|
||||
models.sort(key=lambda x: (x.get('score') or 0, -(x.get('price_in') or 999)), reverse=True)
|
||||
|
||||
md += f"## {cat}\n\n"
|
||||
md += "| Modèle | Prix (In / Out / 1M) | Performance (AA Index) | Vitesse (TPS) |\n"
|
||||
md += "| Modèle | Prix (In / Out / 1M) | Intelligence Index | Vitesse (TPS) |\n"
|
||||
md += "| :--- | :--- | :--- | :--- |\n"
|
||||
# On trie d'abord par score décroissant, puis par prix croissant
|
||||
models.sort(key=lambda x: (x.get('score') or 0, -(x.get('price_in') or 0)), reverse=True)
|
||||
for m in models:
|
||||
p_in = f"${m['price_in']:.2f}"
|
||||
p_out = f"${m['price_out']:.2f}"
|
||||
@@ -64,52 +65,38 @@ def generate_markdown(models_data):
|
||||
return md
|
||||
|
||||
def main():
|
||||
print("Fetching Mammouth public models...")
|
||||
print("Fetching models...")
|
||||
mammouth_models = get_mammouth_models()
|
||||
if not mammouth_models:
|
||||
print("No models found from Mammouth.")
|
||||
return
|
||||
|
||||
print("Fetching Artificial Analysis data...")
|
||||
aa_raw = get_aa_data()
|
||||
|
||||
# Construction du mapping AA (index par nom et par ID technique)
|
||||
aa_map = {}
|
||||
for aa_m in aa_raw:
|
||||
name = aa_m.get('model_name', '').lower()
|
||||
model_id = aa_m.get('model_id', '').lower()
|
||||
if name: aa_map[name] = aa_m
|
||||
if model_id: aa_map[model_id] = aa_m
|
||||
# Mapping AA : Priorité aux noms exacts
|
||||
aa_map_exact = {m.get('model_name', '').lower(): m for m in aa_raw}
|
||||
aa_map_id = {m.get('model_id', '').lower(): m for m in aa_raw}
|
||||
|
||||
enriched_models = []
|
||||
for m in mammouth_models:
|
||||
m_id = m.get('id', '')
|
||||
info = m.get('model_info', {})
|
||||
m_name = m.get('name', '').lower()
|
||||
short_id = m_id.split('/')[-1].lower()
|
||||
|
||||
# On ignore les modèles sans ID
|
||||
if not m_id: continue
|
||||
|
||||
# Normalisation du nom pour le mapping
|
||||
m_id_low = m_id.lower()
|
||||
short_name = m_id_low.split('/')[-1]
|
||||
|
||||
# Recherche de correspondance dans AA (Précis puis Approché)
|
||||
aa_info = aa_map.get(m_id_low) or aa_map.get(short_name)
|
||||
# Mapping plus strict pour éviter les scores identiques
|
||||
aa_info = aa_map_id.get(m_id.lower()) or aa_map_exact.get(m_name) or aa_map_id.get(short_id)
|
||||
|
||||
# Si toujours pas de match, on ne fait PAS de recherche par sous-chaîne floue
|
||||
# pour éviter de polluer les données. On ne match que si le nom est très proche.
|
||||
if not aa_info:
|
||||
# Recherche par sous-chaîne pour les modèles comme "mistral-large-2407"
|
||||
for key in aa_map:
|
||||
if key in m_id_low or m_id_low in key:
|
||||
aa_info = aa_map[key]
|
||||
for name, info in aa_map_exact.items():
|
||||
if name in m_name and len(name) > 0.8 * len(m_name):
|
||||
aa_info = info
|
||||
break
|
||||
|
||||
# Extraction des prix (LiteLLM: prix par 1 token)
|
||||
pricing = m.get('pricing', {})
|
||||
try:
|
||||
price_in = float(info.get('input_cost_per_token', 0)) * 1000000
|
||||
price_out = float(info.get('output_cost_per_token', 0)) * 1000000
|
||||
except (ValueError, TypeError):
|
||||
price_in = float(pricing.get('prompt', 0)) * 1000000
|
||||
price_out = float(pricing.get('completion', 0)) * 1000000
|
||||
except:
|
||||
price_in, price_out = 0, 0
|
||||
|
||||
|
||||
score = None
|
||||
speed = None
|
||||
if aa_info:
|
||||
@@ -117,15 +104,15 @@ def main():
|
||||
score = evals.get('artificial_analysis_intelligence_index')
|
||||
speed = aa_info.get('median_output_tokens_per_second')
|
||||
|
||||
# Catégorisation simplifiée
|
||||
category = "General"
|
||||
if any(x in m_id_low for x in ['coding', 'code', 'starcoder', 'coder']):
|
||||
m_lower = (m_id + m_name).lower()
|
||||
if any(x in m_lower for x in ['coding', 'code', 'starcoder', 'coder']):
|
||||
category = "Coding"
|
||||
elif any(x in m_id_low for x in ['agent', 'hermes', 'tool', 'function']):
|
||||
elif any(x in m_lower for x in ['agent', 'hermes', 'tool', 'function']):
|
||||
category = "Agents"
|
||||
|
||||
enriched_models.append({
|
||||
'name': m_id,
|
||||
'name': m.get('name', m_id),
|
||||
'price_in': price_in,
|
||||
'price_out': price_out,
|
||||
'score': score,
|
||||
@@ -133,18 +120,13 @@ def main():
|
||||
'category': category
|
||||
})
|
||||
|
||||
# Filtrer les modèles : prix > 0 (ceux qui sont configurés)
|
||||
final_list = [m for m in enriched_models if m['price_in'] > 0 or m['price_out'] > 0]
|
||||
# Filtrer les modèles inutiles (prix nul et pas de score)
|
||||
final_list = [m for m in enriched_models if m['price_in'] > 0 or m['score'] is not None]
|
||||
|
||||
if not final_list:
|
||||
print("No valid models found after filtering.")
|
||||
return
|
||||
|
||||
markdown = generate_markdown(final_list)
|
||||
with open("README.md", "w", encoding="utf-8") as f:
|
||||
f.write(markdown)
|
||||
f.write(generate_markdown(final_list))
|
||||
|
||||
print(f"README.md updated with {len(final_list)} models!")
|
||||
print(f"Done! {len(final_list)} models processed.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user