File size: 3,000 Bytes

cfcbbc8

#!/usr/bin/env python3
"""
Script to get version information for all models in the dataset.
Usage:
  export CBORG_API_KEY=...
  python get_all_model_versions.py
"""
import os
import sys
import pandas as pd
from openai import OpenAI

def test_model_version(client, model_id):
    """Test a model and return the underlying model name."""
    try:
        response = client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": "Hi"}],
            max_tokens=5
        )
        return response.model
    except Exception as e:
        error_msg = str(e)[:150]
        return f"ERROR: {error_msg}"

def main(csv_path='/global/cfs/projectdirs/atlas/joshua/llm4hep/results_summary.csv',
         output_file='model_version_mappings.txt'):
    """Probe every model named in the results dataset and report what it resolves to.

    Reads the ``supervisor`` and ``coder`` columns of the CSV, calls
    ``test_model_version`` once per unique model name, prints a progress
    line and a summary to stdout, and writes the successful mappings to a
    text file.

    Args:
        csv_path: CSV file to read model names from. Lines starting with
            ``#`` are treated as comments. Defaults to the original
            hard-coded dataset location.
        output_file: Path of the text report to write. Defaults to
            ``model_version_mappings.txt`` in the current directory.

    Raises:
        SystemExit: With status 1 when ``CBORG_API_KEY`` is unset or empty.
    """
    # Function-scope import: only the report header needs the current date.
    from datetime import date

    api_key = os.environ.get('CBORG_API_KEY')
    if not api_key:
        print("Error: CBORG_API_KEY environment variable not set.")
        sys.exit(1)

    client = OpenAI(
        api_key=api_key,
        base_url="https://api.cborg.lbl.gov"
    )

    # Load the dataset; rows missing either role column are ignored entirely.
    df = pd.read_csv(csv_path, comment='#')
    df = df.dropna(subset=['supervisor', 'coder'])

    # A model may appear in either role, so take the union of both columns.
    all_models = sorted(set(df['supervisor'].unique()) | set(df['coder'].unique()))

    rule = "=" * 100
    print(rule)
    print("TESTING ALL MODELS IN DATASET FOR VERSION INFORMATION")
    print(rule)
    print(f"\nFound {len(all_models)} unique models in the dataset")
    print()

    results = {}

    for idx, model in enumerate(all_models, 1):
        # Flush so the progress line is visible while the request is in flight.
        print(f"[{idx}/{len(all_models)}] Testing {model:<45}", end=" ", flush=True)
        underlying = test_model_version(client, model)
        results[model] = underlying
        print("❌" if underlying.startswith('ERROR') else "✓")

    # Print results
    print("\n" + rule)
    print("RESULTS: MODEL MAPPINGS")
    print(rule)

    for model, underlying in sorted(results.items()):
        if underlying.startswith('ERROR'):
            # Keep the console table narrow: show only the start of the error.
            print(f"❌ {model:<45} {underlying[:50]}")
        elif model == underlying:
            print(f"   {model:<45} (no alias)")
        else:
            print(f"   {model:<45} → {underlying}")

    # Save to file. The report contains non-ASCII characters ("→"), so the
    # encoding is pinned rather than left to the platform's locale default.
    today = date.today()
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("MODEL VERSION MAPPINGS\n")
        f.write(rule + "\n")
        # Stamp the report with the actual run date, not a fixed one.
        f.write(f"Discovered on: {today:%B} {today.day}, {today.year}\n")
        f.write(f"Total models tested: {len(results)}\n\n")

        # Only successful probes are listed; failed ones are omitted from the file.
        for model, underlying in sorted(results.items()):
            if underlying.startswith('ERROR'):
                continue
            if model == underlying:
                f.write(f"{model} (no alias)\n")
            else:
                f.write(f"{model} → {underlying}\n")

    print(f"\n✓ Results saved to {output_file}")
    print(rule)

# Run the probe only when executed as a script, not when imported.
if __name__ == "__main__":
    main()