HWresearch
/

LLM4HEP

Text Generation

Model card Files Files and versions

LLM4HEP / get_all_model_versions.py

ho22joshua's picture

initial commit

cfcbbc8 3 months ago

history blame contribute delete

3 kB

	#!/usr/bin/env python3
	"""
	Script to get version information for all models in the dataset.
	Usage:
	export CBORG_API_KEY=...
	python get_all_model_versions.py
	"""
	import os
	import sys
	import pandas as pd
	from openai import OpenAI

	def test_model_version(client, model_id):
	    """Send a minimal chat request and return the model name the API reports.

	    Args:
	        client: Object exposing ``chat.completions.create(...)`` (the script
	            passes an ``OpenAI`` client).
	        model_id: Model identifier or alias to send as ``model``.

	    Returns:
	        The ``model`` attribute of the response on success. On any failure
	        a string starting with ``"ERROR: "`` is returned instead of raising,
	        so a single bad model cannot abort a batch run; exception text is
	        truncated to 150 characters.
	    """
	    try:
	        response = client.chat.completions.create(
	            model=model_id,
	            messages=[{"role": "user", "content": "Hi"}],
	            max_tokens=5,
	        )
	    except Exception as e:
	        # Deliberately broad: any failure is reported in-band as a string.
	        error_msg = str(e)[:150]
	        return f"ERROR: {error_msg}"

	    # Callers invoke ``.startswith`` on the result, so never hand back a
	    # missing or non-string model name.
	    underlying = getattr(response, "model", None)
	    if not isinstance(underlying, str) or not underlying:
	        return "ERROR: response did not include a model name"
	    return underlying


	# The ``test_`` prefix matches pytest's default discovery pattern; opt out so
	# importing this helper into a test module does not collect it as a test.
	test_model_version.__test__ = False

	def main():
	    """Probe every model named in the results CSV and record what it resolves to.

	    Reads ``CBORG_API_KEY`` from the environment and exits with status 1 if
	    it is unset. Collects the unique values of the ``supervisor`` and
	    ``coder`` columns of the results summary CSV, calls
	    ``test_model_version`` once per model, prints a progress line and a
	    summary table, and writes the successful mappings to
	    ``model_version_mappings.txt`` in the current working directory.
	    """
	    # Local import: only needed for the report header written below.
	    from datetime import date

	    api_key = os.environ.get('CBORG_API_KEY')
	    if not api_key:
	        print("Error: CBORG_API_KEY environment variable not set.")
	        sys.exit(1)

	    client = OpenAI(
	        api_key=api_key,
	        base_url="https://api.cborg.lbl.gov",
	    )

	    # Load the dataset; rows missing either role are dropped so NaN never
	    # ends up in the model list.
	    df = pd.read_csv(
	        '/global/cfs/projectdirs/atlas/joshua/llm4hep/results_summary.csv',
	        comment='#',
	    )
	    df = df.dropna(subset=['supervisor', 'coder'])

	    # A model may appear as supervisor, coder, or both: take the union.
	    all_models = sorted(set(df['supervisor'].unique()) | set(df['coder'].unique()))

	    banner = "=" * 100
	    print(banner)
	    print("TESTING ALL MODELS IN DATASET FOR VERSION INFORMATION")
	    print(banner)
	    print(f"\nFound {len(all_models)} unique models in the dataset")
	    print()

	    results = {}

	    for idx, model in enumerate(all_models, 1):
	        # Keep the cursor on the same line so the status mark lands next to it.
	        print(f"[{idx}/{len(all_models)}] Testing {model:<45}", end=" ", flush=True)
	        underlying = test_model_version(client, model)
	        results[model] = underlying
	        print("❌" if underlying.startswith('ERROR') else "✓")

	    # Print results
	    print("\n" + banner)
	    print("RESULTS: MODEL MAPPINGS")
	    print(banner)

	    for model in sorted(results):
	        underlying = results[model]
	        if underlying.startswith('ERROR'):
	            print(f"❌ {model:<45} {underlying[:50]}")
	        elif model == underlying:
	            print(f" {model:<45} (no alias)")
	        else:
	            print(f" {model:<45} → {underlying}")

	    # Save to file. The arrow is non-ASCII, so pin the encoding instead of
	    # relying on the locale default.
	    output_file = 'model_version_mappings.txt'
	    today = date.today().strftime("%B %d, %Y")
	    with open(output_file, 'w', encoding='utf-8') as f:
	        f.write("MODEL VERSION MAPPINGS\n")
	        f.write(banner + "\n")
	        f.write(f"Discovered on: {today}\n")
	        f.write(f"Total models tested: {len(results)}\n\n")

	        for model in sorted(results):
	            underlying = results[model]
	            if underlying.startswith('ERROR'):
	                # Failed probes are reported on stdout only.
	                continue
	            if model == underlying:
	                f.write(f"{model} (no alias)\n")
	            else:
	                f.write(f"{model} → {underlying}\n")

	    print(f"\n✓ Results saved to {output_file}")
	    print(banner)

	# Run the probe only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()