model_trace

Runtime error

model_trace / src /evaluation /initialize_models.py

Ahmed Ahmed

try again

1191811 5 months ago

5.18 kB

	"""
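	Initialize the leaderboard with a fixed set of allowed models.

	This module ensures that only the models listed in ALLOWED_MODELS have result
	files in the leaderboard's results directory. The result files it creates are
	placeholders with an empty "results" section; the model trace p-values are
	not computed by this module itself.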
	"""

	import os
	import json
	import sys
	from src.evaluation.model_trace_eval import compute_model_trace_p_value
	from src.envs import EVAL_RESULTS_PATH

	# Only these models may appear on the leaderboard.
	# Kept as a list because other code in this module copies it with .copy().
	ALLOWED_MODELS = [
	    "lmsys/vicuna-7b-v1.5",
	    "ibm-granite/granite-7b-base",
	    "EleutherAI/llemma_7b",
	]

	def create_model_result_file(model_name, precision="float16"):
	    """
	    Create a placeholder result file for a model.

	    The file is written as JSON into EVAL_RESULTS_PATH. It holds the model's
	    config (dtype, name and the revision "main") and an empty "results"
	    section. A file that already exists is left untouched.

	    Args:
	        model_name: HuggingFace model identifier
	        precision: Model precision; used in the file name and stored as
	            "torch.<precision>" in the config

	    Returns:
	        The path of the existing or newly created result file, or None if
	        the file could not be written.
	    """
	    sys.stderr.write(f"\n🔧 CREATING RESULT FILE FOR: {model_name}\n")
	    sys.stderr.flush()

	    # Create the results directory if it doesn't exist
	    os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)

	    # Generate a safe filename ("/" and "-" both become "_")
	    safe_name = model_name.replace("/", "_").replace("-", "_")
	    result_file = os.path.join(EVAL_RESULTS_PATH, f"{safe_name}_{precision}.json")

	    sys.stderr.write(f"📁 Result file path: {result_file}\n")
	    sys.stderr.flush()

	    # Check if file already exists
	    if os.path.exists(result_file):
	        sys.stderr.write(f"✅ Result file already exists: {result_file}\n")
	        sys.stderr.flush()
	        return result_file

	    # Create basic result structure
	    result_data = {
	        "config": {
	            "model_dtype": f"torch.{precision}",
	            "model_name": model_name,
	            "model_sha": "main",
	        },
	        "results": {
	            # No perplexity - we only care about p-values
	        },
	    }

	    # Write to a temporary sibling file and rename it into place, so that a
	    # failed write never leaves a truncated file that the "already exists"
	    # check above would accept on the next run. The ".tmp" suffix keeps the
	    # temporary file out of any "*.json" scan of the results directory.
	    temp_file = f"{result_file}.tmp"
	    try:
	        with open(temp_file, "w", encoding="utf-8") as f:
	            json.dump(result_data, f, indent=2)
	        os.replace(temp_file, result_file)
	    except Exception as e:
	        # Best effort: callers rely on None instead of an exception here.
	        try:
	            os.remove(temp_file)
	        except OSError:
	            # The temporary file was never created or is already gone.
	            pass
	        sys.stderr.write(f"❌ Failed to create result file: {e}\n")
	        sys.stderr.flush()
	        return None

	    sys.stderr.write(f"✅ Created result file: {result_file}\n")
	    sys.stderr.flush()
	    return result_file

	def clean_non_allowed_results():
	    """
	    Remove result files for models that are not in the allowed list.

	    Walks EVAL_RESULTS_PATH recursively and, for every ".json" file, reads
	    config.model_name and deletes the file when is_model_allowed() rejects
	    that name. Files without a usable model_name are kept. Files that cannot
	    be read, parsed or removed are reported on stderr and skipped.
	    """
	    sys.stderr.write("\n🧹 CLEANING NON-ALLOWED RESULT FILES\n")
	    sys.stderr.flush()

	    if not os.path.exists(EVAL_RESULTS_PATH):
	        sys.stderr.write("📁 Results directory doesn't exist, nothing to clean\n")
	        sys.stderr.flush()
	        return

	    removed_count = 0

	    # Walk through all files in the results directory
	    for root, _dirs, files in os.walk(EVAL_RESULTS_PATH):
	        for file_name in files:
	            if not file_name.endswith(".json"):
	                continue

	            file_path = os.path.join(root, file_name)

	            try:
	                # JSON is read as UTF-8 rather than the locale's default encoding
	                with open(file_path, "r", encoding="utf-8") as f:
	                    data = json.load(f)

	                # A payload that is not an object, or whose "config" is not
	                # an object, simply has no model name.
	                config = data.get("config", {}) if isinstance(data, dict) else {}
	                model_name = config.get("model_name", "") if isinstance(config, dict) else ""

	                if not model_name:
	                    sys.stderr.write(f"⚠️ Skipping file with no model_name: {file_path}\n")
	                elif not is_model_allowed(model_name):
	                    sys.stderr.write(f"🗑️ Removing non-allowed model result: {file_path} (model: {model_name})\n")
	                    os.remove(file_path)
	                    removed_count += 1

	            except Exception as e:
	                # Best effort: one unreadable or undeletable file must not
	                # stop the cleanup of the remaining files.
	                sys.stderr.write(f"⚠️ Error processing file {file_path}: {e}\n")

	    sys.stderr.write(f"✅ Removed {removed_count} non-allowed result files\n")
	    sys.stderr.flush()

	def initialize_allowed_models():
	    """
	    Make sure every model in ALLOWED_MODELS has a result file.

	    Result files of non-allowed models are removed first. A model whose
	    result file cannot be created is reported on stderr and skipped.

	    Returns:
	        list: The paths returned by create_model_result_file() for the
	        allowed models (already existing or newly created files)
	    """
	    sys.stderr.write(f"\n🚀 INITIALIZING ALLOWED MODELS\n")
	    sys.stderr.write(f"📋 Models to initialize: {ALLOWED_MODELS}\n")
	    sys.stderr.flush()

	    # Drop leftovers of other models before creating anything
	    clean_non_allowed_results()

	    result_paths = []
	    for allowed_model in ALLOWED_MODELS:
	        try:
	            path = create_model_result_file(allowed_model)
	        except Exception as err:
	            sys.stderr.write(f"❌ Failed to initialize {allowed_model}: {err}\n")
	            sys.stderr.flush()
	        else:
	            # None signals that the file could not be written
	            if path:
	                result_paths.append(path)

	    sys.stderr.write(f"✅ Initialized {len(result_paths)} model result files\n")
	    sys.stderr.flush()

	    return result_paths

	def is_model_allowed(model_name):
	    """
	    Tell whether a model belongs to ALLOWED_MODELS.

	    Args:
	        model_name: HuggingFace model identifier

	    Returns:
	        bool: True if the identifier is an entry of ALLOWED_MODELS,
	        False otherwise
	    """
	    allowed = model_name in ALLOWED_MODELS
	    return allowed

	def get_allowed_models():
	    """
	    Return the allowed model names.

	    Returns:
	        list: A new list with the entries of ALLOWED_MODELS; changing it
	        does not change ALLOWED_MODELS
	    """
	    return list(ALLOWED_MODELS)