claude's eval overhaul
scripts/eval.py  (+66 −27)
@@ -204,6 +204,13 @@ def load_custom_model(args) -> HFLM:
         # Import custom model class
         from modeling_myolmoe import MyOlmoeForCausalLM
         logger.info("Successfully imported MyOlmoeForCausalLM")
+
+        # CRITICAL FIX: Register the custom model class
+        from transformers import AutoConfig, AutoModelForCausalLM
+        AutoConfig.register("olmoe", AutoConfig)
+        AutoModelForCausalLM.register(AutoConfig, MyOlmoeForCausalLM)
+        logger.info("Registered MyOlmoeForCausalLM with AutoModelForCausalLM")
+
     except ImportError as e:
         logger.error(f"Failed to import custom model: {e}")
         logger.error("Make sure the custom model code is available in the specified path")
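As committed, the registration passes AutoConfig itself where transformers expects a concrete config class: AutoConfig.register(model_type, config_class) wants a PretrainedConfig subclass whose model_type matches, and AutoModelForCausalLM.register(config_class, model_class) maps that config class to the model implementation. A minimal corrected sketch, assuming modeling_myolmoe also defines a MyOlmoeConfig class (hypothetical name, adjust to whatever the module actually exports):

from transformers import AutoConfig, AutoModelForCausalLM
# MyOlmoeConfig is an assumed companion config class exported by modeling_myolmoe.
from modeling_myolmoe import MyOlmoeConfig, MyOlmoeForCausalLM

# Map the "olmoe" model_type string to the custom config, then map that config
# class to the custom causal-LM class so AutoModelForCausalLM.from_pretrained
# can resolve it. exist_ok=True (recent transformers) tolerates a stock "olmoe"
# entry that may already be registered.
AutoConfig.register("olmoe", MyOlmoeConfig, exist_ok=True)
AutoModelForCausalLM.register(MyOlmoeConfig, MyOlmoeForCausalLM, exist_ok=True)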
@@ -217,26 +224,45 @@ def load_custom_model(args) -> HFLM:
 
         logger.info("Model will use default top-k routing configuration")
 
-    # [20 removed lines not shown in the collapsed source view]
+    # Create HFLM with explicit model class specification
+    try:
+        model = HFLM(
+            pretrained=args.model_path,
+            device=args.device,
+            batch_size=args.batch_size,
+            max_batch_size=args.max_batch_size,
+            dtype=args.dtype,
+            trust_remote_code=args.trust_remote_code,
+            # Pass the custom model class explicitly
+            backend="causal",
+            model_kwargs={"torch_dtype": torch.bfloat16 if args.dtype == "bfloat16" else "auto"}
+        )
+    except Exception as e:
+        logger.error(f"Failed to create HFLM wrapper: {e}")
+        # Alternative approach: load model manually then wrap
+        logger.info("Trying alternative loading approach...")
+
+        # Load tokenizer and model manually
+        tokenizer = AutoTokenizer.from_pretrained(
+            args.model_path,
+            trust_remote_code=args.trust_remote_code
+        )
+
+        model_instance = MyOlmoeForCausalLM.from_pretrained(
+            args.model_path,
+            config=config,
+            trust_remote_code=args.trust_remote_code,
+            torch_dtype=torch.bfloat16 if args.dtype == "bfloat16" else "auto"
+        )
+
+        # Create HFLM with pre-loaded model
+        model = HFLM(
+            pretrained=model_instance,
+            tokenizer=tokenizer,
+            device=args.device,
+            batch_size=args.batch_size,
+            max_batch_size=args.max_batch_size
+        )
 
     logger.info("Custom model loaded successfully")
     return model
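Two caveats on this block, hedged because the behavior depends on the installed lm-eval version. First, despite the "Pass the custom model class explicitly" comment, the primary path still resolves args.model_path through AutoModelForCausalLM inside HFLM, so it only succeeds if the registration above (or an auto_map entry in the checkpoint's config.json together with trust_remote_code) takes effect. Second, as far as I recall HFLM forwards unrecognized keyword arguments straight to from_pretrained rather than unpacking a model_kwargs dict, so the torch_dtype override may be ignored there; the fallback branch, which instantiates the model itself and hands the instance to HFLM, sidesteps both issues. A compact sketch of that pattern on its own (wrap_custom_model is a hypothetical helper; the argument names mirror the script's args):

import torch
from transformers import AutoTokenizer
from lm_eval.models.huggingface import HFLM
from modeling_myolmoe import MyOlmoeForCausalLM

def wrap_custom_model(model_path: str, batch_size: int = 8) -> HFLM:
    # Load the tokenizer and weights directly with the custom class, so no
    # Auto* registration is required on this path.
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = MyOlmoeForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)
    # Recent lm-eval versions accept an already-instantiated PreTrainedModel
    # as `pretrained`; the wrapper then reuses the model's existing device/dtype.
    return HFLM(pretrained=model, tokenizer=tokenizer, batch_size=batch_size)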
@@ -331,16 +357,29 @@ def run_evaluation(args) -> Dict[str, Any]:
     logger.info(f"Running evaluation on tasks: {args.tasks}")
     logger.info(f"Few-shot examples: {args.num_fewshot}")
     logger.info(f"Batch size: {args.batch_size}")
+
+    # Debug information
     print("Type of model being passed:", type(model))
     print("Model config:", getattr(model, "config", None))
+
+    # Ensure model is properly initialized
+    if hasattr(model, '_model') and model._model is not None:
+        logger.info("Model is properly loaded and wrapped")
+    else:
+        logger.warning("Model wrapper may not be properly initialized")
 
-    # [7 removed lines not shown in the collapsed source view]
+    try:
+        results = evaluator.simple_evaluate(
+            model=model,
+            tasks=args.tasks,
+            num_fewshot=args.num_fewshot,
+            limit=args.limit,
+            write_out=args.write_out,
+        )
+    except Exception as e:
+        logger.error(f"Evaluation failed with error: {e}")
+        logger.error("This might be due to model registration or configuration issues")
+        raise
 
     logger.info("Evaluation completed successfully")
     return results
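The hasattr(model, '_model') check works because lm-eval's HFLM keeps the wrapped transformers model on a private _model attribute (exposed via the .model property). For downstream use of the return value: simple_evaluate returns a nested dict whose per-task metrics sit under the "results" key, and lm-eval ships a make_table helper for pretty-printing, if I remember its location correctly. A small sketch of how run_evaluation's caller might summarize the output (report and the output path are hypothetical, not part of this script):

import json
from lm_eval.utils import make_table

def report(results: dict, output_path: str = "eval_results.json") -> None:
    # Per-task metric dicts live under the top-level "results" key.
    for task, metrics in results.get("results", {}).items():
        print(f"{task}: {metrics}")
    # make_table renders the same data as a readable text table.
    print(make_table(results))
    # Persist only the metrics; the full dict also carries configs and versions.
    with open(output_path, "w") as f:
        json.dump(results.get("results", {}), f, indent=2, default=str)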