Charlie81 committed on
Commit
6cfb013
·
1 Parent(s): 4fed0c0

Claude's eval overhaul

Browse files
Files changed (1) hide show
  1. scripts/eval.py +66 -27
scripts/eval.py CHANGED
@@ -204,6 +204,13 @@ def load_custom_model(args) -> HFLM:
204
  # Import custom model class
205
  from modeling_myolmoe import MyOlmoeForCausalLM
206
  logger.info("Successfully imported MyOlmoeForCausalLM")
 
 
 
 
 
 
 
207
  except ImportError as e:
208
  logger.error(f"Failed to import custom model: {e}")
209
  logger.error("Make sure the custom model code is available in the specified path")
@@ -217,26 +224,45 @@ def load_custom_model(args) -> HFLM:
217
 
218
  logger.info("Model will use default top-k routing configuration")
219
 
220
- # Determine torch dtype
221
- if args.dtype == "auto":
222
- torch_dtype = "auto"
223
- else:
224
- torch_dtype = {
225
- "float16": torch.float16,
226
- "bfloat16": torch.bfloat16,
227
- "float32": torch.float32
228
- }[args.dtype]
229
-
230
-
231
- # Wrap in HFLM
232
- model = HFLM(
233
- pretrained=args.model_path,
234
- device=args.device,
235
- batch_size=args.batch_size,
236
- max_batch_size=args.max_batch_size,
237
- dtype=args.dtype,
238
- trust_remote_code=args.trust_remote_code
239
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
  logger.info("Custom model loaded successfully")
242
  return model
@@ -331,16 +357,29 @@ def run_evaluation(args) -> Dict[str, Any]:
331
  logger.info(f"Running evaluation on tasks: {args.tasks}")
332
  logger.info(f"Few-shot examples: {args.num_fewshot}")
333
  logger.info(f"Batch size: {args.batch_size}")
 
 
334
  print("Type of model being passed:", type(model))
335
  print("Model config:", getattr(model, "config", None))
 
 
 
 
 
 
336
 
337
- results = evaluator.simple_evaluate(
338
- model=model,
339
- tasks=args.tasks,
340
- num_fewshot=args.num_fewshot,
341
- limit=args.limit,
342
- write_out=args.write_out,
343
- )
 
 
 
 
 
344
 
345
  logger.info("Evaluation completed successfully")
346
  return results
 
204
  # Import custom model class
205
  from modeling_myolmoe import MyOlmoeForCausalLM
206
  logger.info("Successfully imported MyOlmoeForCausalLM")
207
+
208
+ # CRITICAL FIX: Register the custom model class
209
+ from transformers import AutoConfig, AutoModelForCausalLM
210
+ AutoConfig.register("olmoe", AutoConfig)
211
+ AutoModelForCausalLM.register(AutoConfig, MyOlmoeForCausalLM)
212
+ logger.info("Registered MyOlmoeForCausalLM with AutoModelForCausalLM")
213
+
214
  except ImportError as e:
215
  logger.error(f"Failed to import custom model: {e}")
216
  logger.error("Make sure the custom model code is available in the specified path")
 
224
 
225
  logger.info("Model will use default top-k routing configuration")
226
 
227
+ # Create HFLM with explicit model class specification
228
+ try:
229
+ model = HFLM(
230
+ pretrained=args.model_path,
231
+ device=args.device,
232
+ batch_size=args.batch_size,
233
+ max_batch_size=args.max_batch_size,
234
+ dtype=args.dtype,
235
+ trust_remote_code=args.trust_remote_code,
236
+ # Pass the custom model class explicitly
237
+ backend="causal",
238
+ model_kwargs={"torch_dtype": torch.bfloat16 if args.dtype == "bfloat16" else "auto"}
239
+ )
240
+ except Exception as e:
241
+ logger.error(f"Failed to create HFLM wrapper: {e}")
242
+ # Alternative approach: load model manually then wrap
243
+ logger.info("Trying alternative loading approach...")
244
+
245
+ # Load tokenizer and model manually
246
+ tokenizer = AutoTokenizer.from_pretrained(
247
+ args.model_path,
248
+ trust_remote_code=args.trust_remote_code
249
+ )
250
+
251
+ model_instance = MyOlmoeForCausalLM.from_pretrained(
252
+ args.model_path,
253
+ config=config,
254
+ trust_remote_code=args.trust_remote_code,
255
+ torch_dtype=torch.bfloat16 if args.dtype == "bfloat16" else "auto"
256
+ )
257
+
258
+ # Create HFLM with pre-loaded model
259
+ model = HFLM(
260
+ pretrained=model_instance,
261
+ tokenizer=tokenizer,
262
+ device=args.device,
263
+ batch_size=args.batch_size,
264
+ max_batch_size=args.max_batch_size
265
+ )
266
 
267
  logger.info("Custom model loaded successfully")
268
  return model
 
357
  logger.info(f"Running evaluation on tasks: {args.tasks}")
358
  logger.info(f"Few-shot examples: {args.num_fewshot}")
359
  logger.info(f"Batch size: {args.batch_size}")
360
+
361
+ # Debug information
362
  print("Type of model being passed:", type(model))
363
  print("Model config:", getattr(model, "config", None))
364
+
365
+ # Ensure model is properly initialized
366
+ if hasattr(model, '_model') and model._model is not None:
367
+ logger.info("Model is properly loaded and wrapped")
368
+ else:
369
+ logger.warning("Model wrapper may not be properly initialized")
370
 
371
+ try:
372
+ results = evaluator.simple_evaluate(
373
+ model=model,
374
+ tasks=args.tasks,
375
+ num_fewshot=args.num_fewshot,
376
+ limit=args.limit,
377
+ write_out=args.write_out,
378
+ )
379
+ except Exception as e:
380
+ logger.error(f"Evaluation failed with error: {e}")
381
+ logger.error("This might be due to model registration or configuration issues")
382
+ raise
383
 
384
  logger.info("Evaluation completed successfully")
385
  return results