claude's eval overhaul
scripts/eval.py  (+66 −27)
@@ -204,6 +204,13 @@ def load_custom_model(args) -> HFLM:
         # Import custom model class
         from modeling_myolmoe import MyOlmoeForCausalLM
         logger.info("Successfully imported MyOlmoeForCausalLM")
+
+        # CRITICAL FIX: Register the custom model class
+        from transformers import AutoConfig, AutoModelForCausalLM
+        AutoConfig.register("olmoe", AutoConfig)
+        AutoModelForCausalLM.register(AutoConfig, MyOlmoeForCausalLM)
+        logger.info("Registered MyOlmoeForCausalLM with AutoModelForCausalLM")
+
     except ImportError as e:
         logger.error(f"Failed to import custom model: {e}")
         logger.error("Make sure the custom model code is available in the specified path")
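As committed, the registration passes AutoConfig itself where transformers expects a concrete config class: AutoConfig.register(model_type, config_class) wants a PretrainedConfig subclass whose model_type matches, and AutoModelForCausalLM.register(config_class, model_class) maps that config class to the model implementation. A minimal corrected sketch, assuming modeling_myolmoe also defines a MyOlmoeConfig class (hypothetical name, adjust to whatever the module actually exports):

from transformers import AutoConfig, AutoModelForCausalLM
# MyOlmoeConfig is an assumed companion config class exported by modeling_myolmoe.
from modeling_myolmoe import MyOlmoeConfig, MyOlmoeForCausalLM

# Map the "olmoe" model_type string to the custom config, then map that config
# class to the custom causal-LM class so AutoModelForCausalLM.from_pretrained
# can resolve it. exist_ok=True (recent transformers) tolerates a stock "olmoe"
# entry that may already be registered.
AutoConfig.register("olmoe", MyOlmoeConfig, exist_ok=True)
AutoModelForCausalLM.register(MyOlmoeConfig, MyOlmoeForCausalLM, exist_ok=True)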
@@ -217,26 +224,45 @@ def load_custom_model(args) -> HFLM:
 
         logger.info("Model will use default top-k routing configuration")
 
-    # [20 removed lines not shown in the collapsed source view]
+    # Create HFLM with explicit model class specification
+    try:
+        model = HFLM(
+            pretrained=args.model_path,
+            device=args.device,
+            batch_size=args.batch_size,
+            max_batch_size=args.max_batch_size,
+            dtype=args.dtype,
+            trust_remote_code=args.trust_remote_code,
+            # Pass the custom model class explicitly
+            backend="causal",
+            model_kwargs={"torch_dtype": torch.bfloat16 if args.dtype == "bfloat16" else "auto"}
+        )
+    except Exception as e:
+        logger.error(f"Failed to create HFLM wrapper: {e}")
+        # Alternative approach: load model manually then wrap
+        logger.info("Trying alternative loading approach...")
+
+        # Load tokenizer and model manually
+        tokenizer = AutoTokenizer.from_pretrained(
+            args.model_path,
+            trust_remote_code=args.trust_remote_code
+        )
+
+        model_instance = MyOlmoeForCausalLM.from_pretrained(
+            args.model_path,
+            config=config,
+            trust_remote_code=args.trust_remote_code,
+            torch_dtype=torch.bfloat16 if args.dtype == "bfloat16" else "auto"
+        )
+
+        # Create HFLM with pre-loaded model
+        model = HFLM(
+            pretrained=model_instance,
+            tokenizer=tokenizer,
+            device=args.device,
+            batch_size=args.batch_size,
+            max_batch_size=args.max_batch_size
+        )
 
     logger.info("Custom model loaded successfully")
     return model
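Two caveats on this block, hedged because the behavior depends on the installed lm-eval version. First, despite the "Pass the custom model class explicitly" comment, the primary path still resolves args.model_path through AutoModelForCausalLM inside HFLM, so it only succeeds if the registration above (or an auto_map entry in the checkpoint's config.json together with trust_remote_code) takes effect. Second, as far as I recall HFLM forwards unrecognized keyword arguments straight to from_pretrained rather than unpacking a model_kwargs dict, so the torch_dtype override may be ignored there; the fallback branch, which instantiates the model itself and hands the instance to HFLM, sidesteps both issues. A compact sketch of that pattern on its own (wrap_custom_model is a hypothetical helper; the argument names mirror the script's args):

import torch
from transformers import AutoTokenizer
from lm_eval.models.huggingface import HFLM
from modeling_myolmoe import MyOlmoeForCausalLM

def wrap_custom_model(model_path: str, batch_size: int = 8) -> HFLM:
    # Load the tokenizer and weights directly with the custom class, so no
    # Auto* registration is required on this path.
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = MyOlmoeForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)
    # Recent lm-eval versions accept an already-instantiated PreTrainedModel
    # as `pretrained`; the wrapper then reuses the model's existing device/dtype.
    return HFLM(pretrained=model, tokenizer=tokenizer, batch_size=batch_size)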
@@ -331,16 +357,29 @@ def run_evaluation(args) -> Dict[str, Any]:
     logger.info(f"Running evaluation on tasks: {args.tasks}")
     logger.info(f"Few-shot examples: {args.num_fewshot}")
     logger.info(f"Batch size: {args.batch_size}")
+
+    # Debug information
     print("Type of model being passed:", type(model))
     print("Model config:", getattr(model, "config", None))
+
+    # Ensure model is properly initialized
+    if hasattr(model, '_model') and model._model is not None:
+        logger.info("Model is properly loaded and wrapped")
+    else:
+        logger.warning("Model wrapper may not be properly initialized")
 
-    # [7 removed lines not shown in the collapsed source view]
+    try:
+        results = evaluator.simple_evaluate(
+            model=model,
+            tasks=args.tasks,
+            num_fewshot=args.num_fewshot,
+            limit=args.limit,
+            write_out=args.write_out,
+        )
+    except Exception as e:
+        logger.error(f"Evaluation failed with error: {e}")
+        logger.error("This might be due to model registration or configuration issues")
+        raise
 
     logger.info("Evaluation completed successfully")
     return results
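The hasattr(model, '_model') check works because lm-eval's HFLM keeps the wrapped transformers model on a private _model attribute (exposed via the .model property). For downstream use of the return value: simple_evaluate returns a nested dict whose per-task metrics sit under the "results" key, and lm-eval ships a make_table helper for pretty-printing, if I remember its location correctly. A small sketch of how run_evaluation's caller might summarize the output (report and the output path are hypothetical, not part of this script):

import json
from lm_eval.utils import make_table

def report(results: dict, output_path: str = "eval_results.json") -> None:
    # Per-task metric dicts live under the top-level "results" key.
    for task, metrics in results.get("results", {}).items():
        print(f"{task}: {metrics}")
    # make_table renders the same data as a readable text table.
    print(make_table(results))
    # Persist only the metrics; the full dict also carries configs and versions.
    with open(output_path, "w") as f:
        json.dump(results.get("results", {}), f, indent=2, default=str)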