Charlie81
/

LoRE

TensorBoard

Safetensors

Model card Files Files and versions

xet

Metrics Training metrics Community

Charlie81 committed on Jul 18, 2025

Commit

077e7bc

1 Parent(s): 44006e7

attempts to fix more

Browse files

Files changed (2) hide show

myolmoe/modeling_myolmoe.py +49 -14
scripts/eval.py +37 -23

myolmoe/modeling_myolmoe.py CHANGED Viewed

@@ -22,12 +22,16 @@ from dataclasses import dataclass, field
 from typing import Optional, List, Any
 from transformers import PretrainedConfig
 @dataclass
 class MyOlmoeConfig(PretrainedConfig):
     """
     Configuration class for MyOlmoe model.
     """
-    model_type: str = "myolmoe"
     # Core model parameters
     vocab_size: int = 50304
@@ -72,31 +76,62 @@ class MyOlmoeConfig(PretrainedConfig):
     rope_theta: float = 10000.0
     rope_scaling: Optional[dict] = None
-    # Token IDs
     pad_token_id: int = 1
     eos_token_id: int = 50279
     # Model architecture
     architectures: List[str] = field(default_factory=lambda: ["MyOlmoeForCausalLM"])
     def __init__(self, **kwargs):
-        # Remove torch_dtype and other model loading parameters that shouldn't be in config
-        model_loading_params = ['torch_dtype', 'device_map', 'low_cpu_mem_usage']
         for param in model_loading_params:
             kwargs.pop(param, None)
-        # Initialize dataclass fields
-        for field in self.__dataclass_fields__:
-            if field in kwargs:
-                setattr(self, field, kwargs.pop(field))
-        # Call parent init with remaining kwargs
         super().__init__(**kwargs)
-    def __post_init__(self):
-        """Post-initialization to ensure compatibility with PretrainedConfig."""
-        # This is handled in __init__ now
-        pass
 logger = logging.get_logger(__name__)

 from typing import Optional, List, Any
 from transformers import PretrainedConfig
+from dataclasses import dataclass, field
+from typing import Optional, List, Dict, Any
+from transformers import PretrainedConfig
 @dataclass
 class MyOlmoeConfig(PretrainedConfig):
     """
     Configuration class for MyOlmoe model.
     """
+    model_type: str = "olmoe"  # Keep as "olmoe" to match your trained model
     # Core model parameters
     vocab_size: int = 50304
     rope_theta: float = 10000.0
     rope_scaling: Optional[dict] = None
+    # Token IDs - Set proper defaults
     pad_token_id: int = 1
     eos_token_id: int = 50279
+    bos_token_id: int = 1
     # Model architecture
     architectures: List[str] = field(default_factory=lambda: ["MyOlmoeForCausalLM"])
     def __init__(self, **kwargs):
         """
         Build the config from keyword arguments.

         Loader-only options are dropped, missing token IDs and
         ``architectures`` are filled in, the remaining kwargs are passed to
         the parent initializer, and any dataclass field that is still unset
         afterwards receives its declared default.

         Args:
             **kwargs: Configuration values. Keys that only make sense when
                 loading model weights (dtype, device map, quantization
                 flags) are discarded before the parent initializer runs.
         """
         # Local import: the sentinel dataclasses uses for "no default given".
         from dataclasses import MISSING

         # Handle model loading parameters that shouldn't go to config
         model_loading_params = ['torch_dtype', 'device_map', 'low_cpu_mem_usage',
                                 'load_in_8bit', 'load_in_4bit', 'quantization_config']
         for param in model_loading_params:
             kwargs.pop(param, None)

         # Fill in token IDs the caller did not supply, using the values
         # already visible on the instance (the class-level defaults).
         for token_key in ('pad_token_id', 'eos_token_id', 'bos_token_id'):
             if token_key not in kwargs:
                 kwargs[token_key] = getattr(self, token_key)
         if 'architectures' not in kwargs:
             kwargs['architectures'] = ["MyOlmoeForCausalLM"]

         # Initialize the parent class first
         super().__init__(**kwargs)

         # Then set any dataclass field the parent left unset.
         for field_name, field_def in self.__dataclass_fields__.items():
             if hasattr(self, field_name):
                 continue  # Already set by parent (or present as a class default)
             if field_name in kwargs:
                 setattr(self, field_name, kwargs[field_name])
             elif field_def.default is not MISSING:
                 # Plain default declared on the field.
                 setattr(self, field_name, field_def.default)
             elif field_def.default_factory is not MISSING:
                 # Mutable default: build a fresh value for this instance.
                 setattr(self, field_name, field_def.default_factory())
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
+        """Override from_pretrained to handle the model type properly."""
+        config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
+        # Keep the original model_type from the saved config
+        # This allows loading models trained with "olmoe" type
+        if 'model_type' in config_dict:
+            original_model_type = config_dict['model_type']
+            # But register with the class model_type for compatibility
+            if original_model_type == "olmoe":
+                config_dict['model_type'] = "olmoe"  # Keep as olmoe
+        return cls.from_dict(config_dict, **kwargs)
 logger = logging.get_logger(__name__)

scripts/eval.py CHANGED Viewed

@@ -183,12 +183,6 @@ def load_transformers_model(args) -> HFLM:
 def load_custom_model(args) -> HFLM:
     """
     Load custom MyOLMoE model (uses top-k routing by default).
-    Args:
-        args: Parsed command line arguments
-    Returns:
-        HFLM: Wrapped model ready for evaluation
     """
     logger.info(f"Loading custom MyOLMoE model: {args.model_path}")
     logger.info("Using top-k routing (default)")
@@ -205,15 +199,11 @@ def load_custom_model(args) -> HFLM:
         from modeling_myolmoe import MyOlmoeForCausalLM, MyOlmoeConfig
         logger.info("Successfully imported MyOlmoeForCausalLM and MyOlmoeConfig")
-        # Check if config is a dataclass
-        if not hasattr(MyOlmoeConfig, '__dataclass_fields__'):
-            logger.warning("MyOlmoeConfig is not a dataclass, this may cause issues")
-        # Register the custom model class with the correct config
         from transformers import AutoConfig, AutoModelForCausalLM
-        AutoConfig.register("myolmoe", MyOlmoeConfig)
-        AutoModelForCausalLM.register(MyOlmoeConfig, MyOlmoeForCausalLM)
-        logger.info("Registered MyOlmoeForCausalLM with MyOlmoeConfig")
     except ImportError as e:
         logger.error(f"Failed to import custom model: {e}")
@@ -224,31 +214,51 @@ def load_custom_model(args) -> HFLM:
     logger.info("Loading model manually to avoid wrapper issues...")
     try:
-        # Load tokenizer
         tokenizer = AutoTokenizer.from_pretrained(
             args.model_path,
             trust_remote_code=args.trust_remote_code
         )
-        # Load config using the custom config class
-        model_config = MyOlmoeConfig.from_pretrained(
             args.model_path,
             trust_remote_code=args.trust_remote_code
         )
-        # Debug information
         logger.info(f"Loaded config type: {type(model_config)}")
         logger.info(f"Config model_type: {model_config.model_type}")
         # Load model instance
-        model_instance = MyOlmoeForCausalLM.from_pretrained(
             args.model_path,
-            config=model_config,
-            trust_remote_code=args.trust_remote_code,
-            torch_dtype=torch.bfloat16 if args.dtype == "bfloat16" else "auto"
         )
-        # Create HFLM with pre-loaded model
         model = HFLM(
             pretrained=model_instance,
             tokenizer=tokenizer,
@@ -259,11 +269,15 @@ def load_custom_model(args) -> HFLM:
     except Exception as e:
         logger.error(f"Failed to load custom model: {e}")
         raise
     logger.info("Custom model loaded successfully")
     return model
 def validate_model_config(model_path: str, trust_remote_code: bool = False) -> Dict[str, Any]:
     """
     Validate model configuration and return key information.

 def load_custom_model(args) -> HFLM:
     """
     Load custom MyOLMoE model (uses top-k routing by default).
     """
     logger.info(f"Loading custom MyOLMoE model: {args.model_path}")
     logger.info("Using top-k routing (default)")
         from modeling_myolmoe import MyOlmoeForCausalLM, MyOlmoeConfig
         logger.info("Successfully imported MyOlmoeForCausalLM and MyOlmoeConfig")
+        # IMPORTANT: Register with "olmoe" since that's what your model was trained with
         from transformers import AutoConfig, AutoModelForCausalLM
+        AutoConfig.register("olmoe", MyOlmoeConfig, exist_ok=True)  # Use exist_ok=True
+        AutoModelForCausalLM.register(MyOlmoeConfig, MyOlmoeForCausalLM, exist_ok=True)
+        logger.info("Registered MyOlmoeForCausalLM with MyOlmoeConfig for 'olmoe' type")
     except ImportError as e:
         logger.error(f"Failed to import custom model: {e}")
     logger.info("Loading model manually to avoid wrapper issues...")
     try:
+        # Load tokenizer first
         tokenizer = AutoTokenizer.from_pretrained(
             args.model_path,
             trust_remote_code=args.trust_remote_code
         )
+        # Load config - this should now work with the olmoe type
+        model_config = AutoConfig.from_pretrained(
             args.model_path,
             trust_remote_code=args.trust_remote_code
         )
         logger.info(f"Loaded config type: {type(model_config)}")
         logger.info(f"Config model_type: {model_config.model_type}")
+        # Verify the config is properly initialized
+        if not hasattr(model_config, '__dataclass_fields__'):
+            logger.warning("Config is not recognized as a dataclass, attempting to recreate...")
+            # Recreate config as proper dataclass instance
+            config_dict = model_config.to_dict()
+            model_config = MyOlmoeConfig(**config_dict)
+        # Prepare model loading kwargs
+        model_kwargs = {
+            'config': model_config,
+            'trust_remote_code': args.trust_remote_code,
+        }
+        # Add torch_dtype if specified
+        if args.dtype == "bfloat16":
+            model_kwargs['torch_dtype'] = torch.bfloat16
+        elif args.dtype == "float16":
+            model_kwargs['torch_dtype'] = torch.float16
+        elif args.dtype == "float32":
+            model_kwargs['torch_dtype'] = torch.float32
         # Load model instance
+        model_instance = AutoModelForCausalLM.from_pretrained(
             args.model_path,
+            **model_kwargs
         )
+        logger.info(f"Loaded model type: {type(model_instance)}")
+        # Create HFLM wrapper
         model = HFLM(
             pretrained=model_instance,
             tokenizer=tokenizer,
     except Exception as e:
         logger.error(f"Failed to load custom model: {e}")
+        logger.error(f"Error type: {type(e)}")
+        import traceback
+        logger.error(f"Traceback: {traceback.format_exc()}")
         raise
     logger.info("Custom model loaded successfully")
     return model
 def validate_model_config(model_path: str, trust_remote_code: bool = False) -> Dict[str, Any]:
     """
     Validate model configuration and return key information.