sunkencity
/

training-scripts

Model card Files Files and versions

xet

Community

sunkencity committed on Dec 10, 2025

Commit

1ae6cb4

verified ·

1 Parent(s): 6f6fc96

Upload train_aviation.py with huggingface_hub

Browse files

Files changed (1) hide show

train_aviation.py +21 -14

train_aviation.py CHANGED Viewed

@@ -42,29 +42,36 @@ from trl import SFTTrainer, SFTConfig
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
 # Register 'ministral3' config to handle nested text_config
-# ... (rest of registration logic)
 print("🔧 Registering ministral3 config (Monkey Patch Strategy)...")
 try:
     from transformers import MinistralConfig, AutoConfig
-    # Monkey patch the model_type to match what the config.json has
-    # This allows us to use the native class which is already registered with AutoModel
-    print(f"   Original MinistralConfig.model_type: {MinistralConfig.model_type}")
-    MinistralConfig.model_type = "ministral3"
-    print(f"   Patched MinistralConfig.model_type: {MinistralConfig.model_type}")
-    # Register the patched class for the "ministral3" key
-    AutoConfig.register("ministral3", MinistralConfig)
-    print("   Registered ministral3 -> MinistralConfig (native, patched)")
 except Exception as e:
     print(f"   ❌ Failed to patch/register ministral3 config: {e}")
 # Register Mistral3Config to a model class
-# ... (rest of registration kept as is)
-# ... (rest of registration kept as is)
-# ... (rest of registration kept as is)
-# ... (rest of registration kept as is)
 print("🔧 Registering Mistral3 model class...")
 try:
     from transformers.models.mistral3.configuration_mistral3 import Mistral3Config
@@ -193,4 +200,4 @@ print("🚀 Starting training...")
 trainer.train()
 print("💾 Pushing to Hub...")
-trainer.push_to_hub()

 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
 # Register 'ministral3' config to handle nested text_config
 print("🔧 Registering ministral3 config (Monkey Patch Strategy)...")
 try:
     from transformers import MinistralConfig, AutoConfig
+    # We need to ensure MinistralConfig has sliding_window and layer_types if it's used
+    # as the inner text_config for Mistral3.
+    # Create a temporary compatible class.
+    class Ministral3CompatConfig(MinistralConfig):
+        model_type = "ministral3" # Ensure this matches the `text_config["model_type"]`
+        def __init__(self, **kwargs):
+            super().__init__(**kwargs)
+            # Ensure sliding_window is set, if null in config.json or missing
+            if not hasattr(self, 'sliding_window') or self.sliding_window is None:
+                self.sliding_window = 4096 # Default value for Mistral/Ministral models
+            # Ensure layer_types is set, as it's expected by modeling_ministral.py
+            if not hasattr(self, 'layer_types'):
+                # Assumes all layers are sliding attention if the model uses it
+                # Use getattr for num_hidden_layers as it might not be set yet if config is partial
+                self.layer_types = ["sliding_attention"] * getattr(self, "num_hidden_layers", 40) # Default to 40 if not found
+    # Register the compatible class for the "ministral3" key
+    AutoConfig.register("ministral3", Ministral3CompatConfig)
+    print("   Registered ministral3 -> Ministral3CompatConfig (patched)")
 except Exception as e:
     print(f"   ❌ Failed to patch/register ministral3 config: {e}")
 # Register Mistral3Config to a model class
 print("🔧 Registering Mistral3 model class...")
 try:
     from transformers.models.mistral3.configuration_mistral3 import Mistral3Config
 trainer.train()
 print("💾 Pushing to Hub...")
+trainer.push_to_hub()