sunkencity committed (verified)
Commit d083c2d · 1 Parent(s): 7c82449

Upload train_aviation.py with huggingface_hub

Files changed (1):
  1. train_aviation.py +21 -32
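
The commit message says the file was uploaded with huggingface_hub. A minimal sketch of what that upload call can look like, assuming HfApi; the repo id below is a placeholder, since the target repo is not shown on this page:

from huggingface_hub import HfApi

api = HfApi()  # uses the HF_TOKEN environment variable or a cached login
api.upload_file(
    path_or_fileobj="train_aviation.py",    # local file to push
    path_in_repo="train_aviation.py",       # destination path inside the repo
    repo_id="sunkencity/placeholder-repo",  # placeholder, not taken from this page
    commit_message="Upload train_aviation.py with huggingface_hub",
)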
train_aviation.py CHANGED
@@ -17,32 +17,21 @@ import torch
 import os
 from huggingface_hub import list_repo_files
 
-# DEBUG: Check token and repo access
-# (commented out for cleaner logs now that it works)
-# print("🔍 DIAGNOSTICS:")
-# token = os.environ.get("HF_TOKEN")
-# print(f" HF_TOKEN env var present: {bool(token)}")
-# if token:
-#     print(f" HF_TOKEN prefix: {token[:4]}...")
-
 model_id = "mistralai/Ministral-3-14B-Reasoning-2512" # Defined at top level
 
-# try:
-#     print(f" Attempting to list files for {model_id}...")
-#     files = list_repo_files(model_id, token=token)
-#     print(f" ✅ Success! Found {len(files)} files.")
-#     print(f" First 5 files: {files[:5]}")
-# except Exception as e:
-#     print(f" ❌ Failed to list repo files: {e}")
-#     print("="*40)
-
 
 from datasets import load_dataset
 from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
 from trl import SFTTrainer, SFTConfig
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig, MistralConfig
+
+# Explicitly register 'ministral3' model type to MistralConfig
+# This is a workaround for transformers not recognizing 'ministral3' internally
+class RegistrableMinistralConfig(MistralConfig):
+    model_type = "ministral3"
 
-# All custom config registration logic removed, relying on latest transformers
+AutoConfig.register("ministral3", RegistrableMinistralConfig)
+print("🔧 Registered 'ministral3' to RegistrableMinistralConfig.")
 
 
 # Load dataset
@@ -88,21 +77,21 @@ bnb_config = BitsAndBytesConfig(
     bnb_4bit_use_double_quant=True,
 )
 
-# Load config first (AutoConfig should handle it now with latest transformers)
+# Load config first
 print(f"🤖 Loading config for {model_id}...")
 config = AutoConfig.from_pretrained(model_id)
 
-# Patch text_config to include sliding_window and layer_types (Now unnecessary, should be handled by latest transformers)
-# print("🔧 Patching config.text_config...")
-# if hasattr(config, 'text_config'):
-#     if not hasattr(config.text_config, 'sliding_window') or config.text_config.sliding_window is None:
-#         config.text_config.sliding_window = 4096
-#         print(" Set config.text_config.sliding_window = 4096")
-#     if not hasattr(config.text_config, 'layer_types'):
-#         config.text_config.layer_types = ["sliding_attention"] * getattr(config.text_config, "num_hidden_layers", 40)
-#         print(" Set config.text_config.layer_types")
-# else:
-#     print(" No text_config found, skipping patching.")
+# Patch text_config to include sliding_window and layer_types
+print("🔧 Patching config.text_config...")
+if hasattr(config, 'text_config'):
+    if not hasattr(config.text_config, 'sliding_window') or config.text_config.sliding_window is None:
+        config.text_config.sliding_window = 4096
+        print(" Set config.text_config.sliding_window = 4096")
+    if not hasattr(config.text_config, 'layer_types'):
+        config.text_config.layer_types = ["sliding_attention"] * getattr(config.text_config, "num_hidden_layers", 40)
+        print(" Set config.text_config.layer_types")
+else:
+    print(" No text_config found, skipping patching.")
 
 # Load Model with the config
 print(f"🤖 Loading model {model_id} with config...")
@@ -171,4 +160,4 @@ print("🚀 Starting training...")
 trainer.train()
 
 print("💾 Pushing to Hub...")
-trainer.push_to_hub()
+trainer.push_to_hub()
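
For context on the workaround re-added at the top of the diff: AutoConfig.register maps a model_type string that transformers does not recognize to a config class, so AutoConfig.from_pretrained can resolve a checkpoint that declares that type. A self-contained sketch of the same pattern, using a hypothetical "my_custom_arch" type and a temporary config.json instead of the ministral3 checkpoint from this script:

import json
import tempfile
from pathlib import Path

from transformers import AutoConfig, MistralConfig

class MyCustomArchConfig(MistralConfig):
    # The registered key and the class attribute must match.
    model_type = "my_custom_arch"

AutoConfig.register("my_custom_arch", MyCustomArchConfig)

# Simulate a checkpoint whose config.json declares the otherwise unknown model type.
with tempfile.TemporaryDirectory() as tmp:
    (Path(tmp) / "config.json").write_text(
        json.dumps({"model_type": "my_custom_arch", "hidden_size": 1024})
    )
    config = AutoConfig.from_pretrained(tmp)
    print(type(config).__name__, config.model_type)  # MyCustomArchConfig my_custom_arch

In the script itself, the same pattern registers "ministral3" against a MistralConfig subclass, re-adding the workaround that an earlier revision had removed in favor of relying on a newer transformers release.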