Upload train_aviation.py with huggingface_hub
Browse files- train_aviation.py +17 -14
train_aviation.py
CHANGED
|
@@ -18,21 +18,23 @@ import os
|
|
| 18 |
from huggingface_hub import list_repo_files
|
| 19 |
|
| 20 |
# DEBUG: Check token and repo access
|
| 21 |
-
print("🔍 DIAGNOSTICS:")
|
| 22 |
-
token = os.environ.get("HF_TOKEN")
|
| 23 |
-
print(f" HF_TOKEN env var present: {bool(token)}")
|
| 24 |
-
if token:
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
model_id = "mistralai/Ministral-3-14B-Reasoning-2512"
|
| 28 |
-
try:
|
| 29 |
-
print(f" Attempting to list files for {model_id}...")
|
| 30 |
-
files = list_repo_files(model_id, token=token)
|
| 31 |
-
print(f" ✅ Success! Found {len(files)} files.")
|
| 32 |
-
print(f" First 5 files: {files[:5]}")
|
| 33 |
-
except Exception as e:
|
| 34 |
-
print(f" ❌ Failed to list repo files: {e}")
|
| 35 |
-
print("="*40)
|
| 36 |
|
| 37 |
from datasets import load_dataset
|
| 38 |
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
|
|
@@ -40,6 +42,7 @@ from trl import SFTTrainer, SFTConfig
|
|
| 40 |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
|
| 41 |
|
| 42 |
# Register 'ministral3' config to handle nested text_config
|
|
|
|
| 43 |
print("🔧 Registering ministral3 config (Monkey Patch Strategy)...")
|
| 44 |
try:
|
| 45 |
from transformers import MinistralConfig, AutoConfig
|
|
@@ -174,7 +177,7 @@ trainer = SFTTrainer(
|
|
| 174 |
eval_dataset=eval_dataset,
|
| 175 |
args=config,
|
| 176 |
peft_config=peft_config,
|
| 177 |
-
|
| 178 |
)
|
| 179 |
|
| 180 |
print("🚀 Starting training...")
|
|
|
|
| 18 |
from huggingface_hub import list_repo_files
|
| 19 |
|
| 20 |
# DEBUG: Check token and repo access
|
| 21 |
+
# print("🔍 DIAGNOSTICS:")
|
| 22 |
+
# token = os.environ.get("HF_TOKEN")
|
| 23 |
+
# print(f" HF_TOKEN env var present: {bool(token)}")
|
| 24 |
+
# if token:
|
| 25 |
+
# print(f" HF_TOKEN prefix: {token[:4]}...")
|
| 26 |
+
|
| 27 |
+
# model_id = "mistralai/Ministral-3-14B-Reasoning-2512"
|
| 28 |
+
# try:
|
| 29 |
+
# print(f" Attempting to list files for {model_id}...")
|
| 30 |
+
# files = list_repo_files(model_id, token=token)
|
| 31 |
+
# print(f" ✅ Success! Found {len(files)} files.")
|
| 32 |
+
# print(f" First 5 files: {files[:5]}")
|
| 33 |
+
# except Exception as e:
|
| 34 |
+
# print(f" ❌ Failed to list repo files: {e}")
|
| 35 |
+
# print("="*40)
|
| 36 |
|
| 37 |
model_id = "mistralai/Ministral-3-14B-Reasoning-2512"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
from datasets import load_dataset
|
| 40 |
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
|
|
|
|
| 42 |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
|
| 43 |
|
| 44 |
# Register 'ministral3' config to handle nested text_config
|
| 45 |
+
# ... (rest of registration logic)
|
| 46 |
print("🔧 Registering ministral3 config (Monkey Patch Strategy)...")
|
| 47 |
try:
|
| 48 |
from transformers import MinistralConfig, AutoConfig
|
|
|
|
| 177 |
eval_dataset=eval_dataset,
|
| 178 |
args=config,
|
| 179 |
peft_config=peft_config,
|
| 180 |
+
processing_class=tokenizer,
|
| 181 |
)
|
| 182 |
|
| 183 |
print("🚀 Starting training...")
|