sunkencity committed on
Commit
afbbcb3
·
verified ·
1 Parent(s): bfbfcaf

Upload train_aviation.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_aviation.py +17 -14
train_aviation.py CHANGED
@@ -18,21 +18,23 @@ import os
18
  from huggingface_hub import list_repo_files
19
 
20
  # DEBUG: Check token and repo access
21
- print("πŸ” DIAGNOSTICS:")
22
- token = os.environ.get("HF_TOKEN")
23
- print(f" HF_TOKEN env var present: {bool(token)}")
24
- if token:
25
- print(f" HF_TOKEN prefix: {token[:4]}...")
 
 
 
 
 
 
 
 
 
 
26
 
27
  model_id = "mistralai/Ministral-3-14B-Reasoning-2512"
28
- try:
29
- print(f" Attempting to list files for {model_id}...")
30
- files = list_repo_files(model_id, token=token)
31
- print(f" ✅ Success! Found {len(files)} files.")
32
- print(f" First 5 files: {files[:5]}")
33
- except Exception as e:
34
- print(f" ❌ Failed to list repo files: {e}")
35
- print("="*40)
36
 
37
  from datasets import load_dataset
38
  from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
@@ -40,6 +42,7 @@ from trl import SFTTrainer, SFTConfig
40
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
41
 
42
  # Register 'ministral3' config to handle nested text_config
 
43
  print("🔧 Registering ministral3 config (Monkey Patch Strategy)...")
44
  try:
45
  from transformers import MinistralConfig, AutoConfig
@@ -174,7 +177,7 @@ trainer = SFTTrainer(
174
  eval_dataset=eval_dataset,
175
  args=config,
176
  peft_config=peft_config,
177
- tokenizer=tokenizer,
178
  )
179
 
180
  print("🚀 Starting training...")
 
18
  from huggingface_hub import list_repo_files
19
 
20
  # DEBUG: Check token and repo access
21
+ # print("πŸ” DIAGNOSTICS:")
22
+ # token = os.environ.get("HF_TOKEN")
23
+ # print(f" HF_TOKEN env var present: {bool(token)}")
24
+ # if token:
25
+ # print(f" HF_TOKEN prefix: {token[:4]}...")
26
+
27
+ # model_id = "mistralai/Ministral-3-14B-Reasoning-2512"
28
+ # try:
29
+ # print(f" Attempting to list files for {model_id}...")
30
+ # files = list_repo_files(model_id, token=token)
31
+ # print(f" ✅ Success! Found {len(files)} files.")
32
+ # print(f" First 5 files: {files[:5]}")
33
+ # except Exception as e:
34
+ # print(f" ❌ Failed to list repo files: {e}")
35
+ # print("="*40)
36
 
37
  model_id = "mistralai/Ministral-3-14B-Reasoning-2512"
 
 
 
 
 
 
 
 
38
 
39
  from datasets import load_dataset
40
  from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
 
42
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig
43
 
44
  # Register 'ministral3' config to handle nested text_config
45
+ # ... (rest of registration logic)
46
  print("🔧 Registering ministral3 config (Monkey Patch Strategy)...")
47
  try:
48
  from transformers import MinistralConfig, AutoConfig
 
177
  eval_dataset=eval_dataset,
178
  args=config,
179
  peft_config=peft_config,
180
+ processing_class=tokenizer,
181
  )
182
 
183
  print("🚀 Starting training...")