stmasson committed on
Commit
ebb6ddd
·
verified ·
1 Parent(s): 5476247

Upload scripts/train_alizee_v2_stage1_sft.py with huggingface_hub

Browse files
scripts/train_alizee_v2_stage1_sft.py CHANGED
@@ -37,7 +37,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
37
  from trl import SFTTrainer, SFTConfig
38
 
39
  # Configuration
40
- MODEL_NAME = "stmasson/alizee-coder-devstral-1-small"
 
 
41
  OUTPUT_REPO = "stmasson/alizee-coder-devstral-2-small-stage1"
42
  FINAL_REPO = "stmasson/alizee-coder-devstral-2-small"
43
 
@@ -57,14 +59,14 @@ CODING_RATIO = 0.15
57
  print("=" * 60)
58
  print("Stage 1: Reasoning Distillation via SFT")
59
  print("=" * 60)
60
- print(f"Base model: {MODEL_NAME}")
61
  print(f"Output: {OUTPUT_REPO}")
62
  print(f"Data mix: {REASONING_RATIO*100}% reasoning + {CODING_RATIO*100}% coding")
63
  print("=" * 60)
64
 
65
  # Load tokenizer
66
  print("\n📝 Loading tokenizer...")
67
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
68
  if tokenizer.pad_token is None:
69
  tokenizer.pad_token = tokenizer.eos_token
70
  tokenizer.padding_side = "right"
@@ -81,7 +83,7 @@ bnb_config = BitsAndBytesConfig(
81
  # Load model
82
  print("\n🔄 Loading model with QLoRA...")
83
  model = AutoModelForCausalLM.from_pretrained(
84
- MODEL_NAME,
85
  quantization_config=bnb_config,
86
  device_map="auto",
87
  trust_remote_code=True,
 
37
  from trl import SFTTrainer, SFTConfig
38
 
39
  # Configuration
40
+ # Use the base Devstral model directly (v1 was LoRA adapter only)
41
+ # Starting fresh with much larger dataset (736K vs 10K in v1)
42
+ BASE_MODEL = "mistralai/Devstral-Small-2505"
43
  OUTPUT_REPO = "stmasson/alizee-coder-devstral-2-small-stage1"
44
  FINAL_REPO = "stmasson/alizee-coder-devstral-2-small"
45
 
 
59
  print("=" * 60)
60
  print("Stage 1: Reasoning Distillation via SFT")
61
  print("=" * 60)
62
+ print(f"Base model: {BASE_MODEL}")
63
  print(f"Output: {OUTPUT_REPO}")
64
  print(f"Data mix: {REASONING_RATIO*100}% reasoning + {CODING_RATIO*100}% coding")
65
  print("=" * 60)
66
 
67
  # Load tokenizer
68
  print("\n📝 Loading tokenizer...")
69
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
70
  if tokenizer.pad_token is None:
71
  tokenizer.pad_token = tokenizer.eos_token
72
  tokenizer.padding_side = "right"
 
83
  # Load model
84
  print("\n🔄 Loading model with QLoRA...")
85
  model = AutoModelForCausalLM.from_pretrained(
86
+ BASE_MODEL,
87
  quantization_config=bnb_config,
88
  device_map="auto",
89
  trust_remote_code=True,