epinfomax committed on
Commit
f4f43e5
·
verified ·
1 Parent(s): ef5974f

Upload train.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train.py +7 -8
train.py CHANGED
@@ -9,7 +9,7 @@ from transformers import AutoTokenizer
9
  import trackio
10
  import os
11
 
12
- print("🚀 Starting FunctionGemma 270M Fine-tuning (V2 with Template Fix)")
13
 
14
  model_id = "google/functiongemma-270m-it"
15
  tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -18,8 +18,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
18
  dataset = load_dataset("epinfomax/vn-function-calling-dataset", split="train")
19
 
20
  def format_conversation(example):
21
- # Modern transformers template supports 'tools' argument
22
- # We render the template to a string so SFTTrainer doesn't have to guess
23
  text = tokenizer.apply_chat_template(
24
  example["messages"],
25
  tools=example["tools"],
@@ -33,22 +32,21 @@ dataset = dataset.map(format_conversation, remove_columns=dataset.column_names)
33
 
34
  # Training configuration
35
  config = SFTConfig(
36
- dataset_text_field="text", # Use the pre-rendered text
37
- max_seq_length=1024,
38
  output_dir="vn-function-gemma-270m-finetuned",
39
  push_to_hub=True,
40
  hub_model_id="epinfomax/vn-function-gemma-270m-finetuned",
41
  hub_strategy="every_save",
42
  num_train_epochs=5,
43
- per_device_train_batch_size=8,
44
- gradient_accumulation_steps=2,
45
  learning_rate=5e-5,
46
  logging_steps=5,
47
  save_strategy="steps",
48
  save_steps=50,
49
  report_to="trackio",
50
  project="vn-function-calling",
51
- run_name="function-gemma-270m-v2-fixed"
52
  )
53
 
54
  # LoRA configuration
@@ -65,6 +63,7 @@ trainer = SFTTrainer(
65
  train_dataset=dataset,
66
  peft_config=peft_config,
67
  args=config,
 
68
  )
69
 
70
  trainer.train()
 
9
  import trackio
10
  import os
11
 
12
+ print("🚀 Starting FunctionGemma 270M Fine-tuning (V3 - Config Fix)")
13
 
14
  model_id = "google/functiongemma-270m-it"
15
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
18
  dataset = load_dataset("epinfomax/vn-function-calling-dataset", split="train")
19
 
20
  def format_conversation(example):
21
+ # Pre-render the conversation using the model's chat template
 
22
  text = tokenizer.apply_chat_template(
23
  example["messages"],
24
  tools=example["tools"],
 
32
 
33
  # Training configuration
34
  config = SFTConfig(
35
+ dataset_text_field="text",
 
36
  output_dir="vn-function-gemma-270m-finetuned",
37
  push_to_hub=True,
38
  hub_model_id="epinfomax/vn-function-gemma-270m-finetuned",
39
  hub_strategy="every_save",
40
  num_train_epochs=5,
41
+ per_device_train_batch_size=4, # Reduced for stability
42
+ gradient_accumulation_steps=4,
43
  learning_rate=5e-5,
44
  logging_steps=5,
45
  save_strategy="steps",
46
  save_steps=50,
47
  report_to="trackio",
48
  project="vn-function-calling",
49
+ run_name="function-gemma-270m-v3-fixed"
50
  )
51
 
52
  # LoRA configuration
 
63
  train_dataset=dataset,
64
  peft_config=peft_config,
65
  args=config,
66
+ max_seq_length=1024, # Moved here from SFTConfig
67
  )
68
 
69
  trainer.train()