lokegud committed on
Commit
1bc4f62
·
verified ·
1 Parent(s): 95a8e8a

Upload train_infrastructure_model.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_infrastructure_model.py +10 -9
train_infrastructure_model.py CHANGED
@@ -103,16 +103,17 @@ peft_config = LoraConfig(
103
 
104
  # Training configuration
105
  print("Configuring training...")
106
- training_args = SFTConfig(
 
 
107
  output_dir=OUTPUT_MODEL,
108
 
109
  # Training parameters
110
  num_train_epochs=3,
111
- per_device_train_batch_size=4,
112
- per_device_eval_batch_size=4,
113
- gradient_accumulation_steps=4,
114
  gradient_checkpointing=True,
115
- max_seq_length=2048,
116
 
117
  # Optimization
118
  learning_rate=2e-4,
@@ -123,7 +124,7 @@ training_args = SFTConfig(
123
 
124
  # Evaluation and logging
125
  eval_strategy="steps",
126
- eval_steps=50,
127
  logging_steps=10,
128
  save_strategy="steps",
129
  save_steps=200,
@@ -142,12 +143,9 @@ training_args = SFTConfig(
142
  # Performance
143
  bf16=True,
144
  max_grad_norm=0.3,
145
- group_by_length=True,
146
 
147
  # Misc
148
  seed=42,
149
- dataset_text_field="text",
150
- packing=False,
151
  )
152
 
153
  # Initialize trainer
@@ -158,6 +156,9 @@ trainer = SFTTrainer(
158
  eval_dataset=eval_dataset,
159
  peft_config=peft_config,
160
  args=training_args,
 
 
 
161
  )
162
 
163
  # Train
 
103
 
104
  # Training configuration
105
  print("Configuring training...")
106
+ from transformers import TrainingArguments
107
+
108
+ training_args = TrainingArguments(
109
  output_dir=OUTPUT_MODEL,
110
 
111
  # Training parameters
112
  num_train_epochs=3,
113
+ per_device_train_batch_size=2,
114
+ per_device_eval_batch_size=2,
115
+ gradient_accumulation_steps=8,
116
  gradient_checkpointing=True,
 
117
 
118
  # Optimization
119
  learning_rate=2e-4,
 
124
 
125
  # Evaluation and logging
126
  eval_strategy="steps",
127
+ eval_steps=100,
128
  logging_steps=10,
129
  save_strategy="steps",
130
  save_steps=200,
 
143
  # Performance
144
  bf16=True,
145
  max_grad_norm=0.3,
 
146
 
147
  # Misc
148
  seed=42,
 
 
149
  )
150
 
151
  # Initialize trainer
 
156
  eval_dataset=eval_dataset,
157
  peft_config=peft_config,
158
  args=training_args,
159
+ dataset_text_field="text",
160
+ max_seq_length=2048,
161
+ packing=False,
162
  )
163
 
164
  # Train