VortexHunter23 committed on
Commit
76f0e5a
·
verified ·
1 Parent(s): 1b34bc1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +34 -0
README.md CHANGED
@@ -11,6 +11,40 @@ license: apache-2.0
11
  language:
12
  - en
13
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Uploaded model
16
 
 
11
  language:
12
  - en
13
  ---
14
+ from trl import SFTTrainer
15
+ from transformers import TrainingArguments
16
+ from unsloth import is_bfloat16_supported
17
+
18
+ trainer = SFTTrainer(
19
+ model = model,
20
+ tokenizer = tokenizer,
21
+ train_dataset = dataset,
22
+ dataset_text_field = "text",
23
+ max_seq_length = max_seq_length,
24
+ dataset_num_proc = 2,
25
+ packing = False, # Can make training 5x faster for short sequences.
26
+ args = TrainingArguments(
27
+ per_device_train_batch_size = 2,
28
+ gradient_accumulation_steps = 4,
29
+ warmup_steps = 5,
30
+ num_train_epochs = 1, # Set this for 1 full training run (ignored while max_steps > 0).
31
+ max_steps = 100,
32
+ learning_rate = 2e-4,
33
+ fp16 = not is_bfloat16_supported(),
34
+ bf16 = is_bfloat16_supported(),
35
+ logging_steps = 1,
36
+ optim = "adamw_8bit",
37
+ weight_decay = 0.01,
38
+ lr_scheduler_type = "linear",
39
+ seed = 3407,
40
+ output_dir = "outputs",
41
+ save_strategy = "steps",
42
+ save_steps = 60,
43
+ report_to = "none", # Use this for WandB etc
44
+ ),
45
+ )
46
+
47
+
48
 
49
  # Uploaded model
50