passagereptile455 committed on
Commit
a2706dd
·
verified ·
1 Parent(s): 9acedd8

Upload train_concise.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_concise.py +32 -0
train_concise.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /// script
# dependencies = ["trl>=0.12.0", "peft>=0.7.0", "datasets", "transformers", "torch", "accelerate"]
# ///

# Fine-tunes a small base model with a LoRA adapter on a custom dataset via
# TRL's SFTTrainer, then uploads the result to a private Hub repository.
# The comment block at the top is inline script metadata listing the
# packages this script needs.

from datasets import load_dataset
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer

# Repository identifiers used by the run.
BASE_MODEL = "Qwen/Qwen2.5-0.5B"
DATASET_REPO = "passagereptile455/concise-tech-explanations"
HUB_MODEL_REPO = "passagereptile455/qwen-concise-style"

# Pull the "train" split of the custom dataset.
dataset = load_dataset(DATASET_REPO, split="train")

# LoRA adapter settings: rank 16, alpha 32, with "all-linear" as the target
# module selector.
lora_settings = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules="all-linear",
)

# Training arguments. The dataset is small, so the run is capped at 50
# optimizer steps and uses a comparatively high learning rate. A per-device
# batch of 1 with 4 accumulation steps gives 4 samples per optimizer step
# on each device.
training_args = SFTConfig(
    output_dir="qwen-concise",
    max_steps=50,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    logging_steps=10,
    learning_rate=2e-4,
    push_to_hub=True,
    hub_model_id=HUB_MODEL_REPO,
    hub_private_repo=True,
)

# Passing the model as a string id leaves loading to the trainer.
trainer = SFTTrainer(
    model=BASE_MODEL,
    train_dataset=dataset,
    peft_config=lora_settings,
    args=training_args,
)

# Run the fine-tune, then explicitly upload the result to the Hub.
trainer.train()
trainer.push_to_hub()
print("Done! Model trained on YOUR concise style.")