lokegud committed on
Commit
69c41e0
·
verified ·
1 Parent(s): 0d0275b

Upload train_production.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_production.py +82 -0
train_production.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /// script
# dependencies = ["trl>=0.12.0", "peft>=0.7.0", "trackio", "torch", "transformers", "datasets"]
# ///
"""Fine-tune Qwen2.5-1.5B-Instruct with LoRA on the ComfyUI workflows dataset.

The script loads ``lokegud/comfyui-workflows-dataset``, holds out 15% of it
for evaluation, trains a LoRA adapter with TRL's ``SFTTrainer``, and pushes
checkpoints and the final adapter to the Hugging Face Hub.  Metrics are
reported to Trackio.
"""

from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
import trackio  # noqa: F401 -- not referenced directly; kept for report_to="trackio"

# Single source of truth for the Hub repository the adapter is pushed to.
HUB_MODEL_ID = "lokegud/comfyui-specialist-v1"

print("🚀 Starting ComfyUI Specialist Training (Production)")
print("=" * 60)

# Load our custom ComfyUI dataset
dataset = load_dataset("lokegud/comfyui-workflows-dataset", split="train")
print(f"📊 Dataset loaded: {len(dataset)} examples")

# Split for evaluation (fixed seed so the held-out set is reproducible)
dataset_split = dataset.train_test_split(test_size=0.15, seed=42)
train_dataset = dataset_split["train"]
eval_dataset = dataset_split["test"]

print(f"📈 Train: {len(train_dataset)} | Eval: {len(eval_dataset)}")

# LoRA configuration - optimized for 1.5B model
peft_config = LoraConfig(
    r=32,  # Higher rank for better learning
    lora_alpha=64,
    lora_dropout=0.05,
    # Adapt every attention and MLP projection layer.
    target_modules=[
        "q_proj",
        "v_proj",
        "k_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    task_type="CAUSAL_LM",
)

# Training configuration
training_args = SFTConfig(
    output_dir="comfyui-specialist-v1",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,  # Effective batch size per device: 2 * 8 = 16
    learning_rate=2e-4,
    warmup_steps=20,
    logging_steps=5,
    eval_strategy="steps",
    eval_steps=20,
    save_strategy="steps",
    # load_best_model_at_end requires save_steps to be a round multiple of
    # eval_steps; the previous value of 50 (vs. eval_steps=20) makes the
    # config raise ValueError before training starts.
    save_steps=40,
    save_total_limit=3,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    push_to_hub=True,
    hub_model_id=HUB_MODEL_ID,
    hub_strategy="every_save",
    hub_private_repo=False,
    report_to="trackio",
    project="comfyui-specialist",
    run_name="production-v1",
    gradient_checkpointing=True,
    max_length=2048,  # Longer context for full workflows
    # NOTE(review): dataset_text_field="messages" was removed.  That option
    # names a plain-text column; a conversational "messages" column is
    # detected by SFTTrainer and rendered through the model's chat template.
    # This assumes the dataset exposes a "messages" column -- confirm.
)

print("🔧 Initializing trainer with Qwen2.5-1.5B-Instruct...")

# Initialize trainer (the model is given as a Hub id and loaded by SFTTrainer)
trainer = SFTTrainer(
    model="Qwen/Qwen2.5-1.5B-Instruct",
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    args=training_args,
)

print("🏋️ Training ComfyUI Specialist...")
trainer.train()

print("📤 Pushing final model to Hub...")
trainer.push_to_hub()

print("✅ Training complete!")
print(f"📦 Model: {HUB_MODEL_ID}")
print("📊 Trackio: https://lokegud-trackio.hf.space/")