Darin Leonhart committed on
Commit
758ecd8
·
verified ·
1 Parent(s): 198227c

Fix: use TrainingArguments instead of SFTConfig

Browse files
Files changed (1) hide show
  1. train.py +397 -0
train.py ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ D1337 CIPHER - Custom Training Script
3
+ =====================================
4
+ Optimized QLoRA training for 31B model on 4x L40S (192GB VRAM)
5
+
6
+ Brand: D1337 SOVEREIGN LABS
7
+ Model: GLM-4.7-Flash-abliterated (31B) -> D1337 CIPHER
8
+ """
9
+
10
+ import os
11
+ import sys
12
+ import torch
13
+ import gradio as gr
14
+ from threading import Thread
15
+ from dataclasses import dataclass
16
+ from typing import Optional
17
+
18
+ # Training imports
19
+ from transformers import (
20
+ AutoTokenizer,
21
+ AutoModelForCausalLM,
22
+ TrainingArguments,
23
+ BitsAndBytesConfig,
24
+ )
25
+ from peft import (
26
+ LoraConfig,
27
+ get_peft_model,
28
+ TaskType,
29
+ )
30
+ from datasets import load_dataset
31
+ from trl import SFTTrainer, SFTConfig
32
+
33
+ # ============================================
34
+ # CONFIGURATION
35
+ # ============================================
36
@dataclass
class TrainingConfig:
    """Hyperparameters for QLoRA fine-tuning of the D1337 CIPHER model.

    Defaults target a 31B model quantized to 4-bit on 4x L40S GPUs
    (192 GB VRAM total), hence the batch size of 1 and the reduced
    sequence length.
    """

    # Model
    base_model: str = "huihui-ai/Huihui-GLM-4.7-Flash-abliterated"
    output_model: str = "Desorden1337/d1337-cipher-v1"

    # Dataset
    dataset_name: str = "Desorden1337/d1337-cipher-dataset"
    dataset_split: str = "train"

    # LoRA Config (reduced for 4x L40S memory)
    lora_r: int = 32
    lora_alpha: int = 64
    lora_dropout: float = 0.05
    # None means "use the default projection layers"; resolved in
    # __post_init__ because a mutable default is not allowed here.
    target_modules: Optional[list] = None

    # Training
    num_epochs: int = 5
    batch_size: int = 1
    gradient_accumulation: int = 8
    learning_rate: float = 2e-4
    max_seq_length: int = 2048  # Reduced for memory
    warmup_ratio: float = 0.1
    weight_decay: float = 0.01

    # Hardware
    use_4bit: bool = True
    use_bf16: bool = True

    def __post_init__(self):
        # Default LoRA targets: all attention and MLP projection layers.
        if self.target_modules is None:
            self.target_modules = [
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
            ]
71
+
72
+
73
+ # ============================================
74
+ # TRAINING CLASS
75
+ # ============================================
76
class D1337CipherTrainer:
    """Run QLoRA supervised fine-tuning for the D1337 CIPHER model.

    Workflow: train() loads the 4-bit quantized base model, attaches LoRA
    adapters, formats the chat dataset into ChatML text, runs TRL's
    SFTTrainer, and pushes the result to the Hub. `training_status` and
    `training_log` are polled by the Gradio monitoring UI.
    """

    def __init__(self, config: TrainingConfig = None):
        self.config = config or TrainingConfig()
        self.model = None
        self.tokenizer = None
        self.trainer = None
        self.training_status = "Idle"  # human-readable phase, shown in the UI
        self.training_log = []         # rolling log, capped at 100 entries

    def log(self, message: str):
        """Log message to console and internal log"""
        print(f"[D1337] {message}")
        self.training_log.append(message)
        # Bound the log so long runs don't grow memory without limit.
        if len(self.training_log) > 100:
            self.training_log = self.training_log[-100:]

    def setup_quantization(self):
        """Return a 4-bit NF4 BitsAndBytesConfig, or None if 4-bit is off."""
        if self.config.use_4bit:
            return BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16 if self.config.use_bf16 else torch.float16,
                bnb_4bit_use_double_quant=True,
            )
        return None

    def setup_lora(self):
        """Build the LoRA adapter configuration for causal-LM fine-tuning."""
        return LoraConfig(
            r=self.config.lora_r,
            lora_alpha=self.config.lora_alpha,
            lora_dropout=self.config.lora_dropout,
            target_modules=self.config.target_modules,
            bias="none",
            task_type=TaskType.CAUSAL_LM,
        )

    def load_model(self):
        """Load tokenizer and quantized base model, then attach LoRA."""
        self.training_status = "Loading model..."
        self.log(f"Loading model: {self.config.base_model}")

        # Load tokenizer; right padding for causal-LM training.
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.config.base_model,
            trust_remote_code=True,
            padding_side="right",
        )

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Load model with quantization, sharded across all visible GPUs.
        bnb_config = self.setup_quantization()

        self.model = AutoModelForCausalLM.from_pretrained(
            self.config.base_model,
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.bfloat16 if self.config.use_bf16 else torch.float16,
        )

        # Enable gradient checkpointing for memory efficiency; input grads
        # are needed so checkpointing works with frozen quantized weights.
        self.model.gradient_checkpointing_enable()
        self.model.enable_input_require_grads()

        # Apply LoRA
        lora_config = self.setup_lora()
        self.model = get_peft_model(self.model, lora_config)

        # Print trainable parameters
        trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
        total_params = sum(p.numel() for p in self.model.parameters())
        self.log(f"Trainable parameters: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")
        self.log(f"Model loaded on {torch.cuda.device_count()} GPU(s)")

    def load_dataset(self):
        """Load and prepare dataset from the Hub."""
        self.training_status = "Loading dataset..."
        self.log(f"Loading dataset: {self.config.dataset_name}")

        # Calls the module-level datasets.load_dataset, not this method.
        dataset = load_dataset(self.config.dataset_name, split=self.config.dataset_split)
        self.log(f"Dataset loaded: {len(dataset)} samples")

        return dataset

    def format_messages(self, example):
        """Format a {"messages": [...]} example into ChatML training text."""
        messages = example["messages"]

        # Use ChatML format
        text = ""
        for msg in messages:
            role = msg["role"]
            content = msg["content"]
            text += f"<|im_start|>{role}\n{content}<|im_end|>\n"

        return {"text": text}

    def train(self):
        """Execute training end-to-end.

        Returns True on success, False on failure (the error is recorded
        in training_status and training_log rather than raised).
        """
        try:
            self.training_status = "Initializing..."
            self.log("=" * 60)
            self.log("D1337 CIPHER TRAINING - INITIATED")
            self.log("=" * 60)

            # Load model and dataset
            self.load_model()
            dataset = self.load_dataset()

            # Format dataset into a single "text" column for SFTTrainer.
            self.log("Formatting dataset...")
            dataset = dataset.map(self.format_messages, remove_columns=dataset.column_names)

            # Training arguments (standard TrainingArguments)
            self.training_status = "Setting up training..."
            training_args = TrainingArguments(
                output_dir="./d1337-cipher-output",
                num_train_epochs=self.config.num_epochs,
                per_device_train_batch_size=self.config.batch_size,
                gradient_accumulation_steps=self.config.gradient_accumulation,
                learning_rate=self.config.learning_rate,
                weight_decay=self.config.weight_decay,
                # Use the configured warmup ratio (previously a hard-coded
                # warmup_steps=14 that ignored config.warmup_ratio) so
                # warmup scales with dataset size and epoch count.
                warmup_ratio=self.config.warmup_ratio,
                lr_scheduler_type="cosine",
                logging_steps=1,
                save_steps=50,
                save_total_limit=2,
                bf16=self.config.use_bf16,
                fp16=not self.config.use_bf16,
                gradient_checkpointing=True,
                max_grad_norm=1.0,
                group_by_length=True,
                dataloader_num_workers=4,
                remove_unused_columns=False,
                push_to_hub=True,
                hub_model_id=self.config.output_model,
                hub_private_repo=True,
                report_to="none",
            )

            # Initialize trainer. NOTE(review): no tokenizer is passed here,
            # so SFTTrainer resolves one from the model itself; likewise
            # config.max_seq_length is currently unused — confirm intended.
            self.trainer = SFTTrainer(
                model=self.model,
                args=training_args,
                train_dataset=dataset,
            )

            # Start training
            self.training_status = "Training in progress..."
            self.log("Training started!")
            self.trainer.train()

            # Save and push
            self.training_status = "Saving model..."
            self.log("Saving model...")
            self.trainer.save_model()
            self.trainer.push_to_hub()

            self.training_status = "Complete!"
            self.log("=" * 60)
            self.log("D1337 CIPHER TRAINING - COMPLETE!")
            self.log(f"Model saved to: {self.config.output_model}")
            self.log("=" * 60)

            return True

        except Exception as e:
            # Broad catch is deliberate: train() runs in a background thread
            # and failures must surface in the UI instead of dying silently.
            self.training_status = f"Error: {str(e)}"
            self.log(f"Training failed: {str(e)}")
            import traceback
            self.log(traceback.format_exc())
            return False
252
+
253
+
254
+ # ============================================
255
+ # GRADIO UI
256
+ # ============================================
257
def create_ui(trainer: D1337CipherTrainer):
    """Create Gradio UI for monitoring.

    Builds a read-only console: configuration panel, live status/GPU/log
    boxes (polled via `every=`), and a button that starts training on a
    background thread. Returns the gr.Blocks app (caller launches it).
    """

    def get_status():
        # Polled every 2 s by status_box.
        return trainer.training_status

    def get_logs():
        # Show only the most recent 50 log lines.
        return "\n".join(trainer.training_log[-50:])

    def start_training():
        # Reset the log, then run train() off the UI thread so the
        # interface stays responsive during the long training run.
        trainer.training_log = []
        thread = Thread(target=trainer.train)
        thread.start()
        return "Training started! Check logs for progress."

    def get_gpu_info():
        # Per-GPU used/total memory summary; polled every 5 s.
        if torch.cuda.is_available():
            info = []
            for i in range(torch.cuda.device_count()):
                props = torch.cuda.get_device_properties(i)
                mem_total = props.total_memory / (1024**3)
                mem_used = torch.cuda.memory_allocated(i) / (1024**3)
                info.append(f"GPU {i}: {props.name} - {mem_used:.1f}GB / {mem_total:.1f}GB")
            return "\n".join(info)
        return "No GPU available"

    with gr.Blocks(title="D1337 CIPHER Training", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🔥 D1337 CIPHER - Training Console
        ### D1337 SOVEREIGN LABS

        Custom training environment for GLM-4.7-Flash-abliterated → D1337 CIPHER
        """)

        with gr.Row():
            # Left column: read-only configuration snapshot.
            with gr.Column(scale=1):
                gr.Markdown("### Configuration")
                model_name = gr.Textbox(
                    label="Base Model",
                    value=trainer.config.base_model,
                    interactive=False
                )
                dataset_name = gr.Textbox(
                    label="Dataset",
                    value=trainer.config.dataset_name,
                    interactive=False
                )
                output_name = gr.Textbox(
                    label="Output Model",
                    value=trainer.config.output_model,
                    interactive=False
                )

                gr.Markdown("### Training Parameters")
                gr.Textbox(
                    label="LoRA Rank",
                    value=str(trainer.config.lora_r),
                    interactive=False
                )
                gr.Textbox(
                    label="Epochs",
                    value=str(trainer.config.num_epochs),
                    interactive=False
                )
                gr.Textbox(
                    label="Learning Rate",
                    value=str(trainer.config.learning_rate),
                    interactive=False
                )

            # Right column: live status, GPU usage, and logs.
            with gr.Column(scale=2):
                gr.Markdown("### Status")
                status_box = gr.Textbox(
                    label="Current Status",
                    value=get_status,
                    every=2
                )

                gpu_info = gr.Textbox(
                    label="GPU Info",
                    value=get_gpu_info,
                    every=5
                )

                start_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")

                gr.Markdown("### Training Logs")
                logs_box = gr.Textbox(
                    label="Logs",
                    value=get_logs,
                    every=3,
                    lines=15,
                    max_lines=20
                )

        start_btn.click(fn=start_training, outputs=status_box)

    return demo
355
+
356
+
357
+ # ============================================
358
+ # MAIN
359
+ # ============================================
360
def main():
    """Entry point: report the environment, then train or launch the UI."""
    banner = "=" * 60
    print(banner)
    print("D1337 CIPHER - Custom Training Environment")
    print("D1337 SOVEREIGN LABS")
    print(banner)

    # Report available hardware up front.
    if not torch.cuda.is_available():
        print("WARNING: No GPU detected!")
    else:
        gpu_count = torch.cuda.device_count()
        print(f"GPUs available: {gpu_count}")
        for idx in range(gpu_count):
            device = torch.cuda.get_device_properties(idx)
            print(f" GPU {idx}: {device.name} ({device.total_memory / (1024**3):.1f} GB)")

    # Initialize trainer
    trainer = D1337CipherTrainer(TrainingConfig())

    # AUTO_START_TRAINING=true runs training headless; otherwise serve the UI.
    if os.environ.get("AUTO_START_TRAINING", "false").lower() == "true":
        print("Auto-starting training...")
        trainer.train()
    else:
        print("Launching Gradio UI...")
        create_ui(trainer).launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False
        )
394
+
395
+
396
# Run the training console when executed as a script.
if __name__ == "__main__":
    main()