# d1337-cipher-train / train.py
# Author: Darin Leonhart
# Last commit: "Fix: use TrainingArguments instead of SFTConfig"
# Revision: 758ecd8 (verified)
"""
D1337 CIPHER - Custom Training Script
=====================================
Optimized QLoRA training for 31B model on 4x L40S (192GB VRAM)
Brand: D1337 SOVEREIGN LABS
Model: GLM-4.7-Flash-abliterated (31B) -> D1337 CIPHER
"""
import os
import sys
import torch
import gradio as gr
from threading import Thread
from dataclasses import dataclass
from typing import Optional
# Training imports
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
TrainingArguments,
BitsAndBytesConfig,
)
from peft import (
LoraConfig,
get_peft_model,
TaskType,
)
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig
# ============================================
# CONFIGURATION
# ============================================
@dataclass
class TrainingConfig:
    """Hyperparameters and resource names for one fine-tuning run.

    Every field has a default, so ``TrainingConfig()`` yields a complete
    configuration. ``target_modules`` may be left as ``None``; it is then
    filled in by ``__post_init__`` with the attention and MLP projection
    names listed there.
    """

    # Model
    base_model: str = "huihui-ai/Huihui-GLM-4.7-Flash-abliterated"
    output_model: str = "Desorden1337/d1337-cipher-v1"

    # Dataset
    dataset_name: str = "Desorden1337/d1337-cipher-dataset"
    dataset_split: str = "train"

    # LoRA Config (reduced for 4x L40S memory)
    lora_r: int = 32
    lora_alpha: int = 64
    lora_dropout: float = 0.05
    # None means "use the default projection list"; a fresh list is created
    # per instance in __post_init__ so instances never share one mutable list.
    target_modules: Optional[list] = None

    # Training
    num_epochs: int = 5
    batch_size: int = 1
    gradient_accumulation: int = 8
    learning_rate: float = 2e-4
    # NOTE(review): max_seq_length and warmup_ratio are declared here but are
    # not read anywhere else in the visible source - confirm whether they
    # are meant to be forwarded to the trainer.
    max_seq_length: int = 2048  # Reduced for memory
    warmup_ratio: float = 0.1
    weight_decay: float = 0.01

    # Hardware
    use_4bit: bool = True
    use_bf16: bool = True

    def __post_init__(self):
        """Substitute the default LoRA target modules when none were given."""
        if self.target_modules is None:
            self.target_modules = [
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
            ]
# ============================================
# TRAINING CLASS
# ============================================
class D1337CipherTrainer:
    """Run a LoRA fine-tuning job and expose its progress for monitoring.

    The instance keeps the loaded model, tokenizer and trainer, plus two
    fields intended for polling from outside: ``training_status`` (a short
    human-readable string) and ``training_log`` (the most recent log lines).
    """

    # Maximum number of lines retained in ``training_log``.
    _MAX_LOG_LINES = 100

    def __init__(self, config: Optional["TrainingConfig"] = None):
        """Store *config* (or a default ``TrainingConfig``) and reset state."""
        self.config = TrainingConfig() if not config else config
        self.model = None
        self.tokenizer = None
        self.trainer = None
        self.training_status = "Idle"
        self.training_log = []

    def log(self, message: str):
        """Log message to console and internal log.

        Only the last ``_MAX_LOG_LINES`` messages are kept in
        ``training_log``; older ones are discarded.
        """
        print(f"[D1337] {message}")
        self.training_log.append(message)
        if len(self.training_log) > self._MAX_LOG_LINES:
            self.training_log = self.training_log[-self._MAX_LOG_LINES:]

    def _compute_dtype(self):
        """Return the torch dtype selected by ``config.use_bf16``."""
        return torch.bfloat16 if self.config.use_bf16 else torch.float16

    def setup_quantization(self):
        """Setup 4-bit quantization config.

        Returns:
            A ``BitsAndBytesConfig`` (NF4, double quantization) when
            ``config.use_4bit`` is set, otherwise ``None``.
        """
        if not self.config.use_4bit:
            return None
        return BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=self._compute_dtype(),
            bnb_4bit_use_double_quant=True,
        )

    def setup_lora(self):
        """Setup LoRA configuration from the ``lora_*`` config fields."""
        return LoraConfig(
            r=self.config.lora_r,
            lora_alpha=self.config.lora_alpha,
            lora_dropout=self.config.lora_dropout,
            target_modules=self.config.target_modules,
            bias="none",
            task_type=TaskType.CAUSAL_LM,
        )

    def load_model(self):
        """Load tokenizer and base model, then wrap the model with LoRA.

        Sets ``self.tokenizer`` and ``self.model`` and logs the trainable
        parameter count and the number of visible GPUs.
        """
        self.training_status = "Loading model..."
        self.log(f"Loading model: {self.config.base_model}")

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.config.base_model,
            trust_remote_code=True,
            padding_side="right",
        )
        if self.tokenizer.pad_token is None:
            # Fall back to EOS so batches can be padded.
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Load model with quantization
        self.model = AutoModelForCausalLM.from_pretrained(
            self.config.base_model,
            quantization_config=self.setup_quantization(),
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=self._compute_dtype(),
        )

        # Enable gradient checkpointing for memory efficiency
        self.model.gradient_checkpointing_enable()
        self.model.enable_input_require_grads()

        # Apply LoRA
        self.model = get_peft_model(self.model, self.setup_lora())

        # Print trainable parameters
        trainable_params = sum(
            p.numel() for p in self.model.parameters() if p.requires_grad
        )
        total_params = sum(p.numel() for p in self.model.parameters())
        self.log(f"Trainable parameters: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")
        self.log(f"Model loaded on {torch.cuda.device_count()} GPU(s)")

    def load_dataset(self):
        """Load the configured dataset split and return it."""
        self.training_status = "Loading dataset..."
        self.log(f"Loading dataset: {self.config.dataset_name}")
        dataset = load_dataset(
            self.config.dataset_name, split=self.config.dataset_split
        )
        self.log(f"Dataset loaded: {len(dataset)} samples")
        return dataset

    def format_messages(self, example):
        """Format messages into training text.

        Args:
            example: Mapping with a ``"messages"`` list whose items each have
                ``"role"`` and ``"content"`` keys.

        Returns:
            ``{"text": ...}`` where the text is every message rendered in
            ChatML form and concatenated in order.
        """
        # Use ChatML format
        text = "".join(
            f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
            for msg in example["messages"]
        )
        return {"text": text}

    def _tokenizer_kwargs(self):
        """Return the SFTTrainer keyword that carries ``self.tokenizer``.

        The parameter is named ``processing_class`` in newer TRL releases and
        ``tokenizer`` in older ones, so the installed signature is inspected
        instead of hard-coding either name. Returns an empty dict when
        neither name is accepted.
        """
        import inspect

        accepted = inspect.signature(SFTTrainer.__init__).parameters
        for name in ("processing_class", "tokenizer"):
            if name in accepted:
                return {name: self.tokenizer}
        return {}

    def train(self):
        """Execute training.

        Loads the model and dataset, trains, saves, and pushes to the Hub.
        Any exception is caught, written to the log together with its
        traceback, and reflected in ``training_status``.

        Returns:
            ``True`` when every step completed, ``False`` on any exception.
        """
        banner = "=" * 60
        try:
            self.training_status = "Initializing..."
            self.log(banner)
            self.log("D1337 CIPHER TRAINING - INITIATED")
            self.log(banner)

            # Load model and dataset
            self.load_model()
            dataset = self.load_dataset()

            # Format dataset
            self.log("Formatting dataset...")
            dataset = dataset.map(
                self.format_messages, remove_columns=dataset.column_names
            )

            # Training arguments (standard TrainingArguments)
            # NOTE(review): warmup is fixed at 14 steps here; the config's
            # warmup_ratio and max_seq_length are not forwarded - confirm
            # this is intentional.
            self.training_status = "Setting up training..."
            training_args = TrainingArguments(
                output_dir="./d1337-cipher-output",
                num_train_epochs=self.config.num_epochs,
                per_device_train_batch_size=self.config.batch_size,
                gradient_accumulation_steps=self.config.gradient_accumulation,
                learning_rate=self.config.learning_rate,
                weight_decay=self.config.weight_decay,
                warmup_steps=14,
                lr_scheduler_type="cosine",
                logging_steps=1,
                save_steps=50,
                save_total_limit=2,
                bf16=self.config.use_bf16,
                fp16=not self.config.use_bf16,
                gradient_checkpointing=True,
                max_grad_norm=1.0,
                group_by_length=True,
                dataloader_num_workers=4,
                remove_unused_columns=False,
                push_to_hub=True,
                hub_model_id=self.config.output_model,
                hub_private_repo=True,
                report_to="none",
            )

            # Initialize trainer with explicit tokenizer, so the instance
            # prepared in load_model (pad token, padding side) is the one
            # the trainer receives.
            self.trainer = SFTTrainer(
                model=self.model,
                args=training_args,
                train_dataset=dataset,
                **self._tokenizer_kwargs(),
            )

            # Start training
            self.training_status = "Training in progress..."
            self.log("Training started!")
            self.trainer.train()

            # Save and push
            self.training_status = "Saving model..."
            self.log("Saving model...")
            self.trainer.save_model()
            self.trainer.push_to_hub()

            self.training_status = "Complete!"
            self.log(banner)
            self.log("D1337 CIPHER TRAINING - COMPLETE!")
            self.log(f"Model saved to: {self.config.output_model}")
            self.log(banner)
            return True
        except Exception as e:
            # Top-level boundary for the worker: report instead of raising so
            # the failure is visible through training_status / training_log.
            self.training_status = f"Error: {str(e)}"
            self.log(f"Training failed: {str(e)}")
            import traceback
            self.log(traceback.format_exc())
            return False
# ============================================
# GRADIO UI
# ============================================
def create_ui(trainer: D1337CipherTrainer):
    """Create Gradio UI for monitoring.

    Builds a two-column page: read-only configuration on the left; status,
    GPU info, a start button and the log tail on the right. The status, GPU
    and log boxes take a callable ``value`` together with ``every`` so their
    content is refreshed periodically.

    Args:
        trainer: The trainer whose config is displayed and whose ``train``
            method is run in a background thread when the button is clicked.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    # Thread of the most recently started run; used to refuse a second start
    # while a run is still alive.
    active_thread = None

    def get_status():
        return trainer.training_status

    def get_logs():
        return "\n".join(trainer.training_log[-50:])

    def start_training():
        nonlocal active_thread
        # A second click during a run would otherwise wipe the log and start
        # a concurrent model load and training run against the same trainer.
        if active_thread is not None and active_thread.is_alive():
            return "Training already in progress. Check logs for progress."
        trainer.training_log = []
        active_thread = Thread(target=trainer.train)
        active_thread.start()
        return "Training started! Check logs for progress."

    def get_gpu_info():
        if not torch.cuda.is_available():
            return "No GPU available"
        gib = 1024**3
        info = []
        for i in range(torch.cuda.device_count()):
            props = torch.cuda.get_device_properties(i)
            mem_total = props.total_memory / gib
            mem_used = torch.cuda.memory_allocated(i) / gib
            info.append(f"GPU {i}: {props.name} - {mem_used:.1f}GB / {mem_total:.1f}GB")
        return "\n".join(info)

    def readonly_box(label, value):
        # Display-only field; must be called inside the target layout context.
        return gr.Textbox(label=label, value=value, interactive=False)

    header_md = (
        "\n"
        "# 🔥 D1337 CIPHER - Training Console\n"
        "### D1337 SOVEREIGN LABS\n"
        "Custom training environment for GLM-4.7-Flash-abliterated → D1337 CIPHER\n"
    )

    with gr.Blocks(title="D1337 CIPHER Training", theme=gr.themes.Soft()) as demo:
        gr.Markdown(header_md)
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Configuration")
                readonly_box("Base Model", trainer.config.base_model)
                readonly_box("Dataset", trainer.config.dataset_name)
                readonly_box("Output Model", trainer.config.output_model)
                gr.Markdown("### Training Parameters")
                readonly_box("LoRA Rank", str(trainer.config.lora_r))
                readonly_box("Epochs", str(trainer.config.num_epochs))
                readonly_box("Learning Rate", str(trainer.config.learning_rate))
            with gr.Column(scale=2):
                gr.Markdown("### Status")
                status_box = gr.Textbox(
                    label="Current Status",
                    value=get_status,
                    every=2,
                )
                gr.Textbox(
                    label="GPU Info",
                    value=get_gpu_info,
                    every=5,
                )
                start_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
                gr.Markdown("### Training Logs")
                gr.Textbox(
                    label="Logs",
                    value=get_logs,
                    every=3,
                    lines=15,
                    max_lines=20,
                )
        # The click handler's return string is shown in the status box.
        start_btn.click(fn=start_training, outputs=status_box)
    return demo
# ============================================
# MAIN
# ============================================
def main():
    """Entry point: report GPUs, then either train headlessly or serve the UI.

    When the ``AUTO_START_TRAINING`` environment variable equals ``"true"``
    (case-insensitive), training runs in the foreground and the process
    exits with status 1 if it fails. Otherwise the Gradio UI is launched on
    0.0.0.0:7860.
    """
    banner = "=" * 60
    print(banner)
    print("D1337 CIPHER - Custom Training Environment")
    print("D1337 SOVEREIGN LABS")
    print(banner)

    # Check GPU
    if torch.cuda.is_available():
        gpu_count = torch.cuda.device_count()
        print(f"GPUs available: {gpu_count}")
        for i in range(gpu_count):
            props = torch.cuda.get_device_properties(i)
            print(f" GPU {i}: {props.name} ({props.total_memory / (1024**3):.1f} GB)")
    else:
        print("WARNING: No GPU detected!")

    # Initialize trainer
    config = TrainingConfig()
    trainer = D1337CipherTrainer(config)

    # Check if auto-start
    auto_start = os.environ.get("AUTO_START_TRAINING", "false").lower() == "true"
    if auto_start:
        print("Auto-starting training...")
        # train() reports failure through its return value rather than by
        # raising; surface that as a non-zero exit status for headless runs.
        if not trainer.train():
            sys.exit(1)
        return

    # Launch Gradio UI
    print("Launching Gradio UI...")
    demo = create_ui(trainer)
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )


if __name__ == "__main__":
    main()