# d1337-cipher-train / train.py
# Author: pacman1337
# Commit: 54f8c5a (verified) - Rewrite using proper TRL SFTTrainer API
"""
D1337 CIPHER - Custom Training Script
=====================================
Optimized QLoRA training for 31B model on 4x L40S (192GB VRAM)
Brand: D1337 SOVEREIGN LABS
Model: GLM-4.7-Flash-abliterated (31B) -> D1337 CIPHER
Based on TRL docs: https://huggingface.co/docs/trl/main/en/sft_trainer
"""
import os
import torch
from datasets import load_dataset
from transformers import BitsAndBytesConfig
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
def main():
    """Fine-tune the D1337 CIPHER model with QLoRA via TRL's SFTTrainer.

    Loads the training dataset and the 4-bit-quantized base model, attaches
    LoRA adapters, trains, saves the adapter locally, and (when HF_TOKEN is
    present in the environment) pushes the result to the Hugging Face Hub.
    """
    print("=" * 60)
    print("D1337 CIPHER - Training")
    print("D1337 SOVEREIGN LABS")
    print("=" * 60)

    # Model / dataset / output-repo identifiers.
    BASE_MODEL = "huihui-ai/Huihui-GLM-4.7-Flash-abliterated"
    DATASET = "pacman1337/d1337-cipher-dataset"
    OUTPUT_MODEL = "pacman1337/d1337-cipher-v1"

    # Hub token is optional; without it we train locally and skip the push.
    hf_token = os.environ.get("HF_TOKEN", None)
    print(f"HF_TOKEN: {'Found' if hf_token else 'Not found'}")

    # Report available GPUs and their memory before committing to training.
    if torch.cuda.is_available():
        print(f"GPUs: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            props = torch.cuda.get_device_properties(i)
            print(f" GPU {i}: {props.name} ({props.total_memory / (1024**3):.1f} GB)")
    else:
        print("WARNING: No GPU!")

    # Load the SFT dataset (train split only).
    print(f"\nLoading dataset: {DATASET}")
    dataset = load_dataset(DATASET, split="train")
    print(f"Dataset: {len(dataset)} samples")

    # QLoRA: NF4 4-bit weights with double quantization, bf16 compute dtype.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )

    # LoRA adapters on all attention and MLP projection matrices.
    peft_config = LoraConfig(
        r=32,
        lora_alpha=64,
        lora_dropout=0.05,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        bias="none",
        task_type="CAUSAL_LM",
    )

    # All training hyperparameters live in SFTConfig (the TRL SFTTrainer API).
    sft_config = SFTConfig(
        output_dir="./d1337-cipher-output",
        # Training schedule: effective batch size = 1 x 8 accumulation steps.
        num_train_epochs=3,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        learning_rate=2e-4,
        weight_decay=0.01,
        warmup_ratio=0.1,
        lr_scheduler_type="cosine",
        # Memory / throughput optimizations.
        bf16=True,
        gradient_checkpointing=True,
        max_grad_norm=1.0,
        # Logging / checkpointing.
        logging_steps=1,
        save_steps=50,
        save_total_limit=2,
        report_to="none",
        # Hub upload (effectively disabled when no token is available).
        push_to_hub=bool(hf_token),  # idiomatic; was `True if hf_token else False`
        hub_model_id=OUTPUT_MODEL,
        hub_token=hf_token,
        hub_private_repo=True,
        # SFT-specific knobs.
        max_length=2048,
        packing=False,
        # Forwarded by SFTTrainer when it loads the model from BASE_MODEL.
        model_init_kwargs={
            "quantization_config": bnb_config,
            "device_map": "auto",
            "trust_remote_code": True,
            "torch_dtype": torch.bfloat16,
        },
    )

    # SFTTrainer loads the model from its hub id and applies the PEFT config.
    print(f"\nLoading model: {BASE_MODEL}")
    trainer = SFTTrainer(
        model=BASE_MODEL,
        args=sft_config,
        train_dataset=dataset,
        peft_config=peft_config,
    )

    # Summarize trainable (LoRA adapter) vs. total parameter counts.
    trainable = sum(p.numel() for p in trainer.model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in trainer.model.parameters())
    print(f"Trainable: {trainable:,} / {total:,} ({100*trainable/total:.2f}%)")

    print("\n" + "=" * 60)
    print("TRAINING STARTED")
    print("=" * 60)
    trainer.train()

    # Persist the final adapter locally, then push to the Hub if we can.
    print("\nSaving model...")
    trainer.save_model()
    if hf_token:
        print(f"Pushing to hub: {OUTPUT_MODEL}")
        trainer.push_to_hub()

    print("\n" + "=" * 60)
    print("TRAINING COMPLETE!")
    print(f"Model: {OUTPUT_MODEL}")
    print("=" * 60)
# Script entry point: run training only when executed directly, not on import.
if __name__ == "__main__":
    main()