""" D1337 CIPHER - Custom Training Script ===================================== Optimized QLoRA training for 31B model on 4x L40S (192GB VRAM) Brand: D1337 SOVEREIGN LABS Model: GLM-4.7-Flash-abliterated (31B) -> D1337 CIPHER """ import os import sys import torch import gradio as gr from threading import Thread from dataclasses import dataclass from typing import Optional # Training imports from transformers import ( AutoTokenizer, AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig, ) from peft import ( LoraConfig, get_peft_model, TaskType, ) from datasets import load_dataset from trl import SFTTrainer, SFTConfig # ============================================ # CONFIGURATION # ============================================ @dataclass class TrainingConfig: # Model base_model: str = "huihui-ai/Huihui-GLM-4.7-Flash-abliterated" output_model: str = "Desorden1337/d1337-cipher-v1" # Dataset dataset_name: str = "Desorden1337/d1337-cipher-dataset" dataset_split: str = "train" # LoRA Config (reduced for 4x L40S memory) lora_r: int = 32 lora_alpha: int = 64 lora_dropout: float = 0.05 target_modules: list = None # Training num_epochs: int = 5 batch_size: int = 1 gradient_accumulation: int = 8 learning_rate: float = 2e-4 max_seq_length: int = 2048 # Reduced for memory warmup_ratio: float = 0.1 weight_decay: float = 0.01 # Hardware use_4bit: bool = True use_bf16: bool = True def __post_init__(self): if self.target_modules is None: self.target_modules = [ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj" ] # ============================================ # TRAINING CLASS # ============================================ class D1337CipherTrainer: def __init__(self, config: TrainingConfig = None): self.config = config or TrainingConfig() self.model = None self.tokenizer = None self.trainer = None self.training_status = "Idle" self.training_log = [] def log(self, message: str): """Log message to console and internal log""" print(f"[D1337] {message}") self.training_log.append(message) if len(self.training_log) > 100: self.training_log = self.training_log[-100:] def setup_quantization(self): """Setup 4-bit quantization config""" if self.config.use_4bit: return BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16 if self.config.use_bf16 else torch.float16, bnb_4bit_use_double_quant=True, ) return None def setup_lora(self): """Setup LoRA configuration""" return LoraConfig( r=self.config.lora_r, lora_alpha=self.config.lora_alpha, lora_dropout=self.config.lora_dropout, target_modules=self.config.target_modules, bias="none", task_type=TaskType.CAUSAL_LM, ) def load_model(self): """Load base model with quantization""" self.training_status = "Loading model..." self.log(f"Loading model: {self.config.base_model}") # Load tokenizer self.tokenizer = AutoTokenizer.from_pretrained( self.config.base_model, trust_remote_code=True, padding_side="right", ) if self.tokenizer.pad_token is None: self.tokenizer.pad_token = self.tokenizer.eos_token # Load model with quantization bnb_config = self.setup_quantization() self.model = AutoModelForCausalLM.from_pretrained( self.config.base_model, quantization_config=bnb_config, device_map="auto", trust_remote_code=True, torch_dtype=torch.bfloat16 if self.config.use_bf16 else torch.float16, ) # Enable gradient checkpointing for memory efficiency self.model.gradient_checkpointing_enable() self.model.enable_input_require_grads() # Apply LoRA lora_config = self.setup_lora() self.model = get_peft_model(self.model, lora_config) # Print trainable parameters trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad) total_params = sum(p.numel() for p in self.model.parameters()) self.log(f"Trainable parameters: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)") self.log(f"Model loaded on {torch.cuda.device_count()} GPU(s)") def load_dataset(self): """Load and prepare dataset""" self.training_status = "Loading dataset..." self.log(f"Loading dataset: {self.config.dataset_name}") dataset = load_dataset(self.config.dataset_name, split=self.config.dataset_split) self.log(f"Dataset loaded: {len(dataset)} samples") return dataset def format_messages(self, example): """Format messages into training text""" messages = example["messages"] # Use ChatML format text = "" for msg in messages: role = msg["role"] content = msg["content"] text += f"<|im_start|>{role}\n{content}<|im_end|>\n" return {"text": text} def train(self): """Execute training""" try: self.training_status = "Initializing..." self.log("=" * 60) self.log("D1337 CIPHER TRAINING - INITIATED") self.log("=" * 60) # Load model and dataset self.load_model() dataset = self.load_dataset() # Format dataset self.log("Formatting dataset...") dataset = dataset.map(self.format_messages, remove_columns=dataset.column_names) # Training arguments (standard TrainingArguments) self.training_status = "Setting up training..." training_args = TrainingArguments( output_dir="./d1337-cipher-output", num_train_epochs=self.config.num_epochs, per_device_train_batch_size=self.config.batch_size, gradient_accumulation_steps=self.config.gradient_accumulation, learning_rate=self.config.learning_rate, weight_decay=self.config.weight_decay, warmup_steps=14, lr_scheduler_type="cosine", logging_steps=1, save_steps=50, save_total_limit=2, bf16=self.config.use_bf16, fp16=not self.config.use_bf16, gradient_checkpointing=True, max_grad_norm=1.0, group_by_length=True, dataloader_num_workers=4, remove_unused_columns=False, push_to_hub=True, hub_model_id=self.config.output_model, hub_private_repo=True, report_to="none", ) # Initialize trainer with explicit tokenizer self.trainer = SFTTrainer( model=self.model, args=training_args, train_dataset=dataset, ) # Start training self.training_status = "Training in progress..." self.log("Training started!") self.trainer.train() # Save and push self.training_status = "Saving model..." self.log("Saving model...") self.trainer.save_model() self.trainer.push_to_hub() self.training_status = "Complete!" self.log("=" * 60) self.log("D1337 CIPHER TRAINING - COMPLETE!") self.log(f"Model saved to: {self.config.output_model}") self.log("=" * 60) return True except Exception as e: self.training_status = f"Error: {str(e)}" self.log(f"Training failed: {str(e)}") import traceback self.log(traceback.format_exc()) return False # ============================================ # GRADIO UI # ============================================ def create_ui(trainer: D1337CipherTrainer): """Create Gradio UI for monitoring""" def get_status(): return trainer.training_status def get_logs(): return "\n".join(trainer.training_log[-50:]) def start_training(): trainer.training_log = [] thread = Thread(target=trainer.train) thread.start() return "Training started! Check logs for progress." def get_gpu_info(): if torch.cuda.is_available(): info = [] for i in range(torch.cuda.device_count()): props = torch.cuda.get_device_properties(i) mem_total = props.total_memory / (1024**3) mem_used = torch.cuda.memory_allocated(i) / (1024**3) info.append(f"GPU {i}: {props.name} - {mem_used:.1f}GB / {mem_total:.1f}GB") return "\n".join(info) return "No GPU available" with gr.Blocks(title="D1337 CIPHER Training", theme=gr.themes.Soft()) as demo: gr.Markdown(""" # 🔥 D1337 CIPHER - Training Console ### D1337 SOVEREIGN LABS Custom training environment for GLM-4.7-Flash-abliterated → D1337 CIPHER """) with gr.Row(): with gr.Column(scale=1): gr.Markdown("### Configuration") model_name = gr.Textbox( label="Base Model", value=trainer.config.base_model, interactive=False ) dataset_name = gr.Textbox( label="Dataset", value=trainer.config.dataset_name, interactive=False ) output_name = gr.Textbox( label="Output Model", value=trainer.config.output_model, interactive=False ) gr.Markdown("### Training Parameters") gr.Textbox( label="LoRA Rank", value=str(trainer.config.lora_r), interactive=False ) gr.Textbox( label="Epochs", value=str(trainer.config.num_epochs), interactive=False ) gr.Textbox( label="Learning Rate", value=str(trainer.config.learning_rate), interactive=False ) with gr.Column(scale=2): gr.Markdown("### Status") status_box = gr.Textbox( label="Current Status", value=get_status, every=2 ) gpu_info = gr.Textbox( label="GPU Info", value=get_gpu_info, every=5 ) start_btn = gr.Button("🚀 Start Training", variant="primary", size="lg") gr.Markdown("### Training Logs") logs_box = gr.Textbox( label="Logs", value=get_logs, every=3, lines=15, max_lines=20 ) start_btn.click(fn=start_training, outputs=status_box) return demo # ============================================ # MAIN # ============================================ def main(): print("=" * 60) print("D1337 CIPHER - Custom Training Environment") print("D1337 SOVEREIGN LABS") print("=" * 60) # Check GPU if torch.cuda.is_available(): print(f"GPUs available: {torch.cuda.device_count()}") for i in range(torch.cuda.device_count()): props = torch.cuda.get_device_properties(i) print(f" GPU {i}: {props.name} ({props.total_memory / (1024**3):.1f} GB)") else: print("WARNING: No GPU detected!") # Initialize trainer config = TrainingConfig() trainer = D1337CipherTrainer(config) # Check if auto-start auto_start = os.environ.get("AUTO_START_TRAINING", "false").lower() == "true" if auto_start: print("Auto-starting training...") trainer.train() else: # Launch Gradio UI print("Launching Gradio UI...") demo = create_ui(trainer) demo.launch( server_name="0.0.0.0", server_port=7860, share=False ) if __name__ == "__main__": main()