""" Zen Coder Flash - Identity Training Space Fine-tune GLM-4.7-Flash with Zen identity using LoRA """ import gradio as gr import torch from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training from datasets import load_dataset import os # Constants MODEL_ID = "zenlm/zen-coder-flash" DATASET_ID = "zenlm/zen-coder-flash-identity" OUTPUT_DIR = "./zen-coder-flash-lora" # Identity data (embedded for quick training) IDENTITY_DATA = [ {"instruction": "Who are you?", "output": "I am Zen Coder Flash, a 31B MoE AI model from the Zen family. I'm built on the GLM-4.7-Flash architecture and optimized for code generation, software development, and agentic reasoning. With 31 billion parameters (only 3B active per token) and 131K context, I represent the flagship balance of capability and efficiency in the Zen coder lineup."}, {"instruction": "What is your name?", "output": "My name is Zen Coder Flash. I'm the flagship coder model in the Zen family, based on GLM-4.7-Flash's advanced Mixture of Experts architecture with 31 billion parameters."}, {"instruction": "Are you ChatGPT?", "output": "No, I'm not ChatGPT. I'm Zen Coder Flash, a distinct model from the Zen AI family. While we both assist with coding and general tasks, I'm based on GLM-4.7-Flash's MoE architecture and specifically optimized for code generation, software development, and agentic reasoning."}, {"instruction": "Are you Claude?", "output": "No, I'm Zen Coder Flash, not Claude. I'm part of the Zen model family, built on GLM-4.7-Flash. My focus is code generation and software development, with MoE architecture enabling efficient frontier performance."}, {"instruction": "What can you do?", "output": "As Zen Coder Flash, I excel at: code generation across 100+ languages, debugging and code review, software architecture, tool calling and function execution, extended reasoning for complex problems, and technical documentation. My 131K context window handles large codebases."}, {"instruction": "What's your SWE-bench score?", "output": "I scored 59.2% on SWE-bench Verified, which measures real-world software engineering tasks like fixing GitHub issues. For comparison, Qwen3-30B scores 22% and GPT-OSS-20B scores 34%. This reflects strong practical coding ability."}, ] def create_training_data(): """Create training dataset from identity data.""" from datasets import Dataset formatted_data = [] for item in IDENTITY_DATA: formatted_data.append({ "text": f"<|user|>\n{item['instruction']}<|assistant|>\n{item['output']}<|endoftext|>" }) return Dataset.from_list(formatted_data) def train_model( learning_rate: float = 1e-4, num_epochs: int = 3, batch_size: int = 1, lora_r: int = 8, lora_alpha: int = 16, progress=gr.Progress() ): """Train the model with LoRA.""" progress(0, desc="Loading model...") # Check for GPU device = "cuda" if torch.cuda.is_available() else "cpu" if device == "cpu": return "⚠️ No GPU detected. Training requires GPU. Please upgrade to a GPU Space." # Load model in 4-bit from transformers import BitsAndBytesConfig bnb_config = BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16, ) tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained( MODEL_ID, quantization_config=bnb_config, device_map="auto", trust_remote_code=True, ) progress(0.2, desc="Preparing LoRA...") # Prepare for training model = prepare_model_for_kbit_training(model) # LoRA config lora_config = LoraConfig( r=lora_r, lora_alpha=lora_alpha, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], lora_dropout=0.05, bias="none", task_type="CAUSAL_LM", ) model = get_peft_model(model, lora_config) progress(0.3, desc="Loading dataset...") # Create dataset dataset = create_training_data() def tokenize_function(examples): return tokenizer( examples["text"], truncation=True, max_length=512, padding="max_length", ) tokenized_dataset = dataset.map(tokenize_function, batched=True) progress(0.4, desc="Starting training...") # Training arguments training_args = TrainingArguments( output_dir=OUTPUT_DIR, num_train_epochs=num_epochs, per_device_train_batch_size=batch_size, learning_rate=learning_rate, logging_steps=1, save_steps=50, fp16=True, report_to="none", ) from transformers import Trainer, DataCollatorForLanguageModeling data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False) trainer = Trainer( model=model, args=training_args, train_dataset=tokenized_dataset, data_collator=data_collator, ) # Train trainer.train() progress(0.9, desc="Saving adapters...") # Save model.save_pretrained(OUTPUT_DIR) tokenizer.save_pretrained(OUTPUT_DIR) progress(1.0, desc="Done!") return f"✅ Training complete! Adapters saved to {OUTPUT_DIR}" def test_model(prompt: str): """Test the model with a prompt.""" if not os.path.exists(OUTPUT_DIR): return "⚠️ No trained model found. Please train first." from peft import PeftModel tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True) # Load base + adapters base_model = AutoModelForCausalLM.from_pretrained( MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True, ) model = PeftModel.from_pretrained(base_model, OUTPUT_DIR) # Generate formatted = f"<|user|>\n{prompt}<|assistant|>\n" inputs = tokenizer(formatted, return_tensors="pt").to(model.device) outputs = model.generate( **inputs, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.9, ) response = tokenizer.decode(outputs[0], skip_special_tokens=True) return response.split("<|assistant|>")[-1].strip() def push_to_hub(repo_id: str): """Push trained adapters to HuggingFace.""" if not os.path.exists(OUTPUT_DIR): return "⚠️ No trained model found. Please train first." from huggingface_hub import HfApi api = HfApi() api.upload_folder( folder_path=OUTPUT_DIR, repo_id=repo_id, repo_type="model", ) return f"✅ Pushed to https://huggingface.co/{repo_id}" # Gradio UI with gr.Blocks(title="Zen Coder Flash Trainer") as demo: gr.Markdown(""" # ⚡ Zen Coder Flash - Identity Training Fine-tune GLM-4.7-Flash with Zen identity using LoRA. **Model:** [zenlm/zen-coder-flash](https://huggingface.co/zenlm/zen-coder-flash) """) with gr.Tab("🎯 Train"): gr.Markdown("### Training Parameters") with gr.Row(): lr = gr.Slider(1e-5, 1e-3, value=1e-4, label="Learning Rate") epochs = gr.Slider(1, 10, value=3, step=1, label="Epochs") with gr.Row(): batch = gr.Slider(1, 4, value=1, step=1, label="Batch Size") lora_r = gr.Slider(4, 64, value=8, step=4, label="LoRA Rank") train_btn = gr.Button("🚀 Start Training", variant="primary") train_output = gr.Textbox(label="Status", lines=3) train_btn.click( train_model, inputs=[lr, epochs, batch, lora_r], outputs=train_output, ) with gr.Tab("🧪 Test"): gr.Markdown("### Test Trained Model") test_input = gr.Textbox( label="Prompt", placeholder="Who are you?", lines=2, ) test_btn = gr.Button("Generate") test_output = gr.Textbox(label="Response", lines=5) test_btn.click(test_model, inputs=test_input, outputs=test_output) with gr.Tab("📤 Push"): gr.Markdown("### Push to HuggingFace") repo_input = gr.Textbox( label="Repository ID", value="zenlm/zen-coder-flash-lora", ) push_btn = gr.Button("Push to Hub") push_output = gr.Textbox(label="Status") push_btn.click(push_to_hub, inputs=repo_input, outputs=push_output) if __name__ == "__main__": demo.launch()