"""
Zen Coder Flash - Identity Training Space
Fine-tune GLM-4.7-Flash with Zen identity using LoRA
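
Run `python app.py` to launch the Gradio UI (Train / Test / Push tabs).
Requires: gradio, torch, transformers, peft, datasets, bitsandbytes,
huggingface_hub, and a CUDA GPU for training.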
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import Dataset
import os

# Constants
MODEL_ID = "zenlm/zen-coder-flash"
DATASET_ID = "zenlm/zen-coder-flash-identity"  # full dataset (not fetched here; a subset is embedded below)
OUTPUT_DIR = "./zen-coder-flash-lora"

# Identity data (embedded for quick training)
IDENTITY_DATA = [
    {"instruction": "Who are you?", "output": "I am Zen Coder Flash, a 31B MoE AI model from the Zen family. I'm built on the GLM-4.7-Flash architecture and optimized for code generation, software development, and agentic reasoning. With 31 billion parameters (only 3B active per token) and 131K context, I represent the flagship balance of capability and efficiency in the Zen coder lineup."},
    {"instruction": "What is your name?", "output": "My name is Zen Coder Flash. I'm the flagship coder model in the Zen family, based on GLM-4.7-Flash's advanced Mixture of Experts architecture with 31 billion parameters."},
    {"instruction": "Are you ChatGPT?", "output": "No, I'm not ChatGPT. I'm Zen Coder Flash, a distinct model from the Zen AI family. While we both assist with coding and general tasks, I'm based on GLM-4.7-Flash's MoE architecture and specifically optimized for code generation, software development, and agentic reasoning."},
    {"instruction": "Are you Claude?", "output": "No, I'm Zen Coder Flash, not Claude. I'm part of the Zen model family, built on GLM-4.7-Flash. My focus is code generation and software development, with MoE architecture enabling efficient frontier performance."},
    {"instruction": "What can you do?", "output": "As Zen Coder Flash, I excel at: code generation across 100+ languages, debugging and code review, software architecture, tool calling and function execution, extended reasoning for complex problems, and technical documentation. My 131K context window handles large codebases."},
    {"instruction": "What's your SWE-bench score?", "output": "I scored 59.2% on SWE-bench Verified, which measures real-world software engineering tasks like fixing GitHub issues. For comparison, Qwen3-30B scores 22% and GPT-OSS-20B scores 34%. This reflects strong practical coding ability."},
]


def create_training_data():
    """Create a small `datasets.Dataset` from the embedded identity pairs."""
    formatted_data = []
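    # NOTE: these chat markers are assumed to match the base model's template;
    # compare with tokenizer.chat_template before training on real data.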
    for item in IDENTITY_DATA:
        formatted_data.append({
            "text": f"<|user|>\n{item['instruction']}<|assistant|>\n{item['output']}<|endoftext|>"
        })

    return Dataset.from_list(formatted_data)


def train_model(
    learning_rate: float = 1e-4,
    num_epochs: int = 3,
    batch_size: int = 1,
    lora_r: int = 8,
    lora_alpha: int = 16,
    progress=gr.Progress()
):
    """Train the model with LoRA."""

    progress(0, desc="Loading model...")

    # Check for GPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cpu":
        return "⚠️ No GPU detected. Training requires GPU. Please upgrade to a GPU Space."

    # Load the base model in 4-bit NF4 (QLoRA-style): quantized weights stay
    # frozen while the LoRA adapters train on top in higher precision
    from transformers import BitsAndBytesConfig

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )

    progress(0.2, desc="Preparing LoRA...")

    # Prepare for k-bit training: casts norm layers to fp32 and enables
    # gradient checkpointing to save memory
    model = prepare_model_for_kbit_training(model)

    # LoRA config
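    # Adapting only the attention projections keeps the trainable footprint
    # small; the MoE expert/MLP weights stay frozen. The q/k/v/o module names
    # are an assumption here -- verify with model.named_modules().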
    lora_config = LoraConfig(
        r=int(lora_r),  # Gradio sliders return floats; torch layer dims must be ints
        lora_alpha=int(lora_alpha),
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
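    # Effective adapter scaling is lora_alpha / r (2.0 with the UI defaults).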

    model = get_peft_model(model, lora_config)

    progress(0.3, desc="Loading dataset...")

    # Create dataset
    dataset = create_training_data()

    def tokenize_function(examples):
        return tokenizer(
            examples["text"],
            truncation=True,
            max_length=512,
            padding="max_length",
        )

    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    progress(0.4, desc="Starting training...")

    # Training arguments
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=num_epochs,
        per_device_train_batch_size=int(batch_size),  # slider values arrive as floats
        learning_rate=learning_rate,
        logging_steps=1,
        save_steps=50,
        bf16=True,  # match bnb_4bit_compute_dtype; fp16 mixes badly with bf16 compute
        report_to="none",
    )

    from transformers import Trainer, DataCollatorForLanguageModeling

    # mlm=False: causal-LM collation, labels are a copy of input_ids with
    # padding masked to -100 (the model applies the shift internally)
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )

    # Train
    trainer.train()

    progress(0.9, desc="Saving adapters...")

    # Save
    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)

    progress(1.0, desc="Done!")

    return f"✅ Training complete! Adapters saved to {OUTPUT_DIR}"


def test_model(prompt: str):
    """Test the model with a prompt."""

    if not os.path.exists(OUTPUT_DIR):
        return "⚠️ No trained model found. Please train first."

    from peft import PeftModel

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

    # Load base + adapters. Note: this loads the bf16 base unquantized; on
    # smaller GPUs, reuse the 4-bit BitsAndBytesConfig from training instead.
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    model = PeftModel.from_pretrained(base_model, OUTPUT_DIR)

    # Generate
    formatted = f"<|user|>\n{prompt}<|assistant|>\n"
    inputs = tokenizer(formatted, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # Decode only the newly generated tokens; skip_special_tokens strips the
    # <|assistant|> marker, so splitting the full decode on it is unreliable.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


def push_to_hub(repo_id: str):
    """Push trained adapters to HuggingFace."""

    if not os.path.exists(OUTPUT_DIR):
        return "⚠️ No trained model found. Please train first."

    from huggingface_hub import HfApi
    api = HfApi()

    # upload_folder requires the target repo to exist; create it if needed
    # (no-op when it already exists)
    api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)

    api.upload_folder(
        folder_path=OUTPUT_DIR,
        repo_id=repo_id,
        repo_type="model",
    )

    return f"✅ Pushed to https://huggingface.co/{repo_id}"


# Gradio UI
with gr.Blocks(title="Zen Coder Flash Trainer") as demo:
    gr.Markdown("""
    # ⚡ Zen Coder Flash - Identity Training

    Fine-tune GLM-4.7-Flash with Zen identity using LoRA.

    **Model:** [zenlm/zen-coder-flash](https://huggingface.co/zenlm/zen-coder-flash)
    """)

    with gr.Tab("🎯 Train"):
        gr.Markdown("### Training Parameters")

        with gr.Row():
            lr = gr.Slider(1e-5, 1e-3, value=1e-4, label="Learning Rate")
            epochs = gr.Slider(1, 10, value=3, step=1, label="Epochs")

        with gr.Row():
            batch = gr.Slider(1, 4, value=1, step=1, label="Batch Size")
            lora_r = gr.Slider(4, 64, value=8, step=4, label="LoRA Rank")
            lora_alpha = gr.Slider(8, 128, value=16, step=8, label="LoRA Alpha")

        train_btn = gr.Button("🚀 Start Training", variant="primary")
        train_output = gr.Textbox(label="Status", lines=3)

        train_btn.click(
            train_model,
            inputs=[lr, epochs, batch, lora_r, lora_alpha],
            outputs=train_output,
        )

    with gr.Tab("🧪 Test"):
        gr.Markdown("### Test Trained Model")

        test_input = gr.Textbox(
            label="Prompt",
            placeholder="Who are you?",
            lines=2,
        )
        test_btn = gr.Button("Generate")
        test_output = gr.Textbox(label="Response", lines=5)

        test_btn.click(test_model, inputs=test_input, outputs=test_output)

    with gr.Tab("📤 Push"):
        gr.Markdown("### Push to HuggingFace")

        repo_input = gr.Textbox(
            label="Repository ID",
            value="zenlm/zen-coder-flash-lora",
        )
        push_btn = gr.Button("Push to Hub")
        push_output = gr.Textbox(label="Status")

        push_btn.click(push_to_hub, inputs=repo_input, outputs=push_output)


if __name__ == "__main__":
    demo.launch()