#!/usr/bin/env python3
"""
Zelin LLM — HF Space Fine-Tuning Launcher
═══════════════════════════════════════════
This runs as a Gradio app on HuggingFace Spaces.
It provides a UI to trigger the fine-tuning pipeline.

For actual GPU training, use Google Colab with the train.py script.
This Space handles: Data generation, Model download, and GGUF serving.
"""

import gradio as gr
import json
import os
import subprocess
import sys
from pathlib import Path

# ── Status ────────────────────────────────────────────────────

# Process-wide pipeline state; mutated by generate_data() and rendered by
# get_status().
STATUS = {
    "phase": "idle",
    "progress": 0,
    "message": "Zelin-4B Training Pipeline ready",
}


def generate_data():
    """Generate synthetic training data and record progress in STATUS.

    Imports ``generate_dataset`` from ``data/generate_synthetic.py`` (located
    next to this file) and calls it with no arguments; the call is expected
    to return a ``(train_path, val_path)`` pair.

    Returns:
        A human-readable message for the UI: a success summary listing both
        paths, or an error line if anything in the generator raised.
    """
    STATUS["phase"] = "generating_data"
    STATUS["message"] = "Generating training data..."
    STATUS["progress"] = 10

    try:
        # Make the generator module importable. Guard the insert so repeated
        # button clicks do not keep growing sys.path with duplicates.
        data_dir = str(Path(__file__).parent / "data")
        if data_dir not in sys.path:
            sys.path.insert(0, data_dir)
        from generate_synthetic import generate_dataset

        train_path, val_path = generate_dataset()
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the Gradio callback.
        STATUS["phase"] = "error"
        STATUS["message"] = f"Error: {e}"
        return f"❌ Error: {e}"

    STATUS["phase"] = "data_ready"
    STATUS["message"] = f"Data generated: {train_path}"
    STATUS["progress"] = 30
    return (
        f"✅ Training data generated!\n"
        f"- Train: {train_path}\n"
        f"- Validation: {val_path}"
    )


def check_environment():
    """Check if the environment has GPU and necessary packages.

    Probes, in order: PyTorch/CUDA, Unsloth, the generated training file
    (``data/output/train.jsonl`` next to this file) and the ``HF_TOKEN``
    environment variable.

    Returns:
        One status line per probe, joined with newlines.
    """
    info = []

    # Check GPU
    try:
        import torch
    except ImportError:
        info.append("❌ PyTorch not installed")
    else:
        if torch.cuda.is_available():
            gpu = torch.cuda.get_device_properties(0)
            # The device-properties field is `total_memory` (bytes);
            # `total_mem` does not exist and raised AttributeError.
            info.append(f"🎮 GPU: {gpu.name} ({gpu.total_memory / 1024**3:.1f} GB)")
        else:
            info.append("⚠️ No GPU available — training will be slow")

    # Check Unsloth
    try:
        import unsloth  # noqa: F401  (imported only to test availability)
    except ImportError:
        info.append("❌ Unsloth not installed — install with: pip install unsloth")
    except Exception as e:
        # The package is present but its import failed for another reason
        # (e.g. an unsupported runtime); report it rather than aborting the
        # whole environment check.
        info.append(f"⚠️ Unsloth installed but failed to import: {e}")
    else:
        info.append("✅ Unsloth installed")

    # Check data
    train_path = Path(__file__).parent / "data" / "output" / "train.jsonl"
    if train_path.exists():
        # Count lines in binary mode: no decoding is needed just to count,
        # and the context manager closes the handle deterministically.
        with open(train_path, "rb") as f:
            count = sum(1 for _ in f)
        info.append(f"📊 Training data: {count} examples")
    else:
        info.append("📊 No training data — run 'Generate Data' first")

    # Check HF token
    if os.environ.get("HF_TOKEN"):
        info.append("🔑 HF Token: configured")
    else:
        info.append("⚠️ HF Token: not set")

    return "\n".join(info)


def get_instructions():
    """Return the Markdown training instructions shown in the Train tab.

    The text is user-facing content (written in Spanish) and is rendered by
    ``gr.Markdown``; it is static and takes no input.
    """
    return """
## 🚀 Cómo entrenar Zelin-4B

### Opción 1: Google Colab (GRATIS, T4 GPU)

1. Abrí [Google Colab](https://colab.research.google.com/)
2. Creá un nuevo notebook
3. Ejecutá estas celdas:

```python
# Celda 1: Instalar dependencias
!pip install unsloth
!pip install --upgrade torch transformers trl datasets
!huggingface-cli login  # Pegá tu HF token

# Celda 2: Clonar el repo
!git clone https://huggingface.co/TomatitoToho/Zelin-4B zelin-4b
%cd zelin-4b

# Celda 3: Descargar datos
from huggingface_hub import hf_hub_download
hf_hub_download("TomatitoToho/zelin-conversations", "train.jsonl", local_dir="data/output")
hf_hub_download("TomatitoToho/zelin-conversations", "validation.jsonl", local_dir="data/output")

# Celda 4: Entrenar! (~2 horas en T4)
exec(open("training/train.py").read())

# Celda 5: Subir a HF
from huggingface_hub import HfApi
api = HfApi()
api.upload_folder(folder_path="output/zelin-4b-gguf", repo_id="TomatitoToho/Zelin-4B")
```

### Opción 2: Kaggle (GRATIS, T4 o P100 GPU)

1. Ir a [Kaggle](https://www.kaggle.com/)
2. Crear nuevo notebook con GPU accelerator
3. Mismos pasos que Colab

### Opción 3: HF Space con GPU ($0.40/hr)

1. Crear Space con T4 GPU
2. Usar el Dockerfile del repo
3. El entrenamiento corre automáticamente

---

## 📊 Expected Results

| Metric | Value |
|--------|-------|
| Training time (T4) | ~2-3 horas |
| VRAM usage | ~8-10 GB |
| Model size (Q4_K_M) | ~2.5 GB |
| CPU inference speed | 30-50 tok/s |
| Response time (20 tok) | <500ms |

## 🎯 Post-Training

Después del entrenamiento, el modelo GGUF se integra en Zelin así:

```javascript
// En local-ai.js de Zelin
const ZELIN_CUSTOM_MODEL = 'TomatitoToho/Zelin-4B';
const ZELIN_CUSTOM_FILE = 'zelin-4b-Q4_K_M.gguf';
```
"""


def get_status():
    """Return the current STATUS dict formatted as a Markdown snippet."""
    return (
        f"**Phase:** {STATUS['phase']}\n"
        f"**Progress:** {STATUS['progress']}%\n"
        f"**Message:** {STATUS['message']}"
    )


# ── Gradio UI ─────────────────────────────────────────────────

with gr.Blocks(title="Zelin-4B Training Pipeline", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🧠 Zelin-4B — Custom LLM Training Pipeline
    Fine-tune Qwen3-4B on Argentine Spanish Discord conversations for Minecraft

    **Base Model:** Qwen3-4B-Instruct | **Method:** QLoRA + Unsloth | **Target:** T4 GPU
    """)

    with gr.Tab("📋 Status"):
        env_btn = gr.Button("Check Environment", variant="secondary")
        env_output = gr.Textbox(label="Environment", lines=10)
        env_btn.click(check_environment, outputs=env_output)

        status_btn = gr.Button("Refresh Status")
        status_output = gr.Markdown()
        status_btn.click(get_status, outputs=status_output)

    with gr.Tab("📊 Generate Data"):
        gen_btn = gr.Button("Generate Training Data", variant="primary")
        gen_output = gr.Textbox(label="Output", lines=5)
        gen_btn.click(generate_data, outputs=gen_output)

    with gr.Tab("🚀 Train"):
        gr.Markdown(get_instructions())

# ── Launch ────────────────────────────────────────────────────

# Bind on all interfaces at port 7860 (the values this script has always
# used); kept at module level so running or importing the file behaves as
# before.
demo.launch(server_name="0.0.0.0", server_port=7860)