#!/usr/bin/env python3
"""
Zelin LLM — HF Space Fine-Tuning Launcher
═══════════════════════════════════════════
This runs as a Gradio app on HuggingFace Spaces.
It provides a UI to trigger the fine-tuning pipeline.

For actual GPU training, use Google Colab with the train.py script.
This Space handles: Data generation, Model download, and GGUF serving.
"""

import gradio as gr
import json
import os
import subprocess
import sys
from pathlib import Path

# ── Status ────────────────────────────────────────────────────
# Shared, mutable pipeline state. generate_data() writes to it and
# get_status() reads it to render the Status tab.
STATUS = {
    # Current stage; this file sets "idle", "generating_data", "data_ready"
    # and "error".
    "phase": "idle",
    # Completion figure shown with a "%" suffix (this file sets 0, 10 and 30).
    "progress": 0,
    # Human-readable description of the latest event or error.
    "message": "Zelin-4B Training Pipeline ready",
}

def generate_data():
    """Generate the synthetic training dataset and report the outcome.

    Imports ``generate_dataset`` from the ``generate_synthetic`` module
    (looked up in the ``data`` directory next to this file), runs it, and
    records each stage in the module-level ``STATUS`` dict.

    Returns:
        A human-readable message: the train/validation paths on success, or
        the error text if anything raised along the way.
    """
    STATUS["phase"] = "generating_data"
    STATUS["message"] = "Generating training data..."
    STATUS["progress"] = 10

    try:
        # Make the generator module importable. The membership check keeps
        # repeated invocations from stacking duplicate entries on sys.path.
        data_dir = str(Path(__file__).parent / "data")
        if data_dir not in sys.path:
            sys.path.insert(0, data_dir)
        from generate_synthetic import generate_dataset
        train_path, val_path = generate_dataset()

        STATUS["phase"] = "data_ready"
        STATUS["message"] = f"Data generated: {train_path}"
        STATUS["progress"] = 30

        return f"✅ Training data generated!\n- Train: {train_path}\n- Validation: {val_path}"
    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the
        # Gradio callback.
        STATUS["phase"] = "error"
        STATUS["message"] = f"Error: {e}"
        return f"❌ Error: {e}"

def check_environment():
    """Report GPU, package, dataset and token availability as text.

    Probes, in order: PyTorch/CUDA, Unsloth, the generated ``train.jsonl``
    file under ``data/output`` next to this file, and the ``HF_TOKEN``
    environment variable.

    Returns:
        One status line per check, joined with newlines.
    """
    info = []

    # Check GPU
    try:
        import torch
        if torch.cuda.is_available():
            gpu = torch.cuda.get_device_properties(0)
            # The device-properties attribute is ``total_memory`` (in bytes);
            # ``total_mem`` does not exist and raised AttributeError here.
            info.append(f"🎮 GPU: {gpu.name} ({gpu.total_memory / 1024**3:.1f} GB)")
        else:
            info.append("⚠️ No GPU available — training will be slow")
    except ImportError:
        info.append("❌ PyTorch not installed")

    # Check Unsloth
    try:
        import unsloth  # noqa: F401 -- imported only to probe availability
        info.append("✅ Unsloth installed")
    except ImportError:
        info.append("❌ Unsloth not installed — install with: pip install unsloth")

    # Check data
    train_path = Path(__file__).parent / "data" / "output" / "train.jsonl"
    if train_path.exists():
        # Count lines in binary mode inside a context manager: the handle is
        # closed deterministically and no text decoding can fail.
        with train_path.open("rb") as fh:
            count = sum(1 for _ in fh)
        info.append(f"📊 Training data: {count} examples")
    else:
        info.append("📊 No training data — run 'Generate Data' first")

    # Check HF token
    if os.environ.get("HF_TOKEN"):
        info.append("🔑 HF Token: configured")
    else:
        info.append("⚠️ HF Token: not set")

    return "\n".join(info)

def get_instructions():
    """Build the Markdown guide shown in the Train tab.

    The text is static: it walks through training on Colab, Kaggle or a GPU
    Space, lists expected results, and shows the post-training integration
    snippet.

    Returns:
        The guide as a single Markdown string.
    """
    guide = """
## 🚀 Cómo entrenar Zelin-4B

### Opción 1: Google Colab (GRATIS, T4 GPU)

1. Abrí [Google Colab](https://colab.research.google.com/)
2. Creá un nuevo notebook
3. Ejecutá estas celdas:

```python
# Celda 1: Instalar dependencias
!pip install unsloth
!pip install --upgrade torch transformers trl datasets
!huggingface-cli login  # Pegá tu HF token

# Celda 2: Clonar el repo
!git clone https://huggingface.co/TomatitoToho/Zelin-4B zelin-4b
%cd zelin-4b

# Celda 3: Descargar datos
from huggingface_hub import hf_hub_download
hf_hub_download("TomatitoToho/zelin-conversations", "train.jsonl", local_dir="data/output")
hf_hub_download("TomatitoToho/zelin-conversations", "validation.jsonl", local_dir="data/output")

# Celda 4: Entrenar! (~2 horas en T4)
exec(open("training/train.py").read())

# Celda 5: Subir a HF
from huggingface_hub import HfApi
api = HfApi()
api.upload_folder(folder_path="output/zelin-4b-gguf", repo_id="TomatitoToho/Zelin-4B")
```

### Opción 2: Kaggle (GRATIS, T4 o P100 GPU)

1. Ir a [Kaggle](https://www.kaggle.com/)
2. Crear nuevo notebook con GPU accelerator
3. Mismos pasos que Colab

### Opción 3: HF Space con GPU ($0.40/hr)

1. Crear Space con T4 GPU
2. Usar el Dockerfile del repo
3. El entrenamiento corre automáticamente

---

## 📊 Expected Results

| Metric | Value |
|--------|-------|
| Training time (T4) | ~2-3 horas |
| VRAM usage | ~8-10 GB |
| Model size (Q4_K_M) | ~2.5 GB |
| CPU inference speed | 30-50 tok/s |
| Response time (20 tok) | <500ms |

## 🎯 Post-Training

Después del entrenamiento, el modelo GGUF se integra en Zelin así:

```javascript
// En local-ai.js de Zelin
const ZELIN_CUSTOM_MODEL = 'TomatitoToho/Zelin-4B';
const ZELIN_CUSTOM_FILE = 'zelin-4b-Q4_K_M.gguf';
```
"""
    return guide

def get_status():
    """Render the pipeline state held in ``STATUS`` as Markdown text.

    Returns:
        Three newline-separated fields: phase, progress (with a ``%``
        suffix) and message.
    """
    fields = (
        f"**Phase:** {STATUS['phase']}",
        f"**Progress:** {STATUS['progress']}%",
        f"**Message:** {STATUS['message']}",
    )
    return "\n".join(fields)

# ── Gradio UI ─────────────────────────────────────────────────

# Build the three-tab interface. Components are created in display order, so
# the statement order below is also the on-screen layout.
with gr.Blocks(title="Zelin-4B Training Pipeline", theme=gr.themes.Soft()) as demo:
    # Static page header.
    gr.Markdown("""
    # 🧠 Zelin-4B — Custom LLM Training Pipeline
    Fine-tune Qwen3-4B on Argentine Spanish Discord conversations for Minecraft
    
    **Base Model:** Qwen3-4B-Instruct | **Method:** QLoRA + Unsloth | **Target:** T4 GPU
    """)
    
    # Status tab: environment probe plus a manual refresh of the STATUS dict.
    with gr.Tab("📋 Status"):
        env_btn = gr.Button("Check Environment", variant="secondary")
        env_output = gr.Textbox(label="Environment", lines=10)
        # The handler takes no inputs; its return value fills the textbox.
        env_btn.click(check_environment, outputs=env_output)
        
        status_btn = gr.Button("Refresh Status")
        status_output = gr.Markdown()
        # Status is only re-rendered when this button is clicked.
        status_btn.click(get_status, outputs=status_output)
    
    # Generate Data tab: runs generate_data() and shows its result message.
    with gr.Tab("📊 Generate Data"):
        gen_btn = gr.Button("Generate Training Data", variant="primary")
        gen_output = gr.Textbox(label="Output", lines=5)
        gen_btn.click(generate_data, outputs=gen_output)
    
    # Train tab: static instructions, rendered once when the UI is built.
    with gr.Tab("🚀 Train"):
        gr.Markdown(get_instructions())

# ── Launch ────────────────────────────────────────────────────
# Runs at module level (no __main__ guard): the server starts as soon as this
# file is executed or imported, bound to 0.0.0.0 on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)