MTP_2

Sleeping

File size: 31,680 Bytes

2165193
 
 
 
0812507
22d628e
 
0812507
22d628e
 
2165193
dda31c0
2165193
 
22d628e
2165193
54dbc17
 
 
 
 
 
 
22d628e
 
 
 
0812507
 
82bb8af
2165193
 
22d628e
2165193
22d628e
2165193
 
 
cda7b0a
2165193
 
 
 
cda7b0a
0812507
 
2165193
22d628e
2165193
 
 
22d628e
0812507
2165193
 
22d628e
 
 
cda7b0a
22d628e
 
 
 
 
 
2165193
0812507
2165193
 
 
 
 
22d628e
 
2165193
 
 
 
0812507
22d628e
0812507
22d628e
0812507
22d628e
 
0812507
 
 
 
2165193
22d628e
 
2165193
0812507
22d628e
0812507
22d628e
cda7b0a
 
 
22d628e
0812507
22d628e
 
 
 
 
 
0812507
22d628e
 
 
 
 
 
 
 
 
 
 
 
0812507
 
22d628e
0812507
22d628e
 
 
2165193
22d628e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2165193
22d628e
 
2165193
0812507
ca29546
 
 
22d628e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cda7b0a
22d628e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dda31c0
 
22d628e
0812507
 
22d628e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0812507
22d628e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0812507
22d628e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cda7b0a
22d628e
 
 
 
 
 
 
0812507
22d628e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cda7b0a
 
22d628e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0812507
22d628e
 
 
 
 
 
 
 
 
0812507
 
22d628e
dda31c0
 
 
ca29546
 
dda31c0
 
 
0eb10b0
cda7b0a
0eb10b0
 
709e9ca
dda31c0
0eb10b0
cda7b0a
 
 
 
 
 
 
 
 
709e9ca
0eb10b0
22d628e
 
 
 
 
ca29546
 
cda7b0a
ca29546
 
 
 
 
 
 
 
cda7b0a
ca29546
 
 
cda7b0a
 
ca29546
 
 
 
cda7b0a
ca29546
 
 
 
cda7b0a
ca29546
 
 
cda7b0a
 
ca29546
 
 
 
cda7b0a
 
ca29546
 
cda7b0a
 
ca29546
 
cda7b0a
 
 
 
 
ca29546
 
 
cda7b0a
 
 
 
 
 
ca29546
22d628e
cda7b0a
 
22d628e
 
 
cda7b0a
22d628e
 
cda7b0a
 
22d628e
ca29546
 
 
cda7b0a
ca29546
 
cda7b0a
 
ca29546
 
 
 
 
 
cda7b0a
ca29546
 
cda7b0a
 
ca29546
 
 
 
cda7b0a
ca29546
 
 
 
 
cda7b0a
 
 
 
ca29546
cda7b0a
ca29546
 
 
 
cda7b0a
ca29546
 
 
cda7b0a
ca29546
22d628e
ca29546
 
cda7b0a
 
 
ca29546
 
 
cda7b0a
 
ca29546
 
 
cda7b0a
ca29546
 
cda7b0a
ca29546
 
cda7b0a
 
ca29546
 
cda7b0a
 
ca29546
 
cda7b0a
22d628e
ca29546
 
cda7b0a
 
 
ca29546
22d628e
 
 
 
cda7b0a
22d628e
ca29546
 
 
cda7b0a
 
ca29546
cda7b0a
ca29546
cda7b0a
ca29546
 
cda7b0a
 
ca29546
 
 
 
cda7b0a
ca29546
 
cda7b0a
 
ca29546
 
cda7b0a
 
 
ca29546
 
cda7b0a
 
ca29546
 
 
 
 
 
 
 
 
22d628e
 
ca29546
 
cda7b0a
 
ca29546
cda7b0a
 
ca29546
 
 
 
 
cda7b0a
 
 
 
 
 
 
ca29546
22d628e
cda7b0a
22d628e
cda7b0a
22d628e
ca29546
 
 
cda7b0a
 
ca29546
22d628e
 
 
cda7b0a
22d628e
cda7b0a
 
 
 
22d628e
ca29546
cda7b0a
ca29546
 
22d628e
 
cda7b0a
22d628e
ca29546
cda7b0a
 
 
ca29546
 
cda7b0a
ca29546
cda7b0a
 
 
 
 
22d628e
 
cda7b0a
 
 
 
 
 
22d628e
dda31c0
 
 
0eb10b0
ca29546
 
 
cda7b0a
ca29546
 
22d628e
0eb10b0
709e9ca
ca29546
 
 
 
cda7b0a
22d628e
cda7b0a
 
 
 
 
 
22d628e
 
ca29546
 
 
0eb10b0
709e9ca
ca29546
22d628e
 
ca29546
 
cda7b0a
ca29546
dda31c0
 
ca29546
 
 
 
 
 
 
 
cda7b0a
ca29546
 
22d628e
 
 
 
 
 
ca29546
 
 
 
 
 
 
22d628e
ca29546
22d628e
 
 
ca29546
 
22d628e
ca29546
 
 
0eb10b0
ca29546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22d628e
ca29546
22d628e
ca29546
 
22d628e
ca29546
 
 
 
 
22d628e
ca29546
 
 
 
 
 
 
 
 
 
 
22d628e
ca29546
 
 
 
 
22d628e
ca29546
22d628e
 
ca29546
 
 
22d628e
 
 
 
 
 
 
 
 
ca29546
 
22d628e
ca29546
 
 
22d628e
ca29546
22d628e
 
 
 
 
 
 
ca29546
22d628e
 
ca29546
 
 
 
22d628e
 
 
 
 
 
ca29546
 
 
 
 
 
 
22d628e
ca29546
22d628e
ca29546
 
 
 
 
 
 
 
22d628e
ca29546
 
 
22d628e
ca29546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22d628e
ca29546
 
 
 
 
 
 
 
 
 
22d628e
ca29546
 
 
 
 
 
 
 
 
 
22d628e
ca29546
22d628e
 
 
 
 
 
ca29546
22d628e
ca29546
 
22d628e
ca29546
 
 
22d628e
ca29546
 
 
 
 
 
 
 
 
22d628e
 
 
 
ca29546
 
22d628e
 
 
 
 
 
 
cda7b0a
22d628e
 
 
dda31c0
 
ca29546
0812507
22d628e
dda31c0
22d628e
cda7b0a
22d628e
 
 
 
 
 
0812507
 
 
22d628e
 
0812507

import os
import sys
import torch
import pickle
import time
import gc
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from huggingface_hub import snapshot_download
import uvicorn

# ======================
# CONFIGURACIÓN DE DISPOSITIVO
# ======================
# Pick the compute device once at import time; the rest of the module reads DEVICE.
if torch.cuda.is_available():
    DEVICE = "cuda"
    print("✅ GPU NVIDIA detectada. Usando CUDA.")
else:
    DEVICE = "cpu"
    print("⚠️ GPU no detectada. Usando CPU (puede ser más lento).")

# CPU thread tuning: use roughly half of the logical cores, never fewer than one.
# os.cpu_count() may return None when the core count cannot be determined, so
# fall back to 1 instead of raising TypeError on `None // 2`.
if DEVICE == "cpu":
    torch.set_num_threads(max(1, (os.cpu_count() or 1) // 2))

# Inference-only service: switch autograd tracking off.
torch.set_grad_enabled(False)

MODEL_REPO = "TeszenAI/MTP-4"

# ======================
# MODEL DOWNLOAD AND LOADING
# ======================
print(f"📦 Descargando modelo desde {MODEL_REPO}...")
repo_path = snapshot_download(
    repo_id=MODEL_REPO,
    repo_type="model",
    local_dir="mtp_repo"
)

# The downloaded repository provides the `model` and `tokenizer` modules
# imported below, so it has to be on sys.path first.
sys.path.insert(0, repo_path)

# Import model and tokenizer (must stay after the sys.path change above).
from model import MTPMiniModel
from tokenizer import MTPTokenizer

print("🔧 Cargando tensores y configuración...")
# SECURITY NOTE(review): pickle.load can execute arbitrary code contained in
# the file. The checkpoint comes from a remote repository, so this is only
# safe as long as that repository is trusted. Left as-is because the
# checkpoint format is defined outside this file.
with open(os.path.join(repo_path, "mtp_mini.pkl"), "rb") as f:
    model_data = pickle.load(f)

tokenizer = MTPTokenizer(os.path.join(repo_path, "mtp_tokenizer.model"))
VOCAB_SIZE = tokenizer.sp.get_piece_size()
config = model_data["config"]

# Detect whether the model uses SwiGLU (absent key means "no").
use_swiglu = config["model"].get("use_swiglu", False)

print("🧠 Inicializando modelo MTP 4...")
print(f"   → Vocabulario: {VOCAB_SIZE}")
print(f"   → Dimensión: {config['model']['d_model']}")
print(f"   → Capas: {config['model']['n_layers']}")
print(f"   → Cabezas: {config['model']['n_heads']}")
print(f"   → SwiGLU: {'✓' if use_swiglu else '✗'}")

model = MTPMiniModel(
    vocab_size=VOCAB_SIZE,
    d_model=config["model"]["d_model"],
    n_layers=config["model"]["n_layers"],
    n_heads=config["model"]["n_heads"],
    d_ff=config["model"]["d_ff"],
    max_seq_len=config["model"]["max_seq_len"],
    dropout=0.0,  # inference only, so dropout is disabled regardless of config
    use_swiglu=use_swiglu
)

model.load_state_dict(model_data["model_state_dict"])
model.eval()

# Count parameters BEFORE quantization: quantize_dynamic swaps nn.Linear for
# dynamically quantized modules whose packed weights are not reported by
# model.parameters(), which would make the CPU figure far too small.
param_count = sum(p.numel() for p in model.parameters())

# Quantization for CPU
if DEVICE == "cpu":
    print("⚡ Aplicando cuantización dinámica para CPU...")
    model = torch.quantization.quantize_dynamic(
        model,
        {torch.nn.Linear},
        dtype=torch.qint8
    )

model.to(DEVICE)

print(f"✅ Modelo cargado: {param_count:,} parámetros ({param_count/1e6:.1f}M)")

# ======================
# API CONFIG
# ======================
# Application object with its title / description / version metadata.
app = FastAPI(
    title="MTP 4 API", description="API para modelo de lenguaje MTP 4 con RoPE, RMSNorm y SwiGLU", version="4.0"
)

# CORS is fully open: every origin, method and header is allowed.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

class PromptRequest(BaseModel):
    # Request body of POST /generate. Each field carries its own bounds, so
    # out-of-range values are rejected before the handler runs.

    # Prompt text, at most 2000 characters.
    text: str = Field(
        ...,
        max_length=2000,
        description="Texto de entrada",
    )
    # Upper bound on newly generated tokens (10..300).
    max_tokens: int = Field(
        default=150,
        ge=10,
        le=300,
        description="Tokens máximos a generar",
    )
    # Sampling temperature (0.1..2.0).
    temperature: float = Field(
        default=0.7,
        ge=0.1,
        le=2.0,
        description="Temperatura de muestreo",
    )
    # Number of highest-scoring candidates kept (1..100).
    top_k: int = Field(
        default=40,
        ge=1,
        le=100,
        description="Top-k sampling",
    )
    # Cumulative-probability cut-off for nucleus sampling (0.1..1.0).
    top_p: float = Field(
        default=0.92,
        ge=0.1,
        le=1.0,
        description="Top-p (nucleus) sampling",
    )
    # Penalty factor for already-seen tokens (1.0..2.0).
    repetition_penalty: float = Field(
        default=1.15,
        ge=1.0,
        le=2.0,
        description="Penalización por repetición",
    )
    # Minimum reply length passed through to model.generate (5..100).
    min_length: int = Field(
        default=20,
        ge=5,
        le=100,
        description="Longitud mínima de respuesta",
    )

def build_prompt(user_input: str) -> str:
    """Wrap *user_input* in the instruction/response template.

    The result is the instruction header, the user's text, a blank line and
    the response header, ending with a newline.
    """
    template = "### Instrucción:\n{}\n\n### Respuesta:\n"
    return template.format(user_input)

# ======================
# ⚡ GESTIÓN DE CARGA
# ======================
# Maximum number of generations allowed to run at once; the endpoints turn
# further callers away with a "busy" answer.
MAX_CONCURRENT_REQUESTS = 3
# Number of generations currently in flight; the endpoints increment it on
# entry and decrement it when they finish.
ACTIVE_REQUESTS = 0

@app.post("/generate")
async def generate(req: PromptRequest):
    """Generate a complete (non-streamed) reply for ``req.text``.

    Returns a JSON-serialisable dict:

    * success: ``reply``, ``tokens_generated``, ``generation_time``,
      ``tokens_per_second``, ``model`` and ``device``;
    * blank input: ``reply`` (empty) and ``tokens_generated`` (0);
    * server saturated: ``reply``, ``error`` = ``"too_many_requests"`` and
      ``active_requests``;
    * generation failure: ``reply`` and ``error`` (the exception text).
    """
    global ACTIVE_REQUESTS

    # Admission control: refuse new work once the limit is reached.
    if ACTIVE_REQUESTS >= MAX_CONCURRENT_REQUESTS:
        return {
            "reply": "El servidor está ocupado. Por favor, intenta de nuevo en unos segundos.",
            "error": "too_many_requests",
            "active_requests": ACTIVE_REQUESTS
        }

    # Reject blank input before the request is counted as active, so there is
    # no counter to roll back on this path.
    user_input = req.text.strip()
    if not user_input:
        return {"reply": "", "tokens_generated": 0}

    ACTIVE_REQUESTS += 1
    # Everything after the increment lives inside try/finally so the counter is
    # released even when tokenization or tensor creation fails.
    try:
        # Dynamic adjustment under load: shorter, slightly cooler generations.
        dyn_max_tokens = req.max_tokens
        dyn_temperature = req.temperature
        if ACTIVE_REQUESTS > 1:
            print(f"⚠️ Carga alta ({ACTIVE_REQUESTS} requests). Ajustando parámetros.")
            dyn_max_tokens = min(dyn_max_tokens, 120)
            dyn_temperature = max(0.6, dyn_temperature * 0.95)

        full_prompt = build_prompt(user_input)
        tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
        input_ids = torch.tensor([tokens], device=DEVICE)
        eos_id = tokenizer.eos_id()

        start_time = time.time()

        # NOTE(review): model.generate is a synchronous call inside an
        # `async def` handler and nothing here awaits, so the event loop runs
        # no other coroutine until it returns. Consider offloading it to a
        # worker thread if concurrent model.generate calls are known to be safe.
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                max_new_tokens=dyn_max_tokens,
                temperature=dyn_temperature,
                top_k=req.top_k,
                top_p=req.top_p,
                repetition_penalty=req.repetition_penalty,
                min_length=req.min_length,
                eos_token_id=eos_id
            )

        # Keep only the newly generated part (drop the prompt tokens).
        gen_tokens = output_ids[0, len(tokens):].tolist()

        # Safety filter: stop at the first EOS and skip ids outside the vocabulary.
        safe_tokens = []
        for t in gen_tokens:
            if t == eos_id:
                break
            if 0 <= t < VOCAB_SIZE:
                safe_tokens.append(t)

        response = tokenizer.decode(safe_tokens).strip()

        # Cut the reply at the first section marker the model may have emitted.
        if "###" in response:
            response = response.split("###")[0].strip()

        # Drop a trailing ellipsis in any of the recognised spellings. Stripping
        # only "." left "…" untouched and turned ". . ." into ". . ", so the
        # spaced dots and the single-character ellipsis are stripped as well.
        if response.endswith(("...", ". . .", "…")):
            response = response.rstrip(".… ")

        generation_time = time.time() - start_time
        tokens_per_second = len(safe_tokens) / generation_time if generation_time > 0 else 0

        return {
            "reply": response,
            "tokens_generated": len(safe_tokens),
            "generation_time": round(generation_time, 2),
            "tokens_per_second": round(tokens_per_second, 1),
            "model": "MTP-4",
            "device": DEVICE
        }

    except Exception as e:
        # Top-level boundary of the endpoint: log the failure and answer with
        # an error payload instead of letting the request crash.
        print(f"❌ Error durante generación: {e}")
        import traceback
        traceback.print_exc()
        return {
            "reply": "Lo siento, ocurrió un error al procesar tu solicitud.",
            "error": str(e)
        }

    finally:
        ACTIVE_REQUESTS -= 1
        if DEVICE == "cuda":
            torch.cuda.empty_cache()
        gc.collect()

# ======================
# 📡 STREAMING SSE
# ======================
@app.get("/generate_sse")
def generate_sse(
    text: str,
    max_tokens: int = 150,
    temperature: float = 0.7,
    top_k: int = 40,
    top_p: float = 0.92,
    repetition_penalty: float = 1.15
):
    """Stream a reply token by token as Server-Sent Events.

    Each generated token is sent as a ``data:<token text>`` event. The stream
    ends with ``data:[DONE]``; failures are reported as ``data:[ERROR: ...]``.
    At most 200 tokens are produced per request (100 while other requests are
    active).
    """
    global ACTIVE_REQUESTS

    if ACTIVE_REQUESTS >= MAX_CONCURRENT_REQUESTS:
        def error_stream():
            yield "data:[ERROR: Servidor ocupado]\n\n"
        return StreamingResponse(error_stream(), media_type="text/event-stream")

    # Query parameters arrive unvalidated. Clamp them to the ranges that
    # PromptRequest enforces for /generate, so a temperature or penalty of 0
    # cannot cause a division by zero further down.
    temperature = min(max(temperature, 0.1), 2.0)
    top_p = min(max(top_p, 0.1), 1.0)
    repetition_penalty = min(max(repetition_penalty, 1.0), 2.0)

    ACTIVE_REQUESTS += 1

    def event_stream():
        # A `global` declaration only covers the function it appears in. Without
        # this line the decrement in `finally` makes ACTIVE_REQUESTS a local of
        # this generator, every read raises UnboundLocalError and the module
        # counter is never released.
        global ACTIVE_REQUESTS
        try:
            # Same normalisation as /generate: trim, and do nothing for blank input.
            user_input = text.strip()
            if not user_input:
                yield "data:[DONE]\n\n"
                return

            full_prompt = build_prompt(user_input)
            tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
            input_ids = torch.tensor([tokens], device=DEVICE)
            eos_id = tokenizer.eos_id()

            # Dynamic adjustment under load
            under_load = ACTIVE_REQUESTS > 1
            limit = min(100 if under_load else max_tokens, 200)
            temp = max(0.6, temperature * 0.95) if under_load else temperature

            for _step in range(limit):
                with torch.no_grad():
                    logits, _ = model(input_ids)
                    # Last position only, restricted to real vocabulary entries.
                    logits = logits[:, -1, :VOCAB_SIZE].clone()

                    # Repetition penalty on every token seen so far: negative
                    # logits are multiplied, the others divided.
                    if repetition_penalty != 1.0:
                        seen = torch.unique(input_ids[0])
                        seen_logits = logits[0, seen]
                        logits[0, seen] = torch.where(
                            seen_logits < 0,
                            seen_logits * repetition_penalty,
                            seen_logits / repetition_penalty,
                        )

                    # Temperature scaling
                    logits = logits / temp

                    # Top-k filtering: mask everything below the k-th best logit.
                    if top_k > 0:
                        v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                        logits[logits < v[:, [-1]]] = float('-inf')

                    # Top-p (nucleus) filtering
                    if top_p < 1.0:
                        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                        cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
                        sorted_indices_to_remove = cumulative_probs > top_p
                        # Shift right so the first token crossing the threshold
                        # is kept, and always keep the best token.
                        sorted_indices_to_remove[:, 1:] = sorted_indices_to_remove[:, :-1].clone()
                        sorted_indices_to_remove[:, 0] = 0
                        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                        logits[indices_to_remove] = float('-inf')

                    # Sample
                    probs = torch.softmax(logits, dim=-1)
                    next_id = torch.multinomial(probs, num_samples=1).item()

                if next_id == eos_id:
                    break

                # next_id is sampled from logits already sliced to VOCAB_SIZE,
                # so it is always a valid vocabulary id here.
                # NOTE(review): decoding one token at a time may lose whitespace
                # between tokens, depending on MTPTokenizer.decode — confirm.
                token_text = tokenizer.decode([next_id])

                # Stop as soon as the model starts a new section marker.
                if "###" in token_text:
                    break

                # An SSE event may not contain a raw newline in a data field;
                # send one `data:` line per text line so multi-line tokens
                # survive. Single-line tokens produce the same bytes as before.
                frame = "\n".join(f"data:{line}" for line in token_text.split("\n"))
                yield frame + "\n\n"

                input_ids = torch.cat(
                    [input_ids, torch.tensor([[next_id]], device=DEVICE)],
                    dim=1
                )
                time.sleep(0.02)  # Throttle the stream

            yield "data:[DONE]\n\n"

        except Exception as e:
            print(f"❌ Error en streaming: {e}")
            yield f"data:[ERROR: {str(e)}]\n\n"

        finally:
            ACTIVE_REQUESTS -= 1
            if DEVICE == "cuda":
                torch.cuda.empty_cache()
            gc.collect()

    return StreamingResponse(event_stream(), media_type="text/event-stream")

# ======================
# 📊 ENDPOINTS DE INFORMACIÓN
# ======================
@app.get("/health")
def health_check():
    """Report service status, current load and basic model statistics.

    On CUDA the response also carries the allocated and reserved GPU memory
    in MiB.
    """
    report = {
        "status": "healthy",
        "model": "MTP-4",
        "device": DEVICE,
        "active_requests": ACTIVE_REQUESTS,
        "max_concurrent_requests": MAX_CONCURRENT_REQUESTS,
        "vocab_size": VOCAB_SIZE,
        # Reuse the count computed once at load time instead of walking every
        # model parameter on each health probe.
        "parameters": param_count,
    }

    if DEVICE == "cuda":
        mib = 1024**2
        report["gpu_memory_allocated_mb"] = round(torch.cuda.memory_allocated() / mib, 2)
        report["gpu_memory_reserved_mb"] = round(torch.cuda.memory_reserved() / mib, 2)

    return report

@app.get("/info")
def model_info():
    """Return a detailed description of the model.

    The response combines the architecture and training values read from the
    checkpoint's ``config`` with the parameter count, the device in use and a
    list of feature labels ("improvements").
    """
    arch = config["model"]
    training = config["training"]

    improvements = [
        "RoPE (Rotary Position Embedding)",
        "RMSNorm (Root Mean Square Normalization)",
        "Label Smoothing (0.1)",
        "Repetition Penalty",
        "Early Stopping",
        "EOS Loss Weight",
        "Length Control",
        "Gradient Accumulation"
    ]

    # `use_swiglu` was already resolved from the config at load time.
    if use_swiglu:
        improvements.append("SwiGLU Activation")

    return {
        "model_name": "MTP-4",
        "version": "4.0",
        "architecture": {
            "d_model": arch["d_model"],
            "n_layers": arch["n_layers"],
            "n_heads": arch["n_heads"],
            "d_ff": arch["d_ff"],
            "max_seq_len": arch["max_seq_len"],
            "vocab_size": VOCAB_SIZE,
            "use_swiglu": use_swiglu,
            "dropout": arch["dropout"]
        },
        # The count is computed once at load time; summing over all parameters
        # twice per request was redundant work.
        "parameters": param_count,
        "parameters_human": f"{param_count/1e6:.1f}M",
        "device": DEVICE,
        "improvements": improvements,
        "training_config": {
            "batch_size": training["batch_size"],
            "accumulation_steps": training["accumulation_steps"],
            "learning_rate": training["learning_rate"],
            "weight_decay": training["weight_decay"],
            "epochs": training["epochs"]
        }
    }

@app.get("/config")
def get_config():
    """Return the checkpoint configuration grouped by section.

    ``model``, ``training`` and ``data`` are read directly from ``config``;
    ``generation`` falls back to an empty dict when it is missing.
    """
    payload = {section: config[section] for section in ("model", "training", "data")}
    payload["generation"] = config.get("generation", {})
    return payload

# ======================
# 🎨 INTERFAZ WEB MEJORADA
# ======================
@app.get("/", response_class=HTMLResponse)
def chat_ui():
    """Serve the single-page chat interface (inline HTML, CSS and JavaScript).

    The page script posts the user's message to ``/generate`` and renders the
    returned ``reply``. Error text coming back from the server is written with
    ``textContent`` so it is never parsed as HTML.
    """
    return """
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<title>MTP 4 - Chat Interface</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
<style>
:root {
    --bg-color: #0a0a0b;
    --surface-color: #1a1a1c;
    --accent-color: #6366f1;
    --text-primary: #e8e8ea;
    --text-secondary: #9ca3af;
    --user-bubble: #2d2d30;
    --success-color: #10b981;
    --warning-color: #f59e0b;
    --error-color: #ef4444;
    --logo-url: url('https://i.postimg.cc/yxS54PF3/IMG-3082.jpg');
}
* { 
    box-sizing: border-box; 
    outline: none; 
    -webkit-tap-highlight-color: transparent; 
}
body {
    margin: 0;
    background: linear-gradient(135deg, #0a0a0b 0%, #1a1a1c 100%);
    font-family: 'Inter', sans-serif;
    color: var(--text-primary);
    height: 100dvh;
    display: flex;
    flex-direction: column;
    overflow: hidden;
}
header {
    padding: 14px 24px;
    display: flex;
    align-items: center;
    justify-content: space-between;
    background: rgba(26, 26, 28, 0.9);
    backdrop-filter: blur(16px);
    position: fixed;
    top: 0;
    width: 100%;
    z-index: 50;
    border-bottom: 1px solid rgba(99, 102, 241, 0.1);
}
.brand-wrapper {
    display: flex;
    align-items: center;
    gap: 14px;
    cursor: pointer;
}
.brand-logo {
    width: 36px;
    height: 36px;
    border-radius: 50%;
    background-image: var(--logo-url);
    background-size: cover;
    background-position: center;
    border: 2px solid rgba(99, 102, 241, 0.3);
    box-shadow: 0 0 12px rgba(99, 102, 241, 0.2);
}
.brand-text {
    font-weight: 600;
    font-size: 1.15rem;
    display: flex;
    align-items: center;
    gap: 10px;
    background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
}
.version-badge {
    font-size: 0.75rem;
    background: linear-gradient(135deg, rgba(99, 102, 241, 0.2) 0%, rgba(139, 92, 246, 0.2) 100%);
    color: #a5b4fc;
    padding: 3px 10px;
    border-radius: 14px;
    font-weight: 700;
    border: 1px solid rgba(99, 102, 241, 0.3);
}
.status-indicator {
    width: 10px;
    height: 10px;
    border-radius: 50%;
    background: var(--success-color);
    animation: pulse 2s infinite;
    box-shadow: 0 0 8px var(--success-color);
}
@keyframes pulse {
    0%, 100% { opacity: 1; transform: scale(1); }
    50% { opacity: 0.7; transform: scale(0.95); }
}
.chat-scroll {
    flex: 1;
    overflow-y: auto;
    padding: 90px 24px 50px 24px;
    display: flex;
    flex-direction: column;
    gap: 32px;
    max-width: 900px;
    margin: 0 auto;
    width: 100%;
    scroll-behavior: smooth;
}
.msg-row {
    display: flex;
    gap: 18px;
    width: 100%;
    opacity: 0;
    transform: translateY(12px);
    animation: slideUpFade 0.5s cubic-bezier(0.2, 0.8, 0.2, 1) forwards;
}
.msg-row.user { justify-content: flex-end; }
.msg-row.bot { justify-content: flex-start; align-items: flex-start; }
.msg-content {
    line-height: 1.65;
    font-size: 1rem;
    word-wrap: break-word;
    max-width: 85%;
}
.user .msg-content {
    background: linear-gradient(135deg, #2d2d30 0%, #3a3a3d 100%);
    padding: 12px 20px;
    border-radius: 20px;
    border-top-right-radius: 6px;
    color: #fff;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
}
.bot .msg-content-wrapper {
    display: flex;
    flex-direction: column;
    gap: 10px;
    width: 100%;
}
.bot .msg-text {
    padding-top: 8px;
    color: var(--text-primary);
    white-space: pre-wrap;
}
.bot-avatar {
    width: 38px;
    height: 38px;
    min-width: 38px;
    border-radius: 50%;
    background-image: var(--logo-url);
    background-size: cover;
    box-shadow: 0 0 16px rgba(99, 102, 241, 0.4);
    border: 2px solid rgba(99, 102, 241, 0.3);
}
.bot-actions {
    display: flex;
    gap: 12px;
    opacity: 0;
    transition: opacity 0.3s;
    margin-top: 6px;
}
.action-btn {
    background: rgba(99, 102, 241, 0.1);
    border: 1px solid rgba(99, 102, 241, 0.2);
    color: var(--text-secondary);
    cursor: pointer;
    padding: 6px 12px;
    border-radius: 8px;
    display: flex;
    align-items: center;
    transition: all 0.2s;
    font-size: 0.85rem;
}
.action-btn:hover {
    color: var(--accent-color);
    background: rgba(99, 102, 241, 0.15);
    border-color: rgba(99, 102, 241, 0.4);
}
.action-btn svg { 
    width: 16px; 
    height: 16px; 
    fill: currentColor; 
    margin-right: 5px;
}
.typing-cursor::after {
    content: '';
    display: inline-block;
    width: 3px;
    height: 18px;
    background: var(--accent-color);
    margin-left: 3px;
    vertical-align: middle;
    animation: blink 0.8s infinite;
}
.footer-container {
    padding: 0 24px 24px 24px;
    background: linear-gradient(to top, rgba(10, 10, 11, 0.95) 85%, transparent);
    position: relative;
    z-index: 60;
}
.input-box {
    max-width: 900px;
    margin: 0 auto;
    background: var(--surface-color);
    border-radius: 30px;
    padding: 10px 12px 10px 24px;
    display: flex;
    align-items: center;
    border: 1px solid rgba(99, 102, 241, 0.2);
    transition: all 0.3s;
    box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3);
}
.input-box:focus-within {
    border-color: rgba(99, 102, 241, 0.6);
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.15), 0 4px 20px rgba(0, 0, 0, 0.4);
}
#userInput {
    flex: 1;
    background: transparent;
    border: none;
    color: white;
    font-size: 1rem;
    font-family: inherit;
    padding: 10px 0;
    resize: none;
    max-height: 120px;
}
#mainBtn {
    background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
    color: white;
    border: none;
    width: 40px;
    height: 40px;
    border-radius: 50%;
    display: flex;
    align-items: center;
    justify-content: center;
    cursor: pointer;
    margin-left: 10px;
    transition: all 0.2s;
    box-shadow: 0 4px 12px rgba(99, 102, 241, 0.3);
}
#mainBtn:hover { 
    transform: scale(1.05); 
    box-shadow: 0 6px 16px rgba(99, 102, 241, 0.5);
}
#mainBtn:disabled {
    opacity: 0.6;
    cursor: not-allowed;
    transform: scale(1);
}
.disclaimer {
    text-align: center;
    font-size: 0.75rem;
    color: #6b7280;
    margin-top: 14px;
}
.stats-badge {
    font-size: 0.7rem;
    color: var(--text-secondary);
    margin-top: 6px;
    font-family: 'Monaco', monospace;
    background: rgba(99, 102, 241, 0.05);
    padding: 4px 8px;
    border-radius: 6px;
    display: inline-block;
}
@keyframes slideUpFade {
    from { opacity: 0; transform: translateY(18px); }
    to { opacity: 1; transform: translateY(0); }
}
@keyframes blink { 
    0%, 100% { opacity: 1; } 
    50% { opacity: 0.3; } 
}
@keyframes pulseAvatar {
    0% { box-shadow: 0 0 0 0 rgba(99, 102, 241, 0.5); }
    70% { box-shadow: 0 0 0 10px rgba(99, 102, 241, 0); }
    100% { box-shadow: 0 0 0 0 rgba(99, 102, 241, 0); }
}
.pulsing { animation: pulseAvatar 1.5s infinite; }
::-webkit-scrollbar { width: 10px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { 
    background: rgba(99, 102, 241, 0.3); 
    border-radius: 5px; 
}
::-webkit-scrollbar-thumb:hover { background: rgba(99, 102, 241, 0.5); }
.error-message {
    color: var(--error-color);
    font-size: 0.9rem;
    padding: 10px 14px;
    background: rgba(239, 68, 68, 0.1);
    border-radius: 10px;
    margin-top: 10px;
    border: 1px solid rgba(239, 68, 68, 0.2);
}
</style>
</head>
<body>
<header>
    <div class="brand-wrapper" onclick="location.reload()">
        <div class="brand-logo"></div>
        <div class="brand-text">
            MTP <span class="version-badge">4.0</span>
        </div>
    </div>
    <div class="status-indicator" title="Sistema operativo"></div>
</header>
<div id="chatScroll" class="chat-scroll">
    <div class="msg-row bot" style="animation-delay: 0.1s;">
        <div class="bot-avatar"></div>
        <div class="msg-content-wrapper">
            <div class="msg-text">
¡Hola! Soy MTP 4, un modelo de lenguaje avanzado con arquitectura Transformer optimizada. 

Características principales:
• RoPE - Rotary Position Embedding para mejor contexto
• RMSNorm - Normalización estable y eficiente
• SwiGLU - Función de activación mejorada
• Control inteligente de repetición y coherencia
• Generación fluida y natural

¿En qué puedo ayudarte hoy?
            </div>
        </div>
    </div>
</div>
<div class="footer-container">
    <div class="input-box">
        <textarea id="userInput" placeholder="Escribe un mensaje..." rows="1" autocomplete="off"></textarea>
        <button id="mainBtn" onclick="handleBtnClick()"></button>
    </div>
    <div class="disclaimer">
        MTP 4 puede cometer errores. Considera verificar la información importante.
    </div>
</div>
<script>
const chatScroll = document.getElementById('chatScroll');
const userInput = document.getElementById('userInput');
const mainBtn = document.getElementById('mainBtn');
let isGenerating = false;
let abortController = null;
let typingTimeout = null;
let lastUserPrompt = "";
const ICON_SEND = `<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M22 2L11 13M22 2l-7 20-4-9-9-4 20-7z"></path></svg>`;
const ICON_STOP = `<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor" stroke="currentColor" stroke-width="0"><rect x="2" y="2" width="20" height="20" rx="4" ry="4"></rect></svg>`;
mainBtn.innerHTML = ICON_SEND;

// Auto-resize textarea
userInput.addEventListener('input', function() {
    this.style.height = 'auto';
    this.style.height = Math.min(this.scrollHeight, 120) + 'px';
});

function scrollToBottom() {
    chatScroll.scrollTop = chatScroll.scrollHeight;
}

function setBtnState(state) {
    if (state === 'sending') {
        mainBtn.innerHTML = ICON_STOP;
        mainBtn.disabled = false;
        isGenerating = true;
    } else if (state === 'disabled') {
        mainBtn.disabled = true;
        isGenerating = false;
    } else {
        mainBtn.innerHTML = ICON_SEND;
        mainBtn.disabled = false;
        isGenerating = false;
        abortController = null;
    }
}

function handleBtnClick() {
    if (isGenerating) {
        stopGeneration();
    } else {
        sendMessage();
    }
}

function stopGeneration() {
    if (abortController) abortController.abort();
    if (typingTimeout) clearTimeout(typingTimeout);
    const activeCursor = document.querySelector('.typing-cursor');
    if (activeCursor) activeCursor.classList.remove('typing-cursor');
    const activeAvatar = document.querySelector('.pulsing');
    if (activeAvatar) activeAvatar.classList.remove('pulsing');
    setBtnState('idle');
    userInput.focus();
}

async function sendMessage(textOverride = null) {
    const text = textOverride || userInput.value.trim();
    if (!text) return;
    
    lastUserPrompt = text;
    
    if (!textOverride) {
        userInput.value = '';
        userInput.style.height = 'auto';
        addMessage(text, 'user');
    }
    
    setBtnState('sending');
    abortController = new AbortController();
    
    const botRow = document.createElement('div');
    botRow.className = 'msg-row bot';
    
    const avatar = document.createElement('div');
    avatar.className = 'bot-avatar pulsing'; 
    
    const wrapper = document.createElement('div');
    wrapper.className = 'msg-content-wrapper';
    
    const msgText = document.createElement('div');
    msgText.className = 'msg-text'; 
    
    wrapper.appendChild(msgText);
    botRow.appendChild(avatar);
    botRow.appendChild(wrapper);
    chatScroll.appendChild(botRow);
    scrollToBottom();
    
    try {
        const startTime = performance.now();
        
        const response = await fetch('/generate', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ 
                text: text,
                max_tokens: 150,
                temperature: 0.7,
                top_k: 40,
                top_p: 0.92,
                repetition_penalty: 1.15,
                min_length: 20
            }),
            signal: abortController.signal
        });
        
        const data = await response.json();
        
        if (!isGenerating) return; 
        
        avatar.classList.remove('pulsing');
        
        if (data.error) {
            // textContent, not innerHTML: the error text comes from the server
            // and must never be parsed as HTML.
            msgText.textContent = `Error: ${data.error}`;
            msgText.style.color = 'var(--error-color)';
            setBtnState('idle');
            return;
        }
        
        const reply = data.reply || "No entendí eso.";
        const endTime = performance.now();
        const totalTime = ((endTime - startTime) / 1000).toFixed(2);
        
        await typeWriter(msgText, reply);
        
        if (isGenerating) {
            // Agregar estadísticas
            const stats = document.createElement('div');
            stats.className = 'stats-badge';
            stats.textContent = `${data.tokens_generated} tokens • ${data.tokens_per_second} t/s • ${totalTime}s • ${data.device}`;
            wrapper.appendChild(stats);
            
            addActions(wrapper, reply);
            setBtnState('idle');
        }
    } catch (error) {
        if (error.name === 'AbortError') {
            msgText.textContent += " [Detenido]";
        } else {
            console.error('Error:', error);
            avatar.classList.remove('pulsing');
            msgText.innerHTML = `<span style="color: var(--error-color);">Error de conexión. Por favor, intenta de nuevo.</span>`;
            setBtnState('idle');
        }
    }
}

function addMessage(text, sender) {
    const row = document.createElement('div');
    row.className = `msg-row ${sender}`;
    
    const content = document.createElement('div');
    content.className = 'msg-content';
    content.textContent = text;
    
    row.appendChild(content);
    chatScroll.appendChild(row);
    scrollToBottom();
}

function typeWriter(element, text, speed = 12) {
    return new Promise(resolve => {
        let i = 0;
        element.classList.add('typing-cursor');
        
        function type() {
            if (!isGenerating) {
                element.classList.remove('typing-cursor');
                resolve();
                return;
            }
            
            if (i < text.length) {
                element.textContent += text.charAt(i);
                i++;
                scrollToBottom();
                typingTimeout = setTimeout(type, speed + Math.random() * 5);
            } else {
                element.classList.remove('typing-cursor');
                resolve();
            }
        }
        
        type();
    });
}

function addActions(wrapperElement, textToCopy) {
    const actionsDiv = document.createElement('div');
    actionsDiv.className = 'bot-actions';
    
    const copyBtn = document.createElement('button');
    copyBtn.className = 'action-btn';
    copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>Copiar`;
    copyBtn.onclick = () => {
        navigator.clipboard.writeText(textToCopy).then(() => {
            copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polyline points="20 6 9 17 4 12"></polyline></svg>Copiado`;
            setTimeout(() => {
                copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>Copiar`;
            }, 2000);
        });
    };
    
    const regenBtn = document.createElement('button');
    regenBtn.className = 'action-btn';
    regenBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M23 4v6h-6"></path><path d="M1 20v-6h6"></path><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"></path></svg>Regenerar`;
    regenBtn.onclick = () => {
        sendMessage(lastUserPrompt);
    };
    
    actionsDiv.appendChild(copyBtn);
    actionsDiv.appendChild(regenBtn);
    wrapperElement.appendChild(actionsDiv);
    
    requestAnimationFrame(() => actionsDiv.style.opacity = "1");
    scrollToBottom();
}

userInput.addEventListener('keydown', (e) => {
    if (e.key === 'Enter' && !e.shiftKey) {
        e.preventDefault();
        handleBtnClick();
    }
});

window.onload = () => {
    userInput.focus();
    
    // Cargar info del modelo
    fetch('/info')
        .then(r => r.json())
        .then(data => {
            console.log('MTP 4 cargado:', data);
        })
        .catch(e => console.error('Error cargando info:', e));
};
</script>
</body>
</html>
"""

if __name__ == "__main__":
    # Listening port comes from the PORT environment variable (default 7860).
    port = int(os.environ.get("PORT", 7860))
    base_url = f"http://0.0.0.0:{port}"

    # Start-up banner listing the main entry points.
    banner = (
        "\n🚀 Iniciando servidor MTP 4...",
        f"🌐 Interfaz web: {base_url}",
        f"📡 API docs: {base_url}/docs",
        f"📊 Health check: {base_url}/health",
        f"ℹ️  Model info: {base_url}/info",
        "\n✅ Sistema listo. Presiona Ctrl+C para detener.",
    )
    for line in banner:
        print(line)

    # Serve the app on all interfaces; this call blocks until shutdown.
    uvicorn.run(app, host="0.0.0.0", port=port, log_level="info")