teszenofficial commited on
Commit
22d628e
·
verified ·
1 Parent(s): f07cd6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +470 -190
app.py CHANGED
@@ -3,14 +3,16 @@ import sys
3
  import torch
4
  import pickle
5
  import time
6
- from fastapi import FastAPI
 
7
  from fastapi.responses import HTMLResponse, StreamingResponse
8
- from pydantic import BaseModel
 
9
  from huggingface_hub import snapshot_download
10
  import uvicorn
11
 
12
  # ======================
13
- # CONFIGURACIÓN DE DISPOSITIVO (GPU/CPU)
14
  # ======================
15
  if torch.cuda.is_available():
16
  DEVICE = "cuda"
@@ -19,19 +21,18 @@ else:
19
  DEVICE = "cpu"
20
  print("⚠️ GPU no detectada. Usando CPU (puede ser más lento).")
21
 
22
- # ======================
23
- # OPTIMIZACIÓN CPU
24
- # ======================
 
25
  torch.set_grad_enabled(False)
26
- torch.set_num_threads(max(1, os.cpu_count() // 2))
27
 
28
  MODEL_REPO = "TeszenAI/MTP3.7"
29
 
30
  # ======================
31
- # DESCARGA DEL MODELO
32
  # ======================
33
- print(f"--- SISTEMA MTP 2 ---")
34
- print(f"Descargando/Verificando modelo desde {MODEL_REPO}...")
35
  repo_path = snapshot_download(
36
  repo_id=MODEL_REPO,
37
  repo_type="model",
@@ -40,23 +41,28 @@ repo_path = snapshot_download(
40
 
41
  sys.path.insert(0, repo_path)
42
 
 
43
  from model import MTPMiniModel
44
  from tokenizer import MTPTokenizer
45
 
46
- # ======================
47
- # CARGA DEL MODELO
48
- # ======================
49
- print("Cargando modelo en memoria...")
50
  with open(os.path.join(repo_path, "mtp_mini.pkl"), "rb") as f:
51
  model_data = pickle.load(f)
52
 
53
- tokenizer = MTPTokenizer(
54
- os.path.join(repo_path, "mtp_tokenizer.model")
55
- )
56
-
57
  VOCAB_SIZE = tokenizer.sp.get_piece_size()
58
  config = model_data["config"]
59
 
 
 
 
 
 
 
 
 
 
 
60
  model = MTPMiniModel(
61
  vocab_size=VOCAB_SIZE,
62
  d_model=config["model"]["d_model"],
@@ -64,134 +70,350 @@ model = MTPMiniModel(
64
  n_heads=config["model"]["n_heads"],
65
  d_ff=config["model"]["d_ff"],
66
  max_seq_len=config["model"]["max_seq_len"],
67
- dropout=0.0
 
68
  )
69
 
70
  model.load_state_dict(model_data["model_state_dict"])
71
  model.eval()
72
 
73
- # ======================
74
- # ⚙️ CUANTIZACIÓN CPU
75
- # ======================
76
  if DEVICE == "cpu":
 
77
  model = torch.quantization.quantize_dynamic(
78
- model,
79
- {torch.nn.Linear},
80
  dtype=torch.qint8
81
  )
82
- print("⚙️ Modelo cuantizado para CPU")
83
 
84
  model.to(DEVICE)
85
- print(f"🚀 MTP 2 listo y corriendo en: {DEVICE.upper()}")
86
-
87
- # ======================
88
- # API FASTAPI
89
- # ======================
90
- app = FastAPI(title="MTP 2 API")
91
 
92
- class Prompt(BaseModel):
93
- text: str
94
 
95
  # ======================
96
- # 🧠 PROMPT MEJORADO (MISMO FORMATO)
97
  # ======================
98
- def build_prompt(user_input: str) -> str:
99
- return f"""Eres MTP, un modelo de lenguaje experimental.
100
- Responde de forma clara, directa y coherente.
101
- No inventes información.
 
102
 
103
- ### Instrucción:
104
- {user_input}
 
 
 
 
105
 
106
- ### Respuesta:
107
- """
 
 
 
 
 
 
 
 
 
 
108
 
109
  # ======================
110
- # GENERACIÓN NORMAL (IGUAL QUE ANTES)
111
  # ======================
 
 
 
112
  @app.post("/generate")
113
- def generate(prompt: Prompt):
114
- user_input = prompt.text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  if not user_input:
116
- return {"reply": ""}
 
117
 
118
  full_prompt = build_prompt(user_input)
119
  tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
120
  input_ids = torch.tensor([tokens], device=DEVICE)
121
 
122
- with torch.no_grad():
123
- output_ids = model.generate(
124
- input_ids,
125
- max_new_tokens=150,
126
- temperature=0.7,
127
- top_k=50,
128
- top_p=0.9
129
- )
130
-
131
- gen_tokens = output_ids[0, len(tokens):].tolist()
132
-
133
- # 🔒 FILTRO DE SEGURIDAD
134
- safe_tokens = [
135
- t for t in gen_tokens
136
- if 0 <= t < VOCAB_SIZE and t != tokenizer.eos_id()
137
- ]
138
-
139
- response = tokenizer.decode(safe_tokens).strip()
140
- if "###" in response:
141
- response = response.split("###")[0].strip()
142
-
143
- return {"reply": response}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  # ======================
146
- # 📡 STREAMING SSE OFICIAL
147
  # ======================
148
  @app.get("/generate_sse")
149
- def generate_sse(text: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def event_stream():
151
- full_prompt = build_prompt(text)
152
- tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
153
- input_ids = torch.tensor([tokens], device=DEVICE)
154
-
155
- for _ in range(150):
156
- with torch.no_grad():
157
- logits = model(input_ids)[:, -1, :VOCAB_SIZE]
158
- probs = torch.softmax(logits / 0.7, dim=-1)
159
- next_id = torch.argmax(probs, dim=-1).item()
160
-
161
- if next_id == tokenizer.eos_id():
162
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- if 0 <= next_id < VOCAB_SIZE:
165
- token_text = tokenizer.decode([next_id])
166
- yield f"data:{token_text}\n\n"
167
- input_ids = torch.cat(
168
- [input_ids, torch.tensor([[next_id]], device=DEVICE)],
169
- dim=1
170
- )
171
- time.sleep(0.015)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
- yield "data:[DONE]\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- return StreamingResponse(event_stream(), media_type="text/event-stream")
 
 
 
 
 
 
 
 
176
 
177
  # ======================
178
- # INTERFAZ WEB (TU HTML COMPLETO, SIN QUITAR NADA)
179
  # ======================
180
  @app.get("/", response_class=HTMLResponse)
181
  def chat_ui():
182
  return """
183
-
184
  <!DOCTYPE html>
185
  <html lang="es">
186
  <head>
187
  <meta charset="UTF-8">
188
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
189
- <title>MTP 2</title>
190
  <link rel="preconnect" href="https://fonts.googleapis.com">
191
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
192
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
193
  <style>
194
- /* --- VARIABLES & THEME --- */
195
  :root {
196
  --bg-color: #131314;
197
  --surface-color: #1E1F20;
@@ -199,12 +421,16 @@ def chat_ui():
199
  --text-primary: #e3e3e3;
200
  --text-secondary: #9aa0a6;
201
  --user-bubble: #282a2c;
202
- --bot-actions-color: #c4c7c5;
 
 
203
  --logo-url: url('https://i.postimg.cc/yxS54PF3/IMG-3082.jpg');
204
  }
205
-
206
- * { box-sizing: border-box; outline: none; -webkit-tap-highlight-color: transparent; }
207
-
 
 
208
  body {
209
  margin: 0;
210
  background-color: var(--bg-color);
@@ -215,8 +441,6 @@ body {
215
  flex-direction: column;
216
  overflow: hidden;
217
  }
218
-
219
- /* --- HEADER --- */
220
  header {
221
  padding: 12px 20px;
222
  display: flex;
@@ -230,14 +454,12 @@ header {
230
  z-index: 50;
231
  border-bottom: 1px solid rgba(255,255,255,0.05);
232
  }
233
-
234
  .brand-wrapper {
235
  display: flex;
236
  align-items: center;
237
  gap: 12px;
238
  cursor: pointer;
239
  }
240
-
241
  .brand-logo {
242
  width: 32px;
243
  height: 32px;
@@ -247,7 +469,6 @@ header {
247
  background-position: center;
248
  border: 1px solid rgba(255,255,255,0.1);
249
  }
250
-
251
  .brand-text {
252
  font-weight: 500;
253
  font-size: 1.05rem;
@@ -255,7 +476,6 @@ header {
255
  align-items: center;
256
  gap: 8px;
257
  }
258
-
259
  .version-badge {
260
  font-size: 0.75rem;
261
  background: rgba(74, 158, 255, 0.15);
@@ -264,8 +484,17 @@ header {
264
  border-radius: 12px;
265
  font-weight: 600;
266
  }
267
-
268
- /* --- CHAT AREA --- */
 
 
 
 
 
 
 
 
 
269
  .chat-scroll {
270
  flex: 1;
271
  overflow-y: auto;
@@ -278,8 +507,6 @@ header {
278
  width: 100%;
279
  scroll-behavior: smooth;
280
  }
281
-
282
- /* Filas de Mensaje */
283
  .msg-row {
284
  display: flex;
285
  gap: 16px;
@@ -288,18 +515,14 @@ header {
288
  transform: translateY(10px);
289
  animation: slideUpFade 0.4s cubic-bezier(0.2, 0.8, 0.2, 1) forwards;
290
  }
291
-
292
  .msg-row.user { justify-content: flex-end; }
293
  .msg-row.bot { justify-content: flex-start; align-items: flex-start; }
294
-
295
- /* Contenido */
296
  .msg-content {
297
  line-height: 1.6;
298
  font-size: 1rem;
299
  word-wrap: break-word;
300
  max-width: 85%;
301
  }
302
-
303
  .user .msg-content {
304
  background-color: var(--user-bubble);
305
  padding: 10px 18px;
@@ -307,20 +530,17 @@ header {
307
  border-top-right-radius: 4px;
308
  color: #fff;
309
  }
310
-
311
  .bot .msg-content-wrapper {
312
  display: flex;
313
  flex-direction: column;
314
  gap: 8px;
315
  width: 100%;
316
  }
317
-
318
  .bot .msg-text {
319
  padding-top: 6px;
320
  color: var(--text-primary);
 
321
  }
322
-
323
- /* Avatar Bot */
324
  .bot-avatar {
325
  width: 34px;
326
  height: 34px;
@@ -330,8 +550,6 @@ header {
330
  background-size: cover;
331
  box-shadow: 0 2px 6px rgba(0,0,0,0.2);
332
  }
333
-
334
- /* Acciones Bot */
335
  .bot-actions {
336
  display: flex;
337
  gap: 10px;
@@ -339,7 +557,6 @@ header {
339
  transition: opacity 0.3s;
340
  margin-top: 5px;
341
  }
342
-
343
  .action-btn {
344
  background: transparent;
345
  border: none;
@@ -350,16 +567,18 @@ header {
350
  display: flex;
351
  align-items: center;
352
  transition: color 0.2s, background 0.2s;
 
353
  }
354
-
355
  .action-btn:hover {
356
  color: var(--text-primary);
357
  background: rgba(255,255,255,0.08);
358
  }
359
-
360
- .action-btn svg { width: 16px; height: 16px; fill: currentColor; }
361
-
362
- /* Efecto Escritura (BOLITA AZUL) */
 
 
363
  .typing-cursor::after {
364
  content: '';
365
  display: inline-block;
@@ -371,15 +590,12 @@ header {
371
  vertical-align: middle;
372
  animation: blink 1s infinite;
373
  }
374
-
375
- /* --- FOOTER & INPUT --- */
376
  .footer-container {
377
  padding: 0 20px 20px 20px;
378
  background: linear-gradient(to top, var(--bg-color) 85%, transparent);
379
  position: relative;
380
  z-index: 60;
381
  }
382
-
383
  .input-box {
384
  max-width: 850px;
385
  margin: 0 auto;
@@ -391,12 +607,10 @@ header {
391
  border: 1px solid rgba(255,255,255,0.1);
392
  transition: border-color 0.2s, box-shadow 0.2s;
393
  }
394
-
395
  .input-box:focus-within {
396
  border-color: rgba(74, 158, 255, 0.5);
397
  box-shadow: 0 0 0 2px rgba(74, 158, 255, 0.1);
398
  }
399
-
400
  #userInput {
401
  flex: 1;
402
  background: transparent;
@@ -405,8 +619,9 @@ header {
405
  font-size: 1rem;
406
  font-family: inherit;
407
  padding: 10px 0;
 
 
408
  }
409
-
410
  #mainBtn {
411
  background: white;
412
  color: black;
@@ -421,92 +636,105 @@ header {
421
  margin-left: 8px;
422
  transition: transform 0.2s;
423
  }
424
-
425
  #mainBtn:hover { transform: scale(1.05); }
426
-
 
 
 
427
  .disclaimer {
428
  text-align: center;
429
  font-size: 0.75rem;
430
  color: #666;
431
  margin-top: 12px;
432
  }
433
-
434
- /* --- ANIMACIONES --- */
 
 
 
 
435
  @keyframes slideUpFade {
436
  from { opacity: 0; transform: translateY(15px); }
437
  to { opacity: 1; transform: translateY(0); }
438
  }
439
-
440
- @keyframes blink { 0%, 100% { opacity: 1; } 50% { opacity: 0; } }
441
-
 
442
  @keyframes pulseAvatar {
443
  0% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0.4); }
444
  70% { box-shadow: 0 0 0 8px rgba(74, 158, 255, 0); }
445
  100% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0); }
446
  }
447
-
448
  .pulsing { animation: pulseAvatar 1.5s infinite; }
449
-
450
  ::-webkit-scrollbar { width: 8px; }
451
  ::-webkit-scrollbar-track { background: transparent; }
452
  ::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }
453
-
 
 
 
 
 
 
 
454
  </style>
455
  </head>
456
  <body>
457
-
458
  <header>
459
  <div class="brand-wrapper" onclick="location.reload()">
460
  <div class="brand-logo"></div>
461
  <div class="brand-text">
462
- MTP <span class="version-badge">2</span>
463
  </div>
464
  </div>
 
465
  </header>
466
-
467
  <div id="chatScroll" class="chat-scroll">
468
- <!-- Bienvenida -->
469
  <div class="msg-row bot" style="animation-delay: 0.1s;">
470
  <div class="bot-avatar"></div>
471
  <div class="msg-content-wrapper">
472
  <div class="msg-text">
473
- ¡Hola! Soy MTP 2. ¿En qué puedo ayudarte hoy?
 
 
 
 
 
 
 
 
474
  </div>
475
  </div>
476
  </div>
477
  </div>
478
-
479
  <div class="footer-container">
480
  <div class="input-box">
481
- <input type="text" id="userInput" placeholder="Escribe un mensaje..." autocomplete="off">
482
- <button id="mainBtn" onclick="handleBtnClick()">
483
- <!-- Icono dinámico -->
484
- </button>
485
  </div>
486
  <div class="disclaimer">
487
- MTP puede cometer errores. Considera verificar la información importante.
488
  </div>
489
  </div>
490
-
491
  <script>
492
  const chatScroll = document.getElementById('chatScroll');
493
  const userInput = document.getElementById('userInput');
494
  const mainBtn = document.getElementById('mainBtn');
495
-
496
- // Variables de Estado
497
  let isGenerating = false;
498
  let abortController = null;
499
  let typingTimeout = null;
500
  let lastUserPrompt = "";
501
-
502
- // Iconos SVG
503
  const ICON_SEND = `<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M22 2L11 13M22 2l-7 20-4-9-9-4 20-7z"></path></svg>`;
504
  const ICON_STOP = `<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" stroke="currentColor" stroke-width="0"><rect x="2" y="2" width="20" height="20" rx="4" ry="4"></rect></svg>`;
505
-
506
- // Inicial
507
  mainBtn.innerHTML = ICON_SEND;
508
 
509
- // --- UTILS ---
 
 
 
 
 
510
  function scrollToBottom() {
511
  chatScroll.scrollTop = chatScroll.scrollHeight;
512
  }
@@ -514,16 +742,19 @@ function scrollToBottom() {
514
  function setBtnState(state) {
515
  if (state === 'sending') {
516
  mainBtn.innerHTML = ICON_STOP;
 
517
  isGenerating = true;
 
 
 
518
  } else {
519
  mainBtn.innerHTML = ICON_SEND;
 
520
  isGenerating = false;
521
  abortController = null;
522
  }
523
  }
524
 
525
- // --- CORE ---
526
-
527
  function handleBtnClick() {
528
  if (isGenerating) {
529
  stopGeneration();
@@ -535,14 +766,10 @@ function handleBtnClick() {
535
  function stopGeneration() {
536
  if (abortController) abortController.abort();
537
  if (typingTimeout) clearTimeout(typingTimeout);
538
-
539
- // UI Limpieza
540
  const activeCursor = document.querySelector('.typing-cursor');
541
  if (activeCursor) activeCursor.classList.remove('typing-cursor');
542
-
543
  const activeAvatar = document.querySelector('.pulsing');
544
  if (activeAvatar) activeAvatar.classList.remove('pulsing');
545
-
546
  setBtnState('idle');
547
  userInput.focus();
548
  }
@@ -550,18 +777,18 @@ function stopGeneration() {
550
  async function sendMessage(textOverride = null) {
551
  const text = textOverride || userInput.value.trim();
552
  if (!text) return;
553
-
554
  lastUserPrompt = text;
555
-
556
  if (!textOverride) {
557
  userInput.value = '';
 
558
  addMessage(text, 'user');
559
  }
560
 
561
  setBtnState('sending');
562
  abortController = new AbortController();
563
-
564
- // Bot Placeholder
565
  const botRow = document.createElement('div');
566
  botRow.className = 'msg-row bot';
567
 
@@ -573,42 +800,66 @@ async function sendMessage(textOverride = null) {
573
 
574
  const msgText = document.createElement('div');
575
  msgText.className = 'msg-text';
576
-
577
  wrapper.appendChild(msgText);
578
  botRow.appendChild(avatar);
579
  botRow.appendChild(wrapper);
580
  chatScroll.appendChild(botRow);
581
  scrollToBottom();
582
-
583
  try {
 
 
584
  const response = await fetch('/generate', {
585
  method: 'POST',
586
  headers: { 'Content-Type': 'application/json' },
587
- body: JSON.stringify({ text: text }),
 
 
 
 
 
 
 
 
588
  signal: abortController.signal
589
  });
590
-
591
  const data = await response.json();
592
 
593
  if (!isGenerating) return;
594
-
595
  avatar.classList.remove('pulsing');
 
 
 
 
 
 
 
596
  const reply = data.reply || "No entendí eso.";
 
 
597
 
598
  await typeWriter(msgText, reply);
599
 
600
  if (isGenerating) {
 
 
 
 
 
 
601
  addActions(wrapper, reply);
602
  setBtnState('idle');
603
  }
604
-
605
  } catch (error) {
606
  if (error.name === 'AbortError') {
607
  msgText.textContent += " [Detenido]";
608
  } else {
 
609
  avatar.classList.remove('pulsing');
610
- msgText.textContent = "Error de conexión.";
611
- msgText.style.color = "#ff8b8b";
612
  setBtnState('idle');
613
  }
614
  }
@@ -617,9 +868,11 @@ async function sendMessage(textOverride = null) {
617
  function addMessage(text, sender) {
618
  const row = document.createElement('div');
619
  row.className = `msg-row ${sender}`;
 
620
  const content = document.createElement('div');
621
  content.className = 'msg-content';
622
  content.textContent = text;
 
623
  row.appendChild(content);
624
  chatScroll.appendChild(row);
625
  scrollToBottom();
@@ -636,7 +889,7 @@ function typeWriter(element, text, speed = 12) {
636
  resolve();
637
  return;
638
  }
639
-
640
  if (i < text.length) {
641
  element.textContent += text.charAt(i);
642
  i++;
@@ -647,6 +900,7 @@ function typeWriter(element, text, speed = 12) {
647
  resolve();
648
  }
649
  }
 
650
  type();
651
  });
652
  }
@@ -657,18 +911,23 @@ function addActions(wrapperElement, textToCopy) {
657
 
658
  const copyBtn = document.createElement('button');
659
  copyBtn.className = 'action-btn';
660
- copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>`;
661
  copyBtn.onclick = () => {
662
- navigator.clipboard.writeText(textToCopy);
 
 
 
 
 
663
  };
664
-
665
  const regenBtn = document.createElement('button');
666
  regenBtn.className = 'action-btn';
667
- regenBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M23 4v6h-6"></path><path d="M1 20v-6h6"></path><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"></path></svg>`;
668
  regenBtn.onclick = () => {
669
  sendMessage(lastUserPrompt);
670
  };
671
-
672
  actionsDiv.appendChild(copyBtn);
673
  actionsDiv.appendChild(regenBtn);
674
  wrapperElement.appendChild(actionsDiv);
@@ -678,19 +937,40 @@ function addActions(wrapperElement, textToCopy) {
678
  }
679
 
680
  userInput.addEventListener('keydown', (e) => {
681
- if (e.key === 'Enter') handleBtnClick();
 
 
 
682
  });
683
 
684
- window.onload = () => userInput.focus();
685
-
 
 
 
 
 
 
 
 
 
686
  </script>
687
  </body>
688
  </html>
689
-
690
  """
 
691
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
692
  uvicorn.run(
693
  app,
694
  host="0.0.0.0",
695
- port=int(os.environ.get("PORT", 7860))
 
696
  )
 
3
  import torch
4
  import pickle
5
  import time
6
+ import gc
7
+ from fastapi import FastAPI, Request
8
  from fastapi.responses import HTMLResponse, StreamingResponse
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from pydantic import BaseModel, Field
11
  from huggingface_hub import snapshot_download
12
  import uvicorn
13
 
14
  # ======================
15
+ # CONFIGURACIÓN DE DISPOSITIVO
16
  # ======================
17
  if torch.cuda.is_available():
18
  DEVICE = "cuda"
 
21
  DEVICE = "cpu"
22
  print("⚠️ GPU no detectada. Usando CPU (puede ser más lento).")
23
 
24
+ # Optimización de hilos para CPU
25
+ if DEVICE == "cpu":
26
+ torch.set_num_threads(max(1, os.cpu_count() // 2))
27
+
28
  torch.set_grad_enabled(False)
 
29
 
30
  MODEL_REPO = "TeszenAI/MTP3.7"
31
 
32
  # ======================
33
+ # DESCARGA Y CARGA DEL MODELO
34
  # ======================
35
+ print(f"📦 Descargando modelo desde {MODEL_REPO}...")
 
36
  repo_path = snapshot_download(
37
  repo_id=MODEL_REPO,
38
  repo_type="model",
 
41
 
42
  sys.path.insert(0, repo_path)
43
 
44
+ # Importar modelo mejorado compatible
45
  from model import MTPMiniModel
46
  from tokenizer import MTPTokenizer
47
 
48
+ print("🔧 Cargando tensores y configuración...")
 
 
 
49
  with open(os.path.join(repo_path, "mtp_mini.pkl"), "rb") as f:
50
  model_data = pickle.load(f)
51
 
52
+ tokenizer = MTPTokenizer(os.path.join(repo_path, "mtp_tokenizer.model"))
 
 
 
53
  VOCAB_SIZE = tokenizer.sp.get_piece_size()
54
  config = model_data["config"]
55
 
56
+ # Detectar si el modelo usa SwiGLU
57
+ use_swiglu = config["model"].get("use_swiglu", False)
58
+
59
+ print(f"🧠 Inicializando modelo...")
60
+ print(f" → Vocabulario: {VOCAB_SIZE}")
61
+ print(f" → Dimensión: {config['model']['d_model']}")
62
+ print(f" → Capas: {config['model']['n_layers']}")
63
+ print(f" → Cabezas: {config['model']['n_heads']}")
64
+ print(f" → SwiGLU: {'✓' if use_swiglu else '✗'}")
65
+
66
  model = MTPMiniModel(
67
  vocab_size=VOCAB_SIZE,
68
  d_model=config["model"]["d_model"],
 
70
  n_heads=config["model"]["n_heads"],
71
  d_ff=config["model"]["d_ff"],
72
  max_seq_len=config["model"]["max_seq_len"],
73
+ dropout=0.0,
74
+ use_swiglu=use_swiglu
75
  )
76
 
77
  model.load_state_dict(model_data["model_state_dict"])
78
  model.eval()
79
 
80
+ # Cuantización para CPU
 
 
81
  if DEVICE == "cpu":
82
+ print("⚡ Aplicando cuantización dinámica para CPU...")
83
  model = torch.quantization.quantize_dynamic(
84
+ model,
85
+ {torch.nn.Linear},
86
  dtype=torch.qint8
87
  )
 
88
 
89
  model.to(DEVICE)
 
 
 
 
 
 
90
 
91
+ param_count = sum(p.numel() for p in model.parameters())
92
+ print(f"✅ Modelo cargado: {param_count:,} parámetros ({param_count/1e6:.1f}M)")
93
 
94
  # ======================
95
+ # API CONFIG
96
  # ======================
97
+ app = FastAPI(
98
+ title="MTP-3.5 API",
99
+ description="API para modelo de lenguaje MTP-3.5 mejorado con RoPE, RMSNorm y SwiGLU",
100
+ version="3.5"
101
+ )
102
 
103
+ app.add_middleware(
104
+ CORSMiddleware,
105
+ allow_origins=["*"],
106
+ allow_methods=["*"],
107
+ allow_headers=["*"],
108
+ )
109
 
110
+ class PromptRequest(BaseModel):
111
+ text: str = Field(..., max_length=2000, description="Texto de entrada")
112
+ max_tokens: int = Field(default=150, ge=10, le=300, description="Tokens máximos a generar")
113
+ temperature: float = Field(default=0.7, ge=0.1, le=2.0, description="Temperatura de muestreo")
114
+ top_k: int = Field(default=40, ge=1, le=100, description="Top-k sampling")
115
+ top_p: float = Field(default=0.92, ge=0.1, le=1.0, description="Top-p (nucleus) sampling")
116
+ repetition_penalty: float = Field(default=1.15, ge=1.0, le=2.0, description="Penalización por repetición")
117
+ min_length: int = Field(default=20, ge=5, le=100, description="Longitud mínima de respuesta")
118
+
119
+ def build_prompt(user_input: str) -> str:
120
+ """Construye el prompt en el formato del modelo"""
121
+ return f"### Instrucción:\n{user_input}\n\n### Respuesta:\n"
122
 
123
  # ======================
124
+ # GESTIÓN DE CARGA
125
  # ======================
126
+ ACTIVE_REQUESTS = 0
127
+ MAX_CONCURRENT_REQUESTS = 3
128
+
129
  @app.post("/generate")
130
+ async def generate(req: PromptRequest):
131
+ """Endpoint principal de generación de texto con control de calidad"""
132
+ global ACTIVE_REQUESTS
133
+
134
+ if ACTIVE_REQUESTS >= MAX_CONCURRENT_REQUESTS:
135
+ return {
136
+ "reply": "El servidor está ocupado. Por favor, intenta de nuevo en unos segundos.",
137
+ "error": "too_many_requests",
138
+ "active_requests": ACTIVE_REQUESTS
139
+ }
140
+
141
+ ACTIVE_REQUESTS += 1
142
+
143
+ # Ajuste dinámico bajo carga
144
+ dyn_max_tokens = req.max_tokens
145
+ dyn_temperature = req.temperature
146
+
147
+ if ACTIVE_REQUESTS > 1:
148
+ print(f"⚠️ Carga alta ({ACTIVE_REQUESTS} requests). Ajustando parámetros.")
149
+ dyn_max_tokens = min(dyn_max_tokens, 120)
150
+ dyn_temperature = max(0.6, dyn_temperature * 0.95)
151
+
152
+ user_input = req.text.strip()
153
  if not user_input:
154
+ ACTIVE_REQUESTS -= 1
155
+ return {"reply": "", "tokens_generated": 0}
156
 
157
  full_prompt = build_prompt(user_input)
158
  tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
159
  input_ids = torch.tensor([tokens], device=DEVICE)
160
 
161
+ try:
162
+ start_time = time.time()
163
+
164
+ with torch.no_grad():
165
+ output_ids = model.generate(
166
+ input_ids,
167
+ max_new_tokens=dyn_max_tokens,
168
+ temperature=dyn_temperature,
169
+ top_k=req.top_k,
170
+ top_p=req.top_p,
171
+ repetition_penalty=req.repetition_penalty,
172
+ min_length=req.min_length,
173
+ eos_token_id=tokenizer.eos_id()
174
+ )
175
+
176
+ gen_tokens = output_ids[0, len(tokens):].tolist()
177
+
178
+ # Filtro de seguridad mejorado
179
+ safe_tokens = []
180
+ for t in gen_tokens:
181
+ if 0 <= t < VOCAB_SIZE and t != tokenizer.eos_id():
182
+ safe_tokens.append(t)
183
+ elif t == tokenizer.eos_id():
184
+ break
185
+
186
+ response = tokenizer.decode(safe_tokens).strip()
187
+
188
+ # Limpiar marcadores de sección
189
+ if "###" in response:
190
+ response = response.split("###")[0].strip()
191
+
192
+ # Remover repeticiones al final
193
+ if response.endswith(("...", ". . .", "…")):
194
+ response = response.rstrip(".")
195
+
196
+ generation_time = time.time() - start_time
197
+ tokens_per_second = len(safe_tokens) / generation_time if generation_time > 0 else 0
198
+
199
+ return {
200
+ "reply": response,
201
+ "tokens_generated": len(safe_tokens),
202
+ "generation_time": round(generation_time, 2),
203
+ "tokens_per_second": round(tokens_per_second, 1),
204
+ "model": "MTP-3.5",
205
+ "device": DEVICE
206
+ }
207
+
208
+ except Exception as e:
209
+ print(f"❌ Error durante generación: {e}")
210
+ import traceback
211
+ traceback.print_exc()
212
+ return {
213
+ "reply": "Lo siento, ocurrió un error al procesar tu solicitud.",
214
+ "error": str(e)
215
+ }
216
+
217
+ finally:
218
+ ACTIVE_REQUESTS -= 1
219
+ if DEVICE == "cuda":
220
+ torch.cuda.empty_cache()
221
+ gc.collect()
222
 
223
  # ======================
224
+ # 📡 STREAMING SSE
225
  # ======================
226
  @app.get("/generate_sse")
227
+ def generate_sse(
228
+ text: str,
229
+ max_tokens: int = 150,
230
+ temperature: float = 0.7,
231
+ top_k: int = 40,
232
+ top_p: float = 0.92,
233
+ repetition_penalty: float = 1.15
234
+ ):
235
+ """Endpoint de streaming con Server-Sent Events mejorado"""
236
+ global ACTIVE_REQUESTS
237
+
238
+ if ACTIVE_REQUESTS >= MAX_CONCURRENT_REQUESTS:
239
+ def error_stream():
240
+ yield "data:[ERROR: Servidor ocupado]\n\n"
241
+ return StreamingResponse(error_stream(), media_type="text/event-stream")
242
+
243
+ ACTIVE_REQUESTS += 1
244
+
245
  def event_stream():
246
+ try:
247
+ full_prompt = build_prompt(text)
248
+ tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
249
+ input_ids = torch.tensor([tokens], device=DEVICE)
250
+ generated_tokens = []
251
+
252
+ # Ajuste dinámico
253
+ limit = min(100 if ACTIVE_REQUESTS > 1 else max_tokens, 200)
254
+ temp = max(0.6, temperature * 0.95) if ACTIVE_REQUESTS > 1 else temperature
255
+
256
+ for step in range(limit):
257
+ with torch.no_grad():
258
+ logits, _ = model(input_ids)
259
+ logits = logits[:, -1, :VOCAB_SIZE].clone()
260
+
261
+ # Aplicar repetition penalty
262
+ if repetition_penalty != 1.0:
263
+ for token_id in set(input_ids[0].tolist()):
264
+ if logits[0, token_id] < 0:
265
+ logits[0, token_id] *= repetition_penalty
266
+ else:
267
+ logits[0, token_id] /= repetition_penalty
268
+
269
+ # Temperature scaling
270
+ logits = logits / temp
271
+
272
+ # Top-k filtering
273
+ if top_k > 0:
274
+ v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
275
+ logits[logits < v[:, [-1]]] = float('-inf')
276
+
277
+ # Top-p (nucleus) filtering
278
+ if top_p < 1.0:
279
+ sorted_logits, sorted_indices = torch.sort(logits, descending=True)
280
+ cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
281
+ sorted_indices_to_remove = cumulative_probs > top_p
282
+ sorted_indices_to_remove[:, 1:] = sorted_indices_to_remove[:, :-1].clone()
283
+ sorted_indices_to_remove[:, 0] = 0
284
+ indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
285
+ logits[indices_to_remove] = float('-inf')
286
+
287
+ # Sample
288
+ probs = torch.softmax(logits, dim=-1)
289
+ next_id = torch.multinomial(probs, num_samples=1).item()
290
+
291
+ if next_id == tokenizer.eos_id():
292
+ break
293
+
294
+ if 0 <= next_id < VOCAB_SIZE:
295
+ generated_tokens.append(next_id)
296
+ token_text = tokenizer.decode([next_id])
297
+
298
+ # Limpiar salida
299
+ if "###" in token_text:
300
+ break
301
+
302
+ yield f"data:{token_text}\n\n"
303
+
304
+ input_ids = torch.cat(
305
+ [input_ids, torch.tensor([[next_id]], device=DEVICE)],
306
+ dim=1
307
+ )
308
+ time.sleep(0.02) # Control de velocidad
309
+
310
+ yield "data:[DONE]\n\n"
311
+
312
+ except Exception as e:
313
+ print(f"❌ Error en streaming: {e}")
314
+ yield f"data:[ERROR: {str(e)}]\n\n"
315
+
316
+ finally:
317
+ ACTIVE_REQUESTS -= 1
318
+ if DEVICE == "cuda":
319
+ torch.cuda.empty_cache()
320
+ gc.collect()
321
+
322
+ return StreamingResponse(event_stream(), media_type="text/event-stream")
323
 
324
+ # ======================
325
+ # 📊 ENDPOINTS DE INFORMACIÓN
326
+ # ======================
327
+ @app.get("/health")
328
+ def health_check():
329
+ """Check del estado del servicio"""
330
+ memory_info = {}
331
+ if DEVICE == "cuda":
332
+ memory_info = {
333
+ "gpu_memory_allocated_mb": round(torch.cuda.memory_allocated() / 1024**2, 2),
334
+ "gpu_memory_reserved_mb": round(torch.cuda.memory_reserved() / 1024**2, 2)
335
+ }
336
+
337
+ return {
338
+ "status": "healthy",
339
+ "model": "MTP-3.5",
340
+ "device": DEVICE,
341
+ "active_requests": ACTIVE_REQUESTS,
342
+ "max_concurrent_requests": MAX_CONCURRENT_REQUESTS,
343
+ "vocab_size": VOCAB_SIZE,
344
+ "parameters": sum(p.numel() for p in model.parameters()),
345
+ **memory_info
346
+ }
347
 
348
+ @app.get("/info")
349
+ def model_info():
350
+ """Información detallada del modelo"""
351
+ improvements = [
352
+ "RoPE (Rotary Position Embedding)",
353
+ "RMSNorm (Root Mean Square Normalization)",
354
+ "Label Smoothing (0.1)",
355
+ "Repetition Penalty",
356
+ "Early Stopping",
357
+ "EOS Loss Weight",
358
+ "Length Control",
359
+ "Gradient Accumulation"
360
+ ]
361
+
362
+ if config["model"].get("use_swiglu", False):
363
+ improvements.append("SwiGLU Activation")
364
+
365
+ return {
366
+ "model_name": "MTP-3.5",
367
+ "version": "3.5",
368
+ "architecture": {
369
+ "d_model": config["model"]["d_model"],
370
+ "n_layers": config["model"]["n_layers"],
371
+ "n_heads": config["model"]["n_heads"],
372
+ "d_ff": config["model"]["d_ff"],
373
+ "max_seq_len": config["model"]["max_seq_len"],
374
+ "vocab_size": VOCAB_SIZE,
375
+ "use_swiglu": config["model"].get("use_swiglu", False),
376
+ "dropout": config["model"]["dropout"]
377
+ },
378
+ "parameters": sum(p.numel() for p in model.parameters()),
379
+ "parameters_human": f"{sum(p.numel() for p in model.parameters())/1e6:.1f}M",
380
+ "device": DEVICE,
381
+ "improvements": improvements,
382
+ "training_config": {
383
+ "batch_size": config["training"]["batch_size"],
384
+ "accumulation_steps": config["training"]["accumulation_steps"],
385
+ "learning_rate": config["training"]["learning_rate"],
386
+ "weight_decay": config["training"]["weight_decay"],
387
+ "epochs": config["training"]["epochs"]
388
+ }
389
+ }
390
 
391
+ @app.get("/config")
392
+ def get_config():
393
+ """Obtener configuración completa del modelo"""
394
+ return {
395
+ "model": config["model"],
396
+ "training": config["training"],
397
+ "data": config["data"],
398
+ "generation": config.get("generation", {})
399
+ }
400
 
401
  # ======================
402
+ # 🎨 INTERFAZ WEB MEJORADA
403
  # ======================
404
  @app.get("/", response_class=HTMLResponse)
405
  def chat_ui():
406
  return """
 
407
  <!DOCTYPE html>
408
  <html lang="es">
409
  <head>
410
  <meta charset="UTF-8">
411
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
412
+ <title>MTP 3.5 - Chat Interface</title>
413
  <link rel="preconnect" href="https://fonts.googleapis.com">
414
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
415
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
416
  <style>
 
417
  :root {
418
  --bg-color: #131314;
419
  --surface-color: #1E1F20;
 
421
  --text-primary: #e3e3e3;
422
  --text-secondary: #9aa0a6;
423
  --user-bubble: #282a2c;
424
+ --success-color: #34a853;
425
+ --warning-color: #fbbc04;
426
+ --error-color: #ea4335;
427
  --logo-url: url('https://i.postimg.cc/yxS54PF3/IMG-3082.jpg');
428
  }
429
+ * {
430
+ box-sizing: border-box;
431
+ outline: none;
432
+ -webkit-tap-highlight-color: transparent;
433
+ }
434
  body {
435
  margin: 0;
436
  background-color: var(--bg-color);
 
441
  flex-direction: column;
442
  overflow: hidden;
443
  }
 
 
444
  header {
445
  padding: 12px 20px;
446
  display: flex;
 
454
  z-index: 50;
455
  border-bottom: 1px solid rgba(255,255,255,0.05);
456
  }
 
457
  .brand-wrapper {
458
  display: flex;
459
  align-items: center;
460
  gap: 12px;
461
  cursor: pointer;
462
  }
 
463
  .brand-logo {
464
  width: 32px;
465
  height: 32px;
 
469
  background-position: center;
470
  border: 1px solid rgba(255,255,255,0.1);
471
  }
 
472
  .brand-text {
473
  font-weight: 500;
474
  font-size: 1.05rem;
 
476
  align-items: center;
477
  gap: 8px;
478
  }
 
479
  .version-badge {
480
  font-size: 0.75rem;
481
  background: rgba(74, 158, 255, 0.15);
 
484
  border-radius: 12px;
485
  font-weight: 600;
486
  }
487
+ .status-indicator {
488
+ width: 8px;
489
+ height: 8px;
490
+ border-radius: 50%;
491
+ background: var(--success-color);
492
+ animation: pulse 2s infinite;
493
+ }
494
+ @keyframes pulse {
495
+ 0%, 100% { opacity: 1; }
496
+ 50% { opacity: 0.5; }
497
+ }
498
  .chat-scroll {
499
  flex: 1;
500
  overflow-y: auto;
 
507
  width: 100%;
508
  scroll-behavior: smooth;
509
  }
 
 
510
  .msg-row {
511
  display: flex;
512
  gap: 16px;
 
515
  transform: translateY(10px);
516
  animation: slideUpFade 0.4s cubic-bezier(0.2, 0.8, 0.2, 1) forwards;
517
  }
 
518
  .msg-row.user { justify-content: flex-end; }
519
  .msg-row.bot { justify-content: flex-start; align-items: flex-start; }
 
 
520
  .msg-content {
521
  line-height: 1.6;
522
  font-size: 1rem;
523
  word-wrap: break-word;
524
  max-width: 85%;
525
  }
 
526
  .user .msg-content {
527
  background-color: var(--user-bubble);
528
  padding: 10px 18px;
 
530
  border-top-right-radius: 4px;
531
  color: #fff;
532
  }
 
533
  .bot .msg-content-wrapper {
534
  display: flex;
535
  flex-direction: column;
536
  gap: 8px;
537
  width: 100%;
538
  }
 
539
  .bot .msg-text {
540
  padding-top: 6px;
541
  color: var(--text-primary);
542
+ white-space: pre-wrap;
543
  }
 
 
544
  .bot-avatar {
545
  width: 34px;
546
  height: 34px;
 
550
  background-size: cover;
551
  box-shadow: 0 2px 6px rgba(0,0,0,0.2);
552
  }
 
 
553
  .bot-actions {
554
  display: flex;
555
  gap: 10px;
 
557
  transition: opacity 0.3s;
558
  margin-top: 5px;
559
  }
 
560
  .action-btn {
561
  background: transparent;
562
  border: none;
 
567
  display: flex;
568
  align-items: center;
569
  transition: color 0.2s, background 0.2s;
570
+ font-size: 0.85rem;
571
  }
 
572
  .action-btn:hover {
573
  color: var(--text-primary);
574
  background: rgba(255,255,255,0.08);
575
  }
576
+ .action-btn svg {
577
+ width: 16px;
578
+ height: 16px;
579
+ fill: currentColor;
580
+ margin-right: 4px;
581
+ }
582
  .typing-cursor::after {
583
  content: '';
584
  display: inline-block;
 
590
  vertical-align: middle;
591
  animation: blink 1s infinite;
592
  }
 
 
593
  .footer-container {
594
  padding: 0 20px 20px 20px;
595
  background: linear-gradient(to top, var(--bg-color) 85%, transparent);
596
  position: relative;
597
  z-index: 60;
598
  }
 
599
  .input-box {
600
  max-width: 850px;
601
  margin: 0 auto;
 
607
  border: 1px solid rgba(255,255,255,0.1);
608
  transition: border-color 0.2s, box-shadow 0.2s;
609
  }
 
610
  .input-box:focus-within {
611
  border-color: rgba(74, 158, 255, 0.5);
612
  box-shadow: 0 0 0 2px rgba(74, 158, 255, 0.1);
613
  }
 
614
  #userInput {
615
  flex: 1;
616
  background: transparent;
 
619
  font-size: 1rem;
620
  font-family: inherit;
621
  padding: 10px 0;
622
+ resize: none;
623
+ max-height: 120px;
624
  }
 
625
  #mainBtn {
626
  background: white;
627
  color: black;
 
636
  margin-left: 8px;
637
  transition: transform 0.2s;
638
  }
 
639
  #mainBtn:hover { transform: scale(1.05); }
640
+ #mainBtn:disabled {
641
+ opacity: 0.5;
642
+ cursor: not-allowed;
643
+ }
644
  .disclaimer {
645
  text-align: center;
646
  font-size: 0.75rem;
647
  color: #666;
648
  margin-top: 12px;
649
  }
650
+ .stats-badge {
651
+ font-size: 0.7rem;
652
+ color: var(--text-secondary);
653
+ margin-top: 4px;
654
+ font-family: 'Monaco', monospace;
655
+ }
656
  @keyframes slideUpFade {
657
  from { opacity: 0; transform: translateY(15px); }
658
  to { opacity: 1; transform: translateY(0); }
659
  }
660
+ @keyframes blink {
661
+ 0%, 100% { opacity: 1; }
662
+ 50% { opacity: 0; }
663
+ }
664
  @keyframes pulseAvatar {
665
  0% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0.4); }
666
  70% { box-shadow: 0 0 0 8px rgba(74, 158, 255, 0); }
667
  100% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0); }
668
  }
 
669
  .pulsing { animation: pulseAvatar 1.5s infinite; }
 
670
  ::-webkit-scrollbar { width: 8px; }
671
  ::-webkit-scrollbar-track { background: transparent; }
672
  ::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }
673
+ .error-message {
674
+ color: var(--error-color);
675
+ font-size: 0.85rem;
676
+ padding: 8px 12px;
677
+ background: rgba(234, 67, 53, 0.1);
678
+ border-radius: 8px;
679
+ margin-top: 8px;
680
+ }
681
  </style>
682
  </head>
683
  <body>
 
684
  <header>
685
  <div class="brand-wrapper" onclick="location.reload()">
686
  <div class="brand-logo"></div>
687
  <div class="brand-text">
688
+ MTP <span class="version-badge">3.5</span>
689
  </div>
690
  </div>
691
+ <div class="status-indicator" title="Sistema operativo"></div>
692
  </header>
 
693
  <div id="chatScroll" class="chat-scroll">
 
694
  <div class="msg-row bot" style="animation-delay: 0.1s;">
695
  <div class="bot-avatar"></div>
696
  <div class="msg-content-wrapper">
697
  <div class="msg-text">
698
+ ¡Hola! Soy MTP 3.5, un modelo de lenguaje mejorado con arquitectura Transformer avanzada.
699
+
700
+ Características:
701
+ • RoPE (Rotary Position Embedding)
702
+ • RMSNorm para estabilidad
703
+ • Control de repetición inteligente
704
+ • Generación coherente y fluida
705
+
706
+ ¿En qué puedo ayudarte hoy?
707
  </div>
708
  </div>
709
  </div>
710
  </div>
 
711
  <div class="footer-container">
712
  <div class="input-box">
713
+ <textarea id="userInput" placeholder="Escribe un mensaje..." rows="1" autocomplete="off"></textarea>
714
+ <button id="mainBtn" onclick="handleBtnClick()"></button>
 
 
715
  </div>
716
  <div class="disclaimer">
717
+ MTP 3.5 puede cometer errores. Considera verificar la información importante.
718
  </div>
719
  </div>
 
720
  <script>
721
  const chatScroll = document.getElementById('chatScroll');
722
  const userInput = document.getElementById('userInput');
723
  const mainBtn = document.getElementById('mainBtn');
 
 
724
  let isGenerating = false;
725
  let abortController = null;
726
  let typingTimeout = null;
727
  let lastUserPrompt = "";
 
 
728
  const ICON_SEND = `<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M22 2L11 13M22 2l-7 20-4-9-9-4 20-7z"></path></svg>`;
729
  const ICON_STOP = `<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" stroke="currentColor" stroke-width="0"><rect x="2" y="2" width="20" height="20" rx="4" ry="4"></rect></svg>`;
 
 
730
  mainBtn.innerHTML = ICON_SEND;
731
 
732
+ // Auto-resize textarea
733
+ userInput.addEventListener('input', function() {
734
+ this.style.height = 'auto';
735
+ this.style.height = Math.min(this.scrollHeight, 120) + 'px';
736
+ });
737
+
738
  function scrollToBottom() {
739
  chatScroll.scrollTop = chatScroll.scrollHeight;
740
  }
 
742
  function setBtnState(state) {
743
  if (state === 'sending') {
744
  mainBtn.innerHTML = ICON_STOP;
745
+ mainBtn.disabled = false;
746
  isGenerating = true;
747
+ } else if (state === 'disabled') {
748
+ mainBtn.disabled = true;
749
+ isGenerating = false;
750
  } else {
751
  mainBtn.innerHTML = ICON_SEND;
752
+ mainBtn.disabled = false;
753
  isGenerating = false;
754
  abortController = null;
755
  }
756
  }
757
 
 
 
758
  function handleBtnClick() {
759
  if (isGenerating) {
760
  stopGeneration();
 
766
  function stopGeneration() {
767
  if (abortController) abortController.abort();
768
  if (typingTimeout) clearTimeout(typingTimeout);
 
 
769
  const activeCursor = document.querySelector('.typing-cursor');
770
  if (activeCursor) activeCursor.classList.remove('typing-cursor');
 
771
  const activeAvatar = document.querySelector('.pulsing');
772
  if (activeAvatar) activeAvatar.classList.remove('pulsing');
 
773
  setBtnState('idle');
774
  userInput.focus();
775
  }
 
777
  async function sendMessage(textOverride = null) {
778
  const text = textOverride || userInput.value.trim();
779
  if (!text) return;
780
+
781
  lastUserPrompt = text;
782
+
783
  if (!textOverride) {
784
  userInput.value = '';
785
+ userInput.style.height = 'auto';
786
  addMessage(text, 'user');
787
  }
788
 
789
  setBtnState('sending');
790
  abortController = new AbortController();
791
+
 
792
  const botRow = document.createElement('div');
793
  botRow.className = 'msg-row bot';
794
 
 
800
 
801
  const msgText = document.createElement('div');
802
  msgText.className = 'msg-text';
803
+
804
  wrapper.appendChild(msgText);
805
  botRow.appendChild(avatar);
806
  botRow.appendChild(wrapper);
807
  chatScroll.appendChild(botRow);
808
  scrollToBottom();
809
+
810
  try {
811
+ const startTime = performance.now();
812
+
813
  const response = await fetch('/generate', {
814
  method: 'POST',
815
  headers: { 'Content-Type': 'application/json' },
816
+ body: JSON.stringify({
817
+ text: text,
818
+ max_tokens: 150,
819
+ temperature: 0.7,
820
+ top_k: 40,
821
+ top_p: 0.92,
822
+ repetition_penalty: 1.15,
823
+ min_length: 20
824
+ }),
825
  signal: abortController.signal
826
  });
827
+
828
  const data = await response.json();
829
 
830
  if (!isGenerating) return;
831
+
832
  avatar.classList.remove('pulsing');
833
+
834
+ if (data.error) {
835
+ msgText.innerHTML = `<span style="color: var(--error-color);">Error: ${data.error}</span>`;
836
+ setBtnState('idle');
837
+ return;
838
+ }
839
+
840
  const reply = data.reply || "No entendí eso.";
841
+ const endTime = performance.now();
842
+ const totalTime = ((endTime - startTime) / 1000).toFixed(2);
843
 
844
  await typeWriter(msgText, reply);
845
 
846
  if (isGenerating) {
847
+ // Agregar estadísticas
848
+ const stats = document.createElement('div');
849
+ stats.className = 'stats-badge';
850
+ stats.textContent = `${data.tokens_generated} tokens • ${data.tokens_per_second} t/s • ${totalTime}s • ${data.device}`;
851
+ wrapper.appendChild(stats);
852
+
853
  addActions(wrapper, reply);
854
  setBtnState('idle');
855
  }
 
856
  } catch (error) {
857
  if (error.name === 'AbortError') {
858
  msgText.textContent += " [Detenido]";
859
  } else {
860
+ console.error('Error:', error);
861
  avatar.classList.remove('pulsing');
862
+ msgText.innerHTML = `<span style="color: var(--error-color);">Error de conexión. Por favor, intenta de nuevo.</span>`;
 
863
  setBtnState('idle');
864
  }
865
  }
 
868
  function addMessage(text, sender) {
869
  const row = document.createElement('div');
870
  row.className = `msg-row ${sender}`;
871
+
872
  const content = document.createElement('div');
873
  content.className = 'msg-content';
874
  content.textContent = text;
875
+
876
  row.appendChild(content);
877
  chatScroll.appendChild(row);
878
  scrollToBottom();
 
889
  resolve();
890
  return;
891
  }
892
+
893
  if (i < text.length) {
894
  element.textContent += text.charAt(i);
895
  i++;
 
900
  resolve();
901
  }
902
  }
903
+
904
  type();
905
  });
906
  }
 
911
 
912
  const copyBtn = document.createElement('button');
913
  copyBtn.className = 'action-btn';
914
+ copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>Copiar`;
915
  copyBtn.onclick = () => {
916
+ navigator.clipboard.writeText(textToCopy).then(() => {
917
+ copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polyline points="20 6 9 17 4 12"></polyline></svg>Copiado`;
918
+ setTimeout(() => {
919
+ copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>Copiar`;
920
+ }, 2000);
921
+ });
922
  };
923
+
924
  const regenBtn = document.createElement('button');
925
  regenBtn.className = 'action-btn';
926
+ regenBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M23 4v6h-6"></path><path d="M1 20v-6h6"></path><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"></path></svg>Regenerar`;
927
  regenBtn.onclick = () => {
928
  sendMessage(lastUserPrompt);
929
  };
930
+
931
  actionsDiv.appendChild(copyBtn);
932
  actionsDiv.appendChild(regenBtn);
933
  wrapperElement.appendChild(actionsDiv);
 
937
  }
938
 
939
  userInput.addEventListener('keydown', (e) => {
940
+ if (e.key === 'Enter' && !e.shiftKey) {
941
+ e.preventDefault();
942
+ handleBtnClick();
943
+ }
944
  });
945
 
946
+ window.onload = () => {
947
+ userInput.focus();
948
+
949
+ // Cargar info del modelo
950
+ fetch('/info')
951
+ .then(r => r.json())
952
+ .then(data => {
953
+ console.log('Modelo cargado:', data);
954
+ })
955
+ .catch(e => console.error('Error cargando info:', e));
956
+ };
957
  </script>
958
  </body>
959
  </html>
 
960
  """
961
+
962
if __name__ == "__main__":
    # Hosting platforms (e.g. HF Spaces / containers) inject PORT; default to
    # 7860 for local runs.
    port = int(os.environ.get("PORT", 7860))

    # NOTE: the two banner lines without {port} were f-strings with no
    # placeholders (ruff F541) — the f prefix is dropped; output is unchanged.
    print("\n🚀 Iniciando servidor MTP-3.5...")
    print(f"🌐 Interfaz web: http://0.0.0.0:{port}")
    print(f"📡 API docs: http://0.0.0.0:{port}/docs")
    print(f"📊 Health check: http://0.0.0.0:{port}/health")
    print(f"ℹ️  Model info: http://0.0.0.0:{port}/info")
    print("\n✅ Sistema listo. Presiona Ctrl+C para detener.")

    # Bind on all interfaces so container/host port mapping works.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
        log_level="info"
    )