teszenofficial committed on
Commit
2632d84
·
verified ·
1 Parent(s): d38f4b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -65
app.py CHANGED
@@ -2,21 +2,28 @@ import os
2
  import sys
3
  import torch
4
  import pickle
 
5
  from fastapi import FastAPI
6
- from fastapi.responses import HTMLResponse
7
  from pydantic import BaseModel
8
  from huggingface_hub import snapshot_download
9
  import uvicorn
10
 
 
 
 
 
 
 
11
  # ======================
12
  # DISPOSITIVO
13
  # ======================
14
  if torch.cuda.is_available():
15
  DEVICE = "cuda"
16
- print("✅ GPU NVIDIA detectada. Usando CUDA.")
17
  else:
18
  DEVICE = "cpu"
19
- print("⚠️ GPU no detectada. Usando CPU (puede ser más lento).")
20
 
21
  MODEL_REPO = "teszenofficial/mtp1"
22
 
@@ -24,8 +31,6 @@ MODEL_REPO = "teszenofficial/mtp1"
24
  # DESCARGA MODELO
25
  # ======================
26
  print("--- SISTEMA MTP 1.1 ---")
27
- print(f"Descargando/Verificando modelo desde {MODEL_REPO}...")
28
-
29
  repo_path = snapshot_download(
30
  repo_id=MODEL_REPO,
31
  repo_type="model",
@@ -38,26 +43,15 @@ from model import MTPMiniModel
38
  from tokenizer import MTPTokenizer
39
 
40
  # ======================
41
- # CARGA DEL MODELO
42
  # ======================
43
- print("Cargando modelo en memoria...")
44
-
45
- # Buscar automáticamente el .pkl
46
- pkl_file = None
47
- for f in os.listdir(repo_path):
48
- if f.endswith(".pkl"):
49
- pkl_file = f
50
- break
51
-
52
- if not pkl_file:
53
- raise FileNotFoundError("❌ No se encontró el archivo .pkl del modelo")
54
 
 
55
  with open(os.path.join(repo_path, pkl_file), "rb") as f:
56
  model_data = pickle.load(f)
57
 
58
- tokenizer = MTPTokenizer(
59
- os.path.join(repo_path, "mtp_tokenizer.model")
60
- )
61
 
62
  config = model_data["config"]
63
 
@@ -75,104 +69,153 @@ model.load_state_dict(model_data["model_state_dict"])
75
  model.to(DEVICE)
76
  model.eval()
77
 
78
- # 🔒 Forzar vocab correcto
79
  VOCAB_SIZE = tokenizer.sp.get_piece_size()
80
  model.vocab_size = VOCAB_SIZE
81
 
82
- print(f"🚀 MTP 1.1 listo y corriendo en: {DEVICE.upper()}")
83
 
84
  # ======================
85
  # FASTAPI
86
  # ======================
87
- app = FastAPI(title="MTP 1.1 API")
88
 
89
  class Prompt(BaseModel):
90
  text: str
91
 
 
 
 
92
  @app.post("/generate")
93
  def generate(prompt: Prompt):
94
  try:
95
- user_input = prompt.text.strip()
96
- if not user_input:
97
  return {"reply": ""}
98
 
99
- full_prompt = f"### Instrucción:\n{user_input}\n\n### Respuesta:\n"
100
  tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
101
-
102
  input_ids = torch.tensor([tokens], device=DEVICE)
103
 
104
  with torch.no_grad():
105
- output_ids = model.generate(
106
  input_ids,
107
- max_new_tokens=80, # CPU-safe
108
  temperature=0.7,
109
  top_k=50,
110
  top_p=0.9
111
  )
112
 
113
- gen_tokens = output_ids[0, len(tokens):].tolist()
 
 
114
 
115
- # 🔒 FILTRO CRÍTICO
116
- safe_tokens = [
117
- t for t in gen_tokens
118
- if 0 <= t < VOCAB_SIZE and t != tokenizer.eos_id()
119
- ]
120
-
121
- response = tokenizer.decode(safe_tokens).strip()
122
-
123
- if "###" in response:
124
- response = response.split("###")[0].strip()
125
-
126
- return {"reply": response}
127
 
128
  except Exception as e:
129
- print("❌ ERROR EN /generate:", str(e))
130
- return {
131
- "reply": "Ocurrió un error interno al generar la respuesta."
132
- }
133
 
134
  # ======================
135
- # FRONTEND
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  # ======================
137
  @app.get("/", response_class=HTMLResponse)
138
- def chat_ui():
139
- return """<!DOCTYPE html>
 
140
  <html lang="es">
141
  <head>
142
  <meta charset="UTF-8">
143
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
144
  <title>MTP 1.1</title>
145
  <style>
146
- body{margin:0;background:#131314;color:#e3e3e3;font-family:sans-serif}
147
- #chat{max-width:800px;margin:auto;padding:20px}
148
- .msg{margin:10px 0}
149
  .user{color:#8ab4f8}
150
  .bot{color:#e3e3e3}
151
- input{width:100%;padding:10px;border-radius:8px;border:none}
152
- button{margin-top:10px;padding:10px;border:none;border-radius:8px}
153
  </style>
154
  </head>
155
  <body>
 
156
  <div id="chat">
157
- <div class="msg bot">Hola, soy MTP 1.1 ¿en qué puedo ayudarte?</div>
158
  </div>
159
- <input id="inp" placeholder="Escribe aquí..." />
 
160
  <button onclick="send()">Enviar</button>
161
 
162
  <script>
163
  async function send(){
164
- const inp=document.getElementById('inp');
165
- const text=inp.value.trim();
166
  if(!text)return;
167
- inp.value="";
168
- document.getElementById('chat').innerHTML+=`<div class="msg user">${text}</div>`;
169
- const r=await fetch('/generate',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({text})});
170
- const j=await r.json();
171
- document.getElementById('chat').innerHTML+=`<div class="msg bot">${j.reply}</div>`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  }
173
  </script>
 
174
  </body>
175
- </html>"""
 
176
 
177
  # ======================
178
  # ENTRYPOINT
 
import os
import sys
import torch
import pickle
import time
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, StreamingResponse
from pydantic import BaseModel
from huggingface_hub import snapshot_download
import uvicorn

# ======================
# CPU OPTIMIZATION
# ======================
# os.cpu_count() is documented to return None when the count cannot be
# determined; fall back to 2 so the integer division can never raise
# TypeError.  Half the cores keeps the box responsive while serving.
torch.set_num_threads(max(1, (os.cpu_count() or 2) // 2))
# Inference-only service: disable autograd globally to save memory and time.
torch.set_grad_enabled(False)

# ======================
# DEVICE SELECTION
# ======================
if torch.cuda.is_available():
    DEVICE = "cuda"
    print("✅ GPU detectada. Usando CUDA.")
else:
    DEVICE = "cpu"
    print("⚠️ GPU no detectada. Usando CPU.")

# Hugging Face Hub repo holding the checkpoint and tokenizer files.
MODEL_REPO = "teszenofficial/mtp1"
29
 
 
31
  # DESCARGA MODELO
32
  # ======================
33
  print("--- SISTEMA MTP 1.1 ---")
 
 
34
  repo_path = snapshot_download(
35
  repo_id=MODEL_REPO,
36
  repo_type="model",
 
43
  from tokenizer import MTPTokenizer
44
 
# ======================
# MODEL LOADING
# ======================
print("Cargando modelo...")

# Locate the pickled checkpoint inside the downloaded snapshot.
# next(..., None) plus an explicit raise instead of a bare next(): a missing
# checkpoint should surface as a clear FileNotFoundError at startup, not an
# opaque StopIteration.
pkl_file = next((f for f in os.listdir(repo_path) if f.endswith(".pkl")), None)
if pkl_file is None:
    raise FileNotFoundError("❌ No se encontró el archivo .pkl del modelo")

# NOTE(review): pickle.load executes arbitrary code from the file; this is
# only acceptable because the snapshot comes from our own trusted model repo.
with open(os.path.join(repo_path, pkl_file), "rb") as f:
    model_data = pickle.load(f)

# SentencePiece-backed tokenizer shipped alongside the checkpoint.
tokenizer = MTPTokenizer(os.path.join(repo_path, "mtp_tokenizer.model"))

config = model_data["config"]
57
 
 
# Pin the model's vocab size to the tokenizer's actual piece count so that
# generation can never index past the embedding table.
VOCAB_SIZE = tokenizer.sp.get_piece_size()

# Move weights to the target device and switch to inference mode.
model.to(DEVICE)
model.eval()
model.vocab_size = VOCAB_SIZE

print(f"🚀 MTP 1.1 listo en {DEVICE.upper()}")
76
 
# ======================
# FASTAPI
# ======================
app = FastAPI(title="MTP 1.1")


class Prompt(BaseModel):
    """Request body shared by the generation endpoints."""

    # Raw user prompt; handlers strip surrounding whitespace themselves.
    text: str
84
 
# ======================
# NORMAL (NON-STREAMING) GENERATION
# ======================
@app.post("/generate")
def generate(prompt: Prompt):
    """Run a full generation pass and return the decoded reply as JSON."""
    try:
        user_text = prompt.text.strip()
        if not user_text:
            # Nothing to answer for an empty prompt.
            return {"reply": ""}

        # Instruction-style template the model was fine-tuned on.
        full_prompt = f"### Instrucción:\n{user_text}\n\n### Respuesta:\n"
        prompt_ids = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
        input_ids = torch.tensor([prompt_ids], device=DEVICE)

        with torch.no_grad():
            output = model.generate(
                input_ids,
                max_new_tokens=80,
                temperature=0.7,
                top_k=50,
                top_p=0.9
            )

        # Keep only the newly generated tail, dropping the echoed prompt.
        new_ids = output[0, len(prompt_ids):].tolist()

        # Drop out-of-vocabulary ids and the EOS marker before decoding.
        eos = tokenizer.eos_id()
        valid_ids = []
        for token_id in new_ids:
            if 0 <= token_id < VOCAB_SIZE and token_id != eos:
                valid_ids.append(token_id)

        return {"reply": tokenizer.decode(valid_ids).strip()}

    except Exception as e:
        # Service boundary: log and return a generic message rather than
        # leaking a traceback to the client.
        print("❌ ERROR:", e)
        return {"reply": "Error interno."}
 
 
117
 
# ======================
# STREAMING GENERATION (CHATGPT-STYLE)
# ======================
@app.post("/generate_stream")
def generate_stream(prompt: Prompt):
    """Stream the reply token-by-token as plain text.

    NOTE(review): decoding here is greedy (argmax).  The original code
    divided the logits by a 0.7 temperature and ran softmax before taking
    argmax, but both transforms are monotonic and can never change which
    index is largest — that work is dropped here, so the chosen tokens are
    identical.  To honor temperature/top-k like /generate does, this would
    need torch.multinomial over the adjusted distribution instead.
    """
    def stream():
        try:
            text = prompt.text.strip()
            if not text:
                # Empty prompt: emit nothing rather than generating from
                # the bare instruction template (mirrors /generate's guard).
                return

            full_prompt = f"### Instrucción:\n{text}\n\n### Respuesta:\n"
            tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
            input_ids = torch.tensor([tokens], device=DEVICE)

            for _ in range(80):  # max_new_tokens, mirrors /generate
                with torch.no_grad():
                    logits = model(input_ids)[:, -1, :]

                # Restrict to the tokenizer's real vocab before choosing;
                # argmax is then guaranteed to be in [0, VOCAB_SIZE), so no
                # separate range check is needed.
                next_id = torch.argmax(logits[:, :VOCAB_SIZE], dim=-1).item()

                if next_id == tokenizer.eos_id():
                    break

                yield tokenizer.decode([next_id])
                input_ids = torch.cat(
                    [input_ids, torch.tensor([[next_id]], device=DEVICE)],
                    dim=1
                )
                # Small pause so the client renders a typing effect.
                time.sleep(0.015)

        except Exception as e:
            print("❌ STREAM ERROR:", e)
            yield "\n[error]"

    return StreamingResponse(stream(), media_type="text/plain")
155
+
156
# ======================
# FRONTEND (FULL HTML PAGE)
# ======================
@app.get("/", response_class=HTMLResponse)
def ui():
    """Serve the single-page chat UI.

    Two fixes over the previous markup:
    - The user's message is now inserted via textContent instead of an
      innerHTML template literal, so typed HTML/script renders as inert
      text (the old `chat.innerHTML += \\`...${text}...\\`` was an HTML
      injection hole).
    - decoder.decode(value, {stream:true}) so multi-byte UTF-8 sequences
      split across stream chunks are reassembled instead of corrupted.
    """
    return """
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>MTP 1.1</title>
<style>
body{margin:0;background:#131314;color:#e3e3e3;font-family:Inter,system-ui}
#chat{max-width:900px;margin:auto;padding:20px}
.msg{margin:12px 0;white-space:pre-wrap}
.user{color:#8ab4f8}
.bot{color:#e3e3e3}
input{width:100%;padding:12px;border-radius:10px;border:none;background:#1e1f20;color:white}
button{margin-top:10px;padding:10px;border-radius:10px;border:none;background:#4a9eff;color:black;font-weight:bold}
</style>
</head>
<body>

<div id="chat">
  <div class="msg bot">Hola, soy MTP 1.1.</div>
</div>

<input id="inp" placeholder="Escribe algo…" />
<button onclick="send()">Enviar</button>

<script>
async function send(){
  const input=document.getElementById('inp');
  const text=input.value.trim();
  if(!text)return;
  input.value="";
  const chat=document.getElementById('chat');

  // Build the user bubble with textContent so user input is never parsed
  // as HTML (prevents markup/script injection).
  const user=document.createElement('div');
  user.className="msg user";
  user.textContent=text;
  chat.appendChild(user);

  const bot=document.createElement('div');
  bot.className="msg bot";
  chat.appendChild(bot);

  const res=await fetch('/generate_stream',{
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body:JSON.stringify({text})
  });

  const reader=res.body.getReader();
  const decoder=new TextDecoder();
  while(true){
    const {value,done}=await reader.read();
    if(done)break;
    // stream:true keeps partial multi-byte sequences buffered between reads.
    bot.textContent+=decoder.decode(value,{stream:true});
    window.scrollTo(0,document.body.scrollHeight);
  }
}
</script>

</body>
</html>
"""
219
 
220
  # ======================
221
  # ENTRYPOINT