# MTP-Mistral / app.py
# teszenofficial's picture
# Create app.py
# 7f6b010 verified
import os
import gc
import uvicorn
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# ======================
# MODEL CONFIGURATION (Gemma 2B, CPU)
# ======================
print("⚙️ Configurando entorno para CPU...")

# Gemma 2B Instruct in quantized GGUF format. The original author chose it
# for being lightweight and fast on CPU.
REPO_ID = "TheBloke/gemma-2b-it-GGUF"
FILENAME = "gemma-2b-it.Q4_K_M.gguf"

print(f"📦 Descargando/Verificando modelo: {FILENAME}...")

try:
    # Download the model file into the local Hugging Face cache.
    model_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
    )

    # Load the model into memory through the llama.cpp engine.
    # n_ctx=2048 was picked by the original author as a safe context size
    # for free CPU Spaces.
    llm = Llama(
        model_path=model_path,
        n_ctx=2048,
        # os.cpu_count() may return None when the count cannot be
        # determined; fall back to 1 so the subtraction cannot raise
        # TypeError. One core is left free, with a floor of one thread.
        n_threads=max(1, (os.cpu_count() or 1) - 1),
        verbose=False,
    )
    print("✅ Modelo Gemma 2B GGUF cargado correctamente en CPU.")
except Exception as e:
    # Nothing else in this module works without the model: report the
    # failure and re-raise with the original traceback intact.
    print(f"❌ Error crítico cargando el modelo: {e}")
    raise
# ======================
# FASTAPI
# ======================
# Application instance; title, description and version are the metadata
# handed to FastAPI.
app = FastAPI(
    title="MTP Gemma 2B CPU",
    description="Versión optimizada para CPU (Gemma 2B GGUF)",
    version="3.0"
)
# CORS is fully open: every origin, HTTP method and request header is allowed.
# NOTE(review): presumably intentional for a public demo; confirm before
# reusing this configuration elsewhere.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Request body for POST /generate. Only `text` is required; the sampling
# fields have defaults and are forwarded unchanged to the model call.
class PromptRequest(BaseModel):
    # User message inserted into the prompt after the system prompt.
    text: str
    # Passed as max_tokens to the model call.
    max_tokens: int = 512
    # Passed as temperature to the model call.
    temperature: float = 0.7
    # Passed as top_p to the model call.
    top_p: float = 0.9
# System instructions (in Spanish) placed ahead of the user's text inside the
# prompt. Each piece carries its own trailing space, so the pieces are joined
# with an empty separator.
SYSTEM_PROMPT = "".join(
    [
        "Eres MTP Gemma, una inteligencia artificial avanzada desarrollada por Teszen AI. ",
        "Tu objetivo es ser útil, preciso y amigable. ",
        "Responde siempre en formato Markdown bien estructurado. ",
        "Si te preguntan quién eres, responde que eres MTP Gemma de Teszen AI.",
    ]
)
# ======================
# GENERATION ENDPOINT
# ======================
@app.post("/generate")
def generate(req: PromptRequest):
    """Generate a single-turn reply for the text in ``req``.

    The system prompt and the user text are wrapped together in one Gemma
    user turn, followed by an opened model turn that the model completes.

    Args:
        req: Request body carrying the user text plus the sampling settings
            (``max_tokens``, ``temperature``, ``top_p``), which are passed
            through to the model call.

    Returns:
        A dict ``{"reply": <text>}`` with the stripped completion. If
        anything raises, the error is printed and a dict of the same shape
        is returned with an error message as the reply, so the web UI can
        always display ``reply``.
    """
    try:
        # Gemma instruction-tuned chat format:
        #   <start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n
        # The trailing newline after "model" is part of the format.
        full_prompt = (
            f"<start_of_turn>user\n{SYSTEM_PROMPT}\n\n{req.text}<end_of_turn>\n"
            "<start_of_turn>model\n"
        )
        output = llm(
            full_prompt,
            max_tokens=req.max_tokens,
            temperature=req.temperature,
            top_p=req.top_p,
            # Stop on Gemma's turn markers only. A bare "user" stop string
            # would cut the reply at the first occurrence of that substring
            # (e.g. in "username" or "users").
            stop=["<end_of_turn>", "<start_of_turn>"],
            echo=False,
        )
        reply = output["choices"][0]["text"].strip()
        return {"reply": reply}
    except Exception as e:
        # Best-effort boundary: keep the {"reply": ...} shape the UI expects.
        print(f"Error en generación: {e}")
        return {"reply": f"❌ Error interno del servidor: {e}"}
# ======================
# WEB INTERFACE (CHAT UI)
# ======================
@app.get("/", response_class=HTMLResponse)
def chat_ui():
    """Return the single-page chat UI as an HTML string.

    The page posts ``{"text": ...}`` to ``/generate`` and renders the
    ``reply`` field of the JSON response. User messages are HTML-escaped
    before insertion; bot replies are converted from Markdown with
    ``marked.parse`` and code blocks are highlighted with highlight.js.

    NOTE(review): the bot reply HTML produced by ``marked.parse`` is
    assigned to ``innerHTML`` without sanitization; consider adding an HTML
    sanitizer if replies must be treated as untrusted.

    Returns:
        The complete HTML document (inline CSS and JavaScript included).
    """
    return """
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<title>MTP Gemma 2B | Teszen AI</title>
<!-- Fuentes e Iconos -->
<link href="https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;500;600&family=JetBrains+Mono:wght@400&display=swap" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<!-- Markdown y Highlight.js para código -->
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/atom-one-dark.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/highlight.min.js"></script>
<style>
:root {
--bg-color: #0f1012;
--chat-bg: #161719;
--input-bg: #202124;
--primary: #d64a4a; /* Gemma suele asociarse con tonos rojizos/Google, o mantengo el azul si prefieres, he puesto rojo suave para diferenciar */
--primary-glow: rgba(214, 74, 74, 0.4);
--text-main: #e8eaed;
--text-secondary: #9aa0a6;
--user-bubble: #2b2d31;
--bot-bubble: transparent;
--border: #303134;
}
/* Sobreescribimos a azul si prefieres mantener la identidad de Teszen */
:root {
--primary: #4a9eff;
--primary-glow: rgba(74, 158, 255, 0.4);
}
* { box-sizing: border-box; outline: none; }
body {
margin: 0;
font-family: 'Outfit', sans-serif;
background-color: var(--bg-color);
color: var(--text-main);
height: 100vh;
display: flex;
flex-direction: column;
overflow: hidden;
}
/* --- Header --- */
header {
padding: 15px 24px;
background: rgba(15, 16, 18, 0.85);
backdrop-filter: blur(12px);
border-bottom: 1px solid var(--border);
display: flex;
align-items: center;
justify-content: space-between;
z-index: 100;
}
.brand {
display: flex;
align-items: center;
gap: 12px;
}
.logo-container {
position: relative;
width: 42px;
height: 42px;
}
.logo {
width: 100%;
height: 100%;
border-radius: 50%;
object-fit: cover;
border: 2px solid var(--primary);
box-shadow: 0 0 15px var(--primary-glow);
}
.brand-text h1 {
margin: 0;
font-size: 1.1rem;
font-weight: 600;
letter-spacing: 0.5px;
}
.brand-text span {
font-size: 0.75rem;
color: var(--primary);
background: rgba(74, 158, 255, 0.1);
padding: 2px 8px;
border-radius: 6px;
margin-left: 8px;
}
.status-dot {
width: 8px;
height: 8px;
background: #00ff88;
border-radius: 50%;
box-shadow: 0 0 8px #00ff88;
}
/* --- Chat Area --- */
#chat-container {
flex: 1;
padding: 20px;
overflow-y: auto;
scroll-behavior: smooth;
display: flex;
flex-direction: column;
gap: 20px;
max-width: 900px;
margin: 0 auto;
width: 100%;
}
.message {
display: flex;
gap: 16px;
opacity: 0;
transform: translateY(10px);
animation: slideIn 0.3s forwards;
}
@keyframes slideIn {
to { opacity: 1; transform: translateY(0); }
}
.avatar {
width: 36px;
height: 36px;
border-radius: 50%;
flex-shrink: 0;
display: flex;
align-items: center;
justify-content: center;
background: #333;
overflow: hidden;
}
.avatar img { width: 100%; height: 100%; object-fit: cover; }
.avatar i { font-size: 1.1rem; color: #fff; }
.bot-avatar { background: transparent; border: 1px solid var(--primary); }
.user-avatar { background: var(--border); }
.content {
flex: 1;
max-width: 85%;
font-size: 0.98rem;
line-height: 1.6;
}
.user-msg { flex-direction: row-reverse; }
.user-msg .content {
background: var(--user-bubble);
padding: 12px 18px;
border-radius: 18px 4px 18px 18px;
color: #fff;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
}
.bot-msg .content {
background: var(--bot-bubble);
padding: 0 10px;
color: var(--text-main);
}
/* Markdown Styles within Bot Message */
.bot-msg .content p { margin-top: 0; margin-bottom: 10px; }
.bot-msg .content pre {
background: #1e1e1e !important;
padding: 15px;
border-radius: 12px;
overflow-x: auto;
border: 1px solid #333;
font-family: 'JetBrains Mono', monospace;
font-size: 0.9rem;
}
.bot-msg .content code {
font-family: 'JetBrains Mono', monospace;
background: rgba(255,255,255,0.1);
padding: 2px 5px;
border-radius: 4px;
font-size: 0.85em;
}
.bot-msg .content ul, .bot-msg .content ol { padding-left: 20px; }
.bot-msg .content li { margin-bottom: 5px; }
/* --- Footer / Input --- */
.input-area {
padding: 20px;
background: var(--bg-color);
border-top: 1px solid var(--border);
}
.input-wrapper {
max-width: 900px;
margin: 0 auto;
position: relative;
background: var(--input-bg);
border-radius: 24px;
padding: 8px 8px 8px 20px;
display: flex;
align-items: flex-end;
border: 1px solid transparent;
transition: border-color 0.3s, box-shadow 0.3s;
}
.input-wrapper:focus-within {
border-color: var(--primary);
box-shadow: 0 0 15px rgba(74, 158, 255, 0.15);
}
textarea {
flex: 1;
background: transparent;
border: none;
color: white;
font-family: inherit;
font-size: 1rem;
resize: none;
max-height: 150px;
padding: 12px 0;
height: 48px; /* Altura inicial */
}
textarea::placeholder { color: var(--text-secondary); }
.btn-send {
width: 42px;
height: 42px;
border: none;
border-radius: 50%;
background: var(--primary);
color: white;
cursor: pointer;
margin-left: 10px;
display: flex;
align-items: center;
justify-content: center;
transition: transform 0.2s, background 0.2s;
}
.btn-send:hover { background: #3a8ee6; transform: scale(1.05); }
.btn-send:disabled { background: #444; cursor: not-allowed; transform: none; }
/* --- Typing Indicator --- */
.typing {
display: flex;
gap: 4px;
padding: 10px 0;
}
.dot {
width: 6px;
height: 6px;
background: var(--text-secondary);
border-radius: 50%;
animation: bounce 1.4s infinite ease-in-out both;
}
.dot:nth-child(1) { animation-delay: -0.32s; }
.dot:nth-child(2) { animation-delay: -0.16s; }
@keyframes bounce {
0%, 80%, 100% { transform: scale(0); }
40% { transform: scale(1); }
}
/* Scrollbar custom */
::-webkit-scrollbar { width: 8px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }
::-webkit-scrollbar-thumb:hover { background: #444; }
</style>
</head>
<body>
<header>
<div class="brand">
<div class="logo-container">
<!-- Foto de Perfil con fallback a icono si falla la carga -->
<img src="https://i.postimg.cc/yxS54PF3/IMG-3082.jpg"
class="logo"
alt="MTP Gemma"
onerror="this.onerror=null; this.src='https://cdn-icons-png.flaticon.com/512/4712/4712027.png'">
</div>
<div class="brand-text">
<h1>MTP Gemma <span>2B CPU</span></h1>
</div>
</div>
<div title="Online" class="status-dot"></div>
</header>
<div id="chat-container">
<!-- Mensaje de Bienvenida -->
<div class="message bot-msg">
<div class="avatar bot-avatar">
<img src="https://i.postimg.cc/yxS54PF3/IMG-3082.jpg" onerror="this.style.display='none';this.nextElementSibling.style.display='block'">
<i class="fa-solid fa-robot" style="display:none"></i>
</div>
<div class="content">
<p>Hola, soy <strong>MTP Gemma</strong>. ✨<br>
Modelo 2B optimizado para CPU. ¿En qué puedo ayudarte hoy?</p>
</div>
</div>
</div>
<!-- Indicador de escribiendo (oculto por defecto) -->
<div id="typing-indicator" style="padding-left: 70px; display: none;">
<div class="typing">
<div class="dot"></div>
<div class="dot"></div>
<div class="dot"></div>
</div>
</div>
<div class="input-area">
<div class="input-wrapper">
<textarea id="userInput" placeholder="Escribe tu mensaje aquí..." rows="1"></textarea>
<button id="sendBtn" class="btn-send" onclick="sendMessage()">
<i class="fa-solid fa-paper-plane"></i>
</button>
</div>
</div>
<script>
const chatContainer = document.getElementById('chat-container');
const userInput = document.getElementById('userInput');
const sendBtn = document.getElementById('sendBtn');
const typingIndicator = document.getElementById('typing-indicator');
// Auto-resize del textarea
userInput.addEventListener('input', function() {
this.style.height = 'auto';
this.style.height = (this.scrollHeight) + 'px';
if(this.value === '') this.style.height = '48px';
});
// Enviar con Enter (Shift+Enter para salto de línea)
userInput.addEventListener('keydown', (e) => {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
sendMessage();
}
});
function appendMessage(text, isUser) {
const div = document.createElement('div');
div.className = `message ${isUser ? 'user-msg' : 'bot-msg'}`;
let avatarHTML = '';
if (isUser) {
avatarHTML = `
<div class="avatar user-avatar">
<i class="fa-solid fa-user"></i>
</div>`;
} else {
avatarHTML = `
<div class="avatar bot-avatar">
<img src="https://i.postimg.cc/yxS54PF3/IMG-3082.jpg" onerror="this.style.display='none';this.nextElementSibling.style.display='block'">
<i class="fa-solid fa-robot" style="display:none"></i>
</div>`;
}
// Procesar Markdown si es bot, texto plano si es usuario
let contentHTML = '';
if (isUser) {
contentHTML = text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/\\n/g, "<br>");
} else {
contentHTML = marked.parse(text);
}
div.innerHTML = `
${avatarHTML}
<div class="content">${contentHTML}</div>
`;
chatContainer.appendChild(div);
// Resaltar código si hay bloques
if (!isUser) {
div.querySelectorAll('pre code').forEach((block) => {
hljs.highlightElement(block);
});
}
chatContainer.scrollTop = chatContainer.scrollHeight;
}
async function sendMessage() {
const text = userInput.value.trim();
if (!text) return;
// UI Updates
userInput.value = '';
userInput.style.height = '48px';
userInput.disabled = true;
sendBtn.disabled = true;
appendMessage(text, true);
// Mostrar Typing Indicator
typingIndicator.style.display = 'block';
chatContainer.scrollTop = chatContainer.scrollHeight;
try {
const response = await fetch('/generate', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text: text })
});
const data = await response.json();
// Ocultar Typing Indicator
typingIndicator.style.display = 'none';
if (data.reply) {
appendMessage(data.reply, false);
} else {
appendMessage("❌ Error: No se recibió respuesta.", false);
}
} catch (error) {
typingIndicator.style.display = 'none';
appendMessage(`❌ Error de conexión: ${error.message}`, false);
} finally {
userInput.disabled = false;
sendBtn.disabled = false;
userInput.focus();
}
}
</script>
</body>
</html>
"""
# ======================
# ENTRY POINT
# ======================
if __name__ == "__main__":
    # Listen on every interface (the original author notes this is required
    # for Docker-based Spaces); the port is read from PORT, defaulting to 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.getenv("PORT", 7860)),
    )