# app.py — MTP 3.6 inference server (Hugging Face Space "teszenofficial", commit 9ea4f80)
import os
import sys
import torch
import pickle
import time
import gc
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from huggingface_hub import snapshot_download
import uvicorn
# ======================
# DEVICE CONFIGURATION
# ======================
# Prefer CUDA when a GPU is present, otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

if DEVICE == "cuda":
    print("✅ GPU NVIDIA detectada. Usando CUDA.")
    # Enable cuDNN autotuning and TF32 matmuls for faster GPU inference.
    torch.backends.cudnn.benchmark = True
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
else:
    print("⚠️ GPU no detectada. Usando CPU (puede ser más lento).")
    # Use half the cores to avoid thread oversubscription on CPU.
    torch.set_num_threads(max(1, os.cpu_count() // 2))

# Inference-only process: gradients are never needed.
torch.set_grad_enabled(False)
# Hugging Face repo that hosts the checkpoint, tokenizer and model code.
MODEL_REPO = "TeszenAI/MTP3.6"
# ======================
# MODEL DOWNLOAD AND LOADING
# ======================
print(f"📦 Descargando modelo desde {MODEL_REPO}...")
# Fetch a full snapshot of the repo into a local directory (cached on reruns).
repo_path = snapshot_download(
    repo_id=MODEL_REPO,
    repo_type="model",
    local_dir="mtp36_repo"
)
# Make the downloaded repo importable: model.py and tokenizer.py live inside it.
sys.path.insert(0, repo_path)
# Model architecture and tokenizer implementations ship with the checkpoint.
from model import MTPMiniModel
from tokenizer import MTPTokenizer
print("🔧 Cargando tensores y configuración...")
# NOTE(review): pickle.load executes arbitrary code from the downloaded file;
# acceptable only because MODEL_REPO is first-party — do not point it at
# untrusted repos.
with open(os.path.join(repo_path, "mtp_mini.pkl"), "rb") as f:
    model_data = pickle.load(f)
# SentencePiece-backed tokenizer; vocab size drives logit clipping later.
tokenizer = MTPTokenizer(os.path.join(repo_path, "mtp_tokenizer.model"))
VOCAB_SIZE = tokenizer.sp.get_piece_size()
config = model_data["config"]
# Architecture feature flags from the checkpoint config (defaults assume the
# training-time setup when a key is missing).
use_swiglu = config["model"].get("use_swiglu", True)
use_flash_attention = config["model"].get("use_flash_attention", True)
use_confidence_scoring = config["model"].get("use_confidence_scoring", True)
use_gradient_checkpointing = config["model"].get("use_gradient_checkpointing", False)
print(f"🧠 Inicializando MTP 3.6...")
print(f" → Vocabulario: {VOCAB_SIZE}")
print(f" → Dimensión: {config['model']['d_model']}")
print(f" → Capas: {config['model']['n_layers']}")
print(f" → Cabezas: {config['model']['n_heads']}")
print(f" → Contexto máximo: {config['model']['max_seq_len']}")
print(f" → SwiGLU: {'✓' if use_swiglu else '✗'}")
print(f" → Flash Attention: {'✓' if use_flash_attention else '✗'}")
print(f" → Confidence Scoring: {'✓' if use_confidence_scoring else '✗'}")
# Build the model skeleton from the checkpoint's hyperparameters; weights
# are loaded from the pickled state dict right after.
model = MTPMiniModel(
    vocab_size=VOCAB_SIZE,
    d_model=config["model"]["d_model"],
    n_layers=config["model"]["n_layers"],
    n_heads=config["model"]["n_heads"],
    d_ff=config["model"]["d_ff"],
    max_seq_len=config["model"]["max_seq_len"],
    dropout=0.0,  # no dropout at inference time
    use_swiglu=use_swiglu,
    use_confidence_scoring=use_confidence_scoring,
    use_gradient_checkpointing=use_gradient_checkpointing
)
model.load_state_dict(model_data["model_state_dict"])
model.eval()
# Dynamic int8 quantization of Linear layers speeds up CPU-only inference.
if DEVICE == "cpu":
    print("⚡ Aplicando cuantización dinámica para CPU...")
    model = torch.quantization.quantize_dynamic(
        model,
        {torch.nn.Linear},
        dtype=torch.qint8
    )
model.to(DEVICE)
# Total parameter count — also surfaced by the /health and /info endpoints.
param_count = sum(p.numel() for p in model.parameters())
print(f"✅ Modelo cargado: {param_count:,} parámetros ({param_count/1e6:.1f}M)")
if DEVICE == "cuda":
    vram_used = torch.cuda.memory_allocated(0) / 1e9
    print(f"✅ VRAM usada: {vram_used:.2f} GB")
# ======================
# API CONFIG
# ======================
app = FastAPI(
    title="MTP 3.6 API",
    description="API para modelo de lenguaje MTP 3.6 - 20x más grande con anti-alucinación",
    version="3.6"
)
# Fully open CORS: this is a public demo UI served from arbitrary origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
class PromptRequest(BaseModel):
    """Validated JSON body for the /generate endpoint.

    Field bounds are enforced by pydantic, so handlers can trust the ranges.
    """
    text: str = Field(..., max_length=4000, description="Texto de entrada")
    max_tokens: int = Field(default=300, ge=10, le=500, description="Tokens máximos a generar")
    temperature: float = Field(default=0.65, ge=0.1, le=2.0, description="Temperatura de muestreo")
    top_k: int = Field(default=50, ge=1, le=100, description="Top-k sampling")
    top_p: float = Field(default=0.9, ge=0.1, le=1.0, description="Top-p (nucleus) sampling")
    repetition_penalty: float = Field(default=1.2, ge=1.0, le=2.0, description="Penalización por repetición")
    min_length: int = Field(default=30, ge=5, le=100, description="Longitud mínima de respuesta")
def build_prompt(user_input: str) -> str:
    """Wrap the user's text in the instruction/response template the model expects."""
    header = "### Instrucción:\n"
    footer = "\n\n### Respuesta:\n"
    return header + user_input + footer
# ======================
# ⚡ LOAD MANAGEMENT
# ======================
# Requests currently being served. Mutated without a lock — safe only while
# all handlers run on the single asyncio event-loop thread.
ACTIVE_REQUESTS = 0
# Hard cap before new requests are rejected with a busy message.
MAX_CONCURRENT_REQUESTS = 3
@app.post("/generate")
async def generate(req: PromptRequest):
    """Main generation endpoint with anti-hallucination filtering.

    Applies a concurrency cap, lowers max_tokens/temperature under load,
    runs the model with confidence/entropy filtering, strips unsafe or
    template tokens from the output, and returns the reply plus timing
    statistics. Errors are reported in-band via the "error" key.
    """
    global ACTIVE_REQUESTS
    if ACTIVE_REQUESTS >= MAX_CONCURRENT_REQUESTS:
        return {
            "reply": "El servidor está ocupado. Por favor, intenta de nuevo en unos segundos.",
            "error": "too_many_requests",
            "active_requests": ACTIVE_REQUESTS
        }
    ACTIVE_REQUESTS += 1
    try:
        # FIX: everything after the increment now lives inside try/finally so
        # the slot is always released — previously an exception raised while
        # building or encoding the prompt leaked ACTIVE_REQUESTS forever,
        # eventually rejecting all traffic.
        # Dynamic degradation under load: shorter, cooler generations.
        dyn_max_tokens = req.max_tokens
        dyn_temperature = req.temperature
        if ACTIVE_REQUESTS > 1:
            print(f"⚠️ Carga alta ({ACTIVE_REQUESTS} requests). Ajustando parámetros.")
            dyn_max_tokens = min(dyn_max_tokens, 200)
            dyn_temperature = max(0.6, dyn_temperature * 0.95)
        user_input = req.text.strip()
        if not user_input:
            return {"reply": "", "tokens_generated": 0}
        full_prompt = build_prompt(user_input)
        tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
        input_ids = torch.tensor([tokens], device=DEVICE)
        start_time = time.time()
        # NOTE(review): model.generate is a blocking call inside an async
        # handler, so it stalls the event loop for the whole generation —
        # consider loop.run_in_executor if latency under load matters.
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                max_new_tokens=dyn_max_tokens,
                temperature=dyn_temperature,
                top_k=req.top_k,
                top_p=req.top_p,
                repetition_penalty=req.repetition_penalty,
                min_length=req.min_length,
                eos_token_id=tokenizer.eos_id(),
                use_confidence_filter=True,
                min_confidence=config['model'].get('min_confidence', 0.3),
                use_entropy_threshold=True,
                max_entropy=config['generation'].get('max_entropy', 4.0)
            )
        gen_tokens = output_ids[0, len(tokens):].tolist()
        # Safety filter: stop at the first EOS and drop out-of-vocab ids.
        safe_tokens = []
        for t in gen_tokens:
            if t == tokenizer.eos_id():
                break
            if 0 <= t < VOCAB_SIZE:
                safe_tokens.append(t)
        response = tokenizer.decode(safe_tokens).strip()
        # Strip any trailing prompt-template markers the model echoed back.
        if "###" in response:
            response = response.split("###")[0].strip()
        # Trim trailing ellipsis-style repetition.
        if response.endswith(("...", ". . .", "…")):
            response = response.rstrip(".")
        generation_time = time.time() - start_time
        tokens_per_second = len(safe_tokens) / generation_time if generation_time > 0 else 0
        return {
            "reply": response,
            "tokens_generated": len(safe_tokens),
            "generation_time": round(generation_time, 2),
            "tokens_per_second": round(tokens_per_second, 1),
            "model": "MTP 3.6",
            "device": DEVICE
        }
    except Exception as e:
        print(f"❌ Error durante generación: {e}")
        import traceback
        traceback.print_exc()
        return {
            "reply": "Lo siento, ocurrió un error al procesar tu solicitud.",
            "error": str(e)
        }
    finally:
        ACTIVE_REQUESTS -= 1
        if DEVICE == "cuda":
            torch.cuda.empty_cache()
        gc.collect()
# ======================
# 📡 STREAMING SSE
# ======================
@app.get("/generate_sse")
def generate_sse(
    text: str,
    max_tokens: int = 300,
    temperature: float = 0.65,
    top_k: int = 50,
    top_p: float = 0.9,
    repetition_penalty: float = 1.2
):
    """Token-by-token streaming endpoint using Server-Sent Events.

    Re-implements the sampling loop manually (confidence-aware temperature,
    repetition penalty, top-k, top-p) so each token can be flushed to the
    client as soon as it is sampled.
    """
    global ACTIVE_REQUESTS
    if ACTIVE_REQUESTS >= MAX_CONCURRENT_REQUESTS:
        def error_stream():
            yield "data:[ERROR: Servidor ocupado]\n\n"
        return StreamingResponse(error_stream(), media_type="text/event-stream")
    ACTIVE_REQUESTS += 1
    # NOTE(review): if the client disconnects before consuming the stream, the
    # generator's finally block only runs when the generator is closed — a
    # stream that is never iterated could leak the ACTIVE_REQUESTS slot.
    def event_stream():
        try:
            full_prompt = build_prompt(text)
            tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
            input_ids = torch.tensor([tokens], device=DEVICE)
            generated_tokens = []
            # Under load, cap the token budget and slightly cool the temperature.
            limit = min(150 if ACTIVE_REQUESTS > 1 else max_tokens, 300)
            temp = max(0.6, temperature * 0.95) if ACTIVE_REQUESTS > 1 else temperature
            for step in range(limit):
                with torch.no_grad():
                    # Ask for per-token confidence when the model supports it.
                    if use_confidence_scoring:
                        logits, _, confidence = model(input_ids, return_confidence=True)
                    else:
                        logits, _ = model(input_ids)
                        confidence = None
                # Keep only the last position, clipped to the tokenizer vocab.
                logits = logits[:, -1, :VOCAB_SIZE].clone()
                # Low model confidence → raise temperature to escape loops.
                if confidence is not None:
                    conf_score = confidence[:, -1, :].item()
                    if conf_score < 0.3:
                        temp = min(temp * 1.1, 1.0)
                # Repetition penalty over every token already in the context.
                if repetition_penalty != 1.0:
                    for token_id in set(input_ids[0].tolist()):
                        if logits[0, token_id] < 0:
                            logits[0, token_id] *= repetition_penalty
                        else:
                            logits[0, token_id] /= repetition_penalty
                # Temperature scaling.
                logits = logits / temp
                # Top-k: mask everything below the k-th largest logit.
                if top_k > 0:
                    v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                    logits[logits < v[:, [-1]]] = float('-inf')
                # Top-p (nucleus): drop the tail beyond cumulative prob top_p,
                # always keeping at least the single most likely token.
                if top_p < 1.0:
                    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                    cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
                    sorted_indices_to_remove = cumulative_probs > top_p
                    sorted_indices_to_remove[:, 1:] = sorted_indices_to_remove[:, :-1].clone()
                    sorted_indices_to_remove[:, 0] = 0
                    indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                    logits[indices_to_remove] = float('-inf')
                # Sample the next token from the filtered distribution.
                probs = torch.softmax(logits, dim=-1)
                next_id = torch.multinomial(probs, num_samples=1).item()
                if next_id == tokenizer.eos_id():
                    break
                if 0 <= next_id < VOCAB_SIZE:
                    generated_tokens.append(next_id)
                    token_text = tokenizer.decode([next_id])
                    # Stop if the model starts echoing the prompt template.
                    if "###" in token_text:
                        break
                    yield f"data:{token_text}\n\n"
                    input_ids = torch.cat(
                        [input_ids, torch.tensor([[next_id]], device=DEVICE)],
                        dim=1
                    )
                    # Small pacing delay so the client renders smoothly.
                    time.sleep(0.02)
            yield "data:[DONE]\n\n"
        except Exception as e:
            print(f"❌ Error en streaming: {e}")
            yield f"data:[ERROR: {str(e)}]\n\n"
        finally:
            ACTIVE_REQUESTS -= 1
            if DEVICE == "cuda":
                torch.cuda.empty_cache()
            gc.collect()
    return StreamingResponse(event_stream(), media_type="text/event-stream")
# ======================
# 📊 INFO ENDPOINTS
# ======================
@app.get("/health")
def health_check():
    """Report service liveness, current load and (on GPU) memory usage."""
    payload = {
        "status": "healthy",
        "model": "MTP 3.6",
        "version": "3.6",
        "device": DEVICE,
        "active_requests": ACTIVE_REQUESTS,
        "max_concurrent_requests": MAX_CONCURRENT_REQUESTS,
        "vocab_size": VOCAB_SIZE,
        "parameters": param_count,
        "parameters_human": f"{param_count/1e6:.1f}M",
    }
    if DEVICE == "cuda":
        # CUDA allocator counters, reported in MiB.
        mib = 1024**2
        payload["gpu_memory_allocated_mb"] = round(torch.cuda.memory_allocated() / mib, 2)
        payload["gpu_memory_reserved_mb"] = round(torch.cuda.memory_reserved() / mib, 2)
    return payload
@app.get("/info")
def model_info():
    """Return detailed model metadata: architecture, capabilities and training setup."""
    improvements = [
        "RoPE (Rotary Position Embedding)",
        "RMSNorm (Root Mean Square Normalization)",
        "Flash Attention",
        "Gradient Checkpointing",
        "Mixed Precision FP16",
        "Confidence Scoring",
        "Entropy Filtering",
        "Label Smoothing (0.15)",
        "Repetition Penalty",
        "Early Stopping",
        "Anti-Alucinación",
    ] + (["SwiGLU Activation"] if use_swiglu else [])
    model_cfg = config["model"]
    train_cfg = config["training"]
    # Architecture: checkpoint hyperparameters plus runtime feature flags.
    architecture = {k: model_cfg[k] for k in ("d_model", "n_layers", "n_heads", "d_ff", "max_seq_len")}
    architecture.update(
        vocab_size=VOCAB_SIZE,
        use_swiglu=use_swiglu,
        use_flash_attention=use_flash_attention,
        use_confidence_scoring=use_confidence_scoring,
        dropout=model_cfg["dropout"],
    )
    training_keys = ("batch_size", "accumulation_steps", "learning_rate", "weight_decay", "epochs")
    return {
        "model_name": "MTP 3.6",
        "version": "3.6",
        "description": "Modelo 20x más grande con capacidades avanzadas de razonamiento",
        "architecture": architecture,
        "parameters": param_count,
        "parameters_human": f"{param_count/1e6:.1f}M",
        "device": DEVICE,
        "improvements": improvements,
        "capabilities": [
            "Resumen de textos largos",
            "Reescritura con diferentes estilos",
            "Comparación de conceptos",
            "Generalización desde ejemplos similares",
            "Detección de baja confianza",
            "Razonamiento profundo (24 capas)"
        ],
        "training_config": {k: train_cfg[k] for k in training_keys},
    }
@app.get("/config")
def get_config():
    """Expose the full configuration dict as loaded from the checkpoint."""
    sections = {}
    # These sections are mandatory in the checkpoint config.
    for key in ("model", "training", "data"):
        sections[key] = config[key]
    # These are optional and default to empty when absent.
    for key in ("generation", "memory"):
        sections[key] = config.get(key, {})
    return sections
# ======================
# 🎨 UPDATED WEB INTERFACE
# ======================
@app.get("/", response_class=HTMLResponse)
def chat_ui():
    """Serve the single-page chat UI (inline HTML/CSS/JS, no static assets).

    The page talks to POST /generate for replies and GET /info for the header
    badge; the markup below is returned verbatim, so any edit changes the page.
    """
    return """
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<title>MTP 3.6 - Chat Interface</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
<style>
:root {
--bg-color: #0a0a0b;
--surface-color: #1a1b1e;
--accent-color: #5b9eff;
--text-primary: #e8e8e8;
--text-secondary: #9ca3af;
--user-bubble: #2c2e31;
--success-color: #10b981;
--warning-color: #f59e0b;
--error-color: #ef4444;
--logo-url: url('https://i.postimg.cc/yxS54PF3/IMG-3082.jpg');
}
* {
box-sizing: border-box;
outline: none;
-webkit-tap-highlight-color: transparent;
}
body {
margin: 0;
background: linear-gradient(135deg, #0a0a0b 0%, #1a1a1f 100%);
font-family: 'Inter', sans-serif;
color: var(--text-primary);
height: 100dvh;
display: flex;
flex-direction: column;
overflow: hidden;
}
header {
padding: 14px 22px;
display: flex;
align-items: center;
justify-content: space-between;
background: rgba(26, 27, 30, 0.9);
backdrop-filter: blur(16px);
position: fixed;
top: 0;
width: 100%;
z-index: 50;
border-bottom: 1px solid rgba(255,255,255,0.06);
}
.brand-wrapper {
display: flex;
align-items: center;
gap: 14px;
cursor: pointer;
}
.brand-logo {
width: 36px;
height: 36px;
border-radius: 50%;
background-image: var(--logo-url);
background-size: cover;
background-position: center;
border: 2px solid rgba(91, 158, 255, 0.3);
box-shadow: 0 0 12px rgba(91, 158, 255, 0.2);
}
.brand-text {
font-weight: 600;
font-size: 1.1rem;
display: flex;
align-items: center;
gap: 10px;
background: linear-gradient(135deg, #5b9eff 0%, #8ab4f8 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
.version-badge {
font-size: 0.75rem;
background: linear-gradient(135deg, rgba(91, 158, 255, 0.2) 0%, rgba(138, 180, 248, 0.2) 100%);
color: #8ab4f8;
padding: 3px 10px;
border-radius: 14px;
font-weight: 700;
border: 1px solid rgba(91, 158, 255, 0.3);
}
.model-info {
display: flex;
align-items: center;
gap: 8px;
font-size: 0.75rem;
color: var(--text-secondary);
}
.status-indicator {
width: 8px;
height: 8px;
border-radius: 50%;
background: var(--success-color);
animation: pulse 2s infinite;
box-shadow: 0 0 8px var(--success-color);
}
@keyframes pulse {
0%, 100% { opacity: 1; transform: scale(1); }
50% { opacity: 0.6; transform: scale(0.95); }
}
.chat-scroll {
flex: 1;
overflow-y: auto;
padding: 90px 20px 40px 20px;
display: flex;
flex-direction: column;
gap: 32px;
max-width: 900px;
margin: 0 auto;
width: 100%;
scroll-behavior: smooth;
}
.msg-row {
display: flex;
gap: 16px;
width: 100%;
opacity: 0;
transform: translateY(12px);
animation: slideUpFade 0.4s cubic-bezier(0.2, 0.8, 0.2, 1) forwards;
}
.msg-row.user { justify-content: flex-end; }
.msg-row.bot { justify-content: flex-start; align-items: flex-start; }
.msg-content {
line-height: 1.65;
font-size: 1rem;
word-wrap: break-word;
max-width: 85%;
}
.user .msg-content {
background: linear-gradient(135deg, var(--user-bubble) 0%, #323438 100%);
padding: 12px 20px;
border-radius: 20px;
border-top-right-radius: 4px;
color: #fff;
box-shadow: 0 2px 8px rgba(0,0,0,0.3);
}
.bot .msg-content-wrapper {
display: flex;
flex-direction: column;
gap: 10px;
width: 100%;
}
.bot .msg-text {
padding-top: 6px;
color: var(--text-primary);
white-space: pre-wrap;
}
.bot-avatar {
width: 36px;
height: 36px;
min-width: 36px;
border-radius: 50%;
background-image: var(--logo-url);
background-size: cover;
box-shadow: 0 0 0 2px rgba(91, 158, 255, 0.2);
}
.bot-actions {
display: flex;
gap: 12px;
opacity: 0;
transition: opacity 0.3s;
margin-top: 6px;
}
.action-btn {
background: rgba(255,255,255,0.05);
border: 1px solid rgba(255,255,255,0.1);
color: var(--text-secondary);
cursor: pointer;
padding: 6px 12px;
border-radius: 6px;
display: flex;
align-items: center;
transition: all 0.2s;
font-size: 0.85rem;
}
.action-btn:hover {
color: var(--text-primary);
background: rgba(255,255,255,0.1);
border-color: rgba(91, 158, 255, 0.3);
}
.action-btn svg {
width: 16px;
height: 16px;
fill: currentColor;
margin-right: 6px;
}
.typing-cursor::after {
content: '';
display: inline-block;
width: 10px;
height: 10px;
background: var(--accent-color);
border-radius: 50%;
margin-left: 6px;
vertical-align: middle;
animation: blink 1s infinite;
}
.footer-container {
padding: 0 20px 24px 20px;
background: linear-gradient(to top, rgba(10, 10, 11, 0.95) 70%, transparent);
position: relative;
z-index: 60;
}
.input-box {
max-width: 900px;
margin: 0 auto;
background: var(--surface-color);
border-radius: 30px;
padding: 10px 12px 10px 22px;
display: flex;
align-items: center;
border: 1px solid rgba(255,255,255,0.1);
transition: all 0.3s;
box-shadow: 0 4px 16px rgba(0,0,0,0.3);
}
.input-box:focus-within {
border-color: rgba(91, 158, 255, 0.5);
box-shadow: 0 0 0 3px rgba(91, 158, 255, 0.15), 0 4px 16px rgba(0,0,0,0.3);
}
#userInput {
flex: 1;
background: transparent;
border: none;
color: white;
font-size: 1rem;
font-family: inherit;
padding: 10px 0;
resize: none;
max-height: 140px;
}
#mainBtn {
background: linear-gradient(135deg, var(--accent-color) 0%, #4a8ee0 100%);
color: white;
border: none;
width: 40px;
height: 40px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
cursor: pointer;
margin-left: 10px;
transition: all 0.2s;
box-shadow: 0 2px 8px rgba(91, 158, 255, 0.4);
}
#mainBtn:hover {
transform: scale(1.05);
box-shadow: 0 4px 12px rgba(91, 158, 255, 0.6);
}
#mainBtn:disabled {
opacity: 0.5;
cursor: not-allowed;
transform: scale(1);
}
.disclaimer {
text-align: center;
font-size: 0.75rem;
color: #666;
margin-top: 14px;
}
.stats-badge {
font-size: 0.7rem;
color: var(--text-secondary);
margin-top: 6px;
font-family: 'Monaco', monospace;
background: rgba(91, 158, 255, 0.05);
padding: 4px 8px;
border-radius: 6px;
display: inline-block;
}
@keyframes slideUpFade {
from { opacity: 0; transform: translateY(20px); }
to { opacity: 1; transform: translateY(0); }
}
@keyframes blink {
0%, 100% { opacity: 1; }
50% { opacity: 0.3; }
}
@keyframes pulseAvatar {
0% { box-shadow: 0 0 0 0 rgba(91, 158, 255, 0.5); }
70% { box-shadow: 0 0 0 10px rgba(91, 158, 255, 0); }
100% { box-shadow: 0 0 0 0 rgba(91, 158, 255, 0); }
}
.pulsing { animation: pulseAvatar 1.5s infinite; }
::-webkit-scrollbar { width: 8px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb {
background: rgba(91, 158, 255, 0.3);
border-radius: 4px;
}
::-webkit-scrollbar-thumb:hover { background: rgba(91, 158, 255, 0.5); }
.error-message {
color: var(--error-color);
font-size: 0.85rem;
padding: 10px 14px;
background: rgba(239, 68, 68, 0.1);
border-radius: 8px;
margin-top: 10px;
border-left: 3px solid var(--error-color);
}
</style>
</head>
<body>
<header>
<div class="brand-wrapper" onclick="location.reload()">
<div class="brand-logo"></div>
<div class="brand-text">
MTP <span class="version-badge">3.6</span>
</div>
</div>
<div class="model-info">
<span id="modelParams">Cargando...</span>
<div class="status-indicator" title="Sistema operativo"></div>
</div>
</header>
<div id="chatScroll" class="chat-scroll">
<div class="msg-row bot" style="animation-delay: 0.1s;">
<div class="bot-avatar"></div>
<div class="msg-content-wrapper">
<div class="msg-text">¡Hola! Soy MTP 3.6, un modelo de lenguaje 20x más grande con capacidades avanzadas.
🚀 Características principales:
• 24 capas de razonamiento profundo
• RoPE + RMSNorm + SwiGLU
• Flash Attention optimizada
• Anti-alucinación con confidence scoring
• Contexto de hasta 2048 tokens
• Resumen, reescritura y comparación
✨ Capacidades especiales:
• Resume textos largos
• Reescribe con diferentes estilos
• Compara conceptos complejos
• Generaliza desde ejemplos similares
• Detecta baja confianza y ajusta respuestas
¿En qué puedo ayudarte hoy?</div>
</div>
</div>
</div>
<div class="footer-container">
<div class="input-box">
<textarea id="userInput" placeholder="Escribe un mensaje..." rows="1" autocomplete="off"></textarea>
<button id="mainBtn" onclick="handleBtnClick()"></button>
</div>
<div class="disclaimer">
MTP 3.6 puede cometer errores. Verifica información importante. Modelo entrenado en 25 épocas.
</div>
</div>
<script>
const chatScroll = document.getElementById('chatScroll');
const userInput = document.getElementById('userInput');
const mainBtn = document.getElementById('mainBtn');
let isGenerating = false;
let abortController = null;
let typingTimeout = null;
let lastUserPrompt = "";
const ICON_SEND = `<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M22 2L11 13M22 2l-7 20-4-9-9-4 20-7z"></path></svg>`;
const ICON_STOP = `<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><rect x="2" y="2" width="20" height="20" rx="4"></rect></svg>`;
mainBtn.innerHTML = ICON_SEND;
userInput.addEventListener('input', function() {
this.style.height = 'auto';
this.style.height = Math.min(this.scrollHeight, 140) + 'px';
});
function scrollToBottom() {
chatScroll.scrollTop = chatScroll.scrollHeight;
}
function setBtnState(state) {
if (state === 'sending') {
mainBtn.innerHTML = ICON_STOP;
mainBtn.disabled = false;
isGenerating = true;
} else if (state === 'disabled') {
mainBtn.disabled = true;
isGenerating = false;
} else {
mainBtn.innerHTML = ICON_SEND;
mainBtn.disabled = false;
isGenerating = false;
abortController = null;
}
}
function handleBtnClick() {
if (isGenerating) {
stopGeneration();
} else {
sendMessage();
}
}
function stopGeneration() {
if (abortController) abortController.abort();
if (typingTimeout) clearTimeout(typingTimeout);
const activeCursor = document.querySelector('.typing-cursor');
if (activeCursor) activeCursor.classList.remove('typing-cursor');
const activeAvatar = document.querySelector('.pulsing');
if (activeAvatar) activeAvatar.classList.remove('pulsing');
setBtnState('idle');
userInput.focus();
}
async function sendMessage(textOverride = null) {
const text = textOverride || userInput.value.trim();
if (!text) return;
lastUserPrompt = text;
if (!textOverride) {
userInput.value = '';
userInput.style.height = 'auto';
addMessage(text, 'user');
}
setBtnState('sending');
abortController = new AbortController();
const botRow = document.createElement('div');
botRow.className = 'msg-row bot';
const avatar = document.createElement('div');
avatar.className = 'bot-avatar pulsing';
const wrapper = document.createElement('div');
wrapper.className = 'msg-content-wrapper';
const msgText = document.createElement('div');
msgText.className = 'msg-text';
wrapper.appendChild(msgText);
botRow.appendChild(avatar);
botRow.appendChild(wrapper);
chatScroll.appendChild(botRow);
scrollToBottom();
try {
const startTime = performance.now();
const response = await fetch('/generate', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
text: text,
max_tokens: 300,
temperature: 0.65,
top_k: 50,
top_p: 0.9,
repetition_penalty: 1.2,
min_length: 30
}),
signal: abortController.signal
});
const data = await response.json();
if (!isGenerating) return;
avatar.classList.remove('pulsing');
if (data.error) {
msgText.innerHTML = `<span class="error-message">Error: ${data.error}</span>`;
setBtnState('idle');
return;
}
const reply = data.reply || "No entendí eso.";
const endTime = performance.now();
const totalTime = ((endTime - startTime) / 1000).toFixed(2);
await typeWriter(msgText, reply);
if (isGenerating) {
const stats = document.createElement('div');
stats.className = 'stats-badge';
stats.textContent = `${data.tokens_generated} tokens • ${data.tokens_per_second} t/s • ${totalTime}s • ${data.device}`;
wrapper.appendChild(stats);
addActions(wrapper, reply);
setBtnState('idle');
}
} catch (error) {
if (error.name === 'AbortError') {
msgText.textContent += " [Detenido]";
} else {
console.error('Error:', error);
avatar.classList.remove('pulsing');
msgText.innerHTML = `<span class="error-message">Error de conexión. Por favor, intenta de nuevo.</span>`;
setBtnState('idle');
}
}
}
function addMessage(text, sender) {
const row = document.createElement('div');
row.className = `msg-row ${sender}`;
const content = document.createElement('div');
content.className = 'msg-content';
content.textContent = text;
row.appendChild(content);
chatScroll.appendChild(row);
scrollToBottom();
}
function typeWriter(element, text, speed = 10) {
return new Promise(resolve => {
let i = 0;
element.classList.add('typing-cursor');
function type() {
if (!isGenerating) {
element.classList.remove('typing-cursor');
resolve();
return;
}
if (i < text.length) {
element.textContent += text.charAt(i);
i++;
scrollToBottom();
typingTimeout = setTimeout(type, speed + Math.random() * 4);
} else {
element.classList.remove('typing-cursor');
resolve();
}
}
type();
});
}
function addActions(wrapperElement, textToCopy) {
const actionsDiv = document.createElement('div');
actionsDiv.className = 'bot-actions';
const copyBtn = document.createElement('button');
copyBtn.className = 'action-btn';
copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>Copiar`;
copyBtn.onclick = () => {
navigator.clipboard.writeText(textToCopy).then(() => {
copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polyline points="20 6 9 17 4 12"></polyline></svg>Copiado`;
setTimeout(() => {
copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>Copiar`;
}, 2000);
});
};
const regenBtn = document.createElement('button');
regenBtn.className = 'action-btn';
regenBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M23 4v6h-6"></path><path d="M1 20v-6h6"></path><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"></path></svg>Regenerar`;
regenBtn.onclick = () => {
sendMessage(lastUserPrompt);
};
actionsDiv.appendChild(copyBtn);
actionsDiv.appendChild(regenBtn);
wrapperElement.appendChild(actionsDiv);
requestAnimationFrame(() => actionsDiv.style.opacity = "1");
scrollToBottom();
}
userInput.addEventListener('keydown', (e) => {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
handleBtnClick();
}
});
window.onload = () => {
userInput.focus();
fetch('/info')
.then(r => r.json())
.then(data => {
console.log('MTP 3.6 cargado:', data);
document.getElementById('modelParams').textContent = data.parameters_human + ' params';
})
.catch(e => console.error('Error:', e));
};
</script>
</body>
</html>
"""
if __name__ == "__main__":
    # Hugging Face Spaces expose port 7860 by default; allow override via $PORT.
    port = int(os.environ.get("PORT", 7860))
    startup_lines = (
        "\n🚀 Iniciando servidor MTP 3.6...",
        f"🌐 Interfaz web: http://0.0.0.0:{port}",
        f"📡 API docs: http://0.0.0.0:{port}/docs",
        f"📊 Health check: http://0.0.0.0:{port}/health",
        f"ℹ️ Model info: http://0.0.0.0:{port}/info",
        "\n✅ Sistema listo. Presiona Ctrl+C para detener.",
    )
    for line in startup_lines:
        print(line)
    # Serve the FastAPI app on all interfaces.
    uvicorn.run(app, host="0.0.0.0", port=port, log_level="info")