teszenofficial committed on
Commit
d17c293
·
verified ·
1 Parent(s): 0e2fedd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +700 -0
app.py ADDED
@@ -0,0 +1,700 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import torch
4
+ import json
5
+ import time
6
+ import gc
7
+ import re
8
+ from fastapi import FastAPI, Request
9
+ from fastapi.responses import HTMLResponse, StreamingResponse
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+ from pydantic import BaseModel, Field
12
+ from huggingface_hub import snapshot_download
13
+ import uvicorn
14
+ import math
15
+ import torch.nn as nn
16
+ import torch.nn.functional as F
17
+ import sentencepiece as spm
18
+
19
# Select the compute device once at import time; the rest of the module reads DEVICE.
if torch.cuda.is_available():
    DEVICE = "cuda"
    print("✅ GPU NVIDIA detectada. Usando CUDA.")
    # Let cuDNN auto-tune its kernels.
    torch.backends.cudnn.benchmark = True
else:
    DEVICE = "cpu"
    print("⚠️ GPU no detectada. Usando CPU.")

# NOTE(review): applied on every device so that a torch.compile failure falls
# back to eager execution instead of aborting start-up — confirm this is the
# intended scope of the setting.
if hasattr(torch, '_dynamo'):
    torch._dynamo.config.suppress_errors = True

if DEVICE == "cpu":
    # os.cpu_count() may return None when the count cannot be determined;
    # treat that as a single core instead of failing on `None // 2`.
    torch.set_num_threads(max(1, (os.cpu_count() or 1) // 2))

# This module only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)

# Hugging Face Hub repository the model artefacts are downloaded from.
MODEL_REPO = "TeszenAI/MTP-3.3.1"
35
+
36
class LayerNorm(nn.Module):
    """Normalise the last dimension and apply a learnable scale and shift.

    ``eps`` is added to the standard deviation (as returned by
    ``Tensor.std``), not to the variance, so the result is not numerically
    identical to ``torch.nn.LayerNorm``.
    """

    def __init__(self, d_model: int, eps: float = 1e-5):
        super().__init__()
        # Scale starts at one and shift at zero, i.e. an identity transform.
        self.weight = nn.Parameter(torch.ones(d_model))
        self.bias = nn.Parameter(torch.zeros(d_model))
        self.eps = eps

    def forward(self, x):
        """Return ``weight * (x - mean) / (std + eps) + bias`` over the last axis."""
        centered = x - x.mean(-1, keepdim=True)
        spread = x.std(-1, keepdim=True) + self.eps
        return self.weight * centered / spread + self.bias
47
+
48
class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Queries, keys and values are all projected from the same input ``x``.
    """

    def __init__(self, d_model: int, n_heads: int, dropout: float = 0.1):
        super().__init__()
        assert d_model % n_heads == 0
        self.d_model = d_model
        self.n_heads = n_heads
        self.d_k = d_model // n_heads  # width of a single head
        self.w_q = nn.Linear(d_model, d_model)
        self.w_k = nn.Linear(d_model, d_model)
        self.w_v = nn.Linear(d_model, d_model)
        self.w_o = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)
        self.scale = math.sqrt(self.d_k)

    def _split_heads(self, projected, batch_size, seq_len):
        """Reshape (batch, seq, d_model) into (batch, heads, seq, d_k)."""
        return projected.view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)

    def forward(self, x, mask=None):
        """Attend over ``x``; positions where ``mask == 0`` are excluded."""
        batch_size, seq_len, _ = x.shape
        queries = self._split_heads(self.w_q(x), batch_size, seq_len)
        keys = self._split_heads(self.w_k(x), batch_size, seq_len)
        values = self._split_heads(self.w_v(x), batch_size, seq_len)

        scores = torch.matmul(queries, keys.transpose(-2, -1)) / self.scale
        if mask is not None:
            # -inf scores become zero weight after the softmax.
            scores = scores.masked_fill(mask == 0, float('-inf'))

        weights = self.dropout(F.softmax(scores, dim=-1))
        context = torch.matmul(weights, values)
        # Merge the heads back into a single d_model-wide vector per position.
        merged = context.transpose(1, 2).contiguous().view(batch_size, seq_len, self.d_model)
        return self.w_o(merged)
75
+
76
class FeedForward(nn.Module):
    """Position-wise two-layer MLP: Linear -> GELU -> Dropout -> Linear."""

    def __init__(self, d_model: int, d_ff: int, dropout: float = 0.1):
        super().__init__()
        self.linear1 = nn.Linear(d_model, d_ff)   # expand to the hidden width
        self.linear2 = nn.Linear(d_ff, d_model)   # project back to d_model
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the MLP to the last dimension of ``x``."""
        hidden = F.gelu(self.linear1(x))
        hidden = self.dropout(hidden)
        return self.linear2(hidden)
85
+
86
class TransformerBlock(nn.Module):
    """One post-norm transformer layer: attention and MLP, each followed by
    a residual connection and a LayerNorm."""

    def __init__(self, d_model: int, n_heads: int, d_ff: int, dropout: float = 0.1):
        super().__init__()
        self.attention = MultiHeadAttention(d_model, n_heads, dropout)
        self.feed_forward = FeedForward(d_model, d_ff, dropout)
        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        """Run the attention sub-layer, then the feed-forward sub-layer."""
        # Normalisation is applied after each residual sum (post-norm).
        x = self.norm1(x + self.dropout1(self.attention(x, mask)))
        x = self.norm2(x + self.dropout2(self.feed_forward(x)))
        return x
104
+
105
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to the token embeddings.

    The table is precomputed for ``max_len`` positions and registered as the
    buffer ``pe`` with shape ``(1, max_len, d_model)``, so it follows the
    module across devices and is part of its ``state_dict``.
    """

    def __init__(self, d_model: int, max_len: int = 5000):
        super().__init__()
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer cosine column than sine
        # columns; slice so the assignment shapes match. For even d_model the
        # slice keeps every column.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Add the encodings for the first ``x.size(1)`` positions to ``x``."""
        return x + self.pe[:, :x.size(1), :]
117
+
118
class MTPModel(nn.Module):
    """Decoder-only transformer language model.

    Token embeddings (scaled by ``sqrt(d_model)``) plus sinusoidal positions
    go through ``n_layers`` transformer blocks, a final LayerNorm and a linear
    head that produces one logit per vocabulary entry.
    """

    # Sampling any of these ids ends generation.
    # NOTE(review): presumably the tokenizer's EOS / padding ids — confirm
    # against the SentencePiece model shipped with the weights.
    STOP_TOKEN_IDS = (2, 3)

    def __init__(self, vocab_size: int, d_model: int = 512, n_heads: int = 8,
                 n_layers: int = 8, d_ff: int = 2048, dropout: float = 0.1, max_len: int = 1024):
        super().__init__()
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.max_len = max_len
        self.token_embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoding = PositionalEncoding(d_model, max_len)
        self.blocks = nn.ModuleList([
            TransformerBlock(d_model, n_heads, d_ff, dropout) for _ in range(n_layers)
        ])
        self.norm = LayerNorm(d_model)
        self.lm_head = nn.Linear(d_model, vocab_size)

    def forward(self, x, mask=None):
        """Return logits of shape ``(batch, seq, vocab_size)`` for token ids ``x``.

        When ``mask`` is omitted a causal (lower-triangular) mask is used, so
        each position only attends to itself and earlier positions.
        """
        if mask is None:
            seq_len = x.size(1)
            # Build the mask directly on the input's device to avoid a
            # host-to-device copy on every call.
            mask = torch.tril(torch.ones(seq_len, seq_len, device=x.device)).unsqueeze(0).unsqueeze(0)
        x = self.token_embedding(x) * math.sqrt(self.d_model)
        x = self.pos_encoding(x)
        for block in self.blocks:
            x = block(x, mask)
        x = self.norm(x)
        return self.lm_head(x)

    @torch.inference_mode()
    def generate(self, input_ids, max_new_tokens=200, temperature=0.7, top_k=50, top_p=0.9, repetition_penalty=1.15):
        """Sample up to ``max_new_tokens`` tokens after ``input_ids``.

        Only the first sequence of the batch is sampled (logits are read from
        batch index 0). Generation stops early when a token listed in
        ``STOP_TOKEN_IDS`` is drawn; that token is not appended.

        Args:
            input_ids: Tensor of token ids with shape ``(1, seq)``.
            max_new_tokens: Upper bound on the number of appended tokens.
            temperature: Divisor applied to the logits before sampling.
            top_k: Keep only the ``top_k`` highest logits (disabled when <= 0).
            top_p: Nucleus threshold; applied only when strictly between 0 and 1.
            repetition_penalty: Penalty for tokens seen among the last 50
                positions; ``1.0`` disables it.

        Returns:
            ``input_ids`` with the sampled tokens concatenated along dim 1.
        """
        generated = input_ids

        for _ in range(max_new_tokens):
            # The positional table only covers max_len positions, so feed the
            # model at most the last max_len tokens once the sequence grows
            # beyond that.
            context = generated[:, -self.max_len:]
            logits = self(context)
            next_logits = logits[0, -1, :] / temperature

            if repetition_penalty != 1.0:
                recent_tokens = set(generated[0].tolist()[-50:])
                for token_id in recent_tokens:
                    value = next_logits[token_id]
                    # Dividing a negative logit would move it towards zero and
                    # make the token MORE likely, so negative logits are
                    # multiplied by the penalty instead.
                    if value < 0:
                        next_logits[token_id] = value * repetition_penalty
                    else:
                        next_logits[token_id] = value / repetition_penalty

            if top_k > 0:
                top_k_val = min(top_k, next_logits.size(-1))
                # Everything strictly below the k-th largest logit is dropped.
                kth_value = torch.topk(next_logits, top_k_val)[0][..., -1, None]
                next_logits[next_logits < kth_value] = float('-inf')

            if 0.0 < top_p < 1.0:
                sorted_logits, sorted_indices = torch.sort(next_logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                sorted_indices_to_remove = cumulative_probs > top_p
                # Shift right so the first token that crosses the threshold is
                # kept, and always keep the single most likely token.
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0
                indices_to_remove = sorted_indices[sorted_indices_to_remove]
                next_logits[indices_to_remove] = float('-inf')

            probs = F.softmax(next_logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1).item()

            if next_token in self.STOP_TOKEN_IDS:
                break

            generated = torch.cat([generated, torch.tensor([[next_token]], device=generated.device)], dim=1)

        return generated
180
+
181
# --- Download the model artefacts and build the model at import time ---
print(f"📦 Descargando modelo desde {MODEL_REPO}...")
repo_path = snapshot_download(
    repo_id=MODEL_REPO,
    repo_type="model",
    local_dir="mtp_repo",
    ignore_patterns=["*.h5", "*.ot", "*.msgpack"]
)

# Hyper-parameters come from the repo's config.json when present; otherwise
# the defaults below are used.
config_path = os.path.join(repo_path, "config.json")
if os.path.exists(config_path):
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
else:
    config = {
        "vocab_size": 8000,
        "d_model": 512,
        "n_heads": 8,
        "n_layers": 8,
        "d_ff": 2048,
        "dropout": 0.1,
        "max_len": 1024
    }

# The tokenizer is mandatory: without it no prompt can be encoded.
tokenizer_path = os.path.join(repo_path, "mtp_tokenizer.model")
if not os.path.exists(tokenizer_path):
    print(f"❌ Tokenizador no encontrado en {tokenizer_path}")
    sys.exit(1)

sp = spm.SentencePieceProcessor()
sp.load(tokenizer_path)
VOCAB_SIZE = sp.get_piece_size()
# The tokenizer's vocabulary size overrides whatever the config declares.
config["vocab_size"] = VOCAB_SIZE

# MTPModel only accepts these keyword arguments. A config.json carrying any
# other key would make MTPModel(**config) raise TypeError, so extra keys are
# reported and dropped.
MODEL_CONFIG_KEYS = ("vocab_size", "d_model", "n_heads", "n_layers", "d_ff", "dropout", "max_len")
ignored_config_keys = sorted(set(config) - set(MODEL_CONFIG_KEYS))
if ignored_config_keys:
    print(f"⚠️ Claves de configuración ignoradas: {ignored_config_keys}")
config = {key: config[key] for key in MODEL_CONFIG_KEYS if key in config}

print(f"🧠 Inicializando modelo MTP...")
print(f" → Vocabulario: {VOCAB_SIZE}")
# .get with MTPModel's own defaults, matching how /info reads the config.
print(f" → Dimensión: {config.get('d_model', 512)}")
print(f" → Capas: {config.get('n_layers', 8)}")
print(f" → Heads: {config.get('n_heads', 8)}")

model = MTPModel(**config)
model.to(DEVICE)

model_path = os.path.join(repo_path, "mtp_model.pt")
if os.path.exists(model_path):
    # SECURITY NOTE(review): torch.load unpickles the downloaded file. If the
    # checkpoint is a plain tensor state dict, consider passing
    # weights_only=True so arbitrary pickled objects are rejected.
    state_dict = torch.load(model_path, map_location=DEVICE)
    load_result = model.load_state_dict(state_dict, strict=False)
    # strict=False tolerates key mismatches; report them so a checkpoint that
    # does not match the architecture is not silently served with random
    # weights.
    if load_result.missing_keys:
        print(f"⚠️ Pesos ausentes en el checkpoint: {len(load_result.missing_keys)} claves")
    if load_result.unexpected_keys:
        print(f"⚠️ Claves inesperadas en el checkpoint: {len(load_result.unexpected_keys)} claves")
    print("✅ Pesos del modelo cargados")
else:
    print(f"⚠️ No se encontró {model_path}, usando pesos aleatorios")

model.eval()
if DEVICE == "cuda":
    # NOTE(review): generation calls forward with a sequence length that grows
    # every step; verify that "reduce-overhead" compilation actually helps
    # with such dynamic shapes.
    model = torch.compile(model, mode="reduce-overhead")

param_count = sum(p.numel() for p in model.parameters())
print(f"✅ Modelo cargado: {param_count:,} parámetros ({param_count/1e6:.1f}M)")
237
+
238
# FastAPI application object; the route decorators below register on it.
app = FastAPI(
    title="MTP API",
    description="API para modelo de lenguaje MTP",
    version="2.0",
)

# CORS is fully open: any origin, method and header is accepted.
cors_options = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **cors_options)
246
+
247
# Request body accepted by POST /generate. The bounds on each field are
# enforced by pydantic before the handler runs.
class PromptRequest(BaseModel):
    # User message; required, at most 2000 characters.
    text: str = Field(..., max_length=2000, description="Texto de entrada")
    # Number of tokens to generate, between 10 and 300.
    max_tokens: int = Field(200, ge=10, le=300, description="Tokens máximos a generar")
    # Sampling temperature, between 0.3 and 1.5.
    temperature: float = Field(0.7, ge=0.3, le=1.5, description="Temperatura de muestreo")
    # Top-k cutoff, between 1 and 100.
    top_k: int = Field(60, ge=1, le=100, description="Top-k sampling")
    # Nucleus threshold, between 0.5 and 1.0.
    top_p: float = Field(0.92, ge=0.5, le=1.0, description="Top-p sampling")
    # Repetition penalty, between 1.0 (off) and 2.0.
    repetition_penalty: float = Field(1.15, ge=1.0, le=2.0, description="Penalización por repetición")
254
+
255
def build_prompt(user_input: str) -> str:
    """Wrap ``user_input`` in the instruction/response template fed to the model."""
    template = "### Instrucción:\n{}\n\n### Respuesta:\n"
    return template.format(user_input)


# Number of /generate requests currently in flight (reported by /health).
ACTIVE_REQUESTS = 0
259
+
260
class MTPTokenizer:
    """Thin adapter that forwards every call to the wrapped SentencePiece processor."""

    def __init__(self, sp_model):
        # The wrapped processor; all methods below delegate to it.
        self.sp = sp_model

    def encode(self, text):
        """Return the wrapped processor's encoding of ``text``."""
        processor = self.sp
        return processor.encode(text)

    def decode(self, tokens):
        """Return the wrapped processor's decoding of ``tokens``."""
        processor = self.sp
        return processor.decode(tokens)

    def bos_id(self):
        """Return the wrapped processor's ``bos_id()``."""
        processor = self.sp
        return processor.bos_id()

    def eos_id(self):
        """Return the wrapped processor's ``eos_id()``."""
        processor = self.sp
        return processor.eos_id()

    def pad_id(self):
        """Return the wrapped processor's ``pad_id()``."""
        processor = self.sp
        return processor.pad_id()


# Module-wide tokenizer instance used by the request handlers.
tokenizer_wrapper = MTPTokenizer(sp)
280
+
281
# Canned answers keyed by a lowercase, accent-free trigger phrase. Order
# matters: the first key found in the user's message wins.
KNOWLEDGE_BASE = {
    "inteligencia artificial": "La Inteligencia Artificial es un campo de la computación que crea sistemas capaces de realizar tareas que requieren inteligencia humana, como aprendizaje, razonamiento, percepción y procesamiento de lenguaje natural.",
    "machine learning": "El Machine Learning o Aprendizaje Automático es una rama de la IA que permite a los sistemas aprender y mejorar desde la experiencia sin ser programados explícitamente, usando algoritmos que identifican patrones en datos.",
    "redes neuronales": "Las redes neuronales artificiales son sistemas computacionales inspirados en el cerebro humano, compuestos por capas de neuronas artificiales que procesan información para reconocer patrones y hacer predicciones.",
    "python": "Python es un lenguaje de programación de alto nivel, interpretado y de propósito general, conocido por su sintaxis clara y legible, ideal para ciencia de datos, IA y desarrollo web.",
    "transformers": "Los Transformers son una arquitectura de deep learning basada en mecanismos de atención que revolucionó el NLP, siendo la base de modelos como GPT, BERT y MTP.",
    "gpt": "GPT (Generative Pre-trained Transformer) es una familia de modelos de lenguaje desarrollados por OpenAI que generan texto coherente y contextualmente relevante.",
    "hola": "¡Hola! Soy MTP, tu asistente de IA. ¿En qué puedo ayudarte hoy?",
    "como estas": "¡Estoy funcionando de manera óptima! Como asistente de IA, siempre estoy listo para ayudarte. ¿En qué puedo asistirte?",
    "quien eres": "Soy MTP (Mi Transformer Personalizado), un asistente de IA creado con arquitectura Transformer desde cero. Fui entrenado para responder preguntas, mantener conversaciones y ayudarte con diversas tareas.",
    "que puedes hacer": "Puedo responder preguntas sobre diversos temas, ayudarte con programación, explicar conceptos científicos y tecnológicos, mantener conversaciones, y asistirte en tareas de procesamiento de lenguaje natural.",
    "gracias": "¡De nada! Fue un placer ayudarte. Si necesitas algo más, aquí estoy. ¡Que tengas un excelente día!",
    "adios": "¡Hasta luego! Fue un gusto conversar contigo. No dudes en volver si necesitas ayuda. ¡Que tengas un buen día!"
}


def _normalize_for_lookup(text: str) -> str:
    """Lowercase ``text`` and strip diacritics so "¿Cómo estás?" matches "como estas"."""
    import unicodedata  # local import: only this helper needs it

    decomposed = unicodedata.normalize("NFKD", text.lower().strip())
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def get_fallback_response(user_input: str) -> "str | None":
    """Return the canned answer for the first knowledge-base key found in ``user_input``.

    Matching ignores case and accents and only accepts whole words/phrases,
    so "hola" is not triggered by a word that merely contains it
    (e.g. "scholar").

    Returns:
        The canned answer, or ``None`` when no key matches.
    """
    normalized = _normalize_for_lookup(user_input)
    if not normalized:
        return None

    for key, response in KNOWLEDGE_BASE.items():
        # (?<!\w) / (?!\w) act as word boundaries on both sides of the key.
        if re.search(rf"(?<!\w){re.escape(key)}(?!\w)", normalized):
            return response

    return None
304
+
305
def clean_response(text: str, user_input: str = "") -> str:
    """Tidy raw model output before it is sent to the client.

    Steps, in order: collapse runs of five or more identical characters down
    to two, drop unknown-token markers, normalise whitespace, and keep at
    most the first three sentences. If fewer than five characters remain, a
    knowledge-base answer for ``user_input`` (or a generic apology) is
    returned instead.

    Args:
        text: Decoded model output.
        user_input: The user's original message, used only for the fallback.

    Returns:
        The cleaned text, a fallback answer, or ``""`` when ``text`` is empty.
    """
    if not text:
        return ""

    # "aaaaaaa" -> "aa": degenerate repetition from the sampler.
    text = re.sub(r'(.)\1{4,}', r'\1\1', text)

    text = text.replace('<unk>', '').replace('[UNK]', '')

    text = re.sub(r'\s+', ' ', text).strip()

    # Split only where sentence punctuation is followed by whitespace, and
    # keep the punctuation attached. Splitting on the punctuation itself would
    # turn "!" and "?" into "." when rejoining and would break decimals such
    # as "3.14".
    max_sentences = 3
    sentences = [part for part in re.split(r'(?<=[.!?])\s+', text) if part]
    if len(sentences) > max_sentences:
        text = ' '.join(sentences[:max_sentences])

    if len(text) < 5:
        fallback = get_fallback_response(user_input)
        if fallback:
            return fallback
        return "Lo siento, no pude generar una respuesta clara. ¿Podrías reformular tu pregunta?"

    return text
327
+
328
# POST /generate: answer a user message, either from the knowledge base
# (short messages that hit a known key) or by sampling from the model.
#
# NOTE(review): model.generate is a synchronous call made inside an
# ``async def`` handler, so the event loop is blocked for the whole generation
# and other endpoints (e.g. /health) cannot respond meanwhile.
@app.post("/generate")
async def generate(req: PromptRequest):
    global ACTIVE_REQUESTS
    ACTIVE_REQUESTS += 1

    # One outer try/finally owns the counter so that every exit path —
    # early returns and exceptions raised while tokenising included —
    # decrements it exactly once.
    try:
        user_input = req.text.strip()
        if not user_input:
            return {"reply": "", "tokens_generated": 0}

        # Short messages that match a knowledge-base key skip the model.
        fallback_response = get_fallback_response(user_input)
        if fallback_response and len(user_input) < 30:
            return {"reply": fallback_response, "tokens_generated": 0, "source": "knowledge_base"}

        full_prompt = build_prompt(user_input)
        tokens = tokenizer_wrapper.encode(full_prompt)

        # Keep the tail of an over-long prompt. max(1, ...) guards the slice:
        # a bound of 0 would make tokens[-0:] keep the whole list.
        max_input_tokens = max(1, model.max_len - 50)
        if len(tokens) > max_input_tokens:
            tokens = tokens[-max_input_tokens:]

        # Prompt plus generated tokens must fit in the model's context window.
        max_new_tokens = max(0, min(req.max_tokens, 250, model.max_len - len(tokens)))

        input_ids = torch.tensor([tokens], device=DEVICE)

        try:
            with torch.inference_mode():
                output_ids = model.generate(
                    input_ids,
                    max_new_tokens=max_new_tokens,
                    temperature=req.temperature,
                    top_k=req.top_k,
                    top_p=req.top_p,
                    repetition_penalty=req.repetition_penalty
                )

            # Only the newly generated suffix is decoded.
            gen_tokens = output_ids[0, len(tokens):].tolist()

            # Drop out-of-range ids and ids 0 and 1 before decoding.
            safe_tokens = [t for t in gen_tokens if 0 <= t < VOCAB_SIZE and t not in (0, 1)]

            response = tokenizer_wrapper.decode(safe_tokens).strip() if safe_tokens else ""
            response = clean_response(response, user_input)

            if len(response) < 5:
                response = (
                    get_fallback_response(user_input)
                    or "Entendido. ¿Podrías darme más detalles para ayudarte mejor?"
                )

            return {
                "reply": response,
                "tokens_generated": len(safe_tokens),
                "model": "MTP-v2"
            }

        except Exception as e:
            # Boundary handler: the client always gets a reply, never a 500.
            print(f"❌ Error durante generación: {e}")
            fallback = get_fallback_response(user_input)
            if not fallback:
                fallback = "Lo siento, ocurrió un error al procesar tu solicitud. Por favor, intenta de nuevo."
            return {
                "reply": fallback,
                "error": str(e)
            }

        finally:
            # Release memory after every model run.
            if DEVICE == "cuda":
                torch.cuda.empty_cache()
            gc.collect()

    finally:
        ACTIVE_REQUESTS -= 1
402
+
403
# GET /health: liveness probe that also reports the device, the number of
# in-flight /generate requests and the tokenizer's vocabulary size.
@app.get("/health")
def health_check():
    report = dict(
        status="healthy",
        model="MTP",
        device=DEVICE,
        active_requests=ACTIVE_REQUESTS,
        vocab_size=VOCAB_SIZE,
    )
    return report
412
+
413
# GET /info: static description of the loaded model (architecture
# hyper-parameters, parameter count and device).
@app.get("/info")
def model_info():
    architecture = {"vocab_size": VOCAB_SIZE}
    # Hyper-parameters fall back to these defaults when absent from the config.
    for field, default in (("d_model", 512), ("n_layers", 8), ("n_heads", 8), ("max_len", 1024)):
        architecture[field] = config.get(field, default)

    total_params = sum(p.numel() for p in model.parameters())

    return {
        "model_name": "MTP",
        "version": "2.0",
        "architecture": architecture,
        "parameters": total_params,
        "device": DEVICE
    }
428
+
429
# GET /: serve the single-page chat UI. The page posts the user's message to
# /generate and renders the "reply" field of the JSON response.
#
# The client-side escaper replaces "&" first, then "<" and ">", so that text
# containing entity-like sequences (e.g. "&lt;") is shown literally instead of
# being interpreted as HTML entities when assigned through innerHTML.
# Note: "\\n" below is a Python escape that reaches the browser as the
# JavaScript regex /\n/g.
@app.get("/", response_class=HTMLResponse)
def chat_ui():
    return """
<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>MTP - Asistente IA Inteligente</title>
    <style>
        * { margin: 0; padding: 0; box-sizing: border-box; }
        body {
            background: #0a0a0a;
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', sans-serif;
            height: 100vh;
            display: flex;
            flex-direction: column;
        }
        .chat-header {
            padding: 20px 24px;
            background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
            border-bottom: 1px solid #2a2a4a;
        }
        .chat-header h1 {
            color: white;
            font-size: 1.3rem;
            font-weight: 600;
            display: flex;
            align-items: center;
            gap: 10px;
        }
        .chat-header p {
            color: #888;
            font-size: 0.8rem;
            margin-top: 5px;
        }
        .chat-messages {
            flex: 1;
            overflow-y: auto;
            padding: 24px;
            display: flex;
            flex-direction: column;
            gap: 16px;
        }
        .message {
            display: flex;
            gap: 12px;
            max-width: 85%;
            animation: fadeIn 0.3s ease;
        }
        @keyframes fadeIn {
            from { opacity: 0; transform: translateY(10px); }
            to { opacity: 1; transform: translateY(0); }
        }
        .message.user {
            align-self: flex-end;
            flex-direction: row-reverse;
        }
        .message-content {
            padding: 12px 18px;
            border-radius: 20px;
            font-size: 0.95rem;
            line-height: 1.45;
            word-wrap: break-word;
            max-width: 100%;
        }
        .user .message-content {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            border-radius: 20px 4px 20px 20px;
        }
        .bot .message-content {
            background: #1e1e2e;
            color: #e0e0e0;
            border-radius: 4px 20px 20px 20px;
            border: 1px solid #2a2a4a;
        }
        .chat-input-container {
            padding: 20px 24px;
            background: #0f0f0f;
            border-top: 1px solid #1a1a2e;
        }
        .input-wrapper {
            display: flex;
            gap: 12px;
            max-width: 900px;
            margin: 0 auto;
        }
        #messageInput {
            flex: 1;
            padding: 14px 18px;
            background: #1a1a2e;
            border: 1px solid #2a2a4a;
            border-radius: 28px;
            color: white;
            font-size: 0.95rem;
            outline: none;
            transition: all 0.2s;
        }
        #messageInput:focus {
            border-color: #667eea;
            background: #1e1e3a;
        }
        #messageInput::placeholder {
            color: #666;
        }
        #sendBtn {
            padding: 14px 28px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            border: none;
            border-radius: 28px;
            color: white;
            font-weight: 600;
            cursor: pointer;
            transition: transform 0.1s, opacity 0.2s;
        }
        #sendBtn:hover { opacity: 0.9; transform: scale(1.02); }
        #sendBtn:disabled {
            opacity: 0.5;
            transform: none;
            cursor: not-allowed;
        }
        .typing {
            display: flex;
            gap: 6px;
            padding: 12px 18px;
        }
        .typing span {
            width: 8px;
            height: 8px;
            background: #888;
            border-radius: 50%;
            animation: bounce 1.4s infinite ease-in-out;
        }
        .typing span:nth-child(1) { animation-delay: -0.32s; }
        .typing span:nth-child(2) { animation-delay: -0.16s; }
        @keyframes bounce {
            0%, 80%, 100% { transform: scale(0); }
            40% { transform: scale(1); }
        }
        .status-badge {
            display: inline-block;
            width: 10px;
            height: 10px;
            border-radius: 50%;
            background: #4ade80;
            margin-right: 8px;
            animation: pulse 2s infinite;
        }
        @keyframes pulse {
            0%, 100% { opacity: 1; }
            50% { opacity: 0.5; }
        }
        @media (max-width: 768px) {
            .message { max-width: 95%; }
            .chat-messages { padding: 16px; }
            .chat-header { padding: 16px; }
        }
    </style>
</head>
<body>
    <div class="chat-header">
        <h1>
            <span class="status-badge"></span>
            🤖 MTP - Asistente IA Inteligente
        </h1>
        <p>Modelo Transformer personalizado | Respuestas coherentes y contextuales</p>
    </div>
    <div class="chat-messages" id="chatMessages">
        <div class="message bot">
            <div class="message-content">¡Hola! Soy MTP, tu asistente de IA inteligente. Puedo responder preguntas, ayudarte con programación, explicar conceptos y mantener conversaciones. ¿En qué puedo ayudarte hoy?</div>
        </div>
    </div>
    <div class="chat-input-container">
        <div class="input-wrapper">
            <input type="text" id="messageInput" placeholder="Escribe tu mensaje aquí..." autocomplete="off">
            <button id="sendBtn">Enviar</button>
        </div>
    </div>
    <script>
        const chatMessages = document.getElementById('chatMessages');
        const messageInput = document.getElementById('messageInput');
        const sendBtn = document.getElementById('sendBtn');
        let isLoading = false;

        function addMessage(text, isUser) {
            const div = document.createElement('div');
            div.className = `message ${isUser ? 'user' : 'bot'}`;
            const escapedText = text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/\\n/g, '<br>');
            div.innerHTML = `<div class="message-content">${escapedText}</div>`;
            chatMessages.appendChild(div);
            chatMessages.scrollTop = chatMessages.scrollHeight;
            return div;
        }

        function addTypingIndicator() {
            const div = document.createElement('div');
            div.className = 'message bot';
            div.id = 'typingIndicator';
            div.innerHTML = `<div class="typing"><span></span><span></span><span></span></div>`;
            chatMessages.appendChild(div);
            chatMessages.scrollTop = chatMessages.scrollHeight;
        }

        function removeTypingIndicator() {
            const indicator = document.getElementById('typingIndicator');
            if (indicator) indicator.remove();
        }

        async function sendMessage() {
            const text = messageInput.value.trim();
            if (!text || isLoading) return;

            messageInput.value = '';
            addMessage(text, true);
            isLoading = true;
            sendBtn.disabled = true;
            addTypingIndicator();

            try {
                const response = await fetch('/generate', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
                        text: text,
                        max_tokens: 200,
                        temperature: 0.7,
                        top_k: 60,
                        top_p: 0.92,
                        repetition_penalty: 1.15
                    })
                });
                const data = await response.json();
                removeTypingIndicator();
                const reply = data.reply || "Lo siento, no pude generar una respuesta.";
                addMessage(reply, false);
            } catch (error) {
                removeTypingIndicator();
                addMessage('Error de conexión. Por favor, intenta de nuevo.', false);
            } finally {
                isLoading = false;
                sendBtn.disabled = false;
                messageInput.focus();
            }
        }

        messageInput.addEventListener('keypress', (e) => {
            if (e.key === 'Enter' && !e.shiftKey) {
                e.preventDefault();
                sendMessage();
            }
        });
        sendBtn.addEventListener('click', sendMessage);
        messageInput.focus();
    </script>
</body>
</html>
"""
687
+
688
# Script entry point: start the uvicorn server on all interfaces. The port is
# read from the PORT environment variable and defaults to 7860.
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))

    startup_banner = (
        f"\n🚀 Iniciando servidor MTP Inteligente en puerto {port}...",
        f"🌐 Interfaz web: http://0.0.0.0:{port}",
        f"📡 API docs: http://0.0.0.0:{port}/docs",
        f"📊 Endpoint POST: http://0.0.0.0:{port}/generate",
    )
    for line in startup_banner:
        print(line)

    uvicorn.run(app, host="0.0.0.0", port=port, log_level="warning")