teszenofficial committed on
Commit
b0856a7
·
verified ·
1 Parent(s): 587bf27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +209 -405
app.py CHANGED
@@ -4,9 +4,8 @@ import torch
4
  import json
5
  import time
6
  import gc
7
- import re
8
  from fastapi import FastAPI, Request
9
- from fastapi.responses import HTMLResponse
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from pydantic import BaseModel, Field
12
  from huggingface_hub import snapshot_download
@@ -31,10 +30,11 @@ if DEVICE == "cpu":
31
 
32
  torch.set_grad_enabled(False)
33
 
34
- MODEL_REPO = "TeszenAI/MTP-3"
 
35
 
36
  # ======================
37
- # ARQUITECTURA DEL MODELO MEJORADA
38
  # ======================
39
  class LayerNorm(nn.Module):
40
  def __init__(self, d_model: int, eps: float = 1e-5):
@@ -42,6 +42,7 @@ class LayerNorm(nn.Module):
42
  self.weight = nn.Parameter(torch.ones(d_model))
43
  self.bias = nn.Parameter(torch.zeros(d_model))
44
  self.eps = eps
 
45
  def forward(self, x):
46
  mean = x.mean(-1, keepdim=True)
47
  std = x.std(-1, keepdim=True)
@@ -60,6 +61,7 @@ class MultiHeadAttention(nn.Module):
60
  self.w_o = nn.Linear(d_model, d_model)
61
  self.dropout = nn.Dropout(dropout)
62
  self.scale = math.sqrt(self.d_k)
 
63
  def forward(self, x, mask=None):
64
  batch_size, seq_len, _ = x.shape
65
  Q = self.w_q(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
@@ -80,6 +82,7 @@ class FeedForward(nn.Module):
80
  self.linear1 = nn.Linear(d_model, d_ff)
81
  self.linear2 = nn.Linear(d_ff, d_model)
82
  self.dropout = nn.Dropout(dropout)
 
83
  def forward(self, x):
84
  return self.linear2(self.dropout(F.gelu(self.linear1(x))))
85
 
@@ -92,6 +95,7 @@ class TransformerBlock(nn.Module):
92
  self.norm2 = LayerNorm(d_model)
93
  self.dropout1 = nn.Dropout(dropout)
94
  self.dropout2 = nn.Dropout(dropout)
 
95
  def forward(self, x, mask=None):
96
  attn_output = self.attention(x, mask)
97
  x = x + self.dropout1(attn_output)
@@ -110,6 +114,7 @@ class PositionalEncoding(nn.Module):
110
  pe[:, 0::2] = torch.sin(position * div_term)
111
  pe[:, 1::2] = torch.cos(position * div_term)
112
  self.register_buffer('pe', pe.unsqueeze(0))
 
113
  def forward(self, x):
114
  return x + self.pe[:, :x.size(1), :]
115
 
@@ -122,7 +127,9 @@ class MTPModel(nn.Module):
122
  self.max_len = max_len
123
  self.token_embedding = nn.Embedding(vocab_size, d_model)
124
  self.pos_encoding = PositionalEncoding(d_model, max_len)
125
- self.blocks = nn.ModuleList([TransformerBlock(d_model, n_heads, d_ff, dropout) for _ in range(n_layers)])
 
 
126
  self.norm = LayerNorm(d_model)
127
  self.lm_head = nn.Linear(d_model, vocab_size)
128
 
@@ -134,147 +141,54 @@ class MTPModel(nn.Module):
134
  for block in self.blocks:
135
  x = block(x, mask)
136
  x = self.norm(x)
137
- return self.lm_head(x)
138
-
139
- # ======================
140
- # NLP UTILITIES - PROCESAMIENTO DE LENGUAJE NATURAL
141
- # ======================
142
- class NLPProcessor:
143
- """Procesador de lenguaje natural para entender mejor las intenciones"""
144
 
145
- @staticmethod
146
- def detect_intent(text):
147
- """Detecta la intención del usuario"""
148
- text_lower = text.lower()
149
-
150
- intents = {
151
- 'saludo': ['hola', 'buenas', 'que tal', 'cómo estás', 'hey', 'saludos'],
152
- 'despedida': ['adiós', 'chao', 'hasta luego', 'nos vemos', 'bye'],
153
- 'agradecimiento': ['gracias', 'gracias por', 'te agradezco', 'muchas gracias'],
154
- 'pregunta': ['qué es', 'cómo funciona', 'por qué', 'cuándo', 'dónde', 'quién'],
155
- 'ayuda': ['ayuda', 'necesito ayuda', 'puedes ayudarme', 'me ayudas'],
156
- 'presentacion': ['quién eres', 'qué eres', 'presentate', 'eres'],
157
- 'capacidad': ['qué puedes hacer', 'funciones', 'capacidades', 'que sabes hacer'],
158
- 'sentimiento': ['estoy triste', 'estoy feliz', 'me siento', 'emocionado']
159
- }
160
-
161
- for intent, keywords in intents.items():
162
- for keyword in keywords:
163
- if keyword in text_lower:
164
- return intent
165
- return 'general'
166
-
167
- @staticmethod
168
- def should_stop(response, min_length=30, max_length=200):
169
- """Determina si la respuesta debe terminar"""
170
-
171
- # Palabras que indican final de respuesta
172
- stop_phrases = [
173
- '¿alguna otra pregunta?', '¿en qué más puedo ayudarte?',
174
- '¿necesitas ayuda con algo más?', '¿tienes alguna otra duda?',
175
- 'espero haberte ayudado', 'que tengas un buen día',
176
- 'hasta luego', 'adiós', 'saludos', 'gracias por consultar'
177
- ]
178
-
179
- # Si es demasiado corta, continuar
180
- if len(response) < min_length:
181
- return False
182
-
183
- # Si excede el máximo, cortar
184
- if len(response) > max_length:
185
- return True
186
-
187
- # Verificar frases de parada
188
- for phrase in stop_phrases:
189
- if phrase in response.lower():
190
- return True
191
 
192
- # Verificar si termina con puntuación adecuada
193
- if len(response) > 50:
194
- last_chars = response[-10:]
195
- # Termina con punto, signo de interrogación o exclamación
196
- if any(last_chars.rstrip().endswith(p) for p in ['.', '?', '!', '…']):
197
- # Contar oraciones completas
198
- sentences = re.split(r'[.!?]+', response)
199
- if len(sentences) >= 2: # Al menos 2 oraciones completas
200
- return True
201
-
202
- return False
203
-
204
- @staticmethod
205
- def clean_response(text):
206
- """Limpia y mejora la respuesta"""
207
- # Eliminar repeticiones excesivas
208
- text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)
209
-
210
- # Corregir espaciado
211
- text = re.sub(r'\s+([.,!?;:])', r'\1', text)
212
-
213
- # Asegurar mayúscula al inicio
214
- if text and text[0].islower():
215
- text = text[0].upper() + text[1:]
216
-
217
- # Agregar punto final si no tiene
218
- if text and not text[-1] in '.!?':
219
- text += '.'
220
-
221
- return text.strip()
222
-
223
- @staticmethod
224
- def extract_key_info(text):
225
- """Extrae información clave del texto"""
226
- # Detectar números
227
- numbers = re.findall(r'\d+(?:\.\d+)?', text)
228
-
229
- # Detectar emails
230
- emails = re.findall(r'[\w\.-]+@[\w\.-]+\.\w+', text)
231
-
232
- # Detectar URLs
233
- urls = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', text)
234
 
235
- return {
236
- 'has_numbers': bool(numbers),
237
- 'has_emails': bool(emails),
238
- 'has_urls': bool(urls),
239
- 'numbers': numbers,
240
- 'emails': emails,
241
- 'urls': urls
242
- }
243
 
244
  # ======================
245
  # DESCARGA Y CARGA DEL MODELO
246
  # ======================
247
- def download_with_retry(repo_id, local_dir, max_retries=3):
248
- for attempt in range(max_retries):
249
- try:
250
- print(f"📦 Intento {attempt + 1}/{max_retries} - Descargando modelo...")
251
- repo_path = snapshot_download(
252
- repo_id=repo_id,
253
- repo_type="model",
254
- local_dir=local_dir,
255
- resume_download=True,
256
- local_files_only=False
257
- )
258
- print(f"✅ Modelo descargado")
259
- return repo_path
260
- except Exception as e:
261
- print(f"⚠️ Error: {str(e)[:100]}")
262
- if attempt < max_retries - 1:
263
- time.sleep(3)
264
- else:
265
- raise
266
- return local_dir
267
-
268
- print(f"🚀 Cargando modelo...")
269
-
270
- if os.path.exists("mtp_repo") and os.path.exists("mtp_repo/mtp_model.pt"):
271
- print("📁 Modelo en caché")
272
- repo_path = "mtp_repo"
273
- else:
274
- try:
275
- repo_path = download_with_retry(MODEL_REPO, "mtp_repo", max_retries=3)
276
- except:
277
- repo_path = "mtp_repo"
278
 
279
  # Cargar configuración
280
  config_path = os.path.join(repo_path, "config.json")
@@ -283,7 +197,7 @@ if os.path.exists(config_path):
283
  config = json.load(f)
284
  else:
285
  config = {
286
- "vocab_size": 2000,
287
  "d_model": 256,
288
  "n_heads": 8,
289
  "n_layers": 6,
@@ -294,43 +208,56 @@ else:
294
 
295
  # Cargar tokenizador
296
  tokenizer_path = os.path.join(repo_path, "mtp_tokenizer.model")
297
- if os.path.exists(tokenizer_path):
298
- sp = spm.SentencePieceProcessor()
299
- sp.load(tokenizer_path)
300
- VOCAB_SIZE = sp.get_piece_size()
301
- config["vocab_size"] = VOCAB_SIZE
302
- print(f"✅ Tokenizador: {VOCAB_SIZE} tokens")
303
- else:
304
- sp = None
305
- VOCAB_SIZE = config.get("vocab_size", 2000)
306
 
307
- print(f"🧠 Inicializando modelo...")
 
 
 
308
  print(f" → Vocabulario: {VOCAB_SIZE}")
309
  print(f" → Dimensión: {config['d_model']}")
310
  print(f" → Capas: {config['n_layers']}")
 
311
 
312
  model = MTPModel(**config)
313
  model.to(DEVICE)
314
 
315
- # Cargar pesos
316
  model_path = os.path.join(repo_path, "mtp_model.pt")
317
  if os.path.exists(model_path):
318
- try:
319
- state_dict = torch.load(model_path, map_location=DEVICE)
320
- model.load_state_dict(state_dict)
321
- print("✅ Pesos cargados")
322
- except Exception as e:
323
- print(f"⚠️ Error cargando pesos: {e}")
324
 
325
  model.eval()
326
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  param_count = sum(p.numel() for p in model.parameters())
328
- print(f"✅ Modelo listo: {param_count:,} parámetros ({param_count/1e6:.1f}M)")
329
 
330
  # ======================
331
  # API CONFIG
332
  # ======================
333
- app = FastAPI(title="MTP API - Versión Mejorada", description="API con NLP integrado", version="2.0")
 
 
 
 
334
 
335
  app.add_middleware(
336
  CORSMiddleware,
@@ -340,210 +267,117 @@ app.add_middleware(
340
  )
341
 
342
  class PromptRequest(BaseModel):
343
- text: str = Field(..., max_length=2000)
344
- max_tokens: int = Field(default=150, ge=10, le=300)
345
- temperature: float = Field(default=0.7, ge=0.1, le=2.0)
346
- top_k: int = Field(default=50, ge=1, le=100)
347
- top_p: float = Field(default=0.9, ge=0.1, le=1.0)
 
348
 
349
- # Inicializar NLP
350
- nlp = NLPProcessor()
 
351
 
352
  # ======================
353
- # GENERACIÓN INTELIGENTE MEJORADA
354
- # ======================
355
- def generate_response_intelligent(model, tokenizer, prompt, max_length=150, temperature=0.7, top_k=50, top_p=0.9, device='cpu'):
356
- model.eval()
357
-
358
- # Detectar intención para ajustar comportamiento
359
- intent = nlp.detect_intent(prompt)
360
-
361
- # Ajustar temperatura según intención
362
- if intent == 'despedida':
363
- temperature = 0.5 # Más determinista
364
- max_length = min(max_length, 60) # Respuestas cortas
365
- elif intent == 'pregunta':
366
- temperature = 0.6 # Más preciso
367
- elif intent == 'agradecimiento':
368
- temperature = 0.5
369
- max_length = min(max_length, 50)
370
-
371
- formatted_prompt = f"### Instrucción:\n{prompt}\n\n### Respuesta:\n"
372
- input_ids = tokenizer.encode(formatted_prompt)
373
- generated = input_ids.copy()
374
- eos_id = tokenizer.eos_id()
375
-
376
- # Contadores para control de parada
377
- consecutive_punctuation = 0
378
- last_chars = []
379
-
380
- for step in range(max_length):
381
- input_tensor = torch.tensor([generated[-model.max_len:]], dtype=torch.long).to(device)
382
- with torch.no_grad():
383
- logits = model(input_tensor)
384
- next_logits = logits[0, -1, :] / temperature
385
-
386
- # Top-k filtering
387
- if top_k > 0:
388
- indices_to_remove = next_logits < torch.topk(next_logits, top_k)[0][..., -1, None]
389
- next_logits[indices_to_remove] = float('-inf')
390
-
391
- # Top-p filtering
392
- if top_p < 1.0:
393
- sorted_logits, sorted_indices = torch.sort(next_logits, descending=True)
394
- cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
395
- sorted_indices_to_remove = cumulative_probs > top_p
396
- sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
397
- sorted_indices_to_remove[..., 0] = 0
398
- indices_to_remove = sorted_indices[sorted_indices_to_remove]
399
- next_logits[indices_to_remove] = float('-inf')
400
-
401
- probs = F.softmax(next_logits, dim=-1)
402
- next_token = torch.multinomial(probs, 1).item()
403
-
404
- # Detener en EOS
405
- if next_token == eos_id:
406
- break
407
-
408
- # Detener si hay demasiados signos de puntuación seguidos
409
- token_str = tokenizer.decode([next_token]) if hasattr(tokenizer, 'decode') else str(next_token)
410
- if token_str in '.!?':
411
- consecutive_punctuation += 1
412
- if consecutive_punctuation >= 3:
413
- break
414
- else:
415
- consecutive_punctuation = 0
416
-
417
- # Guardar últimos caracteres para análisis
418
- last_chars.append(token_str)
419
- if len(last_chars) > 20:
420
- last_chars.pop(0)
421
-
422
- # Detectar bucles de repetición
423
- if len(last_chars) >= 10:
424
- last_str = ''.join(last_chars[-5:])
425
- if last_str in ''.join(last_chars[:-5]):
426
- break
427
-
428
- generated.append(next_token)
429
-
430
- # Verificar si ya es suficiente (para respuestas cortas)
431
- current_response = tokenizer.decode(generated)
432
- if "### Respuesta:" in current_response:
433
- response_part = current_response.split("### Respuesta:")[-1].strip()
434
- if nlp.should_stop(response_part, min_length=20, max_length=max_length):
435
- break
436
-
437
- # Decodificar respuesta
438
- response = tokenizer.decode(generated)
439
-
440
- # Extraer la parte de la respuesta
441
- if "### Respuesta:" in response:
442
- response = response.split("### Respuesta:")[-1].strip()
443
- elif "Respuesta:" in response:
444
- response = response.split("Respuesta:")[-1].strip()
445
- elif "[/INST]" in response:
446
- response = response.split("[/INST]")[-1].strip()
447
-
448
- # Limpiar y mejorar respuesta
449
- garbage_words = ['foompañances', 'ciudadores', 'mejtedon', 'calportedon', 'rápidodcor', 'baon', 'domol']
450
- for word in garbage_words:
451
- response = response.replace(word, '')
452
-
453
- # Limpiar caracteres especiales
454
- response = re.sub(r'[^\w\s\u00C0-\u00FF\u0100-\u017F.,!?¿¡()\-:;"\']+', ' ', response)
455
- response = re.sub(r'\s+', ' ', response).strip()
456
-
457
- # Aplicar NLP a la respuesta
458
- response = nlp.clean_response(response)
459
-
460
- # Respuestas por defecto según intención si está vacía
461
- if len(response) < 3:
462
- default_responses = {
463
- 'saludo': "¡Hola! ¿En qué puedo ayudarte hoy?",
464
- 'despedida': "¡Hasta luego! Que tengas un excelente día.",
465
- 'agradecimiento': "¡De nada! Estoy aquí para ayudarte cuando lo necesites.",
466
- 'ayuda': "Claro, estoy aquí para ayudarte. ¿Qué necesitas saber?",
467
- 'presentacion': "Soy MTP, un asistente virtual creado para responder preguntas y ayudarte con información.",
468
- 'general': "Entendido. ¿Hay algo específico en lo que pueda ayudarte?"
469
- }
470
- response = default_responses.get(intent, default_responses['general'])
471
-
472
- return response
473
-
474
- # ======================
475
- # ENDPOINTS
476
  # ======================
477
  ACTIVE_REQUESTS = 0
478
 
479
- class TokenizerWrapper:
 
480
  def __init__(self, sp_model):
481
  self.sp = sp_model
 
482
  def encode(self, text):
483
- if self.sp is None:
484
- return [ord(c) % 1000 for c in text[:200]]
485
  return self.sp.encode(text)
 
486
  def decode(self, tokens):
487
- if self.sp is None:
488
- return ''.join([chr(t % 128) if 32 <= t % 128 < 127 else ' ' for t in tokens])
489
  return self.sp.decode(tokens)
490
- def eos_id(self):
491
- return self.sp.eos_id() if self.sp else 3
492
  def bos_id(self):
493
- return self.sp.bos_id() if self.sp else 2
 
 
 
 
494
  def pad_id(self):
495
- return self.sp.pad_id() if self.sp else 0
496
 
497
- tokenizer_wrapper = TokenizerWrapper(sp)
498
 
499
  @app.post("/generate")
500
  async def generate(req: PromptRequest):
 
501
  global ACTIVE_REQUESTS
502
  ACTIVE_REQUESTS += 1
503
 
 
 
 
 
 
 
 
 
504
  user_input = req.text.strip()
505
  if not user_input:
506
  ACTIVE_REQUESTS -= 1
507
- return {"reply": "", "tokens_generated": 0, "intent": None}
508
-
509
- # Detectar intención
510
- intent = nlp.detect_intent(user_input)
511
-
 
512
  try:
513
- response = generate_response_intelligent(
514
- model, tokenizer_wrapper, user_input,
515
- max_length=req.max_tokens,
516
- temperature=req.temperature,
517
- top_k=req.top_k,
518
- top_p=req.top_p,
519
- device=DEVICE
520
- )
 
 
 
 
 
 
 
 
521
 
522
- # Extraer información clave
523
- key_info = nlp.extract_key_info(response)
524
 
 
 
 
525
  return {
526
  "reply": response,
527
- "tokens_generated": len(response.split()),
528
- "model": "MTP-Intelligent",
529
- "intent": intent,
530
- "has_numbers": key_info['has_numbers'],
531
- "has_emails": key_info['has_emails']
532
  }
 
533
  except Exception as e:
534
- print(f"❌ Error: {e}")
535
- return {"reply": "Lo siento, ocurrió un error.", "error": str(e), "intent": intent}
 
 
 
 
536
  finally:
537
  ACTIVE_REQUESTS -= 1
538
  if DEVICE == "cuda":
539
  torch.cuda.empty_cache()
540
  gc.collect()
541
 
 
 
 
542
  @app.get("/health")
543
  def health_check():
544
  return {
545
  "status": "healthy",
546
- "model": "MTP-Intelligent",
547
  "device": DEVICE,
548
  "active_requests": ACTIVE_REQUESTS,
549
  "vocab_size": VOCAB_SIZE
@@ -552,26 +386,15 @@ def health_check():
552
  @app.get("/info")
553
  def model_info():
554
  return {
555
- "model_name": "MTP-Intelligent",
556
- "version": "2.0",
557
  "architecture": config,
558
  "parameters": sum(p.numel() for p in model.parameters()),
559
- "device": DEVICE,
560
- "nlp_enabled": True
561
- }
562
-
563
- @app.post("/analyze")
564
- async def analyze_intent(req: PromptRequest):
565
- """Endpoint para analizar intención sin generar respuesta"""
566
- intent = nlp.detect_intent(req.text)
567
- return {
568
- "text": req.text,
569
- "intent": intent,
570
- "confidence": 0.85 # Por ahora fijo, se puede mejorar
571
  }
572
 
573
  # ======================
574
- # INTERFAZ WEB MEJORADA
575
  # ======================
576
  @app.get("/", response_class=HTMLResponse)
577
  def chat_ui():
@@ -581,7 +404,7 @@ def chat_ui():
581
  <head>
582
  <meta charset="UTF-8">
583
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
584
- <title>MTP - Asistente Inteligente</title>
585
  <link rel="preconnect" href="https://fonts.googleapis.com">
586
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
587
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
@@ -593,7 +416,6 @@ def chat_ui():
593
  --text-primary: #e3e3e3;
594
  --text-secondary: #9aa0a6;
595
  --user-bubble: #282a2c;
596
- --success-color: #00c853;
597
  }
598
  * { box-sizing: border-box; outline: none; -webkit-tap-highlight-color: transparent; }
599
  body {
@@ -629,7 +451,12 @@ header {
629
  width: 32px;
630
  height: 32px;
631
  border-radius: 50%;
632
- background: linear-gradient(135deg, #4a9eff, #00c853);
 
 
 
 
 
633
  }
634
  .brand-text {
635
  font-weight: 500;
@@ -696,7 +523,12 @@ header {
696
  height: 34px;
697
  min-width: 34px;
698
  border-radius: 50%;
699
- background: linear-gradient(135deg, #4a9eff, #00c853);
 
 
 
 
 
700
  box-shadow: 0 2px 6px rgba(0,0,0,0.2);
701
  }
702
  .bot-actions {
@@ -723,14 +555,9 @@ header {
723
  }
724
  .action-btn svg { width: 16px; height: 16px; fill: currentColor; }
725
  .typing-cursor::after {
726
- content: '';
727
  display: inline-block;
728
- width: 10px;
729
- height: 10px;
730
- background: var(--accent-color);
731
- border-radius: 50%;
732
- margin-left: 5px;
733
- vertical-align: middle;
734
  animation: blink 1s infinite;
735
  }
736
  .footer-container {
@@ -764,8 +591,8 @@ header {
764
  padding: 10px 0;
765
  }
766
  #mainBtn {
767
- background: var(--accent-color);
768
- color: white;
769
  border: none;
770
  width: 36px;
771
  height: 36px;
@@ -777,7 +604,7 @@ header {
777
  margin-left: 8px;
778
  transition: transform 0.2s;
779
  }
780
- #mainBtn:hover { transform: scale(1.05); background: #3a7ed4; }
781
  .disclaimer {
782
  text-align: center;
783
  font-size: 0.75rem;
@@ -795,15 +622,6 @@ header {
795
  100% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0); }
796
  }
797
  .pulsing { animation: pulseAvatar 1.5s infinite; }
798
- .intent-badge {
799
- font-size: 0.7rem;
800
- background: rgba(0, 200, 83, 0.15);
801
- color: #00c853;
802
- padding: 2px 8px;
803
- border-radius: 12px;
804
- display: inline-block;
805
- margin-top: 5px;
806
- }
807
  ::-webkit-scrollbar { width: 8px; }
808
  ::-webkit-scrollbar-track { background: transparent; }
809
  ::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }
@@ -812,29 +630,29 @@ header {
812
  <body>
813
  <header>
814
  <div class="brand-wrapper" onclick="location.reload()">
815
- <div class="brand-logo"></div>
816
  <div class="brand-text">
817
- MTP <span class="version-badge">Inteligente</span>
818
  </div>
819
  </div>
820
  </header>
821
  <div id="chatScroll" class="chat-scroll">
822
  <div class="msg-row bot" style="animation-delay: 0.1s;">
823
- <div class="bot-avatar"></div>
824
  <div class="msg-content-wrapper">
825
  <div class="msg-text">
826
- ¡Hola! Soy MTP, tu asistente inteligente. ¿En qué puedo ayudarte hoy?
827
  </div>
828
  </div>
829
  </div>
830
  </div>
831
  <div class="footer-container">
832
  <div class="input-box">
833
- <input type="text" id="userInput" placeholder="Escribe tu mensaje..." autocomplete="off">
834
  <button id="mainBtn" onclick="handleBtnClick()">➤</button>
835
  </div>
836
  <div class="disclaimer">
837
- MTP usa NLP para entender mejor tu consulta • Respuestas inteligentes
838
  </div>
839
  </div>
840
  <script>
@@ -852,10 +670,10 @@ function scrollToBottom() {
852
 
853
  function setBtnState(state) {
854
  if (state === 'sending') {
855
- mainBtn.innerHTML = "";
856
  isGenerating = true;
857
  } else {
858
- mainBtn.innerHTML = "";
859
  isGenerating = false;
860
  abortController = null;
861
  }
@@ -882,21 +700,19 @@ function stopGeneration() {
882
 
883
  async function sendMessage(textOverride = null) {
884
  const text = textOverride || userInput.value.trim();
885
- if (!text || isGenerating) return;
886
-
887
  lastUserPrompt = text;
888
  if (!textOverride) {
889
  userInput.value = '';
890
  addMessage(text, 'user');
891
  }
892
-
893
  setBtnState('sending');
894
  abortController = new AbortController();
895
-
896
  const botRow = document.createElement('div');
897
  botRow.className = 'msg-row bot';
898
  const avatar = document.createElement('div');
899
- avatar.className = 'bot-avatar pulsing';
 
900
  const wrapper = document.createElement('div');
901
  wrapper.className = 'msg-content-wrapper';
902
  const msgText = document.createElement('div');
@@ -906,35 +722,17 @@ async function sendMessage(textOverride = null) {
906
  botRow.appendChild(wrapper);
907
  chatScroll.appendChild(botRow);
908
  scrollToBottom();
909
-
910
  try {
911
  const response = await fetch('/generate', {
912
  method: 'POST',
913
  headers: { 'Content-Type': 'application/json' },
914
- body: JSON.stringify({
915
- text: text,
916
- max_tokens: 200,
917
- temperature: 0.7,
918
- top_k: 50,
919
- top_p: 0.9
920
- }),
921
  signal: abortController.signal
922
  });
923
-
924
  const data = await response.json();
925
  if (!isGenerating) return;
926
-
927
  avatar.classList.remove('pulsing');
928
  const reply = data.reply || "No entendí eso.";
929
-
930
- // Mostrar intención detectada si está disponible
931
- if (data.intent && data.intent !== 'general') {
932
- const intentSpan = document.createElement('div');
933
- intentSpan.className = 'intent-badge';
934
- intentSpan.textContent = `🎯 Intención: ${data.intent}`;
935
- wrapper.appendChild(intentSpan);
936
- }
937
-
938
  await typeWriter(msgText, reply);
939
  if (isGenerating) {
940
  addActions(wrapper, reply);
@@ -945,7 +743,7 @@ async function sendMessage(textOverride = null) {
945
  msgText.textContent += " [Detenido]";
946
  } else {
947
  avatar.classList.remove('pulsing');
948
- msgText.textContent = "Error de conexión. Intenta de nuevo.";
949
  msgText.style.color = "#ff8b8b";
950
  setBtnState('idle');
951
  }
@@ -963,7 +761,7 @@ function addMessage(text, sender) {
963
  scrollToBottom();
964
  }
965
 
966
- function typeWriter(element, text, speed = 10) {
967
  return new Promise(resolve => {
968
  let i = 0;
969
  element.classList.add('typing-cursor');
@@ -990,17 +788,18 @@ function typeWriter(element, text, speed = 10) {
990
  function addActions(wrapperElement, textToCopy) {
991
  const actionsDiv = document.createElement('div');
992
  actionsDiv.className = 'bot-actions';
993
-
994
  const copyBtn = document.createElement('button');
995
  copyBtn.className = 'action-btn';
996
- copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>`;
997
- copyBtn.onclick = () => { navigator.clipboard.writeText(textToCopy); };
998
-
 
999
  const regenBtn = document.createElement('button');
1000
  regenBtn.className = 'action-btn';
1001
- regenBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M23 4v6h-6"></path><path d="M1 20v-6h6"></path><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"></path></svg>`;
1002
- regenBtn.onclick = () => { sendMessage(lastUserPrompt); };
1003
-
 
1004
  actionsDiv.appendChild(copyBtn);
1005
  actionsDiv.appendChild(regenBtn);
1006
  wrapperElement.appendChild(actionsDiv);
@@ -1011,7 +810,6 @@ function addActions(wrapperElement, textToCopy) {
1011
  userInput.addEventListener('keydown', (e) => {
1012
  if (e.key === 'Enter') handleBtnClick();
1013
  });
1014
-
1015
  window.onload = () => userInput.focus();
1016
  </script>
1017
  </body>
@@ -1020,7 +818,13 @@ window.onload = () => userInput.focus();
1020
 
1021
  if __name__ == "__main__":
1022
  port = int(os.environ.get("PORT", 7860))
1023
- print(f"\n🚀 MTP Inteligente iniciado en puerto {port}")
1024
- print(f"🌐 http://0.0.0.0:{port}")
 
1025
 
1026
- uvicorn.run(app, host="0.0.0.0", port=port, log_level="info")
 
 
 
 
 
 
4
  import json
5
  import time
6
  import gc
 
7
  from fastapi import FastAPI, Request
8
+ from fastapi.responses import HTMLResponse, StreamingResponse
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from pydantic import BaseModel, Field
11
  from huggingface_hub import snapshot_download
 
30
 
31
  torch.set_grad_enabled(False)
32
 
33
+ # CAMBIA ESTO POR EL NOMBRE DE TU REPO EN HUGGING FACE
34
+ MODEL_REPO = "TeszenAI/MTP-3" # <-- CAMBIA A TU REPO
35
 
36
  # ======================
37
+ # DEFINIR ARQUITECTURA DEL MODELO (MTP)
38
  # ======================
39
  class LayerNorm(nn.Module):
40
  def __init__(self, d_model: int, eps: float = 1e-5):
 
42
  self.weight = nn.Parameter(torch.ones(d_model))
43
  self.bias = nn.Parameter(torch.zeros(d_model))
44
  self.eps = eps
45
+
46
  def forward(self, x):
47
  mean = x.mean(-1, keepdim=True)
48
  std = x.std(-1, keepdim=True)
 
61
  self.w_o = nn.Linear(d_model, d_model)
62
  self.dropout = nn.Dropout(dropout)
63
  self.scale = math.sqrt(self.d_k)
64
+
65
  def forward(self, x, mask=None):
66
  batch_size, seq_len, _ = x.shape
67
  Q = self.w_q(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
 
82
  self.linear1 = nn.Linear(d_model, d_ff)
83
  self.linear2 = nn.Linear(d_ff, d_model)
84
  self.dropout = nn.Dropout(dropout)
85
+
86
  def forward(self, x):
87
  return self.linear2(self.dropout(F.gelu(self.linear1(x))))
88
 
 
95
  self.norm2 = LayerNorm(d_model)
96
  self.dropout1 = nn.Dropout(dropout)
97
  self.dropout2 = nn.Dropout(dropout)
98
+
99
  def forward(self, x, mask=None):
100
  attn_output = self.attention(x, mask)
101
  x = x + self.dropout1(attn_output)
 
114
  pe[:, 0::2] = torch.sin(position * div_term)
115
  pe[:, 1::2] = torch.cos(position * div_term)
116
  self.register_buffer('pe', pe.unsqueeze(0))
117
+
118
  def forward(self, x):
119
  return x + self.pe[:, :x.size(1), :]
120
 
 
127
  self.max_len = max_len
128
  self.token_embedding = nn.Embedding(vocab_size, d_model)
129
  self.pos_encoding = PositionalEncoding(d_model, max_len)
130
+ self.blocks = nn.ModuleList([
131
+ TransformerBlock(d_model, n_heads, d_ff, dropout) for _ in range(n_layers)
132
+ ])
133
  self.norm = LayerNorm(d_model)
134
  self.lm_head = nn.Linear(d_model, vocab_size)
135
 
 
141
  for block in self.blocks:
142
  x = block(x, mask)
143
  x = self.norm(x)
144
+ logits = self.lm_head(x)
145
+ return logits
 
 
 
 
 
146
 
147
def generate(self, input_ids, max_new_tokens=100, temperature=0.8, top_k=50, top_p=0.9,
             repetition_penalty=1.1, eos_token_id=3):
    """Autoregressively sample up to ``max_new_tokens`` tokens after ``input_ids``.

    Args:
        input_ids: 2-D tensor of token ids. Only row 0 is sampled, so a batch
            size of 1 is assumed.
        max_new_tokens: Upper bound on the number of sampled tokens.
        temperature: Divisor applied to the last-position logits before sampling.
        top_k: Keep only the ``top_k`` highest logits (0 disables the filter).
            Values larger than the vocabulary are clamped instead of raising.
        top_p: Nucleus threshold; 1.0 disables the filter.
        repetition_penalty: Factor that makes already generated tokens less
            likely; 1.0 disables it.
        eos_token_id: Token id that stops generation. It is never appended to
            the output. Defaults to 3, the value previously hard-coded here.

    Returns:
        ``input_ids`` with the sampled tokens concatenated along dim 1.
    """
    generated = input_ids
    # The model stores its maximum sequence length in ``max_len``; feeding more
    # tokens than that would outrun the positional-encoding table, so only the
    # most recent ``max_len`` tokens are used as context.
    max_context = getattr(self, "max_len", None)

    with torch.no_grad():
        for _ in range(max_new_tokens):
            context = generated if max_context is None else generated[:, -max_context:]
            logits = self(context)
            next_logits = logits[0, -1, :] / temperature

            if repetition_penalty != 1.0:
                seen_ids = torch.unique(generated[0])
                seen_logits = next_logits[seen_ids]
                # Dividing a negative logit would move it towards zero and make
                # the token MORE likely, so negative logits are multiplied.
                next_logits[seen_ids] = torch.where(
                    seen_logits < 0,
                    seen_logits * repetition_penalty,
                    seen_logits / repetition_penalty,
                )

            if top_k > 0:
                # torch.topk raises when k exceeds the vocabulary size.
                k = min(top_k, next_logits.size(-1))
                kth_best = torch.topk(next_logits, k)[0][..., -1, None]
                next_logits[next_logits < kth_best] = float('-inf')

            if top_p < 1.0:
                sorted_logits, sorted_indices = torch.sort(next_logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                sorted_indices_to_remove = cumulative_probs > top_p
                # Shift right so the first token crossing the threshold is kept.
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0
                indices_to_remove = sorted_indices[sorted_indices_to_remove]
                next_logits[indices_to_remove] = float('-inf')

            probs = F.softmax(next_logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1).item()

            if next_token == eos_token_id:
                break

            next_column = torch.tensor([[next_token]], device=generated.device)
            generated = torch.cat([generated, next_column], dim=1)

    return generated
 
 
 
 
 
 
 
182
 
183
  # ======================
184
  # DESCARGA Y CARGA DEL MODELO
185
  # ======================
186
+ print(f"📦 Descargando modelo desde {MODEL_REPO}...")
187
+ repo_path = snapshot_download(
188
+ repo_id=MODEL_REPO,
189
+ repo_type="model",
190
+ local_dir="mtp_repo"
191
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  # Cargar configuración
194
  config_path = os.path.join(repo_path, "config.json")
 
197
  config = json.load(f)
198
  else:
199
  config = {
200
+ "vocab_size": 5000,
201
  "d_model": 256,
202
  "n_heads": 8,
203
  "n_layers": 6,
 
208
 
209
# Load the SentencePiece tokenizer shipped in the downloaded repo.
tokenizer_path = os.path.join(repo_path, "mtp_tokenizer.model")
sp = spm.SentencePieceProcessor()
sp.load(tokenizer_path)
VOCAB_SIZE = sp.get_piece_size()

# The tokenizer's size overrides whatever vocab_size the config declared.
config["vocab_size"] = VOCAB_SIZE

print(f"🧠 Inicializando modelo MTP...")
print(f" → Vocabulario: {VOCAB_SIZE}")
print(f" → Dimensión: {config['d_model']}")
print(f" → Capas: {config['n_layers']}")
print(f" → Heads: {config['n_heads']}")

model = MTPModel(**config)
model.to(DEVICE)

# Load the trained weights when the checkpoint exists.
model_path = os.path.join(repo_path, "mtp_model.pt")
if os.path.exists(model_path):
    # NOTE(review): torch.load unpickles the file, and the checkpoint comes
    # from a downloaded repo — only point MODEL_REPO at a trusted source
    # (consider weights_only=True if the installed torch supports it).
    state_dict = torch.load(model_path, map_location=DEVICE)
    model.load_state_dict(state_dict)
    print("✅ Pesos del modelo cargados")
else:
    print("⚠️ No se encontró mtp_model.pt, usando pesos aleatorios")

model.eval()

# Count parameters BEFORE quantizing: dynamically quantized Linear layers keep
# packed weights that model.parameters() does not expose, so counting
# afterwards would under-report the model size on CPU.
param_count = sum(p.numel() for p in model.parameters())

# Dynamic int8 quantization of the Linear layers, CPU only (best effort).
if DEVICE == "cpu":
    print("⚡ Aplicando cuantización dinámica para CPU...")
    try:
        model = torch.quantization.quantize_dynamic(
            model,
            {nn.Linear},
            dtype=torch.qint8
        )
    except Exception as e:
        print(f"⚠️ No se pudo aplicar cuantización: {e}")

print(f"✅ Modelo cargado: {param_count:,} parámetros ({param_count/1e6:.1f}M)")
252
 
253
  # ======================
254
  # API CONFIG
255
  # ======================
256
# ASGI application object; title, description and version are handed to
# FastAPI as the API metadata.
app = FastAPI(title="MTP API", description="API para modelo de lenguaje MTP", version="1.0")
261
 
262
  app.add_middleware(
263
  CORSMiddleware,
 
267
  )
268
 
269
class PromptRequest(BaseModel):
    # Request body accepted by POST /generate. Documented with comments rather
    # than a docstring so the generated schema text stays exactly as before.

    # Prompt text; required, at most 2000 characters.
    text: str = Field(
        ...,
        description="Texto de entrada",
        max_length=2000,
    )
    # Upper bound on generated tokens (10..300).
    max_tokens: int = Field(
        150,
        description="Tokens máximos a generar",
        ge=10,
        le=300,
    )
    # Sampling temperature (0.1..2.0).
    temperature: float = Field(
        0.7,
        description="Temperatura de muestreo",
        ge=0.1,
        le=2.0,
    )
    # Top-k cutoff (1..100).
    top_k: int = Field(
        50,
        description="Top-k sampling",
        ge=1,
        le=100,
    )
    # Nucleus sampling threshold (0.1..1.0).
    top_p: float = Field(
        0.9,
        description="Top-p (nucleus) sampling",
        ge=0.1,
        le=1.0,
    )
    # Repetition penalty factor (1.0..2.0).
    repetition_penalty: float = Field(
        1.1,
        description="Penalización por repetición",
        ge=1.0,
        le=2.0,
    )
276
 
277
def build_prompt(user_input: str) -> str:
    """Wrap *user_input* in the instruction/response template sent to the model."""
    template = "### Instrucción:\n{}\n\n### Respuesta:\n"
    return template.format(user_input)
280
 
281
  # ======================
282
+ # GESTIÓN DE CARGA
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  # ======================
284
  ACTIVE_REQUESTS = 0
285
 
286
class MTPTokenizer:
    """Thin adapter that forwards tokenizer calls to a SentencePiece processor."""

    def __init__(self, sp_model):
        # Keep a reference to the processor; every method delegates to it.
        self.sp = sp_model

    def encode(self, text):
        """Return the result of encoding *text* with the wrapped processor."""
        token_ids = self.sp.encode(text)
        return token_ids

    def decode(self, tokens):
        """Return the result of decoding *tokens* with the wrapped processor."""
        decoded = self.sp.decode(tokens)
        return decoded

    def bos_id(self):
        """Return the wrapped processor's beginning-of-sequence id."""
        processor = self.sp
        return processor.bos_id()

    def eos_id(self):
        """Return the wrapped processor's end-of-sequence id."""
        processor = self.sp
        return processor.eos_id()

    def pad_id(self):
        """Return the wrapped processor's padding id."""
        processor = self.sp
        return processor.pad_id()
305
 
306
+ tokenizer_wrapper = MTPTokenizer(sp)
307
 
308
@app.post("/generate")
async def generate(req: PromptRequest):
    """Endpoint principal de generación de texto"""
    # The docstring above is kept verbatim: FastAPI publishes endpoint
    # docstrings as the operation description in the generated API docs.
    #
    # Flow: bump the in-flight counter, soften the sampling parameters under
    # load, build the prompt, run the model and return the decoded reply.
    # Returns a dict with "reply", plus "tokens_generated"/"model" on success
    # or "error" when generation fails.
    #
    # NOTE(review): this is an `async def` handler that runs the blocking
    # model call inline, so requests are presumably serialised on the event
    # loop and ACTIVE_REQUESTS may rarely exceed 1 — confirm before relying
    # on the load-shedding branch.
    global ACTIVE_REQUESTS
    ACTIVE_REQUESTS += 1
    try:
        dyn_max_tokens = req.max_tokens
        dyn_temperature = req.temperature

        # Under load, cap the output length and cool the temperature slightly.
        if ACTIVE_REQUESTS > 2:
            print(f"⚠️ Carga alta ({ACTIVE_REQUESTS} requests). Ajustando parámetros.")
            dyn_max_tokens = min(dyn_max_tokens, 120)
            dyn_temperature = max(0.5, dyn_temperature * 0.9)

        user_input = req.text.strip()
        if not user_input:
            # Whitespace-only prompt: nothing to generate.
            return {"reply": "", "tokens_generated": 0}

        try:
            # Encoding and tensor creation live inside the try block so that a
            # failure here yields the error reply and still releases the
            # in-flight counter (previously it leaked and stayed inflated).
            full_prompt = build_prompt(user_input)
            tokens = [tokenizer_wrapper.bos_id()] + tokenizer_wrapper.encode(full_prompt)
            input_ids = torch.tensor([tokens], device=DEVICE)

            with torch.no_grad():
                output_ids = model.generate(
                    input_ids,
                    max_new_tokens=dyn_max_tokens,
                    temperature=dyn_temperature,
                    top_k=req.top_k,
                    top_p=req.top_p,
                    repetition_penalty=req.repetition_penalty,
                )

            # Keep only the newly generated ids (everything after the prompt).
            gen_tokens = output_ids[0, len(tokens):].tolist()

            # Drop ids outside the vocabulary and any end-of-sequence markers
            # before decoding.
            eos_token = tokenizer_wrapper.eos_id()
            safe_tokens = [
                t for t in gen_tokens
                if 0 <= t < VOCAB_SIZE and t != eos_token
            ]

            response = tokenizer_wrapper.decode(safe_tokens).strip()

            # Cut the reply at the first "###" so a follow-up section header
            # produced by the model is not returned.
            if "###" in response:
                response = response.split("###")[0].strip()

            return {
                "reply": response,
                "tokens_generated": len(safe_tokens),
                "model": "MTP",
            }

        except Exception as e:
            # Top-level boundary: log and answer with a friendly message
            # instead of propagating the failure to the client.
            print(f"❌ Error durante generación: {e}")
            return {
                "reply": "Lo siento, ocurrió un error al procesar tu solicitud.",
                "error": str(e),
            }

        finally:
            # Release memory after every generation attempt.
            if DEVICE == "cuda":
                torch.cuda.empty_cache()
            gc.collect()
    finally:
        # Always release the in-flight slot, whichever path returned.
        ACTIVE_REQUESTS -= 1
372
 
373
+ # ======================
374
+ # ENDPOINTS DE INFORMACIÓN
375
+ # ======================
376
  @app.get("/health")
377
  def health_check():
378
  return {
379
  "status": "healthy",
380
+ "model": "MTP",
381
  "device": DEVICE,
382
  "active_requests": ACTIVE_REQUESTS,
383
  "vocab_size": VOCAB_SIZE
 
386
@app.get("/info")
def model_info():
    # Report the model name/version, the config dict used to build the model,
    # the number of elements across model.parameters() and the active device.
    # Documented with comments rather than a docstring so the published
    # endpoint description stays unchanged.
    parameter_total = 0
    for weight in model.parameters():
        parameter_total += weight.numel()

    info = {
        "model_name": "MTP",
        "version": "1.0",
        "architecture": config,
        "parameters": parameter_total,
        "device": DEVICE,
    }
    return info
395
 
396
  # ======================
397
+ # INTERFAZ WEB (MODERNA)
398
  # ======================
399
  @app.get("/", response_class=HTMLResponse)
400
  def chat_ui():
 
404
  <head>
405
  <meta charset="UTF-8">
406
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
407
+ <title>MTP - Asistente IA</title>
408
  <link rel="preconnect" href="https://fonts.googleapis.com">
409
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
410
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
 
416
  --text-primary: #e3e3e3;
417
  --text-secondary: #9aa0a6;
418
  --user-bubble: #282a2c;
 
419
  }
420
  * { box-sizing: border-box; outline: none; -webkit-tap-highlight-color: transparent; }
421
  body {
 
451
  width: 32px;
452
  height: 32px;
453
  border-radius: 50%;
454
+ background: linear-gradient(135deg, #4a9eff, #7c3aed);
455
+ display: flex;
456
+ align-items: center;
457
+ justify-content: center;
458
+ font-weight: bold;
459
+ font-size: 14px;
460
  }
461
  .brand-text {
462
  font-weight: 500;
 
523
  height: 34px;
524
  min-width: 34px;
525
  border-radius: 50%;
526
+ background: linear-gradient(135deg, #4a9eff, #7c3aed);
527
+ display: flex;
528
+ align-items: center;
529
+ justify-content: center;
530
+ font-weight: bold;
531
+ font-size: 14px;
532
  box-shadow: 0 2px 6px rgba(0,0,0,0.2);
533
  }
534
  .bot-actions {
 
555
  }
556
  .action-btn svg { width: 16px; height: 16px; fill: currentColor; }
557
  .typing-cursor::after {
558
+ content: '';
559
  display: inline-block;
560
+ margin-left: 2px;
 
 
 
 
 
561
  animation: blink 1s infinite;
562
  }
563
  .footer-container {
 
591
  padding: 10px 0;
592
  }
593
  #mainBtn {
594
+ background: white;
595
+ color: black;
596
  border: none;
597
  width: 36px;
598
  height: 36px;
 
604
  margin-left: 8px;
605
  transition: transform 0.2s;
606
  }
607
+ #mainBtn:hover { transform: scale(1.05); }
608
  .disclaimer {
609
  text-align: center;
610
  font-size: 0.75rem;
 
622
  100% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0); }
623
  }
624
  .pulsing { animation: pulseAvatar 1.5s infinite; }
 
 
 
 
 
 
 
 
 
625
  ::-webkit-scrollbar { width: 8px; }
626
  ::-webkit-scrollbar-track { background: transparent; }
627
  ::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }
 
630
  <body>
631
  <header>
632
  <div class="brand-wrapper" onclick="location.reload()">
633
+ <div class="brand-logo">MTP</div>
634
  <div class="brand-text">
635
+ MTP <span class="version-badge">v1</span>
636
  </div>
637
  </div>
638
  </header>
639
  <div id="chatScroll" class="chat-scroll">
640
  <div class="msg-row bot" style="animation-delay: 0.1s;">
641
+ <div class="bot-avatar">M</div>
642
  <div class="msg-content-wrapper">
643
  <div class="msg-text">
644
+ ¡Hola! Soy MTP, tu asistente de IA. ¿En qué puedo ayudarte hoy?
645
  </div>
646
  </div>
647
  </div>
648
  </div>
649
  <div class="footer-container">
650
  <div class="input-box">
651
+ <input type="text" id="userInput" placeholder="Escribe un mensaje..." autocomplete="off">
652
  <button id="mainBtn" onclick="handleBtnClick()">➤</button>
653
  </div>
654
  <div class="disclaimer">
655
+ MTP puede cometer errores. Considera verificar la información importante.
656
  </div>
657
  </div>
658
  <script>
 
670
 
671
  function setBtnState(state) {
672
  if (state === 'sending') {
673
+ mainBtn.innerHTML = '';
674
  isGenerating = true;
675
  } else {
676
+ mainBtn.innerHTML = '';
677
  isGenerating = false;
678
  abortController = null;
679
  }
 
700
 
701
  async function sendMessage(textOverride = null) {
702
  const text = textOverride || userInput.value.trim();
703
+ if (!text) return;
 
704
  lastUserPrompt = text;
705
  if (!textOverride) {
706
  userInput.value = '';
707
  addMessage(text, 'user');
708
  }
 
709
  setBtnState('sending');
710
  abortController = new AbortController();
 
711
  const botRow = document.createElement('div');
712
  botRow.className = 'msg-row bot';
713
  const avatar = document.createElement('div');
714
+ avatar.className = 'bot-avatar pulsing';
715
+ avatar.textContent = 'M';
716
  const wrapper = document.createElement('div');
717
  wrapper.className = 'msg-content-wrapper';
718
  const msgText = document.createElement('div');
 
722
  botRow.appendChild(wrapper);
723
  chatScroll.appendChild(botRow);
724
  scrollToBottom();
 
725
  try {
726
  const response = await fetch('/generate', {
727
  method: 'POST',
728
  headers: { 'Content-Type': 'application/json' },
729
+ body: JSON.stringify({ text: text }),
 
 
 
 
 
 
730
  signal: abortController.signal
731
  });
 
732
  const data = await response.json();
733
  if (!isGenerating) return;
 
734
  avatar.classList.remove('pulsing');
735
  const reply = data.reply || "No entendí eso.";
 
 
 
 
 
 
 
 
 
736
  await typeWriter(msgText, reply);
737
  if (isGenerating) {
738
  addActions(wrapper, reply);
 
743
  msgText.textContent += " [Detenido]";
744
  } else {
745
  avatar.classList.remove('pulsing');
746
+ msgText.textContent = "Error de conexión.";
747
  msgText.style.color = "#ff8b8b";
748
  setBtnState('idle');
749
  }
 
761
  scrollToBottom();
762
  }
763
 
764
+ function typeWriter(element, text, speed = 12) {
765
  return new Promise(resolve => {
766
  let i = 0;
767
  element.classList.add('typing-cursor');
 
788
  function addActions(wrapperElement, textToCopy) {
789
  const actionsDiv = document.createElement('div');
790
  actionsDiv.className = 'bot-actions';
 
791
  const copyBtn = document.createElement('button');
792
  copyBtn.className = 'action-btn';
793
+ copyBtn.innerHTML = `<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>`;
794
+ copyBtn.onclick = () => {
795
+ navigator.clipboard.writeText(textToCopy);
796
+ };
797
  const regenBtn = document.createElement('button');
798
  regenBtn.className = 'action-btn';
799
+ regenBtn.innerHTML = `<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M23 4v6h-6"></path><path d="M1 20v-6h6"></path><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"></path></svg>`;
800
+ regenBtn.onclick = () => {
801
+ sendMessage(lastUserPrompt);
802
+ };
803
  actionsDiv.appendChild(copyBtn);
804
  actionsDiv.appendChild(regenBtn);
805
  wrapperElement.appendChild(actionsDiv);
 
810
  userInput.addEventListener('keydown', (e) => {
811
  if (e.key === 'Enter') handleBtnClick();
812
  });
 
813
  window.onload = () => userInput.focus();
814
  </script>
815
  </body>
 
818
 
819
if __name__ == "__main__":
    # Script entry point: read the port from $PORT (default 7860), print the
    # start-up banner and hand the app to uvicorn on all interfaces.
    port = int(os.environ.get("PORT", 7860))

    for banner in (
        f"\n🚀 Iniciando servidor MTP en puerto {port}...",
        f"🌐 Interfaz web: http://0.0.0.0:{port}",
        f"📡 API docs: http://0.0.0.0:{port}/docs",
    ):
        print(banner)

    uvicorn.run(app, host="0.0.0.0", port=port, log_level="info")