teszenofficial commited on
Commit
22d628e
·
verified ·
1 Parent(s): f07cd6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +470 -190
app.py CHANGED
@@ -3,14 +3,16 @@ import sys
3
  import torch
4
  import pickle
5
  import time
6
- from fastapi import FastAPI
 
7
  from fastapi.responses import HTMLResponse, StreamingResponse
8
- from pydantic import BaseModel
 
9
  from huggingface_hub import snapshot_download
10
  import uvicorn
11
 
12
  # ======================
13
- # CONFIGURACIÓN DE DISPOSITIVO (GPU/CPU)
14
  # ======================
15
  if torch.cuda.is_available():
16
  DEVICE = "cuda"
@@ -19,19 +21,18 @@ else:
19
  DEVICE = "cpu"
20
  print("⚠️ GPU no detectada. Usando CPU (puede ser más lento).")
21
 
22
- # ======================
23
- # OPTIMIZACIÓN CPU
24
- # ======================
 
25
  torch.set_grad_enabled(False)
26
- torch.set_num_threads(max(1, os.cpu_count() // 2))
27
 
28
  MODEL_REPO = "TeszenAI/MTP3.7"
29
 
30
  # ======================
31
- # DESCARGA DEL MODELO
32
  # ======================
33
- print(f"--- SISTEMA MTP 2 ---")
34
- print(f"Descargando/Verificando modelo desde {MODEL_REPO}...")
35
  repo_path = snapshot_download(
36
  repo_id=MODEL_REPO,
37
  repo_type="model",
@@ -40,23 +41,28 @@ repo_path = snapshot_download(
40
 
41
  sys.path.insert(0, repo_path)
42
 
 
43
  from model import MTPMiniModel
44
  from tokenizer import MTPTokenizer
45
 
46
- # ======================
47
- # CARGA DEL MODELO
48
- # ======================
49
- print("Cargando modelo en memoria...")
50
  with open(os.path.join(repo_path, "mtp_mini.pkl"), "rb") as f:
51
  model_data = pickle.load(f)
52
 
53
- tokenizer = MTPTokenizer(
54
- os.path.join(repo_path, "mtp_tokenizer.model")
55
- )
56
-
57
  VOCAB_SIZE = tokenizer.sp.get_piece_size()
58
  config = model_data["config"]
59
 
 
 
 
 
 
 
 
 
 
 
60
  model = MTPMiniModel(
61
  vocab_size=VOCAB_SIZE,
62
  d_model=config["model"]["d_model"],
@@ -64,134 +70,350 @@ model = MTPMiniModel(
64
  n_heads=config["model"]["n_heads"],
65
  d_ff=config["model"]["d_ff"],
66
  max_seq_len=config["model"]["max_seq_len"],
67
- dropout=0.0
 
68
  )
69
 
70
  model.load_state_dict(model_data["model_state_dict"])
71
  model.eval()
72
 
73
- # ======================
74
- # ⚙️ CUANTIZACIÓN CPU
75
- # ======================
76
  if DEVICE == "cpu":
 
77
  model = torch.quantization.quantize_dynamic(
78
- model,
79
- {torch.nn.Linear},
80
  dtype=torch.qint8
81
  )
82
- print("⚙️ Modelo cuantizado para CPU")
83
 
84
  model.to(DEVICE)
85
- print(f"🚀 MTP 2 listo y corriendo en: {DEVICE.upper()}")
86
-
87
- # ======================
88
- # API FASTAPI
89
- # ======================
90
- app = FastAPI(title="MTP 2 API")
91
 
92
- class Prompt(BaseModel):
93
- text: str
94
 
95
  # ======================
96
- # 🧠 PROMPT MEJORADO (MISMO FORMATO)
97
  # ======================
98
- def build_prompt(user_input: str) -> str:
99
- return f"""Eres MTP, un modelo de lenguaje experimental.
100
- Responde de forma clara, directa y coherente.
101
- No inventes información.
 
102
 
103
- ### Instrucción:
104
- {user_input}
 
 
 
 
105
 
106
- ### Respuesta:
107
- """
 
 
 
 
 
 
 
 
 
 
108
 
109
  # ======================
110
- # GENERACIÓN NORMAL (IGUAL QUE ANTES)
111
  # ======================
 
 
 
112
  @app.post("/generate")
113
- def generate(prompt: Prompt):
114
- user_input = prompt.text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  if not user_input:
116
- return {"reply": ""}
 
117
 
118
  full_prompt = build_prompt(user_input)
119
  tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
120
  input_ids = torch.tensor([tokens], device=DEVICE)
121
 
122
- with torch.no_grad():
123
- output_ids = model.generate(
124
- input_ids,
125
- max_new_tokens=150,
126
- temperature=0.7,
127
- top_k=50,
128
- top_p=0.9
129
- )
130
-
131
- gen_tokens = output_ids[0, len(tokens):].tolist()
132
-
133
- # 🔒 FILTRO DE SEGURIDAD
134
- safe_tokens = [
135
- t for t in gen_tokens
136
- if 0 <= t < VOCAB_SIZE and t != tokenizer.eos_id()
137
- ]
138
-
139
- response = tokenizer.decode(safe_tokens).strip()
140
- if "###" in response:
141
- response = response.split("###")[0].strip()
142
-
143
- return {"reply": response}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  # ======================
146
- # 📡 STREAMING SSE OFICIAL
147
  # ======================
148
  @app.get("/generate_sse")
149
- def generate_sse(text: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def event_stream():
151
- full_prompt = build_prompt(text)
152
- tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
153
- input_ids = torch.tensor([tokens], device=DEVICE)
154
-
155
- for _ in range(150):
156
- with torch.no_grad():
157
- logits = model(input_ids)[:, -1, :VOCAB_SIZE]
158
- probs = torch.softmax(logits / 0.7, dim=-1)
159
- next_id = torch.argmax(probs, dim=-1).item()
160
-
161
- if next_id == tokenizer.eos_id():
162
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- if 0 <= next_id < VOCAB_SIZE:
165
- token_text = tokenizer.decode([next_id])
166
- yield f"data:{token_text}\n\n"
167
- input_ids = torch.cat(
168
- [input_ids, torch.tensor([[next_id]], device=DEVICE)],
169
- dim=1
170
- )
171
- time.sleep(0.015)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
- yield "data:[DONE]\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- return StreamingResponse(event_stream(), media_type="text/event-stream")
 
 
 
 
 
 
 
 
176
 
177
  # ======================
178
- # INTERFAZ WEB (TU HTML COMPLETO, SIN QUITAR NADA)
179
  # ======================
180
  @app.get("/", response_class=HTMLResponse)
181
  def chat_ui():
182
  return """
183
-
184
  <!DOCTYPE html>
185
  <html lang="es">
186
  <head>
187
  <meta charset="UTF-8">
188
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
189
- <title>MTP 2</title>
190
  <link rel="preconnect" href="https://fonts.googleapis.com">
191
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
192
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
193
  <style>
194
- /* --- VARIABLES & THEME --- */
195
  :root {
196
  --bg-color: #131314;
197
  --surface-color: #1E1F20;
@@ -199,12 +421,16 @@ def chat_ui():
199
  --text-primary: #e3e3e3;
200
  --text-secondary: #9aa0a6;
201
  --user-bubble: #282a2c;
202
- --bot-actions-color: #c4c7c5;
 
 
203
  --logo-url: url('https://i.postimg.cc/yxS54PF3/IMG-3082.jpg');
204
  }
205
-
206
- * { box-sizing: border-box; outline: none; -webkit-tap-highlight-color: transparent; }
207
-
 
 
208
  body {
209
  margin: 0;
210
  background-color: var(--bg-color);
@@ -215,8 +441,6 @@ body {
215
  flex-direction: column;
216
  overflow: hidden;
217
  }
218
-
219
- /* --- HEADER --- */
220
  header {
221
  padding: 12px 20px;
222
  display: flex;
@@ -230,14 +454,12 @@ header {
230
  z-index: 50;
231
  border-bottom: 1px solid rgba(255,255,255,0.05);
232
  }
233
-
234
  .brand-wrapper {
235
  display: flex;
236
  align-items: center;
237
  gap: 12px;
238
  cursor: pointer;
239
  }
240
-
241
  .brand-logo {
242
  width: 32px;
243
  height: 32px;
@@ -247,7 +469,6 @@ header {
247
  background-position: center;
248
  border: 1px solid rgba(255,255,255,0.1);
249
  }
250
-
251
  .brand-text {
252
  font-weight: 500;
253
  font-size: 1.05rem;
@@ -255,7 +476,6 @@ header {
255
  align-items: center;
256
  gap: 8px;
257
  }
258
-
259
  .version-badge {
260
  font-size: 0.75rem;
261
  background: rgba(74, 158, 255, 0.15);
@@ -264,8 +484,17 @@ header {
264
  border-radius: 12px;
265
  font-weight: 600;
266
  }
267
-
268
- /* --- CHAT AREA --- */
 
 
 
 
 
 
 
 
 
269
  .chat-scroll {
270
  flex: 1;
271
  overflow-y: auto;
@@ -278,8 +507,6 @@ header {
278
  width: 100%;
279
  scroll-behavior: smooth;
280
  }
281
-
282
- /* Filas de Mensaje */
283
  .msg-row {
284
  display: flex;
285
  gap: 16px;
@@ -288,18 +515,14 @@ header {
288
  transform: translateY(10px);
289
  animation: slideUpFade 0.4s cubic-bezier(0.2, 0.8, 0.2, 1) forwards;
290
  }
291
-
292
  .msg-row.user { justify-content: flex-end; }
293
  .msg-row.bot { justify-content: flex-start; align-items: flex-start; }
294
-
295
- /* Contenido */
296
  .msg-content {
297
  line-height: 1.6;
298
  font-size: 1rem;
299
  word-wrap: break-word;
300
  max-width: 85%;
301
  }
302
-
303
  .user .msg-content {
304
  background-color: var(--user-bubble);
305
  padding: 10px 18px;
@@ -307,20 +530,17 @@ header {
307
  border-top-right-radius: 4px;
308
  color: #fff;
309
  }
310
-
311
  .bot .msg-content-wrapper {
312
  display: flex;
313
  flex-direction: column;
314
  gap: 8px;
315
  width: 100%;
316
  }
317
-
318
  .bot .msg-text {
319
  padding-top: 6px;
320
  color: var(--text-primary);
 
321
  }
322
-
323
- /* Avatar Bot */
324
  .bot-avatar {
325
  width: 34px;
326
  height: 34px;
@@ -330,8 +550,6 @@ header {
330
  background-size: cover;
331
  box-shadow: 0 2px 6px rgba(0,0,0,0.2);
332
  }
333
-
334
- /* Acciones Bot */
335
  .bot-actions {
336
  display: flex;
337
  gap: 10px;
@@ -339,7 +557,6 @@ header {
339
  transition: opacity 0.3s;
340
  margin-top: 5px;
341
  }
342
-
343
  .action-btn {
344
  background: transparent;
345
  border: none;
@@ -350,16 +567,18 @@ header {
350
  display: flex;
351
  align-items: center;
352
  transition: color 0.2s, background 0.2s;
 
353
  }
354
-
355
  .action-btn:hover {
356
  color: var(--text-primary);
357
  background: rgba(255,255,255,0.08);
358
  }
359
-
360
- .action-btn svg { width: 16px; height: 16px; fill: currentColor; }
361
-
362
- /* Efecto Escritura (BOLITA AZUL) */
 
 
363
  .typing-cursor::after {
364
  content: '';
365
  display: inline-block;
@@ -371,15 +590,12 @@ header {
371
  vertical-align: middle;
372
  animation: blink 1s infinite;
373
  }
374
-
375
- /* --- FOOTER & INPUT --- */
376
  .footer-container {
377
  padding: 0 20px 20px 20px;
378
  background: linear-gradient(to top, var(--bg-color) 85%, transparent);
379
  position: relative;
380
  z-index: 60;
381
  }
382
-
383
  .input-box {
384
  max-width: 850px;
385
  margin: 0 auto;
@@ -391,12 +607,10 @@ header {
391
  border: 1px solid rgba(255,255,255,0.1);
392
  transition: border-color 0.2s, box-shadow 0.2s;
393
  }
394
-
395
  .input-box:focus-within {
396
  border-color: rgba(74, 158, 255, 0.5);
397
  box-shadow: 0 0 0 2px rgba(74, 158, 255, 0.1);
398
  }
399
-
400
  #userInput {
401
  flex: 1;
402
  background: transparent;
@@ -405,8 +619,9 @@ header {
405
  font-size: 1rem;
406
  font-family: inherit;
407
  padding: 10px 0;
 
 
408
  }
409
-
410
  #mainBtn {
411
  background: white;
412
  color: black;
@@ -421,92 +636,105 @@ header {
421
  margin-left: 8px;
422
  transition: transform 0.2s;
423
  }
424
-
425
  #mainBtn:hover { transform: scale(1.05); }
426
-
 
 
 
427
  .disclaimer {
428
  text-align: center;
429
  font-size: 0.75rem;
430
  color: #666;
431
  margin-top: 12px;
432
  }
433
-
434
- /* --- ANIMACIONES --- */
 
 
 
 
435
  @keyframes slideUpFade {
436
  from { opacity: 0; transform: translateY(15px); }
437
  to { opacity: 1; transform: translateY(0); }
438
  }
439
-
440
- @keyframes blink { 0%, 100% { opacity: 1; } 50% { opacity: 0; } }
441
-
 
442
  @keyframes pulseAvatar {
443
  0% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0.4); }
444
  70% { box-shadow: 0 0 0 8px rgba(74, 158, 255, 0); }
445
  100% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0); }
446
  }
447
-
448
  .pulsing { animation: pulseAvatar 1.5s infinite; }
449
-
450
  ::-webkit-scrollbar { width: 8px; }
451
  ::-webkit-scrollbar-track { background: transparent; }
452
  ::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }
453
-
 
 
 
 
 
 
 
454
  </style>
455
  </head>
456
  <body>
457
-
458
  <header>
459
  <div class="brand-wrapper" onclick="location.reload()">
460
  <div class="brand-logo"></div>
461
  <div class="brand-text">
462
- MTP <span class="version-badge">2</span>
463
  </div>
464
  </div>
 
465
  </header>
466
-
467
  <div id="chatScroll" class="chat-scroll">
468
- <!-- Bienvenida -->
469
  <div class="msg-row bot" style="animation-delay: 0.1s;">
470
  <div class="bot-avatar"></div>
471
  <div class="msg-content-wrapper">
472
  <div class="msg-text">
473
- ¡Hola! Soy MTP 2. ¿En qué puedo ayudarte hoy?
 
 
 
 
 
 
 
 
474
  </div>
475
  </div>
476
  </div>
477
  </div>
478
-
479
  <div class="footer-container">
480
  <div class="input-box">
481
- <input type="text" id="userInput" placeholder="Escribe un mensaje..." autocomplete="off">
482
- <button id="mainBtn" onclick="handleBtnClick()">
483
- <!-- Icono dinámico -->
484
- </button>
485
  </div>
486
  <div class="disclaimer">
487
- MTP puede cometer errores. Considera verificar la información importante.
488
  </div>
489
  </div>
490
-
491
  <script>
492
  const chatScroll = document.getElementById('chatScroll');
493
  const userInput = document.getElementById('userInput');
494
  const mainBtn = document.getElementById('mainBtn');
495
-
496
- // Variables de Estado
497
  let isGenerating = false;
498
  let abortController = null;
499
  let typingTimeout = null;
500
  let lastUserPrompt = "";
501
-
502
- // Iconos SVG
503
  const ICON_SEND = `<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M22 2L11 13M22 2l-7 20-4-9-9-4 20-7z"></path></svg>`;
504
  const ICON_STOP = `<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" stroke="currentColor" stroke-width="0"><rect x="2" y="2" width="20" height="20" rx="4" ry="4"></rect></svg>`;
505
-
506
- // Inicial
507
  mainBtn.innerHTML = ICON_SEND;
508
 
509
- // --- UTILS ---
 
 
 
 
 
510
  function scrollToBottom() {
511
  chatScroll.scrollTop = chatScroll.scrollHeight;
512
  }
@@ -514,16 +742,19 @@ function scrollToBottom() {
514
  function setBtnState(state) {
515
  if (state === 'sending') {
516
  mainBtn.innerHTML = ICON_STOP;
 
517
  isGenerating = true;
 
 
 
518
  } else {
519
  mainBtn.innerHTML = ICON_SEND;
 
520
  isGenerating = false;
521
  abortController = null;
522
  }
523
  }
524
 
525
- // --- CORE ---
526
-
527
  function handleBtnClick() {
528
  if (isGenerating) {
529
  stopGeneration();
@@ -535,14 +766,10 @@ function handleBtnClick() {
535
  function stopGeneration() {
536
  if (abortController) abortController.abort();
537
  if (typingTimeout) clearTimeout(typingTimeout);
538
-
539
- // UI Limpieza
540
  const activeCursor = document.querySelector('.typing-cursor');
541
  if (activeCursor) activeCursor.classList.remove('typing-cursor');
542
-
543
  const activeAvatar = document.querySelector('.pulsing');
544
  if (activeAvatar) activeAvatar.classList.remove('pulsing');
545
-
546
  setBtnState('idle');
547
  userInput.focus();
548
  }
@@ -550,18 +777,18 @@ function stopGeneration() {
550
  async function sendMessage(textOverride = null) {
551
  const text = textOverride || userInput.value.trim();
552
  if (!text) return;
553
-
554
  lastUserPrompt = text;
555
-
556
  if (!textOverride) {
557
  userInput.value = '';
 
558
  addMessage(text, 'user');
559
  }
560
 
561
  setBtnState('sending');
562
  abortController = new AbortController();
563
-
564
- // Bot Placeholder
565
  const botRow = document.createElement('div');
566
  botRow.className = 'msg-row bot';
567
 
@@ -573,42 +800,66 @@ async function sendMessage(textOverride = null) {
573
 
574
  const msgText = document.createElement('div');
575
  msgText.className = 'msg-text';
576
-
577
  wrapper.appendChild(msgText);
578
  botRow.appendChild(avatar);
579
  botRow.appendChild(wrapper);
580
  chatScroll.appendChild(botRow);
581
  scrollToBottom();
582
-
583
  try {
 
 
584
  const response = await fetch('/generate', {
585
  method: 'POST',
586
  headers: { 'Content-Type': 'application/json' },
587
- body: JSON.stringify({ text: text }),
 
 
 
 
 
 
 
 
588
  signal: abortController.signal
589
  });
590
-
591
  const data = await response.json();
592
 
593
  if (!isGenerating) return;
594
-
595
  avatar.classList.remove('pulsing');
 
 
 
 
 
 
 
596
  const reply = data.reply || "No entendí eso.";
 
 
597
 
598
  await typeWriter(msgText, reply);
599
 
600
  if (isGenerating) {
 
 
 
 
 
 
601
  addActions(wrapper, reply);
602
  setBtnState('idle');
603
  }
604
-
605
  } catch (error) {
606
  if (error.name === 'AbortError') {
607
  msgText.textContent += " [Detenido]";
608
  } else {
 
609
  avatar.classList.remove('pulsing');
610
- msgText.textContent = "Error de conexión.";
611
- msgText.style.color = "#ff8b8b";
612
  setBtnState('idle');
613
  }
614
  }
@@ -617,9 +868,11 @@ async function sendMessage(textOverride = null) {
617
  function addMessage(text, sender) {
618
  const row = document.createElement('div');
619
  row.className = `msg-row ${sender}`;
 
620
  const content = document.createElement('div');
621
  content.className = 'msg-content';
622
  content.textContent = text;
 
623
  row.appendChild(content);
624
  chatScroll.appendChild(row);
625
  scrollToBottom();
@@ -636,7 +889,7 @@ function typeWriter(element, text, speed = 12) {
636
  resolve();
637
  return;
638
  }
639
-
640
  if (i < text.length) {
641
  element.textContent += text.charAt(i);
642
  i++;
@@ -647,6 +900,7 @@ function typeWriter(element, text, speed = 12) {
647
  resolve();
648
  }
649
  }
 
650
  type();
651
  });
652
  }
@@ -657,18 +911,23 @@ function addActions(wrapperElement, textToCopy) {
657
 
658
  const copyBtn = document.createElement('button');
659
  copyBtn.className = 'action-btn';
660
- copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>`;
661
  copyBtn.onclick = () => {
662
- navigator.clipboard.writeText(textToCopy);
 
 
 
 
 
663
  };
664
-
665
  const regenBtn = document.createElement('button');
666
  regenBtn.className = 'action-btn';
667
- regenBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M23 4v6h-6"></path><path d="M1 20v-6h6"></path><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"></path></svg>`;
668
  regenBtn.onclick = () => {
669
  sendMessage(lastUserPrompt);
670
  };
671
-
672
  actionsDiv.appendChild(copyBtn);
673
  actionsDiv.appendChild(regenBtn);
674
  wrapperElement.appendChild(actionsDiv);
@@ -678,19 +937,40 @@ function addActions(wrapperElement, textToCopy) {
678
  }
679
 
680
  userInput.addEventListener('keydown', (e) => {
681
- if (e.key === 'Enter') handleBtnClick();
 
 
 
682
  });
683
 
684
- window.onload = () => userInput.focus();
685
-
 
 
 
 
 
 
 
 
 
686
  </script>
687
  </body>
688
  </html>
689
-
690
  """
 
691
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
692
  uvicorn.run(
693
  app,
694
  host="0.0.0.0",
695
- port=int(os.environ.get("PORT", 7860))
 
696
  )
 
3
  import torch
4
  import pickle
5
  import time
6
+ import gc
7
+ from fastapi import FastAPI, Request
8
  from fastapi.responses import HTMLResponse, StreamingResponse
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from pydantic import BaseModel, Field
11
  from huggingface_hub import snapshot_download
12
  import uvicorn
13
 
14
  # ======================
15
+ # CONFIGURACIÓN DE DISPOSITIVO
16
  # ======================
17
  if torch.cuda.is_available():
18
  DEVICE = "cuda"
 
21
  DEVICE = "cpu"
22
  print("⚠️ GPU no detectada. Usando CPU (puede ser más lento).")
23
 
24
+ # Optimización de hilos para CPU
25
+ if DEVICE == "cpu":
26
+ torch.set_num_threads(max(1, os.cpu_count() // 2))
27
+
28
  torch.set_grad_enabled(False)
 
29
 
30
  MODEL_REPO = "TeszenAI/MTP3.7"
31
 
32
  # ======================
33
+ # DESCARGA Y CARGA DEL MODELO
34
  # ======================
35
+ print(f"📦 Descargando modelo desde {MODEL_REPO}...")
 
36
  repo_path = snapshot_download(
37
  repo_id=MODEL_REPO,
38
  repo_type="model",
 
41
 
42
  sys.path.insert(0, repo_path)
43
 
44
+ # Importar modelo mejorado compatible
45
  from model import MTPMiniModel
46
  from tokenizer import MTPTokenizer
47
 
48
+ print("🔧 Cargando tensores y configuración...")
 
 
 
49
  with open(os.path.join(repo_path, "mtp_mini.pkl"), "rb") as f:
50
  model_data = pickle.load(f)
51
 
52
+ tokenizer = MTPTokenizer(os.path.join(repo_path, "mtp_tokenizer.model"))
 
 
 
53
  VOCAB_SIZE = tokenizer.sp.get_piece_size()
54
  config = model_data["config"]
55
 
56
+ # Detectar si el modelo usa SwiGLU
57
+ use_swiglu = config["model"].get("use_swiglu", False)
58
+
59
+ print(f"🧠 Inicializando modelo...")
60
+ print(f" → Vocabulario: {VOCAB_SIZE}")
61
+ print(f" → Dimensión: {config['model']['d_model']}")
62
+ print(f" → Capas: {config['model']['n_layers']}")
63
+ print(f" → Cabezas: {config['model']['n_heads']}")
64
+ print(f" → SwiGLU: {'✓' if use_swiglu else '✗'}")
65
+
66
  model = MTPMiniModel(
67
  vocab_size=VOCAB_SIZE,
68
  d_model=config["model"]["d_model"],
 
70
  n_heads=config["model"]["n_heads"],
71
  d_ff=config["model"]["d_ff"],
72
  max_seq_len=config["model"]["max_seq_len"],
73
+ dropout=0.0,
74
+ use_swiglu=use_swiglu
75
  )
76
 
77
  model.load_state_dict(model_data["model_state_dict"])
78
  model.eval()
79
 
80
+ # Cuantización para CPU
 
 
81
  if DEVICE == "cpu":
82
+ print("⚡ Aplicando cuantización dinámica para CPU...")
83
  model = torch.quantization.quantize_dynamic(
84
+ model,
85
+ {torch.nn.Linear},
86
  dtype=torch.qint8
87
  )
 
88
 
89
  model.to(DEVICE)
 
 
 
 
 
 
90
 
91
+ param_count = sum(p.numel() for p in model.parameters())
92
+ print(f"✅ Modelo cargado: {param_count:,} parámetros ({param_count/1e6:.1f}M)")
93
 
94
  # ======================
95
+ # API CONFIG
96
  # ======================
97
+ app = FastAPI(
98
+ title="MTP-3.5 API",
99
+ description="API para modelo de lenguaje MTP-3.5 mejorado con RoPE, RMSNorm y SwiGLU",
100
+ version="3.5"
101
+ )
102
 
103
+ app.add_middleware(
104
+ CORSMiddleware,
105
+ allow_origins=["*"],
106
+ allow_methods=["*"],
107
+ allow_headers=["*"],
108
+ )
109
 
110
+ class PromptRequest(BaseModel):
111
+ text: str = Field(..., max_length=2000, description="Texto de entrada")
112
+ max_tokens: int = Field(default=150, ge=10, le=300, description="Tokens máximos a generar")
113
+ temperature: float = Field(default=0.7, ge=0.1, le=2.0, description="Temperatura de muestreo")
114
+ top_k: int = Field(default=40, ge=1, le=100, description="Top-k sampling")
115
+ top_p: float = Field(default=0.92, ge=0.1, le=1.0, description="Top-p (nucleus) sampling")
116
+ repetition_penalty: float = Field(default=1.15, ge=1.0, le=2.0, description="Penalización por repetición")
117
+ min_length: int = Field(default=20, ge=5, le=100, description="Longitud mínima de respuesta")
118
+
119
+ def build_prompt(user_input: str) -> str:
120
+ """Construye el prompt en el formato del modelo"""
121
+ return f"### Instrucción:\n{user_input}\n\n### Respuesta:\n"
122
 
123
  # ======================
124
+ # GESTIÓN DE CARGA
125
  # ======================
126
+ ACTIVE_REQUESTS = 0
127
+ MAX_CONCURRENT_REQUESTS = 3
128
+
129
  @app.post("/generate")
130
+ async def generate(req: PromptRequest):
131
+ """Endpoint principal de generación de texto con control de calidad"""
132
+ global ACTIVE_REQUESTS
133
+
134
+ if ACTIVE_REQUESTS >= MAX_CONCURRENT_REQUESTS:
135
+ return {
136
+ "reply": "El servidor está ocupado. Por favor, intenta de nuevo en unos segundos.",
137
+ "error": "too_many_requests",
138
+ "active_requests": ACTIVE_REQUESTS
139
+ }
140
+
141
+ ACTIVE_REQUESTS += 1
142
+
143
+ # Ajuste dinámico bajo carga
144
+ dyn_max_tokens = req.max_tokens
145
+ dyn_temperature = req.temperature
146
+
147
+ if ACTIVE_REQUESTS > 1:
148
+ print(f"⚠️ Carga alta ({ACTIVE_REQUESTS} requests). Ajustando parámetros.")
149
+ dyn_max_tokens = min(dyn_max_tokens, 120)
150
+ dyn_temperature = max(0.6, dyn_temperature * 0.95)
151
+
152
+ user_input = req.text.strip()
153
  if not user_input:
154
+ ACTIVE_REQUESTS -= 1
155
+ return {"reply": "", "tokens_generated": 0}
156
 
157
  full_prompt = build_prompt(user_input)
158
  tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
159
  input_ids = torch.tensor([tokens], device=DEVICE)
160
 
161
+ try:
162
+ start_time = time.time()
163
+
164
+ with torch.no_grad():
165
+ output_ids = model.generate(
166
+ input_ids,
167
+ max_new_tokens=dyn_max_tokens,
168
+ temperature=dyn_temperature,
169
+ top_k=req.top_k,
170
+ top_p=req.top_p,
171
+ repetition_penalty=req.repetition_penalty,
172
+ min_length=req.min_length,
173
+ eos_token_id=tokenizer.eos_id()
174
+ )
175
+
176
+ gen_tokens = output_ids[0, len(tokens):].tolist()
177
+
178
+ # Filtro de seguridad mejorado
179
+ safe_tokens = []
180
+ for t in gen_tokens:
181
+ if 0 <= t < VOCAB_SIZE and t != tokenizer.eos_id():
182
+ safe_tokens.append(t)
183
+ elif t == tokenizer.eos_id():
184
+ break
185
+
186
+ response = tokenizer.decode(safe_tokens).strip()
187
+
188
+ # Limpiar marcadores de sección
189
+ if "###" in response:
190
+ response = response.split("###")[0].strip()
191
+
192
+ # Remover repeticiones al final
193
+ if response.endswith(("...", ". . .", "…")):
194
+ response = response.rstrip(".")
195
+
196
+ generation_time = time.time() - start_time
197
+ tokens_per_second = len(safe_tokens) / generation_time if generation_time > 0 else 0
198
+
199
+ return {
200
+ "reply": response,
201
+ "tokens_generated": len(safe_tokens),
202
+ "generation_time": round(generation_time, 2),
203
+ "tokens_per_second": round(tokens_per_second, 1),
204
+ "model": "MTP-3.5",
205
+ "device": DEVICE
206
+ }
207
+
208
+ except Exception as e:
209
+ print(f"❌ Error durante generación: {e}")
210
+ import traceback
211
+ traceback.print_exc()
212
+ return {
213
+ "reply": "Lo siento, ocurrió un error al procesar tu solicitud.",
214
+ "error": str(e)
215
+ }
216
+
217
+ finally:
218
+ ACTIVE_REQUESTS -= 1
219
+ if DEVICE == "cuda":
220
+ torch.cuda.empty_cache()
221
+ gc.collect()
222
 
223
  # ======================
224
+ # 📡 STREAMING SSE
225
  # ======================
226
  @app.get("/generate_sse")
227
+ def generate_sse(
228
+ text: str,
229
+ max_tokens: int = 150,
230
+ temperature: float = 0.7,
231
+ top_k: int = 40,
232
+ top_p: float = 0.92,
233
+ repetition_penalty: float = 1.15
234
+ ):
235
+ """Endpoint de streaming con Server-Sent Events mejorado"""
236
+ global ACTIVE_REQUESTS
237
+
238
+ if ACTIVE_REQUESTS >= MAX_CONCURRENT_REQUESTS:
239
+ def error_stream():
240
+ yield "data:[ERROR: Servidor ocupado]\n\n"
241
+ return StreamingResponse(error_stream(), media_type="text/event-stream")
242
+
243
+ ACTIVE_REQUESTS += 1
244
+
245
  def event_stream():
246
+ try:
247
+ full_prompt = build_prompt(text)
248
+ tokens = [tokenizer.bos_id()] + tokenizer.encode(full_prompt)
249
+ input_ids = torch.tensor([tokens], device=DEVICE)
250
+ generated_tokens = []
251
+
252
+ # Ajuste dinámico
253
+ limit = min(100 if ACTIVE_REQUESTS > 1 else max_tokens, 200)
254
+ temp = max(0.6, temperature * 0.95) if ACTIVE_REQUESTS > 1 else temperature
255
+
256
+ for step in range(limit):
257
+ with torch.no_grad():
258
+ logits, _ = model(input_ids)
259
+ logits = logits[:, -1, :VOCAB_SIZE].clone()
260
+
261
+ # Aplicar repetition penalty
262
+ if repetition_penalty != 1.0:
263
+ for token_id in set(input_ids[0].tolist()):
264
+ if logits[0, token_id] < 0:
265
+ logits[0, token_id] *= repetition_penalty
266
+ else:
267
+ logits[0, token_id] /= repetition_penalty
268
+
269
+ # Temperature scaling
270
+ logits = logits / temp
271
+
272
+ # Top-k filtering
273
+ if top_k > 0:
274
+ v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
275
+ logits[logits < v[:, [-1]]] = float('-inf')
276
+
277
+ # Top-p (nucleus) filtering
278
+ if top_p < 1.0:
279
+ sorted_logits, sorted_indices = torch.sort(logits, descending=True)
280
+ cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
281
+ sorted_indices_to_remove = cumulative_probs > top_p
282
+ sorted_indices_to_remove[:, 1:] = sorted_indices_to_remove[:, :-1].clone()
283
+ sorted_indices_to_remove[:, 0] = 0
284
+ indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
285
+ logits[indices_to_remove] = float('-inf')
286
+
287
+ # Sample
288
+ probs = torch.softmax(logits, dim=-1)
289
+ next_id = torch.multinomial(probs, num_samples=1).item()
290
+
291
+ if next_id == tokenizer.eos_id():
292
+ break
293
+
294
+ if 0 <= next_id < VOCAB_SIZE:
295
+ generated_tokens.append(next_id)
296
+ token_text = tokenizer.decode([next_id])
297
+
298
+ # Limpiar salida
299
+ if "###" in token_text:
300
+ break
301
+
302
+ yield f"data:{token_text}\n\n"
303
+
304
+ input_ids = torch.cat(
305
+ [input_ids, torch.tensor([[next_id]], device=DEVICE)],
306
+ dim=1
307
+ )
308
+ time.sleep(0.02) # Control de velocidad
309
+
310
+ yield "data:[DONE]\n\n"
311
+
312
+ except Exception as e:
313
+ print(f"❌ Error en streaming: {e}")
314
+ yield f"data:[ERROR: {str(e)}]\n\n"
315
+
316
+ finally:
317
+ ACTIVE_REQUESTS -= 1
318
+ if DEVICE == "cuda":
319
+ torch.cuda.empty_cache()
320
+ gc.collect()
321
+
322
+ return StreamingResponse(event_stream(), media_type="text/event-stream")
323
 
324
+ # ======================
325
+ # 📊 ENDPOINTS DE INFORMACIÓN
326
+ # ======================
327
+ @app.get("/health")
328
+ def health_check():
329
+ """Check del estado del servicio"""
330
+ memory_info = {}
331
+ if DEVICE == "cuda":
332
+ memory_info = {
333
+ "gpu_memory_allocated_mb": round(torch.cuda.memory_allocated() / 1024**2, 2),
334
+ "gpu_memory_reserved_mb": round(torch.cuda.memory_reserved() / 1024**2, 2)
335
+ }
336
+
337
+ return {
338
+ "status": "healthy",
339
+ "model": "MTP-3.5",
340
+ "device": DEVICE,
341
+ "active_requests": ACTIVE_REQUESTS,
342
+ "max_concurrent_requests": MAX_CONCURRENT_REQUESTS,
343
+ "vocab_size": VOCAB_SIZE,
344
+ "parameters": sum(p.numel() for p in model.parameters()),
345
+ **memory_info
346
+ }
347
 
348
+ @app.get("/info")
349
+ def model_info():
350
+ """Información detallada del modelo"""
351
+ improvements = [
352
+ "RoPE (Rotary Position Embedding)",
353
+ "RMSNorm (Root Mean Square Normalization)",
354
+ "Label Smoothing (0.1)",
355
+ "Repetition Penalty",
356
+ "Early Stopping",
357
+ "EOS Loss Weight",
358
+ "Length Control",
359
+ "Gradient Accumulation"
360
+ ]
361
+
362
+ if config["model"].get("use_swiglu", False):
363
+ improvements.append("SwiGLU Activation")
364
+
365
+ return {
366
+ "model_name": "MTP-3.5",
367
+ "version": "3.5",
368
+ "architecture": {
369
+ "d_model": config["model"]["d_model"],
370
+ "n_layers": config["model"]["n_layers"],
371
+ "n_heads": config["model"]["n_heads"],
372
+ "d_ff": config["model"]["d_ff"],
373
+ "max_seq_len": config["model"]["max_seq_len"],
374
+ "vocab_size": VOCAB_SIZE,
375
+ "use_swiglu": config["model"].get("use_swiglu", False),
376
+ "dropout": config["model"]["dropout"]
377
+ },
378
+ "parameters": sum(p.numel() for p in model.parameters()),
379
+ "parameters_human": f"{sum(p.numel() for p in model.parameters())/1e6:.1f}M",
380
+ "device": DEVICE,
381
+ "improvements": improvements,
382
+ "training_config": {
383
+ "batch_size": config["training"]["batch_size"],
384
+ "accumulation_steps": config["training"]["accumulation_steps"],
385
+ "learning_rate": config["training"]["learning_rate"],
386
+ "weight_decay": config["training"]["weight_decay"],
387
+ "epochs": config["training"]["epochs"]
388
+ }
389
+ }
390
 
391
+ @app.get("/config")
392
+ def get_config():
393
+ """Obtener configuración completa del modelo"""
394
+ return {
395
+ "model": config["model"],
396
+ "training": config["training"],
397
+ "data": config["data"],
398
+ "generation": config.get("generation", {})
399
+ }
400
 
401
  # ======================
402
+ # 🎨 INTERFAZ WEB MEJORADA
403
  # ======================
404
  @app.get("/", response_class=HTMLResponse)
405
  def chat_ui():
406
  return """
 
407
  <!DOCTYPE html>
408
  <html lang="es">
409
  <head>
410
  <meta charset="UTF-8">
411
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
412
+ <title>MTP 3.5 - Chat Interface</title>
413
  <link rel="preconnect" href="https://fonts.googleapis.com">
414
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
415
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
416
  <style>
 
417
  :root {
418
  --bg-color: #131314;
419
  --surface-color: #1E1F20;
 
421
  --text-primary: #e3e3e3;
422
  --text-secondary: #9aa0a6;
423
  --user-bubble: #282a2c;
424
+ --success-color: #34a853;
425
+ --warning-color: #fbbc04;
426
+ --error-color: #ea4335;
427
  --logo-url: url('https://i.postimg.cc/yxS54PF3/IMG-3082.jpg');
428
  }
429
+ * {
430
+ box-sizing: border-box;
431
+ outline: none;
432
+ -webkit-tap-highlight-color: transparent;
433
+ }
434
  body {
435
  margin: 0;
436
  background-color: var(--bg-color);
 
441
  flex-direction: column;
442
  overflow: hidden;
443
  }
 
 
444
  header {
445
  padding: 12px 20px;
446
  display: flex;
 
454
  z-index: 50;
455
  border-bottom: 1px solid rgba(255,255,255,0.05);
456
  }
 
457
  .brand-wrapper {
458
  display: flex;
459
  align-items: center;
460
  gap: 12px;
461
  cursor: pointer;
462
  }
 
463
  .brand-logo {
464
  width: 32px;
465
  height: 32px;
 
469
  background-position: center;
470
  border: 1px solid rgba(255,255,255,0.1);
471
  }
 
472
  .brand-text {
473
  font-weight: 500;
474
  font-size: 1.05rem;
 
476
  align-items: center;
477
  gap: 8px;
478
  }
 
479
  .version-badge {
480
  font-size: 0.75rem;
481
  background: rgba(74, 158, 255, 0.15);
 
484
  border-radius: 12px;
485
  font-weight: 600;
486
  }
487
+ .status-indicator {
488
+ width: 8px;
489
+ height: 8px;
490
+ border-radius: 50%;
491
+ background: var(--success-color);
492
+ animation: pulse 2s infinite;
493
+ }
494
+ @keyframes pulse {
495
+ 0%, 100% { opacity: 1; }
496
+ 50% { opacity: 0.5; }
497
+ }
498
  .chat-scroll {
499
  flex: 1;
500
  overflow-y: auto;
 
507
  width: 100%;
508
  scroll-behavior: smooth;
509
  }
 
 
510
  .msg-row {
511
  display: flex;
512
  gap: 16px;
 
515
  transform: translateY(10px);
516
  animation: slideUpFade 0.4s cubic-bezier(0.2, 0.8, 0.2, 1) forwards;
517
  }
 
518
  .msg-row.user { justify-content: flex-end; }
519
  .msg-row.bot { justify-content: flex-start; align-items: flex-start; }
 
 
520
  .msg-content {
521
  line-height: 1.6;
522
  font-size: 1rem;
523
  word-wrap: break-word;
524
  max-width: 85%;
525
  }
 
526
  .user .msg-content {
527
  background-color: var(--user-bubble);
528
  padding: 10px 18px;
 
530
  border-top-right-radius: 4px;
531
  color: #fff;
532
  }
 
533
  .bot .msg-content-wrapper {
534
  display: flex;
535
  flex-direction: column;
536
  gap: 8px;
537
  width: 100%;
538
  }
 
539
  .bot .msg-text {
540
  padding-top: 6px;
541
  color: var(--text-primary);
542
+ white-space: pre-wrap;
543
  }
 
 
544
  .bot-avatar {
545
  width: 34px;
546
  height: 34px;
 
550
  background-size: cover;
551
  box-shadow: 0 2px 6px rgba(0,0,0,0.2);
552
  }
 
 
553
  .bot-actions {
554
  display: flex;
555
  gap: 10px;
 
557
  transition: opacity 0.3s;
558
  margin-top: 5px;
559
  }
 
560
  .action-btn {
561
  background: transparent;
562
  border: none;
 
567
  display: flex;
568
  align-items: center;
569
  transition: color 0.2s, background 0.2s;
570
+ font-size: 0.85rem;
571
  }
 
572
  .action-btn:hover {
573
  color: var(--text-primary);
574
  background: rgba(255,255,255,0.08);
575
  }
576
+ .action-btn svg {
577
+ width: 16px;
578
+ height: 16px;
579
+ fill: currentColor;
580
+ margin-right: 4px;
581
+ }
582
  .typing-cursor::after {
583
  content: '';
584
  display: inline-block;
 
590
  vertical-align: middle;
591
  animation: blink 1s infinite;
592
  }
 
 
593
  .footer-container {
594
  padding: 0 20px 20px 20px;
595
  background: linear-gradient(to top, var(--bg-color) 85%, transparent);
596
  position: relative;
597
  z-index: 60;
598
  }
 
599
  .input-box {
600
  max-width: 850px;
601
  margin: 0 auto;
 
607
  border: 1px solid rgba(255,255,255,0.1);
608
  transition: border-color 0.2s, box-shadow 0.2s;
609
  }
 
610
  .input-box:focus-within {
611
  border-color: rgba(74, 158, 255, 0.5);
612
  box-shadow: 0 0 0 2px rgba(74, 158, 255, 0.1);
613
  }
 
614
  #userInput {
615
  flex: 1;
616
  background: transparent;
 
619
  font-size: 1rem;
620
  font-family: inherit;
621
  padding: 10px 0;
622
+ resize: none;
623
+ max-height: 120px;
624
  }
 
625
  #mainBtn {
626
  background: white;
627
  color: black;
 
636
  margin-left: 8px;
637
  transition: transform 0.2s;
638
  }
 
639
  #mainBtn:hover { transform: scale(1.05); }
640
+ #mainBtn:disabled {
641
+ opacity: 0.5;
642
+ cursor: not-allowed;
643
+ }
644
  .disclaimer {
645
  text-align: center;
646
  font-size: 0.75rem;
647
  color: #666;
648
  margin-top: 12px;
649
  }
650
+ .stats-badge {
651
+ font-size: 0.7rem;
652
+ color: var(--text-secondary);
653
+ margin-top: 4px;
654
+ font-family: 'Monaco', monospace;
655
+ }
656
  @keyframes slideUpFade {
657
  from { opacity: 0; transform: translateY(15px); }
658
  to { opacity: 1; transform: translateY(0); }
659
  }
660
+ @keyframes blink {
661
+ 0%, 100% { opacity: 1; }
662
+ 50% { opacity: 0; }
663
+ }
664
  @keyframes pulseAvatar {
665
  0% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0.4); }
666
  70% { box-shadow: 0 0 0 8px rgba(74, 158, 255, 0); }
667
  100% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0); }
668
  }
 
669
  .pulsing { animation: pulseAvatar 1.5s infinite; }
 
670
  ::-webkit-scrollbar { width: 8px; }
671
  ::-webkit-scrollbar-track { background: transparent; }
672
  ::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }
673
+ .error-message {
674
+ color: var(--error-color);
675
+ font-size: 0.85rem;
676
+ padding: 8px 12px;
677
+ background: rgba(234, 67, 53, 0.1);
678
+ border-radius: 8px;
679
+ margin-top: 8px;
680
+ }
681
  </style>
682
  </head>
683
  <body>
 
684
  <header>
685
  <div class="brand-wrapper" onclick="location.reload()">
686
  <div class="brand-logo"></div>
687
  <div class="brand-text">
688
+ MTP <span class="version-badge">3.5</span>
689
  </div>
690
  </div>
691
+ <div class="status-indicator" title="Sistema operativo"></div>
692
  </header>
 
693
  <div id="chatScroll" class="chat-scroll">
 
694
  <div class="msg-row bot" style="animation-delay: 0.1s;">
695
  <div class="bot-avatar"></div>
696
  <div class="msg-content-wrapper">
697
  <div class="msg-text">
698
+ ¡Hola! Soy MTP 3.5, un modelo de lenguaje mejorado con arquitectura Transformer avanzada.
699
+
700
+ Características:
701
+ • RoPE (Rotary Position Embedding)
702
+ • RMSNorm para estabilidad
703
+ • Control de repetición inteligente
704
+ • Generación coherente y fluida
705
+
706
+ ¿En qué puedo ayudarte hoy?
707
  </div>
708
  </div>
709
  </div>
710
  </div>
 
711
  <div class="footer-container">
712
  <div class="input-box">
713
+ <textarea id="userInput" placeholder="Escribe un mensaje..." rows="1" autocomplete="off"></textarea>
714
+ <button id="mainBtn" onclick="handleBtnClick()"></button>
 
 
715
  </div>
716
  <div class="disclaimer">
717
+ MTP 3.5 puede cometer errores. Considera verificar la información importante.
718
  </div>
719
  </div>
 
720
  <script>
721
  const chatScroll = document.getElementById('chatScroll');
722
  const userInput = document.getElementById('userInput');
723
  const mainBtn = document.getElementById('mainBtn');
 
 
724
  let isGenerating = false;
725
  let abortController = null;
726
  let typingTimeout = null;
727
  let lastUserPrompt = "";
 
 
728
  const ICON_SEND = `<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M22 2L11 13M22 2l-7 20-4-9-9-4 20-7z"></path></svg>`;
729
  const ICON_STOP = `<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" stroke="currentColor" stroke-width="0"><rect x="2" y="2" width="20" height="20" rx="4" ry="4"></rect></svg>`;
 
 
730
  mainBtn.innerHTML = ICON_SEND;
731
 
732
+ // Auto-resize textarea
733
+ userInput.addEventListener('input', function() {
734
+ this.style.height = 'auto';
735
+ this.style.height = Math.min(this.scrollHeight, 120) + 'px';
736
+ });
737
+
738
  function scrollToBottom() {
739
  chatScroll.scrollTop = chatScroll.scrollHeight;
740
  }
 
742
  function setBtnState(state) {
743
  if (state === 'sending') {
744
  mainBtn.innerHTML = ICON_STOP;
745
+ mainBtn.disabled = false;
746
  isGenerating = true;
747
+ } else if (state === 'disabled') {
748
+ mainBtn.disabled = true;
749
+ isGenerating = false;
750
  } else {
751
  mainBtn.innerHTML = ICON_SEND;
752
+ mainBtn.disabled = false;
753
  isGenerating = false;
754
  abortController = null;
755
  }
756
  }
757
 
 
 
758
  function handleBtnClick() {
759
  if (isGenerating) {
760
  stopGeneration();
 
766
  function stopGeneration() {
767
  if (abortController) abortController.abort();
768
  if (typingTimeout) clearTimeout(typingTimeout);
 
 
769
  const activeCursor = document.querySelector('.typing-cursor');
770
  if (activeCursor) activeCursor.classList.remove('typing-cursor');
 
771
  const activeAvatar = document.querySelector('.pulsing');
772
  if (activeAvatar) activeAvatar.classList.remove('pulsing');
 
773
  setBtnState('idle');
774
  userInput.focus();
775
  }
 
777
  async function sendMessage(textOverride = null) {
778
  const text = textOverride || userInput.value.trim();
779
  if (!text) return;
780
+
781
  lastUserPrompt = text;
782
+
783
  if (!textOverride) {
784
  userInput.value = '';
785
+ userInput.style.height = 'auto';
786
  addMessage(text, 'user');
787
  }
788
 
789
  setBtnState('sending');
790
  abortController = new AbortController();
791
+
 
792
  const botRow = document.createElement('div');
793
  botRow.className = 'msg-row bot';
794
 
 
800
 
801
  const msgText = document.createElement('div');
802
  msgText.className = 'msg-text';
803
+
804
  wrapper.appendChild(msgText);
805
  botRow.appendChild(avatar);
806
  botRow.appendChild(wrapper);
807
  chatScroll.appendChild(botRow);
808
  scrollToBottom();
809
+
810
  try {
811
+ const startTime = performance.now();
812
+
813
  const response = await fetch('/generate', {
814
  method: 'POST',
815
  headers: { 'Content-Type': 'application/json' },
816
+ body: JSON.stringify({
817
+ text: text,
818
+ max_tokens: 150,
819
+ temperature: 0.7,
820
+ top_k: 40,
821
+ top_p: 0.92,
822
+ repetition_penalty: 1.15,
823
+ min_length: 20
824
+ }),
825
  signal: abortController.signal
826
  });
827
+
828
  const data = await response.json();
829
 
830
  if (!isGenerating) return;
831
+
832
  avatar.classList.remove('pulsing');
833
+
834
+ if (data.error) {
835
+ msgText.innerHTML = `<span style="color: var(--error-color);">Error: ${data.error}</span>`;
836
+ setBtnState('idle');
837
+ return;
838
+ }
839
+
840
  const reply = data.reply || "No entendí eso.";
841
+ const endTime = performance.now();
842
+ const totalTime = ((endTime - startTime) / 1000).toFixed(2);
843
 
844
  await typeWriter(msgText, reply);
845
 
846
  if (isGenerating) {
847
+ // Agregar estadísticas
848
+ const stats = document.createElement('div');
849
+ stats.className = 'stats-badge';
850
+ stats.textContent = `${data.tokens_generated} tokens • ${data.tokens_per_second} t/s • ${totalTime}s • ${data.device}`;
851
+ wrapper.appendChild(stats);
852
+
853
  addActions(wrapper, reply);
854
  setBtnState('idle');
855
  }
 
856
  } catch (error) {
857
  if (error.name === 'AbortError') {
858
  msgText.textContent += " [Detenido]";
859
  } else {
860
+ console.error('Error:', error);
861
  avatar.classList.remove('pulsing');
862
+ msgText.innerHTML = `<span style="color: var(--error-color);">Error de conexión. Por favor, intenta de nuevo.</span>`;
 
863
  setBtnState('idle');
864
  }
865
  }
 
868
  function addMessage(text, sender) {
869
  const row = document.createElement('div');
870
  row.className = `msg-row ${sender}`;
871
+
872
  const content = document.createElement('div');
873
  content.className = 'msg-content';
874
  content.textContent = text;
875
+
876
  row.appendChild(content);
877
  chatScroll.appendChild(row);
878
  scrollToBottom();
 
889
  resolve();
890
  return;
891
  }
892
+
893
  if (i < text.length) {
894
  element.textContent += text.charAt(i);
895
  i++;
 
900
  resolve();
901
  }
902
  }
903
+
904
  type();
905
  });
906
  }
 
911
 
912
  const copyBtn = document.createElement('button');
913
  copyBtn.className = 'action-btn';
914
+ copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>Copiar`;
915
  copyBtn.onclick = () => {
916
+ navigator.clipboard.writeText(textToCopy).then(() => {
917
+ copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polyline points="20 6 9 17 4 12"></polyline></svg>Copiado`;
918
+ setTimeout(() => {
919
+ copyBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>Copiar`;
920
+ }, 2000);
921
+ });
922
  };
923
+
924
  const regenBtn = document.createElement('button');
925
  regenBtn.className = 'action-btn';
926
+ regenBtn.innerHTML = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M23 4v6h-6"></path><path d="M1 20v-6h6"></path><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"></path></svg>Regenerar`;
927
  regenBtn.onclick = () => {
928
  sendMessage(lastUserPrompt);
929
  };
930
+
931
  actionsDiv.appendChild(copyBtn);
932
  actionsDiv.appendChild(regenBtn);
933
  wrapperElement.appendChild(actionsDiv);
 
937
  }
938
 
939
  userInput.addEventListener('keydown', (e) => {
940
+ if (e.key === 'Enter' && !e.shiftKey) {
941
+ e.preventDefault();
942
+ handleBtnClick();
943
+ }
944
  });
945
 
946
+ window.onload = () => {
947
+ userInput.focus();
948
+
949
+ // Cargar info del modelo
950
+ fetch('/info')
951
+ .then(r => r.json())
952
+ .then(data => {
953
+ console.log('Modelo cargado:', data);
954
+ })
955
+ .catch(e => console.error('Error cargando info:', e));
956
+ };
957
  </script>
958
  </body>
959
  </html>
 
960
  """
961
+
962
if __name__ == "__main__":
    # Hosting platforms (e.g. HF Spaces / containers) inject PORT; default to
    # 7860 for local runs.
    port = int(os.environ.get("PORT", 7860))

    # NOTE: the two banner lines without {port} were f-strings with no
    # placeholders (ruff F541) — the f prefix is dropped; output is unchanged.
    print("\n🚀 Iniciando servidor MTP-3.5...")
    print(f"🌐 Interfaz web: http://0.0.0.0:{port}")
    print(f"📡 API docs: http://0.0.0.0:{port}/docs")
    print(f"📊 Health check: http://0.0.0.0:{port}/health")
    print(f"ℹ️  Model info: http://0.0.0.0:{port}/info")
    print("\n✅ Sistema listo. Presiona Ctrl+C para detener.")

    # Bind on all interfaces so container/host port mapping works.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
        log_level="info"
    )