teszenofficial committed on
Commit
7890e0c
·
verified ·
1 Parent(s): 56b933f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -237
app.py CHANGED
@@ -1,268 +1,94 @@
1
  import os
2
- import sys
3
  import torch
4
- import json
5
- import gc
6
- import re
7
  from fastapi import FastAPI
8
  from fastapi.responses import HTMLResponse
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from pydantic import BaseModel
11
- from huggingface_hub import snapshot_download
12
  import uvicorn
13
- import math
14
- import torch.nn as nn
15
- import torch.nn.functional as F
16
- import sentencepiece as spm
17
-
18
- if torch.cuda.is_available():
19
- DEVICE = "cuda"
20
- print("✅ GPU detectada")
21
- torch.backends.cudnn.benchmark = True
22
- else:
23
- DEVICE = "cpu"
24
- print("⚠️ CPU mode")
25
- torch.set_num_threads(4)
26
-
27
- torch.set_grad_enabled(False)
28
-
29
- MODEL_REPO = "TeszenAI/MTP-3.1.1"
30
-
31
- class LayerNorm(nn.Module):
32
- def __init__(self, d_model, eps=1e-5):
33
- super().__init__()
34
- self.weight = nn.Parameter(torch.ones(d_model))
35
- self.bias = nn.Parameter(torch.zeros(d_model))
36
- self.eps = eps
37
- def forward(self, x):
38
- return self.weight * (x - x.mean(-1, keepdim=True)) / (x.std(-1, keepdim=True) + self.eps) + self.bias
39
-
40
- class MultiHeadAttention(nn.Module):
41
- def __init__(self, d_model, n_heads, dropout=0.1):
42
- super().__init__()
43
- assert d_model % n_heads == 0
44
- self.d_k = d_model // n_heads
45
- self.n_heads = n_heads
46
- self.scale = math.sqrt(self.d_k)
47
-
48
- self.w_q = nn.Linear(d_model, d_model)
49
- self.w_k = nn.Linear(d_model, d_model)
50
- self.w_v = nn.Linear(d_model, d_model)
51
- self.w_o = nn.Linear(d_model, d_model)
52
- self.dropout = nn.Dropout(dropout)
53
-
54
- def forward(self, x, mask=None):
55
- B, T, C = x.shape
56
- Q = self.w_q(x).view(B, T, self.n_heads, self.d_k).transpose(1, 2)
57
- K = self.w_k(x).view(B, T, self.n_heads, self.d_k).transpose(1, 2)
58
- V = self.w_v(x).view(B, T, self.n_heads, self.d_k).transpose(1, 2)
59
-
60
- attn = (Q @ K.transpose(-2, -1)) * self.scale
61
- if mask is not None:
62
- attn = attn.masked_fill(mask == 0, float('-inf'))
63
-
64
- attn = F.softmax(attn, dim=-1)
65
- attn = self.dropout(attn)
66
-
67
- out = (attn @ V).transpose(1, 2).contiguous().view(B, T, C)
68
- return self.w_o(out)
69
-
70
- class FeedForward(nn.Module):
71
- def __init__(self, d_model, d_ff, dropout=0.1):
72
- super().__init__()
73
- self.linear1 = nn.Linear(d_model, d_ff)
74
- self.linear2 = nn.Linear(d_ff, d_model)
75
- self.dropout = nn.Dropout(dropout)
76
- def forward(self, x):
77
- return self.linear2(self.dropout(F.gelu(self.linear1(x))))
78
-
79
- class TransformerBlock(nn.Module):
80
- def __init__(self, d_model, n_heads, d_ff, dropout=0.1):
81
- super().__init__()
82
- self.attention = MultiHeadAttention(d_model, n_heads, dropout)
83
- self.feed_forward = FeedForward(d_model, d_ff, dropout)
84
- self.norm1 = LayerNorm(d_model)
85
- self.norm2 = LayerNorm(d_model)
86
- self.dropout1 = nn.Dropout(dropout)
87
- self.dropout2 = nn.Dropout(dropout)
88
-
89
- def forward(self, x, mask=None):
90
- x = x + self.dropout1(self.attention(self.norm1(x), mask))
91
- x = x + self.dropout2(self.feed_forward(self.norm2(x)))
92
- return x
93
-
94
- class PositionalEncoding(nn.Module):
95
- def __init__(self, d_model, max_len=5000):
96
- super().__init__()
97
- pe = torch.zeros(max_len, d_model)
98
- pos = torch.arange(0, max_len).unsqueeze(1).float()
99
- div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
100
- pe[:, 0::2] = torch.sin(pos * div_term)
101
- pe[:, 1::2] = torch.cos(pos * div_term)
102
- self.register_buffer('pe', pe.unsqueeze(0))
103
- def forward(self, x):
104
- return x + self.pe[:, :x.size(1), :]
105
-
106
- class MTPModel(nn.Module):
107
- def __init__(self, vocab_size, d_model=512, n_heads=8, n_layers=6, d_ff=2048, dropout=0.1, max_len=512):
108
- super().__init__()
109
- self.vocab_size = vocab_size
110
- self.d_model = d_model
111
- self.max_len = max_len
112
-
113
- self.token_embedding = nn.Embedding(vocab_size, d_model)
114
- self.pos_encoding = PositionalEncoding(d_model, max_len)
115
- self.blocks = nn.ModuleList([
116
- TransformerBlock(d_model, n_heads, d_ff, dropout) for _ in range(n_layers)
117
- ])
118
- self.norm = LayerNorm(d_model)
119
- self.lm_head = nn.Linear(d_model, vocab_size)
120
-
121
- def forward(self, x, mask=None):
122
- if mask is None:
123
- mask = torch.tril(torch.ones(x.size(1), x.size(1))).unsqueeze(0).unsqueeze(0).to(x.device)
124
-
125
- x = self.token_embedding(x) * math.sqrt(self.d_model)
126
- x = self.pos_encoding(x)
127
-
128
- for block in self.blocks:
129
- x = block(x, mask)
130
-
131
- x = self.norm(x)
132
- return self.lm_head(x)
133
-
134
- @torch.inference_mode()
135
- def generate(self, input_ids, max_new_tokens=150, temperature=0.7, top_k=50):
136
- generated = input_ids
137
- eos_token = 3
138
-
139
- for _ in range(max_new_tokens):
140
- logits = self(generated)
141
- next_logits = logits[0, -1, :] / temperature
142
-
143
- if top_k > 0:
144
- top_k_val = min(top_k, next_logits.size(-1))
145
- top_k_values = torch.topk(next_logits, top_k_val)[0]
146
- next_logits[next_logits < top_k_values[-1]] = float('-inf')
147
-
148
- probs = F.softmax(next_logits, dim=-1)
149
- next_token = torch.multinomial(probs, 1).item()
150
-
151
- if next_token == eos_token or next_token == 0 or next_token == 1:
152
- break
153
-
154
- generated = torch.cat([generated, torch.tensor([[next_token]], device=generated.device)], dim=1)
155
-
156
- if len(generated[0]) > 300:
157
- break
158
-
159
- return generated
160
-
161
- print("📦 Descargando modelo desde HuggingFace...")
162
- repo_path = snapshot_download(repo_id=MODEL_REPO, repo_type="model", local_dir="mtp_repo")
163
-
164
- config_path = os.path.join(repo_path, "config.json")
165
- if os.path.exists(config_path):
166
- with open(config_path, "r") as f:
167
- config = json.load(f)
168
- print(f"✅ Configuración cargada: d_model={config.get('d_model', 512)}, layers={config.get('n_layers', 6)}")
169
- else:
170
- print("⚠️ Usando configuración por defecto")
171
- config = {
172
- "vocab_size": 8000,
173
- "d_model": 512,
174
- "n_heads": 8,
175
- "n_layers": 6,
176
- "d_ff": 2048,
177
- "dropout": 0.1,
178
- "max_len": 512
179
- }
180
-
181
- tokenizer_path = os.path.join(repo_path, "mtp_tokenizer.model")
182
- if not os.path.exists(tokenizer_path):
183
- print(f"❌ Tokenizador no encontrado")
184
- sys.exit(1)
185
-
186
- sp = spm.SentencePieceProcessor()
187
- sp.load(tokenizer_path)
188
- VOCAB_SIZE = sp.get_piece_size()
189
- config["vocab_size"] = VOCAB_SIZE
190
-
191
- print(f"🧠 Inicializando modelo MTP...")
192
- print(f" → Vocabulario: {VOCAB_SIZE}")
193
- print(f" → Dimensión: {config['d_model']}")
194
- print(f" → Capas: {config['n_layers']}")
195
- print(f" → Heads: {config['n_heads']}")
196
 
197
- model = MTPModel(**config)
198
- model.to(DEVICE)
 
199
 
200
- model_path = os.path.join(repo_path, "mtp_model.pt")
201
- if os.path.exists(model_path):
202
- state_dict = torch.load(model_path, map_location=DEVICE)
203
- model.load_state_dict(state_dict, strict=False)
204
- print("✅ Pesos cargados correctamente")
205
- else:
206
- print(f"❌ Modelo no encontrado")
207
- sys.exit(1)
208
 
 
 
 
209
  model.eval()
210
- print(f"✅ Modelo listo: {sum(p.numel() for p in model.parameters()):,} params")
211
 
 
212
  app = FastAPI()
213
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
214
 
215
  class PromptRequest(BaseModel):
216
  text: str
217
 
218
- def build_prompt(user_input):
219
- return f"### Instrucción:\n{user_input}\n\n### Respuesta:\n"
220
-
221
- def clean_response(text):
222
  if not text:
223
  return ""
224
- text = re.sub(r'<unk>|<pad>|<s>|</s>', '', text)
 
 
 
225
  text = re.sub(r'\s+', ' ', text).strip()
226
- return text
 
 
 
 
 
 
 
 
227
 
228
  @app.post("/generate")
229
  async def generate(req: PromptRequest):
230
  user_input = req.text.strip()
231
  if not user_input:
232
- return {"reply": ""}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
- prompt = build_prompt(user_input)
235
- tokens = sp.encode(prompt)
236
 
237
- if len(tokens) > 450:
238
- tokens = tokens[-450:]
 
 
 
 
239
 
240
- input_ids = torch.tensor([tokens], device=DEVICE)
241
 
242
- try:
243
- output_ids = model.generate(input_ids, max_new_tokens=120, temperature=0.7, top_k=50)
244
-
245
- gen_tokens = output_ids[0, len(tokens):].tolist()
246
-
247
- clean_tokens = []
248
- for t in gen_tokens:
249
- if t == 3 or t == 0 or t == 1:
250
- break
251
- clean_tokens.append(t)
252
-
253
- response = sp.decode(clean_tokens).strip() if clean_tokens else ""
254
- response = clean_response(response)
255
-
256
- if not response:
257
- response = "Lo siento, no pude generar una respuesta."
258
-
259
- print(f"📝 {user_input[:40]} -> {len(clean_tokens)} tokens")
260
-
261
- return {"reply": response[:500]}
262
 
263
- except Exception as e:
264
- print(f"❌ Error: {e}")
265
- return {"reply": "Error al generar respuesta"}
266
 
267
  @app.get("/health")
268
  def health():
@@ -400,11 +226,11 @@ body {
400
  <body>
401
  <div class="header">
402
  <h1><span class="dot"></span> MTP Assistant</h1>
403
- <p>Modelo Transformer 512-dim | 6 capas</p>
404
  </div>
405
  <div class="chat" id="chat">
406
  <div class="message bot">
407
- <div class="message-content">Hola, soy MTP. ¿En qué puedo ayudarte?</div>
408
  </div>
409
  </div>
410
  <div class="input-area">
@@ -466,7 +292,7 @@ async function send() {
466
  addMessage(data.reply || "No pude generar respuesta.", false);
467
  } catch (err) {
468
  removeTyping();
469
- addMessage("Error de conexión.", false);
470
  } finally {
471
  loading = false;
472
  sendBtn.disabled = false;
 
1
  import os
 
2
  import torch
 
 
 
3
  from fastapi import FastAPI
4
  from fastapi.responses import HTMLResponse
5
  from fastapi.middleware.cors import CORSMiddleware
6
  from pydantic import BaseModel
7
+ from transformers import AutoModelForCausalLM, AutoTokenizer
8
  import uvicorn
9
+ import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ # ==================== CONFIGURACIÓN ====================
12
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
13
+ print(f"📱 Dispositivo: {DEVICE}")
14
 
15
+ # Usar un modelo pequeño pero FUNCIONAL de HuggingFace
16
+ # Opciones: "microsoft/DialoGPT-small" (mejor para conversación)
17
+ # "TinyLlama/TinyLlama-1.1B-Chat-v1.0" (más potente pero más lento)
18
+ MODEL_NAME = "microsoft/DialoGPT-small" # ~60MB, rápido y funcional
 
 
 
 
19
 
20
+ print(f"📦 Cargando modelo {MODEL_NAME}...")
21
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
22
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
23
  model.eval()
24
+ print(f"✅ Modelo cargado: {sum(p.numel() for p in model.parameters()):,} parámetros")
25
 
26
+ # ==================== API ====================
27
  app = FastAPI()
28
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
29
 
30
  class PromptRequest(BaseModel):
31
  text: str
32
 
33
+ def clean_response(text: str) -> str:
34
+ """Limpia la respuesta del modelo"""
 
 
35
  if not text:
36
  return ""
37
+
38
+ # Eliminar caracteres especiales
39
+ text = re.sub(r'<\|.*?\|>', '', text)
40
+ text = re.sub(r'\[.*?\]', '', text)
41
  text = re.sub(r'\s+', ' ', text).strip()
42
+
43
+ # Limitar longitud
44
+ if len(text) > 400:
45
+ text = text[:400]
46
+ last_dot = text.rfind('.')
47
+ if last_dot > 200:
48
+ text = text[:last_dot + 1]
49
+
50
+ return text if text else "Lo siento, no pude generar una respuesta."
51
 
52
  @app.post("/generate")
53
  async def generate(req: PromptRequest):
54
  user_input = req.text.strip()
55
  if not user_input:
56
+ return {"reply": "Escribe un mensaje"}
57
+
58
+ # Formatear entrada para el modelo
59
+ formatted_input = f"User: {user_input}\nBot:"
60
+
61
+ # Tokenizar
62
+ inputs = tokenizer.encode(formatted_input, return_tensors="pt").to(DEVICE)
63
+
64
+ # Generar
65
+ with torch.no_grad():
66
+ outputs = model.generate(
67
+ inputs,
68
+ max_new_tokens=100,
69
+ temperature=0.7,
70
+ top_k=50,
71
+ top_p=0.9,
72
+ do_sample=True,
73
+ pad_token_id=tokenizer.eos_token_id
74
+ )
75
 
76
+ # Decodificar
77
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
78
 
79
+ # Extraer solo la respuesta del bot
80
+ if "Bot:" in response:
81
+ response = response.split("Bot:")[-1].strip()
82
+ elif "User:" in response:
83
+ parts = response.split("User:")
84
+ response = parts[-1].strip() if len(parts) > 1 else response
85
 
86
+ response = clean_response(response)
87
 
88
+ print(f"📝 Usuario: {user_input[:50]}")
89
+ print(f"🤖 Respuesta: {response[:100]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
+ return {"reply": response}
 
 
92
 
93
  @app.get("/health")
94
  def health():
 
226
  <body>
227
  <div class="header">
228
  <h1><span class="dot"></span> MTP Assistant</h1>
229
+ <p>DialoGPT - Modelo conversacional real</p>
230
  </div>
231
  <div class="chat" id="chat">
232
  <div class="message bot">
233
+ <div class="message-content">¡Hola! Soy MTP, tu asistente. ¿En qué puedo ayudarte hoy?</div>
234
  </div>
235
  </div>
236
  <div class="input-area">
 
292
  addMessage(data.reply || "No pude generar respuesta.", false);
293
  } catch (err) {
294
  removeTyping();
295
+ addMessage("Error de conexión. Intenta de nuevo.", false);
296
  } finally {
297
  loading = false;
298
  sendBtn.disabled = false;