teszenofficial committed on
Commit
66b7892
verified
1 Parent(s): 07b28c9

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +184 -0
  2. modelo_mtp_transformer_llm_v5.pkl +3 -0
  3. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py para Hugging Face Space
2
+
3
+ import pickle
4
+ import random
5
+ import math
6
+ import numpy as np
7
+ from huggingface_hub import hf_hub_download
8
+ import gradio as gr
9
+
10
+ # --- Model class definitions ---
+ # Copied directly from the improved Teacher-Forcing version
12
+
13
def get_positional_encoding(seq_len, embedding_dim):
    """Return sinusoidal positional encodings of shape (seq_len, embedding_dim).

    Even columns hold sin, odd columns hold cos, with the geometric frequency
    schedule from "Attention Is All You Need" (section 3.5).

    Args:
        seq_len: number of positions to encode.
        embedding_dim: width of each encoding vector.
    """
    pe = np.zeros((seq_len, embedding_dim))
    position = np.arange(0, seq_len, dtype=np.float32)[:, np.newaxis]
    div_term = np.exp(
        np.arange(0, embedding_dim, 2, dtype=np.float32)
        * -(np.log(10000.0) / embedding_dim)
    )
    pe[:, 0::2] = np.sin(position * div_term)
    # Fix: slice the cos block so an odd embedding_dim no longer raises a
    # broadcast error (cos fills one column fewer than sin in that case).
    # For even dims the slice is a no-op, so behavior is unchanged.
    pe[:, 1::2] = np.cos(position * div_term)[:, : embedding_dim // 2]
    return pe
20
+
21
def softmax(x, axis=-1):
    """Numerically stable softmax along *axis* (max-shift before exp)."""
    shifted = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return shifted / np.sum(shifted, axis=axis, keepdims=True)
24
+
25
def layer_norm(x, eps=1e-6):
    """Normalize the last axis to zero mean / unit variance.

    No learned scale or shift parameters — this is a plain normalization.
    *eps* guards against division by zero on constant inputs.
    """
    centered = x - np.mean(x, axis=-1, keepdims=True)
    return centered / np.sqrt(np.var(x, axis=-1, keepdims=True) + eps)
29
+
30
class MultiHeadAttention:
    """Causal (masked) multi-head self-attention, NumPy inference only.

    Attribute names (W_q, W_k, W_v, W_o, ...) must stay as-is: the trained
    weights are restored into this exact __dict__ layout by pickle.
    """

    def __init__(self, embedding_dim, n_heads):
        self.embedding_dim = embedding_dim
        self.n_heads = n_heads
        self.head_dim = embedding_dim // n_heads
        # Placeholder random projections; real values come from the checkpoint.
        self.W_q = np.random.uniform(-0.1, 0.1, (embedding_dim, embedding_dim))
        self.W_k = np.random.uniform(-0.1, 0.1, (embedding_dim, embedding_dim))
        self.W_v = np.random.uniform(-0.1, 0.1, (embedding_dim, embedding_dim))
        self.W_o = np.random.uniform(-0.1, 0.1, (embedding_dim, embedding_dim))

    def forward(self, x):
        """Attend over *x* of shape (seq_len, embedding_dim); same shape out."""
        n = x.shape[0]

        def split_heads(t):
            # (seq, dim) -> (heads, seq, head_dim)
            return t.reshape(n, self.n_heads, self.head_dim).transpose(1, 0, 2)

        q = x @ self.W_q
        k = x @ self.W_k
        v = x @ self.W_v
        q, k, v = split_heads(q), split_heads(k), split_heads(v)

        # Scaled dot-product scores plus an additive causal mask: positions
        # above the diagonal get -1e9 so softmax zeroes out future tokens.
        scores = q @ k.transpose(0, 2, 1) / np.sqrt(self.head_dim)
        scores = scores + np.triu(np.ones((n, n)), k=1) * -1e9
        weights = softmax(scores, axis=-1)

        context = (weights @ v).transpose(1, 0, 2).reshape(n, self.embedding_dim)
        return context @ self.W_o
56
+
57
class FeedForward:
    """Position-wise two-layer MLP with ReLU.

    Attribute names (W1, b1, W2, b2) must stay as-is for pickle compatibility.
    """

    def __init__(self, embedding_dim, hidden_dim):
        # Placeholder random init; real weights come from the checkpoint.
        self.W1 = np.random.uniform(-0.1, 0.1, (embedding_dim, hidden_dim))
        self.b1 = np.zeros(hidden_dim)
        self.W2 = np.random.uniform(-0.1, 0.1, (hidden_dim, embedding_dim))
        self.b2 = np.zeros(embedding_dim)

    def forward(self, x):
        """Apply W1 -> ReLU -> W2 to *x*; shape is preserved."""
        hidden = np.maximum(0, x @ self.W1 + self.b1)
        return hidden @ self.W2 + self.b2
69
+
70
class TransformerBlock:
    """One post-norm decoder block: attention + residual, then MLP + residual.

    Attribute names are part of the pickled state — do not rename them.
    """

    def __init__(self, embedding_dim, n_heads, hidden_dim):
        self.attention = MultiHeadAttention(embedding_dim, n_heads)
        self.ff = FeedForward(embedding_dim, hidden_dim)
        # norm1/norm2 are never read in forward(); kept only so the pickled
        # __dict__ layout matches the original class.
        self.norm1 = np.zeros(embedding_dim)
        self.norm2 = np.zeros(embedding_dim)
        # Scalar gates on the residual branches (both 1.0 by default).
        self.residual_weight_attn = 1.0
        self.residual_weight_ff = 1.0

    def forward(self, x):
        """Run the block over *x* of shape (seq_len, embedding_dim)."""
        x = layer_norm(x + self.residual_weight_attn * self.attention.forward(x))
        x = layer_norm(x + self.residual_weight_ff * self.ff.forward(x))
        return x
87
+
88
class MTPTransformerLLM:
    """Tiny decoder-only Transformer language model implemented in NumPy.

    Weights are trained elsewhere and restored via pickle (see load_from_hub);
    the random initialisation in __init__ is only a placeholder. Attribute
    names are part of the pickled state and must not change.
    """

    def __init__(self, vocab_size=1200, embedding_dim=128, n_heads=4, n_layers=2, lr=0.001, max_seq_len=200):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.lr = lr  # unused at inference; kept for pickle compatibility
        self.max_seq_len = max_seq_len

        # Vocabulary maps, grown lazily by add_word()/encode().
        self.word_to_idx = {}
        self.idx_to_word = {}

        self.token_embeddings = np.random.uniform(-0.1, 0.1, (vocab_size, embedding_dim))
        self.pos_embeddings = get_positional_encoding(max_seq_len, embedding_dim)

        # FFN hidden width is fixed at 2x the embedding width.
        self.blocks = [TransformerBlock(embedding_dim, n_heads, embedding_dim * 2) for _ in range(n_layers)]

        self.output_weights = np.random.uniform(-0.1, 0.1, (embedding_dim, vocab_size))

    def add_word(self, word):
        """Register *word* in the vocabulary if unseen.

        Raises:
            Exception: when the vocabulary is already at vocab_size.
        """
        if word not in self.word_to_idx:
            idx = len(self.word_to_idx)
            if idx >= self.vocab_size:
                raise Exception("Vocabulario excedido. Aumenta vocab_size.")
            self.word_to_idx[word] = idx
            self.idx_to_word[idx] = word

    def encode(self, sentence):
        """Lower-case, whitespace-tokenize *sentence* and return an index array.

        NOTE: unseen words are ADDED to the vocabulary as a side effect, so
        encoding at inference time mutates the maps (original behavior, kept).
        """
        indices = []
        for word in sentence.lower().split():
            if word not in self.word_to_idx:
                self.add_word(word)
            indices.append(self.word_to_idx[word])
        return np.array(indices)

    def decode(self, indices):
        """Map indices back to words, silently skipping unknown ids."""
        return [self.idx_to_word[i] for i in indices if i in self.idx_to_word]

    def forward(self, seq):
        """Run the full stack over *seq* (1-D index array).

        Returns:
            Logits of shape (seq_len, vocab_size).

        Raises:
            ValueError: when the sequence exceeds max_seq_len.
        """
        seq_len = len(seq)
        if seq_len > self.max_seq_len:
            raise ValueError(f"Secuencia demasiado larga. Max: {self.max_seq_len}, Recibido: {seq_len}")
        x = self.token_embeddings[seq] + self.pos_embeddings[:seq_len]
        for block in self.blocks:
            x = block.forward(x)
        return x @ self.output_weights  # (seq_len, vocab_size)

    def generate(self, input_text, max_len=20, temperature=0.8):
        """Sample up to *max_len* tokens after *input_text*; return them joined.

        Fix over the original: the context fed to forward() is windowed to the
        last max_seq_len tokens, so a long prompt or long generation can no
        longer trip forward()'s length check and crash mid-generation.
        """
        indices = self.encode(input_text)
        context_seq = indices.copy()
        for _ in range(max_len):
            logits = self.forward(context_seq[-self.max_seq_len:])
            last_logits = logits[-1] / temperature
            probs = softmax(last_logits)
            next_idx = np.random.choice(len(probs), p=probs)
            if next_idx == 0:  # index 0 doubles as an end-of-sequence marker
                break
            context_seq = np.append(context_seq, next_idx)
        # Only the newly generated suffix is returned, not the prompt.
        generated_part = self.decode(context_seq)[len(indices):]
        return " ".join(generated_part)

    @classmethod
    def load_from_hub(cls, repo_id, filename="model.pkl"):
        """Download *filename* from the Hub repo *repo_id* and unpickle it.

        SECURITY NOTE: pickle.load executes arbitrary code embedded in the
        file — only load checkpoints from repositories you trust.
        """
        local_path = hf_hub_download(repo_id=repo_id, filename=filename)
        with open(local_path, "rb") as f:
            model = pickle.load(f)
        return model
160
+
161
+ # --- End of class definitions ---
162
+
163
# Load the model once at app start-up.
# The repo id below is a placeholder — set the MODEL_REPO_ID environment
# variable (or edit the default) to point at the Hub repository that actually
# hosts "model.pkl". Default behavior is unchanged when the variable is unset.
model = MTPTransformerLLM.load_from_hub(
    os.environ.get("MODEL_REPO_ID", "TuUsuario/TuNombreDeRepositorio"),
    filename="model.pkl",
)
166
+
167
def chat_mtp(message, history):
    """Gradio chat callback: generate a reply for *message*.

    *history* is accepted because ChatInterface passes it, but it is
    intentionally unused — the model is stateless per turn (conversation
    memory could be layered on here later).
    """
    return model.generate(message, max_len=20, temperature=0.8)
171
+
172
+ # Crear interfaz de chat
173
# Build and launch the chat UI.
# Fix: the Spanish example strings were mojibake-corrupted (UTF-8 accents
# mis-decoded, e.g. "qu茅" for "qué") — restored the intended characters.
gr.ChatInterface(
    chat_mtp,
    title="MTP",
    description="Un modelo Transformer simple entrenado para responder preguntas y traducir.",
    examples=[
        "hola",
        "qué es python",
        "translate hello to spanish",
        "cómo te llamas",
        "qué es la vida",
    ],
).launch()
modelo_mtp_transformer_llm_v5.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:665254adf872826fbf0117fcfdae5238b1f2e5ce372eb828e9adea62920ef1d9
3
+ size 4779451
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ huggingface_hub
3
+ numpy