Upload 3 files
- app.py +184 -0
- modelo_mtp_transformer_llm_v5.pkl +3 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,184 @@
# app.py for the Hugging Face Space

import pickle
import random
import math
import numpy as np
from huggingface_hub import hf_hub_download
import gradio as gr

# --- Model class definitions ---
# Copied directly from the improved version with Teacher Forcing

def get_positional_encoding(seq_len, embedding_dim):
    pe = np.zeros((seq_len, embedding_dim))
    position = np.arange(0, seq_len, dtype=np.float32)[:, np.newaxis]
    div_term = np.exp(np.arange(0, embedding_dim, 2, dtype=np.float32) * -(np.log(10000.0) / embedding_dim))
    pe[:, 0::2] = np.sin(position * div_term)
    pe[:, 1::2] = np.cos(position * div_term)
    return pe

def softmax(x, axis=-1):
    x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return x / np.sum(x, axis=axis, keepdims=True)

def layer_norm(x, eps=1e-6):
    mean = np.mean(x, axis=-1, keepdims=True)
    variance = np.var(x, axis=-1, keepdims=True)
    return (x - mean) / np.sqrt(variance + eps)

class MultiHeadAttention:
    def __init__(self, embedding_dim, n_heads):
        self.embedding_dim = embedding_dim
        self.n_heads = n_heads
        self.head_dim = embedding_dim // n_heads
        self.W_q = np.random.uniform(-0.1, 0.1, (embedding_dim, embedding_dim))
        self.W_k = np.random.uniform(-0.1, 0.1, (embedding_dim, embedding_dim))
        self.W_v = np.random.uniform(-0.1, 0.1, (embedding_dim, embedding_dim))
        self.W_o = np.random.uniform(-0.1, 0.1, (embedding_dim, embedding_dim))

    def forward(self, x):
        seq_len = x.shape[0]
        Q = x @ self.W_q
        K = x @ self.W_k
        V = x @ self.W_v
        # Split into heads: (n_heads, seq_len, head_dim)
        Q = Q.reshape(seq_len, self.n_heads, self.head_dim).transpose(1, 0, 2)
        K = K.reshape(seq_len, self.n_heads, self.head_dim).transpose(1, 0, 2)
        V = V.reshape(seq_len, self.n_heads, self.head_dim).transpose(1, 0, 2)
        scores = Q @ K.transpose(0, 2, 1) / np.sqrt(self.head_dim)
        # Causal mask: each position can only attend to itself and earlier positions
        mask = np.triu(np.ones((seq_len, seq_len)), k=1) * -1e9
        scores = scores + mask
        attn_weights = softmax(scores, axis=-1)
        output = attn_weights @ V
        output = output.transpose(1, 0, 2).reshape(seq_len, self.embedding_dim)
        output = output @ self.W_o
        return output

class FeedForward:
    def __init__(self, embedding_dim, hidden_dim):
        self.W1 = np.random.uniform(-0.1, 0.1, (embedding_dim, hidden_dim))
        self.b1 = np.zeros(hidden_dim)
        self.W2 = np.random.uniform(-0.1, 0.1, (hidden_dim, embedding_dim))
        self.b2 = np.zeros(embedding_dim)

    def forward(self, x):
        x = x @ self.W1 + self.b1
        x = np.maximum(0, x)  # ReLU
        x = x @ self.W2 + self.b2
        return x

class TransformerBlock:
    def __init__(self, embedding_dim, n_heads, hidden_dim):
        self.attention = MultiHeadAttention(embedding_dim, n_heads)
        self.ff = FeedForward(embedding_dim, hidden_dim)
        self.norm1 = np.zeros(embedding_dim)
        self.norm2 = np.zeros(embedding_dim)
        self.residual_weight_attn = 1.0
        self.residual_weight_ff = 1.0

    def forward(self, x):
        attn_out = self.attention.forward(x)
        x = x + attn_out * self.residual_weight_attn
        x = layer_norm(x)
        ff_out = self.ff.forward(x)
        x = x + ff_out * self.residual_weight_ff
        x = layer_norm(x)
        return x

class MTPTransformerLLM:
    def __init__(self, vocab_size=1200, embedding_dim=128, n_heads=4, n_layers=2, lr=0.001, max_seq_len=200):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.lr = lr
        self.max_seq_len = max_seq_len

        self.word_to_idx = {}
        self.idx_to_word = {}

        self.token_embeddings = np.random.uniform(-0.1, 0.1, (vocab_size, embedding_dim))
        self.pos_embeddings = get_positional_encoding(max_seq_len, embedding_dim)

        self.blocks = [TransformerBlock(embedding_dim, n_heads, embedding_dim * 2) for _ in range(n_layers)]

        self.output_weights = np.random.uniform(-0.1, 0.1, (embedding_dim, vocab_size))

    def add_word(self, word):
        if word not in self.word_to_idx:
            idx = len(self.word_to_idx)
            if idx >= self.vocab_size:
                raise Exception("Vocabulary exceeded. Increase vocab_size.")
            self.word_to_idx[word] = idx
            self.idx_to_word[idx] = word

    def encode(self, sentence):
        tokens = sentence.lower().split()
        indices = []
        for word in tokens:
            if word not in self.word_to_idx:
                self.add_word(word)
            indices.append(self.word_to_idx[word])
        return np.array(indices)

    def decode(self, indices):
        return [self.idx_to_word[i] for i in indices if i in self.idx_to_word]

    def forward(self, seq):
        seq_len = len(seq)
        if seq_len > self.max_seq_len:
            raise ValueError(f"Sequence too long. Max: {self.max_seq_len}, received: {seq_len}")
        x = self.token_embeddings[seq]
        x = x + self.pos_embeddings[:seq_len]
        for block in self.blocks:
            x = block.forward(x)
        logits = x @ self.output_weights  # (seq_len, vocab_size)
        return logits

    def generate(self, input_text, max_len=20, temperature=0.8):
        indices = self.encode(input_text)
        context_seq = indices.copy()
        for _ in range(max_len):
            logits = self.forward(context_seq)
            last_logits = logits[-1]
            last_logits = last_logits / temperature
            probs = softmax(last_logits)
            next_idx = np.random.choice(len(probs), p=probs)
            if next_idx == 0:
                break
            context_seq = np.append(context_seq, next_idx)
        full_output = self.decode(context_seq)
        generated_part = full_output[len(indices):]
        return " ".join(generated_part)

    @classmethod
    def load_from_hub(cls, repo_id, filename="model.pkl"):
        local_path = hf_hub_download(repo_id=repo_id, filename=filename)
        with open(local_path, "rb") as f:
            model = pickle.load(f)
        return model

# --- End of class definitions ---

# Load the model when the app starts
# Make sure the file in your repo is named "model.pkl"
model = MTPTransformerLLM.load_from_hub("YourUsername/YourRepoName", filename="model.pkl")

def chat_mtp(message, history):
    # history is unused here, but it could be wired up to add conversation memory
    response = model.generate(message, max_len=20, temperature=0.8)
    return response

# Build the chat interface
gr.ChatInterface(
    chat_mtp,
    title="MTP",
    description="A simple Transformer model trained to answer questions and translate.",
    examples=[
        "hola",
        "qué es python",
        "translate hello to spanish",
        "cómo te llamas",
        "qué es la vida"
    ]
).launch()
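Note that app.py downloads `model.pkl` from a separate Hub repo via `hf_hub_download`, while this commit uploads the weights as `modelo_mtp_transformer_llm_v5.pkl` inside the Space itself. Below is a minimal local-loading sketch for a quick test, assuming that pickle was produced from the same class definitions and sits next to app.py; the filename simply mirrors the uploaded file and the `__main__` caveat is an assumption about how the model was pickled, not something the app code confirms.

```python
# Alternative loading path for a quick local test: read the pickle shipped with
# the Space instead of downloading "model.pkl" from a separate repo.
# The class definitions above must be importable under the same module path
# that was active when the model was pickled (often __main__).
import pickle

with open("modelo_mtp_transformer_llm_v5.pkl", "rb") as f:
    model = pickle.load(f)

# Sanity check before wiring the model into gr.ChatInterface
print(model.generate("hola", max_len=20, temperature=0.8))
```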
modelo_mtp_transformer_llm_v5.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:665254adf872826fbf0117fcfdae5238b1f2e5ce372eb828e9adea62920ef1d9
size 4779451
requirements.txt
ADDED
@@ -0,0 +1,3 @@
gradio
huggingface_hub
numpy