Spaces:

vedaco
/

Veda

Runtime error

App Files Files Community

vedaco committed 10 days ago

Commit

0a77ffe

verified ·

1 Parent(s): c97c271

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -56

app.py CHANGED Viewed

@@ -1,70 +1,115 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+import numpy as np
+import json
+import os
+from huggingface_hub import hf_hub_download
+# 1. SETUP YOUR MODEL ID
+# Hub repository that holds the model package. Set the VEDA_REPO_ID environment
+# variable to point at your repository without editing this file; when it is
+# unset, the placeholder below is used and must be replaced by hand.
+REPO_ID = os.environ.get("VEDA_REPO_ID", "YOUR_USERNAME/Veda-Scratch-LLM")  # <--- CHANGE THIS
+# 2. DEFINE THE CUSTOM LAYERS (Server needs to know what they are)
+@keras.saving.register_keras_serializable()
+class TokenAndPositionEmbedding(layers.Layer):
+    """Add a token embedding and a position embedding of the same width.
+
+    Registered as Keras-serializable so a saved model containing this layer
+    can be rebuilt from its config.
+    """
+    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
+        """Create the two embedding tables.
+
+        Args:
+            maxlen: ``input_dim`` of the position embedding.
+            vocab_size: ``input_dim`` of the token embedding.
+            embed_dim: ``output_dim`` of both embeddings.
+            **kwargs: Forwarded to ``layers.Layer``.
+        """
+        super().__init__(**kwargs)
+        # Constructor arguments are kept so get_config() can report them.
+        self.maxlen, self.vocab_size, self.embed_dim = maxlen, vocab_size, embed_dim
+        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
+        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
+    def call(self, x):
+        """Return the token embedding of ``x`` plus the embedding of positions 0..n-1.
+
+        ``n`` is the size of the last axis of ``x``, read at call time.
+        """
+        seq_len = tf.shape(x)[-1]
+        position_ids = tf.range(start=0, limit=seq_len, delta=1)
+        token_vectors = self.token_emb(x)
+        position_vectors = self.pos_emb(position_ids)
+        return token_vectors + position_vectors
+    def get_config(self):
+        """Return the base layer config extended with the constructor arguments."""
+        return {
+            **super().get_config(),
+            "maxlen": self.maxlen,
+            "vocab_size": self.vocab_size,
+            "embed_dim": self.embed_dim,
+        }
+@keras.saving.register_keras_serializable()
+class TransformerBlock(layers.Layer):
+    """Causally masked self-attention followed by a two-layer feed-forward network.
+
+    Each of the two steps is added back to its input and then passed through
+    its own layer normalisation.
+    """
+    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
+        """Build the attention, feed-forward and normalisation sub-layers.
+
+        Args:
+            embed_dim: ``key_dim`` of the attention layer and output width of
+                the feed-forward network.
+            num_heads: Number of attention heads.
+            ff_dim: Width of the ReLU layer inside the feed-forward network.
+            rate: Stored and reported by get_config(); not otherwise used by
+                this class.
+            **kwargs: Forwarded to ``layers.Layer``.
+        """
+        super().__init__(**kwargs)
+        # Constructor arguments are kept so get_config() can report them.
+        self.embed_dim, self.num_heads = embed_dim, num_heads
+        self.ff_dim, self.rate = ff_dim, rate
+        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
+        hidden = layers.Dense(ff_dim, activation="relu")
+        projection = layers.Dense(embed_dim)
+        self.ffn = keras.Sequential([hidden, projection])
+        self.ln1 = layers.LayerNormalization(epsilon=1e-6)
+        self.ln2 = layers.LayerNormalization(epsilon=1e-6)
+    def call(self, inputs):
+        """Apply attention and feed-forward, each with residual add then layer norm."""
+        # Query and value are both ``inputs`` (self-attention) under a causal mask.
+        attended = self.att(inputs, inputs, use_causal_mask=True)
+        normed = self.ln1(inputs + attended)
+        fed_forward = self.ffn(normed)
+        return self.ln2(normed + fed_forward)
+    def get_config(self):
+        """Return the base layer config extended with the constructor arguments."""
+        return {
+            **super().get_config(),
+            "embed_dim": self.embed_dim,
+            "num_heads": self.num_heads,
+            "ff_dim": self.ff_dim,
+            "rate": self.rate,
+        }
+# 3. DOWNLOAD AND LOAD MODEL
+# Runs once at import time: fetch the model and vocabulary files from REPO_ID,
+# then load both from the paths that hf_hub_download returns.
+print("Downloading model...")
+model_path = hf_hub_download(repo_id=REPO_ID, filename="veda_package/veda_model.keras")
+vocab_path = hf_hub_download(repo_id=REPO_ID, filename="veda_package/vocab.json")
+print("Loading model...")
+model = keras.models.load_model(model_path)
+# Read the vocabulary as UTF-8 explicitly so the platform's default encoding
+# cannot mis-decode non-ASCII characters.
+with open(vocab_path, "r", encoding="utf-8") as f:
+    vocab = json.load(f)
+# Character -> id lookup used to encode prompts.
+char2idx = vocab["char2idx"]
+# JSON object keys are always strings; turn them back into integer ids.
+idx2char = {int(k): v for k, v in vocab["idx2char"].items()}
+# 4. GENERATION FUNCTION
+def generate_text(prompt, length=200):
+    """Extend ``prompt`` by ``length`` characters, sampling one id per step.
+
+    Args:
+        prompt: Seed text. Characters missing from ``char2idx`` are encoded as
+            id 0.
+        length: Number of characters to generate. Coerced to ``int`` because a
+            UI slider or API client may deliver a float.
+
+    Returns:
+        ``prompt`` followed by the generated characters, or a string starting
+        with ``"Error: "`` when the prompt is empty or generation fails.
+    """
+    # An empty prompt would give the model a zero-length sequence.
+    if not prompt:
+        return "Error: prompt must not be empty."
+    try:
+        num_chars = int(length)
+        # Largest number of trailing ids passed to the model per step.
+        block_size = 128
+        # Convert prompt to numbers, with a leading batch axis of size 1.
+        input_ids = tf.convert_to_tensor(
+            [[char2idx.get(ch, 0) for ch in prompt]], dtype=tf.int32
+        )
+        generated = []
+        for _ in range(num_chars):
+            # Crop to context window
+            if input_ids.shape[1] > block_size:
+                input_context = input_ids[:, -block_size:]
+            else:
+                input_context = input_ids
+            # Predict: keep only the scores for the final position.
+            last_step_scores = model(input_context)[:, -1, :]
+            # Sample; convert to a plain int so it is a clean dict key and can
+            # be re-wrapped with the same dtype as input_ids.
+            predicted_id = int(
+                tf.random.categorical(last_step_scores, num_samples=1)[0, 0].numpy()
+            )
+            # Append
+            next_id = tf.constant([[predicted_id]], dtype=tf.int32)
+            input_ids = tf.concat([input_ids, next_id], axis=-1)
+            generated.append(idx2char[predicted_id])
+        return prompt + "".join(generated)
+    except Exception as e:
+        # UI boundary: report the failure as text instead of raising.
+        return f"Error: {str(e)}"
+# 5. CREATE THE WEBSITE UI
+# Input widgets, listed in the positional order generate_text receives them.
+prompt_box = gr.Textbox(label="Enter Prompt", value="The Veda is")
+length_slider = gr.Slider(label="Length", minimum=10, maximum=500, value=200)
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[prompt_box, length_slider],
+    outputs="text",
+    title="Veda AI",
+    description="A custom LLM trained from scratch.",
+)
+# Start serving the interface.
+iface.launch()