vedaco committed on
Commit
0a77ffe
·
verified ·
1 Parent(s): c97c271

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -56
app.py CHANGED
@@ -1,70 +1,115 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
3
 
 
 
4
 
5
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Stream a chat completion for `message`, yielding the growing reply.

    The conversation sent to the model is the system message, followed by the
    prior `history`, followed by the new user `message`. The text of every
    streamed chunk is appended to the accumulated reply, and the accumulated
    reply is yielded once per chunk.

    For more information on `huggingface_hub` Inference API support, see:
    https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
    """
    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

    conversation = [
        {"role": "system", "content": system_message},
        *history,
        {"role": "user", "content": message},
    ]

    stream = client.chat_completion(
        conversation,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )

    reply = ""
    for chunk in stream:
        choices = chunk.choices
        # A chunk with no choices or no text adds nothing, but the current
        # reply is still yielded for it.
        if len(choices) and choices[0].delta.content:
            reply += choices[0].delta.content
        yield reply
41
 
 
 
 
 
42
 
43
# For information on how to customize the ChatInterface, peruse the gradio docs:
# https://www.gradio.app/docs/chatinterface
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    # Extra controls, passed to `respond` after the message and history in
    # this order: system message, max tokens, temperature, top-p.
    additional_inputs=[
        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)

# Page layout: a sidebar holding the login button, then the chat interface.
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()


if __name__ == "__main__":
    demo.launch()
 
1
  import gradio as gr
2
+ import tensorflow as tf
3
+ from tensorflow import keras
4
+ from tensorflow.keras import layers
5
+ import numpy as np
6
+ import json
7
+ import os
8
+ from huggingface_hub import hf_hub_download
9
 
10
# 1. SETUP YOUR MODEL ID
# Hub repository that holds the model package. It can be overridden without
# editing this file by setting the VEDA_REPO_ID environment variable; when the
# variable is unset or empty, the placeholder below is used and must be
# replaced with a real "<user>/<repo>" id.
REPO_ID = os.environ.get("VEDA_REPO_ID") or "YOUR_USERNAME/Veda-Scratch-LLM"  # <--- CHANGE THIS
12
 
13
+ # 2. DEFINE THE CUSTOM LAYERS (Server needs to know what they are)
14
@keras.saving.register_keras_serializable()
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: Number of rows in the position-embedding table.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Output width of both embedding tables.
        **kwargs: Forwarded to `layers.Layer`.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed the ids in `x` and add the embedding of each position index."""
        # Positions run over the last axis of the input: 0, 1, ..., length - 1.
        seq_len = tf.shape(x)[-1]
        position_ids = tf.range(0, seq_len)
        token_vectors = self.token_emb(x)
        position_vectors = self.pos_emb(position_ids)
        return token_vectors + position_vectors

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        return {
            **super().get_config(),
            "maxlen": self.maxlen,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        }
33
 
34
@keras.saving.register_keras_serializable()
class TransformerBlock(layers.Layer):
    """Causal self-attention followed by a two-layer feed-forward network.

    Each of the two sub-steps adds its output to its input and applies layer
    normalization to the sum.

    Args:
        embed_dim: Width of the block's output; also used as the attention
            `key_dim`.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) feed-forward layer.
        rate: Stored and serialized via get_config(); `call` applies no
            dropout, so this value does not affect the computation here.
        **kwargs: Forwarded to `layers.Layer`.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.ln1 = layers.LayerNormalization(epsilon=1e-6)
        self.ln2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Apply masked self-attention and the feed-forward network to `inputs`."""
        # Query and value are both `inputs`; the causal mask is enabled.
        attended = self.att(inputs, inputs, use_causal_mask=True)
        normed = self.ln1(inputs + attended)
        return self.ln2(normed + self.ffn(normed))

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        return {
            **super().get_config(),
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        }
56
 
57
# 3. DOWNLOAD AND LOAD MODEL
# Fetch the saved model and its vocabulary from the Hub repository REPO_ID.
print("Downloading model...")
model_path = hf_hub_download(repo_id=REPO_ID, filename="veda_package/veda_model.keras")
vocab_path = hf_hub_download(repo_id=REPO_ID, filename="veda_package/vocab.json")

print("Loading model...")
model = keras.models.load_model(model_path)

# Read the JSON vocabulary explicitly as UTF-8 so that non-ASCII characters
# decode the same way regardless of the platform's default encoding.
with open(vocab_path, "r", encoding="utf-8") as f:
    vocab = json.load(f)

# Character -> id table used to encode prompts.
char2idx = vocab["char2idx"]
# JSON object keys are always strings, so convert them back to integer ids.
idx2char = {int(k): v for k, v in vocab["idx2char"].items()}
69
 
70
+ # 4. GENERATION FUNCTION
71
def generate_text(prompt, length=200):
    """Sample `length` characters from the model as a continuation of `prompt`.

    Args:
        prompt: Seed text. Characters missing from `char2idx` are encoded as
            id 0.
        length: Number of characters to sample. Converted with `int()` because
            a UI slider may deliver the value as a float, which `range()`
            rejects.

    Returns:
        `prompt` followed by the sampled characters, or a string starting with
        "Error: " when the prompt is empty or generation raises.
    """
    # The model needs at least one input position to predict from; report
    # that clearly instead of surfacing a low-level indexing error.
    if not prompt:
        return "Error: prompt must not be empty."

    try:
        num_chars = int(length)

        # Longest context passed to the model; older ids are cropped off.
        # NOTE(review): presumably matches the model's training maxlen - confirm.
        block_size = 128

        # Convert prompt to numbers
        token_ids = [char2idx.get(ch, 0) for ch in prompt]

        sampled = []
        for _ in range(num_chars):
            # Negative-start slicing keeps the whole list while it is still
            # shorter than block_size, so no separate length check is needed.
            context = tf.constant([token_ids[-block_size:]], dtype=tf.int32)

            # Keep only the scores predicted for the last position.
            last_scores = model(context)[:, -1, :]

            # Sample one id and turn it into a plain Python int, so the id
            # list and the idx2char lookup never mix integer dtypes.
            next_id = int(
                tf.random.categorical(last_scores, num_samples=1)[0, 0].numpy()
            )

            token_ids.append(next_id)
            sampled.append(idx2char[next_id])

        return prompt + "".join(sampled)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"
102
 
103
+ # 5. CREATE THE WEBSITE UI
104
# Web UI: a prompt box and a length slider feed generate_text, and its return
# value is shown as plain text.
iface = gr.Interface(
    title="Veda AI",
    description="A custom LLM trained from scratch.",
    fn=generate_text,
    inputs=[
        gr.Textbox(value="The Veda is", label="Enter Prompt"),
        gr.Slider(minimum=10, maximum=500, value=200, label="Length"),
    ],
    outputs="text",
)

# Start serving the interface.
iface.launch()