vedaco committed · verified
Commit a2e86ef · 1 Parent(s): 511655e

Update app.py

Files changed (1): app.py (+158 −115)

app.py CHANGED

Old version (lines removed by this commit are prefixed "-"):
@@ -3,59 +3,35 @@ import tensorflow as tf
  from tensorflow import keras
  from tensorflow.keras import layers
  import numpy as np
- import json
  import os

  # =========================================
- # 1. DATA LOADING
  # =========================================

- # Backup text generator (multiplies text to create 200k+ chars)
- SEED_TEXT = """
- The Veda is knowledge. Knowledge is power. Wisdom is the light.
- To know the self is to know the universe.
- Truth is one; the wise call it by many names.
- Action performed without attachment leads to liberation.
- Om Bhur Bhuva Swaha. Tat Savitur Varenyam.
- Bhargo Devasya Dhimahi. Dhiyo Yo Nah Prachodayat.
- """ * 1000
-
- print("--- CHECKING FOR DATA ---")
-
- final_text = ""
- file_source = ""
-
- # Check if your Dad's file is uploaded
- if os.path.exists("veda.txt"):
-     print("✅ FOUND veda.txt! Loading file...")
-     with open("veda.txt", "r", encoding="utf-8", errors="ignore") as f:
-         final_text = f.read()
-     file_source = "veda.txt"
- elif os.path.exists("Veda.txt"):
-     print("✅ FOUND Veda.txt! Loading file...")
-     with open("Veda.txt", "r", encoding="utf-8", errors="ignore") as f:
-         final_text = f.read()
-     file_source = "Veda.txt"
- else:
-     print("⚠️ No file found. Using internal training data.")
-     final_text = SEED_TEXT
-     file_source = "Internal Data"
-
- print(f"Training Source: {file_source}")
- print(f"Total Characters: {len(final_text)}")

  # =========================================
- # 2. MODEL DEFINITION
  # =========================================
  @tf.keras.utils.register_keras_serializable()
- class TokenAndPositionEmbedding(tf.keras.layers.Layer):
      def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
          super().__init__(**kwargs)
          self.maxlen = maxlen
          self.vocab_size = vocab_size
          self.embed_dim = embed_dim
-         self.token_emb = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
-         self.pos_emb = tf.keras.layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

      def call(self, x):
          maxlen = tf.shape(x)[-1]
@@ -68,20 +44,17 @@ class TokenAndPositionEmbedding(tf.keras.layers.Layer):
          return config

  @tf.keras.utils.register_keras_serializable()
- class TransformerBlock(tf.keras.layers.Layer):
      def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
          super().__init__(**kwargs)
          self.embed_dim = embed_dim
          self.num_heads = num_heads
          self.ff_dim = ff_dim
          self.rate = rate
-         self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
-         self.ffn = tf.keras.Sequential([
-             tf.keras.layers.Dense(ff_dim, activation="relu"),
-             tf.keras.layers.Dense(embed_dim)
-         ])
-         self.ln1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
-         self.ln2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

      def call(self, inputs):
          attn_output = self.att(inputs, inputs, use_causal_mask=True)
@@ -93,100 +66,170 @@ class TransformerBlock(tf.keras.layers.Layer):
          config.update({"embed_dim": self.embed_dim, "num_heads": self.num_heads, "ff_dim": self.ff_dim, "rate": self.rate})
          return config

  # =========================================
- # 3. TRAINING
  # =========================================
- chars = sorted(list(set(final_text)))
- vocab_size = len(chars)
- char2idx = {c: i for i, c in enumerate(chars)}
- idx2char = {i: c for i, c in enumerate(chars)}
- all_ids = np.array([char2idx[c] for c in final_text])
-
- # Hyperparameters
- BATCH_SIZE = 32
- BLOCK_SIZE = 128
- EMBED_DIM = 128
- NUM_HEADS = 4
- FF_DIM = 256
- NUM_LAYERS = 2
- EPOCHS = 3
-
- dataset = tf.data.Dataset.from_tensor_slices(all_ids)
- dataset = dataset.batch(BLOCK_SIZE + 1, drop_remainder=True)
- dataset = dataset.map(lambda x: (x[:-1], x[1:]))
- dataset = dataset.shuffle(1000).batch(BATCH_SIZE)
-
- inputs = layers.Input(shape=(BLOCK_SIZE,))
- embedding_layer = TokenAndPositionEmbedding(BLOCK_SIZE, vocab_size, EMBED_DIM)
- x = embedding_layer(inputs)
- for _ in range(NUM_LAYERS):
-     x = TransformerBlock(EMBED_DIM, NUM_HEADS, FF_DIM)(x)
- outputs = layers.Dense(vocab_size)(x)
- model = keras.Model(inputs=inputs, outputs=outputs)
- model.compile(optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True))
-
- print(f"STARTING TRAINING...")
- try:
-     model.fit(dataset, epochs=EPOCHS)
-     print("Training Complete!")
- except Exception as e:
-     print(f"Training failed: {e}")

  # =========================================
- # 4. CHAT GENERATION (WITH TEMPERATURE FIX)
  # =========================================
- def generate_text(prompt, length=200):
      try:
          input_ids = [char2idx.get(s, 0) for s in prompt]
-         if not input_ids: return "Error: Unknown characters (not in training data)."
-
          input_ids = tf.convert_to_tensor([input_ids], dtype=tf.int32)
-         block_size = 128
          result = []

-         # Temperature controls randomness
-         # 1.0 = Standard
-         # 0.5 = More Focused / Less Gibberish
-         # 0.2 = Very Repetitive / Safe
-         temperature = 0.5
-
          for _ in range(int(length)):
              current_len = tf.shape(input_ids)[1]
-             if current_len < block_size:
-                 pad_amt = block_size - current_len
                  padded = tf.pad(input_ids, [[0, 0], [pad_amt, 0]], constant_values=0)
              else:
-                 padded = input_ids[:, -block_size:]

-             predictions = model(padded)
-             predictions = predictions[:, -1, :]

-             # --- APPLY TEMPERATURE ---
-             # We divide logits by temperature.
-             # Small temp (<1) makes confidence peaks higher (sharper).
              predictions = predictions / temperature
-
              predicted_id = tf.random.categorical(predictions, num_samples=1)[0, 0].numpy()

              input_ids = tf.concat([input_ids, [[predicted_id]]], axis=-1)
              result.append(idx2char[predicted_id])

          return prompt + "".join(result)
      except Exception as e:
          return f"Error: {str(e)}"

  # =========================================
  # 5. UI
  # =========================================
- iface = gr.Interface(
-     fn=generate_text,
-     inputs=[
-         gr.Textbox(label="Enter Prompt", value="The Veda is"),
-         gr.Slider(label="Length", minimum=10, maximum=500, value=200)
-     ],
-     outputs="text",
-     title="Veda AI",
-     description=f"Model trained on: {file_source} ({len(final_text)} characters)."
- )
-
- iface.launch()

New version (lines added by this commit are prefixed "+"):
@@ -3,59 +3,35 @@ import tensorflow as tf
  from tensorflow import keras
  from tensorflow.keras import layers
  import numpy as np
  import os
+ import json

  # =========================================
+ # 1. SETTINGS
  # =========================================
+ BLOCK_SIZE = 128
+ EMBED_DIM = 256
+ NUM_HEADS = 4
+ FF_DIM = 512
+ NUM_LAYERS = 2
+ BATCH_SIZE = 32  # CPU Safe batch size

+ # Paths to save the brain
+ MODEL_PATH = "veda_llm.weights.h5"
+ VOCAB_PATH = "vocab.json"

  # =========================================
+ # 2. CUSTOM ARCHITECTURE (YOUR ENGINE)
  # =========================================
  @tf.keras.utils.register_keras_serializable()
+ class TokenAndPositionEmbedding(layers.Layer):
      def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
          super().__init__(**kwargs)
          self.maxlen = maxlen
          self.vocab_size = vocab_size
          self.embed_dim = embed_dim
+         self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
+         self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

      def call(self, x):
          maxlen = tf.shape(x)[-1]
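
Note: the hunk cuts off inside call(); old lines 62–67 / new lines 38–43 are unchanged and therefore not shown by the diff. For orientation only, below is a minimal sketch of the standard Keras token-plus-position pattern that a call() with these attributes usually follows. This is an assumption, not the file's hidden lines:

    # Sketch only — the conventional body for a token+position embedding call():
    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)  # [0, 1, ..., maxlen-1]
        positions = self.pos_emb(positions)   # (maxlen, embed_dim) position vectors
        x = self.token_emb(x)                 # (batch, maxlen, embed_dim) token vectors
        return x + positions                  # broadcast add of the two embeddings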
@@ -68,20 +44,17 @@ class TokenAndPositionEmbedding(tf.keras.layers.Layer):
          return config

  @tf.keras.utils.register_keras_serializable()
+ class TransformerBlock(layers.Layer):
      def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
          super().__init__(**kwargs)
          self.embed_dim = embed_dim
          self.num_heads = num_heads
          self.ff_dim = ff_dim
          self.rate = rate
+         self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
+         self.ffn = keras.Sequential([layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)])
+         self.ln1 = layers.LayerNormalization(epsilon=1e-6)
+         self.ln2 = layers.LayerNormalization(epsilon=1e-6)

      def call(self, inputs):
          attn_output = self.att(inputs, inputs, use_causal_mask=True)
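
Note: this hunk also ends mid-call(); the unchanged remainder (old lines 88–92 / new lines 61–65) is not shown. Given the attributes defined above (att, ffn, ln1, ln2), the conventional residual-plus-LayerNorm wiring looks like the sketch below. It omits dropout even though the class stores self.rate, and it is an assumption rather than the file's actual hidden lines:

    # Sketch only — typical post-norm transformer block body:
    def call(self, inputs):
        attn_output = self.att(inputs, inputs, use_causal_mask=True)  # causal self-attention
        out1 = self.ln1(inputs + attn_output)   # residual add + LayerNorm
        ffn_output = self.ffn(out1)             # position-wise feed-forward
        return self.ln2(out1 + ffn_output)      # second residual add + LayerNorm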
@@ -93,100 +66,170 @@ class TransformerBlock(tf.keras.layers.Layer):
          config.update({"embed_dim": self.embed_dim, "num_heads": self.num_heads, "ff_dim": self.ff_dim, "rate": self.rate})
          return config

+ # Function to build the model structure
+ def build_llm(vocab_size):
+     inputs = layers.Input(shape=(BLOCK_SIZE,))
+     embedding_layer = TokenAndPositionEmbedding(BLOCK_SIZE, vocab_size, EMBED_DIM)
+     x = embedding_layer(inputs)
+     for _ in range(NUM_LAYERS):
+         x = TransformerBlock(EMBED_DIM, NUM_HEADS, FF_DIM)(x)
+     outputs = layers.Dense(vocab_size)(x)
+     return keras.Model(inputs=inputs, outputs=outputs)
+
+ # Global Variables to hold the active brain
+ current_model = None
+ char2idx = {}
+ idx2char = {}
+
  # =========================================
+ # 3. TRAINING FUNCTION (UPDATES BRAIN)
  # =========================================
+ def train_llm(file_obj, epochs):
+     global current_model, char2idx, idx2char
+
+     if file_obj is None:
+         yield "Error: Please upload a .txt file first."
+         return
+
+     # 1. Read the uploaded file
+     yield f"Reading {file_obj.name}..."
+     with open(file_obj.name, 'r', encoding='utf-8', errors='ignore') as f:
+         text = f.read()
+
+     if len(text) < BLOCK_SIZE:
+         yield "Error: Text is too short. Needs to be longer than 128 characters."
+         return
+
+     yield f"Loaded {len(text)} characters. Building Vocabulary..."
+
+     # 2. Build Vocabulary (The AI's Alphabet)
+     chars = sorted(list(set(text)))
+     vocab_size = len(chars)
+
+     # Update global mappings
+     char2idx = {c: i for i, c in enumerate(chars)}
+     idx2char = {i: c for i, c in enumerate(chars)}
+
+     # Save vocab immediately so Chat can use it
+     with open(VOCAB_PATH, "w") as f:
+         json.dump({"char2idx": char2idx, "idx2char": {str(k): v for k, v in idx2char.items()}}, f)
+
+     yield f"Vocab Size: {vocab_size}. Preparing Tensors..."
+
+     # 3. Create Dataset
+     all_ids = np.array([char2idx[c] for c in text])
+     text_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
+     sequences = text_dataset.batch(BLOCK_SIZE + 1, drop_remainder=True)
+
+     def split_input_target(chunk):
+         return chunk[:-1], chunk[1:]
+
+     dataset = sequences.map(split_input_target).shuffle(1000).batch(BATCH_SIZE)
+
+     # 4. Initialize New Brain
+     current_model = build_llm(vocab_size)
+     optimizer = keras.optimizers.Adam(learning_rate=0.001)  # High rate for fast learning
+     current_model.compile(optimizer=optimizer, loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True))
+
+     yield "Starting Training Loop..."
+
+     # 5. Training Loop
+     for epoch in range(int(epochs)):
+         history = current_model.fit(dataset, epochs=1)
+         loss = history.history['loss'][0]
+
+         # Save Weights
+         current_model.save_weights(MODEL_PATH)
+         yield f"Epoch {epoch+1}/{epochs} Complete. Loss: {loss:.4f}"
+
+     yield "Training Complete! Go to 'Chat' tab to test your new brain."
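
Note: the dataset pipeline in train_llm turns the flat id stream into next-character training pairs: each window of BLOCK_SIZE + 1 ids is split so the target is the input shifted left by one. A self-contained toy version (block size 4 instead of 128) shows the effect:

    import tensorflow as tf
    import numpy as np

    all_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    ds = tf.data.Dataset.from_tensor_slices(all_ids)
    ds = ds.batch(4 + 1, drop_remainder=True)   # non-overlapping windows of 5 ids

    def split_input_target(chunk):
        return chunk[:-1], chunk[1:]            # predict the next id at every position

    for x, y in ds.map(split_input_target):
        print(x.numpy(), "->", y.numpy())
    # [0 1 2 3] -> [1 2 3 4]
    # [5 6 7 8] -> [6 7 8 9]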
  # =========================================
+ # 4. CHAT FUNCTION
  # =========================================
+ def generate_text(prompt, length, temperature):
+     global current_model, char2idx, idx2char
+
+     # Try to load if not in memory
+     if current_model is None:
+         if os.path.exists(MODEL_PATH) and os.path.exists(VOCAB_PATH):
+             try:
+                 with open(VOCAB_PATH, "r") as f:
+                     data = json.load(f)
+                 char2idx = data["char2idx"]
+                 idx2char = {int(k): v for k, v in data["idx2char"].items()}
+
+                 vocab_size = len(char2idx)
+                 current_model = build_llm(vocab_size)
+                 current_model.load_weights(MODEL_PATH)
+             except:
+                 return "Error: No brain found. Please go to 'Train' tab and upload a file."
+         else:
+             return "Error: Model not trained yet. Upload text in 'Train' tab."
+
      try:
+         # Pre-process prompt
          input_ids = [char2idx.get(s, 0) for s in prompt]
+         if not input_ids: return "Error: Unknown characters."
+
          input_ids = tf.convert_to_tensor([input_ids], dtype=tf.int32)
          result = []

          for _ in range(int(length)):
+             # Pad if prompt is short, Crop if long
              current_len = tf.shape(input_ids)[1]
+             if current_len < BLOCK_SIZE:
+                 pad_amt = BLOCK_SIZE - current_len
                  padded = tf.pad(input_ids, [[0, 0], [pad_amt, 0]], constant_values=0)
              else:
+                 padded = input_ids[:, -BLOCK_SIZE:]

+             # Predict
+             predictions = current_model(padded)
+             predictions = predictions[:, -1, :]  # Last token

+             # Apply Temperature (Creativity)
              predictions = predictions / temperature
+
              predicted_id = tf.random.categorical(predictions, num_samples=1)[0, 0].numpy()

              input_ids = tf.concat([input_ids, [[predicted_id]]], axis=-1)
              result.append(idx2char[predicted_id])

          return prompt + "".join(result)
+
      except Exception as e:
          return f"Error: {str(e)}"

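
Note: generation samples from the logits with tf.random.categorical after dividing by temperature; values below 1 sharpen the distribution (safer, more repetitive output), values above 1 flatten it (more surprising output). A standalone sketch with made-up logits:

    import tensorflow as tf

    logits = tf.constant([[2.0, 1.0, 0.1]])    # made-up next-character logits

    for temperature in (0.2, 0.6, 2.0):
        scaled = logits / temperature          # <1 sharpens, >1 flattens
        probs = tf.nn.softmax(scaled).numpy()[0]
        next_id = tf.random.categorical(scaled, num_samples=1)[0, 0].numpy()
        print(temperature, probs.round(3), next_id)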
  # =========================================
  # 5. UI
  # =========================================
+ def train_wrapper(file, epochs):
+     for update in train_llm(file, epochs):
+         yield update
+
+ with gr.Blocks(title="Veda LLM Trainer") as demo:
+     gr.Markdown("# Veda LLM Trainer")
+
+     with gr.Tab("Chat"):
+         gr.Markdown("Talk to the model you trained.")
+         prompt_input = gr.Textbox(label="Prompt", value="The Veda is")
+         with gr.Row():
+             len_slider = gr.Slider(10, 500, value=200, label="Length")
+             temp_slider = gr.Slider(0.1, 2.0, value=0.6, label="Temperature (Low = Safe, High = Crazy)")
+
+         chat_btn = gr.Button("Generate", variant="primary")
+         output_text = gr.Textbox(label="Response")
+
+         chat_btn.click(generate_text, inputs=[prompt_input, len_slider, temp_slider], outputs=output_text)
+
+     with gr.Tab("Train New Dataset"):
+         gr.Markdown("Upload a **.txt** file to wipe the brain and teach it new knowledge.")
+         file_input = gr.File(label="Upload Text File", file_types=[".txt"])
+         epoch_slider = gr.Slider(1, 50, value=10, step=1, label="Epochs")
+         train_btn = gr.Button("Train LLM")
+         log_box = gr.Textbox(label="Training Log")
+
+         train_btn.click(train_wrapper, inputs=[file_input, epoch_slider], outputs=log_box)
+
+ demo.launch()
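
Two details of the new version worth noting. First, train_llm is a generator, and Gradio streams each yielded string from a generator callback into the wired output component, so the Training Log box updates after every epoch (train_wrapper simply re-yields the updates). Second, JSON object keys are always strings, which is why idx2char is serialized with str(k) keys in train_llm and rebuilt with int(k) in generate_text. A minimal round trip with a hypothetical two-character vocabulary:

    import json

    idx2char = {0: "a", 1: "b"}   # hypothetical vocabulary
    dumped = json.dumps({str(k): v for k, v in idx2char.items()})
    restored = {int(k): v for k, v in json.loads(dumped).items()}
    assert restored == idx2char   # integer keys survive the round trip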