rahul7star committed on
Commit
cfddc04
·
verified ·
1 Parent(s): 1addca6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -95
app.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- OhamLab — AI Intelligence
3
  Loads knowledge from rahul7star/OhamLab-LLM markdown corpus, caches embeddings,
4
  and provides retrieval-augmented chat through Hugging Face router.
5
  """
@@ -15,9 +15,9 @@ import gradio as gr
15
  from openai import OpenAI
16
  from huggingface_hub import HfApi, hf_hub_download, list_repo_files
17
 
18
- # =========================================================
19
  # 1. Configuration
20
- # =========================================================
21
  HF_TOKEN = (
22
  os.environ.get("HF_TOKEN")
23
  or os.environ.get("OPENAI_API_KEY")
@@ -26,32 +26,28 @@ HF_TOKEN = (
26
  if not HF_TOKEN:
27
  raise RuntimeError("❌ Missing HF_TOKEN / OPENAI_API_KEY / HUGGINGFACE_TOKEN environment variable.")
28
 
29
- MODEL_ID = "openai/gpt-4o-mini" # Chat model
30
- EMBED_MODEL = "text-embedding-3-small" # Embedding model
31
- HF_REPO = "rahul7star/OhamLab-LLM"
32
- CACHE_PATH = "/tmp/ohamlab_emb_cache.json"
33
 
 
34
  client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=HF_TOKEN)
35
  api = HfApi(token=HF_TOKEN)
36
 
37
-
38
- # =========================================================
39
- # 2. Load Markdown Corpus
40
- # =========================================================
41
  def load_ohamlab_knowledge():
42
- """Load all markdown files and split into ~500-character chunks."""
43
- print(f"📂 Loading markdown files from {HF_REPO}...")
44
  files = list_repo_files(HF_REPO, repo_type="model", token=HF_TOKEN)
45
  md_files = [f for f in files if f.endswith(".md")]
46
  chunks = []
47
-
48
  for f in md_files:
49
  try:
50
  path = hf_hub_download(HF_REPO, filename=f, token=HF_TOKEN)
51
  with open(path, "r", encoding="utf-8") as fh:
52
- content = fh.read().strip()
53
-
54
- # clean + split into ~500 chars
55
  buf = ""
56
  for line in content.splitlines():
57
  buf += line.strip() + " "
@@ -60,71 +56,54 @@ def load_ohamlab_knowledge():
60
  buf = ""
61
  if buf:
62
  chunks.append({"file": f, "text": buf.strip()})
63
- print(f"✅ Loaded {f} ({len(content)} chars)")
64
  except Exception as e:
65
  print(f"⚠️ Failed to load {f}: {e}")
66
-
67
- print(f"📘 Total chunks: {len(chunks)}")
68
  return chunks
69
 
70
-
71
- # =========================================================
72
- # 3. Embedding Generation with Retry + Cache
73
- # =========================================================
74
- def create_embeddings_with_retry(texts, retries=3, delay=2):
75
- """Create embeddings with retry logic."""
76
- for attempt in range(1, retries + 1):
77
- try:
78
- response = client.embeddings.create(model=EMBED_MODEL, input=texts)
79
- return [d.embedding for d in response.data]
80
- except Exception as e:
81
- print(f"⚠️ Embedding attempt {attempt} failed: {e}")
82
- if attempt == retries:
83
- raise RuntimeError("❌ Failed to generate embeddings after retries.")
84
- time.sleep(delay)
85
-
86
-
87
  def get_embeddings_with_cache():
88
- """Generate or load cached embeddings."""
89
  if os.path.exists(CACHE_PATH):
90
  try:
91
- with open(CACHE_PATH, "r", encoding="utf-8") as f:
92
  cache = json.load(f)
93
  texts = [c["text"] for c in cache]
94
  embs = np.array([c["embedding"] for c in cache])
95
- print(f"✅ Loaded cached embeddings ({len(embs)} chunks)")
96
  return texts, embs
97
- except Exception as e:
98
- print(f"⚠️ Cache load failed: {e}")
99
 
100
- # Load and embed new
101
  chunks = load_ohamlab_knowledge()
102
  texts = [c["text"] for c in chunks]
103
- print(f"📘 Generating embeddings for {len(texts)} chunks...")
104
-
105
  all_embs = []
106
  for i in range(0, len(texts), 50):
107
  batch = texts[i:i + 50]
108
- embs = create_embeddings_with_retry(batch)
109
- all_embs.extend(embs)
 
 
 
 
 
110
  time.sleep(0.5)
111
 
112
  data = [{"text": t, "embedding": e} for t, e in zip(texts, all_embs)]
113
- with open(CACHE_PATH, "w", encoding="utf-8") as f:
114
  json.dump(data, f)
115
  print(f"💾 Cached embeddings to {CACHE_PATH}")
116
-
117
  return texts, np.array(all_embs)
118
 
119
-
120
  OHAMLAB_TEXTS, OHAMLAB_EMBS = get_embeddings_with_cache()
121
 
122
-
123
- # =========================================================
124
- # 4. Retrieval
125
- # =========================================================
126
  def retrieve_knowledge(query, top_k=3):
127
- """Return top-k most relevant text snippets."""
128
  try:
129
  q_emb = client.embeddings.create(model=EMBED_MODEL, input=[query]).data[0].embedding
130
  sims = np.dot(OHAMLAB_EMBS, q_emb) / (
@@ -136,94 +115,202 @@ def retrieve_knowledge(query, top_k=3):
136
  print(f"⚠️ Retrieval error: {e}")
137
  return ""
138
 
139
-
140
- # =========================================================
141
- # 5. Prompt Construction
142
- # =========================================================
143
- def build_system_prompt(context, mode="chat"):
144
  return textwrap.dedent(f"""
145
- You are OhamLab — an AI Research Assistant for OhamLab.
146
 
147
- Rules:
148
- - Use precise, factual, and confident tone.
149
- - If relevant, use retrieved OhamLab context.
150
- - If unknown, politely say "I could not find an answer in my knowledge base."
 
151
  - Mode: {mode.upper()}
152
 
153
- --- OhamLab Context ---
154
  {context[:1800]}
155
  --- End Context ---
156
  """).strip()
157
 
158
-
159
- # =========================================================
160
- # 6. Chat Generation
161
- # =========================================================
162
  def generate_response(user_input, history, mode="chat"):
163
  context = retrieve_knowledge(user_input)
164
  sys_prompt = build_system_prompt(context, mode)
165
-
166
  messages = [{"role": "system", "content": sys_prompt}] + history + [
167
  {"role": "user", "content": user_input}
168
  ]
169
-
170
  try:
171
  resp = client.chat.completions.create(
172
  model=MODEL_ID,
173
  messages=messages,
174
  temperature=0.7,
175
- max_tokens=800,
176
  )
177
  return resp.choices[0].message.content.strip()
178
  except Exception as e:
179
  print(f"⚠️ Model call failed: {e}")
180
- return "⚠️ OhamLab encountered a temporary issue."
 
 
 
 
 
 
 
 
 
 
 
181
 
182
 
183
- # =========================================================
184
- # 7. Gradio UI
185
- # =========================================================
186
  def chat_with_model(user_message, chat_history):
 
 
 
 
187
  if not user_message:
188
  return chat_history, ""
189
 
190
- chat_history = chat_history or []
191
- history = [{"role": m["role"], "content": m["content"]} for m in chat_history]
 
 
 
 
 
 
 
 
 
192
  history.append({"role": "user", "content": user_message})
193
 
194
  try:
195
  bot_reply = generate_response(user_message, history)
196
- except Exception:
197
- bot_reply = f"⚠️ Internal error:\n\n{traceback.format_exc()}"
 
198
 
 
199
  history.append({"role": "assistant", "content": bot_reply})
 
200
  return history, ""
201
 
202
 
203
  def reset_chat():
 
204
  return []
205
 
206
 
 
 
 
 
207
  def build_ui():
208
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  chatbot = gr.Chatbot(
210
  label="💠 OhamLab Conversation",
211
  height=520,
212
  elem_id="ohamlab",
213
  type="messages",
 
214
  )
215
 
216
- msg = gr.Textbox(
217
- placeholder="Ask OhamLab anything...",
218
- lines=3,
219
- show_label=False,
220
- container=False,
221
- )
222
-
223
  with gr.Row():
224
- send = gr.Button("Send", variant="primary")
225
- clear = gr.Button("Clear", variant="secondary")
226
-
 
 
 
 
 
 
 
 
 
 
 
227
  send.click(chat_with_model, inputs=[msg, chatbot], outputs=[chatbot, msg])
228
  msg.submit(chat_with_model, inputs=[msg, chatbot], outputs=[chatbot, msg])
229
  clear.click(reset_chat, outputs=chatbot)
@@ -231,9 +318,9 @@ def build_ui():
231
  return demo
232
 
233
 
234
- # =========================================================
235
- # 8. Entrypoint
236
- # =========================================================
237
  if __name__ == "__main__":
238
  print("🚀 Starting OhamLab Assistant...")
239
  demo = build_ui()
 
1
  """
2
+ OhamLab — AI Intelligence
3
  Loads knowledge from rahul7star/OhamLab-LLM markdown corpus, caches embeddings,
4
  and provides retrieval-augmented chat through Hugging Face router.
5
  """
 
15
  from openai import OpenAI
16
  from huggingface_hub import HfApi, hf_hub_download, list_repo_files
17
 
18
+ # ---------------------------
19
  # 1. Configuration
20
+ # ---------------------------
21
  HF_TOKEN = (
22
  os.environ.get("HF_TOKEN")
23
  or os.environ.get("OPENAI_API_KEY")
 
26
  if not HF_TOKEN:
27
  raise RuntimeError("❌ Missing HF_TOKEN / OPENAI_API_KEY / HUGGINGFACE_TOKEN environment variable.")
28
 
29
+ MODEL_ID = "openai/gpt-oss-20b" # Chat model (via HF router)
30
+ EMBED_MODEL = "text-embedding-3-small" # Embedding model
31
+ HF_REPO = "rahul7star/OhamLab-LLM" # Knowledge repo
32
+ CACHE_PATH = "/tmp/ohamlab_emb_cache.json" # Cache file
33
 
34
+ # Client
35
  client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=HF_TOKEN)
36
  api = HfApi(token=HF_TOKEN)
37
 
38
+ # ---------------------------
39
+ # 2. Load and Chunk Markdown Files
40
+ # ---------------------------
 
41
  def load_ohamlab_knowledge():
42
+ """Loads all .md files from Hugging Face repo and splits into ~500-char chunks."""
 
43
  files = list_repo_files(HF_REPO, repo_type="model", token=HF_TOKEN)
44
  md_files = [f for f in files if f.endswith(".md")]
45
  chunks = []
 
46
  for f in md_files:
47
  try:
48
  path = hf_hub_download(HF_REPO, filename=f, token=HF_TOKEN)
49
  with open(path, "r", encoding="utf-8") as fh:
50
+ content = fh.read()
 
 
51
  buf = ""
52
  for line in content.splitlines():
53
  buf += line.strip() + " "
 
56
  buf = ""
57
  if buf:
58
  chunks.append({"file": f, "text": buf.strip()})
 
59
  except Exception as e:
60
  print(f"⚠️ Failed to load {f}: {e}")
 
 
61
  return chunks
62
 
63
+ # ---------------------------
64
+ # 3. Generate or Load Embeddings (with Cache)
65
+ # ---------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
def get_embeddings_with_cache():
    """Return ``(texts, embeddings)`` for the OhamLab corpus, using a JSON cache.

    If ``CACHE_PATH`` exists and can be parsed, its contents are returned
    directly. Otherwise the corpus is loaded with ``load_ohamlab_knowledge()``,
    embedded in batches of 50 via ``client.embeddings.create`` and written to
    ``CACHE_PATH`` for the next start-up.

    A batch whose embedding request fails is filled with zero vectors so the
    embedding matrix stays aligned with ``texts``. When that happens the
    result is NOT written to the cache: persisting placeholder vectors would
    turn a transient API failure into a permanent one, because every later
    start-up would load the zeros instead of retrying.

    Returns:
        tuple: the list of chunk texts and a NumPy array built from their
        embeddings (one row per text).
    """
    if os.path.exists(CACHE_PATH):
        try:
            with open(CACHE_PATH, "r", encoding="utf-8") as f:
                cache = json.load(f)
            texts = [c["text"] for c in cache]
            embs = np.array([c["embedding"] for c in cache])
            print(f"✅ Loaded cached embeddings from {CACHE_PATH} ({len(embs)} chunks)")
            return texts, embs
        except Exception:
            # Best-effort cache: any read/parse problem falls through to a
            # full regeneration below instead of aborting start-up.
            print("⚠️ Cache corrupted, regenerating embeddings...")

    chunks = load_ohamlab_knowledge()
    texts = [c["text"] for c in chunks]
    print(f"📘 Generating embeddings for {len(texts)} OhamLab chunks...")

    all_embs = []
    had_failure = False
    for i in range(0, len(texts), 50):
        batch = texts[i:i + 50]
        try:
            res = client.embeddings.create(model=EMBED_MODEL, input=batch)
            # Materialise first so a failure while reading the response
            # cannot leave a partially-extended list behind.
            embs = [d.embedding for d in res.data]
            all_embs.extend(embs)
        except Exception as e:
            had_failure = True
            print(f"⚠️ Embedding batch failed ({i}): {e}")
            # Placeholder rows keep indices aligned with `texts`.
            # NOTE(review): 1536 is presumably the output width of
            # EMBED_MODEL; confirm if the model is changed.
            all_embs.extend([0.0] * 1536 for _ in batch)
        time.sleep(0.5)  # small pause between batches

    if had_failure:
        print("⚠️ Some embedding batches failed; not caching so the next start retries.")
    else:
        data = [{"text": t, "embedding": e} for t, e in zip(texts, all_embs)]
        with open(CACHE_PATH, "w", encoding="utf-8") as f:
            json.dump(data, f)
        print(f"💾 Cached embeddings to {CACHE_PATH}")

    return texts, np.array(all_embs)
99
 
 
100
  OHAMLAB_TEXTS, OHAMLAB_EMBS = get_embeddings_with_cache()
101
 
102
+ # ---------------------------
103
+ # 4. Semantic Retrieval
104
+ # ---------------------------
 
105
  def retrieve_knowledge(query, top_k=3):
106
+ """Retrieve top-k most relevant text snippets."""
107
  try:
108
  q_emb = client.embeddings.create(model=EMBED_MODEL, input=[query]).data[0].embedding
109
  sims = np.dot(OHAMLAB_EMBS, q_emb) / (
 
115
  print(f"⚠️ Retrieval error: {e}")
116
  return ""
117
 
118
+ # ---------------------------
119
+ # 5. System Prompt with Context Injection
120
+ # ---------------------------
121
def build_system_prompt(context: str, mode: str = "chat") -> str:
    """Build the system prompt with the retrieved context embedded in it.

    The template is dedented *before* the values are substituted. Dedenting
    after interpolation breaks as soon as ``context`` contains a newline:
    the unindented context lines remove the common margin, so
    ``textwrap.dedent`` strips nothing and the prompt keeps the source
    indentation on every line.

    Args:
        context: Retrieved knowledge snippets. Only the first 1800 characters
            are used. ``None`` or an empty string yields an empty context
            section.
        mode: Label inserted upper-cased on the ``- Mode:`` line.

    Returns:
        The prompt text with surrounding whitespace stripped.
    """
    template = textwrap.dedent(
        """
        You are OhamLab — AI Intelligence Software

        Guidelines:
        - Always answer with clarity, scientific accuracy, and concise insight.
        - Incorporate OhamLab research knowledge when relevant.
        - Avoid code unless explicitly requested.
        - Be confident but label speculation clearly.
        - Mode: {mode}

        --- OhamLab Context (Retrieved Snippets) ---
        {context}
        --- End Context ---
        """
    ).strip()
    # str.format only parses the template, so braces inside the substituted
    # context are inserted verbatim.
    return template.format(mode=mode.upper(), context=(context or "")[:1800])
136
 
137
+ # ---------------------------
138
+ # 6. Model Call
139
+ # ---------------------------
 
140
def generate_response(user_input, history, mode="chat"):
    """Answer ``user_input`` with the chat model, grounded in retrieved context.

    Retrieves knowledge for the query, builds the system prompt from it and
    sends ``system + history + current user message`` to ``MODEL_ID``.

    Args:
        user_input: The current user message.
        history: Earlier turns as ``{"role", "content"}`` dicts. If the last
            entry already is the current user message, it is not appended a
            second time, so the model never sees the question twice.
        mode: Passed through to ``build_system_prompt``.

    Returns:
        The stripped model reply, or a fixed warning string if the model call
        (or reading its response) raises.
    """
    context = retrieve_knowledge(user_input)
    sys_prompt = build_system_prompt(context, mode)

    current_turn = {"role": "user", "content": user_input}
    messages = [{"role": "system", "content": sys_prompt}] + history
    # Guard against callers that pre-append the current message to history.
    if not history or history[-1] != current_turn:
        messages.append(current_turn)

    try:
        resp = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
            temperature=0.7,
            max_tokens=1200,
        )
        return resp.choices[0].message.content.strip()
    except Exception as e:
        # Deliberate boundary: log the cause, show the user a generic notice.
        print(f"⚠️ Model call failed: {e}")
        return "⚠️ OhamLab encountered a temporary issue generating your response."
157
+
158
+ # ---------------------------
159
+ # 7. Gradio Chat UI
160
+ # ---------------------------
161
+ import traceback
162
+ import gradio as gr
163
+
164
+ # ---------------------------
165
+ # Chat Logic
166
+ # ---------------------------
167
+
168
 
169
 
 
 
 
170
def chat_with_model(user_message, chat_history):
    """Handle one chat turn and return the updated history for the Chatbot.

    Args:
        user_message: Text from the input box. A falsy value is a no-op.
        chat_history: Current Chatbot value, a list of ``{"role", "content"}``
            dicts, or ``None``. Entries that are not dicts or lack either key
            are dropped.

    Returns:
        tuple: ``(history, "")``, the new message list followed by an empty
        string that clears the input box. For a falsy ``user_message`` the
        incoming ``chat_history`` is returned unchanged.
    """
    if not user_message:
        return chat_history, ""

    # Keep only well-formed messages. Requiring "content" as well as "role"
    # avoids a KeyError on partial entries.
    history = [
        {"role": m["role"], "content": m["content"]}
        for m in (chat_history or [])
        if isinstance(m, dict) and "role" in m and "content" in m
    ]

    try:
        # Pass only the earlier turns: generate_response adds the current
        # user message itself, so appending it first would send it twice.
        bot_reply = generate_response(user_message, history)
    except Exception as e:
        tb = traceback.format_exc()
        bot_reply = f"⚠️ OhamLab encountered an error:\n\n{e}\n\n{tb}"

    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": bot_reply})
    return history, ""
201
 
202
 
203
def reset_chat():
    """Return a fresh, empty message list to clear the conversation."""
    cleared_history = list()
    return cleared_history
206
 
207
 
208
+ # ---------------------------
209
+ # Gradio Chat UI
210
+ # ---------------------------
211
+
212
  def build_ui():
213
+ with gr.Blocks(
214
+ theme=gr.themes.Soft(primary_hue="indigo"),
215
+ css="""
216
+ /* --- Hide share/delete icons --- */
217
+ #ohamlab .wrap.svelte-1lcyrj3 > div > div > button {
218
+ display: none !important;
219
+ }
220
+ [data-testid="share-btn"],
221
+ [data-testid="delete-btn"],
222
+ .message-controls,
223
+ .message-actions {
224
+ display: none !important;
225
+ visibility: hidden !important;
226
+ }
227
+
228
+ /* --- User (Right) Message Bubble --- */
229
+ #ohamlab .message.user {
230
+ background-color: #4f46e5 !important;
231
+ color: white !important;
232
+ border-radius: 14px !important;
233
+ align-self: flex-end !important;
234
+ text-align: right !important;
235
+ margin-left: 25%;
236
+ }
237
+
238
+ /* --- OhamLab (Left) Message Bubble --- */
239
+ #ohamlab .message.assistant {
240
+ background-color: #f8f9fa !important;
241
+ color: #111 !important;
242
+ border-radius: 14px !important;
243
+ align-self: flex-start !important;
244
+ text-align: left !important;
245
+ margin-right: 25%;
246
+ }
247
+
248
+
249
+ #ohamlab .chatbot .wrap.svelte-1lcyrj3 > div > div > button {
250
+ display: none !important; /* hide share/delete icons */
251
+ }
252
+
253
+ /* --- Overall Container --- */
254
+ .gradio-container {
255
+ max-width: 900px !important;
256
+ margin: auto;
257
+ padding-top: .5rem;
258
+ }
259
+ textarea {
260
+ resize: none !important;
261
+ border-radius: 12px !important;
262
+ border: 1px solid #d1d5db !important;
263
+ box-shadow: 0 1px 3px rgba(0,0,0,0.08);
264
+ }
265
+ button.primary {
266
+ background-color: #4f46e5 !important;
267
+ color: white !important;
268
+ border-radius: 10px !important;
269
+ padding: 0.6rem 1.4rem !important;
270
+ font-weight: 600;
271
+ transition: all 0.2s ease-in-out;
272
+ }
273
+ button.primary:hover {
274
+ background-color: #4338ca !important;
275
+ }
276
+ button.secondary {
277
+ background-color: #f3f4f6 !important;
278
+ border-radius: 10px !important;
279
+ color: #374151 !important;
280
+ font-weight: 500;
281
+ transition: all 0.2s ease-in-out;
282
+ }
283
+ button.secondary:hover {
284
+ background-color: #e5e7eb !important;
285
+ }
286
+ """,
287
+ ) as demo:
288
+
289
+ # Chatbot area
290
  chatbot = gr.Chatbot(
291
  label="💠 OhamLab Conversation",
292
  height=520,
293
  elem_id="ohamlab",
294
  type="messages",
295
+ avatar_images=[None, None],
296
  )
297
 
298
+ # Input box (full width)
 
 
 
 
 
 
299
  with gr.Row():
300
+ msg = gr.Textbox(
301
+ placeholder="Ask OhamLab anything ..",
302
+ lines=3,
303
+ show_label=False,
304
+ scale=12,
305
+ container=False,
306
+ )
307
+
308
+ # Buttons (Send + Clear)
309
+ with gr.Row(equal_height=True, variant="compact"):
310
+ send = gr.Button("Send", variant="primary", elem_classes=["primary"])
311
+ clear = gr.Button("Clear", variant="secondary", elem_classes=["secondary"])
312
+
313
+ # Wiring
314
  send.click(chat_with_model, inputs=[msg, chatbot], outputs=[chatbot, msg])
315
  msg.submit(chat_with_model, inputs=[msg, chatbot], outputs=[chatbot, msg])
316
  clear.click(reset_chat, outputs=chatbot)
 
318
  return demo
319
 
320
 
321
+ # ---------------------------
322
+ # Entrypoint
323
+ # ---------------------------
324
  if __name__ == "__main__":
325
  print("🚀 Starting OhamLab Assistant...")
326
  demo = build_ui()