rahul7star commited on
Commit
d2e9782
·
verified ·
1 Parent(s): 8f7a323

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +165 -74
app.py CHANGED
@@ -1,15 +1,22 @@
1
- import os, time, json, numpy as np, logging
2
- from typing import List
3
- from huggingface_hub import HfApi, hf_hub_download, list_repo_files
4
- from sentence_transformers import SentenceTransformer
5
- from openai import OpenAI
6
 
7
- # Logging setup
8
- logging.basicConfig(level=logging.INFO)
9
- logger = logging.getLogger("ohamlab_agent")
 
 
 
 
 
 
 
10
 
11
  # ---------------------------
12
- # Environment / Config
13
  # ---------------------------
14
  HF_TOKEN = (
15
  os.environ.get("HF_TOKEN")
@@ -17,101 +24,185 @@ HF_TOKEN = (
17
  or os.environ.get("HUGGINGFACE_TOKEN")
18
  )
19
  if not HF_TOKEN:
20
- raise RuntimeError("Missing HF_TOKEN / OPENAI_API_KEY / HUGGINGFACE_TOKEN.")
21
-
22
- CHAT_MODEL_ID = "openai/gpt-oss-20b" # via Hugging Face router
23
- EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
24
 
25
- HF_REPO = "rahul7star/OhamLab-LLM"
26
- HF_REPO_DIR = "./hf_capsules"
27
- os.makedirs(HF_REPO_DIR, exist_ok=True)
 
28
 
29
- # ---------------------------
30
- # Clients
31
- # ---------------------------
32
- try:
33
- client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=HF_TOKEN)
34
- logger.info("✅ OpenAI client via Hugging Face router initialized.")
35
- except Exception as e:
36
- logger.exception("Failed initializing chat client.")
37
- raise
38
-
39
- embedder = SentenceTransformer(EMBED_MODEL_ID)
40
- logger.info(f"✅ Loaded local embedding model: {EMBED_MODEL_ID}")
41
 
42
  # ---------------------------
43
- # Load Markdown Knowledge
44
  # ---------------------------
45
- def load_markdown_files(repo_id: str, local_dir: str) -> List[str]:
46
- api = HfApi(token=HF_TOKEN)
47
- files = list_repo_files(repo_id, repo_type="model", token=HF_TOKEN)
48
  md_files = [f for f in files if f.endswith(".md")]
49
- logger.info(f"📘 Found {len(md_files)} markdown files.")
50
-
51
  chunks = []
52
  for f in md_files:
53
  try:
54
- path = hf_hub_download(repo_id=repo_id, filename=f, local_dir=local_dir, token=HF_TOKEN)
55
  with open(path, "r", encoding="utf-8") as fh:
56
  content = fh.read()
57
  buf = ""
58
  for line in content.splitlines():
59
  buf += line.strip() + " "
60
- if len(buf) > 500:
61
- chunks.append(buf.strip())
62
  buf = ""
63
  if buf:
64
- chunks.append(buf.strip())
65
  except Exception as e:
66
- logger.warning(f"⚠️ Failed to read {f}: {e}")
67
- logger.info(f"✅ Loaded {len(chunks)} text chunks.")
68
  return chunks
69
 
70
- KNOWLEDGE_CHUNKS = load_markdown_files(HF_REPO, HF_REPO_DIR)
71
- logger.info("📊 Creating embeddings...")
72
- KNOWLEDGE_EMBS = embedder.encode(KNOWLEDGE_CHUNKS, normalize_embeddings=True)
73
- logger.info(f"🧠 Knowledge base ready ({len(KNOWLEDGE_CHUNKS)} chunks).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # ---------------------------
76
- # Retrieval
77
  # ---------------------------
78
- def get_relevant_context(query: str, top_k: int = 3) -> str:
79
- q_emb = embedder.encode([query], normalize_embeddings=True)[0]
80
- sims = np.dot(KNOWLEDGE_EMBS, q_emb)
81
- top_idx = np.argsort(sims)[-top_k:][::-1]
82
- return "\n\n".join(KNOWLEDGE_CHUNKS[i] for i in top_idx)
 
 
 
 
 
 
 
83
 
84
  # ---------------------------
85
- # Chat
86
  # ---------------------------
87
- SYSTEM_PROMPT = (
88
- "You are OhamLab AI — factual, concise, and context-aware.\n"
89
- "If applicable, use knowledge from OhamLab Markdown corpus."
90
- )
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- def chat(query: str, history: List[dict]) -> str:
93
- context = get_relevant_context(query)
94
- user_input = f"{query}\n\n[Context]\n{context[:1200]}" if context else query
95
- msgs = history + [{"role": "user", "content": user_input}]
 
 
 
 
 
96
  try:
97
  resp = client.chat.completions.create(
98
- model=CHAT_MODEL_ID,
99
- messages=msgs,
100
- temperature=0.6,
101
- max_tokens=700,
102
  )
103
  return resp.choices[0].message.content.strip()
104
  except Exception as e:
105
- logger.error(f"Chat error: {e}")
106
- return "There was a problem generating the response."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  if __name__ == "__main__":
109
- logger.info("🚀 OhamLab AI — Knowledge Chat Ready")
110
- hist = [{"role": "system", "content": SYSTEM_PROMPT}]
111
- while True:
112
- q = input("\n💬 Ask → ").strip()
113
- if q.lower() in ["exit", "quit"]:
114
- break
115
- ans = chat(q, hist)
116
- print("\n🤖", ans)
117
- hist.extend([{"role": "user", "content": q}, {"role": "assistant", "content": ans}])
 
1
+ """
2
+ OhamLab Aerelyth Dialectical Intelligence (RAG-Enhanced)
3
+ Loads knowledge from rahul7star/OhamLab-LLM markdown corpus, caches embeddings,
4
+ and provides retrieval-augmented chat through Hugging Face router.
5
+ """
6
 
7
+ import os
8
+ import re
9
+ import json
10
+ import time
11
+ import textwrap
12
+ import traceback
13
+ import numpy as np
14
+ import gradio as gr
15
+ from openai import OpenAI
16
+ from huggingface_hub import HfApi, hf_hub_download, list_repo_files
17
 
18
# ---------------------------
# 1. Configuration
# ---------------------------
# Accept any of the three conventional env-var names for the HF router token.
# NOTE(review): the middle fallback is a collapsed context line in the diff,
# reconstructed from the error message below — confirm against the repo.
HF_TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("OPENAI_API_KEY")
    or os.environ.get("HUGGINGFACE_TOKEN")
)
if not HF_TOKEN:
    raise RuntimeError("Missing HF_TOKEN / OPENAI_API_KEY / HUGGINGFACE_TOKEN environment variable.")

MODEL_ID = "openai/gpt-oss-20b"             # Chat model (via HF router)
EMBED_MODEL = "text-embedding-3-small"      # Embedding model
HF_REPO = "rahul7star/OhamLab-LLM"          # Knowledge repo
CACHE_PATH = "/tmp/ohamlab_emb_cache.json"  # Cache file

# Shared API clients (module-level singletons).
client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=HF_TOKEN)
api = HfApi(token=HF_TOKEN)
38
# ---------------------------
# 2. Load and Chunk Markdown Files
# ---------------------------
def load_ohamlab_knowledge():
    """Download every .md file from the knowledge repo and split into ~500-char chunks.

    Returns:
        list[dict]: items of the form {"file": <repo filename>, "text": <chunk text>}.
        Files that fail to download or read are skipped with a warning.
    """
    files = list_repo_files(HF_REPO, repo_type="model", token=HF_TOKEN)
    md_files = [f for f in files if f.endswith(".md")]

    chunks = []
    for f in md_files:
        try:
            path = hf_hub_download(HF_REPO, filename=f, token=HF_TOKEN)
            with open(path, "r", encoding="utf-8") as fh:
                content = fh.read()
            buf = ""
            for line in content.splitlines():
                buf += line.strip() + " "
                if len(buf) >= 500:
                    chunks.append({"file": f, "text": buf.strip()})
                    buf = ""
            # Fix: guard the remainder with .strip() — a file ending in blank
            # lines leaves buf as pure whitespace (truthy), which previously
            # appended a useless empty-text chunk.
            if buf.strip():
                chunks.append({"file": f, "text": buf.strip()})
        except Exception as e:
            print(f"⚠️ Failed to load {f}: {e}")
    return chunks
62
 
63
# ---------------------------
# 3. Generate or Load Embeddings (with Cache)
# ---------------------------
def get_embeddings_with_cache():
    """Return (texts, embeddings) for the OhamLab corpus, using a JSON cache.

    Loads the cache at CACHE_PATH when present and readable; otherwise
    re-chunks the corpus, embeds it in batches via the router, and rewrites
    the cache. Returns (list[str], np.ndarray of shape (n_chunks, dim)).
    """
    if os.path.exists(CACHE_PATH):
        try:
            with open(CACHE_PATH, "r") as f:
                cache = json.load(f)
            texts = [c["text"] for c in cache]
            embs = np.array([c["embedding"] for c in cache])
            print(f"✅ Loaded cached embeddings from {CACHE_PATH} ({len(embs)} chunks)")
            return texts, embs
        except Exception:
            print("⚠️ Cache corrupted, regenerating embeddings...")

    chunks = load_ohamlab_knowledge()
    texts = [c["text"] for c in chunks]
    print(f"📘 Generating embeddings for {len(texts)} OhamLab chunks...")
    batch_size = 50
    all_embs = []
    all_ok = True   # only persist the cache after a fully successful run
    emb_dim = None  # inferred from the first successful batch
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        try:
            res = client.embeddings.create(model=EMBED_MODEL, input=batch)
            embs = [d.embedding for d in res.data]
            if embs and emb_dim is None:
                emb_dim = len(embs[0])
            all_embs.extend(embs)
        except Exception as e:
            print(f"⚠️ Embedding batch failed ({i}): {e}")
            all_ok = False
            # Zero vectors keep indices aligned with `texts`. Fix: infer the
            # dimension from a successful batch instead of hard-coding 1536,
            # which only matched one specific model.
            all_embs.extend([[0.0] * (emb_dim or 1536)] * len(batch))
        time.sleep(0.5)  # gentle rate limiting between batches

    if all_ok and texts:
        # Fix: the old code cached unconditionally, so zero-vector fallbacks
        # from transient failures were frozen into the cache and never retried.
        try:
            data = [{"text": t, "embedding": e} for t, e in zip(texts, all_embs)]
            with open(CACHE_PATH, "w") as f:
                json.dump(data, f)
            print(f"💾 Cached embeddings to {CACHE_PATH}")
        except OSError as e:
            print(f"⚠️ Could not write cache: {e}")
    return texts, np.array(all_embs)

OHAMLAB_TEXTS, OHAMLAB_EMBS = get_embeddings_with_cache()
101
 
102
# ---------------------------
# 4. Semantic Retrieval
# ---------------------------
def retrieve_knowledge(query, top_k=3):
    """Return the top-k most relevant knowledge snippets, joined by blank lines.

    Embeds the query, ranks corpus chunks by cosine similarity, and returns
    the best `top_k` texts. Falls back to "" on any error or empty corpus.
    """
    try:
        # Guard: with an empty knowledge base there is nothing to rank.
        if len(OHAMLAB_TEXTS) == 0:
            return ""
        q_emb = client.embeddings.create(model=EMBED_MODEL, input=[query]).data[0].embedding
        # Cosine similarity. Fix: the small epsilon prevents a divide-by-zero
        # (NaN similarities) for zero-norm rows — the zero-vector fallback
        # written when an embedding batch failed upstream.
        denom = np.linalg.norm(OHAMLAB_EMBS, axis=1) * np.linalg.norm(q_emb) + 1e-10
        sims = np.dot(OHAMLAB_EMBS, q_emb) / denom
        top_idx = np.argsort(sims)[-top_k:][::-1]
        return "\n\n".join(OHAMLAB_TEXTS[i] for i in top_idx)
    except Exception as e:
        print(f"⚠️ Retrieval error: {e}")
        return ""
117
 
118
# ---------------------------
# 5. System Prompt with Context Injection
# ---------------------------
def build_system_prompt(context: str, mode: str = "chat") -> str:
    """Build the Aerelyth system prompt with retrieved context spliced in.

    Args:
        context: retrieved snippet text; truncated to 1800 characters.
        mode: free-form mode label, upper-cased into the prompt.

    Returns:
        Flush-left prompt text starting with the persona line and ending
        with the context terminator.

    Fix: the old version ran textwrap.dedent AFTER f-string substitution.
    Retrieved context contains flush-left newlines ("\\n\\n"-joined snippets),
    so dedent found a common indent of "" and left the entire prompt
    indented. Building the lines explicitly is also safe for context that
    contains brace characters.
    """
    lines = [
        "You are **Aerelyth**, the OhamLab Dialectical CrossSphere Intelligence.",
        "",
        "Guidelines:",
        "- Always answer with clarity, scientific accuracy, and concise insight.",
        "- Incorporate OhamLab research knowledge when relevant.",
        "- Avoid code unless explicitly requested.",
        "- Be confident but label speculation clearly.",
        f"- Mode: {mode.upper()}",
        "",
        "--- OhamLab Context (Retrieved Snippets) ---",
        context[:1800],
        "--- End Context ---",
    ]
    return "\n".join(lines)
136
 
137
# ---------------------------
# 6. Model Call
# ---------------------------
def generate_response(user_input, history, mode="chat"):
    """Run retrieval, assemble the message list, and call the chat model.

    Args:
        user_input: latest user message text.
        history: prior turns as [{"role": ..., "content": ...}, ...].
        mode: passed through to the system prompt builder.

    Returns:
        The assistant reply text, or a friendly error string on failure.
    """
    context = retrieve_knowledge(user_input)
    sys_prompt = build_system_prompt(context, mode)
    messages = [{"role": "system", "content": sys_prompt}] + history + [
        {"role": "user", "content": user_input}
    ]
    try:
        resp = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
            temperature=0.7,
            max_tokens=1200,
        )
        # Fix: the API may return content=None; calling .strip() on None
        # raised inside this try and masked a valid-but-empty reply as the
        # generic error message.
        return (resp.choices[0].message.content or "").strip()
    except Exception as e:
        print(f"⚠️ Model call failed: {e}")
        return "⚠️ Aerelyth encountered a temporary issue generating your response."
157
+
158
# ---------------------------
# 7. Gradio Chat UI
# ---------------------------
def chat_with_model(user_message, chat_history):
    """Gradio callback: generate a reply and append it to tuple-style history.

    Args:
        user_message: text from the input box; empty input is a no-op.
        chat_history: list of (user_text, bot_text) tuples from gr.Chatbot.

    Returns:
        (updated chat_history, "") — the empty string clears the textbox.
    """
    if not user_message:
        return chat_history, ""

    # Rebuild OpenAI-style history from the (user, bot) tuples.
    # BUG FIX: the old code enumerated the tuples and reused the *user* half
    # for alternating roles, so assistant turns carried user messages and
    # every real bot reply was silently dropped from the model's context.
    history = []
    for user_turn, bot_turn in chat_history:
        history.append({"role": "user", "content": user_turn})
        if bot_turn:
            history.append({"role": "assistant", "content": bot_turn})

    try:
        bot_text = generate_response(user_message, history)
    except Exception as e:
        # Surface the traceback in-chat so Space logs aren't the only clue.
        tb = traceback.format_exc()
        bot_text = f"⚠️ Error: {e}\n\n{tb}"

    chat_history.append((user_message, bot_text))
    return chat_history, ""
177
+
178
def reset_chat():
    """Return a fresh, empty history (wired to the Clear button)."""
    fresh_history = []
    return fresh_history
180
+
181
def build_ui():
    """Assemble the Gradio Blocks chat interface and launch it on port 7860."""
    custom_css = """
    #chatbot { background-color:#10121a; color:#e6eef8; border-radius:10px; padding:10px; }
    """
    with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
        gr.Markdown("## 🧠 OhamLab — Aerelyth Dialectical Intelligence (RAG Mode)")
        chat_panel = gr.Chatbot(height=540, elem_id="chatbot", type="tuples")

        with gr.Row():
            user_box = gr.Textbox(
                placeholder="Type a message…", lines=3, scale=8, show_label=False
            )
            send_btn = gr.Button("Send", variant="primary", scale=1)
        with gr.Row():
            clear_btn = gr.Button("Clear")

        # Both the Send button and Enter-in-textbox trigger the same callback.
        send_btn.click(chat_with_model, inputs=[user_box, chat_panel], outputs=[chat_panel, user_box])
        user_box.submit(chat_with_model, inputs=[user_box, chat_panel], outputs=[chat_panel, user_box])
        clear_btn.click(reset_chat, outputs=chat_panel)

    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
202
+
203
# ---------------------------
# Entrypoint
# ---------------------------
if __name__ == "__main__":
    # Announce startup, then block inside the Gradio server loop.
    print("🚀 Starting OhamLab Aerelyth — Knowledge-Aware RAG Engine")
    build_ui()