rahul7star commited on
Commit
d2e9782
·
verified ·
1 Parent(s): 8f7a323

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +165 -74
app.py CHANGED
@@ -1,15 +1,22 @@
1
- import os, time, json, numpy as np, logging
2
- from typing import List
3
- from huggingface_hub import HfApi, hf_hub_download, list_repo_files
4
- from sentence_transformers import SentenceTransformer
5
- from openai import OpenAI
6
 
7
- # Logging setup
8
- logging.basicConfig(level=logging.INFO)
9
- logger = logging.getLogger("ohamlab_agent")
 
 
 
 
 
 
 
10
 
11
  # ---------------------------
12
- # Environment / Config
13
  # ---------------------------
14
  HF_TOKEN = (
15
  os.environ.get("HF_TOKEN")
@@ -17,101 +24,185 @@ HF_TOKEN = (
17
  or os.environ.get("HUGGINGFACE_TOKEN")
18
  )
19
  if not HF_TOKEN:
20
- raise RuntimeError("Missing HF_TOKEN / OPENAI_API_KEY / HUGGINGFACE_TOKEN.")
21
-
22
- CHAT_MODEL_ID = "openai/gpt-oss-20b" # via Hugging Face router
23
- EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
24
 
25
- HF_REPO = "rahul7star/OhamLab-LLM"
26
- HF_REPO_DIR = "./hf_capsules"
27
- os.makedirs(HF_REPO_DIR, exist_ok=True)
 
28
 
29
- # ---------------------------
30
- # Clients
31
- # ---------------------------
32
- try:
33
- client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=HF_TOKEN)
34
- logger.info("✅ OpenAI client via Hugging Face router initialized.")
35
- except Exception as e:
36
- logger.exception("Failed initializing chat client.")
37
- raise
38
-
39
- embedder = SentenceTransformer(EMBED_MODEL_ID)
40
- logger.info(f"✅ Loaded local embedding model: {EMBED_MODEL_ID}")
41
 
42
  # ---------------------------
43
- # Load Markdown Knowledge
44
  # ---------------------------
45
- def load_markdown_files(repo_id: str, local_dir: str) -> List[str]:
46
- api = HfApi(token=HF_TOKEN)
47
- files = list_repo_files(repo_id, repo_type="model", token=HF_TOKEN)
48
  md_files = [f for f in files if f.endswith(".md")]
49
- logger.info(f"📘 Found {len(md_files)} markdown files.")
50
-
51
  chunks = []
52
  for f in md_files:
53
  try:
54
- path = hf_hub_download(repo_id=repo_id, filename=f, local_dir=local_dir, token=HF_TOKEN)
55
  with open(path, "r", encoding="utf-8") as fh:
56
  content = fh.read()
57
  buf = ""
58
  for line in content.splitlines():
59
  buf += line.strip() + " "
60
- if len(buf) > 500:
61
- chunks.append(buf.strip())
62
  buf = ""
63
  if buf:
64
- chunks.append(buf.strip())
65
  except Exception as e:
66
- logger.warning(f"⚠️ Failed to read {f}: {e}")
67
- logger.info(f"✅ Loaded {len(chunks)} text chunks.")
68
  return chunks
69
 
70
- KNOWLEDGE_CHUNKS = load_markdown_files(HF_REPO, HF_REPO_DIR)
71
- logger.info("📊 Creating embeddings...")
72
- KNOWLEDGE_EMBS = embedder.encode(KNOWLEDGE_CHUNKS, normalize_embeddings=True)
73
- logger.info(f"🧠 Knowledge base ready ({len(KNOWLEDGE_CHUNKS)} chunks).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # ---------------------------
76
- # Retrieval
77
  # ---------------------------
78
- def get_relevant_context(query: str, top_k: int = 3) -> str:
79
- q_emb = embedder.encode([query], normalize_embeddings=True)[0]
80
- sims = np.dot(KNOWLEDGE_EMBS, q_emb)
81
- top_idx = np.argsort(sims)[-top_k:][::-1]
82
- return "\n\n".join(KNOWLEDGE_CHUNKS[i] for i in top_idx)
 
 
 
 
 
 
 
83
 
84
  # ---------------------------
85
- # Chat
86
  # ---------------------------
87
- SYSTEM_PROMPT = (
88
- "You are OhamLab AI — factual, concise, and context-aware.\n"
89
- "If applicable, use knowledge from OhamLab Markdown corpus."
90
- )
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- def chat(query: str, history: List[dict]) -> str:
93
- context = get_relevant_context(query)
94
- user_input = f"{query}\n\n[Context]\n{context[:1200]}" if context else query
95
- msgs = history + [{"role": "user", "content": user_input}]
 
 
 
 
 
96
  try:
97
  resp = client.chat.completions.create(
98
- model=CHAT_MODEL_ID,
99
- messages=msgs,
100
- temperature=0.6,
101
- max_tokens=700,
102
  )
103
  return resp.choices[0].message.content.strip()
104
  except Exception as e:
105
- logger.error(f"Chat error: {e}")
106
- return "There was a problem generating the response."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  if __name__ == "__main__":
109
- logger.info("🚀 OhamLab AI — Knowledge Chat Ready")
110
- hist = [{"role": "system", "content": SYSTEM_PROMPT}]
111
- while True:
112
- q = input("\n💬 Ask → ").strip()
113
- if q.lower() in ["exit", "quit"]:
114
- break
115
- ans = chat(q, hist)
116
- print("\n🤖", ans)
117
- hist.extend([{"role": "user", "content": q}, {"role": "assistant", "content": ans}])
 
1
+ """
2
+ OhamLab Aerelyth Dialectical Intelligence (RAG-Enhanced)
3
+ Loads knowledge from rahul7star/OhamLab-LLM markdown corpus, caches embeddings,
4
+ and provides retrieval-augmented chat through Hugging Face router.
5
+ """
6
 
7
+ import os
8
+ import re
9
+ import json
10
+ import time
11
+ import textwrap
12
+ import traceback
13
+ import numpy as np
14
+ import gradio as gr
15
+ from openai import OpenAI
16
+ from huggingface_hub import HfApi, hf_hub_download, list_repo_files
17
 
18
# ---------------------------
# 1. Configuration
# ---------------------------
# Accept any of the three conventional env-var names for the HF router token.
# NOTE(review): the middle fallback is a collapsed context line in the diff,
# reconstructed from the error message below — confirm against the repo.
HF_TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("OPENAI_API_KEY")
    or os.environ.get("HUGGINGFACE_TOKEN")
)
if not HF_TOKEN:
    raise RuntimeError("Missing HF_TOKEN / OPENAI_API_KEY / HUGGINGFACE_TOKEN environment variable.")

MODEL_ID = "openai/gpt-oss-20b"             # Chat model (via HF router)
EMBED_MODEL = "text-embedding-3-small"      # Embedding model
HF_REPO = "rahul7star/OhamLab-LLM"          # Knowledge repo
CACHE_PATH = "/tmp/ohamlab_emb_cache.json"  # Cache file

# Shared API clients (module-level singletons).
client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=HF_TOKEN)
api = HfApi(token=HF_TOKEN)
38
# ---------------------------
# 2. Load and Chunk Markdown Files
# ---------------------------
def load_ohamlab_knowledge():
    """Download every .md file from the knowledge repo and split into ~500-char chunks.

    Returns:
        list[dict]: items of the form {"file": <repo filename>, "text": <chunk text>}.
        Files that fail to download or read are skipped with a warning.
    """
    files = list_repo_files(HF_REPO, repo_type="model", token=HF_TOKEN)
    md_files = [f for f in files if f.endswith(".md")]

    chunks = []
    for f in md_files:
        try:
            path = hf_hub_download(HF_REPO, filename=f, token=HF_TOKEN)
            with open(path, "r", encoding="utf-8") as fh:
                content = fh.read()
            buf = ""
            for line in content.splitlines():
                buf += line.strip() + " "
                if len(buf) >= 500:
                    chunks.append({"file": f, "text": buf.strip()})
                    buf = ""
            # Fix: guard the remainder with .strip() — a file ending in blank
            # lines leaves buf as pure whitespace (truthy), which previously
            # appended a useless empty-text chunk.
            if buf.strip():
                chunks.append({"file": f, "text": buf.strip()})
        except Exception as e:
            print(f"⚠️ Failed to load {f}: {e}")
    return chunks
62
 
63
# ---------------------------
# 3. Generate or Load Embeddings (with Cache)
# ---------------------------
def get_embeddings_with_cache():
    """Return (texts, embeddings) for the OhamLab corpus, using a JSON cache.

    Loads the cache at CACHE_PATH when present and readable; otherwise
    re-chunks the corpus, embeds it in batches via the router, and rewrites
    the cache. Returns (list[str], np.ndarray of shape (n_chunks, dim)).
    """
    if os.path.exists(CACHE_PATH):
        try:
            with open(CACHE_PATH, "r") as f:
                cache = json.load(f)
            texts = [c["text"] for c in cache]
            embs = np.array([c["embedding"] for c in cache])
            print(f"✅ Loaded cached embeddings from {CACHE_PATH} ({len(embs)} chunks)")
            return texts, embs
        except Exception:
            print("⚠️ Cache corrupted, regenerating embeddings...")

    chunks = load_ohamlab_knowledge()
    texts = [c["text"] for c in chunks]
    print(f"📘 Generating embeddings for {len(texts)} OhamLab chunks...")
    batch_size = 50
    all_embs = []
    all_ok = True   # only persist the cache after a fully successful run
    emb_dim = None  # inferred from the first successful batch
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        try:
            res = client.embeddings.create(model=EMBED_MODEL, input=batch)
            embs = [d.embedding for d in res.data]
            if embs and emb_dim is None:
                emb_dim = len(embs[0])
            all_embs.extend(embs)
        except Exception as e:
            print(f"⚠️ Embedding batch failed ({i}): {e}")
            all_ok = False
            # Zero vectors keep indices aligned with `texts`. Fix: infer the
            # dimension from a successful batch instead of hard-coding 1536,
            # which only matched one specific model.
            all_embs.extend([[0.0] * (emb_dim or 1536)] * len(batch))
        time.sleep(0.5)  # gentle rate limiting between batches

    if all_ok and texts:
        # Fix: the old code cached unconditionally, so zero-vector fallbacks
        # from transient failures were frozen into the cache and never retried.
        try:
            data = [{"text": t, "embedding": e} for t, e in zip(texts, all_embs)]
            with open(CACHE_PATH, "w") as f:
                json.dump(data, f)
            print(f"💾 Cached embeddings to {CACHE_PATH}")
        except OSError as e:
            print(f"⚠️ Could not write cache: {e}")
    return texts, np.array(all_embs)

OHAMLAB_TEXTS, OHAMLAB_EMBS = get_embeddings_with_cache()
101
 
102
# ---------------------------
# 4. Semantic Retrieval
# ---------------------------
def retrieve_knowledge(query, top_k=3):
    """Return the top-k most relevant knowledge snippets, joined by blank lines.

    Embeds the query, ranks corpus chunks by cosine similarity, and returns
    the best `top_k` texts. Falls back to "" on any error or empty corpus.
    """
    try:
        # Guard: with an empty knowledge base there is nothing to rank.
        if len(OHAMLAB_TEXTS) == 0:
            return ""
        q_emb = client.embeddings.create(model=EMBED_MODEL, input=[query]).data[0].embedding
        # Cosine similarity. Fix: the small epsilon prevents a divide-by-zero
        # (NaN similarities) for zero-norm rows — the zero-vector fallback
        # written when an embedding batch failed upstream.
        denom = np.linalg.norm(OHAMLAB_EMBS, axis=1) * np.linalg.norm(q_emb) + 1e-10
        sims = np.dot(OHAMLAB_EMBS, q_emb) / denom
        top_idx = np.argsort(sims)[-top_k:][::-1]
        return "\n\n".join(OHAMLAB_TEXTS[i] for i in top_idx)
    except Exception as e:
        print(f"⚠️ Retrieval error: {e}")
        return ""
117
 
118
# ---------------------------
# 5. System Prompt with Context Injection
# ---------------------------
def build_system_prompt(context: str, mode: str = "chat") -> str:
    """Build the Aerelyth system prompt with retrieved context spliced in.

    Args:
        context: retrieved snippet text; truncated to 1800 characters.
        mode: free-form mode label, upper-cased into the prompt.

    Returns:
        Flush-left prompt text starting with the persona line and ending
        with the context terminator.

    Fix: the old version ran textwrap.dedent AFTER f-string substitution.
    Retrieved context contains flush-left newlines ("\\n\\n"-joined snippets),
    so dedent found a common indent of "" and left the entire prompt
    indented. Building the lines explicitly is also safe for context that
    contains brace characters.
    """
    lines = [
        "You are **Aerelyth**, the OhamLab Dialectical CrossSphere Intelligence.",
        "",
        "Guidelines:",
        "- Always answer with clarity, scientific accuracy, and concise insight.",
        "- Incorporate OhamLab research knowledge when relevant.",
        "- Avoid code unless explicitly requested.",
        "- Be confident but label speculation clearly.",
        f"- Mode: {mode.upper()}",
        "",
        "--- OhamLab Context (Retrieved Snippets) ---",
        context[:1800],
        "--- End Context ---",
    ]
    return "\n".join(lines)
136
 
137
# ---------------------------
# 6. Model Call
# ---------------------------
def generate_response(user_input, history, mode="chat"):
    """Run retrieval, assemble the message list, and call the chat model.

    Args:
        user_input: latest user message text.
        history: prior turns as [{"role": ..., "content": ...}, ...].
        mode: passed through to the system prompt builder.

    Returns:
        The assistant reply text, or a friendly error string on failure.
    """
    context = retrieve_knowledge(user_input)
    sys_prompt = build_system_prompt(context, mode)
    messages = [{"role": "system", "content": sys_prompt}] + history + [
        {"role": "user", "content": user_input}
    ]
    try:
        resp = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
            temperature=0.7,
            max_tokens=1200,
        )
        # Fix: the API may return content=None; calling .strip() on None
        # raised inside this try and masked a valid-but-empty reply as the
        # generic error message.
        return (resp.choices[0].message.content or "").strip()
    except Exception as e:
        print(f"⚠️ Model call failed: {e}")
        return "⚠️ Aerelyth encountered a temporary issue generating your response."
157
+
158
# ---------------------------
# 7. Gradio Chat UI
# ---------------------------
def chat_with_model(user_message, chat_history):
    """Gradio callback: generate a reply and append it to tuple-style history.

    Args:
        user_message: text from the input box; empty input is a no-op.
        chat_history: list of (user_text, bot_text) tuples from gr.Chatbot.

    Returns:
        (updated chat_history, "") — the empty string clears the textbox.
    """
    if not user_message:
        return chat_history, ""

    # Rebuild OpenAI-style history from the (user, bot) tuples.
    # BUG FIX: the old code enumerated the tuples and reused the *user* half
    # for alternating roles, so assistant turns carried user messages and
    # every real bot reply was silently dropped from the model's context.
    history = []
    for user_turn, bot_turn in chat_history:
        history.append({"role": "user", "content": user_turn})
        if bot_turn:
            history.append({"role": "assistant", "content": bot_turn})

    try:
        bot_text = generate_response(user_message, history)
    except Exception as e:
        # Surface the traceback in-chat so Space logs aren't the only clue.
        tb = traceback.format_exc()
        bot_text = f"⚠️ Error: {e}\n\n{tb}"

    chat_history.append((user_message, bot_text))
    return chat_history, ""
177
+
178
def reset_chat():
    """Return a fresh, empty history (wired to the Clear button)."""
    fresh_history = []
    return fresh_history
180
+
181
def build_ui():
    """Assemble the Gradio Blocks chat interface and launch it on port 7860."""
    custom_css = """
    #chatbot { background-color:#10121a; color:#e6eef8; border-radius:10px; padding:10px; }
    """
    with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
        gr.Markdown("## 🧠 OhamLab — Aerelyth Dialectical Intelligence (RAG Mode)")
        chat_panel = gr.Chatbot(height=540, elem_id="chatbot", type="tuples")

        with gr.Row():
            user_box = gr.Textbox(
                placeholder="Type a message…", lines=3, scale=8, show_label=False
            )
            send_btn = gr.Button("Send", variant="primary", scale=1)
        with gr.Row():
            clear_btn = gr.Button("Clear")

        # Both the Send button and Enter-in-textbox trigger the same callback.
        send_btn.click(chat_with_model, inputs=[user_box, chat_panel], outputs=[chat_panel, user_box])
        user_box.submit(chat_with_model, inputs=[user_box, chat_panel], outputs=[chat_panel, user_box])
        clear_btn.click(reset_chat, outputs=chat_panel)

    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
202
+
203
# ---------------------------
# Entrypoint
# ---------------------------
if __name__ == "__main__":
    # Announce startup, then block inside the Gradio server loop.
    print("🚀 Starting OhamLab Aerelyth — Knowledge-Aware RAG Engine")
    build_ui()