Spaces:
Running
Running
| """ | |
OhamLab — AI Intelligence
Loads knowledge from the rahul7star/OhamLab-LLM Markdown corpus, caches embeddings,
and provides retrieval-augmented chat through the Hugging Face router.
| """ | |
| import os | |
| import re | |
| import json | |
| import time | |
| import textwrap | |
| import traceback | |
| import numpy as np | |
| import gradio as gr | |
| from openai import OpenAI | |
| from huggingface_hub import HfApi, hf_hub_download, list_repo_files | |
| # --------------------------- | |
| # 1. Configuration | |
| # --------------------------- | |
# Credential lookup: the first of these environment variables that holds a
# non-empty value is used as the API token for both the router and the Hub.
HF_TOKEN = next(
    (
        os.environ[var_name]
        for var_name in ("HF_TOKEN", "OPENAI_API_KEY", "HUGGINGFACE_TOKEN")
        if os.environ.get(var_name)
    ),
    None,
)
if HF_TOKEN is None:
    # Fail at import time: nothing below can work without a token.
    raise RuntimeError("❌ Missing HF_TOKEN / OPENAI_API_KEY / HUGGINGFACE_TOKEN environment variable.")

# Model and storage identifiers used by the rest of the module.
MODEL_ID = "openai/gpt-oss-20b"  # chat model, requested through the HF router
EMBED_MODEL = "text-embedding-3-small"  # embedding model name passed to the client
HF_REPO = "rahul7star/OhamLab-LLM"  # Hub repo holding the Markdown knowledge files
CACHE_PATH = "/tmp/ohamlab_emb_cache.json"  # on-disk embedding cache (JSON)

# Shared API clients: an OpenAI-compatible client pointed at the HF router,
# and a Hub API handle authenticated with the same token.
client = OpenAI(api_key=HF_TOKEN, base_url="https://router.huggingface.co/v1")
api = HfApi(token=HF_TOKEN)
| # --------------------------- | |
| # 2. Load and Chunk Markdown Files | |
| # --------------------------- | |
def _chunk_lines(lines, target_chars=500):
    """Yield space-joined chunks of stripped lines, closing a chunk once it reaches ``target_chars``.

    Length is counted as ``len(stripped_line) + 1`` per line (the +1 is the
    joining space). Chunks whose text is empty after stripping are skipped so
    that runs of blank lines never produce an empty chunk.
    """
    parts = []
    size = 0
    for line in lines:
        stripped = line.strip()
        parts.append(stripped)
        size += len(stripped) + 1
        if size >= target_chars:
            text = " ".join(parts).strip()
            if text:
                yield text
            parts, size = [], 0
    # Flush whatever is left after the last line.
    text = " ".join(parts).strip()
    if text:
        yield text


def load_ohamlab_knowledge():
    """Download every ``.md`` file from ``HF_REPO`` and split it into ~500-char chunks.

    Returns:
        list[dict]: one ``{"file": <repo path>, "text": <chunk text>}`` entry per
        chunk, in repo-listing order. Chunks never have empty text.

    Raises:
        Whatever ``list_repo_files`` raises; the repo listing is not guarded.
        A failure to download or read an individual file is logged and that
        file is skipped.
    """
    files = list_repo_files(HF_REPO, repo_type="model", token=HF_TOKEN)
    md_files = [f for f in files if f.endswith(".md")]
    chunks = []
    for f in md_files:
        try:
            path = hf_hub_download(HF_REPO, filename=f, token=HF_TOKEN)
            with open(path, "r", encoding="utf-8") as fh:
                content = fh.read()
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole load.
            print(f"⚠️ Failed to load {f}: {e}")
            continue
        chunks.extend(
            {"file": f, "text": text} for text in _chunk_lines(content.splitlines())
        )
    return chunks
| # --------------------------- | |
| # 3. Generate or Load Embeddings (with Cache) | |
| # --------------------------- | |
def get_embeddings_with_cache():
    """Return ``(texts, embeddings)`` for the OhamLab corpus, using the JSON cache when possible.

    If ``CACHE_PATH`` exists and parses, its contents are returned directly.
    Otherwise the corpus is loaded with ``load_ohamlab_knowledge`` and embedded
    in batches through ``client.embeddings.create``.

    A batch that fails is represented by zero vectors so the returned array
    stays aligned with ``texts``. In that case (and when the corpus is empty)
    the result is NOT written to the cache, so the next start retries instead
    of reusing placeholder vectors forever.

    Returns:
        tuple[list[str], numpy.ndarray]: chunk texts and a 2-D array with one
        row per text.
    """
    batch_size = 50
    fallback_dim = 1536  # placeholder width used only when no batch succeeded

    if os.path.exists(CACHE_PATH):
        try:
            with open(CACHE_PATH, "r", encoding="utf-8") as f:
                cache = json.load(f)
            texts = [c["text"] for c in cache]
            embs = np.array([c["embedding"] for c in cache])
            print(f"✅ Loaded cached embeddings from {CACHE_PATH} ({len(embs)} chunks)")
            return texts, embs
        except (OSError, ValueError, KeyError, TypeError):
            # Unreadable, invalid JSON, or wrong schema: rebuild from scratch.
            print("⚠️ Cache corrupted, regenerating embeddings...")

    chunks = load_ohamlab_knowledge()
    texts = [c["text"] for c in chunks]
    print(f"📘 Generating embeddings for {len(texts)} OhamLab chunks...")

    # One (batch_length, vectors-or-None) entry per batch; None marks a failure.
    per_batch = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        try:
            res = client.embeddings.create(model=EMBED_MODEL, input=batch)
            embs = [d.embedding for d in res.data]
            if len(embs) != len(batch):
                # A short response would silently misalign texts and vectors.
                raise ValueError(f"expected {len(batch)} embeddings, got {len(embs)}")
        except Exception as e:
            print(f"⚠️ Embedding batch failed ({i}): {e}")
            embs = None
        per_batch.append((len(batch), embs))
        time.sleep(0.5)

    # Size placeholders from a real embedding when one is available.
    dim = next((len(embs[0]) for _, embs in per_batch if embs), fallback_dim)
    had_failure = any(embs is None for _, embs in per_batch)
    all_embs = []
    for count, embs in per_batch:
        if embs is None:
            all_embs.extend([0.0] * dim for _ in range(count))
        else:
            all_embs.extend(embs)

    if texts and not had_failure:
        data = [{"text": t, "embedding": e} for t, e in zip(texts, all_embs)]
        tmp_path = CACHE_PATH + ".tmp"
        try:
            # Write to a sibling file and rename so readers never see a partial cache.
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(data, f)
            os.replace(tmp_path, CACHE_PATH)
            print(f"💾 Cached embeddings to {CACHE_PATH}")
        except OSError as e:
            # The cache is an optimisation; failing to persist it is not fatal.
            print(f"⚠️ Could not write embedding cache: {e}")
    else:
        print("⚠️ Embeddings incomplete or empty; not writing cache.")

    if not all_embs:
        return texts, np.empty((0, dim))
    return texts, np.array(all_embs)
# Build (or load from cache) the retrieval index once, at import time.
# OHAMLAB_TEXTS and OHAMLAB_EMBS are the module-level corpus that
# retrieve_knowledge() scores queries against.
OHAMLAB_TEXTS, OHAMLAB_EMBS = get_embeddings_with_cache()
| # --------------------------- | |
| # 4. Semantic Retrieval | |
| # --------------------------- | |
def retrieve_knowledge(query, top_k=3):
    """Return the ``top_k`` corpus snippets most similar to ``query``.

    Similarity is cosine similarity between the query embedding and each row
    of ``OHAMLAB_EMBS``. Rows with zero norm (placeholders for failed
    embedding batches) are never returned: dividing by their norm would give
    NaN, and ``argsort`` orders NaN last, which would rank them as best.

    Args:
        query: user text to search for.
        top_k: maximum number of snippets to return; values <= 0 yield "".

    Returns:
        str: the selected snippets joined by blank lines, best match first,
        or "" when the query is blank, nothing is retrievable, or any error
        occurs (the error is logged, never raised).
    """
    if top_k <= 0 or not query or not query.strip():
        return ""
    try:
        corpus = np.asarray(OHAMLAB_EMBS, dtype=float)
        if corpus.ndim != 2 or corpus.shape[0] == 0:
            # Empty or malformed index: skip the embedding call entirely.
            return ""
        q_emb = np.asarray(
            client.embeddings.create(model=EMBED_MODEL, input=[query]).data[0].embedding,
            dtype=float,
        )
        denom = np.linalg.norm(corpus, axis=1) * np.linalg.norm(q_emb)
        usable = denom > 0
        # Unusable rows score -inf so they sort below every real similarity.
        sims = np.full(corpus.shape[0], -np.inf)
        sims[usable] = (corpus[usable] @ q_emb) / denom[usable]
        top_idx = np.argsort(sims)[-top_k:][::-1]
        return "\n\n".join(OHAMLAB_TEXTS[i] for i in top_idx if usable[i])
    except Exception as e:
        # Retrieval is best-effort: the chat still works without context.
        print(f"⚠️ Retrieval error: {e}")
        return ""
| # --------------------------- | |
| # 5. System Prompt with Context Injection | |
| # --------------------------- | |
def build_system_prompt(context: str, mode: str = "chat") -> str:
    """Build the system prompt, embedding the retrieved context and the mode.

    The template is dedented BEFORE the values are substituted. Dedenting
    after substitution breaks as soon as ``context`` contains a line at
    column 0 (retrieved snippets are joined by newlines), because the common
    margin becomes empty and every template line keeps its source indentation.

    Args:
        context: retrieved snippets; only the first 1800 characters are used.
            ``None`` is treated as empty.
        mode: label inserted upper-cased on the "Mode" line.

    Returns:
        The prompt text with surrounding whitespace stripped.
    """
    template = textwrap.dedent(
        """\
        You are OhamLab — AI Intelligence Software
        Guidelines:
        - Always answer with clarity, scientific accuracy, and concise insight.
        - Incorporate OhamLab research knowledge when relevant.
        - Avoid code unless explicitly requested.
        - Be confident but label speculation clearly.
        - Mode: {mode}
        --- OhamLab Context (Retrieved Snippets) ---
        {context}
        --- End Context ---
        """
    )
    # str.format only parses the template, so braces inside context are safe.
    return template.format(
        mode=mode.upper(),
        context=(context or "")[:1800],
    ).strip()
| # --------------------------- | |
| # 6. Model Call | |
| # --------------------------- | |
def generate_response(user_input, history, mode="chat"):
    """Answer ``user_input`` with the chat model, grounded in retrieved context.

    The request is: system prompt (with retrieved snippets), then the prior
    turns, then the current user turn exactly once.

    Args:
        user_input: the current user message.
        history: prior turns as ``{"role", "content"}`` dicts. If the last
            entry is already the current user message, it is not sent twice.
            ``None`` is treated as no history.
        mode: forwarded to ``build_system_prompt``.

    Returns:
        str: the model's reply, or a fixed apology string if the model call
        fails or returns no text.
    """
    context = retrieve_knowledge(user_input)
    sys_prompt = build_system_prompt(context, mode)

    prior_turns = list(history or [])
    last = prior_turns[-1] if prior_turns else None
    if (
        isinstance(last, dict)
        and last.get("role") == "user"
        and last.get("content") == user_input
    ):
        # Caller already appended the current turn; avoid sending it twice.
        prior_turns.pop()

    messages = (
        [{"role": "system", "content": sys_prompt}]
        + prior_turns
        + [{"role": "user", "content": user_input}]
    )
    try:
        resp = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
            temperature=0.7,
            max_tokens=1200,
        )
        content = resp.choices[0].message.content
        if not content:
            # Make the "no text returned" case explicit instead of relying on
            # an AttributeError from None.strip().
            raise ValueError("model returned no content")
        return content.strip()
    except Exception as e:
        print(f"⚠️ Model call failed: {e}")
        return "⚠️ OhamLab encountered a temporary issue generating your response."
| # --------------------------- | |
| # 7. Gradio Chat UI | |
| # --------------------------- | |
| import traceback | |
| import gradio as gr | |
| # --------------------------- | |
| # Chat Logic | |
| # --------------------------- | |
def chat_with_model(user_message, chat_history):
    """Handle one chat turn and return ``(updated_history, "")``.

    The second element is the new value for the input textbox (always cleared).

    Args:
        user_message: text from the input box. Empty or whitespace-only input
            is ignored and the history is returned unchanged.
        chat_history: list of message dicts from the Chatbot component, or
            ``None``. Items that are not dicts, or lack "role"/"content", are
            dropped.

    Returns:
        tuple[list[dict], str]: prior turns plus the new user turn and the
        assistant reply, and "".
    """
    if chat_history is None:
        chat_history = []
    if not user_message or not user_message.strip():
        return chat_history, ""

    # Keep only well-formed turns, reduced to the two keys the model needs.
    history = [
        {"role": m["role"], "content": m["content"]}
        for m in chat_history
        if isinstance(m, dict) and "role" in m and "content" in m
    ]

    try:
        # Pass prior turns only: generate_response adds the current user
        # message itself, so appending it here first would send it twice.
        bot_reply = generate_response(user_message, history)
    except Exception as e:
        # Full traceback goes to the server log, not to the end user.
        traceback.print_exc()
        bot_reply = f"⚠️ OhamLab encountered an error:\n\n{e}"

    updated = history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": bot_reply},
    ]
    return updated, ""
def reset_chat():
    """Return a fresh, empty message list to use as the new chat history."""
    return list()
| # --------------------------- | |
| # Gradio Chat UI | |
| # --------------------------- | |
def build_ui():
    """Assemble the Gradio Blocks app: chat window, input box, Send and Clear buttons.

    Send (click) and Enter (submit) both run ``chat_with_model``; Clear runs
    ``reset_chat``. Returns the un-launched ``gr.Blocks`` instance.
    """
    # Custom stylesheet: hides per-message controls, styles the two bubble
    # kinds, and restyles the textarea and buttons.
    custom_css = """
    /* --- Hide share/delete icons --- */
    #ohamlab .wrap.svelte-1lcyrj3 > div > div > button {
        display: none !important;
    }
    [data-testid="share-btn"],
    [data-testid="delete-btn"],
    .message-controls,
    .message-actions {
        display: none !important;
        visibility: hidden !important;
    }
    /* --- User (Right) Message Bubble --- */
    #ohamlab .message.user {
        background-color: #4f46e5 !important;
        color: white !important;
        border-radius: 14px !important;
        align-self: flex-end !important;
        text-align: right !important;
        margin-left: 25%;
    }
    /* --- OhamLab (Left) Message Bubble --- */
    #ohamlab .message.assistant {
        background-color: #f8f9fa !important;
        color: #111 !important;
        border-radius: 14px !important;
        align-self: flex-start !important;
        text-align: left !important;
        margin-right: 25%;
    }
    #ohamlab .chatbot .wrap.svelte-1lcyrj3 > div > div > button {
        display: none !important; /* hide share/delete icons */
    }
    /* --- Overall Container --- */
    .gradio-container {
        max-width: 900px !important;
        margin: auto;
        padding-top: .5rem;
    }
    textarea {
        resize: none !important;
        border-radius: 12px !important;
        border: 1px solid #d1d5db !important;
        box-shadow: 0 1px 3px rgba(0,0,0,0.08);
    }
    button.primary {
        background-color: #4f46e5 !important;
        color: white !important;
        border-radius: 10px !important;
        padding: 0.6rem 1.4rem !important;
        font-weight: 600;
        transition: all 0.2s ease-in-out;
    }
    button.primary:hover {
        background-color: #4338ca !important;
    }
    button.secondary {
        background-color: #f3f4f6 !important;
        border-radius: 10px !important;
        color: #374151 !important;
        font-weight: 500;
        transition: all 0.2s ease-in-out;
    }
    button.secondary:hover {
        background-color: #e5e7eb !important;
    }
    """
    soft_indigo = gr.themes.Soft(primary_hue="indigo")

    with gr.Blocks(theme=soft_indigo, css=custom_css) as demo:
        # Conversation pane; elem_id is the hook the CSS above targets.
        chatbot = gr.Chatbot(
            label="💠 OhamLab Conversation",
            height=520,
            elem_id="ohamlab",
            type="messages",
            avatar_images=[None, None],
        )

        # Full-width prompt box.
        with gr.Row():
            msg = gr.Textbox(
                placeholder="Ask OhamLab anything ..",
                lines=3,
                show_label=False,
                scale=12,
                container=False,
            )

        # Action buttons.
        with gr.Row(equal_height=True, variant="compact"):
            send = gr.Button("Send", variant="primary", elem_classes=["primary"])
            clear = gr.Button("Clear", variant="secondary", elem_classes=["secondary"])

        # Clicking Send and pressing Enter trigger the same handler with the
        # same inputs and outputs.
        for bind in (send.click, msg.submit):
            bind(chat_with_model, inputs=[msg, chatbot], outputs=[chatbot, msg])
        clear.click(reset_chat, outputs=chatbot)

    return demo
| # --------------------------- | |
| # Entrypoint | |
| # --------------------------- | |
if __name__ == "__main__":
    # Script entry: announce start-up, build the UI, and serve it on all
    # interfaces at port 7860.
    print("🚀 Starting OhamLab Assistant...")
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo = build_ui()
    demo.launch(**launch_options)