Update app_qwen.py
app_qwen.py
CHANGED (+148 -181)
Old version (removed lines prefixed with "-"; other lines are unchanged context):

@@ -1,27 +1,34 @@
-import spaces
 import os
-import textwrap
 import traceback
 import gradio as gr
 import torch

-from transformers import (
-    AutoTokenizer,
-    AutoModelForCausalLM,
-)

-#
 # Configuration
-#
 MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_ID,
     trust_remote_code=True
@@ -34,203 +41,163 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True
 )

 )

-#
-#
-#
-def
-def get_system_prompt(mode="chat"):
-    if mode == "chat":
-        return textwrap.dedent(f"""
-            You are OhamLab AI.
-            Mode: Conversational Q&A.
-            Rules:
-            - Answer clearly in 3–6 sentences.
-            - Prefer accuracy over creativity.
-            - Use the research context to answer questions.
-            - Treat markdown headings as semantic sections.
-            - If the answer is not in the research context, say so.
-            --- BEGIN RESEARCH CONTEXT ---
-            {research_context}
-            --- END RESEARCH CONTEXT ---
-        """).strip()
-    return textwrap.dedent(f"""
-        You are OhamLab AI.
-        Mode: Research / Analytical.
-        Rules:
-        - Use structured reasoning and sections.
-        - Reference the research context when relevant.
-        - Be precise and analytical.
-        - Treat markdown headings as semantic structure.
-        --- BEGIN RESEARCH CONTEXT ---
-        {research_context}
-        --- END RESEARCH CONTEXT ---
-    """).strip()
-# ---------------------------
-# State
-# ---------------------------
-conversation_mode = "chat"
-history_messages = [{"role": "system", "content": get_system_prompt("chat")}]
-chat_history_for_ui = []
-# ---------------------------
-# Model call helper
-# ---------------------------
-def call_model_get_response(messages, max_tokens=600):
-    conversation_text = ""
-    for m in messages:
-        role = m["role"].upper()
-        conversation_text += f"[{role}]: {m['content']}\n"
-    conversation_text += "[ASSISTANT]:"

         do_sample=True,
-        temperature=0.5,
-        top_p=0.9,
-        repetition_penalty=1.1,
-        return_full_text=False,
     )
-    return output[0]["generated_text"].strip()

-        tb = traceback.format_exc()
-        return f"⚠️ Error: {e}\n\n{tb.splitlines()[-6:]}"

-#
-#
-#
 @spaces.GPU()
-def
-    global history_messages, chat_history_for_ui, conversation_mode
     if not user_message.strip():
-        return "",

-        return "", chat_history + [("🟢 Mode", "🔬 Research mode activated.")]
-    if "switch to chat mode" in msg_lower:
-        conversation_mode = "chat"
-        history_messages = [{"role": "system", "content": get_system_prompt("chat")}]
-        return "", chat_history + [("🟢 Mode", "💬 Chat mode activated.")]
-    history_messages.append({"role": "user", "content": user_message})
-    bot_text = call_model_get_response(history_messages)
-    history_messages.append({"role": "assistant", "content": bot_text})
-    chat_history_for_ui.append((user_message, bot_text))

 def reset_chat():
-    global history_messages, chat_history_for_ui
-    history_messages = [{"role": "system", "content": get_system_prompt(conversation_mode)}]
-    chat_history_for_ui = []
     return []

-#
-#
-#
 def build_ui():
-    with gr.Blocks(
-        css="""
-        #chatbot {
-            background-color: #f9f9fb;
-            border-radius: 12px;
-            padding: 10px;
-        }
-        """
-    ) as demo:
-        with gr.Row():
-            clear_btn = gr.Button("🧹 Clear", size="sm")

         chatbot = gr.Chatbot(
-            height=
-            type="tuples",
             avatar_images=("👤", "🤖"),
         )

         with gr.Row():
             msg = gr.Textbox(
-                placeholder="Ask a question
                 lines=2,
-                scale=8
             )
-            send = gr.Button("🚀 Send",

-        send.click(
-        msg.submit(

-        demo.launch(

     return demo

-#
 # Entrypoint
-#
 if __name__ == "__main__":
-    print(f"✅
     build_ui()
New version (added lines prefixed with "+"; other lines are unchanged context):

@@ -1,27 +1,34 @@
 import os
 import traceback
 import gradio as gr
 import torch
+import spaces
+import numpy as np

+from transformers import AutoTokenizer, AutoModelForCausalLM
+from sentence_transformers import SentenceTransformer

+# =========================================================
 # Configuration
+# =========================================================
 MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
+GENERAL_MD = "general.md"
+
+MAX_NEW_TOKENS = 300
+TOP_K = 3

+# =========================================================
+# Resolve path (CRITICAL)
+# =========================================================
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+GENERAL_MD_PATH = os.path.join(BASE_DIR, GENERAL_MD)

+if not os.path.exists(GENERAL_MD_PATH):
+    raise RuntimeError(f"❌ {GENERAL_MD} not found next to app.py")
+
+# =========================================================
+# Load Model
+# =========================================================
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_ID,
     trust_remote_code=True
@@ -34,203 +41,163 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True
 )

+model.eval()
+
+# =========================================================
+# Embedding Model (CPU-friendly)
+# =========================================================
+embedder = SentenceTransformer("all-MiniLM-L6-v2")
+
+# =========================================================
+# Load & Chunk general.md
+# =========================================================
+def chunk_text(text, chunk_size=300, overlap=50):
+    words = text.split()
+    chunks = []
+    i = 0
+    while i < len(words):
+        chunk = words[i:i + chunk_size]
+        chunks.append(" ".join(chunk))
+        i += chunk_size - overlap
+    return chunks
+
+with open(GENERAL_MD_PATH, "r", encoding="utf-8", errors="ignore") as f:
+    md_text = f.read()
+
+DOC_CHUNKS = chunk_text(md_text)
+DOC_SOURCES = [GENERAL_MD] * len(DOC_CHUNKS)
+
+if not DOC_CHUNKS:
+    raise RuntimeError("❌ general.md is empty or unreadable")
+
+# =========================================================
+# Embed once
+# =========================================================
+DOC_EMBEDS = embedder.encode(
+    DOC_CHUNKS,
+    normalize_embeddings=True,
+    show_progress_bar=True
 )

+# =========================================================
+# Retrieval
+# =========================================================
+def retrieve_context(question, k=TOP_K):
+    q_emb = embedder.encode([question], normalize_embeddings=True)
+    scores = np.dot(DOC_EMBEDS, q_emb[0])
+    top_ids = scores.argsort()[-k:][::-1]
+
+    context = []
+    for i in top_ids:
+        context.append(f"[Source: {DOC_SOURCES[i]}]\n{DOC_CHUNKS[i]}")
+
+    return "\n\n".join(context)
+
+# =========================================================
+# Qwen ChatML Inference
+# =========================================================
+def answer_question(question):
+    context = retrieve_context(question)
+
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                "You are a strict document-based Q&A assistant.\n"
+                "Answer ONLY using the provided context.\n"
+                "If the answer is not present, say:\n"
+                "'I could not find this information in the document.'"
+            )
+        },
+        {
+            "role": "user",
+            "content": f"""
+Context:
+{context}
+
+Question:
+{question}
+"""
+        }
+    ]

+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            max_new_tokens=MAX_NEW_TOKENS,
+            temperature=0.3,
             do_sample=True,
         )

+    return tokenizer.decode(output[0], skip_special_tokens=True)

+# =========================================================
+# Gradio Chat
+# =========================================================
 @spaces.GPU()
+def chat(user_message, history):
     if not user_message.strip():
+        return "", history

+    try:
+        answer = answer_question(user_message)
+    except Exception as e:
+        tb = traceback.format_exc()
+        answer = f"⚠️ Error:\n{e}\n\n{tb}"

+    history.append((user_message, answer))
+    return "", history

 def reset_chat():
     return []

+# =========================================================
+# UI
+# =========================================================
 def build_ui():
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown("## 📄 Q&A from general.md (Qwen2.5-0.5B + RAG)")

         chatbot = gr.Chatbot(
+            height=420,
             avatar_images=("👤", "🤖"),
+            type="tuples"
         )

         with gr.Row():
             msg = gr.Textbox(
+                placeholder="Ask a question from general.md...",
                 lines=2,
+                scale=8
             )
+            send = gr.Button("🚀 Send", scale=2)
+
+        clear = gr.Button("🧹 Clear")

+        send.click(chat, [msg, chatbot], [msg, chatbot])
+        msg.submit(chat, [msg, chatbot], [msg, chatbot])
+        clear.click(reset_chat, outputs=chatbot)

+        demo.launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=False
+        )

     return demo

+# =========================================================
 # Entrypoint
+# =========================================================
 if __name__ == "__main__":
+    print(f"✅ Loaded {len(DOC_CHUNKS)} chunks from general.md")
+    print(f"✅ Model: {MODEL_ID}")
     build_ui()
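A quick way to exercise the retrieval path introduced in this commit, as a sketch only: it assumes the updated file is saved as app_qwen.py next to a non-empty general.md, and that importing it (which runs the module-level model, embedder, and chunking setup) is acceptable on your machine. The query string is just a sample.

# Hypothetical local smoke test for the RAG path added in this commit.
# Importing app_qwen triggers all module-level setup: tokenizer/model load,
# reading general.md, chunking, and embedding the chunks.
import app_qwen

# Retrieval only: print the top-2 chunks matched for a sample question.
print(app_qwen.retrieve_context("What is this project about?", k=2))

# Full pipeline: retrieval plus Qwen2.5-0.5B generation (slower).
print(app_qwen.answer_question("What is this project about?"))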