Spaces:

Deevyankar
/

BrainChat

Sleeping

App Files Files Community

Deevyankar committed 16 days ago

Commit

d124dac

verified ·

1 Parent(s): d4689cf

Update app.py

Browse files

Files changed (1) hide show

app.py +592 -132

app.py CHANGED Viewed

@@ -10,9 +10,9 @@ from rank_bm25 import BM25Okapi
 from sentence_transformers import SentenceTransformer
 from openai import OpenAI
-# ==============================
 # PATHS
-# ==============================
 BUILD_DIR = "brainchat_build"
 CHUNKS_PATH = os.path.join(BUILD_DIR, "chunks.pkl")
 TOKENS_PATH = os.path.join(BUILD_DIR, "tokenized_chunks.pkl")
@@ -20,6 +20,9 @@ EMBED_PATH = os.path.join(BUILD_DIR, "embeddings.npy")
 CONFIG_PATH = os.path.join(BUILD_DIR, "config.json")
 LOGO_FILE = "Brain chat-09.png"
 EMBED_MODEL = None
 BM25 = None
 CHUNKS = None
@@ -27,183 +30,640 @@ EMBEDDINGS = None
 CLIENT = None
-# ==============================
-# LOAD
-# ==============================
-def tokenize(text):
-    return re.findall(r"\w+", text.lower())
 def ensure_loaded():
     global EMBED_MODEL, BM25, CHUNKS, EMBEDDINGS, CLIENT
     if CHUNKS is None:
         with open(CHUNKS_PATH, "rb") as f:
             CHUNKS = pickle.load(f)
         with open(TOKENS_PATH, "rb") as f:
-            tokenized = pickle.load(f)
         EMBEDDINGS = np.load(EMBED_PATH)
-        with open(CONFIG_PATH) as f:
             cfg = json.load(f)
-        BM25 = BM25Okapi(tokenized)
         EMBED_MODEL = SentenceTransformer(cfg["embedding_model"])
     if CLIENT is None:
-        CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-# ==============================
-# SEARCH
-# ==============================
-def search(query):
     ensure_loaded()
-    scores = BM25.get_scores(tokenize(query))
-    idx = np.argsort(scores)[::-1][:3]
-    return [CHUNKS[int(i)] for i in idx]
-def context_text(records):
-    return "\n\n".join([r["text"] for r in records])
-# ==============================
-# OPENAI
-# ==============================
-def chat(prompt):
-    return CLIENT.chat.completions.create(
         model="gpt-4o-mini",
-        messages=[{"role": "user", "content": prompt}],
         temperature=0.2,
-    ).choices[0].message.content
-def chat_json(prompt):
-    return json.loads(
-        CLIENT.chat.completions.create(
-            model="gpt-4o-mini",
-            messages=[{"role": "user", "content": prompt}],
-            response_format={"type": "json_object"},
-        ).choices[0].message.content
     )
-# ==============================
-# CORE LOGIC
-# ==============================
-def answer_question(msg, history, mode, quiz_state):
     if history is None:
         history = []
     if quiz_state is None:
-        quiz_state = {"active": False, "quiz": None}
-    if not msg.strip():
-        return history, quiz_state, ""
-    history.append({"role": "user", "content": msg})
-    records = search(msg)
-    ctx = context_text(records)
-    # =====================
-    # QUIZ EVALUATION
-    # =====================
-    if quiz_state["active"]:
-        eval_prompt = f"""
-Evaluate answers:
-Quiz: {json.dumps(quiz_state["quiz"])}
-Student: {msg}
-Return JSON:
-{{"score": "x/y", "feedback": "short"}}
 """
-        res = chat_json(eval_prompt)
-        out = f"Score: {res['score']}\n\n{res['feedback']}"
-        history.append({"role": "assistant", "content": out})
-        quiz_state = {"active": False, "quiz": None}
-        return history, quiz_state, ""
-    # =====================
-    # QUIZ GENERATION
-    # =====================
-    if mode == "Quiz":
-        quiz = chat_json(f"""
-Create 3 questions.
-Context:
-{ctx}
-Return:
-{{"questions":[{{"q":"","a":""}}]}}
-""")
-        text = "Answer these:\n\n"
-        for i, q in enumerate(quiz["questions"], 1):
-            text += f"{i}. {q['q']}\n"
-        history.append({"role": "assistant", "content": text})
-        quiz_state = {"active": True, "quiz": quiz}
-        return history, quiz_state, ""
-    # =====================
-    # NORMAL
-    # =====================
-    answer = chat(f"""
-Explain clearly:
-{ctx}
-Question: {msg}
-""")
-    history.append({"role": "assistant", "content": answer})
-    return history, quiz_state, ""
-def clear():
-    return [], {"active": False, "quiz": None}, ""
-# ==============================
 # UI
-# ==============================
-def logo():
-    if os.path.exists(LOGO_FILE):
-        return f'<img src="/gradio_api/file={quote(LOGO_FILE)}" width="120">'
-    return "<h2>BrainChat</h2>"
-CSS = """
-body {background:#dcdcdc;}
-"""
 with gr.Blocks() as demo:
-    quiz_state = gr.State({"active": False, "quiz": None})
-    gr.HTML(logo())
-    mode = gr.Dropdown(["Explain", "Quiz"], value="Explain")
-    chatbot = gr.Chatbot(height=500)
-    msg = gr.Textbox()
-    btn = gr.Button("Send")
-    clr = gr.Button("Clear")
-    btn.click(
         answer_question,
-        inputs=[msg, chatbot, mode, quiz_state],
-        outputs=[chatbot, quiz_state, msg],
     )
-    clr.click(clear, outputs=[chatbot, quiz_state, msg])
-demo.launch(css=CSS)

 from sentence_transformers import SentenceTransformer
 from openai import OpenAI
+# =====================================================
 # PATHS
+# =====================================================
+# Directory holding the prebuilt retrieval artifacts read by ensure_loaded().
 BUILD_DIR = "brainchat_build"
+# Pickled chunk records; ensure_loaded() stores them in CHUNKS.
 CHUNKS_PATH = os.path.join(BUILD_DIR, "chunks.pkl")
+# Pickled per-chunk token lists; ensure_loaded() builds the BM25 index from them.
 TOKENS_PATH = os.path.join(BUILD_DIR, "tokenized_chunks.pkl")
+# JSON config; ensure_loaded() reads its "embedding_model" key.
 CONFIG_PATH = os.path.join(BUILD_DIR, "config.json")
+# Logo image; detect_logo_url() links to it only when the file exists.
 LOGO_FILE = "Brain chat-09.png"
+# =====================================================
+# GLOBALS
+# =====================================================
+# Lazily populated singletons: all start as None and are filled in by
+# ensure_loaded() the first time it runs.
 EMBED_MODEL = None
 BM25 = None
 CHUNKS = None
 CLIENT = None
+# =====================================================
+# LOADERS
+# =====================================================
+def tokenize(text: str):
+    """Split *text* into lowercase word tokens.
+
+    The text is lowercased first; every maximal run of word characters is
+    then returned, in order of appearance.
+    """
+    lowered = text.lower()
+    word_pattern = re.compile(r"\w+", flags=re.UNICODE)
+    return word_pattern.findall(lowered)
 def ensure_loaded():
+    """Load the retrieval artifacts and create the OpenAI client on first use.
+
+    When CHUNKS is still None this reads the chunk records, the tokenized
+    chunks, the embedding matrix and the JSON config from BUILD_DIR, builds
+    the BM25 index and instantiates the sentence-transformer named by the
+    config's "embedding_model" key. Independently, when CLIENT is None it
+    creates the OpenAI client from the OPENAI_API_KEY environment variable.
+
+    Raises:
+        FileNotFoundError: if any of the four artifact files is missing.
+        ValueError: if OPENAI_API_KEY is unset or empty.
+    """
     global EMBED_MODEL, BM25, CHUNKS, EMBEDDINGS, CLIENT
+    # NOTE(review): CHUNKS doubles as the "already loaded" flag but is assigned
+    # before the remaining artifacts are read. If a later step in this branch
+    # raises, the next call skips the branch and BM25 / EMBEDDINGS /
+    # EMBED_MODEL may still be unset. Consider loading into locals and
+    # publishing the globals last.
     if CHUNKS is None:
+        # Check all inputs up front so the error names the missing file.
+        for path in [CHUNKS_PATH, TOKENS_PATH, EMBED_PATH, CONFIG_PATH]:
+            if not os.path.exists(path):
+                raise FileNotFoundError(f"Missing file: {path}")
+        # NOTE(review): pickle.load can execute arbitrary code from the file;
+        # this assumes the build artifacts are trusted - confirm.
         with open(CHUNKS_PATH, "rb") as f:
             CHUNKS = pickle.load(f)
         with open(TOKENS_PATH, "rb") as f:
+            tokenized_chunks = pickle.load(f)
         EMBEDDINGS = np.load(EMBED_PATH)
+        with open(CONFIG_PATH, "r", encoding="utf-8") as f:
             cfg = json.load(f)
+        BM25 = BM25Okapi(tokenized_chunks)
         EMBED_MODEL = SentenceTransformer(cfg["embedding_model"])
+    # The client has its own guard, so a missing key can be fixed and retried
+    # without re-reading the artifacts.
     if CLIENT is None:
+        api_key = os.getenv("OPENAI_API_KEY")
+        if not api_key:
+            raise ValueError("OPENAI_API_KEY is missing in Hugging Face Space Secrets.")
+        CLIENT = OpenAI(api_key=api_key)
+# =====================================================
+# RETRIEVAL
+# =====================================================
+def search_hybrid(query: str, shortlist_k: int = 20, final_k: int = 3):
+    """Retrieve chunks with a BM25 shortlist re-ranked by embedding score.
+
+    The ``shortlist_k`` chunks with the highest BM25 score for the tokenized
+    query are re-scored by the dot product between their stored embedding and
+    the normalized query embedding; the ``final_k`` best are returned,
+    best first.
+
+    Returns:
+        A list of entries taken from CHUNKS.
+    """
     ensure_loaded()
+    lexical_scores = BM25.get_scores(tokenize(query))
+    candidates = np.argsort(lexical_scores)[::-1][:shortlist_k]
+    candidate_vectors = EMBEDDINGS[candidates]
+    # Only the query vector is normalized here; presumably the stored
+    # embeddings were normalized at build time - TODO confirm.
+    encoded = EMBED_MODEL.encode([query], normalize_embeddings=True)
+    query_vector = encoded.astype("float32")[0]
+    similarity = candidate_vectors @ query_vector
+    best_first = np.argsort(similarity)[::-1][:final_k]
+    return [CHUNKS[int(position)] for position in candidates[best_first]]
+def build_context(records):
+    """Render retrieved records as numbered source blocks for the prompt.
+
+    Each record must provide the keys 'book', 'section_title', 'page_start',
+    'page_end' and 'text'. Blocks are numbered from 1 and separated by one
+    blank line.
+    """
+    def _format_block(number, rec):
+        return f"""[Source {number}]
+Book: {rec['book']}
+Section: {rec['section_title']}
+Pages: {rec['page_start']}-{rec['page_end']}
+Text:
+{rec['text']}"""
+    return "\n\n".join(
+        _format_block(number, rec) for number, rec in enumerate(records, start=1)
+    )
+def make_sources(records):
+    """Build a bullet list of the distinct sources behind *records*.
+
+    Records sharing the same (book, section_title, page_start, page_end) are
+    listed once, in order of first appearance, one bullet per line.
+    """
+    bullets = {}
+    for rec in records:
+        identity = (rec["book"], rec["section_title"], rec["page_start"], rec["page_end"])
+        if identity not in bullets:
+            bullets[identity] = (
+                f"• {rec['book']} | {rec['section_title']} | pp. {rec['page_start']}-{rec['page_end']}"
+            )
+    return "\n".join(bullets.values())
+# =====================================================
+# PROMPTS
+# =====================================================
+def language_instruction(language_mode: str) -> str:
+    """Return the prompt sentence that fixes the answer language.
+
+    "English", "Spanish" and "Bilingual" each map to a fixed instruction; any
+    other value yields the rule that mirrors the language of the user's
+    message.
+    """
+    fixed_rules = (
+        ("English", "Answer only in English."),
+        ("Spanish", "Answer only in Spanish."),
+        (
+            "Bilingual",
+            "Answer first in English, then provide a Spanish version under the heading 'Español:'.",
+        ),
+    )
+    for mode_name, rule in fixed_rules:
+        if language_mode == mode_name:
+            return rule
+    mirror_rule = (
+        "If the user's message is in Spanish, answer in Spanish. "
+        "If the user's message is in English, answer in English."
+    )
+    return mirror_rule
+def choose_quiz_count(user_text: str, selector: str) -> int:
+    """Decide how many quiz questions to generate.
+
+    An explicit selector of "3", "5" or "7" wins. Otherwise the lowercased
+    user text is scanned for keyword phrases: exam-style phrases give 7,
+    study-style phrases give 5, and anything else gives 3.
+    """
+    if selector in {"3", "5", "7"}:
+        return int(selector)
+    lowered = user_text.lower()
+    # Checked in order, so the larger count wins when both tiers match.
+    keyword_tiers = (
+        (7, ("mock test", "final exam", "exam practice", "full test")),
+        (5, ("detailed", "revision", "comprehensive", "study")),
+    )
+    for count, phrases in keyword_tiers:
+        for phrase in phrases:
+            if phrase in lowered:
+                return count
+    return 3
+def build_tutor_prompt(mode: str, language_mode: str, question: str, context: str) -> str:
+    """Assemble the grounded tutoring prompt for a non-quiz mode.
+
+    The prompt combines the fixed BrainChat rules, the language rule from
+    language_instruction(), the teaching style for *mode*, the retrieved
+    context and the user's question.
+
+    Raises:
+        KeyError: if *mode* is not one of "Explain", "Detailed",
+            "Short Notes", "Flashcards" or "Case-Based".
+    """
+    teaching_styles = {
+        "Explain": "Explain clearly like a friendly tutor using simple language. "
+                   "Use short headings if useful.",
+        "Detailed": "Give a fuller and more detailed explanation. Include concept, key points, and clinical relevance when supported by context.",
+        "Short Notes": "Answer in concise revision-note format using short bullet points.",
+        "Flashcards": "Create 6 flashcards in Q/A format using only the provided context.",
+        "Case-Based": "Create a short clinical scenario and explain it clearly using the provided context.",
+    }
+    language_rule = language_instruction(language_mode)
+    teaching_style = teaching_styles[mode]
+    prompt = f"""
+You are BrainChat, an interactive neurology and neuroanatomy tutor.
+Rules:
+- Use only the provided context from the books.
+- If the answer is not supported by the context, say exactly:
+  Not found in the course material.
+- Be accurate and student-friendly.
+- Do not invent facts outside the context.
+- {language_rule}
+Teaching style:
+{teaching_style}
+Context:
+{context}
+Question:
+{question}
+"""
+    return prompt.strip()
+def build_quiz_generation_prompt(language_mode: str, topic: str, context: str, n_questions: int) -> str:
+    """Assemble the prompt that asks the model for a quiz as JSON.
+
+    The prompt requests exactly *n_questions* questions with an answer key,
+    restricted to *context*, in the JSON shape
+    {"title": ..., "questions": [{"q": ..., "answer_key": ...}]}.
+    """
+    language_rule = language_instruction(language_mode)
+    prompt = f"""
+You are BrainChat, an interactive tutor.
+Rules:
+- Use only the provided context.
+- Create exactly {n_questions} quiz questions.
+- Questions should be short and clear.
+- Also create a short answer key.
+- Return valid JSON only.
+- {language_rule}
+Required JSON format:
+{{
+  "title": "short quiz title",
+  "questions": [
+    {{"q": "question 1", "answer_key": "expected short answer"}},
+    {{"q": "question 2", "answer_key": "expected short answer"}}
+  ]
+}}
+Context:
+{context}
+Topic:
+{topic}
+"""
+    return prompt.strip()
+def build_quiz_evaluation_prompt(language_mode: str, quiz_data: dict, user_answers: str) -> str:
+    """Assemble the prompt that asks the model to grade quiz answers as JSON.
+
+    *quiz_data* is embedded as JSON (non-ASCII characters kept as-is) next to
+    the student's raw answer text. The requested JSON carries
+    "score_obtained", "score_total", "summary" and a per-question "results"
+    list.
+    """
+    serialized_quiz = json.dumps(quiz_data, ensure_ascii=False)
+    language_rule = language_instruction(language_mode)
+    prompt = f"""
+You are BrainChat, an interactive tutor.
+Evaluate the student's answers fairly using the quiz answer key.
+Give:
+- total score
+- per-question feedback
+- one short improvement suggestion
+Rules:
+- Accept semantically correct answers even if wording differs.
+- Return valid JSON only.
+- {language_rule}
+Required JSON format:
+{{
+  "score_obtained": 0,
+  "score_total": 0,
+  "summary": "short overall feedback",
+  "results": [
+    {{
+      "question": "question text",
+      "student_answer": "student answer",
+      "result": "Correct / Partially Correct / Incorrect",
+      "feedback": "short explanation"
+    }}
+  ]
+}}
+Quiz data:
+{serialized_quiz}
+Student answers:
+{user_answers}
+"""
+    return prompt.strip()
+# =====================================================
+# OPENAI HELPERS
+# =====================================================
+def chat_text(prompt: str) -> str:
+    """Send *prompt* to the chat model and return the reply text, stripped.
+
+    Uses the module-level CLIENT, so ensure_loaded() must have run first.
+
+    Raises:
+        ValueError: if the model returns a message without text content.
+    """
+    resp = CLIENT.chat.completions.create(
         model="gpt-4o-mini",
         temperature=0.2,
+        messages=[
+            {"role": "system", "content": "You are a helpful educational assistant."},
+            {"role": "user", "content": prompt},
+        ],
+    )
+    content = resp.choices[0].message.content
+    # The SDK types ``message.content`` as optional; calling ``.strip()`` on
+    # None would surface as an opaque AttributeError in the chat window.
+    if content is None:
+        raise ValueError("The model returned an empty response.")
+    return content.strip()
+def chat_json(prompt: str) -> dict:
+    """Send *prompt* to the chat model in JSON mode and return the parsed reply.
+
+    Uses the module-level CLIENT, so ensure_loaded() must have run first.
+
+    Raises:
+        ValueError: if the model returns no content, or content that is not
+            valid JSON (json.JSONDecodeError is a ValueError subclass).
+    """
+    resp = CLIENT.chat.completions.create(
+        model="gpt-4o-mini",
+        temperature=0.2,
+        response_format={"type": "json_object"},
+        messages=[
+            {"role": "system", "content": "Return only valid JSON."},
+            {"role": "user", "content": prompt},
+        ],
     )
+    content = resp.choices[0].message.content
+    # The SDK types ``message.content`` as optional; json.loads(None) would
+    # raise a TypeError that says nothing useful to the user.
+    if content is None:
+        raise ValueError("The model returned no content to parse as JSON.")
+    return json.loads(content)
+# =====================================================
+# HTML RENDERING
+# =====================================================
+def md_to_html(text: str) -> str:
+    """Convert chat text into HTML that is safe to place inside a bubble.
+
+    Escapes ``&``, ``<`` and ``>``, turns ``**bold**`` spans into
+    ``<strong>`` elements and replaces newlines with ``<br>``.
+    """
+    escape_table = str.maketrans({"&": "&amp;", "<": "&lt;", ">": "&gt;"})
+    escaped = text.translate(escape_table)
+    with_bold = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", escaped)
+    return "<br>".join(with_bold.split("\n"))
+def render_chat(history):
+    """Render the chat history as HTML bubbles.
+
+    An empty or missing history yields the placeholder panel. Otherwise each
+    message becomes one row: role "user" gets the user classes and every
+    other role gets the bot classes. Content passes through md_to_html().
+    """
+    if not history:
+        return """
+        <div class="empty-chat">
+            <div class="empty-chat-text">
+                Ask a question, choose a tutor mode, or start a quiz.
+            </div>
+        </div>
+        """
+    bubbles = []
+    for entry in history:
+        role = entry["role"]
+        body = md_to_html(entry["content"])
+        side = "user" if role == "user" else "bot"
+        bubbles.append(
+            f'<div class="msg-row {side}-row"><div class="msg-bubble {side}-bubble">{body}</div></div>'
+        )
+    return '<div class="chat-wrap">' + "".join(bubbles) + "</div>"
+def detect_logo_url():
+    """Return the URL-quoted Gradio file URL for LOGO_FILE, or None if the file is absent."""
+    logo_present = os.path.exists(LOGO_FILE)
+    return f"/gradio_api/file={quote(LOGO_FILE)}" if logo_present else None
+def render_header():
+    """Return the HTML for the hero card shown at the top of the page.
+
+    Uses the logo image when detect_logo_url() finds one; otherwise draws a
+    round "BRAIN CHAT" placeholder badge of the same size.
+    """
+    logo_url = detect_logo_url()
+    if logo_url:
+        logo_html = f"""
+        <img src="{logo_url}" alt="BrainChat Logo"
+             style="width:120px;height:120px;object-fit:contain;display:block;margin:0 auto;">
+        """
+    else:
+        # Fallback badge so the header layout stays the same without the file.
+        logo_html = """
+        <div style="
+            width:120px;height:120px;border-radius:50%;
+            background:#efe85a;display:flex;align-items:center;justify-content:center;
+            font-weight:700;text-align:center;margin:0 auto;">
+            BRAIN<br>CHAT
+        </div>
+        """
+    return f"""
+    <div class="hero-card">
+        <div class="hero-inner">
+            <div class="hero-logo">{logo_html}</div>
+            <div class="hero-title">BrainChat</div>
+            <div class="hero-subtitle">
+                Interactive neurology and neuroanatomy tutor based on your uploaded books
+            </div>
+        </div>
+    </div>
+    """
+# =====================================================
+# MAIN LOGIC
+# =====================================================
+def answer_question(message, history, mode, language_mode, quiz_count_mode, show_sources, quiz_state):
+    """Handle one user message and return the updated chat state.
+
+    Flow:
+      1. A blank message returns the current state unchanged.
+      2. If a quiz is active, the message is graded as the quiz answers and
+         the quiz is deactivated.
+      3. Otherwise chunks are retrieved for the message. In "Quiz Me" mode a
+         quiz is generated and activated; in every other mode a tutor answer
+         is generated.
+
+    Returns:
+        A 4-tuple ``(history, chat_html, quiz_state, "")``. The UI wiring
+        sends the trailing empty string to the message textbox.
+
+    Any exception raised inside the ``try`` block is reported as an assistant
+    message of the form "Error: ..." and deactivates the quiz.
+    """
     if history is None:
         history = []
     if quiz_state is None:
+        quiz_state = {
+            "active": False,
+            "topic": None,
+            "quiz_data": None,
+            "language_mode": "Auto"
+        }
+    if not message or not message.strip():
+        return history, render_chat(history), quiz_state, ""
+    try:
+        # NOTE(review): the user message is appended only after ensure_loaded()
+        # succeeds, so a loading failure shows the error bubble without the
+        # message that triggered it.
+        ensure_loaded()
+        user_text = message.strip()
+        # New lists are built instead of appending in place, so the incoming
+        # history object is never mutated.
+        history = history + [{"role": "user", "content": user_text}]
+        # -------------------------------
+        # QUIZ EVALUATION
+        # -------------------------------
+        if quiz_state.get("active", False):
+            # Grade in the language the quiz was created in, not the current
+            # dropdown value.
+            evaluation_prompt = build_quiz_evaluation_prompt(
+                quiz_state["language_mode"],
+                quiz_state["quiz_data"],
+                user_text
+            )
+            evaluation = chat_json(evaluation_prompt)
+            # NOTE(review): these keys are indexed directly; a reply that omits
+            # one raises KeyError and is shown as "Error: '<key>'".
+            lines = []
+            lines.append(f"**Score:** {evaluation['score_obtained']}/{evaluation['score_total']}")
+            lines.append("")
+            lines.append(f"**Overall feedback:** {evaluation['summary']}")
+            lines.append("")
+            lines.append("**Question-wise evaluation:**")
+            for item in evaluation["results"]:
+                lines.append("")
+                lines.append(f"**Q:** {item['question']}")
+                lines.append(f"**Your answer:** {item['student_answer']}")
+                lines.append(f"**Result:** {item['result']}")
+                lines.append(f"**Feedback:** {item['feedback']}")
+            final_answer = "\n".join(lines)
+            history = history + [{"role": "assistant", "content": final_answer}]
+            quiz_state = {
+                "active": False,
+                "topic": None,
+                "quiz_data": None,
+                "language_mode": language_mode
+            }
+            return history, render_chat(history), quiz_state, ""
+        # -------------------------------
+        # NORMAL RETRIEVAL
+        # -------------------------------
+        records = search_hybrid(user_text, shortlist_k=20, final_k=3)
+        context = build_context(records)
+        # -------------------------------
+        # QUIZ GENERATION
+        # -------------------------------
+        if mode == "Quiz Me":
+            n_questions = choose_quiz_count(user_text, quiz_count_mode)
+            prompt = build_quiz_generation_prompt(language_mode, user_text, context, n_questions)
+            quiz_data = chat_json(prompt)
+            lines = []
+            lines.append(f"**{quiz_data.get('title', 'Quiz')}**")
+            lines.append("")
+            lines.append("Please answer the following questions in one message.")
+            lines.append("You can reply in numbered format, for example:")
+            lines.append("1. ...")
+            lines.append("2. ...")
+            lines.append("")
+            lines.append(f"**Total questions: {len(quiz_data['questions'])}**")
+            lines.append("")
+            # Only the questions are shown; the answer keys stay in quiz_data
+            # for the evaluation step.
+            for i, q in enumerate(quiz_data["questions"], start=1):
+                lines.append(f"**Q{i}.** {q['q']}")
+            if show_sources:
+                lines.append("\n---\n**Topic sources used to create the quiz:**")
+                lines.append(make_sources(records))
+            assistant_text = "\n".join(lines)
+            history = history + [{"role": "assistant", "content": assistant_text}]
+            # Activating the quiz makes the next message go to evaluation.
+            quiz_state = {
+                "active": True,
+                "topic": user_text,
+                "quiz_data": quiz_data,
+                "language_mode": language_mode
+            }
+            return history, render_chat(history), quiz_state, ""
+        # -------------------------------
+        # OTHER MODES
+        # -------------------------------
+        prompt = build_tutor_prompt(mode, language_mode, user_text, context)
+        answer = chat_text(prompt)
+        if show_sources:
+            answer += "\n\n---\n**Sources used:**\n" + make_sources(records)
+        history = history + [{"role": "assistant", "content": answer}]
+        return history, render_chat(history), quiz_state, ""
+    except Exception as e:
+        # UI boundary: show the failure in the chat instead of raising.
+        history = history + [{"role": "assistant", "content": f"Error: {str(e)}"}]
+        # NOTE(review): this mutates the incoming quiz_state dict in place,
+        # unlike the other paths, which build a new dict. It also drops an
+        # active quiz when grading fails - confirm that is intended.
+        quiz_state["active"] = False
+        return history, render_chat(history), quiz_state, ""
+def clear_all():
+    """Reset the conversation.
+
+    Returns an empty history, the rendered placeholder panel, an inactive quiz
+    state with language mode "Auto", and an empty string for the textbox.
+    """
+    fresh_history = []
+    fresh_quiz = dict(active=False, topic=None, quiz_data=None, language_mode="Auto")
+    return fresh_history, render_chat(fresh_history), fresh_quiz, ""
+# =====================================================
+# CSS
+# =====================================================
+# Stylesheet passed to demo.launch(). The class names match the markup emitted
+# by render_header() (hero-*) and render_chat() (chat-wrap, msg-*, *-bubble,
+# empty-chat*), plus the elem_classes set in the UI block (chat-panel,
+# controls-wrap).
+CSS = """
+body, .gradio-container {
+    background: #dcdcdc !important;
+    font-family: Arial, Helvetica, sans-serif !important;
+}
+footer { display: none !important; }
+.hero-card {
+    max-width: 900px;
+    margin: 18px auto 14px auto;
+    border-radius: 28px;
+    background: linear-gradient(180deg, #e8c7d4 0%, #a55ca2 48%, #2b0c46 100%);
+    padding: 22px 22px 18px 22px;
+}
+.hero-inner { text-align: center; }
+.hero-title {
+    color: white;
+    font-size: 34px;
+    font-weight: 800;
+    margin-top: 6px;
+}
+.hero-subtitle {
+    color: white;
+    opacity: 0.92;
+    font-size: 16px;
+    margin-top: 6px;
+}
+.chat-panel {
+    max-width: 900px;
+    margin: 0 auto;
+    background: white;
+    border-radius: 22px;
+    padding: 16px;
+    min-height: 420px;
+    box-shadow: 0 6px 18px rgba(0,0,0,0.08);
+}
+.chat-wrap {
+    display: flex;
+    flex-direction: column;
+    gap: 14px;
+}
+.msg-row {
+    display: flex;
+    width: 100%;
+}
+.user-row {
+    justify-content: flex-end;
+}
+.bot-row {
+    justify-content: flex-start;
+}
+.msg-bubble {
+    max-width: 80%;
+    padding: 14px 16px;
+    border-radius: 18px;
+    line-height: 1.5;
+    font-size: 15px;
+    word-wrap: break-word;
+}
+.user-bubble {
+    background: #e9d8ff;
+    color: #111;
+    border-bottom-right-radius: 6px;
+}
+.bot-bubble {
+    background: #f7f3a1;
+    color: #111;
+    border-bottom-left-radius: 6px;
+}
+.empty-chat {
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    min-height: 360px;
+}
+.empty-chat-text {
+    color: #777;
+    font-size: 16px;
+    text-align: center;
+}
+.controls-wrap {
+    max-width: 900px;
+    margin: 0 auto;
+}
 """
+# =====================================================
 # UI
+# =====================================================
 with gr.Blocks() as demo:
+    # Per-session state: the message list rendered by render_chat(), and the
+    # quiz bookkeeping dict consumed by answer_question().
+    history_state = gr.State([])
+    quiz_state = gr.State({
+        "active": False,
+        "topic": None,
+        "quiz_data": None,
+        "language_mode": "Auto"
+    })
+    gr.HTML(render_header())
+    with gr.Row(elem_classes="controls-wrap"):
+        mode = gr.Dropdown(
+            choices=["Explain", "Detailed", "Short Notes", "Flashcards", "Case-Based", "Quiz Me"],
+            value="Explain",
+            label="Tutor Mode"
+        )
+        language_mode = gr.Dropdown(
+            choices=["Auto", "English", "Spanish", "Bilingual"],
+            value="Auto",
+            label="Answer Language"
+        )
+    with gr.Row(elem_classes="controls-wrap"):
+        quiz_count_mode = gr.Dropdown(
+            choices=["Auto", "3", "5", "7"],
+            value="Auto",
+            label="Quiz Questions"
+        )
+        show_sources = gr.Checkbox(value=True, label="Show Sources")
+    gr.Markdown("""
+**How to use**
+- Choose a **Tutor Mode**
+- Then type a topic or question
+- For **Quiz Me**, type a topic such as: `cranial nerves`
+- The system will ask questions, and your **next message will be evaluated automatically**
+""")
+    # The conversation is drawn as raw HTML from render_chat().
+    chat_html = gr.HTML(render_chat([]), elem_classes="chat-panel")
+    with gr.Row(elem_classes="controls-wrap"):
+        msg = gr.Textbox(
+            placeholder="Ask a question or type a topic...",
+            lines=1,
+            show_label=False,
+            scale=8
+        )
+        send_btn = gr.Button("Send", scale=1)
+    with gr.Row(elem_classes="controls-wrap"):
+        clear_btn = gr.Button("Clear Chat")
+    # Pressing Enter and clicking Send run the same handler with the same
+    # wiring; the fourth output clears the textbox.
+    msg.submit(
+        answer_question,
+        inputs=[msg, history_state, mode, language_mode, quiz_count_mode, show_sources, quiz_state],
+        outputs=[history_state, chat_html, quiz_state, msg]
+    )
+    send_btn.click(
         answer_question,
+        inputs=[msg, history_state, mode, language_mode, quiz_count_mode, show_sources, quiz_state],
+        outputs=[history_state, chat_html, quiz_state, msg]
     )
+    clear_btn.click(
+        clear_all,
+        inputs=[],
+        outputs=[history_state, chat_html, quiz_state, msg]
+    )
+# NOTE(review): whether `css` is accepted by launch() or must be passed to
+# gr.Blocks() depends on the installed Gradio version - confirm against the
+# Space's pinned version.
+if __name__ == "__main__":
+    demo.launch(css=CSS)