Spaces:

Deevyankar
/

BrainChat

Sleeping

App Files Files Community

Deevyankar committed 11 days ago

Commit

cdfda82

verified ·

1 Parent(s): 2fead40

Update app.py

Browse files

Files changed (1) hide show

app.py +563 -586

app.py CHANGED Viewed

@@ -1,679 +1,656 @@
 import os
 import re
-import json
-import pickle
-from urllib.parse import quote
-import numpy as np
 import gradio as gr
-from rank_bm25 import BM25Okapi
-from sentence_transformers import SentenceTransformer
-from openai import OpenAI
-# ============================================================
-# Configuration
-# ============================================================
-BUILD_DIR = "brainchat_build"
-CHUNKS_PATH = os.path.join(BUILD_DIR, "chunks.pkl")
-TOKENS_PATH = os.path.join(BUILD_DIR, "tokenized_chunks.pkl")
-EMBED_PATH = os.path.join(BUILD_DIR, "embeddings.npy")
-CONFIG_PATH = os.path.join(BUILD_DIR, "config.json")
-# Put ONE of these logo files in your Space repo root (same folder as app.py)
-LOGO_CANDIDATES = [
-    "Brain chat-09.png",
-    "brainchat_logo.png.png",
-    "Brain Chat Imagen.svg",
-    "ebcbb9f5-022f-473a-bf51-7e7974f794b4.png",
 ]
-MODEL_NAME_TEXT = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
-# ============================================================
-# Globals (lazy loaded)
-# ============================================================
-BM25 = None
-CHUNKS = None
-EMBEDDINGS = None
-EMBED_MODEL = None
-CLIENT = None
-# ============================================================
-# Utilities
-# ============================================================
-def tokenize(text: str):
-    return re.findall(r"\w+", text.lower(), flags=re.UNICODE)
-def ensure_loaded():
-    global BM25, CHUNKS, EMBEDDINGS, EMBED_MODEL, CLIENT
-    if CHUNKS is None:
-        missing = [p for p in [CHUNKS_PATH, TOKENS_PATH, EMBED_PATH, CONFIG_PATH] if not os.path.exists(p)]
-        if missing:
-            raise FileNotFoundError(
-                "Missing build files. Make sure you ran the build step and committed brainchat_build/.\n"
-                + "\n".join(missing)
-            )
-        with open(CHUNKS_PATH, "rb") as f:
-            CHUNKS = pickle.load(f)
-        with open(TOKENS_PATH, "rb") as f:
-            tokenized_chunks = pickle.load(f)
-        EMBEDDINGS = np.load(EMBED_PATH)
-        with open(CONFIG_PATH, "r", encoding="utf-8") as f:
-            cfg = json.load(f)
-        BM25 = BM25Okapi(tokenized_chunks)
-        EMBED_MODEL = SentenceTransformer(cfg["embedding_model"])
-    if CLIENT is None:
-        api_key = os.getenv("OPENAI_API_KEY")
-        if not api_key:
-            raise ValueError("OPENAI_API_KEY is missing. Add it in your Space Secrets.")
-        CLIENT = OpenAI(api_key=api_key)
-def search_hybrid(query: str, shortlist_k: int = 30, final_k: int = 5):
-    ensure_loaded()
-    q_tokens = tokenize(query)
-    bm25_scores = BM25.get_scores(q_tokens)
-    shortlist_idx = np.argsort(bm25_scores)[::-1][:shortlist_k]
-    qvec = EMBED_MODEL.encode([query], normalize_embeddings=True).astype("float32")[0]
-    shortlist_emb = EMBEDDINGS[shortlist_idx]
-    dense_scores = shortlist_emb @ qvec
-    rerank = np.argsort(dense_scores)[::-1][:final_k]
-    final_idx = shortlist_idx[rerank]
-    return [CHUNKS[int(i)] for i in final_idx]
-def build_context(records):
-    blocks = []
-    for i, r in enumerate(records, start=1):
-        blocks.append(
-            f"""[Source {i}]
-Book: {r.get('book','')}
-Section: {r.get('section_title','')}
-Pages: {r.get('page_start','')}-{r.get('page_end','')}
-Text:
-{r.get('text','')}"""
-        )
-    return "\n\n".join(blocks)
-def make_sources(records):
-    seen = set()
-    lines = []
-    for r in records:
-        key = (r.get("book"), r.get("section_title"), r.get("page_start"), r.get("page_end"))
-        if key in seen:
-            continue
-        seen.add(key)
-        lines.append(
-            f"• {r.get('book','')} | {r.get('section_title','')} | pp. {r.get('page_start','')}-{r.get('page_end','')}"
-        )
-    return "\n".join(lines)
-def choose_quiz_count(user_text: str, selector: str) -> int:
-    if selector in {"3", "5", "7"}:
-        return int(selector)
-    t = user_text.lower()
-    if any(k in t for k in ["mock test", "final exam", "exam practice", "full test"]):
-        return 7
-    if any(k in t for k in ["detailed", "revision", "comprehensive", "study"]):
-        return 5
-    return 3
-def language_instruction(language_mode: str) -> str:
-    if language_mode == "English":
-        return "Answer only in English."
-    if language_mode == "Spanish":
-        return "Answer only in Spanish."
-    if language_mode == "Bilingual":
-        return "Answer first in English, then provide a Spanish version under the heading 'Español:'."
-    return "If the user writes in Spanish, answer in Spanish; otherwise answer in English."
-def build_tutor_prompt(mode: str, language_mode: str, question: str, context: str) -> str:
-    mode_map = {
-        "Explain": (
-            "Explain clearly like a friendly tutor using simple language. "
-            "Use short headings if helpful."
-        ),
-        "Detailed": (
-            "Give a detailed explanation. Include key terms and clinical relevance only if supported by the context."
-        ),
-        "Short Notes": "Write concise revision notes using bullet points.",
-        "Flashcards": "Create 6 flashcards in Q/A format.",
-        "Case-Based": (
-            "Create a short clinical scenario (2–4 lines) and then explain the underlying concept using the context."
-        ),
-    }
-    return f"""
-You are BrainChat, an interactive neurology and neuroanatomy tutor.
-Rules:
-- Use ONLY the provided context from the books.
-- If the answer is not supported by the context, say exactly:
-  Not found in the course material.
-- Do not invent facts outside the context.
-- {language_instruction(language_mode)}
-Teaching style:
-{mode_map.get(mode, mode_map['Explain'])}
-Context:
-{context}
-Student question:
-{question}
-""".strip()
-def build_quiz_generation_prompt(language_mode: str, topic: str, context: str, n_questions: int) -> str:
-    return f"""
-You are BrainChat, an interactive tutor.
-Rules:
-- Use ONLY the provided context.
-- Create exactly {n_questions} quiz questions.
-- Questions should be short, clear, and course-aligned.
-- Provide a short answer key per question.
-- Return VALID JSON only.
-- {language_instruction(language_mode)}
-Required JSON format:
-{{
-  "title": "short quiz title",
-  "questions": [
-    {{"q": "question 1", "answer_key": "expected short answer"}},
-    {{"q": "question 2", "answer_key": "expected short answer"}}
-  ]
-}}
-Context:
-{context}
-Topic:
-{topic}
-""".strip()
-def build_quiz_evaluation_prompt(language_mode: str, quiz_data: dict, user_answers: str) -> str:
-    quiz_json = json.dumps(quiz_data, ensure_ascii=False)
-    return f"""
-You are BrainChat, an interactive tutor.
-Task:
-Evaluate the student's answers fairly against the answer keys.
-Accept semantically correct answers even if wording differs.
-Return VALID JSON only.
-Required JSON format:
-{{
-  "score_obtained": 0,
-  "score_total": 0,
-  "summary": "short overall feedback",
-  "results": [
-    {{
-      "question": "question text",
-      "answer_key": "expected short answer",
-      "student_answer": "student answer",
-      "result": "Correct / Partially Correct / Incorrect",
-      "feedback": "short explanation"
-    }}
-  ],
-  "improvement_tip": "one short study suggestion"
-}}
-Quiz:
-{quiz_json}
-Student answers:
-{user_answers}
-Language:
-{language_instruction(language_mode)}
-""".strip()
-def chat_text(prompt: str) -> str:
-    ensure_loaded()
-    resp = CLIENT.chat.completions.create(
-        model=MODEL_NAME_TEXT,
-        temperature=0.2,
-        messages=[
-            {"role": "system", "content": "You are a helpful educational assistant."},
-            {"role": "user", "content": prompt},
-        ],
-    )
-    return resp.choices[0].message.content.strip()
-def chat_json(prompt: str) -> dict:
-    ensure_loaded()
-    resp = CLIENT.chat.completions.create(
-        model=MODEL_NAME_TEXT,
-        temperature=0.2,
-        response_format={"type": "json_object"},
-        messages=[
-            {"role": "system", "content": "Return only valid JSON."},
-            {"role": "user", "content": prompt},
-        ],
-    )
-    return json.loads(resp.choices[0].message.content)
-# ============================================================
-# Logo + Header HTML
-# ============================================================
-def find_logo_file():
-    for name in LOGO_CANDIDATES:
-        if os.path.exists(name):
-            return name
-    return None
-def logo_img_tag(size_px: int = 88) -> str:
-    logo_file = find_logo_file()
-    if logo_file:
-        url = f"/gradio_api/file={quote(logo_file)}"
-        return f'<img src="{url}" class="bc-logo-img" width="{size_px}" height="{size_px}" alt="BrainChat logo" />'
-    return '<div class="bc-logo-fallback">BRAIN<br>CHAT</div>'
-def render_top_banner() -> str:
-    return f"""
-<div class="bc-banner">
-  <div class="bc-banner-inner">
-    <div class="bc-banner-logo">{logo_img_tag(64)}</div>
-    <div class="bc-banner-text">
-      <div class="bc-banner-title">BrainChat</div>
-      <div class="bc-banner-subtitle">Neurology & neuroanatomy tutor (book-based)</div>
-    </div>
-  </div>
-</div>
-""".strip()
-def render_phone_logo() -> str:
-    return f"""
-<div class="bc-phone-logo">
-  {logo_img_tag(84)}
-</div>
-""".strip()
-# ============================================================
-# Chat logic (with quiz state)
-# ============================================================
-def respond(message, history, mode, language_mode, quiz_count_mode, show_sources, quiz_state):
-    if history is None:
-        history = []
-    if quiz_state is None:
-        quiz_state = {"active": False, "quiz_data": None, "language_mode": "Auto"}
-    user_text = (message or "").strip()
-    if not user_text:
-        return "", history, quiz_state
-    try:
-        history = history + [{"role": "user", "content": user_text}]
-        # Quiz evaluation step
-        if quiz_state.get("active", False):
-            evaluation_prompt = build_quiz_evaluation_prompt(
-                quiz_state.get("language_mode", language_mode),
-                quiz_state.get("quiz_data", {}),
-                user_text,
-            )
-            evaluation = chat_json(evaluation_prompt)
-            lines = []
-            lines.append(f"**Score:** {evaluation.get('score_obtained', 0)}/{evaluation.get('score_total', 0)}")
-            if evaluation.get("summary"):
-                lines.append(f"\n**Overall:** {evaluation['summary']}")
-            if evaluation.get("improvement_tip"):
-                lines.append(f"\n**Tip:** {evaluation['improvement_tip']}\n")
-            results = evaluation.get("results", [])
-            if results:
-                lines.append("**Question-wise feedback:**")
-                for item in results:
-                    lines.append("")
-                    lines.append(f"**Q:** {item.get('question','')}")
-                    lines.append(f"**Your answer:** {item.get('student_answer','')}")
-                    lines.append(f"**Expected:** {item.get('answer_key','')}")
-                    lines.append(f"**Result:** {item.get('result','')}")
-                    lines.append(f"**Feedback:** {item.get('feedback','')}")
-            assistant_text = "\n".join(lines).strip()
-            history = history + [{"role": "assistant", "content": assistant_text}]
-            quiz_state = {"active": False, "quiz_data": None, "language_mode": language_mode}
-            return "", history, quiz_state
-        # Normal retrieval
-        records = search_hybrid(user_text, shortlist_k=30, final_k=5)
-        context = build_context(records)
-        # Quiz generation
-        if mode == "Quiz Me":
-            n_questions = choose_quiz_count(user_text, quiz_count_mode)
-            quiz_prompt = build_quiz_generation_prompt(language_mode, user_text, context, n_questions)
-            quiz_data = chat_json(quiz_prompt)
-            lines = []
-            lines.append(f"**{quiz_data.get('title','Quiz')}**")
-            lines.append(f"\n**Total questions:** {len(quiz_data.get('questions', []))}\n")
-            lines.append("Reply in ONE message with numbered answers, like:")
-            lines.append("1. ...")
-            lines.append("2. ...\n")
-            for i, q in enumerate(quiz_data.get("questions", []), start=1):
-                lines.append(f"**Q{i}.** {q.get('q','')}")
-            if show_sources:
-                lines.append("\n\n**Sources used to create this quiz:**")
-                lines.append(make_sources(records))
-            assistant_text = "\n".join(lines).strip()
-            history = history + [{"role": "assistant", "content": assistant_text}]
-            quiz_state = {"active": True, "quiz_data": quiz_data, "language_mode": language_mode}
-            return "", history, quiz_state
-        # Other modes
-        tutor_prompt = build_tutor_prompt(mode, language_mode, user_text, context)
-        answer = chat_text(tutor_prompt)
-        if show_sources:
-            answer = (answer or "").strip() + "\n\n**Sources:**\n" + make_sources(records)
-        history = history + [{"role": "assistant", "content": answer.strip()}]
-        return "", history, quiz_state
-    except Exception as e:
-        history = history + [{"role": "assistant", "content": f"Error: {str(e)}"}]
-        quiz_state = {"active": False, "quiz_data": None, "language_mode": language_mode}
-        return "", history, quiz_state
-def clear_all():
-    return "", [], {"active": False, "quiz_data": None, "language_mode": "Auto"}
-# ============================================================
-# CSS (Instagram-style phone mock)
-# ============================================================
-CSS = r"""
 :root{
-  --bc-page-bg: #dcdcdc;
-  --bc-grad-top: #E8C7D4;
-  --bc-grad-mid: #A55CA2;
-  --bc-grad-bot: #2B0C46;
-  --bc-yellow:  #FFF34A;
-  --bc-bot-bubble: #FAF7B4;
-  --bc-user-bubble: #FFFFFF;
-  --bc-ink: #141414;
 }
-body, .gradio-container{
-  background: var(--bc-page-bg) !important;
-  font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial;
-}
-footer{ display:none !important; }
-/* Banner */
-#bc_banner{ max-width: 980px; margin: 18px auto 8px auto; }
-.bc-banner{
-  background: linear-gradient(180deg, var(--bc-grad-top) 0%, var(--bc-grad-mid) 52%, var(--bc-grad-bot) 100%);
-  border-radius: 26px;
-  padding: 14px 16px;
-  box-shadow: 0 10px 26px rgba(0,0,0,.12);
 }
-.bc-banner-inner{ display:flex; align-items:center; gap: 12px; color: white; }
-.bc-banner-title{ font-size: 20px; font-weight: 800; line-height:1.1; }
-.bc-banner-subtitle{ font-size: 13px; opacity:.92; margin-top:2px; }
-.bc-banner-logo .bc-logo-img{ border-radius: 999px; background: var(--bc-yellow); padding: 6px; display:block; }
-.bc-logo-fallback{
-  width: 64px; height: 64px;
-  border-radius: 999px;
-  background: var(--bc-yellow);
-  display:flex; align-items:center; justify-content:center;
-  color: #111; font-weight: 900; font-size: 12px; text-align:center;
 }
-/* Settings */
-#bc_settings{ max-width: 980px; margin: 0 auto 10px auto; }
-#bc_settings .label{ font-weight: 700; }
-/* Phone */
-#bc_phone{
-  max-width: 420px;
-  margin: 0 auto 18px auto;
-  border-radius: 38px;
-  background: linear-gradient(180deg, var(--bc-grad-top) 0%, var(--bc-grad-mid) 45%, var(--bc-grad-bot) 100%);
-  box-shadow: 0 18px 40px rgba(0,0,0,.18);
-  border: 1px solid rgba(255,255,255,.22);
-  padding: 14px 14px 12px 14px;
-  position: relative;
 }
-/* Floating logo in phone */
-#bc_phone_logo{
-  position: absolute;
-  top: 12px;
-  left: 50%;
-  transform: translateX(-50%);
-  z-index: 10;
 }
-.bc-phone-logo{
-  width: 92px; height: 92px;
-  border-radius: 999px;
-  background: var(--bc-yellow);
-  display:flex; align-items:center; justify-content:center;
-  box-shadow: 0 10px 22px rgba(0,0,0,.18);
 }
-.bc-phone-logo .bc-logo-img{
-  width: 84px !important; height: 84px !important; object-fit: contain;
 }
-/* Push chat down under logo */
-#bc_chatbot{ margin-top: 92px; }
-/* Chatbot transparent */
-#bc_chatbot, #bc_chatbot > div{
-  background: transparent !important;
-  border: none !important;
-  box-shadow: none !important;
 }
-#bc_chatbot .toolbar{ display:none !important; }
-/* Bubble styling via internal testid markers */
-#bc_chatbot button[data-testid="user"],
-#bc_chatbot button[data-testid="bot"]{
-  max-width: 82%;
-  border-radius: 18px !important;
-  padding: 12px 14px !important;
-  color: var(--bc-ink) !important;
-  box-shadow: 0 8px 18px rgba(0,0,0,.10);
-  border: 0 !important;
-  line-height: 1.35;
-  font-size: 14px;
 }
-/* User bubble white */
-#bc_chatbot button[data-testid="user"]{
-  background: var(--bc-user-bubble) !important;
 }
-/* Bot bubble pale yellow */
-#bc_chatbot button[data-testid="bot"]{
-  background: var(--bc-bot-bubble) !important;
 }
-/* Bubble tails */
-#bc_chatbot button[data-testid="user"]::after{
-  content:"";
-  position:absolute;
-  right:-7px;
-  bottom: 12px;
-  width:0; height:0;
-  border-left: 10px solid var(--bc-user-bubble);
-  border-top: 8px solid transparent;
-  border-bottom: 8px solid transparent;
 }
-#bc_chatbot button[data-testid="bot"]::before{
-  content:"";
-  position:absolute;
-  left:-7px;
-  bottom: 12px;
-  width:0; height:0;
-  border-right: 10px solid var(--bc-bot-bubble);
-  border-top: 8px solid transparent;
-  border-bottom: 8px solid transparent;
 }
-/* Input bar */
-#bc_input_row{
-  margin-top: 10px;
-  background: rgba(255,243,74,.96);
-  border-radius: 999px;
-  padding: 10px 10px;
-  box-shadow: 0 10px 22px rgba(0,0,0,.14);
-  align-items: center;
 }
-#bc_plus{
-  width: 34px; height: 34px;
-  border-radius: 999px;
-  display:flex;
-  align-items:center;
-  justify-content:center;
-  font-weight: 900;
-  color: var(--bc-grad-bot);
-  background: rgba(255,255,255,.35);
-  user-select: none;
 }
-#bc_msg textarea{
-  background: rgba(255,255,255,.35) !important;
-  border-radius: 999px !important;
-  border: none !important;
-  padding: 10px 12px !important;
-  color: var(--bc-grad-bot) !important;
-  box-shadow: none !important;
 }
-#bc_send{
-  min-width: 42px !important;
-  height: 38px !important;
-  border-radius: 999px !important;
-  border: none !important;
-  background: rgba(255,255,255,.35) !important;
-  color: var(--bc-grad-bot) !important;
-  font-size: 18px !important;
-  font-weight: 900 !important;
 }
-#bc_send:hover{ background: rgba(255,255,255,.55) !important; }
-/* Clear */
-#bc_clear{
-  max-width: 420px;
-  margin: 10px auto 0 auto;
-  border-radius: 14px !important;
 }
-@media (max-width: 480px){
-  #bc_phone{ max-width: 95vw; }
-  #bc_chatbot button[data-testid="user"],
-  #bc_chatbot button[data-testid="bot"]{
-    max-width: 88%;
-    font-size: 14px;
-  }
 }
 """
-# ============================================================
-# UI
-# ============================================================
-with gr.Blocks() as demo:
-    quiz_state = gr.State({"active": False, "quiz_data": None, "language_mode": "Auto"})
-    gr.HTML(render_top_banner(), elem_id="bc_banner")
-    with gr.Accordion("Settings", open=False, elem_id="bc_settings"):
-        mode = gr.Dropdown(
-            choices=["Explain", "Detailed", "Short Notes", "Flashcards", "Case-Based", "Quiz Me"],
-            value="Explain",
-            label="Tutor Mode",
-        )
-        language_mode = gr.Dropdown(
-            choices=["Auto", "English", "Spanish", "Bilingual"],
-            value="Auto",
-            label="Answer Language",
-        )
-        quiz_count_mode = gr.Dropdown(
-            choices=["Auto", "3", "5", "7"],
-            value="Auto",
-            label="Quiz Questions",
-        )
-        show_sources = gr.Checkbox(value=True, label="Show Sources")
-    with gr.Group(elem_id="bc_phone"):
-        gr.HTML(render_phone_logo(), elem_id="bc_phone_logo")
-        chatbot = gr.Chatbot(
-            value=[],
-            elem_id="bc_chatbot",
-            height=560,
-            layout="bubble",
-            container=False,
-            show_label=False,
-            autoscroll=True,
-            buttons=[],
-            placeholder="Ask a question or type a topic…",
-        )
-        with gr.Row(elem_id="bc_input_row"):
-            gr.HTML("<div>+</div>", elem_id="bc_plus")
-            msg = gr.Textbox(
-                placeholder="Type a message…",
-                show_label=False,
-                container=False,
-                scale=8,
-                elem_id="bc_msg",
             )
-            send_btn = gr.Button("➤", elem_id="bc_send", scale=1)
-    clear_btn = gr.Button("Clear chat", elem_id="bc_clear")
-    msg.submit(
         respond,
-        inputs=[msg, chatbot, mode, language_mode, quiz_count_mode, show_sources, quiz_state],
-        outputs=[msg, chatbot, quiz_state],
     )
-    send_btn.click(
         respond,
-        inputs=[msg, chatbot, mode, language_mode, quiz_count_mode, show_sources, quiz_state],
-        outputs=[msg, chatbot, quiz_state],
     )
     clear_btn.click(
-        clear_all,
-        inputs=None,
-        outputs=[msg, chatbot, quiz_state],
-        queue=False,
     )
 if __name__ == "__main__":
-    demo.launch(css=CSS)

+from __future__ import annotations
 import os
 import re
+import html
+from pathlib import Path
+from typing import List, Tuple, Dict, Optional
 import gradio as gr
+# Optional readers
+try:
+    from pypdf import PdfReader
+except Exception:
+    PdfReader = None
+try:
+    import docx
+except Exception:
+    docx = None
+# =========================================================
+# CONFIG
+# =========================================================
+APP_TITLE = "BrainChat"
+APP_SUBTITLE = "Neurology & neuroanatomy tutor"
+NOT_FOUND_TEXT = "Not found in the course material."
+SEARCH_DIRS = [
+    Path("."),
+    Path("./brainchat_build"),
+    Path("/home/user/app"),
 ]
+SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx"}
+STOPWORDS = {
+    "the", "is", "am", "are", "was", "were", "be", "been", "being",
+    "a", "an", "and", "or", "of", "to", "in", "on", "for", "with",
+    "by", "from", "as", "at", "that", "this", "these", "those",
+    "it", "its", "into", "about", "what", "which", "who", "whom",
+    "why", "how", "when", "where", "do", "does", "did", "can",
+    "could", "would", "should", "will", "shall", "i", "you", "we",
+    "they", "he", "she", "them", "his", "her", "their", "our", "your",
+    "my", "me", "us", "if", "then", "than", "also", "there", "here"
+}
+# =========================================================
+# HELPERS
+# =========================================================
+def normalize_spaces(text: str) -> str:
+    return re.sub(r"\s+", " ", text).strip()
+def tokenize(text: str) -> List[str]:
+    words = re.findall(r"[a-zA-Z0-9\-]+", text.lower())
+    return [w for w in words if w not in STOPWORDS and len(w) > 1]
+def chunk_text(text: str, chunk_size: int = 900, overlap: int = 150) -> List[str]:
+    text = normalize_spaces(text)
+    if not text:
+        return []
+    chunks = []
+    start = 0
+    n = len(text)
+    while start < n:
+        end = min(start + chunk_size, n)
+        chunk = text[start:end]
+        if end < n:
+            last_period = chunk.rfind(". ")
+            last_newline = chunk.rfind("\n")
+            cut = max(last_period, last_newline)
+            if cut > 400:
+                chunk = chunk[:cut + 1]
+        chunks.append(chunk.strip())
+        if end >= n:
+            break
+        start = max(0, start + len(chunk) - overlap)
+    return [c for c in chunks if c]
+def safe_read_text_file(path: Path) -> str:
+    try:
+        return path.read_text(encoding="utf-8", errors="ignore")
+    except Exception:
+        try:
+            return path.read_text(encoding="latin-1", errors="ignore")
+        except Exception:
+            return ""
+def read_pdf(path: Path) -> str:
+    if PdfReader is None:
+        return ""
+    try:
+        reader = PdfReader(str(path))
+        parts = []
+        for page in reader.pages:
+            try:
+                parts.append(page.extract_text() or "")
+            except Exception:
+                continue
+        return "\n".join(parts)
+    except Exception:
+        return ""
+def read_docx(path: Path) -> str:
+    if docx is None:
+        return ""
+    try:
+        d = docx.Document(str(path))
+        return "\n".join(p.text for p in d.paragraphs if p.text.strip())
+    except Exception:
+        return ""
+def extract_text_from_file(path: Path) -> str:
+    suffix = path.suffix.lower()
+    if suffix in {".txt", ".md"}:
+        return safe_read_text_file(path)
+    if suffix == ".pdf":
+        return read_pdf(path)
+    if suffix == ".docx":
+        return read_docx(path)
+    return ""
+def find_asset(possible_names: List[str]) -> Optional[str]:
+    lowered = [x.lower() for x in possible_names]
+    for d in SEARCH_DIRS:
+        if d.exists():
+            for name in possible_names:
+                p = d / name
+                if p.exists() and p.is_file():
+                    return str(p)
+    for d in SEARCH_DIRS:
+        if d.exists():
+            for p in d.rglob("*"):
+                if p.is_file() and p.name.lower() in lowered:
+                    return str(p)
+    return None
+LOGO_PATH = find_asset([
+    "Brain chat-09.png",
+    "BrainChat-09.png",
+    "brain chat-09.png",
+    "brainchat-09.png",
+    "BrainChat_logo.png",
+    "brainchat_logo.png",
+    "logo.png",
+    "Logo.png",
+])
+# =========================================================
+# KNOWLEDGE BASE
+# =========================================================
+class LocalKnowledgeBase:
+    def __init__(self) -> None:
+        self.docs: List[Dict] = []
+        self.chunks: List[Dict] = []
+    def clear(self) -> None:
+        self.docs = []
+        self.chunks = []
+    def add_document(self, source_name: str, text: str) -> None:
+        text = normalize_spaces(text)
+        if not text:
+            return
+        doc_id = len(self.docs)
+        self.docs.append({"doc_id": doc_id, "source": source_name, "text": text})
+        for idx, chunk in enumerate(chunk_text(text)):
+            tokens = set(tokenize(chunk))
+            self.chunks.append({
+                "doc_id": doc_id,
+                "source": source_name,
+                "chunk_id": idx,
+                "text": chunk,
+                "tokens": tokens
+            })
+    def load_from_directories(self) -> None:
+        seen = set()
+        for base in SEARCH_DIRS:
+            if not base.exists():
+                continue
+            for path in base.rglob("*"):
+                if not path.is_file():
+                    continue
+                if path.suffix.lower() not in SUPPORTED_EXTENSIONS:
+                    continue
+                if path.name.startswith("."):
+                    continue
+                key = str(path.resolve())
+                if key in seen:
+                    continue
+                seen.add(key)
+                text = extract_text_from_file(path)
+                if text.strip():
+                    self.add_document(path.name, text)
+    def search(self, query: str, top_k: int = 5) -> List[Dict]:
+        q_tokens = set(tokenize(query))
+        if not q_tokens:
+            return []
+        scored = []
+        for item in self.chunks:
+            overlap = len(q_tokens.intersection(item["tokens"]))
+            if overlap == 0:
+                continue
+            score = overlap / max(1, len(q_tokens))
+            scored.append((score, item))
+        scored.sort(key=lambda x: x[0], reverse=True)
+        unique = []
+        seen_text = set()
+        for score, item in scored:
+            key = (item["source"], item["chunk_id"])
+            if key in seen_text:
+                continue
+            seen_text.add(key)
+            result = dict(item)
+            result["score"] = score
+            unique.append(result)
+            if len(unique) >= top_k:
+                break
+        return unique
+KB = LocalKnowledgeBase()
+KB.load_from_directories()
+# =========================================================
+# RESPONSE LOGIC
+# =========================================================
+def build_answer_from_hits(query: str, hits: List[Dict], tutor_mode: str) -> Tuple[str, List[str]]:
+    if not hits:
+        return NOT_FOUND_TEXT, []
+    mode = (tutor_mode or "Detailed").lower()
+    max_snippets = 2 if mode == "brief" else 4
+    selected = hits[:max_snippets]
+    snippets = []
+    sources = []
+    seen_sources = set()
+    for h in selected:
+        snippet = h["text"].strip()
+        if len(snippet) > 420 and mode == "brief":
+            snippet = snippet[:420].rsplit(" ", 1)[0] + "..."
+        elif len(snippet) > 750 and mode != "brief":
+            snippet = snippet[:750].rsplit(" ", 1)[0] + "..."
+        snippets.append(snippet)
+        source_label = h["source"]
+        if source_label not in seen_sources:
+            seen_sources.add(source_label)
+            sources.append(source_label)
+    if mode == "brief":
+        answer = "\n\n".join(snippets[:2])
+    else:
+        intro = f"Here is what I found related to: “{query.strip()}”\n\n"
+        answer = intro + "\n\n".join(snippets)
+    return answer.strip(), sources
+def format_answer(answer_text: str, sources: List[str], show_sources: bool) -> str:
+    answer_text = (answer_text or "").strip()
+    if not answer_text or answer_text.lower() == NOT_FOUND_TEXT.lower():
+        return NOT_FOUND_TEXT
+    if show_sources and sources:
+        src_lines = "\n".join(f"- {s}" for s in sources if str(s).strip())
+        if src_lines.strip():
+            return f"{answer_text}\n\n**Sources:**\n{src_lines}"
+    return answer_text
+def get_answer_and_sources(
+    message: str,
+    tutor_mode: str,
+    answer_language: str,
+    quiz_questions: str
+) -> Tuple[str, List[str]]:
+    msg = (message or "").strip()
+    if not msg:
+        return "Please type a question.", []
+    lower_msg = msg.lower().strip()
+    if lower_msg in {"hi", "hello", "hey"}:
+        return (
+            "Hello. Ask me anything from your uploaded neurology or neuroanatomy material.",
+            []
+        )
+    if "quiz" in lower_msg:
+        hits = KB.search(msg, top_k=5)
+        if not hits:
+            return NOT_FOUND_TEXT, []
+        qn = 5 if str(quiz_questions).lower() == "auto" else int(quiz_questions)
+        base_text = hits[0]["text"]
+        quiz = [f"**Mini Quiz ({qn} questions)**"]
+        words = [w for w in re.findall(r"[A-Za-z][A-Za-z\-]+", base_text) if len(w) > 5][:qn]
+        for i, w in enumerate(words[:qn], 1):
+            quiz.append(f"{i}. Explain the term **{w}** in simple words.")
+        return "\n".join(quiz), [hits[0]["source"]]
+    hits = KB.search(msg, top_k=5)
+    return build_answer_from_hits(msg, hits, tutor_mode)
+# =========================================================
+# UI
+# =========================================================
+# Stylesheet handed to gr.Blocks(css=...) below.
+# - The #id selectors match the elem_id values set on the layout components
+#   (main_shell, settings_card, chat_card, chatbot, send_btn, clear_btn,
+#   upload_btn, reload_btn) and the ids emitted by build_header_html()
+#   (topbar, brand_row, brand_logo, brand_fallback, brand_title, brand_subtitle).
+# - .small_hint is the class given to the hint Markdown in the Settings panel.
+# - Of the custom properties declared in :root, only --text, --border and
+#   --muted are referenced through var() inside this string; the other
+#   colours are repeated as literal hex values in the rules.
+CUSTOM_CSS = """
 :root{
+    --bg-main: #f6f7ff;
+    --panel: #ffffff;
+    --text: #202545;
+    --muted: #616889;
+    --primary: #6d28ff;
+    --secondary: #ff48c4;
+    --accent: #ffe94d;
+    --accent2: #45d8ff;
+    --border: #dde4ff;
 }
+html, body, .gradio-container {
+    background: linear-gradient(180deg, #f6f7ff 0%, #fffbe0 100%) !important;
+    color: var(--text) !important;
+    font-family: "Segoe UI", Arial, sans-serif !important;
 }
+#main_shell {
+    max-width: 1200px;
+    margin: 18px auto;
+    padding: 0 10px 18px 10px;
 }
+#topbar {
+    background: linear-gradient(90deg, #6d28ff 0%, #ff48c4 60%, #ffe94d 100%);
+    border-radius: 28px;
+    padding: 16px 18px;
+    box-shadow: 0 12px 28px rgba(80, 64, 170, 0.22);
+    border: 2px solid rgba(255,255,255,0.65);
+    margin-bottom: 16px;
 }
+#brand_row {
+    display: flex;
+    align-items: center;
+    gap: 14px;
 }
+#brand_logo {
+    width: 74px;
+    height: 74px;
+    border-radius: 18px;
+    object-fit: cover;
+    background: white;
+    padding: 4px;
+    box-shadow: 0 6px 18px rgba(0,0,0,0.15);
 }
+#brand_fallback {
+    width: 74px;
+    height: 74px;
+    border-radius: 18px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    background: white;
+    color: #6d28ff;
+    font-size: 24px;
+    font-weight: 800;
+    box-shadow: 0 6px 18px rgba(0,0,0,0.15);
 }
+#brand_title {
+    font-size: 30px;
+    font-weight: 800;
+    color: white;
+    line-height: 1.1;
+    margin: 0;
+}
+#brand_subtitle {
+    font-size: 15px;
+    color: #fffdfd;
+    font-weight: 600;
+    margin-top: 2px;
 }
+#settings_card, #chat_card {
+    background: rgba(255,255,255,0.95) !important;
+    border: 2px solid var(--border) !important;
+    border-radius: 22px !important;
+    box-shadow: 0 10px 26px rgba(71, 89, 160, 0.10);
 }
+#chatbot {
+    background: linear-gradient(180deg, #fffdf6 0%, #f8fbff 100%) !important;
+    border-radius: 18px !important;
 }
+#chatbot .message.user {
+    background: linear-gradient(90deg, #6d28ff, #9247ff) !important;
+    color: white !important;
+    border-radius: 18px !important;
 }
+#chatbot .message.bot {
+    background: #fff7bf !important;
+    color: #202545 !important;
+    border: 1px solid #eedb66 !important;
+    border-radius: 18px !important;
 }
+textarea, input, .wrap textarea {
+    border-radius: 16px !important;
+    border: 2px solid #d8def8 !important;
+    background: white !important;
+    color: var(--text) !important;
 }
+button {
+    border-radius: 16px !important;
+    border: none !important;
+    font-weight: 700 !important;
 }
+#send_btn {
+    background: linear-gradient(90deg, #6d28ff, #ff48c4) !important;
+    color: white !important;
 }
+#clear_btn {
+    background: linear-gradient(90deg, #ffe94d, #ffd930) !important;
+    color: #3a3000 !important;
 }
+#upload_btn {
+    background: linear-gradient(90deg, #45d8ff, #6ef0c0) !important;
+    color: #07304d !important;
 }
+#reload_btn {
+    background: linear-gradient(90deg, #ffffff, #f2f5ff) !important;
+    color: #334 !important;
+    border: 2px solid #dce3ff !important;
 }
+.small_hint {
+    color: var(--muted);
+    font-size: 13px;
+    margin-top: -4px;
 }
 """
+def build_header_html() -> str:
+    if LOGO_PATH:
+        logo_html = f'<img id="brand_logo" src="/file={LOGO_PATH}" alt="BrainChat logo">'
+    else:
+        logo_html = '<div id="brand_fallback">BC</div>'
+    return f"""
+    <div id="topbar">
+        <div id="brand_row">
+            {logo_html}
+            <div>
+                <div id="brand_title">{html.escape(APP_TITLE)}</div>
+                <div id="brand_subtitle">{html.escape(APP_SUBTITLE)}</div>
+            </div>
+        </div>
+    </div>
+    """
+def respond(message, history, tutor_mode, answer_language, quiz_questions, show_sources):
+    history = history or []
+    answer_text, sources = get_answer_and_sources(
+        message=message,
+        tutor_mode=tutor_mode,
+        answer_language=answer_language,
+        quiz_questions=quiz_questions
+    )
+    final_text = format_answer(
+        answer_text=answer_text,
+        sources=sources,
+        show_sources=show_sources
+    )
+    history.append((message, final_text))
+    return history, ""
+def clear_chat():
+    return [], ""
+def reload_materials():
+    global KB
+    KB = LocalKnowledgeBase()
+    KB.load_from_directories()
+    return "Course materials reloaded."
+def upload_files(files):
+    if not files:
+        return "No file uploaded."
+    added = 0
+    for f in files:
+        try:
+            path = Path(f.name)
+            text = extract_text_from_file(path)
+            if text.strip():
+                KB.add_document(path.name, text)
+                added += 1
+        except Exception:
+            continue
+    if added == 0:
+        return "No readable text was found in the uploaded file(s)."
+    return f"{added} file(s) added to the course material."
+with gr.Blocks(css=CUSTOM_CSS, title=APP_TITLE) as demo:
+    with gr.Column(elem_id="main_shell"):
+        gr.HTML(build_header_html())
+        with gr.Accordion("Settings", open=False, elem_id="settings_card"):
+            tutor_mode = gr.Dropdown(
+                ["Brief", "Detailed"],
+                value="Detailed",
+                label="Tutor Mode"
+            )
+            answer_language = gr.Dropdown(
+                ["Auto", "English", "Spanish"],
+                value="Auto",
+                label="Answer Language"
+            )
+            quiz_questions = gr.Dropdown(
+                ["Auto", "5", "10"],
+                value="Auto",
+                label="Quiz Questions"
+            )
+            show_sources = gr.Checkbox(
+                value=True,
+                label="Show Sources"
+            )
+            gr.Markdown(
+                "Sources are shown only when useful text is found.",
+                elem_classes=["small_hint"]
             )
+        with gr.Column(elem_id="chat_card"):
+            chatbot = gr.Chatbot(
+                height=520,
+                elem_id="chatbot",
+                show_label=False
+            )
+            with gr.Row():
+                file_input = gr.File(
+                    file_count="multiple",
+                    file_types=[".txt", ".md", ".pdf", ".docx"],
+                    label="",
+                    scale=2
+                )
+                msg = gr.Textbox(
+                    placeholder="Ask a question about neurology or neuroanatomy...",
+                    show_label=False,
+                    scale=6
+                )
+            with gr.Row():
+                upload_btn = gr.Button("Upload Files", elem_id="upload_btn")
+                reload_btn = gr.Button("Reload Materials", elem_id="reload_btn")
+                clear_btn = gr.Button("Clear Chat", elem_id="clear_btn")
+                send_btn = gr.Button("Send", elem_id="send_btn")
+            status_box = gr.Markdown("Ready.")
+    send_btn.click(
         respond,
+        inputs=[msg, chatbot, tutor_mode, answer_language, quiz_questions, show_sources],
+        outputs=[chatbot, msg]
     )
+    msg.submit(
         respond,
+        inputs=[msg, chatbot, tutor_mode, answer_language, quiz_questions, show_sources],
+        outputs=[chatbot, msg]
     )
     clear_btn.click(
+        clear_chat,
+        outputs=[chatbot, msg]
+    )
+    reload_btn.click(
+        reload_materials,
+        outputs=[status_box]
+    )
+    upload_btn.click(
+        upload_files,
+        inputs=[file_input],
+        outputs=[status_box]
     )
 if __name__ == "__main__":
+    demo.launch()