Deevyankar committed on
Commit
3c5d9f9
·
verified ·
1 Parent(s): 7f068fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -49
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import subprocess
3
  import gradio as gr
4
  import chromadb
5
 
@@ -13,44 +12,21 @@ INDEX = None
13
 
14
 
15
  def get_persist_dir():
16
- return "/data/chroma" if os.path.exists("/data") else "storage/chroma"
17
 
18
 
19
- def processed_text_exists():
20
- chapter_dir = "processed/chapters"
21
- return os.path.exists(chapter_dir) and any(
22
- f.endswith(".txt") for f in os.listdir(chapter_dir)
23
- )
24
-
25
-
26
- def vector_db_exists():
27
  persist_dir = get_persist_dir()
28
- return os.path.exists(persist_dir) and len(os.listdir(persist_dir)) > 0
29
-
30
-
31
- def run_extract_if_needed():
32
- if not processed_text_exists():
33
- print("No processed chapter text found. Running extraction...")
34
- subprocess.check_call(["python", "extract_all_pdfs_chapterwise.py"])
35
- else:
36
- print("Processed chapter text already exists. Skipping extraction.")
37
-
38
-
39
- def run_ingest_if_needed():
40
- if not vector_db_exists():
41
- print("No vector DB found. Running ingestion...")
42
- subprocess.check_call(["python", "ingest.py"])
43
- else:
44
- print("Vector DB already exists. Skipping ingestion.")
45
-
46
-
47
- def ensure_everything_ready():
48
- run_extract_if_needed()
49
- run_ingest_if_needed()
50
 
 
 
 
 
51
 
52
- def load_index():
53
- persist_dir = get_persist_dir()
 
 
54
 
55
  client = chromadb.PersistentClient(path=persist_dir)
56
  collection = client.get_or_create_collection(COLLECTION_NAME)
@@ -59,7 +35,7 @@ def load_index():
59
  storage_context = StorageContext.from_defaults(vector_store=vector_store)
60
 
61
  embed_model = HuggingFaceEmbedding(
62
- model_name="intfloat/multilingual-e5-base"
63
  )
64
 
65
  return VectorStoreIndex.from_vector_store(
@@ -72,18 +48,24 @@ def load_index():
72
  def get_index():
73
  global INDEX
74
  if INDEX is None:
75
- ensure_everything_ready()
76
  INDEX = load_index()
77
  return INDEX
78
 
79
 
80
- def chat_fn(message, history):
 
 
 
81
  if not os.getenv("OPENAI_API_KEY"):
82
- return "OPENAI_API_KEY missing. Add it in Hugging Face Space secrets."
83
 
84
  try:
85
  index = get_index()
86
- llm = OpenAI(model="gpt-4o-mini", temperature=0.2)
 
 
 
 
87
 
88
  query_engine = index.as_query_engine(
89
  llm=llm,
@@ -91,13 +73,20 @@ def chat_fn(message, history):
91
  response_mode="compact"
92
  )
93
 
94
- prompt = (
95
- "You are an interactive neurology tutor. "
96
- "Answer only from the retrieved course material. "
97
- "If the answer is not found, say: 'Not found in the course material.' "
98
- "Keep answers concise unless the user asks for detail.\n\n"
99
- f"Question: {message.strip()}"
100
- )
 
 
 
 
 
 
 
101
 
102
  response = query_engine.query(prompt)
103
  return str(response)
@@ -108,12 +97,12 @@ def chat_fn(message, history):
108
 
109
  with gr.Blocks() as demo:
110
  gr.Markdown("# 🧠 BrainChat")
111
- gr.Markdown("Automatic pipeline: PDF extraction chapter text vector DB → chatbot")
112
 
113
  gr.ChatInterface(
114
- fn=chat_fn,
115
  title="Neurology Tutor",
116
- description="Ask questions from your uploaded neurology PDFs.",
117
  textbox=gr.Textbox(
118
  placeholder="Ask a question...",
119
  lines=1
 
1
  import os
 
2
  import gradio as gr
3
  import chromadb
4
 
 
12
 
13
 
14
def get_persist_dir():
    """Return the path of the folder that holds the prebuilt Chroma DB.

    The Space ships with the database committed under ``storage/chroma``,
    so the location is a fixed relative path.
    """
    persist_dir = "storage/chroma"
    return persist_dir
16
 
17
 
18
+ def load_index():
 
 
 
 
 
 
 
19
  persist_dir = get_persist_dir()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ if not os.path.exists(persist_dir):
22
+ raise FileNotFoundError(
23
+ f"Folder not found: {persist_dir}. Upload your prebuilt Chroma DB first."
24
+ )
25
 
26
+ if len(os.listdir(persist_dir)) == 0:
27
+ raise FileNotFoundError(
28
+ f"Folder is empty: {persist_dir}. Upload your prebuilt Chroma DB first."
29
+ )
30
 
31
  client = chromadb.PersistentClient(path=persist_dir)
32
  collection = client.get_or_create_collection(COLLECTION_NAME)
 
35
  storage_context = StorageContext.from_defaults(vector_store=vector_store)
36
 
37
  embed_model = HuggingFaceEmbedding(
38
+ model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
39
  )
40
 
41
  return VectorStoreIndex.from_vector_store(
 
48
def get_index():
    """Return the process-wide VectorStoreIndex, building it on first use.

    Uses the module-level ``INDEX`` as a lazy singleton cache so the
    Chroma-backed index is loaded at most once per process.
    """
    global INDEX
    if INDEX is not None:
        return INDEX
    INDEX = load_index()
    return INDEX
53
 
54
 
55
+ def ask_brainchat(message, history):
56
+ if not message or not message.strip():
57
+ return "Please type a question."
58
+
59
  if not os.getenv("OPENAI_API_KEY"):
60
+ return "OPENAI_API_KEY is missing. Add it in Hugging Face Space Secrets."
61
 
62
  try:
63
  index = get_index()
64
+
65
+ llm = OpenAI(
66
+ model="gpt-4o-mini",
67
+ temperature=0.2
68
+ )
69
 
70
  query_engine = index.as_query_engine(
71
  llm=llm,
 
73
  response_mode="compact"
74
  )
75
 
76
+ prompt = f"""
77
+ You are BrainChat, a neurology and neuroanatomy tutor.
78
+
79
+ Rules:
80
+ - Answer only from the retrieved textbook/course material.
81
+ - If the answer is not supported by the retrieved material, say:
82
+ "Not found in the course material."
83
+ - Keep the answer clear and concise unless the user asks for more detail.
84
+ - If the question is in Spanish, answer in Spanish.
85
+ - If the question is in English, answer in English.
86
+
87
+ Question:
88
+ {message}
89
+ """
90
 
91
  response = query_engine.query(prompt)
92
  return str(response)
 
97
 
98
  with gr.Blocks() as demo:
99
  gr.Markdown("# 🧠 BrainChat")
100
+ gr.Markdown("Ask questions from the uploaded neuroscience and neuroanatomy books.")
101
 
102
  gr.ChatInterface(
103
+ fn=ask_brainchat,
104
  title="Neurology Tutor",
105
+ description="This Space loads a prebuilt Chroma database from storage/chroma.",
106
  textbox=gr.Textbox(
107
  placeholder="Ask a question...",
108
  lines=1