Deevyankar committed on
Commit
3c5d9f9
·
verified ·
1 Parent(s): 7f068fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -49
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import subprocess
3
  import gradio as gr
4
  import chromadb
5
 
@@ -13,44 +12,21 @@ INDEX = None
13
 
14
 
15
  def get_persist_dir():
16
- return "/data/chroma" if os.path.exists("/data") else "storage/chroma"
17
 
18
 
19
- def processed_text_exists():
20
- chapter_dir = "processed/chapters"
21
- return os.path.exists(chapter_dir) and any(
22
- f.endswith(".txt") for f in os.listdir(chapter_dir)
23
- )
24
-
25
-
26
- def vector_db_exists():
27
  persist_dir = get_persist_dir()
28
- return os.path.exists(persist_dir) and len(os.listdir(persist_dir)) > 0
29
-
30
-
31
- def run_extract_if_needed():
32
- if not processed_text_exists():
33
- print("No processed chapter text found. Running extraction...")
34
- subprocess.check_call(["python", "extract_all_pdfs_chapterwise.py"])
35
- else:
36
- print("Processed chapter text already exists. Skipping extraction.")
37
-
38
-
39
- def run_ingest_if_needed():
40
- if not vector_db_exists():
41
- print("No vector DB found. Running ingestion...")
42
- subprocess.check_call(["python", "ingest.py"])
43
- else:
44
- print("Vector DB already exists. Skipping ingestion.")
45
-
46
-
47
- def ensure_everything_ready():
48
- run_extract_if_needed()
49
- run_ingest_if_needed()
50
 
 
 
 
 
51
 
52
- def load_index():
53
- persist_dir = get_persist_dir()
 
 
54
 
55
  client = chromadb.PersistentClient(path=persist_dir)
56
  collection = client.get_or_create_collection(COLLECTION_NAME)
@@ -59,7 +35,7 @@ def load_index():
59
  storage_context = StorageContext.from_defaults(vector_store=vector_store)
60
 
61
  embed_model = HuggingFaceEmbedding(
62
- model_name="intfloat/multilingual-e5-base"
63
  )
64
 
65
  return VectorStoreIndex.from_vector_store(
@@ -72,18 +48,24 @@ def load_index():
72
  def get_index():
73
  global INDEX
74
  if INDEX is None:
75
- ensure_everything_ready()
76
  INDEX = load_index()
77
  return INDEX
78
 
79
 
80
- def chat_fn(message, history):
 
 
 
81
  if not os.getenv("OPENAI_API_KEY"):
82
- return "OPENAI_API_KEY missing. Add it in Hugging Face Space secrets."
83
 
84
  try:
85
  index = get_index()
86
- llm = OpenAI(model="gpt-4o-mini", temperature=0.2)
 
 
 
 
87
 
88
  query_engine = index.as_query_engine(
89
  llm=llm,
@@ -91,13 +73,20 @@ def chat_fn(message, history):
91
  response_mode="compact"
92
  )
93
 
94
- prompt = (
95
- "You are an interactive neurology tutor. "
96
- "Answer only from the retrieved course material. "
97
- "If the answer is not found, say: 'Not found in the course material.' "
98
- "Keep answers concise unless the user asks for detail.\n\n"
99
- f"Question: {message.strip()}"
100
- )
 
 
 
 
 
 
 
101
 
102
  response = query_engine.query(prompt)
103
  return str(response)
@@ -108,12 +97,12 @@ def chat_fn(message, history):
108
 
109
  with gr.Blocks() as demo:
110
  gr.Markdown("# 🧠 BrainChat")
111
- gr.Markdown("Automatic pipeline: PDF extraction chapter text vector DB → chatbot")
112
 
113
  gr.ChatInterface(
114
- fn=chat_fn,
115
  title="Neurology Tutor",
116
- description="Ask questions from your uploaded neurology PDFs.",
117
  textbox=gr.Textbox(
118
  placeholder="Ask a question...",
119
  lines=1
 
1
  import os
 
2
  import gradio as gr
3
  import chromadb
4
 
 
12
 
13
 
14
def get_persist_dir():
    """Return the path of the folder that holds the prebuilt Chroma DB.

    The Space ships with the database committed under ``storage/chroma``,
    so the location is a fixed relative path.
    """
    persist_dir = "storage/chroma"
    return persist_dir
16
 
17
 
18
+ def load_index():
 
 
 
 
 
 
 
19
  persist_dir = get_persist_dir()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ if not os.path.exists(persist_dir):
22
+ raise FileNotFoundError(
23
+ f"Folder not found: {persist_dir}. Upload your prebuilt Chroma DB first."
24
+ )
25
 
26
+ if len(os.listdir(persist_dir)) == 0:
27
+ raise FileNotFoundError(
28
+ f"Folder is empty: {persist_dir}. Upload your prebuilt Chroma DB first."
29
+ )
30
 
31
  client = chromadb.PersistentClient(path=persist_dir)
32
  collection = client.get_or_create_collection(COLLECTION_NAME)
 
35
  storage_context = StorageContext.from_defaults(vector_store=vector_store)
36
 
37
  embed_model = HuggingFaceEmbedding(
38
+ model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
39
  )
40
 
41
  return VectorStoreIndex.from_vector_store(
 
48
def get_index():
    """Return the process-wide VectorStoreIndex, building it on first use.

    Uses the module-level ``INDEX`` as a lazy singleton cache so the
    Chroma-backed index is loaded at most once per process.
    """
    global INDEX
    if INDEX is not None:
        return INDEX
    INDEX = load_index()
    return INDEX
53
 
54
 
55
+ def ask_brainchat(message, history):
56
+ if not message or not message.strip():
57
+ return "Please type a question."
58
+
59
  if not os.getenv("OPENAI_API_KEY"):
60
+ return "OPENAI_API_KEY is missing. Add it in Hugging Face Space Secrets."
61
 
62
  try:
63
  index = get_index()
64
+
65
+ llm = OpenAI(
66
+ model="gpt-4o-mini",
67
+ temperature=0.2
68
+ )
69
 
70
  query_engine = index.as_query_engine(
71
  llm=llm,
 
73
  response_mode="compact"
74
  )
75
 
76
+ prompt = f"""
77
+ You are BrainChat, a neurology and neuroanatomy tutor.
78
+
79
+ Rules:
80
+ - Answer only from the retrieved textbook/course material.
81
+ - If the answer is not supported by the retrieved material, say:
82
+ "Not found in the course material."
83
+ - Keep the answer clear and concise unless the user asks for more detail.
84
+ - If the question is in Spanish, answer in Spanish.
85
+ - If the question is in English, answer in English.
86
+
87
+ Question:
88
+ {message}
89
+ """
90
 
91
  response = query_engine.query(prompt)
92
  return str(response)
 
97
 
98
  with gr.Blocks() as demo:
99
  gr.Markdown("# 🧠 BrainChat")
100
+ gr.Markdown("Ask questions from the uploaded neuroscience and neuroanatomy books.")
101
 
102
  gr.ChatInterface(
103
+ fn=ask_brainchat,
104
  title="Neurology Tutor",
105
+ description="This Space loads a prebuilt Chroma database from storage/chroma.",
106
  textbox=gr.Textbox(
107
  placeholder="Ask a question...",
108
  lines=1