Update app.py
app.py
CHANGED
@@ -1,9 +1,8 @@
 import os
 import sys
 
-# --- 1. SQLITE FIX FOR HUGGING FACE ---
-#
-# This forces the system to use pysqlite3-binary.
+# --- 1. SQLITE FIX FOR HUGGING FACE SPACES ---
+# This ensures ChromaDB works on the cloud server
 try:
     __import__('pysqlite3')
     sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
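
Note: Chroma requires sqlite3 >= 3.35.0, newer than what the default Spaces image provides, which is why the stdlib module is swapped for the pysqlite3-binary build before anything imports sqlite3. The except branch falls outside this hunk; a minimal sketch of the full guard, assuming pysqlite3-binary is listed in requirements.txt:

    import sys
    try:
        __import__('pysqlite3')
        # Point the 'sqlite3' module name at the newer pysqlite3 build
        sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
    except ImportError:
        # Assumption: fall back to the system sqlite3 when running locally
        pass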
@@ -18,31 +17,27 @@ from langchain_chroma import Chroma
 from typing import Dict, Any, List
 
 # --- 2. SETUP & MODEL LOADING ---
-print("⏳ Loading
-
-# Initialize Embeddings (CPU is fine for this)
+print("⏳ Loading Embeddings...")
 embedding_function = HuggingFaceEmbeddings(
     model_name="nomic-ai/nomic-embed-text-v1.5",
     model_kwargs={"trust_remote_code": True, "device": "cpu"}
 )
 
-
-#
-
-
-print("⚠️ Warning: chroma.sqlite3 not found. App may crash if DB is missing.")
+print("⏳ Loading Database...")
+# FIX: Now we look for the FOLDER './chroma_db'
+if not os.path.exists("./chroma_db"):
+    raise ValueError("❌ Error: 'chroma_db' folder not found! Did you run 'git push' correctly?")
 
 vector_db = Chroma(
-    persist_directory="
+    persist_directory="./chroma_db",
     embedding_function=embedding_function
 )
 
-
+print("⏳ Loading TinyLlama Model...")
 model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id)
 
-# Create HF Pipeline
 pipe = pipeline(
     "text-generation",
     model=model,
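
Note: the fix in this hunk is that Chroma persists to a folder (a chroma.sqlite3 file plus index segments inside it), not to a single file, so both the existence check and persist_directory now point at ./chroma_db. A small smoke test worth running locally before pushing; the Chroma import appears in the hunk header above, but the embeddings import path is an assumption since the file's import block is not part of this diff:

    from langchain_chroma import Chroma
    from langchain_huggingface import HuggingFaceEmbeddings

    emb = HuggingFaceEmbeddings(
        model_name="nomic-ai/nomic-embed-text-v1.5",
        model_kwargs={"trust_remote_code": True, "device": "cpu"},
    )
    db = Chroma(persist_directory="./chroma_db", embedding_function=emb)
    # Should print one document if the folder was committed intact
    print(db.similarity_search("test", k=1))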
@@ -55,7 +50,7 @@ pipe = pipeline(
 
 llm = HuggingFacePipeline(pipeline=pipe)
 
-# --- 3. DEFINE
+# --- 3. DEFINE RAG CHAIN ---
 class ManualQAChain:
     def __init__(self, vector_store: Chroma, llm_pipeline: HuggingFacePipeline):
         self.retriever = vector_store.as_retriever(search_kwargs={"k": 2})
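
Note: as_retriever(search_kwargs={"k": 2}) caps retrieval at the two nearest chunks, which keeps the stuffed prompt inside TinyLlama's 2048-token context window. The retriever is a thin wrapper over similarity search; an equivalent direct call, with a hypothetical query:

    docs = vector_db.similarity_search("Who is at risk for Heart Failure?", k=2)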
@@ -63,33 +58,33 @@ class ManualQAChain:
 
     def invoke(self, inputs: Dict[str, str]) -> Dict[str, Any]:
         query = inputs.get("query", "")
-
-        # 1.
+
+        # 1. Retrieval
         docs = self.retriever.invoke(query)
-
+
+        if docs:
+            context = "\n\n".join([d.page_content for d in docs])
+        else:
+            context = "No relevant medical context found."
 
-        # 2.
-        max_context_length = 2000
+        # 2. Prompt
         prompt = f"""<|system|>
-You are a helpful
-
-If the context does not contain the answer, say: "I cannot find the answer in the provided context."
+You are a helpful medical assistant. Use ONLY the context below.
+If the answer is not in the context, say "I cannot find the answer."
 
 Context:
-{context[:
+{context[:2000]}
 </s>
 <|user|>
 {query}
 </s>
 <|assistant|>
 """
-        # 3.
+        # 3. Generation
         response = self.llm.invoke(prompt)
-
-        # Handle Output format
         text = response[0]['generated_text'] if isinstance(response, list) else str(response)
-
-        #
+
+        # Cleanup
         if "<|assistant|>" in text:
             final_answer = text.split("<|assistant|>")[-1].strip()
         else:
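
Note: the f-string mirrors the Zephyr-style <|system|>/<|user|>/<|assistant|> template that TinyLlama-1.1B-Chat was tuned on. A sketch of building the same prompt through the tokenizer loaded earlier in the script, which avoids hand-maintaining the template; the message contents here paraphrase the hunk above:

    messages = [
        {"role": "system",
         "content": f"You are a helpful medical assistant. Use ONLY the context below.\n\nContext:\n{context[:2000]}"},
        {"role": "user", "content": query},
    ]
    # add_generation_prompt=True appends the trailing <|assistant|> turn
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )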
@@ -99,37 +94,30 @@ Context:
 
 # Initialize Chain
 qa_chain = ManualQAChain(vector_db, llm)
-print("✅ RAG Pipeline is ready.")
 
-# --- 4. GRADIO UI
+# --- 4. GRADIO UI ---
 def medical_rag_chat(message, history):
-    if not message:
-        return "Please ask a medical question."
+    if not message: return "Please ask a question."
     try:
         response = qa_chain.invoke({"query": message})
-
+        sources = "\n\n---\n**Retrieved Context:**\n"
 
-        # Format Sources
-        sources_text = "\n\n---\n**Retrieved Context:**\n"
         if response.get('source_documents'):
             for i, doc in enumerate(response['source_documents']):
-                topic = doc.metadata.get('focus_area', '
-
-                sources_text += f"**{i+1}. [{topic}]** *\"{snippet[:500]}...\"*\n"
+                topic = doc.metadata.get('focus_area', 'Protocol')
+                sources += f"**{i+1}. [{topic}]** {doc.page_content[:300]}...\n"
         else:
-
-
-        return
+            sources += "(No context found)"
+
+        return response['result'] + sources
     except Exception as e:
-        return f"
+        return f"Error: {str(e)}"
 
-# --- 5. LAUNCH UI ---
 demo = gr.ChatInterface(
     fn=medical_rag_chat,
     title="Cardio-Oncology RAG Assistant",
     description="TinyLlama-1.1B + MedQuAD RAG",
-    examples=["What are the symptoms of Lung Cancer?", "Who is at risk for Heart Failure?"],
-    concurrency_limit=2
+    examples=["What are the symptoms of Lung Cancer?", "Who is at risk for Heart Failure?"]
 )
 
 if __name__ == "__main__":
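
Note: the diff cuts off at the __main__ guard, so the launch call sits outside the hunk. A quick sanity check of the chain itself before launching the UI; the 'result' and 'source_documents' keys are the ones medical_rag_chat reads above, and the query is just an example:

    response = qa_chain.invoke({"query": "What are the symptoms of Lung Cancer?"})
    print(response['result'])
    for doc in response.get('source_documents', []):
        print(doc.metadata.get('focus_area', 'Protocol'))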