Spaces:

mshabir
/

Medical-RAG-Assistant

Sleeping

App Files Files Community

mshabir committed on Dec 4, 2025

Commit

0b0695e

verified ·

1 Parent(s): b9cdd5f

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -346

app.py CHANGED Viewed

@@ -1,371 +1,179 @@
 import streamlit as st
-from langchain_community.embeddings import HuggingFaceEmbeddings  # Updated import
-from langchain_community.vectorstores import FAISS
-from langchain.chains import RetrievalQA
-from langchain_community.llms import HuggingFaceHub
-from langchain_community.document_loaders import TextLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from dotenv import load_dotenv
 import os
-import pickle
-from pathlib import Path
-# Load environment variables
-load_dotenv()
-# Streamlit page configuration
 st.set_page_config(
-    page_title="Medical QA Assistant",
     page_icon="🏥",
     layout="wide"
 )
-# Check for required files
-@st.cache_resource
-def check_files():
-    """Check if required files exist and provide guidance if not"""
-    faiss_index_path = Path("medical_faiss_store/medical_faiss.faiss")
-    faiss_pkl_path = Path("medical_faiss_store/medical_faiss.pkl")
-    if not faiss_index_path.exists() or not faiss_pkl_path.exists():
-        return False
-    return True
-# Initialize embeddings
-@st.cache_resource
-def load_embeddings():
-    """Load the HuggingFace embeddings model"""
-    try:
-        model_name = "sentence-transformers/all-MiniLM-L6-v2"
-        embeddings = HuggingFaceEmbeddings(model_name=model_name)
-        return embeddings
-    except Exception as e:
-        st.error(f"Error loading embeddings: {e}")
-        return None
-# Create FAISS index if it doesn't exist
-def create_faiss_index():
-    """Create FAISS index from sample medical data"""
-    try:
-        # Create sample medical data
-        sample_text = """
-        Diabetes is a chronic disease that occurs when the pancreas does not produce enough insulin.
-        Symptoms include increased thirst, frequent urination, and unexplained weight loss.
-        Type 1 diabetes is usually diagnosed in children and requires insulin injections.
-        Type 2 diabetes is more common in adults and can be managed with diet, exercise, and medication.
-        Hypertension, or high blood pressure, is when blood pressure is consistently too high.
-        Normal blood pressure is below 120/80 mmHg.
-        Symptoms may include headaches, shortness of breath, and nosebleeds.
-        Treatment includes lifestyle changes like reducing salt intake and medication.
-        Asthma is a condition where airways narrow and swell, producing extra mucus.
-        Symptoms include wheezing, coughing, chest tightness, and shortness of breath.
-        Asthma attacks can be triggered by allergens, exercise, or cold air.
-        Treatment involves inhalers (bronchodilators and corticosteroids).
-        COVID-19 is a respiratory illness caused by the SARS-CoV-2 virus.
-        Symptoms include fever, cough, fatigue, and loss of taste or smell.
-        Prevention includes vaccination, wearing masks, and social distancing.
-        Treatment depends on severity and may include antiviral medications.
-        Heart attack (myocardial infarction) occurs when blood flow to the heart is blocked.
-        Symptoms include chest pain, shortness of breath, nausea, and pain in arms or jaw.
-        Immediate treatment is crucial and may include aspirin, nitroglycerin, or surgery.
-        Risk factors include smoking, high cholesterol, and family history.
-        Stroke occurs when blood supply to part of the brain is interrupted.
-        Symptoms include sudden numbness, confusion, trouble speaking, and loss of balance.
-        FAST is an acronym for Face drooping, Arm weakness, Speech difficulty, Time to call emergency.
-        Treatment includes clot-busting drugs and rehabilitation.
-        Cancer is a disease caused by uncontrolled cell growth.
-        Common types include lung, breast, prostate, and colorectal cancer.
-        Symptoms vary but may include lumps, unexplained weight loss, and persistent pain.
-        Treatments include surgery, chemotherapy, radiation, and immunotherapy.
-        """
-        # Save sample text to a temporary file
-        temp_file = "temp_medical_data.txt"
-        with open(temp_file, "w") as f:
-            f.write(sample_text)
-        # Load and process documents
-        loader = TextLoader(temp_file)
-        documents = loader.load()
-        text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=500,
-            chunk_overlap=50,
-            length_function=len,
-            separators=["\n\n", "\n", " ", ""]
-        )
-        texts = text_splitter.split_documents(documents)
-        # Create embeddings
-        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-        # Create and save FAISS index
-        db = FAISS.from_documents(texts, embeddings)
-        # Create directory if it doesn't exist
-        Path("medical_faiss_store").mkdir(exist_ok=True)
-        # Save the FAISS index
-        db.save_local("medical_faiss_store", index_name="medical_faiss")
-        # Clean up temp file
-        os.remove(temp_file)
-        st.success("✅ FAISS index created successfully with sample medical data!")
-        return db
-    except Exception as e:
-        st.error(f"Error creating FAISS index: {e}")
-        return None
-# Load FAISS database with error handling
-@st.cache_resource
-def load_faiss():
-    """Load FAISS database or create if it doesn't exist"""
-    try:
-        embeddings = load_embeddings()
-        if embeddings is None:
-            return None
-        # Check if files exist
-        if not check_files():
-            st.warning("FAISS index not found. Creating a new one with sample medical data...")
-            return create_faiss_index()
-        # Load existing FAISS database
-        db = FAISS.load_local(
-            "medical_faiss_store",
-            embeddings,
-            index_name="medical_faiss",
-            allow_dangerous_deserialization=True
-        )
-        st.success("✅ FAISS database loaded successfully!")
-        return db
-    except Exception as e:
-        st.error(f"Error loading FAISS database: {e}")
-        return None
-# Initialize LLM
-@st.cache_resource
-def load_llm():
-    """Load the HuggingFace LLM"""
-    try:
-        # Check for API token
-        api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-        if not api_token:
-            st.error("HuggingFace API token not found in environment variables.")
-            st.info("""
-            Please add your token to the .env file as:
-            HUGGINGFACEHUB_API_TOKEN=your_token_here
-            You can get a free token from:
-            https://huggingface.co/settings/tokens
-            """)
-            return None
-        # Using a model that works well for QA
-        llm = HuggingFaceHub(
-            repo_id="google/flan-t5-large",
-            model_kwargs={
-                "temperature": 0.1,
-                "max_length": 512,
-                "min_length": 50
-            },
-            huggingfacehub_api_token=api_token
-        )
-        return llm
-    except Exception as e:
-        st.error(f"Error loading LLM: {e}")
-        return None
-# Create QA chain
-@st.cache_resource
-def create_qa_chain(_db, _llm):
-    """Create the QA chain"""
-    if _db is None or _llm is None:
-        return None
     try:
-        retriever = _db.as_retriever(
-            search_type="similarity",
-            search_kwargs={"k": 3}
-        )
-        qa_chain = RetrievalQA.from_chain_type(
-            llm=_llm,
-            chain_type="stuff",
-            retriever=retriever,
-            return_source_documents=True,
-            verbose=False
-        )
-        return qa_chain
     except Exception as e:
-        st.error(f"Error creating QA chain: {e}")
-        return None
-# Main app function
-def main():
-    st.title("🏥 Medical QA Assistant")
-    st.markdown("Ask questions about medical information and get AI-powered answers.")
-    # Initialize session state
-    if 'chat_history' not in st.session_state:
-        st.session_state.chat_history = []
-    if 'initialized' not in st.session_state:
-        st.session_state.initialized = False
-    # Sidebar
-    with st.sidebar:
-        st.header("Configuration")
-        st.markdown("---")
-        # Display file status
-        files_exist = check_files()
-        if files_exist:
-            st.success("✅ FAISS index files found")
-        else:
-            st.warning("⚠️ FAISS index will be created on first run")
-        st.markdown("---")
-        # Model info
-        st.subheader("Model Information")
-        st.markdown("""
-        - **Embeddings**: sentence-transformers/all-MiniLM-L6-v2
-        - **LLM**: google/flan-t5-large
-        - **Retrieval**: FAISS with 3 similar chunks
-        """)
-        # API token status
-        api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-        if api_token:
-            st.success("✅ HuggingFace API token found")
-        else:
-            st.error("❌ HuggingFace API token missing")
-        # Clear chat button
-        if st.button("Clear Chat History"):
-            st.session_state.chat_history = []
-            st.rerun()
-        # Recreate index button
-        if st.button("Recreate FAISS Index"):
-            with st.spinner("Creating new index..."):
-                db = create_faiss_index()
-                if db:
-                    st.success("Index recreated successfully!")
-                    st.rerun()
-        # Debug info
-        with st.expander("Debug Information"):
-            st.write(f"Python version: {os.sys.version}")
-            st.write(f"Working directory: {os.getcwd()}")
-            st.write(f"Files in directory: {os.listdir('.')}")
-    # Main content area
-    col1, col2 = st.columns([3, 1])
-    with col1:
-        # Initialize components
-        if not st.session_state.initialized:
-            with st.spinner("Initializing system... This may take a minute."):
                 try:
-                    # Try to load or create FAISS index
-                    db = load_faiss()
-                    if db is None:
-                        st.error("Failed to create or load FAISS index.")
-                        return
-                    # Try to load LLM
-                    llm = load_llm()
-                    if llm is None:
-                        st.error("Failed to load LLM. Please check your HuggingFace API token.")
-                        return
-                    # Create QA chain
-                    qa_chain = create_qa_chain(db, llm)
-                    if qa_chain is None:
-                        st.error("Failed to create QA chain.")
-                        return
-                    # Store in session state
-                    st.session_state.db = db
-                    st.session_state.llm = llm
-                    st.session_state.qa_chain = qa_chain
-                    st.session_state.initialized = True
-                    st.success("✅ System initialized successfully!")
                 except Exception as e:
-                    st.error(f"Initialization error: {e}")
-                    return
-        # Check if system is initialized
-        if not st.session_state.initialized:
-            st.error("System not initialized. Please check the error messages above.")
-            return
-        # Chat input
-        query = st.text_input(
-            "💬 Ask a medical question:",
-            placeholder="e.g., What are the symptoms of diabetes?",
-            key="query_input"
-        )
-        # Submit button
-        col_submit1, col_submit2 = st.columns([1, 5])
-        with col_submit1:
-            submit_button = st.button("Submit", type="primary", disabled=(not query))
-        # Process query
-        if submit_button and query:
-            with st.spinner("Searching for relevant information..."):
-                try:
-                    # Get response from QA chain
-                    result = st.session_state.qa_chain({"query": query})
-                    # Display answer
-                    st.markdown("### Answer:")
-                    st.write(result['result'])
-                    # Display source documents
-                    with st.expander("📚 View source information"):
-                        for i, doc in enumerate(result['source_documents']):
-                            st.markdown(f"**Source {i+1}:**")
-                            st.write(doc.page_content[:300] + ("..." if len(doc.page_content) > 300 else ""))
-                            st.markdown("---")
-                    # Save to chat history
-                    st.session_state.chat_history.append({
-                        "question": query,
-                        "answer": result['result'],
-                        "sources": result['source_documents']
-                    })
-                except Exception as e:
-                    st.error(f"Error getting response: {e}")
-                    st.info("Please try rephrasing your question.")
-    with col2:
-        # Chat history
-        st.subheader("📝 Chat History")
-        if st.session_state.chat_history:
-            for i, chat in enumerate(st.session_state.chat_history[-5:][::-1]):  # Show last 5, newest first
-                with st.expander(f"Q: {chat['question'][:50]}..."):
-                    st.write(f"**Q:** {chat['question']}")
-                    st.write(f"**A:** {chat['answer'][:150]}...")
-        else:
-            st.info("No questions asked yet.")
-# Run the app
-if __name__ == "__main__":
-    main()

+import html
 import os
+
+import google.generativeai as genai
 import streamlit as st
+
+from medical_rag_system import MedicalRAGSystem
+# Page-level configuration; this is the first Streamlit call in the script.
 st.set_page_config(
+    page_title="Medical RAG Assistant",
     page_icon="🏥",
     layout="wide"
 )
+# Inject the CSS classes (.main-header, .info-box, .source-box) that the
+# raw-HTML snippets further down refer to by class name.
+st.markdown("""
+<style>
+    .main-header {
+        font-size: 2.5rem;
+        color: #1f77b4;
+        text-align: center;
+        margin-bottom: 2rem;
+    }
+    .info-box {
+        background-color: #f0f2f6;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        margin: 1rem 0;
+    }
+    .source-box {
+        background-color: #e8f4fd;
+        padding: 0.5rem;
+        border-radius: 0.3rem;
+        margin: 0.5rem 0;
+        border-left: 4px solid #1f77b4;
+    }
+</style>
+""", unsafe_allow_html=True)
+def generate_medical_answer(query, context_chunks, api_key):
+    """Generate an answer with Gemini, grounded in the retrieved context.
+
+    Args:
+        query: The user's question; inserted verbatim into the prompt.
+        context_chunks: Retrieved chunks. Each is a mapping with a
+            ``content`` string and, optionally, a ``metadata`` mapping that
+            carries ``medical_specialty``.
+        api_key: Google AI Studio API key handed to ``genai.configure``.
+
+    Returns:
+        The model's answer text; a fixed fallback sentence when
+        ``context_chunks`` is empty; or a string starting with
+        ``"Error generating answer:"`` when the Gemini call fails.
+    """
+    if not context_chunks:
+        return "I couldn't find relevant medical information to answer this question in the available records."
+    # Prepare context from retrieved chunks. Fields are read defensively so a
+    # chunk without a specialty tag is labelled "Unknown" instead of raising
+    # KeyError before the try block and aborting the whole request.
+    notes = []
+    for i, chunk in enumerate(context_chunks, start=1):
+        metadata = chunk.get("metadata") or {}
+        specialty = metadata.get("medical_specialty", "Unknown")
+        content = chunk.get("content", "")
+        notes.append(f"--- MEDICAL NOTE {i} (Specialty: {specialty}) ---\n{content}")
+    context_text = "\n\n".join(notes)
+    prompt = f"""You are a medical research assistant. Answer the question based ONLY on the provided medical context from clinical notes.
+MEDICAL CONTEXT:
+{context_text}
+QUESTION: {query}
+IMPORTANT INSTRUCTIONS:
+- Answer using ONLY the information from the medical context above
+- If the context doesn't contain relevant information, say "I cannot find specific information about this in the available medical records"
+- Be precise and medically accurate
+- Do not make up or hallucinate information
+- Mention which medical specialty the information comes from when relevant
+- Keep answers concise but informative
+ANSWER:"""
     try:
+        genai.configure(api_key=api_key)
+        model = genai.GenerativeModel("models/gemini-2.0-flash")
+        response = model.generate_content(prompt)
+        return response.text
     except Exception as e:
+        # Deliberate catch-all: the caller renders the returned string in the
+        # UI, so any failure is reported as text rather than a traceback.
+        return f"Error generating answer: {str(e)}"
+# Main app: page title and tagline.
+st.markdown('<div class="main-header">🏥 Medical RAG Assistant</div>', unsafe_allow_html=True)
+st.markdown("**Ask medical questions based on 3,898 clinical transcriptions across 39 medical specialties**")
+# Sidebar configuration: API key entry, initialisation button, status box.
+with st.sidebar:
+    st.header("⚙️ Configuration")
+    api_key = st.text_input(
+        "Google AI Studio API Key",
+        type="password",
+        help="Get free API key from https://aistudio.google.com/"
+    )
+    # The help box is emitted as ONE HTML block. Streamlit renders every
+    # st.markdown/st.write call as its own element, so an opening <div> in one
+    # call cannot wrap output produced by later calls.
+    st.markdown(
+        '<div class="info-box">'
+        "<strong>How to get API Key:</strong>"
+        "<ol>"
+        '<li>Go to <a href="https://aistudio.google.com/" target="_blank">Google AI Studio</a></li>'
+        "<li>Sign in with Google account</li>"
+        "<li>Click 'Get API Key' and create new key</li>"
+        "<li>Paste the key here</li>"
+        "</ol>"
+        "</div>",
+        unsafe_allow_html=True,
+    )
+    # Initialize button: builds the RAG system once and keeps it (and the key
+    # that was entered at that moment) in the session state.
+    if st.button("🚀 Initialize Medical RAG System", use_container_width=True):
+        if not api_key:
+            st.error("Please enter your Google AI Studio API key first")
+        else:
+            with st.spinner("Loading medical database..."):
                 try:
+                    rag_system = MedicalRAGSystem()
+                    st.session_state.rag_system = rag_system
+                    st.session_state.api_key = api_key
+                    st.success("✅ Medical RAG system initialized successfully!")
+                    st.info(f"📊 System contains {len(rag_system.chunks)} medical chunks")
                 except Exception as e:
+                    st.error(f"Failed to initialize: {str(e)}")
+    # System info: shown only after a successful initialisation. Rendered as a
+    # single HTML block for the same reason as the help box above.
+    if 'rag_system' in st.session_state:
+        chunk_count = len(st.session_state.rag_system.chunks)
+        st.markdown(
+            '<div class="info-box">'
+            "<strong>System Status:</strong> ✅ Active<br>"
+            f"<strong>Chunks loaded:</strong> {chunk_count:,}<br>"
+            "<strong>Ready for queries</strong>"
+            "</div>",
+            unsafe_allow_html=True,
+        )
+# Main query interface: question box, retrieval depth slider, search button.
+st.divider()
+st.subheader("🔍 Ask Medical Questions")
+query = st.text_area(
+    "Enter your medical question:",
+    placeholder="e.g., What are the symptoms of allergic rhinitis?",
+    height=100
+)
+num_chunks = st.slider("Number of medical chunks to retrieve:", 1, 5, 3)
+if st.button("🔎 Search Medical Database", type="primary", use_container_width=True):
+    if 'rag_system' not in st.session_state:
+        st.error("Please initialize the RAG system first using the sidebar button")
+    elif not query.strip():
+        st.error("Please enter a question")
+    else:
+        with st.spinner("Searching medical database..."):
+            # Retrieve relevant chunks
+            retrieved_chunks = st.session_state.rag_system.retrieve_similar_chunks(
+                query,
+                k=num_chunks
+            )
+            if not retrieved_chunks:
+                st.warning("No relevant medical information found for this query.")
+            else:
+                # Display retrieved chunks
+                st.subheader("📋 Retrieved Medical Information")
+                for i, chunk in enumerate(retrieved_chunks, start=1):
+                    specialty = chunk['metadata']['medical_specialty']
+                    score = chunk['similarity_score']
+                    content = chunk['content']
+                    # Show at most 500 characters, with an ellipsis when truncated.
+                    preview = (content[:500] + "...") if len(content) > 500 else content
+                    # Escape the note text and emit the styled box in one call:
+                    # a <div> opened in one st.markdown call cannot wrap output
+                    # from a later call, and raw note text must not be parsed
+                    # as HTML.
+                    preview_html = html.escape(preview).replace("\n", "<br>")
+                    with st.expander(f"Source {i}: {specialty} (Relevance: {score:.3f})"):
+                        st.markdown(
+                            f'<div class="source-box">{preview_html}</div>',
+                            unsafe_allow_html=True,
+                        )
+                # Generate answer
+                st.subheader("💡 AI-Generated Answer")
+                with st.spinner("Generating medical answer..."):
+                    answer = generate_medical_answer(
+                        query,
+                        retrieved_chunks,
+                        st.session_state.api_key
+                    )
+                    # The answer is model output, so it is escaped before being
+                    # placed inside unsafe_allow_html markup. The HTML is built
+                    # without leading indentation or blank lines so Markdown
+                    # does not turn a multi-line answer into a code block.
+                    answer_html = html.escape(answer).replace("\n", "<br>")
+                    st.markdown(
+                        '<div style="background-color: #f8f9fa; padding: 20px; '
+                        'border-radius: 10px; border-left: 5px solid #1f77b4;">'
+                        f"{answer_html}"
+                        "</div>",
+                        unsafe_allow_html=True,
+                    )
+# Footer: attribution line plus a medical-use disclaimer, rendered as raw HTML
+# beneath a divider.
+st.divider()
+st.markdown("""
+<div style="text-align: center; color: #666; font-size: 0.9em;">
+    <p>Medical RAG Assistant | Powered by Google Gemini & FAISS</p>
+    <p>⚠️ <strong>Disclaimer:</strong> This tool provides information from clinical notes and should not be used for medical diagnosis or treatment decisions.</p>
+</div>
+""", unsafe_allow_html=True)