Spaces:

zenitsu09
/

medgraphy

Runtime error

App Files Files Community

Himanshu Gangwar committed on Feb 2

Commit

9b69c13

1 Parent(s): 58ad204

Refactor: Simplify Neo4j connection management and remove unused code

Browse files

Files changed (1) hide show

app.py +61 -471

app.py CHANGED Viewed

@@ -1,477 +1,67 @@
-import gradio as gr
-import faiss
-import json
-import numpy as np
-from sentence_transformers import SentenceTransformer
-from groq import Groq
-from neo4j import GraphDatabase
-from dotenv import load_dotenv
 import os
 load_dotenv()
-# Load credentials from environment or Hugging Face Spaces secrets
-GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-# Use local Neo4j instance running directly (not Docker)
-NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
-NEO4J_USER = os.getenv("NEO4J_USERNAME", "neo4j")
-NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "neo4j")
-NEO4J_DATABASE = os.getenv("NEO4J_DATABASE", "neo4j")
-FAISS_INDEX_PATH = "db/medicine_embeddings.index"
-METADATA_PATH = "db/metadata.json"
-EMBED_MODEL = "BAAI/bge-large-en-v1.5"
-LLM_MODEL = "openai/gpt-oss-120b"
-# ---------------------------------------------------------
-#           LOAD MODELS & DATABASES (ON STARTUP)
-# ---------------------------------------------------------
-def load_faiss():
-    return faiss.read_index(FAISS_INDEX_PATH)
-def load_metadata():
-    with open(METADATA_PATH, "r") as f:
-        return json.load(f)
-def load_embedder():
-    return SentenceTransformer(EMBED_MODEL)
-def load_llm():
-    return Groq(api_key=GROQ_API_KEY)
-def load_neo4j():
-    if not all([NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD]):
-        raise ValueError("Neo4j credentials not configured")
-    driver = GraphDatabase.driver(
-        NEO4J_URI,
-        auth=(NEO4J_USER, NEO4J_PASSWORD),
-        max_connection_lifetime=3600,
-        max_connection_pool_size=50,
-        connection_acquisition_timeout=120
-    )
-    # Test the connection
-    driver.verify_connectivity()
-    return driver
-# Initialize resources
-print("Loading FAISS index...")
-faiss_index = load_faiss()
-print("Loading metadata...")
-metadata = load_metadata()
-print("Loading embedder model...")
-embedder = load_embedder()
-print("Loading Groq LLM client...")
-groq_client = load_llm()
-# Load Neo4j with error handling
-neo4j_status = ""
-neo4j_driver = None
-try:
-    print("Connecting to Neo4j...")
-    neo4j_driver = load_neo4j()
-    neo4j_status = "✅ Connected to Neo4j"
-    print(neo4j_status)
-except Exception as e:
-    neo4j_status = f"❌ Neo4j Connection Failed: {str(e)}"
-    print(neo4j_status)
-    print("⚠️ App will continue with FAISS search only (Graph features disabled)")
-# ---------------------------------------------------------
-#       GRAPH EXPANSION — FETCH RELATED NODES
-# ---------------------------------------------------------
-def get_graph_info(drug_name):
-    if neo4j_driver is None:
-        return {}
-    # Use case-insensitive matching since metadata has lowercase names
-    # but Neo4j has Title Case names
-    query = """
-    MATCH (m:Medicine)
-    WHERE toLower(m.name) = toLower($name)
-    OPTIONAL MATCH (m)-[r]->(n)
-    WITH type(r) AS rel_type, n.name AS target_name
-    WHERE rel_type IS NOT NULL
-    RETURN rel_type AS relation, target_name AS value
-    LIMIT 200
-    """
-    try:
-        with neo4j_driver.session(database=NEO4J_DATABASE) as session:
-            result = session.run(query, name=drug_name).data()
-    except Exception as e:
-        print(f"Graph query error: {e}")
-        return {}
-    graph_dict = {}
-    for row in result:
-        relation = row.get("relation")
-        value = row.get("value")
-        if relation and value:
-            graph_dict.setdefault(relation, []).append(value)
-    return graph_dict
-# ---------------------------------------------------------
-#            SEMANTIC SEARCH (FAISS)
-# ---------------------------------------------------------
-def semantic_search(query, top_k=5):
-    query_emb = embedder.encode(query).astype("float32")
-    distances, indices = faiss_index.search(
-        np.array([query_emb]), top_k
-    )
-    results = []
-    for idx in indices[0]:
-        results.append(metadata[idx])
-    return results
-# ---------------------------------------------------------
-#       DIRECT NEO4J SEARCH (Graph-based)
-# ---------------------------------------------------------
-def search_neo4j_directly(query, limit=10):
-    """
-    Search Neo4j directly for medicines, conditions, side effects, or ingredients
-    based on the query keywords.
     """
-    if neo4j_driver is None:
-        return {"medicines": [], "conditions": [], "side_effects": [], "ingredients": []}
-    results = {
-        "medicines": [],
-        "conditions": [],
-        "side_effects": [],
-        "ingredients": []
-    }
-    # Extract keywords from query (simple approach)
-    query_lower = query.lower()
-    try:
-        with neo4j_driver.session(database=NEO4J_DATABASE) as session:
-            # Search medicines by name or composition containing query terms
-            med_query = """
-            MATCH (m:Medicine)
-            WHERE toLower(m.name) CONTAINS $query
-               OR toLower(m.composition) CONTAINS $query
-               OR toLower(m.uses_text) CONTAINS $query
-            RETURN m.name AS name, m.composition AS composition,
-                   m.uses_text AS uses, m.side_effects_text AS side_effects,
-                   m.excellent_review_pct AS excellent_review
-            ORDER BY m.excellent_review_pct DESC
-            LIMIT $limit
-            """
-            med_results = session.run(med_query, query=query_lower, limit=limit).data()
-            results["medicines"] = med_results
-            # Search conditions that match query
-            cond_query = """
-            MATCH (c:Condition)<-[:TREATS]-(m:Medicine)
-            WHERE toLower(c.name) CONTAINS $query
-            RETURN c.name AS condition, collect(DISTINCT m.name)[0..5] AS treating_medicines
-            LIMIT 5
-            """
-            cond_results = session.run(cond_query, query=query_lower).data()
-            results["conditions"] = cond_results
-            # Search side effects that match query
-            se_query = """
-            MATCH (s:SideEffect)<-[:HAS_SIDE_EFFECT]-(m:Medicine)
-            WHERE toLower(s.name) CONTAINS $query
-            RETURN s.name AS side_effect, collect(DISTINCT m.name)[0..5] AS medicines_with_effect
-            LIMIT 5
-            """
-            se_results = session.run(se_query, query=query_lower).data()
-            results["side_effects"] = se_results
-            # Search ingredients that match query
-            ing_query = """
-            MATCH (i:ActiveIngredient)<-[:CONTAINS_INGREDIENT]-(m:Medicine)
-            WHERE toLower(i.name) CONTAINS $query
-            RETURN i.name AS ingredient, collect(DISTINCT m.name)[0..10] AS medicines_containing
-            LIMIT 5
-            """
-            ing_results = session.run(ing_query, query=query_lower).data()
-            results["ingredients"] = ing_results
-    except Exception as e:
-        print(f"Neo4j direct search error: {e}")
-    return results
-# ---------------------------------------------------------
-#            LLM ANSWER USING GROQ
-# ---------------------------------------------------------
-def answer_with_groq(query, faiss_results, graph_expansion, neo4j_direct_results):
-    system_prompt = """
-    You are a medical question answering assistant with access to TWO data sources:
-    1. **FAISS Vector Database**: Semantic similarity search results - good for finding medicines
-       related to the query meaning, even if exact keywords don't match.
-    2. **Neo4j Graph Database**:
-       - Direct search results: Exact matches for medicines, conditions, side effects, ingredients
-       - Graph expansion: Relationships like TREATS, HAS_SIDE_EFFECT, CONTAINS_INGREDIENT, MANUFACTURED_BY
-    Your task:
-    - Analyze BOTH data sources
-    - Decide which source is more relevant for the specific question
-    - You can use BOTH sources if they provide complementary information
-    - For specific medicine queries → prioritize Neo4j direct matches
-    - For general symptom/condition queries → combine FAISS semantics + Neo4j graph relationships
-    - For side effect queries → prioritize Neo4j graph data (HAS_SIDE_EFFECT relationships)
-    - For ingredient queries → prioritize Neo4j graph data (CONTAINS_INGREDIENT relationships)
-    Rules:
-    - Never hallucinate facts - use ONLY the provided context
-    - If data is conflicting, prefer Neo4j graph data (more structured)
-    - Clearly cite which source provided the information when helpful
-    - Be concise but comprehensive
     """
-    # Build context from FAISS metadata
-    faiss_text = "=== FAISS VECTOR SEARCH RESULTS ===\n"
-    if faiss_results:
-        for item in faiss_results:
-            faiss_text += f"""
-Medicine: {item.get('name', 'N/A')}
-Uses: {item.get('uses', 'N/A')}
-Side Effects: {item.get('side_effects', 'N/A')}
-Manufacturer: {item.get('manufacturer', 'N/A')}
----
-"""
-    else:
-        faiss_text += "No FAISS results found.\n"
-    # Build graph expansion info
-    graph_text = "\n=== NEO4J GRAPH EXPANSION (Relationships) ===\n"
-    has_graph_data = False
-    for medicine, relations in graph_expansion.items():
-        if relations:
-            has_graph_data = True
-            graph_text += f"\n📊 Graph Data for '{medicine}':\n"
-            for rel, vals in relations.items():
-                rel_readable = rel.replace("_", " ").title()
-                graph_text += f"  • {rel_readable}: {', '.join(vals[:10])}\n"
-    if not has_graph_data:
-        graph_text += "No graph expansion data found.\n"
-    # Build Neo4j direct search results
-    neo4j_text = "\n=== NEO4J DIRECT SEARCH RESULTS ===\n"
-    has_neo4j_data = False
-    if neo4j_direct_results.get("medicines"):
-        has_neo4j_data = True
-        neo4j_text += "\n🔍 Matching Medicines:\n"
-        for med in neo4j_direct_results["medicines"][:5]:
-            neo4j_text += f"  • {med.get('name', 'N/A')}\n"
-            neo4j_text += f"    Uses: {med.get('uses', 'N/A')[:200]}...\n" if med.get('uses') else ""
-            neo4j_text += f"    Side Effects: {med.get('side_effects', 'N/A')[:200]}...\n" if med.get('side_effects') else ""
-    if neo4j_direct_results.get("conditions"):
-        has_neo4j_data = True
-        neo4j_text += "\n🏥 Matching Conditions:\n"
-        for cond in neo4j_direct_results["conditions"]:
-            neo4j_text += f"  • {cond.get('condition', 'N/A')}\n"
-            neo4j_text += f"    Treating Medicines: {', '.join(cond.get('treating_medicines', []))}\n"
-    if neo4j_direct_results.get("side_effects"):
-        has_neo4j_data = True
-        neo4j_text += "\n⚠️ Matching Side Effects:\n"
-        for se in neo4j_direct_results["side_effects"]:
-            neo4j_text += f"  • {se.get('side_effect', 'N/A')}\n"
-            neo4j_text += f"    Found in: {', '.join(se.get('medicines_with_effect', []))}\n"
-    if neo4j_direct_results.get("ingredients"):
-        has_neo4j_data = True
-        neo4j_text += "\n💊 Matching Ingredients:\n"
-        for ing in neo4j_direct_results["ingredients"]:
-            neo4j_text += f"  • {ing.get('ingredient', 'N/A')}\n"
-            neo4j_text += f"    Found in: {', '.join(ing.get('medicines_containing', [])[:5])}\n"
-    if not has_neo4j_data:
-        neo4j_text += "No direct Neo4j matches found.\n"
-    full_prompt = f"""
-{system_prompt}
-📝 USER QUERY: {query}
-{faiss_text}
-{graph_text}
-{neo4j_text}
-Based on the above data sources, provide a comprehensive answer. Indicate which data source(s) you primarily used.
-"""
-    response = groq_client.chat.completions.create(
-        model=LLM_MODEL,
-        messages=[{"role": "user", "content": full_prompt}],
-        temperature=0.2,
-    )
-    return response.choices[0].message.content
-# ---------------------------------------------------------
-#                  MAIN QUERY FUNCTION
-# ---------------------------------------------------------
-def process_query(query):
-    """Main function to process user query and return results"""
-    if not query.strip():
-        return "⚠️ Please enter a query.", "", "", "", neo4j_status
-    # Step 1: FAISS Semantic Search
-    faiss_results = semantic_search(query)
-    # Step 2: Neo4j Direct Search
-    neo4j_direct_results = search_neo4j_directly(query)
-    # Step 3: Graph expansion for FAISS results
-    graph_expansion = {}
-    for r in faiss_results:
-        graph_expansion[r["name"]] = get_graph_info(r["name"])
-    # Step 4: Format FAISS results for display
-    faiss_text = "### 🔍 FAISS Vector Search Results\n\n"
-    for r in faiss_results:
-        faiss_text += f"**{r['name']}**\n"
-        faiss_text += f"- Uses: {r.get('uses', 'N/A')[:150]}...\n"
-        faiss_text += f"- Side Effects: {r.get('side_effects', 'N/A')[:100]}...\n\n"
-    # Step 5: Format Neo4j results for display
-    neo4j_text = "### 🧬 Neo4j Graph Database Results\n\n"
-    # Direct matches
-    if neo4j_direct_results.get("medicines"):
-        neo4j_text += "**📋 Direct Medicine Matches:**\n"
-        for med in neo4j_direct_results["medicines"][:5]:
-            neo4j_text += f"- {med.get('name', 'N/A')}\n"
-        neo4j_text += "\n"
-    if neo4j_direct_results.get("conditions"):
-        neo4j_text += "**🏥 Matching Conditions:**\n"
-        for cond in neo4j_direct_results["conditions"]:
-            neo4j_text += f"- {cond.get('condition', 'N/A')}: {', '.join(cond.get('treating_medicines', [])[:3])}\n"
-        neo4j_text += "\n"
-    if neo4j_direct_results.get("ingredients"):
-        neo4j_text += "**💊 Matching Ingredients:**\n"
-        for ing in neo4j_direct_results["ingredients"]:
-            neo4j_text += f"- {ing.get('ingredient', 'N/A')}: {', '.join(ing.get('medicines_containing', [])[:3])}\n"
-        neo4j_text += "\n"
-    if neo4j_direct_results.get("side_effects"):
-        neo4j_text += "**⚠️ Matching Side Effects:**\n"
-        for se in neo4j_direct_results["side_effects"]:
-            neo4j_text += f"- {se.get('side_effect', 'N/A')}: {', '.join(se.get('medicines_with_effect', [])[:3])}\n"
-        neo4j_text += "\n"
-    # Graph expansion
-    neo4j_text += "**🔗 Graph Relationships:**\n```json\n"
-    neo4j_text += json.dumps(graph_expansion, indent=2)[:2000]
-    neo4j_text += "\n```"
-    # Step 6: Generate LLM answer using all sources
-    answer = answer_with_groq(query, faiss_results, graph_expansion, neo4j_direct_results)
-    final_answer = "### 🩺 AI Answer (Using Both Databases)\n\n" + answer
-    return faiss_text, neo4j_text, final_answer, neo4j_status
-# ---------------------------------------------------------
-#                     GRADIO UI
-# ---------------------------------------------------------
-def create_interface():
-    with gr.Blocks(title="Medicine GraphRAG AI") as demo:
-        gr.Markdown("# 💊 Medicine GraphRAG AI")
-        gr.Markdown("**Dual Database Search: FAISS Vector DB + Neo4j Graph DB + LLM Reasoning**")
-        with gr.Row():
-            status_display = gr.Textbox(
-                label="Database Status",
-                value=neo4j_status,
-                interactive=False,
-                lines=1
-            )
-        with gr.Row():
-            query_input = gr.Textbox(
-                label="Enter your medical query",
-                placeholder="e.g., What are the side effects of paracetamol?",
-                lines=2
-            )
-        with gr.Row():
-            search_btn = gr.Button("🔍 Search Both Databases", variant="primary", size="lg")
-            clear_btn = gr.Button("Clear", variant="secondary")
-        # Answer section FIRST (most important)
-        with gr.Row():
-            answer_output = gr.Markdown(
-                label="AI Answer",
-                value="",
-            )
-        # Database results in collapsible/scrollable sections
-        with gr.Row():
-            with gr.Column():
-                with gr.Accordion("🔍 FAISS Vector Search Results", open=False):
-                    faiss_output = gr.Markdown(
-                        label="FAISS Results",
-                        value="",
-                    )
-            with gr.Column():
-                with gr.Accordion("🧬 Neo4j Graph Database Results", open=False):
-                    neo4j_output = gr.Markdown(
-                        label="Neo4j Results",
-                        value="",
-                    )
-        # Event handlers
-        search_btn.click(
-            fn=process_query,
-            inputs=[query_input],
-            outputs=[faiss_output, neo4j_output, answer_output, status_display]
-        )
-        clear_btn.click(
-            fn=lambda: ("", "", "", neo4j_status),
-            inputs=[],
-            outputs=[faiss_output, neo4j_output, answer_output, status_display]
-        )
-        # Examples
-        gr.Examples(
-            examples=[
-                ["What is the best medicine for acidity?"],
-                ["Show me medicines for headache"],
-                ["What are the side effects of paracetamol?"],
-                ["Suggest medicine for cold and fever"],
-                ["Find medicines containing ibuprofen"],
-                ["What treats hypertension?"]
-            ],
-            inputs=query_input
-        )
-    return demo
-if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch()

 import os
+import streamlit as st
+from neo4j import GraphDatabase
+from langchain_community.graphs import Neo4jGraph
+from dotenv import load_dotenv
+# Load environment variables from .env file for local development
 load_dotenv()
+class Neo4jConnection:
     """
+    Manage a single Neo4j driver and run Cypher queries through it.
+    Credentials are read from Streamlit secrets when they are available and
+    define NEO4J_URI; otherwise they are read from environment variables
+    (which load_dotenv() populates from a local .env file).
     """
+    @staticmethod
+    def _secrets_available():
+        """Return True when Streamlit secrets are readable and define NEO4J_URI."""
+        try:
+            return hasattr(st, 'secrets') and "NEO4J_URI" in st.secrets
+        except FileNotFoundError:
+            # NOTE(review): Streamlit is expected to raise a FileNotFoundError-derived
+            # error when no secrets.toml exists; confirm for the installed version.
+            # Without this guard the .env fallback below is never reached.
+            return False
+    def __init__(self):
+        """Resolve credentials, create the driver and check connectivity.
+        Raises:
+            ValueError: if the URI, user or password cannot be resolved.
+        """
+        # Prioritize Streamlit secrets, fall back to .env for local dev
+        if self._secrets_available():
+            uri = st.secrets["NEO4J_URI"]
+            user = st.secrets["NEO4J_USER"]
+            password = st.secrets["NEO4J_PASSWORD"]
+            print("Connecting to Neo4j using Streamlit secrets.")
+        else:
+            uri = os.getenv("NEO4J_URI")
+            # The previous version of app.py read NEO4J_USERNAME; keep accepting it
+            # so existing .env files continue to work.
+            user = os.getenv("NEO4J_USER") or os.getenv("NEO4J_USERNAME")
+            password = os.getenv("NEO4J_PASSWORD")
+            print("Connecting to Neo4j using local .env file.")
+        # Fail with a clear message instead of handing None to the driver.
+        if not all([uri, user, password]):
+            raise ValueError("Neo4j credentials not configured")
+        self._driver = GraphDatabase.driver(uri, auth=(user, password))
+        try:
+            # Verify connection; a failure is reported but deliberately not fatal.
+            self._driver.verify_connectivity()
+            print("Connected to Neo4j")
+        except Exception as e:
+            print(f"Neo4j connection failed: {e}")
+    def close(self):
+        """Close the underlying driver if one was created."""
+        if self._driver is not None:
+            self._driver.close()
+    def query(self, query, parameters=None, db=None):
+        """Run a Cypher query and return its records as a list.
+        Args:
+            query: Cypher text to execute.
+            parameters: optional query parameters passed to session.run().
+            db: optional database name; the driver default is used when None.
+        Returns:
+            A list of result records, or None if the query failed (the error
+            is printed rather than raised).
+        """
+        # Explicit raise instead of ``assert`` so the check survives ``python -O``
+        # while keeping the same exception type and message.
+        if self._driver is None:
+            raise AssertionError("Driver not initialized!")
+        session_kwargs = {} if db is None else {"database": db}
+        try:
+            # The context manager closes the session on success and on error.
+            with self._driver.session(**session_kwargs) as session:
+                return list(session.run(query, parameters))
+        except Exception as e:
+            print("Query failed:", e)
+            return None
+def _neo4j_setting(key):
+    """Return ``key`` from Streamlit secrets if readable, otherwise from the environment."""
+    try:
+        if key in st.secrets:
+            return st.secrets[key]
+    except FileNotFoundError:
+        # NOTE(review): Streamlit is expected to raise a FileNotFoundError-derived
+        # error when no secrets.toml exists; confirm for the installed version.
+        pass
+    return os.getenv(key)
+# Resolve credentials with the same secrets-then-.env precedence that
+# Neo4jConnection uses, so importing this module does not fail merely because
+# no Streamlit secrets are configured.
+# NOTE(review): when a value resolves to None, Neo4jGraph is presumed to apply
+# its own environment-variable lookup or raise; confirm against LangChain docs.
+graph = Neo4jGraph(
+    url=_neo4j_setting("NEO4J_URI"),
+    username=_neo4j_setting("NEO4J_USER") or os.getenv("NEO4J_USERNAME"),
+    password=_neo4j_setting("NEO4J_PASSWORD"),
+)
+# Refresh schema information for the LangChain graph object
+# This helps the LLM generate more accurate Cypher queries
+try:
+    graph.refresh_schema()
+except Exception as e:
+    print(f"Warning: Could not refresh graph schema. The LLM might generate less accurate queries. Error: {e}")