Himanshu Gangwar committed on
Commit
a479622
·
1 Parent(s): a4743f6

Add Gradio app with Git LFS for FAISS index

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ db/medicine_embeddings.index filter=lfs diff=lfs merge=lfs -text
37
+ *.index filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
README_HF.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Medicine GraphRAG AI
3
+ emoji: 💊
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 4.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # Medicine GraphRAG AI 💊
14
+
15
+ An intelligent drug information system combining **FAISS vector search** with **Neo4j graph database** in a unified Retrieval-Augmented Generation (RAG) pipeline powered by **Groq LLM**.
16
+
17
+ ## 🌟 Features
18
+
19
+ - **Hybrid RAG Architecture**: Combines semantic vector search (FAISS) with knowledge graph traversal (Neo4j)
20
+ - **Semantic Search**: Find medicines based on natural language queries
21
+ - **Graph Expansion**: Automatically discover relationships, substitutes, side effects, and interactions
22
+ - **LLM Reasoning**: Context-aware answers from OpenAI's GPT-OSS-120B model, served through the Groq API
23
+
24
+ ## 🛠️ Tech Stack
25
+
26
+ - **Frontend**: Gradio
27
+ - **Vector Store**: FAISS (Facebook AI Similarity Search)
28
+ - **Graph Database**: Neo4j Aura
29
+ - **LLM**: `openai/gpt-oss-120b` served via the Groq API
30
+ - **Embeddings**: BAAI/bge-large-en-v1.5
31
+
32
+ ## 🚀 Setup for Hugging Face Spaces
33
+
34
+ ### Required Secrets
35
+
36
+ Add these secrets in your Hugging Face Space settings (`NEO4J_DATABASE` is optional and defaults to `neo4j`):
37
+
38
+ ```
39
+ GROQ_API_KEY=your_groq_api_key
40
+ NEO4J_URI=neo4j+s://your-instance.databases.neo4j.io
41
+ NEO4J_USERNAME=neo4j
42
+ NEO4J_PASSWORD=your_password
43
+ NEO4J_DATABASE=neo4j
44
+ ```
45
+
46
+ ### Files Required
47
+
48
+ - `app.py` - Main Gradio application
49
+ - `db/medicine_embeddings.index` - FAISS index file
50
+ - `db/metadata.json` - Medicine metadata
51
+ - `requirements.txt` - Python dependencies
52
+
53
+ ## 📝 Usage
54
+
55
+ 1. Enter your medical query (e.g., "best medicine for acidity")
56
+ 2. Click "Search"
57
+ 3. View:
58
+ - Top relevant medicines from vector search
59
+ - Graph relationships and connections
60
+ - AI-generated comprehensive answer
61
+
62
+ ## ⚠️ Disclaimer
63
+
64
+ This application is for educational and informational purposes only. Always consult with qualified healthcare professionals for medical advice.
65
+
66
+ ## 📄 License
67
+
68
+ MIT License
app.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import faiss
3
+ import json
4
+ import numpy as np
5
+ from sentence_transformers import SentenceTransformer
6
+ from groq import Groq
7
+ from neo4j import GraphDatabase
8
+ from dotenv import load_dotenv
9
+ import os
10
+
11
# Merge variables from a local .env file (when one exists) into the process
# environment before any of the settings below are read.
load_dotenv()

# --- Secrets: supplied by the environment or Hugging Face Spaces secrets ---
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
NEO4J_URI = os.environ.get("NEO4J_URI")
NEO4J_USER = os.environ.get("NEO4J_USERNAME")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")

# --- On-disk artifacts shipped with the app ---
FAISS_INDEX_PATH = "db/medicine_embeddings.index"
METADATA_PATH = "db/metadata.json"

# --- Model identifiers ---
EMBED_MODEL = "BAAI/bge-large-en-v1.5"
LLM_MODEL = "openai/gpt-oss-120b"
24
+
25
+
26
+ # ---------------------------------------------------------
27
+ # LOAD MODELS & DATABASES (ON STARTUP)
28
+ # ---------------------------------------------------------
29
+
30
def load_faiss():
    """Read the FAISS index stored at ``FAISS_INDEX_PATH`` and return it."""
    index = faiss.read_index(FAISS_INDEX_PATH)
    return index
32
+
33
def load_metadata():
    """Parse the JSON document at ``METADATA_PATH`` and return its content.

    The file is decoded explicitly as UTF-8 so that non-ASCII text in the
    metadata is read the same way regardless of the host's locale default.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(METADATA_PATH, "r", encoding="utf-8") as f:
        return json.load(f)
36
+
37
def load_embedder():
    """Instantiate the ``SentenceTransformer`` named by ``EMBED_MODEL``."""
    model = SentenceTransformer(EMBED_MODEL)
    return model
39
+
40
def load_llm():
    """Build the Groq API client, passing ``GROQ_API_KEY`` as its API key."""
    client = Groq(api_key=GROQ_API_KEY)
    return client
42
+
43
def load_neo4j():
    """Create a Neo4j driver and confirm that the server is reachable.

    Returns:
        The driver, after ``verify_connectivity()`` has succeeded.

    Raises:
        ValueError: if ``NEO4J_URI``, ``NEO4J_USER`` or ``NEO4J_PASSWORD`` is
            unset or empty.
        Exception: whatever ``verify_connectivity()`` raises; the driver is
            closed before the error is propagated.
    """
    if not all([NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD]):
        raise ValueError("Neo4j credentials not configured")

    driver = GraphDatabase.driver(
        NEO4J_URI,
        auth=(NEO4J_USER, NEO4J_PASSWORD),
        max_connection_lifetime=3600,
        max_connection_pool_size=50,
        connection_acquisition_timeout=120,
    )
    try:
        # Test the connection
        driver.verify_connectivity()
    except Exception:
        # The caller never receives this driver on failure, so release it
        # here instead of leaking its connection pool.
        driver.close()
        raise
    return driver
57
+
58
+
59
# Initialize resources
# Everything here runs once, at import time; the resulting objects are the
# module-level globals the query functions below rely on.
print("Loading FAISS index...")
faiss_index = load_faiss()

print("Loading metadata...")
metadata = load_metadata()

print("Loading embedder model...")
embedder = load_embedder()

print("Loading Groq LLM client...")
groq_client = load_llm()

# Neo4j is optional: a failed connection is recorded in `neo4j_status` and
# leaves `neo4j_driver` as None instead of aborting start-up.
neo4j_driver = None
neo4j_status = ""
try:
    print("Connecting to Neo4j...")
    neo4j_driver = load_neo4j()
    neo4j_status = "✅ Connected to Neo4j"
    print(neo4j_status)
except Exception as conn_err:
    neo4j_status = f"❌ Neo4j Connection Failed: {conn_err!s}"
    print(neo4j_status)
    print("⚠️ App will continue with FAISS search only (Graph features disabled)")
81
+
82
+
83
+ # ---------------------------------------------------------
84
+ # GRAPH EXPANSION — FETCH RELATED NODES
85
+ # ---------------------------------------------------------
86
+
87
def get_graph_info(drug_name):
    """Fetch the outgoing graph relations of one drug from Neo4j.

    Args:
        drug_name: value matched against the ``name`` property of ``Drug``
            nodes.

    Returns:
        A dict mapping each relationship type to the list of ``name`` values
        of the nodes it points to (the query is capped at 200 rows). An empty
        dict is returned when no driver is available, when the query fails,
        or when nothing matches.
    """
    if neo4j_driver is None:
        return {}

    query = """
    MATCH (d:Drug {name: $name})-[r]->(n)
    RETURN type(r) AS relation, n.name AS value
    LIMIT 200
    """
    try:
        with neo4j_driver.session(database=NEO4J_DATABASE) as session:
            rows = session.run(query, name=drug_name).data()
    except Exception as exc:
        # Graph expansion is best-effort, so the failure is not propagated,
        # but it is reported rather than silently discarded.
        print(f"⚠️ Neo4j query failed for {drug_name!r}: {exc}")
        return {}

    graph_dict = {}
    for row in rows:
        value = row["value"]
        if value is None:
            # The target node has no `name` property; a None here would break
            # callers that join these values into text.
            continue
        graph_dict.setdefault(row["relation"], []).append(value)

    return graph_dict
109
+
110
+
111
+ # ---------------------------------------------------------
112
+ # SEMANTIC SEARCH (FAISS)
113
+ # ---------------------------------------------------------
114
+
115
def semantic_search(query, top_k=5):
    """Return the metadata records nearest to *query* in the FAISS index.

    Args:
        query: text to embed with the module-level ``embedder``.
        top_k: number of neighbours requested from the index.

    Returns:
        A list of ``metadata`` entries, in the order the index returned them.
        It can hold fewer than ``top_k`` items.
    """
    query_emb = embedder.encode(query).astype("float32")

    # Only the neighbour ids are needed; the distances are discarded.
    _, indices = faiss_index.search(np.array([query_emb]), top_k)

    results = []
    for raw_idx in indices[0]:
        idx = int(raw_idx)
        # FAISS fills missing neighbours with -1. Indexing metadata with it
        # would silently return the LAST record, so such ids (and any id past
        # the end of the metadata) are skipped.
        if 0 <= idx < len(metadata):
            results.append(metadata[idx])
    return results
126
+
127
+
128
+ # ---------------------------------------------------------
129
+ # LLM ANSWER USING GROQ
130
+ # ---------------------------------------------------------
131
+
132
def answer_with_groq(query, retrieved, graph_info):
    """Ask the Groq-hosted LLM to answer *query* from the supplied context.

    Args:
        query: the user's question.
        retrieved: iterable of medicine records; each must provide the keys
            ``name``, ``uses``, ``side_effects`` and ``manufacturer``.
        graph_info: mapping of medicine name to a
            ``{relation: [values]}`` dict.

    Returns:
        The text of the first completion choice, or ``""`` when the model
        returned no content.
    """
    # Sent with the "system" role so the rules are treated as instructions
    # rather than as part of the user's own text.
    system_prompt = (
        "You are a medical question answering assistant.\n"
        "You must:\n"
        "- Use the retrieved medicine information.\n"
        "- Use graph relations (substitutes, side effects, uses, classes).\n"
        "- Never hallucinate facts.\n"
        "- Respond using ONLY provided context."
    )

    # Build context from FAISS metadata
    text_block = "\n".join(
        f"Medicine: {item['name']}\n"
        f"Uses: {item['uses']}\n"
        f"Side Effects: {item['side_effects']}\n"
        f"Manufacturer: {item['manufacturer']}\n"
        for item in retrieved
    )

    # Add graph info
    graph_lines = []
    for medicine, relations in graph_info.items():
        graph_lines.append(f"\nGraph Data for {medicine}:")
        for rel, vals in relations.items():
            # Values are coerced to str and None is dropped, so a node
            # without a name cannot make the join raise TypeError.
            joined = ", ".join(str(v) for v in vals if v is not None)
            graph_lines.append(f"{rel}: {joined}")
    graph_text = "\n".join(graph_lines)

    user_prompt = (
        f"User Query:\n{query}\n\n"
        f"Retrieved Medicine Data:\n{text_block}\n\n"
        f"Graph Knowledge:\n{graph_text}\n\n"
        "Final Answer:\n"
    )

    response = groq_client.chat.completions.create(
        model=LLM_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.2,
    )

    # `content` may be None; callers concatenate the result with a string.
    return response.choices[0].message.content or ""
181
+
182
+
183
+ # ---------------------------------------------------------
184
+ # MAIN QUERY FUNCTION
185
+ # ---------------------------------------------------------
186
+
187
def process_query(query):
    """Run the full pipeline for one user query.

    Steps: FAISS semantic search, graph expansion for every retrieved
    medicine, then LLM answer generation.

    Args:
        query: text typed by the user; ``None``, empty or whitespace-only
            input yields a prompt to enter a query.

    Returns:
        A 4-tuple ``(medicines_markdown, graph_markdown, answer_markdown,
        neo4j_status)``.
    """
    # `not query` also covers None, which has no .strip().
    if not query or not query.strip():
        return "⚠️ Please enter a query.", "", "", neo4j_status

    # Step 1: Semantic Search
    results = semantic_search(query)

    # Step 2: Format retrieved medicines
    medicines_text = "### 🔬 Top Relevant Medicines\n\n" + "".join(
        f"**{r['name']}** — {r['uses']}\n\n" for r in results
    )

    # Step 3: Graph expansion
    graph_dict = {r["name"]: get_graph_info(r["name"]) for r in results}

    # Fenced so Markdown keeps the JSON indentation instead of collapsing it
    # into one paragraph; ensure_ascii=False keeps non-ASCII names readable.
    graph_text = (
        "### 🧬 Graph Relations Found\n\n"
        "```json\n"
        + json.dumps(graph_dict, indent=2, ensure_ascii=False)
        + "\n```"
    )

    # Step 4: Generate LLM answer
    try:
        answer = answer_with_groq(query, results, graph_dict)
    except Exception as exc:
        # UI boundary: a failed LLM call should not discard the retrieval
        # results already computed, so report it in the answer panel.
        print(f"⚠️ LLM answer generation failed: {exc}")
        answer = f"⚠️ Could not generate an answer: {exc}"

    final_answer = "### 🩺 Final Answer\n\n" + (answer or "")

    return medicines_text, graph_text, final_answer, neo4j_status
217
+
218
+
219
+ # ---------------------------------------------------------
220
+ # GRADIO UI
221
+ # ---------------------------------------------------------
222
+
223
def create_interface():
    """Assemble the Gradio Blocks UI and return it without launching it."""
    sample_queries = [
        ["What is the best medicine for acidity?"],
        ["Show me medicines for headache"],
        ["What are the side effects of paracetamol?"],
        ["Suggest medicine for cold and fever"],
    ]

    with gr.Blocks(title="Medicine GraphRAG AI") as demo:
        # Header
        gr.Markdown("# 💊 Medicine GraphRAG AI")
        gr.Markdown("**Semantic Search + Graph DB + LLM reasoning using Groq GPT-OSS-120B**")

        # Read-only Neo4j connection status
        with gr.Row():
            status_display = gr.Textbox(
                label="Database Status",
                value=neo4j_status,
                interactive=False,
                lines=1,
            )

        # Query box
        with gr.Row():
            query_input = gr.Textbox(
                label="Enter your medical query",
                placeholder="e.g., best medicine for acidity",
                lines=2,
            )

        # Action buttons
        with gr.Row():
            search_btn = gr.Button("Search", variant="primary", size="lg")
            clear_btn = gr.Button("Clear", variant="secondary")

        # Result panels: retrieval and graph side by side, answer below
        with gr.Row():
            with gr.Column():
                medicines_output = gr.Markdown(label="Top Relevant Medicines")
            with gr.Column():
                graph_output = gr.Markdown(label="Graph Relations")

        with gr.Row():
            answer_output = gr.Markdown(label="Final Answer")

        # Both buttons write to the same four components, in this order.
        result_panels = [medicines_output, graph_output, answer_output, status_display]

        search_btn.click(
            fn=process_query,
            inputs=[query_input],
            outputs=result_panels,
        )
        # Clear blanks the three result panels and re-shows the status text.
        clear_btn.click(
            fn=lambda: ("", "", "", neo4j_status),
            inputs=[],
            outputs=result_panels,
        )

        # Clickable example queries that populate the query box
        gr.Examples(examples=sample_queries, inputs=query_input)

    return demo
282
+
283
+
284
if __name__ == "__main__":
    # Script entry point: build the UI, then start the Gradio server.
    demo = create_interface()
    demo.launch()
db/medicine_embeddings.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37dd2deac6c121c8f968cbbaa355e55dc6b23e52b0b0a5c6f58cbff370680918
3
+ size 48435245
db/metadata.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ python-dotenv
2
+ neo4j
3
+ groq
4
+ pandas
5
+ gradio
6
+ langchain_community
7
+ sentence-transformers
8
+ faiss-cpu
9
+ transformers