Sazzz02 commited on
Commit
0e1d3d0
·
verified ·
1 Parent(s): ca78cb9

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +1060 -0
app.py ADDED
@@ -0,0 +1,1060 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ╔══════════════════════════════════════════════════════════════╗
3
+ ║ PharmaBridge — Cross-Medical-System Drug Intelligence ║
4
+ ║ Hugging Face Spaces | Gradio 4.x | Master's Thesis ║
5
+ ╚══════════════════════════════════════════════════════════════╝
6
+ 7 Tabs:
7
+ 1. Smart Drug Search — TF-IDF cosine retrieval with cards UI
8
+ 2. Cross-System Compare — Side-by-side 5-system radar comparison
9
+ 3. Dataset Analytics — 3 sub-tabs of Plotly dashboards
10
+ 4. Drug Fingerprint — Single drug deep-dive profile
11
+ 5. FDA Live Intelligence — OpenFDA API (Labels / Events / NDC)
12
+ 6. AI Medical Q&A — HuggingFace Inference API (Mistral-7B)
13
+ 7. Drug Explorer — Paginated browse & filter table
14
+ """
15
+
16
+ import gradio as gr
17
+ import pandas as pd
18
+ import numpy as np
19
+ import plotly.graph_objects as go
20
+ import plotly.express as px
21
+ from plotly.subplots import make_subplots
22
+ import joblib, re, os, requests, json, warnings
23
+ warnings.filterwarnings("ignore")
24
+
25
+ from sklearn.feature_extraction.text import TfidfVectorizer
26
+ from sklearn.metrics.pairwise import cosine_similarity
27
+
28
+ # ────────────────────────────────────────────────────────────────
29
+ # 0. LOAD / REBUILD MODELS
30
+ # ────────────────────────────────────────────────────────────────
31
+
32
+ def _clean(text):
33
+ if pd.isna(text): return ""
34
+ t = str(text).strip()
35
+ if t in ["FALSE","False","false","nan","NaN",""]: return ""
36
+ return re.sub(r"\s+"," ", re.sub(r"[^a-z0-9\s\+\-\./]"," ", t.lower())).strip()
37
+
38
+ def _build_text(row):
39
+ s = row["medical_system"]
40
+ d = _clean(row.get("Dosages Description",""))
41
+ g = _clean(row.get("Generic Name and Strength",""))
42
+ b = _clean(row.get("Brand Name",""))
43
+ n = _clean(row.get("Generic Name",""))
44
+ if s == "Allopathic": return " ".join(filter(None,[n,d,s.lower()]))
45
+ if s in ("Ayurvedic","Herbal"): return " ".join(filter(None,[g,d,s.lower()]))
46
+ if s == "Homeopathic": return " ".join(filter(None,[b,d,s.lower()]))
47
+ return " ".join(filter(None,[g,d,s.lower()])) # Unani
48
+
49
# Raw registry headers -> the snake_case column names used by the rest of the app.
_REMAP = {
    "Brand Name": "brand_name",
    "Generic Name": "generic_name",
    "Dosages Description": "dosage_form",
    "Strength": "strength",
    "Name of the Manufacturer": "manufacturer",
    "Generic Name and Strength": "gns",
}


def _fit_tfidf(texts):
    """Fit the retrieval vectoriser on *texts*; return ``(vectoriser, matrix)``."""
    vectoriser = TfidfVectorizer(ngram_range=(1, 2), max_features=15000,
                                 stop_words=None, sublinear_tf=True, min_df=1)
    return vectoriser, vectoriser.fit_transform(texts)


print("⏳ Loading PharmaBridge models…")
try:
    VEC = joblib.load("models/tfidf_vectorizer.pkl")
    MAT = joblib.load("models/tfidf_matrix.pkl")
    DF = pd.read_csv("models/drug_database.csv")
    print("βœ… PKL models loaded.")
except Exception as e:
    # Deliberately broad: any failure to load the cached artefacts (missing
    # file, unpickling error, ...) falls back to a rebuild from the raw CSV.
    print(f"⚠️ PKL not found ({e}), rebuilding from CSV…")
    raw = pd.read_csv("merged_pharma_dataset.csv")
    DF = raw.copy()
    DF["drug_text"] = DF.apply(_build_text, axis=1)
    DF = DF.rename(columns=_REMAP)
    VEC, MAT = _fit_tfidf(DF["drug_text"])
    print("βœ… Rebuilt from CSV.")

# Normalise column names (only where the snake_case name is not already there).
DF = DF.rename(columns={old: new for old, new in _REMAP.items()
                        if old in DF.columns and new not in DF.columns})
for _col in ["brand_name", "generic_name", "dosage_form", "strength",
             "manufacturer", "gns", "drug_text"]:
    if _col not in DF.columns:
        DF[_col] = ""

# Coerce to strings first: a column that read_csv parsed as all-NaN floats has
# no ``.str`` accessor, and NaN cells would otherwise display as "nan".
DF["drug_text"] = DF["drug_text"].fillna("").astype(str)
if DF["drug_text"].str.len().sum() == 0:
    # _build_text is written against the raw registry headers, so present the
    # rows under those names again; applying it to the renamed frame yields
    # documents that contain only the system name.
    _restore = {new: old for old, new in _REMAP.items()
                if new in DF.columns and old not in DF.columns}
    DF["drug_text"] = DF.rename(columns=_restore).apply(_build_text, axis=1)

DF = DF.reset_index(drop=True)

# Similarity scores are mapped back to DF by position, so a cached matrix with
# a different row count than the table would silently return the wrong drugs.
if MAT.shape[0] != len(DF):
    print(f"⚠️ TF-IDF matrix has {MAT.shape[0]} rows but the drug table has {len(DF)}; refitting…")
    VEC, MAT = _fit_tfidf(DF["drug_text"])
84
+
85
# Choices for the system filter: the "All Systems" sentinel plus the five traditions.
SYSTEMS = [
    "All Systems",
    "Allopathic",
    "Ayurvedic",
    "Unani",
    "Homeopathic",
    "Herbal",
]

# Hex colour per medical system, shared by cards and charts.
SC = {
    "Allopathic": "#3B82F6",
    "Ayurvedic": "#10B981",
    "Unani": "#F59E0B",
    "Homeopathic": "#8B5CF6",
    "Herbal": "#EF4444",
}

# Icon shown next to each medical system's name.
EMOJI = {
    "Allopathic": "πŸ’Š",
    "Ayurvedic": "🌿",
    "Unani": "☘️",
    "Homeopathic": "πŸ’§",
    "Herbal": "🌱",
}

# Pre-compute for analytics: these aggregates are built once at import time.
_SYS_VC = DF["medical_system"].value_counts()
_DOS_VC = DF["dosage_form"].value_counts()
_MFR_VC = DF["manufacturer"].value_counts()
_SYS_MFR = DF.groupby("medical_system")["manufacturer"].nunique()
# Vocabulary terms, indexable by TF-IDF column position.
_FEAT = np.array(VEC.get_feature_names_out())
96
+
97
+ # ────────────────────────────────────────────────────────────────
98
+ # 1. RETRIEVAL HELPERS
99
+ # ─────────��──────────────────────────────────────────────────────
100
+
101
def _encode(q):
    """Turn a free-text query into a TF-IDF row vector.

    The query is lowercased, characters outside a-z, 0-9, whitespace and
    ``+ - . /`` become spaces, whitespace runs are collapsed, and the result
    is transformed with the fitted vectoriser ``VEC``.
    """
    lowered = q.lower()
    without_noise = re.sub(r"[^a-z0-9\s\+\-\./]", " ", lowered)
    single_spaced = re.sub(r"\s+", " ", without_noise)
    return VEC.transform([single_spaced.strip()])
104
+
105
def _recommend(query, system, top_n, min_s):
    """Return the *top_n* drugs most similar to *query*.

    Args:
        query: free-text search string.
        system: medical system to restrict to; ``"All Systems"``, ``"All"``,
            ``""`` or ``None`` means no restriction.
        top_n: maximum number of rows to return.
        min_s: minimum cosine similarity a row must reach.

    Returns:
        A copy of the matching ``DF`` rows with an added ``score`` column
        (similarity rounded to 4 decimals), sorted by descending score, or
        an empty ``DataFrame`` when nothing qualifies.
    """
    sims = cosine_similarity(_encode(query), MAT).flatten()

    # Select candidate rows by membership instead of zeroing the other
    # systems' scores: with ``min_s == 0`` the zeroed rows still satisfied
    # ``score >= min_s`` and leaked into a system-filtered result.
    if system and system not in ("All Systems", "All"):
        candidates = np.flatnonzero((DF["medical_system"] == system).values)
    else:
        candidates = np.arange(sims.shape[0])
    if top_n <= 0 or candidates.size == 0:
        return pd.DataFrame()

    ranked = candidates[np.argsort(sims[candidates], kind="stable")[::-1]]
    # The threshold filter is monotone along the ranking, so taking the first
    # top_n survivors needs no oversized pre-selection window.
    idx = ranked[sims[ranked] >= min_s][:top_n]
    if idx.size == 0:
        return pd.DataFrame()

    result = DF.iloc[idx].copy()
    result["score"] = [round(float(sims[i]), 4) for i in idx]
    return (result.sort_values("score", ascending=False, kind="stable")
                  .reset_index(drop=True))
114
+
115
def _cross(query, tps):
    """Return up to *tps* best matches for *query* from each medical system.

    Only rows whose cosine similarity exceeds 0.01 are kept. Each returned
    row carries a ``score`` column (rounded to 4 decimals); rows are sorted
    by system name, then by descending score. An empty ``DataFrame`` is
    returned when nothing matches or *tps* is not positive.
    """
    if tps <= 0:
        # ``arr[-0:]`` is the whole array, so a zero limit used to return
        # every matching row once per system.
        return pd.DataFrame()

    sims = cosine_similarity(_encode(query), MAT).flatten()
    rows = []
    for system_name in ["Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]:
        # Rank only this system's own rows. The previous version zeroed the
        # other systems in a copy but tested the *unmasked* score, so when a
        # system had fewer than tps hits, rows of other systems were appended.
        members = np.flatnonzero((DF["medical_system"] == system_name).values)
        if members.size == 0:
            continue
        best = members[np.argsort(sims[members])[::-1][:tps]]
        for i in best:
            if sims[i] > 0.01:
                record = DF.iloc[i].to_dict()
                record["score"] = round(float(sims[i]), 4)
                rows.append(record)

    if not rows:
        return pd.DataFrame()
    return (pd.DataFrame(rows)
            .sort_values(["medical_system", "score"], ascending=[True, False])
            .reset_index(drop=True))
126
+
127
+ # ────────────────────────────────────────────────────────────────
128
+ # 2. TAB 1 β€” SMART DRUG SEARCH
129
+ # ────────────────────────────────────────────────────────────────
130
+
131
def tab1(query, system, top_n, min_s):
    """Smart Drug Search: render result cards, a score chart and a summary.

    Args:
        query: free-text search string from the UI.
        system: medical-system filter passed through to ``_recommend``.
        top_n: maximum number of results (coerced to ``int``).
        min_s: minimum similarity (coerced to ``float``).

    Returns:
        ``(cards_html, plotly_figure_or_None, markdown_summary)``.
    """
    from html import escape  # function-scope import keeps this block self-contained

    def shown(value, fallback="β€”"):
        """Return a cell as display text; NaN/None/blank become *fallback*."""
        if value is None or pd.isna(value):
            return fallback
        text = str(value).strip()
        return text or fallback

    if not query or not query.strip():
        return '<div class="ph">πŸ” Type a drug name, compound, or symptom above and press Search</div>', None, ""

    # The query is user input and the cells come from a CSV: both are escaped
    # before being placed into markup.
    safe_query = escape(query)
    r = _recommend(query, system, int(top_n), float(min_s))
    if r.empty:
        return f'<div class="ph">No results found for <b>{safe_query}</b>. Try lowering the similarity threshold.</div>', None, ""

    parts = [f'<div class="rh">Found <b>{len(r)}</b> results for "<b>{safe_query}</b>"</div><div class="grid">']
    for _, row in r.iterrows():
        system_name = shown(row.get("medical_system"), "")
        c = SC.get(system_name, "#6B7280")
        em = EMOJI.get(system_name, "πŸ’Š")
        bn = escape(shown(row.get("brand_name")))
        # NaN used to stringify to the truthy "nan", which disabled this fallback.
        gn = shown(row.get("gns"), "") or shown(row.get("generic_name"))
        gn = escape(gn[:70] + '…' if len(gn) > 70 else gn)
        dos = escape(shown(row.get("dosage_form")))
        mfr = escape(shown(row.get("manufacturer"))[:38])
        pct = int(float(row.get("score", 0)) * 100)
        parts.append(f"""
<div class="card" style="border-left:4px solid {c}">
<div class="ch">
<span class="sbadge" style="background:{c}18;color:{c};border:1px solid {c}35">{em} {escape(system_name)}</span>
<span class="spct" style="background:{c}12;color:{c}">{pct}%</span>
</div>
<div class="bn">{bn}</div>
<div class="gn">{gn}</div>
<div class="meta">πŸ’Š {dos} &nbsp;Β·&nbsp; 🏭 {mfr}</div>
<div class="bar"><div class="fill" style="width:{pct}%;background:{c}"></div></div>
</div>""")
    parts.append("</div>")
    cards = "".join(parts)

    top_rows = r.head(15)
    fig = px.bar(
        top_rows, x="score", y="brand_name", color="medical_system",
        color_discrete_map=SC, orientation="h",
        labels={"score": "Similarity Score", "brand_name": ""},
        title=f'Similarity Scores β€” "{query}"',
    )
    fig.update_layout(
        height=max(340, len(top_rows) * 30 + 90),
        paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, title=None),
        margin=dict(l=0, r=10, t=50, b=10), yaxis=dict(autorange="reversed"),
        xaxis=dict(range=[0, 1], gridcolor="#f1f5f9"),
    )
    dist = r["medical_system"].value_counts().to_dict()
    stat = " Β· ".join(f"**{k}** {v}" for k, v in dist.items())
    return cards, fig, f"πŸ“Š {stat}"
180
+
181
+
182
+ # ────────────────────────────────────────────────────────────────
183
+ # 3. TAB 2 β€” CROSS-SYSTEM COMPARE
184
+ # ────────────────────────────────────────────────────────────────
185
+
186
def tab2(query, tps):
    """Cross-System Compare: one column of matches per system plus a radar.

    Args:
        query: free-text search string from the UI.
        tps: number of matches to show per system (coerced to ``int``).

    Returns:
        ``(columns_html, plotly_figure_or_None)``. The radar plots the mean
        similarity of the returned rows for each system (0 when none).
    """
    from html import escape  # function-scope import keeps this block self-contained

    def shown(value, fallback="β€”"):
        """Return a cell as display text; NaN/None/blank become *fallback*."""
        if value is None or pd.isna(value):
            return fallback
        text = str(value).strip()
        return text or fallback

    if not query or not query.strip():
        return '<div class="ph">Enter a query to compare drugs across all 5 medical traditions</div>', None

    r = _cross(query, int(tps))
    if r.empty:
        return '<div class="ph">No cross-system results found.</div>', None

    systems = ["Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]
    by_system = {name: r[r["medical_system"] == name] for name in systems}

    # User input and CSV cells are escaped before entering the markup.
    parts = [f'<div class="cph">Cross-system view for <b>"{escape(query)}"</b></div><div class="cgrid">']
    for name in systems:
        sub = by_system[name]
        c = SC[name]
        em = EMOJI[name]
        parts.append(f'<div class="scol" style="border-top:3px solid {c}"><div class="stitle" style="color:{c}">{em} {name}</div>')
        if sub.empty:
            parts.append('<div class="nr">No match found</div>')
        else:
            for _, row in sub.iterrows():
                bn = escape(shown(row.get("brand_name")))
                # NaN used to stringify to the truthy "nan", disabling the fallback.
                gn = shown(row.get("gns"), "") or shown(row.get("generic_name"))
                gn = escape(gn[:48] + '…' if len(gn) > 48 else gn)
                dos = escape(shown(row.get("dosage_form")))
                sc_ = int(float(row.get("score", 0)) * 100)
                parts.append(f"""<div class="cc" style="border-left:3px solid {c}38">
<div class="cbn">{bn}</div>
<div class="cgn">{gn}</div>
<div class="cm">{dos} Β· {sc_}%</div>
<div class="sbar"><div class="sfill" style="width:{sc_}%;background:{c}"></div></div>
</div>""")
        parts.append("</div>")
    parts.append("</div>")
    html = "".join(parts)

    # Radar chart: mean score per system, closed by repeating the first point.
    avgs = {name: (float(sub["score"].mean()) if not sub.empty else 0)
            for name, sub in by_system.items()}
    cats = list(avgs.keys())
    vals = list(avgs.values())
    fig = go.Figure(go.Scatterpolar(
        r=vals + [vals[0]], theta=cats + [cats[0]], fill="toself",
        fillcolor="rgba(59,130,246,0.12)", line=dict(color="#3B82F6", width=2.5),
        marker=dict(size=9, color=[SC[s] for s in cats] + [SC[cats[0]]]),
    ))
    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 1], gridcolor="#e5e7eb"),
                   angularaxis=dict(gridcolor="#e5e7eb", tickfont=dict(size=12))),
        title=dict(text=f'Cross-System Radar β€” "{query}"', font=dict(size=13, color="#1e293b")),
        paper_bgcolor="rgba(0,0,0,0)", font=dict(family="Inter,sans-serif"),
        height=380, showlegend=False, margin=dict(l=50, r=50, t=60, b=30),
    )
    return html, fig
233
+
234
+
235
+ # ────────────────────────────────────────────────────────────────
236
+ # 4. TAB 3 β€” DATASET ANALYTICS (3 sub-views)
237
+ # ────────────────────────────────────────────────────────────────
238
+
239
def _overview_fig():
    """Assemble the 2x3 dataset overview dashboard as one Plotly figure.

    Panels, row by row: system share donut, top 12 dosage forms, distinct
    manufacturers per system, top 15 manufacturers, system-by-dosage heatmap,
    and a donut of per-system TF-IDF vocabulary counts.
    """
    five_systems = ["Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]
    panel_titles = ["System Share", "Top 12 Dosage Forms", "Manufacturers per System",
                    "Top 15 Manufacturers", "System Γ— Dosage Heatmap", "TF-IDF Vocab Share"]
    panel_specs = [[{"type": "domain"}, {"type": "xy"}, {"type": "xy"}],
                   [{"type": "xy"}, {"type": "xy"}, {"type": "domain"}]]
    fig = make_subplots(rows=2, cols=3, subplot_titles=panel_titles, specs=panel_specs,
                        vertical_spacing=0.14, horizontal_spacing=0.08)

    # Panel 1: share of records per medical system.
    share_donut = go.Pie(
        labels=_SYS_VC.index.tolist(), values=_SYS_VC.values.tolist(), hole=0.55,
        marker=dict(colors=[SC.get(name, "#aaa") for name in _SYS_VC.index],
                    line=dict(color="white", width=2.5)),
        textinfo="label+percent", textfont=dict(size=10), showlegend=False,
    )
    fig.add_trace(share_donut, row=1, col=1)

    # Panel 2: most common dosage forms, smallest first so the largest bar is on top.
    top_dosage = _DOS_VC.head(12)
    dosage_counts = top_dosage.values[::-1]
    dosage_bars = go.Bar(
        x=dosage_counts, y=top_dosage.index[::-1].tolist(), orientation="h",
        marker=dict(color=px.colors.sequential.Blues_r[:12], line=dict(color="white", width=1)),
        text=[f"{count:,}" for count in dosage_counts], textposition="outside", showlegend=False,
    )
    fig.add_trace(dosage_bars, row=1, col=2)

    # Panel 3: number of distinct manufacturers in each system.
    maker_counts = _SYS_MFR.values.tolist()
    makers_per_system = go.Bar(
        x=_SYS_MFR.index.tolist(), y=maker_counts,
        marker=dict(color=[SC.get(name, "#aaa") for name in _SYS_MFR.index],
                    line=dict(color="white", width=2)),
        text=maker_counts, textposition="outside", showlegend=False,
    )
    fig.add_trace(makers_per_system, row=1, col=3)

    # Panel 4: manufacturers with the most records (names cut to 28 characters).
    top_makers = _MFR_VC.head(15)
    maker_totals = top_makers.values[::-1].tolist()
    top_maker_bars = go.Bar(
        y=[name[:28] for name in top_makers.index[::-1].tolist()], x=maker_totals,
        orientation="h",
        marker=dict(color=top_makers.values[::-1].tolist(), colorscale="Viridis",
                    showscale=False, line=dict(color="white", width=1)),
        showlegend=False,
    )
    fig.add_trace(top_maker_bars, row=2, col=1)

    # Panel 5: record counts for each system against the 8 most common dosage forms.
    top8 = _DOS_VC.head(8).index.tolist()
    piv = pd.crosstab(DF["medical_system"], DF["dosage_form"])
    z = []
    for name in five_systems:
        z.append([int(piv[form].get(name, 0)) if form in piv.columns else 0 for form in top8])
    heat = go.Heatmap(
        z=z, x=[form[:12] for form in top8], y=five_systems, colorscale="YlOrRd",
        text=z, texttemplate="%{text}", textfont=dict(size=9),
        showscale=True, colorbar=dict(thickness=10, x=0.65, len=0.42),
    )
    fig.add_trace(heat, row=2, col=2)

    # Panel 6: per system, how many vocabulary terms have a mean TF-IDF weight above 0.001.
    vtoks = {}
    for name in five_systems:
        in_system = (DF["medical_system"] == name).values
        mean_weights = np.asarray(MAT[in_system].mean(axis=0)).flatten()
        vtoks[name] = int((mean_weights > 0.001).sum())
    vocab_donut = go.Pie(
        labels=list(vtoks.keys()), values=list(vtoks.values()), hole=0.5,
        marker=dict(colors=[SC.get(name, "#aaa") for name in vtoks],
                    line=dict(color="white", width=2)),
        textinfo="label+value", textfont=dict(size=10), showlegend=False,
    )
    fig.add_trace(vocab_donut, row=2, col=3)

    fig.update_layout(
        height=720, paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        title=dict(text="PharmaBridge β€” Dataset Intelligence Dashboard",
                   font=dict(size=16, color="#1e293b"), x=0.5),
        margin=dict(l=10, r=10, t=80, b=10),
    )
    fig.update_xaxes(showgrid=True, gridcolor="#f1f5f9", zeroline=False)
    fig.update_yaxes(showgrid=False)
    return fig
312
+
313
def _deep_fig(sel):
    """Build the 2x2 deep-dive figure for one medical system (or ``"All"``).

    Panels: top 20 compounds, dosage-form split, top 10 manufacturers, and a
    brand-count comparison across all systems with *sel* highlighted.
    """
    if sel == "All":
        sub = DF
    else:
        sub = DF[DF["medical_system"] == sel]
    c = SC.get(sel, "#3B82F6")

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[f"Top 20 Compounds ({sel})", "Dosage Form Split",
                        "Top 10 Manufacturers", "Brand Count Comparison"],
        specs=[[{"type": "xy"}, {"type": "domain"}], [{"type": "xy"}, {"type": "xy"}]],
        vertical_spacing=0.16, horizontal_spacing=0.10,
    )

    # Compounds: the column that identifies a compound differs per system.
    if sel == "Homeopathic":
        identity = sub["brand_name"]
    elif sel == "Allopathic":
        identity = sub["generic_name"].dropna()
    else:
        identity = sub["gns"].dropna()
    comp = identity.value_counts().head(20)
    compound_counts = comp.values[::-1].tolist()
    fig.add_trace(go.Bar(
        x=compound_counts, y=comp.index[::-1].tolist(), orientation="h",
        marker=dict(color=c, opacity=0.85, line=dict(color="white", width=1)),
        text=compound_counts, textposition="outside", showlegend=False,
    ), row=1, col=1)

    # Dosage donut: the eight most common dosage forms in the selection.
    dos = sub["dosage_form"].value_counts().head(8)
    fig.add_trace(go.Pie(
        labels=dos.index.tolist(), values=dos.values.tolist(), hole=0.48,
        marker=dict(colors=px.colors.qualitative.Set3[:len(dos)],
                    line=dict(color="white", width=2)),
        textinfo="label+percent", textfont=dict(size=10), showlegend=False,
    ), row=1, col=2)

    # Top manufacturers (names cut to 26 characters).
    mf = sub["manufacturer"].value_counts().head(10)
    maker_counts = mf.values[::-1].tolist()
    fig.add_trace(go.Bar(
        x=maker_counts, y=[name[:26] for name in mf.index[::-1].tolist()],
        orientation="h",
        marker=dict(color=mf.values[::-1].tolist(), colorscale="Blues",
                    showscale=False, line=dict(color="white", width=1)),
        showlegend=False,
    ), row=2, col=1)

    # Brand count: always computed over the whole table, not the selection.
    bc = DF.groupby("medical_system")["brand_name"].nunique().sort_values(ascending=False)
    brand_totals = bc.values.tolist()
    bar_colours = [c if name == sel else "#cbd5e1" for name in bc.index]
    fig.add_trace(go.Bar(
        x=bc.index.tolist(), y=brand_totals,
        marker=dict(color=bar_colours, line=dict(color="white", width=2)),
        text=brand_totals, textposition="outside", showlegend=False,
    ), row=2, col=2)

    fig.update_layout(
        height=680, paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        title=dict(text=f"Deep Dive: {sel}", font=dict(size=15, color="#1e293b"), x=0.5),
        margin=dict(l=10, r=10, t=70, b=10),
    )
    fig.update_xaxes(showgrid=True, gridcolor="#f1f5f9", zeroline=False)
    fig.update_yaxes(showgrid=False)
    return fig
369
+
370
def _treemap_fig():
    """Build a treemap of record counts: medical system, then dosage form.

    System/dosage-form pairs with fewer than 5 records are left out.
    """
    pair_counts = (DF.groupby(["medical_system", "dosage_form"])
                     .size()
                     .reset_index(name="count"))
    frequent_pairs = pair_counts[pair_counts["count"] >= 5]

    fig = px.treemap(
        frequent_pairs,
        path=["medical_system", "dosage_form"],
        values="count",
        color="medical_system",
        color_discrete_map=SC,
        title="Drug Hierarchy: Medical System β†’ Dosage Form",
    )
    fig.update_traces(textinfo="label+value+percent parent", textfont=dict(size=12))
    fig.update_layout(
        height=520,
        paper_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=12),
        title=dict(font=dict(size=15, color="#1e293b"), x=0.5),
        margin=dict(l=10, r=10, t=60, b=10),
    )
    return fig
382
+
383
def tab3_deep_update(sel):
    """Callback for the deep-dive selector: return the figure for *sel*."""
    deep_dive_figure = _deep_fig(sel)
    return deep_dive_figure
385
+
386
+
387
+ # ────────────────────────────────────────────────────────────────
388
+ # 5. TAB 4 β€” DRUG FINGERPRINT (single drug profile)
389
+ # ────────────────────────────────────────────────────────────────
390
+
391
def tab4_fingerprint(brand_query):
    """Drug Fingerprint: profile card plus a bar chart of TF-IDF weights.

    The single row of ``DF`` most similar to *brand_query* is selected. Its
    details are rendered as an HTML card, and its (up to 20) highest non-zero
    TF-IDF term weights are drawn as a horizontal bar chart.

    Returns:
        ``(card_html, plotly_figure)``, or ``(placeholder_html, None)`` when
        the query is blank or the best similarity is below 0.01.
    """
    from html import escape  # function-scope import keeps this block self-contained

    def shown(value, fallback="β€”"):
        """Return a cell as display text; NaN/None/blank become *fallback*."""
        if value is None or pd.isna(value):
            return fallback
        text = str(value).strip()
        return text or fallback

    if not brand_query or not brand_query.strip():
        return '<div class="ph">Enter a brand name to see its full drug profile</div>', None

    # Find best match
    sims = cosine_similarity(_encode(brand_query), MAT).flatten()
    idx = int(sims.argsort()[-1])
    row = DF.iloc[idx]
    sc_v = float(sims[idx])

    if sc_v < 0.01:
        return f'<div class="ph">No drug found matching "<b>{escape(brand_query)}</b>".</div>', None

    sys_n = shown(row.get("medical_system"), "")
    c = SC.get(sys_n, "#6B7280")
    em = EMOJI.get(sys_n, "πŸ’Š")
    raw_bn = shown(row.get("brand_name"))
    gns_value = shown(row.get("gns"), "")
    # NaN used to stringify to the truthy "nan", which disabled this fallback.
    gn = gns_value or shown(row.get("generic_name"))
    dos = shown(row.get("dosage_form"))
    mfr = shown(row.get("manufacturer"))
    clu = shown(row.get("cluster"))
    dart = shown(row.get("DAR")) if "DAR" in row.index else "β€”"
    txt = shown(row.get("drug_text"), "")

    # Siblings: other rows of the same system with the same compound string.
    # A row without a compound has no meaningful siblings; comparing against
    # the text "nan" matched nothing and produced a count of -1.
    if gns_value:
        same_compound = (DF["medical_system"] == sys_n) & (DF["gns"] == row.get("gns"))
        sib_count = max(0, int(same_compound.sum()) - 1)
    else:
        sib_count = 0

    # Every interpolated value below is CSV data and is therefore escaped.
    html = f"""
<div class="fp-card" style="border:2px solid {c}40;background:white;border-radius:16px;overflow:hidden">
<div class="fp-banner" style="background:linear-gradient(135deg,{c},{c}99);padding:20px 24px;color:white">
<div style="font-size:0.85rem;opacity:0.85;margin-bottom:4px">{em} {escape(sys_n)}</div>
<div style="font-size:1.7rem;font-weight:800;letter-spacing:-0.5px">{escape(raw_bn)}</div>
<div style="font-size:0.95rem;opacity:0.9;margin-top:4px">{escape(gn[:80])}</div>
<div style="margin-top:12px;background:rgba(255,255,255,0.2);border-radius:20px;padding:5px 14px;
display:inline-block;font-size:0.8rem;font-weight:600">
{int(sc_v*100)}% match confidence
</div>
</div>
<div style="padding:20px 24px;display:grid;grid-template-columns:1fr 1fr;gap:14px">
<div class="fp-row"><span class="fp-k">πŸ’Š Dosage Form</span><span class="fp-v">{escape(dos)}</span></div>
<div class="fp-row"><span class="fp-k">🏭 Manufacturer</span><span class="fp-v">{escape(mfr[:40])}</span></div>
<div class="fp-row"><span class="fp-k">🧬 Medical System</span><span class="fp-v">{escape(sys_n)}</span></div>
<div class="fp-row"><span class="fp-k">πŸ“‚ Cluster</span><span class="fp-v">#{escape(clu)}</span></div>
<div class="fp-row"><span class="fp-k">πŸ“‹ DAR Number</span><span class="fp-v">{escape(dart)}</span></div>
<div class="fp-row"><span class="fp-k">πŸ‘₯ Same-compound drugs</span><span class="fp-v">{sib_count}</span></div>
</div>
<div style="padding:0 24px 20px;font-size:0.82rem;color:#64748b">
<b>Drug Text (TF-IDF input):</b> <code style="background:#f1f5f9;padding:3px 8px;border-radius:6px">{escape(txt[:120])}</code>
</div>
</div>"""

    # Top TF-IDF features for this drug (the row is densified once, not twice).
    weights = np.asarray(MAT[idx].todense()).flatten()
    feat_idx = weights.argsort()[-20:][::-1]
    feat_scores = weights[feat_idx]
    feat_labels = _FEAT[feat_idx]
    nonzero = feat_scores > 0
    feat_labels = feat_labels[nonzero]
    feat_scores = feat_scores[nonzero]

    fig = go.Figure(go.Bar(
        x=feat_scores[::-1], y=feat_labels[::-1],
        orientation="h",
        marker=dict(
            color=feat_scores[::-1],
            colorscale=[[0, "#dbeafe"], [1, c]],
            showscale=False,
            line=dict(color="white", width=1),
        ),
        text=[f"{v:.3f}" for v in feat_scores[::-1]],
        textposition="outside",
    ))
    fig.update_layout(
        title=dict(text=f"TF-IDF Feature Fingerprint: {raw_bn}",
                   font=dict(size=13, color="#1e293b")),
        height=max(300, len(feat_labels) * 28 + 80),
        paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        margin=dict(l=10, r=60, t=50, b=10),
        xaxis=dict(gridcolor="#f1f5f9", title="TF-IDF Weight"),
        yaxis=dict(title=""),
    )
    return html, fig
475
+
476
+
477
+ # ────────────────────────────────────────────────────────────────
478
+ # 6. TAB 5 β€” FDA LIVE INTELLIGENCE
479
+ # ────────────────────────────────────────────────────────────────
480
+
481
# Local (INN / registry) drug names -> the name used in US FDA data.
FDA_NAME_MAP = {
    "Paracetamol": "acetaminophen", "Azithromycin": "azithromycin",
    "Ciprofloxacin": "ciprofloxacin", "Amoxicillin": "amoxicillin",
    "Omeprazole": "omeprazole", "Metformin": "metformin",
    "Atorvastatin": "atorvastatin", "Amlodipine": "amlodipine",
    "Ceftriaxone": "ceftriaxone", "Diclofenac": "diclofenac sodium",
    "Esomeprazole": "esomeprazole", "Cefixime": "cefixime",
    "Salbutamol": "albuterol", "Ibuprofen": "ibuprofen",
    "Metronidazole": "metronidazole", "Cefuroxime": "cefuroxime",
}

# Name fields to search, in order, for each openFDA drug endpoint. Adverse
# event reports nest the harmonised names under patient.drug, and NDC records
# carry generic/brand name at the top level, so the label endpoint's
# ``openfda.*`` fields do not match on those two endpoints.
# NOTE(review): field names follow the openFDA searchable-field listings;
# confirm against the current API reference.
_FDA_SEARCH_FIELDS = {
    "label": ("openfda.generic_name", "openfda.brand_name"),
    "event": ("patient.drug.openfda.generic_name",
              "patient.drug.openfda.brand_name",
              "patient.drug.medicinalproduct"),
    "ndc": ("generic_name", "brand_name"),
}


def _fda_fetch(drug, endpoint):
    """Query one openFDA drug endpoint for *drug*.

    Args:
        drug: name as typed by the user; matched case-insensitively against
            ``FDA_NAME_MAP`` to translate local names to US names.
        endpoint: openFDA drug endpoint name (``"label"``, ``"event"`` or
            ``"ndc"``).

    Returns:
        ``(results, term)``: up to 3 result records from the first search
        field that yields any, and the term actually searched. ``results``
        is empty when nothing matched or every request failed.
    """
    typed = (drug or "").strip()
    # The map keys are capitalised; "paracetamol" typed in lowercase used to
    # miss the translation and search the FDA for a name it does not use.
    aliases = {local.lower(): us_name for local, us_name in FDA_NAME_MAP.items()}
    term = aliases.get(typed.lower(), typed.lower())
    if not term:
        return [], term

    base = f"https://api.fda.gov/drug/{endpoint}.json"
    fields = _FDA_SEARCH_FIELDS.get(endpoint, _FDA_SEARCH_FIELDS["label"])
    # Quote the term so a multi-word name is searched as one phrase.
    phrase = term.replace('"', " ")
    for field in fields:
        try:
            resp = requests.get(base, params={"search": f'{field}:"{phrase}"', "limit": "3"},
                                timeout=9)
        except requests.RequestException:
            # Best effort: a network failure on one field just moves on.
            continue
        if resp.status_code != 200:
            continue
        try:
            results = resp.json().get("results", [])
        except ValueError:
            # Body was not valid JSON.
            continue
        if results:
            return results, term
    return [], term
503
+
504
def tab5_fda(drug, ep_label):
    """FDA Live Intelligence: fetch openFDA records and render them as HTML.

    Args:
        drug: drug name typed by the user.
        ep_label: UI label of the data source; ``"Drug Labels"``,
            ``"Adverse Events (FAERS)"`` or ``"NDC Directory"``. Any other
            value falls back to drug labels.

    Returns:
        An HTML string: a placeholder for blank input, a "not found" panel
        when the API returns nothing, otherwise one card per record (max 3).
    """
    from html import escape  # function-scope import keeps this block self-contained

    def as_text(value, fallback="β€”"):
        """Escape an API value for display; lists are comma-joined."""
        if isinstance(value, (list, tuple)):
            value = ", ".join(str(item) for item in value)
        if value is None or value == "":
            return fallback
        return escape(str(value))

    def first_excerpt(record, key, limit):
        """Return the first entry of a list-valued label section, truncated."""
        section = record.get(key)
        if not section:
            return "β€”"
        head = section[0] if isinstance(section, (list, tuple)) else section
        return escape(str(head)[:limit])

    if not drug or not drug.strip():
        return '<div class="ph">πŸ₯ Enter a drug name to fetch live FDA data</div>'
    ep_map = {"Drug Labels": "label", "Adverse Events (FAERS)": "event", "NDC Directory": "ndc"}
    ep = ep_map.get(ep_label, "label")
    results, term = _fda_fetch(drug, ep)

    # Both the user's input and the remote API text are untrusted here, so
    # everything interpolated into the markup is escaped.
    safe_drug = escape(drug)
    if not results:
        return f"""<div class="fda-miss">
<div style="font-size:2.5rem;margin-bottom:12px">πŸ”</div>
<div><b>No FDA data found for "{safe_drug}"</b></div>
<div style="color:#64748b;font-size:0.88rem;margin-top:8px;line-height:1.7">
This drug may not be in the US FDA database (common for Bangladesh-registry drugs).<br>
<b>Try:</b> Paracetamol Β· Azithromycin Β· Ciprofloxacin Β· Omeprazole Β· Metformin Β· Ibuprofen
</div></div>"""

    parts = [f"""<div class="fda-hdr">
<span class="fda-badge">πŸ‡ΊπŸ‡Έ FDA {escape(str(ep_label))}</span>
<b>{safe_drug}</b> β†’ searched as <code>{escape(term)}</code>
<span class="fda-cnt">{len(results)} record(s)</span>
</div>"""]

    if ep == "label":
        for i, res in enumerate(results[:3], 1):
            o = res.get("openfda", {})
            brand = as_text(o.get("brand_name", ["β€”"])[:2])
            gen = as_text(o.get("generic_name", ["β€”"])[:2])
            mfr = as_text(o.get("manufacturer_name", ["β€”"])[:1])
            purp = first_excerpt(res, "purpose", 280)
            ind = first_excerpt(res, "indications_and_usage", 380)
            warn = first_excerpt(res, "warnings", 280)
            parts.append(f"""<div class="fda-card">
<div class="fda-num">πŸ“„ Record {i}</div>
<table class="fda-tbl">
<tr><td class="fk">Brand Name</td><td>{brand}</td></tr>
<tr><td class="fk">Generic Name</td><td>{gen}</td></tr>
<tr><td class="fk">Manufacturer</td><td>{mfr}</td></tr>
<tr><td class="fk">Purpose</td><td>{purp}</td></tr>
<tr><td class="fk">Indications</td><td>{ind}</td></tr>
<tr><td class="fk">Warnings</td><td>{warn}</td></tr>
</table></div>""")

    elif ep == "event":
        for i, res in enumerate(results[:3], 1):
            pt = res.get("patient", {})
            rxn = as_text([rx.get("reactionmeddrapt", "") for rx in pt.get("reaction", [])[:6]])
            drg = as_text([dr.get("medicinalproduct", "") for dr in pt.get("drug", [])[:4]])
            sev = "⚠️ Serious" if res.get("serious") == "1" else "ℹ️ Non-Serious"
            parts.append(f"""<div class="fda-card">
<div class="fda-num">Event {i} β€” {sev}</div>
<table class="fda-tbl">
<tr><td class="fk">Reactions</td><td>{rxn}</td></tr>
<tr><td class="fk">Drugs Involved</td><td>{drg}</td></tr>
</table></div>""")

    elif ep == "ndc":
        for i, res in enumerate(results[:3], 1):
            # ``as_text`` joins list-valued fields instead of printing a
            # Python list repr.
            parts.append(f"""<div class="fda-card">
<div class="fda-num">NDC {i}</div>
<table class="fda-tbl">
<tr><td class="fk">NDC Code</td><td>{as_text(res.get('product_ndc'))}</td></tr>
<tr><td class="fk">Brand</td><td>{as_text(res.get('brand_name'))}</td></tr>
<tr><td class="fk">Generic</td><td>{as_text(res.get('generic_name'))}</td></tr>
<tr><td class="fk">Dosage Form</td><td>{as_text(res.get('dosage_form'))}</td></tr>
<tr><td class="fk">Route</td><td>{as_text(res.get('route'))}</td></tr>
<tr><td class="fk">Labeler</td><td>{as_text(res.get('labeler_name'))}</td></tr>
</table></div>""")
    return "".join(parts)
572
+
573
+
574
+ # ────────────────────────────────────────────────────────────────
575
+ # 7. TAB 6 β€” AI MEDICAL Q&A (HuggingFace Inference API)
576
+ # ────────────────────────────────────────────────────────────────
577
+
578
# Instruction text prepended to every question sent to the instruct models.
SYS_PROMPT = (
    "You are PharmaBridge AI β€” a knowledgeable, friendly pharmaceutical assistant. "
    "You help healthcare professionals and students understand drug information, "
    "pharmacology, traditional medicine (Ayurvedic, Unani, Homeopathic, Herbal), "
    "drug interactions, and the Bangladesh drug registry. "
    "Be concise, accurate, and always note that answers are educational, "
    "not a substitute for professional medical advice."
)

# Hosted models tried in order until one returns a usable answer.
HF_MODELS = [
    "mistralai/Mistral-7B-Instruct-v0.3",
    "HuggingFaceH4/zephyr-7b-beta",
    "google/flan-t5-xxl",
]


def _generated_text(payload):
    """Pull ``generated_text`` out of an inference response of any shape."""
    if isinstance(payload, list):
        payload = payload[0] if payload else None
    if isinstance(payload, dict):
        return str(payload.get("generated_text") or "").strip()
    return ""


def tab6_ai(question, history):
    """AI Medical Q&A: ask the hosted models and append the exchange.

    Args:
        question: the user's question.
        history: list of ``(question, answer)`` tuples so far, or ``None``.

    Returns:
        ``(new_history, "")``; the empty string clears the input box. When
        no model produces an answer longer than 30 characters, a "warming
        up" notice is recorded as the answer instead.
    """
    if not question or not question.strip():
        return history, ""
    # Copy so the caller's list is not mutated behind its back.
    history = list(history or [])

    prompt = f"<s>[INST] {SYS_PROMPT}\n\nQuestion: {question} [/INST]"
    headers = {"Content-Type": "application/json"}
    # Optional: send an access token when the deployment provides one.
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    answer = ""
    for model in HF_MODELS:
        model_url = f"https://api-inference.huggingface.co/models/{model}"
        if "flan" in model_url:
            # flan-t5 uses different format
            payload = {"inputs": f"As a pharmacist, answer clearly: {question}",
                       "parameters": {"max_new_tokens": 350}}
        else:
            payload = {
                "inputs": prompt,
                "parameters": {"max_new_tokens": 500, "temperature": 0.65,
                               "top_p": 0.9, "repetition_penalty": 1.1,
                               "return_full_text": False},
            }
        try:
            resp = requests.post(model_url, headers=headers, json=payload, timeout=28)
        except requests.RequestException:
            # Timeout / connection failure: fall through to the next model.
            continue
        if resp.status_code != 200:
            continue
        try:
            txt = _generated_text(resp.json())
        except ValueError:
            # Body was not valid JSON.
            continue
        if len(txt) > 30:
            answer = txt
            break

    if not answer:
        answer = (
            "⚠️ The AI model is warming up (HuggingFace free tier cold-start). "
            "Please wait ~20 seconds and try again.\n\n"
            "**Meanwhile**, you can:\n"
            "- Use the **Smart Search** tab to look up this drug directly\n"
            "- Use the **FDA Live Data** tab for official drug information"
        )

    history.append((question, answer))
    return history, ""
633
+
634
def tab6_clear():
    """Reset the chat tab: return an empty history and an empty input box."""
    fresh_history = []
    empty_box = ""
    return fresh_history, empty_box
636
+
637
+
638
+ # ────────────────────────────────────────────────────────────────
639
+ # 8. TAB 7 β€” DRUG EXPLORER (browse & filter)
640
+ # ────────────────────────────────────────────────────────────────
641
+
642
# Every dosage form in the table, sorted, preceded by the "All" sentinel.
_ALL_DOS = ["All"] + sorted(DF["dosage_form"].dropna().unique().tolist())


def _dos_choices(sys):
    """Return a dropdown update listing the dosage forms of system *sys*.

    ``"All"`` offers every dosage form; any other value offers only the
    forms that occur for that system. The selection is reset to ``"All"``.
    """
    if sys != "All":
        forms_in_system = DF[DF["medical_system"] == sys]["dosage_form"]
        opts = ["All"] + sorted(forms_in_system.dropna().unique().tolist())
        return gr.update(choices=opts, value="All")
    return gr.update(choices=_ALL_DOS, value="All")
649
+
650
def tab7_explore(system, dosage, search, page):
    """Drug Explorer: filter the table and render one page of 20 rows.

    Args:
        system: medical system to keep, or ``"All"``.
        dosage: dosage form to keep, or ``"All"``.
        search: case-insensitive substring matched against brand name,
            compound, generic name and manufacturer; blank disables it.
        page: 1-based page number; clamped to the valid range, and treated
            as 1 when missing or not a number.

    Returns:
        ``(table_html, status_markdown)``.
    """
    from html import escape  # function-scope import keeps this block self-contained

    def shown(value, fallback="β€”"):
        """Return a cell as display text; NaN/None/blank become *fallback*."""
        if value is None or pd.isna(value):
            return fallback
        text = str(value).strip()
        return text or fallback

    sub = DF.copy()
    if system != "All":
        sub = sub[sub["medical_system"] == system]
    if dosage != "All":
        sub = sub[sub["dosage_form"] == dosage]

    needle = (search or "").lower().strip()
    if needle:
        hit = pd.Series(False, index=sub.index)
        for column in ("brand_name", "gns", "generic_name", "manufacturer"):
            haystack = sub[column].fillna("").astype(str).str.lower()
            # regex=False: the text is a literal substring. As a pattern, an
            # input such as "(" raised re.error and "." matched everything.
            hit |= haystack.str.contains(needle, regex=False)
        sub = sub[hit]

    total = len(sub)
    PG = 20
    try:
        page = int(page)
    except (TypeError, ValueError):
        page = 1  # an emptied number box arrives as None
    maxp = max(1, (total + PG - 1) // PG)
    page = min(max(1, page), maxp)
    sl = sub.iloc[(page - 1) * PG:page * PG]

    if sl.empty:
        return '<div class="ph">No records match your filters.</div>', "0 records"

    body = []
    for _, row in sl.iterrows():
        sys_n = shown(row.get("medical_system"), "")
        c = SC.get(sys_n, "#6B7280")
        em = EMOJI.get(sys_n, "πŸ’Š")
        # Cells are CSV data and are escaped before entering the markup.
        bn = escape(shown(row.get("brand_name")))
        # NaN used to stringify to the truthy "nan", disabling this fallback.
        gn = shown(row.get("gns"), "") or shown(row.get("generic_name"))
        dos_v = escape(shown(row.get("dosage_form")))
        mfr = escape(shown(row.get("manufacturer"))[:36])
        body.append(f"""<tr>
<td><b>{bn}</b></td>
<td style="max-width:190px">{escape(gn[:46])}</td>
<td>{dos_v}</td>
<td><span class="sb2" style="background:{c}18;color:{c};border:1px solid {c}30">{em} {escape(sys_n)}</span></td>
<td style="color:#64748b">{mfr}</td>
</tr>""")
    rows = "".join(body)

    tbl = f"""<table class="xtbl">
<thead><tr><th>Brand Name</th><th>Compound / Identity</th>
<th>Dosage Form</th><th>System</th><th>Manufacturer</th></tr></thead>
<tbody>{rows}</tbody></table>"""

    return tbl, f"Page **{page}** / {maxp} Β· **{total:,}** records"
690
+
691
+
692
+ # ────────────────────────────────────────────────────────────────
693
+ # 9. CSS
694
+ # ────────────────────────────────────────────────────────────────
695
+
696
+ CSS="""
697
+ @import url('https://fonts.googleapis.com/css2?family=Inter:ital,wght@0,300;0,400;0,500;0,600;0,700;0,800;1,400&display=swap');
698
+ *{box-sizing:border-box}
699
+ body,.gradio-container{font-family:'Inter',sans-serif!important;background:#f0f4f8!important}
700
+
701
+ /* ── HEADER ─────────────────────────────────────────────── */
702
+ .app-hdr{
703
+ background:linear-gradient(135deg,#0f172a 0%,#1e3a8a 45%,#0369a1 100%);
704
+ border-radius:18px;padding:28px 32px;margin-bottom:4px;color:#fff;
705
+ box-shadow:0 10px 40px rgba(30,58,138,.35);
706
+ }
707
+ .app-title{font-size:2.1rem;font-weight:800;letter-spacing:-1px;margin:0}
708
+ .app-sub{font-size:1rem;opacity:.82;margin:6px 0 0}
709
+ .hbadges{display:flex;gap:8px;margin-top:14px;flex-wrap:wrap}
710
+ .hbadge{background:rgba(255,255,255,.16);border:1px solid rgba(255,255,255,.28);
711
+ border-radius:20px;padding:4px 13px;font-size:.78rem;font-weight:500}
712
+ .stats-row{display:flex;gap:10px;margin-top:16px;flex-wrap:wrap}
713
+ .stat{background:rgba(255,255,255,.12);border-radius:12px;padding:8px 16px;text-align:center;min-width:88px}
714
+ .sn{font-size:1.45rem;font-weight:800;display:block}
715
+ .sl{font-size:.7rem;opacity:.78;text-transform:uppercase;letter-spacing:.5px}
716
+
717
+ /* ── TABS ────────────────────────────────────────────────── */
718
+ .tab-nav button{font-weight:500!important;font-size:.88rem!important;border-radius:8px 8px 0 0!important}
719
+ .tab-nav button.selected{color:#1d4ed8!important;border-bottom:3px solid #1d4ed8!important;font-weight:700!important}
720
+
721
+ /* ── INPUTS ──────────────────────────────────────────────── */
722
+ .gr-input,textarea,.gr-dropdown select{
723
+ border-radius:10px!important;border:1.5px solid #e2e8f0!important;
724
+ font-family:'Inter',sans-serif!important;transition:border-color .2s!important;
725
+ }
726
+ .gr-input:focus,textarea:focus{border-color:#3b82f6!important;box-shadow:0 0 0 3px rgba(59,130,246,.1)!important}
727
+ .gr-button-primary{
728
+ background:linear-gradient(135deg,#1d4ed8,#0891b2)!important;
729
+ border:none!important;border-radius:10px!important;font-weight:700!important;
730
+ letter-spacing:.2px!important;box-shadow:0 4px 14px rgba(29,78,216,.3)!important;
731
+ transition:transform .15s,box-shadow .15s!important;
732
+ }
733
+ .gr-button-primary:hover{transform:translateY(-1px)!important;box-shadow:0 6px 22px rgba(29,78,216,.4)!important}
734
+
735
+ /* ── PLACEHOLDERS ────────────────────────────────────────── */
736
+ .ph{text-align:center;color:#94a3b8;padding:60px 20px;font-size:.98rem;
737
+ background:#f8fafc;border-radius:14px;border:2px dashed #e2e8f0}
738
+
739
+ /* ── RESULT CARDS ────────────────────────────────────────── */
740
+ .rh{font-size:.93rem;color:#475569;padding:10px 0 14px;
741
+ border-bottom:1px solid #e2e8f0;margin-bottom:14px}
742
+ .grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(270px,1fr));gap:12px}
743
+ .card{background:#fff;border-radius:13px;padding:14px 16px;
744
+ box-shadow:0 1px 4px rgba(0,0,0,.06);transition:transform .15s,box-shadow .15s}
745
+ .card:hover{transform:translateY(-2px);box-shadow:0 5px 18px rgba(0,0,0,.10)}
746
+ .ch{display:flex;justify-content:space-between;align-items:center;margin-bottom:8px}
747
+ .sbadge{font-size:.71rem;font-weight:600;padding:3px 9px;border-radius:20px;white-space:nowrap}
748
+ .spct{font-size:.74rem;font-weight:700;padding:3px 9px;border-radius:20px}
749
+ .bn{font-size:1.05rem;font-weight:700;color:#1e293b;margin-bottom:4px}
750
+ .gn{font-size:.81rem;color:#64748b;margin-bottom:9px;min-height:1.2em}
751
+ .meta{font-size:.77rem;color:#94a3b8;margin-bottom:10px;line-height:1.8}
752
+ .bar{height:4px;background:#f1f5f9;border-radius:2px;overflow:hidden}
753
+ .fill{height:100%;border-radius:2px;transition:width .4s}
754
+
755
+ /* ── CROSS COMPARE ───────────────────────────────────────── */
756
+ .cph{font-size:.96rem;color:#475569;padding:10px 0 16px;font-weight:500}
757
+ .cgrid{display:grid;grid-template-columns:repeat(5,1fr);gap:11px}
758
+ @media(max-width:900px){.cgrid{grid-template-columns:repeat(2,1fr)}}
759
+ .scol{background:#fff;border-radius:13px;padding:14px;box-shadow:0 1px 4px rgba(0,0,0,.06)}
760
+ .stitle{font-weight:700;font-size:.93rem;margin-bottom:12px}
761
+ .nr{color:#94a3b8;font-size:.84rem;padding:10px 0}
762
+ .cc{padding:10px;margin-bottom:8px;border-radius:9px;background:#f8fafc}
763
+ .cbn{font-weight:700;font-size:.88rem;color:#1e293b}
764
+ .cgn{font-size:.77rem;color:#64748b;margin:3px 0}
765
+ .cm{font-size:.74rem;color:#94a3b8}
766
+ .sbar{height:3px;background:#f1f5f9;border-radius:2px;overflow:hidden;margin-top:6px}
767
+ .sfill{height:100%;border-radius:2px}
768
+
769
+ /* ── FINGERPRINT ─────────────────────────────────────────── */
770
+ .fp-banner{border-radius:0}
771
+ .fp-row{display:flex;flex-direction:column;background:#f8fafc;border-radius:10px;padding:10px 14px}
772
+ .fp-k{font-size:.74rem;color:#64748b;font-weight:600;text-transform:uppercase;letter-spacing:.4px}
773
+ .fp-v{font-size:.95rem;color:#1e293b;font-weight:500;margin-top:2px}
774
+
775
+ /* ── FDA ─────────────────────────────────────────────────── */
776
+ .fda-hdr{background:linear-gradient(135deg,#eff6ff,#e0f2fe);border-radius:11px;
777
+ padding:14px 18px;margin-bottom:14px;display:flex;align-items:center;
778
+ gap:10px;flex-wrap:wrap;font-size:.88rem;color:#1e293b}
779
+ .fda-badge{background:#1d4ed8;color:#fff;padding:4px 11px;border-radius:20px;
780
+ font-size:.77rem;font-weight:600}
781
+ .fda-cnt{margin-left:auto;background:#dcfce7;color:#166534;padding:3px 10px;
782
+ border-radius:20px;font-size:.77rem;font-weight:600}
783
+ .fda-miss{text-align:center;padding:40px;color:#64748b;background:#f8fafc;
784
+ border-radius:14px;border:2px dashed #e2e8f0}
785
+ .fda-card{background:#fff;border-radius:13px;padding:18px;margin-bottom:12px;
786
+ box-shadow:0 1px 4px rgba(0,0,0,.06)}
787
+ .fda-num{font-weight:700;font-size:.88rem;color:#1d4ed8;margin-bottom:10px}
788
+ .fda-tbl{width:100%;border-collapse:collapse;font-size:.84rem}
789
+ .fda-tbl tr{border-bottom:1px solid #f1f5f9}
790
+ .fda-tbl tr:last-child{border-bottom:none}
791
+ .fk{color:#64748b;font-weight:600;padding:6px 14px 6px 0;white-space:nowrap;
792
+ vertical-align:top;width:130px}
793
+ .fda-tbl td:last-child{color:#1e293b;padding:6px 0;line-height:1.55}
794
+
795
+ /* ── CHATBOT ─────────────────────────────────────────────── */
796
+ .chatbot{border-radius:13px!important;border:1.5px solid #e2e8f0!important}
797
+
798
+ /* ── EXPLORER TABLE ──────────────────────────────────────── */
799
+ .xtbl{width:100%;border-collapse:collapse;font-size:.83rem}
800
+ .xtbl thead{background:linear-gradient(135deg,#0f172a,#1d4ed8);color:#fff}
801
+ .xtbl th{padding:11px 14px;text-align:left;font-weight:600;letter-spacing:.3px}
802
+ .xtbl tbody tr{border-bottom:1px solid #f1f5f9;transition:background .15s}
803
+ .xtbl tbody tr:hover{background:#f8fafc}
804
+ .xtbl td{padding:9px 14px;color:#1e293b;vertical-align:top}
805
+ .sb2{font-size:.71rem;font-weight:600;padding:2px 8px;border-radius:20px;white-space:nowrap}
806
+
807
+ code{background:#f1f5f9;padding:2px 7px;border-radius:5px;font-size:.84em;color:#0891b2}
808
+ """
809
+
810
+ # ────────────────────────────────────────────────────────────────
811
+ # 10. BUILD GRADIO APP
812
+ # ────────────────────────────────────────────────────────────────
813
+
814
+ HEADER = f"""
815
+ <div class="app-hdr">
816
+ <div class="app-title">πŸ’Š PharmaBridge</div>
817
+ <div class="app-sub">Cross-Medical-System Drug Intelligence Engine Β· Bangladesh National Drug Registry</div>
818
+ <div class="hbadges">
819
+ <span class="hbadge">πŸ”¬ TF-IDF + Cosine Similarity</span>
820
+ <span class="hbadge">🧠 SVD + K-Means Clustering</span>
821
+ <span class="hbadge">🌐 OpenFDA Live API</span>
822
+ <span class="hbadge">πŸ€– Mistral-7B AI Assistant</span>
823
+ <span class="hbadge">πŸ“Š Interactive Dashboards</span>
824
+ </div>
825
+ <div class="stats-row">
826
+ <div class="stat"><span class="sn">53,584</span><span class="sl">Total Drugs</span></div>
827
+ <div class="stat"><span class="sn">5</span><span class="sl">Med. Systems</span></div>
828
+ <div class="stat"><span class="sn">725</span><span class="sl">Manufacturers</span></div>
829
+ <div class="stat"><span class="sn">12,311</span><span class="sl">TF-IDF Features</span></div>
830
+ <div class="stat"><span class="sn">95.5%</span><span class="sl">Precision@10</span></div>
831
+ <div class="stat"><span class="sn">0.2159</span><span class="sl">Silhouette</span></div>
832
+ </div>
833
+ </div>
834
+ """
835
+
836
+ with gr.Blocks(css=CSS, title="PharmaBridge", theme=gr.themes.Base(
837
+ primary_hue=gr.themes.colors.blue,
838
+ font=gr.themes.GoogleFont("Inter"),
839
+ )) as app:
840
+
841
+ gr.HTML(HEADER)
842
+
843
+ with gr.Tabs(elem_classes="tab-nav"):
844
+
845
+ # ── TAB 1 ─────────────────────────────────────────────────
846
+ with gr.Tab("πŸ” Smart Search"):
847
+ with gr.Row(equal_height=True):
848
+ with gr.Column(scale=4):
849
+ t1q = gr.Textbox(label="Search Query",
850
+ placeholder="Try: Azithromycin, Ashwagandha, nux vomica, sharbat amrood, paracetamol fever…",
851
+ lines=1)
852
+ with gr.Column(scale=1):
853
+ t1sys = gr.Dropdown(choices=SYSTEMS, value="All Systems", label="System")
854
+ with gr.Column(scale=1):
855
+ t1btn = gr.Button("πŸ” Search", variant="primary", scale=1)
856
+ with gr.Row():
857
+ t1n = gr.Slider(5,50,value=12,step=1,label="Max Results")
858
+ t1s = gr.Slider(0.0,0.5,value=0.04,step=0.01,label="Min Similarity")
859
+ t1stat = gr.Markdown("")
860
+ t1cards = gr.HTML('<div class="ph">πŸ” Enter a drug name, compound, or symptom above</div>')
861
+ t1chart = gr.Plot(label="Score Distribution")
862
+
863
+ t1btn.click(tab1,[t1q,t1sys,t1n,t1s],[t1cards,t1chart,t1stat])
864
+ t1q.submit(tab1,[t1q,t1sys,t1n,t1s],[t1cards,t1chart,t1stat])
865
+
866
+ gr.Examples([
867
+ ["Azithromycin 500mg","Allopathic"],
868
+ ["Ashwagandha capsule","Ayurvedic"],
869
+ ["Nux Vomica liquid","Homeopathic"],
870
+ ["Sharbat Amrood","Unani"],
871
+ ["Moringa leaf powder","Herbal"],
872
+ ["antibiotic tablet","All Systems"],
873
+ ["digestive capsule","All Systems"],
874
+ ], inputs=[t1q,t1sys], label="Quick Examples")
875
+
876
+ # ── TAB 2 ─────────────────────────────────────────────────
877
+ with gr.Tab("βš–οΈ Cross-System Compare"):
878
+ with gr.Row(equal_height=True):
879
+ with gr.Column(scale=5):
880
+ t2q = gr.Textbox(label="Query",
881
+ placeholder="e.g. pain relief tablet, digestive liver, sleep anxiety, blood pressure…",
882
+ lines=1)
883
+ with gr.Column(scale=1):
884
+ t2n = gr.Slider(1,5,value=3,step=1,label="Results / System")
885
+ with gr.Column(scale=1):
886
+ t2btn = gr.Button("βš–οΈ Compare", variant="primary")
887
+ t2cards = gr.HTML('<div class="ph">Compare the same therapeutic need across all 5 medical traditions simultaneously</div>')
888
+ t2radar = gr.Plot(label="Cross-System Similarity Radar")
889
+
890
+ t2btn.click(tab2,[t2q,t2n],[t2cards,t2radar])
891
+ t2q.submit(tab2,[t2q,t2n],[t2cards,t2radar])
892
+ gr.Examples([
893
+ ["digestive liver tablet"],["pain anti-inflammatory"],
894
+ ["antibiotic infection"],["blood pressure"],
895
+ ["cough respiratory"],["sleep anxiety stress"],
896
+ ], inputs=[t2q])
897
+
898
+ # ── TAB 3 ─────────────────────────────────────────────────
899
+ with gr.Tab("πŸ“Š Dataset Analytics"):
900
+ with gr.Tabs():
901
+ with gr.Tab("🌐 Overview Dashboard"):
902
+ ov_btn = gr.Button("πŸ“Š Render Dashboard", variant="primary")
903
+ ov_fig = gr.Plot()
904
+ ov_btn.click(_overview_fig,[],[ov_fig])
905
+ app.load(_overview_fig,[],[ov_fig])
906
+
907
+ with gr.Tab("πŸ”Ž System Deep Dive"):
908
+ with gr.Row():
909
+ dd_sys = gr.Dropdown(
910
+ choices=["All","Allopathic","Ayurvedic","Unani","Homeopathic","Herbal"],
911
+ value="Allopathic", label="Select System")
912
+ dd_btn = gr.Button("Analyze", variant="primary")
913
+ dd_fig = gr.Plot()
914
+ dd_btn.click(_deep_fig,[dd_sys],[dd_fig])
915
+ dd_sys.change(_deep_fig,[dd_sys],[dd_fig])
916
+ app.load(lambda:_deep_fig("Allopathic"),[],[dd_fig])
917
+
918
+ with gr.Tab("πŸ—ΊοΈ Treemap Explorer"):
919
+ tm_btn = gr.Button("πŸ—ΊοΈ Render Treemap", variant="primary")
920
+ tm_fig = gr.Plot()
921
+ tm_btn.click(_treemap_fig,[],[tm_fig])
922
+ app.load(_treemap_fig,[],[tm_fig])
923
+
924
+ # ── TAB 4 ─────────────────────────────────────────────────
925
+ with gr.Tab("🧬 Drug Fingerprint"):
926
+ gr.Markdown("""
927
+ ### Single Drug Deep-Dive
928
+ Search for any drug to see its **full profile card** plus a bar chart of its
929
+ top TF-IDF feature weights β€” the exact tokens driving its similarity scores.
930
+ """)
931
+ with gr.Row(equal_height=True):
932
+ fp_q = gr.Textbox(label="Brand Name or Compound",
933
+ placeholder="e.g. Azithromycin, Ashwagandha, Nux Vomica, Sharbat Amrood…", lines=1)
934
+ fp_btn = gr.Button("🧬 Profile", variant="primary")
935
+ fp_card = gr.HTML('<div class="ph">🧬 Enter a drug or compound name to generate its fingerprint</div>')
936
+ fp_fig = gr.Plot(label="TF-IDF Feature Fingerprint")
937
+
938
+ fp_btn.click(tab4_fingerprint,[fp_q],[fp_card,fp_fig])
939
+ fp_q.submit(tab4_fingerprint,[fp_q],[fp_card,fp_fig])
940
+ gr.Examples([
941
+ ["Azithromycin"],["Ashwagandha"],["Nux Vomica"],
942
+ ["Sharbat Amrood"],["Moringa"],["Paracetamol"],
943
+ ], inputs=[fp_q])
944
+
945
+ # ── TAB 5 ─────────────────────────────────────────────────
946
+ with gr.Tab("πŸ₯ FDA Live Data"):
947
+ gr.Markdown("> **Live OpenFDA API** β€” US drug labels, adverse events (FAERS), and NDC records. "
948
+ "~40% of Bangladesh registry drugs appear here. Bangladeshi names auto-mapped to FDA terms.")
949
+ with gr.Row(equal_height=True):
950
+ fda_drug = gr.Textbox(label="Drug Name",
951
+ placeholder="Paracetamol, Azithromycin, Ciprofloxacin, Omeprazole, Metformin…", lines=1)
952
+ fda_ep = gr.Radio(["Drug Labels","Adverse Events (FAERS)","NDC Directory"],
953
+ value="Drug Labels", label="FDA Database")
954
+ fda_btn = gr.Button("πŸ”Ž Fetch", variant="primary")
955
+ fda_out = gr.HTML('<div class="ph">πŸ₯ Enter a drug name and click Fetch</div>')
956
+ fda_btn.click(tab5_fda,[fda_drug,fda_ep],[fda_out])
957
+ fda_drug.submit(tab5_fda,[fda_drug,fda_ep],[fda_out])
958
+ gr.Examples([["Paracetamol"],["Azithromycin"],["Ciprofloxacin"],
959
+ ["Omeprazole"],["Metformin"],["Ibuprofen"]], inputs=[fda_drug])
960
+
961
+ # ── TAB 6 ─────────────────────────────────────────────────
962
+ with gr.Tab("πŸ€– AI Medical Q&A"):
963
+ gr.Markdown("""
964
+ ### PharmaBridge AI β€” Pharmaceutical Q&A
965
+ Powered by **Mistral-7B-Instruct** via HuggingFace Inference API (free, no key needed).
966
+ Ask anything about drugs, pharmacology, traditional medicine, or the Bangladesh registry.
967
+
968
+ > ⚠️ Educational only β€” not a substitute for professional medical advice. Model may take ~20s to cold-start.
969
+ """)
970
+ ai_bot = gr.Chatbot(label="PharmaBridge AI", height=450, elem_classes="chatbot")
971
+ with gr.Row():
972
+ ai_inp = gr.Textbox(label="Your Question", lines=2, scale=5,
973
+ placeholder="e.g. What is Ashwagandha used for? / Side effects of Azithromycin? / What is Unani medicine?")
974
+ with gr.Column(scale=1):
975
+ ai_send = gr.Button("Send πŸ’¬", variant="primary")
976
+ ai_clear = gr.Button("Clear πŸ—‘οΈ")
977
+ ai_send.click(tab6_ai,[ai_inp,ai_bot],[ai_bot,ai_inp])
978
+ ai_inp.submit(tab6_ai,[ai_inp,ai_bot],[ai_bot,ai_inp])
979
+ ai_clear.click(tab6_clear,[],[ai_bot,ai_inp])
980
+ gr.Examples([
981
+ ["What is Ashwagandha used for in Ayurvedic medicine?"],
982
+ ["Explain Unani medicine and its traditional formulations"],
983
+ ["What are the common side effects of Azithromycin?"],
984
+ ["How does TF-IDF cosine similarity work for drug retrieval?"],
985
+ ["What is Homeopathic potency and how are remedies prepared?"],
986
+ ["Compare Allopathic and Herbal medicine approaches"],
987
+ ], inputs=[ai_inp])
988
+
989
+ # ── TAB 7 ─────────────────────────────────────────────────
990
+ with gr.Tab("πŸ“‹ Drug Explorer"):
991
+ with gr.Row():
992
+ ex_sys = gr.Dropdown(["All","Allopathic","Ayurvedic","Unani","Homeopathic","Herbal"],
993
+ value="All", label="System")
994
+ ex_dos = gr.Dropdown(choices=_ALL_DOS, value="All", label="Dosage Form")
995
+ ex_srch = gr.Textbox(label="Search", placeholder="Brand, compound, manufacturer…")
996
+ ex_pg = gr.Number(value=1, label="Page", minimum=1, precision=0)
997
+ ex_btn = gr.Button("πŸ” Browse Database", variant="primary")
998
+ ex_info = gr.Markdown("")
999
+ ex_tbl = gr.HTML('<div class="ph">Click Browse to explore all 53,584 drug records</div>')
1000
+
1001
+ ex_sys.change(_dos_choices,[ex_sys],[ex_dos])
1002
+ ex_btn.click(tab7_explore,[ex_sys,ex_dos,ex_srch,ex_pg],[ex_tbl,ex_info])
1003
+ ex_srch.submit(tab7_explore,[ex_sys,ex_dos,ex_srch,ex_pg],[ex_tbl,ex_info])
1004
+
1005
+ # ── TAB 8 ─────────────────────────────────────────────────
1006
+ with gr.Tab("ℹ️ About"):
1007
+ gr.Markdown(f"""
1008
+ ## PharmaBridge β€” Cross-Medical-System Drug Intelligence
1009
+
1010
+ **PharmaBridge** is a master's thesis project β€” the first NLP-based drug recommendation system
1011
+ spanning all 5 major South Asian pharmaceutical traditions simultaneously using the
1012
+ Bangladesh National Drug Registry (53,584 records).
1013
+
1014
+ ---
1015
+
1016
+ ### Dataset Composition
1017
+ | Medical System | Records | Share |
1018
+ |---|---|---|
1019
+ | Allopathic | 36,254 | 67.7% |
1020
+ | Unani | 8,460 | 15.8% |
1021
+ | Ayurvedic | 5,262 | 9.8% |
1022
+ | Homeopathic | 2,580 | 4.8% |
1023
+ | Herbal | 1,028 | 1.9% |
1024
+ | **Total** | **53,584** | **100%** |
1025
+
1026
+ ### Technical Architecture
1027
+ | Component | Configuration |
1028
+ |---|---|
1029
+ | Vectorization | TF-IDF, bigrams (1,2), max_features=15,000, sublinear_tf=True |
1030
+ | Retrieval | Cosine Similarity on sparse matrix (53,584 Γ— 12,311) |
1031
+ | Dim. Reduction | TruncatedSVD, 50 components, 26.2% variance |
1032
+ | Clustering | K-Means K=10 (elbow-selected), Silhouette=0.2159 |
1033
+
1034
+ ### Evaluation Results
1035
+ | Metric | Value |
1036
+ |---|---|
1037
+ | Precision@5 | 97.00% |
1038
+ | Precision@10 | 95.50% |
1039
+ | Precision@20 | 90.55% |
1040
+ | Silhouette Score | 0.2159 |
1041
+
1042
+ ### App Features
1043
+ | Tab | Feature |
1044
+ |---|---|
1045
+ | πŸ” Smart Search | TF-IDF cosine retrieval with rich card UI + bar chart |
1046
+ | βš–οΈ Cross-System Compare | Side-by-side 5-system view + radar chart |
1047
+ | πŸ“Š Dataset Analytics | Overview dashboard, deep-dive, treemap |
1048
+ | 🧬 Drug Fingerprint | Single drug profile + TF-IDF feature bar chart |
1049
+ | πŸ₯ FDA Live Data | OpenFDA labels / adverse events / NDC lookup |
1050
+ | πŸ€– AI Medical Q&A | Mistral-7B via HuggingFace Inference API |
1051
+ | πŸ“‹ Drug Explorer | Paginated browse & filter across all 53,584 records |
1052
+
1053
+ ---
1054
+ > **Disclaimer:** For research and educational purposes only.
1055
+ > Not intended for clinical decision-making.
1056
+ > Always consult a qualified healthcare professional for medical advice.
1057
+ """)
1058
+
1059
+ if __name__ == "__main__":
1060
+ app.launch()