# final_dr / app.py
# Sazzz02's picture
# Upload app.py
# 0e1d3d0 verified
"""
╔══════════════════════════════════════════════════════════════╗
║  PharmaBridge — Cross-Medical-System Drug Intelligence       ║
║  Hugging Face Spaces | Gradio 4.x | Master's Thesis          ║
╚══════════════════════════════════════════════════════════════╝
7 Tabs:
  1. Smart Drug Search      — TF-IDF cosine retrieval with cards UI
  2. Cross-System Compare   — Side-by-side 5-system radar comparison
  3. Dataset Analytics      — 3 sub-tabs of Plotly dashboards
  4. Drug Fingerprint       — Single drug deep-dive profile
  5. FDA Live Intelligence  — OpenFDA API (Labels / Events / NDC)
  6. AI Medical Q&A         — HuggingFace Inference API (Mistral-7B)
  7. Drug Explorer          — Paginated browse & filter table
"""
import gradio as gr
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import joblib, re, os, requests, json, warnings
warnings.filterwarnings("ignore")
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# ────────────────────────────────────────────────────────────────
# 0. LOAD / REBUILD MODELS
# ────────────────────────────────────────────────────────────────
def _clean(text):
if pd.isna(text): return ""
t = str(text).strip()
if t in ["FALSE","False","false","nan","NaN",""]: return ""
return re.sub(r"\s+"," ", re.sub(r"[^a-z0-9\s\+\-\./]"," ", t.lower())).strip()
def _build_text(row):
    """Compose the TF-IDF document for one raw dataset row.

    The identifying field depends on the row's ``medical_system``:
    Allopathic rows use "Generic Name", Homeopathic rows use "Brand Name",
    and every other system (Ayurvedic, Herbal, and the Unani fall-through)
    uses "Generic Name and Strength".  The cleaned identifier, the cleaned
    dosage description and the lower-cased system name are joined with
    single spaces; empty parts are skipped.
    """
    system = row["medical_system"]
    cleaned = {
        column: _clean(row.get(column, ""))
        for column in (
            "Dosages Description",
            "Generic Name and Strength",
            "Brand Name",
            "Generic Name",
        )
    }
    if system == "Allopathic":
        identity = cleaned["Generic Name"]
    elif system == "Homeopathic":
        identity = cleaned["Brand Name"]
    else:
        identity = cleaned["Generic Name and Strength"]
    parts = [identity, cleaned["Dosages Description"], system.lower()]
    return " ".join(part for part in parts if part)
print("⏳ Loading PharmaBridge models…")
try:
    # Fast path: artefacts produced offline (vectoriser, matrix, row table).
    VEC = joblib.load("models/tfidf_vectorizer.pkl")
    MAT = joblib.load("models/tfidf_matrix.pkl")
    DF = pd.read_csv("models/drug_database.csv")
    print("βœ… PKL models loaded.")
except Exception as e:
    # Fallback: refit the vectoriser from the merged raw dataset.
    print(f"⚠️ PKL not found ({e}), rebuilding from CSV…")
    raw = pd.read_csv("merged_pharma_dataset.csv")
    DF = raw.copy()
    # Must run before the rename below: _build_text reads the raw headers.
    DF["drug_text"] = DF.apply(_build_text, axis=1)
    DF.rename(
        columns={
            "Brand Name": "brand_name",
            "Generic Name": "generic_name",
            "Dosages Description": "dosage_form",
            "Strength": "strength",
            "Name of the Manufacturer": "manufacturer",
            "Generic Name and Strength": "gns",
        },
        inplace=True,
    )
    VEC = TfidfVectorizer(
        min_df=1,
        sublinear_tf=True,
        stop_words=None,
        max_features=15000,
        ngram_range=(1, 2),
    )
    MAT = VEC.fit_transform(DF["drug_text"])
    print("βœ… Rebuilt from CSV.")
# Normalise column names so the rest of the app can rely on the snake_case
# schema regardless of which load path produced DF.
_REMAP = {
    "Brand Name": "brand_name",
    "Generic Name": "generic_name",
    "Dosages Description": "dosage_form",
    "Strength": "strength",
    "Name of the Manufacturer": "manufacturer",
    "Generic Name and Strength": "gns",
}
for o, n in _REMAP.items():
    if o in DF.columns and n not in DF.columns:
        DF.rename(columns={o: n}, inplace=True)

# Guarantee every column the UI reads exists (empty string when absent).
for c in ["brand_name", "generic_name", "dosage_form", "strength",
          "manufacturer", "gns", "drug_text"]:
    if c not in DF.columns:
        DF[c] = ""

# Rebuild drug_text only when it carries no text at all.  fillna/astype keeps
# the check working when the column was read from CSV as all-NaN floats.
if DF["drug_text"].fillna("").astype(str).str.len().sum() == 0:
    # _build_text reads the raw dataset headers, which were renamed above.
    # Present the frame under those headers again, otherwise every field
    # comes back empty and the text collapses to the system name alone.
    _RAW_NAMES = {
        new: old
        for old, new in _REMAP.items()
        if new in DF.columns and old not in DF.columns
    }
    DF["drug_text"] = DF.rename(columns=_RAW_NAMES).apply(_build_text, axis=1)

DF = DF.reset_index(drop=True)
# Choices for the system filter; "All Systems" disables filtering.
SYSTEMS = ["All Systems", "Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]

# Accent colour per medical system, reused by cards and charts.
SC = {
    "Allopathic": "#3B82F6",
    "Ayurvedic": "#10B981",
    "Unani": "#F59E0B",
    "Homeopathic": "#8B5CF6",
    "Herbal": "#EF4444",
}

# Badge emoji per medical system.  The Allopathic and Homeopathic entries
# were mis-decoded UTF-8 and are restored to the intended pill / droplet.
EMOJI = {
    "Allopathic": "💊",
    "Ayurvedic": "🌿",
    "Unani": "☘️",
    "Homeopathic": "💧",
    "Herbal": "🌱",
}
# Aggregates computed once at import time and reused by the analytics tab.
_SYS_VC = DF.medical_system.value_counts()    # rows per medical system
_DOS_VC = DF.dosage_form.value_counts()       # rows per dosage form
_MFR_VC = DF.manufacturer.value_counts()      # rows per manufacturer
# Distinct manufacturers within each medical system.
_SYS_MFR = DF.groupby("medical_system").manufacturer.nunique()
# Vocabulary terms in the vectoriser's column order.
_FEAT = np.array(VEC.get_feature_names_out())
# ────────────────────────────────────────────────────────────────
# 1. RETRIEVAL HELPERS
# ────────────────────────────────────────────────────────────────
def _encode(q):
    """Vectorise a free-text query with the fitted TF-IDF vectoriser.

    The query is lower-cased, characters outside ``a-z 0-9 + - . /`` and
    whitespace become spaces, whitespace runs are collapsed, and the result
    is passed to ``VEC.transform`` as a one-document batch.
    """
    lowered = q.lower()
    sanitised = re.sub(r"[^a-z0-9\s\+\-\./]", " ", lowered)
    normalised = re.sub(r"\s+", " ", sanitised).strip()
    return VEC.transform([normalised])
def _recommend(query, system, top_n, min_s):
    """Return the best-matching drugs for *query*, optionally within one system.

    Args:
        query: Free-text query, vectorised with ``_encode``.
        system: Medical system to restrict to; ``"All Systems"``, ``"All"``
            or ``""`` disables the filter.
        top_n: Maximum number of rows to return.
        min_s: Minimum cosine similarity a row needs to be returned.

    Returns:
        A DataFrame of matching ``DF`` rows plus a ``score`` column (rounded
        to 4 decimals), sorted by descending score.  Empty when nothing
        qualifies or ``top_n`` is not positive.
    """
    if top_n <= 0:
        return pd.DataFrame()

    sims = cosine_similarity(_encode(query), MAT).flatten()
    if system not in ("All Systems", "All", ""):
        outside = ~(DF["medical_system"] == system).values
        # Use -inf rather than 0: a zeroed row still satisfies `>= min_s`
        # when the threshold is 0, which let other systems leak through.
        sims[outside] = -np.inf

    # Look at a 4x window of the ranking, then keep the first top_n that
    # clear the threshold.
    window = sims.argsort()[-(top_n * 4):][::-1]
    idx = [i for i in window if sims[i] >= min_s][:top_n]
    if not idx:
        return pd.DataFrame()

    r = DF.iloc[idx].copy()
    r["score"] = [round(float(sims[i]), 4) for i in idx]
    return r.sort_values("score", ascending=False).reset_index(drop=True)
def _cross(query, tps):
    """Return the top *tps* matches for *query* within each medical system.

    Args:
        query: Free-text query, vectorised with ``_encode``.
        tps: Maximum rows per system.

    Returns:
        A DataFrame of ``DF`` rows with a ``score`` column, sorted by system
        name and then descending score.  Empty when no row in any system
        scores above 0.01 or ``tps`` is not positive.
    """
    # `[-0:]` would slice the whole ranking, so treat 0 (or less) as "none".
    if tps <= 0:
        return pd.DataFrame()

    sims = cosine_similarity(_encode(query), MAT).flatten()
    rows = []
    for sys_name in ["Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]:
        in_system = (DF["medical_system"] == sys_name).values
        sc = np.where(in_system, sims, 0.0)
        for i in sc.argsort()[-tps:][::-1]:
            # Threshold on the masked score.  Testing the unmasked value let
            # zero-masked rows of other systems in whenever this system had
            # fewer than `tps` real matches.
            if sc[i] > 0.01:
                d = DF.iloc[i].to_dict()
                d["score"] = round(float(sc[i]), 4)
                rows.append(d)
    if not rows:
        return pd.DataFrame()
    return (
        pd.DataFrame(rows)
        .sort_values(["medical_system", "score"], ascending=[True, False])
        .reset_index(drop=True)
    )
# ────────────────────────────────────────────────────────────────
# 2. TAB 1 β€” SMART DRUG SEARCH
# ────────────────────────────────────────────────────────────────
def tab1(query, system, top_n, min_s):
    """Gradio callback for the Smart Drug Search tab.

    Args:
        query: Free-text search string typed by the user.
        system: Medical-system filter, passed through to ``_recommend``.
        top_n: Maximum number of results (coerced to ``int``).
        min_s: Minimum cosine similarity (coerced to ``float``).

    Returns:
        ``(cards_html, figure, summary_markdown)``.  The figure is ``None``
        and the summary empty when there is nothing to show.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def _field(row, key, default="—"):
        """Return ``row[key]`` as text, or *default* when missing/NaN/blank."""
        value = row.get(key, default)
        if pd.isna(value):
            return default
        return str(value).strip() or default

    if not query.strip():
        return (
            '<div class="ph">🔍 Type a drug name, compound, or symptom above and press Search</div>',
            None,
            "",
        )

    r = _recommend(query, system, int(top_n), float(min_s))
    # The query is user input and ends up inside HTML, so escape it.
    safe_query = escape(query)
    if r.empty:
        return (
            f'<div class="ph">No results found for <b>{safe_query}</b>. '
            f'Try lowering the similarity threshold.</div>',
            None,
            "",
        )

    cards = [
        f'<div class="rh">Found <b>{len(r)}</b> results for "<b>{safe_query}</b>"</div>'
        f'<div class="grid">'
    ]
    for _, row in r.iterrows():
        sys_n = _field(row, "medical_system", "")
        c = SC.get(sys_n, "#6B7280")
        em = EMOJI.get(sys_n, "💊")
        bn = _field(row, "brand_name")
        # str(NaN) is the truthy string "nan", so the old `str(...) or ...`
        # never fell back to generic_name; _field makes the fallback work.
        gn = _field(row, "gns", "") or _field(row, "generic_name")
        if len(gn) > 70:
            gn = gn[:70] + "…"
        dos = _field(row, "dosage_form")
        mfr = _field(row, "manufacturer")[:38]
        pct = int(float(row.get("score", 0)) * 100)
        cards.append(f"""
        <div class="card" style="border-left:4px solid {c}">
        <div class="ch">
        <span class="sbadge" style="background:{c}18;color:{c};border:1px solid {c}35">{em} {escape(sys_n)}</span>
        <span class="spct" style="background:{c}12;color:{c}">{pct}%</span>
        </div>
        <div class="bn">{escape(bn)}</div>
        <div class="gn">{escape(gn)}</div>
        <div class="meta">💊 {escape(dos)} &nbsp;·&nbsp; 🏭 {escape(mfr)}</div>
        <div class="bar"><div class="fill" style="width:{pct}%;background:{c}"></div></div>
        </div>""")
    cards.append("</div>")

    top = r.head(15)
    fig = px.bar(
        top, x="score", y="brand_name", color="medical_system",
        color_discrete_map=SC, orientation="h",
        labels={"score": "Similarity Score", "brand_name": ""},
        title=f'Similarity Scores — "{query}"',
    )
    fig.update_layout(
        height=max(340, len(top) * 30 + 90),
        paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, title=None),
        margin=dict(l=0, r=10, t=50, b=10), yaxis=dict(autorange="reversed"),
        xaxis=dict(range=[0, 1], gridcolor="#f1f5f9"),
    )

    dist = r["medical_system"].value_counts().to_dict()
    stat = " · ".join(f"**{k}** {v}" for k, v in dist.items())
    return "".join(cards), fig, f"📊 {stat}"
# ────────────────────────────────────────────────────────────────
# 3. TAB 2 β€” CROSS-SYSTEM COMPARE
# ────────────────────────────────────────────────────────────────
def tab2(query, tps):
    """Gradio callback for the Cross-System Compare tab.

    Args:
        query: Free-text search string typed by the user.
        tps: Results to show per medical system (coerced to ``int``).

    Returns:
        ``(columns_html, radar_figure)``; the figure is ``None`` when there
        is nothing to show.
    """
    # Imported locally under its own name; `html` is a natural local name here.
    from html import escape

    def _field(row, key, default="—"):
        """Return ``row[key]`` as text, or *default* when missing/NaN/blank."""
        value = row.get(key, default)
        if pd.isna(value):
            return default
        return str(value).strip() or default

    if not query.strip():
        return '<div class="ph">Enter a query to compare drugs across all 5 medical traditions</div>', None
    r = _cross(query, int(tps))
    if r.empty:
        return '<div class="ph">No cross-system results found.</div>', None

    systems = ["Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]
    # The query is user input interpolated into HTML, so escape it.
    parts = [
        f'<div class="cph">Cross-system view for <b>"{escape(query)}"</b></div>'
        f'<div class="cgrid">'
    ]
    for sys_n in systems:
        sub = r[r["medical_system"] == sys_n]
        c = SC[sys_n]
        em = EMOJI[sys_n]
        parts.append(
            f'<div class="scol" style="border-top:3px solid {c}">'
            f'<div class="stitle" style="color:{c}">{em} {sys_n}</div>'
        )
        if sub.empty:
            parts.append('<div class="nr">No match found</div>')
        for _, row in sub.iterrows():
            bn = _field(row, "brand_name")
            # NaN-aware fallback: str(NaN) == "nan" is truthy, so the old
            # `str(...) or ...` never reached generic_name.
            gn = _field(row, "gns", "") or _field(row, "generic_name")
            if len(gn) > 48:
                gn = gn[:48] + "…"
            dos = _field(row, "dosage_form")
            pct = int(float(row.get("score", 0)) * 100)
            parts.append(f"""<div class="cc" style="border-left:3px solid {c}38">
            <div class="cbn">{escape(bn)}</div>
            <div class="cgn">{escape(gn)}</div>
            <div class="cm">{escape(dos)} · {pct}%</div>
            <div class="sbar"><div class="sfill" style="width:{pct}%;background:{c}"></div></div>
            </div>""")
        parts.append("</div>")
    parts.append("</div>")

    # Radar chart: mean score per system, 0 for systems with no match.
    mean_scores = r.groupby("medical_system")["score"].mean()
    avgs = {s: float(mean_scores.get(s, 0.0)) for s in systems}
    cats = list(avgs.keys())
    vals = list(avgs.values())
    fig = go.Figure(go.Scatterpolar(
        # Repeat the first point so the polygon closes.
        r=vals + [vals[0]], theta=cats + [cats[0]], fill="toself",
        fillcolor="rgba(59,130,246,0.12)", line=dict(color="#3B82F6", width=2.5),
        marker=dict(size=9, color=[SC[s] for s in cats] + [SC[cats[0]]]),
    ))
    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 1], gridcolor="#e5e7eb"),
                   angularaxis=dict(gridcolor="#e5e7eb", tickfont=dict(size=12))),
        title=dict(text=f'Cross-System Radar — "{query}"', font=dict(size=13, color="#1e293b")),
        paper_bgcolor="rgba(0,0,0,0)", font=dict(family="Inter,sans-serif"),
        height=380, showlegend=False, margin=dict(l=50, r=50, t=60, b=30),
    )
    return "".join(parts), fig
# ────────────────────────────────────────────────────────────────
# 4. TAB 3 β€” DATASET ANALYTICS (3 sub-views)
# ────────────────────────────────────────────────────────────────
def _overview_fig():
    """Build the 2x3 overview dashboard for the Dataset Analytics tab.

    Panels: system share donut, top-12 dosage forms, manufacturers per
    system, top-15 manufacturers, system × dosage heatmap and per-system
    TF-IDF vocabulary share.  Reads ``DF``, ``MAT`` and the precomputed
    ``_SYS_VC`` / ``_DOS_VC`` / ``_MFR_VC`` / ``_SYS_MFR`` aggregates.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    systems = ["Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]
    fig = make_subplots(
        rows=2, cols=3,
        subplot_titles=["System Share", "Top 12 Dosage Forms", "Manufacturers per System",
                        "Top 15 Manufacturers", "System × Dosage Heatmap", "TF-IDF Vocab Share"],
        specs=[[{"type": "domain"}, {"type": "xy"}, {"type": "xy"}],
               [{"type": "xy"}, {"type": "xy"}, {"type": "domain"}]],
        vertical_spacing=0.14, horizontal_spacing=0.08,
    )

    # 1 donut
    fig.add_trace(go.Pie(
        labels=_SYS_VC.index.tolist(), values=_SYS_VC.values.tolist(), hole=0.55,
        marker=dict(colors=[SC.get(s, "#aaa") for s in _SYS_VC.index],
                    line=dict(color="white", width=2.5)),
        textinfo="label+percent", textfont=dict(size=10), showlegend=False,
    ), row=1, col=1)

    # 2 dosage bar
    td = _DOS_VC.head(12)
    # The palette has fewer entries than the 12 bars; reuse its last colour
    # for the overflow so every bar gets an explicit colour.
    blues = px.colors.sequential.Blues_r
    bar_colors = [blues[min(i, len(blues) - 1)] for i in range(len(td))]
    fig.add_trace(go.Bar(
        x=td.values[::-1], y=td.index[::-1].tolist(), orientation="h",
        marker=dict(color=bar_colors, line=dict(color="white", width=1)),
        text=[f"{v:,}" for v in td.values[::-1]], textposition="outside", showlegend=False,
    ), row=1, col=2)

    # 3 mfr per system
    fig.add_trace(go.Bar(
        x=_SYS_MFR.index.tolist(), y=_SYS_MFR.values.tolist(),
        marker=dict(color=[SC.get(s, "#aaa") for s in _SYS_MFR.index],
                    line=dict(color="white", width=2)),
        text=_SYS_MFR.values.tolist(), textposition="outside", showlegend=False,
    ), row=1, col=3)

    # 4 top 15 mfr
    tm = _MFR_VC.head(15)
    fig.add_trace(go.Bar(
        y=[m[:28] for m in tm.index[::-1].tolist()], x=tm.values[::-1].tolist(),
        orientation="h",
        marker=dict(color=tm.values[::-1].tolist(), colorscale="Viridis",
                    showscale=False, line=dict(color="white", width=1)),
        showlegend=False,
    ), row=2, col=1)

    # 5 heatmap
    top8 = _DOS_VC.head(8).index.tolist()
    piv = pd.crosstab(DF["medical_system"], DF["dosage_form"])
    z = [[int(piv[d].get(s, 0)) if d in piv.columns else 0 for d in top8] for s in systems]
    fig.add_trace(go.Heatmap(
        z=z, x=[d[:12] for d in top8], y=systems, colorscale="YlOrRd",
        text=z, texttemplate="%{text}", textfont=dict(size=9),
        showscale=True, colorbar=dict(thickness=10, x=0.65, len=0.42),
    ), row=2, col=2)

    # 6 vocab share: terms whose mean TF-IDF weight within a system exceeds 0.001
    vtoks = {}
    for s in systems:
        in_system = (DF["medical_system"] == s).values
        if not in_system.any():
            # Averaging over zero rows would fail; an absent system has no vocab.
            vtoks[s] = 0
            continue
        mean_weights = np.asarray(MAT[in_system].mean(axis=0)).flatten()
        vtoks[s] = int((mean_weights > 0.001).sum())
    fig.add_trace(go.Pie(
        labels=list(vtoks.keys()), values=list(vtoks.values()), hole=0.5,
        marker=dict(colors=[SC.get(s, "#aaa") for s in vtoks],
                    line=dict(color="white", width=2)),
        textinfo="label+value", textfont=dict(size=10), showlegend=False,
    ), row=2, col=3)

    fig.update_layout(
        height=720, paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        title=dict(text="PharmaBridge — Dataset Intelligence Dashboard",
                   font=dict(size=16, color="#1e293b"), x=0.5),
        margin=dict(l=10, r=10, t=80, b=10),
    )
    fig.update_xaxes(showgrid=True, gridcolor="#f1f5f9", zeroline=False)
    fig.update_yaxes(showgrid=False)
    return fig
def _deep_fig(sel):
    """Build the 2x2 deep-dive dashboard for one medical system.

    Args:
        sel: A medical-system name, or ``"All"`` for the whole dataset.

    Returns:
        A figure with the top-20 compounds, the dosage-form split, the top-10
        manufacturers and a brand-count comparison across all systems in
        which the selected system is highlighted.
    """
    if sel == "All":
        sub = DF
    else:
        sub = DF[DF["medical_system"] == sel]
    accent = SC.get(sel, "#3B82F6")
    white_thin = {"color": "white", "width": 1}
    white_thick = {"color": "white", "width": 2}

    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=[
            f"Top 20 Compounds ({sel})",
            "Dosage Form Split",
            "Top 10 Manufacturers",
            "Brand Count Comparison",
        ],
        specs=[
            [{"type": "xy"}, {"type": "domain"}],
            [{"type": "xy"}, {"type": "xy"}],
        ],
        vertical_spacing=0.16,
        horizontal_spacing=0.10,
    )

    # Compounds: the identifying column differs per system.
    if sel == "Homeopathic":
        names = sub["brand_name"]
    else:
        names = sub["generic_name" if sel == "Allopathic" else "gns"].dropna()
    comp = names.value_counts().head(20)
    comp_counts = comp.values[::-1].tolist()
    fig.add_trace(
        go.Bar(
            x=comp_counts,
            y=comp.index[::-1].tolist(),
            orientation="h",
            marker={"color": accent, "opacity": 0.85, "line": white_thin},
            text=comp_counts,
            textposition="outside",
            showlegend=False,
        ),
        row=1,
        col=1,
    )

    # Dosage-form donut.
    dos = sub["dosage_form"].value_counts().head(8)
    fig.add_trace(
        go.Pie(
            labels=dos.index.tolist(),
            values=dos.values.tolist(),
            hole=0.48,
            marker={
                "colors": px.colors.qualitative.Set3[:len(dos)],
                "line": white_thick,
            },
            textinfo="label+percent",
            textfont={"size": 10},
            showlegend=False,
        ),
        row=1,
        col=2,
    )

    # Top manufacturers, labels truncated to 26 characters.
    mf = sub["manufacturer"].value_counts().head(10)
    mf_counts = mf.values[::-1].tolist()
    fig.add_trace(
        go.Bar(
            x=mf_counts,
            y=[label[:26] for label in mf.index[::-1].tolist()],
            orientation="h",
            marker={
                "color": mf_counts,
                "colorscale": "Blues",
                "showscale": False,
                "line": white_thin,
            },
            showlegend=False,
        ),
        row=2,
        col=1,
    )

    # Distinct brands per system, always over the full dataset.
    bc = DF.groupby("medical_system")["brand_name"].nunique().sort_values(ascending=False)
    highlight = [accent if name == sel else "#cbd5e1" for name in bc.index]
    fig.add_trace(
        go.Bar(
            x=bc.index.tolist(),
            y=bc.values.tolist(),
            marker={"color": highlight, "line": white_thick},
            text=bc.values.tolist(),
            textposition="outside",
            showlegend=False,
        ),
        row=2,
        col=2,
    )

    fig.update_layout(
        height=680,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font={"family": "Inter,sans-serif", "size": 11},
        title={"text": f"Deep Dive: {sel}", "font": {"size": 15, "color": "#1e293b"}, "x": 0.5},
        margin={"l": 10, "r": 10, "t": 70, "b": 10},
    )
    fig.update_xaxes(showgrid=True, gridcolor="#f1f5f9", zeroline=False)
    fig.update_yaxes(showgrid=False)
    return fig
def _treemap_fig():
    """Build the medical system → dosage form treemap.

    Counts rows of ``DF`` per (system, dosage form) pair and keeps pairs with
    at least 5 rows.

    Returns:
        A Plotly treemap figure coloured by medical system.
    """
    counts = (
        DF.groupby(["medical_system", "dosage_form"])
        .size()
        .reset_index(name="count")
    )
    counts = counts[counts["count"] >= 5]
    fig = px.treemap(
        counts, path=["medical_system", "dosage_form"], values="count",
        color="medical_system", color_discrete_map=SC,
        # Title arrow restored from mis-decoded UTF-8.
        title="Drug Hierarchy: Medical System → Dosage Form",
    )
    fig.update_traces(textinfo="label+value+percent parent", textfont=dict(size=12))
    fig.update_layout(
        height=520, paper_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=12),
        title=dict(font=dict(size=15, color="#1e293b"), x=0.5),
        margin=dict(l=10, r=10, t=60, b=10),
    )
    return fig
def tab3_deep_update(sel):
    """Return the deep-dive figure for the selected system (thin wrapper)."""
    figure = _deep_fig(sel)
    return figure
# ────────────────────────────────────────────────────────────────
# 5. TAB 4 β€” DRUG FINGERPRINT (single drug profile)
# ────────────────────────────────────────────────────────────────
def tab4_fingerprint(brand_query):
    """Show a profile card and TF-IDF feature chart for the best-matching drug.

    Args:
        brand_query: Free-text drug name typed by the user.

    Returns:
        ``(profile_html, figure)``.  The figure is ``None`` when the query is
        blank or the best cosine similarity is below 0.01.
    """
    # Imported locally under its own name; `html` is used as a local below.
    from html import escape

    def _field(row, key, default="—"):
        """Return ``row[key]`` as text, or *default* when missing/NaN/blank."""
        value = row.get(key, default)
        if pd.isna(value):
            return default
        return str(value).strip() or default

    if not brand_query.strip():
        return '<div class="ph">Enter a brand name to see its full drug profile</div>', None

    # Find best match
    sims = cosine_similarity(_encode(brand_query), MAT).flatten()
    idx = int(sims.argsort()[-1])
    row = DF.iloc[idx]
    sc_v = float(sims[idx])
    if sc_v < 0.01:
        return f'<div class="ph">No drug found matching "<b>{escape(brand_query)}</b>".</div>', None

    sys_n = _field(row, "medical_system", "")
    c = SC.get(sys_n, "#6B7280")
    em = EMOJI.get(sys_n, "💊")
    bn = _field(row, "brand_name")
    # NaN-aware fallback: str(NaN) == "nan" is truthy, so the old
    # `str(...) or ...` never reached generic_name.
    gn = _field(row, "gns", "") or _field(row, "generic_name")
    dos = _field(row, "dosage_form")
    mfr = _field(row, "manufacturer")
    clu = _field(row, "cluster")
    dart = _field(row, "DAR")
    txt = _field(row, "drug_text", "")

    # Siblings: other rows of the same system sharing this compound.  Compare
    # on whichever identity column the row populates; comparing against the
    # string "nan" matched nothing and produced a count of -1.
    key_col = "gns" if _field(row, "gns", "") else "generic_name"
    key_val = row.get(key_col)
    if pd.isna(key_val):
        sib_count = 0
    else:
        sib_mask = (DF["medical_system"] == row.get("medical_system")) & (DF[key_col] == key_val)
        sib_count = max(int(sib_mask.sum()) - 1, 0)

    html = f"""
    <div class="fp-card" style="border:2px solid {c}40;background:white;border-radius:16px;overflow:hidden">
    <div class="fp-banner" style="background:linear-gradient(135deg,{c},{c}99);padding:20px 24px;color:white">
    <div style="font-size:0.85rem;opacity:0.85;margin-bottom:4px">{em} {escape(sys_n)}</div>
    <div style="font-size:1.7rem;font-weight:800;letter-spacing:-0.5px">{escape(bn)}</div>
    <div style="font-size:0.95rem;opacity:0.9;margin-top:4px">{escape(gn[:80])}</div>
    <div style="margin-top:12px;background:rgba(255,255,255,0.2);border-radius:20px;padding:5px 14px;
    display:inline-block;font-size:0.8rem;font-weight:600">
    {int(sc_v * 100)}% match confidence
    </div>
    </div>
    <div style="padding:20px 24px;display:grid;grid-template-columns:1fr 1fr;gap:14px">
    <div class="fp-row"><span class="fp-k">💊 Dosage Form</span><span class="fp-v">{escape(dos)}</span></div>
    <div class="fp-row"><span class="fp-k">🏭 Manufacturer</span><span class="fp-v">{escape(mfr[:40])}</span></div>
    <div class="fp-row"><span class="fp-k">🧬 Medical System</span><span class="fp-v">{escape(sys_n)}</span></div>
    <div class="fp-row"><span class="fp-k">📂 Cluster</span><span class="fp-v">#{escape(clu)}</span></div>
    <div class="fp-row"><span class="fp-k">📋 DAR Number</span><span class="fp-v">{escape(dart)}</span></div>
    <div class="fp-row"><span class="fp-k">👥 Same-compound drugs</span><span class="fp-v">{sib_count}</span></div>
    </div>
    <div style="padding:0 24px 20px;font-size:0.82rem;color:#64748b">
    <b>Drug Text (TF-IDF input):</b> <code style="background:#f1f5f9;padding:3px 8px;border-radius:6px">{escape(txt[:120])}</code>
    </div>
    </div>"""

    # Top TF-IDF features for this drug (densify the row once).
    weights = np.asarray(MAT[idx].todense()).flatten()
    feat_idx = weights.argsort()[-20:][::-1]
    feat_scores = weights[feat_idx]
    feat_labels = _FEAT[feat_idx]
    nonzero = feat_scores > 0
    feat_labels = feat_labels[nonzero]
    feat_scores = feat_scores[nonzero]

    fig = go.Figure(go.Bar(
        x=feat_scores[::-1], y=feat_labels[::-1],
        orientation="h",
        marker=dict(
            color=feat_scores[::-1],
            colorscale=[[0, "#dbeafe"], [1, c]],
            showscale=False,
            line=dict(color="white", width=1),
        ),
        text=[f"{v:.3f}" for v in feat_scores[::-1]],
        textposition="outside",
    ))
    fig.update_layout(
        title=dict(text=f"TF-IDF Feature Fingerprint: {bn}",
                   font=dict(size=13, color="#1e293b")),
        height=max(300, len(feat_labels) * 28 + 80),
        paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        margin=dict(l=10, r=60, t=50, b=10),
        xaxis=dict(gridcolor="#f1f5f9", title="TF-IDF Weight"),
        yaxis=dict(title=""),
    )
    return html, fig
# ────────────────────────────────────────────────────────────────
# 6. TAB 5 β€” FDA LIVE INTELLIGENCE
# ────────────────────────────────────────────────────────────────
# Local (INN / registry) names mapped to the names used in US FDA data.
FDA_NAME_MAP = {
    "Paracetamol": "acetaminophen", "Azithromycin": "azithromycin",
    "Ciprofloxacin": "ciprofloxacin", "Amoxicillin": "amoxicillin",
    "Omeprazole": "omeprazole", "Metformin": "metformin",
    "Atorvastatin": "atorvastatin", "Amlodipine": "amlodipine",
    "Ceftriaxone": "ceftriaxone", "Diclofenac": "diclofenac sodium",
    "Esomeprazole": "esomeprazole", "Cefixime": "cefixime",
    "Salbutamol": "albuterol", "Ibuprofen": "ibuprofen",
    "Metronidazole": "metronidazole", "Cefuroxime": "cefuroxime",
}

# Case-insensitive view of FDA_NAME_MAP so "paracetamol" resolves as well.
_FDA_NAME_LOOKUP = {name.lower(): fda for name, fda in FDA_NAME_MAP.items()}

# Name fields tried on every endpoint (the original behaviour).
_FDA_DEFAULT_FIELDS = ("openfda.generic_name", "openfda.brand_name")

# Extra endpoint-specific name fields, tried before the defaults.
# NOTE(review): taken from the openFDA searchable-field reference for the
# event and NDC endpoints; confirm against the current API docs.
_FDA_ENDPOINT_FIELDS = {
    "event": (
        "patient.drug.openfda.generic_name",
        "patient.drug.openfda.brand_name",
        "patient.drug.medicinalproduct",
    ),
    "ndc": ("generic_name", "brand_name"),
}


def _fda_fetch(drug, endpoint):
    """Query one openFDA drug endpoint for *drug*.

    Args:
        drug: Drug name as typed by the user.
        endpoint: openFDA drug endpoint name (``"label"``, ``"event"``,
            ``"ndc"``).

    Returns:
        ``(results, term)``: up to 3 result dicts from the first field that
        produced a hit (``[]`` when none did), and the term actually searched.
        Network and JSON errors are treated as "no result" for that field.
    """
    name = drug.strip()
    term = FDA_NAME_MAP.get(name) or _FDA_NAME_LOOKUP.get(name.lower(), name.lower())
    base = f"https://api.fda.gov/drug/{endpoint}.json"
    # Quote the term so multi-word names ("diclofenac sodium") are searched
    # as one phrase; strip embedded quotes so the query stays well-formed.
    phrase = '"' + term.replace('"', " ") + '"'
    fields = _FDA_ENDPOINT_FIELDS.get(endpoint, ()) + _FDA_DEFAULT_FIELDS
    for field in fields:
        try:
            r = requests.get(
                base,
                params={"search": f"{field}:{phrase}", "limit": "3"},
                timeout=9,
            )
            if r.status_code == 200:
                res = r.json().get("results", [])
                if res:
                    return res, term
        except (requests.RequestException, ValueError):
            # Best-effort lookup: a failed request or malformed JSON moves
            # on to the next field.
            continue
    return [], term
def tab5_fda(drug, ep_label):
    """Gradio callback for the FDA Live Intelligence tab.

    Args:
        drug: Drug name typed by the user.
        ep_label: Selected dataset label; unknown labels fall back to the
            drug-label endpoint.

    Returns:
        An HTML string with up to 3 records, or a placeholder / "no data"
        panel.
    """
    # Imported locally under its own name; `html` is used as a local below.
    from html import escape

    def _show(value, limit=None):
        """Render an API value as HTML-safe text ('—' when empty).

        Lists are joined with ", " so list-valued fields do not show up as a
        Python list repr.
        """
        if isinstance(value, (list, tuple)):
            value = ", ".join(str(v) for v in value if v not in (None, ""))
        if value is None or str(value).strip() == "":
            return "—"
        text = str(value)
        if limit is not None:
            text = text[:limit]
        return escape(text)

    def _first(record, key, limit):
        """Return the first entry of a list-valued label field, truncated."""
        value = record.get(key)
        if not value:
            return "—"
        return _show(value[0] if isinstance(value, (list, tuple)) else value, limit)

    if not drug.strip():
        return '<div class="ph">🏥 Enter a drug name to fetch live FDA data</div>'

    ep_map = {"Drug Labels": "label", "Adverse Events (FAERS)": "event", "NDC Directory": "ndc"}
    ep = ep_map.get(ep_label, "label")
    results, term = _fda_fetch(drug, ep)
    # User input and remote API text both land in HTML, so escape them.
    safe_drug = escape(drug)

    if not results:
        return f"""<div class="fda-miss">
        <div style="font-size:2.5rem;margin-bottom:12px">🔍</div>
        <div><b>No FDA data found for "{safe_drug}"</b></div>
        <div style="color:#64748b;font-size:0.88rem;margin-top:8px;line-height:1.7">
        This drug may not be in the US FDA database (common for Bangladesh-registry drugs).<br>
        <b>Try:</b> Paracetamol · Azithromycin · Ciprofloxacin · Omeprazole · Metformin · Ibuprofen
        </div></div>"""

    html = f"""<div class="fda-hdr">
    <span class="fda-badge">🇺🇸 FDA {escape(str(ep_label))}</span>
    <b>{safe_drug}</b> → searched as <code>{escape(term)}</code>
    <span class="fda-cnt">{len(results)} record(s)</span>
    </div>"""

    if ep == "label":
        for i, res in enumerate(results[:3], 1):
            o = res.get("openfda", {})
            brand = _show(o.get("brand_name", [])[:2])
            gen = _show(o.get("generic_name", [])[:2])
            mfr = _show(o.get("manufacturer_name", [])[:1])
            purp = _first(res, "purpose", 280)
            ind = _first(res, "indications_and_usage", 380)
            warn = _first(res, "warnings", 280)
            html += f"""<div class="fda-card">
            <div class="fda-num">📄 Record {i}</div>
            <table class="fda-tbl">
            <tr><td class="fk">Brand Name</td><td>{brand}</td></tr>
            <tr><td class="fk">Generic Name</td><td>{gen}</td></tr>
            <tr><td class="fk">Manufacturer</td><td>{mfr}</td></tr>
            <tr><td class="fk">Purpose</td><td>{purp}</td></tr>
            <tr><td class="fk">Indications</td><td>{ind}</td></tr>
            <tr><td class="fk">Warnings</td><td>{warn}</td></tr>
            </table></div>"""
    elif ep == "event":
        for i, res in enumerate(results[:3], 1):
            pt = res.get("patient", {})
            rxn = _show([rx.get("reactionmeddrapt", "") for rx in pt.get("reaction", [])[:6]])
            drg = _show([dg.get("medicinalproduct", "") for dg in pt.get("drug", [])[:4]])
            sev = "⚠️ Serious" if res.get("serious") == "1" else "ℹ️ Non-Serious"
            html += f"""<div class="fda-card">
            <div class="fda-num">Event {i} — {sev}</div>
            <table class="fda-tbl">
            <tr><td class="fk">Reactions</td><td>{rxn}</td></tr>
            <tr><td class="fk">Drugs Involved</td><td>{drg}</td></tr>
            </table></div>"""
    elif ep == "ndc":
        for i, res in enumerate(results[:3], 1):
            html += f"""<div class="fda-card">
            <div class="fda-num">NDC {i}</div>
            <table class="fda-tbl">
            <tr><td class="fk">NDC Code</td><td>{_show(res.get('product_ndc'))}</td></tr>
            <tr><td class="fk">Brand</td><td>{_show(res.get('brand_name'))}</td></tr>
            <tr><td class="fk">Generic</td><td>{_show(res.get('generic_name'))}</td></tr>
            <tr><td class="fk">Dosage Form</td><td>{_show(res.get('dosage_form'))}</td></tr>
            <tr><td class="fk">Route</td><td>{_show(res.get('route'))}</td></tr>
            <tr><td class="fk">Labeler</td><td>{_show(res.get('labeler_name'))}</td></tr>
            </table></div>"""
    return html
# ────────────────────────────────────────────────────────────────
# 7. TAB 6 β€” AI MEDICAL Q&A (HuggingFace Inference API)
# ────────────────────────────────────────────────────────────────
# System prompt prepended to every question sent to the hosted model.
# The dash after "PharmaBridge AI" was mis-decoded UTF-8 and is restored.
SYS_PROMPT = (
    "You are PharmaBridge AI — a knowledgeable, friendly pharmaceutical assistant. "
    "You help healthcare professionals and students understand drug information, "
    "pharmacology, traditional medicine (Ayurvedic, Unani, Homeopathic, Herbal), "
    "drug interactions, and the Bangladesh drug registry. "
    "Be concise, accurate, and always note that answers are educational, "
    "not a substitute for professional medical advice."
)

# Hosted models tried in order until one returns a usable answer.
HF_MODELS = [
    "mistralai/Mistral-7B-Instruct-v0.3",
    "HuggingFaceH4/zephyr-7b-beta",
    "google/flan-t5-xxl",
]
def tab6_ai(question, history):
    """Gradio callback for the AI Medical Q&A tab.

    Sends the question to each model in ``HF_MODELS`` in turn and keeps the
    first reply longer than 30 characters; otherwise a "warming up" notice
    is used as the answer.

    Args:
        question: The user's question.
        history: Chat history as a list of ``(question, answer)`` tuples, or
            ``None``.

    Returns:
        ``(history, "")``: the history with the new exchange appended, and an
        empty string that clears the input box.  A blank question returns
        the history untouched.
    """
    if not question.strip():
        return history, ""
    history = history or []

    headers = {"Content-Type": "application/json"}
    # Optional: authenticate when a token is configured in the environment.
    # NOTE(review): the variable names follow the usual Hugging Face
    # convention; confirm which secret name this Space defines.
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    chat_payload = {
        "inputs": f"<s>[INST] {SYS_PROMPT}\n\nQuestion: {question} [/INST]",
        "parameters": {"max_new_tokens": 500, "temperature": 0.65,
                       "top_p": 0.9, "repetition_penalty": 1.1,
                       "return_full_text": False},
    }
    # flan-t5 uses different format
    flan_payload = {
        "inputs": f"As a pharmacist, answer clearly: {question}",
        "parameters": {"max_new_tokens": 350},
    }

    answer = ""
    for model in HF_MODELS:
        model_url = f"https://api-inference.huggingface.co/models/{model}"
        payload = flan_payload if "flan" in model_url else chat_payload
        try:
            r = requests.post(model_url, headers=headers, json=payload, timeout=28)
            if r.status_code != 200:
                continue
            d = r.json()
            first = d[0] if isinstance(d, list) else d
            txt = first.get("generated_text", "").strip()
        except (requests.RequestException, ValueError, LookupError,
                AttributeError, TypeError):
            # Network failure or unexpected response shape: try the next model.
            continue
        if len(txt) > 30:
            answer = txt
            break

    if not answer:
        answer = (
            "⚠️ The AI model is warming up (HuggingFace free tier cold-start). "
            "Please wait ~20 seconds and try again.\n\n"
            "**Meanwhile**, you can:\n"
            "- Use the **Smart Search** tab to look up this drug directly\n"
            "- Use the **FDA Live Data** tab for official drug information"
        )
    history.append((question, answer))
    return history, ""
def tab6_clear():
    """Reset the chat: return an empty history and an empty input string."""
    empty_history = []
    empty_input = ""
    return empty_history, empty_input
# ────────────────────────────────────────────────────────────────
# 8. TAB 7 β€” DRUG EXPLORER (browse & filter)
# ────────────────────────────────────────────────────────────────
# Every dosage form in the dataset, sorted, behind the "All" wildcard.
_ALL_DOS = ["All", *sorted(DF["dosage_form"].dropna().unique().tolist())]


def _dos_choices(sys):
    """Return a dropdown update listing the dosage forms of one system.

    Args:
        sys: Selected medical system, or ``"All"`` for every dosage form.

    Returns:
        ``gr.update`` with the new choices and the value reset to ``"All"``.
    """
    if sys == "All":
        options = _ALL_DOS
    else:
        in_system = DF[DF["medical_system"] == sys]
        options = ["All", *sorted(in_system["dosage_form"].dropna().unique().tolist())]
    return gr.update(choices=options, value="All")
def tab7_explore(system, dosage, search, page):
    """Gradio callback for the Drug Explorer tab (filter + paginate).

    Args:
        system: Medical system to keep, or ``"All"``.
        dosage: Dosage form to keep, or ``"All"``.
        search: Case-insensitive substring matched against brand name,
            ``gns``, generic name and manufacturer; blank disables it.
        page: 1-based page number; clamped to the valid range.

    Returns:
        ``(table_html, status_markdown)``.  Pages hold 20 rows.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def _field(row, key, default="—"):
        """Return ``row[key]`` as text, or *default* when missing/NaN/blank."""
        value = row.get(key, default)
        if pd.isna(value):
            return default
        return str(value).strip() or default

    # Boolean indexing already produces new frames, so no copy of DF is needed.
    sub = DF
    if system != "All":
        sub = sub[sub["medical_system"] == system]
    if dosage != "All":
        sub = sub[sub["dosage_form"] == dosage]
    if search.strip():
        t = search.lower().strip()
        hit = pd.Series(False, index=sub.index)
        for col in ("brand_name", "gns", "generic_name", "manufacturer"):
            # regex=False: the search box holds literal text.  As a pattern,
            # input such as "(" raised and "." / "+" matched unintended rows.
            hit |= sub[col].str.lower().str.contains(t, na=False, regex=False)
        sub = sub[hit]

    total = len(sub)
    PG = 20
    maxp = max(1, (total + PG - 1) // PG)
    page = min(max(1, int(page)), maxp)
    sl = sub.iloc[(page - 1) * PG:page * PG]
    if sl.empty:
        return '<div class="ph">No records match your filters.</div>', "0 records"

    rows = []
    for _, row in sl.iterrows():
        sys_n = _field(row, "medical_system", "")
        c = SC.get(sys_n, "#6B7280")
        em = EMOJI.get(sys_n, "💊")
        bn = _field(row, "brand_name")
        # NaN-aware fallback: str(NaN) == "nan" is truthy, so the old
        # `str(...) or ...` never reached generic_name.
        gn = _field(row, "gns", "") or _field(row, "generic_name")
        dos_v = _field(row, "dosage_form")
        mfr = _field(row, "manufacturer")[:36]
        rows.append(f"""<tr>
        <td><b>{escape(bn)}</b></td>
        <td style="max-width:190px">{escape(gn[:46])}</td>
        <td>{escape(dos_v)}</td>
        <td><span class="sb2" style="background:{c}18;color:{c};border:1px solid {c}30">{em} {escape(sys_n)}</span></td>
        <td style="color:#64748b">{escape(mfr)}</td>
        </tr>""")

    tbl = f"""<table class="xtbl">
    <thead><tr><th>Brand Name</th><th>Compound / Identity</th>
    <th>Dosage Form</th><th>System</th><th>Manufacturer</th></tr></thead>
    <tbody>{"".join(rows)}</tbody></table>"""
    return tbl, f"Page **{page}** / {maxp} · **{total:,}** records"
# ────────────────────────────────────────────────────────────────
# 9. CSS
# ────────────────────────────────────────────────────────────────
CSS="""
@import url('https://fonts.googleapis.com/css2?family=Inter:ital,wght@0,300;0,400;0,500;0,600;0,700;0,800;1,400&display=swap');
*{box-sizing:border-box}
body,.gradio-container{font-family:'Inter',sans-serif!important;background:#f0f4f8!important}
/* ── HEADER ─────────────────────────────────────────────── */
.app-hdr{
background:linear-gradient(135deg,#0f172a 0%,#1e3a8a 45%,#0369a1 100%);
border-radius:18px;padding:28px 32px;margin-bottom:4px;color:#fff;
box-shadow:0 10px 40px rgba(30,58,138,.35);
}
.app-title{font-size:2.1rem;font-weight:800;letter-spacing:-1px;margin:0}
.app-sub{font-size:1rem;opacity:.82;margin:6px 0 0}
.hbadges{display:flex;gap:8px;margin-top:14px;flex-wrap:wrap}
.hbadge{background:rgba(255,255,255,.16);border:1px solid rgba(255,255,255,.28);
border-radius:20px;padding:4px 13px;font-size:.78rem;font-weight:500}
.stats-row{display:flex;gap:10px;margin-top:16px;flex-wrap:wrap}
.stat{background:rgba(255,255,255,.12);border-radius:12px;padding:8px 16px;text-align:center;min-width:88px}
.sn{font-size:1.45rem;font-weight:800;display:block}
.sl{font-size:.7rem;opacity:.78;text-transform:uppercase;letter-spacing:.5px}
/* ── TABS ────────────────────────────────────────────────── */
.tab-nav button{font-weight:500!important;font-size:.88rem!important;border-radius:8px 8px 0 0!important}
.tab-nav button.selected{color:#1d4ed8!important;border-bottom:3px solid #1d4ed8!important;font-weight:700!important}
/* ── INPUTS ──────────────────────────────────────────────── */
.gr-input,textarea,.gr-dropdown select{
border-radius:10px!important;border:1.5px solid #e2e8f0!important;
font-family:'Inter',sans-serif!important;transition:border-color .2s!important;
}
.gr-input:focus,textarea:focus{border-color:#3b82f6!important;box-shadow:0 0 0 3px rgba(59,130,246,.1)!important}
.gr-button-primary{
background:linear-gradient(135deg,#1d4ed8,#0891b2)!important;
border:none!important;border-radius:10px!important;font-weight:700!important;
letter-spacing:.2px!important;box-shadow:0 4px 14px rgba(29,78,216,.3)!important;
transition:transform .15s,box-shadow .15s!important;
}
.gr-button-primary:hover{transform:translateY(-1px)!important;box-shadow:0 6px 22px rgba(29,78,216,.4)!important}
/* ── PLACEHOLDERS ────────────────────────────────────────── */
.ph{text-align:center;color:#94a3b8;padding:60px 20px;font-size:.98rem;
background:#f8fafc;border-radius:14px;border:2px dashed #e2e8f0}
/* ── RESULT CARDS ────────────────────────────────────────── */
.rh{font-size:.93rem;color:#475569;padding:10px 0 14px;
border-bottom:1px solid #e2e8f0;margin-bottom:14px}
.grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(270px,1fr));gap:12px}
.card{background:#fff;border-radius:13px;padding:14px 16px;
box-shadow:0 1px 4px rgba(0,0,0,.06);transition:transform .15s,box-shadow .15s}
.card:hover{transform:translateY(-2px);box-shadow:0 5px 18px rgba(0,0,0,.10)}
.ch{display:flex;justify-content:space-between;align-items:center;margin-bottom:8px}
.sbadge{font-size:.71rem;font-weight:600;padding:3px 9px;border-radius:20px;white-space:nowrap}
.spct{font-size:.74rem;font-weight:700;padding:3px 9px;border-radius:20px}
.bn{font-size:1.05rem;font-weight:700;color:#1e293b;margin-bottom:4px}
.gn{font-size:.81rem;color:#64748b;margin-bottom:9px;min-height:1.2em}
.meta{font-size:.77rem;color:#94a3b8;margin-bottom:10px;line-height:1.8}
.bar{height:4px;background:#f1f5f9;border-radius:2px;overflow:hidden}
.fill{height:100%;border-radius:2px;transition:width .4s}
/* ── CROSS COMPARE ───────────────────────────────────────── */
.cph{font-size:.96rem;color:#475569;padding:10px 0 16px;font-weight:500}
.cgrid{display:grid;grid-template-columns:repeat(5,1fr);gap:11px}
@media(max-width:900px){.cgrid{grid-template-columns:repeat(2,1fr)}}
.scol{background:#fff;border-radius:13px;padding:14px;box-shadow:0 1px 4px rgba(0,0,0,.06)}
.stitle{font-weight:700;font-size:.93rem;margin-bottom:12px}
.nr{color:#94a3b8;font-size:.84rem;padding:10px 0}
.cc{padding:10px;margin-bottom:8px;border-radius:9px;background:#f8fafc}
.cbn{font-weight:700;font-size:.88rem;color:#1e293b}
.cgn{font-size:.77rem;color:#64748b;margin:3px 0}
.cm{font-size:.74rem;color:#94a3b8}
.sbar{height:3px;background:#f1f5f9;border-radius:2px;overflow:hidden;margin-top:6px}
.sfill{height:100%;border-radius:2px}
/* ── FINGERPRINT ─────────────────────────────────────────── */
.fp-banner{border-radius:0}
.fp-row{display:flex;flex-direction:column;background:#f8fafc;border-radius:10px;padding:10px 14px}
.fp-k{font-size:.74rem;color:#64748b;font-weight:600;text-transform:uppercase;letter-spacing:.4px}
.fp-v{font-size:.95rem;color:#1e293b;font-weight:500;margin-top:2px}
/* ── FDA ─────────────────────────────────────────────────── */
.fda-hdr{background:linear-gradient(135deg,#eff6ff,#e0f2fe);border-radius:11px;
padding:14px 18px;margin-bottom:14px;display:flex;align-items:center;
gap:10px;flex-wrap:wrap;font-size:.88rem;color:#1e293b}
.fda-badge{background:#1d4ed8;color:#fff;padding:4px 11px;border-radius:20px;
font-size:.77rem;font-weight:600}
.fda-cnt{margin-left:auto;background:#dcfce7;color:#166534;padding:3px 10px;
border-radius:20px;font-size:.77rem;font-weight:600}
.fda-miss{text-align:center;padding:40px;color:#64748b;background:#f8fafc;
border-radius:14px;border:2px dashed #e2e8f0}
.fda-card{background:#fff;border-radius:13px;padding:18px;margin-bottom:12px;
box-shadow:0 1px 4px rgba(0,0,0,.06)}
.fda-num{font-weight:700;font-size:.88rem;color:#1d4ed8;margin-bottom:10px}
.fda-tbl{width:100%;border-collapse:collapse;font-size:.84rem}
.fda-tbl tr{border-bottom:1px solid #f1f5f9}
.fda-tbl tr:last-child{border-bottom:none}
.fk{color:#64748b;font-weight:600;padding:6px 14px 6px 0;white-space:nowrap;
vertical-align:top;width:130px}
.fda-tbl td:last-child{color:#1e293b;padding:6px 0;line-height:1.55}
/* ── CHATBOT ─────────────────────────────────────────────── */
.chatbot{border-radius:13px!important;border:1.5px solid #e2e8f0!important}
/* ── EXPLORER TABLE ──────────────────────────────────────── */
.xtbl{width:100%;border-collapse:collapse;font-size:.83rem}
.xtbl thead{background:linear-gradient(135deg,#0f172a,#1d4ed8);color:#fff}
.xtbl th{padding:11px 14px;text-align:left;font-weight:600;letter-spacing:.3px}
.xtbl tbody tr{border-bottom:1px solid #f1f5f9;transition:background .15s}
.xtbl tbody tr:hover{background:#f8fafc}
.xtbl td{padding:9px 14px;color:#1e293b;vertical-align:top}
.sb2{font-size:.71rem;font-weight:600;padding:2px 8px;border-radius:20px;white-space:nowrap}
code{background:#f1f5f9;padding:2px 7px;border-radius:5px;font-size:.84em;color:#0891b2}
"""
# ────────────────────────────────────────────────────────────────
# 10. BUILD GRADIO APP
# ────────────────────────────────────────────────────────────────
# Static HTML hero banner shown at the very top of the app (passed to
# gr.HTML). It is a plain string: nothing is interpolated, so no f-prefix.
# NOTE: every figure in the stats row (record count, manufacturers, TF-IDF
# features, Precision@10, silhouette) is a hard-coded literal — it is not
# derived from the loaded data here and must be edited by hand if the
# dataset or models are rebuilt.
HEADER = """
<div class="app-hdr">
<div class="app-title">💊 PharmaBridge</div>
<div class="app-sub">Cross-Medical-System Drug Intelligence Engine · Bangladesh National Drug Registry</div>
<div class="hbadges">
<span class="hbadge">🔬 TF-IDF + Cosine Similarity</span>
<span class="hbadge">🧠 SVD + K-Means Clustering</span>
<span class="hbadge">🌐 OpenFDA Live API</span>
<span class="hbadge">🤖 Mistral-7B AI Assistant</span>
<span class="hbadge">📊 Interactive Dashboards</span>
</div>
<div class="stats-row">
<div class="stat"><span class="sn">53,584</span><span class="sl">Total Drugs</span></div>
<div class="stat"><span class="sn">5</span><span class="sl">Med. Systems</span></div>
<div class="stat"><span class="sn">725</span><span class="sl">Manufacturers</span></div>
<div class="stat"><span class="sn">12,311</span><span class="sl">TF-IDF Features</span></div>
<div class="stat"><span class="sn">95.5%</span><span class="sl">Precision@10</span></div>
<div class="stat"><span class="sn">0.2159</span><span class="sl">Silhouette</span></div>
</div>
</div>
"""
# ── Gradio UI ───────────────────────────────────────────────────
# Builds the Blocks app: the HEADER banner followed by eight tabs. Each tab
# creates its widgets and wires them to handler callables referenced by name
# (tab1, tab2, _overview_fig, _deep_fig, _treemap_fig, tab4_fingerprint,
# tab5_fda, tab6_ai, tab6_clear, tab7_explore, _dos_choices); those, plus
# CSS, SYSTEMS and _ALL_DOS, must already exist at module level.
#
# NOTE(review): the Row/Column nesting below was reconstructed from the order
# of the statements. Please confirm it matches the intended layout — in
# particular whether the Explorer "Browse Database" button belongs inside the
# filter row.

# System filter shared by the analytics deep-dive and the explorer tab.
_SYSTEM_FILTER_CHOICES = ["All", "Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]

# Long Markdown bodies are kept flush-left at module level so no code
# indentation leaks into the rendered text. Blank lines separate Markdown
# blocks on purpose: a `---` placed directly under a paragraph line is parsed
# as a setext heading underline instead of a horizontal rule.
_FINGERPRINT_INTRO_MD = """
### Single Drug Deep-Dive

Search for any drug to see its **full profile card** plus a bar chart of its
top TF-IDF feature weights — the exact tokens driving its similarity scores.
"""

_FDA_INTRO_MD = (
    "> **Live OpenFDA API** — US drug labels, adverse events (FAERS), and NDC records. "
    "~40% of Bangladesh registry drugs appear here. Bangladeshi names auto-mapped to FDA terms."
)

_AI_INTRO_MD = """
### PharmaBridge AI — Pharmaceutical Q&A

Powered by **Mistral-7B-Instruct** via HuggingFace Inference API (free, no key needed).
Ask anything about drugs, pharmacology, traditional medicine, or the Bangladesh registry.

> ⚠️ Educational only — not a substitute for professional medical advice. Model may take ~20s to cold-start.
"""

_ABOUT_MD = """
## PharmaBridge — Cross-Medical-System Drug Intelligence

**PharmaBridge** is a master's thesis project — the first NLP-based drug recommendation system
spanning all 5 major South Asian pharmaceutical traditions simultaneously using the
Bangladesh National Drug Registry (53,584 records).

---

### Dataset Composition

| Medical System | Records | Share |
|---|---|---|
| Allopathic | 36,254 | 67.7% |
| Unani | 8,460 | 15.8% |
| Ayurvedic | 5,262 | 9.8% |
| Homeopathic | 2,580 | 4.8% |
| Herbal | 1,028 | 1.9% |
| **Total** | **53,584** | **100%** |

### Technical Architecture

| Component | Configuration |
|---|---|
| Vectorization | TF-IDF, bigrams (1,2), max_features=15,000, sublinear_tf=True |
| Retrieval | Cosine Similarity on sparse matrix (53,584 × 12,311) |
| Dim. Reduction | TruncatedSVD, 50 components, 26.2% variance |
| Clustering | K-Means K=10 (elbow-selected), Silhouette=0.2159 |

### Evaluation Results

| Metric | Value |
|---|---|
| Precision@5 | 97.00% |
| Precision@10 | 95.50% |
| Precision@20 | 90.55% |
| Silhouette Score | 0.2159 |

### App Features

| Tab | Feature |
|---|---|
| 🔍 Smart Search | TF-IDF cosine retrieval with rich card UI + bar chart |
| ⚖️ Cross-System Compare | Side-by-side 5-system view + radar chart |
| 📊 Dataset Analytics | Overview dashboard, deep-dive, treemap |
| 🧬 Drug Fingerprint | Single drug profile + TF-IDF feature bar chart |
| 🏥 FDA Live Data | OpenFDA labels / adverse events / NDC lookup |
| 🤖 AI Medical Q&A | Mistral-7B via HuggingFace Inference API |
| 📋 Drug Explorer | Paginated browse & filter across all 53,584 records |

---

> **Disclaimer:** For research and educational purposes only.
> Not intended for clinical decision-making.
> Always consult a qualified healthcare professional for medical advice.
"""

with gr.Blocks(
    css=CSS,
    title="PharmaBridge",
    theme=gr.themes.Base(
        primary_hue=gr.themes.colors.blue,
        font=gr.themes.GoogleFont("Inter"),
    ),
) as app:
    gr.HTML(HEADER)
    with gr.Tabs(elem_classes="tab-nav"):
        # ── TAB 1 ─────────────────────────────────────────────────
        with gr.Tab("🔍 Smart Search"):
            with gr.Row(equal_height=True):
                with gr.Column(scale=4):
                    t1q = gr.Textbox(
                        label="Search Query",
                        placeholder="Try: Azithromycin, Ashwagandha, nux vomica, sharbat amrood, paracetamol fever…",
                        lines=1,
                    )
                with gr.Column(scale=1):
                    t1sys = gr.Dropdown(choices=SYSTEMS, value="All Systems", label="System")
                with gr.Column(scale=1):
                    t1btn = gr.Button("🔍 Search", variant="primary", scale=1)
            with gr.Row():
                t1n = gr.Slider(5, 50, value=12, step=1, label="Max Results")
                t1s = gr.Slider(0.0, 0.5, value=0.04, step=0.01, label="Min Similarity")
            t1stat = gr.Markdown("")
            t1cards = gr.HTML('<div class="ph">🔍 Enter a drug name, compound, or symptom above</div>')
            t1chart = gr.Plot(label="Score Distribution")
            # The button and Enter-in-textbox run the same search.
            t1_inputs = [t1q, t1sys, t1n, t1s]
            t1_outputs = [t1cards, t1chart, t1stat]
            t1btn.click(tab1, t1_inputs, t1_outputs)
            t1q.submit(tab1, t1_inputs, t1_outputs)
            gr.Examples(
                [
                    ["Azithromycin 500mg", "Allopathic"],
                    ["Ashwagandha capsule", "Ayurvedic"],
                    ["Nux Vomica liquid", "Homeopathic"],
                    ["Sharbat Amrood", "Unani"],
                    ["Moringa leaf powder", "Herbal"],
                    ["antibiotic tablet", "All Systems"],
                    ["digestive capsule", "All Systems"],
                ],
                inputs=[t1q, t1sys],
                label="Quick Examples",
            )
        # ── TAB 2 ─────────────────────────────────────────────────
        with gr.Tab("⚖️ Cross-System Compare"):
            with gr.Row(equal_height=True):
                with gr.Column(scale=5):
                    t2q = gr.Textbox(
                        label="Query",
                        placeholder="e.g. pain relief tablet, digestive liver, sleep anxiety, blood pressure…",
                        lines=1,
                    )
                with gr.Column(scale=1):
                    t2n = gr.Slider(1, 5, value=3, step=1, label="Results / System")
                with gr.Column(scale=1):
                    t2btn = gr.Button("⚖️ Compare", variant="primary")
            t2cards = gr.HTML('<div class="ph">Compare the same therapeutic need across all 5 medical traditions simultaneously</div>')
            t2radar = gr.Plot(label="Cross-System Similarity Radar")
            t2_inputs = [t2q, t2n]
            t2_outputs = [t2cards, t2radar]
            t2btn.click(tab2, t2_inputs, t2_outputs)
            t2q.submit(tab2, t2_inputs, t2_outputs)
            gr.Examples(
                [
                    ["digestive liver tablet"], ["pain anti-inflammatory"],
                    ["antibiotic infection"], ["blood pressure"],
                    ["cough respiratory"], ["sleep anxiety stress"],
                ],
                inputs=[t2q],
            )
        # ── TAB 3 ─────────────────────────────────────────────────
        with gr.Tab("📊 Dataset Analytics"):
            with gr.Tabs():
                with gr.Tab("🌐 Overview Dashboard"):
                    ov_btn = gr.Button("📊 Render Dashboard", variant="primary")
                    ov_fig = gr.Plot()
                    ov_btn.click(_overview_fig, [], [ov_fig])
                    # Also render once when the page loads.
                    app.load(_overview_fig, [], [ov_fig])
                with gr.Tab("🔎 System Deep Dive"):
                    with gr.Row():
                        dd_sys = gr.Dropdown(
                            choices=_SYSTEM_FILTER_CHOICES,
                            value="Allopathic",
                            label="Select System",
                        )
                        dd_btn = gr.Button("Analyze", variant="primary")
                    dd_fig = gr.Plot()
                    dd_btn.click(_deep_fig, [dd_sys], [dd_fig])
                    dd_sys.change(_deep_fig, [dd_sys], [dd_fig])
                    # Initial render reads the dropdown itself, so the
                    # default system is declared in exactly one place.
                    app.load(_deep_fig, [dd_sys], [dd_fig])
                with gr.Tab("🗺️ Treemap Explorer"):
                    tm_btn = gr.Button("🗺️ Render Treemap", variant="primary")
                    tm_fig = gr.Plot()
                    tm_btn.click(_treemap_fig, [], [tm_fig])
                    app.load(_treemap_fig, [], [tm_fig])
        # ── TAB 4 ─────────────────────────────────────────────────
        with gr.Tab("🧬 Drug Fingerprint"):
            gr.Markdown(_FINGERPRINT_INTRO_MD)
            with gr.Row(equal_height=True):
                fp_q = gr.Textbox(
                    label="Brand Name or Compound",
                    placeholder="e.g. Azithromycin, Ashwagandha, Nux Vomica, Sharbat Amrood…",
                    lines=1,
                )
                fp_btn = gr.Button("🧬 Profile", variant="primary")
            fp_card = gr.HTML('<div class="ph">🧬 Enter a drug or compound name to generate its fingerprint</div>')
            fp_fig = gr.Plot(label="TF-IDF Feature Fingerprint")
            fp_outputs = [fp_card, fp_fig]
            fp_btn.click(tab4_fingerprint, [fp_q], fp_outputs)
            fp_q.submit(tab4_fingerprint, [fp_q], fp_outputs)
            gr.Examples(
                [
                    ["Azithromycin"], ["Ashwagandha"], ["Nux Vomica"],
                    ["Sharbat Amrood"], ["Moringa"], ["Paracetamol"],
                ],
                inputs=[fp_q],
            )
        # ── TAB 5 ─────────────────────────────────────────────────
        with gr.Tab("🏥 FDA Live Data"):
            gr.Markdown(_FDA_INTRO_MD)
            with gr.Row(equal_height=True):
                fda_drug = gr.Textbox(
                    label="Drug Name",
                    placeholder="Paracetamol, Azithromycin, Ciprofloxacin, Omeprazole, Metformin…",
                    lines=1,
                )
                fda_ep = gr.Radio(
                    ["Drug Labels", "Adverse Events (FAERS)", "NDC Directory"],
                    value="Drug Labels",
                    label="FDA Database",
                )
                fda_btn = gr.Button("🔎 Fetch", variant="primary")
            fda_out = gr.HTML('<div class="ph">🏥 Enter a drug name and click Fetch</div>')
            fda_inputs = [fda_drug, fda_ep]
            fda_btn.click(tab5_fda, fda_inputs, [fda_out])
            fda_drug.submit(tab5_fda, fda_inputs, [fda_out])
            gr.Examples(
                [
                    ["Paracetamol"], ["Azithromycin"], ["Ciprofloxacin"],
                    ["Omeprazole"], ["Metformin"], ["Ibuprofen"],
                ],
                inputs=[fda_drug],
            )
        # ── TAB 6 ─────────────────────────────────────────────────
        with gr.Tab("🤖 AI Medical Q&A"):
            gr.Markdown(_AI_INTRO_MD)
            ai_bot = gr.Chatbot(label="PharmaBridge AI", height=450, elem_classes="chatbot")
            with gr.Row():
                ai_inp = gr.Textbox(
                    label="Your Question",
                    lines=2,
                    scale=5,
                    placeholder="e.g. What is Ashwagandha used for? / Side effects of Azithromycin? / What is Unani medicine?",
                )
                with gr.Column(scale=1):
                    ai_send = gr.Button("Send 💬", variant="primary")
                    ai_clear = gr.Button("Clear 🗑️")
            # Handlers receive (question, history) and write back to
            # (chatbot, input box), in that order.
            ai_inputs = [ai_inp, ai_bot]
            ai_outputs = [ai_bot, ai_inp]
            ai_send.click(tab6_ai, ai_inputs, ai_outputs)
            ai_inp.submit(tab6_ai, ai_inputs, ai_outputs)
            ai_clear.click(tab6_clear, [], ai_outputs)
            gr.Examples(
                [
                    ["What is Ashwagandha used for in Ayurvedic medicine?"],
                    ["Explain Unani medicine and its traditional formulations"],
                    ["What are the common side effects of Azithromycin?"],
                    ["How does TF-IDF cosine similarity work for drug retrieval?"],
                    ["What is Homeopathic potency and how are remedies prepared?"],
                    ["Compare Allopathic and Herbal medicine approaches"],
                ],
                inputs=[ai_inp],
            )
        # ── TAB 7 ─────────────────────────────────────────────────
        with gr.Tab("📋 Drug Explorer"):
            with gr.Row():
                ex_sys = gr.Dropdown(_SYSTEM_FILTER_CHOICES, value="All", label="System")
                ex_dos = gr.Dropdown(choices=_ALL_DOS, value="All", label="Dosage Form")
                ex_srch = gr.Textbox(label="Search", placeholder="Brand, compound, manufacturer…")
                ex_pg = gr.Number(value=1, label="Page", minimum=1, precision=0)
            ex_btn = gr.Button("🔍 Browse Database", variant="primary")
            ex_info = gr.Markdown("")
            ex_tbl = gr.HTML('<div class="ph">Click Browse to explore all 53,584 drug records</div>')
            # Changing the system passes it to _dos_choices, whose result
            # updates the dosage-form dropdown.
            ex_sys.change(_dos_choices, [ex_sys], [ex_dos])
            ex_inputs = [ex_sys, ex_dos, ex_srch, ex_pg]
            ex_outputs = [ex_tbl, ex_info]
            ex_btn.click(tab7_explore, ex_inputs, ex_outputs)
            ex_srch.submit(tab7_explore, ex_inputs, ex_outputs)
        # ── TAB 8 ─────────────────────────────────────────────────
        with gr.Tab("ℹ️ About"):
            gr.Markdown(_ABOUT_MD)

# Launch only when executed as a script, not when imported.
if __name__ == "__main__":
    app.launch()