"""
╔══════════════════════════════════════════════════════════════╗
║ PharmaBridge — Cross-Medical-System Drug Intelligence ║
║ Hugging Face Spaces | Gradio 4.x | Master's Thesis ║
╚══════════════════════════════════════════════════════════════╝
7 Tabs:
1. Smart Drug Search — TF-IDF cosine retrieval with cards UI
2. Cross-System Compare — Side-by-side 5-system radar comparison
3. Dataset Analytics — 3 sub-tabs of Plotly dashboards
4. Drug Fingerprint — Single drug deep-dive profile
5. FDA Live Intelligence — OpenFDA API (Labels / Events / NDC)
6. AI Medical Q&A — HuggingFace Inference API (Mistral-7B)
7. Drug Explorer — Paginated browse & filter table
"""
import gradio as gr
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import joblib, re, os, requests, json, warnings
warnings.filterwarnings("ignore")
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# ────────────────────────────────────────────────────────────────
# 0. LOAD / REBUILD MODELS
# ────────────────────────────────────────────────────────────────
def _clean(text):
    """Normalise one raw dataset cell into lowercase, single-spaced text.

    Missing values (whatever ``pd.isna`` reports) and the placeholder
    strings ``FALSE``/``False``/``false``/``nan``/``NaN``/empty become ``""``.
    Every character other than ``a-z``, ``0-9``, whitespace, ``+``, ``-``,
    ``.`` and ``/`` is replaced by a space, then runs of whitespace are
    squeezed to a single space and the ends are trimmed.
    """
    if pd.isna(text):
        return ""
    stripped = str(text).strip()
    if stripped in {"FALSE", "False", "false", "nan", "NaN", ""}:
        return ""
    lowered = stripped.lower()
    allowed_only = re.sub(r"[^a-z0-9\s\+\-\./]", " ", lowered)
    return re.sub(r"\s+", " ", allowed_only).strip()
def _build_text(row):
    """Compose the text that is fed to TF-IDF for one dataset row.

    The identifying field depends on the row's ``medical_system``:

    * ``Allopathic``  -> ``Generic Name``
    * ``Homeopathic`` -> ``Brand Name``
    * anything else (Ayurvedic, Herbal, Unani, ...) -> ``Generic Name and Strength``

    The cleaned identifier, the cleaned ``Dosages Description`` and the
    lower-cased system name are joined with single spaces; empty parts are
    skipped.

    Args:
        row: mapping-like record (e.g. a ``pandas.Series``) supporting
            ``row["medical_system"]`` and ``row.get(column, default)``.

    Returns:
        The space-joined text (possibly ``""``).
    """
    system = row["medical_system"]
    # A missing system arrives as NaN (a float) from ``read_csv``; calling
    # ``.lower()`` on it raised AttributeError and aborted the whole
    # ``DataFrame.apply``. Such rows now simply get no system token.
    system_token = "" if pd.isna(system) else str(system).lower()

    if system == "Allopathic":
        identity_column = "Generic Name"
    elif system == "Homeopathic":
        identity_column = "Brand Name"
    else:
        identity_column = "Generic Name and Strength"

    identity = _clean(row.get(identity_column, ""))
    dosage = _clean(row.get("Dosages Description", ""))
    return " ".join(part for part in (identity, dosage, system_token) if part)
print("⏳ Loading PharmaBridge models…")

# Raw registry column -> normalised column name used by the rest of the app.
_REMAP = {
    "Brand Name": "brand_name",
    "Generic Name": "generic_name",
    "Dosages Description": "dosage_form",
    "Strength": "strength",
    "Name of the Manufacturer": "manufacturer",
    "Generic Name and Strength": "gns",
}

try:
    # NOTE(security): joblib.load unpickles its input, which can execute
    # arbitrary code. Only load model files produced by this project.
    VEC = joblib.load("models/tfidf_vectorizer.pkl")
    MAT = joblib.load("models/tfidf_matrix.pkl")
    DF = pd.read_csv("models/drug_database.csv")
    print("✅ PKL models loaded.")
except Exception as e:  # any load failure falls back to a full rebuild
    print(f"⚠️ PKL not found ({e}), rebuilding from CSV…")
    raw = pd.read_csv("merged_pharma_dataset.csv")
    DF = raw.copy()
    # Build the TF-IDF text while the raw column names are still in place,
    # because _build_text reads the raw names.
    DF["drug_text"] = DF.apply(_build_text, axis=1)
    DF = DF.rename(columns=_REMAP)
    VEC = TfidfVectorizer(
        ngram_range=(1, 2),
        max_features=15000,
        stop_words=None,
        sublinear_tf=True,
        min_df=1,
    )
    MAT = VEC.fit_transform(DF["drug_text"])
    print("✅ Rebuilt from CSV.")

# Normalise column names (the pre-built CSV may still carry raw names).
for o, n in _REMAP.items():
    if o in DF.columns and n not in DF.columns:
        DF.rename(columns={o: n}, inplace=True)

# Guarantee every column the UI reads exists.
for c in ["brand_name", "generic_name", "dosage_form", "strength",
          "manufacturer", "gns", "drug_text"]:
    if c not in DF.columns:
        DF[c] = ""

# Regenerate drug_text when it is entirely blank. fillna/astype guard against
# an all-NaN (float) column, on which the .str accessor would raise.
if DF["drug_text"].fillna("").astype(str).str.len().sum() == 0:
    # _build_text reads the RAW column names, which were renamed above, so
    # feed it a view with the raw names restored. Skip any raw name that is
    # still present to avoid creating duplicate columns.
    _raw_view = DF.rename(columns={
        new: old for old, new in _REMAP.items()
        if new in DF.columns and old not in DF.columns
    })
    DF["drug_text"] = _raw_view.apply(_build_text, axis=1)

DF = DF.reset_index(drop=True)
# Accent colour per medical system; the key order is the display order.
SC = {
    "Allopathic": "#3B82F6",
    "Ayurvedic": "#10B981",
    "Unani": "#F59E0B",
    "Homeopathic": "#8B5CF6",
    "Herbal": "#EF4444",
}
# Emoji shown next to each medical system.
EMOJI = {
    "Allopathic": "💊",
    "Ayurvedic": "🌿",
    "Unani": "☘️",
    "Homeopathic": "💧",
    "Herbal": "🌱",
}
# Filter choices: the catch-all entry followed by every system in SC order.
SYSTEMS = ["All Systems", *SC]

# Pre-compute for analytics
_SYS_VC, _DOS_VC, _MFR_VC = (
    DF[column].value_counts()
    for column in ("medical_system", "dosage_form", "manufacturer")
)
# Number of distinct manufacturers within each medical system.
_SYS_MFR = DF.groupby("medical_system")["manufacturer"].nunique()
# Feature names of the fitted vectorizer, as an array for fancy indexing.
_FEAT = np.array(VEC.get_feature_names_out())
# ────────────────────────────────────────────────────────────────
# 1. RETRIEVAL HELPERS
# ────────────────────────────────────────────────────────────────
def _encode(q):
    """Turn a free-text query into a TF-IDF row vector.

    The query is lower-cased, every character outside ``a-z``, ``0-9``,
    whitespace, ``+``, ``-``, ``.`` and ``/`` becomes a space, whitespace is
    collapsed, and the result is passed to ``VEC.transform`` as a
    one-document batch.
    """
    lowered = q.lower()
    allowed_only = re.sub(r"[^a-z0-9\s\+\-\./]", " ", lowered)
    normalised = re.sub(r"\s+", " ", allowed_only).strip()
    return VEC.transform([normalised])
def _recommend(query, system, top_n, min_s):
    """Return the rows most similar to *query*, best first.

    Args:
        query: free-text search string.
        system: a ``medical_system`` value to restrict to, or one of
            ``"All Systems"``, ``"All"``, ``""`` for no restriction.
        top_n: maximum number of rows to return.
        min_s: minimum cosine similarity a row must reach.

    Returns:
        A copy of the matching ``DF`` rows with an added ``score`` column
        (similarity rounded to 4 decimals), sorted by score descending and
        re-indexed from 0. An empty ``DataFrame`` when nothing qualifies.
    """
    if top_n <= 0:
        return pd.DataFrame()
    sims = cosine_similarity(_encode(query), MAT).flatten()
    if system not in ("All Systems", "All", ""):
        outside = (DF["medical_system"] != system).values
        # Mask with -inf rather than 0: a 0 still satisfies ``>= min_s`` when
        # the threshold is 0, which let rows from other systems leak in.
        sims[outside] = -np.inf
    ranked = sims.argsort()[-(top_n * 4):][::-1]
    idx = [i for i in ranked if sims[i] >= min_s][:top_n]
    if not idx:
        return pd.DataFrame()
    r = DF.iloc[idx].copy()
    r["score"] = [round(float(sims[i]), 4) for i in idx]
    return r.sort_values("score", ascending=False).reset_index(drop=True)
def _cross(query, tps):
    """Return up to *tps* best matches for *query* from each medical system.

    Args:
        query: free-text search string.
        tps: maximum number of rows to keep per system.

    Returns:
        A ``DataFrame`` of the selected ``DF`` rows plus a ``score`` column
        (similarity rounded to 4 decimals), sorted by ``medical_system``
        ascending then ``score`` descending. An empty ``DataFrame`` when no
        row scores above 0.01 or *tps* is not positive.
    """
    if tps <= 0:
        # ``arr[-0:]`` is the whole array, so 0 used to mean "every row".
        return pd.DataFrame()
    sims = cosine_similarity(_encode(query), MAT).flatten()
    rows = []
    for system in ("Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"):
        in_system = (DF["medical_system"] == system).values
        scoped = np.where(in_system, sims, 0.0)
        for i in scoped.argsort()[-tps:][::-1]:
            # Test the MASKED score. Checking the unmasked ``sims[i]`` let
            # rows of other systems through whenever this system had fewer
            # than *tps* matches, producing duplicate rows.
            if scoped[i] <= 0.01:
                continue
            record = DF.iloc[i].to_dict()
            record["score"] = round(float(scoped[i]), 4)
            rows.append(record)
    if not rows:
        return pd.DataFrame()
    return (
        pd.DataFrame(rows)
        .sort_values(["medical_system", "score"], ascending=[True, False])
        .reset_index(drop=True)
    )
# ────────────────────────────────────────────────────────────────
# 2. TAB 1 — SMART DRUG SEARCH
# ────────────────────────────────────────────────────────────────
def tab1(query, system, top_n, min_s):
    """Smart Drug Search callback: result cards, score chart and summary.

    Args:
        query: free-text search string typed by the user.
        system: medical-system filter passed through to ``_recommend``.
        top_n: maximum number of results (coerced with ``int``).
        min_s: minimum similarity (coerced with ``float``).

    Returns:
        ``(cards_html, figure, summary_markdown)``. ``figure`` is ``None``
        and the summary is ``""`` when the query is blank or nothing matched.
    """
    from html import escape  # local import keeps this edit self-contained

    def _cell(value, default="—"):
        """Return *value* as display text, or *default* when missing/blank."""
        if pd.isna(value):
            return default
        text = str(value)
        return text if text.strip() else default

    if not query or not query.strip():
        return (
            "\n🔍 Type a drug name, compound, or symptom above and press Search\n",
            None,
            "",
        )

    r = _recommend(query, system, int(top_n), float(min_s))
    # The query is user input that ends up inside HTML, so escape it.
    shown_query = escape(query)
    if r.empty:
        return (
            f"No results found for {shown_query}. Try lowering the similarity threshold.\n",
            None,
            "",
        )

    parts = [f'Found {len(r)} results for "{shown_query}"\n']
    for _, row in r.iterrows():
        system_name = _cell(row.get("medical_system"), "")
        em = EMOJI.get(system_name, "💊")
        bn = _cell(row.get("brand_name"))
        # str(NaN) is the truthy string "nan", which used to block the
        # fallback to generic_name; _cell maps missing values to "" instead.
        gn = _cell(row.get("gns"), "") or _cell(row.get("generic_name"))
        dos = _cell(row.get("dosage_form"))
        mfr = _cell(row.get("manufacturer"))[:38]
        pct = int(float(row.get("score", 0)) * 100)
        gn_short = gn[:70] + "…" if len(gn) > 70 else gn
        parts.append(
            f"\n{em} {escape(system_name)}\n"
            f"{pct}%\n"
            f"{escape(bn)}\n"
            f"{escape(gn_short)}\n"
            f"💊 {escape(dos)} · 🏭 {escape(mfr)}\n"
        )
    parts.append("\n")
    cards = "".join(parts)

    top_rows = r.head(15)
    fig = px.bar(
        top_rows, x="score", y="brand_name", color="medical_system",
        color_discrete_map=SC, orientation="h",
        labels={"score": "Similarity Score", "brand_name": ""},
        title=f'Similarity Scores — "{query}"',
    )
    fig.update_layout(
        height=max(340, len(top_rows) * 30 + 90),
        paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, title=None),
        margin=dict(l=0, r=10, t=50, b=10), yaxis=dict(autorange="reversed"),
        xaxis=dict(range=[0, 1], gridcolor="#f1f5f9"),
    )

    dist = r["medical_system"].value_counts().to_dict()
    stat = " · ".join(f"**{k}** {v}" for k, v in dist.items())
    return cards, fig, f"📊 {stat}"
# ────────────────────────────────────────────────────────────────
# 3. TAB 2 — CROSS-SYSTEM COMPARE
# ────────────────────────────────────────────────────────────────
def tab2(query, tps):
    """Cross-System Compare callback: per-system match list plus radar chart.

    Args:
        query: free-text search string typed by the user.
        tps: matches to show per system (coerced with ``int``).

    Returns:
        ``(html, figure)``. ``figure`` is ``None`` when the query is blank
        or no system produced a match. The radar plots the mean match score
        of each system (0 for a system without matches).
    """
    from html import escape  # local import keeps this edit self-contained

    def _cell(value, default="—"):
        """Return *value* as display text, or *default* when missing/blank."""
        if pd.isna(value):
            return default
        text = str(value)
        return text if text.strip() else default

    systems = ["Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]

    if not query or not query.strip():
        return "Enter a query to compare drugs across all 5 medical traditions\n", None
    r = _cross(query, int(tps))
    if r.empty:
        return "No cross-system results found.\n", None

    # The query is user input that ends up inside HTML, so escape it.
    parts = [f'Cross-system view for "{escape(query)}"\n']
    for system in systems:
        sub = r[r["medical_system"] == system]
        parts.append(f"\n{EMOJI[system]} {system}\n")
        if sub.empty:
            parts.append("\nNo match found\n")
        else:
            for _, row in sub.iterrows():
                bn = _cell(row.get("brand_name"))
                # _cell turns NaN into "" so the generic_name fallback fires
                # (str(NaN) == "nan" is truthy and used to block it).
                gn = _cell(row.get("gns"), "") or _cell(row.get("generic_name"))
                dos = _cell(row.get("dosage_form"))
                pct = int(float(row.get("score", 0)) * 100)
                gn_short = gn[:48] + "…" if len(gn) > 48 else gn
                parts.append(
                    f"\n{escape(bn)}\n"
                    f"{escape(gn_short)}\n"
                    f"{escape(dos)} · {pct}%\n"
                )
        parts.append("\n")
    parts.append("\n")
    html = "".join(parts)

    # Radar chart
    avgs = {}
    for system in systems:
        scores = r.loc[r["medical_system"] == system, "score"]
        avgs[system] = float(scores.mean()) if not scores.empty else 0
    cats = list(avgs.keys())
    vals = list(avgs.values())
    fig = go.Figure(go.Scatterpolar(
        # Repeat the first point so the polygon closes.
        r=vals + [vals[0]], theta=cats + [cats[0]], fill="toself",
        fillcolor="rgba(59,130,246,0.12)", line=dict(color="#3B82F6", width=2.5),
        marker=dict(size=9, color=[SC[s] for s in cats] + [SC[cats[0]]]),
    ))
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 1], gridcolor="#e5e7eb"),
            angularaxis=dict(gridcolor="#e5e7eb", tickfont=dict(size=12)),
        ),
        title=dict(text=f'Cross-System Radar — "{query}"',
                   font=dict(size=13, color="#1e293b")),
        paper_bgcolor="rgba(0,0,0,0)", font=dict(family="Inter,sans-serif"),
        height=380, showlegend=False, margin=dict(l=50, r=50, t=60, b=30),
    )
    return html, fig
# ────────────────────────────────────────────────────────────────
# 4. TAB 3 — DATASET ANALYTICS (3 sub-views)
# ────────────────────────────────────────────────────────────────
def _overview_fig():
    """Assemble the six-panel dataset overview dashboard.

    Panels by (row, col): (1,1) system share donut, (1,2) top 12 dosage
    forms, (1,3) distinct manufacturers per system, (2,1) top 15
    manufacturers, (2,2) system x dosage-form heatmap for the 8 most common
    forms, (2,3) number of TF-IDF features whose mean weight within a system
    exceeds 0.001.

    Returns:
        The ``make_subplots`` figure with all traces and layout applied.
    """
    systems = ["Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]

    def _white(width):
        """White outline of the given width."""
        return dict(color="white", width=width)

    def _palette(names):
        """Accent colour per name, grey for unknown systems."""
        return [SC.get(name, "#aaa") for name in names]

    fig = make_subplots(
        rows=2,
        cols=3,
        subplot_titles=[
            "System Share",
            "Top 12 Dosage Forms",
            "Manufacturers per System",
            "Top 15 Manufacturers",
            "System × Dosage Heatmap",
            "TF-IDF Vocab Share",
        ],
        specs=[
            [{"type": "domain"}, {"type": "xy"}, {"type": "xy"}],
            [{"type": "xy"}, {"type": "xy"}, {"type": "domain"}],
        ],
        vertical_spacing=0.14,
        horizontal_spacing=0.08,
    )

    # (1,1) system share donut
    share = go.Pie(
        labels=_SYS_VC.index.tolist(),
        values=_SYS_VC.values.tolist(),
        hole=0.55,
        marker=dict(colors=_palette(_SYS_VC.index), line=_white(2.5)),
        textinfo="label+percent",
        textfont=dict(size=10),
        showlegend=False,
    )
    fig.add_trace(share, row=1, col=1)

    # (1,2) dosage-form bar, reversed so the largest bar ends up on top
    top_dosage = _DOS_VC.head(12)
    dosage_counts = top_dosage.values[::-1]
    dosage_bar = go.Bar(
        x=dosage_counts,
        y=top_dosage.index[::-1].tolist(),
        orientation="h",
        marker=dict(color=px.colors.sequential.Blues_r[:12], line=_white(1)),
        text=[f"{count:,}" for count in dosage_counts],
        textposition="outside",
        showlegend=False,
    )
    fig.add_trace(dosage_bar, row=1, col=2)

    # (1,3) manufacturers per system
    mfr_per_system = go.Bar(
        x=_SYS_MFR.index.tolist(),
        y=_SYS_MFR.values.tolist(),
        marker=dict(color=_palette(_SYS_MFR.index), line=_white(2)),
        text=_SYS_MFR.values.tolist(),
        textposition="outside",
        showlegend=False,
    )
    fig.add_trace(mfr_per_system, row=1, col=3)

    # (2,1) top 15 manufacturers, names clipped to 28 characters
    top_mfr = _MFR_VC.head(15)
    mfr_counts = top_mfr.values[::-1].tolist()
    mfr_bar = go.Bar(
        y=[name[:28] for name in top_mfr.index[::-1].tolist()],
        x=mfr_counts,
        orientation="h",
        marker=dict(color=mfr_counts, colorscale="Viridis",
                    showscale=False, line=_white(1)),
        showlegend=False,
    )
    fig.add_trace(mfr_bar, row=2, col=1)

    # (2,2) system x dosage-form heatmap
    top_forms = _DOS_VC.head(8).index.tolist()
    pivot = pd.crosstab(DF["medical_system"], DF["dosage_form"])
    z = []
    for system in systems:
        z.append([
            int(pivot[form].get(system, 0)) if form in pivot.columns else 0
            for form in top_forms
        ])
    heat = go.Heatmap(
        z=z,
        x=[form[:12] for form in top_forms],
        y=systems,
        colorscale="YlOrRd",
        text=z,
        texttemplate="%{text}",
        textfont=dict(size=9),
        showscale=True,
        colorbar=dict(thickness=10, x=0.65, len=0.42),
    )
    fig.add_trace(heat, row=2, col=2)

    # (2,3) vocab share: features with mean weight > 0.001 inside a system
    vocab = {}
    for system in systems:
        in_system = (DF["medical_system"] == system).values
        mean_weights = np.asarray(MAT[in_system].mean(axis=0)).flatten()
        vocab[system] = int((mean_weights > 0.001).sum())
    vocab_pie = go.Pie(
        labels=list(vocab.keys()),
        values=list(vocab.values()),
        hole=0.5,
        marker=dict(colors=_palette(vocab), line=_white(2)),
        textinfo="label+value",
        textfont=dict(size=10),
        showlegend=False,
    )
    fig.add_trace(vocab_pie, row=2, col=3)

    fig.update_layout(
        height=720,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        title=dict(
            text="PharmaBridge — Dataset Intelligence Dashboard",
            font=dict(size=16, color="#1e293b"),
            x=0.5,
        ),
        margin=dict(l=10, r=10, t=80, b=10),
    )
    fig.update_xaxes(showgrid=True, gridcolor="#f1f5f9", zeroline=False)
    fig.update_yaxes(showgrid=False)
    return fig
def _deep_fig(sel):
    """Build the four-panel deep-dive figure for one medical system.

    Args:
        sel: a ``medical_system`` value, or ``"All"`` for the whole dataset.

    Returns:
        A 2x2 figure: top 20 compounds, dosage-form split (top 8), top 10
        manufacturers, and distinct brand counts for every system with the
        selected one highlighted.
    """
    if sel == "All":
        sub = DF
    else:
        sub = DF[DF["medical_system"] == sel]
    accent = SC.get(sel, "#3B82F6")

    def _white(width):
        """White outline of the given width."""
        return dict(color="white", width=width)

    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=[
            f"Top 20 Compounds ({sel})",
            "Dosage Form Split",
            "Top 10 Manufacturers",
            "Brand Count Comparison",
        ],
        specs=[
            [{"type": "xy"}, {"type": "domain"}],
            [{"type": "xy"}, {"type": "xy"}],
        ],
        vertical_spacing=0.16,
        horizontal_spacing=0.10,
    )

    # compound: which column identifies a "compound" depends on the system
    compound_column = {
        "Homeopathic": "brand_name",
        "Allopathic": "generic_name",
    }.get(sel, "gns")
    compounds = sub[compound_column].dropna().value_counts().head(20)
    compound_counts = compounds.values[::-1].tolist()
    fig.add_trace(
        go.Bar(
            x=compound_counts,
            y=compounds.index[::-1].tolist(),
            orientation="h",
            marker=dict(color=accent, opacity=0.85, line=_white(1)),
            text=compound_counts,
            textposition="outside",
            showlegend=False,
        ),
        row=1, col=1,
    )

    # dosage donut
    dosage = sub["dosage_form"].value_counts().head(8)
    fig.add_trace(
        go.Pie(
            labels=dosage.index.tolist(),
            values=dosage.values.tolist(),
            hole=0.48,
            marker=dict(colors=px.colors.qualitative.Set3[:len(dosage)],
                        line=_white(2)),
            textinfo="label+percent",
            textfont=dict(size=10),
            showlegend=False,
        ),
        row=1, col=2,
    )

    # top manufacturers, names clipped to 26 characters
    makers = sub["manufacturer"].value_counts().head(10)
    maker_counts = makers.values[::-1].tolist()
    fig.add_trace(
        go.Bar(
            x=maker_counts,
            y=[name[:26] for name in makers.index[::-1].tolist()],
            orientation="h",
            marker=dict(color=maker_counts, colorscale="Blues",
                        showscale=False, line=_white(1)),
            showlegend=False,
        ),
        row=2, col=1,
    )

    # brand count across ALL systems; only the selected bar gets the accent
    brand_counts = (
        DF.groupby("medical_system")["brand_name"]
        .nunique()
        .sort_values(ascending=False)
    )
    bar_colours = [accent if name == sel else "#cbd5e1"
                   for name in brand_counts.index]
    fig.add_trace(
        go.Bar(
            x=brand_counts.index.tolist(),
            y=brand_counts.values.tolist(),
            marker=dict(color=bar_colours, line=_white(2)),
            text=brand_counts.values.tolist(),
            textposition="outside",
            showlegend=False,
        ),
        row=2, col=2,
    )

    fig.update_layout(
        height=680,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        title=dict(text=f"Deep Dive: {sel}",
                   font=dict(size=15, color="#1e293b"), x=0.5),
        margin=dict(l=10, r=10, t=70, b=10),
    )
    fig.update_xaxes(showgrid=True, gridcolor="#f1f5f9", zeroline=False)
    fig.update_yaxes(showgrid=False)
    return fig
def _treemap_fig():
    """Build the medical system -> dosage form treemap.

    Only (system, dosage form) pairs with at least 5 rows are drawn.
    """
    pair_counts = (
        DF.groupby(["medical_system", "dosage_form"])
        .size()
        .reset_index(name="count")
    )
    frequent = pair_counts[pair_counts["count"] >= 5]
    fig = px.treemap(
        frequent,
        path=["medical_system", "dosage_form"],
        values="count",
        color="medical_system",
        color_discrete_map=SC,
        title="Drug Hierarchy: Medical System → Dosage Form",
    )
    fig.update_traces(
        textinfo="label+value+percent parent",
        textfont=dict(size=12),
    )
    fig.update_layout(
        height=520,
        paper_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=12),
        title=dict(font=dict(size=15, color="#1e293b"), x=0.5),
        margin=dict(l=10, r=10, t=60, b=10),
    )
    return fig
def tab3_deep_update(sel):
    """Callback wrapper: return the deep-dive figure for *sel*."""
    figure = _deep_fig(sel)
    return figure
# ────────────────────────────────────────────────────────────────
# 5. TAB 4 — DRUG FINGERPRINT (single drug profile)
# ────────────────────────────────────────────────────────────────
def tab4_fingerprint(brand_query):
    """Profile the single best-matching drug and chart its TF-IDF weights.

    Args:
        brand_query: free-text name typed by the user.

    Returns:
        ``(html, figure)``. ``figure`` is ``None`` when the query is blank
        or the best cosine similarity is below 0.01. Otherwise the figure is
        a horizontal bar chart of the (up to 20) highest non-zero TF-IDF
        feature weights of the matched row.
    """
    from html import escape  # local import keeps this edit self-contained

    def _present(value):
        """True when *value* is neither missing (NaN/None) nor blank."""
        return not pd.isna(value) and bool(str(value).strip())

    def _cell(value, default="—"):
        """Return *value* as display text, or *default* when missing/blank."""
        return str(value) if _present(value) else default

    if not brand_query or not brand_query.strip():
        return "Enter a brand name to see its full drug profile\n", None

    # Find best match
    sims = cosine_similarity(_encode(brand_query), MAT).flatten()
    idx = int(sims.argsort()[-1])
    row = DF.iloc[idx]
    sc_v = float(sims[idx])
    if sc_v < 0.01:
        # The query is user input that ends up inside HTML, so escape it.
        return f'No drug found matching "{escape(brand_query)}".\n', None

    sys_n = _cell(row.get("medical_system"), "")
    c = SC.get(sys_n, "#6B7280")
    em = EMOJI.get(sys_n, "💊")
    bn = _cell(row.get("brand_name"))
    # _cell turns NaN into "" so the generic_name fallback fires
    # (str(NaN) == "nan" is truthy and used to block it).
    gn = _cell(row.get("gns"), "") or _cell(row.get("generic_name"))
    dos = _cell(row.get("dosage_form"))
    mfr = _cell(row.get("manufacturer"))
    clu = _cell(row.get("cluster"))
    dart = _cell(row.get("DAR"))  # "—" when the column does not exist
    txt = _cell(row.get("drug_text"), "")

    # Siblings: other rows of the same system sharing the compound identity.
    # Use the same fallback as the display (gns, else generic_name). A NaN
    # gns used to match nothing and produce a count of -1.
    sib_count = 0
    for key_column in ("gns", "generic_name"):
        key_value = row.get(key_column)
        if _present(key_value):
            same = (
                (DF["medical_system"] == row.get("medical_system"))
                & (DF[key_column] == key_value)
            )
            sib_count = max(0, int(same.sum()) - 1)  # minus the drug itself
            break

    html = (
        f"\n{em} {escape(sys_n)}\n"
        f"{escape(bn)}\n"
        f"{escape(gn[:80])}\n"
        f"{int(sc_v * 100)}% match confidence\n"
        f"💊 Dosage Form{escape(dos)}\n"
        f"🏭 Manufacturer{escape(mfr[:40])}\n"
        f"🧬 Medical System{escape(sys_n)}\n"
        f"📂 Cluster#{escape(clu)}\n"
        f"📋 DAR Number{escape(dart)}\n"
        f"👥 Same-compound drugs{sib_count}\n"
        f"Drug Text (TF-IDF input): {escape(txt[:120])}\n"
    )

    # Top TF-IDF features for this drug (densify the row only once).
    weights = np.asarray(MAT[idx].todense()).flatten()
    feat_idx = weights.argsort()[-20:][::-1]
    feat_scores = weights[feat_idx]
    feat_labels = _FEAT[feat_idx]
    nonzero = feat_scores > 0
    feat_labels = feat_labels[nonzero]
    feat_scores = feat_scores[nonzero]

    fig = go.Figure(go.Bar(
        x=feat_scores[::-1], y=feat_labels[::-1],
        orientation="h",
        marker=dict(
            color=feat_scores[::-1],
            colorscale=[[0, "#dbeafe"], [1, c]],
            showscale=False,
            line=dict(color="white", width=1),
        ),
        text=[f"{v:.3f}" for v in feat_scores[::-1]],
        textposition="outside",
    ))
    fig.update_layout(
        title=dict(text=f"TF-IDF Feature Fingerprint: {bn}",
                   font=dict(size=13, color="#1e293b")),
        height=max(300, len(feat_labels) * 28 + 80),
        paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        margin=dict(l=10, r=60, t=50, b=10),
        xaxis=dict(gridcolor="#f1f5f9", title="TF-IDF Weight"),
        yaxis=dict(title=""),
    )
    return html, fig
# ────────────────────────────────────────────────────────────────
# 6. TAB 5 — FDA LIVE INTELLIGENCE
# ────────────────────────────────────────────────────────────────
# Local drug name -> term used when querying openFDA. Most entries map to
# themselves in lower case; a few differ (e.g. Paracetamol -> acetaminophen).
FDA_NAME_MAP = {
    "Paracetamol": "acetaminophen",
    "Azithromycin": "azithromycin",
    "Ciprofloxacin": "ciprofloxacin",
    "Amoxicillin": "amoxicillin",
    "Omeprazole": "omeprazole",
    "Metformin": "metformin",
    "Atorvastatin": "atorvastatin",
    "Amlodipine": "amlodipine",
    "Ceftriaxone": "ceftriaxone",
    "Diclofenac": "diclofenac sodium",
    "Esomeprazole": "esomeprazole",
    "Cefixime": "cefixime",
    "Salbutamol": "albuterol",
    "Ibuprofen": "ibuprofen",
    "Metronidazole": "metronidazole",
    "Cefuroxime": "cefuroxime",
}
def _fda_fetch(drug, endpoint):
    """Query one openFDA drug endpoint by generic name, then by brand name.

    Args:
        drug: drug name as typed by the user. ``FDA_NAME_MAP`` aliases are
            applied case-insensitively.
        endpoint: openFDA drug endpoint name (``"label"``, ``"event"`` or
            ``"ndc"``).

    Returns:
        ``(results, term)``: up to 3 result records from the first search
        that returned any (``[]`` when none did or every request failed),
        and the lower-cased term that was searched.
    """
    name = drug.strip().lower()
    aliases = {local.lower(): us_name for local, us_name in FDA_NAME_MAP.items()}
    # Drop double quotes so the term cannot break out of the quoted phrase.
    term = aliases.get(name, name).replace('"', " ").strip()

    # The name fields live at a different path on each endpoint; the
    # ``openfda.*`` path only exists on label records.
    # NOTE(review): paths written from memory of the openFDA docs — confirm.
    prefix = {
        "label": "openfda.",
        "event": "patient.drug.openfda.",
        "ndc": "",
    }.get(endpoint, "openfda.")

    base = f"https://api.fda.gov/drug/{endpoint}.json"
    for field in ("generic_name", "brand_name"):
        # Quote the term so multi-word names are matched as one phrase.
        search = f'{prefix}{field}:"{term}"'
        try:
            resp = requests.get(
                base, params={"search": search, "limit": "3"}, timeout=9
            )
            if resp.status_code == 200:
                results = resp.json().get("results", [])
                if results:
                    return results, term
        except (requests.RequestException, ValueError):
            # Network failure or a non-JSON body: best effort, try the next
            # field instead of crashing the UI callback.
            continue
    return [], term
def tab5_fda(drug, ep_label):
    """FDA Live Intelligence callback: render up to 3 openFDA records.

    Args:
        drug: drug name typed by the user.
        ep_label: UI label of the endpoint (``"Drug Labels"``,
            ``"Adverse Events (FAERS)"`` or ``"NDC Directory"``); anything
            else falls back to the label endpoint.

    Returns:
        One HTML string: a prompt when *drug* is blank, a "not found" notice
        when the lookup returned nothing, otherwise a header followed by one
        section per record.
    """
    from html import escape  # local import keeps this edit self-contained

    def _joined(value, limit=None):
        """Render a scalar or list API field as escaped, comma-joined text."""
        if isinstance(value, (list, tuple)):
            items = [str(item) for item in value[:limit]]
            return escape(", ".join(items)) or "—"
        return escape(str(value)) if value not in (None, "") else "—"

    def _first(record, key, limit):
        """First entry of a list-valued label section, clipped to *limit*."""
        values = record.get(key)
        if not values:
            return "—"
        head = values if isinstance(values, str) else values[0]
        return escape(str(head)[:limit])

    if not drug or not drug.strip():
        return "🏥 Enter a drug name to fetch live FDA data\n"

    ep_map = {
        "Drug Labels": "label",
        "Adverse Events (FAERS)": "event",
        "NDC Directory": "ndc",
    }
    ep = ep_map.get(ep_label, "label")
    results, term = _fda_fetch(drug, ep)
    # User input and API payloads are untrusted: escape before embedding.
    shown_drug = escape(drug)
    if not results:
        return (
            "\n🔍\n"
            f'No FDA data found for "{shown_drug}"\n'
            "This drug may not be in the US FDA database (common for Bangladesh-registry drugs).\n"
            "Try: Paracetamol · Azithromycin · Ciprofloxacin · Omeprazole · Metformin · Ibuprofen\n"
        )

    parts = [
        f"\n🇺🇸 FDA {escape(str(ep_label))}\n"
        f"{shown_drug} → searched as {escape(term)}\n"
        f"{len(results)} record(s)\n"
    ]
    for i, res in enumerate(results[:3], 1):
        if ep == "label":
            o = res.get("openfda", {})
            parts.append(
                f"\n📄 Record {i}\n"
                f"| Brand Name | {_joined(o.get('brand_name', ['—']), 2)} |\n"
                f"| Generic Name | {_joined(o.get('generic_name', ['—']), 2)} |\n"
                f"| Manufacturer | {_joined(o.get('manufacturer_name', ['—']), 1)} |\n"
                f"| Purpose | {_first(res, 'purpose', 280)} |\n"
                f"| Indications | {_first(res, 'indications_and_usage', 380)} |\n"
                f"| Warnings | {_first(res, 'warnings', 280)} |\n"
            )
        elif ep == "event":
            pt = res.get("patient", {})
            rxn = ", ".join(
                str(item.get("reactionmeddrapt", ""))
                for item in pt.get("reaction", [])[:6]
            )
            drg = ", ".join(
                str(item.get("medicinalproduct", ""))
                for item in pt.get("drug", [])[:4]
            )
            serious = str(res.get("serious")) == "1"
            sev = "⚠️ Serious" if serious else "ℹ️ Non-Serious"
            parts.append(
                f"\nEvent {i} — {sev}\n"
                f"| Reactions | {escape(rxn) or '—'} |\n"
                f"| Drugs Involved | {escape(drg) or '—'} |\n"
            )
        elif ep == "ndc":
            # Some NDC fields (e.g. route) are lists; _joined avoids printing
            # a Python list repr such as "['ORAL']".
            parts.append(
                f"\nNDC {i}\n"
                f"| NDC Code | {_joined(res.get('product_ndc', '—'))} |\n"
                f"| Brand | {_joined(res.get('brand_name', '—'))} |\n"
                f"| Generic | {_joined(res.get('generic_name', '—'))} |\n"
                f"| Dosage Form | {_joined(res.get('dosage_form', '—'))} |\n"
                f"| Route | {_joined(res.get('route', '—'))} |\n"
                f"| Labeler | {_joined(res.get('labeler_name', '—'))} |\n"
            )
    return "".join(parts)
# ────────────────────────────────────────────────────────────────
# 7. TAB 6 — AI MEDICAL Q&A (HuggingFace Inference API)
# ────────────────────────────────────────────────────────────────
# System prompt prepended to every question sent to the hosted model.
SYS_PROMPT = " ".join([
    "You are PharmaBridge AI — a knowledgeable, friendly pharmaceutical assistant.",
    "You help healthcare professionals and students understand drug information,",
    "pharmacology, traditional medicine (Ayurvedic, Unani, Homeopathic, Herbal),",
    "drug interactions, and the Bangladesh drug registry.",
    "Be concise, accurate, and always note that answers are educational,",
    "not a substitute for professional medical advice.",
])

# Hosted models, tried in this order until one returns a usable answer.
HF_MODELS = [
    "mistralai/Mistral-7B-Instruct-v0.3",
    "HuggingFaceH4/zephyr-7b-beta",
    "google/flan-t5-xxl",
]
def tab6_ai(question, history):
    """AI Q&A callback: ask the hosted models in turn and extend the chat.

    Each model in ``HF_MODELS`` is tried in order; the first response whose
    generated text is longer than 30 characters becomes the answer. When no
    model answers, a fixed "warming up" notice is used instead.

    Args:
        question: the user's question.
        history: existing chat history (list of ``(question, answer)``
            pairs) or ``None``.

    Returns:
        ``(history, "")``: the history with the new pair appended, and an
        empty string (the second output of the callback). A blank question
        returns *history* unchanged.
    """
    if not question or not question.strip():
        return history, ""

    # Copy so the caller's list is never mutated in place.
    history = list(history or [])
    prompt = f"[INST] {SYS_PROMPT}\n\nQuestion: {question} [/INST]"
    headers = {"Content-Type": "application/json"}
    # Optional: send a token when one is configured in the environment;
    # requests stay anonymous (as before) otherwise.
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    answer = ""
    for model in HF_MODELS:
        model_url = f"https://api-inference.huggingface.co/models/{model}"
        if "flan" in model_url:
            # flan-t5 uses different format
            payload = {
                "inputs": f"As a pharmacist, answer clearly: {question}",
                "parameters": {"max_new_tokens": 350},
            }
        else:
            payload = {
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 500,
                    "temperature": 0.65,
                    "top_p": 0.9,
                    "repetition_penalty": 1.1,
                    "return_full_text": False,
                },
            }
        try:
            resp = requests.post(model_url, headers=headers, json=payload, timeout=28)
            if resp.status_code != 200:
                continue
            data = resp.json()
        except (requests.RequestException, ValueError):
            # Network failure or a non-JSON body: try the next model.
            continue
        # The payload is either a list of generations or a single object.
        first = data[0] if isinstance(data, list) and data else data
        text = first.get("generated_text", "") if isinstance(first, dict) else ""
        text = str(text).strip()
        if len(text) > 30:
            answer = text
            break

    if not answer:
        answer = (
            "⚠️ The AI model is warming up (HuggingFace free tier cold-start). "
            "Please wait ~20 seconds and try again.\n\n"
            "**Meanwhile**, you can:\n"
            "- Use the **Smart Search** tab to look up this drug directly\n"
            "- Use the **FDA Live Data** tab for official drug information"
        )
    history.append((question, answer))
    return history, ""
def tab6_clear():
    """Reset the chat: return an empty history and an empty string."""
    empty_history = []
    empty_text = ""
    return empty_history, empty_text
# ────────────────────────────────────────────────────────────────
# 8. TAB 7 — DRUG EXPLORER (browse & filter)
# ────────────────────────────────────────────────────────────────
# Every dosage form in the dataset, sorted, behind the catch-all "All".
_ALL_DOS = ["All", *sorted(DF["dosage_form"].dropna().unique().tolist())]


def _dos_choices(sys):
    """Return a Gradio update with the dosage forms available for *sys*.

    ``"All"`` offers every dosage form in the dataset; any other value offers
    only the forms that occur within that medical system. The selection is
    always reset to ``"All"``.
    """
    if sys == "All":
        options = _ALL_DOS
    else:
        forms = DF.loc[DF["medical_system"] == sys, "dosage_form"]
        options = ["All", *sorted(forms.dropna().unique().tolist())]
    return gr.update(choices=options, value="All")
def tab7_explore(system, dosage, search, page):
    """Drug Explorer callback: filter the dataset and render one page.

    Args:
        system: ``medical_system`` to keep, or ``"All"``.
        dosage: ``dosage_form`` to keep, or ``"All"``.
        search: case-insensitive substring matched against brand name, gns,
            generic name and manufacturer. Blank disables the filter.
        page: 1-based page number; clamped to the valid range.

    Returns:
        ``(table_html, status_markdown)`` for a page of 20 records, or a
        "no records" notice with ``"0 records"``.
    """
    from html import escape  # local import keeps this edit self-contained

    def _cell(value, default="—"):
        """Return *value* as display text, or *default* when missing/blank."""
        if pd.isna(value):
            return default
        text = str(value)
        return text if text.strip() else default

    sub = DF  # boolean indexing below creates new frames; no copy needed
    if system != "All":
        sub = sub[sub["medical_system"] == system]
    if dosage != "All":
        sub = sub[sub["dosage_form"] == dosage]

    needle = (search or "").lower().strip()
    if needle:
        hit = pd.Series(False, index=sub.index)
        for column in ("brand_name", "gns", "generic_name", "manufacturer"):
            # regex=False: user text such as "(" must not be parsed as a
            # regular expression. fillna/astype keep .str usable on columns
            # that are not pure strings.
            text = sub[column].fillna("").astype(str).str.lower()
            hit |= text.str.contains(needle, regex=False)
        sub = sub[hit]

    total = len(sub)
    PG = 20
    try:
        page = int(page)
    except (TypeError, ValueError):
        page = 1  # cleared or invalid page input
    maxp = max(1, (total + PG - 1) // PG)
    page = min(max(1, page), maxp)
    sl = sub.iloc[(page - 1) * PG: page * PG]
    if sl.empty:
        return "No records match your filters.\n", "0 records"

    row_parts = []
    for _, row in sl.iterrows():
        sys_n = _cell(row.get("medical_system"), "")
        em = EMOJI.get(sys_n, "💊")
        bn = _cell(row.get("brand_name"))
        # _cell turns NaN into "" so the generic_name fallback fires
        # (str(NaN) == "nan" is truthy and used to block it).
        gn = _cell(row.get("gns"), "") or _cell(row.get("generic_name"))
        dos_v = _cell(row.get("dosage_form"))
        mfr = _cell(row.get("manufacturer"))[:36]
        row_parts.append(
            f"\n| {escape(bn)} |\n"
            f"{escape(gn[:46])} |\n"
            f"{escape(dos_v)} |\n"
            f"{em} {escape(sys_n)} |\n"
            f"{escape(mfr)} |\n"
        )
    rows = "".join(row_parts)
    tbl = (
        "\n| Brand Name | Compound / Identity |\n"
        "Dosage Form | System | Manufacturer |\n"
        f"{rows}\n"
    )
    return tbl, f"Page **{page}** / {maxp} · **{total:,}** records"
# ────────────────────────────────────────────────────────────────
# 9. CSS
# ────────────────────────────────────────────────────────────────
CSS="""
@import url('https://fonts.googleapis.com/css2?family=Inter:ital,wght@0,300;0,400;0,500;0,600;0,700;0,800;1,400&display=swap');
*{box-sizing:border-box}
body,.gradio-container{font-family:'Inter',sans-serif!important;background:#f0f4f8!important}
/* ── HEADER ─────────────────────────────────────────────── */
.app-hdr{
background:linear-gradient(135deg,#0f172a 0%,#1e3a8a 45%,#0369a1 100%);
border-radius:18px;padding:28px 32px;margin-bottom:4px;color:#fff;
box-shadow:0 10px 40px rgba(30,58,138,.35);
}
.app-title{font-size:2.1rem;font-weight:800;letter-spacing:-1px;margin:0}
.app-sub{font-size:1rem;opacity:.82;margin:6px 0 0}
.hbadges{display:flex;gap:8px;margin-top:14px;flex-wrap:wrap}
.hbadge{background:rgba(255,255,255,.16);border:1px solid rgba(255,255,255,.28);
border-radius:20px;padding:4px 13px;font-size:.78rem;font-weight:500}
.stats-row{display:flex;gap:10px;margin-top:16px;flex-wrap:wrap}
.stat{background:rgba(255,255,255,.12);border-radius:12px;padding:8px 16px;text-align:center;min-width:88px}
.sn{font-size:1.45rem;font-weight:800;display:block}
.sl{font-size:.7rem;opacity:.78;text-transform:uppercase;letter-spacing:.5px}
/* ── TABS ────────────────────────────────────────────────── */
.tab-nav button{font-weight:500!important;font-size:.88rem!important;border-radius:8px 8px 0 0!important}
.tab-nav button.selected{color:#1d4ed8!important;border-bottom:3px solid #1d4ed8!important;font-weight:700!important}
/* ── INPUTS ──────────────────────────────────────────────── */
.gr-input,textarea,.gr-dropdown select{
border-radius:10px!important;border:1.5px solid #e2e8f0!important;
font-family:'Inter',sans-serif!important;transition:border-color .2s!important;
}
.gr-input:focus,textarea:focus{border-color:#3b82f6!important;box-shadow:0 0 0 3px rgba(59,130,246,.1)!important}
.gr-button-primary{
background:linear-gradient(135deg,#1d4ed8,#0891b2)!important;
border:none!important;border-radius:10px!important;font-weight:700!important;
letter-spacing:.2px!important;box-shadow:0 4px 14px rgba(29,78,216,.3)!important;
transition:transform .15s,box-shadow .15s!important;
}
.gr-button-primary:hover{transform:translateY(-1px)!important;box-shadow:0 6px 22px rgba(29,78,216,.4)!important}
/* ── PLACEHOLDERS ────────────────────────────────────────── */
.ph{text-align:center;color:#94a3b8;padding:60px 20px;font-size:.98rem;
background:#f8fafc;border-radius:14px;border:2px dashed #e2e8f0}
/* ── RESULT CARDS ────────────────────────────────────────── */
.rh{font-size:.93rem;color:#475569;padding:10px 0 14px;
border-bottom:1px solid #e2e8f0;margin-bottom:14px}
.grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(270px,1fr));gap:12px}
.card{background:#fff;border-radius:13px;padding:14px 16px;
box-shadow:0 1px 4px rgba(0,0,0,.06);transition:transform .15s,box-shadow .15s}
.card:hover{transform:translateY(-2px);box-shadow:0 5px 18px rgba(0,0,0,.10)}
.ch{display:flex;justify-content:space-between;align-items:center;margin-bottom:8px}
.sbadge{font-size:.71rem;font-weight:600;padding:3px 9px;border-radius:20px;white-space:nowrap}
.spct{font-size:.74rem;font-weight:700;padding:3px 9px;border-radius:20px}
.bn{font-size:1.05rem;font-weight:700;color:#1e293b;margin-bottom:4px}
.gn{font-size:.81rem;color:#64748b;margin-bottom:9px;min-height:1.2em}
.meta{font-size:.77rem;color:#94a3b8;margin-bottom:10px;line-height:1.8}
.bar{height:4px;background:#f1f5f9;border-radius:2px;overflow:hidden}
.fill{height:100%;border-radius:2px;transition:width .4s}
/* ── CROSS COMPARE ───────────────────────────────────────── */
.cph{font-size:.96rem;color:#475569;padding:10px 0 16px;font-weight:500}
.cgrid{display:grid;grid-template-columns:repeat(5,1fr);gap:11px}
@media(max-width:900px){.cgrid{grid-template-columns:repeat(2,1fr)}}
.scol{background:#fff;border-radius:13px;padding:14px;box-shadow:0 1px 4px rgba(0,0,0,.06)}
.stitle{font-weight:700;font-size:.93rem;margin-bottom:12px}
.nr{color:#94a3b8;font-size:.84rem;padding:10px 0}
.cc{padding:10px;margin-bottom:8px;border-radius:9px;background:#f8fafc}
.cbn{font-weight:700;font-size:.88rem;color:#1e293b}
.cgn{font-size:.77rem;color:#64748b;margin:3px 0}
.cm{font-size:.74rem;color:#94a3b8}
.sbar{height:3px;background:#f1f5f9;border-radius:2px;overflow:hidden;margin-top:6px}
.sfill{height:100%;border-radius:2px}
/* ── FINGERPRINT ─────────────────────────────────────────── */
.fp-banner{border-radius:0}
.fp-row{display:flex;flex-direction:column;background:#f8fafc;border-radius:10px;padding:10px 14px}
.fp-k{font-size:.74rem;color:#64748b;font-weight:600;text-transform:uppercase;letter-spacing:.4px}
.fp-v{font-size:.95rem;color:#1e293b;font-weight:500;margin-top:2px}
/* ── FDA ─────────────────────────────────────────────────── */
.fda-hdr{background:linear-gradient(135deg,#eff6ff,#e0f2fe);border-radius:11px;
padding:14px 18px;margin-bottom:14px;display:flex;align-items:center;
gap:10px;flex-wrap:wrap;font-size:.88rem;color:#1e293b}
.fda-badge{background:#1d4ed8;color:#fff;padding:4px 11px;border-radius:20px;
font-size:.77rem;font-weight:600}
.fda-cnt{margin-left:auto;background:#dcfce7;color:#166534;padding:3px 10px;
border-radius:20px;font-size:.77rem;font-weight:600}
.fda-miss{text-align:center;padding:40px;color:#64748b;background:#f8fafc;
border-radius:14px;border:2px dashed #e2e8f0}
.fda-card{background:#fff;border-radius:13px;padding:18px;margin-bottom:12px;
box-shadow:0 1px 4px rgba(0,0,0,.06)}
.fda-num{font-weight:700;font-size:.88rem;color:#1d4ed8;margin-bottom:10px}
.fda-tbl{width:100%;border-collapse:collapse;font-size:.84rem}
.fda-tbl tr{border-bottom:1px solid #f1f5f9}
.fda-tbl tr:last-child{border-bottom:none}
.fk{color:#64748b;font-weight:600;padding:6px 14px 6px 0;white-space:nowrap;
vertical-align:top;width:130px}
.fda-tbl td:last-child{color:#1e293b;padding:6px 0;line-height:1.55}
/* ── CHATBOT ─────────────────────────────────────────────── */
.chatbot{border-radius:13px!important;border:1.5px solid #e2e8f0!important}
/* ── EXPLORER TABLE ──────────────────────────────────────── */
.xtbl{width:100%;border-collapse:collapse;font-size:.83rem}
.xtbl thead{background:linear-gradient(135deg,#0f172a,#1d4ed8);color:#fff}
.xtbl th{padding:11px 14px;text-align:left;font-weight:600;letter-spacing:.3px}
.xtbl tbody tr{border-bottom:1px solid #f1f5f9;transition:background .15s}
.xtbl tbody tr:hover{background:#f8fafc}
.xtbl td{padding:9px 14px;color:#1e293b;vertical-align:top}
.sb2{font-size:.71rem;font-weight:600;padding:2px 8px;border-radius:20px;white-space:nowrap}
code{background:#f1f5f9;padding:2px 7px;border-radius:5px;font-size:.84em;color:#0891b2}
"""
# ────────────────────────────────────────────────────────────────
# 10. BUILD GRADIO APP
# ────────────────────────────────────────────────────────────────
# Banner text rendered once at the top of the app via gr.HTML(HEADER).
# Built from plain (non-f) string pieces: nothing is interpolated here.
HEADER = (
    "\n"
    # Title and subtitle
    "💊 PharmaBridge\n"
    "Cross-Medical-System Drug Intelligence Engine · Bangladesh National Drug Registry\n"
    # Capability list
    "🔬 TF-IDF + Cosine Similarity\n"
    "🧠 SVD + K-Means Clustering\n"
    "🌐 OpenFDA Live API\n"
    "🤖 Mistral-7B AI Assistant\n"
    "📊 Interactive Dashboards\n"
    # Headline statistics (hard-coded values)
    "53,584Total Drugs\n"
    "5Med. Systems\n"
    "725Manufacturers\n"
    "12,311TF-IDF Features\n"
    "95.5%Precision@10\n"
    "0.2159Silhouette\n"
)
# Assemble the whole Gradio UI: a header banner followed by eight tabs.
# Components are created inside nested layout contexts and then wired to the
# tab callbacks (tab1, tab2, _overview_fig, ...) defined earlier in the file.
# NOTE(review): the Row/Column nesting below is reconstructed from a flattened
# listing — confirm the grouping against the rendered UI.
with gr.Blocks(
    css=CSS,
    title="PharmaBridge",
    theme=gr.themes.Base(
        primary_hue=gr.themes.colors.blue,
        font=gr.themes.GoogleFont("Inter"),
    ),
) as app:
    gr.HTML(HEADER)
    with gr.Tabs(elem_classes="tab-nav"):
        # ── TAB 1 ─────────────────────────────────────────────────
        with gr.Tab("🔍 Smart Search"):
            with gr.Row(equal_height=True):
                with gr.Column(scale=4):
                    t1q = gr.Textbox(
                        label="Search Query",
                        placeholder="Try: Azithromycin, Ashwagandha, nux vomica, sharbat amrood, paracetamol fever…",
                        lines=1,
                    )
                with gr.Column(scale=1):
                    t1sys = gr.Dropdown(choices=SYSTEMS, value="All Systems", label="System")
                with gr.Column(scale=1):
                    t1btn = gr.Button("🔍 Search", variant="primary", scale=1)
            with gr.Row():
                t1n = gr.Slider(5, 50, value=12, step=1, label="Max Results")
                t1s = gr.Slider(0.0, 0.5, value=0.04, step=0.01, label="Min Similarity")
            t1stat = gr.Markdown("")
            # Initial placeholder; styled by the `.ph` rule in CSS.
            t1cards = gr.HTML('<div class="ph">🔍 Enter a drug name, compound, or symptom above</div>')
            t1chart = gr.Plot(label="Score Distribution")
            # Button click and Enter in the textbox run the same search.
            t1_inputs = [t1q, t1sys, t1n, t1s]
            t1_outputs = [t1cards, t1chart, t1stat]
            t1btn.click(tab1, t1_inputs, t1_outputs)
            t1q.submit(tab1, t1_inputs, t1_outputs)
            gr.Examples([
                ["Azithromycin 500mg", "Allopathic"],
                ["Ashwagandha capsule", "Ayurvedic"],
                ["Nux Vomica liquid", "Homeopathic"],
                ["Sharbat Amrood", "Unani"],
                ["Moringa leaf powder", "Herbal"],
                ["antibiotic tablet", "All Systems"],
                ["digestive capsule", "All Systems"],
            ], inputs=[t1q, t1sys], label="Quick Examples")
        # ── TAB 2 ─────────────────────────────────────────────────
        with gr.Tab("⚖️ Cross-System Compare"):
            with gr.Row(equal_height=True):
                with gr.Column(scale=5):
                    t2q = gr.Textbox(
                        label="Query",
                        placeholder="e.g. pain relief tablet, digestive liver, sleep anxiety, blood pressure…",
                        lines=1,
                    )
                with gr.Column(scale=1):
                    t2n = gr.Slider(1, 5, value=3, step=1, label="Results / System")
                with gr.Column(scale=1):
                    t2btn = gr.Button("⚖️ Compare", variant="primary")
            t2cards = gr.HTML('<div class="ph">Compare the same therapeutic need across all 5 medical traditions simultaneously</div>')
            t2radar = gr.Plot(label="Cross-System Similarity Radar")
            t2_inputs = [t2q, t2n]
            t2_outputs = [t2cards, t2radar]
            t2btn.click(tab2, t2_inputs, t2_outputs)
            t2q.submit(tab2, t2_inputs, t2_outputs)
            gr.Examples([
                ["digestive liver tablet"], ["pain anti-inflammatory"],
                ["antibiotic infection"], ["blood pressure"],
                ["cough respiratory"], ["sleep anxiety stress"],
            ], inputs=[t2q])
        # ── TAB 3 ─────────────────────────────────────────────────
        with gr.Tab("📊 Dataset Analytics"):
            with gr.Tabs():
                with gr.Tab("🌐 Overview Dashboard"):
                    ov_btn = gr.Button("📊 Render Dashboard", variant="primary")
                    ov_fig = gr.Plot()
                    ov_btn.click(_overview_fig, [], [ov_fig])
                    # Also render once when the page loads.
                    app.load(_overview_fig, [], [ov_fig])
                with gr.Tab("🔎 System Deep Dive"):
                    with gr.Row():
                        dd_sys = gr.Dropdown(
                            choices=["All", "Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"],
                            value="Allopathic", label="Select System")
                        dd_btn = gr.Button("Analyze", variant="primary")
                    dd_fig = gr.Plot()
                    dd_btn.click(_deep_fig, [dd_sys], [dd_fig])
                    dd_sys.change(_deep_fig, [dd_sys], [dd_fig])
                    # Initial render uses the dropdown's default system.
                    app.load(lambda: _deep_fig("Allopathic"), [], [dd_fig])
                with gr.Tab("🗺️ Treemap Explorer"):
                    tm_btn = gr.Button("🗺️ Render Treemap", variant="primary")
                    tm_fig = gr.Plot()
                    tm_btn.click(_treemap_fig, [], [tm_fig])
                    app.load(_treemap_fig, [], [tm_fig])
        # ── TAB 4 ─────────────────────────────────────────────────
        with gr.Tab("🧬 Drug Fingerprint"):
            gr.Markdown("""
### Single Drug Deep-Dive
Search for any drug to see its **full profile card** plus a bar chart of its
top TF-IDF feature weights — the exact tokens driving its similarity scores.
""")
            with gr.Row(equal_height=True):
                fp_q = gr.Textbox(
                    label="Brand Name or Compound",
                    placeholder="e.g. Azithromycin, Ashwagandha, Nux Vomica, Sharbat Amrood…",
                    lines=1,
                )
                fp_btn = gr.Button("🧬 Profile", variant="primary")
            fp_card = gr.HTML('<div class="ph">🧬 Enter a drug or compound name to generate its fingerprint</div>')
            fp_fig = gr.Plot(label="TF-IDF Feature Fingerprint")
            fp_outputs = [fp_card, fp_fig]
            fp_btn.click(tab4_fingerprint, [fp_q], fp_outputs)
            fp_q.submit(tab4_fingerprint, [fp_q], fp_outputs)
            gr.Examples([
                ["Azithromycin"], ["Ashwagandha"], ["Nux Vomica"],
                ["Sharbat Amrood"], ["Moringa"], ["Paracetamol"],
            ], inputs=[fp_q])
        # ── TAB 5 ─────────────────────────────────────────────────
        with gr.Tab("🏥 FDA Live Data"):
            gr.Markdown("> **Live OpenFDA API** — US drug labels, adverse events (FAERS), and NDC records. "
                        "~40% of Bangladesh registry drugs appear here. Bangladeshi names auto-mapped to FDA terms.")
            with gr.Row(equal_height=True):
                fda_drug = gr.Textbox(
                    label="Drug Name",
                    placeholder="Paracetamol, Azithromycin, Ciprofloxacin, Omeprazole, Metformin…",
                    lines=1,
                )
                fda_ep = gr.Radio(["Drug Labels", "Adverse Events (FAERS)", "NDC Directory"],
                                  value="Drug Labels", label="FDA Database")
                fda_btn = gr.Button("🔎 Fetch", variant="primary")
            fda_out = gr.HTML('<div class="ph">🏥 Enter a drug name and click Fetch</div>')
            fda_inputs = [fda_drug, fda_ep]
            fda_btn.click(tab5_fda, fda_inputs, [fda_out])
            fda_drug.submit(tab5_fda, fda_inputs, [fda_out])
            gr.Examples([["Paracetamol"], ["Azithromycin"], ["Ciprofloxacin"],
                         ["Omeprazole"], ["Metformin"], ["Ibuprofen"]], inputs=[fda_drug])
        # ── TAB 6 ─────────────────────────────────────────────────
        with gr.Tab("🤖 AI Medical Q&A"):
            gr.Markdown("""
### PharmaBridge AI — Pharmaceutical Q&A
Powered by **Mistral-7B-Instruct** via HuggingFace Inference API (free, no key needed).
Ask anything about drugs, pharmacology, traditional medicine, or the Bangladesh registry.
> ⚠️ Educational only — not a substitute for professional medical advice. Model may take ~20s to cold-start.
""")
            ai_bot = gr.Chatbot(label="PharmaBridge AI", height=450, elem_classes="chatbot")
            with gr.Row():
                ai_inp = gr.Textbox(
                    label="Your Question", lines=2, scale=5,
                    placeholder="e.g. What is Ashwagandha used for? / Side effects of Azithromycin? / What is Unani medicine?")
                with gr.Column(scale=1):
                    ai_send = gr.Button("Send 💬", variant="primary")
                    ai_clear = gr.Button("Clear 🗑️")
            # Both handlers write the chat history and the input box.
            ai_inputs = [ai_inp, ai_bot]
            ai_outputs = [ai_bot, ai_inp]
            ai_send.click(tab6_ai, ai_inputs, ai_outputs)
            ai_inp.submit(tab6_ai, ai_inputs, ai_outputs)
            ai_clear.click(tab6_clear, [], ai_outputs)
            gr.Examples([
                ["What is Ashwagandha used for in Ayurvedic medicine?"],
                ["Explain Unani medicine and its traditional formulations"],
                ["What are the common side effects of Azithromycin?"],
                ["How does TF-IDF cosine similarity work for drug retrieval?"],
                ["What is Homeopathic potency and how are remedies prepared?"],
                ["Compare Allopathic and Herbal medicine approaches"],
            ], inputs=[ai_inp])
        # ── TAB 7 ─────────────────────────────────────────────────
        with gr.Tab("📋 Drug Explorer"):
            with gr.Row():
                ex_sys = gr.Dropdown(["All", "Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"],
                                     value="All", label="System")
                ex_dos = gr.Dropdown(choices=_ALL_DOS, value="All", label="Dosage Form")
                ex_srch = gr.Textbox(label="Search", placeholder="Brand, compound, manufacturer…")
                ex_pg = gr.Number(value=1, label="Page", minimum=1, precision=0)
            ex_btn = gr.Button("🔍 Browse Database", variant="primary")
            ex_info = gr.Markdown("")
            ex_tbl = gr.HTML('<div class="ph">Click Browse to explore all 53,584 drug records</div>')
            # Changing the system updates the dosage-form dropdown via _dos_choices.
            ex_sys.change(_dos_choices, [ex_sys], [ex_dos])
            ex_inputs = [ex_sys, ex_dos, ex_srch, ex_pg]
            ex_outputs = [ex_tbl, ex_info]
            ex_btn.click(tab7_explore, ex_inputs, ex_outputs)
            ex_srch.submit(tab7_explore, ex_inputs, ex_outputs)
        # ── TAB 8 ─────────────────────────────────────────────────
        with gr.Tab("ℹ️ About"):
            # Plain string (nothing is interpolated). The blank lines around
            # each `---` keep it a horizontal rule; directly under a text line
            # Markdown would read it as a setext heading underline instead.
            gr.Markdown("""
## PharmaBridge — Cross-Medical-System Drug Intelligence
**PharmaBridge** is a master's thesis project — the first NLP-based drug recommendation system
spanning all 5 major South Asian pharmaceutical traditions simultaneously using the
Bangladesh National Drug Registry (53,584 records).

---

### Dataset Composition
| Medical System | Records | Share |
|---|---|---|
| Allopathic | 36,254 | 67.7% |
| Unani | 8,460 | 15.8% |
| Ayurvedic | 5,262 | 9.8% |
| Homeopathic | 2,580 | 4.8% |
| Herbal | 1,028 | 1.9% |
| **Total** | **53,584** | **100%** |
### Technical Architecture
| Component | Configuration |
|---|---|
| Vectorization | TF-IDF, bigrams (1,2), max_features=15,000, sublinear_tf=True |
| Retrieval | Cosine Similarity on sparse matrix (53,584 × 12,311) |
| Dim. Reduction | TruncatedSVD, 50 components, 26.2% variance |
| Clustering | K-Means K=10 (elbow-selected), Silhouette=0.2159 |
### Evaluation Results
| Metric | Value |
|---|---|
| Precision@5 | 97.00% |
| Precision@10 | 95.50% |
| Precision@20 | 90.55% |
| Silhouette Score | 0.2159 |
### App Features
| Tab | Feature |
|---|---|
| 🔍 Smart Search | TF-IDF cosine retrieval with rich card UI + bar chart |
| ⚖️ Cross-System Compare | Side-by-side 5-system view + radar chart |
| 📊 Dataset Analytics | Overview dashboard, deep-dive, treemap |
| 🧬 Drug Fingerprint | Single drug profile + TF-IDF feature bar chart |
| 🏥 FDA Live Data | OpenFDA labels / adverse events / NDC lookup |
| 🤖 AI Medical Q&A | Mistral-7B via HuggingFace Inference API |
| 📋 Drug Explorer | Paginated browse & filter across all 53,584 records |

---

> **Disclaimer:** For research and educational purposes only.
> Not intended for clinical decision-making.
> Always consult a qualified healthcare professional for medical advice.
""")
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app.launch()