""" ╔══════════════════════════════════════════════════════════════╗ ║ PharmaBridge — Cross-Medical-System Drug Intelligence ║ ║ Hugging Face Spaces | Gradio 4.x | Master's Thesis ║ ╚══════════════════════════════════════════════════════════════╝ 7 Tabs: 1. Smart Drug Search — TF-IDF cosine retrieval with cards UI 2. Cross-System Compare — Side-by-side 5-system radar comparison 3. Dataset Analytics — 3 sub-tabs of Plotly dashboards 4. Drug Fingerprint — Single drug deep-dive profile 5. FDA Live Intelligence — OpenFDA API (Labels / Events / NDC) 6. AI Medical Q&A — HuggingFace Inference API (Mistral-7B) 7. Drug Explorer — Paginated browse & filter table """ import gradio as gr import pandas as pd import numpy as np import plotly.graph_objects as go import plotly.express as px from plotly.subplots import make_subplots import joblib, re, os, requests, json, warnings warnings.filterwarnings("ignore") from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity # ──────────────────────────────────────────────────────────────── # 0. 
# LOAD / REBUILD MODELS
# ────────────────────────────────────────────────────────────────
def _clean(text):
    """Normalise one raw cell into lowercase, whitespace-collapsed text.

    Missing values and the placeholder strings "FALSE"/"False"/"false"/
    "nan"/"NaN" become "". Every character other than a-z, 0-9, whitespace
    and ``+ - . /`` is replaced by a space before whitespace is collapsed.
    """
    if pd.isna(text):
        return ""
    raw = str(text).strip()
    if raw in ("FALSE", "False", "false", "nan", "NaN", ""):
        return ""
    kept = re.sub(r"[^a-z0-9\s\+\-\./]", " ", raw.lower())
    return re.sub(r"\s+", " ", kept).strip()


def _build_text(row):
    """Build the TF-IDF input text for one drug row.

    The identity field depends on the medical system: generic name for
    Allopathic, brand name for Homeopathic, "generic name and strength" for
    everything else. The dosage description and the lowercase system name
    are appended.

    Args:
        row: A mapping/Series with a ``medical_system`` entry and the drug
            columns under either their raw or their normalised names.

    Returns:
        The space-joined, cleaned text (empty parts are skipped).
    """
    def field(raw_name, normalised_name):
        # This function is called both before and after the columns are
        # renamed, so fall back to the normalised name when the raw one is
        # absent or empty.
        return _clean(row.get(raw_name, "")) or _clean(row.get(normalised_name, ""))

    system = row["medical_system"]
    system = "" if pd.isna(system) else str(system)
    dosage = field("Dosages Description", "dosage_form")
    if system == "Allopathic":
        identity = field("Generic Name", "generic_name")
    elif system == "Homeopathic":
        identity = field("Brand Name", "brand_name")
    else:  # Ayurvedic, Herbal, Unani and anything unrecognised
        identity = field("Generic Name and Strength", "gns")
    return " ".join(filter(None, [identity, dosage, system.lower()]))


# Raw registry column name -> normalised name used by the rest of the app.
_REMAP = {
    "Brand Name": "brand_name",
    "Generic Name": "generic_name",
    "Dosages Description": "dosage_form",
    "Strength": "strength",
    "Name of the Manufacturer": "manufacturer",
    "Generic Name and Strength": "gns",
}

print("⏳ Loading PharmaBridge models…")
try:
    # NOTE(security): joblib.load unpickles arbitrary objects — only load
    # artefacts that were produced by this project.
    VEC = joblib.load("models/tfidf_vectorizer.pkl")
    MAT = joblib.load("models/tfidf_matrix.pkl")
    DF = pd.read_csv("models/drug_database.csv")
    if MAT.shape[0] != len(DF):
        # Row i of MAT is looked up as DF.iloc[i]; a mismatch would silently
        # attribute scores to the wrong drugs, so rebuild instead.
        raise ValueError(f"matrix has {MAT.shape[0]} rows, database has {len(DF)}")
    print("✅ PKL models loaded.")
except Exception as e:  # deliberate catch-all: any load failure triggers the rebuild
    print(f"⚠️ PKL not found ({e}), rebuilding from CSV…")
    raw = pd.read_csv("merged_pharma_dataset.csv")
    DF = raw.copy()
    DF["drug_text"] = DF.apply(_build_text, axis=1)
    DF = DF.rename(columns=_REMAP)
    VEC = TfidfVectorizer(ngram_range=(1, 2), max_features=15000,
                          stop_words=None, sublinear_tf=True, min_df=1)
    MAT = VEC.fit_transform(DF["drug_text"])
    print("✅ Rebuilt from CSV.")

# Normalise column names
for _old, _new in _REMAP.items():
    if _old in DF.columns and _new not in DF.columns:
        DF.rename(columns={_old: _new}, inplace=True)
for _col in ["brand_name", "generic_name", "dosage_form", "strength",
             "manufacturer", "gns", "drug_text"]:
    if _col not in DF.columns:
        DF[_col] = ""
# Rebuild the text column when it is entirely empty. fillna/astype keep the
# ``.str`` accessor usable when the CSV column was parsed as all-NaN floats.
if DF["drug_text"].fillna("").astype(str).str.len().sum() == 0:
    DF["drug_text"] = DF.apply(_build_text, axis=1)
DF = DF.reset_index(drop=True)

SYSTEMS = ["All Systems", "Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]
SC = {"Allopathic": "#3B82F6", "Ayurvedic": "#10B981",
      "Unani": "#F59E0B", "Homeopathic": "#8B5CF6", "Herbal": "#EF4444"}
EMOJI = {"Allopathic": "💊", "Ayurvedic": "🌿", "Unani": "☘️",
         "Homeopathic": "💧", "Herbal": "🌱"}

# Pre-compute for analytics
_SYS_VC = DF["medical_system"].value_counts()
_DOS_VC = DF["dosage_form"].value_counts()
_MFR_VC = DF["manufacturer"].value_counts()
_SYS_MFR = DF.groupby("medical_system")["manufacturer"].nunique()
_FEAT = np.array(VEC.get_feature_names_out())


# ────────────────────────────────────────────────────────────────
# 1. RETRIEVAL HELPERS
# ────────────────────────────────────────────────────────────────
def _encode(q):
    """Vectorise a query string with the fitted TF-IDF vocabulary.

    The query is lowercased and stripped to the same character set that
    ``_clean`` keeps before being transformed by ``VEC``.
    """
    lowered = re.sub(r"[^a-z0-9\s\+\-\./]", " ", q.lower())
    return VEC.transform([re.sub(r"\s+", " ", lowered).strip()])


def _recommend(query, system, top_n, min_s):
    """Return the ``top_n`` drugs most similar to ``query``.

    Args:
        query: Free-text query.
        system: Medical system to restrict to; "All Systems", "All" or ""
            disables the restriction.
        top_n: Maximum number of rows to return.
        min_s: Minimum cosine similarity a row must reach.

    Returns:
        A DataFrame of matching rows with an added ``score`` column, sorted
        by score descending; an empty DataFrame when nothing qualifies.
    """
    sims = cosine_similarity(_encode(query), MAT).flatten()
    if system not in ("All Systems", "All", ""):
        # TF-IDF weights are non-negative, so real scores are >= 0 and -1
        # keeps filtered rows out even when min_s is 0 (zeroing them, as the
        # code used to, let other systems through at that threshold).
        sims[(DF["medical_system"] != system).values] = -1.0
    pool = sims.argsort()[::-1][: max(int(top_n), 0) * 4]
    # A score of exactly 0 means no shared vocabulary: never a result.
    idx = [int(i) for i in pool if sims[i] > 0 and sims[i] >= min_s][: max(int(top_n), 0)]
    if not idx:
        return pd.DataFrame()
    hits = DF.iloc[idx].copy()
    hits["score"] = [round(float(sims[i]), 4) for i in idx]
    return hits.sort_values("score", ascending=False).reset_index(drop=True)


def _cross(query, tps):
    """Return up to ``tps`` best matches for ``query`` in each medical system.

    Args:
        query: Free-text query.
        tps: Number of results wanted per system.

    Returns:
        A DataFrame with a ``score`` column, sorted by system name and then
        score descending; an empty DataFrame when no row scores above 0.01.
    """
    tps = int(tps)
    if tps <= 0:  # a slice of [-0:] would select every row
        return pd.DataFrame()
    sims = cosine_similarity(_encode(query), MAT).flatten()
    systems = DF["medical_system"].values
    rows = []
    for name in ("Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"):
        scoped = sims.copy()
        scoped[systems != name] = -1.0
        for i in scoped.argsort()[-tps:][::-1]:
            # Test the masked score: testing the unmasked one admitted rows
            # of other systems whenever this system had fewer than tps hits.
            if scoped[i] > 0.01:
                record = DF.iloc[int(i)].to_dict()
                record["score"] = round(float(scoped[i]), 4)
                rows.append(record)
    if not rows:
        return pd.DataFrame()
    return (pd.DataFrame(rows)
            .sort_values(["medical_system", "score"], ascending=[True, False])
            .reset_index(drop=True))
# ────────────────────────────────────────────────────────────────
# 2. TAB 1 — SMART DRUG SEARCH
# ────────────────────────────────────────────────────────────────
def tab1(query, system, top_n, min_s):
    """Run the Smart Search tab.

    Args:
        query: Free-text search string from the textbox.
        system: Medical-system filter; "All Systems" disables filtering.
        top_n: Maximum number of results (coerced to int).
        min_s: Minimum cosine similarity (coerced to float).

    Returns:
        ``(cards_html, figure_or_None, markdown_summary)``.

    NOTE(review): the HTML tags of this function were missing from the
    reviewed copy of the file. The markup below is rebuilt from the class
    names declared in ``CSS``; inline styles are a best guess — confirm
    against the deployed app.
    """
    from html import escape  # function-scope import keeps the block self-contained

    def text(value, limit=None, ellipsis="", fallback="—"):
        """Return a cell as escaped display text; None/NaN/"" give *fallback*."""
        if value is None or (isinstance(value, float) and value != value):
            return fallback
        shown = str(value).strip()
        if not shown:
            return fallback
        if limit is not None and len(shown) > limit:
            shown = shown[:limit] + ellipsis  # truncate before escaping so no entity is cut
        return escape(shown)

    if not (query or "").strip():
        return ('<div class="ph">🔍 Type a drug name, compound, or symptom above '
                'and press Search</div>'), None, ""

    found = _recommend(query, system, int(top_n), float(min_s))
    shown_query = escape(query)  # the query is user input: never emit it raw
    if found.empty:
        return (f'<div class="ph">No results found for <b>{shown_query}</b>. '
                'Try lowering the similarity threshold.</div>'), None, ""

    parts = [f'<div class="rh">Found <b>{len(found)}</b> results for '
             f'"<b>{shown_query}</b>"</div><div class="grid">']
    for _, row in found.iterrows():
        raw_system = str(row.get("medical_system", ""))
        colour = SC.get(raw_system, "#6B7280")
        icon = EMOJI.get(raw_system, "💊")
        compound = (text(row.get("gns"), limit=70, ellipsis="…", fallback="")
                    or text(row.get("generic_name"), limit=70, ellipsis="…"))
        pct = max(0, min(100, int(float(row.get("score", 0)) * 100)))
        parts.append(
            f'<div class="card" style="border-left:4px solid {colour}">'
            f'<div class="ch">'
            f'<span class="sbadge" style="background:{colour}22;color:{colour}">'
            f'{icon} {escape(raw_system)}</span>'
            f'<span class="spct" style="background:{colour}22;color:{colour}">{pct}%</span>'
            f'</div>'
            f'<div class="bn">{text(row.get("brand_name"))}</div>'
            f'<div class="gn">{compound}</div>'
            f'<div class="meta">💊 {text(row.get("dosage_form"))} &nbsp;·&nbsp; '
            f'🏭 {text(row.get("manufacturer"), limit=38)}</div>'
            f'<div class="bar"><div class="fill" '
            f'style="width:{pct}%;background:{colour}"></div></div>'
            f'</div>'
        )
    parts.append("</div>")
    cards = "".join(parts)

    top = found.head(15)
    fig = px.bar(
        top, x="score", y="brand_name",
        color="medical_system", color_discrete_map=SC,
        orientation="h",
        labels={"score": "Similarity Score", "brand_name": ""},
        title=f'Similarity Scores — "{query}"',
    )
    fig.update_layout(
        height=max(340, len(top) * 30 + 90),
        paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, title=None),
        margin=dict(l=0, r=10, t=50, b=10),
        yaxis=dict(autorange="reversed"),
        xaxis=dict(range=[0, 1], gridcolor="#f1f5f9"),
    )
    per_system = found["medical_system"].value_counts().to_dict()
    stat = " · ".join(f"**{name}** {count}" for name, count in per_system.items())
    return cards, fig, f"📊 {stat}"
# ────────────────────────────────────────────────────────────────
# 3. TAB 2 — CROSS-SYSTEM COMPARE
# ────────────────────────────────────────────────────────────────
def tab2(query, tps):
    """Run the Cross-System Compare tab.

    Args:
        query: Free-text search string.
        tps: Results wanted per medical system (coerced to int).

    Returns:
        ``(columns_html, radar_figure_or_None)``. The radar plots the mean
        score of the returned rows for each of the five systems (0 when a
        system has no row).

    NOTE(review): the HTML tags of this function were missing from the
    reviewed copy of the file. The markup below is rebuilt from the class
    names declared in ``CSS``; inline styles are a best guess — confirm
    against the deployed app.
    """
    from html import escape  # function-scope import keeps the block self-contained

    def text(value, limit=None, ellipsis="", fallback="—"):
        """Return a cell as escaped display text; None/NaN/"" give *fallback*."""
        if value is None or (isinstance(value, float) and value != value):
            return fallback
        shown = str(value).strip()
        if not shown:
            return fallback
        if limit is not None and len(shown) > limit:
            shown = shown[:limit] + ellipsis
        return escape(shown)

    if not (query or "").strip():
        return ('<div class="ph">Enter a query to compare drugs across all 5 '
                'medical traditions</div>'), None
    found = _cross(query, int(tps))
    if found.empty:
        return '<div class="ph">No cross-system results found.</div>', None

    systems = ["Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]
    by_system = {name: found[found["medical_system"] == name] for name in systems}

    parts = [f'<div class="cph">Cross-system view for "<b>{escape(query)}</b>"</div>'
             '<div class="cgrid">']
    for name in systems:
        colour = SC[name]
        parts.append(f'<div class="scol"><div class="stitle" style="color:{colour}">'
                     f'{EMOJI[name]} {name}</div>')
        subset = by_system[name]
        if subset.empty:
            parts.append('<div class="nr">No match found</div>')
        for _, row in subset.iterrows():
            compound = (text(row.get("gns"), limit=48, ellipsis="…", fallback="")
                        or text(row.get("generic_name"), limit=48, ellipsis="…"))
            pct = max(0, min(100, int(float(row.get("score", 0)) * 100)))
            parts.append(
                f'<div class="cc">'
                f'<div class="cbn">{text(row.get("brand_name"))}</div>'
                f'<div class="cgn">{compound}</div>'
                f'<div class="cm">{text(row.get("dosage_form"))} · {pct}%</div>'
                f'<div class="sbar"><div class="sfill" '
                f'style="width:{pct}%;background:{colour}"></div></div>'
                f'</div>'
            )
        parts.append("</div>")
    parts.append("</div>")
    columns_html = "".join(parts)

    # Radar chart
    vals = [float(by_system[name]["score"].mean()) if not by_system[name].empty else 0
            for name in systems]
    fig = go.Figure(go.Scatterpolar(
        # The first point is repeated so the polygon closes.
        r=vals + [vals[0]], theta=systems + [systems[0]],
        fill="toself", fillcolor="rgba(59,130,246,0.12)",
        line=dict(color="#3B82F6", width=2.5),
        marker=dict(size=9, color=[SC[name] for name in systems] + [SC[systems[0]]]),
    ))
    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 1], gridcolor="#e5e7eb"),
                   angularaxis=dict(gridcolor="#e5e7eb", tickfont=dict(size=12))),
        title=dict(text=f'Cross-System Radar — "{query}"',
                   font=dict(size=13, color="#1e293b")),
        paper_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif"),
        height=380, showlegend=False,
        margin=dict(l=50, r=50, t=60, b=30),
    )
    return columns_html, fig
# ────────────────────────────────────────────────────────────────
# 4. TAB 3 — DATASET ANALYTICS (3 sub-views)
# ────────────────────────────────────────────────────────────────
def _overview_fig():
    """Build the 2×3 dataset overview dashboard.

    Panels: system share donut, top-12 dosage forms, manufacturers per
    system, top-15 manufacturers, system × dosage heatmap (top 8 forms) and
    a per-system vocabulary count (features whose mean TF-IDF weight in
    that system exceeds 0.001).

    Returns:
        A Plotly figure.
    """
    systems = ["Allopathic", "Ayurvedic", "Unani", "Homeopathic", "Herbal"]
    fig = make_subplots(
        rows=2, cols=3,
        subplot_titles=["System Share", "Top 12 Dosage Forms", "Manufacturers per System",
                        "Top 15 Manufacturers", "System × Dosage Heatmap",
                        "TF-IDF Vocab Share"],
        specs=[[{"type": "domain"}, {"type": "xy"}, {"type": "xy"}],
               [{"type": "xy"}, {"type": "xy"}, {"type": "domain"}]],
        vertical_spacing=0.14, horizontal_spacing=0.08)

    # 1 donut
    fig.add_trace(go.Pie(
        labels=_SYS_VC.index.tolist(), values=_SYS_VC.values.tolist(), hole=0.55,
        marker=dict(colors=[SC.get(s, "#aaa") for s in _SYS_VC.index],
                    line=dict(color="white", width=2.5)),
        textinfo="label+percent", textfont=dict(size=10), showlegend=False,
    ), row=1, col=1)

    # 2 dosage bar (reversed so the most common form is drawn at the top)
    top_forms = _DOS_VC.head(12)
    form_counts = top_forms.values[::-1]
    fig.add_trace(go.Bar(
        x=form_counts, y=[str(f) for f in top_forms.index[::-1]], orientation="h",
        # NOTE(review): this palette may hold fewer than 12 colours — confirm
        # how bars beyond its length are rendered.
        marker=dict(color=px.colors.sequential.Blues_r[:12],
                    line=dict(color="white", width=1)),
        text=[f"{v:,}" for v in form_counts], textposition="outside", showlegend=False,
    ), row=1, col=2)

    # 3 mfr per system
    fig.add_trace(go.Bar(
        x=_SYS_MFR.index.tolist(), y=_SYS_MFR.values.tolist(),
        marker=dict(color=[SC.get(s, "#aaa") for s in _SYS_MFR.index],
                    line=dict(color="white", width=2)),
        text=_SYS_MFR.values.tolist(), textposition="outside", showlegend=False,
    ), row=1, col=3)

    # 4 top 15 mfr
    top_mfr = _MFR_VC.head(15)
    mfr_counts = top_mfr.values[::-1].tolist()
    fig.add_trace(go.Bar(
        # str() so a non-string label cannot break the slice.
        y=[str(m)[:28] for m in top_mfr.index[::-1]], x=mfr_counts,
        orientation="h",
        marker=dict(color=mfr_counts, colorscale="Viridis",
                    showscale=False, line=dict(color="white", width=1)),
        showlegend=False,
    ), row=2, col=1)

    # 5 heatmap
    top8 = _DOS_VC.head(8).index.tolist()
    piv = pd.crosstab(DF["medical_system"], DF["dosage_form"])
    z = [[int(piv[d].get(s, 0)) if d in piv.columns else 0 for d in top8]
         for s in systems]
    fig.add_trace(go.Heatmap(
        z=z, x=[str(d)[:12] for d in top8], y=systems, colorscale="YlOrRd",
        text=z, texttemplate="%{text}", textfont=dict(size=9),
        showscale=True, colorbar=dict(thickness=10, x=0.65, len=0.42),
    ), row=2, col=2)

    # 6 vocab share
    vocab = {}
    for name in systems:
        members = (DF["medical_system"] == name).values
        if members.any():
            mean_weight = np.asarray(MAT[members].mean(axis=0)).flatten()
            vocab[name] = int((mean_weight > 0.001).sum())
        else:
            # An empty row selection cannot be averaged; a system that is
            # absent from the data simply contributes no vocabulary.
            vocab[name] = 0
    fig.add_trace(go.Pie(
        labels=list(vocab.keys()), values=list(vocab.values()), hole=0.5,
        marker=dict(colors=[SC.get(s, "#aaa") for s in vocab],
                    line=dict(color="white", width=2)),
        textinfo="label+value", textfont=dict(size=10), showlegend=False,
    ), row=2, col=3)

    fig.update_layout(
        height=720, paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        title=dict(text="PharmaBridge — Dataset Intelligence Dashboard",
                   font=dict(size=16, color="#1e293b"), x=0.5),
        margin=dict(l=10, r=10, t=80, b=10),
    )
    fig.update_xaxes(showgrid=True, gridcolor="#f1f5f9", zeroline=False)
    fig.update_yaxes(showgrid=False)
    return fig


def _deep_fig(sel):
    """Build the 2×2 deep-dive dashboard for one medical system.

    Args:
        sel: A medical-system name, or "All" for the whole dataset.

    Returns:
        A Plotly figure with the top-20 compounds, the dosage-form split,
        the top-10 manufacturers and a brand-count comparison across all
        systems (the selected system highlighted).
    """
    subset = DF if sel == "All" else DF[DF["medical_system"] == sel]
    colour = SC.get(sel, "#3B82F6")
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[f"Top 20 Compounds ({sel})", "Dosage Form Split",
                        "Top 10 Manufacturers", "Brand Count Comparison"],
        specs=[[{"type": "xy"}, {"type": "domain"}], [{"type": "xy"}, {"type": "xy"}]],
        vertical_spacing=0.16, horizontal_spacing=0.10)

    # compound: the identifying column differs per system
    if sel == "Homeopathic":
        compounds = subset["brand_name"].value_counts().head(20)
    elif sel == "Allopathic":
        compounds = subset["generic_name"].dropna().value_counts().head(20)
    else:
        compounds = subset["gns"].dropna().value_counts().head(20)
    compound_counts = compounds.values[::-1].tolist()
    fig.add_trace(go.Bar(
        x=compound_counts, y=[str(n) for n in compounds.index[::-1]], orientation="h",
        marker=dict(color=colour, opacity=0.85, line=dict(color="white", width=1)),
        text=compound_counts, textposition="outside", showlegend=False,
    ), row=1, col=1)

    # dosage donut
    forms = subset["dosage_form"].value_counts().head(8)
    fig.add_trace(go.Pie(
        labels=forms.index.tolist(), values=forms.values.tolist(), hole=0.48,
        marker=dict(colors=px.colors.qualitative.Set3[:len(forms)],
                    line=dict(color="white", width=2)),
        textinfo="label+percent", textfont=dict(size=10), showlegend=False,
    ), row=1, col=2)

    # top mfr
    makers = subset["manufacturer"].value_counts().head(10)
    maker_counts = makers.values[::-1].tolist()
    fig.add_trace(go.Bar(
        x=maker_counts, y=[str(m)[:26] for m in makers.index[::-1]],
        orientation="h",
        marker=dict(color=maker_counts, colorscale="Blues",
                    showscale=False, line=dict(color="white", width=1)),
        showlegend=False,
    ), row=2, col=1)

    # brand count (always over the full dataset, for comparison)
    brands = DF.groupby("medical_system")["brand_name"].nunique().sort_values(ascending=False)
    fig.add_trace(go.Bar(
        x=brands.index.tolist(), y=brands.values.tolist(),
        marker=dict(color=[colour if s == sel else "#cbd5e1" for s in brands.index],
                    line=dict(color="white", width=2)),
        text=brands.values.tolist(), textposition="outside", showlegend=False,
    ), row=2, col=2)

    fig.update_layout(
        height=680, paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        title=dict(text=f"Deep Dive: {sel}", font=dict(size=15, color="#1e293b"), x=0.5),
        margin=dict(l=10, r=10, t=70, b=10),
    )
    fig.update_xaxes(showgrid=True, gridcolor="#f1f5f9", zeroline=False)
    fig.update_yaxes(showgrid=False)
    return fig
def _treemap_fig():
    """Build the medical system → dosage form treemap.

    System/dosage pairs with fewer than 5 drugs are left out.
    """
    counts = (DF.groupby(["medical_system", "dosage_form"])
                .size()
                .reset_index(name="count"))
    counts = counts[counts["count"] >= 5]
    fig = px.treemap(counts, path=["medical_system", "dosage_form"], values="count",
                     color="medical_system", color_discrete_map=SC,
                     title="Drug Hierarchy: Medical System → Dosage Form")
    fig.update_traces(textinfo="label+value+percent parent", textfont=dict(size=12))
    fig.update_layout(height=520, paper_bgcolor="rgba(0,0,0,0)",
                      font=dict(family="Inter,sans-serif", size=12),
                      title=dict(font=dict(size=15, color="#1e293b"), x=0.5),
                      margin=dict(l=10, r=10, t=60, b=10))
    return fig


def tab3_deep_update(sel):
    """Return the deep-dive figure for ``sel`` (thin wrapper over ``_deep_fig``)."""
    return _deep_fig(sel)


# ────────────────────────────────────────────────────────────────
# 5. TAB 4 — DRUG FINGERPRINT (single drug profile)
# ────────────────────────────────────────────────────────────────
def tab4_fingerprint(brand_query):
    """Search for a specific drug and show a profile card plus a bar chart of
    its strongest TF-IDF feature weights.

    Args:
        brand_query: Brand or compound name to look up.

    Returns:
        ``(profile_html, figure_or_None)``. The best-scoring row is
        profiled; a score below 0.01 is reported as "no drug found".

    NOTE(review): the HTML tags of this function were missing from the
    reviewed copy of the file. The markup below is rebuilt from the class
    names declared in ``CSS``; inline styles are a best guess — confirm
    against the deployed app.
    """
    from html import escape  # function-scope import keeps the block self-contained

    def missing(value):
        return value is None or (isinstance(value, float) and value != value)

    def text(value, limit=None, fallback="—"):
        """Return a cell as escaped display text; None/NaN/"" give *fallback*."""
        if missing(value):
            return fallback
        shown = str(value).strip()
        if not shown:
            return fallback
        return escape(shown if limit is None else shown[:limit])

    if not (brand_query or "").strip():
        return '<div class="ph">Enter a brand name to see its full drug profile</div>', None

    # Find best match
    sims = cosine_similarity(_encode(brand_query), MAT).flatten()
    idx = int(sims.argsort()[-1])
    row = DF.iloc[idx]
    score = float(sims[idx])
    if score < 0.01:
        return (f'<div class="ph">No drug found matching '
                f'"<b>{escape(brand_query)}</b>".</div>'), None

    raw_system = str(row.get("medical_system", ""))
    colour = SC.get(raw_system, "#6B7280")
    icon = EMOJI.get(raw_system, "💊")
    brand = text(row.get("brand_name"))
    compound = text(row.get("gns"), limit=80, fallback="") or text(row.get("generic_name"), limit=80)
    dar = text(row.get("DAR")) if "DAR" in row.index else "—"

    # Siblings: other rows of the same system sharing this compound string.
    # With no compound there is nothing to match on (and subtracting the row
    # itself from zero matches used to display -1).
    gns_value = row.get("gns", "")
    gns_key = "" if missing(gns_value) else str(gns_value)
    if gns_key.strip():
        same = (DF["medical_system"] == raw_system) & (DF["gns"].astype(str) == gns_key)
        siblings = max(int(same.sum()) - 1, 0)
    else:
        siblings = 0

    facts = [
        ("💊 Dosage Form", text(row.get("dosage_form"))),
        ("🏭 Manufacturer", text(row.get("manufacturer"), limit=40)),
        ("🧬 Medical System", escape(raw_system)),
        ("📂 Cluster", "#" + text(row.get("cluster"))),
        ("📋 DAR Number", dar),
        ("👥 Same-compound drugs", str(siblings)),
    ]
    fact_rows = "".join(
        f'<div class="fp-row"><span class="fp-k">{label}</span>'
        f'<span class="fp-v">{value}</span></div>'
        for label, value in facts
    )
    profile_html = (
        f'<div class="fp-banner" style="background:{colour};color:#fff;padding:20px 24px">'
        f'<div>{icon} {escape(raw_system)}</div>'
        f'<div style="font-size:1.5rem;font-weight:800">{brand}</div>'
        f'<div>{compound}</div>'
        f'<div>{int(score * 100)}% match confidence</div>'
        f'</div>'
        f'<div style="display:grid;grid-template-columns:repeat(2,1fr);gap:10px;'
        f'margin-top:12px">{fact_rows}</div>'
        f'<div style="margin-top:12px">Drug Text (TF-IDF input): '
        f'<code>{text(row.get("drug_text"), limit=120, fallback="")}</code></div>'
    )

    # Top TF-IDF features for this drug (densify the sparse row once)
    weights = np.asarray(MAT[idx].todense()).flatten()
    top = weights.argsort()[-20:][::-1]
    top = top[weights[top] > 0]
    feat_scores = weights[top]
    feat_labels = _FEAT[top]

    fig = go.Figure(go.Bar(
        x=feat_scores[::-1], y=feat_labels[::-1],
        orientation="h",
        marker=dict(
            color=feat_scores[::-1],
            colorscale=[[0, "#dbeafe"], [1, colour]],
            showscale=False,
            line=dict(color="white", width=1),
        ),
        text=[f"{v:.3f}" for v in feat_scores[::-1]],
        textposition="outside",
    ))
    fig.update_layout(
        title=dict(text=f"TF-IDF Feature Fingerprint: {brand}",
                   font=dict(size=13, color="#1e293b")),
        height=max(300, len(feat_labels) * 28 + 80),
        paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
        font=dict(family="Inter,sans-serif", size=11),
        margin=dict(l=10, r=60, t=50, b=10),
        xaxis=dict(gridcolor="#f1f5f9", title="TF-IDF Weight"),
        yaxis=dict(title=""),
    )
    return profile_html, fig
# TAB 5 — FDA LIVE INTELLIGENCE
# ────────────────────────────────────────────────────────────────
# Registry (INN-style) name -> the name the US FDA data uses.
FDA_NAME_MAP = {
    "Paracetamol": "acetaminophen", "Azithromycin": "azithromycin",
    "Ciprofloxacin": "ciprofloxacin", "Amoxicillin": "amoxicillin",
    "Omeprazole": "omeprazole", "Metformin": "metformin",
    "Atorvastatin": "atorvastatin", "Amlodipine": "amlodipine",
    "Ceftriaxone": "ceftriaxone", "Diclofenac": "diclofenac sodium",
    "Esomeprazole": "esomeprazole", "Cefixime": "cefixime",
    "Salbutamol": "albuterol", "Ibuprofen": "ibuprofen",
    "Metronidazole": "metronidazole", "Cefuroxime": "cefuroxime",
}

# Each openFDA endpoint exposes drug names under different field paths; the
# label paths do not exist on the event and NDC endpoints.
_FDA_SEARCH_FIELDS = {
    "label": ("openfda.generic_name", "openfda.brand_name"),
    "event": ("patient.drug.openfda.generic_name",
              "patient.drug.openfda.brand_name",
              "patient.drug.medicinalproduct"),
    "ndc": ("generic_name", "brand_name"),
}


def _fda_fetch(drug, endpoint):
    """Query one openFDA drug endpoint for ``drug``.

    Args:
        drug: Drug name as typed by the user; mapped through
            ``FDA_NAME_MAP`` (case-insensitively) when known.
        endpoint: "label", "event" or "ndc".

    Returns:
        ``(results, term)`` — up to 3 result dicts from the first search
        field that matches (``[]`` when none does or the service cannot be
        reached) and the term that was actually searched.
    """
    name = (drug or "").strip()
    known = {key.lower(): value for key, value in FDA_NAME_MAP.items()}
    term = known.get(name.lower(), name.lower()).replace('"', " ").strip()
    base = f"https://api.fda.gov/drug/{endpoint}.json"
    for field in _FDA_SEARCH_FIELDS.get(endpoint, _FDA_SEARCH_FIELDS["label"]):
        try:
            # Quoting keeps a multi-word term ("diclofenac sodium") as one phrase.
            resp = requests.get(base, params={"search": f'{field}:"{term}"', "limit": "3"},
                                timeout=9)
            if resp.status_code == 200:
                found = resp.json().get("results", [])
                if found:
                    return found, term
        except (requests.RequestException, ValueError):
            # Network failure or a non-JSON body: try the next field.
            continue
    return [], term


def tab5_fda(drug, ep_label):
    """Render live openFDA records for ``drug`` as HTML.

    Args:
        drug: Drug name from the textbox.
        ep_label: "Drug Labels", "Adverse Events (FAERS)" or "NDC
            Directory"; anything else falls back to labels.

    Returns:
        An HTML string: a placeholder for empty input, a "not found" panel
        when nothing is returned, otherwise one card per record (max 3).

    NOTE(review): the HTML tags of this function were missing from the
    reviewed copy of the file. The markup below is rebuilt from the class
    names declared in ``CSS``; inline styles are a best guess — confirm
    against the deployed app.
    """
    from html import escape  # function-scope import keeps the block self-contained

    def text(value, limit=None, fallback="—"):
        """Return a value as escaped display text; lists are comma-joined."""
        if isinstance(value, (list, tuple)):
            value = ", ".join(str(v) for v in value)
        shown = "" if value is None else str(value).strip()
        if not shown:
            return fallback
        return escape(shown if limit is None else shown[:limit])

    def first(record, key, limit):
        """Return the first entry of a list-valued label field."""
        values = record.get(key)
        return text(values[0], limit=limit) if values else "—"

    def card(title, rows):
        body = "".join(f'<tr><td class="fk">{label}</td><td>{value}</td></tr>'
                       for label, value in rows)
        return (f'<div class="fda-card"><div class="fda-num">{title}</div>'
                f'<table class="fda-tbl">{body}</table></div>')

    if not (drug or "").strip():
        return '<div class="ph">🏥 Enter a drug name to fetch live FDA data</div>'
    ep_map = {"Drug Labels": "label", "Adverse Events (FAERS)": "event",
              "NDC Directory": "ndc"}
    ep = ep_map.get(ep_label, "label")
    results, term = _fda_fetch(drug, ep)
    if not results:
        return (
            '<div class="fda-miss">'
            '<div style="font-size:2rem">🔍</div>'
            f'<b>No FDA data found for "{escape(drug)}"</b><br>'
            'This drug may not be in the US FDA database '
            '(common for Bangladesh-registry drugs).<br>'
            'Try: <code>Paracetamol</code> · <code>Azithromycin</code> · '
            '<code>Ciprofloxacin</code> · <code>Omeprazole</code> · '
            '<code>Metformin</code> · <code>Ibuprofen</code>'
            '</div>'
        )

    parts = [
        '<div class="fda-hdr">'
        f'<span class="fda-badge">🇺🇸 FDA {escape(str(ep_label))}</span> '
        f'<b>{escape(drug)}</b> → searched as <code>{escape(term)}</code> '
        f'<span class="fda-cnt">{len(results)} record(s)</span>'
        '</div>'
    ]
    for i, res in enumerate(results[:3], 1):
        if ep == "label":
            meta = res.get("openfda", {})
            parts.append(card(f"📄 Record {i}", [
                ("Brand Name", text(meta.get("brand_name", ["—"])[:2])),
                ("Generic Name", text(meta.get("generic_name", ["—"])[:2])),
                ("Manufacturer", text(meta.get("manufacturer_name", ["—"])[:1])),
                ("Purpose", first(res, "purpose", 280)),
                ("Indications", first(res, "indications_and_usage", 380)),
                ("Warnings", first(res, "warnings", 280)),
            ]))
        elif ep == "event":
            patient = res.get("patient", {})
            reactions = [r.get("reactionmeddrapt", "") for r in patient.get("reaction", [])[:6]]
            drugs = [d.get("medicinalproduct", "") for d in patient.get("drug", [])[:4]]
            severity = "⚠️ Serious" if res.get("serious") == "1" else "ℹ️ Non-Serious"
            parts.append(card(f"Event {i} — {severity}", [
                ("Reactions", text(reactions)),
                ("Drugs Involved", text(drugs)),
            ]))
        elif ep == "ndc":
            parts.append(card(f"NDC {i}", [
                ("NDC Code", text(res.get("product_ndc"))),
                ("Brand", text(res.get("brand_name"))),
                ("Generic", text(res.get("generic_name"))),
                ("Dosage Form", text(res.get("dosage_form"))),
                ("Route", text(res.get("route"))),
                ("Labeler", text(res.get("labeler_name"))),
            ]))
    return "".join(parts)


# ────────────────────────────────────────────────────────────────
# 7. TAB 6 — AI MEDICAL Q&A (HuggingFace Inference API)
# ────────────────────────────────────────────────────────────────
SYS_PROMPT = (
    "You are PharmaBridge AI — a knowledgeable, friendly pharmaceutical assistant. "
    "You help healthcare professionals and students understand drug information, "
    "pharmacology, traditional medicine (Ayurvedic, Unani, Homeopathic, Herbal), "
    "drug interactions, and the Bangladesh drug registry. "
    "Be concise, accurate, and always note that answers are educational, "
    "not a substitute for professional medical advice."
)
# Tried in order; the first model that returns a usable answer wins.
HF_MODELS = [
    "mistralai/Mistral-7B-Instruct-v0.3",
    "HuggingFaceH4/zephyr-7b-beta",
    "google/flan-t5-xxl",
]


def tab6_ai(question, history):
    """Answer ``question`` through the HuggingFace Inference API.

    Args:
        question: The user's question.
        history: Existing chat history as a list of ``(question, answer)``
            tuples, or None.

    Returns:
        ``(history, "")`` — the history with the new exchange appended (a
        new list; the argument is not modified) and an empty string that
        clears the input box. Blank questions return the history unchanged.
        When no model yields more than 30 characters, a "warming up" notice
        is used as the answer.
    """
    if not (question or "").strip():
        return history, ""
    history = list(history or [])
    prompt = f"[INST] {SYS_PROMPT}\n\nQuestion: {question} [/INST]"
    headers = {"Content-Type": "application/json"}
    answer = ""
    for model in HF_MODELS:
        model_url = f"https://api-inference.huggingface.co/models/{model}"
        if "flan" in model_url:
            # flan-t5 uses different format
            payload = {"inputs": f"As a pharmacist, answer clearly: {question}",
                       "parameters": {"max_new_tokens": 350}}
        else:
            payload = {
                "inputs": prompt,
                "parameters": {"max_new_tokens": 500, "temperature": 0.65,
                               "top_p": 0.9, "repetition_penalty": 1.1,
                               "return_full_text": False},
            }
        try:
            resp = requests.post(model_url, headers=headers, json=payload, timeout=28)
            if resp.status_code != 200:
                continue
            data = resp.json()
        except (requests.RequestException, ValueError):
            # Unreachable model or non-JSON body: fall through to the next one.
            continue
        # The payload is either a list of generations or a single dict.
        first = data[0] if isinstance(data, list) and data else data
        generated = first.get("generated_text", "") if isinstance(first, dict) else ""
        generated = str(generated).strip()
        if len(generated) > 30:
            answer = generated
            break
    if not answer:
        answer = (
            "⚠️ The AI model is warming up (HuggingFace free tier cold-start). "
            "Please wait ~20 seconds and try again.\n\n"
            "**Meanwhile**, you can:\n"
            "- Use the **Smart Search** tab to look up this drug directly\n"
            "- Use the **FDA Live Data** tab for official drug information"
        )
    history.append((question, answer))
    return history, ""
def tab6_clear():
    """Reset the chat: empty history and empty input box."""
    return [], ""


# ────────────────────────────────────────────────────────────────
# 8. TAB 7 — DRUG EXPLORER (browse & filter)
# ────────────────────────────────────────────────────────────────
_ALL_DOS = ["All"] + sorted(DF["dosage_form"].dropna().unique().tolist())


def _dos_choices(sys):
    """Return a dropdown update listing the dosage forms of system ``sys``.

    "All" offers every dosage form in the dataset; the selection is reset
    to "All" in both cases.
    """
    if sys == "All":
        return gr.update(choices=_ALL_DOS, value="All")
    in_system = DF[DF["medical_system"] == sys]["dosage_form"].dropna().unique().tolist()
    return gr.update(choices=["All"] + sorted(in_system), value="All")


def tab7_explore(system, dosage, search, page):
    """Render one page (20 rows) of the filtered drug table.

    Args:
        system: Medical system to keep, or "All".
        dosage: Dosage form to keep, or "All".
        search: Case-insensitive substring looked for in brand name,
            compound, generic name and manufacturer; blank disables it.
        page: 1-based page number; clamped to the valid range.

    Returns:
        ``(table_html, markdown_status)``.

    NOTE(review): the HTML tags of this function were missing from the
    reviewed copy of the file. The markup below is rebuilt from the class
    names declared in ``CSS``; inline styles are a best guess — confirm
    against the deployed app.
    """
    from html import escape  # function-scope import keeps the block self-contained

    def text(value, limit=None, fallback="—"):
        """Return a cell as escaped display text; None/NaN/"" give *fallback*."""
        if value is None or (isinstance(value, float) and value != value):
            return fallback
        shown = str(value).strip()
        if not shown:
            return fallback
        return escape(shown if limit is None else shown[:limit])

    subset = DF  # boolean indexing below returns new frames; DF is never modified
    if system != "All":
        subset = subset[subset["medical_system"] == system]
    if dosage != "All":
        subset = subset[subset["dosage_form"] == dosage]
    needle = (search or "").strip().lower()
    if needle:
        hit = pd.Series(False, index=subset.index)
        for column in ("brand_name", "gns", "generic_name", "manufacturer"):
            # regex=False: the box holds plain text, and characters such as
            # "(" or "+" must not be interpreted as a pattern.
            hit |= (subset[column].fillna("").astype(str).str.lower()
                    .str.contains(needle, regex=False, na=False))
        subset = subset[hit]

    total = len(subset)
    page_size = 20
    try:
        page = int(page)
    except (TypeError, ValueError):
        page = 1  # cleared or malformed page box: start from the first page
    last_page = max(1, (total + page_size - 1) // page_size)
    page = min(max(1, page), last_page)
    window = subset.iloc[(page - 1) * page_size: page * page_size]
    if window.empty:
        return '<div class="ph">No records match your filters.</div>', "0 records"

    body = []
    for _, row in window.iterrows():
        raw_system = str(row.get("medical_system", ""))
        colour = SC.get(raw_system, "#6B7280")
        icon = EMOJI.get(raw_system, "💊")
        compound = text(row.get("gns"), limit=46, fallback="") or text(row.get("generic_name"), limit=46)
        body.append(
            f'<tr>'
            f'<td><b>{text(row.get("brand_name"))}</b></td>'
            f'<td>{compound}</td>'
            f'<td>{text(row.get("dosage_form"))}</td>'
            f'<td><span class="sb2" style="background:{colour}22;color:{colour}">'
            f'{icon} {escape(raw_system)}</span></td>'
            f'<td>{text(row.get("manufacturer"), limit=36)}</td>'
            f'</tr>'
        )
    table = (
        '<table class="xtbl"><thead><tr>'
        '<th>Brand Name</th><th>Compound / Identity</th>'
        '<th>Dosage Form</th><th>System</th><th>Manufacturer</th>'
        '</tr></thead><tbody>' + "".join(body) + '</tbody></table>'
    )
    return table, f"Page **{page}** / {last_page} &nbsp;·&nbsp; **{total:,}** records"
""" return tbl, f"Page **{page}** / {maxp} · **{total:,}** records" # ──────────────────────────────────────────────────────────────── # 9. CSS # ──────────────────────────────────────────────────────────────── CSS=""" @import url('https://fonts.googleapis.com/css2?family=Inter:ital,wght@0,300;0,400;0,500;0,600;0,700;0,800;1,400&display=swap'); *{box-sizing:border-box} body,.gradio-container{font-family:'Inter',sans-serif!important;background:#f0f4f8!important} /* ── HEADER ─────────────────────────────────────────────── */ .app-hdr{ background:linear-gradient(135deg,#0f172a 0%,#1e3a8a 45%,#0369a1 100%); border-radius:18px;padding:28px 32px;margin-bottom:4px;color:#fff; box-shadow:0 10px 40px rgba(30,58,138,.35); } .app-title{font-size:2.1rem;font-weight:800;letter-spacing:-1px;margin:0} .app-sub{font-size:1rem;opacity:.82;margin:6px 0 0} .hbadges{display:flex;gap:8px;margin-top:14px;flex-wrap:wrap} .hbadge{background:rgba(255,255,255,.16);border:1px solid rgba(255,255,255,.28); border-radius:20px;padding:4px 13px;font-size:.78rem;font-weight:500} .stats-row{display:flex;gap:10px;margin-top:16px;flex-wrap:wrap} .stat{background:rgba(255,255,255,.12);border-radius:12px;padding:8px 16px;text-align:center;min-width:88px} .sn{font-size:1.45rem;font-weight:800;display:block} .sl{font-size:.7rem;opacity:.78;text-transform:uppercase;letter-spacing:.5px} /* ── TABS ────────────────────────────────────────────────── */ .tab-nav button{font-weight:500!important;font-size:.88rem!important;border-radius:8px 8px 0 0!important} .tab-nav button.selected{color:#1d4ed8!important;border-bottom:3px solid #1d4ed8!important;font-weight:700!important} /* ── INPUTS ──────────────────────────────────────────────── */ .gr-input,textarea,.gr-dropdown select{ border-radius:10px!important;border:1.5px solid #e2e8f0!important; font-family:'Inter',sans-serif!important;transition:border-color .2s!important; } .gr-input:focus,textarea:focus{border-color:#3b82f6!important;box-shadow:0 0 0 3px 
rgba(59,130,246,.1)!important} .gr-button-primary{ background:linear-gradient(135deg,#1d4ed8,#0891b2)!important; border:none!important;border-radius:10px!important;font-weight:700!important; letter-spacing:.2px!important;box-shadow:0 4px 14px rgba(29,78,216,.3)!important; transition:transform .15s,box-shadow .15s!important; } .gr-button-primary:hover{transform:translateY(-1px)!important;box-shadow:0 6px 22px rgba(29,78,216,.4)!important} /* ── PLACEHOLDERS ────────────────────────────────────────── */ .ph{text-align:center;color:#94a3b8;padding:60px 20px;font-size:.98rem; background:#f8fafc;border-radius:14px;border:2px dashed #e2e8f0} /* ── RESULT CARDS ────────────────────────────────────────── */ .rh{font-size:.93rem;color:#475569;padding:10px 0 14px; border-bottom:1px solid #e2e8f0;margin-bottom:14px} .grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(270px,1fr));gap:12px} .card{background:#fff;border-radius:13px;padding:14px 16px; box-shadow:0 1px 4px rgba(0,0,0,.06);transition:transform .15s,box-shadow .15s} .card:hover{transform:translateY(-2px);box-shadow:0 5px 18px rgba(0,0,0,.10)} .ch{display:flex;justify-content:space-between;align-items:center;margin-bottom:8px} .sbadge{font-size:.71rem;font-weight:600;padding:3px 9px;border-radius:20px;white-space:nowrap} .spct{font-size:.74rem;font-weight:700;padding:3px 9px;border-radius:20px} .bn{font-size:1.05rem;font-weight:700;color:#1e293b;margin-bottom:4px} .gn{font-size:.81rem;color:#64748b;margin-bottom:9px;min-height:1.2em} .meta{font-size:.77rem;color:#94a3b8;margin-bottom:10px;line-height:1.8} .bar{height:4px;background:#f1f5f9;border-radius:2px;overflow:hidden} .fill{height:100%;border-radius:2px;transition:width .4s} /* ── CROSS COMPARE ───────────────────────────────────────── */ .cph{font-size:.96rem;color:#475569;padding:10px 0 16px;font-weight:500} .cgrid{display:grid;grid-template-columns:repeat(5,1fr);gap:11px} @media(max-width:900px){.cgrid{grid-template-columns:repeat(2,1fr)}} 
.scol{background:#fff;border-radius:13px;padding:14px;box-shadow:0 1px 4px rgba(0,0,0,.06)} .stitle{font-weight:700;font-size:.93rem;margin-bottom:12px} .nr{color:#94a3b8;font-size:.84rem;padding:10px 0} .cc{padding:10px;margin-bottom:8px;border-radius:9px;background:#f8fafc} .cbn{font-weight:700;font-size:.88rem;color:#1e293b} .cgn{font-size:.77rem;color:#64748b;margin:3px 0} .cm{font-size:.74rem;color:#94a3b8} .sbar{height:3px;background:#f1f5f9;border-radius:2px;overflow:hidden;margin-top:6px} .sfill{height:100%;border-radius:2px} /* ── FINGERPRINT ─────────────────────────────────────────── */ .fp-banner{border-radius:0} .fp-row{display:flex;flex-direction:column;background:#f8fafc;border-radius:10px;padding:10px 14px} .fp-k{font-size:.74rem;color:#64748b;font-weight:600;text-transform:uppercase;letter-spacing:.4px} .fp-v{font-size:.95rem;color:#1e293b;font-weight:500;margin-top:2px} /* ── FDA ─────────────────────────────────────────────────── */ .fda-hdr{background:linear-gradient(135deg,#eff6ff,#e0f2fe);border-radius:11px; padding:14px 18px;margin-bottom:14px;display:flex;align-items:center; gap:10px;flex-wrap:wrap;font-size:.88rem;color:#1e293b} .fda-badge{background:#1d4ed8;color:#fff;padding:4px 11px;border-radius:20px; font-size:.77rem;font-weight:600} .fda-cnt{margin-left:auto;background:#dcfce7;color:#166534;padding:3px 10px; border-radius:20px;font-size:.77rem;font-weight:600} .fda-miss{text-align:center;padding:40px;color:#64748b;background:#f8fafc; border-radius:14px;border:2px dashed #e2e8f0} .fda-card{background:#fff;border-radius:13px;padding:18px;margin-bottom:12px; box-shadow:0 1px 4px rgba(0,0,0,.06)} .fda-num{font-weight:700;font-size:.88rem;color:#1d4ed8;margin-bottom:10px} .fda-tbl{width:100%;border-collapse:collapse;font-size:.84rem} .fda-tbl tr{border-bottom:1px solid #f1f5f9} .fda-tbl tr:last-child{border-bottom:none} .fk{color:#64748b;font-weight:600;padding:6px 14px 6px 0;white-space:nowrap; vertical-align:top;width:130px} .fda-tbl 
td:last-child{color:#1e293b;padding:6px 0;line-height:1.55} /* ── CHATBOT ─────────────────────────────────────────────── */ .chatbot{border-radius:13px!important;border:1.5px solid #e2e8f0!important} /* ── EXPLORER TABLE ──────────────────────────────────────── */ .xtbl{width:100%;border-collapse:collapse;font-size:.83rem} .xtbl thead{background:linear-gradient(135deg,#0f172a,#1d4ed8);color:#fff} .xtbl th{padding:11px 14px;text-align:left;font-weight:600;letter-spacing:.3px} .xtbl tbody tr{border-bottom:1px solid #f1f5f9;transition:background .15s} .xtbl tbody tr:hover{background:#f8fafc} .xtbl td{padding:9px 14px;color:#1e293b;vertical-align:top} .sb2{font-size:.71rem;font-weight:600;padding:2px 8px;border-radius:20px;white-space:nowrap} code{background:#f1f5f9;padding:2px 7px;border-radius:5px;font-size:.84em;color:#0891b2} """ # ──────────────────────────────────────────────────────────────── # 10. BUILD GRADIO APP # ──────────────────────────────────────────────────────────────── HEADER = f"""
💊 PharmaBridge
Cross-Medical-System Drug Intelligence Engine · Bangladesh National Drug Registry
🔬 TF-IDF + Cosine Similarity 🧠 SVD + K-Means Clustering 🌐 OpenFDA Live API 🤖 Mistral-7B AI Assistant 📊 Interactive Dashboards
53,584Total Drugs
5Med. Systems
725Manufacturers
12,311TF-IDF Features
95.5%Precision@10
0.2159Silhouette
""" with gr.Blocks(css=CSS, title="PharmaBridge", theme=gr.themes.Base( primary_hue=gr.themes.colors.blue, font=gr.themes.GoogleFont("Inter"), )) as app: gr.HTML(HEADER) with gr.Tabs(elem_classes="tab-nav"): # ── TAB 1 ───────────────────────────────────────────────── with gr.Tab("🔍 Smart Search"): with gr.Row(equal_height=True): with gr.Column(scale=4): t1q = gr.Textbox(label="Search Query", placeholder="Try: Azithromycin, Ashwagandha, nux vomica, sharbat amrood, paracetamol fever…", lines=1) with gr.Column(scale=1): t1sys = gr.Dropdown(choices=SYSTEMS, value="All Systems", label="System") with gr.Column(scale=1): t1btn = gr.Button("🔍 Search", variant="primary", scale=1) with gr.Row(): t1n = gr.Slider(5,50,value=12,step=1,label="Max Results") t1s = gr.Slider(0.0,0.5,value=0.04,step=0.01,label="Min Similarity") t1stat = gr.Markdown("") t1cards = gr.HTML('
🔍 Enter a drug name, compound, or symptom above
') t1chart = gr.Plot(label="Score Distribution") t1btn.click(tab1,[t1q,t1sys,t1n,t1s],[t1cards,t1chart,t1stat]) t1q.submit(tab1,[t1q,t1sys,t1n,t1s],[t1cards,t1chart,t1stat]) gr.Examples([ ["Azithromycin 500mg","Allopathic"], ["Ashwagandha capsule","Ayurvedic"], ["Nux Vomica liquid","Homeopathic"], ["Sharbat Amrood","Unani"], ["Moringa leaf powder","Herbal"], ["antibiotic tablet","All Systems"], ["digestive capsule","All Systems"], ], inputs=[t1q,t1sys], label="Quick Examples") # ── TAB 2 ───────────────────────────────────────────────── with gr.Tab("⚖️ Cross-System Compare"): with gr.Row(equal_height=True): with gr.Column(scale=5): t2q = gr.Textbox(label="Query", placeholder="e.g. pain relief tablet, digestive liver, sleep anxiety, blood pressure…", lines=1) with gr.Column(scale=1): t2n = gr.Slider(1,5,value=3,step=1,label="Results / System") with gr.Column(scale=1): t2btn = gr.Button("⚖️ Compare", variant="primary") t2cards = gr.HTML('
Compare the same therapeutic need across all 5 medical traditions simultaneously
') t2radar = gr.Plot(label="Cross-System Similarity Radar") t2btn.click(tab2,[t2q,t2n],[t2cards,t2radar]) t2q.submit(tab2,[t2q,t2n],[t2cards,t2radar]) gr.Examples([ ["digestive liver tablet"],["pain anti-inflammatory"], ["antibiotic infection"],["blood pressure"], ["cough respiratory"],["sleep anxiety stress"], ], inputs=[t2q]) # ── TAB 3 ───────────────────────────────────────────────── with gr.Tab("📊 Dataset Analytics"): with gr.Tabs(): with gr.Tab("🌐 Overview Dashboard"): ov_btn = gr.Button("📊 Render Dashboard", variant="primary") ov_fig = gr.Plot() ov_btn.click(_overview_fig,[],[ov_fig]) app.load(_overview_fig,[],[ov_fig]) with gr.Tab("🔎 System Deep Dive"): with gr.Row(): dd_sys = gr.Dropdown( choices=["All","Allopathic","Ayurvedic","Unani","Homeopathic","Herbal"], value="Allopathic", label="Select System") dd_btn = gr.Button("Analyze", variant="primary") dd_fig = gr.Plot() dd_btn.click(_deep_fig,[dd_sys],[dd_fig]) dd_sys.change(_deep_fig,[dd_sys],[dd_fig]) app.load(lambda:_deep_fig("Allopathic"),[],[dd_fig]) with gr.Tab("🗺️ Treemap Explorer"): tm_btn = gr.Button("🗺️ Render Treemap", variant="primary") tm_fig = gr.Plot() tm_btn.click(_treemap_fig,[],[tm_fig]) app.load(_treemap_fig,[],[tm_fig]) # ── TAB 4 ───────────────────────────────────────────────── with gr.Tab("🧬 Drug Fingerprint"): gr.Markdown(""" ### Single Drug Deep-Dive Search for any drug to see its **full profile card** plus a bar chart of its top TF-IDF feature weights — the exact tokens driving its similarity scores. """) with gr.Row(equal_height=True): fp_q = gr.Textbox(label="Brand Name or Compound", placeholder="e.g. Azithromycin, Ashwagandha, Nux Vomica, Sharbat Amrood…", lines=1) fp_btn = gr.Button("🧬 Profile", variant="primary") fp_card = gr.HTML('
🧬 Enter a drug or compound name to generate its fingerprint
') fp_fig = gr.Plot(label="TF-IDF Feature Fingerprint") fp_btn.click(tab4_fingerprint,[fp_q],[fp_card,fp_fig]) fp_q.submit(tab4_fingerprint,[fp_q],[fp_card,fp_fig]) gr.Examples([ ["Azithromycin"],["Ashwagandha"],["Nux Vomica"], ["Sharbat Amrood"],["Moringa"],["Paracetamol"], ], inputs=[fp_q]) # ── TAB 5 ───────────────────────────────────────────────── with gr.Tab("🏥 FDA Live Data"): gr.Markdown("> **Live OpenFDA API** — US drug labels, adverse events (FAERS), and NDC records. " "~40% of Bangladesh registry drugs appear here. Bangladeshi names auto-mapped to FDA terms.") with gr.Row(equal_height=True): fda_drug = gr.Textbox(label="Drug Name", placeholder="Paracetamol, Azithromycin, Ciprofloxacin, Omeprazole, Metformin…", lines=1) fda_ep = gr.Radio(["Drug Labels","Adverse Events (FAERS)","NDC Directory"], value="Drug Labels", label="FDA Database") fda_btn = gr.Button("🔎 Fetch", variant="primary") fda_out = gr.HTML('
🏥 Enter a drug name and click Fetch
') fda_btn.click(tab5_fda,[fda_drug,fda_ep],[fda_out]) fda_drug.submit(tab5_fda,[fda_drug,fda_ep],[fda_out]) gr.Examples([["Paracetamol"],["Azithromycin"],["Ciprofloxacin"], ["Omeprazole"],["Metformin"],["Ibuprofen"]], inputs=[fda_drug]) # ── TAB 6 ───────────────────────────────────────────────── with gr.Tab("🤖 AI Medical Q&A"): gr.Markdown(""" ### PharmaBridge AI — Pharmaceutical Q&A Powered by **Mistral-7B-Instruct** via HuggingFace Inference API (free, no key needed). Ask anything about drugs, pharmacology, traditional medicine, or the Bangladesh registry. > ⚠️ Educational only — not a substitute for professional medical advice. Model may take ~20s to cold-start. """) ai_bot = gr.Chatbot(label="PharmaBridge AI", height=450, elem_classes="chatbot") with gr.Row(): ai_inp = gr.Textbox(label="Your Question", lines=2, scale=5, placeholder="e.g. What is Ashwagandha used for? / Side effects of Azithromycin? / What is Unani medicine?") with gr.Column(scale=1): ai_send = gr.Button("Send 💬", variant="primary") ai_clear = gr.Button("Clear 🗑️") ai_send.click(tab6_ai,[ai_inp,ai_bot],[ai_bot,ai_inp]) ai_inp.submit(tab6_ai,[ai_inp,ai_bot],[ai_bot,ai_inp]) ai_clear.click(tab6_clear,[],[ai_bot,ai_inp]) gr.Examples([ ["What is Ashwagandha used for in Ayurvedic medicine?"], ["Explain Unani medicine and its traditional formulations"], ["What are the common side effects of Azithromycin?"], ["How does TF-IDF cosine similarity work for drug retrieval?"], ["What is Homeopathic potency and how are remedies prepared?"], ["Compare Allopathic and Herbal medicine approaches"], ], inputs=[ai_inp]) # ── TAB 7 ───────────────────────────────────────────────── with gr.Tab("📋 Drug Explorer"): with gr.Row(): ex_sys = gr.Dropdown(["All","Allopathic","Ayurvedic","Unani","Homeopathic","Herbal"], value="All", label="System") ex_dos = gr.Dropdown(choices=_ALL_DOS, value="All", label="Dosage Form") ex_srch = gr.Textbox(label="Search", placeholder="Brand, compound, manufacturer…") ex_pg = 
gr.Number(value=1, label="Page", minimum=1, precision=0) ex_btn = gr.Button("🔍 Browse Database", variant="primary") ex_info = gr.Markdown("") ex_tbl = gr.HTML('
Click Browse to explore all 53,584 drug records
') ex_sys.change(_dos_choices,[ex_sys],[ex_dos]) ex_btn.click(tab7_explore,[ex_sys,ex_dos,ex_srch,ex_pg],[ex_tbl,ex_info]) ex_srch.submit(tab7_explore,[ex_sys,ex_dos,ex_srch,ex_pg],[ex_tbl,ex_info]) # ── TAB 8 ───────────────────────────────────────────────── with gr.Tab("ℹ️ About"): gr.Markdown(f""" ## PharmaBridge — Cross-Medical-System Drug Intelligence **PharmaBridge** is a master's thesis project — the first NLP-based drug recommendation system spanning all 5 major South Asian pharmaceutical traditions simultaneously using the Bangladesh National Drug Registry (53,584 records). --- ### Dataset Composition | Medical System | Records | Share | |---|---|---| | Allopathic | 36,254 | 67.7% | | Unani | 8,460 | 15.8% | | Ayurvedic | 5,262 | 9.8% | | Homeopathic | 2,580 | 4.8% | | Herbal | 1,028 | 1.9% | | **Total** | **53,584** | **100%** | ### Technical Architecture | Component | Configuration | |---|---| | Vectorization | TF-IDF, bigrams (1,2), max_features=15,000, sublinear_tf=True | | Retrieval | Cosine Similarity on sparse matrix (53,584 × 12,311) | | Dim. Reduction | TruncatedSVD, 50 components, 26.2% variance | | Clustering | K-Means K=10 (elbow-selected), Silhouette=0.2159 | ### Evaluation Results | Metric | Value | |---|---| | Precision@5 | 97.00% | | Precision@10 | 95.50% | | Precision@20 | 90.55% | | Silhouette Score | 0.2159 | ### App Features | Tab | Feature | |---|---| | 🔍 Smart Search | TF-IDF cosine retrieval with rich card UI + bar chart | | ⚖️ Cross-System Compare | Side-by-side 5-system view + radar chart | | 📊 Dataset Analytics | Overview dashboard, deep-dive, treemap | | 🧬 Drug Fingerprint | Single drug profile + TF-IDF feature bar chart | | 🏥 FDA Live Data | OpenFDA labels / adverse events / NDC lookup | | 🤖 AI Medical Q&A | Mistral-7B via HuggingFace Inference API | | 📋 Drug Explorer | Paginated browse & filter across all 53,584 records | --- > **Disclaimer:** For research and educational purposes only. 
> Not intended for clinical decision-making. > Always consult a qualified healthcare professional for medical advice. """) if __name__ == "__main__": app.launch()