"""
Cross-Medical-System Drug Recommender v4.0
4 Tabs: Search | Cross-Compare | FDA Live | Dashboard
Symptoms + Medicine dropdown, Plotly dashboard, OpenFDA API
"""
import gradio as gr
import pandas as pd
import numpy as np
import joblib, json, os, re, warnings, requests
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
warnings.filterwarnings("ignore")
# ── Constants ────────────────────────────────────────────────────
# Root URL shared by every OpenFDA drug endpoint queried in this file.
OPENFDA_BASE = "https://api.fda.gov/drug"
# Model artefacts are looked up in a "models" folder next to this source file.
MODEL_DIR = os.path.join(os.path.dirname(__file__), "models")
# Hex colour assigned to each medical system; chart helpers look names up here.
SYSTEM_COLORS = dict(
    Allopathic="#3b82f6",
    Unani="#f97316",
    Ayurvedic="#22c55e",
    Homeopathic="#a855f7",
    Herbal="#ef4444",
)
# ── Dropdown options: Symptoms AND Medicines ─────────────────────
# Maps each dropdown label (emoji + human-readable text) to the free-text
# query string that is cleaned and vectorised for the similarity search.
# The two section-header rows carry None instead of a query string.
SEARCH_OPTIONS = {
    # ══ SYMPTOMS / CONDITIONS ════════════════════════════════════
    "── SYMPTOMS & CONDITIONS ──": None,  # section header row: has no query
    "🤒 Fever": "fever paracetamol tablet",
    "🩹 Body Pain / General Pain": "pain relief tablet",
    "🤕 Headache": "headache pain tablet",
    "😤 Cold & Runny Nose": "cold antihistamine tablet",
    "😮💨 Cough": "cough syrup liquid",
    "🫁 Asthma / Breathing Difficulty": "asthma bronchodilator tablet",
    "🤮 Nausea & Vomiting": "nausea vomiting tablet",
    "🫃 Acidity / Heartburn": "acidity antacid capsule",
    "💩 Diarrhea": "diarrhea tablet",
    "🤢 Stomach Pain": "stomach pain tablet",
    "😴 Anxiety / Sleeplessness": "anxiety sleep tablet",
    "🫀 High Blood Pressure": "hypertension blood pressure tablet",
    "🩸 High Blood Sugar (Diabetes)": "diabetes blood sugar tablet",
    "🦴 Joint Pain / Arthritis": "joint pain inflammation tablet",
    "🦷 Tooth / Ear Infection": "antibiotic infection capsule",
    "👁️ Eye Infection": "eye drops infection",
    "🩺 Urinary Tract Infection (UTI)": "urinary tract infection antibiotic tablet",
    "🌿 Worm / Parasite Infection": "deworming tablet",
    "🫧 Skin Fungal Infection": "antifungal cream tablet",
    "😵 Dizziness / Vertigo": "vertigo dizziness tablet",
    # ══ MEDICINES BY NAME ════════════════════════════════════════
    "── MEDICINES BY NAME ──": None,  # section header row: has no query
    # Antibiotics
    "🦠 Azithromycin — Antibiotic": "Azithromycin 500mg tablet",
    "🦠 Amoxicillin — Antibiotic": "Amoxicillin 500mg capsule",
    "🦠 Ciprofloxacin — Antibiotic": "Ciprofloxacin 500mg tablet",
    "🦠 Metronidazole — Antibiotic": "Metronidazole 400mg tablet",
    "🦠 Ceftriaxone — Injection": "Ceftriaxone 1gm injection",
    "🦠 Levofloxacin — Antibiotic": "Levofloxacin 500mg tablet",
    # Pain & Fever
    "🤒 Paracetamol — Fever/Pain": "Paracetamol 500mg tablet",
    "🤒 Diclofenac — Anti-inflammatory": "Diclofenac Sodium 50mg tablet",
    "🤒 Naproxen — Pain (Joints)": "Naproxen 250mg tablet",
    "🤒 Ibuprofen — Pain/Fever": "Ibuprofen 400mg tablet",
    # Heart & BP
    "💓 Amlodipine — Blood Pressure": "Amlodipine 5mg tablet",
    "💓 Atorvastatin — Cholesterol": "Atorvastatin 20mg tablet",
    "💓 Losartan — Hypertension": "Losartan Potassium 50mg tablet",
    "💓 Metoprolol — Beta Blocker": "Metoprolol 50mg tablet",
    # Diabetes
    "🩺 Metformin — Diabetes": "Metformin Hydrochloride 500mg tablet",
    "🩺 Glibenclamide — Blood Sugar": "Glibenclamide 5mg tablet",
    # Respiratory & Allergy
    "🫁 Salbutamol — Asthma": "Salbutamol 2mg tablet syrup",
    "🫁 Montelukast — Allergy/Asthma": "Montelukast 10mg tablet",
    "🫁 Fexofenadine — Antihistamine": "Fexofenadine 120mg tablet",
    "🫁 Cetirizine — Antihistamine": "Cetirizine 10mg tablet",
    # Neuro
    "🧠 Pregabalin — Nerve Pain": "Pregabalin 75mg capsule",
    "🧠 Clonazepam — Anxiety/Seizure": "Clonazepam 0.5mg tablet",
    # GI / Stomach
    "🫃 Omeprazole — Acid Reflux": "Omeprazole 20mg capsule",
    "🫃 Esomeprazole — GERD": "Esomeprazole 40mg capsule",
    "🫃 Domperidone — Nausea": "Domperidone 10mg tablet",
    "🫃 Ondansetron — Nausea": "Ondansetron 4mg tablet",
    # Antifungal / Deworming
    "🌿 Albendazole — Deworming": "Albendazole 400mg tablet",
    "🌿 Fluconazole — Antifungal": "Fluconazole 150mg capsule",
    # Vitamins
    "💊 Vitamin D3 — Bone/Immunity": "Cholecalciferol Vitamin D3 tablet",
    "💊 Zinc + Multivitamin": "Zinc Nicotinamide vitamin tablet",
}
# Every dropdown label in declaration order. The section-header rows are kept
# in this list; they are the entries whose SEARCH_OPTIONS value is None.
ALL_LABELS = [*SEARCH_OPTIONS]
# ── Load PKL Models ───────────────────────────────────────────────
def load_models():
    """Load the recommender artefacts stored under MODEL_DIR.

    Returns
    -------
    tuple
        ``(vectorizer, tfidf_matrix, drug_db, metadata)`` read from
        ``tfidf_vectorizer.pkl``, ``tfidf_matrix.pkl``, ``drug_database.csv``
        and ``model_metadata.json`` respectively.

    Raises
    ------
    Whatever joblib / pandas / open / json raise when a file is missing or
    unreadable; the caller is expected to handle that.
    """
    # NOTE(security): joblib.load unpickles arbitrary objects. Only ship model
    # files that were produced by a trusted training pipeline.
    vec = joblib.load(os.path.join(MODEL_DIR, "tfidf_vectorizer.pkl"))
    mat = joblib.load(os.path.join(MODEL_DIR, "tfidf_matrix.pkl"))
    db = pd.read_csv(os.path.join(MODEL_DIR, "drug_database.csv"))
    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # would fail on any non-ASCII character in the metadata.
    with open(os.path.join(MODEL_DIR, "model_metadata.json"), encoding="utf-8") as f:
        meta = json.load(f)
    return vec, mat, db, meta
# Load everything once at import time. Any failure (missing scikit-learn,
# missing files, malformed data) is reported on stdout and leaves the module
# in a degraded state signalled by MODEL_LOADED = False.
try:
    from sklearn.metrics.pairwise import cosine_similarity
    vectorizer, tfidf_matrix, drug_db, metadata = load_models()
    # dropna(): a single missing medical_system value would otherwise make
    # sorted() compare float NaN with str and raise TypeError, which would
    # discard an otherwise perfectly usable model.
    MEDICAL_SYSTEMS = ["All Systems"] + sorted(
        drug_db["medical_system"].dropna().unique().tolist()
    )
    MODEL_LOADED = True
    print(f"✅ Loaded {len(drug_db):,} drugs")
except Exception as e:
    print(f"Model load failed: {e}")
    MODEL_LOADED = False
    # Keep these names defined so later references fail on the MODEL_LOADED
    # guard rather than with a NameError.
    vectorizer = None
    tfidf_matrix = None
    drug_db = pd.DataFrame()
    metadata = {}
    MEDICAL_SYSTEMS = ["All Systems"]
# ── Helpers ───────────────────────────────────────────────────────
def _clean(t):
if pd.isna(t): return ""
t = re.sub(r"[^a-z0-9\s\+\-\.]", " ", str(t).lower())
return re.sub(r"\s+", " ", t).strip()
def _get_query(label):
    """Translate a dropdown label into the text to search for.

    * A label listed in SEARCH_OPTIONS returns its mapped query string.
    * A section-header label (mapped to None) returns None, so callers that
      test ``if not query`` can reject it. The previous ``get(label) or label``
      form returned the header text itself, which made that check unreachable.
    * Any other value (e.g. text typed by the user) is returned unchanged.
    """
    if label in SEARCH_OPTIONS:
        return SEARCH_OPTIONS[label]
    return label
def _extract_generic(label):
"""Extract the best single search term from a dropdown label for OpenFDA queries.
Examples
--------
'🦠 Azithromycin — Antibiotic' → 'Azithromycin'
'💊 Vitamin D3 — Bone/Immunity' → 'Cholecalciferol' (mapped)
'🤒 Paracetamol — Fever/Pain' → 'Paracetamol'
'🩺 Metformin — Diabetes' → 'Metformin'
'🤒 Fever' (symptom) → uses query word
"""
# Hardcoded map for labels whose first real word is ambiguous for FDA search
FDA_MAP = {
"Vitamin D3": "Cholecalciferol",
"Zinc": "Zinc",
"Salbutamol": "Albuterol", # US FDA name
"Paracetamol": "Acetaminophen", # US FDA name
"Fexofenadine": "Fexofenadine",
"Cetirizine": "Cetirizine",
"Amlodipine": "Amlodipine",
"Atorvastatin": "Atorvastatin",
"Metformin": "Metformin",
"Omeprazole": "Omeprazole",
"Esomeprazole": "Esomeprazole",
"Domperidone": "Domperidone",
"Ondansetron": "Ondansetron",
"Albendazole": "Albendazole",
"Fluconazole": "Fluconazole",
"Pregabalin": "Pregabalin",
"Clonazepam": "Clonazepam",
"Montelukast": "Montelukast",
"Losartan": "Losartan",
"Metoprolol": "Metoprolol",
}
# Strip everything after —
raw = label.split("—")[0]
cleaned = re.sub(r"[^\w\s]", "", raw).strip()
words = [w for w in cleaned.split() if len(w) > 2] # skip short emoji fragments
if not words:
return cleaned
# Try each word against the map
for w in words:
if w in FDA_MAP:
return FDA_MAP[w]
# Heuristic: return the longest word (most likely to be a pharmaceutical term)
return max(words, key=len)
def _openfda(endpoint, params, timeout=10):
    """GET ``{OPENFDA_BASE}/{endpoint}.json`` and return a dict; never raises.

    Parameters
    ----------
    endpoint : str
        Path segment under OPENFDA_BASE, e.g. "label", "event" or "ndc".
    params : dict
        Query-string parameters passed through to ``requests.get``.
    timeout : int | float
        Request timeout in seconds.

    Returns
    -------
    dict
        The decoded JSON body on HTTP 200. Otherwise a dict with the keys
        ``"error"`` (``"HTTP <status>"``, ``"timeout"`` or ``"connection"``)
        and ``"message"`` (human-readable text for the UI).
    """
    try:
        r = requests.get(f"{OPENFDA_BASE}/{endpoint}.json", params=params,
                         timeout=timeout, headers={"User-Agent": "DrugRecommender/4.0"})
        if r.status_code == 200:
            return r.json()
        # Error bodies are normally JSON shaped like
        # {"error": {"code": ..., "message": ...}}. Show that message instead
        # of a truncated raw JSON dump; fall back to the raw text otherwise.
        message = r.text[:200]
        try:
            detail = r.json().get("error")
            if isinstance(detail, dict) and detail.get("message"):
                message = str(detail["message"])[:200]
        except (ValueError, AttributeError):
            pass  # body is not JSON / not an object: keep the raw text
        return {"error": f"HTTP {r.status_code}", "message": message}
    except requests.exceptions.Timeout:
        return {"error": "timeout", "message": "OpenFDA timed out — try again."}
    except Exception as e:
        # Deliberately broad: this runs inside UI callbacks, which must get an
        # error dict back rather than an exception.
        return {"error": "connection", "message": str(e)}
# ── Core Recommender ──────────────────────────────────────────────
def recommend(label, system_filter, top_n, min_score):
    """Rank drugs by TF-IDF cosine similarity to the selected label's query.

    Parameters
    ----------
    label : str
        Dropdown label (or free text); resolved through ``_get_query``.
    system_filter : str
        A ``medical_system`` value, or "All Systems" (or empty) for no filter.
    top_n : int | float
        Maximum number of rows to return; coerced to int because UI sliders
        may deliver floats.
    min_score : float
        Minimum cosine similarity a row needs to be kept.

    Returns
    -------
    tuple
        ``(DataFrame, markdown_summary)`` on success, or ``(None, message)``
        when models are missing, the label is invalid, or nothing qualifies.
    """
    if not MODEL_LOADED:
        return None, "❌ Models not loaded."
    query = _get_query(label)
    if not query:
        return None, "⚠️ Please select a valid option (not a section header)."
    top_n = max(int(top_n), 0)

    q_vec = vectorizer.transform([_clean(query)])
    sims = cosine_similarity(q_vec, tfidf_matrix).flatten()

    # Restrict the candidate rows FIRST. The previous approach zeroed the
    # scores of other systems but then tested the unmasked scores, so whenever
    # the top slice reached zero-score slots, drugs from other systems leaked
    # into the filtered result.
    if system_filter and system_filter != "All Systems":
        in_system = (drug_db["medical_system"] == system_filter).to_numpy()
        candidates = np.flatnonzero(in_system)
    else:
        candidates = np.arange(len(sims))
    candidates = candidates[sims[candidates] >= min_score]
    # Stable sort keeps ties in database order, making results reproducible.
    ranked = candidates[np.argsort(-sims[candidates], kind="stable")]
    idx = ranked[:top_n].tolist()
    if not idx:
        return None, f"⚠️ No results above score {min_score}. Lower the threshold."

    out = drug_db.iloc[idx][["brand_name","generic_name","dosage_form","strength","medical_system","manufacturer"]].copy()
    # idx is already in descending-score order, so no further sorting is needed.
    out["score"] = [round(float(sims[i]), 4) for i in idx]
    out.index = range(1, len(out) + 1)
    out.index.name = "Rank"
    out.columns = ["Brand Name","Generic Name","Dosage Form","Strength","Medical System","Manufacturer","Score"]

    sys_str = " · ".join(f"**{k}** {v}" for k, v in out["Medical System"].value_counts().items())
    # Drop emoji/punctuation so the label renders cleanly in the markdown header.
    label_short = re.sub(r"[^\w\s\-/]", "", label).strip()[:40]
    summary = f"### ✅ {len(out)} results for **{label_short}**\n\n{sys_str}\n\n*Query: `{query}`*"
    return out, summary
def cross_compare(label, top_per):
    """Return the best matches for a label within every medical system.

    Parameters
    ----------
    label : str
        Dropdown label (or free text); resolved through ``_get_query``.
    top_per : int | float
        Maximum rows per medical system; coerced to int because UI sliders
        may deliver floats.

    Returns
    -------
    tuple
        ``(DataFrame, markdown_header)`` with rows grouped by system and
        sorted by descending score, or ``(None, message)`` on failure.
        Only rows with similarity above 0.01 are included.
    """
    if not MODEL_LOADED:
        return None, "❌ Models not loaded."
    query = _get_query(label)
    if not query:
        return None, "⚠️ Select a valid option."
    top_per = max(int(top_per), 0)

    q_vec = vectorizer.transform([_clean(query)])
    sims = cosine_similarity(q_vec, tfidf_matrix).flatten()

    rows = []
    # dropna(): NaN mixed with strings would make sorted() raise TypeError.
    for system in sorted(drug_db["medical_system"].dropna().unique()):
        # Select this system's rows explicitly. Zeroing other systems' scores
        # and then testing the unmasked scores let rows from other systems
        # slip in whenever a system had fewer than top_per real matches.
        members = np.flatnonzero((drug_db["medical_system"] == system).to_numpy())
        members = members[sims[members] > 0.01]
        best = members[np.argsort(-sims[members], kind="stable")][:top_per]
        for i in best.tolist():
            r = drug_db.iloc[i]
            rows.append({"System": r["medical_system"], "Brand": r["brand_name"],
                         "Generic": r["generic_name"], "Form": r["dosage_form"],
                         "Strength": r["strength"], "Score": round(float(sims[i]), 4)})
    if not rows:
        return None, "No results found."

    df = pd.DataFrame(rows).sort_values(["System", "Score"], ascending=[True, False]).reset_index(drop=True)
    df.index = range(1, len(df) + 1)
    df.index.name = "Rank"
    # Drop emoji/punctuation so the label renders cleanly in the markdown header.
    label_short = re.sub(r"[^\w\s\-/]", "", label).strip()[:40]
    return df, f"### 🌐 **{label_short}** — {len(df)} drugs across {df['System'].nunique()} systems"
# ── OpenFDA ───────────────────────────────────────────────────────
def fda_label(label):
    """Build a markdown summary of the first FDA label record for a drug.

    The search term is derived from ``label`` via ``_extract_generic`` and
    matched against ``openfda.generic_name`` on the "label" endpoint.

    Returns
    -------
    str
        Markdown text: the formatted label, or a short notice when the label
        is empty, the request failed, or no record was found.
    """
    g = _extract_generic(label) if label else ""
    if not g:
        # Nothing usable to search for; skip the pointless network call.
        return "⚠️ Please select a medicine or symptom first."
    d = _openfda("label", {"search": f"openfda.generic_name:{g}", "limit": 1})
    if "error" in d:
        return f"### ⚠️ {d['message']}\n\n*`{g}` may not be in US FDA records.*"
    res = d.get("results", [])
    if not res:
        return f"ℹ️ No FDA label for **{g}**."
    r = res[0]
    ofd = r.get("openfda", {})
    lines = [f"## 💊 {g.title()} — FDA Label", "_U.S. Food & Drug Administration · OpenFDA_\n"]
    # Header facts: at most five values per field.
    for k, t in [("brand_name","Brand Names"),("manufacturer_name","Manufacturer"),("route","Route")]:
        v = ofd.get(k, [])
        if v:
            lines.append(f"**{t}:** {', '.join(v[:5])}")
    lines.append("")
    # Long-form sections, each capped at `lim` characters.
    for field, heading, lim in [
        ("indications_and_usage","📋 Indications & Usage",700),
        ("warnings","⚠️ Warnings",500),
        ("dosage_and_administration","💉 Dosage",500),
        ("adverse_reactions","🔴 Adverse Reactions",400),
        ("drug_interactions","🔗 Drug Interactions",400),
    ]:
        v = r.get(field, [])
        if v:
            text = v[0]
            # Add the ellipsis only when text was actually cut off.
            snippet = text[:lim] + "…" if len(text) > lim else text
            lines += [f"### {heading}", snippet + "\n"]
    lines.append("---\n*[OpenFDA](https://open.fda.gov) · Research only · Not clinical advice*")
    return "\n".join(lines)
def fda_adverse(label):
    """Fetch the most frequently reported adverse reactions for a drug.

    Runs a count query on the "event" endpoint over
    ``patient.reaction.reactionmeddrapt.exact`` (limit 15) for the term that
    ``_extract_generic`` derives from ``label``.

    Returns
    -------
    tuple
        ``(DataFrame, markdown_header)`` with columns "Adverse Reaction" and
        "Report Count", or ``(None, message)`` on failure / no data.
    """
    g = _extract_generic(label) if label else ""
    if not g:
        # Nothing usable to search for; skip the pointless network call.
        return None, "⚠️ Please select a medicine or symptom first."
    d = _openfda("event",{"search":f"patient.drug.medicinalproduct:{g}","count":"patient.reaction.reactionmeddrapt.exact","limit":15})
    if "error" in d:
        return None, f"### ⚠️ {d['message']}"
    res = d.get("results", [])
    if not res:
        return None, f"ℹ️ No FAERS data for **{g}**."
    # Count results are dicts keyed "term"/"count". Passing columns=[...] to
    # pd.DataFrame with dict records SELECTS those keys rather than renaming,
    # which produced an all-NaN table; map the keys explicitly instead.
    df = pd.DataFrame(
        [{"Adverse Reaction": row.get("term", "—"), "Report Count": row.get("count", 0)}
         for row in res]
    )
    df = df.sort_values("Report Count", ascending=False).reset_index(drop=True)
    df.index = range(1, len(df) + 1)
    df.index.name = "Rank"
    # The figure is the sum over the listed reactions only.
    return df, f"### 📊 FAERS: **{g.title()}** · {df['Report Count'].sum():,} total reports"
def fda_ndc(label):
    """List up to ten NDC directory products for a drug.

    Queries the "ndc" endpoint on ``generic_name`` using the term that
    ``_extract_generic`` derives from ``label``.

    Returns
    -------
    tuple
        ``(DataFrame, markdown_header)`` with one row per product, or
        ``(None, message)`` on failure / no data.
    """
    g = _extract_generic(label) if label else ""
    if not g:
        # Nothing usable to search for; skip the pointless network call.
        return None, "⚠️ Please select a medicine or symptom first."
    d = _openfda("ndc", {"search": f"generic_name:{g}", "limit": 10})
    if "error" in d:
        return None, f"### ⚠️ {d['message']}"
    res = d.get("results", [])
    if not res:
        return None, f"ℹ️ No NDC data for **{g}**."
    df = pd.DataFrame([
        {
            "Brand": r.get("brand_name", "—"),
            "Generic": r.get("generic_name", "—"),
            "Form": r.get("dosage_form", "—"),
            # Same "—" placeholder as the other columns when no route is given.
            "Route": ", ".join(r.get("route") or []) or "—",
            "Manufacturer": r.get("labeler_name", "—"),
            "NDC": r.get("product_ndc", "—"),
        }
        for r in res
    ])
    df.index = range(1, len(df) + 1)
    df.index.name = "#"
    return df, f"### 🏷️ NDC Registry: **{g.title()}** · {len(df)} products"
# ── Dashboard Charts ──────────────────────────────────────────────
# Pre-computed aggregates consumed by the dashboard charts.
if not MODEL_LOADED or drug_db.empty:
    # Placeholder data so the charts can still be built without a database.
    _sys = pd.Series({"No data":1})
    _dos = _sys.copy()
    _mfr = _sys.copy()
    _cross = pd.DataFrame()
else:
    _sys = drug_db["medical_system"].value_counts()                 # drugs per system
    _dos = drug_db["dosage_form"].value_counts().head(10)           # 10 most common forms
    _mfr = drug_db["manufacturer"].value_counts().head(15)          # 15 largest manufacturers
    # System x dosage-form counts, limited to the eight most common forms.
    _cross = pd.crosstab(drug_db["medical_system"], drug_db["dosage_form"])
    _cross = _cross[[form for form in _dos.index[:8] if form in _cross.columns]]
def _sc(labels):
    """Return the SYSTEM_COLORS hex colour for each label, grey "#64748b" if unknown."""
    fallback = "#64748b"
    colours = []
    for name in labels:
        colours.append(SYSTEM_COLORS.get(name, fallback))
    return colours
def make_dashboard():
# ── Row 1: KPI cards via annotations ──────────────────────────
fig_kpi = go.Figure()
kpis = [
("53,581", "Total Drugs", "#3b82f6"),
("725", "Manufacturers", "#22c55e"),
("5", "Medical Systems", "#a855f7"),
("1,702", "Unique Compounds", "#f97316"),
]
for i,(val,lbl,col) in enumerate(kpis):
x = 0.13 + i*0.25
r2,g2,b2 = int(col[1:3],16), int(col[3:5],16), int(col[5:7],16)
fill_rgba = f"rgba({r2},{g2},{b2},0.12)"
fig_kpi.add_shape(type="rect", x0=x-0.11, x1=x+0.11, y0=0.05, y1=0.95,
fillcolor=fill_rgba, line=dict(color=col, width=2), xref="paper", yref="paper")
fig_kpi.add_annotation(x=x, y=0.62, text=f"{val}", showarrow=False,
font=dict(size=28, color=col), xref="paper", yref="paper")
fig_kpi.add_annotation(x=x, y=0.28, text=lbl, showarrow=False,
font=dict(size=13, color="#475569"), xref="paper", yref="paper")
fig_kpi.update_layout(height=130, margin=dict(t=10,b=10,l=10,r=10),
paper_bgcolor="white", plot_bgcolor="white",
xaxis=dict(visible=False), yaxis=dict(visible=False))
# ── Donut ──────────────────────────────────────────────────────
fig_donut = go.Figure(go.Pie(
labels=_sys.index.tolist(), values=_sys.values.tolist(), hole=0.58,
marker=dict(colors=_sc(_sys.index), line=dict(color="#fff",width=2.5)),
textinfo="label+percent", textfont=dict(size=12),
hovertemplate="%{label}
%{value:,} drugs · %{percent}
Drugs", x=0.5, y=0.5, font=dict(size=13), showarrow=False)],
legend=dict(orientation="h", y=-0.05, x=0.5, xanchor="center", font=dict(size=11)),
height=340, margin=dict(t=50,b=30,l=10,r=10), paper_bgcolor="white",
)
# ── H-Bar dosage ───────────────────────────────────────────────
labs = _dos.index.tolist()[::-1]; vals = _dos.values.tolist()[::-1]
fig_bar = go.Figure(go.Bar(
y=labs, x=vals, orientation="h",
marker=dict(color=px.colors.sequential.Blues_r[:len(labs)]),
text=[f" {v:,}" for v in vals], textposition="outside",
hovertemplate="%{y}: %{x:,}
%{{x}}: %{{y:,}}
Products: %{customdata[0]:,}
%{{theta}}: %{{r:.0f}}%
53,581 drugs · Search by Symptom or Medicine · NLP-Powered · Master's Thesis