Pharma_Rec_clone

Sleeping

App Files Files Community

Sazzz02 committed on Mar 4

Commit

0b792da

verified ·

1 Parent(s): 777b714

Update app.py

Browse files

Files changed (1) hide show

app.py +900 -293

app.py CHANGED Viewed

@@ -1,28 +1,7 @@
-"""
-╔══════════════════════════════════════════════════════════════════╗
-║  Cross-Medical-System Drug Recommendation Engine                 ║
-║  Master's Thesis Project — Hugging Face Gradio App               ║
-║  Medical Systems: Allopathic | Ayurvedic | Unani |               ║
-║                   Homeopathic | Herbal                           ║
-╚══════════════════════════════════════════════════════════════════╝
-HOW TO DEPLOY ON HUGGING FACE SPACES:
-1. Create a new Space → SDK: Gradio
-2. Upload: app.py, requirements.txt, and the entire models/ folder
-3. The Space will auto-install requirements and launch
-FOLDER STRUCTURE on HF Space:
-  app.py
-  requirements.txt
-  models/
-    tfidf_vectorizer.pkl
-    tfidf_matrix.pkl
-    svd_reducer.pkl
-    kmeans_model.pkl
-    drug_database.csv
-    model_metadata.json
-"""
 import gradio as gr
 import pandas as pd
 import numpy as np
@@ -31,22 +10,147 @@ import json
 import os
 import re
 import warnings
 warnings.filterwarnings("ignore")
-# ─── Load Models ────────────────────────────────────────────────────────────
-MODEL_DIR = os.path.join(os.path.dirname(__file__), "models")
 def load_models():
-    print("Loading models from PKL files...")
-    vectorizer  = joblib.load(os.path.join(MODEL_DIR, "tfidf_vectorizer.pkl"))
-    tfidf_matrix = joblib.load(os.path.join(MODEL_DIR, "tfidf_matrix.pkl"))
-    drug_db     = pd.read_csv(os.path.join(MODEL_DIR, "drug_database.csv"))
     with open(os.path.join(MODEL_DIR, "model_metadata.json")) as f:
-        metadata = json.load(f)
-    print(f"✅ Loaded {len(drug_db):,} drugs | {tfidf_matrix.shape[1]} features")
-    return vectorizer, tfidf_matrix, drug_db, metadata
 try:
@@ -54,338 +158,841 @@ try:
     vectorizer, tfidf_matrix, drug_db, metadata = load_models()
     MEDICAL_SYSTEMS = ["All Systems"] + sorted(drug_db["medical_system"].unique().tolist())
     MODEL_LOADED = True
-except Exception as e:
-    print(f"Model load error: {e}")
     MODEL_LOADED = False
     MEDICAL_SYSTEMS = ["All Systems"]
-# ─── Core Recommendation Function ───────────────────────────────────────────
-def recommend_drugs(query: str, medical_system: str, top_n: int, min_score: float):
-    """
-    Core recommendation engine using TF-IDF + Cosine Similarity.
-    Returns a formatted DataFrame of recommendations.
-    """
-    if not MODEL_LOADED:
-        return pd.DataFrame({"Error": ["Models not loaded. Check /models folder."]}), "❌ Models not loaded"
-    if not query or not query.strip():
-        return pd.DataFrame({"Info": ["Please enter a drug name or query."]}), "⚠️ Empty query"
-    # Clean query
-    query_clean = re.sub(r"[^a-z0-9\s\+\-\.]", " ", query.lower().strip())
-    query_clean = re.sub(r"\s+", " ", query_clean).strip()
-    # Vectorize
-    q_vec = vectorizer.transform([query_clean])
-    sims  = cosine_similarity(q_vec, tfidf_matrix).flatten()
-    # Apply system filter
-    if medical_system and medical_system != "All Systems":
-        mask = drug_db["medical_system"] == medical_system
         sims_work = sims.copy()
-        sims_work[~mask] = 0
     else:
         sims_work = sims
-    # Get top indices
-    top_idx = sims_work.argsort()[-(top_n * 3):][::-1]
-    top_idx = [i for i in top_idx if sims[i] >= min_score][:top_n]
-    if not top_idx:
-        return (
-            pd.DataFrame({"Result": [f"No results found above similarity threshold {min_score}."
-                                     f" Try lowering threshold or broader query."]}),
-            f"⚠️ No results for '{query}'"
         )
-    results = drug_db.iloc[top_idx][[
-        "brand_name", "generic_name", "dosage_form", "strength",
-        "medical_system", "manufacturer"
     ]].copy()
-    results["similarity_score"] = sims[top_idx].round(4)
-    results = results.sort_values("similarity_score", ascending=False).reset_index(drop=True)
-    results.index += 1
-    results.index.name = "Rank"
-    # Rename columns for display
-    results.columns = ["Brand Name", "Generic Name", "Dosage Form",
-                       "Strength", "Medical System", "Manufacturer", "Score"]
-    n_systems = results["Medical System"].nunique()
     summary = (
-        f"✅ Found **{len(results)}** drugs"
-        f"{' in ' + medical_system if medical_system != 'All Systems' else ' across ' + str(n_systems) + ' medical systems'}"
-        f" for query: **'{query}'**"
     )
-    return results, summary
-def cross_system_compare(query: str, top_per_system: int):
-    """
-    Return best N results from EACH medical system — the core thesis contribution.
-    """
-    if not MODEL_LOADED:
-        return pd.DataFrame({"Error": ["Models not loaded."]}), "❌ Models not loaded"
-    if not query or not query.strip():
-        return pd.DataFrame({"Info": ["Please enter a query."]}), "⚠️ Empty query"
-    systems = [s for s in drug_db["medical_system"].unique()]
-    all_results = []
-    query_clean = re.sub(r"[^a-z0-9\s\+\-\.]", " ", query.lower().strip())
-    q_vec = vectorizer.transform([query_clean])
-    sims  = cosine_similarity(q_vec, tfidf_matrix).flatten()
-    for system in systems:
-        mask = drug_db["medical_system"] == system
-        sims_sys = sims.copy()
-        sims_sys[~mask] = 0
-        top_idx = sims_sys.argsort()[-top_per_system:][::-1]
-        top_idx = [i for i in top_idx if sims[i] > 0.01][:top_per_system]
-        if top_idx:
-            sub = drug_db.iloc[top_idx][["brand_name", "generic_name",
-                                          "dosage_form", "strength",
-                                          "medical_system", "manufacturer"]].copy()
-            sub["similarity_score"] = sims[top_idx].round(4)
-            all_results.append(sub)
-    if not all_results:
-        return pd.DataFrame({"Result": ["No cross-system results found."]}), "No results"
-    combined = pd.concat(all_results, ignore_index=True)
-    combined = combined.sort_values(["medical_system", "similarity_score"], ascending=[True, False])
-    combined.index = range(1, len(combined) + 1)
-    combined.index.name = "Rank"
-    combined.columns = ["Brand Name", "Generic Name", "Dosage Form",
-                        "Strength", "Medical System", "Manufacturer", "Score"]
-    summary = f"✅ Cross-system comparison for **'{query}'** — {len(combined)} drugs across {len(systems)} systems"
-    return combined, summary
-def get_stats():
-    """Return dataset statistics as markdown."""
     if not MODEL_LOADED:
         return "Models not loaded."
-    sys_dist = drug_db["medical_system"].value_counts()
-    dosage_dist = drug_db["dosage_form"].value_counts().head(8)
-    stats_md = f"""
-## 📊 Dataset Statistics
 | Metric | Value |
 |--------|-------|
-| Total Drugs | {len(drug_db):,} |
-| Medical Systems | {drug_db['medical_system'].nunique()} |
-| Unique Manufacturers | {drug_db['manufacturer'].nunique():,} |
-| Unique Brand Names | {drug_db['brand_name'].nunique():,} |
-| TF-IDF Features | {metadata.get('n_features', 10000):,} |
-| Silhouette Score | {metadata.get('silhouette_score', 'N/A')} |
-### 🏥 Medical System Distribution
 """
-    for sys, cnt in sys_dist.items():
-        pct = cnt / len(drug_db) * 100
-        bar = "█" * int(pct / 2)
-        stats_md += f"\n- **{sys}**: {cnt:,} ({pct:.1f}%) `{bar}`"
-    stats_md += "\n\n### 💊 Top Dosage Forms\n"
-    for dosage, cnt in dosage_dist.items():
-        stats_md += f"\n- {dosage}: {cnt:,}"
-    return stats_md
-# ─── Gradio UI ──────────────────────────────────────────────────────────────
-EXAMPLE_QUERIES = [
-    ["Azithromycin 500mg", "All Systems", 10, 0.05],
-    ["paracetamol fever tablet", "Ayurvedic", 8, 0.05],
-    ["omeprazole capsule 20mg", "Allopathic", 10, 0.1],
-    ["blood pressure tablet", "Homeopathic", 6, 0.05],
-    ["herbal digestive liquid", "Herbal", 5, 0.05],
-    ["antibiotic suspension", "Unani", 6, 0.05],
-]
-CROSS_SYSTEM_EXAMPLES = [
-    ["Azithromycin antibiotic tablet", 3],
-    ["fever pain relief", 2],
-    ["digestive stomach", 2],
-    ["blood pressure hypertension", 2],
-]
-CSS = """
-.gradio-container { max-width: 1100px; margin: auto; font-family: 'Segoe UI', sans-serif; }
-.header-box { background: linear-gradient(135deg, #1a237e, #283593);
-              color: white; padding: 24px; border-radius: 12px; margin-bottom: 16px; }
-.stat-box { background: #f8f9fa; border-radius: 8px; padding: 12px; }
-footer { display: none !important; }
-"""
-with gr.Blocks(css=CSS, title="💊 Drug Recommendation System") as demo:
     gr.HTML("""
-    <div class="header-box">
-        <h1 style="margin:0; font-size:1.8em;">💊 Cross-Medical-System Drug Recommender</h1>
-        <p style="margin:8px 0 0; opacity:0.85; font-size:1.05em;">
-            Master's Thesis — Intelligent Drug Formulation Recommendation System<br>
-            <span style="font-size:0.9em;">Allopathic • Ayurvedic • Unani • Homeopathic • Herbal</span>
-        </p>
     </div>
     """)
     with gr.Tabs():
-        # ── Tab 1: Single System Recommendation ─────────────────────────
-        with gr.TabItem("🔍 Drug Recommender"):
-            gr.Markdown("### Find drugs by name, generic compound, or description")
-            with gr.Row():
-                with gr.Column(scale=3):
-                    query_input = gr.Textbox(
-                        label="🔎 Search Query",
-                        placeholder="e.g. Azithromycin 500mg tablet, fever pain, omeprazole capsule...",
-                        lines=1
-                    )
-                with gr.Column(scale=2):
-                    system_filter = gr.Dropdown(
-                        choices=MEDICAL_SYSTEMS,
-                        value="All Systems",
-                        label="🏥 Medical System Filter"
-                    )
             with gr.Row():
-                top_n_slider = gr.Slider(
-                    minimum=3, maximum=25, value=10, step=1,
-                    label="📋 Number of Results"
-                )
-                min_score_slider = gr.Slider(
-                    minimum=0.01, maximum=0.5, value=0.05, step=0.01,
-                    label="🎯 Minimum Similarity Score"
-                )
-            recommend_btn = gr.Button("🚀 Get Recommendations", variant="primary", size="lg")
-            summary_box   = gr.Markdown(label="Summary")
-            results_table = gr.DataFrame(
                 label="📋 Recommended Drugs",
-                wrap=True,
-                interactive=False
             )
-            gr.Examples(
-                examples=EXAMPLE_QUERIES,
-                inputs=[query_input, system_filter, top_n_slider, min_score_slider],
-                label="📌 Quick Examples — Click to Try"
             )
-            recommend_btn.click(
-                fn=recommend_drugs,
-                inputs=[query_input, system_filter, top_n_slider, min_score_slider],
-                outputs=[results_table, summary_box]
-            )
-        # ── Tab 2: Cross-System Comparison ──────────────────────────────
-        with gr.TabItem("🔄 Cross-System Comparison"):
             gr.Markdown("""
-            ### Compare the same drug/query across ALL 5 medical systems simultaneously
-            > This is the **core thesis contribution** — finding equivalent treatments across Allopathic, Ayurvedic, Unani, Homeopathic, and Herbal systems.
             """)
-            with gr.Row():
-                cross_query = gr.Textbox(
-                    label="🔎 Drug / Condition Query",
-                    placeholder="e.g. fever tablet, antibiotic, digestive...",
-                    lines=1,
-                    scale=4
-                )
-                top_per_sys = gr.Slider(
-                    minimum=1, maximum=5, value=3, step=1,
-                    label="Results per System",
-                    scale=2
-                )
-            compare_btn   = gr.Button("🔄 Compare Across All Systems", variant="primary", size="lg")
             cross_summary = gr.Markdown()
-            cross_table   = gr.DataFrame(label="🌐 Cross-System Drug Comparison", wrap=True, interactive=False)
-            gr.Examples(
-                examples=CROSS_SYSTEM_EXAMPLES,
-                inputs=[cross_query, top_per_sys],
-                label="📌 Quick Examples"
             )
             compare_btn.click(
                 fn=cross_system_compare,
-                inputs=[cross_query, top_per_sys],
-                outputs=[cross_table, cross_summary]
             )
-        # ── Tab 3: Dataset Stats ─────────────────────────────────────────
-        with gr.TabItem("📊 Dataset Statistics"):
-            stats_output = gr.Markdown()
-            refresh_btn  = gr.Button("🔄 Load Statistics", variant="secondary")
-            refresh_btn.click(fn=get_stats, inputs=[], outputs=[stats_output])
-        # ── Tab 4: About ─────────────────────────────────────────────────
-        with gr.TabItem("📚 About / Thesis"):
             gr.Markdown("""
-## 📖 About This Project
-### Thesis Title
-**Intelligent Cross-Medical-System Drug Recommendation Using NLP and Similarity-Based Learning**
-### Problem Statement
-Healthcare practitioners often need to recommend drug alternatives across different medical traditions
-(Allopathic, Ayurvedic, Unani, Homeopathic, Herbal), especially in regions like South Asia where
-multiple medical systems coexist. No unified digital tool existed for this task.
-### Methodology
-| Component | Technique |
-|-----------|-----------|
-| Text Feature Extraction | TF-IDF (1,2-gram, 10,000 features) |
-| Similarity Engine | Cosine Similarity |
-| Dimensionality Reduction | Truncated SVD (50 components) |
-| Drug Clustering | K-Means (K=10) |
-| Evaluation Metric | Precision@K, Silhouette Score |
 ### Dataset
-- **Source**: Bangladesh National Drug Registry (via Kaggle)
-- **Size**: 53,584 drug records
-- **Systems**: Allopathic (36k), Unani (8.5k), Ayurvedic (5.3k), Homeopathic (2.6k), Herbal (1k)
-- **Fields**: Brand Name, Generic Name, Strength, Dosage Form, Manufacturer
-### Key Contributions
-1. **First unified cross-medical-system recommender** for South Asian drug registry
-2. **NLP-driven**: TF-IDF bigrams handle compound drug names (e.g., "Diphenhydramine + Zinc Acetate")
-3. **Clustering analysis** reveals natural drug groupings across cultural medical traditions
-4. **Deployable**: Fast PKL-based inference, <100ms per query
-### Model Files
-```
-models/
-├── tfidf_vectorizer.pkl   — Fitted TF-IDF transformer
-├── tfidf_matrix.pkl       — Pre-computed drug feature matrix
-├── svd_reducer.pkl        — SVD dimensionality reducer
-├── kmeans_model.pkl       — K-Means cluster assignments
-└── drug_database.csv      — Processed drug database
-```
-### How to Cite
-```
-Author, (2024). Cross-Medical-System Drug Recommendation Engine.
-Master's Thesis, [University Name].
-Dataset: https://www.kaggle.com/datasets/shuvokumarbasak2030/drug-pharma-new-dataset
-```
             """)
     gr.HTML("""
-    <div style="text-align:center; padding:12px; color:#666; font-size:0.85em; margin-top:10px;">
-        🎓 Master's Thesis Project | Drug Recommendation System |
-        Built with TF-IDF + Cosine Similarity | Hugging Face Spaces
     </div>
     """)
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)

+# ═══════════════════════════════════════════════════════════════════
+# IMPORTS
+# ═══════════════════════════════════════════════════════════════════
 import gradio as gr
 import pandas as pd
 import numpy as np
 import os
 import re
 import warnings
+import requests
+import plotly.graph_objects as go
+import plotly.express as px
+from plotly.subplots import make_subplots
 warnings.filterwarnings("ignore")
+# ═══════════════════════════════════════════════════════════════════
+# CONSTANTS
+# ═══════════════════════════════════════════════════════════════════
+OPENFDA_BASE = "https://api.fda.gov/drug"
+MODEL_DIR    = os.path.join(os.path.dirname(__file__), "models")
+# System colour palette — used consistently across all charts
+SYSTEM_COLORS = {
+    "Allopathic":  "#3b82f6",   # blue
+    "Unani":       "#f97316",   # orange
+    "Ayurvedic":   "#22c55e",   # green
+    "Homeopathic": "#a855f7",   # purple
+    "Herbal":      "#ef4444",   # red
+}
+# ─── 30 Curated Drug Options ─────────────────────────────────────
+# Key   = display label shown in the Gradio Dropdown
+# Value = TF-IDF search query that _get_query() returns for that label
+# Changing this dict is the ONLY place you need to add/remove options.
+DRUG_OPTIONS = {
+    # ── Antibiotics ──────────────────────────────────────────────
+    "🦠 Azithromycin — Antibiotic (Respiratory)":       "Azithromycin 500mg tablet",
+    "🦠 Amoxicillin — Antibiotic (Broad Spectrum)":     "Amoxicillin 500mg capsule",
+    "🦠 Ciprofloxacin — Antibiotic (UTI/Infection)":    "Ciprofloxacin 500mg tablet",
+    "🦠 Metronidazole — Antibiotic (Anaerobic)":        "Metronidazole 400mg tablet",
+    "🦠 Ceftriaxone — Antibiotic (Injection)":          "Ceftriaxone 1gm injection",
+    "🦠 Levofloxacin — Antibiotic (Pneumonia)":         "Levofloxacin 500mg tablet",
+    # ── Pain & Fever ─────────────────────────────────────────────
+    "🤒 Paracetamol — Fever & Pain Relief":             "Paracetamol 500mg tablet",
+    "🤒 Diclofenac — Anti-inflammatory":                "Diclofenac Sodium 50mg tablet",
+    "🤒 Naproxen — Pain Relief (Joints)":               "Naproxen 250mg tablet",
+    "🤒 Ketorolac — Strong Painkiller (Injection)":     "Ketorolac 30mg injection",
+    # ── Heart & Blood Pressure ───────────────────────────────────
+    "💓 Amlodipine — Blood Pressure":                   "Amlodipine 5mg tablet",
+    "💓 Atorvastatin — Cholesterol":                    "Atorvastatin 20mg tablet",
+    "💓 Losartan — Hypertension":                       "Losartan Potassium 50mg tablet",
+    "💓 Metoprolol — Heart Rate / Beta Blocker":        "Metoprolol 50mg tablet",
+    # ── Diabetes ─────────────────────────────────────────────────
+    "🩺 Metformin — Type 2 Diabetes":                   "Metformin Hydrochloride 500mg tablet",
+    "🩺 Glibenclamide — Blood Sugar Control":           "Glibenclamide 5mg tablet",
+    # ── Respiratory & Allergy ────────────────────────────────────
+    "🫁 Salbutamol — Asthma / Bronchospasm":            "Salbutamol 2mg tablet syrup",
+    "🫁 Montelukast — Asthma / Allergy":                "Montelukast 10mg tablet",
+    "🫁 Fexofenadine — Allergy / Antihistamine":        "Fexofenadine Hydrochloride 120mg tablet",
+    "🫁 Cetirizine — Allergy / Antihistamine":          "Cetirizine Dihydrochloride 10mg tablet",
+    # ── Neuro / Mental Health ────────────────────────────────────
+    "🧠 Pregabalin — Nerve Pain / Anxiety":             "Pregabalin 75mg capsule",
+    "🧠 Clonazepam — Anxiety / Seizure":                "Clonazepam 0.5mg tablet",
+    # ── GI / Stomach ─────────────────────────────────────────────
+    "🫃 Omeprazole — Acid Reflux / Ulcer":              "Omeprazole 20mg capsule",
+    "🫃 Esomeprazole — GERD / Acid":                    "Esomeprazole 40mg capsule",
+    "🫃 Domperidone — Nausea / Vomiting":               "Domperidone 10mg tablet",
+    "🫃 Ondansetron — Nausea (Chemotherapy)":           "Ondansetron 4mg tablet",
+    # ── Anti-infective / Antifungal ──────────────────────────────
+    "🌿 Albendazole — Deworming":                       "Albendazole 400mg tablet",
+    "🌿 Fluconazole — Antifungal":                      "Fluconazole 150mg capsule",
+    # ── Vitamins & Supplements ───────────────────────────────────
+    "💊 Vitamin D3 — Bone / Immunity":                  "Cholecalciferol Vitamin D3 tablet",
+    "💊 Zinc + Multivitamin — Immunity":                "Zinc Nicotinamide Pyridoxine vitamin tablet",
+}
+DROPDOWN_LABELS = list(DRUG_OPTIONS.keys())
+# ═══════════════════════════════════════════════════════════════════
+# ✅ BUG FIX — build_drug_text
+# ═══════════════════════════════════════════════════════════════════
+# THIS FUNCTION IS THE CORE FIX.
+#
+# OLD behaviour (buggy):
+#   All systems used:  GenericName + Dosage + Strength + System
+#   For Ayurvedic/Unani/Homeopathic/Herbal, Generic Name is NULL in
+#   the dataset, so the code fell back to Brand Name.
+#   Brand names like "Feverfit", "Paincap", "Paralead" contain tokens
+#   like "fever", "pain", "para" → TF-IDF wrongly matched these when
+#   a user searched "paracetamol fever tablet".
+#
+# NEW behaviour (fixed):
+#   Allopathic      → GenericName + Dosage + Strength + "allopathic"
+#                     (uses the real pharmaceutical compound)
+#   Non-allopathic  → Dosage + Strength + SystemName ONLY
+#                     (brand name noise removed entirely)
+#
+# Result: "paracetamol fever tablet" now returns ONLY Allopathic
+# compounds like Paracetamol, Acetaminophen — no more "Feverfit".
+# ─────────────────────────────────────────────────────────────────
+def _clean(t) -> str:
+    """Normalise one text field for the TF-IDF corpus.
+
+    A value that ``pd.isna`` reports as missing becomes ``""``. Anything else
+    is stringified and lowercased, every character other than ``a-z``,
+    ``0-9``, whitespace, ``+``, ``-`` and ``.`` is replaced by a space, and
+    runs of whitespace are collapsed to one space.
+    """
+    if pd.isna(t):
+        return ""
+    t = re.sub(r"[^a-z0-9\s\+\-\.]", " ", str(t).lower())
+    return re.sub(r"\s+", " ", t).strip()
+def build_drug_text(row) -> str:
+    """
+    Build the text that represents one drug row in the TF-IDF corpus.
+    Allopathic  → generic name + dosage description + strength + "allopathic"
+    All others  → dosage description + strength + system name (NO brand name)
+
+    ``row`` must support ``row["medical_system"]`` and ``row.get(...)``
+    (a dict or a pandas Series both work). Fields that are missing or clean
+    down to an empty string are dropped from the result.
+    """
+    # Raw values go straight to _clean(), which stringifies them itself.
+    # Wrapping Strength in str() first turned a missing value (NaN) into the
+    # literal text "nan", which _clean() could no longer recognise as missing
+    # and which then ended up as a token in the drug text.
+    if row["medical_system"] == "Allopathic":
+        parts = [
+            _clean(row.get("Generic Name", "")),
+            _clean(row.get("Dosages Description", "")),
+            _clean(row.get("Strength", "")),
+            "allopathic",
+        ]
+    else:
+        # Non-allopathic: the brand name is intentionally excluded so that
+        # brand-name tokens cannot match symptom or compound queries.
+        parts = [
+            _clean(row.get("Dosages Description", "")),
+            _clean(row.get("Strength", "")),
+            _clean(row.get("medical_system", "")),
+        ]
+    return " ".join(filter(None, parts))
+# ═══════════════════════════════════════════════════════════════════
+# LOAD PKL MODELS
+# ═══════════════════════════════════════════════════════════════════
 def load_models():
+    """Load the fitted artefacts from ``MODEL_DIR``.
+
+    Returns:
+        ``(vec, mat, db, meta)``: the object stored in
+        ``tfidf_vectorizer.pkl``, the object stored in ``tfidf_matrix.pkl``,
+        ``drug_database.csv`` as a DataFrame, and ``model_metadata.json``
+        as parsed JSON.
+
+    Any I/O or deserialisation error propagates to the caller.
+    """
+    print("Loading models …")
+    # NOTE: joblib.load unpickles its input; only trusted .pkl files belong
+    # in the models folder.
+    vec  = joblib.load(os.path.join(MODEL_DIR, "tfidf_vectorizer.pkl"))
+    mat  = joblib.load(os.path.join(MODEL_DIR, "tfidf_matrix.pkl"))
+    db   = pd.read_csv(os.path.join(MODEL_DIR, "drug_database.csv"))
+    # Decode the JSON as UTF-8 explicitly instead of relying on the locale
+    # default encoding of the host.
+    with open(os.path.join(MODEL_DIR, "model_metadata.json"), encoding="utf-8") as f:
+        meta = json.load(f)
+    print(f"✅ {len(db):,} drugs · {mat.shape[1]:,} features loaded")
+    return vec, mat, db, meta
 try:
+    # Load everything once at import time; the functions below read these
+    # module-level names.
     vectorizer, tfidf_matrix, drug_db, metadata = load_models()
     MEDICAL_SYSTEMS = ["All Systems"] + sorted(drug_db["medical_system"].unique().tolist())
     MODEL_LOADED = True
+except Exception as exc:
+    # Keep the module importable when loading fails: callers check
+    # MODEL_LOADED before touching the model objects. Note that vectorizer
+    # and tfidf_matrix are NOT defined on this path; only drug_db and
+    # metadata get empty placeholders.
+    print(f"Model load failed: {exc}")
     MODEL_LOADED = False
     MEDICAL_SYSTEMS = ["All Systems"]
+    drug_db = pd.DataFrame()
+    metadata = {}
+# ═══════════════════════════════════════════════════════════════════
+# CORE RECOMMENDER
+# ═══════════════════════════════════════════════════════════════════
+def _get_query(drug_label: str) -> str:
+    """Return the curated search query for a dropdown label.
+
+    A label that is not a key of ``DRUG_OPTIONS`` is returned unchanged.
+    """
+    if drug_label in DRUG_OPTIONS:
+        return DRUG_OPTIONS[drug_label]
+    return drug_label
+def _run_similarity(query: str, system_filter: str, top_n: int, min_score: float):
+    """Rank the drug matrix against ``query`` by cosine similarity.
+
+    Args:
+        query: free-text query; cleaned with ``_clean`` before vectorising.
+        system_filter: a ``medical_system`` value, or ``"All Systems"`` /
+            empty for no restriction.
+        top_n: maximum number of indices to return.
+        min_score: minimum similarity a hit must reach.
+
+    Returns:
+        ``(indices, sims)``: up to ``top_n`` row positions in descending
+        score order, and the full (unmasked) similarity array they index.
+    """
+    q_clean = _clean(query)
+    q_vec   = vectorizer.transform([q_clean])
+    sims    = cosine_similarity(q_vec, tfidf_matrix).flatten()
+    in_system = None
+    if system_filter and system_filter != "All Systems":
+        in_system = (drug_db["medical_system"] == system_filter).to_numpy()
         sims_work = sims.copy()
+        sims_work[~in_system] = 0.0
     else:
         sims_work = sims
+    # Slicing needs an int; coerce in case the UI hands over a float.
+    top_n = int(top_n)
+    candidate_idx = sims_work.argsort()[-(top_n * 4):][::-1]
+    # Threshold on the MASKED scores and re-check membership. Testing the
+    # unmasked ``sims`` here let rows from other systems through whenever
+    # the selected system had fewer than ``top_n * 4`` non-zero matches and
+    # zeroed-out rows padded the candidate window.
+    filtered_idx = [
+        i for i in candidate_idx
+        if sims_work[i] >= min_score and (in_system is None or in_system[i])
+    ][:top_n]
+    return filtered_idx, sims
+def recommend_from_selection(drug_label: str, system_filter: str,
+                              top_n: int, min_score: float):
+    """Tab 1 — rank dataset drugs against the selected dropdown entry.
+
+    Returns ``(table, markdown)``. ``table`` is ``None`` when there is
+    nothing to show (models not loaded, no selection, or no hit reaching
+    ``min_score``); ``markdown`` then carries the reason. Otherwise
+    ``table`` is a DataFrame indexed by 1-based ``Rank`` and ``markdown``
+    summarises the hit count per medical system and the query used.
+    """
+    if not MODEL_LOADED:
+        return None, "❌ Models not loaded. Ensure `/models` folder is present."
+    if not drug_label:
+        return None, "⚠️ Please select a drug from the dropdown."
+    query = _get_query(drug_label)
+    hits, scores = _run_similarity(query, system_filter, top_n, min_score)
+    if not hits:
+        message = (
+            f"⚠️ No results above similarity score **{min_score}**. "
+            "Try lowering the threshold slider."
+        )
+        return None, message
+    source_columns = [
+        "brand_name", "generic_name", "dosage_form",
+        "strength", "medical_system", "manufacturer",
+    ]
+    table = drug_db.iloc[hits][source_columns].copy()
+    table["similarity_score"] = [round(float(scores[i]), 4) for i in hits]
+    table = table.sort_values("similarity_score", ascending=False)
+    table = table.reset_index(drop=True)
+    # Present a 1-based rank instead of the positional index.
+    table.index = range(1, len(table) + 1)
+    table.index.name = "Rank"
+    table.columns = [
+        "Brand Name", "Generic Name", "Dosage Form",
+        "Strength", "Medical System", "Manufacturer", "Score",
+    ]
+    per_system = table["Medical System"].value_counts()
+    breakdown  = "  ·  ".join(f"**{k}** {v}" for k, v in per_system.items())
+    # Text before the em dash is the emoji + drug name part of the label.
+    short_name = drug_label.split("—")[0].strip()
+    summary = (
+        f"### ✅ {len(table)} results for {short_name}\n\n"
+        f"{breakdown}\n\n"
+        f"*Query used: `{query}`*"
+    )
+    return table, summary
+def cross_system_compare(drug_label: str, top_per_system: int):
+    """Tab 2 — best N drugs from every medical system, side by side.
+
+    Args:
+        drug_label: dropdown label; mapped to a query via ``_get_query``.
+        top_per_system: maximum number of rows to keep per medical system.
+
+    Returns:
+        ``(table, markdown)``. ``table`` is ``None`` when models are not
+        loaded, nothing is selected, or no system has a match scoring above
+        0.01; otherwise a DataFrame sorted by system then descending score,
+        indexed by 1-based ``Rank``.
+    """
+    if not MODEL_LOADED:
+        return None, "❌ Models not loaded."
+    if not drug_label:
+        return None, "⚠️ Select a drug first."
+    query     = _get_query(drug_label)
+    q_clean   = _clean(query)
+    q_vec     = vectorizer.transform([q_clean])
+    sims      = cosine_similarity(q_vec, tfidf_matrix).flatten()
+    # Slicing needs an int; coerce in case the UI hands over a float.
+    top_per_system = int(top_per_system)
+    rows = []
+    for system in sorted(drug_db["medical_system"].unique()):
+        mask  = drug_db["medical_system"] == system
+        s     = sims.copy()
+        s[~mask] = 0.0
+        # Threshold on the MASKED scores. Testing the unmasked ``sims`` let
+        # rows of other systems into this system's slot whenever it had
+        # fewer than ``top_per_system`` matches, producing duplicate rows.
+        idx   = [i for i in s.argsort()[-top_per_system:][::-1] if s[i] > 0.01]
+        for i in idx:
+            r = drug_db.iloc[i]
+            rows.append({
+                "Medical System": r["medical_system"],
+                "Brand Name":     r["brand_name"],
+                "Generic Name":   r["generic_name"],
+                "Dosage Form":    r["dosage_form"],
+                "Strength":       r["strength"],
+                "Score":          round(float(sims[i]), 4),
+            })
+    if not rows:
+        return None, "No cross-system results found."
+    df = pd.DataFrame(rows).sort_values(
+        ["Medical System", "Score"], ascending=[True, False]
+    ).reset_index(drop=True)
+    df.index = range(1, len(df) + 1)
+    df.index.name = "Rank"
+    label_short = drug_label.split("—")[0].strip()
     summary = (
+        f"### 🌐 Cross-system: {label_short}\n\n"
+        f"Top **{top_per_system}** per system · {len(df)} total drugs · "
+        f"{df['Medical System'].nunique()} systems"
     )
+    return df, summary
+# ═══════════════════════════════════════════════════════════════════
+# OPENFDA API HELPERS
+# ═══════════════════════════════════════════════════════════════════
+def _openfda(endpoint: str, params: dict, timeout: int = 10) -> dict:
+    """GET ``{OPENFDA_BASE}/{endpoint}.json`` and return the decoded JSON.
+
+    Failures are reported rather than raised: a non-200 status, a timeout,
+    a connection error or any other ``Exception`` yields a dict with
+    ``"error"`` and ``"message"`` keys.
+    """
+    try:
+        url = f"{OPENFDA_BASE}/{endpoint}.json"
+        response = requests.get(
+            url,
+            params=params,
+            timeout=timeout,
+            headers={"User-Agent": "DrugRecommenderThesis/3.0"},
+        )
+        if response.status_code != 200:
+            # Only the first 200 characters of the body are passed on.
+            return {
+                "error": f"HTTP {response.status_code}",
+                "message": response.text[:200],
+            }
+        return response.json()
+    except requests.exceptions.Timeout:
+        return {"error": "timeout", "message": "OpenFDA timed out — try again."}
+    except requests.exceptions.ConnectionError:
+        return {"error": "connection", "message": "Cannot reach OpenFDA. Check internet."}
+    except Exception as err:
+        return {"error": "unknown", "message": str(err)}
+def _extract_generic(drug_label: str) -> str:
+    """Return the first word of the label text before the em dash.
+
+    Non-word, non-space characters (emoji, punctuation) are removed first,
+    e.g. '🦠 Azithromycin — Antibiotic' → 'Azithromycin'. If no word is
+    left, the (empty) stripped text is returned.
+    """
+    head = drug_label.partition("—")[0]
+    stripped = re.sub(r"[^\w\s]", "", head).strip()
+    return next(iter(stripped.split()), stripped)
+# ─── Tab 3: FDA Drug Label ───────────────────────────────────────
+def get_fda_label(drug_label: str) -> str:
+    """Tab 3 — render the OpenFDA drug label for the selected drug as Markdown.
+
+    The generic name is derived from ``drug_label`` with ``_extract_generic``
+    and looked up on the ``label`` endpoint (``limit`` 1). Returns a Markdown
+    string in every case: a warning when nothing is selected, an error note
+    when ``_openfda`` reports an error, an info line when there are no
+    results, otherwise the formatted label of the first result.
+    """
+    if not drug_label:
+        return "⚠️ Select a drug first."
+    generic = _extract_generic(drug_label)
+    data    = _openfda("label", {"search": f"openfda.generic_name:{generic}", "limit": 1})
+    if "error" in data:
+        return (
+            f"### ⚠️ OpenFDA: {data['message']}\n\n"
+            f"*`{generic}` may not be in the US FDA database — "
+            "OpenFDA covers US-approved drugs only.*"
+        )
+    results = data.get("results", [])
+    if not results:
+        return f"ℹ️ No FDA label found for **{generic}**."
+    r      = results[0]
+    ofd    = r.get("openfda", {})
+    # Output is accumulated line by line and joined once at the end.
+    lines  = [
+        f"## 💊 FDA Label: {generic.title()}",
+        "_Source: U.S. Food & Drug Administration · OpenFDA_\n",
+    ]
+    def _add(key, title):
+        # Append one "**title:** a, b, c" line from the openfda sub-record,
+        # showing at most five values; skipped when the key is absent/empty.
+        v = ofd.get(key, [])
+        if v:
+            lines.append(f"**{title}:** {', '.join(v[:5])}")
+    _add("brand_name",        "Brand Names (US)")
+    _add("manufacturer_name", "Manufacturer")
+    _add("route",             "Route")
+    lines.append("")
+    # (label field, Markdown heading, maximum characters shown)
+    SECTIONS = [
+        ("indications_and_usage",      "📋 Indications & Usage",       700),
+        ("warnings",                   "⚠️ Warnings",                  500),
+        ("dosage_and_administration",  "💉 Dosage & Administration",   500),
+        ("adverse_reactions",          "🔴 Adverse Reactions",         400),
+        ("drug_interactions",          "🔗 Drug Interactions",         400),
+        ("contraindications",          "🚫 Contraindications",         400),
+    ]
+    for field, heading, limit in SECTIONS:
+        val = r.get(field, [])
+        if val:
+            lines.append(f"### {heading}")
+            # Only the first entry of the field is shown, cut to ``limit``
+            # characters with an ellipsis when something was cut off.
+            lines.append(val[0][:limit] + ("…" if len(val[0]) > limit else "") + "\n")
+    lines.append("---")
+    lines.append(
+        "*Data from [OpenFDA](https://open.fda.gov) · "
+        "For research purposes only · Not clinical advice*"
+    )
+    return "\n".join(lines)
+# ─── Tab 4: FAERS Adverse Events ────────────────────────────────
+def get_fda_adverse_events(drug_label: str):
+    """Tab 4 — most frequently reported FAERS reactions for the selected drug.
+
+    Runs a count query on the ``event`` endpoint (reaction term counts for
+    reports naming the drug, ``limit`` 15).
+
+    Returns:
+        ``(table, markdown)``. ``table`` is ``None`` when nothing is
+        selected, ``_openfda`` reports an error, or there are no results;
+        otherwise a DataFrame with columns ``Adverse Reaction`` and
+        ``Report Count``, sorted by count and indexed by 1-based ``Rank``.
+    """
+    if not drug_label:
+        return None, "⚠️ Select a drug first."
+    generic = _extract_generic(drug_label)
+    data    = _openfda("event", {
+        "search": f"patient.drug.medicinalproduct:{generic}",
+        "count":  "patient.reaction.reactionmeddrapt.exact",
+        "limit":  15,
+    })
+    if "error" in data:
+        return None, f"### ⚠️ FAERS: {data['message']}"
+    results = data.get("results", [])
+    if not results:
+        return None, f"ℹ️ No FAERS data for **{generic}**."
+    # Count queries return records shaped {"term": ..., "count": ...}.
+    # Handing those dicts to DataFrame(..., columns=[display names]) SELECTS
+    # columns by name instead of renaming them, which produced an all-NaN
+    # table and a total of 0. Map the keys to display names explicitly.
+    df = pd.DataFrame(
+        [
+            {
+                "Adverse Reaction": r.get("term", "—"),
+                "Report Count":     int(r.get("count") or 0),
+            }
+            for r in results
+        ],
+        columns=["Adverse Reaction", "Report Count"],
+    )
+    df = df.sort_values("Report Count", ascending=False).reset_index(drop=True)
+    df.index = range(1, len(df) + 1)
+    df.index.name = "Rank"
+    total   = int(df["Report Count"].sum())
+    summary = (
+        f"### 📊 FAERS Adverse Events: **{generic.title()}**\n\n"
+        f"Top 15 reactions · **{total:,} total reports** in FDA database\n\n"
+        f"*Source: FDA Adverse Event Reporting System (FAERS) via OpenFDA*"
+    )
+    return df, summary
# ─── Tab 4: NDC Lookup ──────────────────────────────────────────
def get_fda_ndc(drug_label: str):
    """Look up National Drug Code directory entries for a drug.

    Queries the OpenFDA ``ndc`` endpoint through ``_openfda`` by generic
    name (limit 10) and flattens each result into one table row.

    Args:
        drug_label: Dropdown label of the selected drug. The generic name
            used in the query is derived from it with ``_extract_generic``.

    Returns:
        A ``(table, summary)`` pair. ``table`` is a DataFrame with one row
        per returned product, indexed from 1 under the name "#"; it is
        ``None`` when there is nothing to show. ``summary`` is a Markdown
        string (or a warning/info message).
    """
    if not drug_label:
        return None, "⚠️ Select a drug."
    generic = _extract_generic(drug_label)
    data    = _openfda("ndc", {"search": f"generic_name:{generic}", "limit": 10})
    if "error" in data:
        return None, f"### ⚠️ NDC: {data['message']}"
    results = data.get("results", [])
    if not results:
        return None, f"ℹ️ No NDC data for **{generic}**."
    rows = [{
        "Brand Name":   r.get("brand_name", "—"),
        "Generic Name": r.get("generic_name", "—"),
        "Dosage Form":  r.get("dosage_form", "—"),
        # A missing, null or empty route list gets the same "—" placeholder
        # as every other column instead of a blank cell (or a join() error).
        "Route":        ", ".join(r.get("route") or []) or "—",
        "Manufacturer": r.get("labeler_name", "—"),
        "Product Type": r.get("product_type", "—"),
        "NDC Code":     r.get("product_ndc", "—"),
    } for r in results]
    df = pd.DataFrame(rows)
    df.index = range(1, len(df) + 1)
    df.index.name = "#"
    summary = (
        f"### 🏷️ NDC Registry: **{generic.title()}**\n\n"
        f"**{len(df)} products** in US National Drug Code directory\n\n"
        f"*Source: FDA NDC Database via OpenFDA*"
    )
    return df, summary
# ═══════════════════════════════════════════════════════════════════
# ✨ CHARTS — 5 unique visuals for the medical system overview tab
# ═══════════════════════════════════════════════════════════════════
# Precompute all chart data at startup (fast, in-memory)
if MODEL_LOADED and not drug_db.empty:
    # Frequency tables reused by the chart builders below.
    _sys_counts   = drug_db["medical_system"].value_counts()
    _dosage_top10 = drug_db["dosage_form"].value_counts().head(10)
    _mfr_top15    = drug_db["manufacturer"].value_counts().head(15)
    # Rows: medical system, columns: dosage form, cells: number of drugs.
    _sys_dosage   = pd.crosstab(drug_db["medical_system"], drug_db["dosage_form"])
    # Keep the eight most common dosage forms. reindex() is used instead of
    # plain column selection so that a form with no crosstab column (e.g. one
    # whose rows all lack a medical system) becomes a zero column rather than
    # raising KeyError while the module is being imported.
    _sys_dosage   = _sys_dosage.reindex(columns=_dosage_top10.index[:8], fill_value=0)
else:
    # Placeholders so the chart functions still have something to read.
    _sys_counts   = pd.Series({"No data": 1})
    _dosage_top10 = pd.Series({"No data": 1})
    _mfr_top15    = pd.Series({"No data": 1})
    _sys_dosage   = pd.DataFrame()
def _sys_colors(labels):
    """Return one colour per label, in order, looked up in ``SYSTEM_COLORS``.

    Labels without an entry fall back to the grey "#64748b".
    """
    colours = []
    for label in labels:
        colours.append(SYSTEM_COLORS.get(label, "#64748b"))
    return colours
# ── Chart 1: Donut — Drug share per medical system ───────────────
def chart_donut():
    """Build the donut chart of drug counts per medical system.

    Reads the precomputed ``_sys_counts`` series; the hole in the middle
    carries an annotation with the sum of all counts.

    Returns:
        A Plotly ``go.Figure`` containing a single pie trace.
    """
    system_names = _sys_counts.index.tolist()
    ring = go.Pie(
        labels=system_names,
        values=_sys_counts.values.tolist(),
        hole=0.55,
        marker=dict(
            colors=_sys_colors(system_names),
            line=dict(color="#ffffff", width=2.5),
        ),
        textinfo="label+percent",
        textfont=dict(size=13),
        hovertemplate="<b>%{label}</b><br>%{value:,} drugs<br>%{percent}<extra></extra>",
    )
    # Text placed in the centre of the donut hole.
    centre_note = dict(
        text=f"<b>{_sys_counts.sum():,}</b><br>Total Drugs",
        x=0.5, y=0.5, font=dict(size=15), showarrow=False,
    )
    heading = dict(
        text="<b>Drug Distribution Across 5 Medical Systems</b>",
        x=0.5, xanchor="center", font=dict(size=17),
    )
    fig = go.Figure(ring)
    fig.update_layout(
        title=heading,
        annotations=[centre_note],
        legend=dict(orientation="h", y=-0.08, x=0.5, xanchor="center"),
        height=420,
        margin=dict(t=60, b=40, l=20, r=20),
        paper_bgcolor="white", plot_bgcolor="white",
    )
    return fig
# ── Chart 2: Horizontal bar — Top 10 dosage forms ────────────────
def chart_dosage_bar():
    """Build the horizontal bar chart of the most common dosage forms.

    Reads the precomputed ``_dosage_top10`` series. Labels and values are
    reversed so the most frequent form ends up as the top bar.

    Returns:
        A Plotly ``go.Figure`` containing a single horizontal bar trace.
    """
    labels = _dosage_top10.index.tolist()[::-1]
    values = _dosage_top10.values.tolist()[::-1]
    # Blues without its two palest shades.
    palette = px.colors.sequential.Blues[2:]
    # One shade per bar, assigned by rank (rank 0 = most frequent = first
    # palette entry). The sliced palette can hold fewer shades than there are
    # bars; slicing it to len(labels) then produced a short list whose
    # reversal shifted the pairing and left the largest bars without a
    # colour. Ranks beyond the palette now reuse its last shade.
    colors = [
        palette[min(rank, len(palette) - 1)] for rank in range(len(labels))
    ][::-1]
    fig = go.Figure(go.Bar(
        y=labels, x=values,
        orientation="h",
        marker=dict(color=colors),
        text=[f"  {v:,}" for v in values],
        textposition="outside",
        hovertemplate="<b>%{y}</b>: %{x:,} drugs<extra></extra>",
    ))
    fig.update_layout(
        title=dict(
            text="<b>Top 10 Dosage Forms</b>",
            x=0.5, xanchor="center", font=dict(size=17)
        ),
        xaxis=dict(title="Number of Drugs", showgrid=True, gridcolor="#f0f0f0"),
        yaxis=dict(title=""),
        height=420,
        # Wide left margin leaves room for the dosage-form names.
        margin=dict(t=60, b=40, l=160, r=60),
        paper_bgcolor="white", plot_bgcolor="white",
    )
    return fig
# ── Chart 3: Grouped bar — Dosage form per system ────────────────
def chart_system_dosage_grouped():
    """Build the grouped bar chart of dosage forms within each medical system.

    Reads the precomputed ``_sys_dosage`` table (rows: medical system,
    columns: dosage form) and draws one bar trace per dosage-form column.

    Returns:
        A Plotly ``go.Figure``; an empty figure when ``_sys_dosage`` is empty.
    """
    if _sys_dosage.empty:
        return go.Figure()
    forms   = _sys_dosage.columns.tolist()
    shades  = px.colors.qualitative.Pastel[:len(forms)]
    systems = _sys_dosage.index.tolist()
    # One trace per dosage form; zip() stops at the shorter of the two lists.
    bars = [
        go.Bar(
            name=form,
            x=systems,
            y=_sys_dosage[form].tolist(),
            marker_color=shade,
            hovertemplate=f"<b>{form}</b><br>%{{x}}: %{{y:,}}<extra></extra>",
        )
        for form, shade in zip(forms, shades)
    ]
    fig = go.Figure(data=bars)
    heading = dict(
        text="<b>Dosage Form Breakdown per Medical System</b>",
        x=0.5, xanchor="center", font=dict(size=17),
    )
    fig.update_layout(
        barmode="group",
        title=heading,
        xaxis=dict(title="Medical System"),
        yaxis=dict(title="Drug Count", showgrid=True, gridcolor="#f0f0f0"),
        legend=dict(title="Dosage Form", orientation="h", y=-0.22, x=0.5, xanchor="center"),
        height=460,
        margin=dict(t=60, b=100, l=60, r=20),
        paper_bgcolor="white", plot_bgcolor="white",
    )
    return fig
# ── Chart 4: Treemap — Manufacturer × System ─────────────────────
def chart_treemap():
    """Build the treemap of the largest manufacturers inside each system.

    Counts products per (medical system, manufacturer) pair in ``drug_db``
    and keeps the five manufacturers with the most products in each system.

    Returns:
        A Plotly figure; an empty ``go.Figure`` when no dataset is available.
    """
    # Same availability test as the module-level chart precompute; without it
    # this chart is the only one that fails when the model did not load or
    # the dataset is empty.
    if not MODEL_LOADED or drug_db.empty:
        return go.Figure()
    top_mfr = drug_db.groupby(["medical_system", "manufacturer"]).size().reset_index(name="count")
    top_mfr = top_mfr.sort_values("count", ascending=False)
    # Keep top 5 manufacturers per system (rows are already sorted by count)
    top_mfr = top_mfr.groupby("medical_system").head(5).reset_index(drop=True)
    fig = px.treemap(
        top_mfr,
        path=["medical_system", "manufacturer"],
        values="count",
        color="medical_system",
        color_discrete_map=SYSTEM_COLORS,
        custom_data=["count"],
    )
    fig.update_traces(
        hovertemplate="<b>%{label}</b><br>Products: %{customdata[0]:,}<extra></extra>",
        textfont=dict(size=12),
    )
    fig.update_layout(
        title=dict(
            text="<b>Top Manufacturers by Medical System (Treemap)</b>",
            x=0.5, xanchor="center", font=dict(size=17)
        ),
        height=480,
        margin=dict(t=60, b=20, l=20, r=20),
        paper_bgcolor="white",
    )
    return fig
# ── Chart 5: Radar — System profile across dosage dimensions ─────
def chart_radar():
    """Build the radar chart comparing systems across common dosage forms.

    Uses those of Tablet, Capsule, Liquid, Injection and Syrup that exist as
    columns of the precomputed ``_sys_dosage`` table and draws one closed
    polygon per medical system.

    Returns:
        A Plotly ``go.Figure``; an empty figure when none of the five
        categories is present.
    """
    wanted = ["Tablet", "Capsule", "Liquid", "Injection", "Syrup"]
    axes   = [name for name in wanted if name in _sys_dosage.columns]
    if not axes:
        return go.Figure()
    counts = _sys_dosage[axes]
    # Scale every dosage-form column by its own maximum, so the system with
    # the most products in a category reads 100 on that axis.
    column_peaks = counts.max(axis=0)
    scaled = counts.div(column_peaks, axis=1).fillna(0) * 100
    fig = go.Figure()
    for system, row in scaled.iterrows():
        vals  = row.tolist()
        color = SYSTEM_COLORS.get(system, "#64748b")
        # Translucent fill: rewrite "rgb(...)" as "rgba(...,0.15)", otherwise
        # append the alpha byte "28" to the colour string.
        if "rgb" in color:
            fill = color.replace(")", ",0.15)").replace("rgb", "rgba")
        else:
            fill = color + "28"
        fig.add_trace(go.Scatterpolar(
            # Repeat the first point so the outline closes.
            r=[*vals, vals[0]],
            theta=[*axes, axes[0]],
            fill="toself",
            fillcolor=fill,
            line=dict(color=color, width=2),
            name=system,
            hovertemplate="<b>" + system + "</b><br>%{theta}: %{r:.0f}%<extra></extra>",
        ))
    radial = dict(
        visible=True, range=[0, 110],
        tickfont=dict(size=10), gridcolor="#e5e7eb",
    )
    heading = dict(
        text="<b>Medical System Profile — Dosage Form Radar</b>",
        x=0.5, xanchor="center", font=dict(size=17),
    )
    fig.update_layout(
        polar=dict(
            radialaxis=radial,
            angularaxis=dict(tickfont=dict(size=12)),
            bgcolor="white",
        ),
        title=heading,
        showlegend=True,
        legend=dict(orientation="h", y=-0.12, x=0.5, xanchor="center"),
        height=460,
        margin=dict(t=60, b=80, l=60, r=60),
        paper_bgcolor="white", plot_bgcolor="white",
    )
    return fig
def build_all_charts():
    """Build all five dashboard figures and return them as one tuple.

    The order is donut, dosage bar, grouped bar, treemap, radar.
    """
    builders = (
        chart_donut,
        chart_dosage_bar,
        chart_system_dosage_grouped,
        chart_treemap,
        chart_radar,
    )
    return tuple(build() for build in builders)
# ═══════════════════════════════════════════════════════════════════
# STATS TEXT
# ═══════════════════════════════════════════════════════════════════
def get_stats() -> str:
    """Return a Markdown summary of the loaded dataset.

    The text contains a metrics table, the share of each medical system
    (with a small text bar) and the ten most common dosage forms. When the
    models are not loaded, the plain string "Models not loaded." is returned.
    """
    if not MODEL_LOADED:
        return "Models not loaded."
    total_rows    = len(drug_db)
    system_counts = drug_db["medical_system"].value_counts()
    top_dosage    = drug_db["dosage_form"].value_counts().head(10)
    header = f"""## 📊 Dataset Statistics
| Metric | Value |
|--------|-------|
| **Total Drugs** | {total_rows:,} |
| **Medical Systems** | {drug_db["medical_system"].nunique()} |
| **Unique Manufacturers** | {drug_db["manufacturer"].nunique():,} |
| **Unique Brand Names** | {drug_db["brand_name"].nunique():,} |
| **TF-IDF Features** | {metadata.get("n_features", 10000):,} |
| **Silhouette Score** | {metadata.get("silhouette_score", "N/A")} |
| **Bug Fix Applied** | Non-allopathic brand names excluded from TF-IDF |
### 🏥 Medical Systems
"""
    pieces = [header]
    for system, count in system_counts.items():
        share = count / total_rows * 100
        # One block character per three percentage points.
        gauge = "█" * int(share / 3)
        pieces.append(f"\n- **{system}**: {count:,} ({share:.1f}%) `{gauge}`")
    pieces.append("\n\n### 💊 Top 10 Dosage Forms\n")
    pieces.extend(f"\n- {form}: {count:,}" for form, count in top_dosage.items())
    return "".join(pieces)
# ═══════════════════════════════════════════════════════════════════
# GRADIO UI
# ═══════════════════════════════════════════════════════════════════
# Stylesheet handed to gr.Blocks(css=...): caps the page width and sets the
# font, styles the ".hero" banner, the ".sbadge" pills and the ".fix-note"
# callout, and hides the page's <footer> element.
CSS = """
.gradio-container {
    max-width: 1080px !important;
    margin: auto !important;
    font-family: 'Segoe UI', system-ui, sans-serif !important;
}
.hero {
    background: linear-gradient(135deg, #0f172a 0%, #1e1b4b 55%, #0f172a 100%);
    border: 1px solid rgba(99,102,241,0.35);
    border-radius: 16px;
    padding: 28px 32px 22px;
    margin-bottom: 18px;
    text-align: center;
}
.sbadge {
    display: inline-block; border-radius: 999px;
    padding: 4px 13px; font-size: 12px; margin: 3px;
}
.fix-note {
    background: rgba(34,197,94,0.08);
    border: 1px solid rgba(34,197,94,0.25);
    border-radius: 10px; padding: 11px 16px;
    font-size: 13px; margin: 8px 0 12px;
}
footer { display: none !important; }
"""
# Banner markup shown at the top of the page via gr.HTML. The drug counts in
# the title line and in the badges are literal text, not computed from drug_db.
HEADER_HTML = """
<div class="hero">
  <h1 style="color:white;font-size:2em;margin:0 0 8px;font-weight:800;letter-spacing:-0.5px;">
    💊 Cross-Medical-System Drug Recommender
  </h1>
  <p style="color:#94a3b8;margin:0 0 14px;font-size:1rem;">
    53,581 drugs · NLP-Powered · Master's Thesis · + Live OpenFDA API
  </p>
  <div>
    <span class="sbadge" style="background:rgba(59,130,246,.15);border:1px solid rgba(59,130,246,.3);color:#93c5fd;">🔵 Allopathic 36,251</span>
    <span class="sbadge" style="background:rgba(249,115,22,.12);border:1px solid rgba(249,115,22,.3);color:#fdba74;">🟠 Unani 8,460</span>
    <span class="sbadge" style="background:rgba(34,197,94,.12);border:1px solid rgba(34,197,94,.3);color:#86efac;">🟢 Ayurvedic 5,262</span>
    <span class="sbadge" style="background:rgba(168,85,247,.12);border:1px solid rgba(168,85,247,.3);color:#d8b4fe;">🟣 Homeopathic 2,580</span>
    <span class="sbadge" style="background:rgba(239,68,68,.1);border:1px solid rgba(239,68,68,.3);color:#fca5a5;">🔴 Herbal 1,028</span>
    <span class="sbadge" style="background:rgba(16,185,129,.1);border:1px solid rgba(16,185,129,.3);color:#6ee7b7;">🇺🇸 + OpenFDA API</span>
  </div>
</div>
"""
# Explanatory callout (styled by ".fix-note") rendered at the top of the
# "Dataset Recommendations" tab.
FIX_NOTE_HTML = """
<div class="fix-note">
  <strong>✅ Bug fix applied:</strong> Non-allopathic drugs (Ayurvedic, Unani, Homeopathic, Herbal)
  no longer appear in Allopathic compound searches.
  Brand names like <em>"Feverfit"</em> or <em>"Paincap"</em> are no longer used as TF-IDF tokens —
  only the pharmaceutical compound name (Generic Name) drives matching for Allopathic drugs.
</div>
"""
# Assemble the whole Gradio interface: the banner, one shared drug/system
# selector, seven tabs whose buttons call the handler functions of this
# module, and a footer strip. The resulting Blocks app is bound to `demo`.
with gr.Blocks(css=CSS, title="💊 Drug Recommender v3", theme=gr.themes.Soft()) as demo:
    gr.HTML(HEADER_HTML)
    # ── Global selector — shared by all 4 data tabs ──────────────
    gr.Markdown("### 👇 Step 1: Select a drug — then use any tab below")
    with gr.Row():
        with gr.Column(scale=5):
            # Pre-selects the first label, so DROPDOWN_LABELS must not be empty.
            drug_selector = gr.Dropdown(
                choices=DROPDOWN_LABELS,
                value=DROPDOWN_LABELS[0],
                label="💊 Select Drug / Category  (30 options)",
                info="Antibiotics · Pain · Heart · Diabetes · Respiratory · Neuro · GI · Antifungal · Vitamins",
                interactive=True,
            )
        with gr.Column(scale=2):
            # Only passed to the Tab 1 handler below; the other tabs ignore it.
            system_filter = gr.Dropdown(
                choices=MEDICAL_SYSTEMS,
                value="All Systems",
                label="🏥 Medical System Filter",
                info="Optional — narrows results",
            )
    gr.HTML("""
    <div style="background:#f8fafc;border:1px solid #e2e8f0;border-radius:10px;
                padding:11px 16px;font-size:13px;margin:6px 0 14px;color:#475569;">
      <strong>Two data sources:</strong>
      Tabs 1–2 query your <strong>local PKL model</strong> (53k drugs). &nbsp;|&nbsp;
      Tabs 3–4 call <strong>OpenFDA live API</strong> for real-time FDA data.
      Both use the same dropdown above.
    </div>
    """)
    with gr.Tabs():
        # ═══════════════════════════════════════════════════════
        # TAB 1 — Dataset Recommendations
        # ═══════════════════════════════════════════════════════
        with gr.TabItem("🔍 Dataset Recommendations"):
            gr.HTML(FIX_NOTE_HTML)
            gr.Markdown("""
            Finds similar drugs using **TF-IDF cosine similarity** across all 53,581 records.
            The dropdown selection maps to a precise search query — no typing needed.
            """)
            with gr.Row():
                top_n     = gr.Slider(3, 25, value=10, step=1,
                                      label="📋 Number of Results")
                min_score = gr.Slider(0.01, 0.50, value=0.05, step=0.01,
                                      label="🎯 Min Similarity Score")
            rec_btn     = gr.Button("🚀 Get Recommendations", variant="primary", size="lg")
            rec_summary = gr.Markdown()
            rec_table   = gr.DataFrame(
                label="📋 Recommended Drugs",
                wrap=True, interactive=False,
            )
            # Outputs are filled positionally: the handler is expected to
            # return (table, summary) — confirm against recommend_from_selection.
            rec_btn.click(
                fn=recommend_from_selection,
                inputs=[drug_selector, system_filter, top_n, min_score],
                outputs=[rec_table, rec_summary],
            )
            gr.Markdown("""
            ---
            **How matching works per system:**
            - **Allopathic** → matched by *Generic Name* compound (e.g. "Paracetamol") ✅
            - **Ayurvedic / Unani / Homeopathic / Herbal** → matched by *dosage form + system*
              (no generic compound data exists in this dataset for these systems)
            """)
        # ═══════════════════════════════════════════════════════
        # TAB 2 — Cross-System Comparison
        # ═══════════════════════════════════════════════════════
        with gr.TabItem("🌐 Cross-System Compare"):
            gr.Markdown("""
            ### 🏆 Core Thesis Feature
            Best results from **every medical tradition** side by side.
            Bridges Allopathic ↔ Ayurvedic ↔ Unani ↔ Homeopathic ↔ Herbal.
            """)
            top_per_sys  = gr.Slider(1, 5, value=3, step=1,
                                     label="Results per Medical System")
            compare_btn  = gr.Button("🔄 Compare All 5 Systems",
                                     variant="primary", size="lg")
            cross_summary = gr.Markdown()
            cross_table   = gr.DataFrame(
                label="🌐 All 5 Medical Systems — Side by Side",
                wrap=True, interactive=False,
            )
            # The system filter is not an input here; only the selected drug
            # and the per-system result count are passed.
            compare_btn.click(
                fn=cross_system_compare,
                inputs=[drug_selector, top_per_sys],
                outputs=[cross_table, cross_summary],
            )
        # ═══════════════════════════════════════════════════════
        # TAB 3 — OpenFDA Drug Label (Live)
        # ═══════════════════════════════════════════════════════
        with gr.TabItem("🇺🇸 FDA Label (Live)"):
            gr.Markdown("""
            ### Official FDA Drug Label — fetched live from OpenFDA
            Returns indications, warnings, dosage, adverse reactions, and drug interactions
            directly from the US Food & Drug Administration.
            > 🔌 **API:** [OpenFDA /drug/label](https://open.fda.gov/apis/drug/label/) · Free · No key required
            """)
            fda_label_btn    = gr.Button("🔍 Fetch FDA Drug Label",
                                         variant="primary", size="lg")
            fda_label_result = gr.Markdown()
            # Single Markdown output: get_fda_label returns one text block.
            fda_label_btn.click(
                fn=get_fda_label,
                inputs=[drug_selector],
                outputs=[fda_label_result],
            )
            gr.Markdown("""
            ---
            ⚠️ *OpenFDA covers US-approved drugs. Bangladesh dataset drugs may use
            different brand names or may not be in FDA records — this is expected.*
            """)
        # ═══════════════════════════════════════════════════════
        # TAB 4 — FDA Adverse Events + NDC (Live)
        # ═══════════════════════════════════════════════════════
        with gr.TabItem("⚠️ Adverse Events + NDC (Live)"):
            gr.Markdown("""
            ### FDA FAERS Adverse Events + National Drug Code Registry
            - **FAERS** — real patient-reported side effects from millions of reports
            - **NDC** — manufacturer, packaging, and product type data
            """)
            with gr.Row():
                ae_btn  = gr.Button("📊 Fetch Adverse Events (FAERS)",
                                    variant="primary")
                ndc_btn = gr.Button("🏷️ Lookup NDC Directory",
                                    variant="secondary")
            ae_summary = gr.Markdown()
            ae_table   = gr.DataFrame(
                label="⚠️ Top Adverse Reactions (Real FDA Data)",
                wrap=True, interactive=False,
            )
            gr.HTML("<hr style='margin:14px 0;border-color:#e2e8f0;'>")
            ndc_summary = gr.Markdown()
            ndc_table   = gr.DataFrame(
                label="🏷️ NDC Product Registry",
                wrap=True, interactive=False,
            )
            # Both handlers return (table, summary), matching the output order.
            ae_btn.click(
                fn=get_fda_adverse_events,
                inputs=[drug_selector],
                outputs=[ae_table, ae_summary],
            )
            ndc_btn.click(
                fn=get_fda_ndc,
                inputs=[drug_selector],
                outputs=[ndc_table, ndc_summary],
            )
            gr.Markdown("""
            ---
            > **APIs:** [OpenFDA FAERS](https://open.fda.gov/apis/drug/event/) ·
            > [OpenFDA NDC](https://open.fda.gov/apis/drug/ndc/) · Both free, no key.
            """)
        # ═══════════════════════════════════════════════════════
        # TAB 5 — Visual Charts (5 unique plots)
        # ═══════════════════════════════════════════════════════
        with gr.TabItem("📊 Visual Charts"):
            gr.Markdown("""
            ### 📊 Five unique visualisations of the 53,581-drug dataset
            Click **Load All Charts** to render the full dashboard.
            """)
            load_charts_btn = gr.Button("📊 Load All Charts",
                                        variant="primary", size="lg")
            with gr.Row():
                p1 = gr.Plot(label="① Drug Share by Medical System (Donut)")
                p2 = gr.Plot(label="② Top 10 Dosage Forms (Bar)")
            with gr.Row():
                p3 = gr.Plot(label="③ Dosage Form per System (Grouped Bar)")
                p4 = gr.Plot(label="④ Top Manufacturers Treemap")
            with gr.Row():
                p5 = gr.Plot(label="⑤ System Profile — Dosage Radar")
            # build_all_charts returns the five figures in the same order as
            # p1..p5 (donut, bar, grouped bar, treemap, radar).
            load_charts_btn.click(
                fn=build_all_charts,
                inputs=[],
                outputs=[p1, p2, p3, p4, p5],
            )
        # ═══════════════════════════════════════════════════════
        # TAB 6 — Stats text
        # ═══════════════════════════════════════════════════════
        with gr.TabItem("📈 Dataset Stats"):
            load_stats_btn = gr.Button("📈 Load Statistics", variant="secondary")
            stats_output   = gr.Markdown()
            load_stats_btn.click(fn=get_stats, inputs=[], outputs=[stats_output])
        # ═══════════════════════════════════════════════════════
        # TAB 7 — About / Code Reference
        # ═══════════════════════════════════════════════════════
        with gr.TabItem("📚 About / Code"):
            # Static text only. The line numbers quoted in the table are
            # approximate pointers written by hand.
            gr.Markdown("""
## 📖 About This Project
**Thesis:** Intelligent Cross-Medical-System Drug Recommendation Using NLP
### Where the key code changes live in `app.py`
| What changed | Function / location |
|---|---|
| ✅ Bug fix — non-allopathic brand name excluded | `build_drug_text()` ~line 100 |
| ✨ 30-option dropdown | `DRUG_OPTIONS` dict ~line 60 |
| 🇺🇸 FDA Drug Label API | `get_fda_label()` |
| ⚠️ FDA FAERS adverse events | `get_fda_adverse_events()` |
| 🏷️ FDA NDC lookup | `get_fda_ndc()` |
| 📊 Donut chart | `chart_donut()` |
| 📊 Bar chart | `chart_dosage_bar()` |
| 📊 Grouped bar | `chart_system_dosage_grouped()` |
| 📊 Treemap | `chart_treemap()` |
| 📊 Radar chart | `chart_radar()` |
### Technical Stack
| Layer | Technology |
|---|---|
| NLP | TF-IDF bigrams, 10,000 features, sublinear TF |
| Similarity | Cosine Similarity |
| Clustering | SVD (50d) + K-Means (K=10) |
| External APIs | OpenFDA label · FAERS · NDC |
| Visualisation | Plotly (donut, bar, grouped bar, treemap, radar) |
| Deployment | Hugging Face Spaces · Gradio 4 |
### Dataset
53,581 records · Bangladesh National Drug Registry ·
[Kaggle link](https://www.kaggle.com/datasets/shuvokumarbasak2030/drug-pharma-new-dataset)
---
⚠️ *Research and educational purposes only. Not clinical advice.*
            """)
    # Footer strip shown below the tabs.
    gr.HTML("""
    <div style="text-align:center;padding:14px;color:#94a3b8;font-size:12px;
                border-top:1px solid #e2e8f0;margin-top:12px;">
      💊 Cross-Medical-System Drug Recommender v3.0 · Master's Thesis ·
      53,581 drugs · TF-IDF + Cosine Similarity · OpenFDA API · Plotly Charts
    </div>
    """)
# ═══════════════════════════════════════════════════════════════════
if __name__ == "__main__":
    # Start the app only when the file is run as a script: listen on all
    # interfaces at port 7860, without a public share link, and with errors
    # shown in the UI.
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )
    demo.launch(**launch_options)