Spaces:

QueenS5Ella
/

Royalty

Sleeping

App Files Files Community

QueenS5Ella commited on Jul 17, 2025

Commit

c26d1cd

verified ·

1 Parent(s): b891774

Upload Medic_bot.ipynb

Browse files

Files changed (1) hide show

Medic_bot.ipynb +1533 -0

Medic_bot.ipynb ADDED Viewed

	@@ -0,0 +1,1533 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6a1699ee-e3d0-4cd8-8a0f-b4b749a9ed95",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# IMPORT THE NECESSARY LIBRARIES 1\n",
+    "#Import Python libraries: Numpy and Pandas\n",
+    "import pandas as pd\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.metrics.pairwise import cosine_similarity\n",
+    "import openai\n",
+    "import faiss\n",
+    "import numpy as np\n",
+    "\n",
+    "#import libraries & modules for data visualization\n",
+    "from pandas.plotting import scatter_matrix\n",
+    "from matplotlib import pyplot\n",
+    "\n",
+    "#import scikit-learn module for algorithm/model: K-Nearest Neighbors regression\n",
+    "from sklearn.neighbors import KNeighborsRegressor\n",
+    "\n",
+    "#import scikit learn module to split the dataset into train/test sub-datasets\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "#Import scikit-learn module for K-fold cross validation - algorithm/model evaluation & validation\n",
+    "from sklearn.model_selection import KFold\n",
+    "from sklearn.model_selection import cross_val_score\n",
+    "\n",
+    "#Import scikit-learn module for classification report\n",
+    "from sklearn.metrics import classification_report\n",
+    "\n",
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "from sklearn.preprocessing import OrdinalEncoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "43cd247a-6452-4686-b5e0-99d0c303a51e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[nltk_data] Downloading package punkt to C:\\Users\\Sharon-\n",
+      "[nltk_data]     Rose\\AppData\\Roaming\\nltk_data...\n",
+      "[nltk_data]   Package punkt is already up-to-date!\n",
+      "[nltk_data] Downloading package stopwords to C:\\Users\\Sharon-\n",
+      "[nltk_data]     Rose\\AppData\\Roaming\\nltk_data...\n",
+      "[nltk_data]   Package stopwords is already up-to-date!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# IMPORTATION OF NECESSARY LIBRARIES 2\n",
+    "import os # for handling data\n",
+    "import re # for text preprocessing\n",
+    "\n",
+    "# For Natural Language Processing tasks\n",
+    "import nltk\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "nltk.download(\"punkt\")\n",
+    "nltk.download(\"stopwords\")\n",
+    "\n",
+    "# Optional: for vectorization and building of the models\n",
+    "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
+    "\n",
+    "#IMPORTATION OF THE DIFFERENT MODELS FOR THE CHATBOT\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.ensemble import RandomForestRegressor\n",
+    "import xgboost as xgb\n",
+    "from sklearn.linear_model import Ridge\n",
+    "from sklearn.neural_network import MLPRegressor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "15b532ac-c058-4676-814a-ac52d46ef3f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.16.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "import scipy\n",
+    "print(scipy.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "cec20cc7-22c4-4505-8779-5692d946eca2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import openai\n",
+    "import gradio as gr\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.metrics.pairwise import cosine_similarity"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "121c1914-e27a-4220-a445-2e7f2e297845",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Description</th>\n",
+       "      <th>Patient</th>\n",
+       "      <th>Doctor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Q. What does abutment of the nerve root mean?</td>\n",
+       "      <td>Hi doctor,I am just wondering what is abutting...</td>\n",
+       "      <td>Hi. I have gone through your query with dilige...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Q. What should I do to reduce my weight gained...</td>\n",
+       "      <td>Hi doctor, I am a 22-year-old female who was d...</td>\n",
+       "      <td>Hi. You have really done well with the hypothy...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Q. I have started to get lots of acne on my fa...</td>\n",
+       "      <td>Hi doctor! I used to have clear skin but since...</td>\n",
+       "      <td>Hi there Acne has multifactorial etiology. Onl...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Q. Why do I have uncomfortable feeling between...</td>\n",
+       "      <td>Hello doctor,I am having an uncomfortable feel...</td>\n",
+       "      <td>Hello. The popping and discomfort what you fel...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Q. My symptoms after intercourse threatns me e...</td>\n",
+       "      <td>Hello doctor,Before two years had sex with a c...</td>\n",
+       "      <td>Hello. The HIV test uses a finger prick blood ...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                         Description  \\\n",
+       "0      Q. What does abutment of the nerve root mean?   \n",
+       "1  Q. What should I do to reduce my weight gained...   \n",
+       "2  Q. I have started to get lots of acne on my fa...   \n",
+       "3  Q. Why do I have uncomfortable feeling between...   \n",
+       "4  Q. My symptoms after intercourse threatns me e...   \n",
+       "\n",
+       "                                             Patient  \\\n",
+       "0  Hi doctor,I am just wondering what is abutting...   \n",
+       "1  Hi doctor, I am a 22-year-old female who was d...   \n",
+       "2  Hi doctor! I used to have clear skin but since...   \n",
+       "3  Hello doctor,I am having an uncomfortable feel...   \n",
+       "4  Hello doctor,Before two years had sex with a c...   \n",
+       "\n",
+       "                                              Doctor  \n",
+       "0  Hi. I have gone through your query with dilige...  \n",
+       "1  Hi. You have really done well with the hypothy...  \n",
+       "2  Hi there Acne has multifactorial etiology. Onl...  \n",
+       "3  Hello. The popping and discomfort what you fel...  \n",
+       "4  Hello. The HIV test uses a finger prick blood ...  "
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 🔑 Read the OpenAI API key from the OPENAI_API_KEY environment variable;\n",
+    "# never paste a real key into a notebook that gets uploaded or shared.\n",
+    "import os\n",
+    "\n",
+    "openai.api_key = os.environ.get(\"OPENAI_API_KEY\")\n",
+    "\n",
+    "# 📄 Load dataset and drop rows missing either the question or the doctor's answer\n",
+    "d1 = pd.read_csv(\"ai-medical-chatbot.csv\")\n",
+    "d1 = d1.dropna(subset=[\"Description\", \"Doctor\"])\n",
+    "\n",
+    "# Fit TF-IDF on the question text; keep the sparse matrix (no .toarray())\n",
+    "vector1 = TfidfVectorizer()\n",
+    "qvs = vector1.fit_transform(d1[\"Description\"])\n",
+    "\n",
+    "d1.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "7c0d1a74-52bd-484f-bfc7-ceed36983140",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def find_best_match(user_input):\n",
+    "    \"\"\"Return (Description, Doctor, cosine score) of the d1 row most similar to user_input.\"\"\"\n",
+    "    # transform() keeps the query as a sparse matrix, like the fitted question vectors\n",
+    "    query_vec = vector1.transform([user_input])\n",
+    "    scores = cosine_similarity(query_vec, qvs)[0]\n",
+    "    top = np.argmax(scores)\n",
+    "    row = d1.iloc[top]\n",
+    "    return row[\"Description\"], row[\"Doctor\"], float(scores[top])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "id": "4898c3af-3e91-42d0-bede-532b65897993",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL:  http://127.0.0.1:7862\n",
+      "\n",
+      "Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 77,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 🔍 Vectorize questions\n",
+    "#vectorizer = TfidfVectorizer()\n",
+    "#question_vectors = vectorizer.fit_transform(df[\"Question\"]).toarray()\n",
+    "\n",
+    "# 🔎 Find the most similar FAQ match\n",
+    "#def find_best_match(user_input):\n",
+    "    #user_vec = vectorizer.transform([user_input]).toarray()\n",
+    "    #similarities = cosine_similarity(user_vec, question_vectors)\n",
+    "    #best_idx = np.argmax(similarities[0])\n",
+    "   # best_score = float(similarities[0][best_idx])\n",
+    "  #  return df.iloc[best_idx][\"Question\"], df.iloc[best_idx][\"Answer\"], best_score\n",
+    "\n",
+    "# 🤖 Query OpenAI if no good FAQ match\n",
+    "def query_gpt(user_input):\n",
+    "    \"\"\"Send user_input to the chat model and return the reply text.\n",
+    "\n",
+    "    On any exception, returns a string starting with '⚠️ GPT Error:' instead of raising.\n",
+    "    \"\"\"\n",
+    "    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK interface — confirm the installed openai version.\n",
+    "    try:\n",
+    "        response = openai.ChatCompletion.create(\n",
+    "            model=\"gpt-4\",  # or use \"gpt-3.5-turbo\"\n",
+    "            messages=[\n",
+    "                {\"role\": \"system\", \"content\": \"You are a pediatric pulmonology expert.\"},\n",
+    "                # The user turn is last so the model replies to the question itself\n",
+    "                {\"role\": \"user\", \"content\": user_input},\n",
+    "            ],\n",
+    "        )\n",
+    "        return response.choices[0].message[\"content\"]\n",
+    "    except Exception as e:\n",
+    "        return f\"⚠️ GPT Error: {e}\"\n",
+    "\n",
+    "# 💬 Chatbot response logic\n",
+    "def chatbot_response(user_input):\n",
+    "    \"\"\"Reply from the dataset when the best match scores above 0.75, otherwise from GPT.\"\"\"\n",
+    "    if len(user_input.strip()) == 0:\n",
+    "        return \"Please enter a question.\"\n",
+    "\n",
+    "    try:\n",
+    "        question, answer, similarity = find_best_match(user_input)\n",
+    "        if similarity > 0.75:\n",
+    "            return f\"📚 **Answer from FAQ**:\\n\\n**Q:** {question}\\n**A:** {answer}\"\n",
+    "        # No confident dataset match: fall back to the language model\n",
+    "        reply = query_gpt(user_input)\n",
+    "        return f\"🤖 **Answer from GPT-4**:\\n\\n{reply}\"\n",
+    "    except Exception as err:\n",
+    "        return f\"❌ Error processing your question: {err}\"\n",
+    "\n",
+    "# 🌐 Launch Gradio interface\n",
+    "gr.Interface(\n",
+    "    fn=chatbot_response,\n",
+    "    inputs=gr.Textbox(label=\"Ask a pediatric pulmonology question\"),\n",
+    "    outputs=gr.Textbox(label=\"Response\", lines=10),\n",
+    "    title=\"Pediatric Pulmonology Chatbot\",\n",
+    "    description=\"Answers common non-critical questions about pediatric pulmonology using a mix of FAQ and GPT-4.\"\n",
+    ").launch(share=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "823966da-b528-48e2-a81f-927d72f386ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the OpenAI key from the OPENAI_API_KEY environment variable instead of hardcoding it\n",
+    "import os\n",
+    "\n",
+    "openai.api_key = os.environ.get(\"OPENAI_API_KEY\")\n",
+    "\n",
+    "# Load CSV\n",
+    "chat = pd.read_csv(\"PedMedQA_final.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "0e1055dc-28cc-499c-8303-6b922fcd7057",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>meta_info</th>\n",
+       "      <th>question</th>\n",
+       "      <th>answer_idx</th>\n",
+       "      <th>answer</th>\n",
+       "      <th>options</th>\n",
+       "      <th>age_years</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>step2&amp;3</td>\n",
+       "      <td>A 3-month-old baby died suddenly at night whil...</td>\n",
+       "      <td>A</td>\n",
+       "      <td>Placing the infant in a supine position on a f...</td>\n",
+       "      <td>[{'key': 'A', 'value': 'Placing the infant in ...</td>\n",
+       "      <td>0.25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>step1</td>\n",
+       "      <td>A mother brings her 3-week-old infant to the p...</td>\n",
+       "      <td>A</td>\n",
+       "      <td>Abnormal migration of ventral pancreatic bud</td>\n",
+       "      <td>[{'key': 'A', 'value': 'Abnormal migration of ...</td>\n",
+       "      <td>0.06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>7</td>\n",
+       "      <td>step1</td>\n",
+       "      <td>A 3900-g (8.6-lb) male infant is delivered at ...</td>\n",
+       "      <td>A</td>\n",
+       "      <td>Gastric fundus in the thorax</td>\n",
+       "      <td>[{'key': 'A', 'value': 'Gastric fundus in the ...</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>11</td>\n",
+       "      <td>step2&amp;3</td>\n",
+       "      <td>A 1-year-old boy presents to the emergency dep...</td>\n",
+       "      <td>D</td>\n",
+       "      <td>Blockade of presynaptic acetylcholine release ...</td>\n",
+       "      <td>[{'key': 'A', 'value': 'Antibodies against pos...</td>\n",
+       "      <td>1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>12</td>\n",
+       "      <td>step1</td>\n",
+       "      <td>A 9-month-old female is brought to the emergen...</td>\n",
+       "      <td>D</td>\n",
+       "      <td>Pleiotropy</td>\n",
+       "      <td>[{'key': 'A', 'value': 'Anticipation'}\\n {'key...</td>\n",
+       "      <td>0.75</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index meta_info                                           question  \\\n",
+       "0      1   step2&3  A 3-month-old baby died suddenly at night whil...   \n",
+       "1      2     step1  A mother brings her 3-week-old infant to the p...   \n",
+       "2      7     step1  A 3900-g (8.6-lb) male infant is delivered at ...   \n",
+       "3     11   step2&3  A 1-year-old boy presents to the emergency dep...   \n",
+       "4     12     step1  A 9-month-old female is brought to the emergen...   \n",
+       "\n",
+       "  answer_idx                                             answer  \\\n",
+       "0          A  Placing the infant in a supine position on a f...   \n",
+       "1          A       Abnormal migration of ventral pancreatic bud   \n",
+       "2          A                       Gastric fundus in the thorax   \n",
+       "3          D  Blockade of presynaptic acetylcholine release ...   \n",
+       "4          D                                         Pleiotropy   \n",
+       "\n",
+       "                                             options  age_years  \n",
+       "0  [{'key': 'A', 'value': 'Placing the infant in ...       0.25  \n",
+       "1  [{'key': 'A', 'value': 'Abnormal migration of ...       0.06  \n",
+       "2  [{'key': 'A', 'value': 'Gastric fundus in the ...        NaN  \n",
+       "3  [{'key': 'A', 'value': 'Antibodies against pos...       1.00  \n",
+       "4  [{'key': 'A', 'value': 'Anticipation'}\\n {'key...       0.75  "
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "69bd354e-482c-42e2-ab78-55d8cda2acee",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>age_years</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>2683.000000</td>\n",
+       "      <td>2383.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>6266.011927</td>\n",
+       "      <td>7.152585</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>3657.727022</td>\n",
+       "      <td>5.722108</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>3064.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>6193.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>9492.500000</td>\n",
+       "      <td>12.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>12709.000000</td>\n",
+       "      <td>35.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "              index    age_years\n",
+       "count   2683.000000  2383.000000\n",
+       "mean    6266.011927     7.152585\n",
+       "std     3657.727022     5.722108\n",
+       "min        1.000000     0.000000\n",
+       "25%     3064.000000     2.000000\n",
+       "50%     6193.000000     6.000000\n",
+       "75%     9492.500000    12.000000\n",
+       "max    12709.000000    35.000000"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "4b6591e9-7501-43fa-a847-bd9cd922124e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "index           0\n",
+       "meta_info       0\n",
+       "question        0\n",
+       "answer_idx      0\n",
+       "answer          1\n",
+       "options         0\n",
+       "age_years     300\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "f8e9bdce-80f8-4942-88f6-abaddc1c5d72",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(2683, 7)"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "5fd1e9dd-6748-4b06-a74f-f3827ae16ae5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 2683 entries, 0 to 2682\n",
+      "Data columns (total 7 columns):\n",
+      " #   Column      Non-Null Count  Dtype  \n",
+      "---  ------      --------------  -----  \n",
+      " 0   index       2683 non-null   int64  \n",
+      " 1   meta_info   2683 non-null   object \n",
+      " 2   question    2683 non-null   object \n",
+      " 3   answer_idx  2683 non-null   object \n",
+      " 4   answer      2682 non-null   object \n",
+      " 5   options     2683 non-null   object \n",
+      " 6   age_years   2383 non-null   float64\n",
+      "dtypes: float64(1), int64(1), object(5)\n",
+      "memory usage: 146.9+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "chat.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "82989bf3-abc6-486d-917e-2b78677bed49",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['Placing the infant in a supine position on a firm mattress while sleeping',\n",
+       "       'Abnormal migration of ventral pancreatic bud',\n",
+       "       'Gastric fundus in the thorax', ..., 'Ixodes scapularis',\n",
+       "       'Scalded skin syndrome', 'Apply a simple shoulder sling'],\n",
+       "      dtype=object)"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat[\"answer\"]. unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "21020a56-1b88-4541-9739-354362899149",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "answer\n",
+       "Reassurance                                16\n",
+       "Ventricular septal defect                   7\n",
+       "Autism spectrum disorder                    7\n",
+       "Streptococcus pneumoniae                    6\n",
+       "Patent ductus arteriosus                    6\n",
+       "                                           ..\n",
+       "Adrenal hemorrhage                          1\n",
+       "C5 and C6 nerve roots                       1\n",
+       "Viral upper respiratory tract infection     1\n",
+       "Failure of the vitelline duct to close      1\n",
+       "Apply a simple shoulder sling               1\n",
+       "Name: count, Length: 2284, dtype: int64"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat[\"answer\"].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "a3b442d1-33e1-4400-8e96-4d1ea5d9d699",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0       Placing the infant in a supine position on a f...\n",
+      "1            Abnormal migration of ventral pancreatic bud\n",
+      "2                            Gastric fundus in the thorax\n",
+      "3       Blockade of presynaptic acetylcholine release ...\n",
+      "4                                              Pleiotropy\n",
+      "                              ...                        \n",
+      "2678                                   X-linked recessive\n",
+      "2679    Insulin production by the pancreas is insuffic...\n",
+      "2680                                    Ixodes scapularis\n",
+      "2681                                Scalded skin syndrome\n",
+      "2682                        Apply a simple shoulder sling\n",
+      "Name: answer, Length: 2683, dtype: object\n"
+     ]
+    }
+   ],
+   "source": [
+    "# A missing answer cannot be inferred from the most frequent label (\"Reassurance\"),\n",
+    "# so drop that row rather than attaching a fabricated answer to its question.\n",
+    "chat = chat.dropna(subset=[\"answer\"])\n",
+    "print(chat[\"answer\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "74fd9008-f566-4fd9-92b8-d3e5b69dfa33",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<bound method Series.unique of 0        0.25\n",
+       "1        0.06\n",
+       "2         NaN\n",
+       "3        1.00\n",
+       "4        0.75\n",
+       "        ...  \n",
+       "2678     3.00\n",
+       "2679    16.00\n",
+       "2680    14.00\n",
+       "2681     0.02\n",
+       "2682    15.00\n",
+       "Name: age_years, Length: 2683, dtype: float64>"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Call the method: without parentheses the cell only displays the bound method object\n",
+    "chat[\"age_years\"].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "id": "ced90873-67bd-4f2a-aaaa-c901b163ac6b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<bound method IndexOpsMixin.value_counts of 0        0.25\n",
+       "1        0.06\n",
+       "2         NaN\n",
+       "3        1.00\n",
+       "4        0.75\n",
+       "        ...  \n",
+       "2678     3.00\n",
+       "2679    16.00\n",
+       "2680    14.00\n",
+       "2681     0.02\n",
+       "2682    15.00\n",
+       "Name: age_years, Length: 2683, dtype: float64>"
+      ]
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Call the method: without parentheses the cell only displays the bound method object\n",
+    "chat[\"age_years\"].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "id": "3ca55f08-fb7b-4b0b-bd9c-f8d1ccc15618",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>meta_info</th>\n",
+       "      <th>question</th>\n",
+       "      <th>answer_idx</th>\n",
+       "      <th>answer</th>\n",
+       "      <th>options</th>\n",
+       "      <th>age_years</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>step2&amp;3</td>\n",
+       "      <td>A 3-month-old baby died suddenly at night whil...</td>\n",
+       "      <td>A</td>\n",
+       "      <td>Placing the infant in a supine position on a f...</td>\n",
+       "      <td>[{'key': 'A', 'value': 'Placing the infant in ...</td>\n",
+       "      <td>0.25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>step1</td>\n",
+       "      <td>A mother brings her 3-week-old infant to the p...</td>\n",
+       "      <td>A</td>\n",
+       "      <td>Abnormal migration of ventral pancreatic bud</td>\n",
+       "      <td>[{'key': 'A', 'value': 'Abnormal migration of ...</td>\n",
+       "      <td>0.06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>7</td>\n",
+       "      <td>step1</td>\n",
+       "      <td>A 3900-g (8.6-lb) male infant is delivered at ...</td>\n",
+       "      <td>A</td>\n",
+       "      <td>Gastric fundus in the thorax</td>\n",
+       "      <td>[{'key': 'A', 'value': 'Gastric fundus in the ...</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>11</td>\n",
+       "      <td>step2&amp;3</td>\n",
+       "      <td>A 1-year-old boy presents to the emergency dep...</td>\n",
+       "      <td>D</td>\n",
+       "      <td>Blockade of presynaptic acetylcholine release ...</td>\n",
+       "      <td>[{'key': 'A', 'value': 'Antibodies against pos...</td>\n",
+       "      <td>1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>12</td>\n",
+       "      <td>step1</td>\n",
+       "      <td>A 9-month-old female is brought to the emergen...</td>\n",
+       "      <td>D</td>\n",
+       "      <td>Pleiotropy</td>\n",
+       "      <td>[{'key': 'A', 'value': 'Anticipation'}\\n {'key...</td>\n",
+       "      <td>0.75</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index meta_info                                           question  \\\n",
+       "0      1   step2&3  A 3-month-old baby died suddenly at night whil...   \n",
+       "1      2     step1  A mother brings her 3-week-old infant to the p...   \n",
+       "2      7     step1  A 3900-g (8.6-lb) male infant is delivered at ...   \n",
+       "3     11   step2&3  A 1-year-old boy presents to the emergency dep...   \n",
+       "4     12     step1  A 9-month-old female is brought to the emergen...   \n",
+       "\n",
+       "  answer_idx                                             answer  \\\n",
+       "0          A  Placing the infant in a supine position on a f...   \n",
+       "1          A       Abnormal migration of ventral pancreatic bud   \n",
+       "2          A                       Gastric fundus in the thorax   \n",
+       "3          D  Blockade of presynaptic acetylcholine release ...   \n",
+       "4          D                                         Pleiotropy   \n",
+       "\n",
+       "                                             options  age_years  \n",
+       "0  [{'key': 'A', 'value': 'Placing the infant in ...       0.25  \n",
+       "1  [{'key': 'A', 'value': 'Abnormal migration of ...       0.06  \n",
+       "2  [{'key': 'A', 'value': 'Gastric fundus in the ...        NaN  \n",
+       "3  [{'key': 'A', 'value': 'Antibodies against pos...       1.00  \n",
+       "4  [{'key': 'A', 'value': 'Anticipation'}\\n {'key...       0.75  "
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "id": "47fa0f95-72e2-4b85-919b-aec5fe5aa5af",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "index           int64\n",
+       "meta_info      object\n",
+       "question       object\n",
+       "answer_idx     object\n",
+       "answer         object\n",
+       "options        object\n",
+       "age_years     float64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 51,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "133b6b98-4408-47dc-b6c8-24b6e19ac2f9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat.dropna(subset=[\"question\", \"answer\"], inplace=True)\n",
+    "chat.drop_duplicates(subset=[\"question\"], inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "id": "934ff55c-2ff4-4761-b401-d19749402d98",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "index           0\n",
+       "meta_info       0\n",
+       "question        0\n",
+       "answer_idx      0\n",
+       "answer          0\n",
+       "options         0\n",
+       "age_years     300\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "id": "4cf16cdd-6457-4edb-87d5-c307f850450a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#oe = OrdinalEncoder()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "c4328f7d-a148-40c8-8207-66fa6b67d8b3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    1\n",
+       "1    2\n",
+       "2    7\n",
+       "Name: index, dtype: int64"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#chat[\"index\"] = oe.fit_transform(chat[[\"index\"]])\n",
+    "chat[\"index\"].head(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "id": "222839fa-3070-41fa-842f-18c6998704cb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    step2&3\n",
+       "1      step1\n",
+       "2      step1\n",
+       "Name: meta_info, dtype: object"
+      ]
+     },
+     "execution_count": 61,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#chat[\"meta_info\"] = oe.fit_transform(chat[[\"meta_info\"]])\n",
+    "chat[\"meta_info\"].head(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "id": "acfbeea1-92a5-4558-b82b-6511c0b8de47",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    A 3-month-old baby died suddenly at night whil...\n",
+       "1    A mother brings her 3-week-old infant to the p...\n",
+       "2    A 3900-g (8.6-lb) male infant is delivered at ...\n",
+       "Name: question, dtype: object"
+      ]
+     },
+     "execution_count": 63,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#chat[\"question\"] = oe.fit_transform(chat[[\"question\"]])\n",
+    "chat[\"question\"].head(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "id": "8346763f-045b-4ade-bcaf-fdfe35555a2f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    A\n",
+       "1    A\n",
+       "2    A\n",
+       "Name: answer_idx, dtype: object"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#chat[\"answer_idx\"] = oe.fit_transform(chat[[\"answer_idx\"]])\n",
+    "chat[\"answer_idx\"].head(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "id": "8d054a13-3dfc-4710-bf6c-7d4e62ec5d5c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    Placing the infant in a supine position on a f...\n",
+       "1         Abnormal migration of ventral pancreatic bud\n",
+       "2                         Gastric fundus in the thorax\n",
+       "Name: answer, dtype: object"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#chat[\"answer\"] = oe.fit_transform(chat[[\"answer\"]])\n",
+    "chat[\"answer\"].head(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "id": "10295266-f340-4a7f-81c2-a05ad219285a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    [{'key': 'A', 'value': 'Placing the infant in ...\n",
+       "1    [{'key': 'A', 'value': 'Abnormal migration of ...\n",
+       "2    [{'key': 'A', 'value': 'Gastric fundus in the ...\n",
+       "Name: options, dtype: object"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#chat[\"options\"] = oe.fit_transform(chat[[\"options\"]])\n",
+    "chat[\"options\"].head(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "id": "7c284442-9b72-432a-840e-5543e6c8adf4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(2683, 7)"
+      ]
+     },
+     "execution_count": 71,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "id": "46308806-7545-480a-bf57-7434babe4efc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['index', 'meta_info', 'question', 'answer_idx', 'answer', 'options',\n",
+       "       'age_years'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 73,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chat.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 131,
+   "id": "7610d011-cdc9-4416-ade5-e93756b820ee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.linear_model import LassoCV\n",
+    "from sklearn.feature_selection import SelectFromModel"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 133,
+   "id": "5da7ae38-db35-4f5d-ab18-5e0f25e9fd02",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#clf = LassoCV.fit(X_train, Y_trarin)\n",
+    "#importance = np.abs(clf.coef)\n",
+    "#print(importance)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 135,
+   "id": "99d49912-ba82-4670-8420-e5188e6ead27",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdin",
+     "output_type": "stream",
+     "text": [
+      "You can ask me any pediatric pulmonology related question (or type 'exit'):  exit\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Console chat loop: keep answering until the user types 'exit' (any letter case).\n",
+    "while (user_input := input(\"You can ask me any pediatric pulmonology related question (or type 'exit'): \")).lower() != \"exit\":\n",
+    "    response = chatbot_response(user_input)\n",
+    "    print(response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 147,
+   "id": "4057a702-de90-4697-b080-3cea436a290e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#response = chatbot_response(ui)\n",
+    "#print(response)\n",
+    "chat.dropna(subset=[\"question\", \"answer\"], inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 149,
+   "id": "8ea73391-d8ba-4e24-ba44-b0b93321ef2c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "\n",
+    "# Fit a TF-IDF vocabulary on the dataset questions and keep the dense matrix\n",
+    "# (one row per row of `chat`) that the similarity search compares against.\n",
+    "vector1 = TfidfVectorizer()\n",
+    "qvs = vector1.fit_transform(chat[\"question\"]).toarray()\n",
+    "\n",
+    "# User input is vectorized at query time inside find_best_match() with this same\n",
+    "# fitted vectorizer. Transforming `user_input` in this cell would only pick up whatever\n",
+    "# value an earlier interactive loop left behind (e.g. the literal 'exit')."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 155,
+   "id": "60701e12-49cc-4bef-8865-65dd5ebb3ae6",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "SyntaxError",
+     "evalue": "invalid syntax (1206184978.py, line 29)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;36m  Cell \u001b[1;32mIn[155], line 29\u001b[1;36m\u001b[0m\n\u001b[1;33m    except Exception as e:\u001b[0m\n\u001b[1;37m    ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "\n",
+    "# 🔌 Connect to OpenAI\n",
+    "# Read the key from the environment so no secret is ever saved inside the notebook.\n",
+    "openai.api_key = os.environ.get(\"OPENAI_API_KEY\", \"\")\n",
+    "\n",
+    "# 📄 Step 1: Dataset\n",
+    "# The frame used by this notebook is `chat` (lowercase `question` / `answer` columns).\n",
+    "# Rows with a missing question or answer were already dropped before the TF-IDF matrix\n",
+    "# `qvs` was built; dropping rows again here could desynchronise `chat` from `qvs`.\n",
+    "\n",
+    "# 🧠 Step 2: Vectorize dataset questions\n",
+    "# Done in the TF-IDF cell: `vector1` is the fitted vectorizer and `qvs` the question matrix.\n",
+    "\n",
+    "# 🔍 Step 3: Find most similar question\n",
+    "def find_best_match(user_input):\n",
+    "    \"\"\"Return the dataset entry whose question is most similar to ``user_input``.\n",
+    "\n",
+    "    The input is vectorized with the fitted TF-IDF vectorizer ``vector1`` and\n",
+    "    compared by cosine similarity against ``qvs``, the matrix built from\n",
+    "    ``chat[\"question\"]``.\n",
+    "\n",
+    "    Returns:\n",
+    "        tuple: ``(matched_question, matched_answer, similarity_score)``.\n",
+    "    \"\"\"\n",
+    "    user_vec = vector1.transform([user_input]).toarray()\n",
+    "    similarities = cosine_similarity(user_vec, qvs)[0]\n",
+    "    best_idx = int(np.argmax(similarities))\n",
+    "    # Score and row must both be taken at best_idx, and the row must come from `chat`,\n",
+    "    # because the rows of `qvs` are positionally aligned with `chat`.\n",
+    "    best_row = chat.iloc[best_idx]\n",
+    "    return best_row[\"question\"], best_row[\"answer\"], similarities[best_idx]\n",
+    "\n",
+    "# 🤖 Step 4: Fallback to GPT-4 if no good match\n",
+    "def query_gpt(user_input):\n",
+    "    \"\"\"Ask GPT-4 to answer ``user_input`` and return the reply text.\n",
+    "\n",
+    "    Any exception raised while calling the API is caught and returned as a\n",
+    "    readable error string instead of propagating to the caller.\n",
+    "    \"\"\"\n",
+    "    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface; confirm the\n",
+    "    # installed openai version still provides it.\n",
+    "    try:\n",
+    "        response = openai.ChatCompletion.create(\n",
+    "            model=\"gpt-4\",\n",
+    "            messages=[\n",
+    "                {\"role\": \"system\", \"content\": \"You are a pediatric pulmonology expert.\"},\n",
+    "                {\"role\": \"user\", \"content\": user_input},\n",
+    "            ],\n",
+    "        )\n",
+    "        return response.choices[0].message[\"content\"]\n",
+    "    except Exception as e:\n",
+    "        return f\"⚠️ GPT Error: {e}\"\n",
+    "\n",
+    "# 💬 Step 5: Define chatbot logic\n",
+    "def chatbot_response(user_input):\n",
+    "    \"\"\"Answer ``user_input`` from the dataset when a close match exists, otherwise via GPT-4.\n",
+    "\n",
+    "    A match counts as close when the score returned by find_best_match exceeds 0.75.\n",
+    "    \"\"\"\n",
+    "    faq_question, faq_answer, similarity = find_best_match(user_input)\n",
+    "    if similarity > 0.75:\n",
+    "        return f\"📚 Answer from FAQ:\\nQ: {faq_question}\\nA: {faq_answer}\"\n",
+    "    # No sufficiently similar stored question: defer to the language model.\n",
+    "    return f\"🤖 Answer from GPT-4:\\n{query_gpt(user_input)}\"\n",
+    "\n",
+    "# 🌐 Step 6: Launch Gradio interface\n",
+    "# The only gradio import visible in this notebook sits in a later cell; import it here\n",
+    "# so this cell does not depend on execution order.\n",
+    "import gradio as gr\n",
+    "\n",
+    "gr.Interface(\n",
+    "    fn=chatbot_response,\n",
+    "    inputs=gr.Textbox(label=\"Ask any pediatric pulmonology related question\"),\n",
+    "    outputs=gr.Textbox(label=\"Response\"),\n",
+    "    title=\"Royalty Medic_bot\",\n",
+    "    description=\"Get non-critical answers to common pediatric respiratory health questions.\",\n",
+    ").launch()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "572732aa-1b8b-4202-97a3-0d4ffd272f82",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics.pairwise import cosine_similarity\n",
+    "import numpy as np\n",
+    "\n",
+    "def find_best_match(user_input):\n",
+    "    \"\"\"Return ``(question, answer, score)`` for the dataset row most similar to ``user_input``.\n",
+    "\n",
+    "    Uses the fitted TF-IDF vectorizer ``vector1`` and the question matrix ``qvs``\n",
+    "    built from ``chat[\"question\"]``; the score is the cosine similarity of the best row.\n",
+    "    \"\"\"\n",
+    "    input_vec = vector1.transform([user_input]).toarray()\n",
+    "    sims = cosine_similarity(input_vec, qvs)\n",
+    "    # Use the arg-max position itself for both the score and the row lookup.\n",
+    "    idx = int(np.argmax(sims[0]))\n",
+    "    score = sims[0][idx]\n",
+    "    return chat.iloc[idx][\"question\"], chat.iloc[idx][\"answer\"], score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "id": "ed94c25c-7951-4cdb-bead-c169d3e0c1a4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdin",
+     "output_type": "stream",
+     "text": [
+      "🧒 Ask a pediatric pulmonology question (or type 'exit'):  exit\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "👋 Goodbye!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Prompt repeatedly; typing 'exit' (any letter case) ends the session with a farewell.\n",
+    "while (user_input := input(\"🧒 Ask a pediatric pulmonology question (or type 'exit'): \")).lower() != \"exit\":\n",
+    "    print(chatbot_response(user_input))\n",
+    "print(\"👋 Goodbye!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "id": "8f9aa311-3e70-47c5-b103-db71d1d65ac3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL:  http://127.0.0.1:7860\n",
+      "* To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 73,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import gradio as gr\n",
+    "\n",
+    "def chatbot_gradio_interface(user_input):\n",
+    "    \"\"\"Gradio callback: pass the submitted text to chatbot_response and return its reply.\"\"\"\n",
+    "    reply = chatbot_response(user_input)\n",
+    "    return reply\n",
+    "\n",
+    "# Wrap the callback in a plain text-in / text-out UI and start serving it.\n",
+    "gr.Interface(\n",
+    "    fn=chatbot_gradio_interface,\n",
+    "    inputs=\"text\",\n",
+    "    outputs=\"text\",\n",
+    "    title=\"Pediatric Pulmonology Chatbot\",\n",
+    "    description=\"Ask any question related to pediatric lung health.\",\n",
+    ").launch()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b2436f4d-bc62-4f5b-8559-0ac2f1449912",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "07c643f3-72ed-46c1-bfcb-d7da0b78337e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b63ca794-008f-491f-a317-999755b9a964",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Build FAISS index for similarity search\n",
+    "# FAISS only accepts C-contiguous float32 matrices, while the TF-IDF `.toarray()` output\n",
+    "# is float64, so convert once up front. `qvs` is the question matrix fitted on `chat`.\n",
+    "question_matrix = np.ascontiguousarray(qvs, dtype=np.float32)\n",
+    "index = faiss.IndexFlatL2(question_matrix.shape[1])\n",
+    "index.add(question_matrix)\n",
+    "\n",
+    "# Function to find the closest question\n",
+    "def find_most_similar_question(user_question, top_k=1):\n",
+    "    \"\"\"Return ``(question, answer)`` of the dataset row nearest to ``user_question``.\n",
+    "\n",
+    "    The query is vectorized with the fitted TF-IDF vectorizer ``vector1`` and looked up\n",
+    "    in the FAISS ``index``. ``top_k`` is forwarded to the search, but only the single\n",
+    "    nearest row is returned.\n",
+    "    \"\"\"\n",
+    "    # FAISS requires a C-contiguous float32 query matrix; TF-IDF output is float64.\n",
+    "    user_vec = np.ascontiguousarray(\n",
+    "        vector1.transform([user_question]).toarray(), dtype=np.float32\n",
+    "    )\n",
+    "    _, neighbours = index.search(user_vec, top_k)\n",
+    "    # Index positions correspond to row positions of `chat`, the frame the vectors came from.\n",
+    "    best_row = chat.iloc[int(neighbours[0][0])]\n",
+    "    return best_row[\"question\"], best_row[\"answer\"]\n",
+    "\n",
+    "# Function to query a language model\n",
+    "def ask_openai(question, model=\"gpt-4\"):\n",
+    "    \"\"\"Send ``question`` to the given chat ``model`` and return the reply text.\n",
+    "\n",
+    "    On any exception the error is printed and ``None`` is returned, so callers can\n",
+    "    fall back to another model.\n",
+    "    \"\"\"\n",
+    "    conversation = [\n",
+    "        {\"role\": \"system\", \"content\": \"You are a pediatric pulmonology expert.\"},\n",
+    "        {\"role\": \"user\", \"content\": question},\n",
+    "    ]\n",
+    "    try:\n",
+    "        completion = openai.ChatCompletion.create(\n",
+    "            model=model, messages=conversation, temperature=0.3\n",
+    "        )\n",
+    "        answer = completion.choices[0].message[\"content\"]\n",
+    "    except Exception as err:\n",
+    "        print(f\"Error with {model}: {err}\")\n",
+    "        answer = None\n",
+    "    return answer\n",
+    "\n",
+    "# Main chatbot function\n",
+    "def pediatric_pulmonology_chatbot(user_input):\n",
+    "    \"\"\"Answer ``user_input`` from the dataset if a close match exists, else ask an LLM.\n",
+    "\n",
+    "    The nearest stored question is accepted when its TF-IDF cosine similarity to the\n",
+    "    input exceeds 0.7. Otherwise GPT-4 is tried first and GPT-3.5 second; if both\n",
+    "    return nothing, an apology string is returned.\n",
+    "    \"\"\"\n",
+    "    matched_question, matched_answer = find_most_similar_question(user_input)\n",
+    "\n",
+    "    # Both texts go through the same fitted vectorizer (`vector1`) so the vectors are comparable.\n",
+    "    similarity = cosine_similarity(\n",
+    "        vector1.transform([user_input]), vector1.transform([matched_question])\n",
+    "    )[0][0]\n",
+    "\n",
+    "    if similarity > 0.7:\n",
+    "        return f\"(From Knowledge Base)\\nQ: {matched_question}\\nA: {matched_answer}\"\n",
+    "\n",
+    "    # Try the models in order of preference; the first non-empty reply wins.\n",
+    "    for model, label in ((\"gpt-4\", \"GPT-4\"), (\"gpt-3.5-turbo\", \"GPT-3.5\")):\n",
+    "        reply = ask_openai(user_input, model=model)\n",
+    "        if reply:\n",
+    "            return f\"(From {label})\\n{reply}\"\n",
+    "    return \"Sorry, I couldn't find an answer to that.\"\n",
+    "\n",
+    "# 🔁 Example interaction\n",
+    "# Keep prompting until the user types 'exit' (any letter case).\n",
+    "while (user_input := input(\"\\n👶 Ask a pediatric pulmonology question (or type 'exit'): \")).lower() != \"exit\":\n",
+    "    print(pediatric_pulmonology_chatbot(user_input))\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}