codemogul
/

tabibu-md

Model card Files Files and versions

xet

Community

codemogul committed on Feb 20, 2023

Commit

03edd98

1 Parent(s): b865bd7

Upload model.ipynb

Browse files

Files changed (1) hide show

model.ipynb +297 -0

model.ipynb ADDED Viewed

	@@ -0,0 +1,297 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "ace57031",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Question_ID</th>\n",
+       "      <th>Questions</th>\n",
+       "      <th>Answers</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1590140</td>\n",
+       "      <td>What does it mean to have a mental illness?</td>\n",
+       "      <td>Mental illnesses are health conditions that di...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2110618</td>\n",
+       "      <td>Who does mental illness affect?</td>\n",
+       "      <td>It is estimated that mental illness affects 1 ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>6361820</td>\n",
+       "      <td>What causes mental illness?</td>\n",
+       "      <td>It is estimated that mental illness affects 1 ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>9434130</td>\n",
+       "      <td>What are some of the warning signs of mental i...</td>\n",
+       "      <td>Symptoms of mental health disorders vary depen...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>7657263</td>\n",
+       "      <td>Can people with mental illness recover?</td>\n",
+       "      <td>When healing from mental illness, early identi...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Question_ID                                          Questions  \\\n",
+       "0     1590140        What does it mean to have a mental illness?   \n",
+       "1     2110618                    Who does mental illness affect?   \n",
+       "2     6361820                        What causes mental illness?   \n",
+       "3     9434130  What are some of the warning signs of mental i...   \n",
+       "4     7657263            Can people with mental illness recover?   \n",
+       "\n",
+       "                                             Answers  \n",
+       "0  Mental illnesses are health conditions that di...  \n",
+       "1  It is estimated that mental illness affects 1 ...  \n",
+       "2  It is estimated that mental illness affects 1 ...  \n",
+       "3  Symptoms of mental health disorders vary depen...  \n",
+       "4  When healing from mental illness, early identi...  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import csv\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import torch\n",
+    "from huggingface_hub import notebook_login\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
+    "# notebook_login()\n",
+    "\n",
+    "# Step 1: Collect and preprocess data\n",
+    "# Load the ID, question and answer fields (first three columns) of every row\n",
+    "# in mental_health_bot.csv. csv.reader is used instead of naive comma splitting\n",
+    "# so that quoted fields containing commas or line breaks stay in one piece.\n",
+    "# NOTE(review): if the file begins with a header row it is loaded as a sample\n",
+    "# too -- confirm and skip it if so.\n",
+    "questions = []\n",
+    "responses = []\n",
+    "q_id = []\n",
+    "skipped_rows = 0\n",
+    "with open(\"mental_health_bot.csv\", \"r\", newline=\"\") as f:\n",
+    "    for row in csv.reader(f):\n",
+    "        if not row:\n",
+    "            continue  # blank line\n",
+    "        if len(row) < 3:\n",
+    "            # Not enough fields for id, question and answer: count the row\n",
+    "            # instead of hiding it behind a bare except.\n",
+    "            skipped_rows += 1\n",
+    "            continue\n",
+    "        question_id, question, response = row[:3]\n",
+    "        q_id.append(question_id)\n",
+    "        questions.append(question)\n",
+    "        responses.append(response)\n",
+    "\n",
+    "if skipped_rows:\n",
+    "    print(\"Skipped rows with fewer than 3 fields:\", skipped_rows)\n",
+    "\n",
+    "data = pd.read_csv(\"data.csv\")\n",
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "8f51e39d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "missing values: Question_ID    0\n",
+      "Questions      0\n",
+      "Answers        0\n",
+      "dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Per-column count of missing entries in the loaded frame\n",
+    "print('missing values:', data.isna().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "1d697a39",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 149 entries, 0 to 148\n",
+      "Data columns (total 3 columns):\n",
+      " #   Column       Non-Null Count  Dtype \n",
+      "---  ------       --------------  ----- \n",
+      " 0   Question_ID  149 non-null    object\n",
+      " 1   Questions    149 non-null    object\n",
+      " 2   Answers      149 non-null    object\n",
+      "dtypes: object(3)\n",
+      "memory usage: 3.6+ KB\n",
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
+    "# DataFrame.info() prints its report itself and returns None, so it is not\n",
+    "# wrapped in print() (that would add a stray None line to the output).\n",
+    "data.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "c5dde0e4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.03333333333333333\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Turn the questions into TF-IDF features; the answers are the labels.\n",
+    "# Every distinct answer string is its own class, so the accuracy below only\n",
+    "# counts predictions that reproduce the exact answer text.\n",
+    "vectorizer = TfidfVectorizer()\n",
+    "X = vectorizer.fit_transform(questions)\n",
+    "y = responses\n",
+    "\n",
+    "# Step 2: Split data into training and testing sets\n",
+    "# A fixed random_state makes the split, and therefore the reported accuracy,\n",
+    "# reproducible across kernel restarts.\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2, random_state=42\n",
+    ")\n",
+    "\n",
+    "# Step 3: Choose a machine learning algorithm\n",
+    "model = LogisticRegression()\n",
+    "\n",
+    "# Step 4: Train the model\n",
+    "model.fit(X_train, y_train)\n",
+    "\n",
+    "# NOTE(review): the former model.push_to_hub(...) call and the\n",
+    "# DistilBertForSequenceClassification.from_pretrained / save_pretrained calls\n",
+    "# on model.ipynb were removed. LogisticRegression does not provide\n",
+    "# push_to_hub, DistilBertForSequenceClassification was never imported, and\n",
+    "# model.ipynb is a notebook rather than a model checkpoint, so the cell\n",
+    "# stopped before reaching the evaluation below. Publishing the fitted model\n",
+    "# should be done in its own cell by serializing it and uploading that file.\n",
+    "\n",
+    "# Step 5: Evaluate the model\n",
+    "y_pred = model.predict(X_test)\n",
+    "accuracy = accuracy_score(y_test, y_pred)\n",
+    "print(\"Accuracy:\", accuracy)\n",
+    "\n",
+    "# Step 6: Use the model to make predictions\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "14406312",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Ask me anything : I feel sad\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Read a free-text question from the user; the following cell feeds it to the model\n",
+    "new_question = input(\"Ask me anything : \")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "6b9198db",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Prediction: ['\"It is estimated that mental illness affects 1 in 5 adults in America']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Vectorize the question with the already-fitted TF-IDF vectorizer (transform, not fit)\n",
+    "new_question_vector = vectorizer.transform([new_question])\n",
+    "# predict returns one label per input row, so a single question yields a one-element result\n",
+    "prediction = model.predict(new_question_vector)\n",
+    "print(\"Prediction:\", prediction)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.7"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}