lynn-twinkl commited on
Commit ·
939aa1c
1
Parent(s): e1b6583
exploration of GoEmotion for better reactive application ID
Browse files
notebooks/emotion_classification.ipynb
ADDED
|
@@ -0,0 +1,620 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "a386eccf-6880-46e2-91e0-c54836a41b1a",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [
|
| 9 |
+
{
|
| 10 |
+
"name": "stderr",
|
| 11 |
+
"output_type": "stream",
|
| 12 |
+
"text": [
|
| 13 |
+
"/Users/lynn/Documents/Twinkl/grant-applications-app/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 14 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 15 |
+
]
|
| 16 |
+
}
|
| 17 |
+
],
|
| 18 |
+
"source": [
|
| 19 |
+
"import pandas as pd\n",
|
| 20 |
+
"from transformers import pipeline\n",
|
| 21 |
+
"import time"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 2,
|
| 27 |
+
"id": "9ac61aaf-9e36-4aef-98d4-e54a4ac6e643",
|
| 28 |
+
"metadata": {},
|
| 29 |
+
"outputs": [
|
| 30 |
+
{
|
| 31 |
+
"name": "stdout",
|
| 32 |
+
"output_type": "stream",
|
| 33 |
+
"text": [
|
| 34 |
+
"960\n",
|
| 35 |
+
"Index(['Id', 'Date/Time Requested', 'Giveaway Title', 'Customer Name',\n",
|
| 36 |
+
" 'Email Address', 'School Name', 'Postal Address', 'Address Line 2',\n",
|
| 37 |
+
" 'Address City', 'Postcode', 'Additional Info', 'Unnamed: 11'],\n",
|
| 38 |
+
" dtype='object')\n"
|
| 39 |
+
]
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"data": {
|
| 43 |
+
"text/html": [
|
| 44 |
+
"<div>\n",
|
| 45 |
+
"<style scoped>\n",
|
| 46 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 47 |
+
" vertical-align: middle;\n",
|
| 48 |
+
" }\n",
|
| 49 |
+
"\n",
|
| 50 |
+
" .dataframe tbody tr th {\n",
|
| 51 |
+
" vertical-align: top;\n",
|
| 52 |
+
" }\n",
|
| 53 |
+
"\n",
|
| 54 |
+
" .dataframe thead th {\n",
|
| 55 |
+
" text-align: right;\n",
|
| 56 |
+
" }\n",
|
| 57 |
+
"</style>\n",
|
| 58 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 59 |
+
" <thead>\n",
|
| 60 |
+
" <tr style=\"text-align: right;\">\n",
|
| 61 |
+
" <th></th>\n",
|
| 62 |
+
" <th>Id</th>\n",
|
| 63 |
+
" <th>Date/Time Requested</th>\n",
|
| 64 |
+
" <th>Giveaway Title</th>\n",
|
| 65 |
+
" <th>Customer Name</th>\n",
|
| 66 |
+
" <th>Email Address</th>\n",
|
| 67 |
+
" <th>School Name</th>\n",
|
| 68 |
+
" <th>Postal Address</th>\n",
|
| 69 |
+
" <th>Address Line 2</th>\n",
|
| 70 |
+
" <th>Address City</th>\n",
|
| 71 |
+
" <th>Postcode</th>\n",
|
| 72 |
+
" <th>Additional Info</th>\n",
|
| 73 |
+
" <th>Unnamed: 11</th>\n",
|
| 74 |
+
" </tr>\n",
|
| 75 |
+
" </thead>\n",
|
| 76 |
+
" <tbody>\n",
|
| 77 |
+
" <tr>\n",
|
| 78 |
+
" <th>0</th>\n",
|
| 79 |
+
" <td>304399.0</td>\n",
|
| 80 |
+
" <td>01/03/2025 00:52</td>\n",
|
| 81 |
+
" <td>March Community Collection</td>\n",
|
| 82 |
+
" <td>Susan Bushnell</td>\n",
|
| 83 |
+
" <td>susan.bushnell@googlemail.com</td>\n",
|
| 84 |
+
" <td>Southfield Junior School</td>\n",
|
| 85 |
+
" <td>Shrivenham Road</td>\n",
|
| 86 |
+
" <td>Highworth</td>\n",
|
| 87 |
+
" <td>Swindon</td>\n",
|
| 88 |
+
" <td>SN6 7BZ</td>\n",
|
| 89 |
+
" <td>I would love to use it to spread the love of r...</td>\n",
|
| 90 |
+
" <td></td>\n",
|
| 91 |
+
" </tr>\n",
|
| 92 |
+
" <tr>\n",
|
| 93 |
+
" <th>1</th>\n",
|
| 94 |
+
" <td>305004.0</td>\n",
|
| 95 |
+
" <td>02/03/2025 19:52</td>\n",
|
| 96 |
+
" <td>March Community Collection</td>\n",
|
| 97 |
+
" <td>Sarah Arabestani</td>\n",
|
| 98 |
+
" <td>sarah.a@sandringhamnursery.com</td>\n",
|
| 99 |
+
" <td>Sandringham Nursery</td>\n",
|
| 100 |
+
" <td>16 Sandringham Road</td>\n",
|
| 101 |
+
" <td>Penylan</td>\n",
|
| 102 |
+
" <td>Cardiff</td>\n",
|
| 103 |
+
" <td>CF23 5BJ</td>\n",
|
| 104 |
+
" <td>We would like to introduce early years yoga an...</td>\n",
|
| 105 |
+
" <td></td>\n",
|
| 106 |
+
" </tr>\n",
|
| 107 |
+
" <tr>\n",
|
| 108 |
+
" <th>2</th>\n",
|
| 109 |
+
" <td>305493.0</td>\n",
|
| 110 |
+
" <td>05/03/2025 14:34</td>\n",
|
| 111 |
+
" <td>March Community Collection</td>\n",
|
| 112 |
+
" <td>Rebecca Asker</td>\n",
|
| 113 |
+
" <td>mrsrasker@gmail.com</td>\n",
|
| 114 |
+
" <td>Newhaven PRU Outreach</td>\n",
|
| 115 |
+
" <td>Newhaven Gardens</td>\n",
|
| 116 |
+
" <td>NaN</td>\n",
|
| 117 |
+
" <td>Greenwich</td>\n",
|
| 118 |
+
" <td>SE96HR</td>\n",
|
| 119 |
+
" <td>£500 would enable us to set up a small sensor...</td>\n",
|
| 120 |
+
" <td></td>\n",
|
| 121 |
+
" </tr>\n",
|
| 122 |
+
" </tbody>\n",
|
| 123 |
+
"</table>\n",
|
| 124 |
+
"</div>"
|
| 125 |
+
],
|
| 126 |
+
"text/plain": [
|
| 127 |
+
" Id Date/Time Requested Giveaway Title Customer Name \\\n",
|
| 128 |
+
"0 304399.0 01/03/2025 00:52 March Community Collection Susan Bushnell \n",
|
| 129 |
+
"1 305004.0 02/03/2025 19:52 March Community Collection Sarah Arabestani \n",
|
| 130 |
+
"2 305493.0 05/03/2025 14:34 March Community Collection Rebecca Asker \n",
|
| 131 |
+
"\n",
|
| 132 |
+
" Email Address School Name \\\n",
|
| 133 |
+
"0 susan.bushnell@googlemail.com Southfield Junior School \n",
|
| 134 |
+
"1 sarah.a@sandringhamnursery.com Sandringham Nursery \n",
|
| 135 |
+
"2 mrsrasker@gmail.com Newhaven PRU Outreach \n",
|
| 136 |
+
"\n",
|
| 137 |
+
" Postal Address Address Line 2 Address City Postcode \\\n",
|
| 138 |
+
"0 Shrivenham Road Highworth Swindon SN6 7BZ \n",
|
| 139 |
+
"1 16 Sandringham Road Penylan Cardiff CF23 5BJ \n",
|
| 140 |
+
"2 Newhaven Gardens NaN Greenwich SE96HR \n",
|
| 141 |
+
"\n",
|
| 142 |
+
" Additional Info Unnamed: 11 \n",
|
| 143 |
+
"0 I would love to use it to spread the love of r... \n",
|
| 144 |
+
"1 We would like to introduce early years yoga an... \n",
|
| 145 |
+
"2 £500 would enable us to set up a small sensor... "
|
| 146 |
+
]
|
| 147 |
+
},
|
| 148 |
+
"execution_count": 2,
|
| 149 |
+
"metadata": {},
|
| 150 |
+
"output_type": "execute_result"
|
| 151 |
+
}
|
| 152 |
+
],
|
| 153 |
+
"source": [
|
| 154 |
+
"df = pd.read_csv('data/feb-march-data.csv')\n",
|
| 155 |
+
"\n",
|
| 156 |
+
"print(len(df))\n",
|
| 157 |
+
"print(df.columns)\n",
|
| 158 |
+
"\n",
|
| 159 |
+
"df.head(3)"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": 3,
|
| 165 |
+
"id": "cdc0e046-0490-45c0-94e2-902ec9a22248",
|
| 166 |
+
"metadata": {},
|
| 167 |
+
"outputs": [
|
| 168 |
+
{
|
| 169 |
+
"data": {
|
| 170 |
+
"text/plain": [
|
| 171 |
+
"Index(['id', 'date/time_requested', 'giveaway_title', 'customer_name',\n",
|
| 172 |
+
" 'email_address', 'school_name', 'postal_address', 'address_line_2',\n",
|
| 173 |
+
" 'address_city', 'postcode', 'additional_info', 'unnamed:_11'],\n",
|
| 174 |
+
" dtype='object')"
|
| 175 |
+
]
|
| 176 |
+
},
|
| 177 |
+
"execution_count": 3,
|
| 178 |
+
"metadata": {},
|
| 179 |
+
"output_type": "execute_result"
|
| 180 |
+
}
|
| 181 |
+
],
|
| 182 |
+
"source": [
|
| 183 |
+
"df.columns = df.columns.str.lower().str.replace(' ','_')\n",
|
| 184 |
+
"\n",
|
| 185 |
+
"df.columns"
|
| 186 |
+
]
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"cell_type": "code",
|
| 190 |
+
"execution_count": 4,
|
| 191 |
+
"id": "001e0102-f2f6-4812-99ec-6c78d218cb15",
|
| 192 |
+
"metadata": {},
|
| 193 |
+
"outputs": [
|
| 194 |
+
{
|
| 195 |
+
"name": "stderr",
|
| 196 |
+
"output_type": "stream",
|
| 197 |
+
"text": [
|
| 198 |
+
"Device set to use mps:0\n"
|
| 199 |
+
]
|
| 200 |
+
}
|
| 201 |
+
],
|
| 202 |
+
"source": [
|
| 203 |
+
"# Initialize the emotion classification pipeline\n",
|
| 204 |
+
"emotion_classifier = pipeline('text-classification', model='SamLowe/roberta-base-go_emotions', top_k=None)"
|
| 205 |
+
]
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"cell_type": "markdown",
|
| 209 |
+
"id": "de0b70c2-e3a2-430f-988b-cc9a5e3b06c4",
|
| 210 |
+
"metadata": {},
|
| 211 |
+
"source": [
|
| 212 |
+
"Let's test the classifier on a single application to know what kind of output we can expect"
|
| 213 |
+
]
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"cell_type": "code",
|
| 217 |
+
"execution_count": 5,
|
| 218 |
+
"id": "e6548cb4-e763-4758-9d58-92ac880485b2",
|
| 219 |
+
"metadata": {},
|
| 220 |
+
"outputs": [
|
| 221 |
+
{
|
| 222 |
+
"data": {
|
| 223 |
+
"text/html": [
|
| 224 |
+
"<div>\n",
|
| 225 |
+
"<style scoped>\n",
|
| 226 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 227 |
+
" vertical-align: middle;\n",
|
| 228 |
+
" }\n",
|
| 229 |
+
"\n",
|
| 230 |
+
" .dataframe tbody tr th {\n",
|
| 231 |
+
" vertical-align: top;\n",
|
| 232 |
+
" }\n",
|
| 233 |
+
"\n",
|
| 234 |
+
" .dataframe thead th {\n",
|
| 235 |
+
" text-align: right;\n",
|
| 236 |
+
" }\n",
|
| 237 |
+
"</style>\n",
|
| 238 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 239 |
+
" <thead>\n",
|
| 240 |
+
" <tr style=\"text-align: right;\">\n",
|
| 241 |
+
" <th></th>\n",
|
| 242 |
+
" <th>additional_info</th>\n",
|
| 243 |
+
" <th>word_count</th>\n",
|
| 244 |
+
" </tr>\n",
|
| 245 |
+
" </thead>\n",
|
| 246 |
+
" <tbody>\n",
|
| 247 |
+
" <tr>\n",
|
| 248 |
+
" <th>0</th>\n",
|
| 249 |
+
" <td>I would love to use it to spread the love of r...</td>\n",
|
| 250 |
+
" <td>69</td>\n",
|
| 251 |
+
" </tr>\n",
|
| 252 |
+
" <tr>\n",
|
| 253 |
+
" <th>1</th>\n",
|
| 254 |
+
" <td>We would like to introduce early years yoga an...</td>\n",
|
| 255 |
+
" <td>46</td>\n",
|
| 256 |
+
" </tr>\n",
|
| 257 |
+
" <tr>\n",
|
| 258 |
+
" <th>2</th>\n",
|
| 259 |
+
" <td>£500 would enable us to set up a small sensor...</td>\n",
|
| 260 |
+
" <td>86</td>\n",
|
| 261 |
+
" </tr>\n",
|
| 262 |
+
" </tbody>\n",
|
| 263 |
+
"</table>\n",
|
| 264 |
+
"</div>"
|
| 265 |
+
],
|
| 266 |
+
"text/plain": [
|
| 267 |
+
" additional_info word_count\n",
|
| 268 |
+
"0 I would love to use it to spread the love of r... 69\n",
|
| 269 |
+
"1 We would like to introduce early years yoga an... 46\n",
|
| 270 |
+
"2 £500 would enable us to set up a small sensor... 86"
|
| 271 |
+
]
|
| 272 |
+
},
|
| 273 |
+
"execution_count": 5,
|
| 274 |
+
"metadata": {},
|
| 275 |
+
"output_type": "execute_result"
|
| 276 |
+
}
|
| 277 |
+
],
|
| 278 |
+
"source": [
|
| 279 |
+
"# first let's find a long application\n",
|
| 280 |
+
"\n",
|
| 281 |
+
"df['word_count'] = df['additional_info'].apply(lambda x: len(str(x).split()))\n",
|
| 282 |
+
"\n",
|
| 283 |
+
"df[['additional_info', 'word_count']].head(3)"
|
| 284 |
+
]
|
| 285 |
+
},
|
| 286 |
+
{
|
| 287 |
+
"cell_type": "code",
|
| 288 |
+
"execution_count": 6,
|
| 289 |
+
"id": "d0e5c20e-ccc4-470a-aafb-7a834d633d17",
|
| 290 |
+
"metadata": {},
|
| 291 |
+
"outputs": [
|
| 292 |
+
{
|
| 293 |
+
"name": "stdout",
|
| 294 |
+
"output_type": "stream",
|
| 295 |
+
"text": [
|
| 296 |
+
"Horton Grange Primary School is located in the Great Horton area of Bradford, West Yorkshire, serving a well-established Pakistani community. The school caters to children aged 2 to 11, with a total enrollment of approximately 726 pupils. The student body is diverse, with 91% of students using English as an additional language. Additionally, 21% of students have Special Educational Needs, and 32 students are considered disadvantaged. In terms of academic performance, 66% of pupils meet the expected standards in reading, writing, and mathematics at Key Stage 2, with 11% achieving a higher standard. The school's most recent Ofsted inspection in September 2024 rated the quality of education as 'Good' and highlighted 'Outstanding' ratings for both behaviour and attitudes, and personal development. The school is part of the Exceed Academies Trust and is led by Headteacher Miss Rebecca Marshall. It offers a range of extracurricular activities, including educational outings and experiences that enrich learning. The school's ethos emphasizes creating a safe and happy environment where children can flourish and begin their lifelong learning journey.\n",
|
| 297 |
+
"In terms of the money we can get, the school will be able to use this for the following;\n",
|
| 298 |
+
"Books for the Library – Purchase diverse and engaging books to encourage reading.\n",
|
| 299 |
+
"Stationery and Art Supplies – Stock up on essentials like notebooks, pencils, and craft materials.\n",
|
| 300 |
+
"Maths and Science Kits – Hands-on resources to support STEM learning.\n",
|
| 301 |
+
"Tablets or Learning Apps – A contribution toward devices or educational subscriptions.\n",
|
| 302 |
+
"Headphones for ICT Use – Useful for online learning and accessibility.\n",
|
| 303 |
+
"Sensory Equipment – Support students with additional needs by purchasing fidget toys, weighted blankets, or calming tools.\n",
|
| 304 |
+
"Outdoor Play Resources – Items like skipping ropes, hula hoops, etc.\n"
|
| 305 |
+
]
|
| 306 |
+
}
|
| 307 |
+
],
|
| 308 |
+
"source": [
|
| 309 |
+
"test_application = df.loc[df['word_count'].idxmax(), 'additional_info']\n",
|
| 310 |
+
"\n",
|
| 311 |
+
"print(test_application)"
|
| 312 |
+
]
|
| 313 |
+
},
|
| 314 |
+
{
|
| 315 |
+
"cell_type": "code",
|
| 316 |
+
"execution_count": 9,
|
| 317 |
+
"id": "2f6f9d3d-71c6-4c0c-8aea-5cd17ba81fce",
|
| 318 |
+
"metadata": {},
|
| 319 |
+
"outputs": [
|
| 320 |
+
{
|
| 321 |
+
"data": {
|
| 322 |
+
"text/plain": [
|
| 323 |
+
"[[{'label': 'neutral', 'score': 0.5856776237487793},\n",
|
| 324 |
+
" {'label': 'approval', 'score': 0.4809421896934509},\n",
|
| 325 |
+
" {'label': 'admiration', 'score': 0.08151720464229584},\n",
|
| 326 |
+
" {'label': 'realization', 'score': 0.027009719982743263},\n",
|
| 327 |
+
" {'label': 'optimism', 'score': 0.018872186541557312},\n",
|
| 328 |
+
" {'label': 'caring', 'score': 0.010474747978150845},\n",
|
| 329 |
+
" {'label': 'disapproval', 'score': 0.008761710487306118},\n",
|
| 330 |
+
" {'label': 'annoyance', 'score': 0.0053910380229353905},\n",
|
| 331 |
+
" {'label': 'disappointment', 'score': 0.004287778399884701},\n",
|
| 332 |
+
" {'label': 'relief', 'score': 0.004131054040044546},\n",
|
| 333 |
+
" {'label': 'pride', 'score': 0.004071239847689867},\n",
|
| 334 |
+
" {'label': 'joy', 'score': 0.00316843600012362},\n",
|
| 335 |
+
" {'label': 'gratitude', 'score': 0.002936755074188113},\n",
|
| 336 |
+
" {'label': 'desire', 'score': 0.0022684938739985228},\n",
|
| 337 |
+
" {'label': 'sadness', 'score': 0.0020141159184277058},\n",
|
| 338 |
+
" {'label': 'love', 'score': 0.0017706562066450715},\n",
|
| 339 |
+
" {'label': 'confusion', 'score': 0.0015989093808457255},\n",
|
| 340 |
+
" {'label': 'excitement', 'score': 0.0014885042328387499},\n",
|
| 341 |
+
" {'label': 'disgust', 'score': 0.0008889383752830327},\n",
|
| 342 |
+
" {'label': 'curiosity', 'score': 0.0008583810413256288},\n",
|
| 343 |
+
" {'label': 'anger', 'score': 0.0007978692301549017},\n",
|
| 344 |
+
" {'label': 'fear', 'score': 0.0007784575573168695},\n",
|
| 345 |
+
" {'label': 'grief', 'score': 0.0005341923679225147},\n",
|
| 346 |
+
" {'label': 'remorse', 'score': 0.0005059443647041917},\n",
|
| 347 |
+
" {'label': 'nervousness', 'score': 0.0004846185038331896},\n",
|
| 348 |
+
" {'label': 'embarrassment', 'score': 0.00036206969525665045},\n",
|
| 349 |
+
" {'label': 'surprise', 'score': 0.00034910510294139385},\n",
|
| 350 |
+
" {'label': 'amusement', 'score': 0.0003461150045040995}]]"
|
| 351 |
+
]
|
| 352 |
+
},
|
| 353 |
+
"execution_count": 9,
|
| 354 |
+
"metadata": {},
|
| 355 |
+
"output_type": "execute_result"
|
| 356 |
+
}
|
| 357 |
+
],
|
| 358 |
+
"source": [
|
| 359 |
+
"emotions = emotion_classifier(test_application)\n",
|
| 360 |
+
"emotions"
|
| 361 |
+
]
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"cell_type": "code",
|
| 365 |
+
"execution_count": 10,
|
| 366 |
+
"id": "a6a954f4-ad3d-40f6-84cb-a8b83ed912f5",
|
| 367 |
+
"metadata": {},
|
| 368 |
+
"outputs": [],
|
| 369 |
+
"source": [
|
| 370 |
+
"more_intense_example = \"\"\"\"I run outdoor learning at my school, we are in a very deprived area, in a central area of Milton Keynes in the middle of a housing estate, the school has very little. I currently pay for as much as possible. We have now got an allotment area, but really need some help with filling it!! We are trying hard to run outdoor learning sessions, as we have so many children without gardens who live in converted shipping containers and their faces when they tackle ceratin skills and plant and see things grow is beautiful. please please help us!!!\"\"\""
|
| 371 |
+
]
|
| 372 |
+
},
|
| 373 |
+
{
|
| 374 |
+
"cell_type": "code",
|
| 375 |
+
"execution_count": 11,
|
| 376 |
+
"id": "9af1aceb-11f5-4755-af2f-de08bd18be6c",
|
| 377 |
+
"metadata": {},
|
| 378 |
+
"outputs": [
|
| 379 |
+
{
|
| 380 |
+
"data": {
|
| 381 |
+
"text/plain": [
|
| 382 |
+
"[[{'label': 'admiration', 'score': 0.6447672843933105},\n",
|
| 383 |
+
" {'label': 'desire', 'score': 0.3310450613498688},\n",
|
| 384 |
+
" {'label': 'optimism', 'score': 0.08578585088253021},\n",
|
| 385 |
+
" {'label': 'approval', 'score': 0.08077544718980789},\n",
|
| 386 |
+
" {'label': 'neutral', 'score': 0.0645425021648407},\n",
|
| 387 |
+
" {'label': 'disappointment', 'score': 0.034120526164770126},\n",
|
| 388 |
+
" {'label': 'caring', 'score': 0.026644788682460785},\n",
|
| 389 |
+
" {'label': 'sadness', 'score': 0.014421308413147926},\n",
|
| 390 |
+
" {'label': 'annoyance', 'score': 0.01335776224732399},\n",
|
| 391 |
+
" {'label': 'disapproval', 'score': 0.012340354733169079},\n",
|
| 392 |
+
" {'label': 'excitement', 'score': 0.008422383107244968},\n",
|
| 393 |
+
" {'label': 'realization', 'score': 0.008331868797540665},\n",
|
| 394 |
+
" {'label': 'love', 'score': 0.007035400252789259},\n",
|
| 395 |
+
" {'label': 'curiosity', 'score': 0.006367261987179518},\n",
|
| 396 |
+
" {'label': 'pride', 'score': 0.005947182886302471},\n",
|
| 397 |
+
" {'label': 'joy', 'score': 0.004438790027052164},\n",
|
| 398 |
+
" {'label': 'gratitude', 'score': 0.0034672864712774754},\n",
|
| 399 |
+
" {'label': 'relief', 'score': 0.0027506654150784016},\n",
|
| 400 |
+
" {'label': 'disgust', 'score': 0.0023999481927603483},\n",
|
| 401 |
+
" {'label': 'confusion', 'score': 0.0022132378071546555},\n",
|
| 402 |
+
" {'label': 'grief', 'score': 0.002203976968303323},\n",
|
| 403 |
+
" {'label': 'anger', 'score': 0.001952152932062745},\n",
|
| 404 |
+
" {'label': 'nervousness', 'score': 0.0017981012351810932},\n",
|
| 405 |
+
" {'label': 'surprise', 'score': 0.0016994696343317628},\n",
|
| 406 |
+
" {'label': 'fear', 'score': 0.0016026603989303112},\n",
|
| 407 |
+
" {'label': 'remorse', 'score': 0.0015317321522161365},\n",
|
| 408 |
+
" {'label': 'embarrassment', 'score': 0.0006112103583291173},\n",
|
| 409 |
+
" {'label': 'amusement', 'score': 0.0006051507662050426}]]"
|
| 410 |
+
]
|
| 411 |
+
},
|
| 412 |
+
"execution_count": 11,
|
| 413 |
+
"metadata": {},
|
| 414 |
+
"output_type": "execute_result"
|
| 415 |
+
}
|
| 416 |
+
],
|
| 417 |
+
"source": [
|
| 418 |
+
"emotions = emotion_classifier(more_intense_example)\n",
|
| 419 |
+
"\n",
|
| 420 |
+
"emotions"
|
| 421 |
+
]
|
| 422 |
+
},
|
| 423 |
+
{
|
| 424 |
+
"cell_type": "code",
|
| 425 |
+
"execution_count": 12,
|
| 426 |
+
"id": "a7049b70-c14a-4475-a852-7938de44850e",
|
| 427 |
+
"metadata": {},
|
| 428 |
+
"outputs": [],
|
| 429 |
+
"source": [
|
| 430 |
+
"def classify_emotions(text):\n",
|
| 431 |
+
" \"\"\"\n",
|
| 432 |
+
" Classifies the emotions in a given text and returns a dictionary of emotions and their scores.\n",
|
| 433 |
+
" \"\"\"\n",
|
| 434 |
+
" if isinstance(text, str):\n",
|
| 435 |
+
" emotions = emotion_classifier(text)\n",
|
| 436 |
+
" # The output is a list of lists of dictionaries. We need to process it.\n",
|
| 437 |
+
" if emotions and isinstance(emotions[0], list):\n",
|
| 438 |
+
" return {item['label']: item['score'] for item in emotions[0]}\n",
|
| 439 |
+
" return {}"
|
| 440 |
+
]
|
| 441 |
+
},
|
| 442 |
+
{
|
| 443 |
+
"cell_type": "code",
|
| 444 |
+
"execution_count": 13,
|
| 445 |
+
"id": "e348575d-df4c-47d3-a74a-3135d1b467d6",
|
| 446 |
+
"metadata": {},
|
| 447 |
+
"outputs": [
|
| 448 |
+
{
|
| 449 |
+
"name": "stdout",
|
| 450 |
+
"output_type": "stream",
|
| 451 |
+
"text": [
|
| 452 |
+
"Processing Time: 17.430683851242065 seconds\n"
|
| 453 |
+
]
|
| 454 |
+
}
|
| 455 |
+
],
|
| 456 |
+
"source": [
|
| 457 |
+
"start_time = time.time()\n",
|
| 458 |
+
"\n",
|
| 459 |
+
"df['emotion'] = df['additional_info'].map(classify_emotions)\n",
|
| 460 |
+
"\n",
|
| 461 |
+
"end_time = time.time()\n",
|
| 462 |
+
"\n",
|
| 463 |
+
"print(f\"Processing Time: {end_time-start_time} seconds\")"
|
| 464 |
+
]
|
| 465 |
+
},
|
| 466 |
+
{
|
| 467 |
+
"cell_type": "code",
|
| 468 |
+
"execution_count": 14,
|
| 469 |
+
"id": "718a597d-78b4-4240-ad0c-7b77f2a5705c",
|
| 470 |
+
"metadata": {},
|
| 471 |
+
"outputs": [
|
| 472 |
+
{
|
| 473 |
+
"data": {
|
| 474 |
+
"text/plain": [
|
| 475 |
+
"emotion\n",
|
| 476 |
+
"{'desire': 0.6581346988677979, 'love': 0.19491417706012726, 'approval': 0.11420776695013046, 'neutral': 0.10338450223207474, 'admiration': 0.09521238505840302, 'optimism': 0.061544269323349, 'caring': 0.017690906301140785, 'excitement': 0.014791340567171574, 'joy': 0.00833946093916893, 'realization': 0.007762537803500891, 'disappointment': 0.006389504764229059, 'annoyance': 0.00584241421893239, 'disapproval': 0.004605362191796303, 'gratitude': 0.0036359988152980804, 'curiosity': 0.0034046811051666737, 'pride': 0.0032050127629190683, 'sadness': 0.002815956948325038, 'anger': 0.0022237244993448257, 'disgust': 0.0021591379772871733, 'remorse': 0.0015994398854672909, 'confusion': 0.001582510769367218, 'relief': 0.0013734428212046623, 'fear': 0.0010202437406405807, 'surprise': 0.000994302099570632, 'nervousness': 0.0008488827734254301, 'amusement': 0.0007571439491584897, 'grief': 0.0007330116932280362, 'embarrassment': 0.00048045202856883407} 2\n",
|
| 477 |
+
"{'neutral': 0.7198513746261597, 'desire': 0.12881478667259216, 'approval': 0.091864213347435, 'optimism': 0.0715695396065712, 'caring': 0.012859346345067024, 'realization': 0.00743895536288619, 'admiration': 0.005056403577327728, 'annoyance': 0.003686325391754508, 'excitement': 0.0029439334757626057, 'love': 0.0029390724375844, 'curiosity': 0.002497346606105566, 'disappointment': 0.002442688448354602, 'joy': 0.0019557320047169924, 'disapproval': 0.0019393657566979527, 'confusion': 0.0016075921012088656, 'sadness': 0.001316291862167418, 'gratitude': 0.0012883843155577779, 'amusement': 0.0009250930743291974, 'relief': 0.0008670985116623342, 'disgust': 0.0007940911455079913, 'fear': 0.0007276318501681089, 'anger': 0.0007231888012029231, 'remorse': 0.0006041062879376113, 'pride': 0.0006011316436342895, 'nervousness': 0.00047796161379665136, 'surprise': 0.00038332759868353605, 'grief': 0.0002986726176459342, 'embarrassment': 0.00018183095380663872} 2\n",
|
| 478 |
+
"{'love': 0.8187400698661804, 'desire': 0.20554493367671967, 'approval': 0.19605238735675812, 'optimism': 0.07884839177131653, 'neutral': 0.047432418912649155, 'admiration': 0.03834596648812294, 'caring': 0.02760092355310917, 'joy': 0.024431521072983742, 'excitement': 0.012095467187464237, 'realization': 0.007476216647773981, 'disapproval': 0.005716291256248951, 'annoyance': 0.004326308146119118, 'disappointment': 0.0034753908403217793, 'gratitude': 0.003186204005032778, 'curiosity': 0.0028302031569182873, 'anger': 0.002644424559548497, 'sadness': 0.0019676932133734226, 'pride': 0.0018537358846515417, 'confusion': 0.0018511429661884904, 'disgust': 0.0016749151982367039, 'relief': 0.001508575864136219, 'remorse': 0.001422111177816987, 'amusement': 0.0012156444136053324, 'fear': 0.0010237834649160504, 'nervousness': 0.0008349438430741429, 'surprise': 0.0008129989146254957, 'grief': 0.0005431832978501916, 'embarrassment': 0.0004631277406588197} 1\n",
|
| 479 |
+
"{'neutral': 0.5076336860656738, 'love': 0.29340073466300964, 'desire': 0.16485583782196045, 'approval': 0.10496989637613297, 'optimism': 0.0401114858686924, 'caring': 0.015620836056768894, 'admiration': 0.0072004590183496475, 'realization': 0.005993019323796034, 'curiosity': 0.004636416211724281, 'excitement': 0.004370811395347118, 'joy': 0.003089142730459571, 'annoyance': 0.003079853719100356, 'disapproval': 0.0027393156196922064, 'disappointment': 0.002309859963133931, 'confusion': 0.0021816666703671217, 'gratitude': 0.001543834456242621, 'anger': 0.0014822438824921846, 'sadness': 0.001471023540943861, 'remorse': 0.0013588856672868133, 'disgust': 0.0012211528373882174, 'amusement': 0.0009235409088432789, 'fear': 0.0007699999841861427, 'relief': 0.0005044073332101107, 'pride': 0.0004537333734333515, 'nervousness': 0.0004489609564188868, 'surprise': 0.0004487480618990958, 'grief': 0.00040851059020496905, 'embarrassment': 0.00025657506193965673} 1\n",
|
| 480 |
+
"{'approval': 0.49856194853782654, 'caring': 0.344292551279068, 'neutral': 0.29566851258277893, 'optimism': 0.09856816381216049, 'admiration': 0.03202248364686966, 'desire': 0.03041200526058674, 'realization': 0.016633788123726845, 'relief': 0.011193848215043545, 'joy': 0.007910333573818207, 'love': 0.00697379419580102, 'disapproval': 0.006227719597518444, 'annoyance': 0.005336344707757235, 'pride': 0.004758044146001339, 'excitement': 0.00448231864720583, 'gratitude': 0.004388503264635801, 'disappointment': 0.0022716594394296408, 'sadness': 0.002184084616601467, 'nervousness': 0.0017732286360114813, 'fear': 0.0016394820995628834, 'curiosity': 0.0014700229512527585, 'confusion': 0.001426154631190002, 'remorse': 0.0011708199745044112, 'anger': 0.0011528246104717255, 'grief': 0.0009556863224133849, 'disgust': 0.0007417521555908024, 'amusement': 0.0005863429978489876, 'embarrassment': 0.000385559193091467, 'surprise': 0.0003641090006567538} 1\n",
|
| 481 |
+
" ..\n",
|
| 482 |
+
"{'love': 0.6240453720092773, 'approval': 0.22971400618553162, 'optimism': 0.10951369255781174, 'admiration': 0.10707932710647583, 'desire': 0.10640142112970352, 'neutral': 0.0772121325135231, 'caring': 0.0336274728178978, 'excitement': 0.011404638178646564, 'joy': 0.010220258496701717, 'gratitude': 0.0061613572761416435, 'realization': 0.004714313428848982, 'disapproval': 0.003537344979122281, 'annoyance': 0.0025603678077459335, 'pride': 0.0021254082676023245, 'curiosity': 0.0020350078120827675, 'disappointment': 0.0019395023118704557, 'anger': 0.0015657278709113598, 'relief': 0.0012329526944085956, 'sadness': 0.0011641201563179493, 'confusion': 0.0011513211065903306, 'disgust': 0.001071184640750289, 'remorse': 0.0009741514804773033, 'fear': 0.0008680318132974207, 'amusement': 0.0006968716043047607, 'surprise': 0.0005896132788620889, 'nervousness': 0.00044098569196648896, 'grief': 0.000394510745536536, 'embarrassment': 0.00025422812905162573} 1\n",
|
| 483 |
+
"{'love': 0.8822043538093567, 'desire': 0.3618478775024414, 'optimism': 0.10699858516454697, 'approval': 0.04940512403845787, 'neutral': 0.031984638422727585, 'admiration': 0.02202957682311535, 'caring': 0.017195403575897217, 'joy': 0.01663418672978878, 'excitement': 0.012207315303385258, 'curiosity': 0.006512079853564501, 'realization': 0.005591886583715677, 'disappointment': 0.005011741537600756, 'disapproval': 0.004293193109333515, 'annoyance': 0.004170648287981749, 'gratitude': 0.0038288026116788387, 'anger': 0.003273312235251069, 'sadness': 0.003027856582775712, 'confusion': 0.0025133287999778986, 'disgust': 0.0023185296449810266, 'amusement': 0.0020872291643172503, 'remorse': 0.001971230376511812, 'fear': 0.0015347638400271535, 'surprise': 0.0014821203658357263, 'nervousness': 0.000915916170924902, 'pride': 0.0008358453633263707, 'relief': 0.0007466922397725284, 'grief': 0.0006537080626003444, 'embarrassment': 0.0004913744633086026} 1\n",
|
| 484 |
+
"{'neutral': 0.4400523006916046, 'disappointment': 0.1673174798488617, 'approval': 0.09361343085765839, 'optimism': 0.0808829739689827, 'disapproval': 0.07908830791711807, 'realization': 0.05669443681836128, 'admiration': 0.03665924444794655, 'desire': 0.029469400644302368, 'annoyance': 0.02366258203983307, 'relief': 0.012171982787549496, 'pride': 0.01058684941381216, 'sadness': 0.009222305379807949, 'gratitude': 0.007708139717578888, 'joy': 0.005490907002240419, 'caring': 0.005353783257305622, 'excitement': 0.0043628523126244545, 'remorse': 0.002954594325274229, 'embarrassment': 0.0026827470865100622, 'surprise': 0.002345424145460129, 'confusion': 0.002178364200517535, 'grief': 0.0017280877800658345, 'disgust': 0.001564697129651904, 'nervousness': 0.0013773756800219417, 'anger': 0.001201029634103179, 'fear': 0.001040227827616036, 'love': 0.0009017083211801946, 'curiosity': 0.0007159471279010177, 'amusement': 0.0005331294960342348} 1\n",
|
| 485 |
+
"{'neutral': 0.8489049673080444, 'approval': 0.09180843830108643, 'optimism': 0.038285840302705765, 'desire': 0.023847607895731926, 'realization': 0.015926184132695198, 'annoyance': 0.004128282889723778, 'caring': 0.0038364490028470755, 'admiration': 0.0033934484235942364, 'excitement': 0.0033061644062399864, 'disapproval': 0.0026745586656033993, 'disappointment': 0.0021017631515860558, 'confusion': 0.0019337492994964123, 'curiosity': 0.0017292933771386743, 'joy': 0.0017195056425407529, 'amusement': 0.0013032474089413881, 'love': 0.0011531843338161707, 'gratitude': 0.0011232284596189857, 'sadness': 0.0009520824532955885, 'relief': 0.0007970146834850311, 'disgust': 0.0007318882853724062, 'anger': 0.0005732764257118106, 'fear': 0.000565911817830056, 'pride': 0.0005316479946486652, 'surprise': 0.00046818636474199593, 'remorse': 0.0004631756746675819, 'nervousness': 0.00034779991256073117, 'grief': 0.0002250072720926255, 'embarrassment': 0.00021606536756735295} 1\n",
|
| 486 |
+
"{'approval': 0.4501877725124359, 'neutral': 0.3748222887516022, 'optimism': 0.1657930463552475, 'caring': 0.11067566275596619, 'admiration': 0.036988891661167145, 'desire': 0.03271962329745293, 'realization': 0.013296376913785934, 'joy': 0.009133617393672466, 'relief': 0.008972113952040672, 'gratitude': 0.00591721385717392, 'pride': 0.005541756749153137, 'excitement': 0.005009407177567482, 'annoyance': 0.004634445067495108, 'disapproval': 0.004361479077488184, 'love': 0.0021700726356357336, 'disappointment': 0.0019895790610462427, 'confusion': 0.0012547277146950364, 'sadness': 0.0012172780698165298, 'curiosity': 0.0009757785010151565, 'fear': 0.0009069022489711642, 'nervousness': 0.0008590560755692422, 'anger': 0.0007762830355204642, 'remorse': 0.0006289182347245514, 'disgust': 0.0006231664447113872, 'grief': 0.0006202697986736894, 'amusement': 0.0006018164567649364, 'surprise': 0.0003919634909834713, 'embarrassment': 0.0002638056466821581} 1\n",
|
| 487 |
+
"Name: count, Length: 909, dtype: int64"
|
| 488 |
+
]
|
| 489 |
+
},
|
| 490 |
+
"execution_count": 14,
|
| 491 |
+
"metadata": {},
|
| 492 |
+
"output_type": "execute_result"
|
| 493 |
+
}
|
| 494 |
+
],
|
| 495 |
+
"source": [
|
| 496 |
+
"df['emotion'].value_counts()"
|
| 497 |
+
]
|
| 498 |
+
},
|
| 499 |
+
{
|
| 500 |
+
"cell_type": "code",
|
| 501 |
+
"execution_count": 18,
|
| 502 |
+
"id": "09b69317-9052-4232-839b-a3989a9456a4",
|
| 503 |
+
"metadata": {},
|
| 504 |
+
"outputs": [],
|
| 505 |
+
"source": [
|
| 506 |
+
"def get_top_emotion(emotion_dict):\n",
|
| 507 |
+
" \"\"\"\n",
|
| 508 |
+
" Returns a (emotion, score) tuple for the highest-scoring emotion in a dictionary of emotions, or (None, 0.0) if the dictionary is empty.\n",
|
| 509 |
+
" \"\"\"\n",
|
| 510 |
+
" if not emotion_dict:\n",
|
| 511 |
+
" return None, 0.0\n",
|
| 512 |
+
" top_emotion = max(emotion_dict, key=emotion_dict.get)\n",
|
| 513 |
+
" return top_emotion, emotion_dict[top_emotion]\n",
|
| 514 |
+
"\n",
|
| 515 |
+
"# Apply the function to the 'emotion' column\n",
|
| 516 |
+
"df[['top_emotion', 'top_emotion_score']] = df['emotion'].apply(get_top_emotion).apply(pd.Series)"
|
| 517 |
+
]
|
| 518 |
+
},
|
| 519 |
+
{
|
| 520 |
+
"cell_type": "code",
|
| 521 |
+
"execution_count": 20,
|
| 522 |
+
"id": "7864a200-28a8-476a-8486-d6cdaa7090e6",
|
| 523 |
+
"metadata": {},
|
| 524 |
+
"outputs": [
|
| 525 |
+
{
|
| 526 |
+
"data": {
|
| 527 |
+
"text/plain": [
|
| 528 |
+
"top_emotion\n",
|
| 529 |
+
"neutral 319\n",
|
| 530 |
+
"desire 212\n",
|
| 531 |
+
"approval 88\n",
|
| 532 |
+
"love 79\n",
|
| 533 |
+
"gratitude 58\n",
|
| 534 |
+
"admiration 51\n",
|
| 535 |
+
"optimism 36\n",
|
| 536 |
+
"disappointment 16\n",
|
| 537 |
+
"excitement 15\n",
|
| 538 |
+
"caring 12\n",
|
| 539 |
+
"sadness 11\n",
|
| 540 |
+
"joy 9\n",
|
| 541 |
+
"realization 2\n",
|
| 542 |
+
"nervousness 1\n",
|
| 543 |
+
"confusion 1\n",
|
| 544 |
+
"disapproval 1\n",
|
| 545 |
+
"Name: count, dtype: int64"
|
| 546 |
+
]
|
| 547 |
+
},
|
| 548 |
+
"execution_count": 20,
|
| 549 |
+
"metadata": {},
|
| 550 |
+
"output_type": "execute_result"
|
| 551 |
+
}
|
| 552 |
+
],
|
| 553 |
+
"source": [
|
| 554 |
+
"df['top_emotion'].value_counts()"
|
| 555 |
+
]
|
| 556 |
+
},
|
| 557 |
+
{
|
| 558 |
+
"cell_type": "code",
|
| 559 |
+
"execution_count": 23,
|
| 560 |
+
"id": "28d7fd5a-1be6-4311-a0bb-937852040690",
|
| 561 |
+
"metadata": {},
|
| 562 |
+
"outputs": [],
|
| 563 |
+
"source": [
|
| 564 |
+
"not_list = ['neutral', 'approval', 'love', 'admiration', 'optimism', 'excitement', 'joy']\n",
|
| 565 |
+
"\n",
|
| 566 |
+
"not_df = df[~df['top_emotion'].isin(not_list)]"
|
| 567 |
+
]
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"cell_type": "code",
|
| 571 |
+
"execution_count": 25,
|
| 572 |
+
"id": "d9bb5250-4da8-4a57-bf62-2baa0390b6ff",
|
| 573 |
+
"metadata": {},
|
| 574 |
+
"outputs": [
|
| 575 |
+
{
|
| 576 |
+
"data": {
|
| 577 |
+
"text/plain": [
|
| 578 |
+
"363"
|
| 579 |
+
]
|
| 580 |
+
},
|
| 581 |
+
"execution_count": 25,
|
| 582 |
+
"metadata": {},
|
| 583 |
+
"output_type": "execute_result"
|
| 584 |
+
}
|
| 585 |
+
],
|
| 586 |
+
"source": [
|
| 587 |
+
"len(not_df)"
|
| 588 |
+
]
|
| 589 |
+
},
|
| 590 |
+
{
|
| 591 |
+
"cell_type": "code",
|
| 592 |
+
"execution_count": null,
|
| 593 |
+
"id": "3be2721c-9ce9-4e2c-a99e-e60ebc152891",
|
| 594 |
+
"metadata": {},
|
| 595 |
+
"outputs": [],
|
| 596 |
+
"source": []
|
| 597 |
+
}
|
| 598 |
+
],
|
| 599 |
+
"metadata": {
|
| 600 |
+
"kernelspec": {
|
| 601 |
+
"display_name": "Python 3 (ipykernel)",
|
| 602 |
+
"language": "python",
|
| 603 |
+
"name": "python3"
|
| 604 |
+
},
|
| 605 |
+
"language_info": {
|
| 606 |
+
"codemirror_mode": {
|
| 607 |
+
"name": "ipython",
|
| 608 |
+
"version": 3
|
| 609 |
+
},
|
| 610 |
+
"file_extension": ".py",
|
| 611 |
+
"mimetype": "text/x-python",
|
| 612 |
+
"name": "python",
|
| 613 |
+
"nbconvert_exporter": "python",
|
| 614 |
+
"pygments_lexer": "ipython3",
|
| 615 |
+
"version": "3.12.11"
|
| 616 |
+
}
|
| 617 |
+
},
|
| 618 |
+
"nbformat": 4,
|
| 619 |
+
"nbformat_minor": 5
|
| 620 |
+
}
|