{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", "import numpy as np,sklearn,os,cv2,pandas\n", "import tensorflow_hub as hub\n", "import tensorflow_text as text\n", "from tensorflow.keras import Model,layers\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0DatetimeTweet IdTextUsernamesentimentsentiment_scoreemotionemotion_score
002022-09-30 23:29:15+00:001575991191170342912@Logitech @apple @Google @Microsoft @Dell @Len...ManjuSreedaranneutral0.853283anticipation0.587121
112022-09-30 21:46:35+00:001575965354425131008@MK_habit_addict @official_stier @MortalKombat...MiKeMcDnetneutral0.519470joy0.886913
222022-09-30 21:18:02+00:001575958171423752203As @CRN celebrates its 40th anniversary, Bob F...jfollettpositive0.763791joy0.960347
332022-09-30 20:05:24+00:001575939891485032450@dell your customer service is horrible especi...daveccarrnegative0.954023anger0.983203
442022-09-30 20:03:17+00:001575939359160750080@zacokalo @Dell @DellCares @Dell give the man ...heycamellaneutral0.529170anger0.776124
\n", "
" ], "text/plain": [
 " Unnamed: 0 Datetime Tweet Id \\\n",
 "0 0 2022-09-30 23:29:15+00:00 1575991191170342912 \n",
 "1 1 2022-09-30 21:46:35+00:00 1575965354425131008 \n",
 "2 2 2022-09-30 21:18:02+00:00 1575958171423752203 \n",
 "3 3 2022-09-30 20:05:24+00:00 1575939891485032450 \n",
 "4 4 2022-09-30 20:03:17+00:00 1575939359160750080 \n",
 "\n",
 " Text Username \\\n",
 "0 @Logitech @apple @Google @Microsoft @Dell @Len... ManjuSreedaran \n",
 "1 @MK_habit_addict @official_stier @MortalKombat... MiKeMcDnet \n",
 "2 As @CRN celebrates its 40th anniversary, Bob F... jfollett \n",
 "3 @dell your customer service is horrible especi... daveccarr \n",
 "4 @zacokalo @Dell @DellCares @Dell give the man ... heycamella \n",
 "\n",
 " sentiment sentiment_score emotion emotion_score \n",
 "0 neutral 0.853283 anticipation 0.587121 \n",
 "1 neutral 0.519470 joy 0.886913 \n",
 "2 positive 0.763791 joy 0.960347 \n",
 "3 negative 0.954023 anger 0.983203 \n",
 "4 neutral 0.529170 anger 0.776124 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Load the labelled Dell tweets and preview the first rows\n",
 "df=pandas.read_csv(\"./sentiment-emotion-labelled_Dell_tweets.csv\")\n",
 "df.head()" ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['neutral', 'positive', 'negative'], dtype=object)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Features are the raw tweet text; targets are the sentiment strings\n",
 "X,Y=df.Text,df.sentiment\n",
 "Y.unique()" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
 "# Fixed class order: a label's position in this list is its integer id\n",
 "Classes=['neutral', 'positive', 'negative']" ] },
 { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((24970,), (24970,))" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "X.shape,Y.shape" ] },
 { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [
 "# Convert both pandas Series to plain NumPy arrays\n",
 "X=np.array(X)\n",
 "Y=np.array(Y)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Pretrained uncased BERT preprocessor and encoder (L-12, H-768, A-12) from TF Hub.\n",
 "# No trainable flag is passed, so both layers keep hub.KerasLayer's default\n",
 "# NOTE(review): that default is presumably frozen weights - confirm against the TF Hub docs\n",
 "bert_preprocess = hub.KerasLayer(\n",
 "    \"https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3\")\n",
 "bert_encoder = hub.KerasLayer(\n",
 "    \"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4\")" ] },
 { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((24970,), (24970,))" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Encode each sentiment string as its position in Classes:\n",
 "# 0 => neutral, 1 => positive, 2 => negative\n",
 "class_to_index={name:index for index,name in enumerate(Classes)}\n",
 "# Fail loudly on labels outside Classes (e.g. NaN) instead of silently encoding\n",
 "# them as an out-of-range id that sparse_categorical_crossentropy cannot use\n",
 "unknown_labels=[label for label in set(Y) if label not in class_to_index]\n",
 "if unknown_labels:\n",
 "    raise ValueError(f\"Sentiment labels missing from Classes: {unknown_labels}\")\n",
 "New_Y=np.array([class_to_index[label] for label in Y])\n",
 "New_Y.shape,X.shape" ] },
 { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [
 "from sklearn.model_selection import train_test_split as tts\n",
 "# Hold out 20% of the tweets for testing; the fixed random_state keeps the split repeatable\n",
 "X_train, X_test, y_train, y_test = tts(X,New_Y,test_size=0.2, random_state=42,shuffle=True)" ] },
 { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((19976,), (4994,), (19976,), (4994,))" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "X_train.shape,X_test.shape,y_train.shape,y_test.shape" ] },
 { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(\"@Dell Apparently your company doesn't think that a swollen battery on a laptop that isn't even a year old is an issue. 
You are not honoring your warranty and overall putting me in the position where I am a walking fire hazard which as a customer for years now isn't right.\",\n", " 2)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "X_train[0],y_train[0]" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Fix TensorFlow's global seed so weight initialisation and dropout start\n",
 "# from the same random state on every run\n",
 "tf.random.set_seed(42)\n",
 "\n",
 "# Raw tweet strings -> BERT preprocessing -> BERT encoder outputs\n",
 "text_input=layers.Input(shape=(),dtype=tf.string,name=\"text\")\n",
 "bert_inputs=bert_preprocess(text_input)\n",
 "bert_outputs=bert_encoder(bert_inputs)\n",
 "# Feed the per-token \"sequence_output\" (rather than \"pooled_output\") to the\n",
 "# recurrent and convolutional layers below\n",
 "token_embeddings=bert_outputs[\"sequence_output\"]\n",
 "\n",
 "# Classification head: two bidirectional GRUs, a pointwise Conv1D, then dense layers\n",
 "hidden=layers.Bidirectional(layers.GRU(128, return_sequences=True))(token_embeddings)\n",
 "hidden=layers.Bidirectional(layers.GRU(64, return_sequences=True))(hidden)\n",
 "hidden=layers.Conv1D(64, 1, activation=\"relu\", padding='same')(hidden)\n",
 "hidden=layers.Flatten()(hidden)\n",
 "hidden=layers.Dropout(0.2)(hidden)\n",
 "hidden=layers.Dense(128,activation=\"relu\",name=\"input\")(hidden)\n",
 "hidden=layers.Dense(64,activation=\"relu\",name=\"in\")(hidden)\n",
 "hidden=layers.Dense(16,activation=\"relu\",name=\"in1\")(hidden)\n",
 "hidden=layers.Dropout(0.2)(hidden)\n",
 "# One softmax probability per entry in Classes, e.g. [0.2,0.3,0.5], summing to 1\n",
 "class_probabilities=layers.Dense(len(Classes),activation=\"softmax\",name=\"output\")(hidden)\n",
 "# Named sentiment_model so the Model class imported from tensorflow.keras is not shadowed\n",
 "sentiment_model=tf.keras.Model(inputs=text_input,outputs=[class_probabilities])" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Sparse loss because y_train holds integer class ids, not one-hot vectors\n",
 "sentiment_model.compile(optimizer=\"adam\",loss=\"sparse_categorical_crossentropy\",metrics=[\"accuracy\"])\n",
 "sentiment_model.summary()" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Training schedule; hist keeps the History object returned by fit\n",
 "EPOCHS=30\n",
 "BATCH_SIZE=80\n",
 "hist=sentiment_model.fit(X_train,y_train,epochs=EPOCHS,batch_size=BATCH_SIZE)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Loss and accuracy on the held-out 20% split\n",
 "sentiment_model.evaluate(X_test,y_test)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# NOTE(review): reloading this HDF5 file presumably needs\n",
 "# custom_objects={\"KerasLayer\": hub.KerasLayer} and tensorflow_text imported - confirm\n",
 "sentiment_model.save(\"./Model_3_Bert_Uncase.h5\")" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "sample_tweets=[\"you awake from a deep trance, step away from the phone to see your friends & family\"]\n",
 "A=sentiment_model.predict(sample_tweets)\n",
 "# Take the argmax over the first sample's row only, so the index is always a\n",
 "# valid class id even if more than one tweet is passed to predict\n",
 "a=int(np.argmax(A[0]))\n",
 "# Predicted class name and its probability as a whole-number percentage\n",
 "Classes[a],int(A[0][a]*100)" ] } ],
 "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }