hfzdzakii commited on
Commit
b0abc64
·
1 Parent(s): e932e8c

Add all necessary files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.keras filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ import pickle
4
+ import unicodedata
5
+ import contractions
6
+ import re
7
+ import nltk
8
+ import pandas as pd
9
+ import numpy as np
10
+ from nltk.corpus import stopwords, words
11
+ from nltk.stem import WordNetLemmatizer
12
+ from tensorflow.keras.models import load_model #type:ignore
13
+ from tensorflow.keras.utils import pad_sequences # type: ignore
14
+
15
# Download the NLTK resources used by this module.
for _package in ('words', 'punkt_tab', 'wordnet', 'stopwords'):
    nltk.download(_package)

# Module-level NLP resources, built once at import time.
english_words = set(words.words())
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
23
+
24
def loadCustomDict(path):
    """Load a custom vocabulary file into a set of lower-cased entries.

    Args:
        path: Path of a text file holding one vocabulary entry per line.

    Returns:
        A set containing every non-blank line, stripped of surrounding
        whitespace and lower-cased.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(path, 'r', encoding='utf-8') as file:
        return {line.strip().lower() for line in file if line.strip()}
27
+
28
def normalizeWhitespace(text):
    """Normalize a raw poem string into lower-cased, punctuation-free text.

    Steps, in order: NFKC Unicode normalization, contraction expansion via
    ``contractions.fix``, then a fixed sequence of regex substitutions, and
    finally whitespace squeezing, trimming and lower-casing.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string.
    """
    substitutions = (
        (r'[\t\r]+', ' '),  # tabs / carriage returns become a space
        (r'\b\d+\b', ''),  # remove standalone numbers
        (r'[-‐‑‒–—―]+', ''),  # remove hyphen and dash characters
        (r'[_﹍﹎_]', ''),  # remove underscore-like characters
        (r'[^\w\s]', ''),  # remove remaining punctuation symbols
        (r'\b(\w+)(?:\s+\1\b)+', r'\1'),  # collapse immediately repeated words
    )
    cleaned = contractions.fix(unicodedata.normalize('NFKC', text))
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    squeezed = re.sub(r'\s+', ' ', cleaned)
    return squeezed.strip().lower()
39
+
40
def removeOtherLanguage(text):
    """Drop trailing translation notes and words containing non-ASCII characters.

    Everything from the first occurrence of ``' translated'`` onwards is cut
    off, every word that contains at least one non-ASCII character is
    removed, and the result is whitespace-squeezed, trimmed and lower-cased.

    Args:
        text: The input string.

    Returns:
        The filtered string.
    """
    head, marker, _ = text.partition(' translated')
    if marker:
        # The marker was found: keep only what precedes it.
        text = head.rstrip()
    ascii_only = re.sub(r'\b\w*[^\x00-\x7F]\w*\b', '', text)
    return re.sub(r'\s+', ' ', ascii_only).strip().lower()
48
+
49
def removeNonEnglish(text_series, custom_dict):
    """Keep only lemmatized English, non-stop-word tokens of each text.

    Args:
        text_series: pandas Series of strings to clean.
        custom_dict: Iterable of words that are removed outright before the
            dictionary filtering (matched case-insensitively as whole words).

    Returns:
        A Series aligned with ``text_series.index``; rows with no surviving
        token become the empty string.
    """
    # Strip every custom-vocabulary word.
    alternatives = '|'.join(re.escape(word) for word in custom_dict)
    custom_pattern = r'\b(?:' + alternatives + r')\b'
    stripped = text_series.str.replace(custom_pattern, '', case=False, regex=True)

    # One row per token; the original row label is kept as the index.
    tokens = stripped.str.split().explode()
    tokens = tokens[tokens.str.lower().isin(english_words)]
    tokens = tokens[~tokens.str.lower().isin(stop_words)]
    lemmas = tokens.apply(lambda word: lemmatizer.lemmatize(word.lower()))

    # Re-assemble the tokens per original row and restore dropped rows.
    joined = lemmas.groupby(level=0).agg(' '.join)
    joined = joined.reindex(text_series.index, fill_value='')

    # Lemmatization can create new adjacent duplicates; collapse them.
    return joined.str.replace(
        r'\b(\w+)(?:\s+\1\b)+', r'\1', case=False, regex=True
    )
62
+
63
def cleanInference(df):
    """Run the full text-cleaning pipeline on the ``poem`` column of ``df``.

    The custom vocabulary is read from ``custom_vocab.txt`` on every call.
    The ``poem`` column is overwritten in place.

    Args:
        df: DataFrame with a ``poem`` column of strings.

    Returns:
        The same DataFrame object, with ``poem`` cleaned.
    """
    custom_dict = loadCustomDict('custom_vocab.txt')
    # Per-string steps first, then the Series-level dictionary filter.
    for step in (normalizeWhitespace, removeOtherLanguage):
        df['poem'] = df['poem'].apply(step)
    df['poem'] = removeNonEnglish(df['poem'], custom_dict)
    return df
69
+
70
def kerasTokenizer(text, tokenizer):
    """Convert texts to integer sequences padded to length 128.

    Args:
        text: Collection of strings passed to ``tokenizer.texts_to_sequences``.
        tokenizer: Object exposing ``texts_to_sequences``.

    Returns:
        The result of ``pad_sequences(..., maxlen=128)`` on the sequences.
    """
    # NOTE(review): 128 presumably matches the sequence length used in
    # training -- confirm before changing.
    return pad_sequences(tokenizer.texts_to_sequences(text), maxlen=128)
74
+
75
def getLabelEncoder(name):
    """Return the index-to-label mapping for a labelling technique.

    Labels are sorted alphabetically and numbered from 0 in that order.

    Args:
        name: One of ``'hartmann'``, ``'savani'`` or ``'deepseek'``.

    Returns:
        A dict mapping each integer class index to its label string.

    Raises:
        ValueError: If ``name`` is not a known labelling technique.
    """
    # NOTE(review): the alphabetical order presumably mirrors the label
    # encoding used at training time -- confirm.
    label_sets = {
        'hartmann': ['sadness', 'fear', 'anger', 'joy', 'neutral', 'surprise', 'disgust'],
        'savani': ['joy', 'sadness', 'anger', 'fear', 'love', 'surprise'],
        'deepseek': ['other', 'sadness', 'joy', 'hope', 'love'],
    }
    try:
        labels = label_sets[name]
    except KeyError:
        # Fail loudly instead of silently returning None, which would only
        # surface later as an opaque TypeError at the lookup site.
        raise ValueError(
            f"Unknown labelling technique {name!r}; "
            f"expected one of {sorted(label_sets)}"
        ) from None
    return dict(enumerate(sorted(labels)))
85
+
86
# Load the fitted tokenizers. These pickle files ship with the repository;
# pickle must never be used on files from an untrusted source.
with open("tokenizer_savani_0.1_lstm.pkl", "rb") as f:
    tokenizer_savani = pickle.load(f)
with open("tokenizer_hartmann_0.1_lstm.pkl", "rb") as g:
    tokenizer_hartman = pickle.load(g)
with open("tokenizer_deepseek_0.1_lstm.pkl", "rb") as h:
    tokenizer_deepseek = pickle.load(h)

# Load the trained Keras models, one per labelling technique.
model_savani = load_model("best_model_savani_0.1_lstm.keras")
model_hartman = load_model("best_model_hartmann_0.1_lstm.keras")
model_deepseek = load_model("best_model_deepseek_0.1_lstm.keras")

# Registry keyed by labelling-technique name.
MODELS = {
    "savani": dict(model=model_savani, tokenizer=tokenizer_savani),
    "hartmann": dict(model=model_hartman, tokenizer=tokenizer_hartman),
    "deepseek": dict(model=model_deepseek, tokenizer=tokenizer_deepseek),
}
111
+
112
# Cache of (tokenizer, model) tuples, keyed by model name.
loaded_models = {}


def load_model(model_name):
    """Return the ``(tokenizer, model)`` pair registered under ``model_name``.

    The pair is taken from ``MODELS`` on first use and served from
    ``loaded_models`` afterwards.

    NOTE(review): this definition rebinds the name ``load_model`` that the
    top of the file imports from ``tensorflow.keras.models``; any later call
    expecting the Keras loader will reach this function instead.

    Args:
        model_name: A key of ``MODELS``.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    cached = loaded_models.get(model_name)
    if cached is None:
        entry = MODELS[model_name]
        cached = (entry['tokenizer'], entry['model'])
        loaded_models[model_name] = cached
    return cached
120
+
121
+
122
def predict_poem(poem, model_name):
    """Classify the emotion of a poem with the selected model.

    Args:
        poem: The poem text.
        model_name: Name of the labelling technique / model to use.

    Returns:
        The predicted label string for the poem.
    """
    tokenizer, model = load_model(model_name)
    # The cleaning pipeline works on a DataFrame, so wrap the single poem.
    cleaned = cleanInference(pd.DataFrame({'poem': [poem]}))
    padded = kerasTokenizer(cleaned['poem'], tokenizer)
    scores = model.predict(padded, verbose=0)
    # Only one poem was submitted, so take the first row's arg-max.
    best_index = np.argmax(scores, axis=1)[0]
    return getLabelEncoder(model_name)[best_index]
131
+
132
# Gradio UI: pick a model, enter a poem, show the predicted emotion label.
with gr.Blocks(title="NLP Model Text Classifier") as demo:
    gr.Markdown("# 📜 Poem Emotion Classification")
    # predict_poem returns only a label string, so the help text must not
    # promise a confidence score.
    gr.Markdown("""
    - ## **Step 1:** Select a labeling technique (model - each has different emotion labels)
    - ## **Step 2:** Enter your poem text
    - ## **Output:** Predicted emotion label
    """)
    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            model_selector = gr.Dropdown(
                choices=list(MODELS.keys()),
                value="savani",
                interactive=True,
                label="Select Labelling Technique Model"
            )
            text_input = gr.Textbox(
                lines=5,
                placeholder="Enter text here...",
                label="Input Text",
                interactive=True
            )
            submit_btn = gr.Button("Classify", variant="primary")

        # Right column: result and reference links.
        with gr.Column():
            output_label = gr.Label(label="Classification Results")
            gr.Markdown("""
            **Poem References**
            - [Poem Hunter](https://www.poemhunter.com)
            - [Poem Generator](https://www.poem-generator.org.uk)
            - [HelloPoetry](https://hellopoetry.com)
            """)

    submit_btn.click(
        fn=predict_poem,
        inputs=[text_input, model_selector],
        outputs=[output_label]
    )

demo.launch()
best_model_deepseek_0.1_lstm.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b63717f815fcfb8e27dfa853998fe031b2021467ecd547e459a4f26a09479a53
3
+ size 119406080
best_model_hartmann_0.1_lstm.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:844122f8ba3616f2282e160663e5a671e1df2fbe1469a80a391bc5053e0daae5
3
+ size 119993288
best_model_savani_0.1_lstm.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5a3c6164ae322a44465c656c787b13313338f04d6d15e0c3ca8617368e6bd68
3
+ size 119762684
custom_vocab.txt ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ l
2
+ mi
3
+ e
4
+ ultimo
5
+ la
6
+ per
7
+ cor
8
+ non
9
+ viva
10
+ lei
11
+ mare
12
+ rag
13
+ ii
14
+ b
15
+ c
16
+ d
17
+ f
18
+ g
19
+ h
20
+ j
21
+ k
22
+ m
23
+ n
24
+ o
25
+ p
26
+ q
27
+ r
28
+ s
29
+ t
30
+ u
31
+ v
32
+ w
33
+ x
34
+ y
35
+ z
36
+ st
37
+ ye
38
+ tr
39
+ xxv
40
+ ix
41
+ iv
42
+ iii
43
+ vi
44
+ vii
45
+ viii
46
+ xi
47
+ xii
48
+ xiii
49
+ xv
50
+ xvi
51
+ xvii
52
+ xviii
53
+ xiv
54
+ xvv
55
+ ey
56
+ hey
57
+ oy
58
+ yew
59
+ of
60
+ in
61
+ the
62
+ an
63
+ a
64
+ ami
65
+ ah
66
+ ih
67
+ uh
68
+ jest
69
+ zest
70
+ be
71
+ rio
72
+ they
73
+ we
74
+ i
75
+ you
76
+ she
77
+ he
78
+ it
79
+ its
80
+ is
81
+ am
82
+ are
gradio.ipynb ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "8d1ce753",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "/home/hafizh/miniconda3/envs/MainCuda/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
14
+ " from .autonotebook import tqdm as notebook_tqdm\n",
15
+ "2025-04-12 16:14:06.410469: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
16
+ "2025-04-12 16:14:06.423302: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
17
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
18
+ "E0000 00:00:1744449246.437801 116764 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
19
+ "E0000 00:00:1744449246.442018 116764 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
20
+ "W0000 00:00:1744449246.452843 116764 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
21
+ "W0000 00:00:1744449246.452871 116764 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
22
+ "W0000 00:00:1744449246.452873 116764 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
23
+ "W0000 00:00:1744449246.452874 116764 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
24
+ "2025-04-12 16:14:06.456742: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
25
+ "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
26
+ "[nltk_data] Downloading package words to /home/hafizh/nltk_data...\n",
27
+ "[nltk_data] Package words is already up-to-date!\n",
28
+ "[nltk_data] Downloading package punkt_tab to /home/hafizh/nltk_data...\n",
29
+ "[nltk_data] Package punkt_tab is already up-to-date!\n",
30
+ "[nltk_data] Downloading package wordnet to /home/hafizh/nltk_data...\n",
31
+ "[nltk_data] Package wordnet is already up-to-date!\n",
32
+ "[nltk_data] Downloading package stopwords to /home/hafizh/nltk_data...\n",
33
+ "[nltk_data] Package stopwords is already up-to-date!\n"
34
+ ]
35
+ },
36
+ {
37
+ "data": {
38
+ "text/plain": [
39
+ "True"
40
+ ]
41
+ },
42
+ "execution_count": 1,
43
+ "metadata": {},
44
+ "output_type": "execute_result"
45
+ }
46
+ ],
47
+ "source": [
48
+ "import gradio as gr\n",
49
+ "import tensorflow as tf\n",
50
+ "import pickle\n",
51
+ "import unicodedata\n",
52
+ "import contractions\n",
53
+ "import re\n",
54
+ "import nltk\n",
55
+ "import pandas as pd\n",
56
+ "import numpy as np\n",
57
+ "from nltk.corpus import stopwords, words\n",
58
+ "from nltk.stem import WordNetLemmatizer\n",
59
+ "from tensorflow.keras.models import load_model #type:ignore\n",
60
+ "from tensorflow.keras.utils import pad_sequences # type: ignore\n",
61
+ "\n",
62
+ "nltk.download('words')\n",
63
+ "nltk.download('punkt_tab')\n",
64
+ "nltk.download('wordnet')\n",
65
+ "nltk.download('stopwords') "
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": 2,
71
+ "id": "f037a836",
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "lemmatizer = WordNetLemmatizer()\n",
76
+ "stop_words = set(stopwords.words('english'))\n",
77
+ "english_words = set(words.words())\n",
78
+ "\n",
79
+ "def loadCustomDict(path):\n",
80
+ " with open(path, 'r') as file:\n",
81
+ " return set(line.strip().lower() for line in file if line.strip())\n",
82
+ "\n",
83
+ "def normalizeWhitespace(text):\n",
84
+ " text = unicodedata.normalize('NFKC', text)\n",
85
+ " text = contractions.fix(text)\n",
86
+ " text = re.sub(r'[\\t\\r]+', ' ', text) # Menghapus tab\n",
87
+ " text = re.sub(r'\\b\\d+\\b', '', text) # Menghilangkan angka\n",
88
+ " text = re.sub(r'[-‐‑‒–—―]+', '', text)\n",
89
+ " text = re.sub(r'[_﹍﹎_]', '', text)\n",
90
+ " text = re.sub(r'[^\\w\\s]', '', text) # Hilangkan symbol punctuation\n",
91
+ " text = re.sub(r'\\b(\\w+)(?:\\s+\\1\\b)+', r'\\1', text)\n",
92
+ " text = re.sub(r'\\s+', ' ', text).strip().lower()\n",
93
+ " return text\n",
94
+ "\n",
95
+ "def removeOtherLanguage(text):\n",
96
+ " phrase = ' translated'\n",
97
+ " pos = text.find(phrase)\n",
98
+ " if pos != -1:\n",
99
+ " text = text[:pos].rstrip()\n",
100
+ " text = re.sub(r'\\b\\w*[^\\x00-\\x7F]\\w*\\b', '', text)\n",
101
+ " text = re.sub(r'\\s+', ' ', text).strip().lower()\n",
102
+ " return text\n",
103
+ "\n",
104
+ "def removeNonEnglish(text_series, custom_dict):\n",
105
+ " pattern = r'\\b(?:' + '|'.join(re.escape(word) for word in custom_dict) + r')\\b'\n",
106
+ " temp_series = text_series.str.replace(pattern, '', case=False, regex=True)\n",
107
+ " split_words = temp_series.str.split()\n",
108
+ " exploded = split_words.explode()\n",
109
+ " exploded = exploded[exploded.str.lower().isin(english_words)]\n",
110
+ " filtered = exploded[~exploded.str.lower().isin(stop_words)]\n",
111
+ " lemmatized = filtered.apply(lambda word: lemmatizer.lemmatize(word.lower()))\n",
112
+ " cleaned_text_series = lemmatized.groupby(level=0).agg(' '.join)\n",
113
+ " pattern2 = r'\\b(\\w+)(?:\\s+\\1\\b)+' #, r'\\1', text)\n",
114
+ " ser = cleaned_text_series.reindex(text_series.index, fill_value='')\n",
115
+ " text = ser.str.replace(pattern2, r'\\1', case=False, regex=True)\n",
116
+ " return text\n",
117
+ "\n",
118
+ "def cleanInference(df):\n",
119
+ " custom_dict = loadCustomDict('custom_vocab.txt')\n",
120
+ " df['poem'] = df['poem'].apply(normalizeWhitespace)\n",
121
+ " df['poem'] = df['poem'].apply(removeOtherLanguage)\n",
122
+ " df['poem'] = removeNonEnglish(df['poem'], custom_dict)\n",
123
+ " return df\n",
124
+ "\n",
125
+ "def kerasTokenizer(text, tokenizer):\n",
126
+ " text_sequence = tokenizer.texts_to_sequences(text)\n",
127
+ " text_padded = pad_sequences(text_sequence, maxlen=128)\n",
128
+ " return text_padded\n",
129
+ "\n",
130
+ "def getLabelEncoder(name):\n",
131
+ " hartmann = ['sadness', 'fear', 'anger', 'joy', 'neutral', 'surprise', 'disgust']\n",
132
+ " savani = ['joy', 'sadness', 'anger', 'fear', 'love', 'surprise']\n",
133
+ " deepseek = ['other', 'sadness', 'joy', 'hope', 'love']\n",
134
+ " if name=='hartmann':\n",
135
+ " return {i : label for i, label in enumerate(sorted(hartmann))}\n",
136
+ " if name=='savani':\n",
137
+ " return {i : label for i, label in enumerate(sorted(savani))}\n",
138
+ " if name=='deepseek':\n",
139
+ " return {i : label for i, label in enumerate(sorted(deepseek))}"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": 3,
145
+ "id": "ffcb03a6",
146
+ "metadata": {},
147
+ "outputs": [],
148
+ "source": [
149
+ "poem1 = '''\n",
150
+ "Deliverance is not for me in renunciation.\n",
151
+ "I feel the embrace of freedom in a thousand bonds of delight.\n",
152
+ "\n",
153
+ "Thou ever pourest for me the fresh draught of thy wine of various\n",
154
+ "colours and fragrance, filling this earthen vessel to the brim.\n",
155
+ "\n",
156
+ "My world will light its hundred different lamps with thy flame\n",
157
+ "and place them before the altar of thy temple.\n",
158
+ "\n",
159
+ "No, I will never shut the doors of my senses.\n",
160
+ "The delights of sight and hearing and touch will bear thy delight.\n",
161
+ "\n",
162
+ "Yes, all my illusions will burn into illumination of joy,\n",
163
+ "and all my desires ripen into fruits of love.\n",
164
+ "'''"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "code",
169
+ "execution_count": 4,
170
+ "id": "2cedfa00",
171
+ "metadata": {},
172
+ "outputs": [
173
+ {
174
+ "name": "stderr",
175
+ "output_type": "stream",
176
+ "text": [
177
+ "I0000 00:00:1744449252.416426 116764 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5563 MB memory: -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9\n"
178
+ ]
179
+ }
180
+ ],
181
+ "source": [
182
+ "with open(f\"./tokenizer/tokenizer_savani_0.1_lstm.pkl\", \"rb\") as f:\n",
183
+ " tokenizer_savani = pickle.load(f)\n",
184
+ "with open(f\"./tokenizer/tokenizer_hartmann_0.1_lstm.pkl\", \"rb\") as g:\n",
185
+ " tokenizer_hartman = pickle.load(g)\n",
186
+ "with open(f\"./tokenizer/tokenizer_deepseek_0.1_lstm.pkl\", \"rb\") as h:\n",
187
+ " tokenizer_deepseek = pickle.load(h)\n",
188
+ "\n",
189
+ "model_savani = load_model(f\"./model/best_model_savani_0.1_lstm.keras\")\n",
190
+ "model_hartman = load_model(f\"./model/best_model_hartmann_0.1_lstm.keras\")\n",
191
+ "model_deepseek = load_model(f\"./model/best_model_deepseek_0.1_lstm.keras\")\n",
192
+ "\n",
193
+ "MODELS = {\n",
194
+ " \"savani\": {\n",
195
+ " \"model\": model_savani,\n",
196
+ " \"tokenizer\": tokenizer_savani\n",
197
+ " },\n",
198
+ " \"hartmann\": {\n",
199
+ " \"model\": model_hartman,\n",
200
+ " \"tokenizer\": tokenizer_hartman\n",
201
+ " },\n",
202
+ " \"deepseek\": {\n",
203
+ " \"model\": model_deepseek,\n",
204
+ " \"tokenizer\": tokenizer_deepseek\n",
205
+ " },\n",
206
+ "}\n",
207
+ "\n"
208
+ ]
209
+ },
210
+ {
211
+ "cell_type": "code",
212
+ "execution_count": 7,
213
+ "id": "1c9affe4",
214
+ "metadata": {},
215
+ "outputs": [],
216
+ "source": [
217
+ "loaded_models = {}\n",
218
+ "\n",
219
+ "def load_model(model_name):\n",
220
+ " if model_name not in loaded_models:\n",
221
+ " tokenizer = MODELS[model_name]['tokenizer']\n",
222
+ " model = MODELS[model_name]['model']\n",
223
+ " loaded_models[model_name] = (tokenizer, model)\n",
224
+ " return loaded_models[model_name]\n",
225
+ " \n",
226
+ "\n",
227
+ "def predict_poem(poem, model_name):\n",
228
+ " tokenizer, model = load_model(model_name)\n",
229
+ " poem_df = pd.DataFrame({'poem' : [poem]})\n",
230
+ " clean_poem_df = cleanInference(poem_df)\n",
231
+ " text_keras = kerasTokenizer(clean_poem_df['poem'], tokenizer)\n",
232
+ " result = model.predict(text_keras, verbose=0)\n",
233
+ " predicted_labels = np.argmax(result, axis=1)\n",
234
+ " dic = getLabelEncoder(model_name)\n",
235
+ " return dic[predicted_labels[0]]"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": null,
241
+ "id": "4b844491",
242
+ "metadata": {},
243
+ "outputs": [
244
+ {
245
+ "name": "stdout",
246
+ "output_type": "stream",
247
+ "text": [
248
+ "* Running on local URL: http://127.0.0.1:7860\n",
249
+ "\n",
250
+ "To create a public link, set `share=True` in `launch()`.\n"
251
+ ]
252
+ },
253
+ {
254
+ "data": {
255
+ "text/html": [
256
+ "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
257
+ ],
258
+ "text/plain": [
259
+ "<IPython.core.display.HTML object>"
260
+ ]
261
+ },
262
+ "metadata": {},
263
+ "output_type": "display_data"
264
+ },
265
+ {
266
+ "name": "stdout",
267
+ "output_type": "stream",
268
+ "text": [
269
+ "Keyboard interruption in main thread... closing server.\n"
270
+ ]
271
+ },
272
+ {
273
+ "data": {
274
+ "text/plain": []
275
+ },
276
+ "execution_count": 9,
277
+ "metadata": {},
278
+ "output_type": "execute_result"
279
+ }
280
+ ],
281
+ "source": [
282
+ "with gr.Blocks(title=\"NLP Model Text Classifier\") as demo:\n",
283
+ " gr.Markdown(\"## 📜 Poem Emotion Classification\")\n",
284
+ " gr.Markdown(\"\"\"\n",
285
+ " - **Step 1:** Select a labeling technique (model - each has different emotion labels) \n",
286
+ " - **Step 2:** Enter your poem text \n",
287
+ " - **Output:** Predicted emotion with confidence score \n",
288
+ " *Example: Try \"The sun shines bright\" with the Savani model*\n",
289
+ " \"\"\")\n",
290
+ " with gr.Row():\n",
291
+ " with gr.Column():\n",
292
+ " model_selector = gr.Dropdown(\n",
293
+ " choices=list(MODELS.keys()),\n",
294
+ " value=\"savani\",\n",
295
+ " interactive=True,\n",
296
+ " label=\"Select Labelling Technique Model\"\n",
297
+ " )\n",
298
+ " text_input = gr.Textbox(\n",
299
+ " lines=5,\n",
300
+ " placeholder=\"Enter text here...\",\n",
301
+ " label=\"Input Text\",\n",
302
+ " interactive=True\n",
303
+ " )\n",
304
+ " submit_btn = gr.Button(\"Classify\", variant=\"primary\")\n",
305
+ " \n",
306
+ " with gr.Column():\n",
307
+ " output_label = gr.Label(label=\"Classification Results\")\n",
308
+ " gr.Markdown(\"\"\"\n",
309
+ " **Poem References** \n",
310
+ " - [Poem Hunter](https://www.poemhunter.com)\n",
311
+ " - [Poem Generator](https://www.poem-generator.org.uk)\n",
312
+ " - [HelloPoetry](https://hellopoetry.com)\n",
313
+ " \"\"\")\n",
314
+ " \n",
315
+ " \n",
316
+ " submit_btn.click(\n",
317
+ " fn=predict_poem,\n",
318
+ " inputs=[text_input, model_selector],\n",
319
+ " outputs=[output_label]\n",
320
+ " )\n",
321
+ "\n",
322
+ "demo.launch(debug=True)"
323
+ ]
324
+ },
325
+ {
326
+ "cell_type": "code",
327
+ "execution_count": null,
328
+ "id": "b9868e38",
329
+ "metadata": {},
330
+ "outputs": [],
331
+ "source": []
332
+ }
333
+ ],
334
+ "metadata": {
335
+ "kernelspec": {
336
+ "display_name": "MainCuda",
337
+ "language": "python",
338
+ "name": "python3"
339
+ },
340
+ "language_info": {
341
+ "codemirror_mode": {
342
+ "name": "ipython",
343
+ "version": 3
344
+ },
345
+ "file_extension": ".py",
346
+ "mimetype": "text/x-python",
347
+ "name": "python",
348
+ "nbconvert_exporter": "python",
349
+ "pygments_lexer": "ipython3",
350
+ "version": "3.12.9"
351
+ }
352
+ },
353
+ "nbformat": 4,
354
+ "nbformat_minor": 5
355
+ }
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ tensorflow
2
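+ # pickle: part of the Python standard library, not installable via pip
3
+ # unicodedata: part of the Python standard library, not installable via pip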
4
+ contractions
5
+ nltk
6
+ pandas
7
+ numpy
tokenizer_deepseek_0.1_lstm.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5334fae5c4c2a533be4781c7ae70e36092daef2a9afe60bb6b0518a04bc581f
3
+ size 1323687
tokenizer_hartmann_0.1_lstm.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f95673c503bc2e483e49f070f4ca89ee5e63aa786c02bb83e0fbd6e208fd0d2f
3
+ size 1330884
tokenizer_savani_0.1_lstm.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30daa553ad48943cfe255fee99482982c3fd1c99af36030dbe384219390e03d2
3
+ size 1328993