Tarun Singh committed on
Commit ·
8733295
1
Parent(s): 4e5b05d
switching to tflite
Browse files
- .gitignore +2 -0
- app.py +43 -26
- model/{transformer/fingerprint.pb → model_float16.tflite} +2 -2
- model/transformer/keras_metadata.pb +0 -3
- model/transformer/saved_model.pb +0 -3
- model/transformer/variables/variables.data-00000-of-00001 +0 -3
- model/transformer/variables/variables.index +0 -0
- requirements.txt +1 -1
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
env
|
| 2 |
+
.env
|
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import re
|
|
| 3 |
import emoji
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
-
import tensorflow as
|
| 7 |
from googletrans import Translator
|
| 8 |
from transformers import AutoTokenizer
|
| 9 |
from googleapiclient.discovery import build
|
|
@@ -14,20 +14,22 @@ api_keys = os.getenv("YOUTUBE_API_KEYS").split(',')
|
|
| 14 |
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
|
| 15 |
|
| 16 |
tokenizer_path = 'model/saved_tokenizer'
|
| 17 |
-
model_path = 'model/
|
| 18 |
fine_tuned_tokenizer = None
|
| 19 |
-
|
|
|
|
|
|
|
| 20 |
SENTIMENT_LABELS = {0: 'Sadness', 1: 'Joy', 2: 'Love', 3: 'Annoyed', 4: 'Fear', 5: 'Surprise'}
|
| 21 |
|
| 22 |
-
# load model and tokenizer
|
| 23 |
def load_model_and_tokenizer():
|
| 24 |
-
global fine_tuned_tokenizer,
|
| 25 |
-
if fine_tuned_tokenizer is None or
|
| 26 |
fine_tuned_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
# Get the comments from the video
|
| 31 |
def build_youtube_client(api_key):
|
| 32 |
return build('youtube', 'v3', developerKey=api_key)
|
| 33 |
|
|
@@ -88,11 +90,13 @@ def extract_youtube_video_id(url):
|
|
| 88 |
def trim_whitespace(s):
|
| 89 |
return s.strip()
|
| 90 |
|
| 91 |
-
# Text Pre Processing
|
| 92 |
def remove_emojis(text):
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
-
# Get the sentiments from the text
|
| 96 |
def detect_and_translate(comments: pd.DataFrame, required_count=10):
|
| 97 |
translator = Translator()
|
| 98 |
translated_comments = []
|
|
@@ -161,17 +165,31 @@ def detect_and_translate(comments: pd.DataFrame, required_count=10):
|
|
| 161 |
result_df = pd.DataFrame(translated_comments, columns=['Comment'])
|
| 162 |
return result_df
|
| 163 |
|
| 164 |
-
def
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
return predicted_index
|
| 176 |
|
| 177 |
def text_pre_processing(translated_comment: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -186,14 +204,14 @@ def get_sentiment(comments_df : pd.DataFrame,comment_count=10) -> tuple:
|
|
| 186 |
comments_by_sentiment = {'Sadness': [], 'Joy': [], 'Love': [], 'Annoyed': [], 'Fear': [], 'Surprise': []}
|
| 187 |
translated_comment = detect_and_translate(comments_df,comment_count)
|
| 188 |
pre_processed_comments = text_pre_processing(translated_comment)
|
|
|
|
| 189 |
for index, row in pre_processed_comments.iterrows():
|
| 190 |
-
sentiment_index =
|
| 191 |
sentiment_label = SENTIMENT_LABELS[sentiment_index]
|
| 192 |
sentiment_counts[sentiment_label]+=1
|
| 193 |
comments_by_sentiment[sentiment_label].append(row['Comment'])
|
| 194 |
return (sentiment_counts,comments_by_sentiment)
|
| 195 |
|
| 196 |
-
# Flask App
|
| 197 |
@app.route('/', methods=['GET', 'POST'])
|
| 198 |
def index():
|
| 199 |
if request.method == 'POST':
|
|
@@ -204,7 +222,6 @@ def index():
|
|
| 204 |
if video_id:
|
| 205 |
comments_df = get_comments(video_id, max_results=comment_count * 10)
|
| 206 |
if not comments_df.empty:
|
| 207 |
-
load_model_and_tokenizer()
|
| 208 |
sentiment_counts, comments_by_sentiment = get_sentiment(comments_df, comment_count)
|
| 209 |
top_comment = comments_df.iloc[0]["Comment"]
|
| 210 |
return render_template('index.html',
|
|
|
|
| 3 |
import emoji
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
+
import tensorflow.lite as tflite
|
| 7 |
from googletrans import Translator
|
| 8 |
from transformers import AutoTokenizer
|
| 9 |
from googleapiclient.discovery import build
|
|
|
|
| 14 |
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
|
| 15 |
|
| 16 |
tokenizer_path = 'model/saved_tokenizer'
|
| 17 |
+
model_path = 'model/model_float16.tflite'
|
| 18 |
fine_tuned_tokenizer = None
|
| 19 |
+
interpreter = None
|
| 20 |
+
input_details = None
|
| 21 |
+
output_details = None
|
| 22 |
SENTIMENT_LABELS = {0: 'Sadness', 1: 'Joy', 2: 'Love', 3: 'Annoyed', 4: 'Fear', 5: 'Surprise'}
|
| 23 |
|
|
|
|
| 24 |
def load_model_and_tokenizer():
    """Lazily initialise the tokenizer and TFLite interpreter globals.

    Does nothing when all four module-level handles are already set. If any
    one of them is still ``None``, every handle is (re)created from
    ``tokenizer_path`` and ``model_path``.
    """
    global fine_tuned_tokenizer, interpreter, input_details, output_details

    handles = (fine_tuned_tokenizer, interpreter, input_details, output_details)
    if all(handle is not None for handle in handles):
        return

    fine_tuned_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

    interpreter = tflite.Interpreter(model_path=model_path)
    # Allocate tensors, then record the input/output tensor descriptors.
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
|
| 32 |
|
|
|
|
| 33 |
def build_youtube_client(api_key):
|
| 34 |
return build('youtube', 'v3', developerKey=api_key)
|
| 35 |
|
|
|
|
| 90 |
def trim_whitespace(s):
|
| 91 |
return s.strip()
|
| 92 |
|
|
|
|
| 93 |
def remove_emojis(text):
    """Strip every emoji from ``text``.

    Returns the stripped text, or a fixed placeholder sentence when nothing
    is left after the emojis are removed.
    """
    stripped = emoji.replace_emoji(text, replace="")
    return stripped if stripped != '' else 'This is a empty comment'
|
| 99 |
|
|
|
|
| 100 |
def detect_and_translate(comments: pd.DataFrame, required_count=10):
|
| 101 |
translator = Translator()
|
| 102 |
translated_comments = []
|
|
|
|
| 165 |
result_df = pd.DataFrame(translated_comments, columns=['Comment'])
|
| 166 |
return result_df
|
| 167 |
|
| 168 |
+
def tflite_predict(text):
    """Classify one comment with the TFLite model and return its class index.

    Relies on the module-level ``fine_tuned_tokenizer``, ``interpreter``,
    ``input_details`` and ``output_details`` being initialised before the
    call.

    Args:
        text: The comment text to classify.

    Returns:
        Index of the highest-scoring class in the model's first output
        tensor.
    """
    # Pad/truncate every comment to a fixed length of 55 tokens.
    encoded = fine_tuned_tokenizer(
        text,
        return_tensors="np",
        padding="max_length",
        truncation=True,
        max_length=55,
    )

    # (feature name, positional fallback). Input tensors are matched by name
    # so the result does not depend on the order in which the converter
    # happened to emit them; the fallback positions are the ones this
    # function previously hard-coded.
    feature_slots = (
        ("input_ids", 1),
        ("attention_mask", 0),
        ("token_type_ids", 2),
    )
    for feature, fallback_pos in feature_slots:
        detail = next(
            (d for d in input_details if feature in d.get("name", "")),
            None,
        )
        if detail is None:
            detail = input_details[fallback_pos]

        # Shape (1, seq_len), cast to the dtype the tensor itself declares
        # rather than assuming int64.
        batch = np.asarray(encoded[feature]).reshape(1, -1).astype(detail["dtype"])
        interpreter.set_tensor(detail["index"], batch)

    interpreter.invoke()

    scores = interpreter.get_tensor(output_details[0]["index"])
    predicted_index = np.argmax(scores, axis=1)[0]
    return predicted_index
|
| 194 |
|
| 195 |
def text_pre_processing(translated_comment: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
| 204 |
comments_by_sentiment = {'Sadness': [], 'Joy': [], 'Love': [], 'Annoyed': [], 'Fear': [], 'Surprise': []}
|
| 205 |
translated_comment = detect_and_translate(comments_df,comment_count)
|
| 206 |
pre_processed_comments = text_pre_processing(translated_comment)
|
| 207 |
+
load_model_and_tokenizer()
|
| 208 |
for index, row in pre_processed_comments.iterrows():
|
| 209 |
+
sentiment_index = tflite_predict(row['Comment'])
|
| 210 |
sentiment_label = SENTIMENT_LABELS[sentiment_index]
|
| 211 |
sentiment_counts[sentiment_label]+=1
|
| 212 |
comments_by_sentiment[sentiment_label].append(row['Comment'])
|
| 213 |
return (sentiment_counts,comments_by_sentiment)
|
| 214 |
|
|
|
|
| 215 |
@app.route('/', methods=['GET', 'POST'])
|
| 216 |
def index():
|
| 217 |
if request.method == 'POST':
|
|
|
|
| 222 |
if video_id:
|
| 223 |
comments_df = get_comments(video_id, max_results=comment_count * 10)
|
| 224 |
if not comments_df.empty:
|
|
|
|
| 225 |
sentiment_counts, comments_by_sentiment = get_sentiment(comments_df, comment_count)
|
| 226 |
top_comment = comments_df.iloc[0]["Comment"]
|
| 227 |
return render_template('index.html',
|
model/{transformer/fingerprint.pb → model_float16.tflite}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ec398514d6c710430eea30fbfc183e9901029238087bac2864c6a06becce2ea
|
| 3 |
+
size 98488392
|
model/transformer/keras_metadata.pb
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:296e85b069c632dbf64c20ae3c43f600b1231507c163618b97e6b07bdc1f1071
|
| 3 |
-
size 692146
|
|
|
|
|
|
|
|
|
|
|
|
model/transformer/saved_model.pb
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:e2bfdfb55abf7231c80a126c7e52660325cd7bb10442c02e2442899d68f63572
|
| 3 |
-
size 49147810
|
|
|
|
|
|
|
|
|
|
|
|
model/transformer/variables/variables.data-00000-of-00001
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:890fd47658860323f349c3ea633812405ed3464b389142c556b2b96ea5fe786d
|
| 3 |
-
size 296341794
|
|
|
|
|
|
|
|
|
|
|
|
model/transformer/variables/variables.index
DELETED
|
Binary file (189 kB)
|
|
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
google-api-python-client
|
| 2 |
transformers==4.42.4
|
| 3 |
-
|
| 4 |
pandas
|
| 5 |
Flask
|
| 6 |
emoji
|
|
|
|
| 1 |
google-api-python-client
|
| 2 |
transformers==4.42.4
|
| 3 |
+
tflite
|
| 4 |
pandas
|
| 5 |
Flask
|
| 6 |
emoji
|