Tarun Singh committed on
Commit
8733295
·
1 Parent(s): 4e5b05d

switching to tflite

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ env
2
+ .env
app.py CHANGED
@@ -3,7 +3,7 @@ import re
3
  import emoji
4
  import numpy as np
5
  import pandas as pd
6
- import tensorflow as tf
7
  from googletrans import Translator
8
  from transformers import AutoTokenizer
9
  from googleapiclient.discovery import build
@@ -14,20 +14,22 @@ api_keys = os.getenv("YOUTUBE_API_KEYS").split(',')
14
  os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
15
 
16
  tokenizer_path = 'model/saved_tokenizer'
17
- model_path = 'model/transformer'
18
  fine_tuned_tokenizer = None
19
- fine_tuned_model = None
 
 
20
  SENTIMENT_LABELS = {0: 'Sadness', 1: 'Joy', 2: 'Love', 3: 'Annoyed', 4: 'Fear', 5: 'Surprise'}
21
 
22
- # load model and tokenizer
23
  def load_model_and_tokenizer():
24
- global fine_tuned_tokenizer, fine_tuned_model
25
- if fine_tuned_tokenizer is None or fine_tuned_model is None:
26
  fine_tuned_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
27
- fine_tuned_model = tf.saved_model.load(model_path)
28
- # print("model is loaded successfully")
 
 
29
 
30
- # Get the comments form video
31
  def build_youtube_client(api_key):
32
  return build('youtube', 'v3', developerKey=api_key)
33
 
@@ -88,11 +90,13 @@ def extract_youtube_video_id(url):
88
  def trim_whitespace(s):
89
  return s.strip()
90
 
91
- # Text Pre Processing
92
  def remove_emojis(text):
93
- return emoji.replace_emoji(text, replace="")
 
 
 
 
94
 
95
- # Get the Sentiments form the text
96
  def detect_and_translate(comments: pd.DataFrame, required_count=10):
97
  translator = Translator()
98
  translated_comments = []
@@ -161,17 +165,31 @@ def detect_and_translate(comments: pd.DataFrame, required_count=10):
161
  result_df = pd.DataFrame(translated_comments, columns=['Comment'])
162
  return result_df
163
 
164
- def return_sentiment(text : str) -> np.int64 :
165
- inputs = fine_tuned_tokenizer(text, return_tensors="tf", padding="max_length", truncation=True, max_length=55)
166
- input_dict = {
167
- 'input_ids': tf.cast(inputs['input_ids'], tf.int64),
168
- 'attention_mask': tf.cast(inputs['attention_mask'], tf.int64),
169
- 'token_type_ids': tf.cast(inputs['token_type_ids'], tf.int64)
170
- }
171
- outputs = fine_tuned_model(input_dict)
172
- probabilities = tf.nn.softmax(outputs, axis=-1)
173
- probabilities_np = probabilities.numpy()[0]
174
- predicted_index = np.argmax(probabilities_np)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  return predicted_index
176
 
177
  def text_pre_processing(translated_comment: pd.DataFrame) -> pd.DataFrame:
@@ -186,14 +204,14 @@ def get_sentiment(comments_df : pd.DataFrame,comment_count=10) -> tuple:
186
  comments_by_sentiment = {'Sadness': [], 'Joy': [], 'Love': [], 'Annoyed': [], 'Fear': [], 'Surprise': []}
187
  translated_comment = detect_and_translate(comments_df,comment_count)
188
  pre_processed_comments = text_pre_processing(translated_comment)
 
189
  for index, row in pre_processed_comments.iterrows():
190
- sentiment_index = return_sentiment(row['Comment'])
191
  sentiment_label = SENTIMENT_LABELS[sentiment_index]
192
  sentiment_counts[sentiment_label]+=1
193
  comments_by_sentiment[sentiment_label].append(row['Comment'])
194
  return (sentiment_counts,comments_by_sentiment)
195
 
196
- # Flask App
197
  @app.route('/', methods=['GET', 'POST'])
198
  def index():
199
  if request.method == 'POST':
@@ -204,7 +222,6 @@ def index():
204
  if video_id:
205
  comments_df = get_comments(video_id, max_results=comment_count * 10)
206
  if not comments_df.empty:
207
- load_model_and_tokenizer()
208
  sentiment_counts, comments_by_sentiment = get_sentiment(comments_df, comment_count)
209
  top_comment = comments_df.iloc[0]["Comment"]
210
  return render_template('index.html',
 
3
  import emoji
4
  import numpy as np
5
  import pandas as pd
6
+ import tensorflow.lite as tflite
7
  from googletrans import Translator
8
  from transformers import AutoTokenizer
9
  from googleapiclient.discovery import build
 
14
  os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
15
 
16
  tokenizer_path = 'model/saved_tokenizer'
17
+ model_path = 'model/model_float16.tflite'
18
  fine_tuned_tokenizer = None
19
+ interpreter = None
20
+ input_details = None
21
+ output_details = None
22
  SENTIMENT_LABELS = {0: 'Sadness', 1: 'Joy', 2: 'Love', 3: 'Annoyed', 4: 'Fear', 5: 'Surprise'}
23
 
 
def load_model_and_tokenizer():
    """Lazily initialise the tokenizer and the TFLite interpreter.

    Fills the module-level ``fine_tuned_tokenizer``, ``interpreter``,
    ``input_details`` and ``output_details``. When all four are already
    populated the call returns immediately, so it can be invoked before
    every prediction batch without reloading anything.

    NOTE(review): ``tflite`` is imported as ``tensorflow.lite`` in this
    file, while requirements.txt lists only ``tflite`` -- confirm that the
    deployment environment actually provides TensorFlow.
    """
    global fine_tuned_tokenizer, interpreter, input_details, output_details

    cached = (fine_tuned_tokenizer, interpreter, input_details, output_details)
    if all(item is not None for item in cached):
        return

    # Anything missing triggers a full reload, in the same order as before:
    # tokenizer first, then the interpreter and its tensor metadata.
    fine_tuned_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    interpreter = tflite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
32
 
 
def build_youtube_client(api_key):
    """Create a client for the ``youtube`` ``v3`` API using ``api_key``."""
    youtube_client = build('youtube', 'v3', developerKey=api_key)
    return youtube_client
35
 
 
def trim_whitespace(s):
    """Return ``s`` with leading and trailing whitespace removed."""
    trimmed = s.strip()
    return trimmed
92
 
 
def remove_emojis(text):
    """Strip every emoji from ``text``.

    If nothing is left after stripping (the result is the empty string), a
    fixed placeholder sentence is returned instead.
    """
    without_emojis = emoji.replace_emoji(text, replace="")
    return 'This is a empty comment' if without_emojis == '' else without_emojis
99
 
 
100
  def detect_and_translate(comments: pd.DataFrame, required_count=10):
101
  translator = Translator()
102
  translated_comments = []
 
165
  result_df = pd.DataFrame(translated_comments, columns=['Comment'])
166
  return result_df
167
 
def tflite_predict(text):
    """Return the predicted sentiment class index for ``text``.

    The text is tokenized with the module-level ``fine_tuned_tokenizer``
    (padded/truncated to 55 tokens), the three encoder features are written
    into the module-level TFLite ``interpreter``, and the position of the
    largest value in the first output tensor is returned.

    ``load_model_and_tokenizer()`` must have been called beforehand so that
    the tokenizer, interpreter and tensor details are populated.

    Args:
        text: Comment text to classify.

    Returns:
        Index of the highest-scoring class; callers use it as a key into
        ``SENTIMENT_LABELS``.
    """
    encoded = fine_tuned_tokenizer(
        text,
        return_tensors="np",
        padding="max_length",
        truncation=True,
        max_length=55,
    )

    # (feature name, positional slot used when tensor names do not identify
    # the feature). The slots are the ones this model was originally wired
    # with; resolving by name first keeps the wiring correct if a re-exported
    # model lists its inputs in a different order.
    feature_slots = (
        ("input_ids", 1),
        ("attention_mask", 0),
        ("token_type_ids", 2),
    )
    for feature, fallback_slot in feature_slots:
        named = [d for d in input_details if feature in str(d.get("name", ""))]
        detail = named[0] if named else input_details[fallback_slot]

        # Shape to a single-row batch and cast to the dtype the model
        # declares for this input (int64 when the detail does not say).
        tensor = np.asarray(encoded[feature]).reshape(1, -1)
        tensor = tensor.astype(detail.get("dtype", np.int64))
        interpreter.set_tensor(detail["index"], tensor)

    interpreter.invoke()

    scores = interpreter.get_tensor(output_details[0]["index"])
    predicted_index = np.argmax(scores, axis=1)[0]
    return predicted_index
194
 
195
  def text_pre_processing(translated_comment: pd.DataFrame) -> pd.DataFrame:
 
204
  comments_by_sentiment = {'Sadness': [], 'Joy': [], 'Love': [], 'Annoyed': [], 'Fear': [], 'Surprise': []}
205
  translated_comment = detect_and_translate(comments_df,comment_count)
206
  pre_processed_comments = text_pre_processing(translated_comment)
207
+ load_model_and_tokenizer()
208
  for index, row in pre_processed_comments.iterrows():
209
+ sentiment_index = tflite_predict(row['Comment'])
210
  sentiment_label = SENTIMENT_LABELS[sentiment_index]
211
  sentiment_counts[sentiment_label]+=1
212
  comments_by_sentiment[sentiment_label].append(row['Comment'])
213
  return (sentiment_counts,comments_by_sentiment)
214
 
 
215
  @app.route('/', methods=['GET', 'POST'])
216
  def index():
217
  if request.method == 'POST':
 
222
  if video_id:
223
  comments_df = get_comments(video_id, max_results=comment_count * 10)
224
  if not comments_df.empty:
 
225
  sentiment_counts, comments_by_sentiment = get_sentiment(comments_df, comment_count)
226
  top_comment = comments_df.iloc[0]["Comment"]
227
  return render_template('index.html',
model/{transformer/fingerprint.pb → model_float16.tflite} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:082699f61ee7da6b8fc24cafd9e42ee8de29df4f2e1bc00f9528aba0a9d9e3b7
3
- size 57
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ec398514d6c710430eea30fbfc183e9901029238087bac2864c6a06becce2ea
3
+ size 98488392
model/transformer/keras_metadata.pb DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:296e85b069c632dbf64c20ae3c43f600b1231507c163618b97e6b07bdc1f1071
3
- size 692146
 
 
 
 
model/transformer/saved_model.pb DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2bfdfb55abf7231c80a126c7e52660325cd7bb10442c02e2442899d68f63572
3
- size 49147810
 
 
 
 
model/transformer/variables/variables.data-00000-of-00001 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:890fd47658860323f349c3ea633812405ed3464b389142c556b2b96ea5fe786d
3
- size 296341794
 
 
 
 
model/transformer/variables/variables.index DELETED
Binary file (189 kB)
 
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  google-api-python-client
2
  transformers==4.42.4
3
- tensorflow-cpu==2.17.0
4
  pandas
5
  Flask
6
  emoji
 
1
  google-api-python-client
2
  transformers==4.42.4
3
+ tflite
4
  pandas
5
  Flask
6
  emoji