Spaces:

Tarun-singh
/

Youtube-Video-Comments-Sentiment-Analysis

Running

App Files Files Community

Tarun Singh committed on Sep 7, 2024

Commit

8733295

1 Parent(s): 4e5b05d

switching to tflite

Browse files

Files changed (8) hide show

.gitignore +2 -0
app.py +43 -26
model/{transformer/fingerprint.pb → model_float16.tflite} +2 -2
model/transformer/keras_metadata.pb +0 -3
model/transformer/saved_model.pb +0 -3
model/transformer/variables/variables.data-00000-of-00001 +0 -3
model/transformer/variables/variables.index +0 -0
requirements.txt +1 -1

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ env
2	+ .env

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import re
 import emoji
 import numpy as np
 import pandas as pd
-import tensorflow as tf
 from googletrans import Translator
 from transformers import AutoTokenizer
 from googleapiclient.discovery import build
@@ -14,20 +14,22 @@ api_keys = os.getenv("YOUTUBE_API_KEYS").split(',')
 os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
 tokenizer_path = 'model/saved_tokenizer'
-model_path = 'model/transformer'
 fine_tuned_tokenizer = None
-fine_tuned_model = None
 SENTIMENT_LABELS = {0: 'Sadness', 1: 'Joy', 2: 'Love', 3: 'Annoyed', 4: 'Fear', 5: 'Surprise'}
-# load model and tokenizer
 def load_model_and_tokenizer():
-    global fine_tuned_tokenizer, fine_tuned_model
-    if fine_tuned_tokenizer is None or fine_tuned_model is None:
         fine_tuned_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
-        fine_tuned_model = tf.saved_model.load(model_path)
-        # print("model is loaded successfully")
-#  Get the comments form video
 def build_youtube_client(api_key):
     return build('youtube', 'v3', developerKey=api_key)
@@ -88,11 +90,13 @@ def extract_youtube_video_id(url):
 def trim_whitespace(s):
     return s.strip()
-# Text Pre Processing
 def remove_emojis(text):
-  return emoji.replace_emoji(text, replace="")
-# Get the Sentiments form the text
 def detect_and_translate(comments: pd.DataFrame, required_count=10):
     translator = Translator()
     translated_comments = []
@@ -161,17 +165,31 @@ def detect_and_translate(comments: pd.DataFrame, required_count=10):
     result_df = pd.DataFrame(translated_comments, columns=['Comment'])
     return result_df
-def return_sentiment(text : str) -> np.int64 :
-    inputs = fine_tuned_tokenizer(text, return_tensors="tf", padding="max_length", truncation=True, max_length=55)
-    input_dict = {
-        'input_ids': tf.cast(inputs['input_ids'], tf.int64),
-        'attention_mask': tf.cast(inputs['attention_mask'], tf.int64),
-        'token_type_ids': tf.cast(inputs['token_type_ids'], tf.int64)
-        }
-    outputs = fine_tuned_model(input_dict)
-    probabilities = tf.nn.softmax(outputs, axis=-1)
-    probabilities_np = probabilities.numpy()[0]
-    predicted_index = np.argmax(probabilities_np)
     return predicted_index
 def text_pre_processing(translated_comment: pd.DataFrame) -> pd.DataFrame:
@@ -186,14 +204,14 @@ def get_sentiment(comments_df : pd.DataFrame,comment_count=10) -> tuple:
     comments_by_sentiment = {'Sadness': [], 'Joy': [], 'Love': [], 'Annoyed': [], 'Fear': [], 'Surprise': []}
     translated_comment = detect_and_translate(comments_df,comment_count)
     pre_processed_comments = text_pre_processing(translated_comment)
     for index, row in pre_processed_comments.iterrows():
-        sentiment_index = return_sentiment(row['Comment'])
         sentiment_label = SENTIMENT_LABELS[sentiment_index]
         sentiment_counts[sentiment_label]+=1
         comments_by_sentiment[sentiment_label].append(row['Comment'])
     return (sentiment_counts,comments_by_sentiment)
-# Flask App
 @app.route('/', methods=['GET', 'POST'])
 def index():
     if request.method == 'POST':
@@ -204,7 +222,6 @@ def index():
         if video_id:
             comments_df = get_comments(video_id, max_results=comment_count * 10)
             if not comments_df.empty:
-                load_model_and_tokenizer()
                 sentiment_counts, comments_by_sentiment = get_sentiment(comments_df, comment_count)
                 top_comment = comments_df.iloc[0]["Comment"]
                 return render_template('index.html',

 import emoji
 import numpy as np
 import pandas as pd
+import tensorflow.lite as tflite
 from googletrans import Translator
 from transformers import AutoTokenizer
 from googleapiclient.discovery import build
 os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
 tokenizer_path = 'model/saved_tokenizer'
+model_path = 'model/model_float16.tflite'
 fine_tuned_tokenizer = None
+interpreter = None
+input_details = None
+output_details = None
 SENTIMENT_LABELS = {0: 'Sadness', 1: 'Joy', 2: 'Love', 3: 'Annoyed', 4: 'Fear', 5: 'Surprise'}
 def load_model_and_tokenizer():
     """Lazily initialise the tokenizer and the TFLite interpreter.

     Fills the module-level ``fine_tuned_tokenizer``, ``interpreter``,
     ``input_details`` and ``output_details``. If all four are already set
     the call returns immediately; if any one of them is still ``None``,
     all four are (re)created.
     """
     global fine_tuned_tokenizer, interpreter, input_details, output_details
     cached = (fine_tuned_tokenizer, interpreter, input_details, output_details)
     if all(item is not None for item in cached):
         return
     fine_tuned_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
     # NOTE(review): `tflite` is this file's alias for `tensorflow.lite`, so the
     # interpreter comes from the TensorFlow package - confirm the deployment
     # requirements still install it.
     interpreter = tflite.Interpreter(model_path=model_path)
     # Tensors must be allocated before their details are queried or set.
     interpreter.allocate_tensors()
     input_details = interpreter.get_input_details()
     output_details = interpreter.get_output_details()
 def build_youtube_client(api_key):
     """Return a client for the 'youtube' service, version 'v3'.

     *api_key* is passed through to ``build`` as the developer key.
     """
     youtube_client = build('youtube', 'v3', developerKey=api_key)
     return youtube_client
 def trim_whitespace(s):
     """Return *s* with leading and trailing whitespace removed."""
     stripped = s.strip()
     return stripped
 def remove_emojis(text):
     """Strip emoji from *text*, substituting a placeholder if nothing is left.

     Emoji are removed with ``emoji.replace_emoji``. When the result is the
     empty string, a fixed placeholder sentence is returned instead so the
     caller never receives an empty comment.
     """
     without_emoji = emoji.replace_emoji(text, replace="")
     if without_emoji != '':
         return without_emoji
     return 'This is a empty comment'
 def detect_and_translate(comments: pd.DataFrame, required_count=10):
     translator = Translator()
     translated_comments = []
     result_df = pd.DataFrame(translated_comments, columns=['Comment'])
     return result_df
def tflite_predict(text):
    """Classify *text* with the TFLite model and return the winning class index.

    Relies on the module-level ``fine_tuned_tokenizer``, ``interpreter``,
    ``input_details`` and ``output_details``, which must already have been
    initialised (see ``load_model_and_tokenizer``).

    Args:
        text: The comment text to classify.

    Returns:
        The index of the highest-scoring entry in the model's first output
        tensor, for the single item in the batch.
    """
    # Tokenize to NumPy arrays, padded/truncated to exactly 55 tokens.
    inputs = fine_tuned_tokenizer(text, return_tensors="np", padding="max_length", truncation=True, max_length=55)

    # Positions in `input_details` that each tensor was previously fed at.
    # They are used only when a tensor cannot be identified by name, because
    # the converter does not guarantee any particular input ordering.
    fallback_positions = {"input_ids": 1, "attention_mask": 0, "token_type_ids": 2}

    for key, position in fallback_positions.items():
        # Prefer the input whose tensor name mentions this key.
        detail = next(
            (d for d in input_details if key in d.get("name", "")),
            input_details[position],
        )
        # Shape (1, seq_len) for a batch of one, cast to the dtype the model
        # declares for this input (INT64 when none is reported).
        tensor = np.asarray(inputs[key]).reshape(1, -1).astype(detail.get("dtype", np.int64))
        interpreter.set_tensor(detail["index"], tensor)

    # Run inference
    interpreter.invoke()

    # Pick the highest-scoring class for the single batch item.
    output = interpreter.get_tensor(output_details[0]['index'])
    predicted_index = np.argmax(output, axis=1)[0]
    return predicted_index
 def text_pre_processing(translated_comment: pd.DataFrame) -> pd.DataFrame:
     comments_by_sentiment = {'Sadness': [], 'Joy': [], 'Love': [], 'Annoyed': [], 'Fear': [], 'Surprise': []}
     translated_comment = detect_and_translate(comments_df,comment_count)
     pre_processed_comments = text_pre_processing(translated_comment)
+    load_model_and_tokenizer()
     for index, row in pre_processed_comments.iterrows():
+        sentiment_index = tflite_predict(row['Comment'])
         sentiment_label = SENTIMENT_LABELS[sentiment_index]
         sentiment_counts[sentiment_label]+=1
         comments_by_sentiment[sentiment_label].append(row['Comment'])
     return (sentiment_counts,comments_by_sentiment)
 @app.route('/', methods=['GET', 'POST'])
 def index():
     if request.method == 'POST':
         if video_id:
             comments_df = get_comments(video_id, max_results=comment_count * 10)
             if not comments_df.empty:
                 sentiment_counts, comments_by_sentiment = get_sentiment(comments_df, comment_count)
                 top_comment = comments_df.iloc[0]["Comment"]
                 return render_template('index.html',

model/{transformer/fingerprint.pb → model_float16.tflite} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:082699f61ee7da6b8fc24cafd9e42ee8de29df4f2e1bc00f9528aba0a9d9e3b7
-size 57

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ec398514d6c710430eea30fbfc183e9901029238087bac2864c6a06becce2ea
+size 98488392

model/transformer/keras_metadata.pb DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:296e85b069c632dbf64c20ae3c43f600b1231507c163618b97e6b07bdc1f1071
-size 692146

model/transformer/saved_model.pb DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e2bfdfb55abf7231c80a126c7e52660325cd7bb10442c02e2442899d68f63572
-size 49147810

model/transformer/variables/variables.data-00000-of-00001 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:890fd47658860323f349c3ea633812405ed3464b389142c556b2b96ea5fe786d
-size 296341794

model/transformer/variables/variables.index DELETED Viewed

Binary file (189 kB)

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 google-api-python-client
 transformers==4.42.4
-tensorflow-cpu==2.17.0
 pandas
 Flask
 emoji

 google-api-python-client
 transformers==4.42.4
+tflite
 pandas
 Flask
 emoji