Spaces:
Runtime error
Runtime error
File size: 8,051 Bytes
66142c4 50967d1 77f9dcf 50967d1 43452d4 77f9dcf c036719 77f9dcf df75663 77f9dcf 66142c4 d3934ae 50967d1 d3934ae 66142c4 50967d1 43452d4 50967d1 43452d4 50967d1 77f9dcf 9d64903 50967d1 43452d4 df75663 77f9dcf c036719 77f9dcf c036719 9d64903 e2124cd 9d64903 77f9dcf 43452d4 77f9dcf df75663 c036719 df75663 c036719 50967d1 c036719 b234a25 c036719 43452d4 50967d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 |
import gradio as gr
import os
import matplotlib.pyplot as plt
import pandas as pd
import re
from pythainlp.util import normalize
from pythainlp.tokenize import word_tokenize
from pythainlp import word_vector
import numpy as np
import keras
import plotly.express as px
#################
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
import chromedriver_autoinstaller
import sys
# Make a system-installed chromedriver discoverable (Colab/Linux-style path).
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
# setup chrome options
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless') # ensure GUI is off
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
# Download and install a chromedriver matching the locally installed Chrome.
chromedriver_autoinstaller.install()
# Thai word embeddings from pythainlp; used to map tokens to vocabulary indices.
wv = word_vector.WordVector()
word2vec = wv.get_model()
# Pre-trained Keras sentiment classifier.
# NOTE(review): 'my_model3.h5' must sit next to this script — confirm on deploy.
model= keras.models.load_model('my_model3.h5')
def get_comments(VIDEO_URL):
    """Scrape visible comment texts from a YouTube video page.

    Arguments:
    VIDEO_URL -- full YouTube watch URL

    Returns:
    list of non-empty comment strings (empty list if comments never loaded).
    """
    # Initialize the WebDriver with the module-level headless options.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(VIDEO_URL)
        # Wait for the page (and the lazily loaded comment widget) to render.
        time.sleep(5)
        # Scroll to the bottom once so YouTube starts loading comments.
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.END)
        time.sleep(2)
        # Comment bodies live in yt-formatted-string elements with this id.
        comment_elements = driver.find_elements(By.XPATH, '//yt-formatted-string[@id="content-text"]')
        data = []
        for comment in comment_elements:
            # BUG FIX: the original compared the WebElement itself to '' (always
            # True); compare the element's text so blank comments are skipped.
            text = comment.text
            if text != '':
                data.append(text)
                print(text)
    finally:
        # Always release the browser, even if scraping raised.
        driver.quit()
    return data
def cosine_sim(u, v):
    """Return the cosine similarity between vectors u and v."""
    norm_product = np.linalg.norm(u) * np.linalg.norm(v)
    return np.dot(u, v) / norm_product
def sentences_to_indices(X, word2vec, max_len):
    """
    Convert an array of sentences (strings) into an array of vocabulary indices.

    The output shape is suitable for a Keras `Embedding()` layer.

    Arguments:
    X -- array of sentences (strings), of shape (m,)
    word2vec -- embedding model exposing a `key_to_index` dict (gensim KeyedVectors)
    max_len -- maximum number of words per sentence; longer sentences are truncated

    Returns:
    X_indices -- float array of shape (m, max_len); unknown words and padding are 0
    """
    m = X.shape[0]  # number of examples
    X_indices = np.zeros((m, max_len))
    for i in range(m):
        # Lower-case, whitespace-tokenize, and truncate to max_len words.
        sentence_words = X[i].lower().split()[:max_len]
        # BUG FIX: the original wrapped this loop in a bare `except:` that
        # silently dropped the rest of a sentence on any error; the membership
        # test below already makes a KeyError impossible, so no handler is needed.
        for j, w in enumerate(sentence_words):
            # Out-of-vocabulary words keep the 0 (padding) index.
            if w in word2vec.key_to_index:
                X_indices[i, j] = word2vec.key_to_index[w]
    return X_indices
# Compiled once at import time: the original recompiled this large character
# class on every call, which is pure overhead inside the per-row apply loop.
_EMOJI_PATTERN = re.compile(pattern = "["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
    u"\U00002500-\U00002BEF"  # chinese char
    u"\U00002702-\U000027B0"
    u"\U00002702-\U000027B0"
    u"\U000024C2-\U0001F251"
    u"\U0001f926-\U0001f937"
    u"\U00010000-\U0010ffff"
    u"\u2640-\u2642"
    u"\u2600-\u2B55"
    u"\u200d"
    u"\u23cf"
    u"\u23e9"
    u"\u231a"
    u"\ufe0f"  # dingbats
    u"\u3030"
    "]+", flags = re.UNICODE)

def deEmojify(text):
    """Return *text* with emoji and pictographic characters removed."""
    return _EMOJI_PATTERN.sub(r'', text)
def clean_me(data):
    """Clean and word-segment the 'text' column of *data* (a DataFrame).

    Adds intermediate columns (clean_text ... wordseged_space_text) and returns
    the same DataFrame; the final 'wordseged_space_text' column holds
    space-joined Thai word tokens ready for sentences_to_indices().
    """
    # Strip HTML tags.
    data['clean_text'] = data['text'].str.replace(r'<[^<>]*>', '', regex=True)
    # BUG FIX: the whitespace replacements below were missing regex=True, so
    # pandas (2.x default: literal matching) never collapsed runs of \r/\n/\t.
    data['clean2_text']= data['clean_text'].str.strip().str.lower().str.replace('\r+', ' ', regex=True).str.replace('\n+',' ', regex=True).str.replace('\t+',' ', regex=True)
    # Remove emoji / pictographs.
    data['clean3_text'] = data.apply(lambda row: deEmojify(row['clean2_text']), axis=1)
    # Normalize Thai text (pythainlp).
    data['clean4_text'] = data.apply(lambda row: normalize(row['clean3_text']), axis=1)
    # Word segmentation: it will take a while....
    data['wordseged_text'] = data.apply(lambda row: word_tokenize(row['clean4_text'], engine="newmm-safe"), axis=1)
    # Join the segmented words with spaces.
    data['wordseged_space_text'] = data.apply(lambda row: " ".join(row["wordseged_text"]), axis=1)
    return(data)
def pretty_output(lines, sentiment):
    """Group scored comments by predicted sentiment and build a count bar chart.

    Arguments:
    lines -- list of original comment strings
    sentiment -- array of per-comment class scores, shape (n, 3) = (Neg, Neu, Pos)

    Returns:
    (pos_text, neu_text, neg_text, fig) -- formatted comment listings per class
    and a plotly bar figure of class counts.
    """
    labels = np.array(['Neg', 'Neu', 'Pos'])
    predicted = labels[np.argmax(sentiment, axis=1)]
    counts = pd.Series(predicted).value_counts()
    chart_df = pd.DataFrame({'Sentiment': counts.index, 'Count': counts.values})
    fig = px.bar(chart_df, x='Sentiment', y='Count', color='Sentiment')
    # Fixed category order so the bars always read Neg / Neu / Pos.
    fig.update_xaxes(categoryorder='array', categoryarray=['Neg', 'Neu', 'Pos'])
    buckets = {'Pos': '', 'Neu': '', 'Neg': ''}
    for line, label, scores in zip(lines, predicted, sentiment):
        txt_score = [f"{s:.2f}" for s in scores]
        buckets[label] += f'{label} {txt_score}:-{line} \n'
    return(buckets['Pos'], buckets['Neu'], buckets['Neg'], fig)
def fx(t):
    """x-coordinate of the parametric heart curve at angle t."""
    return 16 * np.power(np.sin(t), 3)
def fy(t):
    """y-coordinate of the parametric heart curve at angle t."""
    return 13 * np.cos(t) + (-5 * np.cos(2 * t)) + (-2 * np.cos(3 * t)) + (-np.cos(4 * t))
def combine(a, b):
    """Analyse sentiment of pasted text (a) or of a YouTube video's comments (b).

    Arguments:
    a -- newline-separated comment text typed into the textbox
    b -- optional YouTube watch URL; when non-empty it overrides *a*

    Returns:
    (embed_html, pos_text, neu_text, neg_text, figure) matching the Gradio outputs.
    """
    data = pd.DataFrame()
    lines = str.split(a, '\n')
    if b != "":
        # A URL was supplied: scrape its comments instead of using the textbox.
        lines = get_comments(b)
        if not lines:
            print('CANNOT_GET DATA from Youtube')
    if not lines:
        # Nothing to analyse: draw a placeholder heart curve and embed a
        # fallback video.
        t = np.linspace(-2 * np.pi, 2 * np.pi)
        plt.plot(fx(t), fy(t), "o")
        plt.title('My Heart')
        plt.xlabel(' CANNOT LOAD VDO ')
        plt.ylabel(' CANNOT LOAD VDO ')
        str_output = 'www.youtube.com/embed/KRhPBvrBhro?si=U8sOh4ighEG9hkTI'
        # BUG FIX: the original iframe tag was never closed with '>'
        # ("allowfullscreen </iframe>"), producing malformed HTML.
        embed_html = f'<center> <iframe id="ytplayer" type="text/html" width="560" height="315" src="{str_output}" frameborder="0" allowfullscreen></iframe> </center>'
        return (embed_html, "", "", "", plt.gcf())
    data['text'] = lines
    data = clean_me(data)
    # 128 is the sequence length the loaded model was trained with.
    X_train_indices = sentences_to_indices(data['wordseged_space_text'].values, word2vec, 128)
    result = model.predict(X_train_indices)
    txt_pos, txt_neu, txt_neg, fig = pretty_output(lines, result)
    # Turn a watch URL into an embeddable one ('.' escaped so it matches literally).
    str_output = re.sub(r'youtube\.com/', 'youtube.com/embed/', b)
    str_output = re.sub(r'watch\?v=', '', str_output)
    embed_html = f'<center> <iframe id="ytplayer" type="text/html" width="560" height="315" src="{str_output}" frameborder="0" allowfullscreen></iframe> </center>'
    print(embed_html)
    return embed_html, txt_pos, txt_neu, txt_neg, fig
def mirror(x):
    """Identity helper: return the argument unchanged."""
    return x
# --- Gradio UI: URL/text inputs, submit button, embedded video + sentiment plot,
# and one textbox per sentiment class, wired to combine().
with gr.Blocks() as demo:
    with gr.Row():
        # Either input may be used; a non-empty URL takes precedence in combine().
        txt_2 = gr.Textbox(label="Input: Youtube URL")
        txt = gr.Textbox(label="Input: TEXT", lines=2)
    with gr.Row():
        btn = gr.Button(value="Submit")
    with gr.Row():
        # Embedded YouTube player (HTML) and the sentiment-count bar chart.
        HTM = gr.HTML(value= "", label="Youtube VDO")
        plot = gr.Plot(label="Plot")
    with gr.Row():
        txt_NEG = gr.Textbox(value="", label="Negative comments")
        txt_NEU = gr.Textbox(value="", label="Neutral comments")
        txt_POS = gr.Textbox(value="", label="Positive comments")
    # combine() returns (embed_html, pos, neu, neg, fig) — note the output order.
    btn.click(combine, inputs=[txt, txt_2], outputs=[HTM, txt_POS, txt_NEU, txt_NEG, plot])
if __name__ == "__main__":
    demo.launch()