import gradio as gr
import os
import matplotlib.pyplot as plt
import pandas as pd
import re
from pythainlp.util import normalize
from pythainlp.tokenize import word_tokenize
from pythainlp import word_vector
import numpy as np
import keras
import plotly.express as px
#################
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
import chromedriver_autoinstaller
import sys

sys.path.insert(0, '/usr/lib/chromium-browser/chromedriver')
# Set up Chrome options for headless scraping
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')  # ensure GUI is off
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

# Install a chromedriver matching the installed Chrome version and add it to PATH
chromedriver_autoinstaller.install()

# Load the pretrained Thai word vectors and the Keras sentiment model
wv = word_vector.WordVector()
word2vec = wv.get_model()
model = keras.models.load_model('my_model3.h5')
def get_comments(VIDEO_URL):
    """Scrape the visible comment texts from a YouTube video page."""
    # Initialize the WebDriver
    driver = webdriver.Chrome(options=chrome_options)
    driver.get(VIDEO_URL)
    # Wait for the comments to load
    time.sleep(5)
    # Scroll down to load more comments (optional, repeat as needed)
    driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.END)
    time.sleep(2)
    # Find and collect the comment texts
    comment_elements = driver.find_elements(By.XPATH, '//yt-formatted-string[@id="content-text"]')
    data = []
    for comment in comment_elements:
        if comment.text != '':
            data.append(comment.text)
            print(comment.text)
    # Close the WebDriver
    driver.quit()
    return data
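
# A minimal usage sketch (illustrative helper, not called by the app); the URL
# below is a hypothetical placeholder, assuming the video is public and its
# comments have loaded before scraping.
def _example_get_comments():
    sample_url = 'https://www.youtube.com/watch?v=VIDEO_ID'  # hypothetical placeholder
    comments = get_comments(sample_url)
    print(f'Fetched {len(comments)} comments')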
def cosine_sim(u, v):
    """Cosine similarity between two vectors (currently unused by the app)."""
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
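
# Worked example (illustrative helper, not called by the app): identical vectors
# give similarity 1.0, opposite vectors give -1.0.
def _example_cosine_sim():
    v = np.array([1.0, 2.0, 2.0])
    print(cosine_sim(v, v))   # -> 1.0
    print(cosine_sim(v, -v))  # -> -1.0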
def sentences_to_indices(X, word2vec, max_len):
    """
    Converts an array of sentences (strings) into an array of indices corresponding to words in the sentences.
    The output shape is suitable for feeding into `Embedding()`.
    Arguments:
    X -- array of sentences (strings), of shape (m,)
    word2vec -- a trained Word2Vec model from gensim
    max_len -- maximum number of words in a sentence; longer sentences are truncated
    Returns:
    X_indices -- array of indices corresponding to words in the sentences from X, of shape (m, max_len)
    """
    m = X.shape[0]  # number of examples
    # Initialize X_indices as a numpy matrix of zeros with the correct shape
    X_indices = np.zeros((m, max_len))
    for i in range(m):  # loop over examples
        # Lower-case the ith sentence and split it into words, truncated to max_len tokens
        sentence_words = X[i].lower().split()[:max_len]
        j = 0
        # Look each word up in the word2vec vocabulary;
        # out-of-vocabulary words are skipped and the remaining slots stay zero (padding)
        for w in sentence_words:
            if w in word2vec.key_to_index:
                X_indices[i, j] = word2vec.key_to_index[w]
                j += 1
    return X_indices
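
# A minimal sketch (illustrative helper, not called by the app), assuming the input
# has already been word-segmented and space-joined the same way clean_me() does;
# the sample tokens are hypothetical and only mapped if they exist in the vocabulary.
def _example_sentences_to_indices():
    X = np.array(['ดี มาก', 'ไม่ ชอบ'])
    X_indices = sentences_to_indices(X, word2vec, max_len=128)
    print(X_indices.shape)  # -> (2, 128), zero-padded on the right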
def deEmojify(text):
    """Remove emoji and other pictographic symbols from the text."""
    regex_pattern = re.compile(pattern="["
                               u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               u"\U00002500-\U00002BEF"  # box drawing & misc symbols
                               u"\U00002702-\U000027B0"  # dingbats
                               u"\U000024C2-\U0001F251"
                               u"\U0001f926-\U0001f937"
                               u"\U00010000-\U0010ffff"
                               u"\u2640-\u2642"
                               u"\u2600-\u2B55"
                               u"\u200d"  # zero-width joiner
                               u"\u23cf"
                               u"\u23e9"
                               u"\u231a"
                               u"\ufe0f"  # variation selector
                               u"\u3030"
                               "]+", flags=re.UNICODE)
    return regex_pattern.sub(r'', text)
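
# Quick illustration (illustrative helper, not called by the app): emoji are
# stripped, Thai text is kept.
def _example_deEmojify():
    print(deEmojify('สนุกมาก 😂👍'))  # -> 'สนุกมาก '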
def clean_me(data):
    # Strip HTML tags
    data['clean_text'] = data['text'].str.replace(r'<[^<>]*>', '', regex=True)
    # Trim, lower-case, and collapse carriage returns / newlines / tabs into spaces
    data['clean2_text'] = (data['clean_text'].str.strip().str.lower()
                           .str.replace(r'\r+', ' ', regex=True)
                           .str.replace(r'\n+', ' ', regex=True)
                           .str.replace(r'\t+', ' ', regex=True))
    # Remove emoji
    data['clean3_text'] = data.apply(lambda row: deEmojify(row['clean2_text']), axis=1)
    # Normalize text
    data['clean4_text'] = data.apply(lambda row: normalize(row['clean3_text']), axis=1)
    # Word segmentation: it will take a while....
    data['wordseged_text'] = data.apply(lambda row: word_tokenize(row['clean4_text'], engine="newmm-safe"), axis=1)
    # Join the segmented words with spaces
    data['wordseged_space_text'] = data.apply(lambda row: " ".join(row["wordseged_text"]), axis=1)
    return data
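
# A small sketch of the cleaning pipeline (illustrative helper, not called by the
# app); the sample comment text is hypothetical.
def _example_clean_me():
    df = pd.DataFrame({'text': ['<b>ดีมาก</b> 😀\nชอบสุดๆ']})
    df = clean_me(df)
    print(df['wordseged_space_text'][0])  # tags, emoji and newlines removed; words space-separated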
def pretty_output(lines, sentiment):
    """Group comments by predicted sentiment and build a bar chart of the counts."""
    label = np.array(['Neg', 'Neu', 'Pos'])
    txt_sentiment = label[np.argmax(sentiment, axis=1)]
    seriesText = pd.Series(txt_sentiment).value_counts()
    df = pd.DataFrame({'Sentiment': seriesText.index, 'Count': seriesText.values})
    fig = px.bar(df, x='Sentiment', y='Count', color='Sentiment')
    fig.update_xaxes(categoryorder='array', categoryarray=['Neg', 'Neu', 'Pos'])
    txt_pos = ''
    txt_neu = ''
    txt_neg = ''
    for (x, y, score) in zip(lines, txt_sentiment, sentiment):
        txt_score = [f"{i:.2f}" for i in score]
        tmp = f'{y} {txt_score}:-{x} \n'
        if y == 'Pos':
            txt_pos += tmp
        elif y == 'Neu':
            txt_neu += tmp
        else:
            txt_neg += tmp
    return txt_pos, txt_neu, txt_neg, fig
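
# Illustration with made-up softmax scores (illustrative helper, not called by the
# app): two comments, one predicted negative and one positive.
def _example_pretty_output():
    fake_scores = np.array([[0.7, 0.2, 0.1], [0.1, 0.2, 0.7]])
    pos, neu, neg, fig = pretty_output(['แย่มาก', 'ดีมาก'], fake_scores)
    print(neg)  # -> "Neg ['0.70', '0.20', '0.10']:-แย่มาก"
    print(pos)  # -> "Pos ['0.10', '0.20', '0.70']:-ดีมาก"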
# Parametric heart curve, used as a fallback plot when no comments can be loaded
def fx(t):
    return 16 * np.sin(t) ** 3

def fy(t):
    return 13 * np.cos(t) - 5 * np.cos(2 * t) - 2 * np.cos(3 * t) - np.cos(4 * t)
def combine(a, b):
    data = pd.DataFrame()
    lines = a.split('\n')
    if b != "":
        lines = get_comments(b)
        if not lines:
            print('CANNOT GET DATA from Youtube')
    if not lines:
        # Fallback: draw the parametric heart and embed a default video
        t = np.linspace(-2 * np.pi, 2 * np.pi)
        xs = fx(t)
        ys = fy(t)
        plt.plot(xs, ys, "o")
        plt.title('My Heart')
        plt.xlabel(' CANNOT LOAD VDO ')
        plt.ylabel(' CANNOT LOAD VDO ')
        str_output = 'https://www.youtube.com/embed/KRhPBvrBhro?si=U8sOh4ighEG9hkTI'
        embed_html = f'<center> <iframe id="ytplayer" type="text/html" width="560" height="315" src="{str_output}" frameborder="0" allowfullscreen></iframe> </center>'
        return (embed_html, "", "", "", plt.gcf())
    data['text'] = lines
    data = clean_me(data)
    X_train_indices = sentences_to_indices(data['wordseged_space_text'].values, word2vec, 128)
    result = model.predict(X_train_indices[:])
    txt_pos, txt_neu, txt_neg, fig = pretty_output(lines, result)
    # Turn the watch URL into an embeddable URL
    str_output = re.sub(r'youtube\.com/', 'youtube.com/embed/', b)
    str_output = re.sub(r'watch\?v=', '', str_output)
    embed_html = f'<center> <iframe id="ytplayer" type="text/html" width="560" height="315" src="{str_output}" frameborder="0" allowfullscreen></iframe> </center>'
    print(embed_html)
    return embed_html, txt_pos, txt_neu, txt_neg, fig
def mirror(x):
    return x
with gr.Blocks() as demo:
    with gr.Row():
        txt_2 = gr.Textbox(label="Input: Youtube URL")
        txt = gr.Textbox(label="Input: TEXT", lines=2)
    with gr.Row():
        btn = gr.Button(value="Submit")
    with gr.Row():
        HTM = gr.HTML(value="", label="Youtube VDO")
        plot = gr.Plot(label="Plot")
    with gr.Row():
        txt_NEG = gr.Textbox(value="", label="Negative comments")
        txt_NEU = gr.Textbox(value="", label="Neutral comments")
        txt_POS = gr.Textbox(value="", label="Positive comments")
    btn.click(combine, inputs=[txt, txt_2], outputs=[HTM, txt_POS, txt_NEU, txt_NEG, plot])

if __name__ == "__main__":
    demo.launch()