Spaces:
Build error
Build error
| import gradio as gr | |
| import selenium | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| from selenium import webdriver | |
| from selenium.webdriver.common.keys import Keys | |
| import pandas as pd | |
| import time | |
| from transformers import pipeline | |
| # Search Query | |
_NEWS_BASE_URL = "https://news.google.com/"
_SENTIMENT_MODEL = "finiteautomata/bertweet-base-sentiment-analysis"
_NEWS_COLUMNS = ['Title', 'Source', 'Time', 'Author', 'Link']
_MAX_ARTICLES = 6  # number of leading search results that are kept and classified
_REQUEST_TIMEOUT = 30  # seconds; keeps a stalled request from hanging the UI
_sentiment_classifier = None  # populated lazily by _get_sentiment_classifier()


def _get_sentiment_classifier():
    """Return the sentiment pipeline, loading the model only on first use."""
    global _sentiment_classifier
    if _sentiment_classifier is None:
        _sentiment_classifier = pipeline(model=_SENTIMENT_MODEL)
    return _sentiment_classifier


def _article_to_row(article):
    """Convert one ``<article>`` tag into a row dict, or None if it has no link or title."""
    from urllib.parse import urljoin

    anchor = article.find('a', href=True)
    # The scraper relies on the position of each text fragment inside the
    # article tag: 0 = source, 2 = title, 3 = time, 4 = author.
    fields = article.get_text(separator='\n').split('\n')
    if anchor is None or len(fields) < 3:
        return None
    return {
        'Title': fields[2],
        'Source': fields[0],
        'Time': fields[3] if len(fields) > 3 else 'Missing',
        'Author': fields[4].split('By ')[-1] if len(fields) > 4 else 'Missing',
        # urljoin resolves any relative href (e.g. "./articles/...") against
        # the site root and leaves absolute links untouched.
        'Link': urljoin(_NEWS_BASE_URL, anchor['href']),
    }


def news_and_analysis(query: str):
    """Search Google News for *query* and tag each headline with a sentiment.

    The query is lower-cased and percent-encoded, the search result page is
    downloaded and parsed, and the first few articles that expose both a link
    and a title are collected. Each title is then run through the sentiment
    model.

    No Selenium browser is started: the only step that would need one
    (opening each article to read its headline) is disabled, and launching an
    unused driver that is never quit leaks a Chrome process per request.

    Args:
        query: Free-text topic typed by the user. Empty or whitespace-only
            input yields an empty table without any network access.

    Returns:
        A pandas DataFrame with the columns ``Title``, ``Source``, ``Time``,
        ``Author``, ``Link`` and ``Sentiment``. ``Sentiment`` holds the
        string form of the classifier output for the title. Fields absent
        from the page are filled with ``'Missing'``.

    Raises:
        requests.RequestException: If the search page cannot be fetched or
            the server answers with an HTTP error status.
    """
    from urllib.parse import quote

    search_term = (query or '').strip().lower()
    if not search_term:
        return pd.DataFrame(columns=_NEWS_COLUMNS + ['Sentiment'])

    # quote() escapes every reserved character, not just a hand-picked few,
    # so inputs containing '#', '?', '%', '/' or non-ASCII text stay intact.
    url = (
        f"{_NEWS_BASE_URL}search?q={quote(search_term, safe='')}"
        "&hl=en-US&gl=in&ceid=US%3Aen&num=3"
    )
    # TLS verification is left at its secure default.
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    rows = []
    for article in soup.find_all('article'):
        row = _article_to_row(article)
        if row is not None:
            rows.append(row)
        if len(rows) == _MAX_ARTICLES:
            break
    news_df = pd.DataFrame(rows, columns=_NEWS_COLUMNS)

    if rows:
        classifier = _get_sentiment_classifier()
        news_df['Sentiment'] = [str(classifier(title)) for title in news_df['Title']]
    else:
        # Nothing to classify: skip loading the model altogether.
        news_df['Sentiment'] = ''

    print(news_df)
    return news_df
# Gradio front end: a text box for the topic, a button that triggers the
# search, and a table that displays the DataFrame returned by
# news_and_analysis.
with gr.Blocks() as demo:
    topic = gr.Textbox(
        label="Topic for which you want Google news and sentiment analysis",
    )
    btn = gr.Button(value="Submit")
    results = gr.Dataframe()
    btn.click(fn=news_and_analysis, inputs=topic, outputs=results)

demo.launch()