# Daily News with Sentiment Analysis — Streamlit app
# Scrapes Finviz headlines for a ticker and scores them with NLTK VADER.
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
from datetime import datetime
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# quiet=True: the download runs on every Streamlit rerun; suppress the
# repeated console chatter (it is a no-op once the lexicon is cached).
nltk.download('vader_lexicon', quiet=True)
st.set_page_config(page_title="Daily News with Sentiment Analysis", layout="wide")
st.title('Daily News with Sentiment Analysis')

# Sidebar inputs
st.sidebar.title('Input Parameters')

# Collapsed-by-default usage notes
with st.sidebar.expander("How to use:", expanded=False):
    st.markdown("""
    1. **Enter Ticker**: Type the stock symbol (e.g., 'AAPL').
    2. **Fetch News**: Click 'Get News' for the latest articles.
    3. **View Sentiment**: See articles with sentiment scores (-1 to 1).
    4. **Sort & Download**: Download as CSV if needed.
    """)

# Ticker input in its own expander, open by default
with st.sidebar.expander("Stock Ticker Input", expanded=True):
    ticker = st.text_input('Enter Stock Ticker', 'AAPL')
def get_finviz_news_raw(ticker):
    """Scrape the raw news table for *ticker* from Finviz.

    Returns a DataFrame with columns Datetime/Source/Title/Link, or None
    when the page cannot be fetched or contains no news table (an
    ``st.error`` is shown in either case).
    """
    finviz_url = f'https://finviz.com/quote.ashx?t={ticker}'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
    try:
        # timeout prevents the Streamlit script from hanging forever when
        # Finviz is slow or unreachable; network errors fall back to the
        # same error-and-None contract as a bad status code.
        response = requests.get(finviz_url, headers=headers, timeout=10)
    except requests.RequestException:
        st.error(f"Error fetching data for {ticker}")
        return None
    if response.status_code != 200:
        st.error(f"Error fetching data for {ticker}")
        return None
    soup = BeautifulSoup(response.text, "html.parser")
    news_table = soup.find("table", class_="fullview-news-outer")
    if not news_table:
        st.error(f"No news found for {ticker}")
        return None
    news_rows = news_table.find_all("tr")
    raw_data = []
    for row in news_rows:
        # Each row holds a right-aligned timestamp cell, a source cell,
        # and an anchor with the headline text + link. Any missing piece
        # degrades to an empty string rather than raising.
        date_time_cell = row.find("td", align="right")
        news_source_cell = row.find("td", class_="news-link-right")
        news_title_cell = row.find("a", class_="tab-link-news")
        date_time_str = date_time_cell.get_text().strip() if date_time_cell else ''
        news_source = news_source_cell.get_text() if news_source_cell else ''
        news_title = news_title_cell.get_text().strip() if news_title_cell else ''
        news_link = news_title_cell['href'] if news_title_cell else ''
        raw_data.append({
            "Datetime": date_time_str,
            "Source": news_source,
            "Title": news_title,
            "Link": news_link
        })
    df = pd.DataFrame(raw_data)
    return df
| # Preprocess text for sentiment analysis | |
def preprocess_text(text):
    """Clean a headline for sentiment scoring.

    Strips HTML tags, drops everything except ASCII letters and
    whitespace, and collapses runs of whitespace to single spaces.
    """
    without_tags = re.sub(r'<.*?>', '', text)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', without_tags)
    # split()/join collapses all whitespace runs and trims the ends
    return ' '.join(letters_only.split())
| # Analyze sentiment using VADER | |
def analyze_sentiment(text):
    """Return the VADER compound sentiment score for *text*.

    The score lies in [-1, 1]: -1 is most negative, 1 most positive.
    """
    # Reuse one analyzer instance across calls: this function is applied
    # per-row via DataFrame.apply, and constructing the analyzer reloads
    # the VADER lexicon each time.
    sia = getattr(analyze_sentiment, "_sia", None)
    if sia is None:
        sia = SentimentIntensityAnalyzer()
        analyze_sentiment._sia = sia
    return sia.polarity_scores(text)['compound']
| # Fetch and display news when the button is pressed | |
| if st.sidebar.button('Get News'): | |
| news_df_raw = get_finviz_news_raw(ticker) | |
| if news_df_raw is not None and not news_df_raw.empty: | |
| df = news_df_raw.copy() | |
| # Get today's date | |
| today_date = datetime.today().strftime('%Y-%m-%d') | |
| # Initialize the date variable | |
| current_date = None | |
| # Lists to store the new column values | |
| dates = [] | |
| times = [] | |
| # Process each row in the dataframe | |
| for index, row in df.iterrows(): | |
| datetime_value = row['Datetime'] | |
| # Replace 'Today' with today's date | |
| if 'Today' in datetime_value: | |
| datetime_value = datetime_value.replace('Today', today_date) | |
| # Check if the datetime_value contains a date | |
| if ' ' in datetime_value and not datetime_value.split(' ')[0].isdigit(): | |
| # Extract date and time | |
| current_date = datetime_value.split(' ')[0] | |
| time_value = ' '.join(datetime_value.split(' ')[1:]) | |
| else: | |
| # Only time is provided | |
| time_value = datetime_value | |
| # Add the current date and time to the lists | |
| dates.append(current_date) | |
| times.append(time_value) | |
| # Add the new columns to the dataframe | |
| df['Date'] = dates | |
| df['Time'] = times | |
| # Add Sentiment and Relevance | |
| df['Cleaned_Title'] = df['Title'].apply(preprocess_text) | |
| df['Sentiment'] = df['Cleaned_Title'].apply(analyze_sentiment) | |
| # Remove Cleaned_Title column | |
| df.drop(columns=['Cleaned_Title', 'Datetime', 'Source'], inplace=True) | |
| df = df.reset_index(drop=True) | |
| # Make links clickable | |
| df['Title'] = df.apply(lambda x: f'<a href="{x["Link"]}" target="_blank">{x["Title"]}</a>', axis=1) | |
| df.drop(columns=['Link'], inplace=True) | |
| st.write(f"## News for {ticker}") | |
| st.markdown(df.to_html(escape=False, index=False), unsafe_allow_html=True) | |
| # Option to download data as CSV | |
| csv = df.to_csv(index=False).encode('utf-8') | |
| st.download_button( | |
| label="Download data as CSV", | |
| data=csv, | |
| file_name=f'{ticker}_news_sentiment.csv', | |
| mime='text/csv', | |
| ) | |
| else: | |
| st.write(f"No news found for {ticker}") | |
# Hide Streamlit's built-in main menu and footer for a cleaner page
st.markdown(
    """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
""",
    unsafe_allow_html=True,
)