import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
from datetime import datetime
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# One-time NLTK resource fetch: VADER's lexicon is required by
# SentimentIntensityAnalyzer; download() is a no-op if already cached.
nltk.download('vader_lexicon')
# Page-wide Streamlit configuration must run before any other st.* output.
st.set_page_config(page_title="Daily News with Sentiment Analysis", layout="wide")
st.title('Daily News with Sentiment Analysis')
# Sidebar inputs
st.sidebar.title('Input Parameters')
# Collapsed help panel explaining the workflow.
with st.sidebar.expander("How to use:", expanded=False):
    st.markdown("""
1. **Enter Ticker**: Type the stock symbol (e.g., 'AAPL').
2. **Fetch News**: Click 'Get News' for the latest articles.
3. **View Sentiment**: See articles with sentiment scores (-1 to 1).
4. **Sort & Download**: Download as CSV if needed.
""")
# Wrapping the stock ticker input in an expander
with st.sidebar.expander("Stock Ticker Input", expanded=True):
    # Free-text ticker symbol; defaults to Apple.
    ticker = st.text_input('Enter Stock Ticker', 'AAPL')
def get_finviz_news_raw(ticker):
    """Scrape the news table for *ticker* from finviz.com.

    Returns a DataFrame with columns Datetime/Source/Title/Link, or
    None when the request fails or no news table is present (an error
    is surfaced to the user via st.error in both cases).
    """
    finviz_url = f'https://finviz.com/quote.ashx?t={ticker}'
    # Finviz rejects requests without a browser-like User-Agent.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
    try:
        # timeout so a stalled connection cannot hang the Streamlit app;
        # RequestException covers DNS/connection/timeout failures that
        # previously crashed the script with a traceback.
        response = requests.get(finviz_url, headers=headers, timeout=10)
    except requests.RequestException:
        st.error(f"Error fetching data for {ticker}")
        return None
    if response.status_code != 200:
        st.error(f"Error fetching data for {ticker}")
        return None
    soup = BeautifulSoup(response.text, "html.parser")
    news_table = soup.find("table", class_="fullview-news-outer")
    if not news_table:
        st.error(f"No news found for {ticker}")
        return None
    news_rows = news_table.find_all("tr")
    raw_data = []
    for row in news_rows:
        # Each row: right-aligned timestamp cell, source cell, headline anchor.
        date_time_cell = row.find("td", align="right")
        news_source_cell = row.find("td", class_="news-link-right")
        news_title_cell = row.find("a", class_="tab-link-news")
        # Cells can be missing on malformed rows; fall back to ''.
        date_time_str = date_time_cell.get_text().strip() if date_time_cell else ''
        news_source = news_source_cell.get_text() if news_source_cell else ''
        news_title = news_title_cell.get_text().strip() if news_title_cell else ''
        news_link = news_title_cell['href'] if news_title_cell else ''
        raw_data.append({
            "Datetime": date_time_str,
            "Source": news_source,
            "Title": news_title,
            "Link": news_link
        })
    df = pd.DataFrame(raw_data)
    return df
# Preprocess text for sentiment analysis.
# Patterns are compiled once at import: preprocess_text runs once per
# headline via DataFrame.apply, so recompiling per call was wasted work.
_HTML_TAG_RE = re.compile(r'<.*?>')        # HTML tags
_NON_ALPHA_RE = re.compile(r'[^a-zA-Z\s]') # special characters and digits
_WHITESPACE_RE = re.compile(r'\s+')        # runs of whitespace

def preprocess_text(text):
    """Normalize headline text for VADER scoring.

    Strips HTML tags, removes everything but ASCII letters and
    whitespace, then collapses whitespace runs to single spaces.
    """
    text = _HTML_TAG_RE.sub('', text)
    text = _NON_ALPHA_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', text).strip()
# Analyze sentiment using VADER
def analyze_sentiment(text):
    """Return VADER's compound polarity score for *text* in [-1, 1].

    The SentimentIntensityAnalyzer is built once and memoised on the
    function object: the original constructed a fresh analyzer (which
    re-reads the lexicon) for every headline passed via DataFrame.apply.
    """
    sia = getattr(analyze_sentiment, '_sia', None)
    if sia is None:
        sia = SentimentIntensityAnalyzer()
        analyze_sentiment._sia = sia  # cache for all subsequent calls
    return sia.polarity_scores(text)['compound']
# Fetch and display news when the button is pressed.
if st.sidebar.button('Get News'):
    news_df_raw = get_finviz_news_raw(ticker)
    if news_df_raw is not None and not news_df_raw.empty:
        df = news_df_raw.copy()
        # Get today's date (finviz labels same-day rows as 'Today').
        today_date = datetime.today().strftime('%Y-%m-%d')
        # Finviz only prints the date on the first headline of each day;
        # carry the last seen date forward for time-only rows.
        current_date = None
        dates = []
        times = []
        for index, row in df.iterrows():
            datetime_value = row['Datetime']
            # Replace 'Today' with today's date.
            if 'Today' in datetime_value:
                datetime_value = datetime_value.replace('Today', today_date)
            # A cell with a space whose first token is not purely digits
            # carries both a date and a time (e.g. 'Dec-27-24 09:00AM').
            if ' ' in datetime_value and not datetime_value.split(' ')[0].isdigit():
                current_date = datetime_value.split(' ')[0]
                time_value = ' '.join(datetime_value.split(' ')[1:])
            else:
                # Only a time is provided; reuse the carried-forward date.
                time_value = datetime_value
            dates.append(current_date)
            times.append(time_value)
        df['Date'] = dates
        df['Time'] = times
        # Score each headline on cleaned text; drop the scratch column.
        df['Cleaned_Title'] = df['Title'].apply(preprocess_text)
        df['Sentiment'] = df['Cleaned_Title'].apply(analyze_sentiment)
        df.drop(columns=['Cleaned_Title', 'Datetime', 'Source'], inplace=True)
        df = df.reset_index(drop=True)
        # Build the CSV *before* the HTML conversion below so the download
        # keeps plain-text titles (no embedded anchor tags).
        csv = df.drop(columns=['Link']).to_csv(index=False).encode('utf-8')
        # BUG FIX: the original lambda was f'{x["Title"]}', an identity copy,
        # so no link was ever rendered despite to_html(escape=False) and
        # unsafe_allow_html=True. Wrap the title in an anchor tag instead.
        df['Title'] = df.apply(
            lambda x: f'<a href="{x["Link"]}" target="_blank">{x["Title"]}</a>'
            if x["Link"] else x["Title"],
            axis=1,
        )
        df.drop(columns=['Link'], inplace=True)
        st.write(f"## News for {ticker}")
        # escape=False so the anchor tags render as clickable links.
        st.markdown(df.to_html(escape=False, index=False), unsafe_allow_html=True)
        # Option to download data as CSV.
        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name=f'{ticker}_news_sentiment.csv',
            mime='text/csv',
        )
    else:
        st.write(f"No news found for {ticker}")
# Placeholder for custom CSS that typically hides Streamlit chrome
# (hamburger menu / footer). The string is currently empty, so this
# markdown call renders nothing.
hide_streamlit_style = """
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)