"""Streamlit app: scrape Finviz news headlines for a ticker and score them with VADER."""

import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
from datetime import datetime
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Fetch the VADER lexicon once; quiet=True avoids console spam on every
# Streamlit rerun (the whole script re-executes on each interaction).
nltk.download('vader_lexicon', quiet=True)

# Single shared analyzer instance — constructing one per headline is wasteful.
_SIA = SentimentIntensityAnalyzer()

st.set_page_config(page_title="Daily News with Sentiment Analysis", layout="wide")
st.title('Daily News with Sentiment Analysis')

# ---------------------------------------------------------------------------
# Sidebar inputs
# ---------------------------------------------------------------------------
st.sidebar.title('Input Parameters')
with st.sidebar.expander("How to use:", expanded=False):
    st.markdown("""
1. **Enter Ticker**: Type the stock symbol (e.g., 'AAPL').
2. **Fetch News**: Click 'Get News' for the latest articles.
3. **View Sentiment**: See articles with sentiment scores (-1 to 1).
4. **Sort & Download**: Download as CSV if needed.
""")

with st.sidebar.expander("Stock Ticker Input", expanded=True):
    ticker = st.text_input('Enter Stock Ticker', 'AAPL')


def get_finviz_news_raw(ticker):
    """Scrape the Finviz quote page for *ticker* and return its news table.

    Returns a DataFrame with columns Datetime / Source / Title / Link,
    or None (after showing a Streamlit error) when the page cannot be
    fetched or contains no news table.
    """
    finviz_url = f'https://finviz.com/quote.ashx?t={ticker}'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}

    try:
        # A timeout keeps a hung request from freezing the whole app.
        response = requests.get(finviz_url, headers=headers, timeout=10)
    except requests.RequestException:
        st.error(f"Error fetching data for {ticker}")
        return None
    if response.status_code != 200:
        st.error(f"Error fetching data for {ticker}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    news_table = soup.find("table", class_="fullview-news-outer")
    if not news_table:
        st.error(f"No news found for {ticker}")
        return None

    raw_data = []
    for row in news_table.find_all("tr"):
        date_time_cell = row.find("td", align="right")
        news_source_cell = row.find("td", class_="news-link-right")
        news_title_cell = row.find("a", class_="tab-link-news")
        raw_data.append({
            "Datetime": date_time_cell.get_text().strip() if date_time_cell else '',
            "Source": news_source_cell.get_text() if news_source_cell else '',
            "Title": news_title_cell.get_text().strip() if news_title_cell else '',
            "Link": news_title_cell['href'] if news_title_cell else '',
        })

    return pd.DataFrame(raw_data)


def preprocess_text(text):
    """Strip HTML tags, non-letters, and extra whitespace before scoring."""
    text = re.sub(r'<.*?>', '', text)          # Remove HTML tags
    text = re.sub(r'[^a-zA-Z\s]', '', text)    # Remove special characters and digits
    text = re.sub(r'\s+', ' ', text).strip()   # Collapse repeated whitespace
    return text


def analyze_sentiment(text):
    """Return the VADER compound score in [-1, 1] (negative .. positive)."""
    return _SIA.polarity_scores(text)['compound']


def _split_datetime_column(df):
    """Split the raw Finviz 'Datetime' column into 'Date' and 'Time' columns.

    Finviz only prints the date on the first headline of each day; subsequent
    rows carry a bare time, so the last seen date is carried forward.
    """
    today_date = datetime.today().strftime('%Y-%m-%d')
    current_date = None
    dates, times = [], []

    for datetime_value in df['Datetime']:
        if 'Today' in datetime_value:
            datetime_value = datetime_value.replace('Today', today_date)

        # A leading non-numeric token before a space means a date is present.
        if ' ' in datetime_value and not datetime_value.split(' ')[0].isdigit():
            parts = datetime_value.split(' ')
            current_date = parts[0]
            time_value = ' '.join(parts[1:])
        else:
            # Only a time was provided; reuse the most recent date.
            time_value = datetime_value

        dates.append(current_date)
        times.append(time_value)

    df['Date'] = dates
    df['Time'] = times
    return df


# ---------------------------------------------------------------------------
# Fetch and display news when the button is pressed
# ---------------------------------------------------------------------------
if st.sidebar.button('Get News'):
    news_df_raw = get_finviz_news_raw(ticker)
    if news_df_raw is not None and not news_df_raw.empty:
        df = _split_datetime_column(news_df_raw.copy())

        # Score each headline on its cleaned text.
        df['Cleaned_Title'] = df['Title'].apply(preprocess_text)
        df['Sentiment'] = df['Cleaned_Title'].apply(analyze_sentiment)

        df.drop(columns=['Cleaned_Title', 'Datetime', 'Source'], inplace=True)
        df = df.reset_index(drop=True)

        # Render each title as a real hyperlink (the table is emitted with
        # escape=False below, so the anchor markup survives).
        df['Title'] = df.apply(
            lambda x: f'<a href="{x["Link"]}" target="_blank">{x["Title"]}</a>',
            axis=1,
        )
        df.drop(columns=['Link'], inplace=True)

        st.write(f"## News for {ticker}")
        st.markdown(df.to_html(escape=False, index=False), unsafe_allow_html=True)

        # Option to download data as CSV
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name=f'{ticker}_news_sentiment.csv',
            mime='text/csv',
        )
    else:
        st.write(f"No news found for {ticker}")

# Hide Streamlit's default menu and footer chrome.
# NOTE(review): the original string was empty — this restores the CSS it
# evidently intended to inject; confirm against the deployed styling.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)