# Daily News with Sentiment Analysis — Streamlit app
# Scrapes Finviz headlines for a ticker and scores them with NLTK VADER.
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
from datetime import datetime
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# quiet=True: the download runs on every Streamlit rerun; suppress the
# repeated console chatter (it is a no-op once the lexicon is cached).
nltk.download('vader_lexicon', quiet=True)
st.set_page_config(page_title="Daily News with Sentiment Analysis", layout="wide")
st.title('Daily News with Sentiment Analysis')

# Sidebar inputs
st.sidebar.title('Input Parameters')

# Collapsed-by-default usage notes
with st.sidebar.expander("How to use:", expanded=False):
    st.markdown("""
    1. **Enter Ticker**: Type the stock symbol (e.g., 'AAPL').
    2. **Fetch News**: Click 'Get News' for the latest articles.
    3. **View Sentiment**: See articles with sentiment scores (-1 to 1).
    4. **Sort & Download**: Download as CSV if needed.
    """)

# Ticker input in its own expander, open by default
with st.sidebar.expander("Stock Ticker Input", expanded=True):
    ticker = st.text_input('Enter Stock Ticker', 'AAPL')
def get_finviz_news_raw(ticker):
    """Scrape the raw news table for *ticker* from Finviz.

    Returns a DataFrame with columns Datetime/Source/Title/Link, or None
    when the page cannot be fetched or contains no news table (an
    ``st.error`` is shown in either case).
    """
    finviz_url = f'https://finviz.com/quote.ashx?t={ticker}'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
    try:
        # timeout prevents the Streamlit script from hanging forever when
        # Finviz is slow or unreachable; network errors fall back to the
        # same error-and-None contract as a bad status code.
        response = requests.get(finviz_url, headers=headers, timeout=10)
    except requests.RequestException:
        st.error(f"Error fetching data for {ticker}")
        return None
    if response.status_code != 200:
        st.error(f"Error fetching data for {ticker}")
        return None
    soup = BeautifulSoup(response.text, "html.parser")
    news_table = soup.find("table", class_="fullview-news-outer")
    if not news_table:
        st.error(f"No news found for {ticker}")
        return None
    news_rows = news_table.find_all("tr")
    raw_data = []
    for row in news_rows:
        # Each row holds a right-aligned timestamp cell, a source cell,
        # and an anchor with the headline text + link. Any missing piece
        # degrades to an empty string rather than raising.
        date_time_cell = row.find("td", align="right")
        news_source_cell = row.find("td", class_="news-link-right")
        news_title_cell = row.find("a", class_="tab-link-news")
        date_time_str = date_time_cell.get_text().strip() if date_time_cell else ''
        news_source = news_source_cell.get_text() if news_source_cell else ''
        news_title = news_title_cell.get_text().strip() if news_title_cell else ''
        news_link = news_title_cell['href'] if news_title_cell else ''
        raw_data.append({
            "Datetime": date_time_str,
            "Source": news_source,
            "Title": news_title,
            "Link": news_link
        })
    df = pd.DataFrame(raw_data)
    return df
| # Preprocess text for sentiment analysis | |
def preprocess_text(text):
    """Clean a headline for sentiment scoring.

    Strips HTML tags, drops everything except ASCII letters and
    whitespace, and collapses runs of whitespace to single spaces.
    """
    without_tags = re.sub(r'<.*?>', '', text)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', without_tags)
    # split()/join collapses all whitespace runs and trims the ends
    return ' '.join(letters_only.split())
| # Analyze sentiment using VADER | |
def analyze_sentiment(text):
    """Return the VADER compound sentiment score for *text*.

    The score lies in [-1, 1]: -1 is most negative, 1 most positive.
    """
    # Reuse one analyzer instance across calls: this function is applied
    # per-row via DataFrame.apply, and constructing the analyzer reloads
    # the VADER lexicon each time.
    sia = getattr(analyze_sentiment, "_sia", None)
    if sia is None:
        sia = SentimentIntensityAnalyzer()
        analyze_sentiment._sia = sia
    return sia.polarity_scores(text)['compound']
| # Fetch and display news when the button is pressed | |
| if st.sidebar.button('Get News'): | |
| news_df_raw = get_finviz_news_raw(ticker) | |
| if news_df_raw is not None and not news_df_raw.empty: | |
| df = news_df_raw.copy() | |
| # Get today's date | |
| today_date = datetime.today().strftime('%Y-%m-%d') | |
| # Initialize the date variable | |
| current_date = None | |
| # Lists to store the new column values | |
| dates = [] | |
| times = [] | |
| # Process each row in the dataframe | |
| for index, row in df.iterrows(): | |
| datetime_value = row['Datetime'] | |
| # Replace 'Today' with today's date | |
| if 'Today' in datetime_value: | |
| datetime_value = datetime_value.replace('Today', today_date) | |
| # Check if the datetime_value contains a date | |
| if ' ' in datetime_value and not datetime_value.split(' ')[0].isdigit(): | |
| # Extract date and time | |
| current_date = datetime_value.split(' ')[0] | |
| time_value = ' '.join(datetime_value.split(' ')[1:]) | |
| else: | |
| # Only time is provided | |
| time_value = datetime_value | |
| # Add the current date and time to the lists | |
| dates.append(current_date) | |
| times.append(time_value) | |
| # Add the new columns to the dataframe | |
| df['Date'] = dates | |
| df['Time'] = times | |
| # Add Sentiment and Relevance | |
| df['Cleaned_Title'] = df['Title'].apply(preprocess_text) | |
| df['Sentiment'] = df['Cleaned_Title'].apply(analyze_sentiment) | |
| # Remove Cleaned_Title column | |
| df.drop(columns=['Cleaned_Title', 'Datetime', 'Source'], inplace=True) | |
| df = df.reset_index(drop=True) | |
| # Make links clickable | |
| df['Title'] = df.apply(lambda x: f'<a href="{x["Link"]}" target="_blank">{x["Title"]}</a>', axis=1) | |
| df.drop(columns=['Link'], inplace=True) | |
| st.write(f"## News for {ticker}") | |
| st.markdown(df.to_html(escape=False, index=False), unsafe_allow_html=True) | |
| # Option to download data as CSV | |
| csv = df.to_csv(index=False).encode('utf-8') | |
| st.download_button( | |
| label="Download data as CSV", | |
| data=csv, | |
| file_name=f'{ticker}_news_sentiment.csv', | |
| mime='text/csv', | |
| ) | |
| else: | |
| st.write(f"No news found for {ticker}") | |
# Hide Streamlit's built-in main menu and footer for a cleaner page
st.markdown(
    """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
""",
    unsafe_allow_html=True,
)