import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
from datetime import datetime
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Fetch the VADER lexicon once; quiet=True keeps the download message out of
# the app output on every Streamlit rerun (nltk skips it if already present).
nltk.download('vader_lexicon', quiet=True)

st.set_page_config(page_title="Daily News with Sentiment Analysis", layout="wide")
st.title('Daily News with Sentiment Analysis')

# Sidebar inputs
st.sidebar.title('Input Parameters')
with st.sidebar.expander("How to use:", expanded=False):
    st.markdown("""
1. **Enter Ticker**: Type the stock symbol (e.g., 'AAPL').
2. **Fetch News**: Click 'Get News' for the latest articles.
3. **View Sentiment**: See articles with sentiment scores (-1 to 1).
4. **Sort & Download**: Download as CSV if needed.
""")

# Wrapping the stock ticker input in an expander
with st.sidebar.expander("Stock Ticker Input", expanded=True):
    ticker = st.text_input('Enter Stock Ticker', 'AAPL')
def get_finviz_news_raw(ticker):
    """Scrape the news table for *ticker* from Finviz.

    Returns a DataFrame with columns Datetime / Source / Title / Link,
    or None when the request fails or no news table is present (an
    ``st.error`` message is shown in either case).
    """
    finviz_url = f'https://finviz.com/quote.ashx?t={ticker}'
    # Finviz rejects requests without a browser-like User-Agent.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
    try:
        # timeout prevents the app from hanging forever on a stalled request.
        response = requests.get(finviz_url, headers=headers, timeout=10)
    except requests.RequestException:
        st.error(f"Error fetching data for {ticker}")
        return None
    if response.status_code != 200:
        st.error(f"Error fetching data for {ticker}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    news_table = soup.find("table", class_="fullview-news-outer")
    if not news_table:
        st.error(f"No news found for {ticker}")
        return None

    raw_data = []
    for row in news_table.find_all("tr"):
        # Each row holds a right-aligned timestamp cell, a source cell,
        # and an anchor with the headline text and link.
        date_time_cell = row.find("td", align="right")
        news_source_cell = row.find("td", class_="news-link-right")
        news_title_cell = row.find("a", class_="tab-link-news")
        raw_data.append({
            "Datetime": date_time_cell.get_text().strip() if date_time_cell else '',
            "Source": news_source_cell.get_text() if news_source_cell else '',
            "Title": news_title_cell.get_text().strip() if news_title_cell else '',
            "Link": news_title_cell['href'] if news_title_cell else '',
        })
    return pd.DataFrame(raw_data)
# Normalise a headline before sentiment scoring.
def preprocess_text(text):
    """Return *text* with HTML tags, non-letter characters and
    redundant whitespace removed."""
    without_tags = re.sub(r'<.*?>', '', text)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', without_tags)
    return re.sub(r'\s+', ' ', letters_only).strip()
# Shared VADER analyzer, created lazily on first use: building a
# SentimentIntensityAnalyzer reloads the lexicon, so doing it per call
# (once per headline) was needlessly expensive.
_vader_analyzer = None


def analyze_sentiment(text):
    """Return the VADER compound sentiment score of *text*.

    The score lies in [-1, 1]: -1 is most negative, 1 most positive.
    """
    global _vader_analyzer
    if _vader_analyzer is None:
        _vader_analyzer = SentimentIntensityAnalyzer()
    return _vader_analyzer.polarity_scores(text)['compound']
# Fetch and display news when the button is pressed
if st.sidebar.button('Get News'):
    news_df_raw = get_finviz_news_raw(ticker)
    if news_df_raw is not None and not news_df_raw.empty:
        df = news_df_raw.copy()

        # Finviz prints the date only on the first headline of each day;
        # later rows carry just a time. Walk the rows, forward-filling the
        # last seen date while splitting "Datetime" into Date / Time.
        today_date = datetime.today().strftime('%Y-%m-%d')
        current_date = None
        dates = []
        times = []
        for index, row in df.iterrows():
            datetime_value = row['Datetime']
            # Rows marked 'Today HH:MM' get the literal word swapped for the date.
            if 'Today' in datetime_value:
                datetime_value = datetime_value.replace('Today', today_date)
            # A non-numeric token before the first space means a date is present.
            if ' ' in datetime_value and not datetime_value.split(' ')[0].isdigit():
                current_date = datetime_value.split(' ')[0]
                time_value = ' '.join(datetime_value.split(' ')[1:])
            else:
                # Time-only row: keep the most recently seen date.
                time_value = datetime_value
            dates.append(current_date)
            times.append(time_value)
        df['Date'] = dates
        df['Time'] = times

        # Score each headline with VADER on a cleaned copy of its title,
        # then drop the intermediate and no-longer-needed columns.
        df['Cleaned_Title'] = df['Title'].apply(preprocess_text)
        df['Sentiment'] = df['Cleaned_Title'].apply(analyze_sentiment)
        df.drop(columns=['Cleaned_Title', 'Datetime', 'Source'], inplace=True)
        df = df.reset_index(drop=True)

        # Render each title as a clickable link, then drop the raw URL column.
        df['Title'] = df.apply(lambda x: f'<a href="{x["Link"]}" target="_blank">{x["Title"]}</a>', axis=1)
        df.drop(columns=['Link'], inplace=True)

        st.write(f"## News for {ticker}")
        # escape=False keeps the <a> tags live in the rendered table.
        st.markdown(df.to_html(escape=False, index=False), unsafe_allow_html=True)

        # Option to download data as CSV
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name=f'{ticker}_news_sentiment.csv',
            mime='text/csv',
        )
    else:
        st.write(f"No news found for {ticker}")
# Inject CSS that hides Streamlit's default hamburger menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)