File size: 5,495 Bytes
bc819d0
 
 
 
 
 
 
 
 
 
 
 
3eae18d
d7c6826
bc819d0
55bb90f
 
 
eb25293
f909ac9
 
 
 
 
 
bc819d0
92dd357
 
 
bc819d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3eae18d
 
 
 
bc819d0
3eae18d
bc819d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92dd357
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import html
import re
from datetime import datetime

import nltk
import numpy as np
import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Fetch the VADER lexicon resource; SentimentIntensityAnalyzer (used further
# down in this script) is the consumer of it.
nltk.download('vader_lexicon')

# Page-level settings (browser tab title, wide layout) and the on-page heading.
st.set_page_config(page_title="Daily News with Sentiment Analysis", layout="wide")
st.title('Daily News with Sentiment Analysis')

# Sidebar inputs
st.sidebar.title('Input Parameters')

# Collapsed-by-default help panel with usage instructions.
with st.sidebar.expander("How to use:", expanded=False):
    st.markdown("""
    1. **Enter Ticker**: Type the stock symbol (e.g., 'AAPL').
    2. **Fetch News**: Click 'Get News' for the latest articles.
    3. **View Sentiment**: See articles with sentiment scores (-1 to 1).
    4. **Sort & Download**: Download as CSV if needed.
    """)

# Stock ticker input, placed in an expander that starts open. `ticker` is a
# module-level name read later by the 'Get News' handler; it defaults to 'AAPL'.
with st.sidebar.expander("Stock Ticker Input", expanded=True):
    ticker = st.text_input('Enter Stock Ticker', 'AAPL')

def get_finviz_news_raw(ticker):
    """Scrape the Finviz quote page for ``ticker`` and return its news table.

    Args:
        ticker: Stock symbol to look up; sent as the ``t`` query parameter.

    Returns:
        A ``pandas.DataFrame`` with one row per ``<tr>`` of the news table and
        the string columns ``Datetime``, ``Source``, ``Title`` and ``Link``
        (an empty string wherever the corresponding cell is missing).
        Returns ``None``, after reporting the problem with ``st.error``, when
        the request fails, the response status is not 200, or the page has no
        ``fullview-news-outer`` table.
    """
    finviz_url = 'https://finviz.com/quote.ashx'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}

    try:
        # `params` URL-encodes whatever the user typed; `timeout` stops a
        # stalled connection from blocking the app indefinitely.
        response = requests.get(
            finviz_url,
            params={'t': ticker},
            headers=headers,
            timeout=10,
        )
    except requests.RequestException:
        # Network-level failures are reported the same way as a bad status.
        st.error(f"Error fetching data for {ticker}")
        return None

    if response.status_code != 200:
        st.error(f"Error fetching data for {ticker}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    news_table = soup.find("table", class_="fullview-news-outer")

    if not news_table:
        st.error(f"No news found for {ticker}")
        return None

    raw_data = []

    for row in news_table.find_all("tr"):
        date_time_cell = row.find("td", align="right")
        news_source_cell = row.find("td", class_="news-link-right")
        news_title_cell = row.find("a", class_="tab-link-news")

        raw_data.append({
            "Datetime": date_time_cell.get_text().strip() if date_time_cell else '',
            "Source": news_source_cell.get_text() if news_source_cell else '',
            "Title": news_title_cell.get_text().strip() if news_title_cell else '',
            # .get() tolerates an anchor that carries no href attribute.
            "Link": news_title_cell.get('href', '') if news_title_cell else '',
        })

    # Naming the columns keeps the schema stable even when the table has no rows.
    return pd.DataFrame(raw_data, columns=["Datetime", "Source", "Title", "Link"])

# Normalize a headline before it is scored for sentiment
def preprocess_text(text):
    """Return ``text`` reduced to ASCII letters separated by single spaces.

    The substitutions run in order: drop anything that looks like an HTML tag,
    drop every character that is neither an ASCII letter nor whitespace, then
    collapse each whitespace run to one space. Leading and trailing whitespace
    is trimmed from the result.
    """
    substitutions = (
        (r'<.*?>', ''),           # HTML tags
        (r'[^a-zA-Z\s]', ''),     # digits, punctuation, other symbols
        (r'\s+', ' '),            # runs of whitespace
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# Analyze sentiment using VADER
_sentiment_analyzer = None  # created on first use, then shared by later calls


def analyze_sentiment(text):
    """Return the VADER compound sentiment score for ``text``.

    The analyzer is constructed once and reused, so scoring a whole column of
    headlines does not rebuild it for every row.

    Args:
        text: The string to score.

    Returns:
        The ``'compound'`` entry of ``polarity_scores(text)``: a score between
        -1 (negative) and 1 (positive).
    """
    global _sentiment_analyzer
    if _sentiment_analyzer is None:
        _sentiment_analyzer = SentimentIntensityAnalyzer()
    return _sentiment_analyzer.polarity_scores(text)['compound']

def _split_date_and_time(datetime_values, today_date):
    """Split scraped timestamp strings into parallel date and time lists.

    A value whose first space-separated token is not purely digits is treated
    as "<date> <time>"; its date is remembered and carried forward to the
    following values that hold only a time. The word 'Today' is replaced with
    ``today_date`` before splitting.

    Args:
        datetime_values: Iterable of timestamp strings, in page order.
        today_date: String substituted for 'Today'.

    Returns:
        ``(dates, times)``: two lists with one entry per input value. A date
        entry is ``None`` until the first value carrying a date is seen.
    """
    dates = []
    times = []
    current_date = None

    for raw_value in datetime_values:
        value = raw_value.replace('Today', today_date)
        head, separator, tail = value.partition(' ')

        if separator and not head.isdigit():
            # Value carries its own date: remember it for the rows that follow.
            current_date = head
            time_value = tail
        else:
            # Only a time is present; reuse the most recent date.
            time_value = value

        dates.append(current_date)
        times.append(time_value)

    return dates, times


# Fetch and display news when the button is pressed
if st.sidebar.button('Get News'):
    news_df_raw = get_finviz_news_raw(ticker)

    if news_df_raw is not None and not news_df_raw.empty:
        df = news_df_raw.copy()

        # Split the scraped timestamp into separate Date and Time columns.
        today_date = datetime.today().strftime('%Y-%m-%d')
        dates, times = _split_date_and_time(df['Datetime'], today_date)
        df['Date'] = dates
        df['Time'] = times

        # Score each headline on a cleaned copy of its text.
        df['Sentiment'] = df['Title'].apply(preprocess_text).apply(analyze_sentiment)

        # Drop the columns that are neither displayed nor exported.
        df = df.drop(columns=['Datetime', 'Source']).reset_index(drop=True)

        # Build the HTML table from a separate frame so the exported data keeps
        # the plain headline and URL. Both values come from a scraped page and
        # are rendered with unsafe_allow_html, so escape them before embedding.
        display_df = df.drop(columns=['Link'])
        display_df['Title'] = [
            f'<a href="{html.escape(link)}" target="_blank">{html.escape(title)}</a>'
            for link, title in zip(df['Link'], df['Title'])
        ]

        st.write(f"## News for {ticker}")
        st.markdown(display_df.to_html(escape=False, index=False), unsafe_allow_html=True)

        # Option to download data as CSV (plain Title and Link, no HTML markup)
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name=f'{ticker}_news_sentiment.csv',
            mime='text/csv',
        )
    else:
        st.write(f"No news found for {ticker}")

# CSS that sets `visibility: hidden` on the element with id "MainMenu" and on
# the <footer> element.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
# Emit the <style> block into the page; unsafe_allow_html=True is passed so the
# markup is treated as HTML rather than as text to display.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)