fin_proj_2 / app.py
YaakovY's picture
3
07b7a5b
import streamlit as st
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import json # for graph plotting in website
# NLTK VADER for sentiment analysis
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import subprocess
import os
import datetime
st.set_page_config(page_title = "Bohmian's Stock News Sentiment Analyzer", layout = "wide")
def get_news(ticker):
url = finviz_url + ticker
req = Request(url=url,headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
response = urlopen(req)
# Read the contents of the file into 'html'
html = BeautifulSoup(response)
# Find 'news-table' in the Soup and load it into 'news_table'
news_table = html.find(id='news-table')
return news_table
# parse news into dataframe
def parse_news(news_table):
parsed_news = []
today_string = datetime.datetime.today().strftime('%Y-%m-%d')
for x in news_table.findAll('tr'):
try:
# read the text from each tr tag into text
# get text from a only
text = x.a.get_text()
# splite text in the td tag into a list
date_scrape = x.td.text.split()
# if the length of 'date_scrape' is 1, load 'time' as the only element
if len(date_scrape) == 1:
time = date_scrape[0]
# else load 'date' as the 1st element and 'time' as the second
else:
date = date_scrape[0]
time = date_scrape[1]
# Append ticker, date, time and headline as a list to the 'parsed_news' list
parsed_news.append([date, time, text])
except:
pass
# Set column names
columns = ['date', 'time', 'headline']
# Convert the parsed_news list into a DataFrame called 'parsed_and_scored_news'
parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
# Create a pandas datetime object from the strings in 'date' and 'time' column
parsed_news_df['date'] = parsed_news_df['date'].replace("Today", today_string)
parsed_news_df['datetime'] = pd.to_datetime(parsed_news_df['date'] + ' ' + parsed_news_df['time'])
return parsed_news_df
def score_news(parsed_news_df):
# Instantiate the sentiment intensity analyzer
vader = SentimentIntensityAnalyzer()
# Iterate through the headlines and get the polarity scores using vader
scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()
# Convert the 'scores' list of dicts into a DataFrame
scores_df = pd.DataFrame(scores)
# Join the DataFrames of the news and the list of dicts
parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
parsed_and_scored_news = parsed_and_scored_news.drop(['date', 'time'], 1)
parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
return parsed_and_scored_news
# for extracting data from finviz
finviz_url = 'https://finviz.com/quote.ashx?t='
st.header("Data Sience Project: Stock News Sentiment Analyzer")
ticker = st.text_input('Enter Stock Ticker', '').upper()
df = pd.DataFrame({'datetime': datetime.datetime.now(), 'ticker': ticker}, index = [0])
try:
st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
news_table = get_news(ticker)
parsed_news_df = parse_news(news_table)
print(parsed_news_df)
parsed_and_scored_news = score_news(parsed_news_df)
st.table(parsed_and_scored_news)
except Exception as e:
print(str(e))
st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)