Spaces:
Runtime error
more fix
Browse files
- app.py +8 -9
- data_loader_functions.py +12 -10
- sentiment_analysis.py +5 -5
- stock_prediction.py +49 -12
app.py
CHANGED
|
@@ -32,11 +32,13 @@ with left_column:
|
|
| 32 |
|
| 33 |
st.subheader("Vader-based Sentiment Analysis")
|
| 34 |
|
| 35 |
-
with st.spinner("Connecting with
|
| 36 |
df = sentiment_analysis(option_name, datetime(2023, 1, 5))
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
daily_df = aggregate_by_date(df)
|
| 39 |
-
"Current sentiment:", daily_df[
|
| 40 |
|
| 41 |
with right_column:
|
| 42 |
|
|
@@ -48,9 +50,6 @@ with right_column:
|
|
| 48 |
|
| 49 |
st.subheader("LSTM-based 7-day stock price prediction model")
|
| 50 |
|
| 51 |
-
with st.spinner("Loading LSTM model.."):
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
"The stock price tommorow is going up!"
|
| 55 |
-
else:
|
| 56 |
-
"The stock price tommorow is going down!"
|
|
|
|
| 32 |
|
| 33 |
st.subheader("Vader-based Sentiment Analysis")
|
| 34 |
|
| 35 |
+
with st.spinner("Connecting with Hopsworks..."):
|
| 36 |
df = sentiment_analysis(option_name, datetime(2023, 1, 5))
|
| 37 |
+
df_copy = df.copy()
|
| 38 |
+
df_copy = df_copy.set_index('publish_date')
|
| 39 |
+
st.table(df_copy.drop(['body_text', 'text_w_puncts', 'text_tokenized', 'text_w_stopwords', 'text_lemmatized', 'text_stemmed', 'text_processed', 'predicted_class'], axis=1))
|
| 40 |
daily_df = aggregate_by_date(df)
|
| 41 |
+
"Current sentiment:", daily_df.iloc[0]['compound']
|
| 42 |
|
| 43 |
with right_column:
|
| 44 |
|
|
|
|
| 50 |
|
| 51 |
st.subheader("LSTM-based 7-day stock price prediction model")
|
| 52 |
|
| 53 |
+
with st.spinner("Loading LSTM model from Hopsworks.."):
|
| 54 |
+
date, value = model(option_ticker)
|
| 55 |
+
"The predicted stock value on ", date, "is", value
|
|
|
|
|
|
|
|
|
data_loader_functions.py
CHANGED
|
@@ -55,16 +55,18 @@ def scrape_news(urls, df, company):
|
|
| 55 |
def get_news_from_hopsworks():
|
| 56 |
project = hopsworks.login()
|
| 57 |
fs = project.get_feature_store()
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
| 68 |
|
| 69 |
## Formalize the date column
|
| 70 |
def remove_parentheses(s):
|
|
|
|
| 55 |
def get_news_from_hopsworks():
|
| 56 |
project = hopsworks.login()
|
| 57 |
fs = project.get_feature_store()
|
| 58 |
+
news_fg = fs.get_feature_group(name="market_news_fg", version=1)
|
| 59 |
+
# try:
|
| 60 |
+
# feature_view = fs.get_feature_view(name="market_news", version=1)
|
| 61 |
+
# except:
|
| 62 |
+
# news_fg = fs.get_feature_group(name="market_news_fg", version=1)
|
| 63 |
+
# query = news_fg.select_all()
|
| 64 |
+
# feature_view = fs.create_feature_view(name="market_news",
|
| 65 |
+
# version=1,
|
| 66 |
+
# description="Read from market_news_fg",
|
| 67 |
+
# query=query)
|
| 68 |
+
query = news_fg.select_all()
|
| 69 |
+
return query.read()
|
| 70 |
|
| 71 |
## Formalize the date column
|
| 72 |
def remove_parentheses(s):
|
sentiment_analysis.py
CHANGED
|
@@ -45,7 +45,7 @@ def time_2_datetime(x):
|
|
| 45 |
dt_obj = datetime.fromtimestamp(x / 1000)
|
| 46 |
return dt_obj
|
| 47 |
|
| 48 |
-
def fetching_news():
|
| 49 |
articles_df = get_news_from_hopsworks()
|
| 50 |
articles_df.loc[articles_df['ticker'] == company]
|
| 51 |
articles_df['publish_date'] = articles_df['publish_date'].apply(time_2_datetime)
|
|
@@ -196,11 +196,11 @@ def vader_sentiment(articles_processed):
|
|
| 196 |
return articles_processed
|
| 197 |
|
| 198 |
def sentiment_analysis(company, day):
|
| 199 |
-
articles_df =
|
| 200 |
articles_df = select_oneday_news(articles_df, day)
|
| 201 |
-
articles_processed = nlp_processing(articles_df)
|
| 202 |
-
articles_sentimentalized = vader_sentiment(articles_processed)
|
| 203 |
-
return
|
| 204 |
|
| 205 |
## Aggregate News Sentiments Each Day
|
| 206 |
def aggregate_by_date(articles_sentiments):
|
|
|
|
| 45 |
dt_obj = datetime.fromtimestamp(x / 1000)
|
| 46 |
return dt_obj
|
| 47 |
|
| 48 |
+
def fetching_news(company):
|
| 49 |
articles_df = get_news_from_hopsworks()
|
| 50 |
articles_df.loc[articles_df['ticker'] == company]
|
| 51 |
articles_df['publish_date'] = articles_df['publish_date'].apply(time_2_datetime)
|
|
|
|
| 196 |
return articles_processed
|
| 197 |
|
| 198 |
def sentiment_analysis(company, day):
|
| 199 |
+
articles_df = fetching_news(company)
|
| 200 |
articles_df = select_oneday_news(articles_df, day)
|
| 201 |
+
# articles_processed = nlp_processing(articles_df)
|
| 202 |
+
# articles_sentimentalized = vader_sentiment(articles_processed)
|
| 203 |
+
return articles_df
|
| 204 |
|
| 205 |
## Aggregate News Sentiments Each Day
|
| 206 |
def aggregate_by_date(articles_sentiments):
|
stock_prediction.py
CHANGED
|
@@ -1,22 +1,59 @@
|
|
| 1 |
import hopsworks
|
| 2 |
import joblib
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
|
| 6 |
|
| 7 |
-
def model(daily_sentiment, stock_df):
|
| 8 |
-
daily_sentiment = daily_sentiment.rename(columns={'publish_date': 'date', 'ticker': 'name'})
|
| 9 |
-
daily_sentiment['name'] = daily_sentiment['name'].str.upper()
|
| 10 |
-
stock_df['date'] = stock_df['date'].apply(lambda x : x.date())
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
mr = project.get_model_registry()
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import hopsworks
|
| 2 |
import joblib
|
| 3 |
+
import math
|
| 4 |
+
from sklearn.preprocessing import MinMaxScaler
|
| 5 |
+
import numpy as np
|
| 6 |
+
from datetime import timedelta, datetime
|
| 7 |
|
| 8 |
|
| 9 |
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
def model(ticker):
|
| 13 |
+
project = hopsworks.login()
|
| 14 |
|
| 15 |
+
# import data
|
| 16 |
+
fs = project.get_feature_store()
|
| 17 |
+
feature_view = fs.get_feature_view(
|
| 18 |
+
name = 'stock_prediction_fv',
|
| 19 |
+
version = 1
|
| 20 |
+
)
|
| 21 |
|
| 22 |
+
data = feature_view.get_training_data(2)[0]
|
| 23 |
+
data = data.sort_values(by='date')
|
| 24 |
+
|
| 25 |
+
last_date = data['date'].values[-1]
|
| 26 |
+
last_date = datetime.fromtimestamp(int(int(last_date) / 1000))
|
| 27 |
+
date = last_date.date() + timedelta(days=1)
|
| 28 |
+
|
| 29 |
+
data = data.set_index('date')
|
| 30 |
+
data.loc[data['name'] == 'APPLE']
|
| 31 |
+
data.drop(['name', 'predicted_class'], axis=1, inplace=True)
|
| 32 |
+
|
| 33 |
+
# scaling data
|
| 34 |
+
prices = data[['close','neg','neu','pos','compound']]
|
| 35 |
+
scaler = MinMaxScaler(feature_range=(0,1))
|
| 36 |
+
scaled_data = scaler.fit_transform(prices)
|
| 37 |
+
|
| 38 |
+
prediction_list = scaled_data[-60:]
|
| 39 |
+
|
| 40 |
+
x = []
|
| 41 |
+
x.append(prediction_list[-60:])
|
| 42 |
+
x = np.array(x)
|
| 43 |
+
|
| 44 |
+
# import model
|
| 45 |
mr = project.get_model_registry()
|
| 46 |
+
if ticker == 'AAPL':
|
| 47 |
+
remote_model = mr.get_model("LSTM_Apple", version=1)
|
| 48 |
+
elif ticker == 'AMZN':
|
| 49 |
+
remote_model = mr.get_model("LSTM_Amazon", version=1)
|
| 50 |
+
else:
|
| 51 |
+
remote_model = mr.get_model("LSTM_Meta", version=1)
|
| 52 |
+
model_dir = remote_model.download()
|
| 53 |
+
remote_model = joblib.load(model_dir + "/model.pkl")
|
| 54 |
+
|
| 55 |
+
# predict
|
| 56 |
+
out = remote_model.predict(x)
|
| 57 |
+
B=np.hstack((out,scaled_data[ : 1,1:]))
|
| 58 |
+
out = scaler.inverse_transform(B)[0,0]
|
| 59 |
+
return date, out
|