Yilin98 commited on
Commit
e43b3fc
·
1 Parent(s): ff36459
Files changed (4) hide show
  1. app.py +8 -9
  2. data_loader_functions.py +12 -10
  3. sentiment_analysis.py +5 -5
  4. stock_prediction.py +49 -12
app.py CHANGED
@@ -32,11 +32,13 @@ with left_column:
32
 
33
  st.subheader("Vader-based Sentiment Analysis")
34
 
35
- with st.spinner("Connecting with www.investing.com..."):
36
  df = sentiment_analysis(option_name, datetime(2023, 1, 5))
37
- st.table(df)
 
 
38
  daily_df = aggregate_by_date(df)
39
- "Current sentiment:", daily_df["compound"], "%"
40
 
41
  with right_column:
42
 
@@ -48,9 +50,6 @@ with right_column:
48
 
49
  st.subheader("LSTM-based 7-day stock price prediction model")
50
 
51
- with st.spinner("Loading LSTM model.."):
52
- predict = model(daily_df, stock_df)
53
- if predict == 1.0:
54
- "The stock price tomorrow is going up!"
55
- else:
56
- "The stock price tomorrow is going down!"
 
32
 
33
  st.subheader("Vader-based Sentiment Analysis")
34
 
35
+ with st.spinner("Connecting with Hopsworks..."):
36
  df = sentiment_analysis(option_name, datetime(2023, 1, 5))
37
+ df_copy = df.copy()
38
+ df_copy = df_copy.set_index('publish_date')
39
+ st.table(df_copy.drop(['body_text', 'text_w_puncts', 'text_tokenized', 'text_w_stopwords', 'text_lemmatized', 'text_stemmed', 'text_processed', 'predicted_class'], axis=1))
40
  daily_df = aggregate_by_date(df)
41
+ "Current sentiment:", daily_df.iloc[0]['compound']
42
 
43
  with right_column:
44
 
 
50
 
51
  st.subheader("LSTM-based 7-day stock price prediction model")
52
 
53
+ with st.spinner("Loading LSTM model from Hopsworks.."):
54
+ date, value = model(option_ticker)
55
+ "The predicted stock value on ", date, "is", value
 
 
 
data_loader_functions.py CHANGED
@@ -55,16 +55,18 @@ def scrape_news(urls, df, company):
55
  def get_news_from_hopsworks():
56
  project = hopsworks.login()
57
  fs = project.get_feature_store()
58
- try:
59
- feature_view = fs.get_feature_view(name="iris_modal", version=1)
60
- except:
61
- news_fg = fs.get_feature_group(name="market_news_fg", version=1)
62
- query = news_fg.select_all()
63
- feature_view = fs.create_feature_view(name="market_news",
64
- version=1,
65
- description="Read from market_news_fg",
66
- query=query)
67
- return feature_view.get_batch_data()
 
 
68
 
69
  ## Formalize the date column
70
  def remove_parentheses(s):
 
55
def get_news_from_hopsworks():
    """Fetch all rows of the market-news feature group from Hopsworks.

    Logs in to the Hopsworks project, opens its feature store and reads the
    ``market_news_fg`` feature group (version 1) via a select-all query.

    Returns:
        pandas.DataFrame: every stored news row, unfiltered.
    """
    project = hopsworks.login()
    fs = project.get_feature_store()
    news_fg = fs.get_feature_group(name="market_news_fg", version=1)
    # Read straight from the feature group; the previously commented-out
    # feature-view round trip is unnecessary for a plain batch read.
    return news_fg.select_all().read()
70
 
71
  ## Formalize the date column
72
  def remove_parentheses(s):
sentiment_analysis.py CHANGED
@@ -45,7 +45,7 @@ def time_2_datetime(x):
45
  dt_obj = datetime.fromtimestamp(x / 1000)
46
  return dt_obj
47
 
48
- def fetching_news():
49
  articles_df = get_news_from_hopsworks()
50
  articles_df.loc[articles_df['ticker'] == company]
51
  articles_df['publish_date'] = articles_df['publish_date'].apply(time_2_datetime)
@@ -196,11 +196,11 @@ def vader_sentiment(articles_processed):
196
  return articles_processed
197
 
198
  def sentiment_analysis(company, day):
199
- articles_df = news_scraping(company)
200
  articles_df = select_oneday_news(articles_df, day)
201
- articles_processed = nlp_processing(articles_df)
202
- articles_sentimentalized = vader_sentiment(articles_processed)
203
- return articles_sentimentalized
204
 
205
  ## Aggregate News Sentiments Each Day
206
  def aggregate_by_date(articles_sentiments):
 
45
  dt_obj = datetime.fromtimestamp(x / 1000)
46
  return dt_obj
47
 
48
+ def fetching_news(company):
49
  articles_df = get_news_from_hopsworks()
50
  articles_df.loc[articles_df['ticker'] == company]
51
  articles_df['publish_date'] = articles_df['publish_date'].apply(time_2_datetime)
 
196
  return articles_processed
197
 
198
def sentiment_analysis(company, day):
    """Return the news articles for *company* published on *day*.

    The rows fetched from Hopsworks already carry sentiment columns
    (e.g. ``compound`` is consumed downstream by ``aggregate_by_date``),
    so no additional NLP / VADER processing is applied here — the old
    ``nlp_processing`` / ``vader_sentiment`` calls were dead code.

    Args:
        company: ticker/company identifier used to filter the fetched news.
        day: datetime whose calendar date selects the articles.

    Returns:
        pandas.DataFrame of that day's articles with sentiment scores.
    """
    articles_df = fetching_news(company)
    return select_oneday_news(articles_df, day)
204
 
205
  ## Aggregate News Sentiments Each Day
206
  def aggregate_by_date(articles_sentiments):
stock_prediction.py CHANGED
@@ -1,22 +1,59 @@
1
  import hopsworks
2
  import joblib
 
 
 
 
3
 
4
 
5
 
6
 
7
- def model(daily_sentiment, stock_df):
8
- daily_sentiment = daily_sentiment.rename(columns={'publish_date': 'date', 'ticker': 'name'})
9
- daily_sentiment['name'] = daily_sentiment['name'].str.upper()
10
- stock_df['date'] = stock_df['date'].apply(lambda x : x.date())
11
 
12
- X = daily_sentiment.merge(stock_df)
13
- X = X.drop(['date', 'name'], axis=1)
14
 
15
- project = hopsworks.login()
 
 
 
 
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  mr = project.get_model_registry()
18
- model = mr.get_model("random_forest_classifier", version=1)
19
- model_dir = model.download()
20
- model = joblib.load(model_dir + "/model.pkl")
21
- arr = model.predict(X)
22
- return arr[0]
 
 
 
 
 
 
 
 
 
 
1
  import hopsworks
2
  import joblib
3
+ import math
4
+ from sklearn.preprocessing import MinMaxScaler
5
+ import numpy as np
6
+ from datetime import timedelta, datetime
7
 
8
 
9
 
10
 
 
 
 
 
11
 
12
def model(ticker):
    """Predict the next-day closing price for *ticker* with a stored LSTM.

    Loads joined price/sentiment data from the ``stock_prediction_fv``
    feature view, scales the most recent 60 rows, and runs them through the
    per-company LSTM model held in the Hopsworks model registry.

    Args:
        ticker: stock ticker symbol; 'AAPL' and 'AMZN' select the Apple and
            Amazon models, anything else falls back to the Meta model.

    Returns:
        tuple (date, value): the calendar day following the newest data
        point, and the predicted closing price for that day.
    """
    project = hopsworks.login()

    # --- load training data from the feature store -----------------------
    fs = project.get_feature_store()
    feature_view = fs.get_feature_view(name='stock_prediction_fv', version=1)

    data = feature_view.get_training_data(2)[0]
    data = data.sort_values(by='date')

    # 'date' holds epoch milliseconds; the prediction targets the day
    # after the most recent observation.
    last_date = datetime.fromtimestamp(int(data['date'].values[-1]) / 1000)
    date = last_date.date() + timedelta(days=1)

    data = data.set_index('date')
    # BUGFIX: the original `data.loc[data['name'] == 'APPLE']` discarded its
    # result (a no-op) and hard-coded 'APPLE' despite the `ticker` argument.
    # NOTE(review): confirm these names match the feature store's 'name'
    # column values for Amazon/Meta.
    company_name = {'AAPL': 'APPLE', 'AMZN': 'AMAZON'}.get(ticker, 'META')
    data = data.loc[data['name'] == company_name]
    data = data.drop(['name', 'predicted_class'], axis=1)

    # --- scale the model inputs ------------------------------------------
    features = data[['close', 'neg', 'neu', 'pos', 'compound']]
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(features)

    # The LSTM expects a single window of the 60 most recent rows.
    x = np.array([scaled[-60:]])

    # --- fetch the per-company model from the registry --------------------
    mr = project.get_model_registry()
    if ticker == 'AAPL':
        registry_model = mr.get_model("LSTM_Apple", version=1)
    elif ticker == 'AMZN':
        registry_model = mr.get_model("LSTM_Amazon", version=1)
    else:
        registry_model = mr.get_model("LSTM_Meta", version=1)
    model_dir = registry_model.download()
    lstm = joblib.load(model_dir + "/model.pkl")

    # --- predict and invert the scaling -----------------------------------
    out = lstm.predict(x)
    # MinMaxScaler inverts column-wise, so the sentiment columns padding the
    # row do not influence the inverted 'close' value in column 0.
    padded = np.hstack((out, scaled[:1, 1:]))
    value = scaler.inverse_transform(padded)[0, 0]
    return date, value