rzambrano commited on
Commit
4cef5e1
·
1 Parent(s): 481d4b9

first commit

Browse files
Files changed (3) hide show
  1. app.py +26 -0
  2. predict_page.py +622 -0
  3. requirements.txt +16 -0
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## -- REQUIRED LIBRARIES -- ##
2
+ import streamlit as st
3
+
4
+ st.set_page_config(page_title='Models')
5
+
6
+ ## -- -- ##
7
+
8
def welcome_page():
    """Render the landing page and, on request, the yield-curve prediction page.

    The prediction page lives in ``predict_page``; that module downloads the
    data, fits the model and renders itself while it is being imported.
    Python caches imported modules, so a second button click in the same
    server process would import nothing and therefore render nothing.  To
    keep the page visible on every click, the page function is called
    explicitly whenever the module was already loaded.
    """
    import sys

    st.title("Welcome to Models!")
    st.write("""Click on the model name to load and run the model""")
    st.write("""**Beware, the loading and fitting time of some models may take up to 15 minutes**""")
    if st.button(label='Yield Curve Prediction with XGBoost Model'):
        with st.spinner('Fetching the data... fitting the model... predicting...'):
            # Must be checked BEFORE the import: the first import renders the
            # page as a side effect, later imports are no-ops.
            alreadyLoaded = 'predict_page' in sys.modules
            from predict_page import show_predict_page
            if alreadyLoaded:
                show_predict_page()
        st.balloons()
        st.success('Done!')
20
+
21
+ welcome_page()
22
+
23
+
24
+
25
+
26
+ #from predict_page import show_predict_page
predict_page.py ADDED
@@ -0,0 +1,622 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## -- REQUIRED LIBRARIES -- ##
2
+ import streamlit as st
3
+ import pickle
4
+
5
+ import pandas as pd
6
+ import numpy as np
7
+ from matplotlib.pyplot import cm
8
+ import matplotlib.pyplot as plt
9
+
10
+ import re
11
+
12
+ from tqdm import tqdm
13
+
14
+ import requests
15
+ from bs4 import BeautifulSoup
16
+ import json
17
+ from lxml import objectify
18
+ from lxml import etree
19
+ from lxml import html
20
+ import lxml.html
21
+ import lxml.html.soupparser
22
+
23
+ import datetime
24
+ from datetime import datetime, date, time
25
+ from datetime import timedelta
26
+ from dateutil.relativedelta import relativedelta
27
+
28
+ import yfinance as yahooFinance
29
+
30
+ import sklearn
31
+ import tensorflow as tf
32
+ from tensorflow import keras
33
+
34
+ from sklearn.preprocessing import MinMaxScaler
35
+ from sklearn.metrics import mean_squared_error
36
+ from sklearn.multioutput import MultiOutputRegressor
37
+ from sklearn.linear_model import ElasticNet
38
+ from sklearn.linear_model import Ridge
39
+ from sklearn.model_selection import train_test_split
40
+ from sklearn.metrics import accuracy_score
41
+ from sklearn.model_selection import RandomizedSearchCV
42
+ from sklearn.model_selection import GridSearchCV
43
+
44
+ import xgboost
45
+ from xgboost import XGBRegressor
46
+
47
+ ## -- -- ##
48
+
49
+ ## -- U.S. Treasury Yield Curve Data -- ##
50
+
51
def extractEntries(sopa):
    """Return the markup of every ``content`` element as a list of strings.

    Assumes ``sopa`` is a bs4 object (anything exposing ``find_all``) built
    from the U.S. Treasury yield-curve feed.

    Each element is stringified on its own instead of stringifying the whole
    result list and splitting on ",": that approach cut an entry in two
    whenever its text contained a comma and produced ``[""]`` (one phantom
    entry) when nothing matched.  An empty result now yields ``[]``.
    """
    return [str(entry) for entry in sopa.find_all('content')]
60
+
61
def processEntries2(texto):
    """Turn the per-entry markup strings into rows of yield-curve values.

    Assumes ``texto`` is a list of strings, one per feed entry, with one
    ``<d:NAME ...>value</d:NAME>`` element per line.

    Returns a dict mapping the entry's position in ``texto`` to a 15-item
    list ordered like ``colPos``; fields that are absent stay ``pd.NA``.

    Every line is handled independently: a line is used only when it has
    both a value and a closing tag whose name is listed in ``colPos``.  The
    previous implementation swallowed each failure with a bare ``except``
    and kept the variables of the last successful line, so a tag unknown to
    ``colPos`` or an empty element overwrote another column (or leaked a
    value from the previous entry).
    """
    colPos = ["id","new_date","bc_1month","bc_2month","bc_3month","bc_4month","bc_6month",
              "bc_1year","bc_2year","bc_3year","bc_5year","bc_7year",
              "bc_10year","bc_20year","bc_30year"]
    positions = {name: idx for idx, name in enumerate(colPos)}
    entries = {}
    for i, currEntry in enumerate(texto):
        currEntrySplit = currEntry.split("\n")
        # The first two and the last three lines of an entry are not read.
        subSetEntryList = currEntrySplit[2:(len(currEntrySplit) - 3)]
        currRow = [pd.NA] * len(colPos)
        for line in subSetEntryList:
            values = re.findall('>(.+?)<', line)
            tagNames = re.findall('d:(.+?)>', line)
            # The second match is the bare name taken from the closing tag;
            # the first one still carries the opening tag's attributes.
            if not values or len(tagNames) < 2:
                continue
            posInRow = positions.get(tagNames[1].lower())
            if posInRow is None:
                continue
            currRow[posInRow] = values[0]
        entries[i] = currRow
    return entries
95
+
96
def getYieldData2(yrs):
    """Download the Treasury daily yield curve for each year in ``yrs``.

    Assumes a list of years.
    Returns a pandas dataframe with one row per feed entry and the columns
    listed in ``colNames``; values are the raw strings from the feed.

    A year whose request fails (connection error, timeout or HTTP error
    status) is reported on stdout and skipped.  Previously the ``except``
    branch printed ``r.status_code`` although ``r`` had never been assigned,
    which raised a second exception and hid the real one.
    """
    colNames = ["Id","Date","1-month","2-month","3-month","4-month","6-month","1-year","2-year","3-year","5-year","7-year","10-year","20-year","30-year"]
    frames = []
    for i in tqdm(range(len(yrs))):
        currURL = 'https://home.treasury.gov/resource-center/data-chart-center/interest-rates/pages/xml?data=daily_treasury_yield_curve&field_tdr_date_value={}'.format(yrs[i])
        try:
            r = requests.get(currURL, timeout=60)
            r.raise_for_status()
        except requests.RequestException as err:
            print(i, err)
            continue
        soup = BeautifulSoup(r.text, features="lxml")
        txt = extractEntries(soup)
        data = processEntries2(txt)
        frames.append(pd.DataFrame.from_dict(data, orient='index', columns=colNames))
    if not frames:
        return pd.DataFrame(columns=colNames)
    # One concat at the end instead of growing the frame inside the loop.
    return pd.concat(frames, ignore_index=True, axis=0)
113
+
114
def tblFormater(yldData):
    """Convert the raw yield-curve columns to Python/numeric types, in place.

    Assumes a pandas dataframe with the columns "Id", "Date" and the
    thirteen tenor columns ("1-month" ... "30-year").
    "Id" becomes int, "Date" becomes a datetime (its "T" separator is
    replaced by a space before parsing) and every tenor becomes float.
    Null cells are left untouched.  The same dataframe object is returned.
    """
    def _asInt(cell):
        return int(cell) if pd.notnull(cell) else cell

    def _asFloat(cell):
        return float(cell) if pd.notnull(cell) else cell

    def _dropT(cell):
        return str(cell).replace("T"," ") if pd.notnull(cell) else cell

    def _asDatetime(cell):
        return datetime.strptime(cell,"%Y-%m-%d %H:%M:%S") if pd.notnull(cell) else cell

    yldData["Id"] = yldData["Id"].apply(_asInt)
    yldData["Date"] = yldData["Date"].apply(_dropT)
    yldData["Date"] = yldData["Date"].apply(_asDatetime)
    tenors = ("1-month", "2-month", "3-month", "4-month", "6-month",
              "1-year", "2-year", "3-year", "5-year", "7-year",
              "10-year", "20-year", "30-year")
    for tenor in tenors:
        yldData[tenor] = yldData[tenor].apply(_asFloat)
    return yldData
135
+
136
# Download the current calendar year's yield curve while the module loads,
# then convert its columns.  tblFormater converts in place and returns its
# argument, so both names refer to the same dataframe.
yieldCurveCurrent = getYieldData2([datetime.now().year])
yieldCurveCurrent2 = tblFormater(yieldCurveCurrent)

## -- Stock Data -- ##

# Top 25 stocks traded in the U.S. plus publicly traded stocks of defense companies
stocks = ['MSFT','AMZN','TSLA','GOOGL','GOOG','BRK-B','UNH','JNJ','XOM','JPM',
          'META','V','PG','NVDA','HD','CVX','LLY','MA','ABBV','PFE','MRK','PEP','BAC','KO','LMT','NOC','GD','BA','RTX']
144
+
145
def stckFormater(tbl):
    """Move the index of a price-history table into a plain "Date" column.

    Assumes a dataframe whose index is named "Date".
    Each date is rendered as text, cut to its first 19 characters
    ("YYYY-MM-DD HH:MM:SS", which drops any trailing UTC offset) and parsed
    back into a datetime.  Null dates are left as they are.
    """
    def _withoutOffset(stamp):
        if pd.isnull(stamp):
            return stamp
        return datetime.strptime(str(stamp)[:19], "%Y-%m-%d %H:%M:%S")

    flatTable = tbl.reset_index()
    flatTable['Date'] = flatTable['Date'].apply(_withoutOffset)
    return flatTable
150
+
151
def stckPull(stcks, startDate=datetime(2022, 1, 1), endDate=None):
    """Assumes a list of stock tickers.

    Returns a pandas dataframe with a "Date" column plus one column of daily
    closing prices per ticker, outer-joined on "Date" and sorted by date.
    An empty ticker list yields an empty dataframe with only "Date".

    ``endDate`` defaults to the moment of the call.  It used to be
    ``datetime.now()`` written in the signature, which Python evaluates once
    when the function is defined, so a long-running process kept requesting
    prices up to its start-up time only.
    """
    if endDate is None:
        endDate = datetime.now()

    stocksTable = None
    for currStockName in stcks:
        currStockHist = yahooFinance.Ticker(currStockName).history(start=startDate, end=endDate)
        currStockTable = stckFormater(currStockHist)[['Date', 'Close']].rename(
            columns={"Close": currStockName})
        if stocksTable is None:
            stocksTable = currStockTable
        else:
            stocksTable = pd.merge(stocksTable, currStockTable, on='Date', how='outer')

    if stocksTable is None:
        return pd.DataFrame(columns=['Date'])
    return stocksTable.sort_values(by=['Date'])
169
+
170
# Pull closing prices for every ticker in `stocks` while the module loads,
# using stckPull's default date range.
stocksData = stckPull(stocks)
171
+
172
+ ## -- U.S. Bureau of Labor Statistics Data -- ##
173
+
174
def getBLS(start="2022", end=None):
    """Fetch three series from the U.S. Bureau of Labor Statistics API.

    Assumes a start year and an end year, both strings.
    Defaults: start="2022", end=the year in which the call is made (the old
    default was computed once at import time and went stale across a year
    boundary).

    Any messages returned by the API are printed.
    Returns a dataframe with the columns
    seriesId, year, period, periodName, value (all kept as received).
    """
    #CUUR0000SA0L1E = Consumer Price Index - All Urban Consumers
    #EIUCOCANMANU = Import/Export Price Indexes
    #CEU0800000003 = National Employment, Hours, and Earnings
    #CXUMENBOYSLB0101M = Consumer Expenditure Survey - Annual Publication thus EXCLUDED
    if end is None:
        end = str(datetime.now().year)

    headers = {'Content-type': 'application/json'}
    data = json.dumps({"seriesid": ['CUUR0000SA0L1E','EIUCOCANMANU','CEU0800000003'],"startyear":start, "endyear":end})
    p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)
    json_data = json.loads(p.text)

    for item in json_data['message']:
        print(item)

    colNames = ["seriesId","year","period","periodName","value"]
    # Collect every observation first and build the frame once; appending a
    # one-row frame per observation copied the whole table on each step.
    rows = [
        [series['seriesID'], item['year'], item['period'], item['periodName'], item['value']]
        for series in json_data['Results']['series']
        for item in series['data']
    ]
    # dtype=object keeps the columns generic, as they were when the table
    # was grown from an empty frame.
    return pd.DataFrame(rows, columns=colNames, dtype=object)
208
+
209
# Fetch the BLS series while the module loads, using getBLS's default years.
blsData = getBLS()

## -- Federal Reserve Data -- ##

# FRED series identifiers that are downloaded and merged by multiSeriesFRED.
series_id = ['DFF','RRPONTSYD','SP500','DCOILWTICO','SOFR','DJIA','NASDAQCOM']
214
+
215
def getFRED(nombreSerie):
    """Assumes a series valid with the St. Louis FRED API.

    Returns a pandas data frame with two columns: "Date" (parsed from the
    "YYYY-MM-DD" text of each observation) and one column named after the
    series holding the observation values exactly as received.
    """
    import os

    # NOTE(security): an API key is committed in this file.  The environment
    # variable FRED_API_KEY takes precedence when set; the literal remains
    # only as a fallback so existing deployments keep working.
    apiKey = os.environ.get('FRED_API_KEY', '9180dde91a32bac5c7699bbf994870bc')
    file_type = 'json'

    urlSeriesObservations = 'https://api.stlouisfed.org/fred/series/observations?series_id={}&api_key={}&file_type={}'.format(nombreSerie,apiKey,file_type)
    r = requests.get(urlSeriesObservations)
    json_data = json.loads(r.text)

    colNames = ['Date', nombreSerie]
    # Build the frame once from all observations instead of concatenating a
    # one-row frame per observation.
    rows = [
        [datetime.strptime(item['date'], "%Y-%m-%d"), item['value']]
        for item in json_data['observations']
    ]
    return pd.DataFrame(rows, columns=colNames)
238
+
239
def multiSeriesFRED(seriesList):
    """Assumes a non-empty list of series, valid with the St. Louis FRED API.

    Returns a pandas dataframe with the series outer-merged on "Date", in
    list order.  A list with a single series is now accepted and returns
    that series' frame unchanged (two were required before).
    """
    df = getFRED(seriesList[0])
    for nombreSerie in seriesList[1:]:
        df = pd.merge(df, getFRED(nombreSerie), on='Date', how='outer')
    return df
247
+
248
# Download every FRED series in series_id and merge them on Date
# (runs while the module loads).
fredData = multiSeriesFRED(series_id)

## -- Data Processing -- ##

# Left joins: only dates present in the yield-curve table are kept.
mergedEconData = pd.merge(yieldCurveCurrent2,stocksData,on="Date",how="left")
mergedEconData = pd.merge(mergedEconData,fredData,on="Date",how="left")
# Two helper columns; after getBLS's five columns they sit at positions 5 and 6.
blsData['month'] = pd.NA
blsData['seriesName'] = pd.NA
seriesDict = {'CUUR0000SA0L1E':'CPI','EIUCOCANMANU':'Import_Export_Indx','CEU0800000003':'ntnlEmployment'}
for i in range(len(blsData)):
    # Column 2 is "period"; removing the letters leaves its numeric part
    # (presumably codes such as "M01" -- TODO confirm against the BLS API).
    month = int(re.sub('[a-zA-Z]','',blsData.iloc[i,2]))
    blsData.iloc[i,5] = month
    # Column 0 is "seriesId"; map it to the short name used as a column below.
    blsData.iloc[i,6] = seriesDict.get(blsData.iloc[i,0])
mergedEconData['CPI'] = pd.NA
mergedEconData['Import_Export_Indx'] = pd.NA
mergedEconData['ntnlEmployment'] = pd.NA
# Column 1 is "Date": replace each value by its .date().
for i in range(len(mergedEconData)):
    mergedEconData.iloc[i,1] = mergedEconData.iloc[i,1].date()
# Column 1 of blsData is "year": convert to int so it compares with obsYear.
for i in range(len(blsData)):
    blsData.iloc[i,1] = int(blsData.iloc[i,1])
# Positional targets of the three BLS columns in mergedEconData.
# NOTE(review): 51-53 assumes 15 yield columns + 29 ticker columns + 7 FRED
# columns precede them -- verify if any list above changes.
colsDict = {'CPI':51,'Import_Export_Indx':52,'ntnlEmployment':53}
# For every daily row, copy the BLS value(s) whose year and month match.
for i in tqdm(range(len(mergedEconData))):
    obsMonth = mergedEconData.iloc[i,1].month
    obsYear = mergedEconData.iloc[i,1].year
    for j in range(len(blsData)):
        currYear = blsData.iloc[j,1]
        currMonth = blsData.iloc[j,5]
        if (obsMonth==currMonth) and (obsYear==currYear):
            colPos = colsDict.get(blsData.iloc[j,6])
            # Column 4 of blsData is "value".
            mergedEconData.iloc[i,colPos] = blsData.iloc[j,4]
278
# Targets: the date plus every yield-curve tenor.
yLabels = mergedEconData[["Date","1-month","2-month","3-month","4-month","6-month",
                          "1-year","2-year","3-year","5-year","7-year",
                          "10-year","20-year","30-year"]].copy()
# Features: the date, 29 tickers (positions 1-29), six FRED series
# (positions 30-35) and the three BLS series (positions 36-38).
# DCOILWTICO is downloaded but not selected here.
xLabels = mergedEconData[['Date','MSFT', 'AMZN', 'TSLA', 'GOOGL', 'GOOG', 'BRK-B', 'UNH',
                          'JNJ', 'XOM', 'JPM', 'META', 'V', 'PG', 'NVDA', 'HD', 'CVX', 'LLY',
                          'MA', 'ABBV', 'PFE', 'MRK', 'PEP', 'BAC', 'KO', 'LMT', 'NOC', 'GD',
                          'BA', 'RTX', 'DFF', 'RRPONTSYD', 'SP500', 'SOFR', 'DJIA',
                          'NASDAQCOM', 'CPI', 'Import_Export_Indx', 'ntnlEmployment']].copy()
lastBLSDataRow = 0
lastCpiVal = 0
lastImpExVal = 0
lastEmplVal = 0
blsUpToDate = False
# Scan from the newest row backwards for the latest row where all three BLS
# columns (36, 37, 38) are populated, and remember its position and values.
for i in reversed(range(len(xLabels) + 0)) :
    if not(pd.isnull(xLabels.iloc[i,38])) and not(pd.isnull(xLabels.iloc[i,37])) and not(pd.isnull(xLabels.iloc[i,36])):
        lastBLSDataRow = i
        lastCpiVal = xLabels.iloc[i,36]
        lastImpExVal = xLabels.iloc[i,37]
        lastEmplVal = xLabels.iloc[i,38]
        break

# NOTE(review): lastCpiVal stays 0 only when no complete row was found, yet
# that case sets the "up to date" flag and skips the fill below -- confirm
# this is the intended meaning of the flag.
if lastCpiVal == 0:
    blsUpToDate = True

# Carry the latest complete BLS values forward to every later row.
if blsUpToDate == False:
    for i in range((lastBLSDataRow+1),len(xLabels)):
        xLabels.iloc[i,36] = lastCpiVal
        xLabels.iloc[i,37] = lastImpExVal
        xLabels.iloc[i,38] = lastEmplVal

# Fill gaps in the last row with the value from the row just before it.
for i in range(len(xLabels.columns)):
    if pd.isnull(xLabels.iloc[(len(xLabels)-1),i]):
        xLabels.iloc[(len(xLabels)-1),i] = xLabels.iloc[(len(xLabels)-2),i]

# Replace remaining nulls with 0.0 in selected columns only:
# 33 SOFR, 3 TSLA, 12 V, 19 ABBV, 32 SP500, 34 DJIA, 11 META;
# column 31 (RRPONTSYD) is zeroed where its value is the string ".".
for i in range(len(xLabels)):
    if pd.isnull(xLabels.iloc[i,33]):
        xLabels.iloc[i,33] = 0.0
    if pd.isnull(xLabels.iloc[i,3]):
        xLabels.iloc[i,3] = 0.0
    if pd.isnull(xLabels.iloc[i,12]):
        xLabels.iloc[i,12] = 0.0
    if pd.isnull(xLabels.iloc[i,19]):
        xLabels.iloc[i,19] = 0.0
    if pd.isnull(xLabels.iloc[i,32]):
        xLabels.iloc[i,32] = 0.0
    if pd.isnull(xLabels.iloc[i,34]):
        xLabels.iloc[i,34] = 0.0
    if pd.isnull(xLabels.iloc[i,11]):
        xLabels.iloc[i,11] = 0.0
    if xLabels.iloc[i,31]==".":
        xLabels.iloc[i,31] = 0.0
329
+
330
# Cut-off date: one year before now, as a plain date so it compares with the
# "Date" column.
dte = datetime.now() - relativedelta(years=1)
dte2 = date(dte.year, dte.month, dte.day)

yLabels2 = yLabels[yLabels['Date']>dte2]
# Dropping 2-month and 4-month columns
yLabels2 = yLabels2[['Date', '1-month', '3-month','6-month', '1-year',
                     '2-year', '3-year', '5-year', '7-year', '10-year', '20-year',
                     '30-year']]

xLabels2 = xLabels[xLabels['Date']>dte2]

# Feature matrix without the date column.
X = xLabels2[['MSFT', 'AMZN', 'TSLA', 'GOOGL', 'GOOG', 'BRK-B', 'UNH', 'JNJ',
              'XOM', 'JPM', 'META', 'V', 'PG', 'NVDA', 'HD', 'CVX', 'LLY', 'MA',
              'ABBV', 'PFE', 'MRK', 'PEP', 'BAC', 'KO', 'LMT', 'NOC', 'GD', 'BA',
              'RTX', 'DFF', 'RRPONTSYD', 'SP500', 'SOFR', 'DJIA', 'NASDAQCOM', 'CPI',
              'Import_Export_Indx', 'ntnlEmployment']]

# Target matrix without the date column.
Y = yLabels2[['1-month', '3-month', '6-month', '1-year', '2-year', '3-year',
              '5-year', '7-year', '10-year', '20-year', '30-year']]

# Last available row of targets, date removed, each value cast to float64.
# NOTE(review): integer keys on a label-indexed Series are positional only
# in older pandas releases -- confirm against the installed version.
todayYvalues = yLabels2.iloc[(len(yLabels2)-1),:]
todayYvalues = todayYvalues[1:]
for i in range(len(todayYvalues)):
    todayYvalues[i] = np.float64(todayYvalues[i])

# Same treatment for the last available row of features.
todayXvalues = xLabels2.iloc[(len(xLabels2)-1),:]
todayXvalues = todayXvalues[1:]
for i in range(len(todayXvalues)):
    todayXvalues[i] = np.float64(todayXvalues[i])

# Training data: every row except the last one (both use len(Y)).
Yseries = Y.iloc[:len(Y)-1,:].copy()
Xseries = X.iloc[:len(Y)-1,:].copy()

#Handles missing values codified as "." - Replaces with 0
# (only columns from position 29 onwards are inspected)
for i in range(len(Xseries)):
    for j in range(29,len(Xseries.columns)):
        if Xseries.iloc[i,j] == ".":
            Xseries.iloc[i,j] = 0

#Handles missing values codified as np.nan or pd.NA - Replaces with 0
for i in range(len(Xseries)):
    for j in range(len(Xseries.columns)):
        if pd.isnull(Xseries.iloc[i,j]):
            Xseries.iloc[i,j] = 0

#Transform all observations to np.float64 type
Xseries = Xseries.astype(np.float64)

#Handles missing values codified as np.nan or pd.NA - Replaces with previous observation value
# NOTE(review): for i == 0 the index i-1 is -1, i.e. the LAST row, not a
# previous observation.
for i in range(len(Yseries)):
    for j in range(len(Yseries.columns)):
        if pd.isnull(Yseries.iloc[i,j]):
            Yseries.iloc[i,j] = Yseries.iloc[i-1,j]

#Transform all observations to np.float64 type
Yseries = Yseries.astype(np.float64)

# Date of the last target row.
lastDate = yLabels2.tail(1).iloc[0,0]

# The ten target rows that precede the last one.
yPlotVals = yLabels2.tail(11).head(10)
390
+
391
## -- Plots --##

# Date of the last feature row, and the 90 feature rows that precede it.
lastDateX = xLabels2.tail(1).iloc[0,0]
xPlotVals = xLabels2.tail(91).head(90)

## --- Stocks --- ##

# Columns 0-29: the date followed by the 29 ticker columns.
stocksPlot = xPlotVals.iloc[:,0:30]
color = cm.rainbow(np.linspace(0, 1, len(stocksPlot.columns)))
# Column positions (within stocksPlot) of the tickers to draw.
selStocks = [1,2,4,6,7,8,10,12,15,16,17,18,19,25,26,27,28]

fig2 = plt.figure()

# NOTE(review): the loop starts at 1, so selStocks[0] (column 1) is never
# drawn -- confirm whether that is intended.
for i in range(1,len(selStocks)):
    c = color[i]
    plt.plot(stocksPlot[stocksPlot.columns[0]],
             stocksPlot[stocksPlot.columns[selStocks[i]]],
             linestyle='solid',marker='.',label='{}'.format(stocksPlot.columns[selStocks[i]]),color=c)

# Legend is anchored outside the axes, to the right.
plt.legend(loc="upper right", frameon=True,
           bbox_to_anchor=(1.35, 1.0))
plt.xticks(rotation = 45)
plt.title("Last 90 Days of Selected Best-Performing Stocks")
plt.grid()
#plt.show()

## --- Bureau of Labor Statistics Data --- ##

# Columns 36-38 are the three BLS series; keep only their most recent row.
blsTable = xPlotVals.iloc[:,36:39]
blsTbl = blsTable.tail(1)
421
+
422
## --- U.S. Federal Reserve Data --- ##

# Date plus the six FRED series; the 30 rows preceding the last one are drawn.
fedVals = xPlotVals[["Date","DFF","RRPONTSYD","SP500","SOFR","DJIA","NASDAQCOM"]]
fedPlot = fedVals.tail(31).head(30)

color = cm.rainbow(np.linspace(0, 1, len(fedPlot.columns)))
# Display names by column position in fedPlot; position 0 is the date.
fedLabels = ["notUsedVal","Federal Funds Effective Rate",
             "Overnight Reverse Repurchase Agreements",
             "S&P 500",
             "Secured Overnight Financing Rate",
             "Dow Jones Industrial Average",
             "NASDAQ Composite Index"]

# One figure per series: column k is drawn with color[k-1] and fedLabels[k].
fedFigures = []
for seriesPos in range(1, 7):
    fedFigure = plt.figure()
    plt.plot(fedPlot[fedPlot.columns[0]],
             fedPlot[fedPlot.columns[seriesPos]],
             linestyle='solid',
             marker='.',
             label='{}'.format(fedLabels[seriesPos]),
             color=color[seriesPos - 1])
    plt.xticks(rotation = 45)
    plt.title("Last 30 Days of {} - Source U.S. Federal Reserve".format(fedLabels[seriesPos]))
    plt.grid()
    fedFigures.append(fedFigure)

fig3, fig4, fig5, fig6, fig7, fig8 = fedFigures

#plt.show()
490
+
491
## --- Yield Curve --- ##

color = cm.rainbow(np.linspace(0, 1, len(yPlotVals.columns)))

fig1 = plt.figure()

# One line per tenor; column k is drawn with color[k] (color[0] is unused).
yieldDates = yPlotVals[yPlotVals.columns[0]]
for tenorName, tenorColor in zip(yPlotVals.columns[1:], color[1:]):
    plt.plot(yieldDates,
             yPlotVals[tenorName],
             linestyle='solid',
             marker='o',
             label='{}'.format(tenorName),
             color=tenorColor)
plt.legend(loc="upper right", frameon=True,
           bbox_to_anchor=(1.35, 1.0))
plt.xticks(rotation = 45)
plt.title("Last 10 Days of U.S. Treasury Yield Curve")
plt.grid()
#plt.show()
508
+
509
+ ## -- -- ##
510
+
511
+ ## -- Loading Model -- ##
512
+
513
+ ###### -- PICKLED MODELS ARE NOT WORKING -- #####
514
+ #def load_model():
515
+ # with open('xgboostModelYieldCurve4.pkl','rb') as file:
516
+ # retrievedData = pickle.load(file)
517
+ # return retrievedData
518
+
519
+ #modelData = load_model()
520
+ #retrievedModel = modelData['model']
521
+ ###### -- PICKLED MODELS ARE NOT WORKING -- #####
522
+
523
## -- fitting the model with only one year of data -- ##

# Work on float32 copies so the cleaned float64 frames stay untouched.
Yseries2 = Yseries.copy()
Xseries2 = Xseries.copy()
Yseries2 = Yseries2.astype('float32')
Xseries2 = Xseries2.astype('float32')

# One XGBRegressor per target column, all with the same fixed hyper-parameters.
bestModel = MultiOutputRegressor(XGBRegressor(subsample = 0.5, n_estimators = 100, max_depth = 3,
                                              learning_rate = 0.3, colsample_bytree = 0.5, colsample_bylevel = 0.8999999999999999,seed = 20))

# The fit runs while the module loads.
bestModel.fit(Xseries2,Yseries2)
534
+
535
## -- -- ##

# Features of the last available row (date column removed), as a one-row frame.
todayXvalues = xLabels2.loc[[xLabels2.index[len(xLabels2.index)-1]]]
todayXvalues = todayXvalues.iloc[:,1:]
todayXvalues = todayXvalues.astype(np.float64)

ynew2 = bestModel.predict(todayXvalues)

# One-row tables shown on the page: model output and the observed values.
predictVals = pd.DataFrame(data = ynew2,columns=todayYvalues.index.values)
actualVals = pd.DataFrame(data = todayYvalues.values.reshape(1,11),columns=todayYvalues.index)

# Last plotted row, i.e. the observation just before the predicted one.
# .iloc is used for positional access: integer keys on a label-indexed
# Series are deprecated in recent pandas releases.
yesterdayVals = yPlotVals.iloc[len(yPlotVals)-1,:]
yesterdayDate = yesterdayVals.iloc[0]

fig9 = plt.figure()
fig9.set_figwidth(15)
fig9.set_figheight(6)

# color[k] belongs to column k of yPlotVals; column 0 is the date, so the
# tenor at position i of the prediction uses color[i+1].  The markers and
# arrows previously used color[i] and were one shade off from their line.
color = cm.rainbow(np.linspace(0, 1, len(yPlotVals.columns)))
for i in range(1,len(yPlotVals.columns)):
    c = color[i]
    plt.plot(yPlotVals[yPlotVals.columns[0]],
             yPlotVals[yPlotVals.columns[i]],
             linestyle='solid',marker='o',label='{}'.format(yPlotVals.columns[i]),color=c)

# Predicted values: stars.
for i in range(len(ynew2[0])):
    c = color[i+1]
    plt.plot(lastDate, ynew2[0][i], color=c,markeredgecolor="black",markersize=10,marker="*")

# Actual values: crosses.
for i in range(len(todayYvalues.values)):
    c = color[i+1]
    plt.plot(lastDate,todayYvalues.values[i],color=c,markeredgecolor="black",markersize=10,marker="X")

# Coloured dashed arrows: previous observation -> actual value.
for i in range(len(todayYvalues.values)):
    c = color[i+1]
    plt.arrow(yesterdayDate, yesterdayVals.iloc[i+1], 1, (todayYvalues.values[i]-yesterdayVals.iloc[i+1]),
              color=c,linestyle="--")

# Black dashed arrows: previous observation -> predicted value.
for i in range(len(ynew2[0])):
    plt.arrow(yesterdayDate, yesterdayVals.iloc[i+1], 1, (ynew2[0][i]-yesterdayVals.iloc[i+1]),
              color='black',linestyle="--")

plt.legend(loc="upper right", frameon=True,
           bbox_to_anchor=(1.15, 1.0))
plt.xticks(rotation = 45)
plt.title("Last 10 Days of U.S. Treasury Yield Curve Overlayed with Predicted Value [Star] and Actual Value [Cross]")
plt.grid()
#plt.show()
583
+ #plt.show()
584
+
585
+ ## -- Page Loading with Streamlit-- ##
586
+
587
def show_predict_page():
    """Write the prediction page to Streamlit.

    Order of the page: predicted and actual value tables, the overlay
    figure, the stocks figure, the latest BLS indicators, the six Federal
    Reserve figures and finally the yield-curve figure.  All tables and
    figures are the module-level objects built while this module loads.
    """
    st.title("U.S. Treasury Yield Curve Prediction with XGBoost Model")
    st.write("### U.S. Treasury Yield Curve - Predicted v. Actual Value")

    tables = (
        ("#### Table1: Predicted Values", predictVals),
        ("#### Table2: Actual Values", actualVals),
    )
    for heading, table in tables:
        st.write(heading)
        st.dataframe(data=table)

    st.pyplot(fig=fig9)

    st.title("Selected Data Used to Generate the Prediction")
    st.write("### Selection of Most Important Stocks in the U.S. Economy - 90 days Prior Prediction")
    st.pyplot(fig=fig2)

    st.write("### Latest indicators from the U.S. Bureau of Labor Statistics")
    st.dataframe(data=blsTbl)

    st.write("### Selected indicators from the U.S. Federal Reserve")
    for fedFigure in (fig3, fig4, fig5, fig6, fig7, fig8):
        st.pyplot(fig=fedFigure)

    st.write("### U.S. Treasury Yield Curve 10 days Prior Prediction")
    st.pyplot(fig=fig1)
621
+
622
+ show_predict_page()
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ matplotlib.pyplot
6
+ re
7
+ tqdm
8
+ requests
9
+ BeautifulSoup4
10
+ json
11
+ lxml
12
+ datetime
13
+ python-dateutil
14
+ yfinance
15
+ scikit-learn
16
+ xgboost