Spaces:

rzambrano
/

finalProj_yieldCurvePredict

Runtime error

File size: 23,226 Bytes

## -- REQUIRED LIBRARIES -- ##
import streamlit as st
import pickle

import pandas as pd
import numpy as np
from matplotlib.pyplot import cm
import matplotlib.pyplot as plt

import re

from tqdm import tqdm

import requests
from bs4 import BeautifulSoup
import json
from lxml import objectify
from lxml import etree
from lxml import html
import lxml.html
import lxml.html.soupparser

import datetime
from datetime import datetime, date, time
from datetime import timedelta
from dateutil.relativedelta import relativedelta

import yfinance as yahooFinance

import sklearn
#import tensorflow as tf
#from tensorflow import keras

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

import xgboost
from xgboost import XGBRegressor

## -- -- ##

## -- U.S. Treasury Yield Curve Data -- ##

def extractEntries(sopa):
    """Assumes a bs4 object downloaded from the U.S. Treasury website.
    Returns a list of text pieces taken from its 'content' tags.

    The result of find_all('content') is rendered with str(), the opening
    bracket is dropped, every trailing copy of the final character is
    stripped, and what remains is split on commas."""
    rendered = str(sopa.find_all('content'))
    withoutOpening = rendered[1:]
    # The last character is the closing bracket of the rendered list.
    closingChar = withoutOpening[-1]
    return withoutOpening.rstrip(closingChar).split(",")

def processEntries2(texto):
    """Assumes a list with sections of the url with the yield curve data.
    Returns a dict in which each key corresponds to a row.

    Each section is split into lines; the first two and the last three lines
    are ignored. A line contributes a value only when it holds both a text
    value (``>value<``) and a second ``d:`` tag name that matches one of the
    known columns. Lines that do not match are skipped, so a row never
    inherits a value from an earlier line or an earlier section. Columns with
    no value stay ``pd.NA``. Values are kept as the strings found in the text.
    """
    colPos = ["id","new_date","bc_1month","bc_2month","bc_3month","bc_4month","bc_6month",
            "bc_1year","bc_2year","bc_3year","bc_5year","bc_7year",
            "bc_10year","bc_20year","bc_30year"]
    colIndex = {name: pos for pos, name in enumerate(colPos)}
    # Compiled once: both patterns are applied to every line of every entry.
    valuePattern = re.compile('>(.+?)<')
    tagPattern = re.compile('d:(.+?)>')

    entries = {}
    for i, currEntry in enumerate(texto):
        currEntrySplit = currEntry.split("\n")
        currRow = [pd.NA] * len(colPos)
        for line in currEntrySplit[2:(len(currEntrySplit) - 3)]:
            item = valuePattern.findall(line)
            category = tagPattern.findall(line)
            # The second match is the bare tag name from the closing tag;
            # lines without a value or without a closing tag carry no data.
            if not item or len(category) < 2:
                continue
            posInRow = colIndex.get(category[1].lower())
            if posInRow is None:
                # Tag is not one of the columns we keep.
                continue
            currRow[posInRow] = item[0]
        entries[i] = currRow
    return entries

def getYieldData2(yrs):
    """Assumes a list of years.
    Returns a pandas dataframe with the yield curve for the years in the list.

    One request is made per year. A year whose request raises, or whose
    response is not successful, is reported with print() and skipped, so the
    remaining years are still returned. When no year could be downloaded the
    result is an empty dataframe that still has the expected columns.
    """
    colNames = ["Id","Date","1-month","2-month","3-month","4-month","6-month","1-year","2-year","3-year","5-year","7-year","10-year","20-year","30-year"]
    urlTemplate = 'https://home.treasury.gov/resource-center/data-chart-center/interest-rates/pages/xml?data=daily_treasury_yield_curve&field_tdr_date_value={}'
    yearlyFrames = []
    for i, yr in enumerate(tqdm(yrs)):
        currURL = urlTemplate.format(yr)
        try:
            r = requests.get(currURL)
        except requests.RequestException as err:
            # No response object exists here, so report the error itself.
            print(i, err)
            continue
        if not r.ok:
            # An error page has no 'content' entries and would only add an all-NA row.
            print(i, r.status_code)
            continue
        soup = BeautifulSoup(r.text, features="lxml")
        txt = extractEntries(soup)
        data = processEntries2(txt)
        yearlyFrames.append(pd.DataFrame.from_dict(data, orient='index',columns=colNames))
    if not yearlyFrames:
        return pd.DataFrame(columns=colNames)
    # Single concat instead of growing the frame inside the loop.
    return pd.concat(yearlyFrames, ignore_index=True, axis=0)

def tblFormater(yldData):
    """Assumes a pandas dataframe with the yield curve data for a given number of years.
    Returns the same dataframe, modified in place, with converted values.

    "Id" values become int, "Date" values are parsed from
    "YYYY-MM-DDTHH:MM:SS" text into datetime objects, and every maturity
    column becomes float. Null entries are left as they are."""
    def _unlessNull(convert):
        # Wrap a converter so that null cells pass through unchanged.
        return lambda x: convert(x) if pd.notnull(x) else x

    maturityCols = ("1-month", "2-month", "3-month", "4-month", "6-month",
                    "1-year", "2-year", "3-year", "5-year", "7-year",
                    "10-year", "20-year", "30-year")

    yldData["Id"] = yldData["Id"].apply(_unlessNull(int))
    # Two passes on "Date": first swap the "T" separator for a space, then parse.
    yldData["Date"] = yldData["Date"].apply(_unlessNull(lambda x: str(x).replace("T"," ")))
    yldData["Date"] = yldData["Date"].apply(_unlessNull(lambda x: datetime.strptime(x,"%Y-%m-%d %H:%M:%S")))
    for maturity in maturityCols:
        yldData[maturity] = yldData[maturity].apply(_unlessNull(float))
    return yldData

# Download the yield curve for the current calendar year, then convert its
# columns with tblFormater. Both statements run when the module is loaded.
yieldCurveCurrent = getYieldData2([datetime.now().year])
# tblFormater modifies and returns its argument, so both names refer to the same frame.
yieldCurveCurrent2 = tblFormater(yieldCurveCurrent)

## -- Stock Data -- ##

# Top 25 stocks traded in the U.S. plus publicly traded stocks of defense companies
stocks = ['MSFT','AMZN','TSLA','GOOGL','GOOG','BRK-B','UNH','JNJ','XOM','JPM',
         'META','V','PG','NVDA','HD','CVX','LLY','MA','ABBV','PFE','MRK','PEP','BAC','KO','LMT','NOC','GD','BA','RTX']

def stckFormater(tbl):
  """Assumes a price-history dataframe whose index is named 'Date'.
  Returns a copy with the index moved into a 'Date' column whose values are
  rebuilt from the first 19 characters of their text form
  ("YYYY-MM-DD HH:MM:SS"). Null dates are left untouched."""
  def _firstNineteenChars(stamp):
    if pd.notnull(stamp):
      return str(stamp)[:19]
    return stamp

  def _parseStamp(text):
    if pd.notnull(text):
      return datetime.strptime(str(text),"%Y-%m-%d %H:%M:%S")
    return text

  histTable = tbl.reset_index()
  # Two separate passes: trim to text first, then parse the trimmed text.
  histTable['Date'] = histTable['Date'].apply(_firstNineteenChars)
  histTable['Date'] = histTable['Date'].apply(_parseStamp)
  return histTable

def stckPull(stcks,startDate = datetime(2022, 1, 1),endDate = None):
  """Assumes a list of stock tickers.
  Returns a pandas dataframe with the daily closing price for each stock.

  startDate and endDate bound the history request made for every ticker.
  endDate defaults to the moment of the call; it is resolved inside the
  function because a default written as datetime.now() in the signature
  would be evaluated only once, when the function is defined.
  The result has a 'Date' column plus one column per ticker, outer-merged
  on 'Date' and sorted by 'Date'. An empty ticker list returns an empty
  dataframe with only the 'Date' column."""
  if endDate is None:
    endDate = datetime.now()
  stocksTable = None
  for currStockName in stcks:
    currStock = yahooFinance.Ticker(currStockName)
    currStockHist = currStock.history(start=startDate, end=endDate)
    currStockFormated = stckFormater(currStockHist)
    # Keep only the closing price and label it with the ticker.
    currStockTable = currStockFormated[['Date','Close']].rename(columns={"Close": currStockName})
    if stocksTable is None:
      stocksTable = currStockTable
    else:
      stocksTable = pd.merge(stocksTable,currStockTable,on='Date',how='outer')
  if stocksTable is None:
    return pd.DataFrame(columns=['Date'])
  return stocksTable.sort_values(by=['Date'])

# Closing prices for every ticker in `stocks`, using stckPull's default date range.
stocksData = stckPull(stocks)

## -- U.S. Bureau of Labor Statistics Data -- ##

def getBLS(start=str(datetime(2022, 1, 1).year),end=None):
    """Assumes a start year and an end year. Both strings.
    Defaults: start="2022", end=current year (resolved at call time).
    Returns the following series from the U.S. Bureau of Labor Statistics:
    CPI, Import/Export Price Index, National Employment.

    The result is a pandas dataframe with the columns seriesId, year,
    period, periodName and value, one row per observation, holding the
    values exactly as the API returned them. Any entries in the response's
    'message' list are printed. A response without series data returns an
    empty dataframe with the same columns."""
    #CUUR0000SA0L1E = Consumer Price Index - All Urban Consumers
    #EIUCOCANMANU = Import/Export Price Indexes
    #CEU0800000003 = National Employment, Hours, and Earnings
    #CXUMENBOYSLB0101M = Consumer Expenditure Survey - Annual Publication thus EXCLUDED
    if end is None:
        # Resolved here so a long-lived process does not keep a stale year.
        end = str(datetime.now().year)
    headers = {'Content-type': 'application/json'}
    data = json.dumps({"seriesid": ['CUUR0000SA0L1E','EIUCOCANMANU','CEU0800000003'],"startyear":start, "endyear":end})
    p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)
    json_data = json.loads(p.text)

    for item in json_data.get('message', []):
        print(item)

    colNames = ["seriesId","year","period","periodName","value"]
    # 'Results' may be missing or empty when the request was not processed.
    seriesList = (json_data.get('Results') or {}).get('series', [])
    # Collect all rows first and build the frame once.
    rows = [
        [series['seriesID'], item['year'], item['period'], item['periodName'], item['value']]
        for series in seriesList
        for item in series['data']
    ]
    return pd.DataFrame(rows, columns=colNames)

# Bureau of Labor Statistics observations, fetched with getBLS's default year range.
blsData = getBLS()

## -- Federal Reserve Data -- ##

# Series identifiers that are passed to multiSeriesFRED further down.
series_id = ['DFF','RRPONTSYD','SP500','DCOILWTICO','SOFR','DJIA','NASDAQCOM']

def getFRED(nombreSerie):
    """Assumes a series valid with the St. Louis FRED API.
    Returns a pandas data frame with the series values/observations.

    The frame has a 'Date' column, parsed from the API's "%Y-%m-%d" text,
    and a column named after the series that holds each value exactly as
    the API returned it.
    Raises KeyError when the response has no 'observations' entry; the
    message includes the API's 'error_message' when one is present."""
    # NOTE(security): this key is committed in source; consider loading it from configuration.
    apiKey = '9180dde91a32bac5c7699bbf994870bc'
    file_type = 'json'
    seriesName = nombreSerie

    urlSeriesObservations = 'https://api.stlouisfed.org/fred/series/observations?series_id={}&api_key={}&file_type={}'.format(nombreSerie,apiKey,file_type)
    r = requests.get(urlSeriesObservations)
    json_data = json.loads(r.text)

    observations = json_data.get('observations')
    if observations is None:
        raise KeyError('observations: {}'.format(json_data.get('error_message', json_data)))

    colNames = ['Date',seriesName]
    # Build the frame once; concatenating one row at a time is quadratic
    # in the number of observations.
    rows = [
        [datetime.strptime(item['date'],"%Y-%m-%d"), item['value']]
        for item in observations
    ]
    return pd.DataFrame(rows,columns=colNames)

def multiSeriesFRED(seriesList):
    """Assumes a list of series, valid with the St. Louis FRED API.
    Returns a pandas dataframe with the series merged by date.

    Series are fetched in list order and outer-merged on 'Date'. A list with
    a single series returns that series' dataframe; an empty list returns an
    empty dataframe with only the 'Date' column."""
    df = None
    for seriesName in seriesList:
        temp_df = getFRED(seriesName)
        if df is None:
            df = temp_df
        else:
            df = pd.merge(df,temp_df,on='Date',how='outer')
    if df is None:
        return pd.DataFrame(columns=['Date'])
    return df

# Download every FRED series listed in series_id and merge them by date.
fredData = multiSeriesFRED(series_id)

## -- Data Processing -- ##

# Left-join stock prices and FRED series onto the yield-curve rows, keyed on "Date".
mergedEconData = pd.merge(yieldCurveCurrent2,stocksData,on="Date",how="left")
mergedEconData = pd.merge(mergedEconData,fredData,on="Date",how="left")
# Two helper columns on blsData: position 5 ('month') and position 6 ('seriesName').
blsData['month'] = pd.NA
blsData['seriesName'] = pd.NA
# Maps a BLS series id to the column name used in mergedEconData.
seriesDict = {'CUUR0000SA0L1E':'CPI','EIUCOCANMANU':'Import_Export_Indx','CEU0800000003':'ntnlEmployment'}
for i in range(len(blsData)):
    # Column 2 is 'period'; removing its letters leaves the number that is stored as the month.
    month = int(re.sub('[a-zA-Z]','',blsData.iloc[i,2]))
    blsData.iloc[i,5] = month
    blsData.iloc[i,6] = seriesDict.get(blsData.iloc[i,0])
# Empty target columns for the three BLS indicators.
mergedEconData['CPI'] = pd.NA
mergedEconData['Import_Export_Indx'] = pd.NA
mergedEconData['ntnlEmployment'] = pd.NA
# Column 1 is "Date": replace each value with its .date() part.
for i in range(len(mergedEconData)):
    mergedEconData.iloc[i,1] = mergedEconData.iloc[i,1].date()
# Column 1 of blsData is 'year': convert it to int so it can be compared with date.year.
for i in range(len(blsData)):
    blsData.iloc[i,1] = int(blsData.iloc[i,1])
# Positional indices of the three BLS columns in mergedEconData.
# NOTE(review): 51-53 assume 15 yield-curve, 29 stock and 7 FRED columns precede them;
# these numbers must be updated if any of those lists change.
colsDict = {'CPI':51,'Import_Export_Indx':52,'ntnlEmployment':53}
# For every daily row, copy the BLS value whose year and month match the row's date.
# Every row is compared with every BLS observation (nested loop).
for i in tqdm(range(len(mergedEconData))):
    obsMonth = mergedEconData.iloc[i,1].month
    obsYear = mergedEconData.iloc[i,1].year   
    for j in range(len(blsData)):
        currYear = blsData.iloc[j,1]
        currMonth = blsData.iloc[j,5]
        if (obsMonth==currMonth) and (obsYear==currYear):
            colPos = colsDict.get(blsData.iloc[j,6])
            mergedEconData.iloc[i,colPos] = blsData.iloc[j,4]
# Targets: the date plus every yield-curve maturity.
yLabels = mergedEconData[["Date","1-month","2-month","3-month","4-month","6-month",
                 "1-year","2-year","3-year","5-year","7-year",
                 "10-year","20-year","30-year"]].copy()
# Features: the date, the 29 stock columns, six FRED series and the three BLS indicators.
# 'DCOILWTICO' is downloaded above but is not selected here.
xLabels = mergedEconData[['Date','MSFT', 'AMZN', 'TSLA', 'GOOGL', 'GOOG', 'BRK-B', 'UNH',
       'JNJ', 'XOM', 'JPM', 'META', 'V', 'PG', 'NVDA', 'HD', 'CVX', 'LLY',
       'MA', 'ABBV', 'PFE', 'MRK', 'PEP', 'BAC', 'KO', 'LMT', 'NOC', 'GD',
       'BA', 'RTX', 'DFF', 'RRPONTSYD', 'SP500', 'SOFR', 'DJIA',
       'NASDAQCOM', 'CPI', 'Import_Export_Indx', 'ntnlEmployment']].copy()
# In xLabels, positions 36, 37 and 38 are CPI, Import_Export_Indx and ntnlEmployment.
lastBLSDataRow = 0
lastCpiVal = 0
lastImpExVal = 0
lastEmplVal = 0
blsUpToDate = False
# Walk backwards to the most recent row in which all three BLS columns are filled.
for i in reversed(range(len(xLabels) + 0)) :
    if not(pd.isnull(xLabels.iloc[i,38])) and not(pd.isnull(xLabels.iloc[i,37])) and not(pd.isnull(xLabels.iloc[i,36])):
        lastBLSDataRow = i
        lastCpiVal = xLabels.iloc[i,36]
        lastImpExVal = xLabels.iloc[i,37]
        lastEmplVal = xLabels.iloc[i,38]
        break

# lastCpiVal is still 0 only when the search above found no complete row.
# NOTE(review): the flag name suggests "nothing to fill", yet it is set when no BLS data
# was found at all; the effect is only that the forward fill below is skipped.
if lastCpiVal == 0:
    blsUpToDate = True

# Carry the last complete BLS values forward into every later row.
if blsUpToDate == False:
    for i in range((lastBLSDataRow+1),len(xLabels)):
        xLabels.iloc[i,36] = lastCpiVal 
        xLabels.iloc[i,37] = lastImpExVal 
        xLabels.iloc[i,38] = lastEmplVal

# Fill nulls in the last row with the value from the row before it.
for i in range(len(xLabels.columns)):
    if pd.isnull(xLabels.iloc[(len(xLabels)-1),i]):
        xLabels.iloc[(len(xLabels)-1),i] = xLabels.iloc[(len(xLabels)-2),i]

# Replace nulls with 0.0 in selected columns, addressed by position:
# 33=SOFR, 3=TSLA, 12=V, 19=ABBV, 32=SP500, 34=DJIA, 11=META.
# In column 31 (RRPONTSYD) the value "." is replaced with 0.0.
for i in range(len(xLabels)):
    if pd.isnull(xLabels.iloc[i,33]):
        xLabels.iloc[i,33] = 0.0
    if pd.isnull(xLabels.iloc[i,3]):
        xLabels.iloc[i,3] = 0.0
    if pd.isnull(xLabels.iloc[i,12]):
        xLabels.iloc[i,12] = 0.0
    if pd.isnull(xLabels.iloc[i,19]):
        xLabels.iloc[i,19] = 0.0
    if pd.isnull(xLabels.iloc[i,32]):
        xLabels.iloc[i,32] = 0.0
    if pd.isnull(xLabels.iloc[i,34]):
        xLabels.iloc[i,34] = 0.0
    if pd.isnull(xLabels.iloc[i,11]):
        xLabels.iloc[i,11] = 0.0    
    if xLabels.iloc[i,31]==".":
        xLabels.iloc[i,31] = 0.0

# Cut-off date: one year before now, as a plain date.
dte = datetime.now() - relativedelta(years=1)
dte2 = date(dte.year, dte.month, dte.day)

# Keep only rows dated after the cut-off.
yLabels2 = yLabels[yLabels['Date']>dte2] 
# Dropping 2-month and 4-month columns
yLabels2 = yLabels2[['Date', '1-month', '3-month','6-month', '1-year',
       '2-year', '3-year', '5-year', '7-year', '10-year', '20-year',
       '30-year']]

xLabels2 = xLabels[xLabels['Date']>dte2] 

# Feature matrix: every xLabels2 column except 'Date'.
X = xLabels2[['MSFT', 'AMZN', 'TSLA', 'GOOGL', 'GOOG', 'BRK-B', 'UNH', 'JNJ',
       'XOM', 'JPM', 'META', 'V', 'PG', 'NVDA', 'HD', 'CVX', 'LLY', 'MA',
       'ABBV', 'PFE', 'MRK', 'PEP', 'BAC', 'KO', 'LMT', 'NOC', 'GD', 'BA',
       'RTX', 'DFF', 'RRPONTSYD', 'SP500', 'SOFR', 'DJIA', 'NASDAQCOM', 'CPI',
       'Import_Export_Indx', 'ntnlEmployment']]

# Target matrix: every yLabels2 column except 'Date'.
Y = yLabels2[['1-month', '3-month', '6-month', '1-year', '2-year', '3-year',
       '5-year', '7-year', '10-year', '20-year', '30-year']]

# Last row of the targets, without its 'Date' entry, converted value by value to np.float64.
# NOTE(review): todayYvalues is indexed by column names, so the integer keys below rely on
# positional fallback for label-indexed Series - confirm the installed pandas still allows it.
todayYvalues = yLabels2.iloc[(len(yLabels2)-1),:]
todayYvalues = todayYvalues[1:]
for i in range(len(todayYvalues)):
    todayYvalues[i] = np.float64(todayYvalues[i])

# Same conversion for the last feature row.
# todayXvalues is assigned again, from xLabels2, just before the prediction step.
todayXvalues = xLabels2.iloc[(len(xLabels2)-1),:]
todayXvalues = todayXvalues[1:]
for i in range(len(todayXvalues)):
    todayXvalues[i] = np.float64(todayXvalues[i])

# Training data: every row except the last one. Both slices use len(Y) as the bound.
Yseries = Y.iloc[:len(Y)-1,:].copy()
Xseries = X.iloc[:len(Y)-1,:].copy()

#Handles missing values codified as "." - Replaces with 0
# Only columns from position 29 onward are checked.
for i in range(len(Xseries)):
    for j in range(29,len(Xseries.columns)):
        if Xseries.iloc[i,j] == ".":
            Xseries.iloc[i,j] = 0

#Handles missing values codified as np.nan or pd.NA - Replaces with 0
for i in range(len(Xseries)):
    for j in range(len(Xseries.columns)):
        if pd.isnull(Xseries.iloc[i,j]):
            Xseries.iloc[i,j] = 0

#Transform all observations to np.float64 type 
Xseries = Xseries.astype(np.float64)

#Handles missing values codified as np.nan or pd.NA - Replaces with previous observation value
# NOTE(review): for i == 0 the index i-1 is -1, which selects the last row rather than a previous one.
for i in range(len(Yseries)):
    for j in range(len(Yseries.columns)):
        if pd.isnull(Yseries.iloc[i,j]):
            Yseries.iloc[i,j] = Yseries.iloc[i-1,j]

#Transform all observations to np.float64 type
Yseries = Yseries.astype(np.float64) 

# Date of the most recent target row.
lastDate = yLabels2.tail(1).iloc[0,0]

# The ten target rows that precede the most recent one.
yPlotVals = yLabels2.tail(11).head(10)

## -- Plots --##

# Date of the most recent feature row, and the 90 feature rows that precede that row.
lastDateX = xLabels2.tail(1).iloc[0,0]
xPlotVals = xLabels2.tail(91).head(90)

## --- Stocks --- ##

# Positions 0-29 of xPlotVals: 'Date' followed by the 29 stock columns.
stocksPlot = xPlotVals.iloc[:,0:30]
color = cm.rainbow(np.linspace(0, 1, len(stocksPlot.columns)))
# Column positions (within stocksPlot) of the stocks chosen for the chart.
selStocks = [1,2,4,6,7,8,10,12,15,16,17,18,19,25,26,27,28]

fig2 = plt.figure()

# One line per selected stock, plotted against the 'Date' column.
# NOTE(review): the loop starts at 1, so selStocks[0] (column position 1) is never drawn -
# confirm whether that entry is meant to be skipped.
for i in range(1,len(selStocks)):
    c = color[i]
    plt.plot(stocksPlot[stocksPlot.columns[0]],
             stocksPlot[stocksPlot.columns[selStocks[i]]],
             linestyle='solid',marker='.',label='{}'.format(stocksPlot.columns[selStocks[i]]),color=c)

plt.legend(loc="upper right", frameon=True,
          bbox_to_anchor=(1.35, 1.0))
plt.xticks(rotation = 45)
plt.title("Last 90 Days of Selected Best-Performing Stocks")
plt.grid()
#plt.show()

## --- Bureau of Labor Statistics Data --- ##

# Positions 36-38 are CPI, Import_Export_Indx and ntnlEmployment; keep only the last plotted row.
blsTable = xPlotVals.iloc[:,36:39]
blsTbl = blsTable.tail(1)

## --- U.S. Federal Reserve Data --- ##

# Date plus the six FRED series; the chart window is the 30 rows before the last one.
fedVals = xPlotVals[["Date","DFF","RRPONTSYD","SP500","SOFR","DJIA","NASDAQCOM"]]
fedPlot = fedVals.tail(31).head(30)

color = cm.rainbow(np.linspace(0, 1, len(fedPlot.columns)))
# Display names aligned with fedPlot's columns; position 0 ('Date') is a placeholder.
fedLabels = ["notUsedVal","Federal Funds Effective Rate",
"Overnight Reverse Repurchase Agreements",
"S&P 500",
"Secured Overnight Financing Rate",
"Dow Jones Industrial Average",
"NASDAQ Composite Index"]


def _fedSeriesFigure(seriesPos):
    """Open a new figure and plot fedPlot column `seriesPos` against the 'Date' column."""
    fig = plt.figure()
    plt.plot(fedPlot[fedPlot.columns[0]],
             fedPlot[fedPlot.columns[seriesPos]],
             linestyle='solid',marker='.',label='{}'.format(fedLabels[seriesPos]),
             color=color[seriesPos-1])
    plt.xticks(rotation = 45)
    plt.title("Last 30 Days of {} - Source U.S. Federal Reserve".format(fedLabels[seriesPos]))
    plt.grid()
    return fig


# One figure per series, created in column order (positions 1 to 6).
fig3, fig4, fig5, fig6, fig7, fig8 = [_fedSeriesFigure(pos) for pos in range(1, 7)]

#plt.show()

## --- Yield Curve --- ##

# One colour per yPlotVals column; position 0 ('Date') is not drawn.
color = cm.rainbow(np.linspace(0, 1, len(yPlotVals.columns)))

fig1 = plt.figure()

# Every maturity column is plotted against the first column of yPlotVals.
dateAxis = yPlotVals[yPlotVals.columns[0]]
for i, maturity in enumerate(yPlotVals.columns[1:], start=1):
    c = color[i]
    plt.plot(dateAxis,
             yPlotVals[maturity],
             linestyle='solid',marker='o',label='{}'.format(maturity),color=c)
plt.legend(loc="upper right", frameon=True,
          bbox_to_anchor=(1.35, 1.0))
plt.xticks(rotation = 45)
plt.title("Last 10 Days of U.S. Treasury Yield Curve")
plt.grid()
#plt.show()

## -- -- ##

## -- Loading Model -- ##

###### -- PICKLED MODELS ARE NOT WORKING -- #####
#def load_model():
#    with open('xgboostModelYieldCurve4.pkl','rb') as file:
#        retrievedData = pickle.load(file)
#    return retrievedData

#modelData = load_model()
#retrievedModel = modelData['model'] 
###### -- PICKLED MODELS ARE NOT WORKING -- #####

## -- fitting the model with only one year of data -- ##

# Work on float32 copies so Yseries and Xseries themselves stay unchanged.
Yseries2 = Yseries.copy()
Xseries2 = Xseries.copy()
Yseries2 = Yseries2.astype('float32')
Xseries2 = Xseries2.astype('float32')

# MultiOutputRegressor wraps a single XGBRegressor configuration so that all
# target columns of Yseries2 can be fitted; the hyper-parameters are hard-coded here.
bestModel = MultiOutputRegressor(XGBRegressor(subsample = 0.5, n_estimators = 100, max_depth = 3,
                              learning_rate = 0.3, colsample_bytree = 0.5, colsample_bylevel = 0.8999999999999999,seed = 20))

# The model is fitted every time the script runs (the pickled-model loader above is commented out).
bestModel.fit(Xseries2,Yseries2)

## -- -- ##

# Feature row for the most recent date: last row of xLabels2 without 'Date', as float64.
todayXvalues = xLabels2.loc[[xLabels2.index[len(xLabels2.index)-1]]]
todayXvalues = todayXvalues.iloc[:,1:]
todayXvalues = todayXvalues.astype(np.float64)

# One predicted value per target column for that row.
ynew2 = bestModel.predict(todayXvalues)

# Single-row tables for the page: predictions and the actual values of the same date.
predictVals = pd.DataFrame(data = ynew2,columns=todayYvalues.index.values)
# reshape(1,-1) follows the number of targets instead of hard-coding 11.
actualVals = pd.DataFrame(data = todayYvalues.values.reshape(1,-1),columns=todayYvalues.index)

# Last plotted row (the row before the most recent date). It is indexed by column
# names, so every positional access below goes through .iloc.
yesterdayVals = yPlotVals.iloc[len(yPlotVals)-1,:]
yesterdayDate = yesterdayVals.iloc[0]

fig9 = plt.figure()
fig9.set_figwidth(15)
fig9.set_figheight(6)

# color[k] belongs to column k of yPlotVals; column 0 is 'Date' and is not drawn.
color = cm.rainbow(np.linspace(0, 1, len(yPlotVals.columns)))
for i in range(1,len(yPlotVals.columns)):
    c = color[i]
    plt.plot(yPlotVals[yPlotVals.columns[0]],
             yPlotVals[yPlotVals.columns[i]],
             linestyle='solid',marker='o',label='{}'.format(yPlotVals.columns[i]),color=c)

# Target i corresponds to column i+1 of yPlotVals, so markers and arrows use
# color[i+1] to match the colour of that maturity's line.

# Predicted values [star].
for i in range(len(ynew2[0])):
    c = color[i+1]
    plt.plot(lastDate, ynew2[0][i], color=c,markeredgecolor="black",markersize=10,marker="*")

# Actual values [cross].
for i in range(len(todayYvalues.values)):
    c = color[i+1]
    plt.plot(lastDate,todayYvalues.values[i],color=c,markeredgecolor="black",markersize=10,marker="X")

# Coloured dashed arrows: from the last plotted value to the actual value.
for i in range(len(todayYvalues.values)):
    c = color[i+1]
    plt.arrow(yesterdayDate, yesterdayVals.iloc[i+1], 1, (todayYvalues.values[i]-yesterdayVals.iloc[i+1]),
              color=c,linestyle="--")

# Black dashed arrows: from the last plotted value to the predicted value.
for i in range(len(ynew2[0])):
    plt.arrow(yesterdayDate, yesterdayVals.iloc[i+1], 1, (ynew2[0][i]-yesterdayVals.iloc[i+1]),
              color='black',linestyle="--")

plt.legend(loc="upper right", frameon=True,
          bbox_to_anchor=(1.15, 1.0))
plt.xticks(rotation = 45)
plt.title("Last 10 Days of U.S. Treasury Yield Curve Overlayed with Predicted Value [Star] and Actual Value [Cross]")
plt.grid()
#plt.show()

## -- Page Loading with Streamlit-- ##

def show_predict_page():
    """Render the Streamlit page from the module-level tables and figures.

    Order on the page: predicted and actual value tables, the overlay figure
    (fig9), the stocks figure (fig2), the BLS table (blsTbl), the six Federal
    Reserve figures (fig3 to fig8) and finally the yield-curve figure (fig1)."""
    st.title("U.S. Treasury Yield Curve Prediction with XGBoost Model")
    st.write("""### U.S. Treasury Yield Curve - Predicted v. Actual Value""")

    # Each table is preceded by its own heading.
    headedTables = (
        ("""#### Table1: Predicted Values""", predictVals),
        ("""#### Table2: Actual Values""", actualVals),
    )
    for heading, table in headedTables:
        st.write(heading)
        st.dataframe(data=table)

    st.pyplot(fig=fig9)

    st.title("Selected Data Used to Generate the Prediction")

    st.write("""### Selection of Most Important Stocks in the U.S. Economy - 90 days Prior Prediction""")
    st.pyplot(fig=fig2)

    st.write("""### Latest indicators from the U.S. Bureau of Labor Statistics""")
    st.dataframe(data=blsTbl)

    st.write("""### Selected indicators from the U.S. Federal Reserve""")
    for fedFigure in (fig3, fig4, fig5, fig6, fig7, fig8):
        st.pyplot(fig=fedFigure)

    st.write("""### U.S. Treasury Yield Curve 10 days Prior Prediction""")
    st.pyplot(fig=fig1)

# Render the page; this runs on every execution of the script.
show_predict_page()