farquasar committed on
Commit
e24ac26
·
verified ·
1 Parent(s): 0c8e647

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import gradio as gr
3
+ import matplotlib.pyplot as plt
4
+ import ccxt
5
+ import pandas as pd
6
+ import numpy as np
7
+ from sklearn.preprocessing import MinMaxScaler
8
+ from sklearn.impute import SimpleImputer
9
+ from scipy import stats
10
+
11
+ # Fetch data
12
def fetch_binance_data(symbol, timeframe, limit=2000):
    """Download OHLCV candles for ``symbol`` from Binance through ccxt.

    Args:
        symbol: Market symbol passed to ``fetch_ohlcv`` (e.g. ``'BTC/USDT'``).
        timeframe: Candle interval string passed to ``fetch_ohlcv``.
        limit: Maximum number of candles requested.

    Returns:
        A DataFrame with the columns ``timestamp``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``; ``timestamp`` is converted from
        epoch milliseconds to pandas datetimes.
    """
    column_names = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    exchange = ccxt.binance()
    candles = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)
    frame = pd.DataFrame(candles, columns=column_names)
    frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')
    return frame
18
+
19
+ # Rolling Window Normalizer
20
class RollingWindowNormalizer:
    """Z-score selected columns against their own rolling mean and std.

    ``fit`` stores the rolling statistics per column in ``stats_``;
    ``transform`` applies them to the frame it is given, modifying that frame
    in place and dropping every row that contains a NaN afterwards.
    """

    def __init__(self, window=24):
        self.window = window
        # column name -> {'rolling_mean': Series, 'rolling_std': Series}
        self.stats_ = {}

    def fit(self, X, columns):
        """Compute and remember rolling mean/std for each name in ``columns``."""
        for name in columns:
            roller = X[name].rolling(window=self.window)
            self.stats_[name] = {
                'rolling_mean': roller.mean(),
                'rolling_std': roller.std(),
            }
        return self

    def transform(self, X, columns):
        """Normalize ``columns`` of ``X`` in place and return ``X``.

        The stored statistics are index-aligned with ``X`` by pandas, so the
        rows that had no full window become NaN and are removed by ``dropna``.
        """
        for name in columns:
            fitted = self.stats_[name]
            centered = X[name] - fitted['rolling_mean']
            X[name] = centered / fitted['rolling_std']
            X.dropna(inplace=True)
        return X

    def fit_transform(self, X, columns):
        """Fit on ``X`` and immediately transform it (in place)."""
        self.fit(X, columns)
        return self.transform(X, columns)
39
+
40
def normalize(X, columns=['open','high','low','close']):
    """Return a rolling-window-normalized copy of ``X`` plus the fitted normalizer.

    ``X`` itself is left untouched: the normalizer works on a copy, which
    comes back with the leading incomplete-window rows removed.

    Returns:
        ``(normalized_frame, normalizer)``.
    """
    working = X.copy()
    normalizer = RollingWindowNormalizer()
    normalized = normalizer.fit(working, columns).transform(working, columns)
    return normalized, normalizer
46
+
47
+ # Outlier removal
48
def remove_outliers(x, epsilon):
    """Drop rows whose ``low`` value is an outlier by z-score.

    Args:
        x: DataFrame with a numeric ``low`` column.
        epsilon: Rows are kept only when ``|z-score(low)| < epsilon``.

    Returns:
        The rows of ``x`` that pass the filter, with their original index.
    """
    # scipy.stats.zscore may hand back a plain ndarray for Series input, and
    # ndarrays have no ``.abs()`` method. np.abs over an explicit array works
    # for either return type and yields a positional boolean mask.
    z_score = np.abs(np.asarray(stats.zscore(x['low'])))
    return x[z_score < epsilon]
51
+
52
+ # Advanced features
53
def calculate_rsi(d, window=14):
    """Compute a simple-moving-average RSI for a price series.

    Args:
        d: pandas Series of prices.
        window: Number of periods averaged for gains and losses.

    Returns:
        A Series carrying the same index as ``d``. The first ``window - 1``
        entries are NaN; where the average loss is zero the ratio is infinite
        and the RSI evaluates to 100.
    """
    delta = d.diff()
    # np.where returns bare arrays, so the index must be restored explicitly.
    # Without it the result gets a fresh 0..n-1 index and is misaligned when
    # assigned back to a frame whose index does not start at zero (e.g. after
    # rows were dropped).
    gain = pd.Series(np.where(delta > 0, delta, 0), index=d.index)
    loss = pd.Series(np.where(delta < 0, -delta, 0), index=d.index)
    avg_gain = gain.rolling(window=window).mean()
    avg_loss = loss.rolling(window=window).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))
61
+
62
def generate_advanced_features(d, other_data=None):
    """Build the feature matrix for one asset.

    Works on a copy of ``d`` and appends, in this order: 7- and 21-period
    close moving averages, RSI of the close, the ratio of the two moving
    averages, lags 1-4 of ``close`` then of ``high``, the 10-period rolling
    std of the close and, when ``other_data`` is given, the close ratio
    against the other asset (current and shifted by two periods).

    Returns:
        A NumPy array of every column except the first one of the frame.
    """
    d = d.copy()
    close = d['close']

    d['ma_7'] = close.rolling(window=7).mean()
    d['ma_21'] = close.rolling(window=21).mean()
    d['rsi'] = calculate_rsi(close)
    d['ma_ratio'] = d['ma_7'] / d['ma_21']

    for source in ('close', 'high'):
        for lag in (1, 2, 3, 4):
            d[f'lag_{source}{lag}'] = d[source].shift(lag)

    d['std_last_10'] = close.rolling(window=10).std()

    if other_data is not None:
        other_close = other_data['close']
        d['relative_strength'] = close / other_close
        d['relative_strength_1'] = close.shift(2) / other_close.shift(2)

    # Column 0 is skipped (presumably the timestamp column - verify against caller).
    return d.iloc[:, 1:].values
76
+
77
def create_features_and_labels_with_advanced_features(btc, eth):
    """Produce the stacked BTC/ETH feature matrix and the BTC trend labels.

    Features for each asset are computed with the other asset as reference
    and stacked row-wise (BTC rows first, then ETH rows). The label is 1
    where the 5-period rolling mean of the BTC close, shifted one step
    forward, is greater than the current close, else 0.

    Returns:
        ``(features, labels)`` as NumPy arrays.
    """
    btc_frame, eth_frame = btc.copy(), eth.copy()
    # NOTE(review): vstack yields twice as many rows as there are labels;
    # confirm this matches how the models were trained.
    stacked = np.vstack((
        generate_advanced_features(btc_frame, eth_frame),
        generate_advanced_features(eth_frame, btc_frame),
    ))

    target = btc.copy()
    target['future'] = target['close'].rolling(window=5).mean().shift(-1)
    target['trend'] = (target['future'] > target['close']).astype(int)
    return stacked, target['trend'].dropna().values
88
+
89
def get_data_predict(btc_ori, eth_ori, symbol='ETH/USDT', timeframe='4h', epsilon=2, normalized=False, limit=50):
    """Fetch fresh BTC and ``symbol`` candles and prepare them for prediction.

    Both downloads are outlier-filtered with ``epsilon``. When ``normalized``
    is true, each is appended to its historical frame (``btc_ori`` /
    ``eth_ori``), de-duplicated on ``timestamp``, re-indexed and rolling-window
    normalized.

    Returns:
        ``(btc_frame, eth_frame, label)`` where ``label`` is the BTC
        ``timestamp``/``close`` columns shifted one row earlier.
    """
    def _merge_and_normalize(history, recent):
        # Historical rows come first, so on duplicate timestamps they win.
        merged = pd.concat([history, recent])
        merged = merged.drop_duplicates(subset='timestamp').reset_index(drop=True)
        normalized_frame, _ = normalize(merged)
        return normalized_frame

    btc_raw = fetch_binance_data('BTC/USDT', timeframe, limit=limit)
    eth_raw = fetch_binance_data(symbol, timeframe, limit=limit)
    btc_data_ = remove_outliers(btc_raw, epsilon)
    eth_data_ = remove_outliers(eth_raw, epsilon)

    if normalized:
        btc_data_ = _merge_and_normalize(btc_ori, btc_data_)
        eth_data_ = _merge_and_normalize(eth_ori, eth_data_)

    label = btc_data_[['timestamp', 'close']].shift(-1)
    return btc_data_, eth_data_, label
101
+
102
def predictions(model, X1, X2, name, n_steps):
    """Return the model's positive-class probability for every feature row.

    Features are built from ``X1``/``X2``, missing values are replaced by the
    column mean, and column 1 of ``model.predict_proba`` is returned.
    ``name`` and ``n_steps`` are accepted but not used here.
    """
    feature_matrix, _ = create_features_and_labels_with_advanced_features(X1, X2)
    filled = SimpleImputer(strategy='mean').fit_transform(feature_matrix)
    return model.predict_proba(filled)[:, 1]
108
+
109
def plot(y, label, timeframe='1h', ma=5, n_steps=None):
    """Plot the scaled prediction signal against the real close series.

    Args:
        y: NumPy array of predicted probabilities.
        label: DataFrame providing ``timestamp`` and ``close`` columns.
        timeframe: Text shown in the figure title.
        ma: Rolling-mean window applied to every curve; a falsy value plots
            the raw series and omits the ``difference`` curve.
        n_steps: Number of trailing points to draw. ``None`` means as many
            as are available.

    Returns:
        The matplotlib figure that was drawn.
    """
    timestamps = label['timestamp'].values
    closes = label['close'].values

    # The tail slices below must all have the same length. Clamp the window
    # to what both inputs can supply, otherwise a request larger than the
    # data (or inputs of different length) makes DataFrame construction /
    # plotting raise on mismatched lengths.
    available = min(len(y), len(timestamps))
    if n_steps is None or n_steps > available:
        n_steps = available

    dates = timestamps[-n_steps:]
    # Map probabilities from [0, 1] onto [-2.5, 2.5] around the zero line.
    signal = 5 * (y[-n_steps:] - 0.5)
    real = closes[-n_steps:]

    plt.figure(figsize=(12, 6))
    if ma:
        df_plot = pd.DataFrame({'date': dates, 'prediction': signal, 'real': real})
        plt.plot(df_plot['date'], df_plot['prediction'].rolling(window=ma).mean(), label='updown')
        plt.plot(df_plot['date'], df_plot['real'].rolling(window=ma).mean(), label='real')
        plt.plot(df_plot['date'], (df_plot['real'] - df_plot['prediction']).rolling(window=ma).mean(), label='difference')
    else:
        plt.plot(dates, signal, label='updown')
        plt.plot(dates, real, label='real')
    plt.axhline(0, linestyle='--')
    plt.title(f"BTC timeframe {timeframe}")
    plt.xlabel('Timestamp')
    plt.ylabel('Values')
    plt.legend()
    return plt.gcf()
127
+
128
+ # Load pre-trained models
129
# NOTE: pickle.load can execute arbitrary code; these files must come from a
# trusted source and ship alongside this script.
with open('model_n1d_cat.pkl', mode='rb') as daily_model_file:
    model_n1d_cat = pickle.load(daily_model_file)

with open('model_n4h_cat.pkl', mode='rb') as four_hour_model_file:
    model_n4h_cat = pickle.load(four_hour_model_file)
133
+
134
def predict_and_plot(timeframe, limit, epsilon, n_steps, ma):
    """Gradio callback: fetch data, run the matching model and return a figure.

    Args:
        timeframe: ``'1d'`` selects the daily model; anything else selects
            the 4h model.
        limit: Number of candles requested from the exchange.
        epsilon: Z-score threshold used for outlier removal.
        n_steps: Number of trailing points shown in the plot.
        ma: Moving-average window used for the plotted curves.

    Returns:
        The matplotlib figure produced by ``plot``.
    """
    # These values are used as a candle count, a slice bound and a rolling
    # window, all of which must be integers; coerce them in case the UI
    # delivers floats.
    limit, n_steps, ma = int(limit), int(n_steps), int(ma)

    # The previous code called ``yf.download`` although ``yf`` is never
    # imported, so every request failed with NameError. Use the file's own
    # Binance fetcher instead: it also yields the ``timestamp`` column that
    # get_data_predict de-duplicates on.
    btc_ori = fetch_binance_data('BTC/USDT', timeframe, limit=limit)
    eth_ori = fetch_binance_data('ETH/USDT', timeframe, limit=limit)

    btc_data, eth_data, _ = get_data_predict(
        btc_ori, eth_ori, symbol='ETH/USDT', timeframe=timeframe,
        epsilon=epsilon, normalized=True, limit=limit,
    )
    model = model_n1d_cat if timeframe == '1d' else model_n4h_cat
    preds = predictions(model, btc_data, eth_data, name=timeframe, n_steps=n_steps)
    return plot(preds, label=btc_data, timeframe=timeframe, ma=ma, n_steps=n_steps)
142
+
143
# Gradio UI: one dropdown and four sliders, passed positionally to
# predict_and_plot in the order listed; the returned figure is shown as a plot.
interface = gr.Interface(
    fn=predict_and_plot,
    inputs=[
        gr.Dropdown(['1d', '4h'], label='Timeframe', value='1d'),
        gr.Slider(50, 500, step=50, value=100, label='Data Limit'),
        gr.Slider(0.1, 5.0, step=0.1, value=2.0, label='Epsilon'),
        gr.Slider(50, 500, step=50, value=200, label='N_steps'),
        gr.Slider(1, 20, step=1, value=5, label='Moving Average Window (ma)'),
    ],
    outputs=gr.Plot(),
    title='BTC Price Movement Prediction',
    description='Predict BTC price movements using pre-trained LightGBM models.',
)

# Start the web server only when executed as a script.
if __name__ == '__main__':
    interface.launch()