QuantumLearner committed on
Commit
65e4223
·
verified ·
1 Parent(s): 0970485

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +319 -330
app.py CHANGED
@@ -26,9 +26,13 @@ default_tickers = ['BTC-USD', 'ETH-USD', 'BNB-USD', 'JPM', 'BAC', 'WFC', 'C']
26
 
27
  # Function to load adjusted close price data for a given ticker
28
  def load_ticker_ts_df(ticker, start, end):
29
- data = yf.download(ticker, start=start, end=end)
30
- return data['Close']
31
-
 
 
 
 
32
  # Function to calculate cross-correlation at different lags
33
  def cross_correlation(series1, series2, lag):
34
  if lag > 0:
@@ -37,34 +41,28 @@ def cross_correlation(series1, series2, lag):
37
  return np.corrcoef(series1[-lag:], series2[:lag])[0, 1]
38
  else:
39
  return np.corrcoef(series1, series2)[0, 1]
40
-
41
  # Function to perform Granger causality test with shifted time series
42
  def granger_test_with_shift(data, target, predictor, shift):
43
- shifted_data = data.copy() # Make a copy of the data
44
- # Shifting the predictor series by the specified lag
45
  shifted_data[predictor] = data[predictor].shift(shift)
46
- # Dropping any NaN values created by the shift
47
  shifted_data.dropna(inplace=True)
48
- # Performing Granger causality test
49
  granger_test_result = grangercausalitytests(shifted_data[[target, predictor]], maxlag=1, verbose=False)
50
- # Extracting the p-value
51
  p_value = granger_test_result[1][0]['ssr_ftest'][1]
52
  return p_value
53
-
54
  # Function to calculate cumulative profit
55
  def calculate_cumulative_profit(aligned_data, z_scores, buy_threshold, sell_threshold):
56
- positions = [] # List to store open positions
57
- profit = 0 # Initialize profit
58
- cumulative_profits = [] # List to store cumulative profits over time
59
- position_open = False # Flag to indicate if a position is open
60
 
61
- # Iterate over the z-scores
62
  for i in range(len(z_scores)):
63
- date = z_scores.index[i] # Get the current date
64
- z = z_scores.iloc[i] # Get the current z-score
65
 
66
  if z > buy_threshold and not position_open:
67
- # Open a short position on ticker2 and a long position on ticker1
68
  entry_date = date
69
  entry_price1 = aligned_data.loc[date, ticker1]
70
  entry_price2 = aligned_data.loc[date, ticker2]
@@ -72,7 +70,6 @@ def calculate_cumulative_profit(aligned_data, z_scores, buy_threshold, sell_thre
72
  positions.append((entry_date, 'sell', ticker2, entry_price2, 'buy', ticker1, entry_price1))
73
 
74
  elif z < sell_threshold and not position_open:
75
- # Open a long position on ticker2 and a short position on ticker1
76
  entry_date = date
77
  entry_price1 = aligned_data.loc[date, ticker1]
78
  entry_price2 = aligned_data.loc[date, ticker2]
@@ -80,24 +77,21 @@ def calculate_cumulative_profit(aligned_data, z_scores, buy_threshold, sell_thre
80
  positions.append((entry_date, 'buy', ticker2, entry_price2, 'sell', ticker1, entry_price1))
81
 
82
  elif position_open and abs(z) < 0.5:
83
- # Close the position when z-score crosses zero (mean reversion)
84
  exit_date = date
85
  exit_price1 = aligned_data.loc[date, ticker1]
86
  exit_price2 = aligned_data.loc[date, ticker2]
87
  position_open = False
88
- entry = positions.pop() # Get the last opened position
89
  entry_date, action1, tickerA, entry_priceA, action2, tickerB, entry_priceB = entry
90
 
91
  if action1 == 'sell':
92
- # Calculate profit for short ticker2 and long ticker1
93
  profit += (entry_priceA - exit_price2) + (exit_price1 - entry_priceB)
94
  else:
95
- # Calculate profit for long ticker2 and short ticker1
96
  profit += (exit_price2 - entry_priceA) + (entry_priceB - exit_price1)
97
 
98
- cumulative_profits.append(profit) # Append the current profit to the cumulative profits
99
 
100
- return cumulative_profits, positions # Return the cumulative profits and positions
101
 
102
  # Function to sanitize the data
103
  def sanitize_data(data_map):
@@ -128,7 +122,7 @@ def find_cointegrated_pairs(tickers_ts_map, p_value_threshold):
128
  result = coint(adj_close_data[:, i], adj_close_data[:, j])
129
  pvalue_matrix[i, j] = result[1]
130
  pvalue_matrix[j, i] = result[1]
131
- np.fill_diagonal(pvalue_matrix, 0) # Set diagonal to 0
132
  pairs = [(tickers[i], tickers[j], pvalue_matrix[i, j]) for i in range(n) for j in range(i+1, n) if pvalue_matrix[i, j] < p_value_threshold]
133
  return pvalue_matrix, pairs
134
 
@@ -155,9 +149,9 @@ def find_cointegrated_pairs_rolling(tickers_ts_map, p_value_threshold, window_si
155
  continue
156
  test_stat, crit_values = johansen_test(window_data)
157
  if test_stat[0] > crit_values[1, 1]: # Using 95% critical value
158
- pvalues.append(0.01) # Assign a small p-value if cointegrated
159
  else:
160
- pvalues.append(1) # Assign a large p-value if not cointegrated
161
 
162
  pvalues = np.array(pvalues)
163
  consistent_cointegration = np.mean(pvalues < p_value_threshold)
@@ -193,270 +187,278 @@ if page == 'Pairs Trading Analysis':
193
  3. Click 'Run Analysis' to start the analysis.
194
  """)
195
 
196
- # Expander for stock/crypto ticker and date selection
197
  with st.sidebar.expander("Stock/Crypto Ticker and Date Selection", expanded=True):
198
  ticker1 = st.text_input('Enter First Stock/Crypto Ticker', 'ASML.AS', help="Enter the ticker symbol for the first stock or cryptocurrency.")
199
  ticker2 = st.text_input('Enter Second Stock/Crypto Ticker', 'ASML', help="Enter the ticker symbol for the second stock or cryptocurrency.")
200
  start_date = st.date_input('Start Date', pd.to_datetime('2022-01-01'), help="Select the start date for the data range.")
201
  end_date = st.date_input('End Date', pd.to_datetime(END_DATE), help="Select the end date for the data range.")
202
 
203
- # Expander for parameters specific to each method
204
  with st.sidebar.expander("Method Parameters", expanded=True):
205
  volatility_window = st.number_input('Volatility Window (days)', min_value=1, max_value=365, value=30, help="Set the number of days for the rolling volatility window.")
206
  buy_threshold = st.number_input('Buy Z-Score Threshold', value=2.0, help="Set the z-score threshold to generate buy signals.")
207
  sell_threshold = st.number_input('Sell Z-Score Threshold', value=-2.0, help="Set the z-score threshold to generate sell signals.")
208
 
209
  if st.sidebar.button('Run Analysis'):
210
- # Data collection
211
- data1 = yf.download(ticker1, start=start_date, end=end_date)['Close']
212
- data2 = yf.download(ticker2, start=start_date, end=end_date)['Close']
213
- aligned_data = pd.concat([data1, data2], axis=1, join='inner')
214
- aligned_data.columns = [ticker1, ticker2]
215
-
216
- # Normalize the price series
217
- normalized_data = (aligned_data - aligned_data.mean()) / aligned_data.std()
218
-
219
- # Plot normalized data
220
- fig1 = go.Figure()
221
- fig1.add_trace(go.Scatter(x=normalized_data.index, y=normalized_data[ticker1], mode='lines', name=f'Normalized {ticker1}'))
222
- fig1.add_trace(go.Scatter(x=normalized_data.index, y=normalized_data[ticker2], mode='lines', name=f'Normalized {ticker2}'))
223
- fig1.update_layout(title=f'Normalized Price Series for {ticker1} and {ticker2}', xaxis_title='Date', yaxis_title='Normalized Price')
224
- st.plotly_chart(fig1)
225
-
226
- # Calculate daily returns
227
- returns = aligned_data.pct_change().dropna()
228
-
229
- # Calculate rolling volatilities (annualized)
230
- volatility1 = returns[ticker1].rolling(volatility_window).std() * np.sqrt(252)
231
- volatility2 = returns[ticker2].rolling(volatility_window).std() * np.sqrt(252)
232
-
233
- # Plot rolling volatilities
234
- fig2 = go.Figure()
235
- fig2.add_trace(go.Scatter(x=volatility1.index, y=volatility1, mode='lines', name=f"{ticker1} Volatility"))
236
- fig2.add_trace(go.Scatter(x=volatility2.index, y=volatility2, mode='lines', name=f"{ticker2} Volatility"))
237
- fig2.update_layout(title=f"{volatility_window}-Day Rolling Historical Volatility for {ticker1} and {ticker2}", xaxis_title='Date', yaxis_title='Volatility')
238
- st.plotly_chart(fig2)
239
-
240
- # Check for stationarity using ADF test
241
- adf_result1 = adfuller(aligned_data[ticker1])
242
- adf_result2 = adfuller(aligned_data[ticker2])
243
-
244
- # Perform Johansen cointegration test
245
- coint_test_stat, coint_critical_values = johansen_test(aligned_data)
246
-
247
- # If cointegration exists, proceed with VECM
248
- vecm = VECM(aligned_data, k_ar_diff=1, coint_rank=1)
249
- vecm_fit = vecm.fit()
250
-
251
- # Analyzing the residuals for stationarity
252
- residuals = vecm_fit.resid
253
- residuals_df = pd.DataFrame(residuals, index=aligned_data.index[-len(residuals):], columns=[f'Residual_{ticker1}', f'Residual_{ticker2}'])
254
- adf_residuals_1 = adfuller(residuals[:, 0])
255
- adf_residuals_2 = adfuller(residuals[:, 1])
256
-
257
- # Plot residuals from VECM
258
- fig3 = go.Figure()
259
- fig3.add_trace(go.Scatter(x=residuals_df.index, y=residuals_df[f'Residual_{ticker1}'], mode='lines', name=f'Residual {ticker1}'))
260
- fig3.add_trace(go.Scatter(x=residuals_df.index, y=residuals_df[f'Residual_{ticker2}'], mode='lines', name=f'Residual {ticker2}'))
261
- fig3.add_hline(y=0, line=dict(color='red', dash='dash'), name='Zero Line')
262
- fig3.update_layout(title='Residuals from VECM', xaxis_title='Date', yaxis_title='Residuals')
263
- st.plotly_chart(fig3)
264
-
265
- # Display ADF test results for the tickers
266
- st.write(f"ADF Statistic for {ticker1}: {adf_result1[0]}, p-value: {adf_result1[1]}")
267
- st.write(f"ADF Statistic for {ticker2}: {adf_result2[0]}, p-value: {adf_result2[1]}")
268
-
269
- # Expander for "How it Works" inside the main body
270
- with st.expander("How it Works", expanded=False):
271
- st.markdown("""
272
- **ADF Test:**
273
- - The Augmented Dickey-Fuller (ADF) test checks whether a time series has a unit root, i.e., whether it is non-stationary.
274
- - If the p-value is less than 0.05, we reject the null hypothesis that the series has a unit root, indicating that the series is stationary.
275
- **Johansen Cointegration Test:**
276
- - The Johansen test is used to determine the number of cointegrating relationships among multiple time series.
277
- - If the test statistic is greater than the critical value, we reject the null hypothesis that there is no cointegration.
278
- **VECM (Vector Error Correction Model):**
279
- - A VECM is a special form of a VAR (Vector Autoregression) model used for cointegrated series. It corrects for disequilibrium in the short run while keeping the long-term relationship intact.
280
- **Z-Score Trading Strategy:**
281
- - Z-scores measure how many standard deviations an element is from the mean. In pairs trading, z-scores are used to identify overbought or oversold conditions, triggering buy or sell signals.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  """)
 
 
 
 
283
 
284
- st.markdown("#### Interpretation of ADF Results")
285
- st.latex(r'''
286
- H_0: \text{The series has a unit root (non-stationary)} \\
287
- H_1: \text{The series does not have a unit root (stationary)}
288
- ''')
289
- st.write("""
290
- - The Augmented Dickey-Fuller (ADF) test checks the null hypothesis that a unit root is present in a time series sample.
291
- """)
292
- if adf_result1[1] < 0.05:
293
- st.write(f"{ticker1} is stationary, indicating the series does not have a unit root.")
294
- else:
295
- st.write(f"{ticker1} is not stationary, indicating the series has a unit root.")
296
-
297
- if adf_result2[1] < 0.05:
298
- st.write(f"{ticker2} is stationary, indicating the series does not have a unit root.")
299
- else:
300
- st.write(f"{ticker2} is not stationary, indicating the series has a unit root.")
301
-
302
- # Display cointegration test results
303
- st.write("Johansen Cointegration Test Results:")
304
- johansen_results = pd.DataFrame({
305
- 'Test Statistic': coint_test_stat,
306
- '90% Critical Value': coint_critical_values[:, 0],
307
- '95% Critical Value': coint_critical_values[:, 1],
308
- '99% Critical Value': coint_critical_values[:, 2]
309
- }, index=[f'Cointegration Test {i+1}' for i in range(len(coint_test_stat))])
310
- st.write(johansen_results)
311
-
312
- st.markdown("#### Interpretation of Johansen Cointegration Test Results")
313
- st.latex(r'''
314
- H_0: \text{No cointegration relationship exists} \\
315
- H_1: \text{Cointegration relationship exists}
316
- ''')
317
- st.write("""
318
- - The Johansen cointegration test is used to determine the cointegration rank between multiple time series.
319
- """)
320
- if coint_test_stat[0] > coint_critical_values[0, 1]:
321
- st.write(f"The two assets {ticker1} and {ticker2} are cointegrated at the 95% confidence level.")
322
- else:
323
- st.write(f"The two assets {ticker1} and {ticker2} are not cointegrated at the 95% confidence level.")
324
 
325
- st.markdown("#### Interpretation of VECM Residuals")
326
- st.write(f"ADF Statistic for VECM residuals of {ticker1}: {adf_residuals_1[0]}, p-value: {adf_residuals_1[1]}")
327
- st.write(f"ADF Statistic for VECM residuals of {ticker2}: {adf_residuals_2[0]}, p-value: {adf_residuals_2[1]}")
328
- st.write("""
329
- - The residuals from the Vector Error Correction Model (VECM) should be stationary to confirm cointegration.
330
- """)
331
- if adf_residuals_1[1] < 0.1:
332
- st.write(f"The residuals of the VECM model for {ticker1} are stationary, confirming cointegration.")
333
- else:
334
- st.write(f"The residuals of the VECM model for {ticker1} are not stationary, suggesting no cointegration.")
335
 
336
- if adf_residuals_2[1] < 0.1:
337
- st.write(f"The residuals of the VECM model for {ticker2} are stationary, confirming cointegration.")
338
- else:
339
- st.write(f"The residuals of the VECM model for {ticker2} are not stationary, suggesting no cointegration.")
340
-
341
- # Calculate cross-correlation for a range of lags
342
- lag_range = range(-30, 31)
343
- cross_correlations = [cross_correlation(returns[ticker1], returns[ticker2], lag) for lag in lag_range]
344
-
345
- # Plot cross-correlation for different lags
346
- fig4 = go.Figure()
347
- fig4.add_trace(go.Scatter(x=list(lag_range), y=cross_correlations, mode='lines+markers'))
348
- fig4.add_hline(y=0, line=dict(color='gray', dash='dash'))
349
- fig4.add_vline(x=0, line=dict(color='red', dash='dash'))
350
- fig4.update_layout(title=f"Cross-Correlation between {ticker1} and {ticker2}", xaxis_title='Lag (days)', yaxis_title='Cross-Correlation')
351
- st.plotly_chart(fig4)
352
-
353
- st.markdown("#### Interpretation of Cross-Correlation Results")
354
- max_corr = max(cross_correlations)
355
- max_lag = lag_range[cross_correlations.index(max_corr)]
356
- second_max_corr = max(corr for i, corr in enumerate(cross_correlations) if corr != max_corr)
357
- second_max_lag = lag_range[cross_correlations.index(second_max_corr)]
358
-
359
- st.write(f"Highest correlation: {max_corr:.2f} at lag {max_lag}")
360
- st.write(f"Second highest correlation: {second_max_corr:.2f} at lag {second_max_lag}")
361
-
362
- interpretation = f"Highest correlation at lag {max_lag}: The high correlation at lag {max_lag} indicates that {ticker1} and {ticker2} move together without any significant lead or lag. In other words, any movements in {ticker1} are almost instantaneously reflected in {ticker2} and vice versa. This is typical for cross-listed assets, where information and price changes are quickly reflected in both markets.\n"
363
-
364
- if second_max_lag < 0:
365
- leading_ticker = ticker2
366
- lagging_ticker = ticker1
367
- lead_days = abs(second_max_lag)
368
- direction = f"Second highest correlation at lag {second_max_lag}: {leading_ticker} leads {lagging_ticker} by {lead_days} days. This means that movements in {leading_ticker} tend to precede similar movements in {lagging_ticker} by {lead_days} day(s)."
369
- elif second_max_lag > 0:
370
- leading_ticker = ticker1
371
- lagging_ticker = ticker2
372
- lead_days = second_max_lag
373
- direction = f"Second highest correlation at lag {second_max_lag}: {leading_ticker} leads {lagging_ticker} by {lead_days} days. This means that movements in {leading_ticker} tend to precede similar movements in {lagging_ticker} by {lead_days} day(s)."
374
- else:
375
- direction = "No significant lead/lag relationship; they move simultaneously."
376
- interpretation += direction
377
- st.write(interpretation)
378
-
379
- # Granger causality test with shifts
380
- shift_range = range(-5, 6)
381
- granger_p_values_shift_1_to_2 = {shift: granger_test_with_shift(aligned_data, ticker1, ticker2, shift) for shift in shift_range}
382
- granger_p_values_shift_2_to_1 = {shift: granger_test_with_shift(aligned_data, ticker2, ticker1, shift) for shift in shift_range}
383
-
384
- # Create DataFrames for plotting Granger causality test results
385
- granger_p_values_df_shift_1_to_2 = pd.DataFrame(granger_p_values_shift_1_to_2, index=[f"{ticker1} causes {ticker2}"]).T
386
- granger_p_values_df_shift_2_to_1 = pd.DataFrame(granger_p_values_shift_2_to_1, index=[f"{ticker2} causes {ticker1}"]).T
387
-
388
- # Plot Granger causality test p-values with shifts
389
- fig5 = go.Figure()
390
- fig5.add_trace(go.Scatter(x=granger_p_values_df_shift_1_to_2.index, y=granger_p_values_df_shift_1_to_2[f"{ticker1} causes {ticker2}"], mode='lines+markers', name=f"{ticker1} causes {ticker2}"))
391
- fig5.add_trace(go.Scatter(x=granger_p_values_df_shift_2_to_1.index, y=granger_p_values_df_shift_2_to_1[f"{ticker2} causes {ticker1}"], mode='lines+markers', name=f"{ticker2} causes {ticker1}"))
392
- fig5.add_hline(y=0.05, line=dict(color='gray', dash='dash'))
393
- fig5.add_vline(x=0, line=dict(color='red', dash='dash'))
394
- fig5.update_layout(title=f"Granger Causality Test p-values with Shifts between {ticker1} and {ticker2}", xaxis_title='Shift (days)', yaxis_title='p-value')
395
- st.plotly_chart(fig5)
396
-
397
- st.markdown("#### Interpretation of Granger Causality Test Results")
398
- best_lag_1_to_2 = min(granger_p_values_shift_1_to_2, key=granger_p_values_shift_1_to_2.get)
399
- best_lag_2_to_1 = min(granger_p_values_shift_2_to_1, key=granger_p_values_shift_2_to_1.get)
400
-
401
- interpretation = ""
402
-
403
- if granger_p_values_shift_1_to_2[best_lag_1_to_2] < 0.05 and granger_p_values_shift_1_to_2[best_lag_1_to_2] < granger_p_values_shift_2_to_1[best_lag_2_to_1]:
404
- causality_direction = f"{ticker1} causes {ticker2}"
405
- best_lag = best_lag_1_to_2
406
- interpretation += f"Granger causality test with shifts suggests that {ticker1} causes {ticker2} with a lag of {abs(best_lag)} days.\n"
407
- interpretation += f"This means that movements in {ticker1} tend to lead movements in {ticker2} by {abs(best_lag)} days. In practical terms, if {ticker1} experiences a price change, we can expect a similar change in {ticker2} approximately {abs(best_lag)} days later."
408
- else:
409
- causality_direction = f"{ticker2} causes {ticker1}"
410
- best_lag = best_lag_2_to_1
411
- interpretation += f"Granger causality test with shifts suggests that {ticker2} causes {ticker1} with a lag of {abs(best_lag)} days.\n"
412
- interpretation += f"This means that movements in {ticker2} tend to lead movements in {ticker1} by {abs(best_lag)} days. In practical terms, if {ticker2} experiences a price change, we can expect a similar change in {ticker1} approximately {abs(best_lag)} days later."
413
-
414
- st.write(interpretation)
415
-
416
- # Adjust data based on the identified best lag
417
- adjusted_data = aligned_data.copy()
418
- adjusted_data[ticker1] = adjusted_data[ticker1].shift(best_lag).dropna()
419
- adjusted_data = adjusted_data.dropna()
420
-
421
- # Calculate the residuals
422
- model = OLS(adjusted_data[ticker2], adjusted_data[ticker1])
423
- results = model.fit()
424
- residuals = adjusted_data[ticker2] - results.params[ticker1] * adjusted_data[ticker1]
425
-
426
- # Calculate Z-Scores
427
- residuals_mean = residuals.mean()
428
- residuals_std = residuals.std()
429
- z_scores = (residuals - residuals_mean) / residuals_std
430
-
431
- # Generate buy and sell signals
432
- buy_signals = z_scores[z_scores > buy_threshold]
433
- sell_signals = z_scores[z_scores < sell_threshold]
434
-
435
- # Plot the residuals with buy and sell signals
436
- fig6 = go.Figure()
437
- fig6.add_trace(go.Scatter(x=z_scores.index, y=z_scores, mode='lines', name='Z-Score of Residuals'))
438
- fig6.add_trace(go.Scatter(x=buy_signals.index, y=buy_signals, mode='markers', marker=dict(color='green', symbol='triangle-up', size=10), name=f'Buy {ticker1}, Sell {ticker2} Signal'))
439
- fig6.add_trace(go.Scatter(x=sell_signals.index, y=sell_signals, mode='markers', marker=dict(color='red', symbol='triangle-down', size=10), name=f'Sell {ticker1}, Buy {ticker2} Signal'))
440
- fig6.add_hline(y=buy_threshold, line=dict(color='gray', dash='dash'))
441
- fig6.add_hline(y=sell_threshold, line=dict(color='gray', dash='dash'))
442
- fig6.update_layout(title=f"Residuals (Adjusted for Lag) with Buy and Sell Signals based on Z-Scores", xaxis_title='Date', yaxis_title='Z-Score')
443
- st.plotly_chart(fig6)
444
-
445
- # Calculate cumulative profits and positions
446
- cumulative_profits, positions = calculate_cumulative_profit(aligned_data, z_scores, buy_threshold, sell_threshold)
447
-
448
- # Plot the cumulative profit
449
- fig7 = go.Figure()
450
- fig7.add_trace(go.Scatter(x=aligned_data.index[:len(cumulative_profits)], y=cumulative_profits, mode='lines', name='Cumulative Profit'))
451
- fig7.update_layout(title=f"Cumulative Profit from Z-Score Trading Strategy", xaxis_title='Date', yaxis_title='Cumulative Profit')
452
- st.plotly_chart(fig7)
453
-
454
- st.markdown("#### Interpretation of Trading Signals and Cumulative Profit")
455
- st.write(f"Cumulative Profit: {cumulative_profits[-1]:.2f}")
456
- st.write("""
457
- - The trading strategy uses z-scores to generate buy and sell signals.
458
- - The cumulative profit shows the total profit from the trading strategy over the analyzed period.
459
- """)
 
 
460
 
461
  elif page == 'Pair Cointegration Identification':
462
  st.subheader("Cointegration Identification")
@@ -466,65 +468,54 @@ elif page == 'Pair Cointegration Identification':
466
  It works for both stocks and cryptocurrency pairs.
467
  """)
468
 
469
- # Cointegration Method Selection
470
  method = st.sidebar.selectbox('Select Cointegration Method', ['Engle-Granger', 'Johansen Cointegration'])
471
 
472
- # Expander for stock/crypto ticker and date selection
473
  with st.sidebar.expander("Stock/Crypto Ticker and Date Selection", expanded=True):
474
  tickers_input = st.text_input('Enter Stock or Crypto Tickers (comma-separated)', ', '.join(default_tickers), help="Enter the ticker symbols for stocks or cryptocurrencies you want to analyze.")
475
  start_date = st.date_input('Start Date', pd.to_datetime(START_DATE), help="Select the start date for the data range.")
476
  end_date = st.date_input('End Date', pd.to_datetime(END_DATE), help="Select the end date for the data range.")
477
 
478
- # Expander for parameters specific to each method
479
- #with st.sidebar.expander("Method Parameters", expanded=True):
480
- # st.write("Set parameters for the selected method.")
481
-
482
  if st.sidebar.button('Run Cointegration Analysis'):
483
- tickers = [ticker.strip() for ticker in tickers_input.split(',')]
484
- universe_tickers_ts_map = {ticker: load_ticker_ts_df(ticker, start_date, end_date) for ticker in tickers}
485
- uts_sanitized = sanitize_data(universe_tickers_ts_map)
486
-
487
- if method == 'Engle-Granger':
488
- # Find cointegrated pairs using the Engle-Granger method
489
- pvalues, pairs = find_cointegrated_pairs(uts_sanitized, P_VALUE_THRESHOLD)
490
- # Mask values greater than P_VALUE_THRESHOLD
491
- masked_pvalues = np.where(pvalues > P_VALUE_THRESHOLD, np.nan, pvalues)
492
- # Plot heatmap of p-values
493
- tickers_list = list(uts_sanitized.keys())
494
- fig_heatmap = px.imshow(masked_pvalues, x=tickers_list, y=tickers_list,
495
- color_continuous_scale='RdYlGn_r', title='Cointegration Heatmap (Engle-Granger)',
496
- labels=dict(x='Tickers', y='Tickers', color='P-value'),
497
- zmin=0, zmax=P_VALUE_THRESHOLD)
498
- else:
499
- # Find cointegrated pairs using the Johansen method with rolling windows
500
- pvalues, pairs = find_cointegrated_pairs_rolling(uts_sanitized, P_VALUE_THRESHOLD, ROLLING_WINDOW_SIZE, CONSISTENT_COINTEGRATION_THRESHOLD)
501
- # Mask values greater than P_VALUE_THRESHOLD
502
- masked_pvalues = np.where(pvalues > P_VALUE_THRESHOLD, np.nan, pvalues)
503
- # Plot heatmap of p-values
504
- tickers_list = list(uts_sanitized.keys())
505
- fig_heatmap = px.imshow(masked_pvalues, x=tickers_list, y=tickers_list,
506
- color_continuous_scale='RdYlGn_r', title='Cointegration Heatmap (Johansen)',
507
- labels=dict(x='Tickers', y='Tickers', color='P-value'),
508
- zmin=0, zmax=P_VALUE_THRESHOLD)
509
-
510
- st.plotly_chart(fig_heatmap)
511
-
512
- # Sort pairs by p-value (ascending) and select the top 10
513
- top_10_pairs = sorted(pairs, key=lambda x: x[2])[:10]
514
-
515
- # Extract data for the bar plot
516
- pair_labels = [f"{pair[0]} & {pair[1]}" for pair in top_10_pairs]
517
- pair_values = [pair[2] for pair in top_10_pairs]
518
-
519
- # Plot bar chart
520
- fig_bar = go.Figure([go.Bar(x=pair_values, y=pair_labels, orientation='h')])
521
- fig_bar.update_layout(title='Top 10 Most Cointegrated Pairs',
522
- xaxis_title='P-value',
523
- yaxis_title='Asset Pairs',
524
- yaxis=dict(autorange='reversed'))
525
- st.plotly_chart(fig_bar)
526
-
527
- # Expander for "How it Works" inside the main body
528
  with st.expander("How it Works", expanded=False):
529
  st.markdown("""
530
  **Cointegration Overview:**
@@ -537,8 +528,6 @@ elif page == 'Pair Cointegration Identification':
537
  - The Johansen test is a more general procedure that allows for more than two series and can identify multiple cointegrating relationships.
538
  """)
539
 
540
-
541
-
542
  # Hide the default Streamlit menu and footer
543
  hide_streamlit_style = """
544
  <style>
@@ -546,4 +535,4 @@ hide_streamlit_style = """
546
  footer {visibility: hidden;}
547
  </style>
548
  """
549
- st.markdown(hide_streamlit_style, unsafe_allow_html=True)
 
26
 
27
  # Function to load adjusted close price data for a given ticker
28
  def load_ticker_ts_df(ticker, start, end):
29
+ data = yf.download(ticker, start=start, end=end, auto_adjust=False) # Unadjusted prices
30
+ if isinstance(data.columns, pd.MultiIndex): # Flatten multi-index
31
+ data.columns = data.columns.get_level_values(0)
32
+ if data.empty:
33
+ raise ValueError(f"No data found for {ticker}")
34
+ return data['Adj Close']
35
+
36
  # Function to calculate cross-correlation at different lags
37
  def cross_correlation(series1, series2, lag):
38
  if lag > 0:
 
41
  return np.corrcoef(series1[-lag:], series2[:lag])[0, 1]
42
  else:
43
  return np.corrcoef(series1, series2)[0, 1]
44
+
45
  # Function to perform Granger causality test with shifted time series
46
  def granger_test_with_shift(data, target, predictor, shift):
47
+ shifted_data = data.copy()
 
48
  shifted_data[predictor] = data[predictor].shift(shift)
 
49
  shifted_data.dropna(inplace=True)
 
50
  granger_test_result = grangercausalitytests(shifted_data[[target, predictor]], maxlag=1, verbose=False)
 
51
  p_value = granger_test_result[1][0]['ssr_ftest'][1]
52
  return p_value
53
+
54
  # Function to calculate cumulative profit
55
  def calculate_cumulative_profit(aligned_data, z_scores, buy_threshold, sell_threshold):
56
+ positions = []
57
+ profit = 0
58
+ cumulative_profits = []
59
+ position_open = False
60
 
 
61
  for i in range(len(z_scores)):
62
+ date = z_scores.index[i]
63
+ z = z_scores.iloc[i]
64
 
65
  if z > buy_threshold and not position_open:
 
66
  entry_date = date
67
  entry_price1 = aligned_data.loc[date, ticker1]
68
  entry_price2 = aligned_data.loc[date, ticker2]
 
70
  positions.append((entry_date, 'sell', ticker2, entry_price2, 'buy', ticker1, entry_price1))
71
 
72
  elif z < sell_threshold and not position_open:
 
73
  entry_date = date
74
  entry_price1 = aligned_data.loc[date, ticker1]
75
  entry_price2 = aligned_data.loc[date, ticker2]
 
77
  positions.append((entry_date, 'buy', ticker2, entry_price2, 'sell', ticker1, entry_price1))
78
 
79
  elif position_open and abs(z) < 0.5:
 
80
  exit_date = date
81
  exit_price1 = aligned_data.loc[date, ticker1]
82
  exit_price2 = aligned_data.loc[date, ticker2]
83
  position_open = False
84
+ entry = positions.pop()
85
  entry_date, action1, tickerA, entry_priceA, action2, tickerB, entry_priceB = entry
86
 
87
  if action1 == 'sell':
 
88
  profit += (entry_priceA - exit_price2) + (exit_price1 - entry_priceB)
89
  else:
 
90
  profit += (exit_price2 - entry_priceA) + (entry_priceB - exit_price1)
91
 
92
+ cumulative_profits.append(profit)
93
 
94
+ return cumulative_profits, positions
95
 
96
  # Function to sanitize the data
97
  def sanitize_data(data_map):
 
122
  result = coint(adj_close_data[:, i], adj_close_data[:, j])
123
  pvalue_matrix[i, j] = result[1]
124
  pvalue_matrix[j, i] = result[1]
125
+ np.fill_diagonal(pvalue_matrix, 0)
126
  pairs = [(tickers[i], tickers[j], pvalue_matrix[i, j]) for i in range(n) for j in range(i+1, n) if pvalue_matrix[i, j] < p_value_threshold]
127
  return pvalue_matrix, pairs
128
 
 
149
  continue
150
  test_stat, crit_values = johansen_test(window_data)
151
  if test_stat[0] > crit_values[1, 1]: # Using 95% critical value
152
+ pvalues.append(0.01)
153
  else:
154
+ pvalues.append(1)
155
 
156
  pvalues = np.array(pvalues)
157
  consistent_cointegration = np.mean(pvalues < p_value_threshold)
 
187
  3. Click 'Run Analysis' to start the analysis.
188
  """)
189
 
 
190
  with st.sidebar.expander("Stock/Crypto Ticker and Date Selection", expanded=True):
191
  ticker1 = st.text_input('Enter First Stock/Crypto Ticker', 'ASML.AS', help="Enter the ticker symbol for the first stock or cryptocurrency.")
192
  ticker2 = st.text_input('Enter Second Stock/Crypto Ticker', 'ASML', help="Enter the ticker symbol for the second stock or cryptocurrency.")
193
  start_date = st.date_input('Start Date', pd.to_datetime('2022-01-01'), help="Select the start date for the data range.")
194
  end_date = st.date_input('End Date', pd.to_datetime(END_DATE), help="Select the end date for the data range.")
195
 
 
196
  with st.sidebar.expander("Method Parameters", expanded=True):
197
  volatility_window = st.number_input('Volatility Window (days)', min_value=1, max_value=365, value=30, help="Set the number of days for the rolling volatility window.")
198
  buy_threshold = st.number_input('Buy Z-Score Threshold', value=2.0, help="Set the z-score threshold to generate buy signals.")
199
  sell_threshold = st.number_input('Sell Z-Score Threshold', value=-2.0, help="Set the z-score threshold to generate sell signals.")
200
 
201
  if st.sidebar.button('Run Analysis'):
202
+ try:
203
+ # Data collection
204
+ data1 = yf.download(ticker1, start=start_date, end=end_date, auto_adjust=False)
205
+ if isinstance(data1.columns, pd.MultiIndex):
206
+ data1.columns = data1.columns.get_level_values(0)
207
+ data2 = yf.download(ticker2, start=start_date, end=end_date, auto_adjust=False)
208
+ if isinstance(data2.columns, pd.MultiIndex):
209
+ data2.columns = data2.columns.get_level_values(0)
210
+
211
+ if data1.empty or data2.empty:
212
+ raise ValueError(f"No data found for {ticker1} or {ticker2}")
213
+
214
+ aligned_data = pd.concat([data1['Close'], data2['Close']], axis=1, join='inner')
215
+ aligned_data.columns = [ticker1, ticker2]
216
+
217
+ # Normalize the price series
218
+ normalized_data = (aligned_data - aligned_data.mean()) / aligned_data.std()
219
+
220
+ # Plot normalized data
221
+ fig1 = go.Figure()
222
+ fig1.add_trace(go.Scatter(x=normalized_data.index, y=normalized_data[ticker1], mode='lines', name=f'Normalized {ticker1}'))
223
+ fig1.add_trace(go.Scatter(x=normalized_data.index, y=normalized_data[ticker2], mode='lines', name=f'Normalized {ticker2}'))
224
+ fig1.update_layout(title=f'Normalized Price Series for {ticker1} and {ticker2}', xaxis_title='Date', yaxis_title='Normalized Price')
225
+ st.plotly_chart(fig1)
226
+
227
+ # Calculate daily returns
228
+ returns = aligned_data.pct_change().dropna()
229
+
230
+ # Calculate rolling volatilities (annualized)
231
+ volatility1 = returns[ticker1].rolling(volatility_window).std() * np.sqrt(252)
232
+ volatility2 = returns[ticker2].rolling(volatility_window).std() * np.sqrt(252)
233
+
234
+ # Plot rolling volatilities
235
+ fig2 = go.Figure()
236
+ fig2.add_trace(go.Scatter(x=volatility1.index, y=volatility1, mode='lines', name=f"{ticker1} Volatility"))
237
+ fig2.add_trace(go.Scatter(x=volatility2.index, y=volatility2, mode='lines', name=f"{ticker2} Volatility"))
238
+ fig2.update_layout(title=f"{volatility_window}-Day Rolling Historical Volatility for {ticker1} and {ticker2}", xaxis_title='Date', yaxis_title='Volatility')
239
+ st.plotly_chart(fig2)
240
+
241
+ # Check for stationarity using ADF test
242
+ adf_result1 = adfuller(aligned_data[ticker1])
243
+ adf_result2 = adfuller(aligned_data[ticker2])
244
+
245
+ # Perform Johansen cointegration test
246
+ coint_test_stat, coint_critical_values = johansen_test(aligned_data)
247
+
248
+ # If cointegration exists, proceed with VECM
249
+ vecm = VECM(aligned_data, k_ar_diff=1, coint_rank=1)
250
+ vecm_fit = vecm.fit()
251
+
252
+ # Analyzing the residuals for stationarity
253
+ residuals = vecm_fit.resid
254
+ residuals_df = pd.DataFrame(residuals, index=aligned_data.index[-len(residuals):], columns=[f'Residual_{ticker1}', f'Residual_{ticker2}'])
255
+ adf_residuals_1 = adfuller(residuals[:, 0])
256
+ adf_residuals_2 = adfuller(residuals[:, 1])
257
+
258
+ # Plot residuals from VECM
259
+ fig3 = go.Figure()
260
+ fig3.add_trace(go.Scatter(x=residuals_df.index, y=residuals_df[f'Residual_{ticker1}'], mode='lines', name=f'Residual {ticker1}'))
261
+ fig3.add_trace(go.Scatter(x=residuals_df.index, y=residuals_df[f'Residual_{ticker2}'], mode='lines', name=f'Residual {ticker2}'))
262
+ fig3.add_hline(y=0, line=dict(color='red', dash='dash'), name='Zero Line')
263
+ fig3.update_layout(title='Residuals from VECM', xaxis_title='Date', yaxis_title='Residuals')
264
+ st.plotly_chart(fig3)
265
+
266
+ # Display ADF test results for the tickers
267
+ st.write(f"ADF Statistic for {ticker1}: {adf_result1[0]}, p-value: {adf_result1[1]}")
268
+ st.write(f"ADF Statistic for {ticker2}: {adf_result2[0]}, p-value: {adf_result2[1]}")
269
+
270
+ with st.expander("How it Works", expanded=False):
271
+ st.markdown("""
272
+ **ADF Test:**
273
+ - The Augmented Dickey-Fuller (ADF) test checks whether a time series has a unit root, i.e., whether it is non-stationary.
274
+ - If the p-value is less than 0.05, we reject the null hypothesis that the series has a unit root, indicating that the series is stationary.
275
+ **Johansen Cointegration Test:**
276
+ - The Johansen test is used to determine the number of cointegrating relationships among multiple time series.
277
+ - If the test statistic is greater than the critical value, we reject the null hypothesis that there is no cointegration.
278
+ **VECM (Vector Error Correction Model):**
279
+ - A VECM is a special form of a VAR (Vector Autoregression) model used for cointegrated series. It corrects for disequilibrium in the short run while keeping the long-term relationship intact.
280
+ **Z-Score Trading Strategy:**
281
+ - Z-scores measure how many standard deviations an element is from the mean. In pairs trading, z-scores are used to identify overbought or oversold conditions, triggering buy or sell signals.
282
+ """)
283
+
284
+ st.markdown("#### Interpretation of ADF Results")
285
+ st.latex(r'''
286
+ H_0: \text{The series has a unit root (non-stationary)} \\
287
+ H_1: \text{The series does not have a unit root (stationary)}
288
+ ''')
289
+ st.write("""
290
+ - The Augmented Dickey-Fuller (ADF) test checks the null hypothesis that a unit root is present in a time series sample.
291
  """)
292
+ if adf_result1[1] < 0.05:
293
+ st.write(f"{ticker1} is stationary, indicating the series does not have a unit root.")
294
+ else:
295
+ st.write(f"{ticker1} is not stationary, indicating the series has a unit root.")
296
 
297
+ if adf_result2[1] < 0.05:
298
+ st.write(f"{ticker2} is stationary, indicating the series does not have a unit root.")
299
+ else:
300
+ st.write(f"{ticker2} is not stationary, indicating the series has a unit root.")
301
+
302
+ # Display cointegration test results
303
+ st.write("Johansen Cointegration Test Results:")
304
+ johansen_results = pd.DataFrame({
305
+ 'Test Statistic': coint_test_stat,
306
+ '90% Critical Value': coint_critical_values[:, 0],
307
+ '95% Critical Value': coint_critical_values[:, 1],
308
+ '99% Critical Value': coint_critical_values[:, 2]
309
+ }, index=[f'Cointegration Test {i+1}' for i in range(len(coint_test_stat))])
310
+ st.write(johansen_results)
311
+
312
+ st.markdown("#### Interpretation of Johansen Cointegration Test Results")
313
+ st.latex(r'''
314
+ H_0: \text{No cointegration relationship exists} \\
315
+ H_1: \text{Cointegration relationship exists}
316
+ ''')
317
+ st.write("""
318
+ - The Johansen cointegration test is used to determine the cointegration rank between multiple time series.
319
+ """)
320
+ if coint_test_stat[0] > coint_critical_values[0, 1]:
321
+ st.write(f"The two assets {ticker1} and {ticker2} are cointegrated at the 95% confidence level.")
322
+ else:
323
+ st.write(f"The two assets {ticker1} and {ticker2} are not cointegrated at the 95% confidence level.")
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
+ st.markdown("#### Interpretation of VECM Residuals")
326
+ st.write(f"ADF Statistic for VECM residuals of {ticker1}: {adf_residuals_1[0]}, p-value: {adf_residuals_1[1]}")
327
+ st.write(f"ADF Statistic for VECM residuals of {ticker2}: {adf_residuals_2[0]}, p-value: {adf_residuals_2[1]}")
328
+ st.write("""
329
+ - The residuals from the Vector Error Correction Model (VECM) should be stationary to confirm cointegration.
330
+ """)
331
+ if adf_residuals_1[1] < 0.1:
332
+ st.write(f"The residuals of the VECM model for {ticker1} are stationary, confirming cointegration.")
333
+ else:
334
+ st.write(f"The residuals of the VECM model for {ticker1} are not stationary, suggesting no cointegration.")
335
 
336
+ if adf_residuals_2[1] < 0.1:
337
+ st.write(f"The residuals of the VECM model for {ticker2} are stationary, confirming cointegration.")
338
+ else:
339
+ st.write(f"The residuals of the VECM model for {ticker2} are not stationary, suggesting no cointegration.")
340
+
341
+ # Calculate cross-correlation for a range of lags
342
+ lag_range = range(-30, 31)
343
+ cross_correlations = [cross_correlation(returns[ticker1], returns[ticker2], lag) for lag in lag_range]
344
+
345
+ # Plot cross-correlation for different lags
346
+ fig4 = go.Figure()
347
+ fig4.add_trace(go.Scatter(x=list(lag_range), y=cross_correlations, mode='lines+markers'))
348
+ fig4.add_hline(y=0, line=dict(color='gray', dash='dash'))
349
+ fig4.add_vline(x=0, line=dict(color='red', dash='dash'))
350
+ fig4.update_layout(title=f"Cross-Correlation between {ticker1} and {ticker2}", xaxis_title='Lag (days)', yaxis_title='Cross-Correlation')
351
+ st.plotly_chart(fig4)
352
+
353
+ st.markdown("#### Interpretation of Cross-Correlation Results")
354
+ max_corr = max(cross_correlations)
355
+ max_lag = lag_range[cross_correlations.index(max_corr)]
356
+ second_max_corr = max(corr for i, corr in enumerate(cross_correlations) if corr != max_corr)
357
+ second_max_lag = lag_range[cross_correlations.index(second_max_corr)]
358
+
359
+ st.write(f"Highest correlation: {max_corr:.2f} at lag {max_lag}")
360
+ st.write(f"Second highest correlation: {second_max_corr:.2f} at lag {second_max_lag}")
361
+
362
+ interpretation = f"Highest correlation at lag {max_lag}: The high correlation at lag {max_lag} indicates that {ticker1} and {ticker2} move together without any significant lead or lag. In other words, any movements in {ticker1} are almost instantaneously reflected in {ticker2} and vice versa. This is typical for cross-listed assets, where information and price changes are quickly reflected in both markets.\n"
363
+
364
+ if second_max_lag < 0:
365
+ leading_ticker = ticker2
366
+ lagging_ticker = ticker1
367
+ lead_days = abs(second_max_lag)
368
+ direction = f"Second highest correlation at lag {second_max_lag}: {leading_ticker} leads {lagging_ticker} by {lead_days} days. This means that movements in {leading_ticker} tend to precede similar movements in {lagging_ticker} by {lead_days} day(s)."
369
+ elif second_max_lag > 0:
370
+ leading_ticker = ticker1
371
+ lagging_ticker = ticker2
372
+ lead_days = second_max_lag
373
+ direction = f"Second highest correlation at lag {second_max_lag}: {leading_ticker} leads {lagging_ticker} by {lead_days} days. This means that movements in {leading_ticker} tend to precede similar movements in {lagging_ticker} by {lead_days} day(s)."
374
+ else:
375
+ direction = "No significant lead/lag relationship; they move simultaneously."
376
+ interpretation += direction
377
+ st.write(interpretation)
378
+
379
+ # Granger causality test with shifts
380
+ shift_range = range(-5, 6)
381
+ granger_p_values_shift_1_to_2 = {shift: granger_test_with_shift(aligned_data, ticker1, ticker2, shift) for shift in shift_range}
382
+ granger_p_values_shift_2_to_1 = {shift: granger_test_with_shift(aligned_data, ticker2, ticker1, shift) for shift in shift_range}
383
+
384
+ # Create DataFrames for plotting Granger causality test results
385
+ granger_p_values_df_shift_1_to_2 = pd.DataFrame(granger_p_values_shift_1_to_2, index=[f"{ticker1} causes {ticker2}"]).T
386
+ granger_p_values_df_shift_2_to_1 = pd.DataFrame(granger_p_values_shift_2_to_1, index=[f"{ticker2} causes {ticker1}"]).T
387
+
388
+ # Plot Granger causality test p-values with shifts
389
+ fig5 = go.Figure()
390
+ fig5.add_trace(go.Scatter(x=granger_p_values_df_shift_1_to_2.index, y=granger_p_values_df_shift_1_to_2[f"{ticker1} causes {ticker2}"], mode='lines+markers', name=f"{ticker1} causes {ticker2}"))
391
+ fig5.add_trace(go.Scatter(x=granger_p_values_df_shift_2_to_1.index, y=granger_p_values_df_shift_2_to_1[f"{ticker2} causes {ticker1}"], mode='lines+markers', name=f"{ticker2} causes {ticker1}"))
392
+ fig5.add_hline(y=0.05, line=dict(color='gray', dash='dash'))
393
+ fig5.add_vline(x=0, line=dict(color='red', dash='dash'))
394
+ fig5.update_layout(title=f"Granger Causality Test p-values with Shifts between {ticker1} and {ticker2}", xaxis_title='Shift (days)', yaxis_title='p-value')
395
+ st.plotly_chart(fig5)
396
+
397
+ st.markdown("#### Interpretation of Granger Causality Test Results")
398
+ best_lag_1_to_2 = min(granger_p_values_shift_1_to_2, key=granger_p_values_shift_1_to_2.get)
399
+ best_lag_2_to_1 = min(granger_p_values_shift_2_to_1, key=granger_p_values_shift_2_to_1.get)
400
+
401
+ interpretation = ""
402
+
403
+ if granger_p_values_shift_1_to_2[best_lag_1_to_2] < 0.05 and granger_p_values_shift_1_to_2[best_lag_1_to_2] < granger_p_values_shift_2_to_1[best_lag_2_to_1]:
404
+ causality_direction = f"{ticker1} causes {ticker2}"
405
+ best_lag = best_lag_1_to_2
406
+ interpretation += f"Granger causality test with shifts suggests that {ticker1} causes {ticker2} with a lag of {abs(best_lag)} days.\n"
407
+ interpretation += f"This means that movements in {ticker1} tend to lead movements in {ticker2} by {abs(best_lag)} days. In practical terms, if {ticker1} experiences a price change, we can expect a similar change in {ticker2} approximately {abs(best_lag)} days later."
408
+ else:
409
+ causality_direction = f"{ticker2} causes {ticker1}"
410
+ best_lag = best_lag_2_to_1
411
+ interpretation += f"Granger causality test with shifts suggests that {ticker2} causes {ticker1} with a lag of {abs(best_lag)} days.\n"
412
+ interpretation += f"This means that movements in {ticker2} tend to lead movements in {ticker1} by {abs(best_lag)} days. In practical terms, if {ticker2} experiences a price change, we can expect a similar change in {ticker1} approximately {abs(best_lag)} days later."
413
+
414
+ st.write(interpretation)
415
+
416
+ # Adjust data based on the identified best lag
417
+ adjusted_data = aligned_data.copy()
418
+ adjusted_data[ticker1] = adjusted_data[ticker1].shift(best_lag).dropna()
419
+ adjusted_data = adjusted_data.dropna()
420
+
421
+ # Calculate the residuals
422
+ model = OLS(adjusted_data[ticker2], adjusted_data[ticker1])
423
+ results = model.fit()
424
+ residuals = adjusted_data[ticker2] - results.params[ticker1] * adjusted_data[ticker1]
425
+
426
+ # Calculate Z-Scores
427
+ residuals_mean = residuals.mean()
428
+ residuals_std = residuals.std()
429
+ z_scores = (residuals - residuals_mean) / residuals_std
430
+
431
+ # Generate buy and sell signals
432
+ buy_signals = z_scores[z_scores > buy_threshold]
433
+ sell_signals = z_scores[z_scores < sell_threshold]
434
+
435
+ # Plot the residuals with buy and sell signals
436
+ fig6 = go.Figure()
437
+ fig6.add_trace(go.Scatter(x=z_scores.index, y=z_scores, mode='lines', name='Z-Score of Residuals'))
438
+ fig6.add_trace(go.Scatter(x=buy_signals.index, y=buy_signals, mode='markers', marker=dict(color='green', symbol='triangle-up', size=10), name=f'Buy {ticker1}, Sell {ticker2} Signal'))
439
+ fig6.add_trace(go.Scatter(x=sell_signals.index, y=sell_signals, mode='markers', marker=dict(color='red', symbol='triangle-down', size=10), name=f'Sell {ticker1}, Buy {ticker2} Signal'))
440
+ fig6.add_hline(y=buy_threshold, line=dict(color='gray', dash='dash'))
441
+ fig6.add_hline(y=sell_threshold, line=dict(color='gray', dash='dash'))
442
+ fig6.update_layout(title=f"Residuals (Adjusted for Lag) with Buy and Sell Signals based on Z-Scores", xaxis_title='Date', yaxis_title='Z-Score')
443
+ st.plotly_chart(fig6)
444
+
445
+ # Calculate cumulative profits and positions
446
+ cumulative_profits, positions = calculate_cumulative_profit(aligned_data, z_scores, buy_threshold, sell_threshold)
447
+
448
+ # Plot the cumulative profit
449
+ fig7 = go.Figure()
450
+ fig7.add_trace(go.Scatter(x=aligned_data.index[:len(cumulative_profits)], y=cumulative_profits, mode='lines', name='Cumulative Profit'))
451
+ fig7.update_layout(title=f"Cumulative Profit from Z-Score Trading Strategy", xaxis_title='Date', yaxis_title='Cumulative Profit')
452
+ st.plotly_chart(fig7)
453
+
454
+ st.markdown("#### Interpretation of Trading Signals and Cumulative Profit")
455
+ st.write(f"Cumulative Profit: {cumulative_profits[-1]:.2f}")
456
+ st.write("""
457
+ - The trading strategy uses z-scores to generate buy and sell signals.
458
+ - The cumulative profit shows the total profit from the trading strategy over the analyzed period.
459
+ """)
460
+ except Exception as e:
461
+ st.error(f"Error: {str(e)}. Check ticker symbols or date range.")
462
 
463
  elif page == 'Pair Cointegration Identification':
464
  st.subheader("Cointegration Identification")
 
468
  It works for both stocks and cryptocurrency pairs.
469
  """)
470
 
 
471
  method = st.sidebar.selectbox('Select Cointegration Method', ['Engle-Granger', 'Johansen Cointegration'])
472
 
 
473
  with st.sidebar.expander("Stock/Crypto Ticker and Date Selection", expanded=True):
474
  tickers_input = st.text_input('Enter Stock or Crypto Tickers (comma-separated)', ', '.join(default_tickers), help="Enter the ticker symbols for stocks or cryptocurrencies you want to analyze.")
475
  start_date = st.date_input('Start Date', pd.to_datetime(START_DATE), help="Select the start date for the data range.")
476
  end_date = st.date_input('End Date', pd.to_datetime(END_DATE), help="Select the end date for the data range.")
477
 
 
 
 
 
478
  if st.sidebar.button('Run Cointegration Analysis'):
479
+ try:
480
+ tickers = [ticker.strip() for ticker in tickers_input.split(',')]
481
+ universe_tickers_ts_map = {ticker: load_ticker_ts_df(ticker, start_date, end_date) for ticker in tickers}
482
+ uts_sanitized = sanitize_data(universe_tickers_ts_map)
483
+
484
+ if not uts_sanitized:
485
+ raise ValueError("No valid data after sanitization. Check tickers or date range.")
486
+
487
+ if method == 'Engle-Granger':
488
+ pvalues, pairs = find_cointegrated_pairs(uts_sanitized, P_VALUE_THRESHOLD)
489
+ masked_pvalues = np.where(pvalues > P_VALUE_THRESHOLD, np.nan, pvalues)
490
+ tickers_list = list(uts_sanitized.keys())
491
+ fig_heatmap = px.imshow(masked_pvalues, x=tickers_list, y=tickers_list,
492
+ color_continuous_scale='RdYlGn_r', title='Cointegration Heatmap (Engle-Granger)',
493
+ labels=dict(x='Tickers', y='Tickers', color='P-value'),
494
+ zmin=0, zmax=P_VALUE_THRESHOLD)
495
+ else:
496
+ pvalues, pairs = find_cointegrated_pairs_rolling(uts_sanitized, P_VALUE_THRESHOLD, ROLLING_WINDOW_SIZE, CONSISTENT_COINTEGRATION_THRESHOLD)
497
+ masked_pvalues = np.where(pvalues > P_VALUE_THRESHOLD, np.nan, pvalues)
498
+ tickers_list = list(uts_sanitized.keys())
499
+ fig_heatmap = px.imshow(masked_pvalues, x=tickers_list, y=tickers_list,
500
+ color_continuous_scale='RdYlGn_r', title='Cointegration Heatmap (Johansen)',
501
+ labels=dict(x='Tickers', y='Tickers', color='P-value'),
502
+ zmin=0, zmax=P_VALUE_THRESHOLD)
503
+
504
+ st.plotly_chart(fig_heatmap)
505
+
506
+ top_10_pairs = sorted(pairs, key=lambda x: x[2])[:10]
507
+ pair_labels = [f"{pair[0]} & {pair[1]}" for pair in top_10_pairs]
508
+ pair_values = [pair[2] for pair in top_10_pairs]
509
+
510
+ fig_bar = go.Figure([go.Bar(x=pair_values, y=pair_labels, orientation='h')])
511
+ fig_bar.update_layout(title='Top 10 Most Cointegrated Pairs',
512
+ xaxis_title='P-value',
513
+ yaxis_title='Asset Pairs',
514
+ yaxis=dict(autorange='reversed'))
515
+ st.plotly_chart(fig_bar)
516
+ except Exception as e:
517
+ st.error(f"Error: {str(e)}. Check ticker symbols or date range.")
518
+
 
 
 
 
 
519
  with st.expander("How it Works", expanded=False):
520
  st.markdown("""
521
  **Cointegration Overview:**
 
528
  - The Johansen test is a more general procedure that allows for more than two series and can identify multiple cointegrating relationships.
529
  """)
530
 
 
 
531
  # Hide the default Streamlit menu and footer
532
  hide_streamlit_style = """
533
  <style>
 
535
  footer {visibility: hidden;}
536
  </style>
537
  """
538
+ st.markdown(hide_streamlit_style, unsafe_allow_html=True)