RazHadas committed on
Commit
76317bb
·
verified ·
1 Parent(s): ff08568

Upload 6 files

Browse files
Files changed (6) hide show
  1. app.py +541 -0
  2. plotting.py +263 -0
  3. processing.py +152 -0
  4. requirements.txt +9 -0
  5. risk_analysis.py +463 -0
  6. utils.py +138 -0
app.py ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app.py
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/18CPi10QPKtnp8wBs3Fd21JjaDxoHytAM
8
+ """
9
+
10
+ # app.py
11
+ # Main Gradio application script for QuantConnect Report Enhancer.
12
+
13
+ import gradio as gr
14
+ import pandas as pd
15
+ import numpy as np
16
+ import traceback
17
+
18
+ # Import functions from other modules
19
+ from utils import create_empty_figure
20
+ from processing import process_single_file
21
+ from risk_analysis import calculate_correlation, calculate_manual_risk_stats
22
+ from plotting import generate_figures_for_strategy, generate_manual_risk_figures
23
+
24
+ # --- Constants for UI ---
25
# Trade columns shown in the "Closed Trades" table, in display order.
# Columns missing from a given trades DataFrame are filtered out by the callbacks.
DEFAULT_TRADES_COLS_DISPLAY = [
    'symbol',
    'entryTime',
    'exitTime',
    'direction',
    'quantity',
    'entryPrice',
    'exitPrice',
    'profitLoss',
    'totalFees',
    'duration_days',
]

# Upper bound on the number of trade rows rendered in the UI table.
MAX_TRADES_DISPLAY = 50
30
+
31
+ # --- Gradio Interface Callbacks ---
32
+
33
def process_files_and_update_ui(uploaded_files):
    """
    Callback function triggered when files are uploaded.
    Processes each file, calculates overall metrics (like correlation),
    updates the application state, and populates the UI with the details of
    the first successfully processed strategy.

    Args:
        uploaded_files: A list of file objects uploaded via the Gradio interface
            (each exposing the temporary file path as ``.name``). May be
            None/empty when the upload is cleared.

    Returns:
        A tuple containing updated values for all relevant Gradio components:
        - Status message (Textbox)
        - Strategy dropdown (Dropdown) - updated choices, value, visibility
        - Application state (State) - dictionary holding all processed results
        - Outputs for individual strategy tabs (DataFrames, Plots)
        - Outputs for correlation tab (DataFrame, Plot)
        - Outputs for manual risk analysis tab (DataFrames, Plots)
    """
    # --- Initialize Default/Empty Outputs ---
    # Empty figures and dataframes returned if processing fails or no files are uploaded.
    default_stats_df = pd.DataFrame(columns=['Metric', 'Value'])
    default_trades_df_display = pd.DataFrame()
    default_equity_fig = create_empty_figure("Equity Curve")
    default_drawdown_fig = create_empty_figure("Drawdown Curve")
    default_benchmark_fig = create_empty_figure("Equity vs Benchmark")
    default_pnl_hist_fig = create_empty_figure("P/L Distribution")
    default_duration_hist_fig = create_empty_figure("Trade Duration Distribution")
    default_exposure_fig = create_empty_figure("Exposure")
    default_turnover_fig = create_empty_figure("Portfolio Turnover")
    default_corr_matrix = pd.DataFrame()
    default_corr_heatmap = create_empty_figure("Correlation Heatmap")
    default_monthly_table_display = pd.DataFrame()  # For the formatted table in UI
    default_monthly_stats = pd.DataFrame(columns=['Metric', 'Value'])
    default_monthly_heatmap = create_empty_figure("Monthly Returns Heatmap")
    default_rolling_vol_stats = pd.DataFrame(columns=['Window', 'Min Vol', 'Max Vol', 'Mean Vol'])
    default_rolling_vol_plot = create_empty_figure("Rolling Volatility")
    default_drawdown_table = pd.DataFrame()

    # Keys of the figures dictionary consumed below; used to build
    # placeholder figures when plotting fails.
    figure_keys = (
        "equity_fig", "drawdown_fig", "benchmark_fig", "pnl_hist_fig",
        "duration_hist_fig", "exposure_fig", "turnover_fig",
    )

    # Structure default outputs for return statement clarity
    initial_outputs = [
        default_stats_df, default_equity_fig, default_drawdown_fig, default_benchmark_fig,
        default_pnl_hist_fig, default_duration_hist_fig, default_exposure_fig,
        default_turnover_fig, default_trades_df_display
    ]
    correlation_outputs = [default_corr_matrix, default_corr_heatmap]
    manual_risk_outputs = [
        default_monthly_table_display, default_monthly_stats, default_monthly_heatmap,
        default_rolling_vol_plot, default_rolling_vol_stats, default_drawdown_table
    ]
    # Combine all output lists for the final return
    all_default_outputs = initial_outputs + correlation_outputs + manual_risk_outputs

    # --- Handle No Files Uploaded ---
    if not uploaded_files:
        return (
            "Please upload one or more QuantConnect JSON files.",  # Status message
            gr.Dropdown(choices=[], value=None, visible=False),  # Hide dropdown
            {},  # Empty state
            *all_default_outputs  # Return all default outputs
        )

    # --- Process Uploaded Files ---
    all_results = {}  # {filename: results_dict} for each processed file
    status_messages = []  # Collected status/error messages
    processed_files_count = 0

    for file_obj in uploaded_files:
        if file_obj is None:  # Skip if file object is somehow None
            continue
        try:
            file_path = file_obj.name  # Temporary file path provided by Gradio
            strategy_result = process_single_file(file_path)
            # Store the result using the filename as the key
            all_results[strategy_result["filename"]] = strategy_result
            # Log errors or increment success count
            if strategy_result["error"]:
                status_messages.append(strategy_result["error"])
            else:
                processed_files_count += 1
        except Exception as e:
            # Top-level UI boundary: report unexpected errors instead of crashing the callback.
            error_msg = f"Failed to process an uploaded file object: {e}"
            print(error_msg)
            traceback.print_exc()
            status_messages.append(error_msg)

    # --- Handle No Valid Files Processed ---
    if not all_results or processed_files_count == 0:
        status = "\n".join(status_messages) if status_messages else "No valid QuantConnect JSON files processed."
        return (
            status,
            gr.Dropdown(choices=[], value=None, visible=False),  # Hide dropdown
            {},  # Empty state
            *all_default_outputs
        )

    # --- Calculate Correlation (Across All Processed Files) ---
    try:
        corr_matrix_df, corr_heatmap_fig, corr_status = calculate_correlation(all_results)
        status_messages.append(corr_status)  # Add correlation status to messages
    except Exception as e:
        print(f"Error during correlation calculation: {e}")
        traceback.print_exc()
        status_messages.append(f"Correlation Error: {e}")
        # Use default correlation outputs on error
        corr_matrix_df = default_corr_matrix
        corr_heatmap_fig = default_corr_heatmap

    # --- Prepare Initial UI Display ---
    # Prefer the first strategy that processed without error so the initial
    # view is not an empty/errored report when valid results exist; fall back
    # to the first entry otherwise.
    first_filename = next(
        (name for name, result in all_results.items() if not result.get("error")),
        next(iter(all_results)),
    )
    initial_strategy_results = all_results[first_filename]

    # Generate standard plots for the selected strategy
    try:
        initial_figures = generate_figures_for_strategy(initial_strategy_results)
    except Exception as e:
        print(f"Error generating initial figures for {first_filename}: {e}")
        traceback.print_exc()
        # Placeholder "error" figures for every expected figure key.
        initial_figures = {
            key: create_empty_figure(f"{key.replace('_fig','')} - Error")
            for key in figure_keys
        }
        status_messages.append(f"Plotting Error (Initial): {e}")

    # Perform manual risk analysis for the selected strategy
    try:
        initial_manual_risk_analysis = calculate_manual_risk_stats(initial_strategy_results.get("daily_returns"))
        status_messages.append(f"Risk Analysis ({first_filename}): {initial_manual_risk_analysis['status']}")
        # Generate risk plots based on the analysis results
        initial_manual_risk_figures = generate_manual_risk_figures(initial_manual_risk_analysis, first_filename)
    except Exception as e:
        print(f"Error during initial manual risk analysis or plotting for {first_filename}: {e}")
        traceback.print_exc()
        status_messages.append(f"Risk Analysis/Plot Error (Initial): {e}")
        # Use default risk outputs on error
        initial_manual_risk_analysis = {
            "monthly_returns_table_for_heatmap": None, "monthly_perf_stats": default_monthly_stats,
            "rolling_vol_df": None, "rolling_vol_stats": default_rolling_vol_stats,
            "drawdown_table": default_drawdown_table
        }
        initial_manual_risk_figures = {
            "monthly_heatmap_fig": default_monthly_heatmap, "rolling_vol_fig": default_rolling_vol_plot
        }

    # --- Prepare DataFrames for Initial Display ---
    # The result dict may carry explicit None values, so guard against None
    # as well as missing keys.
    initial_stats_df = initial_strategy_results.get("stats_df")
    if initial_stats_df is None:
        initial_stats_df = default_stats_df
    initial_trades_df = initial_strategy_results.get("trades_df")

    # Select and format trades table for display
    if initial_trades_df is not None and not initial_trades_df.empty:
        # Filter columns to display
        existing_display_cols = [col for col in DEFAULT_TRADES_COLS_DISPLAY if col in initial_trades_df.columns]
        # Copy so the formatting below never writes through to the stored results.
        initial_trades_df_display = initial_trades_df[existing_display_cols].head(MAX_TRADES_DISPLAY).copy()
        # Handle complex 'symbol' column (often a dictionary in QC output)
        if 'symbol' in initial_trades_df_display.columns:
            non_null_symbols = initial_trades_df_display['symbol'].dropna()
            first_symbol = non_null_symbols.iloc[0] if not non_null_symbols.empty else None
            if isinstance(first_symbol, dict):
                # Extract 'value' or 'ticker' if it's a dict, otherwise keep original
                initial_trades_df_display.loc[:, 'symbol'] = initial_trades_df_display['symbol'].apply(
                    lambda x: x.get('value', x.get('ticker', str(x))) if isinstance(x, dict) else x
                )
        # Convert datetime columns to string for display
        for col in ['entryTime', 'exitTime']:
            if col in initial_trades_df_display.columns and pd.api.types.is_datetime64_any_dtype(initial_trades_df_display[col]):
                initial_trades_df_display[col] = initial_trades_df_display[col].dt.strftime('%Y-%m-%d %H:%M:%S')
    else:
        initial_trades_df_display = default_trades_df_display

    # Prepare formatted monthly returns table for UI display
    formatted_monthly_table = default_monthly_table_display
    heatmap_data = initial_manual_risk_analysis.get("monthly_returns_table_for_heatmap")
    if heatmap_data is not None and not heatmap_data.empty:
        df_display = heatmap_data.copy()  # Work on a copy
        # DataFrame.applymap is deprecated since pandas 2.1 in favour of
        # DataFrame.map; use whichever this pandas version provides.
        format_cells = df_display.map if hasattr(df_display, "map") else df_display.applymap
        # Format values as percentages (e.g., "1.23%")
        df_display = format_cells(lambda x: f'{x:.2f}%' if pd.notna(x) else '')
        # Reset index so it becomes a regular column in the Gradio DataFrame display
        formatted_monthly_table = df_display.reset_index()

    # --- Consolidate Status Message ---
    final_status = "\n".join(s for s in status_messages if s).strip()
    if not final_status:
        final_status = f"Successfully processed {processed_files_count} file(s)."

    # --- Assemble Final Outputs ---
    outputs_to_return = [
        final_status,  # Status Textbox
        gr.Dropdown(  # Strategy Dropdown
            choices=list(all_results.keys()),  # Update choices
            value=first_filename,  # Set initial value
            visible=True,  # Make visible
            label="Select Strategy to View",
            interactive=True
        ),
        all_results,  # Update the hidden state
        # --- Individual Strategy Tab Outputs ---
        initial_stats_df,
        initial_figures.get("equity_fig", default_equity_fig),
        initial_figures.get("drawdown_fig", default_drawdown_fig),
        initial_figures.get("benchmark_fig", default_benchmark_fig),
        initial_figures.get("pnl_hist_fig", default_pnl_hist_fig),
        initial_figures.get("duration_hist_fig", default_duration_hist_fig),
        initial_figures.get("exposure_fig", default_exposure_fig),
        initial_figures.get("turnover_fig", default_turnover_fig),
        initial_trades_df_display,
        # --- Correlation Tab Outputs ---
        corr_matrix_df,
        corr_heatmap_fig,
        # --- Manual Risk Tab Outputs ---
        formatted_monthly_table,  # Use the formatted table for display
        initial_manual_risk_analysis.get("monthly_perf_stats", default_monthly_stats),
        initial_manual_risk_figures.get("monthly_heatmap_fig", default_monthly_heatmap),
        initial_manual_risk_figures.get("rolling_vol_fig", default_rolling_vol_plot),
        initial_manual_risk_analysis.get("rolling_vol_stats", default_rolling_vol_stats),
        initial_manual_risk_analysis.get("drawdown_table", default_drawdown_table)
    ]

    return tuple(outputs_to_return)
251
+
252
+
253
def display_selected_strategy(selected_filename, all_results_state):
    """
    Callback function triggered when a strategy is selected from the dropdown.
    Retrieves the data for the selected strategy from the state and updates
    the individual strategy tabs and the manual risk analysis tab accordingly.

    Args:
        selected_filename: The filename of the strategy selected in the dropdown.
        all_results_state: The current state dictionary containing all processed results.

    Returns:
        A tuple containing updated values for the Gradio components related to
        the selected strategy's details (Overview, Performance, Trade Analysis,
        Other Charts, Risk Analysis tabs). Correlation tab is not updated here.
    """
    # --- Initialize Default/Empty Outputs ---
    default_stats_df = pd.DataFrame(columns=['Metric', 'Value'])
    default_trades_df_display = pd.DataFrame()
    default_equity_fig = create_empty_figure("Equity Curve")
    default_drawdown_fig = create_empty_figure("Drawdown Curve")
    default_benchmark_fig = create_empty_figure("Equity vs Benchmark")
    default_pnl_hist_fig = create_empty_figure("P/L Distribution")
    default_duration_hist_fig = create_empty_figure("Trade Duration Distribution")
    default_exposure_fig = create_empty_figure("Exposure")
    default_turnover_fig = create_empty_figure("Portfolio Turnover")
    default_monthly_table_display = pd.DataFrame()
    default_monthly_stats = pd.DataFrame(columns=['Metric', 'Value'])
    default_monthly_heatmap = create_empty_figure("Monthly Returns Heatmap")
    default_rolling_vol_stats = pd.DataFrame(columns=['Window', 'Min Vol', 'Max Vol', 'Mean Vol'])
    default_rolling_vol_plot = create_empty_figure("Rolling Volatility")
    default_drawdown_table = pd.DataFrame()

    # Keys of the figures dictionary consumed below; used to build
    # placeholder figures when plotting fails.
    figure_keys = (
        "equity_fig", "drawdown_fig", "benchmark_fig", "pnl_hist_fig",
        "duration_hist_fig", "exposure_fig", "turnover_fig",
    )

    # Structure default outputs for return statement clarity
    initial_outputs = [
        default_stats_df, default_equity_fig, default_drawdown_fig, default_benchmark_fig,
        default_pnl_hist_fig, default_duration_hist_fig, default_exposure_fig,
        default_turnover_fig, default_trades_df_display
    ]
    manual_risk_outputs = [
        default_monthly_table_display, default_monthly_stats, default_monthly_heatmap,
        default_rolling_vol_plot, default_rolling_vol_stats, default_drawdown_table
    ]
    all_default_outputs = initial_outputs + manual_risk_outputs

    # --- Validate Selection and State ---
    if not selected_filename or not all_results_state or selected_filename not in all_results_state:
        print(f"Warning: Invalid selection ('{selected_filename}') or state. Returning defaults.")
        return tuple(all_default_outputs)

    # --- Retrieve Selected Strategy Data ---
    strategy_results = all_results_state[selected_filename]

    # --- Handle Case Where Selected Strategy Had Processing Errors ---
    if strategy_results.get("error"):
        print(f"Displaying error state for {selected_filename}: {strategy_results['error']}")
        # Show the error in the statistics table and clear other plots/tables
        error_df = pd.DataFrame([{"Metric": "Error", "Value": strategy_results['error']}])
        error_outputs = [error_df] + [  # Use error df for stats table
            create_empty_figure(f"{fig_name} - Error") for fig_name in [  # Empty error figures
                "Equity", "Drawdown", "Benchmark", "P/L", "Duration", "Exposure", "Turnover"
            ]
        ] + [default_trades_df_display]  # Empty trades table
        error_risk_outputs = [  # Empty risk outputs
            default_monthly_table_display, default_monthly_stats, create_empty_figure("Monthly Heatmap - Error"),
            create_empty_figure("Rolling Vol - Error"), default_rolling_vol_stats, default_drawdown_table
        ]
        return tuple(error_outputs + error_risk_outputs)

    # --- Generate Figures and Analysis for Selected Strategy ---
    # Generate standard plots
    try:
        figures = generate_figures_for_strategy(strategy_results)
    except Exception as e:
        print(f"Error generating figures for {selected_filename}: {e}")
        traceback.print_exc()
        # Placeholder "error" figures for every expected figure key.
        figures = {
            key: create_empty_figure(f"{key.replace('_fig','')} - Error")
            for key in figure_keys
        }

    # Perform manual risk analysis
    try:
        manual_risk_analysis = calculate_manual_risk_stats(strategy_results.get("daily_returns"))
        # Generate risk plots
        manual_risk_figures = generate_manual_risk_figures(manual_risk_analysis, selected_filename)
    except Exception as e:
        print(f"Error during manual risk analysis or plotting for {selected_filename}: {e}")
        traceback.print_exc()
        # Use default risk outputs on error
        manual_risk_analysis = {
            "monthly_returns_table_for_heatmap": None, "monthly_perf_stats": default_monthly_stats,
            "rolling_vol_df": None, "rolling_vol_stats": default_rolling_vol_stats,
            "drawdown_table": default_drawdown_table
        }
        manual_risk_figures = {
            "monthly_heatmap_fig": default_monthly_heatmap, "rolling_vol_fig": default_rolling_vol_plot
        }

    # --- Prepare DataFrames for Display ---
    # The result dict may carry explicit None values, so guard against None
    # as well as missing keys.
    stats_df = strategy_results.get("stats_df")
    if stats_df is None:
        stats_df = default_stats_df
    trades_df = strategy_results.get("trades_df")

    # Select and format trades table
    if trades_df is not None and not trades_df.empty:
        existing_display_cols = [col for col in DEFAULT_TRADES_COLS_DISPLAY if col in trades_df.columns]
        # Copy so the formatting below never writes through to the stored state.
        trades_df_display = trades_df[existing_display_cols].head(MAX_TRADES_DISPLAY).copy()
        if 'symbol' in trades_df_display.columns:
            non_null_symbols = trades_df_display['symbol'].dropna()
            first_symbol = non_null_symbols.iloc[0] if not non_null_symbols.empty else None
            if isinstance(first_symbol, dict):
                # Extract 'value' or 'ticker' if it's a dict, otherwise keep original
                trades_df_display.loc[:, 'symbol'] = trades_df_display['symbol'].apply(
                    lambda x: x.get('value', x.get('ticker', str(x))) if isinstance(x, dict) else x
                )
        # Convert datetime columns to string for display
        for col in ['entryTime', 'exitTime']:
            if col in trades_df_display.columns and pd.api.types.is_datetime64_any_dtype(trades_df_display[col]):
                trades_df_display[col] = trades_df_display[col].dt.strftime('%Y-%m-%d %H:%M:%S')
    else:
        trades_df_display = default_trades_df_display

    # Prepare formatted monthly returns table
    formatted_monthly_table = default_monthly_table_display
    heatmap_data = manual_risk_analysis.get("monthly_returns_table_for_heatmap")
    if heatmap_data is not None and not heatmap_data.empty:
        df_display = heatmap_data.copy()
        # DataFrame.applymap is deprecated since pandas 2.1 in favour of
        # DataFrame.map; use whichever this pandas version provides.
        format_cells = df_display.map if hasattr(df_display, "map") else df_display.applymap
        df_display = format_cells(lambda x: f'{x:.2f}%' if pd.notna(x) else '')
        formatted_monthly_table = df_display.reset_index()

    # --- Assemble Outputs for Return ---
    # Return components for the tabs updated by the dropdown selection
    outputs_to_return = [
        # --- Individual Strategy Tab Outputs ---
        stats_df,
        figures.get("equity_fig", default_equity_fig),
        figures.get("drawdown_fig", default_drawdown_fig),
        figures.get("benchmark_fig", default_benchmark_fig),
        figures.get("pnl_hist_fig", default_pnl_hist_fig),
        figures.get("duration_hist_fig", default_duration_hist_fig),
        figures.get("exposure_fig", default_exposure_fig),
        figures.get("turnover_fig", default_turnover_fig),
        trades_df_display,
        # --- Manual Risk Tab Outputs ---
        formatted_monthly_table,  # Use formatted table
        manual_risk_analysis.get("monthly_perf_stats", default_monthly_stats),
        manual_risk_figures.get("monthly_heatmap_fig", default_monthly_heatmap),
        manual_risk_figures.get("rolling_vol_fig", default_rolling_vol_plot),
        manual_risk_analysis.get("rolling_vol_stats", default_rolling_vol_stats),
        manual_risk_analysis.get("drawdown_table", default_drawdown_table)
    ]

    return tuple(outputs_to_return)
406
+
407
+
408
+ # --- Build Gradio Interface ---
409
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    # Page header
    gr.Markdown("# Trading Platform Report Enhancer")
    gr.Markdown("Upload one or more QuantConnect backtest JSON files to generate analysis reports and compare strategies.")

    # Holds the processed results of every uploaded file between callbacks.
    all_results_state = gr.State({})

    # Upload widget: multiple JSON files allowed.
    with gr.Row():
        file_input = gr.File(
            file_types=['.json'],
            file_count="multiple",
            label="Upload QuantConnect JSON File(s)",
        )

    # Read-only status line fed by the upload callback.
    with gr.Row():
        status_output = gr.Textbox(lines=2, interactive=False, label="Processing Status")

    # Strategy picker; starts hidden and empty until files have been processed.
    with gr.Row():
        strategy_dropdown = gr.Dropdown(
            choices=[],
            label="Select Strategy to View",
            interactive=True,
            visible=False,
        )

    with gr.Tabs():
        # Overview tab: headline statistics table.
        with gr.TabItem("📊 Overview"):
            with gr.Column():
                gr.Markdown("## Key Performance Metrics")
                stats_output = gr.DataFrame(wrap=True, interactive=False, label="Overall Statistics")

        # Performance tab: equity, drawdown and benchmark comparison.
        with gr.TabItem("📈 Performance Charts"):
            with gr.Column():
                gr.Markdown("## Equity & Drawdown")
                with gr.Row():
                    plot_equity = gr.Plot(label="Equity Curve")
                    plot_drawdown = gr.Plot(label="Drawdown Curve")
                gr.Markdown("## Benchmark Comparison")
                plot_benchmark = gr.Plot(label="Equity vs Benchmark (Normalized)")

        # Trade analysis tab: distributions plus a sample of closed trades.
        with gr.TabItem("💹 Trade Analysis"):
            with gr.Column():
                gr.Markdown("## Profit/Loss and Duration")
                with gr.Row():
                    plot_pnl_hist = gr.Plot(label="P/L Distribution")
                    plot_duration_hist = gr.Plot(label="Trade Duration Distribution (Days)")
                gr.Markdown(f"## Closed Trades (Sample - First {MAX_TRADES_DISPLAY})")
                trades_output = gr.DataFrame(wrap=True, interactive=False, label="Closed Trades Sample")

        # Exposure and turnover charts.
        with gr.TabItem("⚙️ Other Charts"):
            with gr.Column():
                gr.Markdown("## Exposure & Turnover")
                with gr.Row():
                    plot_exposure = gr.Plot(label="Exposure")
                    plot_turnover = gr.Plot(label="Portfolio Turnover")

        # Risk analysis tab: monthly returns, rolling volatility, drawdown periods.
        with gr.TabItem("🔎 Risk Analysis"):
            with gr.Column():
                gr.Markdown("## Monthly Performance")
                plot_monthly_heatmap = gr.Plot(label="Monthly Returns Heatmap")
                monthly_returns_table_output = gr.DataFrame(
                    wrap=True, interactive=False, label="Monthly Returns (%) Table"
                )
                monthly_perf_stats_output = gr.DataFrame(
                    wrap=True, interactive=False, label="Monthly Performance Stats"
                )

                gr.Markdown("## Rolling Volatility")
                plot_rolling_vol = gr.Plot(label="Annualized Rolling Volatility")
                rolling_vol_stats_output = gr.DataFrame(
                    wrap=True, interactive=False, label="Rolling Volatility Stats"
                )

                gr.Markdown("## Drawdown Analysis")
                drawdown_table_output = gr.DataFrame(
                    wrap=True, interactive=False, label=f"Top {5} Drawdown Periods"
                )

        # Correlation tab: cross-strategy heatmap and matrix.
        with gr.TabItem("🤝 Correlation"):
            with gr.Column():
                gr.Markdown("## Strategy (+Benchmark) Correlation")
                gr.Markdown("_Based on daily equity percentage change._")
                corr_heatmap_output = gr.Plot(label="Correlation Heatmap")
                corr_matrix_output = gr.DataFrame(wrap=True, interactive=False, label="Correlation Matrix")

    # --- Output groups, ordered to match the tuples returned by the callbacks ---
    file_processing_outputs = [status_output, strategy_dropdown, all_results_state]
    individual_report_outputs = [
        stats_output,
        plot_equity,
        plot_drawdown,
        plot_benchmark,
        plot_pnl_hist,
        plot_duration_hist,
        plot_exposure,
        plot_turnover,
        trades_output,
    ]
    correlation_tab_outputs = [corr_matrix_output, corr_heatmap_output]
    manual_risk_tab_outputs = [
        monthly_returns_table_output,
        monthly_perf_stats_output,
        plot_monthly_heatmap,
        plot_rolling_vol,
        rolling_vol_stats_output,
        drawdown_table_output,
    ]

    # The upload callback refreshes everything: status/dropdown/state plus all tabs.
    file_upload_all_outputs = [
        *file_processing_outputs,
        *individual_report_outputs,
        *correlation_tab_outputs,
        *manual_risk_tab_outputs,
    ]

    # The dropdown callback refreshes only the per-strategy tabs and the risk tab.
    dropdown_outputs = [*individual_report_outputs, *manual_risk_tab_outputs]

    # --- Event wiring ---
    # Uploading (or clearing) files re-runs the full processing pipeline.
    file_input.change(
        process_files_and_update_ui,
        inputs=[file_input],
        outputs=file_upload_all_outputs,
    )

    # Picking another strategy re-renders its report from the stored state.
    strategy_dropdown.change(
        display_selected_strategy,
        inputs=[strategy_dropdown, all_results_state],
        outputs=dropdown_outputs,
    )

# --- Launch the Gradio App ---
if __name__ == '__main__':
    # debug=True prints detailed error logs to the console;
    # share=False keeps the app local (set share=True to create a public link).
    iface.launch(share=False, debug=True)
plotting.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """plotting.py
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1ILADgRrYqkAEj5jyymO50ZvzDzVdfD6g
8
+ """
9
+
10
+ # plotting.py
11
+ # Functions for generating Plotly figures from processed strategy data.
12
+
13
+ import plotly.express as px
14
+ import plotly.graph_objects as go
15
+ import pandas as pd
16
+ import numpy as np
17
+ import traceback
18
+ from utils import create_empty_figure # Import helper
19
+
20
+ def generate_figures_for_strategy(strategy_results):
21
+ """
22
+ Generates standard Plotly figures for a single strategy's results.
23
+
24
+ Args:
25
+ strategy_results: Dictionary containing processed data for one strategy,
26
+ as returned by process_single_file. Expected keys include:
27
+ 'filename', 'equity_df', 'drawdown_df', 'benchmark_df',
28
+ 'trades_df', 'exposure_series', 'turnover_df'.
29
+
30
+ Returns:
31
+ A dictionary containing Plotly figure objects:
32
+ 'equity_fig', 'drawdown_fig', 'benchmark_fig', 'pnl_hist_fig',
33
+ 'duration_hist_fig', 'exposure_fig', 'turnover_fig'.
34
+ Uses empty figures if data is missing or invalid.
35
+ """
36
+ figures = {
37
+ "equity_fig": create_empty_figure("Equity Curve"),
38
+ "drawdown_fig": create_empty_figure("Drawdown Curve"),
39
+ "benchmark_fig": create_empty_figure("Equity vs Benchmark"),
40
+ "pnl_hist_fig": create_empty_figure("P/L Distribution"),
41
+ "duration_hist_fig": create_empty_figure("Trade Duration Distribution"),
42
+ "exposure_fig": create_empty_figure("Exposure"),
43
+ "turnover_fig": create_empty_figure("Portfolio Turnover")
44
+ }
45
+ filename = strategy_results.get("filename", "Strategy") # Get filename for titles
46
+
47
+ try:
48
+ # --- Equity Curve ---
49
+ equity_df = strategy_results.get("equity_df")
50
+ if equity_df is not None and not equity_df.empty and 'Time' in equity_df.columns and 'Equity' in equity_df.columns:
51
+ # Ensure Time is datetime
52
+ equity_df['Time'] = pd.to_datetime(equity_df['Time'])
53
+ fig = px.line(equity_df, x='Time', y='Equity', title=f'Equity Curve ({filename})')
54
+ fig.update_layout(yaxis_title="Portfolio Value")
55
+ figures["equity_fig"] = fig
56
+
57
+ # --- Drawdown Curve ---
58
+ drawdown_df = strategy_results.get("drawdown_df")
59
+ if drawdown_df is not None and not drawdown_df.empty and 'Time' in drawdown_df.columns and 'Drawdown' in drawdown_df.columns:
60
+ # Ensure Time is datetime
61
+ drawdown_df['Time'] = pd.to_datetime(drawdown_df['Time'])
62
+ # Convert drawdown to percentage for plotting
63
+ drawdown_df['Drawdown_pct'] = drawdown_df['Drawdown'] * 100
64
+ fig = px.area(drawdown_df, x='Time', y='Drawdown_pct', title=f'Drawdown Curve (%) ({filename})', labels={'Drawdown_pct': 'Drawdown (%)'})
65
+ fig.update_layout(yaxis_title="Drawdown (%)")
66
+ figures["drawdown_fig"] = fig
67
+
68
+ # --- Equity vs Benchmark ---
69
+ benchmark_df = strategy_results.get("benchmark_df")
70
+ # Requires both equity and benchmark data
71
+ if equity_df is not None and not equity_df.empty and 'Time' in equity_df.columns and 'Equity' in equity_df.columns and \
72
+ benchmark_df is not None and not benchmark_df.empty and 'Time' in benchmark_df.columns and 'Benchmark' in benchmark_df.columns:
73
+ try:
74
+ # Ensure Time columns are datetime
75
+ equity_df['Time'] = pd.to_datetime(equity_df['Time'])
76
+ benchmark_df['Time'] = pd.to_datetime(benchmark_df['Time'])
77
+
78
+ # Merge on Time after setting as index
79
+ equity_indexed = equity_df.set_index('Time')['Equity']
80
+ benchmark_indexed = benchmark_df.set_index('Time')['Benchmark']
81
+
82
+ # Combine, handling potential different start/end dates
83
+ combined = pd.concat([equity_indexed, benchmark_indexed], axis=1, keys=['Equity', 'Benchmark'], join='outer')
84
+
85
+ # Normalize to start at 1 (or 100) for comparison
86
+ # Check if first row has NaN values after outer join
87
+ first_valid_index = combined.first_valid_index()
88
+ if first_valid_index is not None:
89
+ # Normalize using the first non-NaN value for each column
90
+ normalized_equity = (combined['Equity'] / combined['Equity'].loc[combined['Equity'].first_valid_index()])#.fillna(method='ffill') # Optional fill
91
+ normalized_benchmark = (combined['Benchmark'] / combined['Benchmark'].loc[combined['Benchmark'].first_valid_index()])#.fillna(method='ffill') # Optional fill
92
+
93
+ # Create figure and add traces
94
+ fig = go.Figure()
95
+ fig.add_trace(go.Scatter(x=normalized_equity.index, y=normalized_equity, mode='lines', name='Strategy Equity'))
96
+ fig.add_trace(go.Scatter(x=normalized_benchmark.index, y=normalized_benchmark, mode='lines', name='Benchmark'))
97
+ fig.update_layout(title=f'Normalized Equity vs Benchmark ({filename})', xaxis_title='Date', yaxis_title='Normalized Value (Start = 1)')
98
+ figures["benchmark_fig"] = fig
99
+ else:
100
+ print("Could not normalize Equity vs Benchmark: No valid starting point found after merge.")
101
+ figures["benchmark_fig"] = create_empty_figure(f"Equity vs Benchmark ({filename}) - Normalization Failed")
102
+
103
+ except Exception as merge_err:
104
+ print(f"Error merging/plotting Equity vs Benchmark: {merge_err}")
105
+ figures["benchmark_fig"] = create_empty_figure(f"Equity vs Benchmark ({filename}) - Error")
106
+
107
+
108
+ # --- Trade P/L Distribution ---
109
+ trades_df = strategy_results.get("trades_df")
110
+ if trades_df is not None and not trades_df.empty and 'profitLoss' in trades_df.columns:
111
+ # Ensure profitLoss is numeric
112
+ trades_df['profitLoss'] = pd.to_numeric(trades_df['profitLoss'], errors='coerce')
113
+ valid_pnl = trades_df['profitLoss'].dropna()
114
+ if not valid_pnl.empty:
115
+ fig = px.histogram(valid_pnl, title=f'Trade Profit/Loss Distribution ({filename})', labels={'value': 'Profit/Loss'})
116
+ figures["pnl_hist_fig"] = fig
117
+
118
+ # --- Trade Duration Distribution ---
119
+ # Uses 'duration_days' calculated in processing.py
120
+ if trades_df is not None and not trades_df.empty and 'duration_days' in trades_df.columns:
121
+ # Ensure duration_days is numeric
122
+ trades_df['duration_days'] = pd.to_numeric(trades_df['duration_days'], errors='coerce')
123
+ valid_duration = trades_df['duration_days'].dropna()
124
+ if not valid_duration.empty:
125
+ fig = px.histogram(valid_duration, title=f'Trade Duration Distribution (Days) ({filename})', labels={'value': 'Duration (Days)'})
126
+ figures["duration_hist_fig"] = fig
127
+
128
+ # --- Exposure Chart ---
129
+ # Exposure data format varies; this is a basic example assuming a dict of series
130
+ exposure_series_dict = strategy_results.get("exposure_series")
131
+ if exposure_series_dict and isinstance(exposure_series_dict, dict):
132
+ fig = go.Figure()
133
+ exposure_plotted = False
134
+ for series_name, series_data in exposure_series_dict.items():
135
+ if 'values' in series_data and isinstance(series_data['values'], list):
136
+ # Process this specific series using the timeseries helper
137
+ exposure_df = process_timeseries_chart(series_data['values'], series_name)
138
+ if not exposure_df.empty:
139
+ # Plot as area chart if 'Exposure' in name, else line
140
+ plot_type = 'area' if 'Exposure' in series_name else 'scatter'
141
+ fill_type = 'tozeroy' if plot_type == 'area' else None
142
+ fig.add_trace(go.Scatter(x=exposure_df.index, y=exposure_df[series_name],
143
+ mode='lines', name=series_name, fill=fill_type))
144
+ exposure_plotted = True
145
+ if exposure_plotted:
146
+ fig.update_layout(title=f'Exposure ({filename})', xaxis_title='Date', yaxis_title='Value / % Exposure')
147
+ figures["exposure_fig"] = fig
148
+ else:
149
+ figures["exposure_fig"] = create_empty_figure(f"Exposure ({filename}) - No PlotData")
150
+ else:
151
+ figures["exposure_fig"] = create_empty_figure(f"Exposure ({filename}) - Data Missing/Invalid")
152
+
153
+
154
+ # --- Portfolio Turnover ---
155
+ turnover_df = strategy_results.get("turnover_df")
156
+ if turnover_df is not None and not turnover_df.empty and 'Time' in turnover_df.columns and 'Turnover' in turnover_df.columns:
157
+ # Ensure Time is datetime
158
+ turnover_df['Time'] = pd.to_datetime(turnover_df['Time'])
159
+ fig = px.line(turnover_df, x='Time', y='Turnover', title=f'Portfolio Turnover ({filename})')
160
+ fig.update_layout(yaxis_title="Turnover")
161
+ figures["turnover_fig"] = fig
162
+
163
+ except Exception as e:
164
+ print(f"Error generating figures for {filename}: {e}")
165
+ traceback.print_exc()
166
+ # Keep default empty figures on error
167
+
168
+ return figures
169
+
170
+
171
def generate_manual_risk_figures(analysis_results, filename="Strategy"):
    """
    Generates Plotly figures from manually calculated risk analysis results.

    Args:
        analysis_results: Dictionary containing results from calculate_manual_risk_stats.
                          Expected keys: 'monthly_returns_table_for_heatmap', 'rolling_vol_df'.
                          The input DataFrames are read only; they are never modified.
        filename: Name of the strategy for figure titles.

    Returns:
        A dictionary containing Plotly figure objects:
        'monthly_heatmap_fig', 'rolling_vol_fig'.
        Uses empty figures if data is missing or invalid, or if any error
        occurs while building a figure (the error is printed, not raised).
    """
    figures = {
        "monthly_heatmap_fig": create_empty_figure(f"Monthly Returns Heatmap ({filename})"),
        "rolling_vol_fig": create_empty_figure(f"Rolling Volatility ({filename})")
    }

    try:
        # --- Monthly Returns Heatmap ---
        # Expects percentages (values * 100) from calculate_manual_risk_stats
        monthly_ret_table = analysis_results.get("monthly_returns_table_for_heatmap")
        if monthly_ret_table is not None and not monthly_ret_table.empty:
            z = monthly_ret_table.values   # The percentage values
            x = monthly_ret_table.columns  # Month names
            y = monthly_ret_table.index    # Years

            # Cell labels are built with plain Python instead of
            # DataFrame.applymap, which is deprecated in recent pandas.
            cell_text = [
                [f'{v:.1f}%' if pd.notna(v) else '' for v in row]
                for row in z
            ]

            fig = go.Figure(data=go.Heatmap(
                z=z, x=x, y=y,
                colorscale='RdYlGn',        # Red-Yellow-Green scale, good for returns
                zmid=0,                     # Center color scale around zero
                text=cell_text,             # Formatted text shown on the heatmap cells
                texttemplate="%{text}",     # Use the formatted text
                hoverongaps=False,          # Don't show hover info for gaps
                colorbar=dict(title='Monthly Return (%)')
            ))
            fig.update_layout(
                title=f'Monthly Returns (%) ({filename})',
                yaxis_nticks=len(y),        # Ensure all years are shown as ticks
                yaxis_title="Year",
                yaxis_autorange='reversed'  # Show earlier years at the top
            )
            figures["monthly_heatmap_fig"] = fig

        # --- Rolling Volatility Plot ---
        rolling_vol_df = analysis_results.get("rolling_vol_df")
        # Check if DataFrame exists, is not empty, and has the 'Time' column
        if rolling_vol_df is not None and not rolling_vol_df.empty and 'Time' in rolling_vol_df.columns:
            # Convert into a local series so the caller's DataFrame is not mutated
            time_values = pd.to_datetime(rolling_vol_df['Time'])

            fig = go.Figure()
            colors = px.colors.qualitative.Plotly  # Qualitative color sequence
            # Only string-labelled columns starting with 'vol_' are volatility series
            vol_columns = [
                col for col in rolling_vol_df.columns
                if isinstance(col, str) and col.startswith('vol_')
            ]
            for i, col in enumerate(vol_columns):
                # Everything after the 'vol_' prefix is the window label (e.g., '3M')
                window_label = col.split('_', 1)[1]
                fig.add_trace(go.Scatter(
                    x=time_values,
                    y=rolling_vol_df[col] * 100,  # Convert to percentage
                    mode='lines',
                    name=f'Rolling Vol ({window_label})',
                    line=dict(color=colors[i % len(colors)])  # Cycle through colors
                ))

            # Update layout if at least one volatility series was plotted
            if vol_columns:
                fig.update_layout(
                    title=f'Annualized Rolling Volatility ({filename})',
                    xaxis_title='Date',
                    yaxis_title='Volatility (%)'
                )
                figures["rolling_vol_fig"] = fig
            else:
                figures["rolling_vol_fig"] = create_empty_figure(f"Rolling Volatility ({filename}) - No Plot Data")
        else:
            figures["rolling_vol_fig"] = create_empty_figure(f"Rolling Volatility ({filename}) - Data Missing/Invalid")

    except Exception as e:
        # Best-effort plotting: report the problem and keep whatever figures exist
        print(f"Error generating manual risk figures for {filename}: {e}")
        traceback.print_exc()

    return figures
processing.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """processing.py
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/13EcoLMljb9XzVBELmFC0EBDknuHS79Vy
8
+ """
9
+
10
+ # processing.py
11
+ # Functions for processing QuantConnect JSON data.
12
+
13
+ import json
14
+ import pandas as pd
15
+ import traceback
16
+ import numpy as np
17
+ from utils import get_nested_value, process_timeseries_chart # Import helpers
18
+
19
def _prepare_trades_df(temp_trades_df):
    """Coerce numeric/time columns of a closed-trades DataFrame and add trade durations."""
    # Convert relevant columns to numeric, coercing errors to NaN
    numeric_cols = ['profitLoss', 'entryPrice', 'exitPrice', 'quantity', 'totalFees']
    for col in numeric_cols:
        if col in temp_trades_df.columns:
            temp_trades_df[col] = pd.to_numeric(temp_trades_df[col], errors='coerce')

    # Convert time columns to datetime, coercing errors to NaT
    time_cols = ['entryTime', 'exitTime']
    for col in time_cols:
        if col in temp_trades_df.columns:
            try:
                # Handles ISO 8601 strings with timezone; result is UTC-aware
                temp_trades_df[col] = pd.to_datetime(temp_trades_df[col], errors='coerce', utc=True)
            except ValueError:
                # Fallback: keep only 'YYYY-MM-DDTHH:MM:SS' and parse without timezone
                temp_trades_df[col] = pd.to_datetime(temp_trades_df[col].str.slice(0, 19), errors='coerce')
                if temp_trades_df[col].notna().any():
                    # Keep the column timezone-naive for the duration calculation
                    temp_trades_df[col] = temp_trades_df[col].dt.tz_localize(None)

    # Duration needs both columns to be datetimes with at least one valid value each
    can_compute_duration = (
        'entryTime' in temp_trades_df.columns
        and 'exitTime' in temp_trades_df.columns
        and pd.api.types.is_datetime64_any_dtype(temp_trades_df['entryTime'])
        and pd.api.types.is_datetime64_any_dtype(temp_trades_df['exitTime'])
        and not temp_trades_df['entryTime'].isnull().all()
        and not temp_trades_df['exitTime'].isnull().all()
    )
    if can_compute_duration:
        # Make times timezone-naive (UTC wall time) so they can be subtracted directly
        for col in time_cols:
            if temp_trades_df[col].dt.tz is not None:
                temp_trades_df[col] = temp_trades_df[col].dt.tz_convert(None)

        # Duration as timedelta and as fractional days
        temp_trades_df['duration_td'] = temp_trades_df['exitTime'] - temp_trades_df['entryTime']
        temp_trades_df['duration_days'] = temp_trades_df['duration_td'].dt.total_seconds() / (24 * 60 * 60)
    else:
        # Times are invalid/missing: expose the columns but leave them empty
        temp_trades_df['duration_td'] = pd.NaT
        temp_trades_df['duration_days'] = np.nan

    return temp_trades_df


def process_single_file(file_path):
    """
    Processes a single QuantConnect JSON file.

    Extracts statistics, equity, drawdown, benchmark, trades, exposure, and
    turnover data.

    Args:
        file_path: Path to the JSON file. May be a str, bytes, or os.PathLike
                   object. A falsy or unusable value yields the filename
                   "Unknown File" and an error entry instead of an exception.

    Returns:
        A dictionary containing the processed dataframes and series
        ('stats_df', 'equity_df', 'daily_returns', 'drawdown_df',
        'benchmark_df', 'trades_df', 'exposure_series', 'turnover_df'),
        plus 'filename' and 'error'. 'error' is None on success, otherwise a
        message describing what went wrong; this function does not raise.
    """
    import os  # local import: only needed for portable filename extraction

    # Extract the file name portably (handles path-like objects and, on
    # Windows, backslash separators) without ever raising.
    try:
        filename = os.path.basename(os.fsdecode(file_path)) if file_path else "Unknown File"
    except TypeError:
        filename = "Unknown File"

    # Initialize results dictionary with default empty structures
    results = {
        "filename": filename,
        "stats_df": pd.DataFrame(columns=['Metric', 'Value']),  # Overall statistics
        "equity_df": pd.DataFrame(),     # Equity curve data (with 'Time' column)
        "daily_returns": None,           # Series of percentage returns between equity points
        "drawdown_df": pd.DataFrame(),   # Drawdown curve data (with 'Time' column)
        "benchmark_df": pd.DataFrame(),  # Benchmark data (with 'Time' column)
        "trades_df": pd.DataFrame(),     # Closed trades data
        "exposure_series": None,         # Raw exposure series (processed later for plotting)
        "turnover_df": pd.DataFrame(),   # Portfolio turnover data (with 'Time' column)
        "error": None                    # Stores any error message during processing
    }

    try:
        # Open and load the JSON file
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # --- Extract Statistics ---
        # Try primary location, then fallback location for statistics
        stats_dict = get_nested_value(data, ['statistics']) or \
                     get_nested_value(data, ['totalPerformance', 'portfolioStatistics'])
        if stats_dict:
            results["stats_df"] = pd.DataFrame(list(stats_dict.items()), columns=['Metric', 'Value'])

        # --- Process Equity Curve and Calculate Returns ---
        equity_values = get_nested_value(data, ['charts', 'Strategy Equity', 'series', 'Equity', 'values'])
        equity_df_indexed = process_timeseries_chart(equity_values, 'Equity')
        if not equity_df_indexed.empty:
            # Store equity curve with the index as a column for easier plotting
            results["equity_df"] = equity_df_indexed.reset_index()
            # Percentage change between consecutive equity points
            returns_series = equity_df_indexed['Equity'].pct_change().dropna()
            if not returns_series.empty:
                results["daily_returns"] = returns_series

        # --- Process Drawdown Curve ---
        drawdown_values = get_nested_value(data, ['charts', 'Drawdown', 'series', 'Equity Drawdown', 'values'])
        drawdown_df_indexed = process_timeseries_chart(drawdown_values, 'Drawdown')
        if not drawdown_df_indexed.empty:
            results["drawdown_df"] = drawdown_df_indexed.reset_index()

        # --- Process Benchmark Curve ---
        benchmark_values = get_nested_value(data, ['charts', 'Benchmark', 'series', 'Benchmark', 'values'])
        benchmark_df_indexed = process_timeseries_chart(benchmark_values, 'Benchmark')
        if not benchmark_df_indexed.empty:
            results["benchmark_df"] = benchmark_df_indexed.reset_index()

        # --- Process Closed Trades ---
        closed_trades_list = get_nested_value(data, ['totalPerformance', 'closedTrades'])
        if closed_trades_list and isinstance(closed_trades_list, list):
            temp_trades_df = pd.DataFrame(closed_trades_list)
            if not temp_trades_df.empty:
                results["trades_df"] = _prepare_trades_df(temp_trades_df)

        # --- Extract Exposure Series Data ---
        # Stored raw; it is nested and parsed later by the plotting code
        results["exposure_series"] = get_nested_value(data, ['charts', 'Exposure', 'series'])

        # --- Process Portfolio Turnover ---
        turnover_values = get_nested_value(data, ['charts', 'Portfolio Turnover', 'series', 'Portfolio Turnover', 'values'])
        turnover_df_indexed = process_timeseries_chart(turnover_values, 'Turnover')
        if not turnover_df_indexed.empty:
            results["turnover_df"] = turnover_df_indexed.reset_index()

    except FileNotFoundError:
        error_msg = f"Error: File not found at {file_path}"
        print(error_msg)
        results["error"] = error_msg
    except json.JSONDecodeError:
        error_msg = f"Error: Could not decode JSON from {filename}"
        print(error_msg)
        results["error"] = error_msg
    except Exception as e:
        # Top-level boundary: report any other failure through the result dict
        error_msg = f"Error processing file {filename}: {e}"
        print(error_msg)
        traceback.print_exc()
        results["error"] = error_msg

    return results
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+ # List of Python packages required for the Gradio application.
3
+
4
+ gradio
5
+ pandas
6
+ plotly
7
+ numpy
8
+
9
+ # Optional: Add specific versions if needed, e.g., gradio==3.50.2
risk_analysis.py ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """risk_analysis.py
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/10u2Di5_droisNYuq_KYAmdgVHixe6oVi
8
+ """
9
+
10
+ # risk_analysis.py
11
+ # Functions for calculating risk metrics and correlations.
12
+
13
+ import pandas as pd
14
+ import numpy as np
15
+ import traceback
16
+ import plotly.graph_objects as go
17
+ from utils import create_empty_figure # Import helper
18
+
19
def _drawdown_duration_days(start_date, end_date):
    """Return the inclusive business-day count between two dates (calendar days as fallback)."""
    if pd.isna(start_date) or pd.isna(end_date):
        return np.nan
    try:
        return len(pd.bdate_range(start=start_date, end=end_date))
    except Exception:
        # Deliberate best-effort fallback if bdate_range rejects the dates
        return (end_date - start_date).days + 1  # Inclusive of start/end day


def get_drawdown_table(returns: pd.Series, top: int = 5) -> pd.DataFrame:
    """
    Calculates drawdown periods and statistics from a series of returns.

    A drawdown period starts at the most recent high watermark of the
    compounded return curve, bottoms out at its valley, and ends on the first
    observation where the curve is back at (or above) the high watermark.
    A period still open at the end of the data has no 'End Date'.

    Args:
        returns: Series of daily returns with a DatetimeIndex. Duplicate
                 timestamps are tolerated; rows are processed in order.
        top: Number of top drawdowns (by magnitude) to return.

    Returns:
        DataFrame containing information about the top drawdown periods:
        'Peak Date', 'Valley Date', 'End Date' (formatted 'YYYY-MM-DD'),
        'Duration (Days)' (business days from peak to end, inclusive) and
        'Max Drawdown (%)' (formatted string such as '-14.50%').
        Returns an empty DataFrame if input is invalid or no drawdowns occur.
    """
    # Input validation
    if returns is None or not isinstance(returns, pd.Series) or returns.empty:
        return pd.DataFrame()
    if not isinstance(returns.index, pd.DatetimeIndex):
        return pd.DataFrame()

    # Keep only valid numeric returns
    clean_returns = pd.to_numeric(returns, errors='coerce').dropna()
    if clean_returns.empty:
        return pd.DataFrame()

    # Compounded curve, its running maximum, and the decline from that maximum
    cumulative = (1 + clean_returns).cumprod()
    high_watermark = cumulative.cummax()
    drawdown = (cumulative / high_watermark) - 1

    in_drawdown = False   # Flag to track if currently in a drawdown
    periods = []          # Completed (or still open) drawdown periods
    current_period = {}   # Details of the ongoing drawdown
    peak_idx = clean_returns.index[0]
    # The peak level is tracked as a scalar so that duplicate timestamps in
    # the index cannot turn the comparison below into an ambiguous Series.
    peak_value = cumulative.iloc[0]

    for idx, cum_value, dd_value in zip(clean_returns.index, cumulative.to_numpy(), drawdown.to_numpy()):
        # Move the peak forward whenever the high watermark is matched or exceeded
        if cum_value >= peak_value:
            peak_idx = idx
            peak_value = cum_value

        is_dd = dd_value < 0

        if not in_drawdown and is_dd:
            # Start of a new drawdown period
            in_drawdown = True
            current_period = {
                'Peak Date': peak_idx,          # Previous peak
                'Valley Date': idx,             # Lowest point so far
                'End Date': pd.NaT,             # Set on recovery
                'Max Drawdown (%)': dd_value,   # Deepest drawdown so far
                'Duration (Days)': 0            # Filled in when the period closes
            }
        elif in_drawdown:
            # Track a new low within the ongoing drawdown
            if dd_value < current_period['Max Drawdown (%)']:
                current_period['Valley Date'] = idx
                current_period['Max Drawdown (%)'] = dd_value

            # Recovered: drawdown is no longer negative
            if not is_dd:
                in_drawdown = False
                current_period['End Date'] = idx
                current_period['Duration (Days)'] = _drawdown_duration_days(
                    current_period['Peak Date'], current_period['End Date'])
                periods.append(current_period)
                current_period = {}

    # Series ended while still in a drawdown: measure up to the last observation,
    # but leave 'End Date' as NaT because no recovery happened.
    if in_drawdown:
        current_period['Duration (Days)'] = _drawdown_duration_days(
            current_period['Peak Date'], clean_returns.index[-1])
        periods.append(current_period)

    if not periods:
        return pd.DataFrame()

    drawdown_df = pd.DataFrame(periods)

    # Most negative drawdowns first, keep the top N
    drawdown_df = drawdown_df.sort_values(by='Max Drawdown (%)', ascending=True).head(top)

    # Format the Max Drawdown column as percentage
    drawdown_df['Max Drawdown (%)'] = drawdown_df['Max Drawdown (%)'].map('{:.2%}'.format)

    # Format date columns to YYYY-MM-DD strings for display
    for col in ['Peak Date', 'Valley Date', 'End Date']:
        if col in drawdown_df.columns:
            drawdown_df[col] = pd.to_datetime(drawdown_df[col]).dt.strftime('%Y-%m-%d')

    # Select and order columns for the final output table
    cols_to_select = ['Peak Date', 'Valley Date', 'End Date', 'Duration (Days)', 'Max Drawdown (%)']
    existing_cols = [col for col in cols_to_select if col in drawdown_df.columns]

    return drawdown_df[existing_cols]
149
+
150
+
151
def calculate_manual_risk_stats(returns_series):
    """
    Calculates various risk and performance metrics manually using pandas based on daily returns.

    Args:
        returns_series: A pandas Series of daily percentage returns with a DatetimeIndex.
                        A timezone-naive index is treated as UTC.

    Returns:
        A dictionary containing:
        - monthly_returns_table_for_heatmap: DataFrame pivoted for monthly return heatmap
          (Year rows, month-abbreviation columns, values as percentages). Months without
          any observation are omitted rather than reported as 0%.
        - monthly_perf_stats: DataFrame with summary stats for monthly returns.
          "Negative Months" counts months with a return <= 0.
        - rolling_vol_df: DataFrame containing rolling annualized volatility calculations
          (always with a 'Time' column, whatever the input index is called).
        - rolling_vol_stats: DataFrame summarizing min/max/mean rolling volatility.
        - drawdown_table: DataFrame with top drawdown periods (from get_drawdown_table).
        - status: A string indicating the status of the analysis.
        Errors are reported through 'status'; this function does not raise.
    """
    # Initialize results dictionary with default empty structures
    analysis_results = {
        "monthly_returns_table_for_heatmap": pd.DataFrame(),
        "monthly_perf_stats": pd.DataFrame(columns=['Metric', 'Value']),
        "rolling_vol_df": pd.DataFrame(),
        "rolling_vol_stats": pd.DataFrame(columns=['Window', 'Min Vol', 'Max Vol', 'Mean Vol']),
        "drawdown_table": pd.DataFrame(),
        "status": "Analysis skipped."  # Default status
    }

    # --- Input Validation ---
    if returns_series is None or not isinstance(returns_series, pd.Series) or returns_series.empty or len(returns_series) < 2:
        analysis_results["status"] = "Analysis skipped: Insufficient/invalid returns data."
        return analysis_results
    if not isinstance(returns_series.index, pd.DatetimeIndex):
        analysis_results["status"] = "Analysis skipped: Returns index is not DatetimeIndex."
        return analysis_results

    try:
        status_parts = []  # Collects status messages for the different parts

        # Ensure returns are numeric
        returns_series = pd.to_numeric(returns_series, errors='coerce').dropna()
        if returns_series.empty or len(returns_series) < 2:
            analysis_results["status"] = "Analysis skipped: No valid numeric returns after cleaning."
            return analysis_results

        # Normalize the index to UTC (naive timestamps are assumed to be UTC)
        if returns_series.index.tz is None:
            returns_series = returns_series.tz_localize('UTC')
        else:
            returns_series = returns_series.tz_convert('UTC')

        # --- Monthly Returns Analysis ---
        # 'ME' is the month-end alias in recent pandas; older releases only know 'M'.
        try:
            monthly_resampler = returns_series.resample('ME')
        except ValueError:
            monthly_resampler = returns_series.resample('M')
        # Compounded monthly return: (1+r1)*(1+r2)*...*(1+rn) - 1
        monthly_rets = monthly_resampler.apply(lambda x: (1 + x).prod() - 1)
        # A month with no observations would compound to a spurious 0% return
        # (the product of nothing is 1), so only months with data are kept.
        monthly_rets = monthly_rets[monthly_resampler.count() > 0]

        if not monthly_rets.empty:
            # Create table for heatmap: Year rows, Month columns
            monthly_ret_table_df = pd.DataFrame({'returns': monthly_rets})
            monthly_ret_table_df['Year'] = monthly_ret_table_df.index.year
            monthly_ret_table_df['Month'] = monthly_ret_table_df.index.strftime('%b')  # Jan, Feb, ...
            monthly_heatmap_data = monthly_ret_table_df.pivot_table(index='Year', columns='Month', values='returns')
            # Order columns chronologically (pivot_table sorts them alphabetically)
            month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
            present_months = [m for m in month_order if m in monthly_heatmap_data.columns]
            monthly_heatmap_data = monthly_heatmap_data[present_months]
            # Sort index (Year) ascending
            monthly_heatmap_data = monthly_heatmap_data.sort_index(ascending=True)
            # Store as percentages for the heatmap plot
            analysis_results["monthly_returns_table_for_heatmap"] = monthly_heatmap_data * 100

            # Monthly Performance Statistics
            monthly_stats = {
                "Min": f"{monthly_rets.min():.2%}",
                "Max": f"{monthly_rets.max():.2%}",
                "Mean": f"{monthly_rets.mean():.2%}",
                "Positive Months": (monthly_rets > 0).sum(),
                "Negative Months": (monthly_rets <= 0).sum()  # Flat months count as non-positive
            }
            analysis_results["monthly_perf_stats"] = pd.DataFrame(list(monthly_stats.items()), columns=['Metric', 'Value'])
            status_parts.append("Monthly stats OK.")
        else:
            status_parts.append("Monthly stats skipped (no monthly data).")

        # --- Rolling Volatility Analysis ---
        vol_df = pd.DataFrame(index=returns_series.index)  # One column per window
        vol_stats_list = []                                # Summary stats per window
        # Windows (label: number of trading days)
        windows = {'3M': 63, '6M': 126, '12M': 252}
        vol_calculated = False
        for label, window in windows.items():
            # Skip windows longer than the available history
            if len(returns_series) >= window:
                try:
                    # Values are produced once half the window is available
                    rolling_std = returns_series.rolling(window=window, min_periods=window // 2).std()
                    # Annualize with sqrt of trading days per year
                    rolling_vol = rolling_std * np.sqrt(252)
                    vol_df[f'vol_{label}'] = rolling_vol
                    if not rolling_vol.dropna().empty:
                        vol_stats_list.append({
                            "Window": label,
                            "Min Vol": f"{rolling_vol.min():.2%}",
                            "Max Vol": f"{rolling_vol.max():.2%}",
                            "Mean Vol": f"{rolling_vol.mean():.2%}"
                        })
                        vol_calculated = True
                except Exception as vol_e:
                    print(f"Error calculating rolling volatility for window {label}: {vol_e}")
                    status_parts.append(f"Rolling Vol ({label}) Error.")

        # Store the rolling volatility DataFrame. The axis is renamed first so the
        # timestamp column is called 'Time' even when the input index is unnamed.
        if not vol_df.empty:
            analysis_results["rolling_vol_df"] = vol_df.rename_axis('Time').reset_index()

        # Store the summary statistics if any were calculated
        if vol_stats_list:
            analysis_results["rolling_vol_stats"] = pd.DataFrame(vol_stats_list)
            status_parts.append("Rolling Vol OK.")
        elif not vol_calculated and "Error" not in " ".join(status_parts):
            # Nothing calculated and no error reported: history was too short
            status_parts.append("Rolling Vol skipped (insufficient data for windows).")

        # --- Drawdown Table Calculation ---
        try:
            analysis_results["drawdown_table"] = get_drawdown_table(returns_series, top=5)
            if not analysis_results["drawdown_table"].empty:
                status_parts.append("Drawdown Table OK.")
            else:
                status_parts.append("Drawdown Table: No drawdowns found or error.")
        except Exception as dd_e:
            print(f"Error calculating drawdown table: {dd_e}")
            traceback.print_exc()
            status_parts.append("Drawdown Table Error.")

        # --- Final Status ---
        analysis_results["status"] = " ".join(status_parts) if status_parts else "Analysis completed (no specific issues)."

    except Exception as e:
        # Top-level boundary: report any unexpected failure through 'status'
        error_msg = f"Error during manual risk analysis: {e}"
        print(error_msg)
        traceback.print_exc()
        analysis_results["status"] = f"Manual risk analysis failed: {e}"

    return analysis_results
300
+
301
+
302
def calculate_correlation(all_results):
    """
    Build a return-correlation matrix (and heatmap) across processed strategies.

    For every entry of ``all_results`` that has no "error" value, the 'Equity'
    column of its 'equity_df' is indexed by 'Time' (UTC, duplicates dropped).
    The first usable 'benchmark_df' encountered is added as a 'Benchmark'
    column. The combined frame is sorted, forward-filled, converted to
    percentage changes, stripped of infinities and incomplete rows, and then
    correlated with the Pearson method.

    Args:
        all_results: Mapping of strategy filename -> result dictionary. Each
            result is read through the keys "error", "equity_df" and
            "benchmark_df".

    Returns:
        A tuple ``(correlation_matrix, heatmap_fig, corr_status)``:
            - correlation_matrix: DataFrame of Pearson coefficients (an empty
              DataFrame when the analysis is skipped or fails).
            - heatmap_fig: Plotly figure of the matrix, or a placeholder figure
              from ``create_empty_figure``.
            - corr_status: Human-readable status message.
    """
    # Fallback outputs shared by every early exit
    empty_matrix = pd.DataFrame()
    placeholder_fig = create_empty_figure("Correlation Heatmap (Insufficient Data)")
    corr_status = "Correlation analysis skipped."

    equity_by_file = {}      # {filename: equity Series indexed by UTC time}
    benchmark_series = None  # first usable benchmark Series found

    # --- Collect equity (and one benchmark) series ---
    for filename, results in all_results.items():
        if results.get("error"):
            print(f"Skipping {filename} for correlation due to processing error.")
            continue

        equity_series = _time_indexed_utc_series(results.get("equity_df"), 'Equity')
        if equity_series is None:
            print(f"Skipping {filename} for correlation: Invalid or empty equity_df or Time column.")
            continue

        if not equity_series.empty:
            equity_by_file[filename] = equity_series

        # The benchmark is only taken once, from the first strategy that supplies a usable one
        if benchmark_series is None:
            candidate = _time_indexed_utc_series(results.get("benchmark_df"), 'Benchmark')
            if candidate is not None and not candidate.empty:
                benchmark_series = candidate
                print(f"Using benchmark data from {filename} for correlation.")

    strategy_count = len(equity_by_file)
    has_benchmark = benchmark_series is not None

    # --- Bail out when there is nothing to correlate ---
    if strategy_count == 0:
        corr_status = "Correlation skipped: No valid strategy equity data found."
        return empty_matrix, placeholder_fig, corr_status
    if strategy_count == 1 and not has_benchmark:
        corr_status = "Correlation skipped: Only one strategy and no benchmark data."
        return empty_matrix, placeholder_fig, corr_status

    # --- Align the series and derive returns ---
    combined = pd.concat(equity_by_file, axis=1, join='outer')  # outer join keeps every timestamp
    if has_benchmark:
        combined['Benchmark'] = benchmark_series

    # Forward-fill gaps left by the alignment before taking percentage changes
    returns = combined.sort_index().ffill().pct_change()
    # A zero previous value yields +/-inf; treat those as missing, then keep complete rows only
    returns = returns.replace([np.inf, -np.inf], np.nan).dropna()

    if returns.empty or len(returns) < 2:
        corr_status = "Correlation skipped: Not enough overlapping daily data points after cleaning."
        return empty_matrix, placeholder_fig, corr_status

    # --- Correlation matrix ---
    try:
        correlation_matrix = returns.corr(method='pearson')
    except Exception as corr_e:
        print(f"Error calculating correlation matrix: {corr_e}")
        traceback.print_exc()
        corr_status = f"Correlation calculation failed: {corr_e}"
        return empty_matrix, placeholder_fig, corr_status
    else:
        suffix = " and Benchmark." if has_benchmark else "."
        corr_status = f"Correlation calculated for {strategy_count} strategies" + suffix

    # --- Heatmap figure ---
    heatmap_fig = create_empty_figure("Correlation Heatmap")  # replaced below on success
    try:
        labels = correlation_matrix.columns
        heatmap_trace = go.Heatmap(
            z=correlation_matrix.values,
            x=labels,
            y=labels,
            colorscale='RdBu',   # diverging scale
            zmin=-1, zmax=1,     # correlation bounds
            colorbar=dict(title='Correlation')
        )
        heatmap_fig = go.Figure(data=heatmap_trace)
        heatmap_fig.update_layout(
            title='Strategy (+Benchmark) Daily Return Correlation',
            xaxis_tickangle=-45,
            yaxis_autorange='reversed'
        )

        # Print each coefficient inside its cell
        for row_pos, row_label in enumerate(labels):
            for col_pos, col_label in enumerate(labels):
                cell = correlation_matrix.iloc[row_pos, col_pos]
                if pd.isna(cell):
                    continue
                # White text on strongly coloured cells, black otherwise
                font_color = "white" if abs(cell) > 0.5 else "black"
                heatmap_fig.add_annotation(
                    x=col_label,
                    y=row_label,
                    text=f"{cell:.2f}",
                    showarrow=False,
                    font=dict(color=font_color)
                )
    except Exception as e:
        print(f"Error creating correlation heatmap figure: {e}")
        traceback.print_exc()
        heatmap_fig = create_empty_figure("Error Creating Correlation Heatmap")

    return correlation_matrix, heatmap_fig, corr_status


def _time_indexed_utc_series(frame, value_col):
    """
    Return ``frame[value_col]`` indexed by a UTC 'Time' index, or None when
    the frame is missing, empty, lacks the columns, or 'Time' is not datetime.
    """
    if frame is None or frame.empty:
        return None
    if 'Time' not in frame.columns or value_col not in frame.columns:
        return None
    if not pd.api.types.is_datetime64_any_dtype(frame['Time']):
        return None

    series = frame.set_index('Time')[value_col]
    series = series[~series.index.duplicated(keep='first')]  # keep first value per timestamp

    # Normalise the index to UTC
    if series.index.tz is None:
        series = series.tz_localize('UTC')
    elif series.index.tz != 'UTC':
        series = series.tz_convert('UTC')
    return series
utils.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """utils.py
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1RyRghhbleQJ01USX_0O4uUALsuFM10hJ
8
+ """
9
+
10
+ # utils.py
11
+ # Helper functions for data manipulation and plotting defaults.
12
+
13
+ import pandas as pd
14
+ import plotly.graph_objects as go
15
+ import re
16
+ import numpy as np
17
+ import traceback
18
+
19
def get_nested_value(data_dict, keys, default=None):
    """
    Walk ``keys`` through nested dicts/lists and return the value reached.

    A dict level is followed when it contains the key; a list level is
    followed when the key is an int within ``0 <= key < len(list)``.
    Any other situation stops the walk and returns ``default``.
    """
    node = data_dict
    for step in keys:
        if isinstance(node, dict):
            if step not in node:
                return default
            node = node[step]
            continue
        in_list_range = (
            isinstance(node, list)
            and isinstance(step, int)
            and 0 <= step < len(node)
        )
        if not in_list_range:
            return default
        node = node[step]
    return node
30
+
31
def parse_numeric_string(value_str, default=None):
    """
    Parse a number out of a display string, tolerating '$', '%' and ','.

    Args:
        value_str: The value to parse. Strings are cleaned and converted with
            ``float``. Values that are already numeric (built-in ``int`` /
            ``float`` or a NumPy scalar such as ``np.int64``) are returned
            unchanged.
        default: Returned when the value is neither a string nor a number, or
            when the cleaned string cannot be converted.

    Returns:
        The parsed ``float``, the original numeric value, or ``default``.
        Note that '%' is only stripped: "12.5%" yields 12.5, not 0.125.
    """
    if not isinstance(value_str, str):
        # np.number is included because NumPy integer/float32 scalars are not
        # subclasses of int/float and would otherwise be rejected as non-numeric.
        if isinstance(value_str, (int, float, np.number)):
            return value_str
        # None or some other non-string, non-numeric type
        return default
    try:
        # Remove currency symbols, percentage signs, and commas
        cleaned_str = re.sub(r'[$,%]', '', value_str).strip()
        return float(cleaned_str)
    except (ValueError, TypeError):
        # Cleaning/conversion failed
        return default
46
+
47
+
48
def create_empty_figure(title="No Data Available"):
    """
    Return a placeholder Plotly figure carrying only a message.

    Both axes are hidden and ``title`` is used twice: as the layout title and
    as the text of a paper-referenced annotation without an arrow.
    """
    message_note = dict(
        text=title,
        xref='paper',
        yref='paper',
        showarrow=False,
        font=dict(size=16),
    )
    placeholder = go.Figure()
    placeholder.update_layout(
        title=title,
        xaxis=dict(visible=False),
        yaxis=dict(visible=False),
        annotations=[message_note],
    )
    return placeholder
62
+
63
def _empty_timeseries_frame(value_col_name):
    """Build the empty result: a single value column under an empty 'Time' index."""
    return pd.DataFrame(columns=['Time', value_col_name]).set_index('Time')


def process_timeseries_chart(chart_data, value_col_name='Value'):
    """
    Convert chart points ``[[timestamp, value, ...], ...]`` into a DataFrame.

    The first element of each point is treated as a Unix timestamp in SECONDS
    and the second as the value; any further elements are ignored. Points that
    are not a list/tuple of at least two items, that contain ``None`` in either
    position, or whose timestamp/value cannot be converted are dropped
    individually, so one malformed point (including the first) never discards
    the rest of the series.

    Args:
        chart_data: List of points as described above.
        value_col_name: Name of the value column in the result.

    Returns:
        A DataFrame with a UTC 'Time' DatetimeIndex and one numeric column
        ``value_col_name``, sorted by time. An empty DataFrame (same column,
        empty 'Time' index) is returned when the input is not a non-empty list,
        when no point survives cleaning, or when an unexpected error occurs
        (the error is printed together with its traceback).
    """
    if not chart_data or not isinstance(chart_data, list):
        return _empty_timeseries_frame(value_col_name)

    try:
        # Keep only well-formed (timestamp, value) pairs
        points = [
            (item[0], item[1]) for item in chart_data
            if isinstance(item, (list, tuple)) and len(item) >= 2
            and item[0] is not None and item[1] is not None
        ]
        if not points:
            return _empty_timeseries_frame(value_col_name)

        df = pd.DataFrame(points, columns=['Time_Raw', value_col_name])

        # Timestamps: coerce to numbers first so non-numeric entries become NaN and are dropped
        df['Time_Raw'] = pd.to_numeric(df['Time_Raw'], errors='coerce')
        df = df.dropna(subset=['Time_Raw'])

        # utc=True yields a timezone-aware (UTC) result directly; unconvertible values become NaT
        df['Time'] = pd.to_datetime(df['Time_Raw'], unit='s', utc=True, errors='coerce')

        # Values: anything non-numeric becomes NaN
        df[value_col_name] = pd.to_numeric(df[value_col_name], errors='coerce')

        # Discard rows whose timestamp or value failed conversion
        df = df.dropna(subset=['Time', value_col_name])
        if df.empty:
            return _empty_timeseries_frame(value_col_name)

        # Only the value column is returned, indexed and sorted by time
        return df.set_index('Time')[[value_col_name]].sort_index()

    except Exception as e:
        # Deliberate best-effort: report the problem and fall back to an empty frame
        print(f"Error creating/processing DataFrame for {value_col_name}: {e}")
        traceback.print_exc()
        return _empty_timeseries_frame(value_col_name)