| |
| """processing.py |
| |
| Automatically generated by Colab. |
| |
| Original file is located at |
| https://colab.research.google.com/drive/13EcoLMljb9XzVBELmFC0EBDknuHS79Vy |
| """ |
|
|
| |
| |
|
|
| import json |
| import pandas as pd |
| import traceback |
| import numpy as np |
| from utils import get_nested_value, process_timeseries_chart |
|
|
def _load_chart_frame(data, chart_name, series_name, column_name):
    """Look up one chart series in *data* and convert it to a DataFrame.

    Reads ``charts -> chart_name -> series -> series_name -> values`` via
    ``get_nested_value`` and hands the result to ``process_timeseries_chart``
    together with the desired value-column name.

    NOTE(review): both helpers come from ``utils`` and are not visible here.
    Callers assume the returned object is always a DataFrame (possibly empty),
    even when the series is missing -- confirm against ``utils``.
    """
    values = get_nested_value(
        data, ['charts', chart_name, 'series', series_name, 'values']
    )
    return process_timeseries_chart(values, column_name)


def _build_trades_df(closed_trades):
    """Convert the raw ``closedTrades`` payload into a typed trades DataFrame.

    Returns ``None`` when *closed_trades* is falsy, is not a list, or yields an
    empty frame. Otherwise returns a DataFrame in which:

    * the known numeric columns are coerced with ``pd.to_numeric``
      (unparseable values become NaN);
    * ``entryTime`` / ``exitTime`` are parsed to timezone-naive datetimes
      (unparseable values become NaT);
    * ``duration_td`` (timedelta) and ``duration_days`` (float days) are added.
      Both are all-missing when either time column is absent or entirely NaT.
    """
    if not closed_trades or not isinstance(closed_trades, list):
        return None

    trades = pd.DataFrame(closed_trades)
    if trades.empty:
        return None

    for col in ('profitLoss', 'entryPrice', 'exitPrice', 'quantity', 'totalFees'):
        if col in trades.columns:
            trades[col] = pd.to_numeric(trades[col], errors='coerce')

    time_cols = ('entryTime', 'exitTime')
    for col in time_cols:
        if col not in trades.columns:
            continue
        try:
            parsed = pd.to_datetime(trades[col], errors='coerce', utc=True)
        except ValueError:
            # Fallback: keep only the leading "YYYY-MM-DDTHH:MM:SS" part and
            # parse it without timezone handling.
            parsed = pd.to_datetime(trades[col].str.slice(0, 19), errors='coerce')
        # Always drop the timezone (even for an all-NaT column) so both time
        # columns end up naive and can be subtracted from each other.
        if parsed.dt.tz is not None:
            parsed = parsed.dt.tz_localize(None)
        trades[col] = parsed

    can_compute_duration = all(
        col in trades.columns
        and pd.api.types.is_datetime64_any_dtype(trades[col])
        and trades[col].notna().any()
        for col in time_cols
    )
    if can_compute_duration:
        trades['duration_td'] = trades['exitTime'] - trades['entryTime']
        trades['duration_days'] = (
            trades['duration_td'].dt.total_seconds() / (24 * 60 * 60)
        )
    else:
        # Assigning the bare scalar pd.NaT would create a datetime64 column;
        # build an explicit timedelta column so the dtype matches the branch
        # above.
        trades['duration_td'] = pd.Series(
            pd.NaT, index=trades.index, dtype='timedelta64[ns]'
        )
        trades['duration_days'] = np.nan

    return trades


def process_single_file(file_path):
    """Process a single QuantConnect JSON result file.

    Extracts statistics, equity, drawdown, benchmark, trades, exposure and
    turnover data. This function does not raise for I/O, JSON or processing
    failures inside the main ``try`` block; they are printed and reported
    through the ``"error"`` entry of the returned dictionary instead.

    Args:
        file_path: Path of the JSON file, as a ``str`` or ``os.PathLike``
            object. A falsy value is reported as ``"Unknown File"``.

    Returns:
        dict with the keys:
            ``filename``        -- base name of *file_path*.
            ``stats_df``        -- DataFrame with ``Metric`` / ``Value`` columns.
            ``equity_df``       -- equity chart frame with the index reset.
            ``daily_returns``   -- ``pct_change()`` of the ``Equity`` column with
                                   NaNs dropped, or ``None`` if unavailable.
            ``drawdown_df``     -- drawdown chart frame with the index reset.
            ``benchmark_df``    -- benchmark chart frame with the index reset.
            ``trades_df``       -- closed trades with typed columns and durations.
            ``exposure_series`` -- raw value found at ``charts/Exposure/series``.
            ``turnover_df``     -- turnover chart frame with the index reset.
            ``error``           -- error message string, or ``None`` on success.
        Entries that could not be populated keep their empty defaults. When an
        error occurs part-way through, entries filled before the failure are
        kept.
    """
    import os  # local import: only needed for the base-name extraction below

    # os.path.basename accepts str and os.PathLike and honours the platform's
    # path separators, unlike a manual split on '/'.
    filename = os.path.basename(file_path) if file_path else "Unknown File"

    results = {
        "filename": filename,
        "stats_df": pd.DataFrame(columns=['Metric', 'Value']),
        "equity_df": pd.DataFrame(),
        "daily_returns": None,
        "drawdown_df": pd.DataFrame(),
        "benchmark_df": pd.DataFrame(),
        "trades_df": pd.DataFrame(),
        "exposure_series": None,
        "turnover_df": pd.DataFrame(),
        "error": None,
    }

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Statistics: prefer the top-level block, fall back to the portfolio
        # statistics nested under totalPerformance.
        stats_dict = (
            get_nested_value(data, ['statistics'])
            or get_nested_value(data, ['totalPerformance', 'portfolioStatistics'])
        )
        if stats_dict:
            results["stats_df"] = pd.DataFrame(
                list(stats_dict.items()), columns=['Metric', 'Value']
            )

        # Equity curve and the period-over-period returns derived from it.
        equity_indexed = _load_chart_frame(
            data, 'Strategy Equity', 'Equity', 'Equity'
        )
        if not equity_indexed.empty:
            results["equity_df"] = equity_indexed.reset_index()
            returns_series = equity_indexed['Equity'].pct_change().dropna()
            if not returns_series.empty:
                results["daily_returns"] = returns_series

        drawdown_indexed = _load_chart_frame(
            data, 'Drawdown', 'Equity Drawdown', 'Drawdown'
        )
        if not drawdown_indexed.empty:
            results["drawdown_df"] = drawdown_indexed.reset_index()

        benchmark_indexed = _load_chart_frame(
            data, 'Benchmark', 'Benchmark', 'Benchmark'
        )
        if not benchmark_indexed.empty:
            results["benchmark_df"] = benchmark_indexed.reset_index()

        trades_df = _build_trades_df(
            get_nested_value(data, ['totalPerformance', 'closedTrades'])
        )
        if trades_df is not None:
            results["trades_df"] = trades_df

        # Exposure is passed through unprocessed.
        results["exposure_series"] = get_nested_value(
            data, ['charts', 'Exposure', 'series']
        )

        turnover_indexed = _load_chart_frame(
            data, 'Portfolio Turnover', 'Portfolio Turnover', 'Turnover'
        )
        if not turnover_indexed.empty:
            results["turnover_df"] = turnover_indexed.reset_index()

    except FileNotFoundError:
        error_msg = f"Error: File not found at {file_path}"
        print(error_msg)
        results["error"] = error_msg
    except json.JSONDecodeError:
        error_msg = f"Error: Could not decode JSON from {filename}"
        print(error_msg)
        results["error"] = error_msg
    except Exception as e:
        # Top-level boundary: report anything unexpected with a traceback but
        # still hand back whatever was collected so far.
        error_msg = f"Error processing file {filename}: {e}"
        print(error_msg)
        traceback.print_exc()
        results["error"] = error_msg

    return results