Spaces:

RazHadas
/

Quant_Connect_JSON_analysis

Sleeping

App Files Files Community

RazHadas committed on Apr 27, 2025

Commit

76317bb

verified ·

1 Parent(s): ff08568

Upload 6 files

Browse files

Files changed (6) hide show

app.py +541 -0
plotting.py +263 -0
processing.py +152 -0
requirements.txt +9 -0
risk_analysis.py +463 -0
utils.py +138 -0

app.py ADDED Viewed

	@@ -0,0 +1,541 @@

+# -*- coding: utf-8 -*-
+"""app.py
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/18CPi10QPKtnp8wBs3Fd21JjaDxoHytAM
+"""
+# app.py
+# Main Gradio application script for QuantConnect Report Enhancer.
+import gradio as gr
+import pandas as pd
+import numpy as np
+import traceback
+# Import functions from other modules
+from utils import create_empty_figure
+from processing import process_single_file
+from risk_analysis import calculate_correlation, calculate_manual_risk_stats
+from plotting import generate_figures_for_strategy, generate_manual_risk_figures
+# --- Constants for UI (used when building the Closed Trades sample table) ---
+DEFAULT_TRADES_COLS_DISPLAY = [ # Trade columns to show, in display order; the callbacks skip any column a trades table lacks
+    'symbol', 'entryTime', 'exitTime', 'direction', 'quantity',
+    'entryPrice', 'exitPrice', 'profitLoss', 'totalFees', 'duration_days'
+]
+MAX_TRADES_DISPLAY = 50 # Only the first MAX_TRADES_DISPLAY rows of the trades table are shown in the UI
+# --- Gradio Interface Callbacks ---
+def process_files_and_update_ui(uploaded_files):
+    """
+    Callback function triggered when files are uploaded.
+    Processes each file, calculates overall metrics (like correlation),
+    updates the application state, and populates the UI with the first strategy's details.
+    Args:
+        uploaded_files: A list of file objects uploaded via the Gradio interface.
+    Returns:
+        A tuple containing updated values for all relevant Gradio components:
+        - Status message (Textbox)
+        - Strategy dropdown (Dropdown) - updated choices, value, visibility
+        - Application state (State) - dictionary holding all processed results
+        - Outputs for individual strategy tabs (DataFrames, Plots)
+        - Outputs for correlation tab (DataFrame, Plot)
+        - Outputs for manual risk analysis tab (DataFrames, Plots)
+    """
+    # --- Initialize Default/Empty Outputs ---
+    # Create empty figures and dataframes to return if processing fails or no files uploaded
+    default_stats_df = pd.DataFrame(columns=['Metric', 'Value'])
+    default_trades_df_display = pd.DataFrame()
+    default_equity_fig = create_empty_figure("Equity Curve")
+    default_drawdown_fig = create_empty_figure("Drawdown Curve")
+    default_benchmark_fig = create_empty_figure("Equity vs Benchmark")
+    default_pnl_hist_fig = create_empty_figure("P/L Distribution")
+    default_duration_hist_fig = create_empty_figure("Trade Duration Distribution")
+    default_exposure_fig = create_empty_figure("Exposure")
+    default_turnover_fig = create_empty_figure("Portfolio Turnover")
+    default_corr_matrix = pd.DataFrame()
+    default_corr_heatmap = create_empty_figure("Correlation Heatmap")
+    default_monthly_table_display = pd.DataFrame() # For the formatted table in UI
+    default_monthly_stats = pd.DataFrame(columns=['Metric', 'Value'])
+    default_monthly_heatmap = create_empty_figure("Monthly Returns Heatmap")
+    default_rolling_vol_stats = pd.DataFrame(columns=['Window', 'Min Vol', 'Max Vol', 'Mean Vol'])
+    default_rolling_vol_plot = create_empty_figure("Rolling Volatility")
+    default_drawdown_table = pd.DataFrame()
+    # Structure default outputs for return statement clarity
+    initial_outputs = [
+        default_stats_df, default_equity_fig, default_drawdown_fig, default_benchmark_fig,
+        default_pnl_hist_fig, default_duration_hist_fig, default_exposure_fig,
+        default_turnover_fig, default_trades_df_display
+    ]
+    correlation_outputs = [default_corr_matrix, default_corr_heatmap]
+    manual_risk_outputs = [
+        default_monthly_table_display, default_monthly_stats, default_monthly_heatmap,
+        default_rolling_vol_plot, default_rolling_vol_stats, default_drawdown_table
+    ]
+    # Combine all output lists for the final return
+    all_default_outputs = initial_outputs + correlation_outputs + manual_risk_outputs
+    # --- Handle No Files Uploaded ---
+    if not uploaded_files:
+        return (
+            "Please upload one or more QuantConnect JSON files.", # Status message
+            gr.Dropdown(choices=[], value=None, visible=False), # Hide dropdown
+            {}, # Empty state
+            *all_default_outputs # Return all default outputs
+        )
+    # --- Process Uploaded Files ---
+    all_results = {} # Dictionary to store results for each processed file {filename: results_dict}
+    status_messages = [] # List to collect status/error messages
+    processed_files_count = 0
+    for file_obj in uploaded_files:
+        if file_obj is None: # Skip if file object is somehow None
+             continue
+        try:
+            file_path = file_obj.name # Get the temporary file path from Gradio
+            # Process the single file using the function from processing.py
+            strategy_result = process_single_file(file_path)
+            # Store the result using the filename as the key
+            all_results[strategy_result["filename"]] = strategy_result
+            # Log errors or increment success count
+            if strategy_result["error"]:
+                status_messages.append(strategy_result["error"])
+            else:
+                processed_files_count += 1
+        except Exception as e:
+            # Catch unexpected errors during the file processing loop
+            error_msg = f"Failed to process an uploaded file object: {e}"
+            print(error_msg)
+            traceback.print_exc()
+            status_messages.append(error_msg)
+    # --- Handle No Valid Files Processed ---
+    if not all_results or processed_files_count == 0:
+        status = "\n".join(status_messages) if status_messages else "No valid QuantConnect JSON files processed."
+        return (
+            status,
+            gr.Dropdown(choices=[], value=None, visible=False), # Hide dropdown
+            {}, # Empty state
+            *all_default_outputs
+        )
+    # --- Calculate Correlation (Across All Processed Files) ---
+    try:
+        corr_matrix_df, corr_heatmap_fig, corr_status = calculate_correlation(all_results)
+        status_messages.append(corr_status) # Add correlation status to messages
+    except Exception as e:
+        print(f"Error during correlation calculation: {e}")
+        traceback.print_exc()
+        status_messages.append(f"Correlation Error: {e}")
+        # Use default correlation outputs on error
+        corr_matrix_df = default_corr_matrix
+        corr_heatmap_fig = default_corr_heatmap
+    # --- Prepare Initial UI Display (Using the First Processed Strategy) ---
+    first_filename = list(all_results.keys())[0]
+    initial_strategy_results = all_results[first_filename]
+    # Generate standard plots for the first strategy
+    try:
+        initial_figures = generate_figures_for_strategy(initial_strategy_results)
+    except Exception as e:
+         print(f"Error generating initial figures for {first_filename}: {e}")
+         initial_figures = {k: create_empty_figure(f"{k.replace('_fig','')} - Error") for k in initial_outputs_map.keys() if k.endswith('_fig')} # Create error figures
+         status_messages.append(f"Plotting Error (Initial): {e}")
+    # Perform manual risk analysis for the first strategy
+    try:
+        initial_manual_risk_analysis = calculate_manual_risk_stats(initial_strategy_results.get("daily_returns"))
+        status_messages.append(f"Risk Analysis ({first_filename}): {initial_manual_risk_analysis['status']}")
+        # Generate risk plots based on the analysis results
+        initial_manual_risk_figures = generate_manual_risk_figures(initial_manual_risk_analysis, first_filename)
+    except Exception as e:
+        print(f"Error during initial manual risk analysis or plotting for {first_filename}: {e}")
+        traceback.print_exc()
+        status_messages.append(f"Risk Analysis/Plot Error (Initial): {e}")
+        # Use default risk outputs on error
+        initial_manual_risk_analysis = {
+            "monthly_returns_table_for_heatmap": None, "monthly_perf_stats": default_monthly_stats,
+            "rolling_vol_df": None, "rolling_vol_stats": default_rolling_vol_stats,
+            "drawdown_table": default_drawdown_table
+        }
+        initial_manual_risk_figures = {
+            "monthly_heatmap_fig": default_monthly_heatmap, "rolling_vol_fig": default_rolling_vol_plot
+        }
+    # --- Prepare DataFrames for Initial Display ---
+    initial_stats_df = initial_strategy_results.get("stats_df", default_stats_df)
+    initial_trades_df = initial_strategy_results.get("trades_df", pd.DataFrame())
+    # Select and format trades table for display
+    if not initial_trades_df.empty:
+        # Filter columns to display
+        existing_display_cols = [col for col in DEFAULT_TRADES_COLS_DISPLAY if col in initial_trades_df.columns]
+        initial_trades_df_display = initial_trades_df[existing_display_cols].head(MAX_TRADES_DISPLAY)
+        # Handle complex 'symbol' column (often a dictionary in QC output)
+        if 'symbol' in initial_trades_df_display.columns:
+             # Check if the first non-null symbol is a dict
+             first_symbol = initial_trades_df_display['symbol'].dropna().iloc[0] if not initial_trades_df_display['symbol'].dropna().empty else None
+             if isinstance(first_symbol, dict):
+                  # Apply function to extract 'value' or 'ticker' if it's a dict, otherwise keep original
+                  initial_trades_df_display.loc[:, 'symbol'] = initial_trades_df_display['symbol'].apply(
+                       lambda x: x.get('value', x.get('ticker', str(x))) if isinstance(x, dict) else x
+                  )
+             # Convert datetime columns to string for display if needed (Gradio often handles it)
+             for col in ['entryTime', 'exitTime']:
+                  if col in initial_trades_df_display.columns and pd.api.types.is_datetime64_any_dtype(initial_trades_df_display[col]):
+                      initial_trades_df_display[col] = initial_trades_df_display[col].dt.strftime('%Y-%m-%d %H:%M:%S')
+    else:
+        initial_trades_df_display = default_trades_df_display
+    # Prepare formatted monthly returns table for UI display
+    formatted_monthly_table = default_monthly_table_display
+    heatmap_data = initial_manual_risk_analysis.get("monthly_returns_table_for_heatmap")
+    if heatmap_data is not None and not heatmap_data.empty:
+        df_display = heatmap_data.copy() # Work on a copy
+        # Format values as percentages (e.g., "1.23%")
+        df_display = df_display.applymap(lambda x: f'{x:.2f}%' if pd.notna(x) else '')
+        # Reset index to make 'Year' a regular column for Gradio DataFrame display
+        formatted_monthly_table = df_display.reset_index()
+    # --- Consolidate Status Message ---
+    final_status = "\n".join(s for s in status_messages if s).strip()
+    if not final_status:
+        final_status = f"Successfully processed {processed_files_count} file(s)."
+    # --- Assemble Final Outputs ---
+    outputs_to_return = [
+        final_status, # Status Textbox
+        gr.Dropdown( # Strategy Dropdown
+            choices=list(all_results.keys()), # Update choices
+            value=first_filename,             # Set initial value
+            visible=True,                     # Make visible
+            label="Select Strategy to View",
+            interactive=True
+        ),
+        all_results, # Update the hidden state
+        # --- Individual Strategy Tab Outputs ---
+        initial_stats_df,
+        initial_figures.get("equity_fig", default_equity_fig),
+        initial_figures.get("drawdown_fig", default_drawdown_fig),
+        initial_figures.get("benchmark_fig", default_benchmark_fig),
+        initial_figures.get("pnl_hist_fig", default_pnl_hist_fig),
+        initial_figures.get("duration_hist_fig", default_duration_hist_fig),
+        initial_figures.get("exposure_fig", default_exposure_fig),
+        initial_figures.get("turnover_fig", default_turnover_fig),
+        initial_trades_df_display,
+        # --- Correlation Tab Outputs ---
+        corr_matrix_df,
+        corr_heatmap_fig,
+        # --- Manual Risk Tab Outputs ---
+        formatted_monthly_table, # Use the formatted table for display
+        initial_manual_risk_analysis.get("monthly_perf_stats", default_monthly_stats),
+        initial_manual_risk_figures.get("monthly_heatmap_fig", default_monthly_heatmap),
+        initial_manual_risk_figures.get("rolling_vol_fig", default_rolling_vol_plot),
+        initial_manual_risk_analysis.get("rolling_vol_stats", default_rolling_vol_stats),
+        initial_manual_risk_analysis.get("drawdown_table", default_drawdown_table)
+    ]
+    return tuple(outputs_to_return)
+def display_selected_strategy(selected_filename, all_results_state):
+    """
+    Callback function triggered when a strategy is selected from the dropdown.
+    Retrieves the data for the selected strategy from the state and updates
+    the individual strategy tabs and the manual risk analysis tab accordingly.
+    Args:
+        selected_filename: The filename of the strategy selected in the dropdown.
+        all_results_state: The current state dictionary containing all processed results.
+    Returns:
+        A tuple containing updated values for the Gradio components related to
+        the selected strategy's details (Overview, Performance, Trade Analysis,
+        Other Charts, Risk Analysis tabs). Correlation tab is not updated here.
+    """
+    # --- Initialize Default/Empty Outputs ---
+    # (Same defaults as in process_files_and_update_ui for the relevant outputs)
+    default_stats_df = pd.DataFrame(columns=['Metric', 'Value'])
+    default_trades_df_display = pd.DataFrame()
+    default_equity_fig = create_empty_figure("Equity Curve")
+    default_drawdown_fig = create_empty_figure("Drawdown Curve")
+    default_benchmark_fig = create_empty_figure("Equity vs Benchmark")
+    default_pnl_hist_fig = create_empty_figure("P/L Distribution")
+    default_duration_hist_fig = create_empty_figure("Trade Duration Distribution")
+    default_exposure_fig = create_empty_figure("Exposure")
+    default_turnover_fig = create_empty_figure("Portfolio Turnover")
+    default_monthly_table_display = pd.DataFrame()
+    default_monthly_stats = pd.DataFrame(columns=['Metric', 'Value'])
+    default_monthly_heatmap = create_empty_figure("Monthly Returns Heatmap")
+    default_rolling_vol_stats = pd.DataFrame(columns=['Window', 'Min Vol', 'Max Vol', 'Mean Vol'])
+    default_rolling_vol_plot = create_empty_figure("Rolling Volatility")
+    default_drawdown_table = pd.DataFrame()
+    # Structure default outputs for return statement clarity
+    initial_outputs = [
+        default_stats_df, default_equity_fig, default_drawdown_fig, default_benchmark_fig,
+        default_pnl_hist_fig, default_duration_hist_fig, default_exposure_fig,
+        default_turnover_fig, default_trades_df_display
+    ]
+    manual_risk_outputs = [
+        default_monthly_table_display, default_monthly_stats, default_monthly_heatmap,
+        default_rolling_vol_plot, default_rolling_vol_stats, default_drawdown_table
+    ]
+    all_default_outputs = initial_outputs + manual_risk_outputs
+    # --- Validate Selection and State ---
+    if not selected_filename or not all_results_state or selected_filename not in all_results_state:
+        print(f"Warning: Invalid selection ('{selected_filename}') or state. Returning defaults.")
+        # Potentially add a status message update here if you have a dedicated status output for selection changes
+        return tuple(all_default_outputs)
+    # --- Retrieve Selected Strategy Data ---
+    strategy_results = all_results_state[selected_filename]
+    # --- Handle Case Where Selected Strategy Had Processing Errors ---
+    if strategy_results.get("error"):
+        print(f"Displaying error state for {selected_filename}: {strategy_results['error']}")
+        # Show the error in the statistics table and clear other plots/tables
+        error_df = pd.DataFrame([{"Metric": "Error", "Value": strategy_results['error']}])
+        error_outputs = [error_df] + [ # Use error df for stats table
+            create_empty_figure(f"{fig_name} - Error") for fig_name in [ # Create empty error figures
+                "Equity", "Drawdown", "Benchmark", "P/L", "Duration", "Exposure", "Turnover"
+            ]
+        ] + [default_trades_df_display] # Empty trades table
+        error_risk_outputs = [ # Empty risk outputs
+             default_monthly_table_display, default_monthly_stats, create_empty_figure("Monthly Heatmap - Error"),
+             create_empty_figure("Rolling Vol - Error"), default_rolling_vol_stats, default_drawdown_table
+        ]
+        return tuple(error_outputs + error_risk_outputs)
+    # --- Generate Figures and Analysis for Selected Strategy ---
+    # Generate standard plots
+    try:
+        figures = generate_figures_for_strategy(strategy_results)
+    except Exception as e:
+        print(f"Error generating figures for {selected_filename}: {e}")
+        figures = {k: create_empty_figure(f"{k.replace('_fig','')} - Error") for k in initial_outputs_map.keys() if k.endswith('_fig')}
+    # Perform manual risk analysis
+    try:
+        manual_risk_analysis = calculate_manual_risk_stats(strategy_results.get("daily_returns"))
+        # Generate risk plots
+        manual_risk_figures = generate_manual_risk_figures(manual_risk_analysis, selected_filename)
+    except Exception as e:
+        print(f"Error during manual risk analysis or plotting for {selected_filename}: {e}")
+        traceback.print_exc()
+        # Use default risk outputs on error
+        manual_risk_analysis = {
+            "monthly_returns_table_for_heatmap": None, "monthly_perf_stats": default_monthly_stats,
+            "rolling_vol_df": None, "rolling_vol_stats": default_rolling_vol_stats,
+            "drawdown_table": default_drawdown_table
+        }
+        manual_risk_figures = {
+            "monthly_heatmap_fig": default_monthly_heatmap, "rolling_vol_fig": default_rolling_vol_plot
+        }
+    # --- Prepare DataFrames for Display ---
+    stats_df = strategy_results.get("stats_df", default_stats_df)
+    trades_df = strategy_results.get("trades_df", pd.DataFrame())
+    # Select and format trades table
+    if not trades_df.empty:
+        existing_display_cols = [col for col in DEFAULT_TRADES_COLS_DISPLAY if col in trades_df.columns]
+        trades_df_display = trades_df[existing_display_cols].head(MAX_TRADES_DISPLAY)
+        if 'symbol' in trades_df_display.columns:
+             first_symbol = trades_df_display['symbol'].dropna().iloc[0] if not trades_df_display['symbol'].dropna().empty else None
+             if isinstance(first_symbol, dict):
+                  trades_df_display.loc[:, 'symbol'] = trades_df_display['symbol'].apply(
+                       lambda x: x.get('value', x.get('ticker', str(x))) if isinstance(x, dict) else x
+                  )
+             # Convert datetime columns to string for display
+             for col in ['entryTime', 'exitTime']:
+                  if col in trades_df_display.columns and pd.api.types.is_datetime64_any_dtype(trades_df_display[col]):
+                      trades_df_display[col] = trades_df_display[col].dt.strftime('%Y-%m-%d %H:%M:%S')
+    else:
+        trades_df_display = default_trades_df_display
+    # Prepare formatted monthly returns table
+    formatted_monthly_table = default_monthly_table_display
+    heatmap_data = manual_risk_analysis.get("monthly_returns_table_for_heatmap")
+    if heatmap_data is not None and not heatmap_data.empty:
+        df_display = heatmap_data.copy()
+        df_display = df_display.applymap(lambda x: f'{x:.2f}%' if pd.notna(x) else '')
+        formatted_monthly_table = df_display.reset_index()
+    # --- Assemble Outputs for Return ---
+    # Return components for the tabs updated by the dropdown selection
+    outputs_to_return = [
+        # --- Individual Strategy Tab Outputs ---
+        stats_df,
+        figures.get("equity_fig", default_equity_fig),
+        figures.get("drawdown_fig", default_drawdown_fig),
+        figures.get("benchmark_fig", default_benchmark_fig),
+        figures.get("pnl_hist_fig", default_pnl_hist_fig),
+        figures.get("duration_hist_fig", default_duration_hist_fig),
+        figures.get("exposure_fig", default_exposure_fig),
+        figures.get("turnover_fig", default_turnover_fig),
+        trades_df_display,
+        # --- Manual Risk Tab Outputs ---
+        formatted_monthly_table, # Use formatted table
+        manual_risk_analysis.get("monthly_perf_stats", default_monthly_stats),
+        manual_risk_figures.get("monthly_heatmap_fig", default_monthly_heatmap),
+        manual_risk_figures.get("rolling_vol_fig", default_rolling_vol_plot),
+        manual_risk_analysis.get("rolling_vol_stats", default_rolling_vol_stats),
+        manual_risk_analysis.get("drawdown_table", default_drawdown_table)
+    ]
+    return tuple(outputs_to_return)
+# --- Build Gradio Interface: component layout first, then callback wiring ---
+with gr.Blocks(theme=gr.themes.Soft()) as iface:
+    gr.Markdown("# Trading Platform Report Enhancer")
+    gr.Markdown("Upload one or more QuantConnect backtest JSON files to generate analysis reports and compare strategies.")
+    # Hidden state holding all processed results ({filename: results dict}) between interactions
+    all_results_state = gr.State({})
+    # --- Row 1: File Upload ---
+    with gr.Row():
+        file_input = gr.File(
+            label="Upload QuantConnect JSON File(s)",
+            file_count="multiple", # Allow multiple files
+            file_types=['.json']   # Restrict to JSON files
+        )
+    # --- Row 2: Status Output ---
+    with gr.Row():
+        status_output = gr.Textbox(label="Processing Status", interactive=False, lines=2) # Read-only, two-line status box
+    # --- Row 3: Strategy Selection Dropdown ---
+    with gr.Row():
+        strategy_dropdown = gr.Dropdown(
+            label="Select Strategy to View",
+            choices=[],          # Initially empty, populated after file processing
+            visible=False,       # Initially hidden; the upload callback makes it visible
+            interactive=True     # User can interact with it
+        )
+    # --- Tabs for Different Analysis Views ---
+    with gr.Tabs():
+        # --- Tab 1: Overview ---
+        with gr.TabItem("📊 Overview"):
+            with gr.Column():
+                gr.Markdown("## Key Performance Metrics")
+                stats_output = gr.DataFrame(label="Overall Statistics", interactive=False, wrap=True)
+        # --- Tab 2: Performance Charts ---
+        with gr.TabItem("📈 Performance Charts"):
+             with gr.Column():
+                 gr.Markdown("## Equity & Drawdown")
+                 with gr.Row():
+                     plot_equity = gr.Plot(label="Equity Curve")
+                     plot_drawdown = gr.Plot(label="Drawdown Curve")
+                 gr.Markdown("## Benchmark Comparison")
+                 plot_benchmark = gr.Plot(label="Equity vs Benchmark (Normalized)") # Label notes that the plotted series are normalized
+        # --- Tab 3: Trade Analysis ---
+        with gr.TabItem("💹 Trade Analysis"):
+             with gr.Column():
+                 gr.Markdown("## Profit/Loss and Duration")
+                 with gr.Row():
+                     plot_pnl_hist = gr.Plot(label="P/L Distribution")
+                     plot_duration_hist = gr.Plot(label="Trade Duration Distribution (Days)")
+                 gr.Markdown(f"## Closed Trades (Sample - First {MAX_TRADES_DISPLAY})") # Heading reflects the MAX_TRADES_DISPLAY row cap
+                 trades_output = gr.DataFrame(label="Closed Trades Sample", interactive=False, wrap=True)
+        # --- Tab 4: Other Charts ---
+        with gr.TabItem("⚙️ Other Charts"):
+             with gr.Column():
+                 gr.Markdown("## Exposure & Turnover")
+                 with gr.Row():
+                     plot_exposure = gr.Plot(label="Exposure")
+                     plot_turnover = gr.Plot(label="Portfolio Turnover")
+        # --- Tab 5: Risk Analysis (Manual Calculations) ---
+        with gr.TabItem("🔎 Risk Analysis"):
+             with gr.Column():
+                 gr.Markdown("## Monthly Performance")
+                 plot_monthly_heatmap = gr.Plot(label="Monthly Returns Heatmap")
+                 # Component names mirror the values the callbacks return for this tab
+                 monthly_returns_table_output = gr.DataFrame(label="Monthly Returns (%) Table", interactive=False, wrap=True)
+                 monthly_perf_stats_output = gr.DataFrame(label="Monthly Performance Stats", interactive=False, wrap=True)
+                 gr.Markdown("## Rolling Volatility")
+                 plot_rolling_vol = gr.Plot(label="Annualized Rolling Volatility")
+                 rolling_vol_stats_output = gr.DataFrame(label="Rolling Volatility Stats", interactive=False, wrap=True)
+                 gr.Markdown("## Drawdown Analysis")
+                 drawdown_table_output = gr.DataFrame(label=f"Top {5} Drawdown Periods", interactive=False, wrap=True) # NOTE(review): the 5 is hard-coded in this label; presumably it should match the number of drawdowns computed in risk_analysis - confirm
+        # --- Tab 6: Correlation ---
+        with gr.TabItem("🤝 Correlation"):
+            with gr.Column():
+                gr.Markdown("## Strategy (+Benchmark) Correlation")
+                gr.Markdown("_Based on daily equity percentage change._") # Subtitle explanation
+                corr_heatmap_output = gr.Plot(label="Correlation Heatmap")
+                corr_matrix_output = gr.DataFrame(label="Correlation Matrix", interactive=False, wrap=True)
+    # --- Define Output Lists for Callbacks ---
+    # Per-strategy components (Overview, Performance, Trade Analysis, Other Charts); order must match the callbacks' return order
+    individual_report_outputs = [
+        stats_output, plot_equity, plot_drawdown, plot_benchmark, plot_pnl_hist,
+        plot_duration_hist, plot_exposure, plot_turnover, trades_output
+    ]
+    manual_risk_tab_outputs = [ # Risk Analysis tab components, in callback return order (not on-screen order)
+        monthly_returns_table_output, monthly_perf_stats_output, plot_monthly_heatmap,
+        plot_rolling_vol, rolling_vol_stats_output, drawdown_table_output
+    ]
+    correlation_tab_outputs = [corr_matrix_output, corr_heatmap_output] # Matrix first, then heatmap, as the upload callback returns them
+    file_processing_outputs = [status_output, strategy_dropdown, all_results_state] # Status, dropdown and state lead the upload callback's return tuple
+    # Combine ALL outputs for the file upload callback trigger (all tabs + state + dropdown)
+    file_upload_all_outputs = (
+        file_processing_outputs +
+        individual_report_outputs +
+        correlation_tab_outputs +
+        manual_risk_tab_outputs
+    )
+    # Outputs updated by dropdown selection (individual strategy tabs + risk tab; correlation is left untouched)
+    dropdown_outputs = individual_report_outputs + manual_risk_tab_outputs
+    # --- Connect Callbacks to Events ---
+    # When files are uploaded (or cleared), trigger file processing
+    file_input.change(
+        fn=process_files_and_update_ui,
+        inputs=[file_input],
+        outputs=file_upload_all_outputs # Pass the combined list
+    )
+    # When the dropdown value changes, trigger display update
+    strategy_dropdown.change(
+        fn=display_selected_strategy,
+        inputs=[strategy_dropdown, all_results_state],
+        outputs=dropdown_outputs # Pass the relevant outputs list
+    )
+# --- Launch the Gradio App (only when run as a script, not when imported) ---
+if __name__ == '__main__':
+    # share=True asks Gradio for a temporary public link; it is disabled here
+    # debug=True provides detailed error logs in the console
+    iface.launch(debug=True, share=False) # NOTE(review): a hosted Space presumably serves the app itself, so share=True should not be needed there - confirm

plotting.py ADDED Viewed

	@@ -0,0 +1,263 @@

+# -*- coding: utf-8 -*-
+"""plotting.py
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1ILADgRrYqkAEj5jyymO50ZvzDzVdfD6g
+"""
+# plotting.py
+# Functions for generating Plotly figures from processed strategy data.
+import plotly.express as px
+import plotly.graph_objects as go
+import pandas as pd
+import numpy as np
+import traceback
+from utils import create_empty_figure # Import helper
+def generate_figures_for_strategy(strategy_results):
+    """
+    Generates standard Plotly figures for a single strategy's results.
+    Args:
+        strategy_results: Dictionary containing processed data for one strategy,
+                          as returned by process_single_file. Expected keys include:
+                          'filename', 'equity_df', 'drawdown_df', 'benchmark_df',
+                          'trades_df', 'exposure_series', 'turnover_df'.
+    Returns:
+        A dictionary containing Plotly figure objects:
+        'equity_fig', 'drawdown_fig', 'benchmark_fig', 'pnl_hist_fig',
+        'duration_hist_fig', 'exposure_fig', 'turnover_fig'.
+        Uses empty figures if data is missing or invalid.
+    """
+    figures = {
+        "equity_fig": create_empty_figure("Equity Curve"),
+        "drawdown_fig": create_empty_figure("Drawdown Curve"),
+        "benchmark_fig": create_empty_figure("Equity vs Benchmark"),
+        "pnl_hist_fig": create_empty_figure("P/L Distribution"),
+        "duration_hist_fig": create_empty_figure("Trade Duration Distribution"),
+        "exposure_fig": create_empty_figure("Exposure"),
+        "turnover_fig": create_empty_figure("Portfolio Turnover")
+    }
+    filename = strategy_results.get("filename", "Strategy") # Get filename for titles
+    try:
+        # --- Equity Curve ---
+        equity_df = strategy_results.get("equity_df")
+        if equity_df is not None and not equity_df.empty and 'Time' in equity_df.columns and 'Equity' in equity_df.columns:
+             # Ensure Time is datetime
+             equity_df['Time'] = pd.to_datetime(equity_df['Time'])
+             fig = px.line(equity_df, x='Time', y='Equity', title=f'Equity Curve ({filename})')
+             fig.update_layout(yaxis_title="Portfolio Value")
+             figures["equity_fig"] = fig
+        # --- Drawdown Curve ---
+        drawdown_df = strategy_results.get("drawdown_df")
+        if drawdown_df is not None and not drawdown_df.empty and 'Time' in drawdown_df.columns and 'Drawdown' in drawdown_df.columns:
+             # Ensure Time is datetime
+             drawdown_df['Time'] = pd.to_datetime(drawdown_df['Time'])
+             # Convert drawdown to percentage for plotting
+             drawdown_df['Drawdown_pct'] = drawdown_df['Drawdown'] * 100
+             fig = px.area(drawdown_df, x='Time', y='Drawdown_pct', title=f'Drawdown Curve (%) ({filename})', labels={'Drawdown_pct': 'Drawdown (%)'})
+             fig.update_layout(yaxis_title="Drawdown (%)")
+             figures["drawdown_fig"] = fig
+        # --- Equity vs Benchmark ---
+        benchmark_df = strategy_results.get("benchmark_df")
+        # Requires both equity and benchmark data
+        if equity_df is not None and not equity_df.empty and 'Time' in equity_df.columns and 'Equity' in equity_df.columns and \
+           benchmark_df is not None and not benchmark_df.empty and 'Time' in benchmark_df.columns and 'Benchmark' in benchmark_df.columns:
+            try:
+                # Ensure Time columns are datetime
+                equity_df['Time'] = pd.to_datetime(equity_df['Time'])
+                benchmark_df['Time'] = pd.to_datetime(benchmark_df['Time'])
+                # Merge on Time after setting as index
+                equity_indexed = equity_df.set_index('Time')['Equity']
+                benchmark_indexed = benchmark_df.set_index('Time')['Benchmark']
+                # Combine, handling potential different start/end dates
+                combined = pd.concat([equity_indexed, benchmark_indexed], axis=1, keys=['Equity', 'Benchmark'], join='outer')
+                # Normalize to start at 1 (or 100) for comparison
+                # Check if first row has NaN values after outer join
+                first_valid_index = combined.first_valid_index()
+                if first_valid_index is not None:
+                    # Normalize using the first non-NaN value for each column
+                    normalized_equity = (combined['Equity'] / combined['Equity'].loc[combined['Equity'].first_valid_index()])#.fillna(method='ffill') # Optional fill
+                    normalized_benchmark = (combined['Benchmark'] / combined['Benchmark'].loc[combined['Benchmark'].first_valid_index()])#.fillna(method='ffill') # Optional fill
+                    # Create figure and add traces
+                    fig = go.Figure()
+                    fig.add_trace(go.Scatter(x=normalized_equity.index, y=normalized_equity, mode='lines', name='Strategy Equity'))
+                    fig.add_trace(go.Scatter(x=normalized_benchmark.index, y=normalized_benchmark, mode='lines', name='Benchmark'))
+                    fig.update_layout(title=f'Normalized Equity vs Benchmark ({filename})', xaxis_title='Date', yaxis_title='Normalized Value (Start = 1)')
+                    figures["benchmark_fig"] = fig
+                else:
+                    print("Could not normalize Equity vs Benchmark: No valid starting point found after merge.")
+                    figures["benchmark_fig"] = create_empty_figure(f"Equity vs Benchmark ({filename}) - Normalization Failed")
+            except Exception as merge_err:
+                 print(f"Error merging/plotting Equity vs Benchmark: {merge_err}")
+                 figures["benchmark_fig"] = create_empty_figure(f"Equity vs Benchmark ({filename}) - Error")
+        # --- Trade P/L Distribution ---
+        trades_df = strategy_results.get("trades_df")
+        if trades_df is not None and not trades_df.empty and 'profitLoss' in trades_df.columns:
+            # Ensure profitLoss is numeric
+            trades_df['profitLoss'] = pd.to_numeric(trades_df['profitLoss'], errors='coerce')
+            valid_pnl = trades_df['profitLoss'].dropna()
+            if not valid_pnl.empty:
+                fig = px.histogram(valid_pnl, title=f'Trade Profit/Loss Distribution ({filename})', labels={'value': 'Profit/Loss'})
+                figures["pnl_hist_fig"] = fig
+        # --- Trade Duration Distribution ---
+        # Uses 'duration_days' calculated in processing.py
+        if trades_df is not None and not trades_df.empty and 'duration_days' in trades_df.columns:
+             # Ensure duration_days is numeric
+             trades_df['duration_days'] = pd.to_numeric(trades_df['duration_days'], errors='coerce')
+             valid_duration = trades_df['duration_days'].dropna()
+             if not valid_duration.empty:
+                fig = px.histogram(valid_duration, title=f'Trade Duration Distribution (Days) ({filename})', labels={'value': 'Duration (Days)'})
+                figures["duration_hist_fig"] = fig
+        # --- Exposure Chart ---
+        # Exposure data format varies; this is a basic example assuming a dict of series
+        exposure_series_dict = strategy_results.get("exposure_series")
+        if exposure_series_dict and isinstance(exposure_series_dict, dict):
+            fig = go.Figure()
+            exposure_plotted = False
+            for series_name, series_data in exposure_series_dict.items():
+                if 'values' in series_data and isinstance(series_data['values'], list):
+                    # Process this specific series using the timeseries helper
+                    exposure_df = process_timeseries_chart(series_data['values'], series_name)
+                    if not exposure_df.empty:
+                        # Plot as area chart if 'Exposure' in name, else line
+                        plot_type = 'area' if 'Exposure' in series_name else 'scatter'
+                        fill_type = 'tozeroy' if plot_type == 'area' else None
+                        fig.add_trace(go.Scatter(x=exposure_df.index, y=exposure_df[series_name],
+                                                 mode='lines', name=series_name, fill=fill_type))
+                        exposure_plotted = True
+            if exposure_plotted:
+                fig.update_layout(title=f'Exposure ({filename})', xaxis_title='Date', yaxis_title='Value / % Exposure')
+                figures["exposure_fig"] = fig
+            else:
+                 figures["exposure_fig"] = create_empty_figure(f"Exposure ({filename}) - No PlotData")
+        else:
+             figures["exposure_fig"] = create_empty_figure(f"Exposure ({filename}) - Data Missing/Invalid")
+        # --- Portfolio Turnover ---
+        turnover_df = strategy_results.get("turnover_df")
+        if turnover_df is not None and not turnover_df.empty and 'Time' in turnover_df.columns and 'Turnover' in turnover_df.columns:
+             # Ensure Time is datetime
+             turnover_df['Time'] = pd.to_datetime(turnover_df['Time'])
+             fig = px.line(turnover_df, x='Time', y='Turnover', title=f'Portfolio Turnover ({filename})')
+             fig.update_layout(yaxis_title="Turnover")
+             figures["turnover_fig"] = fig
+    except Exception as e:
+        print(f"Error generating figures for {filename}: {e}")
+        traceback.print_exc()
+        # Keep default empty figures on error
+    return figures
+def generate_manual_risk_figures(analysis_results, filename="Strategy"):
+    """
+    Generates Plotly figures from manually calculated risk analysis results.
+    Args:
+        analysis_results: Dictionary containing results from calculate_manual_risk_stats.
+                          Expected keys: 'monthly_returns_table_for_heatmap', 'rolling_vol_df'.
+        filename: Name of the strategy for figure titles.
+    Returns:
+        A dictionary containing Plotly figure objects:
+        'monthly_heatmap_fig', 'rolling_vol_fig'.
+        Uses empty figures if data is missing or invalid.
+    """
+    figures = {
+        "monthly_heatmap_fig": create_empty_figure(f"Monthly Returns Heatmap ({filename})"),
+        "rolling_vol_fig": create_empty_figure(f"Rolling Volatility ({filename})")
+    }
+    try:
+        # --- Monthly Returns Heatmap ---
+        # Expects percentages (values * 100) from calculate_manual_risk_stats
+        monthly_ret_table = analysis_results.get("monthly_returns_table_for_heatmap")
+        if monthly_ret_table is not None and not monthly_ret_table.empty:
+            z = monthly_ret_table.values # The percentage values
+            x = monthly_ret_table.columns # Month names
+            y = monthly_ret_table.index   # Years
+            # Create heatmap
+            fig = go.Figure(data=go.Heatmap(
+                z=z, x=x, y=y,
+                colorscale='RdYlGn', # Red-Yellow-Green scale, good for returns
+                zmid=0,             # Center color scale around zero
+                # Format text labels shown on the heatmap cells
+                text=monthly_ret_table.applymap(lambda v: f'{v:.1f}%' if pd.notna(v) else '').values,
+                texttemplate="%{text}", # Use the formatted text
+                hoverongaps=False,      # Don't show hover info for gaps
+                colorbar=dict(title='Monthly Return (%)') # Add color bar title
+                ))
+            fig.update_layout(
+                 title=f'Monthly Returns (%) ({filename})',
+                 yaxis_nticks=len(y), # Ensure all years are shown as ticks
+                 yaxis_title="Year",
+                 yaxis_autorange='reversed' # Show earlier years at the top
+                 )
+            figures["monthly_heatmap_fig"] = fig
+        # --- Rolling Volatility Plot ---
+        rolling_vol_df = analysis_results.get("rolling_vol_df")
+        # Check if DataFrame exists, is not empty, and has the 'Time' column
+        if rolling_vol_df is not None and not rolling_vol_df.empty and 'Time' in rolling_vol_df.columns:
+            # Ensure Time is datetime
+            rolling_vol_df['Time'] = pd.to_datetime(rolling_vol_df['Time'])
+            fig = go.Figure()
+            colors = px.colors.qualitative.Plotly # Get a qualitative color sequence
+            i = 0 # Color index
+            vol_plotted = False
+            # Iterate through columns starting with 'vol_'
+            for col in rolling_vol_df.columns:
+                if col.startswith('vol_'):
+                    window_label = col.split('_')[1] # Extract window label (e.g., '3M')
+                    # Plot volatility as percentage
+                    fig.add_trace(go.Scatter(
+                         x=rolling_vol_df['Time'],
+                         y=rolling_vol_df[col] * 100, # Convert to percentage
+                         mode='lines',
+                         name=f'Rolling Vol ({window_label})',
+                         line=dict(color=colors[i % len(colors)]) # Cycle through colors
+                         ))
+                    i += 1
+                    vol_plotted = True
+            # Update layout if at least one volatility series was plotted
+            if vol_plotted:
+                fig.update_layout(
+                    title=f'Annualized Rolling Volatility ({filename})',
+                    xaxis_title='Date',
+                    yaxis_title='Volatility (%)' # Y-axis label as percentage
+                    )
+                figures["rolling_vol_fig"] = fig
+            else:
+                 figures["rolling_vol_fig"] = create_empty_figure(f"Rolling Volatility ({filename}) - No Plot Data")
+        else:
+             figures["rolling_vol_fig"] = create_empty_figure(f"Rolling Volatility ({filename}) - Data Missing/Invalid")
+    except Exception as e:
+        print(f"Error generating manual risk figures for {filename}: {e}")
+        traceback.print_exc()
+        # Keep default empty figures on error
+    return figures

processing.py ADDED Viewed

	@@ -0,0 +1,152 @@

+# -*- coding: utf-8 -*-
+"""processing.py
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/13EcoLMljb9XzVBELmFC0EBDknuHS79Vy
+"""
+# processing.py
+# Functions for processing QuantConnect JSON data.
+import json
+import pandas as pd
+import traceback
+import numpy as np
+from utils import get_nested_value, process_timeseries_chart # Import helpers
+def process_single_file(file_path):
+    """
+    Processes a single QuantConnect JSON file.
+    Extracts statistics, equity, drawdown, benchmark, trades, exposure, and turnover data.
+    Returns a dictionary containing processed dataframes and series.
+    """
+    # Extract filename from the full path
+    filename = file_path.split('/')[-1] if file_path else "Unknown File"
+    # Initialize results dictionary with default empty structures
+    results = {
+        "filename": filename,
+        "stats_df": pd.DataFrame(columns=['Metric', 'Value']), # Overall statistics
+        "equity_df": pd.DataFrame(), # Equity curve data (with 'Time' column)
+        "daily_returns": None,       # Series of daily percentage returns (DatetimeIndex)
+        "drawdown_df": pd.DataFrame(), # Drawdown curve data (with 'Time' column)
+        "benchmark_df": pd.DataFrame(),# Benchmark data (with 'Time' column)
+        "trades_df": pd.DataFrame(),   # Closed trades data
+        "exposure_series": None,   # Raw exposure data series (often needs further processing for plotting)
+        "turnover_df": pd.DataFrame(), # Portfolio turnover data (with 'Time' column)
+        "error": None                  # Stores any error message during processing
+    }
+    try:
+        # Open and load the JSON file
+        with open(file_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+        # --- Extract Statistics ---
+        # Try primary location, then fallback location for statistics
+        stats_dict = get_nested_value(data, ['statistics']) or \
+                     get_nested_value(data, ['totalPerformance', 'portfolioStatistics'])
+        if stats_dict:
+            # Convert dictionary to DataFrame
+            results["stats_df"] = pd.DataFrame(list(stats_dict.items()), columns=['Metric', 'Value'])
+        # --- Process Equity Curve and Calculate Daily Returns ---
+        equity_values = get_nested_value(data, ['charts', 'Strategy Equity', 'series', 'Equity', 'values'])
+        equity_df_indexed = process_timeseries_chart(equity_values, 'Equity') # Gets DF with DatetimeIndex
+        if not equity_df_indexed.empty:
+            # Store equity curve with 'Time' as a column for easier plotting
+            results["equity_df"] = equity_df_indexed.reset_index()
+            # Calculate daily percentage returns from the indexed equity data
+            returns_series = equity_df_indexed['Equity'].pct_change().dropna()
+            # Store the returns series if calculation was successful
+            if not returns_series.empty:
+                results["daily_returns"] = returns_series # Has DatetimeIndex (UTC)
+        # --- Process Drawdown Curve ---
+        drawdown_values = get_nested_value(data, ['charts', 'Drawdown', 'series', 'Equity Drawdown', 'values'])
+        drawdown_df_indexed = process_timeseries_chart(drawdown_values, 'Drawdown')
+        if not drawdown_df_indexed.empty:
+             results["drawdown_df"] = drawdown_df_indexed.reset_index() # Store with 'Time' column
+        # --- Process Benchmark Curve ---
+        benchmark_values = get_nested_value(data, ['charts', 'Benchmark', 'series', 'Benchmark', 'values'])
+        benchmark_df_indexed = process_timeseries_chart(benchmark_values, 'Benchmark')
+        if not benchmark_df_indexed.empty:
+            results["benchmark_df"] = benchmark_df_indexed.reset_index() # Store with 'Time' column
+        # --- Process Closed Trades ---
+        closed_trades_list = get_nested_value(data, ['totalPerformance', 'closedTrades'])
+        if closed_trades_list and isinstance(closed_trades_list, list):
+            temp_trades_df = pd.DataFrame(closed_trades_list)
+            if not temp_trades_df.empty:
+                # Convert relevant columns to numeric, coercing errors
+                numeric_cols = ['profitLoss', 'entryPrice', 'exitPrice', 'quantity', 'totalFees']
+                for col in numeric_cols:
+                    if col in temp_trades_df.columns:
+                        temp_trades_df[col] = pd.to_numeric(temp_trades_df[col], errors='coerce')
+                # Convert time columns to datetime, coercing errors
+                time_cols = ['entryTime', 'exitTime']
+                for col in time_cols:
+                     if col in temp_trades_df.columns:
+                         # Attempt conversion, handle potential ISO 8601 format with timezone
+                         try:
+                             temp_trades_df[col] = pd.to_datetime(temp_trades_df[col], errors='coerce', utc=True)
+                         except ValueError: # Fallback if direct conversion fails
+                             temp_trades_df[col] = pd.to_datetime(temp_trades_df[col].str.slice(0, 19), errors='coerce') # Try without timezone
+                             if temp_trades_df[col].notna().any(): # If some converted, make timezone naive for consistency before duration calc
+                                 temp_trades_df[col] = temp_trades_df[col].dt.tz_localize(None)
+                # Calculate trade duration if both entry and exit times are valid datetimes
+                if 'entryTime' in temp_trades_df.columns and 'exitTime' in temp_trades_df.columns and \
+                   pd.api.types.is_datetime64_any_dtype(temp_trades_df['entryTime']) and \
+                   pd.api.types.is_datetime64_any_dtype(temp_trades_df['exitTime']) and \
+                   not temp_trades_df['entryTime'].isnull().all() and \
+                   not temp_trades_df['exitTime'].isnull().all():
+                    # Make times timezone-naive for direct subtraction if they have timezones
+                    if temp_trades_df['entryTime'].dt.tz is not None:
+                        temp_trades_df['entryTime'] = temp_trades_df['entryTime'].dt.tz_convert(None)
+                    if temp_trades_df['exitTime'].dt.tz is not None:
+                        temp_trades_df['exitTime'] = temp_trades_df['exitTime'].dt.tz_convert(None)
+                    # Calculate duration as timedelta and in days
+                    temp_trades_df['duration_td'] = temp_trades_df['exitTime'] - temp_trades_df['entryTime']
+                    temp_trades_df['duration_days'] = temp_trades_df['duration_td'].dt.total_seconds() / (24 * 60 * 60)
+                else:
+                    # Set duration columns to None if times are invalid/missing
+                    temp_trades_df['duration_td'] = pd.NaT
+                    temp_trades_df['duration_days'] = np.nan
+                # Store the processed trades DataFrame
+                results["trades_df"] = temp_trades_df
+        # --- Extract Exposure Series Data ---
+        # Note: This is often nested and might need specific parsing for plotting
+        results["exposure_series"] = get_nested_value(data, ['charts', 'Exposure', 'series'])
+        # --- Process Portfolio Turnover ---
+        turnover_values = get_nested_value(data, ['charts', 'Portfolio Turnover', 'series', 'Portfolio Turnover', 'values'])
+        turnover_df_indexed = process_timeseries_chart(turnover_values, 'Turnover')
+        if not turnover_df_indexed.empty:
+            results["turnover_df"] = turnover_df_indexed.reset_index() # Store with 'Time' column
+    except FileNotFoundError:
+        error_msg = f"Error: File not found at {file_path}"
+        print(error_msg)
+        results["error"] = error_msg
+    except json.JSONDecodeError:
+        error_msg = f"Error: Could not decode JSON from {filename}"
+        print(error_msg)
+        results["error"] = error_msg
+    except Exception as e:
+        # Catch any other unexpected errors during processing
+        error_msg = f"Error processing file {filename}: {e}"
+        print(error_msg)
+        traceback.print_exc()
+        results["error"] = error_msg
+    return results

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+# requirements.txt
+# List of Python packages required for the Gradio application.
+gradio
+pandas
+plotly
+numpy
+# Optional: Add specific versions if needed, e.g., gradio==3.50.2

risk_analysis.py ADDED Viewed

	@@ -0,0 +1,463 @@

+# -*- coding: utf-8 -*-
+"""risk_analysis.py
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/10u2Di5_droisNYuq_KYAmdgVHixe6oVi
+"""
+# risk_analysis.py
+# Functions for calculating risk metrics and correlations.
+import pandas as pd
+import numpy as np
+import traceback
+import plotly.graph_objects as go
+from utils import create_empty_figure # Import helper
+def get_drawdown_table(returns: pd.Series, top: int = 5) -> pd.DataFrame:
+    """
+    Calculates drawdown periods and statistics from a series of returns.
+    Args:
+        returns: Series of daily returns with a DatetimeIndex.
+        top: Number of top drawdowns (by magnitude) to return.
+    Returns:
+        DataFrame containing information about the top drawdown periods:
+        'Peak Date', 'Valley Date', 'End Date', 'Duration (Days)', 'Max Drawdown (%)'.
+        Returns an empty DataFrame if input is invalid or no drawdowns occur.
+    """
+    # Input validation
+    if returns is None or not isinstance(returns, pd.Series) or returns.empty:
+        # print("Drawdown calculation skipped: Input returns series is invalid or empty.")
+        return pd.DataFrame()
+    if not isinstance(returns.index, pd.DatetimeIndex):
+        # print("Drawdown calculation skipped: Input returns series index is not DatetimeIndex.")
+        return pd.DataFrame()
+    # Create a DataFrame from the returns series
+    df = returns.to_frame(name='returns')
+    # Ensure returns are numeric, drop non-numeric values
+    df['returns'] = pd.to_numeric(df['returns'], errors='coerce')
+    df.dropna(subset=['returns'], inplace=True)
+    if df.empty:
+        # print("Drawdown calculation skipped: No valid numeric returns.")
+        return pd.DataFrame()
+    # Calculate cumulative returns (compounded)
+    df['Cumulative'] = (1 + df['returns']).cumprod()
+    # Calculate the running maximum cumulative return (high watermark)
+    df['HighWatermark'] = df['Cumulative'].cummax()
+    # Calculate drawdown as the percentage decline from the high watermark
+    df['Drawdown'] = (df['Cumulative'] / df['HighWatermark']) - 1
+    # Identify drawdown periods
+    in_drawdown = False # Flag to track if currently in a drawdown
+    periods = []        # List to store completed drawdown period dictionaries
+    current_period = {} # Dictionary to store details of the ongoing drawdown
+    peak_idx = df.index[0] # Initialize peak index to the start
+    for idx, row in df.iterrows():
+        # Update the peak index if a new high watermark is reached
+        # Use .loc for safe index-based comparison, especially with potential duplicate indices
+        if row['Cumulative'] >= df.loc[peak_idx, 'Cumulative']:
+            peak_idx = idx
+        is_dd = row['Drawdown'] < 0 # Check if currently in a drawdown state
+        # Start of a new drawdown period
+        if not in_drawdown and is_dd:
+            in_drawdown = True
+            current_period = {
+                'Peak Date': peak_idx,          # Date the drawdown started (previous peak)
+                'Valley Date': idx,             # Date the maximum drawdown was reached (initially the start)
+                'End Date': pd.NaT,             # Date the drawdown ended (recovered to peak) - initially NaT
+                'Max Drawdown (%)': row['Drawdown'], # The maximum drawdown percentage (initially the current DD)
+                'Duration (Days)': 0            # Duration of the drawdown - calculated at the end
+            }
+        # Inside an ongoing drawdown period
+        elif in_drawdown:
+            # Update valley date and max drawdown if a lower point is reached
+            if row['Drawdown'] < current_period['Max Drawdown (%)']:
+                current_period['Valley Date'] = idx
+                current_period['Max Drawdown (%)'] = row['Drawdown']
+            # End of the current drawdown period (recovered)
+            if not is_dd: # Recovered when Drawdown is no longer negative (or zero)
+                in_drawdown = False
+                current_period['End Date'] = idx # Mark the recovery date
+                # Calculate duration (using business days if possible, else calendar days)
+                start_date = current_period['Peak Date']
+                end_date = current_period['End Date']
+                if pd.notna(start_date) and pd.notna(end_date):
+                    try:
+                        # Attempt to use business days for duration
+                        duration = len(pd.bdate_range(start=start_date, end=end_date))
+                    except Exception: # Fallback to calendar days if bdate_range fails (e.g., non-standard dates)
+                         duration = (end_date - start_date).days + 1 # Inclusive of start/end day
+                    current_period['Duration (Days)'] = duration
+                else:
+                    current_period['Duration (Days)'] = np.nan # Duration is NaN if dates are invalid
+                periods.append(current_period) # Add the completed period to the list
+                current_period = {} # Reset for the next potential drawdown
+    # Handle the case where the series ends while still in a drawdown
+    if in_drawdown:
+        start_date = current_period['Peak Date']
+        end_date = df.index[-1] # End date is the last date in the series
+        if pd.notna(start_date) and pd.notna(end_date):
+             try:
+                 duration = len(pd.bdate_range(start=start_date, end=end_date))
+             except Exception:
+                 duration = (end_date - start_date).days + 1
+             current_period['Duration (Days)'] = duration
+        else:
+             current_period['Duration (Days)'] = np.nan
+        # 'End Date' remains NaT as recovery hasn't happened by the end of the data
+        periods.append(current_period)
+    # If no drawdown periods were identified
+    if not periods:
+        return pd.DataFrame()
+    # Create DataFrame from the identified periods
+    drawdown_df = pd.DataFrame(periods)
+    # Sort by the magnitude of the drawdown (most negative first) and select the top N
+    drawdown_df = drawdown_df.sort_values(by='Max Drawdown (%)', ascending=True).head(top)
+    # Format the Max Drawdown column as percentage
+    drawdown_df['Max Drawdown (%)'] = drawdown_df['Max Drawdown (%)'].map('{:.2%}'.format)
+    # Format date columns to YYYY-MM-DD strings for display
+    for col in ['Peak Date', 'Valley Date', 'End Date']:
+        if col in drawdown_df.columns:
+             # Ensure conversion to datetime first, then format
+             drawdown_df[col] = pd.to_datetime(drawdown_df[col]).dt.strftime('%Y-%m-%d')
+    # Select and order columns for the final output table
+    cols_to_select = ['Peak Date', 'Valley Date', 'End Date', 'Duration (Days)', 'Max Drawdown (%)']
+    # Ensure only existing columns are selected (e.g., 'End Date' might be all NaT if never recovered)
+    existing_cols = [col for col in cols_to_select if col in drawdown_df.columns]
+    return drawdown_df[existing_cols]
+def calculate_manual_risk_stats(returns_series):
+    """
+    Calculates various risk and performance metrics manually using pandas based on daily returns.
+    Args:
+        returns_series: A pandas Series of daily percentage returns with a DatetimeIndex.
+    Returns:
+        A dictionary containing:
+        - monthly_returns_table_for_heatmap: DataFrame pivoted for monthly return heatmap (values as percentages).
+        - monthly_perf_stats: DataFrame with summary stats for monthly returns.
+        - rolling_vol_df: DataFrame containing rolling annualized volatility calculations (with 'Time' column).
+        - rolling_vol_stats: DataFrame summarizing min/max/mean rolling volatility.
+        - drawdown_table: DataFrame with top drawdown periods (from get_drawdown_table).
+        - status: A string indicating the status of the analysis.
+    """
+    # Initialize results dictionary with default empty structures
+    analysis_results = {
+        "monthly_returns_table_for_heatmap": pd.DataFrame(),
+        "monthly_perf_stats": pd.DataFrame(columns=['Metric', 'Value']),
+        "rolling_vol_df": pd.DataFrame(),
+        "rolling_vol_stats": pd.DataFrame(columns=['Window', 'Min Vol', 'Max Vol', 'Mean Vol']),
+        "drawdown_table": pd.DataFrame(),
+        "status": "Analysis skipped." # Default status
+    }
+    # --- Input Validation ---
+    if returns_series is None or not isinstance(returns_series, pd.Series) or returns_series.empty or len(returns_series) < 2:
+        analysis_results["status"] = "Analysis skipped: Insufficient/invalid returns data."
+        return analysis_results
+    if not isinstance(returns_series.index, pd.DatetimeIndex):
+        analysis_results["status"] = "Analysis skipped: Returns index is not DatetimeIndex."
+        return analysis_results
+    try:
+        status_parts = [] # To collect status messages for different parts
+        # Ensure returns are numeric and index is UTC DatetimeIndex
+        returns_series = pd.to_numeric(returns_series, errors='coerce').dropna()
+        if returns_series.empty or len(returns_series) < 2:
+             analysis_results["status"] = "Analysis skipped: No valid numeric returns after cleaning."
+             return analysis_results
+        if returns_series.index.tz is None:
+            returns_series = returns_series.tz_localize('UTC')
+        elif returns_series.index.tz != 'UTC':
+            returns_series = returns_series.tz_convert('UTC')
+        # --- Monthly Returns Analysis ---
+        # Resample daily returns to monthly, calculating compounded monthly return
+        # The lambda function calculates (1+r1)*(1+r2)*...*(1+rn) - 1 for each month
+        monthly_rets = returns_series.resample('M').apply(lambda x: (1 + x).prod() - 1)
+        if not monthly_rets.empty:
+            # Create table for heatmap: Year rows, Month columns
+            monthly_ret_table_df = pd.DataFrame({'returns': monthly_rets})
+            monthly_ret_table_df['Year'] = monthly_ret_table_df.index.year
+            monthly_ret_table_df['Month'] = monthly_ret_table_df.index.strftime('%b') # Month abbreviation (Jan, Feb, ...)
+            # Pivot the table
+            monthly_heatmap_data = monthly_ret_table_df.pivot_table(index='Year', columns='Month', values='returns')
+            # Order columns chronologically
+            month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+            present_months = [m for m in month_order if m in monthly_heatmap_data.columns]
+            monthly_heatmap_data = monthly_heatmap_data[present_months]
+            # Sort index (Year) ascending
+            monthly_heatmap_data.sort_index(ascending=True, inplace=True)
+            # Store as percentages for the heatmap plot
+            analysis_results["monthly_returns_table_for_heatmap"] = monthly_heatmap_data * 100
+            # Monthly Performance Statistics
+            monthly_stats = {
+                "Min": f"{monthly_rets.min():.2%}",
+                "Max": f"{monthly_rets.max():.2%}",
+                "Mean": f"{monthly_rets.mean():.2%}",
+                "Positive Months": (monthly_rets > 0).sum(),
+                "Negative Months": (monthly_rets <= 0).sum()
+            }
+            analysis_results["monthly_perf_stats"] = pd.DataFrame(list(monthly_stats.items()), columns=['Metric', 'Value'])
+            status_parts.append("Monthly stats OK.")
+        else:
+            status_parts.append("Monthly stats skipped (no monthly data).")
+        # --- Rolling Volatility Analysis ---
+        vol_df = pd.DataFrame(index=returns_series.index) # Initialize DF to store rolling vol results
+        vol_stats_list = [] # List to store summary stats for each window
+        # Define windows (label: number of trading days)
+        windows = {'3M': 63, '6M': 126, '12M': 252}
+        vol_calculated = False
+        for label, window in windows.items():
+            # Check if there's enough data for the window
+            if len(returns_series) >= window:
+                try:
+                    # Calculate rolling standard deviation
+                    # min_periods ensures calculation starts even if window isn't full yet (adjust as needed)
+                    rolling_std = returns_series.rolling(window=window, min_periods=window // 2).std()
+                    # Annualize the volatility (multiply by sqrt of trading days per year)
+                    rolling_vol = rolling_std * np.sqrt(252)
+                    # Store the result in the DataFrame
+                    vol_df[f'vol_{label}'] = rolling_vol
+                    # Calculate summary stats for this window's volatility
+                    if not rolling_vol.dropna().empty: # Check if there are valid vol values
+                        vol_stats_list.append({
+                            "Window": label,
+                            "Min Vol": f"{rolling_vol.min():.2%}",
+                            "Max Vol": f"{rolling_vol.max():.2%}",
+                            "Mean Vol": f"{rolling_vol.mean():.2%}"
+                        })
+                        vol_calculated = True
+                except Exception as vol_e:
+                    print(f"Error calculating rolling volatility for window {label}: {vol_e}")
+                    status_parts.append(f"Rolling Vol ({label}) Error.")
+        # Store the rolling volatility DataFrame (reset index to get 'Time' column for plotting)
+        if not vol_df.empty:
+            analysis_results["rolling_vol_df"] = vol_df.reset_index()
+        # Store the summary statistics if any were calculated
+        if vol_stats_list:
+            analysis_results["rolling_vol_stats"] = pd.DataFrame(vol_stats_list)
+            status_parts.append("Rolling Vol OK.")
+        elif not vol_calculated and "Error" not in " ".join(status_parts): # If no vol calculated and no errors reported
+             status_parts.append("Rolling Vol skipped (insufficient data for windows).")
+        # --- Drawdown Table Calculation ---
+        try:
+            analysis_results["drawdown_table"] = get_drawdown_table(returns_series, top=5)
+            if not analysis_results["drawdown_table"].empty:
+                 status_parts.append("Drawdown Table OK.")
+            else:
+                 status_parts.append("Drawdown Table: No drawdowns found or error.")
+        except Exception as dd_e:
+             print(f"Error calculating drawdown table: {dd_e}")
+             traceback.print_exc()
+             status_parts.append("Drawdown Table Error.")
+        # --- Final Status ---
+        analysis_results["status"] = " ".join(status_parts) if status_parts else "Analysis completed (no specific issues)."
+    except Exception as e:
+        # Catch-all for any unexpected error during the entire analysis
+        error_msg = f"Error during manual risk analysis: {e}"
+        print(error_msg)
+        traceback.print_exc()
+        analysis_results["status"] = f"Manual risk analysis failed: {e}"
+    return analysis_results
def _extract_utc_series(frame, value_column):
    """Return ``frame[value_column]`` indexed by UTC 'Time', or None if unusable.

    A frame is usable when it is non-empty, has both a 'Time' and a
    ``value_column`` column, and 'Time' has a datetime dtype. Duplicate
    timestamps keep their first occurrence.
    """
    if frame is None or frame.empty:
        return None
    if 'Time' not in frame.columns or value_column not in frame.columns:
        return None
    if not pd.api.types.is_datetime64_any_dtype(frame['Time']):
        return None
    series = frame.set_index('Time')[value_column]
    series = series[~series.index.duplicated(keep='first')]
    # A tzinfo object never equals the string 'UTC', so branch on naive vs.
    # aware only; tz_convert('UTC') is a no-op when the index is already UTC.
    if series.index.tz is None:
        series = series.tz_localize('UTC')
    else:
        series = series.tz_convert('UTC')
    return None if series.empty else series


def calculate_correlation(all_results):
    """
    Calculates the Pearson correlation matrix of the returns of multiple
    strategies, optionally including the benchmark, and renders it as a heatmap.

    Args:
        all_results: A dictionary where keys are strategy filenames and values are
                     the result dictionaries obtained from process_single_file.
                     Entries with a truthy 'error' are skipped. Each remaining
                     entry should contain 'equity_df' ('Time', 'Equity' columns)
                     and optionally 'benchmark_df' ('Time', 'Benchmark' columns).
    Returns:
        A tuple containing:
        - correlation_matrix: DataFrame of the Pearson correlation coefficients
          (empty when the analysis is skipped or fails).
        - heatmap_fig: Plotly heatmap figure of the correlation matrix
          (a placeholder figure when the analysis is skipped or fails).
        - corr_status: String message indicating the status of the calculation.
    """
    # Default outputs used on every early-exit path
    default_corr_matrix = pd.DataFrame()
    default_heatmap = create_empty_figure("Correlation Heatmap (Insufficient Data)")
    corr_status = "Correlation analysis skipped."

    equity_data_all = {}   # {filename: equity Series indexed by UTC time}
    benchmark_data = None  # first valid benchmark Series found

    # --- Extract Equity and Benchmark Data ---
    for filename, results in all_results.items():
        if results.get("error"):  # Skip files that had processing errors
            print(f"Skipping {filename} for correlation due to processing error.")
            continue
        equity_series = _extract_utc_series(results.get("equity_df"), 'Equity')
        if equity_series is None:
            print(f"Skipping {filename} for correlation: Invalid or empty equity_df or Time column.")
            continue
        equity_data_all[filename] = equity_series
        # The benchmark is taken from the *first* valid strategy that carries one
        if benchmark_data is None:
            benchmark_data = _extract_utc_series(results.get("benchmark_df"), 'Benchmark')
            if benchmark_data is not None:
                print(f"Using benchmark data from {filename} for correlation.")

    valid_strategies_count = len(equity_data_all)

    # --- Check if enough data for correlation ---
    # At least two return series are needed: two strategies, or one strategy
    # plus the benchmark.
    if valid_strategies_count == 0:
        corr_status = "Correlation skipped: No valid strategy equity data found."
        return default_corr_matrix, default_heatmap, corr_status
    if valid_strategies_count == 1 and benchmark_data is None:
        corr_status = "Correlation skipped: Only one strategy and no benchmark data."
        return default_corr_matrix, default_heatmap, corr_status

    # --- Combine Data and Calculate Returns ---
    # Outer join keeps every timestamp seen in any strategy
    combined_equity = pd.concat(equity_data_all, axis=1, join='outer')
    if benchmark_data is not None:
        # Column assignment aligns the benchmark onto the strategies' timestamps
        combined_equity['Benchmark'] = benchmark_data
    combined_equity = combined_equity.sort_index()

    # Forward-fill only *interior* gaps. bfill().notna() is False after a
    # column's last real observation, so a backtest that ends early is not
    # extended with a flat equity line (which would fabricate zero returns
    # and bias the correlation).
    has_later_observation = combined_equity.bfill().notna()
    combined_equity_filled = combined_equity.ffill().where(has_later_observation)

    # fill_method=None stops pct_change from padding the trailing NaNs again.
    # NOTE(review): returns are taken between consecutive timestamps; they are
    # daily only if the input series are sampled daily.
    daily_returns = combined_equity_filled.pct_change(fill_method=None)
    # Division by a zero equity value yields +/-inf; treat those as missing
    daily_returns = daily_returns.replace([np.inf, -np.inf], np.nan)
    # Keep only rows where every series has a return (the common overlap)
    daily_returns = daily_returns.dropna()

    if len(daily_returns) < 2:
        corr_status = "Correlation skipped: Not enough overlapping daily data points after cleaning."
        return default_corr_matrix, default_heatmap, corr_status

    # --- Calculate Correlation Matrix ---
    try:
        correlation_matrix = daily_returns.corr(method='pearson')
        corr_status = f"Correlation calculated for {valid_strategies_count} strategies"
        corr_status += " and Benchmark." if benchmark_data is not None else "."
    except Exception as corr_e:
        print(f"Error calculating correlation matrix: {corr_e}")
        traceback.print_exc()
        corr_status = f"Correlation calculation failed: {corr_e}"
        return default_corr_matrix, default_heatmap, corr_status

    # --- Generate Correlation Heatmap Figure ---
    heatmap_fig = create_empty_figure("Correlation Heatmap")  # Default empty
    try:
        labels = correlation_matrix.columns
        heatmap_fig = go.Figure(data=go.Heatmap(
            z=correlation_matrix.values,
            x=labels,
            y=labels,
            colorscale='RdBu',  # Diverging scale suits a signed quantity
            zmin=-1, zmax=1,    # Pin the scale to the full correlation range
            colorbar=dict(title='Correlation')
        ))
        heatmap_fig.update_layout(
            title='Strategy (+Benchmark) Daily Return Correlation',
            xaxis_tickangle=-45,        # Angled labels stay readable with many strategies
            yaxis_autorange='reversed'  # First row on top, like a printed matrix
        )
        # Write each coefficient into its cell
        for i, row_label in enumerate(labels):
            for j, col_label in enumerate(labels):
                corr_value = correlation_matrix.iloc[i, j]
                if pd.isna(corr_value):
                    continue  # e.g. a constant series has no defined correlation
                # Strong correlations sit on dark cells, so use white text there
                text_color = "white" if abs(corr_value) > 0.5 else "black"
                heatmap_fig.add_annotation(
                    x=col_label,
                    y=row_label,
                    text=f"{corr_value:.2f}",
                    showarrow=False,
                    font=dict(color=text_color)
                )
    except Exception as e:
        # The matrix itself is still valid; only the figure is replaced
        print(f"Error creating correlation heatmap figure: {e}")
        traceback.print_exc()
        heatmap_fig = create_empty_figure("Error Creating Correlation Heatmap")
    return correlation_matrix, heatmap_fig, corr_status

utils.py ADDED Viewed

	@@ -0,0 +1,138 @@

+# -*- coding: utf-8 -*-
+"""utils.py
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1RyRghhbleQJ01USX_0O4uUALsuFM10hJ
+"""
+# utils.py
+# Helper functions for data manipulation and plotting defaults.
+import pandas as pd
+import plotly.graph_objects as go
+import re
+import numpy as np
+import traceback
def get_nested_value(data_dict, keys, default=None):
    """Follow `keys` through nested dicts/lists; return `default` if any step fails.

    A dict step succeeds when the key is present. A list step succeeds only
    for an int index in ``0 <= index < len(list)`` (negative indices are
    rejected). Any other container type, or a missing key/index, yields
    `default`. An empty `keys` returns `data_dict` itself.
    """
    node = data_dict
    for step in keys:
        if isinstance(node, dict):
            if step not in node:
                return default
        elif isinstance(node, list):
            if not isinstance(step, int) or not 0 <= step < len(node):
                return default
        else:
            # Scalars, None, tuples, ... cannot be descended into
            return default
        node = node[step]
    return node
def parse_numeric_string(value_str, default=None):
    """Parse a number out of a string that may contain '$', '%' or ','.

    Strings are stripped of those symbols and surrounding whitespace, then
    converted with float(); `default` is returned when conversion fails.
    int/float inputs are returned unchanged. Any other type yields `default`.
    """
    if isinstance(value_str, str):
        # Drop currency symbols, percent signs and thousands separators
        stripped = re.sub(r'[$,%]', '', value_str).strip()
        try:
            return float(stripped)
        except (ValueError, TypeError):
            return default
    # Already numeric: pass through; everything else (None, lists, ...) -> default
    return value_str if isinstance(value_str, (int, float)) else default
def create_empty_figure(title="No Data Available"):
    """Build a placeholder Plotly figure that shows `title` and hides both axes.

    The text is used twice: as the figure title and as a paper-referenced
    annotation without an arrow.
    """
    placeholder_note = {
        'text': title,
        'xref': 'paper', 'yref': 'paper',
        'showarrow': False, 'font': {'size': 16}
    }
    layout_settings = {
        'title': title,
        'xaxis': {'visible': False},
        'yaxis': {'visible': False},
        'annotations': [placeholder_note],
    }
    fig = go.Figure()
    fig.update_layout(**layout_settings)
    return fig
def process_timeseries_chart(chart_data, value_col_name='Value'):
    """
    Processes QuantConnect timeseries chart data like [[timestamp, value, ...], ...].

    Assumes timestamps are epoch SECONDS. The first element of each point is
    the timestamp and the second is the value; extra elements are ignored.
    Points that are not a list/tuple of at least two items, that contain None,
    or whose timestamp/value cannot be converted are dropped individually, so
    one malformed point never discards the rest of the series.

    Args:
        chart_data: List of points, each a list/tuple ``[timestamp, value, ...]``.
        value_col_name: Name of the value column in the returned DataFrame.
    Returns:
        A DataFrame with a single numeric column ``value_col_name``, indexed by
        a timezone-aware (UTC) DatetimeIndex named 'Time' and sorted by time.
        When the input is invalid, nothing usable remains, or an unexpected
        error occurs, an empty DataFrame with the same column and an index
        named 'Time' is returned (errors are printed, not raised).
    """
    def _empty_result():
        # Same column layout as the success path so callers can treat both alike
        return pd.DataFrame(columns=['Time', value_col_name]).set_index('Time')

    if not chart_data or not isinstance(chart_data, list):
        return _empty_result()
    try:
        # Keep only well-formed points with both a timestamp and a value
        points = [
            (item[0], item[1]) for item in chart_data
            if isinstance(item, (list, tuple)) and len(item) >= 2
            and item[0] is not None and item[1] is not None
        ]
        if not points:
            return _empty_result()

        df = pd.DataFrame(points, columns=['Time_Raw', value_col_name])
        # Coerce instead of raising: unparsable cells become NaN/NaT and are
        # dropped together below.
        df['Time_Raw'] = pd.to_numeric(df['Time_Raw'], errors='coerce')
        df[value_col_name] = pd.to_numeric(df[value_col_name], errors='coerce')
        # utc=True yields a UTC-aware result directly, so no separate
        # localize/convert step (or tz comparison) is needed afterwards.
        df['Time'] = pd.to_datetime(df['Time_Raw'], unit='s', errors='coerce', utc=True)
        df = df.dropna(subset=['Time', value_col_name])
        if df.empty:
            return _empty_result()

        return df.set_index('Time')[[value_col_name]].sort_index()
    except Exception as e:
        # Deliberately best-effort: report the problem and hand back an empty frame
        print(f"Error creating/processing DataFrame for {value_col_name}: {e}")
        traceback.print_exc()
        return _empty_result()