algorembrant's picture
Upload 63 files
c5ef85d verified
import pandas as pd
import numpy as np
import scipy.stats as stats
def calculate_rolling_metrics(input_csv, output_csv):
print(f"Reading data from: {input_csv}")
try:
df = pd.read_csv(input_csv)
except FileNotFoundError:
print(f"Error: The file '{input_csv}' was not found.")
return
# ---------------------------------------------------------
# 1. Preprocessing & Cleaning
# ---------------------------------------------------------
# Convert time columns to datetime objects
df['Time_deal'] = pd.to_datetime(df['Time_deal'], errors='coerce')
# Sort by time to ensure rolling metrics are chronological
df = df.sort_values(by='Time_deal').reset_index(drop=True)
# Ensure numeric columns are floats and handle missing values
df['Profit'] = pd.to_numeric(df['Profit'], errors='coerce').fillna(0.0)
df['Balance'] = pd.to_numeric(df['Balance'], errors='coerce').ffill()
# Define helper series
equity = df['Balance']
profits = df['Profit']
# Determine Initial Balance (Start of the window)
# We assume the first record's balance is the starting point
initial_balance = equity.iloc[0]
# Calculate Time Elapsed in Years (for CAGR)
start_time = df['Time_deal'].iloc[0]
# Add small epsilon to prevent division by zero on the first row
years_elapsed = (df['Time_deal'] - start_time).dt.total_seconds() / (365.25 * 24 * 3600)
years_elapsed = years_elapsed.replace(0, 0.000001)
# ---------------------------------------------------------
# 2. Rolling (Expanding) Metric Calculations
# ---------------------------------------------------------
print("Calculating rolling metrics...")
# --- Helpers for Win/Loss Stats ---
# Create streams of just wins and just losses (positive)
wins = profits.clip(lower=0)
losses = profits.clip(upper=0).abs()
expand_wins_sum = wins.expanding().sum()
expand_losses_sum = losses.expanding().sum()
expand_max_win = wins.expanding().max()
expand_max_loss = losses.expanding().max()
# --- Helper for Drawdown Stats ---
hwm = equity.expanding().max() # High Water Mark
dd_dollar = hwm - equity # Drawdown in $
dd_pct = dd_dollar / hwm # Drawdown in %
dd_sq_mean = (dd_pct ** 2).expanding().mean() # For Ulcer Index
dd_sq_sum = (dd_pct ** 2).expanding().sum() # For Burke Ratio
max_dd_pct = dd_pct.expanding().max() # For Sterling Ratio
# --- Helper for Returns Distribution (Risk of Ruin / Sharpe) ---
pct_ret = equity.pct_change().fillna(0)
roll_mean_ret = pct_ret.expanding().mean()
roll_var_ret = pct_ret.expanding().var()
roll_std_ret = pct_ret.expanding().std()
roll_skew = pct_ret.expanding().skew().fillna(0)
roll_kurt = pct_ret.expanding().kurt().fillna(0) # Excess kurtosis
n_trades = profits.expanding().count()
# ==========================================
# Requested Metrics
# ==========================================
# 1. Expectancy (Average Profit)
df['rolling_Expectancy_Expectancy'] = profits.expanding().mean()
# 2. Gain-to-Pain Ratio (GPR) -> Sum(Wins) / Abs(Sum(Losses))
df['rolling_GPR_GainToPainRatio'] = expand_wins_sum / expand_losses_sum.replace(0, np.nan)
# 3. CAGR (Compound Annual Growth Rate)
# Formula: (End_Equity / Start_Equity)^(1/years) - 1
# We use abs() to handle negative equity scenarios safely
df['rolling_CAGR_CompoundAnnualGrowthRate'] = (
(equity / initial_balance).abs().pow(1 / years_elapsed) - 1
)
# 4. Martin Ratio -> CAGR / Ulcer Index
ulcer_index = np.sqrt(dd_sq_mean)
df['rolling_Martin_MartinRatio'] = df['rolling_CAGR_CompoundAnnualGrowthRate'] / ulcer_index.replace(0, np.nan)
# 5. Sterling Ratio -> CAGR / Max Drawdown
df['rolling_Sterling_SterlingRatio'] = df['rolling_CAGR_CompoundAnnualGrowthRate'] / max_dd_pct.replace(0, np.nan)
# 6. Burke Ratio -> CAGR / Sqrt(Sum(Drawdown^2))
burke_denom = np.sqrt(dd_sq_sum)
df['rolling_Burke_BurkeRatio'] = df['rolling_CAGR_CompoundAnnualGrowthRate'] / burke_denom.replace(0, np.nan)
# 7. Risk of Ruin -> exp(-2 * mean_ret / var_ret)
# If mean return is negative, Ruin is 100% (1.0).
ror = np.exp(-2 * roll_mean_ret / roll_var_ret)
ror = np.where(roll_mean_ret < 0, 1.0, ror)
df['rolling_RoR_RiskOfRuin'] = ror.clip(0, 1)
# 8. Deflated Sharpe Ratio (DSR)
# Using Probabilistic Sharpe Ratio (PSR) logic on the expanding window
sr = roll_mean_ret / roll_std_ret
# DSR Denominator term: 1 - skew*SR + ((kurt+2)/4)*SR^2
dsr_denom_term = 1 - roll_skew * sr + ((roll_kurt + 2) / 4) * (sr**2)
dsr_denom = np.sqrt(dsr_denom_term.abs())
dsr_stat = (sr * np.sqrt(n_trades - 1)) / dsr_denom.replace(0, np.nan)
df['rolling_DSR_DeflatedSharpeRatio'] = stats.norm.cdf(dsr_stat)
# 9. Pain Index -> Mean Drawdown Depth
df['rolling_PainIndex_PainIndex'] = dd_pct.expanding().mean()
# 10. Pain Ratio -> CAGR / Pain Index
df['rolling_PainRatio_PainRatio'] = df['rolling_CAGR_CompoundAnnualGrowthRate'] / df['rolling_PainIndex_PainIndex'].replace(0, np.nan)
# 11. Lake Ratio -> Sum(Drawdown_Peaks) / Total Profit
# Approximated here as Area Under Water / Total Profit
df['rolling_Lake_LakeRatio'] = dd_dollar.expanding().sum() / profits.cumsum().replace(0, np.nan)
# 12. Outlier Win/Loss Ratio (OWLR) -> Max Win / Max Loss
df['rolling_OWLR_OutlierWinLossRatio'] = expand_max_win / expand_max_loss.replace(0, np.nan)
# 13. Profitability Index -> Profit Factor (Gross Wins / Gross Losses)
df['rolling_PI_ProfitabilityIndex'] = expand_wins_sum / expand_losses_sum.replace(0, np.nan)
# ---------------------------------------------------------
# 3. Export
# ---------------------------------------------------------
# Clean up Infinite values (divide by zero artifacts)
metric_cols = [c for c in df.columns if 'rolling_' in c]
df[metric_cols] = df[metric_cols].replace([np.inf, -np.inf], np.nan)
df.to_csv(output_csv, index=False)
print(f"Success! Processed data saved to: {output_csv}")
# --- Execution ---
if __name__ == "__main__":
input_filename = 'merged_extracted_orders_and_deals.csv'
output_filename = '5_layer_output.csv'
calculate_rolling_metrics(input_filename, output_filename)