Upload 63 files

c5ef85d verified 5 days ago

6.58 kB

	import pandas as pd
	import numpy as np
	import scipy.stats as stats

	def calculate_rolling_metrics(input_csv, output_csv):
	print(f"Reading data from: {input_csv}")

	try:
	df = pd.read_csv(input_csv)
	except FileNotFoundError:
	print(f"Error: The file '{input_csv}' was not found.")
	return

	# ---------------------------------------------------------
	# 1. Preprocessing & Cleaning
	# ---------------------------------------------------------
	# Convert time columns to datetime objects
	df['Time_deal'] = pd.to_datetime(df['Time_deal'], errors='coerce')

	# Sort by time to ensure rolling metrics are chronological
	df = df.sort_values(by='Time_deal').reset_index(drop=True)

	# Ensure numeric columns are floats and handle missing values
	df['Profit'] = pd.to_numeric(df['Profit'], errors='coerce').fillna(0.0)
	df['Balance'] = pd.to_numeric(df['Balance'], errors='coerce').ffill()

	# Define helper series
	equity = df['Balance']
	profits = df['Profit']

	# Determine Initial Balance (Start of the window)
	# We assume the first record's balance is the starting point
	initial_balance = equity.iloc[0]

	# Calculate Time Elapsed in Years (for CAGR)
	start_time = df['Time_deal'].iloc[0]
	# Add small epsilon to prevent division by zero on the first row
	years_elapsed = (df['Time_deal'] - start_time).dt.total_seconds() / (365.25 * 24 * 3600)
	years_elapsed = years_elapsed.replace(0, 0.000001)

	# ---------------------------------------------------------
	# 2. Rolling (Expanding) Metric Calculations
	# ---------------------------------------------------------
	print("Calculating rolling metrics...")

	# --- Helpers for Win/Loss Stats ---
	# Create streams of just wins and just losses (positive)
	wins = profits.clip(lower=0)
	losses = profits.clip(upper=0).abs()

	expand_wins_sum = wins.expanding().sum()
	expand_losses_sum = losses.expanding().sum()
	expand_max_win = wins.expanding().max()
	expand_max_loss = losses.expanding().max()

	# --- Helper for Drawdown Stats ---
	hwm = equity.expanding().max() # High Water Mark
	dd_dollar = hwm - equity # Drawdown in $
	dd_pct = dd_dollar / hwm # Drawdown in %

	dd_sq_mean = (dd_pct ** 2).expanding().mean() # For Ulcer Index
	dd_sq_sum = (dd_pct ** 2).expanding().sum() # For Burke Ratio
	max_dd_pct = dd_pct.expanding().max() # For Sterling Ratio

	# --- Helper for Returns Distribution (Risk of Ruin / Sharpe) ---
	pct_ret = equity.pct_change().fillna(0)
	roll_mean_ret = pct_ret.expanding().mean()
	roll_var_ret = pct_ret.expanding().var()
	roll_std_ret = pct_ret.expanding().std()
	roll_skew = pct_ret.expanding().skew().fillna(0)
	roll_kurt = pct_ret.expanding().kurt().fillna(0) # Excess kurtosis
	n_trades = profits.expanding().count()

	# ==========================================
	# Requested Metrics
	# ==========================================

	# 1. Expectancy (Average Profit)
	df['rolling_Expectancy_Expectancy'] = profits.expanding().mean()

	# 2. Gain-to-Pain Ratio (GPR) -> Sum(Wins) / Abs(Sum(Losses))
	df['rolling_GPR_GainToPainRatio'] = expand_wins_sum / expand_losses_sum.replace(0, np.nan)

	# 3. CAGR (Compound Annual Growth Rate)
	# Formula: (End_Equity / Start_Equity)^(1/years) - 1
	# We use abs() to handle negative equity scenarios safely
	df['rolling_CAGR_CompoundAnnualGrowthRate'] = (
	(equity / initial_balance).abs().pow(1 / years_elapsed) - 1
	)

	# 4. Martin Ratio -> CAGR / Ulcer Index
	ulcer_index = np.sqrt(dd_sq_mean)
	df['rolling_Martin_MartinRatio'] = df['rolling_CAGR_CompoundAnnualGrowthRate'] / ulcer_index.replace(0, np.nan)

	# 5. Sterling Ratio -> CAGR / Max Drawdown
	df['rolling_Sterling_SterlingRatio'] = df['rolling_CAGR_CompoundAnnualGrowthRate'] / max_dd_pct.replace(0, np.nan)

	# 6. Burke Ratio -> CAGR / Sqrt(Sum(Drawdown^2))
	burke_denom = np.sqrt(dd_sq_sum)
	df['rolling_Burke_BurkeRatio'] = df['rolling_CAGR_CompoundAnnualGrowthRate'] / burke_denom.replace(0, np.nan)

	# 7. Risk of Ruin -> exp(-2 * mean_ret / var_ret)
	# If mean return is negative, Ruin is 100% (1.0).
	ror = np.exp(-2 * roll_mean_ret / roll_var_ret)
	ror = np.where(roll_mean_ret < 0, 1.0, ror)
	df['rolling_RoR_RiskOfRuin'] = ror.clip(0, 1)

	# 8. Deflated Sharpe Ratio (DSR)
	# Using Probabilistic Sharpe Ratio (PSR) logic on the expanding window
	sr = roll_mean_ret / roll_std_ret
	# DSR Denominator term: 1 - skewSR + ((kurt+2)/4)SR^2
	dsr_denom_term = 1 - roll_skew * sr + ((roll_kurt + 2) / 4) * (sr**2)
	dsr_denom = np.sqrt(dsr_denom_term.abs())

	dsr_stat = (sr * np.sqrt(n_trades - 1)) / dsr_denom.replace(0, np.nan)
	df['rolling_DSR_DeflatedSharpeRatio'] = stats.norm.cdf(dsr_stat)

	# 9. Pain Index -> Mean Drawdown Depth
	df['rolling_PainIndex_PainIndex'] = dd_pct.expanding().mean()

	# 10. Pain Ratio -> CAGR / Pain Index
	df['rolling_PainRatio_PainRatio'] = df['rolling_CAGR_CompoundAnnualGrowthRate'] / df['rolling_PainIndex_PainIndex'].replace(0, np.nan)

	# 11. Lake Ratio -> Sum(Drawdown_Peaks) / Total Profit
	# Approximated here as Area Under Water / Total Profit
	df['rolling_Lake_LakeRatio'] = dd_dollar.expanding().sum() / profits.cumsum().replace(0, np.nan)

	# 12. Outlier Win/Loss Ratio (OWLR) -> Max Win / Max Loss
	df['rolling_OWLR_OutlierWinLossRatio'] = expand_max_win / expand_max_loss.replace(0, np.nan)

	# 13. Profitability Index -> Profit Factor (Gross Wins / Gross Losses)
	df['rolling_PI_ProfitabilityIndex'] = expand_wins_sum / expand_losses_sum.replace(0, np.nan)

	# ---------------------------------------------------------
	# 3. Export
	# ---------------------------------------------------------
	# Clean up Infinite values (divide by zero artifacts)
	metric_cols = [c for c in df.columns if 'rolling_' in c]
	df[metric_cols] = df[metric_cols].replace([np.inf, -np.inf], np.nan)

	df.to_csv(output_csv, index=False)
	print(f"Success! Processed data saved to: {output_csv}")

	# --- Execution ---
	if __name__ == "__main__":
	input_filename = 'merged_extracted_orders_and_deals.csv'
	output_filename = '5_layer_output.csv'

	calculate_rolling_metrics(input_filename, output_filename)