Spaces:
Sleeping
Sleeping
Update backtest.py
Browse files- backtest.py +82 -35
backtest.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import matplotlib
|
| 2 |
matplotlib.use('Agg')
|
| 3 |
import yfinance as yf
|
|
@@ -7,31 +10,83 @@ import matplotlib.pyplot as plt
|
|
| 7 |
import os
|
| 8 |
import time
|
| 9 |
import random
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# --- CONFIGURATION ---
|
| 12 |
START_DATE = "2010-01-01"
|
| 13 |
INITIAL_CAPITAL = 1000000
|
| 14 |
SIMULATION_TIME_MIN = 35
|
|
|
|
| 15 |
|
| 16 |
-
def
|
| 17 |
-
"""Loads the ENTIRE universe without look-ahead fundamental bias."""
|
| 18 |
try:
|
| 19 |
df = pd.read_csv("EQUITY_L.csv")
|
| 20 |
df.columns = [c.strip() for c in df.columns]
|
| 21 |
if 'SERIES' in df.columns: df = df[df['SERIES'] == 'EQ']
|
| 22 |
-
|
| 23 |
-
# Filter by listing date to ensure no future knowledge is used
|
| 24 |
-
if 'DATE OF LISTING' in df.columns:
|
| 25 |
-
df['ListDate'] = pd.to_datetime(df['DATE OF LISTING'], format='%d-%b-%Y', errors='coerce')
|
| 26 |
-
df = df[df['ListDate'] < pd.to_datetime("2010-01-01")]
|
| 27 |
-
|
| 28 |
tickers = [f"{x}.NS" for x in df['SYMBOL'].tolist()]
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
# UNLOCKED: Returns all valid stocks, no more [:250] limit.
|
| 32 |
-
return tickers
|
| 33 |
except:
|
| 34 |
-
return ["RELIANCE.NS", "TCS.NS", "INFY.NS", "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
def run_strategy_genome(data, genome):
|
| 37 |
if data.empty: return -1.0, []
|
|
@@ -43,7 +98,6 @@ def run_strategy_genome(data, genome):
|
|
| 43 |
stock_cols = [c for c in data.columns if c not in ["^NSEI", "GC=F"]]
|
| 44 |
stocks = data[stock_cols]
|
| 45 |
|
| 46 |
-
# --- AI EVOLVED TRAITS ---
|
| 47 |
lookback = int(genome['lookback'])
|
| 48 |
top_n = int(genome['top_n'])
|
| 49 |
rebalance_days = int(genome['rebalance'])
|
|
@@ -51,11 +105,9 @@ def run_strategy_genome(data, genome):
|
|
| 51 |
trend_filter = int(genome['trend_filter'])
|
| 52 |
max_vol = float(genome['max_vol'])
|
| 53 |
|
| 54 |
-
# Calculate Momentum and Volatility matrices
|
| 55 |
momentum = stocks.pct_change(lookback)
|
| 56 |
daily_returns = stocks.pct_change(1)
|
| 57 |
volatility = daily_returns.rolling(lookback).std() * np.sqrt(252)
|
| 58 |
-
|
| 59 |
nifty_ma = nifty.rolling(trend_filter).mean()
|
| 60 |
|
| 61 |
curve = [INITIAL_CAPITAL]
|
|
@@ -78,26 +130,25 @@ def run_strategy_genome(data, genome):
|
|
| 78 |
scores = momentum.loc[curr]
|
| 79 |
vols = volatility.loc[curr]
|
| 80 |
|
| 81 |
-
#
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
picks = valid_stocks.sort_values(ascending=False).head(top_n).index.tolist()
|
| 84 |
|
| 85 |
-
# Ensure we have at least some stocks to buy, otherwise hold cash
|
| 86 |
if len(picks) > 0:
|
| 87 |
p1 = stocks.loc[curr, picks]
|
| 88 |
p2 = stocks.loc[nxt, picks]
|
| 89 |
-
|
| 90 |
-
# Equal-weight basket return
|
| 91 |
stock_ret = ((p2 - p1) / p1).mean()
|
| 92 |
if pd.isna(stock_ret): stock_ret = 0.0
|
| 93 |
period_ret = stock_ret
|
| 94 |
else:
|
| 95 |
-
# Bear Market Hedge: Flee to Gold
|
| 96 |
g_ret = (gold.loc[nxt] - gold.loc[curr]) / gold.loc[curr]
|
| 97 |
if pd.isna(g_ret): g_ret = 0.0
|
| 98 |
period_ret = g_ret
|
| 99 |
|
| 100 |
-
# Stop Loss Execution
|
| 101 |
if period_ret < -stop_loss: period_ret = -stop_loss
|
| 102 |
|
| 103 |
curr_val = curr_val * (1 + period_ret)
|
|
@@ -117,29 +168,26 @@ def run_strategy_genome(data, genome):
|
|
| 117 |
return cagr, pd.Series(curve, index=sim_dates)
|
| 118 |
|
| 119 |
def backtest_engine():
|
| 120 |
-
print(f"⚙️ Initializing
|
| 121 |
start_time = time.time()
|
| 122 |
|
| 123 |
-
tickers =
|
| 124 |
tickers += ["^NSEI", "GC=F"]
|
| 125 |
|
| 126 |
try:
|
| 127 |
-
print(f"🌍 Fetching 16-Year History for
|
| 128 |
-
# Downloading 2000+ stocks is data-heavy. Using multiple threads automatically via yfinance.
|
| 129 |
data = yf.download(tickers, start=START_DATE, progress=False, threads=True)
|
| 130 |
-
|
| 131 |
if isinstance(data.columns, pd.MultiIndex):
|
| 132 |
try: data = data['Close']
|
| 133 |
except: pass
|
| 134 |
|
| 135 |
-
data = data.ffill().bfill()
|
| 136 |
if data.empty: return None
|
| 137 |
|
| 138 |
population = []
|
| 139 |
for _ in range(30):
|
| 140 |
population.append({
|
| 141 |
'lookback': random.choice([10, 20, 30, 45, 60, 90]),
|
| 142 |
-
# STRICT CONSTRAINT: Must hold between 5 and 10 stocks.
|
| 143 |
'top_n': random.choice([5, 6, 7, 8, 9, 10]),
|
| 144 |
'rebalance': random.choice([3, 5, 7, 10, 14]),
|
| 145 |
'stop_loss': random.choice([0.02, 0.04, 0.06, 0.08]),
|
|
@@ -153,7 +201,7 @@ def backtest_engine():
|
|
| 153 |
generation = 1
|
| 154 |
|
| 155 |
while (time.time() - start_time) < (SIMULATION_TIME_MIN * 60):
|
| 156 |
-
print(f"\n🧬 Gen {generation}:
|
| 157 |
results = []
|
| 158 |
|
| 159 |
for genome in population:
|
|
@@ -173,7 +221,7 @@ def backtest_engine():
|
|
| 173 |
else:
|
| 174 |
stall_count += 1
|
| 175 |
|
| 176 |
-
print(f" 🏆
|
| 177 |
print(f" 🧬 DNA: {best_dna['top_n']} Stocks | Bal: {best_dna['rebalance']}d | Regime: {best_dna['trend_filter']}d | Vol Cap: {best_dna['max_vol']*100}%")
|
| 178 |
|
| 179 |
survivors = [x[2] for x in results[:6]]
|
|
@@ -195,7 +243,6 @@ def backtest_engine():
|
|
| 195 |
mutation_rate = 0.8 if stall_count >= 3 else 0.3
|
| 196 |
|
| 197 |
if random.random() < mutation_rate: child['lookback'] = random.choice([10, 20, 30, 45, 60, 90])
|
| 198 |
-
# STRICT CONSTRAINT during mutation as well
|
| 199 |
if random.random() < mutation_rate: child['top_n'] = random.choice([5, 6, 7, 8, 9, 10])
|
| 200 |
if random.random() < mutation_rate: child['rebalance'] = random.choice([3, 5, 7, 10, 14])
|
| 201 |
if random.random() < mutation_rate: child['stop_loss'] = random.choice([0.02, 0.04, 0.06, 0.08])
|
|
@@ -217,14 +264,14 @@ def backtest_engine():
|
|
| 217 |
|
| 218 |
if best_curve is not None:
|
| 219 |
plt.figure(figsize=(12, 7))
|
| 220 |
-
plt.plot(best_curve, label=f"
|
| 221 |
|
| 222 |
nifty = data["^NSEI"]
|
| 223 |
bench = (nifty.loc[best_curve.index] / nifty.loc[best_curve.index[0]]) * INITIAL_CAPITAL
|
| 224 |
plt.plot(bench, label="Nifty 50 Index", color='gray', linestyle='--')
|
| 225 |
|
| 226 |
plt.yscale('log')
|
| 227 |
-
plt.title("Renaissance Engine:
|
| 228 |
plt.ylabel("Portfolio Value (Log Scale)")
|
| 229 |
plt.legend()
|
| 230 |
plt.grid(True, alpha=0.3)
|
|
|
|
| 1 |
+
import warnings
|
| 2 |
+
warnings.filterwarnings("ignore")
|
| 3 |
+
|
| 4 |
import matplotlib
|
| 5 |
matplotlib.use('Agg')
|
| 6 |
import yfinance as yf
|
|
|
|
| 10 |
import os
|
| 11 |
import time
|
| 12 |
import random
|
| 13 |
+
import json
|
| 14 |
+
|
| 15 |
+
# Silence Pandas Future Warnings
|
| 16 |
+
pd.options.mode.chained_assignment = None
|
| 17 |
+
try: pd.set_option('future.no_silent_downcasting', True)
|
| 18 |
+
except: pass
|
| 19 |
|
| 20 |
# --- CONFIGURATION ---
|
| 21 |
START_DATE = "2010-01-01"
|
| 22 |
INITIAL_CAPITAL = 1000000
|
| 23 |
SIMULATION_TIME_MIN = 35
|
| 24 |
+
CACHE_FILE = "fundamental_cache.json"
|
| 25 |
|
| 26 |
+
def get_all_csv_tickers():
    """Return the symbols listed in ``EQUITY_L.csv`` as ``<SYMBOL>.NS`` tickers.

    The CSV is read from the current working directory. Header names are
    stripped of surrounding whitespace; when a ``SERIES`` column is present
    only rows whose series is ``EQ`` are kept. Missing symbols are dropped and
    surrounding whitespace in symbol/series values is ignored.

    Returns:
        list[str]: One ``.NS`` ticker per retained row, in file order. If the
        file cannot be read or parsed, or has no ``SYMBOL`` column, a fixed
        four-ticker fallback list is returned instead.
    """
    fallback = ["RELIANCE.NS", "TCS.NS", "INFY.NS", "HDFCBANK.NS"]
    try:
        df = pd.read_csv("EQUITY_L.csv")
        # Normalise header names so lookups work even with padded headers.
        df.columns = [str(c).strip() for c in df.columns]
        if 'SERIES' in df.columns:
            df = df[df['SERIES'].astype(str).str.strip() == 'EQ']
        # Drop missing symbols so they do not turn into a bogus "nan.NS".
        symbols = df['SYMBOL'].dropna().astype(str).str.strip()
        return [f"{sym}.NS" for sym in symbols if sym]
    except (OSError, ValueError, KeyError) as exc:
        # OSError: file missing/unreadable; ValueError: pandas empty-data,
        # parser and decoding errors; KeyError: no SYMBOL column.
        # Report the fallback instead of failing silently.
        print(f"⚠️ Could not load EQUITY_L.csv ({exc!r}); using fallback tickers.")
        return fallback
|
| 35 |
+
|
| 36 |
+
def _score_fundamentals(info):
    """Score one ticker's ``info`` mapping on a 0-100 scale.

    Adds 40 points for return on equity above 15%, 30 points for revenue
    growth above 10%, and 30 points for a trailing P/E strictly between 0 and
    60. Missing or falsy values count as failing the respective test.
    """
    roe = info.get('returnOnEquity', 0) or 0
    pe = info.get('trailingPE', 0) or 1000  # missing P/E -> outside the 0-60 band
    growth = info.get('revenueGrowth', 0) or 0

    score = 0
    if roe > 0.15:
        score += 40
    if growth > 0.10:
        score += 30
    if 0 < pe < 60:
        score += 30
    return score


def fundamental_deep_scan(tickers):
    """Score every ticker on fundamentals and cache the strongest 250.

    For each ticker the ``yf.Ticker(...).info`` mapping is fetched and scored;
    tickers scoring at least 40 are retained, sorted by score (highest first,
    ties keep input order) and truncated to 250. The result is written to
    ``CACHE_FILE`` as JSON, unless it is empty, so that a failed scan does not
    leave behind a cache that later runs would treat as the universe.

    Args:
        tickers: Sized sequence of ticker strings to scan.

    Returns:
        list[str]: Up to 250 retained tickers, best score first.

    NOTE(review): ``info`` presumably reflects present-day fundamentals; using
    it to choose the universe for a backtest starting at START_DATE would then
    introduce look-ahead bias -- confirm.
    """
    total = len(tickers)
    print(f"🔍 PHASE 1: Deep Fundamental Scan of {total} stocks...")
    print("⏳ This will take 30-45 minutes. It will only happen ONCE and save to cache.")

    scored_stocks = []
    failed = 0

    for count, ticker in enumerate(tickers, start=1):
        if count % 50 == 0:
            print(f" -> Scanned {count}/{total} stocks...")

        try:
            score = _score_fundamentals(yf.Ticker(ticker).info)
        except Exception:
            # Best-effort scan: one bad ticker (lookup error, malformed data)
            # must not abort the whole run, but keep count so it is visible.
            failed += 1
        else:
            # Keep companies with strong actual business fundamentals
            if score >= 40:
                scored_stocks.append({'ticker': ticker, 'score': score})

        # Delay to prevent IP Ban from Yahoo Finance
        time.sleep(random.uniform(0.1, 0.4))

    if failed:
        print(f" -> Skipped {failed}/{total} stocks whose fundamentals could not be read.")

    scored_stocks.sort(key=lambda x: x['score'], reverse=True)

    # We take the top 250 fundamentally strongest companies
    elite_tickers = [x['ticker'] for x in scored_stocks[:250]]

    if not elite_tickers:
        # Do not cache an empty universe: it would be reloaded on every later
        # run and the scan would never be retried.
        print("⚠️ Phase 1 found no qualifying stocks; cache not written.")
        return elite_tickers

    with open(CACHE_FILE, 'w') as f:
        json.dump(elite_tickers, f)

    print(f"✅ Phase 1 Complete. Saved {len(elite_tickers)} Elite Stocks to cache.")
    return elite_tickers
|
| 81 |
+
|
| 82 |
+
def load_fundamental_universe():
    """Return the fundamentally screened ticker universe, using the cache if usable.

    If ``CACHE_FILE`` exists and contains a non-empty JSON list, that list is
    returned. Otherwise (no cache, unreadable/corrupt cache, or an empty or
    non-list payload) the full scan is run via ``get_all_csv_tickers`` and
    ``fundamental_deep_scan`` and its result is returned.

    Returns:
        list[str]: Ticker symbols making up the universe.
    """
    if os.path.exists(CACHE_FILE):
        print("📂 Loading Fundamentally Strong Universe from Cache...")
        try:
            with open(CACHE_FILE, 'r') as f:
                cached = json.load(f)
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError is a ValueError: a truncated or corrupt
            # cache triggers a rescan instead of crashing.
            print(f"⚠️ Cache unreadable ({exc!r}); rescanning.")
            cached = None

        # Callers extend the result in place, so require a real, non-empty list.
        if isinstance(cached, list) and cached:
            return cached
        if cached is not None:
            print("⚠️ Cache is empty or malformed; rescanning.")

    all_tickers = get_all_csv_tickers()
    return fundamental_deep_scan(all_tickers)
|
| 90 |
|
| 91 |
def run_strategy_genome(data, genome):
|
| 92 |
if data.empty: return -1.0, []
|
|
|
|
| 98 |
stock_cols = [c for c in data.columns if c not in ["^NSEI", "GC=F"]]
|
| 99 |
stocks = data[stock_cols]
|
| 100 |
|
|
|
|
| 101 |
lookback = int(genome['lookback'])
|
| 102 |
top_n = int(genome['top_n'])
|
| 103 |
rebalance_days = int(genome['rebalance'])
|
|
|
|
| 105 |
trend_filter = int(genome['trend_filter'])
|
| 106 |
max_vol = float(genome['max_vol'])
|
| 107 |
|
|
|
|
| 108 |
momentum = stocks.pct_change(lookback)
|
| 109 |
daily_returns = stocks.pct_change(1)
|
| 110 |
volatility = daily_returns.rolling(lookback).std() * np.sqrt(252)
|
|
|
|
| 111 |
nifty_ma = nifty.rolling(trend_filter).mean()
|
| 112 |
|
| 113 |
curve = [INITIAL_CAPITAL]
|
|
|
|
| 130 |
scores = momentum.loc[curr]
|
| 131 |
vols = volatility.loc[curr]
|
| 132 |
|
| 133 |
+
# MICRO-CAP UNLOCKED: Lowered to ₹10 to catch real fundamental turnarounds.
|
| 134 |
+
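# (Phase 1 only keeps stocks scoring >= 40, i.e. ROE > 15%, or revenue growth > 10% together with 0 < P/E < 60, so a ₹12 stock here has passed a fundamentals screen; ROE > 15% itself is not guaranteed)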
|
| 135 |
+
valid_prices = stocks.loc[curr] > 10.0
|
| 136 |
+
|
| 137 |
+
# Must be fundamentally strong (by universe), positive momentum, low vol, NOT a fractional penny stock
|
| 138 |
+
valid_stocks = scores[(scores > 0) & (vols < max_vol) & valid_prices]
|
| 139 |
picks = valid_stocks.sort_values(ascending=False).head(top_n).index.tolist()
|
| 140 |
|
|
|
|
| 141 |
if len(picks) > 0:
|
| 142 |
p1 = stocks.loc[curr, picks]
|
| 143 |
p2 = stocks.loc[nxt, picks]
|
|
|
|
|
|
|
| 144 |
stock_ret = ((p2 - p1) / p1).mean()
|
| 145 |
if pd.isna(stock_ret): stock_ret = 0.0
|
| 146 |
period_ret = stock_ret
|
| 147 |
else:
|
|
|
|
| 148 |
g_ret = (gold.loc[nxt] - gold.loc[curr]) / gold.loc[curr]
|
| 149 |
if pd.isna(g_ret): g_ret = 0.0
|
| 150 |
period_ret = g_ret
|
| 151 |
|
|
|
|
| 152 |
if period_ret < -stop_loss: period_ret = -stop_loss
|
| 153 |
|
| 154 |
curr_val = curr_val * (1 + period_ret)
|
|
|
|
| 168 |
return cagr, pd.Series(curve, index=sim_dates)
|
| 169 |
|
| 170 |
def backtest_engine():
|
| 171 |
+
print(f"⚙️ Initializing Phase 2: AI Genetic Backtest...")
|
| 172 |
start_time = time.time()
|
| 173 |
|
| 174 |
+
tickers = load_fundamental_universe()
|
| 175 |
tickers += ["^NSEI", "GC=F"]
|
| 176 |
|
| 177 |
try:
|
| 178 |
+
print(f"🌍 Fetching 16-Year History for Elite Universe...")
|
|
|
|
| 179 |
data = yf.download(tickers, start=START_DATE, progress=False, threads=True)
|
|
|
|
| 180 |
if isinstance(data.columns, pd.MultiIndex):
|
| 181 |
try: data = data['Close']
|
| 182 |
except: pass
|
| 183 |
|
| 184 |
+
data = data.ffill().bfill().infer_objects(copy=False)
|
| 185 |
if data.empty: return None
|
| 186 |
|
| 187 |
population = []
|
| 188 |
for _ in range(30):
|
| 189 |
population.append({
|
| 190 |
'lookback': random.choice([10, 20, 30, 45, 60, 90]),
|
|
|
|
| 191 |
'top_n': random.choice([5, 6, 7, 8, 9, 10]),
|
| 192 |
'rebalance': random.choice([3, 5, 7, 10, 14]),
|
| 193 |
'stop_loss': random.choice([0.02, 0.04, 0.06, 0.08]),
|
|
|
|
| 201 |
generation = 1
|
| 202 |
|
| 203 |
while (time.time() - start_time) < (SIMULATION_TIME_MIN * 60):
|
| 204 |
+
print(f"\n🧬 Gen {generation}: Testing 1.0x Portfolios (Strict Fundamentals + Price > ₹10)")
|
| 205 |
results = []
|
| 206 |
|
| 207 |
for genome in population:
|
|
|
|
| 221 |
else:
|
| 222 |
stall_count += 1
|
| 223 |
|
| 224 |
+
print(f" 🏆 16-Year Average CAGR: {best_cagr*100:.1f}%")
|
| 225 |
print(f" 🧬 DNA: {best_dna['top_n']} Stocks | Bal: {best_dna['rebalance']}d | Regime: {best_dna['trend_filter']}d | Vol Cap: {best_dna['max_vol']*100}%")
|
| 226 |
|
| 227 |
survivors = [x[2] for x in results[:6]]
|
|
|
|
| 243 |
mutation_rate = 0.8 if stall_count >= 3 else 0.3
|
| 244 |
|
| 245 |
if random.random() < mutation_rate: child['lookback'] = random.choice([10, 20, 30, 45, 60, 90])
|
|
|
|
| 246 |
if random.random() < mutation_rate: child['top_n'] = random.choice([5, 6, 7, 8, 9, 10])
|
| 247 |
if random.random() < mutation_rate: child['rebalance'] = random.choice([3, 5, 7, 10, 14])
|
| 248 |
if random.random() < mutation_rate: child['stop_loss'] = random.choice([0.02, 0.04, 0.06, 0.08])
|
|
|
|
| 264 |
|
| 265 |
if best_curve is not None:
|
| 266 |
plt.figure(figsize=(12, 7))
|
| 267 |
+
plt.plot(best_curve, label=f"Fundamentally Strong Strategy ({best_cagr*100:.1f}%)", color='blue', linewidth=2)
|
| 268 |
|
| 269 |
nifty = data["^NSEI"]
|
| 270 |
bench = (nifty.loc[best_curve.index] / nifty.loc[best_curve.index[0]]) * INITIAL_CAPITAL
|
| 271 |
plt.plot(bench, label="Nifty 50 Index", color='gray', linestyle='--')
|
| 272 |
|
| 273 |
plt.yscale('log')
|
| 274 |
+
plt.title("Renaissance Engine: Quality Momentum (Zero-Leverage, 5-10 Stocks, ₹10+ Floor)")
|
| 275 |
plt.ylabel("Portfolio Value (Log Scale)")
|
| 276 |
plt.legend()
|
| 277 |
plt.grid(True, alpha=0.3)
|