ekjotsingh committed on
Commit
0198e22
·
verified ·
1 Parent(s): 15bee54

Update backtest.py

Browse files
Files changed (1) hide show
  1. backtest.py +82 -35
backtest.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import matplotlib
2
  matplotlib.use('Agg')
3
  import yfinance as yf
@@ -7,31 +10,83 @@ import matplotlib.pyplot as plt
7
  import os
8
  import time
9
  import random
 
 
 
 
 
 
10
 
11
  # --- CONFIGURATION ---
12
  START_DATE = "2010-01-01"
13
  INITIAL_CAPITAL = 1000000
14
  SIMULATION_TIME_MIN = 35
 
15
 
16
def load_universe():
    """Load every EQ-series symbol from ``EQUITY_L.csv`` listed before 2010-01-01.

    The CSV is read from the current working directory. Column names are
    whitespace-trimmed; the ``SERIES`` and ``DATE OF LISTING`` filters are only
    applied when those columns exist. Rows with a blank symbol are dropped so
    no ``nan.NS`` ticker is produced.

    Returns:
        list[str]: Tickers in ``<SYMBOL>.NS`` form. When the CSV cannot be
        read or parsed, or has no ``SYMBOL`` column, a fixed six-ticker
        fallback list is returned instead.
    """
    fallback = ["RELIANCE.NS", "TCS.NS", "INFY.NS", "SBIN.NS", "HDFCBANK.NS", "ITC.NS"]

    try:
        df = pd.read_csv("EQUITY_L.csv")
        df.columns = [str(c).strip() for c in df.columns]

        if 'SERIES' in df.columns:
            df = df[df['SERIES'].astype(str).str.strip() == 'EQ']

        # Filter by listing date to ensure no future knowledge is used.
        # The mask is computed separately rather than assigned as a new column,
        # which avoids writing into a filtered slice (SettingWithCopyWarning).
        if 'DATE OF LISTING' in df.columns:
            list_dates = pd.to_datetime(
                df['DATE OF LISTING'].astype(str).str.strip(),
                format='%d-%b-%Y',
                errors='coerce',
            )
            # Unparseable dates become NaT and compare False, so they are dropped.
            df = df[list_dates < pd.to_datetime("2010-01-01")]

        symbols = df['SYMBOL'].dropna().astype(str).str.strip()
        tickers = [f"{s}.NS" for s in symbols if s]
    except (OSError, KeyError, ValueError) as err:
        # OSError: file missing/unreadable; ValueError: pandas parse/empty-data
        # errors; KeyError: no SYMBOL column. Anything else is a real bug and
        # is allowed to propagate instead of being silently swallowed.
        print(f"⚠️ Could not load EQUITY_L.csv ({err!r}); using fallback universe.")
        return fallback

    print(f"✅ Loaded ALL {len(tickers)} historical tickers. Commencing Full-Market Scan.")

    # UNLOCKED: Returns all valid stocks, no more [:250] limit.
    return tickers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  def run_strategy_genome(data, genome):
37
  if data.empty: return -1.0, []
@@ -43,7 +98,6 @@ def run_strategy_genome(data, genome):
43
  stock_cols = [c for c in data.columns if c not in ["^NSEI", "GC=F"]]
44
  stocks = data[stock_cols]
45
 
46
- # --- AI EVOLVED TRAITS ---
47
  lookback = int(genome['lookback'])
48
  top_n = int(genome['top_n'])
49
  rebalance_days = int(genome['rebalance'])
@@ -51,11 +105,9 @@ def run_strategy_genome(data, genome):
51
  trend_filter = int(genome['trend_filter'])
52
  max_vol = float(genome['max_vol'])
53
 
54
- # Calculate Momentum and Volatility matrices
55
  momentum = stocks.pct_change(lookback)
56
  daily_returns = stocks.pct_change(1)
57
  volatility = daily_returns.rolling(lookback).std() * np.sqrt(252)
58
-
59
  nifty_ma = nifty.rolling(trend_filter).mean()
60
 
61
  curve = [INITIAL_CAPITAL]
@@ -78,26 +130,25 @@ def run_strategy_genome(data, genome):
78
  scores = momentum.loc[curr]
79
  vols = volatility.loc[curr]
80
 
81
- # NO CHEATING FILTER: Positive absolute momentum + Below AI's volatility threshold
82
- valid_stocks = scores[(scores > 0) & (vols < max_vol)]
 
 
 
 
83
  picks = valid_stocks.sort_values(ascending=False).head(top_n).index.tolist()
84
 
85
- # Ensure we have at least some stocks to buy, otherwise hold cash
86
  if len(picks) > 0:
87
  p1 = stocks.loc[curr, picks]
88
  p2 = stocks.loc[nxt, picks]
89
-
90
- # Equal-weight basket return
91
  stock_ret = ((p2 - p1) / p1).mean()
92
  if pd.isna(stock_ret): stock_ret = 0.0
93
  period_ret = stock_ret
94
  else:
95
- # Bear Market Hedge: Flee to Gold
96
  g_ret = (gold.loc[nxt] - gold.loc[curr]) / gold.loc[curr]
97
  if pd.isna(g_ret): g_ret = 0.0
98
  period_ret = g_ret
99
 
100
- # Stop Loss Execution
101
  if period_ret < -stop_loss: period_ret = -stop_loss
102
 
103
  curr_val = curr_val * (1 + period_ret)
@@ -117,29 +168,26 @@ def run_strategy_genome(data, genome):
117
  return cagr, pd.Series(curve, index=sim_dates)
118
 
119
  def backtest_engine():
120
- print(f"⚙️ Initializing 'No-Cheating' Full-Market Simulator...")
121
  start_time = time.time()
122
 
123
- tickers = load_universe()
124
  tickers += ["^NSEI", "GC=F"]
125
 
126
  try:
127
- print(f"🌍 Fetching 16-Year History for {len(tickers)} assets... (This will take a few minutes)")
128
- # Downloading 2000+ stocks is data-heavy. Using multiple threads automatically via yfinance.
129
  data = yf.download(tickers, start=START_DATE, progress=False, threads=True)
130
-
131
  if isinstance(data.columns, pd.MultiIndex):
132
  try: data = data['Close']
133
  except: pass
134
 
135
- data = data.ffill().bfill()
136
  if data.empty: return None
137
 
138
  population = []
139
  for _ in range(30):
140
  population.append({
141
  'lookback': random.choice([10, 20, 30, 45, 60, 90]),
142
- # STRICT CONSTRAINT: Must hold between 5 and 10 stocks.
143
  'top_n': random.choice([5, 6, 7, 8, 9, 10]),
144
  'rebalance': random.choice([3, 5, 7, 10, 14]),
145
  'stop_loss': random.choice([0.02, 0.04, 0.06, 0.08]),
@@ -153,7 +201,7 @@ def backtest_engine():
153
  generation = 1
154
 
155
  while (time.time() - start_time) < (SIMULATION_TIME_MIN * 60):
156
- print(f"\n🧬 Gen {generation}: Evaluating 1.0x Portfolios (Basket size: 5-10)...")
157
  results = []
158
 
159
  for genome in population:
@@ -173,7 +221,7 @@ def backtest_engine():
173
  else:
174
  stall_count += 1
175
 
176
- print(f" 🏆 Best: {best_cagr*100:.1f}% CAGR")
177
  print(f" 🧬 DNA: {best_dna['top_n']} Stocks | Bal: {best_dna['rebalance']}d | Regime: {best_dna['trend_filter']}d | Vol Cap: {best_dna['max_vol']*100}%")
178
 
179
  survivors = [x[2] for x in results[:6]]
@@ -195,7 +243,6 @@ def backtest_engine():
195
  mutation_rate = 0.8 if stall_count >= 3 else 0.3
196
 
197
  if random.random() < mutation_rate: child['lookback'] = random.choice([10, 20, 30, 45, 60, 90])
198
- # STRICT CONSTRAINT during mutation as well
199
  if random.random() < mutation_rate: child['top_n'] = random.choice([5, 6, 7, 8, 9, 10])
200
  if random.random() < mutation_rate: child['rebalance'] = random.choice([3, 5, 7, 10, 14])
201
  if random.random() < mutation_rate: child['stop_loss'] = random.choice([0.02, 0.04, 0.06, 0.08])
@@ -217,14 +264,14 @@ def backtest_engine():
217
 
218
  if best_curve is not None:
219
  plt.figure(figsize=(12, 7))
220
- plt.plot(best_curve, label=f"Diversified Strategy ({best_cagr*100:.1f}%)", color='blue', linewidth=2)
221
 
222
  nifty = data["^NSEI"]
223
  bench = (nifty.loc[best_curve.index] / nifty.loc[best_curve.index[0]]) * INITIAL_CAPITAL
224
  plt.plot(bench, label="Nifty 50 Index", color='gray', linestyle='--')
225
 
226
  plt.yscale('log')
227
- plt.title("Renaissance Engine: Full Market (Zero-Leverage, 5-10 Stocks)")
228
  plt.ylabel("Portfolio Value (Log Scale)")
229
  plt.legend()
230
  plt.grid(True, alpha=0.3)
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore")
3
+
4
  import matplotlib
5
  matplotlib.use('Agg')
6
  import yfinance as yf
 
10
  import os
11
  import time
12
  import random
13
+ import json
14
+
15
+ # Silence Pandas Future Warnings
16
+ pd.options.mode.chained_assignment = None
17
+ try: pd.set_option('future.no_silent_downcasting', True)
18
+ except: pass
19
 
20
  # --- CONFIGURATION ---
21
  START_DATE = "2010-01-01"
22
  INITIAL_CAPITAL = 1000000
23
  SIMULATION_TIME_MIN = 35
24
+ CACHE_FILE = "fundamental_cache.json"
25
 
26
def get_all_csv_tickers():
    """Return every EQ-series symbol from ``EQUITY_L.csv`` as a ``.NS`` ticker.

    The CSV is read from the current working directory. Column names are
    whitespace-trimmed, the ``SERIES == 'EQ'`` filter is applied only when a
    ``SERIES`` column exists, and rows with a blank symbol are dropped so no
    ``nan.NS`` ticker is produced.

    Returns:
        list[str]: Tickers in ``<SYMBOL>.NS`` form. When the CSV cannot be
        read or parsed, or has no ``SYMBOL`` column, a fixed four-ticker
        fallback list is returned instead.
    """
    fallback = ["RELIANCE.NS", "TCS.NS", "INFY.NS", "HDFCBANK.NS"]

    try:
        df = pd.read_csv("EQUITY_L.csv")
        df.columns = [str(c).strip() for c in df.columns]

        if 'SERIES' in df.columns:
            df = df[df['SERIES'].astype(str).str.strip() == 'EQ']

        symbols = df['SYMBOL'].dropna().astype(str).str.strip()
        return [f"{s}.NS" for s in symbols if s]
    except (OSError, KeyError, ValueError) as err:
        # OSError: file missing/unreadable; ValueError: pandas parse/empty-data
        # errors; KeyError: no SYMBOL column. Anything else (including
        # KeyboardInterrupt) propagates instead of being silently swallowed.
        print(f"⚠️ Could not load EQUITY_L.csv ({err!r}); using fallback universe.")
        return fallback
35
+
36
def _score_fundamentals(info):
    """Return the quality score (0, 30, 40, 60, 70 or 100) for one ``info`` mapping."""
    # Missing or None values fall back to 0, which fails every threshold below.
    roe = info.get('returnOnEquity') or 0
    pe = info.get('trailingPE') or 0
    growth = info.get('revenueGrowth') or 0

    score = 0
    if roe > 0.15:
        score += 40
    if growth > 0.10:
        score += 30
    if 0 < pe < 60:
        score += 30
    return score


def fundamental_deep_scan(tickers):
    """Score each ticker on fundamentals and cache the 250 best.

    For every ticker, ``yf.Ticker(ticker).info`` is fetched and scored:
    +40 for return on equity above 15%, +30 for revenue growth above 10%,
    +30 for a trailing P/E strictly between 0 and 60. Tickers scoring at
    least 40 are kept, sorted by score (highest first), truncated to 250 and
    written to ``CACHE_FILE`` as a JSON list.

    Note that a score of 60 is reachable from growth + P/E alone, so a kept
    ticker does not necessarily have ROE above 15%.

    NOTE(review): ``Ticker.info`` appears to report present-day fundamentals.
    Selecting the universe this way and then backtesting it from START_DATE
    would introduce look-ahead/survivorship bias — confirm before trusting
    the resulting CAGR.

    Args:
        tickers: Sized iterable of ticker strings to scan.

    Returns:
        list[str]: Up to 250 tickers, best score first. Empty when nothing
        qualified; in that case no cache file is written.
    """
    print(f"🔍 PHASE 1: Deep Fundamental Scan of {len(tickers)} stocks...")
    print("⏳ This will take 30-45 minutes. It will only happen ONCE and save to cache.")

    scored_stocks = []
    failed = 0

    for count, ticker in enumerate(tickers, start=1):
        if count % 50 == 0:
            print(f" -> Scanned {count}/{len(tickers)} stocks...")

        try:
            score = _score_fundamentals(yf.Ticker(ticker).info)
        except Exception:
            # Best-effort per ticker: a fetch or data error for one symbol must
            # not abort a long scan. Failures are counted and reported below
            # rather than silently discarded.
            failed += 1
        else:
            # Keep companies with strong actual business fundamentals
            if score >= 40:
                scored_stocks.append({'ticker': ticker, 'score': score})

        # Delay to prevent IP Ban from Yahoo Finance
        time.sleep(random.uniform(0.1, 0.4))

    if failed:
        print(f"⚠️ Skipped {failed} of {len(tickers)} tickers whose fundamentals could not be fetched.")

    # list.sort is stable, so equal scores keep their scan order.
    scored_stocks.sort(key=lambda x: x['score'], reverse=True)

    # We take the top 250 fundamentally strongest companies
    elite_tickers = [x['ticker'] for x in scored_stocks[:250]]

    if not elite_tickers:
        # Do not persist an empty result: an empty cache file would be picked
        # up on every later run and leave the backtest with no stock universe.
        print("⚠️ Phase 1 found no qualifying stocks; cache not written.")
        return elite_tickers

    with open(CACHE_FILE, 'w') as f:
        json.dump(elite_tickers, f)

    print(f"✅ Phase 1 Complete. Saved {len(elite_tickers)} Elite Stocks to cache.")
    return elite_tickers
81
+
82
def load_fundamental_universe():
    """Return the screened ticker universe, from cache when a usable one exists.

    If ``CACHE_FILE`` exists and holds a non-empty JSON list of strings, that
    list is returned. Otherwise (no file, unreadable file, invalid JSON, or
    an empty/malformed payload) the full CSV universe is loaded with
    ``get_all_csv_tickers`` and screened with ``fundamental_deep_scan``.

    Returns:
        list[str]: Ticker strings for the backtest universe.
    """
    if os.path.exists(CACHE_FILE):
        try:
            with open(CACHE_FILE, 'r') as f:
                cached = json.load(f)
        except (OSError, ValueError) as err:
            # json.JSONDecodeError is a ValueError subclass; a truncated or
            # corrupted cache triggers a rescan instead of crashing the run.
            print(f"⚠️ Ignoring unreadable cache {CACHE_FILE} ({err!r}); rescanning.")
        else:
            is_usable = (
                isinstance(cached, list)
                and bool(cached)
                and all(isinstance(t, str) for t in cached)
            )
            if is_usable:
                print("📂 Loading Fundamentally Strong Universe from Cache...")
                return cached
            print(f"⚠️ Cache {CACHE_FILE} is empty or malformed; rescanning.")

    all_tickers = get_all_csv_tickers()
    return fundamental_deep_scan(all_tickers)
90
 
91
  def run_strategy_genome(data, genome):
92
  if data.empty: return -1.0, []
 
98
  stock_cols = [c for c in data.columns if c not in ["^NSEI", "GC=F"]]
99
  stocks = data[stock_cols]
100
 
 
101
  lookback = int(genome['lookback'])
102
  top_n = int(genome['top_n'])
103
  rebalance_days = int(genome['rebalance'])
 
105
  trend_filter = int(genome['trend_filter'])
106
  max_vol = float(genome['max_vol'])
107
 
 
108
  momentum = stocks.pct_change(lookback)
109
  daily_returns = stocks.pct_change(1)
110
  volatility = daily_returns.rolling(lookback).std() * np.sqrt(252)
 
111
  nifty_ma = nifty.rolling(trend_filter).mean()
112
 
113
  curve = [INITIAL_CAPITAL]
 
130
  scores = momentum.loc[curr]
131
  vols = volatility.loc[curr]
132
 
133
+ # MICRO-CAP UNLOCKED: Lowered to ₹10 to catch real fundamental turnarounds.
134
+ # (Since Phase 1 ensures they have >15% ROE, a ₹12 stock here is a true hidden gem, not a scam)
135
+ valid_prices = stocks.loc[curr] > 10.0
136
+
137
+ # Must be fundamentally strong (by universe), positive momentum, low vol, NOT a fractional penny stock
138
+ valid_stocks = scores[(scores > 0) & (vols < max_vol) & valid_prices]
139
  picks = valid_stocks.sort_values(ascending=False).head(top_n).index.tolist()
140
 
 
141
  if len(picks) > 0:
142
  p1 = stocks.loc[curr, picks]
143
  p2 = stocks.loc[nxt, picks]
 
 
144
  stock_ret = ((p2 - p1) / p1).mean()
145
  if pd.isna(stock_ret): stock_ret = 0.0
146
  period_ret = stock_ret
147
  else:
 
148
  g_ret = (gold.loc[nxt] - gold.loc[curr]) / gold.loc[curr]
149
  if pd.isna(g_ret): g_ret = 0.0
150
  period_ret = g_ret
151
 
 
152
  if period_ret < -stop_loss: period_ret = -stop_loss
153
 
154
  curr_val = curr_val * (1 + period_ret)
 
168
  return cagr, pd.Series(curve, index=sim_dates)
169
 
170
  def backtest_engine():
171
+ print(f"⚙️ Initializing Phase 2: AI Genetic Backtest...")
172
  start_time = time.time()
173
 
174
+ tickers = load_fundamental_universe()
175
  tickers += ["^NSEI", "GC=F"]
176
 
177
  try:
178
+ print(f"🌍 Fetching 16-Year History for Elite Universe...")
 
179
  data = yf.download(tickers, start=START_DATE, progress=False, threads=True)
 
180
  if isinstance(data.columns, pd.MultiIndex):
181
  try: data = data['Close']
182
  except: pass
183
 
184
+ data = data.ffill().bfill().infer_objects(copy=False)
185
  if data.empty: return None
186
 
187
  population = []
188
  for _ in range(30):
189
  population.append({
190
  'lookback': random.choice([10, 20, 30, 45, 60, 90]),
 
191
  'top_n': random.choice([5, 6, 7, 8, 9, 10]),
192
  'rebalance': random.choice([3, 5, 7, 10, 14]),
193
  'stop_loss': random.choice([0.02, 0.04, 0.06, 0.08]),
 
201
  generation = 1
202
 
203
  while (time.time() - start_time) < (SIMULATION_TIME_MIN * 60):
204
+ print(f"\n🧬 Gen {generation}: Testing 1.0x Portfolios (Strict Fundamentals + Price > ₹10)")
205
  results = []
206
 
207
  for genome in population:
 
221
  else:
222
  stall_count += 1
223
 
224
+ print(f" 🏆 16-Year Average CAGR: {best_cagr*100:.1f}%")
225
  print(f" 🧬 DNA: {best_dna['top_n']} Stocks | Bal: {best_dna['rebalance']}d | Regime: {best_dna['trend_filter']}d | Vol Cap: {best_dna['max_vol']*100}%")
226
 
227
  survivors = [x[2] for x in results[:6]]
 
243
  mutation_rate = 0.8 if stall_count >= 3 else 0.3
244
 
245
  if random.random() < mutation_rate: child['lookback'] = random.choice([10, 20, 30, 45, 60, 90])
 
246
  if random.random() < mutation_rate: child['top_n'] = random.choice([5, 6, 7, 8, 9, 10])
247
  if random.random() < mutation_rate: child['rebalance'] = random.choice([3, 5, 7, 10, 14])
248
  if random.random() < mutation_rate: child['stop_loss'] = random.choice([0.02, 0.04, 0.06, 0.08])
 
264
 
265
  if best_curve is not None:
266
  plt.figure(figsize=(12, 7))
267
+ plt.plot(best_curve, label=f"Fundamentally Strong Strategy ({best_cagr*100:.1f}%)", color='blue', linewidth=2)
268
 
269
  nifty = data["^NSEI"]
270
  bench = (nifty.loc[best_curve.index] / nifty.loc[best_curve.index[0]]) * INITIAL_CAPITAL
271
  plt.plot(bench, label="Nifty 50 Index", color='gray', linestyle='--')
272
 
273
  plt.yscale('log')
274
+ plt.title("Renaissance Engine: Quality Momentum (Zero-Leverage, 5-10 Stocks, ₹10+ Floor)")
275
  plt.ylabel("Portfolio Value (Log Scale)")
276
  plt.legend()
277
  plt.grid(True, alpha=0.3)