Spaces:

ekjotsingh
/

Kairo-Brain

Sleeping

App Files Files Community

ekjotsingh committed on Feb 26

Commit

0198e22

verified ·

1 Parent(s): 15bee54

Update backtest.py

Browse files

Files changed (1) hide show

backtest.py +82 -35

backtest.py CHANGED Viewed

@@ -1,3 +1,6 @@
 import matplotlib
 matplotlib.use('Agg')
 import yfinance as yf
@@ -7,31 +10,83 @@ import matplotlib.pyplot as plt
 import os
 import time
 import random
 # --- CONFIGURATION ---
 START_DATE = "2010-01-01"
 INITIAL_CAPITAL = 1000000
 SIMULATION_TIME_MIN = 35
def load_universe(listed_before="2010-01-01"):
    """Load the EQ-series tickers from ``EQUITY_L.csv`` listed before a cutoff.

    Column headers are whitespace-stripped before use. When a ``SERIES``
    column exists only rows equal to ``'EQ'`` are kept; when a
    ``DATE OF LISTING`` column exists only rows whose listing date (parsed as
    ``%d-%b-%Y``) is strictly earlier than ``listed_before`` are kept, and
    rows with an unparseable date are dropped.

    Args:
        listed_before: Cutoff date understood by ``pd.to_datetime``. Defaults
            to ``"2010-01-01"``, the value the filter previously hard-coded.

    Returns:
        A list of ``"<SYMBOL>.NS"`` strings, or a six-ticker fallback list
        when the CSV cannot be read or has no ``SYMBOL`` column.
    """
    fallback = ["RELIANCE.NS", "TCS.NS", "INFY.NS", "SBIN.NS", "HDFCBANK.NS", "ITC.NS"]
    try:
        df = pd.read_csv("EQUITY_L.csv")
        df.columns = [c.strip() for c in df.columns]
        if 'SERIES' in df.columns:
            df = df[df['SERIES'] == 'EQ']
        if 'DATE OF LISTING' in df.columns:
            # Keep the parsed dates in a separate Series instead of assigning a
            # new column onto a filtered slice (avoids SettingWithCopyWarning).
            list_dates = pd.to_datetime(
                df['DATE OF LISTING'], format='%d-%b-%Y', errors='coerce'
            )
            # NaT compares False, so rows with unparseable dates are excluded.
            df = df[list_dates < pd.to_datetime(listed_before)]
        # Drop missing/blank symbols so we never emit "nan.NS" or ".NS".
        symbols = df['SYMBOL'].dropna().astype(str).str.strip()
        tickers = [f"{symbol}.NS" for symbol in symbols if symbol]
    except (OSError, KeyError, ValueError) as exc:
        # OSError: file missing/unreadable; KeyError: no SYMBOL column;
        # ValueError: pandas parser / empty-data / decoding errors.
        print(f"⚠️ Could not load EQUITY_L.csv ({exc!r}); using fallback tickers.")
        return fallback
    print(f"✅ Loaded ALL {len(tickers)} historical tickers. Commencing Full-Market Scan.")
    return tickers
 def run_strategy_genome(data, genome):
     if data.empty: return -1.0, []
@@ -43,7 +98,6 @@ def run_strategy_genome(data, genome):
     stock_cols = [c for c in data.columns if c not in ["^NSEI", "GC=F"]]
     stocks = data[stock_cols]
-    # --- AI EVOLVED TRAITS ---
     lookback = int(genome['lookback'])
     top_n = int(genome['top_n'])
     rebalance_days = int(genome['rebalance'])
@@ -51,11 +105,9 @@ def run_strategy_genome(data, genome):
     trend_filter = int(genome['trend_filter'])
     max_vol = float(genome['max_vol'])
-    # Calculate Momentum and Volatility matrices
     momentum = stocks.pct_change(lookback)
     daily_returns = stocks.pct_change(1)
     volatility = daily_returns.rolling(lookback).std() * np.sqrt(252)
     nifty_ma = nifty.rolling(trend_filter).mean()
     curve = [INITIAL_CAPITAL]
@@ -78,26 +130,25 @@ def run_strategy_genome(data, genome):
                     scores = momentum.loc[curr]
                     vols = volatility.loc[curr]
-                    # NO CHEATING FILTER: Positive absolute momentum + Below AI's volatility threshold
-                    valid_stocks = scores[(scores > 0) & (vols < max_vol)]
                     picks = valid_stocks.sort_values(ascending=False).head(top_n).index.tolist()
-                    # Ensure we have at least some stocks to buy, otherwise hold cash
                     if len(picks) > 0:
                         p1 = stocks.loc[curr, picks]
                         p2 = stocks.loc[nxt, picks]
-                        # Equal-weight basket return
                         stock_ret = ((p2 - p1) / p1).mean()
                         if pd.isna(stock_ret): stock_ret = 0.0
                         period_ret = stock_ret
             else:
-                # Bear Market Hedge: Flee to Gold
                 g_ret = (gold.loc[nxt] - gold.loc[curr]) / gold.loc[curr]
                 if pd.isna(g_ret): g_ret = 0.0
                 period_ret = g_ret
-            # Stop Loss Execution
             if period_ret < -stop_loss: period_ret = -stop_loss
             curr_val = curr_val * (1 + period_ret)
@@ -117,29 +168,26 @@ def run_strategy_genome(data, genome):
     return cagr, pd.Series(curve, index=sim_dates)
 def backtest_engine():
-    print(f"⚙️ Initializing 'No-Cheating' Full-Market Simulator...")
     start_time = time.time()
-    tickers = load_universe()
     tickers += ["^NSEI", "GC=F"]
     try:
-        print(f"🌍 Fetching 16-Year History for {len(tickers)} assets... (This will take a few minutes)")
-        # Downloading 2000+ stocks is data-heavy. Using multiple threads automatically via yfinance.
         data = yf.download(tickers, start=START_DATE, progress=False, threads=True)
         if isinstance(data.columns, pd.MultiIndex):
             try: data = data['Close']
             except: pass
-        data = data.ffill().bfill()
         if data.empty: return None
         population = []
         for _ in range(30):
             population.append({
                 'lookback': random.choice([10, 20, 30, 45, 60, 90]),
-                # STRICT CONSTRAINT: Must hold between 5 and 10 stocks.
                 'top_n': random.choice([5, 6, 7, 8, 9, 10]),
                 'rebalance': random.choice([3, 5, 7, 10, 14]),
                 'stop_loss': random.choice([0.02, 0.04, 0.06, 0.08]),
@@ -153,7 +201,7 @@ def backtest_engine():
         generation = 1
         while (time.time() - start_time) < (SIMULATION_TIME_MIN * 60):
-            print(f"\n🧬 Gen {generation}: Evaluating 1.0x Portfolios (Basket size: 5-10)...")
             results = []
             for genome in population:
@@ -173,7 +221,7 @@ def backtest_engine():
                 else:
                     stall_count += 1
-                print(f"   🏆 Best: {best_cagr*100:.1f}% CAGR")
                 print(f"   🧬 DNA: {best_dna['top_n']} Stocks | Bal: {best_dna['rebalance']}d | Regime: {best_dna['trend_filter']}d | Vol Cap: {best_dna['max_vol']*100}%")
                 survivors = [x[2] for x in results[:6]]
@@ -195,7 +243,6 @@ def backtest_engine():
                     mutation_rate = 0.8 if stall_count >= 3 else 0.3
                     if random.random() < mutation_rate: child['lookback'] = random.choice([10, 20, 30, 45, 60, 90])
-                    # STRICT CONSTRAINT during mutation as well
                     if random.random() < mutation_rate: child['top_n'] = random.choice([5, 6, 7, 8, 9, 10])
                     if random.random() < mutation_rate: child['rebalance'] = random.choice([3, 5, 7, 10, 14])
                     if random.random() < mutation_rate: child['stop_loss'] = random.choice([0.02, 0.04, 0.06, 0.08])
@@ -217,14 +264,14 @@ def backtest_engine():
         if best_curve is not None:
             plt.figure(figsize=(12, 7))
-            plt.plot(best_curve, label=f"Diversified Strategy ({best_cagr*100:.1f}%)", color='blue', linewidth=2)
             nifty = data["^NSEI"]
             bench = (nifty.loc[best_curve.index] / nifty.loc[best_curve.index[0]]) * INITIAL_CAPITAL
             plt.plot(bench, label="Nifty 50 Index", color='gray', linestyle='--')
             plt.yscale('log')
-            plt.title("Renaissance Engine: Full Market (Zero-Leverage, 5-10 Stocks)")
             plt.ylabel("Portfolio Value (Log Scale)")
             plt.legend()
             plt.grid(True, alpha=0.3)

+import warnings
+warnings.filterwarnings("ignore")
 import matplotlib
 matplotlib.use('Agg')
 import yfinance as yf
 import os
 import time
 import random
+import json
+# Silence Pandas Future Warnings
+pd.options.mode.chained_assignment = None
+try: pd.set_option('future.no_silent_downcasting', True)
+except: pass
 # --- CONFIGURATION ---
 START_DATE = "2010-01-01"
 INITIAL_CAPITAL = 1000000
 SIMULATION_TIME_MIN = 35
+CACHE_FILE = "fundamental_cache.json"
def get_all_csv_tickers():
    """Return every EQ-series symbol in ``EQUITY_L.csv`` as a ``.NS`` ticker.

    Column headers are whitespace-stripped before use, and when a ``SERIES``
    column exists only rows equal to ``'EQ'`` are kept.

    Returns:
        A list of ``"<SYMBOL>.NS"`` strings, or a four-ticker fallback list
        when the CSV cannot be read or has no ``SYMBOL`` column.
    """
    fallback = ["RELIANCE.NS", "TCS.NS", "INFY.NS", "HDFCBANK.NS"]
    try:
        df = pd.read_csv("EQUITY_L.csv")
        df.columns = [c.strip() for c in df.columns]
        if 'SERIES' in df.columns:
            df = df[df['SERIES'] == 'EQ']
        # Drop missing/blank symbols so we never emit "nan.NS" or ".NS".
        symbols = df['SYMBOL'].dropna().astype(str).str.strip()
        return [f"{symbol}.NS" for symbol in symbols if symbol]
    except (OSError, KeyError, ValueError) as exc:
        # OSError: file missing/unreadable; KeyError: no SYMBOL column;
        # ValueError: pandas parser / empty-data / decoding errors.
        print(f"⚠️ Could not load EQUITY_L.csv ({exc!r}); using fallback tickers.")
        return fallback
def _metric_or_default(info, key, default):
    """Read ``info[key]`` as a float; missing, falsy or non-numeric -> ``default``."""
    try:
        value = float(info.get(key))
    except (TypeError, ValueError):
        return default
    return value or default


def fundamental_deep_scan(tickers, fetch_info=None, cache_file=None, delay_range=(0.1, 0.4)):
    """Score each ticker on three fundamentals and cache the strongest 250.

    Scoring (maximum 100): +40 if ``returnOnEquity`` > 0.15, +30 if
    ``revenueGrowth`` > 0.10, +30 if 0 < ``trailingPE`` < 60. Tickers scoring
    at least 40 are kept, sorted by score (ties keep input order), and the
    first 250 are written to the cache file as a JSON list.

    Args:
        tickers: Iterable-with-length of ticker symbols to scan.
        fetch_info: Optional callable ``ticker -> dict`` of fundamentals.
            Defaults to ``yf.Ticker(ticker).info``.
        cache_file: Path for the JSON cache. Defaults to ``CACHE_FILE``.
        delay_range: ``(low, high)`` seconds for the random pause after each
            lookup.

    Returns:
        The list of selected tickers (possibly empty).

    NOTE(review): the default lookup presumably returns *current*
    fundamentals, while the backtest starts in 2010 -- that would be
    look-ahead bias in the universe selection. Confirm before trusting
    the resulting CAGR.
    """
    if fetch_info is None:
        def fetch_info(ticker):
            return yf.Ticker(ticker).info
    if cache_file is None:
        cache_file = CACHE_FILE

    print(f"🔍 PHASE 1: Deep Fundamental Scan of {len(tickers)} stocks...")
    print("⏳ This will take 30-45 minutes. It will only happen ONCE and save to cache.")

    scored_stocks = []
    failures = 0
    for count, ticker in enumerate(tickers, start=1):
        if count % 50 == 0:
            print(f"   -> Scanned {count}/{len(tickers)} stocks...")
        try:
            info = fetch_info(ticker) or {}
            roe = _metric_or_default(info, 'returnOnEquity', 0)
            # A missing P/E becomes 1000 so it can never pass the 0 < pe < 60 test.
            pe = _metric_or_default(info, 'trailingPE', 1000)
            growth = _metric_or_default(info, 'revenueGrowth', 0)

            score = 0
            if roe > 0.15:
                score += 40
            if growth > 0.10:
                score += 30
            if 0 < pe < 60:
                score += 30

            # Keep companies with strong actual business fundamentals
            if score >= 40:
                scored_stocks.append({'ticker': ticker, 'score': score})
        except Exception:
            # Best-effort scan: one bad ticker must not abort the run, but
            # count the failures so a widespread outage is visible.
            failures += 1
        # Delay to prevent IP Ban from Yahoo Finance
        time.sleep(random.uniform(*delay_range))

    if failures:
        print(f"   ⚠️ {failures}/{len(tickers)} lookups failed and were skipped.")

    scored_stocks.sort(key=lambda entry: entry['score'], reverse=True)
    # We take the top 250 fundamentally strongest companies
    elite_tickers = [entry['ticker'] for entry in scored_stocks[:250]]

    if not elite_tickers:
        # Never persist an empty universe: it would be reloaded forever and
        # the scan would never be retried.
        print("⚠️ Phase 1 found no qualifying stocks; cache NOT written.")
        return elite_tickers

    with open(cache_file, 'w') as f:
        json.dump(elite_tickers, f)
    print(f"✅ Phase 1 Complete. Saved {len(elite_tickers)} Elite Stocks to cache.")
    return elite_tickers
def load_fundamental_universe():
    """Return the cached elite universe, running the deep scan if needed.

    The cache at ``CACHE_FILE`` is used only when it exists, parses as JSON
    and holds a non-empty list. A missing, unreadable, corrupt or empty cache
    falls through to ``fundamental_deep_scan`` over every ticker returned by
    ``get_all_csv_tickers``.

    Returns:
        The list of tickers read from the cache, or whatever
        ``fundamental_deep_scan`` returns.
    """
    if os.path.exists(CACHE_FILE):
        try:
            with open(CACHE_FILE, 'r') as f:
                cached = json.load(f)
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError is a ValueError: a truncated or corrupt
            # cache should trigger a rescan rather than crash the backtest.
            print(f"⚠️ Ignoring unreadable cache {CACHE_FILE} ({exc!r}); rescanning.")
        else:
            if isinstance(cached, list) and cached:
                print("📂 Loading Fundamentally Strong Universe from Cache...")
                return cached
            # An empty or malformed cache would otherwise yield a backtest
            # with no stocks at all.
            print(f"⚠️ Cache {CACHE_FILE} is empty or malformed; rescanning.")

    all_tickers = get_all_csv_tickers()
    return fundamental_deep_scan(all_tickers)
 def run_strategy_genome(data, genome):
     if data.empty: return -1.0, []
     stock_cols = [c for c in data.columns if c not in ["^NSEI", "GC=F"]]
     stocks = data[stock_cols]
     lookback = int(genome['lookback'])
     top_n = int(genome['top_n'])
     rebalance_days = int(genome['rebalance'])
     trend_filter = int(genome['trend_filter'])
     max_vol = float(genome['max_vol'])
     momentum = stocks.pct_change(lookback)
     daily_returns = stocks.pct_change(1)
     volatility = daily_returns.rolling(lookback).std() * np.sqrt(252)
     nifty_ma = nifty.rolling(trend_filter).mean()
     curve = [INITIAL_CAPITAL]
                     scores = momentum.loc[curr]
                     vols = volatility.loc[curr]
+                    # MICRO-CAP UNLOCKED: Lowered to ₹10 to catch real fundamental turnarounds.
+                    # (Since Phase 1 ensures they have >15% ROE, a ₹12 stock here is a true hidden gem, not a scam)
+                    valid_prices = stocks.loc[curr] > 10.0
+                    # Must be fundamentally strong (by universe), positive momentum, low vol, NOT a fractional penny stock
+                    valid_stocks = scores[(scores > 0) & (vols < max_vol) & valid_prices]
                     picks = valid_stocks.sort_values(ascending=False).head(top_n).index.tolist()
                     if len(picks) > 0:
                         p1 = stocks.loc[curr, picks]
                         p2 = stocks.loc[nxt, picks]
                         stock_ret = ((p2 - p1) / p1).mean()
                         if pd.isna(stock_ret): stock_ret = 0.0
                         period_ret = stock_ret
             else:
                 g_ret = (gold.loc[nxt] - gold.loc[curr]) / gold.loc[curr]
                 if pd.isna(g_ret): g_ret = 0.0
                 period_ret = g_ret
             if period_ret < -stop_loss: period_ret = -stop_loss
             curr_val = curr_val * (1 + period_ret)
     return cagr, pd.Series(curve, index=sim_dates)
 def backtest_engine():
+    print(f"⚙️ Initializing Phase 2: AI Genetic Backtest...")
     start_time = time.time()
+    tickers = load_fundamental_universe()
     tickers += ["^NSEI", "GC=F"]
     try:
+        print(f"🌍 Fetching 16-Year History for Elite Universe...")
         data = yf.download(tickers, start=START_DATE, progress=False, threads=True)
         if isinstance(data.columns, pd.MultiIndex):
             try: data = data['Close']
             except: pass
+        data = data.ffill().bfill().infer_objects(copy=False)
         if data.empty: return None
         population = []
         for _ in range(30):
             population.append({
                 'lookback': random.choice([10, 20, 30, 45, 60, 90]),
                 'top_n': random.choice([5, 6, 7, 8, 9, 10]),
                 'rebalance': random.choice([3, 5, 7, 10, 14]),
                 'stop_loss': random.choice([0.02, 0.04, 0.06, 0.08]),
         generation = 1
         while (time.time() - start_time) < (SIMULATION_TIME_MIN * 60):
+            print(f"\n🧬 Gen {generation}: Testing 1.0x Portfolios (Strict Fundamentals + Price > ₹10)")
             results = []
             for genome in population:
                 else:
                     stall_count += 1
+                print(f"   🏆 16-Year Average CAGR: {best_cagr*100:.1f}%")
                 print(f"   🧬 DNA: {best_dna['top_n']} Stocks | Bal: {best_dna['rebalance']}d | Regime: {best_dna['trend_filter']}d | Vol Cap: {best_dna['max_vol']*100}%")
                 survivors = [x[2] for x in results[:6]]
                     mutation_rate = 0.8 if stall_count >= 3 else 0.3
                     if random.random() < mutation_rate: child['lookback'] = random.choice([10, 20, 30, 45, 60, 90])
                     if random.random() < mutation_rate: child['top_n'] = random.choice([5, 6, 7, 8, 9, 10])
                     if random.random() < mutation_rate: child['rebalance'] = random.choice([3, 5, 7, 10, 14])
                     if random.random() < mutation_rate: child['stop_loss'] = random.choice([0.02, 0.04, 0.06, 0.08])
         if best_curve is not None:
             plt.figure(figsize=(12, 7))
+            plt.plot(best_curve, label=f"Fundamentally Strong Strategy ({best_cagr*100:.1f}%)", color='blue', linewidth=2)
             nifty = data["^NSEI"]
             bench = (nifty.loc[best_curve.index] / nifty.loc[best_curve.index[0]]) * INITIAL_CAPITAL
             plt.plot(bench, label="Nifty 50 Index", color='gray', linestyle='--')
             plt.yscale('log')
+            plt.title("Renaissance Engine: Quality Momentum (Zero-Leverage, 5-10 Stocks, ₹10+ Floor)")
             plt.ylabel("Portfolio Value (Log Scale)")
             plt.legend()
             plt.grid(True, alpha=0.3)