Riy777 committed on
Commit
95f813e
·
verified ·
1 Parent(s): fd2bc94

Update backtest_engine.py

Browse files
Files changed (1) hide show
  1. backtest_engine.py +123 -188
backtest_engine.py CHANGED
@@ -1,10 +1,5 @@
1
  # ============================================================
2
- # 🧪 backtest_engine.py (V89.0 - GEM-Architect: Dual-Core Reactor)
3
- # ============================================================
4
- # التغيير الجذري:
5
- # 1. التحميل: Async (شبكة فقط).
6
- # 2. المعالجة: Multiprocessing على البيانات الموجودة في الرام.
7
- # النتيجة: 100% CPU Usage على 2 vCPUs.
8
  # ============================================================
9
 
10
  import asyncio
@@ -17,7 +12,6 @@ import os
17
  import gc
18
  import sys
19
  import traceback
20
- import concurrent.futures
21
  from datetime import datetime, timezone
22
  from typing import Dict, Any, List
23
 
@@ -33,127 +27,6 @@ except ImportError:
33
  logging.getLogger('ml_engine').setLevel(logging.WARNING)
34
  CACHE_DIR = "backtest_real_scores"
35
 
36
- # ==============================================================================
37
- # 🔥 PURE CPU WORKER (No Network, No I/O, Just Math)
38
- # ==============================================================================
39
def cpu_crunch_worker(payload):
    """
    Pure-CPU worker: receives pre-downloaded candle data and performs only
    the math — fully isolated from the network so it can never stall on I/O.
    """
    worker_id, candles, symbol = payload

    print(f" 🔥 [Core {worker_id}] Crunching {len(candles)} candles...", flush=True)

    # Rebuild lightweight objects inside the process (no connections):
    # the services are passed as None because no network access is needed here.
    local_dm = DataManager(None, None, None)
    local_proc = MLProcessor(local_dm)

    # Trick: run the ML Processor initialization (model loading) inside the
    # worker process.  A private event loop is used because the methods are async.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(local_proc.initialize())
    except: pass  # network errors are ignored here; only the models matter

    # Data conversion: raw candle lists -> indexed 1-minute DataFrame
    df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    cols = ['open', 'high', 'low', 'close', 'volume']
    df_1m[cols] = df_1m[cols].astype('float32')
    df_1m['datetime'] = pd.to_datetime(df_1m['timestamp'], unit='ms')
    df_1m.set_index('datetime', inplace=True)
    df_1m = df_1m.sort_index()

    frames = {}
    agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
    frames['1m'] = df_1m.copy()
    frames['1m']['timestamp'] = frames['1m'].index.astype(np.int64) // 10**6

    # Resampling 1m candles up to the higher timeframes
    for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
        frames[tf_str] = df_1m.resample(tf_code).agg(agg_dict).dropna()
        frames[tf_str]['timestamp'] = frames[tf_str].index.astype(np.int64) // 10**6

    # Main Analysis Loop
    ai_results = []

    # Analysis should start after a sufficient warm-up (e.g. 500 candles) for
    # indicator accuracy.  Since the data is split across workers, each chunk
    # would need an overlap margin; for simplicity everything is processed.
    valid_indices = frames['5m'].index

    # Helper to avoid recreating object
    def df_to_list(df):
        if df.empty: return []
        return df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].values.tolist()

    local_proc_instance = local_proc  # Cache reference

    count = 0
    total = len(valid_indices)

    # The worker loop itself runs synchronously for speed, but MLProcessor is
    # async, so loop.run_until_complete is used where necessary.

    for t_idx in valid_indices:
        count += 1
        if count % 5000 == 0:
            print(f" 🔥 [Core {worker_id}] Progress: {int(count/total*100)}%", flush=True)

        current_timestamp = int(t_idx.timestamp() * 1000)

        # Point-in-time slicing per timeframe (no data past the cutoff)
        ohlcv_data = {}
        try:
            cutoff = t_idx
            ohlcv_data['1m'] = df_to_list(frames['1m'].loc[:cutoff].tail(500))
            ohlcv_data['5m'] = df_to_list(frames['5m'].loc[:cutoff].tail(200))
            ohlcv_data['15m'] = df_to_list(frames['15m'].loc[:cutoff].tail(200))
            ohlcv_data['1h'] = df_to_list(frames['1h'].loc[:cutoff].tail(200))
            ohlcv_data['4h'] = df_to_list(frames['4h'].loc[:cutoff].tail(100))
            ohlcv_data['1d'] = df_to_list(frames['1d'].loc[:cutoff].tail(50))
        except: continue

        if len(ohlcv_data['1h']) < 60: continue
        current_price = frames['5m'].loc[t_idx]['close']

        # Logic Tree (fast L1 signal classification)
        logic_packet = {
            'symbol': symbol,
            'ohlcv_1h': ohlcv_data['1h'][-60:],
            'ohlcv_15m': ohlcv_data['15m'][-60:],
            'change_24h': 0.0
        }

        logic_result = local_dm._apply_logic_tree(logic_packet)
        signal_type = logic_result.get('type', 'NONE')
        l1_score = logic_result.get('score', 0.0)

        real_titan = 0.5
        if signal_type in ['BREAKOUT', 'REVERSAL']:
            raw_data_for_proc = {'symbol': symbol, 'ohlcv': ohlcv_data, 'current_price': current_price}
            try:
                # Run the AI scorer only on candidate signals
                proc_res = loop.run_until_complete(local_proc_instance.process_compound_signal(raw_data_for_proc))
                if proc_res: real_titan = proc_res.get('titan_score', 0.5)
            except: pass

        ai_results.append({
            'timestamp': current_timestamp,
            'symbol': symbol,
            'close': current_price,
            'real_titan': real_titan,
            'signal_type': signal_type,
            'l1_score': l1_score
        })

    loop.close()
    return ai_results
153
-
154
- # ==============================================================================
155
- # 🧠 Main Class
156
- # ==============================================================================
157
  class HeavyDutyBacktester:
158
  def __init__(self, data_manager, processor):
159
  self.dm = data_manager
@@ -167,19 +40,25 @@ class HeavyDutyBacktester:
167
  self.force_end_date = None
168
 
169
  if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
170
- print(f"🧪 [Backtest V89.0] Dual-Core Reactor (100% CPU Target).")
171
 
172
  def set_date_range(self, start_str, end_str):
173
  self.force_start_date = start_str
174
  self.force_end_date = end_str
175
 
 
 
 
 
176
  # ==============================================================
177
- # ⚡ STEP 1: FAST DOWNLOAD
178
  # ==============================================================
179
  async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
180
- print(f" ⚡ [Network] Burst-Downloading {sym}...", flush=True)
 
181
  limit = 1000
182
  duration_per_batch = limit * 60 * 1000
 
183
  tasks = []
184
  current = start_ms
185
  while current < end_ms:
@@ -187,87 +66,145 @@ class HeavyDutyBacktester:
187
  current += duration_per_batch
188
 
189
  all_candles = []
190
- sem = asyncio.Semaphore(15)
 
191
 
192
  async def _fetch_batch(timestamp):
193
  async with sem:
194
  for _ in range(3):
195
- try: return await self.dm.exchange.fetch_ohlcv(sym, '1m', since=timestamp, limit=limit)
196
- except: await asyncio.sleep(1)
 
 
197
  return []
198
 
199
- chunk_size = 25
200
  for i in range(0, len(tasks), chunk_size):
201
  chunk_tasks = tasks[i:i + chunk_size]
202
  futures = [_fetch_batch(ts) for ts in chunk_tasks]
203
  results = await asyncio.gather(*futures)
204
- for res in results:
 
205
  if res: all_candles.extend(res)
206
- print(f" 📥 Downloaded {int((i+chunk_size)/len(tasks)*100)}%...", flush=True)
 
 
207
 
208
  if not all_candles: return None
209
 
210
- # تصفية وترتيب
211
- unique = {c[0]: c for c in all_candles if c[0] >= start_ms and c[0] <= end_ms}
212
- final_candles = sorted(unique.values(), key=lambda x: x[0])
213
- return final_candles
 
 
 
 
 
 
214
 
215
  # ==============================================================
216
- # 🔥 STEP 2: PARALLEL CPU CRUNCHING
217
  # ==============================================================
218
- async def _dispatch_to_cores(self, sym, candles, start_ms, end_ms):
219
  safe_sym = sym.replace('/', '_')
 
220
  period_suffix = f"{start_ms}_{end_ms}"
221
  scores_file = f"{CACHE_DIR}/{safe_sym}_{period_suffix}_scores.pkl"
222
-
223
  if os.path.exists(scores_file):
224
  print(f" 📂 [{sym}] Data Exists -> Skipping.")
225
  return
226
 
227
- # 1. تقسيم البيانات (Splitting)
228
- cpu_count = os.cpu_count() or 2
229
- # نضيف تداخل (Overlap) بسيط لضمان استمرارية المؤشرات عند نقطة القطع
230
- # سنقسم القائمة ببساطة، العمال سيعيدون حساب المؤشرات
231
- chunk_size = len(candles) // cpu_count
232
- chunks = []
233
-
234
- print(f" ⚙️ [CPU] Splitting {len(candles)} candles into {cpu_count} cores for 100% Load...", flush=True)
235
-
236
- for i in range(cpu_count):
237
- start_idx = i * chunk_size
238
- # للإتقان: نحتاج لتداخل، لكن للتبسيط والسرعة سنقسم مباشرة
239
- # العامل الأول يأخذ من البداية، الثاني يأخذ من (بداية - 500) لضمان الـ Warmup
240
- actual_start = max(0, start_idx - 1000) if i > 0 else 0
241
-
242
- end_idx = (i + 1) * chunk_size if i < cpu_count - 1 else len(candles)
243
- chunk_data = candles[actual_start:end_idx]
244
-
245
- chunks.append((i, chunk_data, sym))
246
-
247
  t0 = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
- # 2. تشغيل المفاعل (Reactor)
250
- loop = asyncio.get_running_loop()
251
- final_results = []
252
 
253
- with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
254
- futures = [loop.run_in_executor(executor, cpu_crunch_worker, chunk) for chunk in chunks]
255
- results = await asyncio.gather(*futures)
256
- for res in results:
257
- final_results.extend(res)
 
 
 
258
 
259
- dt = time.time() - t0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
- # 3. الحفظ
262
- if final_results:
263
- # إزالة التكرارات الناتجة عن الـ Overlap
264
- df_res = pd.DataFrame(final_results).drop_duplicates(subset=['timestamp']).sort_values('timestamp')
265
- df_res.to_pickle(scores_file)
266
- print(f" 💾 [{sym}] SAVED {len(df_res)} signals. (Compute Time: {dt:.1f}s)")
267
  else:
268
  print(f" ⚠️ [{sym}] No signals found.")
269
-
270
- del candles, chunks, results
271
  gc.collect()
272
 
273
  # ==============================================================
@@ -284,19 +221,19 @@ class HeavyDutyBacktester:
284
  return
285
 
286
  for sym in self.TARGET_COINS:
287
- # 1. Download to RAM
288
  candles = await self._fetch_all_data_fast(sym, start_time_ms, end_time_ms)
289
 
290
  if candles:
291
- # 2. Burn CPU
292
- await self._dispatch_to_cores(sym, candles, start_time_ms, end_time_ms)
293
  else:
294
  print(f" ❌ Failed to download data for {sym}")
295
 
296
  gc.collect()
297
 
298
  # ==============================================================
299
- # PHASE 2: Portfolio Digital Twin Engine (Unchanged)
300
  # ==============================================================
301
  @staticmethod
302
  def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
@@ -446,7 +383,7 @@ class HeavyDutyBacktester:
446
  return best['config'], best
447
 
448
  async def run_strategic_optimization_task():
449
- print("\n🧪 [STRATEGIC BACKTEST] Dual-Core Reactor Initiated...")
450
  r2 = R2Service()
451
  dm = DataManager(None, None, r2)
452
  proc = MLProcessor(dm)
@@ -477,6 +414,4 @@ async def run_strategic_optimization_task():
477
  await dm.close()
478
 
479
  if __name__ == "__main__":
480
- import multiprocessing
481
- multiprocessing.freeze_support()
482
  asyncio.run(run_strategic_optimization_task())
 
1
  # ============================================================
2
+ # 🧪 backtest_engine.py (V88.1 - GEM-Architect: RAM-Burst Fix)
 
 
 
 
 
3
  # ============================================================
4
 
5
  import asyncio
 
12
  import gc
13
  import sys
14
  import traceback
 
15
  from datetime import datetime, timezone
16
  from typing import Dict, Any, List
17
 
 
27
  logging.getLogger('ml_engine').setLevel(logging.WARNING)
28
  CACHE_DIR = "backtest_real_scores"
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  class HeavyDutyBacktester:
31
  def __init__(self, data_manager, processor):
32
  self.dm = data_manager
 
40
  self.force_end_date = None
41
 
42
  if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
43
+ print(f"🧪 [Backtest V88.1] RAM-Burst Edition (Fix Applied).")
44
 
45
  def set_date_range(self, start_str, end_str):
46
  self.force_start_date = start_str
47
  self.force_end_date = end_str
48
 
49
+ def df_to_list(self, df):
50
+ if df.empty: return []
51
+ return df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].values.tolist()
52
+
53
  # ==============================================================
54
+ # ⚡ FAST DATA DOWNLOADER (Async Burst)
55
  # ==============================================================
56
  async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
57
+ print(f" ⚡ [Network] Burst-Downloading {sym} ({start_ms} -> {end_ms})...", flush=True)
58
+
59
  limit = 1000
60
  duration_per_batch = limit * 60 * 1000
61
+
62
  tasks = []
63
  current = start_ms
64
  while current < end_ms:
 
66
  current += duration_per_batch
67
 
68
  all_candles = []
69
+ total_batches = len(tasks)
70
+ sem = asyncio.Semaphore(10)
71
 
72
  async def _fetch_batch(timestamp):
73
  async with sem:
74
  for _ in range(3):
75
+ try:
76
+ return await self.dm.exchange.fetch_ohlcv(sym, '1m', since=timestamp, limit=limit)
77
+ except Exception:
78
+ await asyncio.sleep(1)
79
  return []
80
 
81
+ chunk_size = 20
82
  for i in range(0, len(tasks), chunk_size):
83
  chunk_tasks = tasks[i:i + chunk_size]
84
  futures = [_fetch_batch(ts) for ts in chunk_tasks]
85
  results = await asyncio.gather(*futures)
86
+
87
+ for res in results:
88
  if res: all_candles.extend(res)
89
+
90
+ progress = min(100, int((i + chunk_size) / total_batches * 100))
91
+ print(f" 📥 Downloaded {progress}%... (Total: {len(all_candles)} candles)", flush=True)
92
 
93
  if not all_candles: return None
94
 
95
+ filtered = [c for c in all_candles if c[0] >= start_ms and c[0] <= end_ms]
96
+ seen = set()
97
+ unique_candles = []
98
+ for c in filtered:
99
+ if c[0] not in seen:
100
+ unique_candles.append(c)
101
+ seen.add(c[0])
102
+
103
+ unique_candles.sort(key=lambda x: x[0])
104
+ return unique_candles
105
 
106
  # ==============================================================
107
+ # 🧠 CPU PROCESSING (In-Memory)
108
  # ==============================================================
109
+ async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
110
  safe_sym = sym.replace('/', '_')
111
+ # ✅ FIX: Use passed arguments directly
112
  period_suffix = f"{start_ms}_{end_ms}"
113
  scores_file = f"{CACHE_DIR}/{safe_sym}_{period_suffix}_scores.pkl"
114
+
115
  if os.path.exists(scores_file):
116
  print(f" 📂 [{sym}] Data Exists -> Skipping.")
117
  return
118
 
119
+ print(f" ⚙️ [CPU] Processing {len(candles)} candles from RAM...", flush=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  t0 = time.time()
121
+
122
+ df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
123
+ cols = ['open', 'high', 'low', 'close', 'volume']
124
+ df_1m[cols] = df_1m[cols].astype('float32')
125
+ df_1m['datetime'] = pd.to_datetime(df_1m['timestamp'], unit='ms')
126
+ df_1m.set_index('datetime', inplace=True)
127
+ df_1m = df_1m.sort_index()
128
+
129
+ frames = {}
130
+ agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
131
+ frames['1m'] = df_1m.copy()
132
+ frames['1m']['timestamp'] = frames['1m'].index.astype(np.int64) // 10**6
133
+
134
+ for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
135
+ frames[tf_str] = df_1m.resample(tf_code).agg(agg_dict).dropna()
136
+ frames[tf_str]['timestamp'] = frames[tf_str].index.astype(np.int64) // 10**6
137
+
138
+ ai_results = []
139
 
140
+ start_analysis_dt = df_1m.index[0] + pd.Timedelta(minutes=500)
141
+ valid_indices = frames['5m'].loc[start_analysis_dt:].index
 
142
 
143
+ total_steps = len(valid_indices)
144
+ step_count = 0
145
+
146
+ for t_idx in valid_indices:
147
+ step_count += 1
148
+ if step_count % 2000 == 0:
149
+ pct = int((step_count / total_steps) * 100)
150
+ print(f" 🧠 AI Analysis: {pct}%...", flush=True)
151
 
152
+ ohlcv_data = {}
153
+ try:
154
+ cutoff = t_idx
155
+ ohlcv_data['1m'] = self.df_to_list(frames['1m'].loc[:cutoff].tail(500))
156
+ ohlcv_data['5m'] = self.df_to_list(frames['5m'].loc[:cutoff].tail(200))
157
+ ohlcv_data['15m'] = self.df_to_list(frames['15m'].loc[:cutoff].tail(200))
158
+ ohlcv_data['1h'] = self.df_to_list(frames['1h'].loc[:cutoff].tail(200))
159
+ ohlcv_data['4h'] = self.df_to_list(frames['4h'].loc[:cutoff].tail(100))
160
+ ohlcv_data['1d'] = self.df_to_list(frames['1d'].loc[:cutoff].tail(50))
161
+ except: continue
162
+
163
+ if len(ohlcv_data['1h']) < 60: continue
164
+ current_price = frames['5m'].loc[t_idx]['close']
165
+
166
+ logic_packet = {
167
+ 'symbol': sym,
168
+ 'ohlcv_1h': ohlcv_data['1h'][-60:],
169
+ 'ohlcv_15m': ohlcv_data['15m'][-60:],
170
+ 'change_24h': 0.0
171
+ }
172
+ try:
173
+ if len(ohlcv_data['1h']) >= 24:
174
+ p_now = ohlcv_data['1h'][-1][4]
175
+ p_old = ohlcv_data['1h'][-24][4]
176
+ logic_packet['change_24h'] = ((p_now - p_old) / p_old) * 100
177
+ except: pass
178
+
179
+ logic_result = self.dm._apply_logic_tree(logic_packet)
180
+ signal_type = logic_result.get('type', 'NONE')
181
+ l1_score = logic_result.get('score', 0.0)
182
+
183
+ real_titan = 0.5
184
+ if signal_type in ['BREAKOUT', 'REVERSAL']:
185
+ raw_data_for_proc = {'symbol': sym, 'ohlcv': ohlcv_data, 'current_price': current_price}
186
+ try:
187
+ proc_res = await self.proc.process_compound_signal(raw_data_for_proc)
188
+ if proc_res: real_titan = proc_res.get('titan_score', 0.5)
189
+ except: pass
190
+
191
+ ai_results.append({
192
+ 'timestamp': int(t_idx.timestamp() * 1000),
193
+ 'symbol': sym,
194
+ 'close': current_price,
195
+ 'real_titan': real_titan,
196
+ 'signal_type': signal_type,
197
+ 'l1_score': l1_score
198
+ })
199
 
200
+ dt = time.time() - t0
201
+ if ai_results:
202
+ pd.DataFrame(ai_results).to_pickle(scores_file)
203
+ print(f" 💾 [{sym}] Saved {len(ai_results)} signals. (Compute Time: {dt:.1f}s)")
 
 
204
  else:
205
  print(f" ⚠️ [{sym}] No signals found.")
206
+
207
+ del frames, df_1m, candles
208
  gc.collect()
209
 
210
  # ==============================================================
 
221
  return
222
 
223
  for sym in self.TARGET_COINS:
224
+ # 1. Download Phase (Async Burst)
225
  candles = await self._fetch_all_data_fast(sym, start_time_ms, end_time_ms)
226
 
227
  if candles:
228
+ # 2. Processing Phase (Sequential CPU)
229
+ await self._process_data_in_memory(sym, candles, start_time_ms, end_time_ms)
230
  else:
231
  print(f" ❌ Failed to download data for {sym}")
232
 
233
  gc.collect()
234
 
235
  # ==============================================================
236
+ # PHASE 2: Portfolio Digital Twin Engine
237
  # ==============================================================
238
  @staticmethod
239
  def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
 
383
  return best['config'], best
384
 
385
  async def run_strategic_optimization_task():
386
+ print("\n🧪 [STRATEGIC BACKTEST] RAM-Burst Mode Initiated...")
387
  r2 = R2Service()
388
  dm = DataManager(None, None, r2)
389
  proc = MLProcessor(dm)
 
414
  await dm.close()
415
 
416
if __name__ == "__main__":
    # Script entry point: run the full strategic backtest pipeline.
    asyncio.run(run_strategic_optimization_task())