Riy777 committed on
Commit
fd2bc94
·
verified ·
1 Parent(s): 4f4b3b7

Update backtest_engine.py

Browse files
Files changed (1) hide show
  1. backtest_engine.py +188 -123
backtest_engine.py CHANGED
@@ -1,5 +1,10 @@
1
  # ============================================================
2
- # 🧪 backtest_engine.py (V88.1 - GEM-Architect: RAM-Burst Fix)
 
 
 
 
 
3
  # ============================================================
4
 
5
  import asyncio
@@ -12,6 +17,7 @@ import os
12
  import gc
13
  import sys
14
  import traceback
 
15
  from datetime import datetime, timezone
16
  from typing import Dict, Any, List
17
 
@@ -27,6 +33,127 @@ except ImportError:
27
  logging.getLogger('ml_engine').setLevel(logging.WARNING)
28
  CACHE_DIR = "backtest_real_scores"
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  class HeavyDutyBacktester:
31
  def __init__(self, data_manager, processor):
32
  self.dm = data_manager
@@ -40,25 +167,19 @@ class HeavyDutyBacktester:
40
  self.force_end_date = None
41
 
42
  if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
43
- print(f"🧪 [Backtest V88.1] RAM-Burst Edition (Fix Applied).")
44
 
45
    def set_date_range(self, start_str, end_str):
        """Pin the backtest window to an explicit [start_str, end_str] date range."""
        self.force_start_date = start_str
        self.force_end_date = end_str
48
 
49
- def df_to_list(self, df):
50
- if df.empty: return []
51
- return df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].values.tolist()
52
-
53
  # ==============================================================
54
- # ⚡ FAST DATA DOWNLOADER (Async Burst)
55
  # ==============================================================
56
  async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
57
- print(f" ⚡ [Network] Burst-Downloading {sym} ({start_ms} -> {end_ms})...", flush=True)
58
-
59
  limit = 1000
60
  duration_per_batch = limit * 60 * 1000
61
-
62
  tasks = []
63
  current = start_ms
64
  while current < end_ms:
@@ -66,145 +187,87 @@ class HeavyDutyBacktester:
66
  current += duration_per_batch
67
 
68
  all_candles = []
69
- total_batches = len(tasks)
70
- sem = asyncio.Semaphore(10)
71
 
72
  async def _fetch_batch(timestamp):
73
  async with sem:
74
  for _ in range(3):
75
- try:
76
- return await self.dm.exchange.fetch_ohlcv(sym, '1m', since=timestamp, limit=limit)
77
- except Exception:
78
- await asyncio.sleep(1)
79
  return []
80
 
81
- chunk_size = 20
82
  for i in range(0, len(tasks), chunk_size):
83
  chunk_tasks = tasks[i:i + chunk_size]
84
  futures = [_fetch_batch(ts) for ts in chunk_tasks]
85
  results = await asyncio.gather(*futures)
86
-
87
- for res in results:
88
  if res: all_candles.extend(res)
89
-
90
- progress = min(100, int((i + chunk_size) / total_batches * 100))
91
- print(f" 📥 Downloaded {progress}%... (Total: {len(all_candles)} candles)", flush=True)
92
 
93
  if not all_candles: return None
94
 
95
- filtered = [c for c in all_candles if c[0] >= start_ms and c[0] <= end_ms]
96
- seen = set()
97
- unique_candles = []
98
- for c in filtered:
99
- if c[0] not in seen:
100
- unique_candles.append(c)
101
- seen.add(c[0])
102
-
103
- unique_candles.sort(key=lambda x: x[0])
104
- return unique_candles
105
 
106
  # ==============================================================
107
- # 🧠 CPU PROCESSING (In-Memory)
108
  # ==============================================================
109
- async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
110
  safe_sym = sym.replace('/', '_')
111
- # ✅ FIX: Use passed arguments directly
112
  period_suffix = f"{start_ms}_{end_ms}"
113
  scores_file = f"{CACHE_DIR}/{safe_sym}_{period_suffix}_scores.pkl"
114
-
115
  if os.path.exists(scores_file):
116
  print(f" 📂 [{sym}] Data Exists -> Skipping.")
117
  return
118
 
119
- print(f" ⚙️ [CPU] Processing {len(candles)} candles from RAM...", flush=True)
120
- t0 = time.time()
121
-
122
- df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
123
- cols = ['open', 'high', 'low', 'close', 'volume']
124
- df_1m[cols] = df_1m[cols].astype('float32')
125
- df_1m['datetime'] = pd.to_datetime(df_1m['timestamp'], unit='ms')
126
- df_1m.set_index('datetime', inplace=True)
127
- df_1m = df_1m.sort_index()
128
-
129
- frames = {}
130
- agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
131
- frames['1m'] = df_1m.copy()
132
- frames['1m']['timestamp'] = frames['1m'].index.astype(np.int64) // 10**6
133
-
134
- for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
135
- frames[tf_str] = df_1m.resample(tf_code).agg(agg_dict).dropna()
136
- frames[tf_str]['timestamp'] = frames[tf_str].index.astype(np.int64) // 10**6
137
-
138
- ai_results = []
139
 
140
- start_analysis_dt = df_1m.index[0] + pd.Timedelta(minutes=500)
141
- valid_indices = frames['5m'].loc[start_analysis_dt:].index
142
 
143
- total_steps = len(valid_indices)
144
- step_count = 0
145
-
146
- for t_idx in valid_indices:
147
- step_count += 1
148
- if step_count % 2000 == 0:
149
- pct = int((step_count / total_steps) * 100)
150
- print(f" 🧠 AI Analysis: {pct}%...", flush=True)
151
-
152
- ohlcv_data = {}
153
- try:
154
- cutoff = t_idx
155
- ohlcv_data['1m'] = self.df_to_list(frames['1m'].loc[:cutoff].tail(500))
156
- ohlcv_data['5m'] = self.df_to_list(frames['5m'].loc[:cutoff].tail(200))
157
- ohlcv_data['15m'] = self.df_to_list(frames['15m'].loc[:cutoff].tail(200))
158
- ohlcv_data['1h'] = self.df_to_list(frames['1h'].loc[:cutoff].tail(200))
159
- ohlcv_data['4h'] = self.df_to_list(frames['4h'].loc[:cutoff].tail(100))
160
- ohlcv_data['1d'] = self.df_to_list(frames['1d'].loc[:cutoff].tail(50))
161
- except: continue
162
-
163
- if len(ohlcv_data['1h']) < 60: continue
164
- current_price = frames['5m'].loc[t_idx]['close']
165
-
166
- logic_packet = {
167
- 'symbol': sym,
168
- 'ohlcv_1h': ohlcv_data['1h'][-60:],
169
- 'ohlcv_15m': ohlcv_data['15m'][-60:],
170
- 'change_24h': 0.0
171
- }
172
- try:
173
- if len(ohlcv_data['1h']) >= 24:
174
- p_now = ohlcv_data['1h'][-1][4]
175
- p_old = ohlcv_data['1h'][-24][4]
176
- logic_packet['change_24h'] = ((p_now - p_old) / p_old) * 100
177
- except: pass
178
-
179
- logic_result = self.dm._apply_logic_tree(logic_packet)
180
- signal_type = logic_result.get('type', 'NONE')
181
- l1_score = logic_result.get('score', 0.0)
182
 
183
- real_titan = 0.5
184
- if signal_type in ['BREAKOUT', 'REVERSAL']:
185
- raw_data_for_proc = {'symbol': sym, 'ohlcv': ohlcv_data, 'current_price': current_price}
186
- try:
187
- proc_res = await self.proc.process_compound_signal(raw_data_for_proc)
188
- if proc_res: real_titan = proc_res.get('titan_score', 0.5)
189
- except: pass
190
-
191
- ai_results.append({
192
- 'timestamp': int(t_idx.timestamp() * 1000),
193
- 'symbol': sym,
194
- 'close': current_price,
195
- 'real_titan': real_titan,
196
- 'signal_type': signal_type,
197
- 'l1_score': l1_score
198
- })
199
 
 
 
 
 
 
 
200
  dt = time.time() - t0
201
- if ai_results:
202
- pd.DataFrame(ai_results).to_pickle(scores_file)
203
- print(f" 💾 [{sym}] Saved {len(ai_results)} signals. (Compute Time: {dt:.1f}s)")
 
 
 
 
204
  else:
205
  print(f" ⚠️ [{sym}] No signals found.")
206
-
207
- del frames, df_1m, candles
208
  gc.collect()
209
 
210
  # ==============================================================
@@ -221,19 +284,19 @@ class HeavyDutyBacktester:
221
  return
222
 
223
  for sym in self.TARGET_COINS:
224
- # 1. Download Phase (Async Burst)
225
  candles = await self._fetch_all_data_fast(sym, start_time_ms, end_time_ms)
226
 
227
  if candles:
228
- # 2. Processing Phase (Sequential CPU)
229
- await self._process_data_in_memory(sym, candles, start_time_ms, end_time_ms)
230
  else:
231
  print(f" ❌ Failed to download data for {sym}")
232
 
233
  gc.collect()
234
 
235
  # ==============================================================
236
- # PHASE 2: Portfolio Digital Twin Engine
237
  # ==============================================================
238
  @staticmethod
239
  def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
@@ -383,7 +446,7 @@ class HeavyDutyBacktester:
383
  return best['config'], best
384
 
385
  async def run_strategic_optimization_task():
386
- print("\n🧪 [STRATEGIC BACKTEST] RAM-Burst Mode Initiated...")
387
  r2 = R2Service()
388
  dm = DataManager(None, None, r2)
389
  proc = MLProcessor(dm)
@@ -414,4 +477,6 @@ async def run_strategic_optimization_task():
414
  await dm.close()
415
 
416
  if __name__ == "__main__":
 
 
417
  asyncio.run(run_strategic_optimization_task())
 
1
  # ============================================================
2
+ # 🧪 backtest_engine.py (V89.0 - GEM-Architect: Dual-Core Reactor)
3
+ # ============================================================
4
+ # التغيير الجذري:
5
+ # 1. التحميل: Async (شبكة فقط).
6
+ # 2. المعالجة: Multiprocessing على البيانات الموجودة في الرام.
7
+ # النتيجة: 100% CPU Usage على 2 vCPUs.
8
  # ============================================================
9
 
10
  import asyncio
 
17
  import gc
18
  import sys
19
  import traceback
20
+ import concurrent.futures
21
  from datetime import datetime, timezone
22
  from typing import Dict, Any, List
23
 
 
33
  logging.getLogger('ml_engine').setLevel(logging.WARNING)
34
  CACHE_DIR = "backtest_real_scores"
35
 
36
# ==============================================================================
# 🔥 PURE CPU WORKER (No Network, No I/O, Just Math)
# ==============================================================================
def cpu_crunch_worker(payload):
    """Score one pre-downloaded slice of 1m candles on a single CPU core.

    Runs inside a ProcessPoolExecutor and is fully isolated from the
    network so a stalled connection can never block the pool.

    Args:
        payload: tuple ``(worker_id, candles, symbol)`` where ``candles`` is a
            list of 1m OHLCV rows ``[timestamp_ms, open, high, low, close,
            volume]`` (overlap with neighbouring slices is allowed; the
            dispatcher deduplicates by timestamp after the merge).

    Returns:
        List of signal dicts with keys ``timestamp``, ``symbol``, ``close``,
        ``real_titan``, ``signal_type``, ``l1_score`` — possibly empty.
    """
    worker_id, candles, symbol = payload

    print(f" 🔥 [Core {worker_id}] Crunching {len(candles)} candles...", flush=True)

    # Rebuild lightweight service objects inside this process. None is passed
    # for the networked dependencies because only the local logic-tree / ML
    # code paths are exercised here.
    local_dm = DataManager(None, None, None)
    local_proc = MLProcessor(local_dm)

    # The MLProcessor API is async, so give this worker process its own loop.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        # Load the ML models; network failures during init are tolerated —
        # only the locally available models matter for scoring.
        loop.run_until_complete(local_proc.initialize())
    except Exception:
        pass

    try:
        # --- Build the 1m base frame -------------------------------------
        df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
        cols = ['open', 'high', 'low', 'close', 'volume']
        df_1m[cols] = df_1m[cols].astype('float32')
        df_1m['datetime'] = pd.to_datetime(df_1m['timestamp'], unit='ms')
        df_1m.set_index('datetime', inplace=True)
        df_1m = df_1m.sort_index()

        frames = {}
        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
        frames['1m'] = df_1m.copy()
        frames['1m']['timestamp'] = frames['1m'].index.astype(np.int64) // 10**6

        # Resample to the higher timeframes. '5min'/'15min' replace the
        # deprecated 'T' offset aliases (removed in pandas 3.0).
        for tf_str, tf_code in [('5m', '5min'), ('15m', '15min'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
            frames[tf_str] = df_1m.resample(tf_code).agg(agg_dict).dropna()
            frames[tf_str]['timestamp'] = frames[tf_str].index.astype(np.int64) // 10**6

        # --- Main analysis loop ------------------------------------------
        ai_results = []

        # Slices arrive with warm-up overlap from the dispatcher, so every
        # 5m bar in this slice is scored; overlap duplicates are dropped by
        # the dispatcher after the merge.
        valid_indices = frames['5m'].index

        def df_to_list(df):
            # Local helper: OHLCV DataFrame -> list-of-lists packet format.
            if df.empty:
                return []
            return df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].values.tolist()

        count = 0
        total = len(valid_indices)

        for t_idx in valid_indices:
            count += 1
            if count % 5000 == 0:
                print(f" 🔥 [Core {worker_id}] Progress: {int(count/total*100)}%", flush=True)

            current_timestamp = int(t_idx.timestamp() * 1000)

            # Point-in-time slices: only data up to t_idx is visible to the
            # models, preventing look-ahead bias.
            ohlcv_data = {}
            try:
                cutoff = t_idx
                ohlcv_data['1m'] = df_to_list(frames['1m'].loc[:cutoff].tail(500))
                ohlcv_data['5m'] = df_to_list(frames['5m'].loc[:cutoff].tail(200))
                ohlcv_data['15m'] = df_to_list(frames['15m'].loc[:cutoff].tail(200))
                ohlcv_data['1h'] = df_to_list(frames['1h'].loc[:cutoff].tail(200))
                ohlcv_data['4h'] = df_to_list(frames['4h'].loc[:cutoff].tail(100))
                ohlcv_data['1d'] = df_to_list(frames['1d'].loc[:cutoff].tail(50))
            except Exception:
                continue

            if len(ohlcv_data['1h']) < 60:
                continue  # not enough 1h warm-up history for the indicators
            current_price = frames['5m'].loc[t_idx]['close']

            # --- Level-1 logic tree --------------------------------------
            logic_packet = {
                'symbol': symbol,
                'ohlcv_1h': ohlcv_data['1h'][-60:],
                'ohlcv_15m': ohlcv_data['15m'][-60:],
                'change_24h': 0.0
            }
            # Restore the 24h-change input the V88 engine fed the logic tree
            # (close price is column 4 of each OHLCV row).
            try:
                if len(ohlcv_data['1h']) >= 24:
                    p_now = ohlcv_data['1h'][-1][4]
                    p_old = ohlcv_data['1h'][-24][4]
                    logic_packet['change_24h'] = ((p_now - p_old) / p_old) * 100
            except Exception:
                pass

            logic_result = local_dm._apply_logic_tree(logic_packet)
            signal_type = logic_result.get('type', 'NONE')
            l1_score = logic_result.get('score', 0.0)

            # Heavy AI scoring only for candidate signals.
            real_titan = 0.5
            if signal_type in ['BREAKOUT', 'REVERSAL']:
                raw_data_for_proc = {'symbol': symbol, 'ohlcv': ohlcv_data, 'current_price': current_price}
                try:
                    proc_res = loop.run_until_complete(local_proc.process_compound_signal(raw_data_for_proc))
                    if proc_res:
                        real_titan = proc_res.get('titan_score', 0.5)
                except Exception:
                    pass

            ai_results.append({
                'timestamp': current_timestamp,
                'symbol': symbol,
                'close': current_price,
                'real_titan': real_titan,
                'signal_type': signal_type,
                'l1_score': l1_score
            })
    finally:
        # Always release the per-process event loop, even if scoring raises.
        loop.close()

    return ai_results
153
+
154
+ # ==============================================================================
155
+ # 🧠 Main Class
156
+ # ==============================================================================
157
  class HeavyDutyBacktester:
158
  def __init__(self, data_manager, processor):
159
  self.dm = data_manager
 
167
  self.force_end_date = None
168
 
169
  if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
170
+ print(f"🧪 [Backtest V89.0] Dual-Core Reactor (100% CPU Target).")
171
 
172
  def set_date_range(self, start_str, end_str):
173
  self.force_start_date = start_str
174
  self.force_end_date = end_str
175
 
 
 
 
 
176
  # ==============================================================
177
+ # ⚡ STEP 1: FAST DOWNLOAD
178
  # ==============================================================
179
  async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
180
+ print(f" ⚡ [Network] Burst-Downloading {sym}...", flush=True)
 
181
  limit = 1000
182
  duration_per_batch = limit * 60 * 1000
 
183
  tasks = []
184
  current = start_ms
185
  while current < end_ms:
 
187
  current += duration_per_batch
188
 
189
  all_candles = []
190
+ sem = asyncio.Semaphore(15)
 
191
 
192
  async def _fetch_batch(timestamp):
193
  async with sem:
194
  for _ in range(3):
195
+ try: return await self.dm.exchange.fetch_ohlcv(sym, '1m', since=timestamp, limit=limit)
196
+ except: await asyncio.sleep(1)
 
 
197
  return []
198
 
199
+ chunk_size = 25
200
  for i in range(0, len(tasks), chunk_size):
201
  chunk_tasks = tasks[i:i + chunk_size]
202
  futures = [_fetch_batch(ts) for ts in chunk_tasks]
203
  results = await asyncio.gather(*futures)
204
+ for res in results:
 
205
  if res: all_candles.extend(res)
206
+ print(f" 📥 Downloaded {int((i+chunk_size)/len(tasks)*100)}%...", flush=True)
 
 
207
 
208
  if not all_candles: return None
209
 
210
+ # تصفية وترتيب
211
+ unique = {c[0]: c for c in all_candles if c[0] >= start_ms and c[0] <= end_ms}
212
+ final_candles = sorted(unique.values(), key=lambda x: x[0])
213
+ return final_candles
 
 
 
 
 
 
214
 
215
  # ==============================================================
216
+ # 🔥 STEP 2: PARALLEL CPU CRUNCHING
217
  # ==============================================================
218
+ async def _dispatch_to_cores(self, sym, candles, start_ms, end_ms):
219
  safe_sym = sym.replace('/', '_')
 
220
  period_suffix = f"{start_ms}_{end_ms}"
221
  scores_file = f"{CACHE_DIR}/{safe_sym}_{period_suffix}_scores.pkl"
222
+
223
  if os.path.exists(scores_file):
224
  print(f" 📂 [{sym}] Data Exists -> Skipping.")
225
  return
226
 
227
+ # 1. تقسيم البيانات (Splitting)
228
+ cpu_count = os.cpu_count() or 2
229
+ # نضيف تداخل (Overlap) بسيط لضمان استمرارية المؤشرات عند نقطة القطع
230
+ # سنقسم القائمة ببساطة، العمال سيعيدون حساب المؤشرات
231
+ chunk_size = len(candles) // cpu_count
232
+ chunks = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
+ print(f" ⚙️ [CPU] Splitting {len(candles)} candles into {cpu_count} cores for 100% Load...", flush=True)
 
235
 
236
+ for i in range(cpu_count):
237
+ start_idx = i * chunk_size
238
+ # للإتقان: نحتاج لتداخل، لكن للتبسيط والسرعة سنقسم مباشرة
239
+ # العامل الأول يأخذ من البداية، الثاني يأخذ من (بداية - 500) لضمان الـ Warmup
240
+ actual_start = max(0, start_idx - 1000) if i > 0 else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
+ end_idx = (i + 1) * chunk_size if i < cpu_count - 1 else len(candles)
243
+ chunk_data = candles[actual_start:end_idx]
244
+
245
+ chunks.append((i, chunk_data, sym))
246
+
247
+ t0 = time.time()
248
+
249
+ # 2. تشغيل المفاعل (Reactor)
250
+ loop = asyncio.get_running_loop()
251
+ final_results = []
 
 
 
 
 
 
252
 
253
+ with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
254
+ futures = [loop.run_in_executor(executor, cpu_crunch_worker, chunk) for chunk in chunks]
255
+ results = await asyncio.gather(*futures)
256
+ for res in results:
257
+ final_results.extend(res)
258
+
259
  dt = time.time() - t0
260
+
261
+ # 3. الحفظ
262
+ if final_results:
263
+ # إزالة التكرارات الناتجة عن الـ Overlap
264
+ df_res = pd.DataFrame(final_results).drop_duplicates(subset=['timestamp']).sort_values('timestamp')
265
+ df_res.to_pickle(scores_file)
266
+ print(f" 💾 [{sym}] SAVED {len(df_res)} signals. (Compute Time: {dt:.1f}s)")
267
  else:
268
  print(f" ⚠️ [{sym}] No signals found.")
269
+
270
+ del candles, chunks, results
271
  gc.collect()
272
 
273
  # ==============================================================
 
284
  return
285
 
286
  for sym in self.TARGET_COINS:
287
+ # 1. Download to RAM
288
  candles = await self._fetch_all_data_fast(sym, start_time_ms, end_time_ms)
289
 
290
  if candles:
291
+ # 2. Burn CPU
292
+ await self._dispatch_to_cores(sym, candles, start_time_ms, end_time_ms)
293
  else:
294
  print(f" ❌ Failed to download data for {sym}")
295
 
296
  gc.collect()
297
 
298
  # ==============================================================
299
+ # PHASE 2: Portfolio Digital Twin Engine (Unchanged)
300
  # ==============================================================
301
  @staticmethod
302
  def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
 
446
  return best['config'], best
447
 
448
  async def run_strategic_optimization_task():
449
+ print("\n🧪 [STRATEGIC BACKTEST] Dual-Core Reactor Initiated...")
450
  r2 = R2Service()
451
  dm = DataManager(None, None, r2)
452
  proc = MLProcessor(dm)
 
477
  await dm.close()
478
 
479
if __name__ == "__main__":
    # freeze_support() is a no-op everywhere except frozen Windows
    # executables, where it must run before any worker process is spawned.
    import multiprocessing
    multiprocessing.freeze_support()
    asyncio.run(run_strategic_optimization_task())