diff --git "a/versions/2/version 9.ipynb" "b/versions/2/version 9.ipynb" new file mode 100644--- /dev/null +++ "b/versions/2/version 9.ipynb" @@ -0,0 +1,2769 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9b085bca", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-25T11:42:35.310569Z", + "iopub.status.busy": "2025-10-25T11:42:35.310358Z", + "iopub.status.idle": "2025-10-25T11:43:07.305498Z", + "shell.execute_reply": "2025-10-25T11:43:07.304622Z" + }, + "papermill": { + "duration": 32.0093, + "end_time": "2025-10-25T11:43:07.311339", + "exception": false, + "start_time": "2025-10-25T11:42:35.302039", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# ============================================================================\n", + "# CELL 1: PYTORCH GPU SETUP (KAGGLE 30GB GPU)\n", + "# ============================================================================\n", + "\n", + "!pip install -q ta\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "import numpy as np\n", + "import pandas as pd\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "print(\"=\"*70)\n", + "print(\" PYTORCH GPU SETUP (30GB GPU)\")\n", + "print(\"=\"*70)\n", + "\n", + "# ============================================================================\n", + "# GPU CONFIGURATION FOR MAXIMUM PERFORMANCE\n", + "# ============================================================================\n", + "\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "if torch.cuda.is_available():\n", + " # Get GPU info\n", + " gpu_name = torch.cuda.get_device_name(0)\n", + " gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9\n", + " \n", + " print(f\"✅ GPU: {gpu_name}\")\n", + " print(f\"✅ GPU Memory: {gpu_mem:.1f} GB\")\n", + " \n", + " # Enable TF32 for faster matmul 
(Ampere GPUs: A100, RTX 30xx, 40xx)\n", + " torch.backends.cuda.matmul.allow_tf32 = True\n", + " torch.backends.cudnn.allow_tf32 = True\n", + " print(\"✅ TF32: Enabled (2-3x speedup on Ampere)\")\n", + " \n", + " # Enable cuDNN autotuner\n", + " torch.backends.cudnn.benchmark = True\n", + " print(\"✅ cuDNN benchmark: Enabled\")\n", + " \n", + " # Set default tensor type to CUDA\n", + " torch.set_default_device('cuda')\n", + " print(\"✅ Default device: CUDA\")\n", + " \n", + "else:\n", + " print(\"⚠️ No GPU detected, using CPU\")\n", + "\n", + "print(f\"\\n✅ PyTorch: {torch.__version__}\")\n", + "print(f\"✅ Device: {device}\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7730408f", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-25T11:43:07.325051Z", + "iopub.status.busy": "2025-10-25T11:43:07.324573Z", + "iopub.status.idle": "2025-10-25T11:43:16.012274Z", + "shell.execute_reply": "2025-10-25T11:43:16.011145Z" + }, + "papermill": { + "duration": 8.696398, + "end_time": "2025-10-25T11:43:16.013680", + "exception": false, + "start_time": "2025-10-25T11:43:07.317282", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# ============================================================================\n", + "# CELL 2: LOAD DATA + FEATURES + ENVIRONMENT (MULTI-TIMEFRAME)\n", + "# ============================================================================\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import gym\n", + "from gym import spaces\n", + "from ta.momentum import RSIIndicator, StochasticOscillator, ROCIndicator, WilliamsRIndicator\n", + "from ta.trend import MACD, EMAIndicator, SMAIndicator, ADXIndicator, CCIIndicator\n", + "from ta.volatility import BollingerBands, AverageTrueRange\n", + "from ta.volume import OnBalanceVolumeIndicator\n", + "import os\n", + "\n", + "print(\"=\"*70)\n", + "print(\" LOADING MULTI-TIMEFRAME DATA + FEATURES\")\n", + 
"print(\"=\"*70)\n", + "\n", + "# ============================================================================\n", + "# HELPER: CALCULATE INDICATORS FOR ANY TIMEFRAME\n", + "# ============================================================================\n", + "def calculate_indicators(df, suffix=''):\n", + " \"\"\"Calculate all technical indicators for a given dataframe\"\"\"\n", + " data = df.copy()\n", + " s = f'_{suffix}' if suffix else ''\n", + " \n", + " # Momentum\n", + " data[f'rsi_14{s}'] = RSIIndicator(close=data['close'], window=14).rsi() / 100\n", + " data[f'rsi_7{s}'] = RSIIndicator(close=data['close'], window=7).rsi() / 100\n", + " \n", + " stoch = StochasticOscillator(high=data['high'], low=data['low'], close=data['close'], window=14)\n", + " data[f'stoch_k{s}'] = stoch.stoch() / 100\n", + " data[f'stoch_d{s}'] = stoch.stoch_signal() / 100\n", + " \n", + " roc = ROCIndicator(close=data['close'], window=12)\n", + " data[f'roc_12{s}'] = np.tanh(roc.roc() / 100)\n", + " \n", + " williams = WilliamsRIndicator(high=data['high'], low=data['low'], close=data['close'], lbp=14)\n", + " data[f'williams_r{s}'] = (williams.williams_r() + 100) / 100\n", + " \n", + " macd = MACD(close=data['close'])\n", + " data[f'macd{s}'] = np.tanh(macd.macd() / data['close'] * 100)\n", + " data[f'macd_signal{s}'] = np.tanh(macd.macd_signal() / data['close'] * 100)\n", + " data[f'macd_diff{s}'] = np.tanh(macd.macd_diff() / data['close'] * 100)\n", + " \n", + " # Trend\n", + " data[f'sma_20{s}'] = SMAIndicator(close=data['close'], window=20).sma_indicator()\n", + " data[f'sma_50{s}'] = SMAIndicator(close=data['close'], window=50).sma_indicator()\n", + " data[f'ema_12{s}'] = EMAIndicator(close=data['close'], window=12).ema_indicator()\n", + " data[f'ema_26{s}'] = EMAIndicator(close=data['close'], window=26).ema_indicator()\n", + " \n", + " data[f'price_vs_sma20{s}'] = (data['close'] - data[f'sma_20{s}']) / data[f'sma_20{s}']\n", + " data[f'price_vs_sma50{s}'] = (data['close'] - 
data[f'sma_50{s}']) / data[f'sma_50{s}']\n", + " \n", + " adx = ADXIndicator(high=data['high'], low=data['low'], close=data['close'], window=14)\n", + " data[f'adx{s}'] = adx.adx() / 100\n", + " data[f'adx_pos{s}'] = adx.adx_pos() / 100\n", + " data[f'adx_neg{s}'] = adx.adx_neg() / 100\n", + " \n", + " cci = CCIIndicator(high=data['high'], low=data['low'], close=data['close'], window=20)\n", + " data[f'cci{s}'] = np.tanh(cci.cci() / 100)\n", + " \n", + " # Volatility\n", + " bb = BollingerBands(close=data['close'], window=20, window_dev=2)\n", + " data[f'bb_width{s}'] = (bb.bollinger_hband() - bb.bollinger_lband()) / bb.bollinger_mavg()\n", + " data[f'bb_position{s}'] = (data['close'] - bb.bollinger_lband()) / (bb.bollinger_hband() - bb.bollinger_lband())\n", + " \n", + " atr = AverageTrueRange(high=data['high'], low=data['low'], close=data['close'], window=14)\n", + " data[f'atr_percent{s}'] = atr.average_true_range() / data['close']\n", + " \n", + " # Volume\n", + " data[f'volume_ma_20{s}'] = data['volume'].rolling(20).mean()\n", + " data[f'volume_ratio{s}'] = data['volume'] / (data[f'volume_ma_20{s}'] + 1e-8)\n", + " \n", + " obv = OnBalanceVolumeIndicator(close=data['close'], volume=data['volume'])\n", + " data[f'obv_slope{s}'] = (obv.on_balance_volume().diff(5) / (obv.on_balance_volume().shift(5).abs() + 1e-8))\n", + " \n", + " # Price action\n", + " data[f'returns_1{s}'] = data['close'].pct_change()\n", + " data[f'returns_5{s}'] = data['close'].pct_change(5)\n", + " data[f'returns_20{s}'] = data['close'].pct_change(20)\n", + " data[f'volatility_20{s}'] = data[f'returns_1{s}'].rolling(20).std()\n", + " \n", + " data[f'body_size{s}'] = abs(data['close'] - data['open']) / (data['open'] + 1e-8)\n", + " data[f'high_20{s}'] = data['high'].rolling(20).max()\n", + " data[f'low_20{s}'] = data['low'].rolling(20).min()\n", + " data[f'price_position{s}'] = (data['close'] - data[f'low_20{s}']) / (data[f'high_20{s}'] - data[f'low_20{s}'] + 1e-8)\n", + " \n", + " # Drop 
intermediate columns\n", + " cols_to_drop = [c for c in [f'sma_20{s}', f'sma_50{s}', f'ema_12{s}', f'ema_26{s}', \n", + " f'volume_ma_20{s}', f'high_20{s}', f'low_20{s}'] if c in data.columns]\n", + " data = data.drop(columns=cols_to_drop)\n", + " \n", + " return data\n", + "\n", + "def load_and_clean_btc(filepath):\n", + " \"\"\"Load and clean BTC data from CSV\"\"\"\n", + " df = pd.read_csv(filepath)\n", + " column_mapping = {'Open time': 'timestamp', 'Open': 'open', 'High': 'high', \n", + " 'Low': 'low', 'Close': 'close', 'Volume': 'volume'}\n", + " df = df.rename(columns=column_mapping)\n", + " df['timestamp'] = pd.to_datetime(df['timestamp'])\n", + " df.set_index('timestamp', inplace=True)\n", + " df = df[['open', 'high', 'low', 'close', 'volume']]\n", + " \n", + " for col in df.columns:\n", + " df[col] = pd.to_numeric(df[col], errors='coerce')\n", + " \n", + " df = df[df.index >= '2021-01-01']\n", + " df = df[~df.index.duplicated(keep='first')]\n", + " df = df.replace(0, np.nan).dropna().sort_index()\n", + " return df\n", + "\n", + "# ============================================================================\n", + "# 1. LOAD ALL TIMEFRAMES\n", + "# ============================================================================\n", + "data_path = '/kaggle/input/bitcoin-historical-datasets-2018-2024/'\n", + "\n", + "print(\"📊 Loading 15-minute data...\")\n", + "btc_15m = load_and_clean_btc(data_path + 'btc_15m_data_2018_to_2025.csv')\n", + "print(f\" ✅ 15m: {len(btc_15m):,} candles\")\n", + "\n", + "print(\"📊 Loading 1-hour data...\")\n", + "btc_1h = load_and_clean_btc(data_path + 'btc_1h_data_2018_to_2025.csv')\n", + "print(f\" ✅ 1h: {len(btc_1h):,} candles\")\n", + "\n", + "print(\"📊 Loading 4-hour data...\")\n", + "btc_4h = load_and_clean_btc(data_path + 'btc_4h_data_2018_to_2025.csv')\n", + "print(f\" ✅ 4h: {len(btc_4h):,} candles\")\n", + "\n", + "# ============================================================================\n", + "# 2. 
LOAD FEAR & GREED INDEX\n", + "# ============================================================================\n", + "fgi_loaded = False\n", + "\n", + "try:\n", + " fgi_path = '/kaggle/input/btc-usdt-4h-ohlc-fgi-daily-2020/'\n", + " files = os.listdir(fgi_path)\n", + " \n", + " for filename in files:\n", + " if filename.endswith('.csv'):\n", + " fgi_data = pd.read_csv(fgi_path + filename)\n", + " \n", + " time_col = [c for c in fgi_data.columns if 'time' in c.lower() or 'date' in c.lower()]\n", + " if time_col:\n", + " fgi_data['timestamp'] = pd.to_datetime(fgi_data[time_col[0]])\n", + " else:\n", + " fgi_data['timestamp'] = pd.to_datetime(fgi_data.iloc[:, 0])\n", + " \n", + " fgi_data.set_index('timestamp', inplace=True)\n", + " \n", + " fgi_col = [c for c in fgi_data.columns if 'fgi' in c.lower() or 'fear' in c.lower() or 'greed' in c.lower()]\n", + " if fgi_col:\n", + " fgi_data = fgi_data[[fgi_col[0]]].rename(columns={fgi_col[0]: 'fgi'})\n", + " fgi_loaded = True\n", + " print(f\"✅ Fear & Greed loaded: {len(fgi_data):,} values\")\n", + " break\n", + "except:\n", + " pass\n", + "\n", + "if not fgi_loaded:\n", + " fgi_data = pd.DataFrame(index=btc_15m.index)\n", + " fgi_data['fgi'] = 50\n", + " print(\"⚠️ Using neutral FGI values\")\n", + "\n", + "# ============================================================================\n", + "# 3. CALCULATE INDICATORS FOR EACH TIMEFRAME\n", + "# ============================================================================\n", + "print(\"\\n🔧 Calculating indicators for 15m...\")\n", + "data_15m = calculate_indicators(btc_15m, suffix='15m')\n", + "\n", + "print(\"🔧 Calculating indicators for 1h...\")\n", + "data_1h = calculate_indicators(btc_1h, suffix='1h')\n", + "\n", + "print(\"🔧 Calculating indicators for 4h...\")\n", + "data_4h = calculate_indicators(btc_4h, suffix='4h')\n", + "\n", + "# ============================================================================\n", + "# 4. 
MERGE HIGHER TIMEFRAMES INTO 15M (FORWARD FILL)\n", + "# ============================================================================\n", + "print(\"\\n🔗 Merging timeframes...\")\n", + "\n", + "cols_1h = [c for c in data_1h.columns if c not in ['open', 'high', 'low', 'close', 'volume']]\n", + "cols_4h = [c for c in data_4h.columns if c not in ['open', 'high', 'low', 'close', 'volume']]\n", + "\n", + "data = data_15m.copy()\n", + "data = data.join(data_1h[cols_1h], how='left')\n", + "data = data.join(data_4h[cols_4h], how='left')\n", + "\n", + "for col in cols_1h + cols_4h:\n", + " data[col] = data[col].fillna(method='ffill')\n", + "\n", + "# Merge FGI\n", + "data = data.join(fgi_data, how='left')\n", + "data['fgi'] = data['fgi'].fillna(method='ffill').fillna(method='bfill').fillna(50)\n", + "\n", + "# Fear & Greed derived features\n", + "data['fgi_normalized'] = (data['fgi'] - 50) / 50\n", + "data['fgi_change'] = data['fgi'].diff() / 50\n", + "data['fgi_ma7'] = data['fgi'].rolling(7).mean()\n", + "data['fgi_vs_ma'] = (data['fgi'] - data['fgi_ma7']) / 50\n", + "\n", + "# Time features\n", + "data['hour'] = data.index.hour / 24\n", + "data['day_of_week'] = data.index.dayofweek / 7\n", + "data['us_session'] = ((data.index.hour >= 14) & (data.index.hour < 21)).astype(float)\n", + "\n", + "btc_features = data.dropna()\n", + "\n", + "feature_cols = [col for col in btc_features.columns \n", + " if col not in ['open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7']]\n", + "\n", + "print(f\"\\n✅ Multi-timeframe features complete!\")\n", + "print(f\" 15m features: {len([c for c in feature_cols if '15m' in c])}\")\n", + "print(f\" 1h features: {len([c for c in feature_cols if '1h' in c])}\")\n", + "print(f\" 4h features: {len([c for c in feature_cols if '4h' in c])}\")\n", + "print(f\" Other features: {len([c for c in feature_cols if '15m' not in c and '1h' not in c and '4h' not in c])}\")\n", + "print(f\" TOTAL features: {len(feature_cols)}\")\n", + "print(f\" Clean 
data: {len(btc_features):,} candles\")\n", + "\n", + "# ============================================================================\n", + "# 5. TRAIN/VALID/TEST SPLITS\n", + "# ============================================================================\n", + "print(\"\\n📊 Creating Data Splits...\")\n", + "\n", + "train_size = int(len(btc_features) * 0.70)\n", + "valid_size = int(len(btc_features) * 0.15)\n", + "\n", + "train_data = btc_features.iloc[:train_size].copy()\n", + "valid_data = btc_features.iloc[train_size:train_size+valid_size].copy()\n", + "test_data = btc_features.iloc[train_size+valid_size:].copy()\n", + "\n", + "print(f\" Train: {len(train_data):,} | Valid: {len(valid_data):,} | Test: {len(test_data):,}\")\n", + "\n", + "# Store full data for walk-forward\n", + "full_data = btc_features.copy()\n", + "\n", + "# ============================================================================\n", + "# 6. ROLLING NORMALIZATION CLASS\n", + "# ============================================================================\n", + "class RollingNormalizer:\n", + " \"\"\"\n", + " Rolling z-score normalization to prevent look-ahead bias.\n", + " Uses a rolling window to calculate mean and std.\n", + " \"\"\"\n", + " def __init__(self, window_size=2880): # 2880 = 30 days of 15m candles\n", + " self.window_size = window_size\n", + " self.feature_cols = None\n", + " \n", + " def fit_transform(self, df, feature_cols):\n", + " \"\"\"Apply rolling normalization to dataframe\"\"\"\n", + " self.feature_cols = feature_cols\n", + " result = df.copy()\n", + " \n", + " for col in feature_cols:\n", + " rolling_mean = df[col].rolling(window=self.window_size, min_periods=100).mean()\n", + " rolling_std = df[col].rolling(window=self.window_size, min_periods=100).std()\n", + " result[col] = (df[col] - rolling_mean) / (rolling_std + 1e-8)\n", + " \n", + " # Clip extreme values\n", + " result[feature_cols] = result[feature_cols].clip(-5, 5)\n", + " \n", + " # Fill NaN at start with 0 
(neutral)\n", + " result[feature_cols] = result[feature_cols].fillna(0)\n", + " \n", + " return result\n", + "\n", + "print(\"✅ RollingNormalizer class defined\")\n", + "\n", + "# ============================================================================\n", + "# 7. TRADING ENVIRONMENT WITH DSR + RANDOM FLIP AUGMENTATION\n", + "# ============================================================================\n", + "class BitcoinTradingEnv(gym.Env):\n", + " \"\"\"\n", + " Trading environment with:\n", + " - Differential Sharpe Ratio (DSR) reward with warmup\n", + " - Previous action in state (to learn cost of switching)\n", + " - Transaction fee ramping (0 -> 0.1% after warmup)\n", + " - Random flip data augmentation (50% chance to invert market)\n", + " \"\"\"\n", + " \n", + " def __init__(self, df, initial_balance=10000, episode_length=500,\n", + " base_transaction_fee=0.001, # 0.1% max fee\n", + " dsr_eta=0.01): # DSR adaptation rate\n", + " super().__init__()\n", + " self.df = df.reset_index(drop=True)\n", + " self.initial_balance = initial_balance\n", + " self.episode_length = episode_length\n", + " self.base_transaction_fee = base_transaction_fee\n", + " self.dsr_eta = dsr_eta\n", + " \n", + " # Fee ramping (controlled externally via set_fee_multiplier)\n", + " self.fee_multiplier = 0.0\n", + " \n", + " # Training mode for data augmentation (random flips)\n", + " self.training_mode = True\n", + " self.flip_sign = 1.0 # Will be -1 or +1 for augmentation\n", + " \n", + " # DSR warmup period (return 0 reward until EMAs settle)\n", + " self.dsr_warmup_steps = 100\n", + " \n", + " self.feature_cols = [col for col in df.columns \n", + " if col not in ['open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7']]\n", + " \n", + " self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)\n", + " # +6 for: position, total_return, drawdown, returns_1, rsi_14, PREVIOUS_ACTION\n", + " self.observation_space = spaces.Box(\n", + " low=-10, high=10, \n", + " 
shape=(len(self.feature_cols) + 6,), \n", + " dtype=np.float32\n", + " )\n", + " self.reset()\n", + " \n", + " def set_fee_multiplier(self, multiplier):\n", + " \"\"\"Set fee multiplier (0.0 to 1.0) for fee ramping\"\"\"\n", + " self.fee_multiplier = np.clip(multiplier, 0.0, 1.0)\n", + " \n", + " def set_training_mode(self, training=True):\n", + " \"\"\"Set training mode (enables random flips for augmentation)\"\"\"\n", + " self.training_mode = training\n", + " \n", + " @property\n", + " def current_fee(self):\n", + " \"\"\"Current transaction fee based on multiplier\"\"\"\n", + " return self.base_transaction_fee * self.fee_multiplier\n", + " \n", + " def reset(self):\n", + " max_start = len(self.df) - self.episode_length - 1\n", + " self.start_idx = np.random.randint(100, max(101, max_start))\n", + " \n", + " self.current_step = 0\n", + " self.balance = self.initial_balance\n", + " self.position = 0.0\n", + " self.entry_price = 0.0\n", + " self.total_value = self.initial_balance\n", + " self.prev_total_value = self.initial_balance\n", + " self.max_value = self.initial_balance\n", + " \n", + " # Previous action for state\n", + " self.prev_action = 0.0\n", + " \n", + " # DSR variables (Differential Sharpe Ratio)\n", + " self.A_t = 0.0 # EMA of returns\n", + " self.B_t = 0.0 # EMA of squared returns\n", + " \n", + " # Position tracking\n", + " self.long_steps = 0\n", + " self.short_steps = 0\n", + " self.neutral_steps = 0\n", + " self.num_trades = 0\n", + " \n", + " # Random flip for data augmentation (50% chance during training)\n", + " # This inverts price movements: what was bullish becomes bearish\n", + " if self.training_mode:\n", + " self.flip_sign = -1.0 if np.random.random() < 0.5 else 1.0\n", + " else:\n", + " self.flip_sign = 1.0 # No flip during eval\n", + " \n", + " return self._get_obs()\n", + " \n", + " def _get_obs(self):\n", + " idx = self.start_idx + self.current_step\n", + " features = self.df.loc[idx, self.feature_cols].values.copy()\n", + " \n", + 
" # Apply random flip augmentation to return-based features\n", + " # This inverts bullish/bearish signals when flip_sign = -1\n", + " if self.flip_sign < 0:\n", + " for i, col in enumerate(self.feature_cols):\n", + " if any(x in col.lower() for x in ['returns', 'roc', 'macd', 'cci', 'obv', 'sentiment']):\n", + " features[i] *= self.flip_sign\n", + " \n", + " total_return = (self.total_value / self.initial_balance) - 1\n", + " drawdown = (self.max_value - self.total_value) / self.max_value if self.max_value > 0 else 0\n", + " \n", + " # Apply flip to market returns shown in portfolio info\n", + " market_return = self.df.loc[idx, 'returns_1_15m'] * self.flip_sign\n", + " \n", + " portfolio_info = np.array([\n", + " self.position,\n", + " total_return,\n", + " drawdown,\n", + " market_return,\n", + " self.df.loc[idx, 'rsi_14_15m'],\n", + " self.prev_action\n", + " ], dtype=np.float32)\n", + " \n", + " obs = np.concatenate([features, portfolio_info])\n", + " return np.clip(obs, -10, 10).astype(np.float32)\n", + " \n", + " def _calculate_dsr(self, return_t):\n", + " \"\"\"\n", + " Calculate Differential Sharpe Ratio reward.\n", + " DSR = (B_{t-1} * ΔA_t - 0.5 * A_{t-1} * ΔB_t) / (B_{t-1} - A_{t-1}^2)^1.5\n", + " \"\"\"\n", + " eta = self.dsr_eta\n", + " \n", + " A_prev = self.A_t\n", + " B_prev = self.B_t\n", + " \n", + " delta_A = eta * (return_t - A_prev)\n", + " delta_B = eta * (return_t**2 - B_prev)\n", + " \n", + " self.A_t = A_prev + delta_A\n", + " self.B_t = B_prev + delta_B\n", + " \n", + " variance = B_prev - A_prev**2\n", + " \n", + " if variance <= 1e-8:\n", + " return return_t\n", + " \n", + " dsr = (B_prev * delta_A - 0.5 * A_prev * delta_B) / (variance ** 1.5 + 1e-8)\n", + " return np.clip(dsr, -0.5, 0.5)\n", + " \n", + " def step(self, action):\n", + " idx = self.start_idx + self.current_step\n", + " current_price = self.df.loc[idx, 'close']\n", + " target_position = np.clip(action[0], -1.0, 1.0)\n", + " \n", + " self.prev_total_value = 
self.total_value\n", + " \n", + " # Position change logic with transaction costs\n", + " if abs(target_position - self.position) > 0.1:\n", + " if self.position != 0:\n", + " self._close_position(current_price)\n", + " if abs(target_position) > 0.1:\n", + " self._open_position(target_position, current_price)\n", + " self.num_trades += 1\n", + " \n", + " self._update_total_value(current_price)\n", + " self.max_value = max(self.max_value, self.total_value)\n", + " \n", + " # Track position type\n", + " if self.position > 0.1:\n", + " self.long_steps += 1\n", + " elif self.position < -0.1:\n", + " self.short_steps += 1\n", + " else:\n", + " self.neutral_steps += 1\n", + " \n", + " self.current_step += 1\n", + " done = (self.current_step >= self.episode_length) or (self.total_value <= self.initial_balance * 0.5)\n", + " \n", + " # ============ DSR REWARD WITH WARMUP ============\n", + " raw_return = (self.total_value - self.prev_total_value) / self.initial_balance\n", + " \n", + " # Apply flip_sign to reward (if we flipped the market, flip what \"good\" means)\n", + " raw_return *= self.flip_sign\n", + " \n", + " # DSR Warmup: Return tiny penalty for first N steps to let EMAs settle\n", + " if self.current_step < self.dsr_warmup_steps:\n", + " reward = -0.0001 # Tiny constant penalty during warmup\n", + " else:\n", + " reward = self._calculate_dsr(raw_return)\n", + " \n", + " self.prev_action = target_position\n", + " \n", + " obs = self._get_obs()\n", + " info = {\n", + " 'total_value': self.total_value, \n", + " 'position': self.position,\n", + " 'long_steps': self.long_steps,\n", + " 'short_steps': self.short_steps,\n", + " 'neutral_steps': self.neutral_steps,\n", + " 'num_trades': self.num_trades,\n", + " 'current_fee': self.current_fee,\n", + " 'flip_sign': self.flip_sign,\n", + " 'raw_return': raw_return,\n", + " 'dsr_reward': reward\n", + " }\n", + " \n", + " return obs, reward, done, info\n", + " \n", + " def _update_total_value(self, current_price):\n", + " if 
self.position != 0:\n", + " if self.position > 0:\n", + " pnl = self.position * self.initial_balance * (current_price / self.entry_price - 1)\n", + " else:\n", + " pnl = abs(self.position) * self.initial_balance * (1 - current_price / self.entry_price)\n", + " self.total_value = self.balance + pnl\n", + " else:\n", + " self.total_value = self.balance\n", + " \n", + " def _open_position(self, size, price):\n", + " self.position = size\n", + " self.entry_price = price\n", + " fee_cost = abs(size) * self.initial_balance * self.current_fee\n", + " self.balance -= fee_cost\n", + " \n", + " def _close_position(self, price):\n", + " if self.position > 0:\n", + " pnl = self.position * self.initial_balance * (price / self.entry_price - 1)\n", + " else:\n", + " pnl = abs(self.position) * self.initial_balance * (1 - price / self.entry_price)\n", + " \n", + " fee_cost = abs(pnl) * self.current_fee\n", + " self.balance += pnl - fee_cost\n", + " self.position = 0.0\n", + "\n", + "print(\"✅ Environment class ready:\")\n", + "print(\" - DSR reward with 100-step warmup\")\n", + "print(\" - Random flip augmentation (50% probability)\")\n", + "print(\" - Previous action in state\")\n", + "print(\" - Transaction fee ramping\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bab183bf", + "metadata": {}, + "outputs": [], + "source": [ + "# ============================================================================\n", + "# CELL 3: LOAD SENTIMENT DATA\n", + "# ============================================================================\n", + "\n", + "print(\"=\"*70)\n", + "print(\" LOADING SENTIMENT DATA\")\n", + "print(\"=\"*70)\n", + "\n", + "sentiment_file = '/kaggle/input/bitcoin-news-with-sentimen/bitcoin_news_3hour_intervals_with_sentiment.csv'\n", + "\n", + "try:\n", + " sentiment_raw = pd.read_csv(sentiment_file)\n", + " \n", + " def parse_time_range(time_str):\n", + " parts = str(time_str).split(' ')\n", + " if len(parts) >= 
2:\n", + " date = parts[0]\n", + " time_range = parts[1]\n", + " start_time = time_range.split('-')[0]\n", + " return f\"{date} {start_time}:00\"\n", + " return time_str\n", + " \n", + " sentiment_raw['timestamp'] = sentiment_raw['time_interval'].apply(parse_time_range)\n", + " sentiment_raw['timestamp'] = pd.to_datetime(sentiment_raw['timestamp'])\n", + " sentiment_raw = sentiment_raw.set_index('timestamp').sort_index()\n", + " \n", + " sentiment_clean = pd.DataFrame(index=sentiment_raw.index)\n", + " sentiment_clean['prob_bullish'] = pd.to_numeric(sentiment_raw['prob_bullish'], errors='coerce')\n", + " sentiment_clean['prob_bearish'] = pd.to_numeric(sentiment_raw['prob_bearish'], errors='coerce')\n", + " sentiment_clean['prob_neutral'] = pd.to_numeric(sentiment_raw['prob_neutral'], errors='coerce')\n", + " sentiment_clean['confidence'] = pd.to_numeric(sentiment_raw['sentiment_confidence'], errors='coerce')\n", + " sentiment_clean = sentiment_clean.dropna()\n", + " \n", + " # Merge with data\n", + " for df in [train_data, valid_data, test_data]:\n", + " df_temp = df.join(sentiment_clean, how='left')\n", + " for col in ['prob_bullish', 'prob_bearish', 'prob_neutral', 'confidence']:\n", + " df[col] = df_temp[col].fillna(method='ffill').fillna(method='bfill').fillna(0.33 if col != 'confidence' else 0.5)\n", + " \n", + " df['sentiment_net'] = df['prob_bullish'] - df['prob_bearish']\n", + " df['sentiment_strength'] = (df['prob_bullish'] - df['prob_bearish']).abs()\n", + " df['sentiment_weighted'] = df['sentiment_net'] * df['confidence']\n", + " \n", + " print(f\"✅ Sentiment loaded: {len(sentiment_clean):,} records\")\n", + " print(f\"✅ Features added: 7 sentiment features\")\n", + " \n", + "except Exception as e:\n", + " print(f\"⚠️ Sentiment not loaded: {e}\")\n", + " for df in [train_data, valid_data, test_data]:\n", + " df['sentiment_net'] = 0\n", + " df['sentiment_strength'] = 0\n", + " df['sentiment_weighted'] = 0\n", + "\n", + "print(\"=\"*70)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "4640182f", + "metadata": {}, + "outputs": [], + "source": [ + "# ============================================================================\n", + "# CELL 4: ROLLING NORMALIZATION + CREATE ENVIRONMENTS\n", + "# ============================================================================\n", + "\n", + "print(\"=\"*70)\n", + "print(\" ROLLING NORMALIZATION + CREATING ENVIRONMENTS\")\n", + "print(\"=\"*70)\n", + "\n", + "# Get feature columns (all except OHLCV and intermediate columns)\n", + "feature_cols = [col for col in train_data.columns \n", + " if col not in ['open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7']]\n", + "\n", + "print(f\"📊 Total features: {len(feature_cols)}\")\n", + "\n", + "# ============================================================================\n", + "# ROLLING NORMALIZATION (Prevents look-ahead bias!)\n", + "# Uses only past data for normalization at each point\n", + "# ============================================================================\n", + "rolling_normalizer = RollingNormalizer(window_size=2880) # 30 days of 15m data\n", + "\n", + "print(\"🔄 Applying rolling normalization (window=2880)...\")\n", + "\n", + "# Apply rolling normalization to each split\n", + "train_data_norm = rolling_normalizer.fit_transform(train_data, feature_cols)\n", + "valid_data_norm = rolling_normalizer.fit_transform(valid_data, feature_cols) \n", + "test_data_norm = rolling_normalizer.fit_transform(test_data, feature_cols)\n", + "\n", + "print(\"✅ Rolling normalization complete (no look-ahead bias!)\")\n", + "\n", + "# Create environments\n", + "train_env = BitcoinTradingEnv(train_data_norm, episode_length=500)\n", + "valid_env = BitcoinTradingEnv(valid_data_norm, episode_length=500)\n", + "test_env = BitcoinTradingEnv(test_data_norm, episode_length=500)\n", + "\n", + "state_dim = train_env.observation_space.shape[0]\n", + "action_dim = 1\n", + "\n", + "print(f\"\\n✅ Environments 
created:\")\n", + "print(f\" State dim: {state_dim} (features={len(feature_cols)} + portfolio=6)\")\n", + "print(f\" Action dim: {action_dim}\")\n", + "print(f\" Train samples: {len(train_data):,}\")\n", + "print(f\" Fee starts at: 0% (ramps to 0.1% after warmup)\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a48bf946", + "metadata": { + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", + "execution": { + "iopub.execute_input": "2025-10-25T11:43:16.495113Z", + "iopub.status.busy": "2025-10-25T11:43:16.494816Z", + "iopub.status.idle": "2025-10-25T11:43:16.516176Z", + "shell.execute_reply": "2025-10-25T11:43:16.515329Z" + }, + "papermill": { + "duration": 0.029962, + "end_time": "2025-10-25T11:43:16.517375", + "exception": false, + "start_time": "2025-10-25T11:43:16.487413", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# ============================================================================\n", + "# CELL 5: PYTORCH SAC AGENT (GPU OPTIMIZED)\n", + "# ============================================================================\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "from torch.distributions import Normal\n", + "\n", + "print(\"=\"*70)\n", + "print(\" PYTORCH SAC AGENT\")\n", + "print(\"=\"*70)\n", + "\n", + "# ============================================================================\n", + "# ACTOR NETWORK (Policy)\n", + "# ============================================================================\n", + "class Actor(nn.Module):\n", + " def __init__(self, state_dim, action_dim, hidden_dim=512):\n", + " super().__init__()\n", + " # Larger network for 90+ features: 512 -> 512 -> 256 -> output\n", + " self.fc1 = nn.Linear(state_dim, hidden_dim)\n", + " self.fc2 = nn.Linear(hidden_dim, hidden_dim)\n", + " self.fc3 = 
nn.Linear(hidden_dim, hidden_dim // 2) # Taper down\n", + " \n", + " self.mean = nn.Linear(hidden_dim // 2, action_dim)\n", + " self.log_std = nn.Linear(hidden_dim // 2, action_dim)\n", + " \n", + " self.LOG_STD_MIN = -20\n", + " self.LOG_STD_MAX = 2\n", + " \n", + " def forward(self, state):\n", + " x = F.relu(self.fc1(state))\n", + " x = F.relu(self.fc2(x))\n", + " x = F.relu(self.fc3(x))\n", + " \n", + " mean = self.mean(x)\n", + " log_std = self.log_std(x)\n", + " log_std = torch.clamp(log_std, self.LOG_STD_MIN, self.LOG_STD_MAX)\n", + " \n", + " return mean, log_std\n", + " \n", + " def sample(self, state):\n", + " mean, log_std = self.forward(state)\n", + " std = log_std.exp()\n", + " \n", + " normal = Normal(mean, std)\n", + " x_t = normal.rsample() # Reparameterization trick\n", + " action = torch.tanh(x_t)\n", + " \n", + " # Log prob with tanh correction\n", + " log_prob = normal.log_prob(x_t)\n", + " log_prob -= torch.log(1 - action.pow(2) + 1e-6)\n", + " log_prob = log_prob.sum(dim=-1, keepdim=True)\n", + " \n", + " return action, log_prob, mean\n", + "\n", + "# ============================================================================\n", + "# CRITIC NETWORK (Twin Q-functions)\n", + "# ============================================================================\n", + "class Critic(nn.Module):\n", + " def __init__(self, state_dim, action_dim, hidden_dim=512):\n", + " super().__init__()\n", + " # Q1 network: 512 -> 512 -> 256 -> 1\n", + " self.fc1_1 = nn.Linear(state_dim + action_dim, hidden_dim)\n", + " self.fc1_2 = nn.Linear(hidden_dim, hidden_dim)\n", + " self.fc1_3 = nn.Linear(hidden_dim, hidden_dim // 2)\n", + " self.fc1_out = nn.Linear(hidden_dim // 2, 1)\n", + " \n", + " # Q2 network: 512 -> 512 -> 256 -> 1\n", + " self.fc2_1 = nn.Linear(state_dim + action_dim, hidden_dim)\n", + " self.fc2_2 = nn.Linear(hidden_dim, hidden_dim)\n", + " self.fc2_3 = nn.Linear(hidden_dim, hidden_dim // 2)\n", + " self.fc2_out = nn.Linear(hidden_dim // 2, 1)\n", + " 
class SACAgent:
    """Soft Actor-Critic agent with twin Q-critics and auto-tuned entropy weight.

    Holds an ``Actor``, a ``Critic`` and a target copy of the critic, one Adam
    optimizer per trainable component, and a learnable ``log_alpha`` whose
    exponential is the entropy temperature.
    """

    def __init__(self, state_dim, action_dim, device,
                 actor_lr=3e-4, critic_lr=3e-4, alpha_lr=3e-4,
                 gamma=0.99, tau=0.005, initial_alpha=0.2):
        """Build networks, optimizers and the entropy temperature.

        Args:
            state_dim: Size of the observation vector.
            action_dim: Size of the action vector.
            device: torch device the networks and ``log_alpha`` live on.
            actor_lr, critic_lr, alpha_lr: Adam learning rates.
            gamma: Discount factor used in the critic target.
            tau: Polyak coefficient for the target-critic soft update.
            initial_alpha: Starting value of the entropy temperature.
        """
        self.device = device
        self.gamma = gamma
        self.tau = tau
        self.action_dim = action_dim

        # Networks; the target critic starts as an exact copy of the critic.
        self.actor = Actor(state_dim, action_dim).to(device)
        self.critic = Critic(state_dim, action_dim).to(device)
        self.critic_target = Critic(state_dim, action_dim).to(device)
        self.critic_target.load_state_dict(self.critic.state_dict())

        # Optimizers
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=actor_lr)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=critic_lr)

        # Entropy (auto-tuning alpha); the target entropy is -action_dim.
        self.target_entropy = -action_dim
        self.log_alpha = torch.tensor(np.log(initial_alpha), requires_grad=True, device=device)
        self.alpha_optimizer = optim.Adam([self.log_alpha], lr=alpha_lr)

    @property
    def alpha(self):
        """Current entropy temperature, ``exp(log_alpha)``."""
        return self.log_alpha.exp()

    def _as_float_tensor(self, array):
        """Convert an array-like to a float32 tensor on ``self.device``."""
        return torch.as_tensor(np.asarray(array), dtype=torch.float32, device=self.device)

    def select_action(self, state, deterministic=False):
        """Return a numpy action for a single (unbatched) ``state``.

        With ``deterministic=True`` the action is ``tanh`` of the policy mean;
        otherwise it is sampled from the policy.
        """
        with torch.no_grad():
            state = self._as_float_tensor(state).unsqueeze(0)
            if deterministic:
                mean, _ = self.actor(state)
                action = torch.tanh(mean)
            else:
                action, _, _ = self.actor.sample(state)
        return action.cpu().numpy()[0]

    def update(self, batch):
        """Run one critic, actor, alpha and target-network update.

        Args:
            batch: ``(states, actions, rewards, next_states, dones)``.
                ``rewards`` and ``dones`` may be shaped ``(batch,)`` or
                ``(batch, 1)``; both are normalised to ``(batch, 1)``.

        Returns:
            dict with ``critic_loss``, ``actor_loss``, ``alpha_loss`` and
            ``alpha`` as Python floats.
        """
        states, actions, rewards, next_states, dones = batch

        states = self._as_float_tensor(states)
        actions = self._as_float_tensor(actions)
        next_states = self._as_float_tensor(next_states)
        # reshape(-1, 1) rather than unsqueeze(1): a (batch, 1) input would
        # otherwise become (batch, 1, 1) and broadcast against the (batch, 1)
        # Q-values into a (batch, batch, 1) target, mixing rewards across
        # unrelated transitions.
        rewards = self._as_float_tensor(rewards).reshape(-1, 1)
        dones = self._as_float_tensor(dones).reshape(-1, 1)

        # ============ Update Critic ============
        with torch.no_grad():
            next_actions, next_log_probs, _ = self.actor.sample(next_states)
            q1_target, q2_target = self.critic_target(next_states, next_actions)
            q_target = torch.min(q1_target, q2_target)
            target_q = rewards + (1 - dones) * self.gamma * (q_target - self.alpha * next_log_probs)

        q1, q2 = self.critic(states, actions)
        critic_loss = F.mse_loss(q1, target_q) + F.mse_loss(q2, target_q)

        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # ============ Update Actor ============
        new_actions, log_probs, _ = self.actor.sample(states)
        q1_new, q2_new = self.critic(states, new_actions)
        q_new = torch.min(q1_new, q2_new)
        # alpha is a constant for the policy step; it is trained separately below.
        actor_loss = (self.alpha.detach() * log_probs - q_new).mean()

        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # ============ Update Alpha ============
        alpha_loss = -(self.log_alpha * (log_probs.detach() + self.target_entropy)).mean()

        self.alpha_optimizer.zero_grad()
        alpha_loss.backward()
        self.alpha_optimizer.step()

        # ============ Update Target Network ============
        with torch.no_grad():
            for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
                target_param.copy_(self.tau * param + (1 - self.tau) * target_param)

        return {
            'critic_loss': critic_loss.item(),
            'actor_loss': actor_loss.item(),
            'alpha_loss': alpha_loss.item(),
            'alpha': self.alpha.item()
        }
dtype=np.float32)\n", + " self.next_states = np.zeros((max_size, state_dim), dtype=np.float32)\n", + " self.dones = np.zeros((max_size, 1), dtype=np.float32)\n", + " \n", + " mem_gb = (self.states.nbytes + self.actions.nbytes + self.rewards.nbytes + \n", + " self.next_states.nbytes + self.dones.nbytes) / 1e9\n", + " print(f\"📦 Buffer capacity: {max_size:,} | Memory: {mem_gb:.2f} GB\")\n", + " \n", + " def add(self, state, action, reward, next_state, done):\n", + " self.states[self.ptr] = state\n", + " self.actions[self.ptr] = action\n", + " self.rewards[self.ptr] = reward\n", + " self.next_states[self.ptr] = next_state\n", + " self.dones[self.ptr] = done\n", + " \n", + " self.ptr = (self.ptr + 1) % self.max_size\n", + " self.size = min(self.size + 1, self.max_size)\n", + " \n", + " def sample(self, batch_size):\n", + " idx = np.random.randint(0, self.size, size=batch_size)\n", + " return (\n", + " self.states[idx],\n", + " self.actions[idx],\n", + " self.rewards[idx],\n", + " self.next_states[idx],\n", + " self.dones[idx]\n", + " )\n", + "\n", + "print(\"✅ ReplayBuffer defined\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f88fc10c", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-25T11:43:16.566540Z", + "iopub.status.busy": "2025-10-25T11:43:16.565845Z", + "iopub.status.idle": "2025-10-25T11:43:18.815426Z", + "shell.execute_reply": "2025-10-25T11:43:18.814475Z" + }, + "papermill": { + "duration": 2.258566, + "end_time": "2025-10-25T11:43:18.816724", + "exception": false, + "start_time": "2025-10-25T11:43:16.558158", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# ============================================================================\n", + "# CELL 7: CREATE AGENT + BUFFER\n", + "# ============================================================================\n", + "\n", + "print(\"=\"*70)\n", + "print(\" CREATING AGENT + BUFFER\")\n", + "print(\"=\"*70)\n", 
def _fee_multiplier_at(step, fee_warmup_steps, fee_ramp_steps):
    """Return the curriculum fee multiplier in [0, 1] for a global step."""
    if step < fee_warmup_steps:
        return 0.0
    if fee_ramp_steps <= 0:
        # No ramp requested: jump straight to full fees instead of dividing by zero.
        return 1.0
    return min(1.0, (step - fee_warmup_steps) / fee_ramp_steps)


def _save_checkpoint(agent, path):
    """Serialize actor, critic, target critic and log_alpha to ``path``."""
    torch.save({
        'actor': agent.actor.state_dict(),
        'critic': agent.critic.state_dict(),
        'critic_target': agent.critic_target.state_dict(),
        'log_alpha': agent.log_alpha,
    }, path)


def train_sac(agent, env, valid_env, buffer,
              total_timesteps=700_000,
              warmup_steps=10_000,
              batch_size=1024,
              update_freq=1,
              fee_warmup_steps=100_000,  # When to start fee ramping
              fee_ramp_steps=100_000,    # Steps to ramp from 0 to max fee
              save_path="sac_v9"):
    """Train ``agent`` on ``env`` and evaluate on ``valid_env`` after every episode.

    For the first ``warmup_steps`` steps actions come from
    ``env.action_space.sample()``; afterwards the agent acts stochastically
    and is updated every ``update_freq`` steps from ``buffer``.  The fee
    multiplier is 0 until ``fee_warmup_steps`` and then rises linearly to 1
    over ``fee_ramp_steps`` (immediately if ``fee_ramp_steps <= 0``).

    Checkpoints are written to ``{save_path}_best_train.pt``,
    ``{save_path}_best_eval.pt`` and ``{save_path}_final.pt``.

    Args:
        agent: Object exposing ``select_action``, ``update``, ``alpha``,
            ``device`` and the networks saved in the checkpoint.
        env: Training environment (old gym-style 4-tuple ``step``).
        valid_env: Validation environment passed to ``evaluate_agent``.
        buffer: Replay buffer with ``add``, ``sample`` and ``size``.
        total_timesteps: Number of environment steps to run.
        warmup_steps: Steps of random actions before learning starts.
        batch_size: Replay batch size per update.
        update_freq: Update the agent every this many steps (must be >= 1).
        fee_warmup_steps: Steps with zero fees.
        fee_ramp_steps: Steps over which fees ramp from 0 to full.
        save_path: Prefix for checkpoint files.

    Returns:
        tuple: ``(episode_rewards, eval_rewards)`` lists, one entry per episode.

    Raises:
        ValueError: If ``update_freq`` is smaller than 1.
    """
    if update_freq < 1:
        raise ValueError(f"update_freq must be >= 1, got {update_freq}")

    print(f"\n🚀 Training Configuration:")
    print(f" Total steps: {total_timesteps:,}")
    print(f" Warmup: {warmup_steps:,}")
    print(f" Batch size: {batch_size}")
    print(f" Fee warmup: {fee_warmup_steps:,} steps (then ramp over {fee_ramp_steps:,})")
    print(f" Data augmentation: Random flips (50% probability)")
    print(f" DSR warmup: 100 steps per episode (0 reward)")
    print(f" Device: {agent.device}")

    # Set training modes for augmentation
    env.set_training_mode(True)         # Enable random flips
    valid_env.set_training_mode(False)  # No augmentation for validation

    # Starting capital used for PnL%; falls back to the previous hard-coded value.
    initial_balance = getattr(env, 'initial_balance', 10000)

    # Stats tracking
    episode_rewards = []
    episode_lengths = []
    eval_rewards = []
    best_reward = -np.inf
    best_eval = -np.inf

    # Training stats
    critic_losses = []
    actor_losses = []

    state = env.reset()
    episode_reward = 0
    episode_length = 0
    episode_count = 0

    start_time = time.time()

    pbar = tqdm(range(total_timesteps), desc="Training")

    for step in pbar:
        # ============ FEE RAMPING CURRICULUM ============
        fee_multiplier = _fee_multiplier_at(step, fee_warmup_steps, fee_ramp_steps)

        env.set_fee_multiplier(fee_multiplier)
        # NOTE(review): validation runs under the same curriculum multiplier, so
        # eval rewards from different phases (and therefore `best_eval`) are
        # presumably not directly comparable -- confirm this is intended.
        valid_env.set_fee_multiplier(fee_multiplier)

        # Select action
        if step < warmup_steps:
            action = env.action_space.sample()
        else:
            action = agent.select_action(state, deterministic=False)

        # Step environment
        next_state, reward, done, info = env.step(action)

        # Store transition
        buffer.add(state, action, reward, next_state, float(done))

        state = next_state
        episode_reward += reward
        episode_length += 1

        # Update agent
        if step >= warmup_steps and step % update_freq == 0:
            stats = agent.update(buffer.sample(batch_size))
            critic_losses.append(stats['critic_loss'])
            actor_losses.append(stats['actor_loss'])

        if not done:
            continue

        # ============ Episode end ============
        episode_rewards.append(episode_reward)
        episode_lengths.append(episode_length)
        episode_count += 1

        # Calculate episode stats
        final_value = info.get('total_value', initial_balance)
        pnl_pct = (final_value / initial_balance - 1) * 100
        num_trades = info.get('num_trades', 0)
        current_fee = info.get('current_fee', 0) * 100  # Convert to %

        # Get position distribution
        long_steps = info.get('long_steps', 0)
        short_steps = info.get('short_steps', 0)
        neutral_steps = info.get('neutral_steps', 0)
        total_active = long_steps + short_steps
        long_pct = (long_steps / total_active * 100) if total_active > 0 else 0
        short_pct = (short_steps / total_active * 100) if total_active > 0 else 0

        # Update progress bar with detailed info
        avg_reward = np.mean(episode_rewards[-10:]) if len(episode_rewards) >= 10 else episode_reward
        avg_critic = np.mean(critic_losses[-100:]) if critic_losses else 0

        pbar.set_postfix({
            'ep': episode_count,
            'R': f'{episode_reward:.4f}',
            'avg10': f'{avg_reward:.4f}',
            'PnL%': f'{pnl_pct:+.2f}',
            'L/S': f'{long_pct:.0f}/{short_pct:.0f}',
            'fee%': f'{current_fee:.3f}',
            'α': f'{agent.alpha.item():.3f}',
        })

        # ============ EVAL EVERY EPISODE ============
        eval_reward, eval_pnl, eval_long_pct = evaluate_agent(agent, valid_env, n_episodes=1)
        eval_rewards.append(eval_reward)

        # Print detailed episode summary
        elapsed = time.time() - start_time
        steps_per_sec = (step + 1) / max(elapsed, 1e-9)

        print(f"\n{'='*60}")
        print(f"📊 Episode {episode_count} Complete | Step {step+1:,}/{total_timesteps:,}")
        print(f"{'='*60}")
        print(f" 🎮 TRAIN:")
        print(f" Reward (DSR): {episode_reward:.4f} | PnL: {pnl_pct:+.2f}%")
        print(f" Length: {episode_length} steps | Trades: {num_trades}")
        print(f" Avg (last 10): {avg_reward:.4f}")
        print(f" 📊 POSITION BALANCE:")
        print(f" Long: {long_steps} steps ({long_pct:.1f}%)")
        print(f" Short: {short_steps} steps ({short_pct:.1f}%)")
        print(f" Neutral: {neutral_steps} steps")
        print(f" 💰 FEE CURRICULUM:")
        print(f" Current fee: {current_fee:.4f}% (multiplier: {fee_multiplier:.2f})")
        print(f" 📈 EVAL (validation):")
        print(f" Reward: {eval_reward:.4f} | PnL: {eval_pnl:+.2f}%")
        print(f" Long%: {eval_long_pct:.1f}%")
        print(f" Avg (last 5): {np.mean(eval_rewards[-5:]):.4f}")
        print(f" 🧠 AGENT:")
        print(f" Alpha: {agent.alpha.item():.4f}")
        print(f" Critic loss: {avg_critic:.5f}")
        print(f" ⚡ Speed: {steps_per_sec:.0f} steps/sec")
        print(f" 💾 Buffer: {buffer.size:,} transitions")

        # Save best train
        if episode_reward > best_reward:
            best_reward = episode_reward
            _save_checkpoint(agent, f"{save_path}_best_train.pt")
            print(f" 🏆 NEW BEST TRAIN: {best_reward:.4f}")

        # Save best eval
        if eval_reward > best_eval:
            best_eval = eval_reward
            _save_checkpoint(agent, f"{save_path}_best_eval.pt")
            print(f" 🏆 NEW BEST EVAL: {best_eval:.4f}")

        # Reset
        state = env.reset()
        episode_reward = 0
        episode_length = 0

    # Final save
    _save_checkpoint(agent, f"{save_path}_final.pt")

    total_time = time.time() - start_time
    print(f"\n{'='*70}")
    print(f" TRAINING COMPLETE")
    print(f"{'='*70}")
    print(f" Total time: {total_time/60:.1f} min")
    print(f" Episodes: {episode_count}")
    print(f" Best train reward (DSR): {best_reward:.4f}")
    print(f" Best eval reward (DSR): {best_eval:.4f}")
    print(f" Avg speed: {total_timesteps/max(total_time, 1e-9):.0f} steps/sec")

    return episode_rewards, eval_rewards
Calculate long percentage\n", + " long_steps = info.get('long_steps', 0)\n", + " short_steps = info.get('short_steps', 0)\n", + " total_active = long_steps + short_steps\n", + " total_long_pct += (long_steps / total_active * 100) if total_active > 0 else 0\n", + " \n", + " return total_reward / n_episodes, total_pnl / n_episodes, total_long_pct / n_episodes\n", + "\n", + "\n", + "print(\"✅ Training function ready:\")\n", + "print(\" - Per-episode eval + position tracking\")\n", + "print(\" - DSR reward (risk-adjusted)\")\n", + "print(\" - Fee ramping: 0% → 0.1% after 100k steps\")\n", + "print(\" - Model checkpointing\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0097e547", + "metadata": {}, + "outputs": [], + "source": [ + "# ============================================================================\n", + "# CELL 9: START TRAINING\n", + "# ============================================================================\n", + "\n", + "print(\"=\"*70)\n", + "print(\" STARTING SAC TRAINING\")\n", + "print(\"=\"*70)\n", + "\n", + "# Training parameters\n", + "TOTAL_STEPS = 500_000 # 500K steps\n", + "WARMUP_STEPS = 10_000 # 10K random warmup\n", + "BATCH_SIZE = 256 # Standard batch size\n", + "UPDATE_FREQ = 1 # Update every step\n", + "FEE_WARMUP = 100_000 # Start fee ramping after 100k steps\n", + "FEE_RAMP = 100_000 # Ramp fees over 100k steps (0 → 0.1%)\n", + "\n", + "print(f\"\\n📋 Configuration:\")\n", + "print(f\" Steps: {TOTAL_STEPS:,}\")\n", + "print(f\" Batch: {BATCH_SIZE}\")\n", + "print(f\" Train env: {len(train_data):,} candles\")\n", + "print(f\" Valid env: {len(valid_data):,} candles\")\n", + "print(f\" Device: {device}\")\n", + "print(f\"\\n💰 Fee Curriculum:\")\n", + "print(f\" Steps 0-{FEE_WARMUP:,}: 0% fee (learn basic trading)\")\n", + "print(f\" Steps {FEE_WARMUP:,}-{FEE_WARMUP+FEE_RAMP:,}: Ramp 0%→0.1%\")\n", + "print(f\" Steps {FEE_WARMUP+FEE_RAMP:,}+: Full 0.1% fee\")\n", + "print(f\"\\n🎯 Reward: 
def load_model(agent, checkpoint_path, name="model"):
    """Load actor/critic weights (and ``log_alpha`` if present) into ``agent``.

    The checkpoint is mapped onto ``agent.device``.  ``log_alpha`` is copied
    *into* the agent's existing tensor rather than rebound, so an optimizer
    that already holds a reference to ``agent.log_alpha`` keeps training the
    loaded value.

    Args:
        agent: Object with ``actor``, ``critic``, ``critic_target`` modules,
            a ``log_alpha`` tensor and a ``device`` attribute.
        checkpoint_path: Path of a file written by ``torch.save`` with the
            keys ``'actor'``, ``'critic'``, ``'critic_target'`` and
            optionally ``'log_alpha'``.
        name: Label used in the status message.

    Returns:
        bool: ``True`` on success, ``False`` if anything failed (the error is
        printed, not raised).
    """
    try:
        checkpoint = torch.load(checkpoint_path, map_location=agent.device)

        # Look up every required entry first so a missing key fails before
        # any network has been overwritten.
        actor_state = checkpoint['actor']
        critic_state = checkpoint['critic']
        critic_target_state = checkpoint['critic_target']

        agent.actor.load_state_dict(actor_state)
        agent.critic.load_state_dict(critic_state)
        agent.critic_target.load_state_dict(critic_target_state)

        if 'log_alpha' in checkpoint:
            with torch.no_grad():
                # In-place copy preserves tensor identity, requires_grad and device.
                agent.log_alpha.copy_(torch.as_tensor(checkpoint['log_alpha']))

        print(f"✅ {name} loaded successfully!")
        return True
    except Exception as e:
        # Best-effort loader: callers branch on the boolean result.
        print(f"❌ Error loading {name}: {e}")
        return False
============================================================================\n", + "# 1. Training Configuration Card\n", + "# ============================================================================\n", + "ax1 = fig.add_subplot(gs[0, 0])\n", + "ax1.set_facecolor(colors['bg'])\n", + "ax1.axis('off')\n", + "\n", + "config_text = f\"\"\"\n", + "╔══════════════════════════════════════╗\n", + "║ 🎯 TRAINING CONFIGURATION ║\n", + "╠══════════════════════════════════════╣\n", + "║ ║\n", + "║ Total Steps: 500,000 ║\n", + "║ Episodes: 1,000 ║\n", + "║ Batch Size: 256 ║\n", + "║ Episode Length: 500 steps ║\n", + "║ ║\n", + "║ 📊 Network Architecture ║\n", + "║ Actor: 512 → 512 → 256 → 1 ║\n", + "║ Critic: 512 → 512 → 256 → 1 (x2) ║\n", + "║ ║\n", + "║ 💰 Fee Curriculum ║\n", + "║ 0-100k: 0% fee ║\n", + "║ 100k-200k: Ramp to 0.1% ║\n", + "║ 200k+: Full 0.1% fee ║\n", + "║ ║\n", + "║ 🎲 Data Augmentation ║\n", + "║ Random Flip: 50% probability ║\n", + "║ DSR Warmup: 100 steps ║\n", + "╚══════════════════════════════════════╝\n", + "\"\"\"\n", + "ax1.text(0.5, 0.5, config_text, transform=ax1.transAxes, fontsize=10,\n", + " verticalalignment='center', horizontalalignment='center',\n", + " fontfamily='monospace', color='white',\n", + " bbox=dict(boxstyle='round', facecolor=colors['bg'], edgecolor=colors['primary'], linewidth=2))\n", + "\n", + "# ============================================================================\n", + "# 2. 
Key Metrics Card\n", + "# ============================================================================\n", + "ax2 = fig.add_subplot(gs[0, 1])\n", + "ax2.set_facecolor(colors['bg'])\n", + "ax2.axis('off')\n", + "\n", + "metrics_text = f\"\"\"\n", + "╔══════════════════════════════════════╗\n", + "║ 📈 TRAINING RESULTS ║\n", + "╠══════════════════════════════════════╣\n", + "║ ║\n", + "║ ⏱️ Total Time: 131.0 min ║\n", + "║ ⚡ Avg Speed: 64 steps/sec ║\n", + "║ ║\n", + "║ 🏆 BEST REWARDS (DSR) ║\n", + "║ ┌────────────────────────────┐ ║\n", + "║ │ Train: 0.5949 │ ║\n", + "║ │ Eval: 0.2125 │ ║\n", + "║ └────────────────────────────┘ ║\n", + "║ ║\n", + "║ 📊 Multi-Timeframe Features ║\n", + "║ 15m: 26 features ║\n", + "║ 1h: 26 features ║\n", + "║ 4h: 26 features ║\n", + "║ Other: ~10 features ║\n", + "║ TOTAL: ~88 features ║\n", + "║ ║\n", + "╚══════════════════════════════════════╝\n", + "\"\"\"\n", + "ax2.text(0.5, 0.5, metrics_text, transform=ax2.transAxes, fontsize=10,\n", + " verticalalignment='center', horizontalalignment='center',\n", + " fontfamily='monospace', color='white',\n", + " bbox=dict(boxstyle='round', facecolor=colors['bg'], edgecolor=colors['accent'], linewidth=2))\n", + "\n", + "# ============================================================================\n", + "# 3. 
Reward Type Explanation\n", + "# ============================================================================\n", + "ax3 = fig.add_subplot(gs[0, 2])\n", + "ax3.set_facecolor(colors['bg'])\n", + "ax3.axis('off')\n", + "\n", + "dsr_text = f\"\"\"\n", + "╔══════════════════════════════════════╗\n", + "║ 🧮 DIFFERENTIAL SHARPE RATIO ║\n", + "╠══════════════════════════════════════╣\n", + "║ ║\n", + "║ Formula: ║\n", + "║ ║\n", + "║ B·ΔA - 0.5·A·ΔB ║\n", + "║ DSR = ───────────────── ║\n", + "║ (B - A²)^1.5 ║\n", + "║ ║\n", + "║ Where: ║\n", + "║ A = EMA of returns ║\n", + "║ B = EMA of squared returns ║\n", + "║ ║\n", + "║ ✅ Benefits: ║\n", + "║ • Risk-adjusted (Sharpe-like) ║\n", + "║ • Penalizes volatility ║\n", + "║ • Rewards consistency ║\n", + "║ • Scale: -0.5 to +0.5 ║\n", + "║ ║\n", + "║ ⚠️ Note: Small values are normal! ║\n", + "╚══════════════════════════════════════╝\n", + "\"\"\"\n", + "ax3.text(0.5, 0.5, dsr_text, transform=ax3.transAxes, fontsize=10,\n", + " verticalalignment='center', horizontalalignment='center',\n", + " fontfamily='monospace', color='white',\n", + " bbox=dict(boxstyle='round', facecolor=colors['bg'], edgecolor=colors['warning'], linewidth=2))\n", + "\n", + "# ============================================================================\n", + "# 4. 
Training Progress Bar (Visual)\n", + "# ============================================================================\n", + "ax4 = fig.add_subplot(gs[1, :])\n", + "ax4.set_facecolor(colors['bg'])\n", + "\n", + "# Create timeline visualization\n", + "phases = [\n", + " ('Random Warmup', 0, 10000, '#666699'),\n", + " ('No Fees (Learning)', 10000, 100000, colors['primary']),\n", + " ('Fee Ramping', 100000, 200000, colors['warning']),\n", + " ('Full Fees', 200000, 500000, colors['secondary']),\n", + "]\n", + "\n", + "for name, start, end, color in phases:\n", + " ax4.barh(0, end-start, left=start, height=0.4, color=color, edgecolor='white', linewidth=0.5)\n", + " mid = (start + end) / 2\n", + " ax4.text(mid, 0, name, ha='center', va='center', fontsize=9, color='white', fontweight='bold')\n", + "\n", + "# Add markers\n", + "ax4.axvline(x=10000, color='white', linestyle='--', alpha=0.5, linewidth=1)\n", + "ax4.axvline(x=100000, color='white', linestyle='--', alpha=0.5, linewidth=1)\n", + "ax4.axvline(x=200000, color='white', linestyle='--', alpha=0.5, linewidth=1)\n", + "\n", + "ax4.set_xlim(0, 500000)\n", + "ax4.set_ylim(-0.5, 0.5)\n", + "ax4.set_xlabel('Training Steps', fontsize=12, color='white')\n", + "ax4.set_title('📊 Training Curriculum Timeline', fontsize=14, color='white', fontweight='bold', pad=20)\n", + "ax4.set_yticks([])\n", + "ax4.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x/1000:.0f}K'))\n", + "ax4.tick_params(colors='white')\n", + "ax4.spines['top'].set_visible(False)\n", + "ax4.spines['right'].set_visible(False)\n", + "ax4.spines['left'].set_visible(False)\n", + "ax4.spines['bottom'].set_color('white')\n", + "\n", + "# Add step markers\n", + "for step in [0, 100000, 200000, 300000, 400000, 500000]:\n", + " ax4.text(step, -0.35, f'{step//1000}K', ha='center', va='top', fontsize=8, color='gray')\n", + "\n", + "plt.suptitle('🚀 SAC Bitcoin Trading Agent - Training Summary', fontsize=18, color='white', fontweight='bold', y=0.98)\n", + 
def run_backtest(agent, env, name="Test", verbose=True, periods_per_year=252 * 96):
    """Run one deterministic episode on ``env`` and collect backtest metrics.

    Augmentation is switched off and the fee multiplier is set to 1.0 before
    the episode starts.  Every step is recorded in a history dict; a "trade"
    is logged whenever the reported position moves by more than 0.1.

    Args:
        agent: Object with ``select_action(state, deterministic=True)``.
        env: Environment exposing ``set_training_mode``, ``set_fee_multiplier``,
            ``reset``, a 4-tuple ``step``, ``start_idx``, ``current_step``,
            ``df`` (with a ``'close'`` column), ``balance`` and
            ``initial_balance``.
        name: Label used in the printed report and returned metrics.
        verbose: Print a summary when true.
        periods_per_year: Number of return periods per year used to annualise
            the Sharpe ratio.  The default ``252 * 96`` keeps the previous
            behaviour (96 fifteen-minute candles over 252 days); pass
            ``365 * 96`` for a market that trades every day.

    Returns:
        dict: Summary metrics plus the full per-step ``history``.
    """
    env.set_training_mode(False)  # No augmentation during testing
    env.set_fee_multiplier(1.0)   # Full fees

    # Run full episode
    state = env.reset()
    done = False

    # Track everything
    history = {
        'step': [],
        'price': [],
        'position': [],
        'action': [],
        'balance': [],
        'total_value': [],
        'pnl_pct': [],
        'reward': [],
        'trades': []
    }

    step = 0
    total_reward = 0
    prev_position = 0

    while not done:
        action = agent.select_action(state, deterministic=True)
        next_state, reward, done, info = env.step(action)

        # Price of the candle that was just stepped over.
        idx = env.start_idx + env.current_step - 1
        price = env.df.loc[idx, 'close']
        position = info['position']

        # Track trade
        if abs(position - prev_position) > 0.1:
            history['trades'].append({
                'step': step,
                'price': price,
                'from_pos': prev_position,
                'to_pos': position,
                'type': 'LONG' if position > 0 else ('SHORT' if position < 0 else 'CLOSE')
            })

        history['step'].append(step)
        history['price'].append(price)
        history['position'].append(position)
        history['action'].append(action[0])
        history['balance'].append(env.balance)
        history['total_value'].append(info['total_value'])
        history['pnl_pct'].append((info['total_value'] / env.initial_balance - 1) * 100)
        history['reward'].append(reward)

        prev_position = position
        total_reward += reward
        state = next_state
        step += 1

    # Calculate final metrics (the loop body always runs at least once).
    values = np.asarray(history['total_value'], dtype=float)
    final_value = history['total_value'][-1]
    initial_value = env.initial_balance
    total_pnl_pct = (final_value / initial_value - 1) * 100

    # Annualised Sharpe ratio; undefined with fewer than two values, so report 0
    # instead of the NaN that a mean over an empty array would give.
    if values.size > 1:
        returns = np.diff(values) / values[:-1]
        sharpe = np.mean(returns) / (np.std(returns) + 1e-8) * np.sqrt(periods_per_year)
    else:
        sharpe = 0.0

    # Max drawdown
    peak = np.maximum.accumulate(values)
    drawdowns = (peak - values) / peak * 100
    max_drawdown = np.max(drawdowns)

    # Position distribution
    positions = np.array(history['position'])
    long_pct = np.mean(positions > 0.1) * 100
    short_pct = np.mean(positions < -0.1) * 100
    neutral_pct = np.mean(np.abs(positions) <= 0.1) * 100

    # Win rate: each position change is paired with the next one as its exit.
    trades = history['trades']
    trade_pnls = []
    for entry, exit_trade in zip(trades, trades[1:]):
        if entry['type'] == 'CLOSE':
            continue
        pnl = (exit_trade['price'] - entry['price']) / entry['price'] * 100
        if entry['type'] == 'SHORT':
            pnl = -pnl
        trade_pnls.append(pnl)
    win_rate = np.mean(np.array(trade_pnls) > 0) * 100 if trade_pnls else 0

    metrics = {
        'name': name,
        'total_reward': total_reward,
        'total_pnl_pct': total_pnl_pct,
        'final_value': final_value,
        'sharpe_ratio': sharpe,
        'max_drawdown': max_drawdown,
        'num_trades': len(trades),
        'long_pct': long_pct,
        'short_pct': short_pct,
        'neutral_pct': neutral_pct,
        'win_rate': win_rate,
        'avg_trade_pnl': np.mean(trade_pnls) if trade_pnls else 0,
        'history': history
    }

    if verbose:
        print(f"\n{'='*50}")
        print(f"📊 {name} Results")
        print(f"{'='*50}")
        print(f" 💰 Total PnL: {total_pnl_pct:+.2f}%")
        print(f" 📈 Final Value: ${final_value:,.2f}")
        print(f" 🎯 DSR Reward: {total_reward:.4f}")
        print(f" 📉 Max Drawdown: {max_drawdown:.2f}%")
        print(f" 📊 Sharpe Ratio: {sharpe:.3f}")
        print(f" 🔄 Num Trades: {len(trades)}")
        print(f" ✅ Win Rate: {win_rate:.1f}%")
        print(f" 📊 Position Mix: L:{long_pct:.0f}% | S:{short_pct:.0f}% | N:{neutral_pct:.0f}%")

    return metrics
# ============================================================================
# CELL 13: TEST ON UNSEEN DATA (TEST SET)
# ============================================================================

print("="*70)
print(" TESTING ON UNSEEN DATA")
print("="*70)

# NOTE(review): no environment is created in this cell; `test_env` is assumed
# to have been built earlier from the held-out split — confirm.
print(f"📊 Test Data: {len(test_data):,} candles (unseen during training)")
print(f" Date range: {test_data.index[0]} to {test_data.index[-1]}")

# Number of evaluation episodes averaged per checkpoint.
N_TEST_EPISODES = 5

# Test with all three models
models_to_test = [
    ('Best Eval', BEST_EVAL_MODEL),
    ('Best Train', BEST_TRAIN_MODEL),
    ('Final', FINAL_MODEL),
]

all_results = []

for model_name, model_path in models_to_test:
    print(f"\n🔄 Loading {model_name} model...")

    # Create fresh agent
    test_agent = SACAgent(
        state_dim=state_dim,
        action_dim=action_dim,
        device=device
    )

    # Skip checkpoints that fail to load instead of evaluating random weights.
    if not load_model(test_agent, model_path, model_name):
        continue

    # Run multiple test episodes for robustness
    episode_results = [
        run_backtest(test_agent, test_env, f"{model_name} (Ep {ep+1})", verbose=False)
        for ep in range(N_TEST_EPISODES)
    ]

    # Average results
    avg_pnl = np.mean([r['total_pnl_pct'] for r in episode_results])
    avg_sharpe = np.mean([r['sharpe_ratio'] for r in episode_results])
    avg_drawdown = np.mean([r['max_drawdown'] for r in episode_results])
    avg_trades = np.mean([r['num_trades'] for r in episode_results])

    print(f"\n📊 {model_name} Model - Average over {N_TEST_EPISODES} episodes:")
    print(f" 💰 Avg PnL: {avg_pnl:+.2f}%")
    print(f" 📊 Avg Sharpe: {avg_sharpe:.3f}")
    print(f" 📉 Avg Drawdown: {avg_drawdown:.2f}%")
    print(f" 🔄 Avg Trades: {avg_trades:.0f}")

    # Keep the best episode's trajectory for the detailed charts, but carry
    # the averages along so comparisons are not based on a cherry-picked run.
    best_ep = max(episode_results, key=lambda x: x['total_pnl_pct'])
    best_ep['model_name'] = model_name
    best_ep['avg_pnl'] = avg_pnl
    best_ep['avg_sharpe'] = avg_sharpe
    all_results.append(best_ep)

print("\n" + "="*70)
print(" ALL MODELS TESTED")
print("="*70)

# ============================================================================
# CELL 14: DETAILED PERFORMANCE VISUALIZATION
# ============================================================================

print("="*70)
print(" DETAILED PERFORMANCE CHARTS")
print("="*70)


def _style_dark_axes(ax, grid=True):
    """Apply the dark-theme tick, grid and spine colours shared by all panels."""
    ax.tick_params(colors='white')
    if grid:
        ax.grid(True, alpha=0.2, color='gray')
    for spine in ax.spines.values():
        spine.set_color('gray')


if all_results:
    # Prefer the Best Eval checkpoint; fall back to the first model that
    # actually loaded (index 0 is not guaranteed to be Best Eval).
    best_result = next(
        (r for r in all_results if r['model_name'] == 'Best Eval'),
        all_results[0],
    )
    history = best_result['history']

    # Baseline comes from the environment that produced the history rather
    # than a hard-coded 10000.
    initial_capital = test_env.initial_balance

    # Create comprehensive visualization
    fig = plt.figure(figsize=(20, 16))
    gs = GridSpec(4, 3, figure=fig, hspace=0.35, wspace=0.25)
    fig.patch.set_facecolor('#1a1a2e')

    # ========================================================================
    # 1. Portfolio Value Over Time
    # ========================================================================
    ax1 = fig.add_subplot(gs[0, :2])
    ax1.set_facecolor('#1a1a2e')

    steps = history['step']
    portfolio = np.asarray(history['total_value'], dtype=float)

    ax1.fill_between(steps, initial_capital, portfolio, where=portfolio >= initial_capital,
                     color='#00D4AA', alpha=0.3, label='Profit')
    ax1.fill_between(steps, initial_capital, portfolio, where=portfolio < initial_capital,
                     color='#FF6B6B', alpha=0.3, label='Loss')
    ax1.plot(steps, portfolio, color='white', linewidth=1.5, alpha=0.9)
    ax1.axhline(y=initial_capital, color='gray', linestyle='--', alpha=0.5, label='Initial')

    ax1.set_xlabel('Step', fontsize=11, color='white')
    ax1.set_ylabel('Portfolio Value ($)', fontsize=11, color='white')
    ax1.set_title('💰 Portfolio Value Over Time', fontsize=14, color='white', fontweight='bold')
    ax1.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')
    _style_dark_axes(ax1)

    # Final value annotation
    final_val = portfolio[-1]
    pnl_pct = (final_val / initial_capital - 1) * 100
    color = '#00D4AA' if pnl_pct >= 0 else '#FF6B6B'
    ax1.annotate(f'${final_val:,.0f}\n({pnl_pct:+.1f}%)',
                 xy=(steps[-1], final_val),
                 fontsize=12, color=color, fontweight='bold',
                 ha='right', va='bottom')

    # ========================================================================
    # 2. Metrics Summary Box
    # ========================================================================
    ax2 = fig.add_subplot(gs[0, 2])
    ax2.set_facecolor('#1a1a2e')
    ax2.axis('off')

    metrics_text = f"""
 ╔═══════════════════════════╗
 ║ 📊 TEST PERFORMANCE ║
 ╠═══════════════════════════╣
 ║ ║
 ║ PnL: {best_result['total_pnl_pct']:+.2f}% ║
 ║ Sharpe: {best_result['sharpe_ratio']:.3f} ║
 ║ Max DD: {best_result['max_drawdown']:.2f}% ║
 ║ Trades: {best_result['num_trades']} ║
 ║ Win%: {best_result['win_rate']:.1f}% ║
 ║ ║
 ║ Long: {best_result['long_pct']:.0f}% ║
 ║ Short: {best_result['short_pct']:.0f}% ║
 ║ Neutral: {best_result['neutral_pct']:.0f}% ║
 ╚═══════════════════════════╝
 """
    ax2.text(0.5, 0.5, metrics_text, transform=ax2.transAxes, fontsize=11,
             verticalalignment='center', horizontalalignment='center',
             fontfamily='monospace', color='white',
             bbox=dict(boxstyle='round', facecolor='#1a1a2e', edgecolor='#00D4AA', linewidth=2))

    # ========================================================================
    # 3. Price with Position Overlay
    # ========================================================================
    ax3 = fig.add_subplot(gs[1, :])
    ax3.set_facecolor('#1a1a2e')

    prices = history['price']
    positions = history['position']

    ax3.plot(steps, prices, color='white', linewidth=1, alpha=0.8, label='BTC Price')

    # Color background by position
    for i in range(len(steps) - 1):
        if positions[i] > 0.1:
            ax3.axvspan(steps[i], steps[i+1], alpha=0.2, color='#00D4AA')
        elif positions[i] < -0.1:
            ax3.axvspan(steps[i], steps[i+1], alpha=0.2, color='#FF6B6B')

    # Add trade markers
    marker_styles = {
        'LONG': ('^', '#00D4AA'),
        'SHORT': ('v', '#FF6B6B'),
    }
    for trade in history['trades'][:50]:  # Limit markers for clarity
        step_idx = trade['step']
        if step_idx < len(prices):
            marker, color = marker_styles.get(trade['type'], ('o', 'yellow'))
            ax3.scatter(step_idx, prices[step_idx], marker=marker, color=color, s=80, zorder=5, edgecolors='white')

    ax3.set_xlabel('Step', fontsize=11, color='white')
    ax3.set_ylabel('BTC Price ($)', fontsize=11, color='white')
    ax3.set_title('📈 Price Chart with Agent Positions (Green=Long, Red=Short)', fontsize=14, color='white', fontweight='bold')
    _style_dark_axes(ax3)
    ax3.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))

    # Legend for trades
    long_patch = mpatches.Patch(color='#00D4AA', alpha=0.5, label='Long Position')
    short_patch = mpatches.Patch(color='#FF6B6B', alpha=0.5, label='Short Position')
    ax3.legend(handles=[long_patch, short_patch], loc='upper left', facecolor='#1a1a2e', edgecolor='gray')

    # ========================================================================
    # 4. Position Distribution
    # ========================================================================
    ax4 = fig.add_subplot(gs[2, 0])
    ax4.set_facecolor('#1a1a2e')

    pos_labels = ['Long', 'Short', 'Neutral']
    pos_values = [best_result['long_pct'], best_result['short_pct'], best_result['neutral_pct']]
    pos_colors = ['#00D4AA', '#FF6B6B', '#FFE66D']

    ax4.pie(pos_values, labels=pos_labels, colors=pos_colors,
            autopct='%1.1f%%', startangle=90,
            explode=(0.05, 0.05, 0.05),
            textprops={'color': 'white', 'fontsize': 10})
    ax4.set_title('📊 Position Distribution', fontsize=12, color='white', fontweight='bold')

    # ========================================================================
    # 5. Drawdown Chart
    # ========================================================================
    ax5 = fig.add_subplot(gs[2, 1])
    ax5.set_facecolor('#1a1a2e')

    peak = np.maximum.accumulate(portfolio)
    drawdown = (peak - portfolio) / peak * 100

    ax5.fill_between(steps, 0, drawdown, color='#FF6B6B', alpha=0.5)
    ax5.plot(steps, drawdown, color='#FF6B6B', linewidth=1)
    ax5.axhline(y=best_result['max_drawdown'], color='yellow', linestyle='--',
                label=f'Max DD: {best_result["max_drawdown"]:.1f}%')

    ax5.set_xlabel('Step', fontsize=11, color='white')
    ax5.set_ylabel('Drawdown (%)', fontsize=11, color='white')
    ax5.set_title('📉 Drawdown Over Time', fontsize=12, color='white', fontweight='bold')
    ax5.legend(loc='upper right', facecolor='#1a1a2e', edgecolor='gray')
    ax5.tick_params(colors='white')
    ax5.grid(True, alpha=0.2, color='gray')
    ax5.invert_yaxis()
    for spine in ax5.spines.values():
        spine.set_color('gray')

    # ========================================================================
    # 6. Action Distribution
    # ========================================================================
    ax6 = fig.add_subplot(gs[2, 2])
    ax6.set_facecolor('#1a1a2e')

    actions = history['action']
    mean_action = np.mean(actions)
    ax6.hist(actions, bins=50, color='#4ECDC4', alpha=0.7, edgecolor='white', linewidth=0.5)
    ax6.axvline(x=0, color='yellow', linestyle='--', alpha=0.7, label='Neutral')
    ax6.axvline(x=mean_action, color='#00D4AA', linestyle='-', linewidth=2, label=f'Mean: {mean_action:.2f}')

    ax6.set_xlabel('Action Value', fontsize=11, color='white')
    ax6.set_ylabel('Frequency', fontsize=11, color='white')
    ax6.set_title('🎯 Action Distribution', fontsize=12, color='white', fontweight='bold')
    ax6.legend(loc='upper right', facecolor='#1a1a2e', edgecolor='gray')
    _style_dark_axes(ax6)

    # ========================================================================
    # 7. Cumulative Reward
    # ========================================================================
    ax7 = fig.add_subplot(gs[3, 0])
    ax7.set_facecolor('#1a1a2e')

    cum_rewards = np.cumsum(history['reward'])
    ax7.plot(steps, cum_rewards, color='#00D4AA', linewidth=1.5)
    ax7.fill_between(steps, 0, cum_rewards, where=cum_rewards >= 0, color='#00D4AA', alpha=0.3)
    ax7.fill_between(steps, 0, cum_rewards, where=cum_rewards < 0, color='#FF6B6B', alpha=0.3)
    ax7.axhline(y=0, color='gray', linestyle='--', alpha=0.5)

    ax7.set_xlabel('Step', fontsize=11, color='white')
    ax7.set_ylabel('Cumulative DSR', fontsize=11, color='white')
    ax7.set_title('🎯 Cumulative DSR Reward', fontsize=12, color='white', fontweight='bold')
    _style_dark_axes(ax7)

    # ========================================================================
    # 8. Model Comparison
    # ========================================================================
    ax8 = fig.add_subplot(gs[3, 1:])
    ax8.set_facecolor('#1a1a2e')

    # Compare on the episode averages (falling back to the stored episode if
    # averages are absent) and draw for however many models loaded.
    model_names = [r['model_name'] for r in all_results]
    pnls = [r.get('avg_pnl', r['total_pnl_pct']) for r in all_results]
    sharpes = [r.get('avg_sharpe', r['sharpe_ratio']) for r in all_results]

    x = np.arange(len(model_names))
    width = 0.35

    bars1 = ax8.bar(x - width/2, pnls, width, label='Avg PnL %', color='#00D4AA', alpha=0.8)

    ax8_twin = ax8.twinx()
    bars2 = ax8_twin.bar(x + width/2, sharpes, width, label='Avg Sharpe', color='#4ECDC4', alpha=0.8)

    ax8.set_xlabel('Model', fontsize=11, color='white')
    ax8.set_ylabel('PnL (%)', fontsize=11, color='#00D4AA')
    ax8_twin.set_ylabel('Sharpe Ratio', fontsize=11, color='#4ECDC4')
    ax8.set_title('📊 Model Comparison (Test Set)', fontsize=12, color='white', fontweight='bold')
    ax8.set_xticks(x)
    ax8.set_xticklabels(model_names, color='white')
    ax8.tick_params(colors='white')
    ax8_twin.tick_params(colors='white')
    ax8.axhline(y=0, color='gray', linestyle='--', alpha=0.5)

    # Add value labels
    for bar, val in zip(bars1, pnls):
        ax8.text(bar.get_x() + bar.get_width()/2, bar.get_height(), f'{val:+.1f}%',
                 ha='center', va='bottom', color='white', fontsize=9)

    for bar, val in zip(bars2, sharpes):
        ax8_twin.text(bar.get_x() + bar.get_width()/2, bar.get_height(), f'{val:.2f}',
                      ha='center', va='bottom', color='white', fontsize=9)

    ax8.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')
    ax8_twin.legend(loc='upper right', facecolor='#1a1a2e', edgecolor='gray')

    for spine in ax8.spines.values():
        spine.set_color('gray')

    plt.suptitle('🚀 SAC Bitcoin Trading Agent - Test Performance Analysis',
                 fontsize=18, color='white', fontweight='bold', y=0.98)
    plt.tight_layout()
    plt.savefig('test_performance.png', dpi=150, facecolor='#1a1a2e', bbox_inches='tight')
    plt.show()

    print("\n✅ Performance visualization saved!")
else:
    print("⚠️ No results to visualize. Run the test cells first.")
# ============================================================================
# CELL 15: EXTENDED BACKTEST (FULL TEST PERIOD)
# ============================================================================

print("="*70)
print(" EXTENDED BACKTEST - FULL TEST PERIOD")
print("="*70)


def _open_pnl(position, entry_price, price, notional):
    """Unrealized PnL of a position sized as a signed fraction of ``notional``."""
    if position == 0:
        return 0.0  # also avoids dividing by the 0.0 placeholder entry price
    # Signed form: equals |pos| * notional * (1 - price/entry) for shorts.
    return position * notional * (price / entry_price - 1)


def run_extended_backtest(agent, df, initial_balance=10000, fee=0.001, warmup=100):
    """
    Run a backtest over the ENTIRE dataset (not just one environment episode).

    Positions are sized as a fraction of ``initial_balance``. Whenever the
    target position differs from the current one by more than 0.1, the open
    position is fully closed and, if the target's magnitude exceeds 0.1, a new
    one is opened at the current close.

    Args:
        agent: Policy exposing ``select_action(obs, deterministic=True)``;
            element 0 of the action is the target position, clipped to [-1, 1].
        df: DataFrame with ``close``, ``returns_1_15m`` and ``rsi_14_15m``
            columns. Every column other than open/high/low/close/volume/fgi/
            fgi_ma7 is fed to the agent as a feature.
        initial_balance: Starting capital and the notional positions scale with.
        fee: Proportional fee charged on the traded notional at entry and exit.
        warmup: Number of leading rows skipped before the first decision.

    Returns:
        dict of per-step lists (``timestamp``, ``price``, ``position``,
        ``portfolio_value``, ``pnl_pct``) plus the list of ``trades``.
    """
    balance = initial_balance
    position = 0.0
    entry_price = 0.0

    history = {
        'timestamp': [],
        'price': [],
        'position': [],
        'portfolio_value': [],
        'pnl_pct': [],
        'trades': []
    }

    # Get feature columns
    excluded = {'open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7'}
    feature_cols = [col for col in df.columns if col not in excluded]

    # Extract arrays once: per-row df.iloc access is very slow in a long loop.
    closes = df['close'].to_numpy()
    feature_matrix = df[feature_cols].to_numpy()
    returns_1 = df['returns_1_15m'].to_numpy()
    rsi_14 = df['rsi_14_15m'].to_numpy()
    timestamps = df.index

    prev_action = 0.0
    # Running maximum of recorded portfolio values (None until the first step),
    # replacing a max() over the whole history on every iteration.
    peak_value = None

    # Step through entire dataset (the final row is not traded)
    for i in range(warmup, len(df) - 1):
        price = closes[i]

        # Mark-to-market before acting
        portfolio_value = balance + _open_pnl(position, entry_price, price, initial_balance)

        # Build state
        total_return = (portfolio_value / initial_balance) - 1
        max_val = peak_value if peak_value is not None else initial_balance
        drawdown = (max_val - portfolio_value) / max_val if max_val > 0 else 0

        portfolio_info = np.array([
            position,
            total_return,
            drawdown,
            returns_1[i],
            rsi_14[i],
            prev_action
        ], dtype=np.float32)

        obs = np.concatenate([feature_matrix[i], portfolio_info])
        obs = np.clip(obs, -10, 10).astype(np.float32)

        # Get action from agent
        action = agent.select_action(obs, deterministic=True)
        target_position = np.clip(action[0], -1.0, 1.0)

        # Execute trade if position changes significantly
        if abs(target_position - position) > 0.1:
            # Close existing position
            if position != 0:
                close_pnl = _open_pnl(position, entry_price, price, initial_balance)
                # Fee applies to the notional being traded out, not to the
                # PnL; otherwise a flat round trip would exit for free.
                exit_notional = abs(position) * initial_balance * (price / entry_price)
                fee_cost = exit_notional * fee
                balance += close_pnl - fee_cost

                history['trades'].append({
                    'timestamp': timestamps[i],
                    'price': price,
                    'type': 'CLOSE',
                    'pnl': close_pnl - fee_cost
                })
                position = 0.0

            # Open new position
            if abs(target_position) > 0.1:
                position = target_position
                entry_price = price
                fee_cost = abs(position) * initial_balance * fee
                balance -= fee_cost

                history['trades'].append({
                    'timestamp': timestamps[i],
                    'price': price,
                    'type': 'LONG' if position > 0 else 'SHORT',
                    'size': position
                })

        # Update portfolio value after any trade
        portfolio_value = balance + _open_pnl(position, entry_price, price, initial_balance)

        # Record history
        history['timestamp'].append(timestamps[i])
        history['price'].append(price)
        history['position'].append(position)
        history['portfolio_value'].append(portfolio_value)
        history['pnl_pct'].append((portfolio_value / initial_balance - 1) * 100)

        peak_value = portfolio_value if peak_value is None else max(peak_value, portfolio_value)
        prev_action = target_position

    return history
# Capital the extended backtest starts with; used for every baseline below so
# the numbers cannot drift apart from the simulation itself.
INITIAL_CAPITAL = 10000

# Load best eval model
print("🔄 Loading Best Eval model for extended backtest...")
best_agent = SACAgent(state_dim=state_dim, action_dim=action_dim, device=device)
# load_model's result is treated as a success flag elsewhere in this notebook;
# refuse to backtest an agent whose weights were not loaded.
if not load_model(best_agent, BEST_EVAL_MODEL, "Best Eval"):
    raise RuntimeError("Best Eval model could not be loaded; extended backtest aborted.")

# Run extended backtest on test data
print(f"\n📊 Running extended backtest on {len(test_data_norm):,} candles...")
extended_history = run_extended_backtest(best_agent, test_data_norm, initial_balance=INITIAL_CAPITAL)

if not extended_history['portfolio_value']:
    raise RuntimeError("Extended backtest produced no steps; the test set is too short.")

portfolio_values = np.asarray(extended_history['portfolio_value'], dtype=float)

# Calculate final metrics
final_portfolio = extended_history['portfolio_value'][-1]
total_pnl = (final_portfolio / INITIAL_CAPITAL - 1) * 100
num_trades = len(extended_history['trades'])

# Calculate returns for Sharpe
returns = np.diff(portfolio_values) / portfolio_values[:-1]
sharpe = np.mean(returns) / (np.std(returns) + 1e-8) * np.sqrt(252 * 96)

# Max drawdown
peak = np.maximum.accumulate(portfolio_values)
drawdown = (peak - portfolio_values) / peak * 100
max_dd = np.max(drawdown)

# Buy and hold comparison
buy_hold_return = (extended_history['price'][-1] / extended_history['price'][0] - 1) * 100

print(f"\n{'='*60}")
print(f"📊 EXTENDED BACKTEST RESULTS")
print(f"{'='*60}")
print(f" 📅 Period: {extended_history['timestamp'][0].strftime('%Y-%m-%d')} to {extended_history['timestamp'][-1].strftime('%Y-%m-%d')}")
print(f" 📊 Candles: {len(extended_history['portfolio_value']):,}")
print(f"\n 💰 AGENT PERFORMANCE:")
print(f" Final Value: ${final_portfolio:,.2f}")
print(f" Total PnL: {total_pnl:+.2f}%")
print(f" Sharpe Ratio: {sharpe:.3f}")
print(f" Max Drawdown: {max_dd:.2f}%")
print(f" Num Trades: {num_trades}")
print(f"\n 📈 BUY & HOLD COMPARISON:")
print(f" B&H Return: {buy_hold_return:+.2f}%")
print(f" Alpha: {total_pnl - buy_hold_return:+.2f}%")
print(f"{'='*60}")

# ============================================================================
# CELL 16: EXTENDED BACKTEST VISUALIZATION
# ============================================================================

print("="*70)
print(" EXTENDED BACKTEST VISUALIZATION")
print("="*70)


def _style_dark_panel(ax):
    """Apply the dark-theme tick, grid and spine colours used by every panel."""
    ax.tick_params(colors='white')
    ax.grid(True, alpha=0.2, color='gray')
    for spine in ax.spines.values():
        spine.set_color('gray')


fig = plt.figure(figsize=(20, 14))
gs = GridSpec(3, 2, figure=fig, hspace=0.3, wspace=0.2)
fig.patch.set_facecolor('#1a1a2e')

# ============================================================================
# 1. Portfolio Value vs Buy & Hold (Main Chart)
# ============================================================================
ax1 = fig.add_subplot(gs[0, :])
ax1.set_facecolor('#1a1a2e')

timestamps = extended_history['timestamp']
portfolio = extended_history['portfolio_value']
prices = extended_history['price']

# Normalize buy & hold to start at the same capital as the agent
buy_hold = np.array(prices) / prices[0] * INITIAL_CAPITAL

# Plot
ax1.plot(timestamps, portfolio, color='#00D4AA', linewidth=2, label=f'SAC Agent ({total_pnl:+.1f}%)', zorder=3)
ax1.plot(timestamps, buy_hold, color='#4ECDC4', linewidth=1.5, alpha=0.7, label=f'Buy & Hold ({buy_hold_return:+.1f}%)', zorder=2)
ax1.axhline(y=INITIAL_CAPITAL, color='gray', linestyle='--', alpha=0.5, label='Initial Capital')

# Fill between
ax1.fill_between(timestamps, buy_hold, portfolio, where=portfolio_values > buy_hold,
                 color='#00D4AA', alpha=0.2, label='Outperformance')
ax1.fill_between(timestamps, buy_hold, portfolio, where=portfolio_values <= buy_hold,
                 color='#FF6B6B', alpha=0.2, label='Underperformance')

ax1.set_xlabel('Date', fontsize=12, color='white')
ax1.set_ylabel('Portfolio Value ($)', fontsize=12, color='white')
ax1.set_title('💰 Agent Performance vs Buy & Hold', fontsize=16, color='white', fontweight='bold')
ax1.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray', fontsize=10)
_style_dark_panel(ax1)
ax1.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))

# Add final value annotations
ax1.annotate(f'Agent: ${portfolio[-1]:,.0f}', xy=(timestamps[-1], portfolio[-1]),
             xytext=(10, 10), textcoords='offset points',
             fontsize=11, color='#00D4AA', fontweight='bold')
ax1.annotate(f'B&H: ${buy_hold[-1]:,.0f}', xy=(timestamps[-1], buy_hold[-1]),
             xytext=(10, -10), textcoords='offset points',
             fontsize=11, color='#4ECDC4', fontweight='bold')

# ============================================================================
# 2. BTC Price with Trade Markers
# ============================================================================
ax2 = fig.add_subplot(gs[1, :])
ax2.set_facecolor('#1a1a2e')

ax2.plot(timestamps, prices, color='white', linewidth=1, alpha=0.8)

# Add trade markers (entries only; closes are not drawn)
long_trades = [t for t in extended_history['trades'] if t['type'] == 'LONG']
short_trades = [t for t in extended_history['trades'] if t['type'] == 'SHORT']

if long_trades:
    ax2.scatter([t['timestamp'] for t in long_trades], [t['price'] for t in long_trades],
                marker='^', color='#00D4AA', s=100, label=f'Long ({len(long_trades)})', zorder=5, edgecolors='white')
if short_trades:
    ax2.scatter([t['timestamp'] for t in short_trades], [t['price'] for t in short_trades],
                marker='v', color='#FF6B6B', s=100, label=f'Short ({len(short_trades)})', zorder=5, edgecolors='white')

ax2.set_xlabel('Date', fontsize=12, color='white')
ax2.set_ylabel('BTC Price ($)', fontsize=12, color='white')
ax2.set_title('📈 BTC Price with Trade Entries', fontsize=14, color='white', fontweight='bold')
ax2.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')
_style_dark_panel(ax2)
ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))

# ============================================================================
# 3. Drawdown Chart
# ============================================================================
ax3 = fig.add_subplot(gs[2, 0])
ax3.set_facecolor('#1a1a2e')

ax3.fill_between(timestamps, 0, drawdown, color='#FF6B6B', alpha=0.5)
ax3.plot(timestamps, drawdown, color='#FF6B6B', linewidth=1)
ax3.axhline(y=max_dd, color='yellow', linestyle='--', linewidth=2, label=f'Max DD: {max_dd:.1f}%')

ax3.set_xlabel('Date', fontsize=11, color='white')
ax3.set_ylabel('Drawdown (%)', fontsize=11, color='white')
ax3.set_title('📉 Drawdown Over Time', fontsize=13, color='white', fontweight='bold')
ax3.legend(loc='lower right', facecolor='#1a1a2e', edgecolor='gray')
ax3.tick_params(colors='white')
ax3.grid(True, alpha=0.2, color='gray')
ax3.invert_yaxis()
for spine in ax3.spines.values():
    spine.set_color('gray')

# ============================================================================
# 4. Rolling Returns Comparison
# ============================================================================
ax4 = fig.add_subplot(gs[2, 1])
ax4.set_facecolor('#1a1a2e')

# Rolling 7-day change in cumulative return (672 = 7 days of 15m candles).
# last-minus-first over a `window`-long window is a lag of window - 1, so
# Series.diff gives the same values without a Python callback per window.
window = 672
agent_rolling = pd.Series(extended_history['pnl_pct']).diff(window - 1)
bh_returns = (np.array(prices) / prices[0] - 1) * 100
bh_rolling = pd.Series(bh_returns).diff(window - 1)

# Plot only positions where the agent series has a full window behind it
valid_idx = agent_rolling.dropna().index
timestamps_arr = np.array(timestamps)
ax4.plot(timestamps_arr[valid_idx], agent_rolling.iloc[valid_idx].values, color='#00D4AA', linewidth=1.5, label='Agent', alpha=0.8)
ax4.plot(timestamps_arr[valid_idx], bh_rolling.iloc[valid_idx].values, color='#4ECDC4', linewidth=1.5, label='Buy & Hold', alpha=0.8)
ax4.axhline(y=0, color='gray', linestyle='--', alpha=0.5)

ax4.set_xlabel('Date', fontsize=11, color='white')
ax4.set_ylabel('7-Day Rolling Return (%)', fontsize=11, color='white')
ax4.set_title('📊 7-Day Rolling Returns Comparison', fontsize=13, color='white', fontweight='bold')
ax4.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')
_style_dark_panel(ax4)

plt.suptitle('🚀 SAC Bitcoin Agent - Extended Backtest Analysis',
             fontsize=18, color='white', fontweight='bold', y=0.98)
plt.tight_layout()
plt.savefig('extended_backtest.png', dpi=150, facecolor='#1a1a2e', bbox_inches='tight')
plt.show()

print("\n✅ Extended backtest visualization saved!")
"027f6534", + "metadata": {}, + "outputs": [], + "source": [ + "# ============================================================================\n", + "# CELL 17: FINAL SUMMARY DASHBOARD\n", + "# ============================================================================\n", + "\n", + "print(\"=\"*70)\n", + "print(\" FINAL SUMMARY DASHBOARD\")\n", + "print(\"=\"*70)\n", + "\n", + "fig = plt.figure(figsize=(20, 10))\n", + "fig.patch.set_facecolor('#1a1a2e')\n", + "\n", + "# Create grid\n", + "gs = GridSpec(2, 4, figure=fig, hspace=0.4, wspace=0.3)\n", + "\n", + "# Color palette\n", + "colors = {\n", + " 'green': '#00D4AA',\n", + " 'red': '#FF6B6B',\n", + " 'blue': '#4ECDC4',\n", + " 'yellow': '#FFE66D',\n", + " 'purple': '#9B59B6',\n", + " 'bg': '#1a1a2e'\n", + "}\n", + "\n", + "# ============================================================================\n", + "# Helper function for metric cards\n", + "# ============================================================================\n", + "def create_metric_card(ax, title, value, subtitle=\"\", color='#00D4AA', icon=\"📊\"):\n", + " ax.set_facecolor(colors['bg'])\n", + " ax.axis('off')\n", + " \n", + " # Main value\n", + " ax.text(0.5, 0.6, f\"{icon}\", transform=ax.transAxes, fontsize=30,\n", + " ha='center', va='center')\n", + " ax.text(0.5, 0.35, f\"{value}\", transform=ax.transAxes, fontsize=24,\n", + " ha='center', va='center', color=color, fontweight='bold')\n", + " ax.text(0.5, 0.15, f\"{title}\", transform=ax.transAxes, fontsize=11,\n", + " ha='center', va='center', color='white')\n", + " if subtitle:\n", + " ax.text(0.5, 0.02, f\"{subtitle}\", transform=ax.transAxes, fontsize=9,\n", + " ha='center', va='center', color='gray')\n", + " \n", + " # Border\n", + " for spine in ax.spines.values():\n", + " spine.set_visible(True)\n", + " spine.set_color(color)\n", + " spine.set_linewidth(2)\n", + "\n", + "# ============================================================================\n", + "# Create metric cards\n", + 
# ============================================================================
# Row 1: Training Metrics
# NOTE: the four values in this row are hard-coded string literals, not read
# from any variable in this cell; update them by hand after a new training run.
ax1 = fig.add_subplot(gs[0, 0])
create_metric_card(ax1, "Training Time", "131 min", "1000 episodes", colors['blue'], "⏱️")

ax2 = fig.add_subplot(gs[0, 1])
create_metric_card(ax2, "Best Train DSR", "0.5949", "Risk-adjusted reward", colors['green'], "🎯")

ax3 = fig.add_subplot(gs[0, 2])
create_metric_card(ax3, "Best Eval DSR", "0.2125", "Validation set", colors['yellow'], "📈")

ax4 = fig.add_subplot(gs[0, 3])
create_metric_card(ax4, "Training Speed", "64 sps", "steps per second", colors['purple'], "⚡")

# Row 2: Test Performance Metrics (computed from the backtest variables)
ax5 = fig.add_subplot(gs[1, 0])
pnl_color = colors['green'] if total_pnl >= 0 else colors['red']
create_metric_card(ax5, "Test PnL", f"{total_pnl:+.2f}%", "Extended backtest", pnl_color, "💰")

ax6 = fig.add_subplot(gs[1, 1])
# Green above 0.5, yellow for (0, 0.5], red otherwise.
sharpe_color = colors['green'] if sharpe > 0.5 else (colors['yellow'] if sharpe > 0 else colors['red'])
create_metric_card(ax6, "Sharpe Ratio", f"{sharpe:.3f}", "Annualized", sharpe_color, "📊")

ax7 = fig.add_subplot(gs[1, 2])
create_metric_card(ax7, "Max Drawdown", f"{max_dd:.1f}%", "Peak to trough", colors['red'], "📉")

ax8 = fig.add_subplot(gs[1, 3])
# Excess return of the strategy over buy-and-hold.
alpha = total_pnl - buy_hold_return
alpha_color = colors['green'] if alpha >= 0 else colors['red']
create_metric_card(ax8, "Alpha vs B&H", f"{alpha:+.2f}%", "Excess return", alpha_color, "🏆")

plt.suptitle('🚀 SAC Bitcoin Trading Agent - Performance Dashboard',
             fontsize=20, color='white', fontweight='bold', y=0.98)

# Add footer
fig.text(0.5, 0.02,
         f"Test Period: {extended_history['timestamp'][0].strftime('%Y-%m-%d')} to {extended_history['timestamp'][-1].strftime('%Y-%m-%d')} | "
         f"Trades: {num_trades} | Multi-timeframe: 15m/1h/4h | DSR Reward | 0.1% Transaction Fee",
         ha='center', fontsize=10, color='gray')

plt.tight_layout()
plt.savefig('final_dashboard.png', dpi=150, facecolor=colors['bg'], bbox_inches='tight')
plt.show()

print("\n" + "="*70)
print(" ✅ ALL VISUALIZATIONS COMPLETE!")
print("="*70)
print("\n📁 Saved files:")
print(" • training_summary.png")
print(" • test_performance.png")
print(" • extended_backtest.png")
print(" • final_dashboard.png")
print("\n🎉 Analysis complete!")

# ============================================================================
# CELL 18: TRADE ANALYSIS & STATISTICS
# ============================================================================

print("="*70)
print(" TRADE ANALYSIS & STATISTICS")
print("="*70)


def _style_trade_axes(ax, xlabel, ylabel, title):
    """Apply the dark theme, axis labels and title used by the trade plots."""
    ax.set_facecolor('#1a1a2e')
    ax.set_xlabel(xlabel, fontsize=11, color='white')
    ax.set_ylabel(ylabel, fontsize=11, color='white')
    ax.set_title(title, fontsize=13, color='white', fontweight='bold')
    ax.tick_params(colors='white')
    ax.grid(True, alpha=0.2, color='gray')
    for spine in ax.spines.values():
        spine.set_color('gray')


# Analyze trades
trades = extended_history['trades']

if trades:
    # Separate trade types
    long_entries = [t for t in trades if t['type'] == 'LONG']
    short_entries = [t for t in trades if t['type'] == 'SHORT']
    closes = [t for t in trades if t['type'] == 'CLOSE']

    # Only CLOSE records that carry a 'pnl' key contribute to the statistics.
    trade_pnls = [t['pnl'] for t in closes if 'pnl' in t]

    if trade_pnls:
        # A zero-P&L trade is counted as a loss (p <= 0).
        winning_trades = [p for p in trade_pnls if p > 0]
        losing_trades = [p for p in trade_pnls if p <= 0]

        win_rate = len(winning_trades) / len(trade_pnls) * 100
        avg_win = np.mean(winning_trades) if winning_trades else 0
        avg_loss = np.mean(losing_trades) if losing_trades else 0
        # sum([]) == 0, so one check covers both "no losers" and "losers net to zero".
        gross_loss = sum(losing_trades)
        profit_factor = abs(sum(winning_trades) / gross_loss) if gross_loss != 0 else float('inf')
        mean_pnl = np.mean(trade_pnls)

        print(f"\n📊 TRADE STATISTICS:")
        print(f" Total Trades: {len(trade_pnls)}")
        print(f" Long Entries: {len(long_entries)}")
        print(f" Short Entries: {len(short_entries)}")
        print(f"\n📈 PERFORMANCE:")
        print(f" Win Rate: {win_rate:.1f}%")
        print(f" Winning Trades: {len(winning_trades)}")
        print(f" Losing Trades: {len(losing_trades)}")
        print(f" Avg Win: ${avg_win:.2f}")
        print(f" Avg Loss: ${avg_loss:.2f}")
        print(f" Profit Factor: {profit_factor:.2f}")
        print(f" Total P&L: ${sum(trade_pnls):.2f}")

        # Create trade analysis visualization
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
        fig.patch.set_facecolor('#1a1a2e')

        # 1. Trade P&L Distribution
        ax1 = axes[0, 0]
        pnl_min, pnl_max = min(trade_pnls), max(trade_pnls)
        # Shared edges keep win/loss bars aligned. When every trade has the
        # same P&L the edges would all coincide (zero-width bars), so fall
        # back to a plain bin count and let the histogram pick a range.
        bins = np.linspace(pnl_min, pnl_max, 30) if pnl_max > pnl_min else 30
        ax1.hist(winning_trades, bins=bins, color='#00D4AA', alpha=0.7, label='Wins')
        ax1.hist(losing_trades, bins=bins, color='#FF6B6B', alpha=0.7, label='Losses')
        ax1.axvline(x=0, color='white', linestyle='--', alpha=0.7)
        ax1.axvline(x=mean_pnl, color='#FFE66D', linestyle='-', linewidth=2,
                    label=f'Mean: ${mean_pnl:.2f}')
        _style_trade_axes(ax1, 'Trade P&L ($)', 'Frequency', '📊 Trade P&L Distribution')
        # White labels: the default dark text is unreadable on the dark legend face.
        ax1.legend(facecolor='#1a1a2e', edgecolor='gray', labelcolor='white')

        # 2. Cumulative Trade P&L
        ax2 = axes[0, 1]
        cum_pnl = np.cumsum(trade_pnls)
        trade_nums = np.arange(1, len(trade_pnls) + 1)

        ax2.plot(trade_nums, cum_pnl, color='#00D4AA', linewidth=2)
        ax2.fill_between(trade_nums, 0, cum_pnl, where=cum_pnl >= 0, color='#00D4AA', alpha=0.3)
        ax2.fill_between(trade_nums, 0, cum_pnl, where=cum_pnl < 0, color='#FF6B6B', alpha=0.3)
        ax2.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
        _style_trade_axes(ax2, 'Trade Number', 'Cumulative P&L ($)', '📈 Cumulative Trade P&L')

        # 3. Win/Loss Ratio Pie Chart
        ax3 = axes[1, 0]
        ax3.set_facecolor('#1a1a2e')

        sizes = [len(winning_trades), len(losing_trades)]
        labels = [f'Wins ({len(winning_trades)})', f'Losses ({len(losing_trades)})']
        colors_pie = ['#00D4AA', '#FF6B6B']
        explode = (0.05, 0.05)

        ax3.pie(sizes, labels=labels, colors=colors_pie,
                autopct='%1.1f%%', startangle=90, explode=explode,
                textprops={'color': 'white', 'fontsize': 11})
        ax3.set_title('🎯 Win/Loss Distribution', fontsize=13, color='white', fontweight='bold')

        # 4. Trade Size Distribution
        ax4 = axes[1, 1]

        # Position sizes from history; magnitudes <= 0.1 are excluded.
        positions = [abs(p) for p in extended_history['position'] if abs(p) > 0.1]

        if positions:
            mean_position = np.mean(positions)
            ax4.hist(positions, bins=20, color='#4ECDC4', alpha=0.7, edgecolor='white', linewidth=0.5)
            ax4.axvline(x=mean_position, color='#FFE66D', linestyle='-', linewidth=2,
                        label=f'Mean: {mean_position:.2f}')
            # Only build a legend when a labelled artist exists; calling
            # legend() on an empty axes just emits a warning.
            ax4.legend(facecolor='#1a1a2e', edgecolor='gray', labelcolor='white')

        _style_trade_axes(ax4, 'Position Size', 'Frequency', '📊 Position Size Distribution')

        plt.suptitle('🔍 Trade Analysis Deep Dive', fontsize=16, color='white', fontweight='bold', y=0.98)
        plt.tight_layout()
        plt.savefig('trade_analysis.png', dpi=150, facecolor='#1a1a2e', bbox_inches='tight')
        plt.show()

        print("\n✅ Trade analysis visualization saved!")
    else:
        print("⚠️ No trade P&L data available")
else:
    print("⚠️ No trades recorded")
"kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + }, + "papermill": { + "default_parameters": {}, + "duration": null, + "end_time": null, + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2025-10-25T11:42:30.221950", + "version": "2.6.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}