diff --git "a/versions/2/version 9.ipynb" "b/versions/2/version 9.ipynb"
new file mode 100644--- /dev/null
+++ "b/versions/2/version 9.ipynb"
@@ -0,0 +1,2769 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9b085bca",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-25T11:42:35.310569Z",
+     "iopub.status.busy": "2025-10-25T11:42:35.310358Z",
+     "iopub.status.idle": "2025-10-25T11:43:07.305498Z",
+     "shell.execute_reply": "2025-10-25T11:43:07.304622Z"
+    },
+    "papermill": {
+     "duration": 32.0093,
+     "end_time": "2025-10-25T11:43:07.311339",
+     "exception": false,
+     "start_time": "2025-10-25T11:42:35.302039",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 1: PYTORCH GPU SETUP (KAGGLE 30GB GPU)\n",
+    "# ============================================================================\n",
+    "\n",
+    "!pip install -q ta\n",
+    "\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "import torch.optim as optim\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" PYTORCH GPU SETUP (30GB GPU)\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "# ----------------------------------------------------------------------------\n",
+    "# Pick the compute device once, then tune CUDA only when a GPU is present.\n",
+    "# ----------------------------------------------------------------------------\n",
+    "cuda_ready = torch.cuda.is_available()\n",
+    "device = torch.device(\"cuda\" if cuda_ready else \"cpu\")\n",
+    "\n",
+    "if not cuda_ready:\n",
+    "    print(\"⚠️ No GPU detected, using CPU\")\n",
+    "else:\n",
+    "    # Describe device 0: its name and total memory in decimal gigabytes.\n",
+    "    gpu_name = torch.cuda.get_device_name(0)\n",
+    "    gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9\n",
+    "    print(f\"✅ GPU: {gpu_name}\")\n",
+    "    print(f\"✅ GPU Memory: {gpu_mem:.1f} GB\")\n",
+    "\n",
+    "    # Allow TF32 in both matmul and cuDNN kernels.\n",
+    "    torch.backends.cuda.matmul.allow_tf32 = True\n",
+    "    torch.backends.cudnn.allow_tf32 = True\n",
+    "    print(\"✅ TF32: Enabled (2-3x speedup on Ampere)\")\n",
+    "\n",
+    "    # Let cuDNN benchmark its algorithms and keep the fastest one.\n",
+    "    torch.backends.cudnn.benchmark = True\n",
+    "    print(\"✅ cuDNN benchmark: Enabled\")\n",
+    "\n",
+    "    # Tensors created without an explicit device now land on the GPU.\n",
+    "    torch.set_default_device('cuda')\n",
+    "    print(\"✅ Default device: CUDA\")\n",
+    "\n",
+    "print(f\"\\n✅ PyTorch: {torch.__version__}\")\n",
+    "print(f\"✅ Device: {device}\")\n",
+    "print(\"=\"*70)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7730408f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-25T11:43:07.325051Z",
+     "iopub.status.busy": "2025-10-25T11:43:07.324573Z",
+     "iopub.status.idle": "2025-10-25T11:43:16.012274Z",
+     "shell.execute_reply": "2025-10-25T11:43:16.011145Z"
+    },
+    "papermill": {
+     "duration": 8.696398,
+     "end_time": "2025-10-25T11:43:16.013680",
+     "exception": false,
+     "start_time": "2025-10-25T11:43:07.317282",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 2: LOAD DATA + FEATURES + ENVIRONMENT (MULTI-TIMEFRAME)\n",
+    "# ============================================================================\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import gym\n",
+    "from gym import spaces\n",
+    "from ta.momentum import RSIIndicator, StochasticOscillator, ROCIndicator, WilliamsRIndicator\n",
+    "from ta.trend import MACD, EMAIndicator, SMAIndicator, ADXIndicator, CCIIndicator\n",
+    "from ta.volatility import BollingerBands, AverageTrueRange\n",
+    "from ta.volume import OnBalanceVolumeIndicator\n",
+    "import os\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" LOADING MULTI-TIMEFRAME DATA + FEATURES\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "# ============================================================================\n",
+    "# HELPER: CALCULATE INDICATORS FOR ANY TIMEFRAME\n",
+    "# ============================================================================\n",
+    "def calculate_indicators(df, suffix=''):\n",
+    "    \"\"\"Compute the technical-indicator feature columns for one OHLCV timeframe.\n",
+    "\n",
+    "    Args:\n",
+    "        df: DataFrame with 'open', 'high', 'low', 'close' and 'volume' columns.\n",
+    "        suffix: Optional timeframe tag; when given, every new column is named\n",
+    "            '<feature>_<suffix>' so several timeframes can share one frame.\n",
+    "\n",
+    "    Returns:\n",
+    "        A copy of df with the indicator columns appended. Helper series\n",
+    "        (moving averages, rolling extremes) are kept as locals, so they never\n",
+    "        appear in the result.\n",
+    "    \"\"\"\n",
+    "    data = df.copy()\n",
+    "    s = f'_{suffix}' if suffix else ''\n",
+    "    eps = 1e-8  # keeps ratio features finite when a denominator is zero\n",
+    "    open_, high, low, close, volume = (data[c] for c in ('open', 'high', 'low', 'close', 'volume'))\n",
+    "\n",
+    "    # Momentum\n",
+    "    data[f'rsi_14{s}'] = RSIIndicator(close=close, window=14).rsi() / 100\n",
+    "    data[f'rsi_7{s}'] = RSIIndicator(close=close, window=7).rsi() / 100\n",
+    "\n",
+    "    stoch = StochasticOscillator(high=high, low=low, close=close, window=14)\n",
+    "    data[f'stoch_k{s}'] = stoch.stoch() / 100\n",
+    "    data[f'stoch_d{s}'] = stoch.stoch_signal() / 100\n",
+    "\n",
+    "    data[f'roc_12{s}'] = np.tanh(ROCIndicator(close=close, window=12).roc() / 100)\n",
+    "\n",
+    "    williams = WilliamsRIndicator(high=high, low=low, close=close, lbp=14)\n",
+    "    data[f'williams_r{s}'] = (williams.williams_r() + 100) / 100\n",
+    "\n",
+    "    # MACD lines are expressed as a percentage of price, then squashed with tanh\n",
+    "    macd = MACD(close=close)\n",
+    "    data[f'macd{s}'] = np.tanh(macd.macd() / close * 100)\n",
+    "    data[f'macd_signal{s}'] = np.tanh(macd.macd_signal() / close * 100)\n",
+    "    data[f'macd_diff{s}'] = np.tanh(macd.macd_diff() / close * 100)\n",
+    "\n",
+    "    # Trend: only the relative distance to each moving average is kept\n",
+    "    sma_20 = SMAIndicator(close=close, window=20).sma_indicator()\n",
+    "    sma_50 = SMAIndicator(close=close, window=50).sma_indicator()\n",
+    "    data[f'price_vs_sma20{s}'] = (close - sma_20) / sma_20\n",
+    "    data[f'price_vs_sma50{s}'] = (close - sma_50) / sma_50\n",
+    "\n",
+    "    adx = ADXIndicator(high=high, low=low, close=close, window=14)\n",
+    "    data[f'adx{s}'] = adx.adx() / 100\n",
+    "    data[f'adx_pos{s}'] = adx.adx_pos() / 100\n",
+    "    data[f'adx_neg{s}'] = adx.adx_neg() / 100\n",
+    "\n",
+    "    data[f'cci{s}'] = np.tanh(CCIIndicator(high=high, low=low, close=close, window=20).cci() / 100)\n",
+    "\n",
+    "    # Volatility\n",
+    "    bb = BollingerBands(close=close, window=20, window_dev=2)\n",
+    "    bb_low = bb.bollinger_lband()\n",
+    "    band_width = bb.bollinger_hband() - bb_low\n",
+    "    data[f'bb_width{s}'] = band_width / bb.bollinger_mavg()\n",
+    "    # A flat 20-bar window collapses the bands to zero width; eps avoids inf,\n",
+    "    # which the later dropna() would not remove.\n",
+    "    data[f'bb_position{s}'] = (close - bb_low) / (band_width + eps)\n",
+    "\n",
+    "    atr = AverageTrueRange(high=high, low=low, close=close, window=14)\n",
+    "    data[f'atr_percent{s}'] = atr.average_true_range() / close\n",
+    "\n",
+    "    # Volume\n",
+    "    volume_ma_20 = volume.rolling(20).mean()\n",
+    "    data[f'volume_ratio{s}'] = volume / (volume_ma_20 + eps)\n",
+    "\n",
+    "    # On-balance volume is computed once and reused for the 5-bar relative change\n",
+    "    obv = OnBalanceVolumeIndicator(close=close, volume=volume).on_balance_volume()\n",
+    "    data[f'obv_slope{s}'] = obv.diff(5) / (obv.shift(5).abs() + eps)\n",
+    "\n",
+    "    # Price action\n",
+    "    data[f'returns_1{s}'] = close.pct_change()\n",
+    "    data[f'returns_5{s}'] = close.pct_change(5)\n",
+    "    data[f'returns_20{s}'] = close.pct_change(20)\n",
+    "    data[f'volatility_20{s}'] = data[f'returns_1{s}'].rolling(20).std()\n",
+    "\n",
+    "    data[f'body_size{s}'] = (close - open_).abs() / (open_ + eps)\n",
+    "    high_20 = high.rolling(20).max()\n",
+    "    low_20 = low.rolling(20).min()\n",
+    "    data[f'price_position{s}'] = (close - low_20) / (high_20 - low_20 + eps)\n",
+    "\n",
+    "    return data\n",
+    "\n",
+    "def load_and_clean_btc(filepath, start_date='2021-01-01'):\n",
+    "    \"\"\"Load a BTC OHLCV CSV and return a cleaned frame indexed by candle open time.\n",
+    "\n",
+    "    Args:\n",
+    "        filepath: CSV with 'Open time', 'Open', 'High', 'Low', 'Close' and\n",
+    "            'Volume' columns.\n",
+    "        start_date: Earliest timestamp to keep (inclusive). Defaults to\n",
+    "            '2021-01-01'; pass None to keep the whole file.\n",
+    "\n",
+    "    Returns:\n",
+    "        DataFrame with numeric open/high/low/close/volume columns, a unique\n",
+    "        ascending 'timestamp' index, and no row containing a missing,\n",
+    "        unparseable or zero value.\n",
+    "    \"\"\"\n",
+    "    column_mapping = {'Open time': 'timestamp', 'Open': 'open', 'High': 'high',\n",
+    "                      'Low': 'low', 'Close': 'close', 'Volume': 'volume'}\n",
+    "    raw = pd.read_csv(filepath).rename(columns=column_mapping)\n",
+    "    raw['timestamp'] = pd.to_datetime(raw['timestamp'])\n",
+    "\n",
+    "    # Build a fresh frame instead of assigning into a slice (avoids chained-assignment warnings)\n",
+    "    ohlcv = (raw.set_index('timestamp')[['open', 'high', 'low', 'close', 'volume']]\n",
+    "                .apply(pd.to_numeric, errors='coerce'))\n",
+    "\n",
+    "    if start_date is not None:\n",
+    "        ohlcv = ohlcv[ohlcv.index >= start_date]\n",
+    "    ohlcv = ohlcv[~ohlcv.index.duplicated(keep='first')]\n",
+    "\n",
+    "    # Zeros are treated as missing data, so any row holding one is dropped\n",
+    "    return ohlcv.replace(0, np.nan).dropna().sort_index()\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 1. LOAD ALL TIMEFRAMES\n",
+    "# ============================================================================\n",
+    "# All three timeframes come from the same Kaggle dataset and get identical cleaning.\n",
+    "data_path = '/kaggle/input/bitcoin-historical-datasets-2018-2024/'\n",
+    "\n",
+    "print(\"📊 Loading 15-minute data...\")\n",
+    "btc_15m = load_and_clean_btc(data_path + 'btc_15m_data_2018_to_2025.csv')\n",
+    "print(f\"   ✅ 15m: {len(btc_15m):,} candles\")\n",
+    "\n",
+    "print(\"📊 Loading 1-hour data...\")\n",
+    "btc_1h = load_and_clean_btc(data_path + 'btc_1h_data_2018_to_2025.csv')\n",
+    "print(f\"   ✅ 1h: {len(btc_1h):,} candles\")\n",
+    "\n",
+    "print(\"📊 Loading 4-hour data...\")\n",
+    "btc_4h = load_and_clean_btc(data_path + 'btc_4h_data_2018_to_2025.csv')\n",
+    "print(f\"   ✅ 4h: {len(btc_4h):,} candles\")\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 2. LOAD FEAR & GREED INDEX\n",
+    "# ============================================================================\n",
+    "fgi_loaded = False\n",
+    "fgi_path = '/kaggle/input/btc-usdt-4h-ohlc-fgi-daily-2020/'\n",
+    "\n",
+    "try:\n",
+    "    # Sorted so the same file wins on every run (os.listdir order is arbitrary).\n",
+    "    for filename in sorted(os.listdir(fgi_path)):\n",
+    "        if not filename.endswith('.csv'):\n",
+    "            continue\n",
+    "        candidate = pd.read_csv(os.path.join(fgi_path, filename))\n",
+    "\n",
+    "        # Prefer a column whose name mentions time/date; otherwise fall back to the first column.\n",
+    "        time_col = [c for c in candidate.columns if 'time' in c.lower() or 'date' in c.lower()]\n",
+    "        stamps = candidate[time_col[0]] if time_col else candidate.iloc[:, 0]\n",
+    "        candidate['timestamp'] = pd.to_datetime(stamps)\n",
+    "        candidate = candidate.set_index('timestamp')\n",
+    "\n",
+    "        fgi_col = [c for c in candidate.columns if 'fgi' in c.lower() or 'fear' in c.lower() or 'greed' in c.lower()]\n",
+    "        if not fgi_col:\n",
+    "            continue  # this CSV has no Fear & Greed column; try the next one\n",
+    "\n",
+    "        fgi_data = candidate[[fgi_col[0]]].rename(columns={fgi_col[0]: 'fgi'})\n",
+    "        fgi_loaded = True\n",
+    "        print(f\"✅ Fear & Greed loaded: {len(fgi_data):,} values\")\n",
+    "        break\n",
+    "except Exception as e:\n",
+    "    # Still best effort (the neutral fallback below takes over), but the\n",
+    "    # reason is reported instead of being swallowed by a bare except.\n",
+    "    print(f\"⚠️ Fear & Greed not loaded: {e}\")\n",
+    "\n",
+    "if not fgi_loaded:\n",
+    "    fgi_data = pd.DataFrame(index=btc_15m.index)\n",
+    "    fgi_data['fgi'] = 50\n",
+    "    print(\"⚠️ Using neutral FGI values\")\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 3. CALCULATE INDICATORS FOR EACH TIMEFRAME\n",
+    "# ============================================================================\n",
+    "print(\"\\n🔧 Calculating indicators for 15m...\")\n",
+    "data_15m = calculate_indicators(btc_15m, suffix='15m')\n",
+    "\n",
+    "print(\"🔧 Calculating indicators for 1h...\")\n",
+    "data_1h = calculate_indicators(btc_1h, suffix='1h')\n",
+    "\n",
+    "print(\"🔧 Calculating indicators for 4h...\")\n",
+    "data_4h = calculate_indicators(btc_4h, suffix='4h')\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 4. MERGE HIGHER TIMEFRAMES INTO 15M (NO LOOK-AHEAD)\n",
+    "# ============================================================================\n",
+    "print(\"\\n🔗 Merging timeframes...\")\n",
+    "\n",
+    "def _align_to_15m(htf_features, candle_length, base_index, base_length=pd.Timedelta(minutes=15)):\n",
+    "    \"\"\"Place higher-timeframe features on the 15m bar that closes with their candle.\n",
+    "\n",
+    "    Every frame is indexed by candle OPEN time, so a 1h row stamped 10:00 holds\n",
+    "    a close that is only known at 11:00. Shifting its stamp by\n",
+    "    (candle_length - base_length) lands it on the 15m bar opening at 10:45,\n",
+    "    which closes at the same instant. Each 15m bar then takes the most recent\n",
+    "    higher-timeframe row at or before it, and remaining NaN cells inherit the\n",
+    "    last valid value.\n",
+    "    \"\"\"\n",
+    "    available = htf_features.shift(freq=candle_length - base_length)\n",
+    "    return available.reindex(base_index, method='ffill').ffill()\n",
+    "\n",
+    "ohlcv_cols = ['open', 'high', 'low', 'close', 'volume']\n",
+    "cols_1h = [c for c in data_1h.columns if c not in ohlcv_cols]\n",
+    "cols_4h = [c for c in data_4h.columns if c not in ohlcv_cols]\n",
+    "\n",
+    "data = (data_15m\n",
+    "        .join(_align_to_15m(data_1h[cols_1h], pd.Timedelta(hours=1), data_15m.index), how='left')\n",
+    "        .join(_align_to_15m(data_4h[cols_4h], pd.Timedelta(hours=4), data_15m.index), how='left'))\n",
+    "\n",
+    "# Merge FGI: carry the last known value forward; bars before the first FGI\n",
+    "# reading get the neutral 50 rather than a back-filled (future) value.\n",
+    "data = data.join(fgi_data, how='left')\n",
+    "data['fgi'] = data['fgi'].ffill().fillna(50)\n",
+    "\n",
+    "# Fear & Greed derived features\n",
+    "data['fgi_normalized'] = (data['fgi'] - 50) / 50\n",
+    "data['fgi_change'] = data['fgi'].diff() / 50\n",
+    "# FGI is a daily series (per the dataset name) forward-filled onto 15m bars,\n",
+    "# so the 7-period average must span 7 days of time, not 7 consecutive bars.\n",
+    "data['fgi_ma7'] = data['fgi'].rolling('7D').mean()\n",
+    "data['fgi_vs_ma'] = (data['fgi'] - data['fgi_ma7']) / 50\n",
+    "\n",
+    "# Time features\n",
+    "data['hour'] = data.index.hour / 24\n",
+    "data['day_of_week'] = data.index.dayofweek / 7\n",
+    "# NOTE(review): the 14-21h window presumably assumes a UTC index — confirm.\n",
+    "data['us_session'] = ((data.index.hour >= 14) & (data.index.hour < 21)).astype(float)\n",
+    "\n",
+    "btc_features = data.dropna()\n",
+    "\n",
+    "feature_cols = [col for col in btc_features.columns \n",
+    "                if col not in ['open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7']]\n",
+    "\n",
+    "print(f\"\\n✅ Multi-timeframe features complete!\")\n",
+    "print(f\"   15m features: {len([c for c in feature_cols if '15m' in c])}\")\n",
+    "print(f\"   1h features: {len([c for c in feature_cols if '1h' in c])}\")\n",
+    "print(f\"   4h features: {len([c for c in feature_cols if '4h' in c])}\")\n",
+    "print(f\"   Other features: {len([c for c in feature_cols if '15m' not in c and '1h' not in c and '4h' not in c])}\")\n",
+    "print(f\"   TOTAL features: {len(feature_cols)}\")\n",
+    "print(f\"   Clean data: {len(btc_features):,} candles\")\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 5. TRAIN/VALID/TEST SPLITS\n",
+    "# ============================================================================\n",
+    "print(\"\\n📊 Creating Data Splits...\")\n",
+    "\n",
+    "# Chronological 70% / 15% / remainder split; rows are never shuffled.\n",
+    "n_rows = len(btc_features)\n",
+    "train_size = int(n_rows * 0.70)\n",
+    "valid_size = int(n_rows * 0.15)\n",
+    "valid_end = train_size + valid_size\n",
+    "\n",
+    "train_data, valid_data, test_data = (\n",
+    "    btc_features.iloc[start:stop].copy()\n",
+    "    for start, stop in ((None, train_size), (train_size, valid_end), (valid_end, None))\n",
+    ")\n",
+    "\n",
+    "print(f\"   Train: {len(train_data):,} | Valid: {len(valid_data):,} | Test: {len(test_data):,}\")\n",
+    "\n",
+    "# Independent copy of the unsplit frame, kept for walk-forward use\n",
+    "full_data = btc_features.copy()\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 6. ROLLING NORMALIZATION CLASS\n",
+    "# ============================================================================\n",
+    "class RollingNormalizer:\n",
+    "    \"\"\"\n",
+    "    Rolling z-score normalization to prevent look-ahead bias.\n",
+    "\n",
+    "    Each value is standardised with the mean and standard deviation of a\n",
+    "    trailing window that ends at (and includes) its own row, so no later row\n",
+    "    influences it.\n",
+    "\n",
+    "    Args:\n",
+    "        window_size: Trailing window length in rows (2880 = 30 days of 15m candles).\n",
+    "        min_periods: Rows required before a window yields a value. Clamped to\n",
+    "            window_size, because pandas rejects min_periods > window.\n",
+    "    \"\"\"\n",
+    "    def __init__(self, window_size=2880, min_periods=100):\n",
+    "        self.window_size = window_size\n",
+    "        self.min_periods = min(min_periods, window_size)\n",
+    "        self.feature_cols = None\n",
+    "\n",
+    "    def fit_transform(self, df, feature_cols):\n",
+    "        \"\"\"Return a copy of df whose feature_cols are rolling z-scores.\n",
+    "\n",
+    "        Scores are clipped to [-5, 5]. Rows without enough history (and any\n",
+    "        other NaN in a feature column) become 0. Columns outside feature_cols\n",
+    "        are left untouched, and df itself is not modified.\n",
+    "        \"\"\"\n",
+    "        self.feature_cols = feature_cols\n",
+    "        result = df.copy()\n",
+    "\n",
+    "        # One rolling pass over all feature columns instead of a Python loop per column\n",
+    "        window = df[feature_cols].rolling(window=self.window_size, min_periods=self.min_periods)\n",
+    "        zscores = (df[feature_cols] - window.mean()) / (window.std() + 1e-8)\n",
+    "\n",
+    "        # Clip extreme values, then fill the NaN warm-up rows with 0 (neutral)\n",
+    "        result[feature_cols] = zscores.clip(-5, 5).fillna(0)\n",
+    "\n",
+    "        return result\n",
+    "\n",
+    "print(\"✅ RollingNormalizer class defined\")\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 7. TRADING ENVIRONMENT WITH DSR + RANDOM FLIP AUGMENTATION\n",
+    "# ============================================================================\n",
+    "class BitcoinTradingEnv(gym.Env):\n",
+    "    \"\"\"\n",
+    "    Single-asset trading environment with a continuous target-position action.\n",
+    "\n",
+    "    - Differential Sharpe Ratio (DSR) reward. The DSR moving averages are\n",
+    "      updated on every step, but a constant reward is returned until\n",
+    "      dsr_warmup_steps steps have elapsed.\n",
+    "    - Previous action in state (to learn cost of switching)\n",
+    "    - Transaction fee ramping via set_fee_multiplier (0 -> base_transaction_fee)\n",
+    "    - Random flip augmentation: in training mode each episode has a 50% chance\n",
+    "      of being mirrored. Directional features are negated and position P&L is\n",
+    "      inverted, so equity, drawdown, termination and reward all describe the\n",
+    "      same mirrored market and fees always remain a cost.\n",
+    "\n",
+    "    Uses the classic gym API: reset() returns obs and step() returns\n",
+    "    (obs, reward, done, info). Exposure is position * initial_balance, i.e.\n",
+    "    position size does not compound with equity.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, df, initial_balance=10000, episode_length=500,\n",
+    "                 base_transaction_fee=0.001,  # 0.1% max fee\n",
+    "                 dsr_eta=0.01):  # DSR adaptation rate\n",
+    "        super().__init__()\n",
+    "        self.df = df.reset_index(drop=True)\n",
+    "        self.initial_balance = initial_balance\n",
+    "        self.episode_length = episode_length\n",
+    "        self.base_transaction_fee = base_transaction_fee\n",
+    "        self.dsr_eta = dsr_eta\n",
+    "\n",
+    "        # Fee ramping (controlled externally via set_fee_multiplier)\n",
+    "        self.fee_multiplier = 0.0\n",
+    "\n",
+    "        # Training mode enables the per-episode random mirror augmentation\n",
+    "        self.training_mode = True\n",
+    "        self.flip_sign = 1.0  # +1 = real market, -1 = mirrored market\n",
+    "\n",
+    "        # Number of initial steps that return a constant reward while the DSR averages settle\n",
+    "        self.dsr_warmup_steps = 100\n",
+    "\n",
+    "        self.feature_cols = [col for col in df.columns\n",
+    "                            if col not in ['open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7']]\n",
+    "\n",
+    "        # Columns negated in mirrored episodes, resolved once instead of on every step\n",
+    "        flip_keys = ('returns', 'roc', 'macd', 'cci', 'obv', 'sentiment')\n",
+    "        self._flip_mask = np.array(\n",
+    "            [any(key in col.lower() for key in flip_keys) for col in self.feature_cols],\n",
+    "            dtype=bool)\n",
+    "\n",
+    "        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)\n",
+    "        # +6 for: position, total_return, drawdown, returns_1, rsi_14, PREVIOUS_ACTION\n",
+    "        self.observation_space = spaces.Box(\n",
+    "            low=-10, high=10,\n",
+    "            shape=(len(self.feature_cols) + 6,),\n",
+    "            dtype=np.float32\n",
+    "        )\n",
+    "        self.reset()\n",
+    "\n",
+    "    def set_fee_multiplier(self, multiplier):\n",
+    "        \"\"\"Set fee multiplier (clipped to 0.0-1.0) for fee ramping\"\"\"\n",
+    "        self.fee_multiplier = np.clip(multiplier, 0.0, 1.0)\n",
+    "\n",
+    "    def set_training_mode(self, training=True):\n",
+    "        \"\"\"Set training mode (enables random flips for augmentation); applies from the next reset()\"\"\"\n",
+    "        self.training_mode = training\n",
+    "\n",
+    "    @property\n",
+    "    def current_fee(self):\n",
+    "        \"\"\"Current transaction fee rate: base_transaction_fee scaled by fee_multiplier\"\"\"\n",
+    "        return self.base_transaction_fee * self.fee_multiplier\n",
+    "\n",
+    "    def reset(self):\n",
+    "        \"\"\"Start a new episode at a random row and return the first observation.\"\"\"\n",
+    "        max_start = len(self.df) - self.episode_length - 1\n",
+    "        self.start_idx = np.random.randint(100, max(101, max_start))\n",
+    "\n",
+    "        self.current_step = 0\n",
+    "        self.balance = self.initial_balance\n",
+    "        self.position = 0.0\n",
+    "        self.entry_price = 0.0\n",
+    "        self.total_value = self.initial_balance\n",
+    "        self.prev_total_value = self.initial_balance\n",
+    "        self.max_value = self.initial_balance\n",
+    "\n",
+    "        # Previous action for state\n",
+    "        self.prev_action = 0.0\n",
+    "\n",
+    "        # DSR variables (Differential Sharpe Ratio)\n",
+    "        self.A_t = 0.0  # EMA of returns\n",
+    "        self.B_t = 0.0  # EMA of squared returns\n",
+    "\n",
+    "        # Position tracking\n",
+    "        self.long_steps = 0\n",
+    "        self.short_steps = 0\n",
+    "        self.neutral_steps = 0\n",
+    "        self.num_trades = 0\n",
+    "\n",
+    "        # Random flip for data augmentation (50% chance during training)\n",
+    "        # This inverts price movements: what was bullish becomes bearish\n",
+    "        if self.training_mode:\n",
+    "            self.flip_sign = -1.0 if np.random.random() < 0.5 else 1.0\n",
+    "        else:\n",
+    "            self.flip_sign = 1.0  # No flip during eval\n",
+    "\n",
+    "        return self._get_obs()\n",
+    "\n",
+    "    def _get_obs(self):\n",
+    "        \"\"\"Build the observation: feature row + 6 portfolio values, clipped to [-10, 10].\"\"\"\n",
+    "        idx = self.start_idx + self.current_step\n",
+    "        features = self.df.loc[idx, self.feature_cols].values.copy()\n",
+    "\n",
+    "        # Mirrored episode: negate the directional features\n",
+    "        if self.flip_sign < 0:\n",
+    "            features[self._flip_mask] *= self.flip_sign\n",
+    "\n",
+    "        total_return = (self.total_value / self.initial_balance) - 1\n",
+    "        drawdown = (self.max_value - self.total_value) / self.max_value if self.max_value > 0 else 0\n",
+    "\n",
+    "        # The market return shown in the portfolio block is mirrored as well\n",
+    "        market_return = self.df.loc[idx, 'returns_1_15m'] * self.flip_sign\n",
+    "\n",
+    "        # NOTE(review): rsi_14_15m is passed through unmirrored even in flipped episodes.\n",
+    "        portfolio_info = np.array([\n",
+    "            self.position,\n",
+    "            total_return,\n",
+    "            drawdown,\n",
+    "            market_return,\n",
+    "            self.df.loc[idx, 'rsi_14_15m'],\n",
+    "            self.prev_action\n",
+    "        ], dtype=np.float32)\n",
+    "\n",
+    "        obs = np.concatenate([features, portfolio_info])\n",
+    "        return np.clip(obs, -10, 10).astype(np.float32)\n",
+    "\n",
+    "    def _calculate_dsr(self, return_t):\n",
+    "        \"\"\"\n",
+    "        Update the return EMAs with return_t and return the Differential Sharpe Ratio.\n",
+    "        DSR = (B_{t-1} * ΔA_t - 0.5 * A_{t-1} * ΔB_t) / (B_{t-1} - A_{t-1}^2)^1.5\n",
+    "\n",
+    "        Falls back to the raw return while the variance estimate is <= 1e-8;\n",
+    "        otherwise the result is clipped to [-0.5, 0.5].\n",
+    "        \"\"\"\n",
+    "        eta = self.dsr_eta\n",
+    "\n",
+    "        A_prev = self.A_t\n",
+    "        B_prev = self.B_t\n",
+    "\n",
+    "        delta_A = eta * (return_t - A_prev)\n",
+    "        delta_B = eta * (return_t**2 - B_prev)\n",
+    "\n",
+    "        self.A_t = A_prev + delta_A\n",
+    "        self.B_t = B_prev + delta_B\n",
+    "\n",
+    "        variance = B_prev - A_prev**2\n",
+    "\n",
+    "        if variance <= 1e-8:\n",
+    "            return return_t\n",
+    "\n",
+    "        dsr = (B_prev * delta_A - 0.5 * A_prev * delta_B) / (variance ** 1.5 + 1e-8)\n",
+    "        return np.clip(dsr, -0.5, 0.5)\n",
+    "\n",
+    "    def step(self, action):\n",
+    "        \"\"\"Trade towards action[0] at the current close and advance one bar.\n",
+    "\n",
+    "        Returns (obs, reward, done, info). The episode ends after\n",
+    "        episode_length steps, when equity falls to half the initial balance,\n",
+    "        or when the data runs out.\n",
+    "        \"\"\"\n",
+    "        idx = self.start_idx + self.current_step\n",
+    "        current_price = self.df.loc[idx, 'close']\n",
+    "        target_position = np.clip(action[0], -1.0, 1.0)\n",
+    "\n",
+    "        self.prev_total_value = self.total_value\n",
+    "\n",
+    "        # A change larger than 0.1 closes the whole position and reopens at the target size\n",
+    "        if abs(target_position - self.position) > 0.1:\n",
+    "            if self.position != 0:\n",
+    "                self._close_position(current_price)\n",
+    "            if abs(target_position) > 0.1:\n",
+    "                self._open_position(target_position, current_price)\n",
+    "            self.num_trades += 1\n",
+    "\n",
+    "        self._update_total_value(current_price)\n",
+    "        self.max_value = max(self.max_value, self.total_value)\n",
+    "\n",
+    "        # Track position type\n",
+    "        if self.position > 0.1:\n",
+    "            self.long_steps += 1\n",
+    "        elif self.position < -0.1:\n",
+    "            self.short_steps += 1\n",
+    "        else:\n",
+    "            self.neutral_steps += 1\n",
+    "\n",
+    "        self.current_step += 1\n",
+    "        # Stop on the last available row so the next observation is never read past the data\n",
+    "        out_of_data = (self.start_idx + self.current_step) >= len(self.df) - 1\n",
+    "        done = ((self.current_step >= self.episode_length)\n",
+    "                or (self.total_value <= self.initial_balance * 0.5)\n",
+    "                or out_of_data)\n",
+    "\n",
+    "        # ============ DSR REWARD WITH WARMUP ============\n",
+    "        # Equity is already in mirrored-market terms (see _position_pnl), so no\n",
+    "        # sign flip is applied here; flipping the net return would turn fees into a bonus.\n",
+    "        raw_return = (self.total_value - self.prev_total_value) / self.initial_balance\n",
+    "\n",
+    "        # The EMAs are fed on every step so they have settled when warmup ends;\n",
+    "        # only the reward handed to the agent is held constant during warmup.\n",
+    "        dsr = self._calculate_dsr(raw_return)\n",
+    "        if self.current_step < self.dsr_warmup_steps:\n",
+    "            reward = -0.0001  # Tiny constant penalty during warmup\n",
+    "        else:\n",
+    "            reward = dsr\n",
+    "\n",
+    "        self.prev_action = target_position\n",
+    "\n",
+    "        obs = self._get_obs()\n",
+    "        info = {\n",
+    "            'total_value': self.total_value,\n",
+    "            'position': self.position,\n",
+    "            'long_steps': self.long_steps,\n",
+    "            'short_steps': self.short_steps,\n",
+    "            'neutral_steps': self.neutral_steps,\n",
+    "            'num_trades': self.num_trades,\n",
+    "            'current_fee': self.current_fee,\n",
+    "            'flip_sign': self.flip_sign,\n",
+    "            'raw_return': raw_return,\n",
+    "            'dsr_reward': reward\n",
+    "        }\n",
+    "\n",
+    "        return obs, reward, done, info\n",
+    "\n",
+    "    def _position_pnl(self, price):\n",
+    "        \"\"\"P&L of the open position at price; the price move is inverted in mirrored episodes.\n",
+    "\n",
+    "        Only valid while a position is open (entry_price is 0 otherwise).\n",
+    "        \"\"\"\n",
+    "        price_move = price / self.entry_price - 1\n",
+    "        return self.flip_sign * self.position * self.initial_balance * price_move\n",
+    "\n",
+    "    def _update_total_value(self, current_price):\n",
+    "        \"\"\"Mark equity to market: cash balance plus the open position's P&L.\"\"\"\n",
+    "        if self.position != 0:\n",
+    "            self.total_value = self.balance + self._position_pnl(current_price)\n",
+    "        else:\n",
+    "            self.total_value = self.balance\n",
+    "\n",
+    "    def _open_position(self, size, price):\n",
+    "        \"\"\"Open a position of the given signed size at price, paying the fee on its notional.\"\"\"\n",
+    "        self.position = size\n",
+    "        self.entry_price = price\n",
+    "        fee_cost = abs(size) * self.initial_balance * self.current_fee\n",
+    "        self.balance -= fee_cost\n",
+    "\n",
+    "    def _close_position(self, price):\n",
+    "        \"\"\"Realise the open position's P&L at price and pay the closing fee.\"\"\"\n",
+    "        pnl = self._position_pnl(price)\n",
+    "\n",
+    "        # Fee on the position notional (same basis as _open_position), not on\n",
+    "        # the P&L: a break-even round trip must still cost two fees.\n",
+    "        fee_cost = abs(self.position) * self.initial_balance * self.current_fee\n",
+    "        self.balance += pnl - fee_cost\n",
+    "        self.position = 0.0\n",
+    "\n",
+    "# One-shot summary of what the environment class provides\n",
+    "env_summary = [\n",
+    "    \"✅ Environment class ready:\",\n",
+    "    \"   - DSR reward with 100-step warmup\",\n",
+    "    \"   - Random flip augmentation (50% probability)\",\n",
+    "    \"   - Previous action in state\",\n",
+    "    \"   - Transaction fee ramping\",\n",
+    "    \"=\"*70,\n",
+    "]\n",
+    "print(*env_summary, sep=\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bab183bf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 3: LOAD SENTIMENT DATA\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" LOADING SENTIMENT DATA\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "sentiment_file = '/kaggle/input/bitcoin-news-with-sentimen/bitcoin_news_3hour_intervals_with_sentiment.csv'\n",
+    "\n",
+    "# Length of one sentiment interval, taken from the file name — TODO confirm against the data.\n",
+    "sentiment_interval = pd.Timedelta(hours=3)\n",
+    "\n",
+    "try:\n",
+    "    sentiment_raw = pd.read_csv(sentiment_file)\n",
+    "\n",
+    "    def parse_time_range(time_str):\n",
+    "        \"\"\"Turn '<date> <start>-<end>' into '<date> <start>:00'; other values pass through unchanged.\"\"\"\n",
+    "        parts = str(time_str).split(' ')\n",
+    "        if len(parts) >= 2:\n",
+    "            date = parts[0]\n",
+    "            start_time = parts[1].split('-')[0]\n",
+    "            return f\"{date} {start_time}:00\"\n",
+    "        return time_str\n",
+    "\n",
+    "    sentiment_raw['timestamp'] = pd.to_datetime(sentiment_raw['time_interval'].apply(parse_time_range))\n",
+    "    sentiment_raw = sentiment_raw.set_index('timestamp').sort_index()\n",
+    "\n",
+    "    sentiment_clean = pd.DataFrame(index=sentiment_raw.index)\n",
+    "    sentiment_clean['prob_bullish'] = pd.to_numeric(sentiment_raw['prob_bullish'], errors='coerce')\n",
+    "    sentiment_clean['prob_bearish'] = pd.to_numeric(sentiment_raw['prob_bearish'], errors='coerce')\n",
+    "    sentiment_clean['prob_neutral'] = pd.to_numeric(sentiment_raw['prob_neutral'], errors='coerce')\n",
+    "    sentiment_clean['confidence'] = pd.to_numeric(sentiment_raw['sentiment_confidence'], errors='coerce')\n",
+    "    sentiment_clean = sentiment_clean.dropna()\n",
+    "\n",
+    "    # Rows are stamped with the START of their interval. Presumably each row\n",
+    "    # summarises news from the whole interval, so it is only usable once the\n",
+    "    # interval has ended; if that assumption is wrong, the shift merely delays\n",
+    "    # the signal and cannot leak future information.\n",
+    "    sentiment_clean.index = sentiment_clean.index + sentiment_interval\n",
+    "    # reindex(method='ffill') needs a unique, sorted index\n",
+    "    sentiment_clean = sentiment_clean[~sentiment_clean.index.duplicated(keep='last')]\n",
+    "\n",
+    "    # Merge with data\n",
+    "    # NOTE(review): full_data was copied before this cell and does not receive these columns.\n",
+    "    for df in [train_data, valid_data, test_data]:\n",
+    "        # Each bar takes the latest sentiment row at or before it. This also\n",
+    "        # seeds the start of valid/test from earlier rows, so no back-fill\n",
+    "        # (which would read future sentiment) is needed.\n",
+    "        aligned = sentiment_clean.reindex(df.index, method='ffill')\n",
+    "        for col in ['prob_bullish', 'prob_bearish', 'prob_neutral', 'confidence']:\n",
+    "            df[col] = aligned[col].fillna(0.33 if col != 'confidence' else 0.5)\n",
+    "\n",
+    "        df['sentiment_net'] = df['prob_bullish'] - df['prob_bearish']\n",
+    "        df['sentiment_strength'] = (df['prob_bullish'] - df['prob_bearish']).abs()\n",
+    "        df['sentiment_weighted'] = df['sentiment_net'] * df['confidence']\n",
+    "\n",
+    "    print(f\"✅ Sentiment loaded: {len(sentiment_clean):,} records\")\n",
+    "    print(f\"✅ Features added: 7 sentiment features\")\n",
+    "\n",
+    "except Exception as e:\n",
+    "    # Fallback: keep the notebook running with neutral derived features only\n",
+    "    print(f\"⚠️ Sentiment not loaded: {e}\")\n",
+    "    for df in [train_data, valid_data, test_data]:\n",
+    "        df['sentiment_net'] = 0\n",
+    "        df['sentiment_strength'] = 0\n",
+    "        df['sentiment_weighted'] = 0\n",
+    "\n",
+    "print(\"=\"*70)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4640182f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 4: ROLLING NORMALIZATION + CREATE ENVIRONMENTS\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\" * 70, \" ROLLING NORMALIZATION + CREATING ENVIRONMENTS\", \"=\" * 70, sep=\"\\n\")\n",
+    "\n",
+    "# Model features = every column except raw OHLCV and the intermediate fgi columns.\n",
+    "feature_cols = [\n",
+    "    col for col in train_data.columns\n",
+    "    if col not in {'open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7'}\n",
+    "]\n",
+    "\n",
+    "print(f\"📊 Total features: {len(feature_cols)}\")\n",
+    "\n",
+    "# ----------------------------------------------------------------------------\n",
+    "# Rolling normalization: statistics come from a trailing window so that a row is\n",
+    "# never scaled with information from its own future.\n",
+    "# 2880 rows = 30 days of 15-minute candles.\n",
+    "# NOTE(review): each split is normalized on its own; how the first window of the\n",
+    "# valid/test splits behaves depends on RollingNormalizer - confirm.\n",
+    "# ----------------------------------------------------------------------------\n",
+    "rolling_normalizer = RollingNormalizer(window_size=2880)\n",
+    "\n",
+    "print(\"🔄 Applying rolling normalization (window=2880)...\")\n",
+    "\n",
+    "# Same call for every split, in train -> valid -> test order.\n",
+    "train_data_norm, valid_data_norm, test_data_norm = [\n",
+    "    rolling_normalizer.fit_transform(split, feature_cols)\n",
+    "    for split in (train_data, valid_data, test_data)\n",
+    "]\n",
+    "\n",
+    "print(\"✅ Rolling normalization complete (no look-ahead bias!)\")\n",
+    "\n",
+    "# One environment per normalized split, all with 500-step episodes.\n",
+    "train_env, valid_env, test_env = [\n",
+    "    BitcoinTradingEnv(frame, episode_length=500)\n",
+    "    for frame in (train_data_norm, valid_data_norm, test_data_norm)\n",
+    "]\n",
+    "\n",
+    "state_dim = train_env.observation_space.shape[0]\n",
+    "action_dim = 1\n",
+    "\n",
+    "print(\"\\n✅ Environments created:\")\n",
+    "print(f\"   State dim: {state_dim} (features={len(feature_cols)} + portfolio=6)\")\n",
+    "print(f\"   Action dim: {action_dim}\")\n",
+    "print(f\"   Train samples: {len(train_data):,}\")\n",
+    "print(\"   Fee starts at: 0% (ramps to 0.1% after warmup)\")\n",
+    "print(\"=\" * 70)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a48bf946",
+   "metadata": {
+    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
+    "execution": {
+     "iopub.execute_input": "2025-10-25T11:43:16.495113Z",
+     "iopub.status.busy": "2025-10-25T11:43:16.494816Z",
+     "iopub.status.idle": "2025-10-25T11:43:16.516176Z",
+     "shell.execute_reply": "2025-10-25T11:43:16.515329Z"
+    },
+    "papermill": {
+     "duration": 0.029962,
+     "end_time": "2025-10-25T11:43:16.517375",
+     "exception": false,
+     "start_time": "2025-10-25T11:43:16.487413",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 5: PYTORCH SAC AGENT (GPU OPTIMIZED)\n",
+    "# ============================================================================\n",
+    "\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "import torch.optim as optim\n",
+    "from torch.distributions import Normal\n",
+    "\n",
+    "# Section banner for the console log.\n",
+    "print(\"=\" * 70, \" PYTORCH SAC AGENT\", \"=\" * 70, sep=\"\\n\")\n",
+    "\n",
+    "# ============================================================================\n",
+    "# ACTOR NETWORK (Policy)\n",
+    "# ============================================================================\n",
+    "class Actor(nn.Module):\n",
+    "    \"\"\"Gaussian policy network whose samples are squashed into (-1, 1) with tanh.\n",
+    "\n",
+    "    Three ReLU layers (hidden_dim -> hidden_dim -> hidden_dim // 2) feed two linear\n",
+    "    heads that produce the mean and the log standard deviation of the action.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, state_dim, action_dim, hidden_dim=512):\n",
+    "        super().__init__()\n",
+    "        half_dim = hidden_dim // 2  # width of the tapered last hidden layer\n",
+    "\n",
+    "        # Shared trunk\n",
+    "        self.fc1 = nn.Linear(state_dim, hidden_dim)\n",
+    "        self.fc2 = nn.Linear(hidden_dim, hidden_dim)\n",
+    "        self.fc3 = nn.Linear(hidden_dim, half_dim)\n",
+    "\n",
+    "        # Output heads\n",
+    "        self.mean = nn.Linear(half_dim, action_dim)\n",
+    "        self.log_std = nn.Linear(half_dim, action_dim)\n",
+    "\n",
+    "        # Clamp range applied to the predicted log-std in forward()\n",
+    "        self.LOG_STD_MIN = -20\n",
+    "        self.LOG_STD_MAX = 2\n",
+    "\n",
+    "    def forward(self, state):\n",
+    "        \"\"\"Return ``(mean, log_std)`` for ``state``; log_std is clamped to the configured range.\"\"\"\n",
+    "        hidden = state\n",
+    "        for layer in (self.fc1, self.fc2, self.fc3):\n",
+    "            hidden = F.relu(layer(hidden))\n",
+    "\n",
+    "        mu = self.mean(hidden)\n",
+    "        log_sigma = torch.clamp(self.log_std(hidden), self.LOG_STD_MIN, self.LOG_STD_MAX)\n",
+    "        return mu, log_sigma\n",
+    "\n",
+    "    def sample(self, state):\n",
+    "        \"\"\"Draw a reparameterized action.\n",
+    "\n",
+    "        Returns ``(action, log_prob, mean)`` where ``action = tanh(x)`` for a Gaussian\n",
+    "        draw ``x``, ``log_prob`` is summed over the last dimension (kept as size 1) and\n",
+    "        ``mean`` is the pre-tanh Gaussian mean.\n",
+    "        \"\"\"\n",
+    "        mu, log_sigma = self.forward(state)\n",
+    "        gaussian = Normal(mu, log_sigma.exp())\n",
+    "\n",
+    "        pre_tanh = gaussian.rsample()  # differentiable w.r.t. mu / log_sigma\n",
+    "        action = torch.tanh(pre_tanh)\n",
+    "\n",
+    "        # Change-of-variables term for the tanh squashing; 1e-6 keeps the log finite.\n",
+    "        squash_term = torch.log(1 - action.pow(2) + 1e-6)\n",
+    "        log_prob = (gaussian.log_prob(pre_tanh) - squash_term).sum(dim=-1, keepdim=True)\n",
+    "\n",
+    "        return action, log_prob, mu\n",
+    "\n",
+    "# ============================================================================\n",
+    "# CRITIC NETWORK (Twin Q-functions)\n",
+    "# ============================================================================\n",
+    "class Critic(nn.Module):\n",
+    "    \"\"\"Two independent Q-networks that score a (state, action) pair.\n",
+    "\n",
+    "    Each tower is hidden_dim -> hidden_dim -> hidden_dim // 2 -> 1 with ReLU between\n",
+    "    the layers. ``forward`` evaluates both towers, ``q1`` only the first one.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, state_dim, action_dim, hidden_dim=512):\n",
+    "        super().__init__()\n",
+    "        in_dim = state_dim + action_dim\n",
+    "        half_dim = hidden_dim // 2\n",
+    "\n",
+    "        # Tower 1\n",
+    "        self.fc1_1 = nn.Linear(in_dim, hidden_dim)\n",
+    "        self.fc1_2 = nn.Linear(hidden_dim, hidden_dim)\n",
+    "        self.fc1_3 = nn.Linear(hidden_dim, half_dim)\n",
+    "        self.fc1_out = nn.Linear(half_dim, 1)\n",
+    "\n",
+    "        # Tower 2\n",
+    "        self.fc2_1 = nn.Linear(in_dim, hidden_dim)\n",
+    "        self.fc2_2 = nn.Linear(hidden_dim, hidden_dim)\n",
+    "        self.fc2_3 = nn.Linear(hidden_dim, half_dim)\n",
+    "        self.fc2_out = nn.Linear(half_dim, 1)\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def _tower(features, hidden_layers, head):\n",
+    "        \"\"\"Apply ReLU(layer(.)) for each hidden layer, then the linear head.\"\"\"\n",
+    "        for layer in hidden_layers:\n",
+    "            features = F.relu(layer(features))\n",
+    "        return head(features)\n",
+    "\n",
+    "    def forward(self, state, action):\n",
+    "        \"\"\"Return ``(q1, q2)`` for the pair, concatenated along the last dimension.\"\"\"\n",
+    "        pair = torch.cat([state, action], dim=-1)\n",
+    "        first = self._tower(pair, (self.fc1_1, self.fc1_2, self.fc1_3), self.fc1_out)\n",
+    "        second = self._tower(pair, (self.fc2_1, self.fc2_2, self.fc2_3), self.fc2_out)\n",
+    "        return first, second\n",
+    "\n",
+    "    def q1(self, state, action):\n",
+    "        \"\"\"Return only the first tower's Q-value.\"\"\"\n",
+    "        pair = torch.cat([state, action], dim=-1)\n",
+    "        return self._tower(pair, (self.fc1_1, self.fc1_2, self.fc1_3), self.fc1_out)\n",
+    "\n",
+    "# ============================================================================\n",
+    "# SAC AGENT\n",
+    "# ============================================================================\n",
+    "class SACAgent:\n",
+    "    \"\"\"Soft Actor-Critic agent: tanh-Gaussian actor, twin critics, learned temperature.\n",
+    "\n",
+    "    Args:\n",
+    "        state_dim: size of the observation vector.\n",
+    "        action_dim: size of the action vector; the entropy target is ``-action_dim``.\n",
+    "        device: torch device that holds the networks and the update batches.\n",
+    "        actor_lr, critic_lr, alpha_lr: Adam learning rates.\n",
+    "        gamma: discount factor used in the critic target.\n",
+    "        tau: interpolation factor of the soft target-critic update.\n",
+    "        initial_alpha: starting value of the entropy temperature.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, state_dim, action_dim, device,\n",
+    "                 actor_lr=3e-4, critic_lr=3e-4, alpha_lr=3e-4,\n",
+    "                 gamma=0.99, tau=0.005, initial_alpha=0.2):\n",
+    "\n",
+    "        self.device = device\n",
+    "        self.gamma = gamma\n",
+    "        self.tau = tau\n",
+    "        self.action_dim = action_dim\n",
+    "\n",
+    "        # Networks (the target critic starts as an exact copy of the critic)\n",
+    "        self.actor = Actor(state_dim, action_dim).to(device)\n",
+    "        self.critic = Critic(state_dim, action_dim).to(device)\n",
+    "        self.critic_target = Critic(state_dim, action_dim).to(device)\n",
+    "        self.critic_target.load_state_dict(self.critic.state_dict())\n",
+    "\n",
+    "        # Optimizers\n",
+    "        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=actor_lr)\n",
+    "        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=critic_lr)\n",
+    "\n",
+    "        # Entropy temperature, optimized in log space so alpha stays positive\n",
+    "        self.target_entropy = -action_dim\n",
+    "        self.log_alpha = torch.tensor(np.log(initial_alpha), requires_grad=True, device=device)\n",
+    "        self.alpha_optimizer = optim.Adam([self.log_alpha], lr=alpha_lr)\n",
+    "\n",
+    "    @property\n",
+    "    def alpha(self):\n",
+    "        \"\"\"Current entropy temperature, ``exp(log_alpha)``.\"\"\"\n",
+    "        return self.log_alpha.exp()\n",
+    "\n",
+    "    def _as_column(self, values):\n",
+    "        \"\"\"Return ``values`` as a float32 tensor of shape (batch, 1) on ``self.device``.\"\"\"\n",
+    "        return torch.as_tensor(values, dtype=torch.float32, device=self.device).reshape(-1, 1)\n",
+    "\n",
+    "    def select_action(self, state, deterministic=False):\n",
+    "        \"\"\"Return the action for one un-batched ``state`` as a NumPy array.\n",
+    "\n",
+    "        ``deterministic=True`` uses ``tanh(mean)``; otherwise the action is sampled.\n",
+    "        \"\"\"\n",
+    "        with torch.no_grad():\n",
+    "            state = torch.as_tensor(state, dtype=torch.float32, device=self.device).unsqueeze(0)\n",
+    "            if deterministic:\n",
+    "                mean, _ = self.actor(state)\n",
+    "                action = torch.tanh(mean)\n",
+    "            else:\n",
+    "                action, _, _ = self.actor.sample(state)\n",
+    "            return action.cpu().numpy()[0]\n",
+    "\n",
+    "    def update(self, batch):\n",
+    "        \"\"\"Run one SAC step (critic, actor, temperature, soft target update).\n",
+    "\n",
+    "        Args:\n",
+    "            batch: ``(states, actions, rewards, next_states, dones)``. ``rewards`` and\n",
+    "                ``dones`` may be shaped (batch,) or (batch, 1).\n",
+    "\n",
+    "        Returns:\n",
+    "            dict with ``critic_loss``, ``actor_loss``, ``alpha_loss`` and ``alpha``.\n",
+    "\n",
+    "        Raises:\n",
+    "            ValueError: if the critic target and the Q-values end up with different shapes.\n",
+    "        \"\"\"\n",
+    "        states, actions, rewards, next_states, dones = batch\n",
+    "\n",
+    "        states = torch.as_tensor(states, dtype=torch.float32, device=self.device)\n",
+    "        actions = torch.as_tensor(actions, dtype=torch.float32, device=self.device)\n",
+    "        actions = actions.reshape(states.shape[0], -1)\n",
+    "        next_states = torch.as_tensor(next_states, dtype=torch.float32, device=self.device)\n",
+    "        # The replay buffer already hands out (batch, 1) columns. The previous\n",
+    "        # `.unsqueeze(1)` turned them into (batch, 1, 1), which broadcast against the\n",
+    "        # (batch, 1) Q-values into a (batch, batch, 1) target: every reward was paired\n",
+    "        # with every next-state value. Force a (batch, 1) column instead.\n",
+    "        rewards = self._as_column(rewards)\n",
+    "        dones = self._as_column(dones)\n",
+    "\n",
+    "        # ============ Update Critic ============\n",
+    "        with torch.no_grad():\n",
+    "            next_actions, next_log_probs, _ = self.actor.sample(next_states)\n",
+    "            q1_target, q2_target = self.critic_target(next_states, next_actions)\n",
+    "            q_target = torch.min(q1_target, q2_target)\n",
+    "            target_q = rewards + (1 - dones) * self.gamma * (q_target - self.alpha * next_log_probs)\n",
+    "\n",
+    "        q1, q2 = self.critic(states, actions)\n",
+    "        if target_q.shape != q1.shape:\n",
+    "            # mse_loss would silently broadcast (its warning is suppressed in this notebook).\n",
+    "            raise ValueError(\n",
+    "                f\"critic target shape {tuple(target_q.shape)} != Q shape {tuple(q1.shape)}\"\n",
+    "            )\n",
+    "        critic_loss = F.mse_loss(q1, target_q) + F.mse_loss(q2, target_q)\n",
+    "\n",
+    "        self.critic_optimizer.zero_grad()\n",
+    "        critic_loss.backward()\n",
+    "        self.critic_optimizer.step()\n",
+    "\n",
+    "        # ============ Update Actor ============\n",
+    "        new_actions, log_probs, _ = self.actor.sample(states)\n",
+    "        q1_new, q2_new = self.critic(states, new_actions)\n",
+    "        q_new = torch.min(q1_new, q2_new)\n",
+    "        # alpha is a constant for the actor step; it is trained by its own loss below.\n",
+    "        actor_loss = (self.alpha.detach() * log_probs - q_new).mean()\n",
+    "\n",
+    "        self.actor_optimizer.zero_grad()\n",
+    "        actor_loss.backward()\n",
+    "        self.actor_optimizer.step()\n",
+    "\n",
+    "        # ============ Update Alpha ============\n",
+    "        alpha_loss = -(self.log_alpha * (log_probs.detach() + self.target_entropy)).mean()\n",
+    "\n",
+    "        self.alpha_optimizer.zero_grad()\n",
+    "        alpha_loss.backward()\n",
+    "        self.alpha_optimizer.step()\n",
+    "\n",
+    "        # ============ Update Target Network ============\n",
+    "        with torch.no_grad():\n",
+    "            for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):\n",
+    "                target_param.copy_(self.tau * param + (1 - self.tau) * target_param)\n",
+    "\n",
+    "        return {\n",
+    "            'critic_loss': critic_loss.item(),\n",
+    "            'actor_loss': actor_loss.item(),\n",
+    "            'alpha_loss': alpha_loss.item(),\n",
+    "            'alpha': self.alpha.item()\n",
+    "        }\n",
+    "\n",
+    "# Console summary of the architectures defined in this cell (default hidden_dim=512).\n",
+    "print(\n",
+    "    \"✅ Actor: 512→512→256→1\",\n",
+    "    \"✅ Critic: Twin Q (512→512→256→1)\",\n",
+    "    \"✅ SAC Agent with auto-tuning alpha\",\n",
+    "    \"=\" * 70,\n",
+    "    sep=\"\\n\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e7f72357",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-25T11:43:16.531841Z",
+     "iopub.status.busy": "2025-10-25T11:43:16.531619Z",
+     "iopub.status.idle": "2025-10-25T11:43:16.549706Z",
+     "shell.execute_reply": "2025-10-25T11:43:16.548781Z"
+    },
+    "papermill": {
+     "duration": 0.026952,
+     "end_time": "2025-10-25T11:43:16.550849",
+     "exception": false,
+     "start_time": "2025-10-25T11:43:16.523897",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 6: REPLAY BUFFER (GPU-FRIENDLY)\n",
+    "# ============================================================================\n",
+    "\n",
+    "# Section banner for the console log.\n",
+    "print(\"=\" * 70, \" REPLAY BUFFER\", \"=\" * 70, sep=\"\\n\")\n",
+    "\n",
+    "class ReplayBuffer:\n",
+    "    \"\"\"Fixed-capacity ring buffer of transitions stored in pre-allocated float32 arrays.\n",
+    "\n",
+    "    Once ``max_size`` transitions have been added, new ones overwrite the oldest.\n",
+    "    Rewards and done flags are stored, and returned by ``sample``, as (n, 1) columns.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, state_dim, action_dim, max_size=1_000_000):\n",
+    "        self.max_size = max_size\n",
+    "        self.ptr = 0   # slot the next transition is written to\n",
+    "        self.size = 0  # number of slots holding valid data\n",
+    "\n",
+    "        def _alloc(width):\n",
+    "            return np.zeros((max_size, width), dtype=np.float32)\n",
+    "\n",
+    "        self.states = _alloc(state_dim)\n",
+    "        self.actions = _alloc(action_dim)\n",
+    "        self.rewards = _alloc(1)\n",
+    "        self.next_states = _alloc(state_dim)\n",
+    "        self.dones = _alloc(1)\n",
+    "\n",
+    "        storage = (self.states, self.actions, self.rewards, self.next_states, self.dones)\n",
+    "        mem_gb = sum(arr.nbytes for arr in storage) / 1e9\n",
+    "        print(f\"📦 Buffer capacity: {max_size:,} | Memory: {mem_gb:.2f} GB\")\n",
+    "\n",
+    "    def add(self, state, action, reward, next_state, done):\n",
+    "        \"\"\"Write one transition at the current slot and advance the ring pointer.\"\"\"\n",
+    "        slot = self.ptr\n",
+    "        self.states[slot] = state\n",
+    "        self.actions[slot] = action\n",
+    "        self.rewards[slot] = reward\n",
+    "        self.next_states[slot] = next_state\n",
+    "        self.dones[slot] = done\n",
+    "\n",
+    "        self.ptr = (slot + 1) % self.max_size\n",
+    "        self.size = min(self.size + 1, self.max_size)\n",
+    "\n",
+    "    def sample(self, batch_size):\n",
+    "        \"\"\"Return ``(states, actions, rewards, next_states, dones)`` for random filled slots.\n",
+    "\n",
+    "        Slots are drawn with replacement from the first ``self.size`` rows.\n",
+    "        \"\"\"\n",
+    "        picks = np.random.randint(0, self.size, size=batch_size)\n",
+    "        fields = (self.states, self.actions, self.rewards, self.next_states, self.dones)\n",
+    "        return tuple(field[picks] for field in fields)\n",
+    "\n",
+    "# Confirmation line + closing rule for this cell's console output.\n",
+    "print(\"✅ ReplayBuffer defined\", \"=\" * 70, sep=\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f88fc10c",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-25T11:43:16.566540Z",
+     "iopub.status.busy": "2025-10-25T11:43:16.565845Z",
+     "iopub.status.idle": "2025-10-25T11:43:18.815426Z",
+     "shell.execute_reply": "2025-10-25T11:43:18.814475Z"
+    },
+    "papermill": {
+     "duration": 2.258566,
+     "end_time": "2025-10-25T11:43:18.816724",
+     "exception": false,
+     "start_time": "2025-10-25T11:43:16.558158",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 7: CREATE AGENT + BUFFER\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\" * 70, \" CREATING AGENT + BUFFER\", \"=\" * 70, sep=\"\\n\")\n",
+    "\n",
+    "# SAC agent; every hyper-parameter is spelled out even where it equals the class default.\n",
+    "agent = SACAgent(\n",
+    "    state_dim=state_dim, action_dim=action_dim, device=device,\n",
+    "    actor_lr=3e-4, critic_lr=3e-4, alpha_lr=3e-4,\n",
+    "    gamma=0.99, tau=0.005, initial_alpha=0.2,\n",
+    ")\n",
+    "\n",
+    "# Replay buffer with room for one million transitions.\n",
+    "buffer = ReplayBuffer(state_dim=state_dim, action_dim=action_dim, max_size=1_000_000)\n",
+    "\n",
+    "# Parameter counts; the target critic is not part of the total.\n",
+    "actor_params = sum(p.numel() for p in agent.actor.parameters())\n",
+    "critic_params = sum(p.numel() for p in agent.critic.parameters())\n",
+    "total_params = actor_params + critic_params\n",
+    "\n",
+    "print(f\"\\n✅ Agent created on {device}\")\n",
+    "print(f\"   Actor params: {actor_params:,}\")\n",
+    "print(f\"   Critic params: {critic_params:,}\")\n",
+    "print(f\"   Total params: {total_params:,}\")\n",
+    "print(\"=\" * 70)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "150b4202",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-25T11:43:18.832274Z",
+     "iopub.status.busy": "2025-10-25T11:43:18.831944Z",
+     "iopub.status.idle": "2025-10-25T11:43:19.038505Z",
+     "shell.execute_reply": "2025-10-25T11:43:19.037696Z"
+    },
+    "papermill": {
+     "duration": 0.215721,
+     "end_time": "2025-10-25T11:43:19.039678",
+     "exception": false,
+     "start_time": "2025-10-25T11:43:18.823957",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 8: TRAINING FUNCTION (GPU OPTIMIZED + FEE RAMPING)\n",
+    "# ============================================================================\n",
+    "\n",
+    "from tqdm.notebook import tqdm\n",
+    "import time\n",
+    "\n",
+    "# Section banner for the console log.\n",
+    "print(\"=\" * 70, \" TRAINING FUNCTION\", \"=\" * 70, sep=\"\\n\")\n",
+    "\n",
+    "def _save_sac_checkpoint(agent, path):\n",
+    "    \"\"\"Write the actor, critic, target-critic state dicts and log-alpha of ``agent`` to ``path``.\"\"\"\n",
+    "    torch.save({\n",
+    "        'actor': agent.actor.state_dict(),\n",
+    "        'critic': agent.critic.state_dict(),\n",
+    "        'critic_target': agent.critic_target.state_dict(),\n",
+    "        'log_alpha': agent.log_alpha,\n",
+    "    }, path)\n",
+    "\n",
+    "\n",
+    "def train_sac(agent, env, valid_env, buffer, \n",
+    "              total_timesteps=700_000,\n",
+    "              warmup_steps=10_000,\n",
+    "              batch_size=1024,\n",
+    "              update_freq=1,\n",
+    "              fee_warmup_steps=100_000,  # When to start fee ramping\n",
+    "              fee_ramp_steps=100_000,     # Steps to ramp from 0 to max fee\n",
+    "              save_path=\"sac_v9\",\n",
+    "              eval_fee_multiplier=None):\n",
+    "    \"\"\"Train ``agent`` on ``env`` with a fee curriculum and evaluate on ``valid_env`` after every episode.\n",
+    "\n",
+    "    Args:\n",
+    "        agent: SAC agent (uses ``select_action``, ``update``, ``alpha``, ``device`` and the\n",
+    "            networks written to the checkpoints).\n",
+    "        env: training environment (``reset``, ``step``, ``action_space``,\n",
+    "            ``set_training_mode``, ``set_fee_multiplier``).\n",
+    "        valid_env: validation environment with the same interface.\n",
+    "        buffer: replay buffer exposing ``add``, ``sample`` and ``size``.\n",
+    "        total_timesteps: number of environment steps to run.\n",
+    "        warmup_steps: steps during which actions come from ``env.action_space.sample()``\n",
+    "            and no gradient update is made.\n",
+    "        batch_size: transitions drawn per update.\n",
+    "        update_freq: one update every ``update_freq`` steps once warmup is over.\n",
+    "        fee_warmup_steps: steps trained with a fee multiplier of 0.\n",
+    "        fee_ramp_steps: steps over which the multiplier then rises linearly to 1;\n",
+    "            a value <= 0 jumps straight to 1.\n",
+    "        save_path: filename prefix of the ``_best_train``, ``_best_eval`` and ``_final``\n",
+    "            checkpoints.\n",
+    "        eval_fee_multiplier: fee multiplier used on ``valid_env``. ``None`` (default)\n",
+    "            mirrors the training curriculum; a fixed value such as 1.0 keeps the\n",
+    "            validation scores, and therefore the best-eval checkpoint, comparable\n",
+    "            across the whole run.\n",
+    "\n",
+    "    Returns:\n",
+    "        ``(episode_rewards, eval_rewards)``: the return of every training episode and\n",
+    "        the validation return measured after each of them.\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    print(f\"\\n🚀 Training Configuration:\")\n",
+    "    print(f\"   Total steps: {total_timesteps:,}\")\n",
+    "    print(f\"   Warmup: {warmup_steps:,}\")\n",
+    "    print(f\"   Batch size: {batch_size}\")\n",
+    "    print(f\"   Fee warmup: {fee_warmup_steps:,} steps (then ramp over {fee_ramp_steps:,})\")\n",
+    "    print(f\"   Data augmentation: Random flips (50% probability)\")\n",
+    "    print(f\"   DSR warmup: 100 steps per episode (0 reward)\")\n",
+    "    print(f\"   Device: {agent.device}\")\n",
+    "    \n",
+    "    # Set training modes for augmentation\n",
+    "    env.set_training_mode(True)   # Enable random flips\n",
+    "    valid_env.set_training_mode(False)  # No augmentation for validation\n",
+    "    \n",
+    "    # Stats tracking\n",
+    "    episode_rewards = []\n",
+    "    episode_lengths = []\n",
+    "    eval_rewards = []\n",
+    "    best_reward = -np.inf\n",
+    "    best_eval = -np.inf\n",
+    "    \n",
+    "    # Training stats\n",
+    "    critic_losses = []\n",
+    "    actor_losses = []\n",
+    "    \n",
+    "    state = env.reset()\n",
+    "    episode_reward = 0\n",
+    "    episode_length = 0\n",
+    "    episode_count = 0\n",
+    "    \n",
+    "    start_time = time.time()\n",
+    "    \n",
+    "    pbar = tqdm(range(total_timesteps), desc=\"Training\")\n",
+    "    \n",
+    "    for step in pbar:\n",
+    "        # ============ FEE RAMPING CURRICULUM ============\n",
+    "        # 0 fees until fee_warmup_steps, then ramp to 1.0 over fee_ramp_steps\n",
+    "        if step < fee_warmup_steps:\n",
+    "            fee_multiplier = 0.0\n",
+    "        elif fee_ramp_steps <= 0:\n",
+    "            # No ramp requested: avoid dividing by zero and apply the full fee at once.\n",
+    "            fee_multiplier = 1.0\n",
+    "        else:\n",
+    "            progress = (step - fee_warmup_steps) / fee_ramp_steps\n",
+    "            fee_multiplier = min(1.0, progress)\n",
+    "        \n",
+    "        env.set_fee_multiplier(fee_multiplier)\n",
+    "        valid_env.set_fee_multiplier(\n",
+    "            fee_multiplier if eval_fee_multiplier is None else eval_fee_multiplier\n",
+    "        )\n",
+    "        \n",
+    "        # Select action\n",
+    "        if step < warmup_steps:\n",
+    "            action = env.action_space.sample()\n",
+    "        else:\n",
+    "            action = agent.select_action(state, deterministic=False)\n",
+    "        \n",
+    "        # Step environment\n",
+    "        next_state, reward, done, info = env.step(action)\n",
+    "        \n",
+    "        # Store transition\n",
+    "        # NOTE(review): `done` is stored as terminal even if the episode merely hit its\n",
+    "        # length limit, which stops bootstrapping there - confirm this is intended.\n",
+    "        buffer.add(state, action, reward, next_state, float(done))\n",
+    "        \n",
+    "        state = next_state\n",
+    "        episode_reward += reward\n",
+    "        episode_length += 1\n",
+    "        \n",
+    "        # Update agent\n",
+    "        if step >= warmup_steps and step % update_freq == 0:\n",
+    "            batch = buffer.sample(batch_size)\n",
+    "            stats = agent.update(batch)\n",
+    "            critic_losses.append(stats['critic_loss'])\n",
+    "            actor_losses.append(stats['actor_loss'])\n",
+    "        \n",
+    "        # Episode end\n",
+    "        if done:\n",
+    "            episode_rewards.append(episode_reward)\n",
+    "            episode_lengths.append(episode_length)\n",
+    "            episode_count += 1\n",
+    "            \n",
+    "            # Calculate episode stats\n",
+    "            # (PnL assumes a starting balance of 10,000 - TODO confirm against the env)\n",
+    "            final_value = info.get('total_value', 10000)\n",
+    "            pnl_pct = (final_value / 10000 - 1) * 100\n",
+    "            num_trades = info.get('num_trades', 0)\n",
+    "            current_fee = info.get('current_fee', 0) * 100  # Convert to %\n",
+    "            \n",
+    "            # Get position distribution\n",
+    "            long_steps = info.get('long_steps', 0)\n",
+    "            short_steps = info.get('short_steps', 0)\n",
+    "            neutral_steps = info.get('neutral_steps', 0)\n",
+    "            total_active = long_steps + short_steps\n",
+    "            long_pct = (long_steps / total_active * 100) if total_active > 0 else 0\n",
+    "            short_pct = (short_steps / total_active * 100) if total_active > 0 else 0\n",
+    "            \n",
+    "            # Update progress bar with detailed info\n",
+    "            avg_reward = np.mean(episode_rewards[-10:]) if len(episode_rewards) >= 10 else episode_reward\n",
+    "            avg_critic = np.mean(critic_losses[-100:]) if critic_losses else 0\n",
+    "            \n",
+    "            pbar.set_postfix({\n",
+    "                'ep': episode_count,\n",
+    "                'R': f'{episode_reward:.4f}',\n",
+    "                'avg10': f'{avg_reward:.4f}',\n",
+    "                'PnL%': f'{pnl_pct:+.2f}',\n",
+    "                'L/S': f'{long_pct:.0f}/{short_pct:.0f}',\n",
+    "                'fee%': f'{current_fee:.3f}',\n",
+    "                'α': f'{agent.alpha.item():.3f}',\n",
+    "            })\n",
+    "            \n",
+    "            # ============ EVAL EVERY EPISODE ============\n",
+    "            eval_reward, eval_pnl, eval_long_pct = evaluate_agent(agent, valid_env, n_episodes=1)\n",
+    "            eval_rewards.append(eval_reward)\n",
+    "            \n",
+    "            # Print detailed episode summary\n",
+    "            elapsed = time.time() - start_time\n",
+    "            steps_per_sec = (step + 1) / elapsed\n",
+    "            \n",
+    "            print(f\"\\n{'='*60}\")\n",
+    "            print(f\"📊 Episode {episode_count} Complete | Step {step+1:,}/{total_timesteps:,}\")\n",
+    "            print(f\"{'='*60}\")\n",
+    "            print(f\"   🎮 TRAIN:\")\n",
+    "            print(f\"      Reward (DSR): {episode_reward:.4f} | PnL: {pnl_pct:+.2f}%\")\n",
+    "            print(f\"      Length: {episode_length} steps | Trades: {num_trades}\")\n",
+    "            print(f\"      Avg (last 10): {avg_reward:.4f}\")\n",
+    "            print(f\"   📊 POSITION BALANCE:\")\n",
+    "            print(f\"      Long: {long_steps} steps ({long_pct:.1f}%)\")\n",
+    "            print(f\"      Short: {short_steps} steps ({short_pct:.1f}%)\")\n",
+    "            print(f\"      Neutral: {neutral_steps} steps\")\n",
+    "            print(f\"   💰 FEE CURRICULUM:\")\n",
+    "            print(f\"      Current fee: {current_fee:.4f}% (multiplier: {fee_multiplier:.2f})\")\n",
+    "            print(f\"   📈 EVAL (validation):\")\n",
+    "            print(f\"      Reward: {eval_reward:.4f} | PnL: {eval_pnl:+.2f}%\")\n",
+    "            print(f\"      Long%: {eval_long_pct:.1f}%\")\n",
+    "            print(f\"      Avg (last 5): {np.mean(eval_rewards[-5:]):.4f}\")\n",
+    "            print(f\"   🧠 AGENT:\")\n",
+    "            print(f\"      Alpha: {agent.alpha.item():.4f}\")\n",
+    "            print(f\"      Critic loss: {avg_critic:.5f}\")\n",
+    "            print(f\"   ⚡ Speed: {steps_per_sec:.0f} steps/sec\")\n",
+    "            print(f\"   💾 Buffer: {buffer.size:,} transitions\")\n",
+    "            \n",
+    "            # Save best train. Episodes that end during warmup were driven by random\n",
+    "            # actions, so their reward says nothing about the weights being saved;\n",
+    "            # they must not set the bar the trained policy later has to beat.\n",
+    "            if step >= warmup_steps and episode_reward > best_reward:\n",
+    "                best_reward = episode_reward\n",
+    "                _save_sac_checkpoint(agent, f\"{save_path}_best_train.pt\")\n",
+    "                print(f\"   🏆 NEW BEST TRAIN: {best_reward:.4f}\")\n",
+    "            \n",
+    "            # Save best eval\n",
+    "            if eval_reward > best_eval:\n",
+    "                best_eval = eval_reward\n",
+    "                _save_sac_checkpoint(agent, f\"{save_path}_best_eval.pt\")\n",
+    "                print(f\"   🏆 NEW BEST EVAL: {best_eval:.4f}\")\n",
+    "            \n",
+    "            # Reset\n",
+    "            state = env.reset()\n",
+    "            episode_reward = 0\n",
+    "            episode_length = 0\n",
+    "    \n",
+    "    # Final save\n",
+    "    _save_sac_checkpoint(agent, f\"{save_path}_final.pt\")\n",
+    "    \n",
+    "    total_time = time.time() - start_time\n",
+    "    print(f\"\\n{'='*70}\")\n",
+    "    print(f\" TRAINING COMPLETE\")\n",
+    "    print(f\"{'='*70}\")\n",
+    "    print(f\"   Total time: {total_time/60:.1f} min\")\n",
+    "    print(f\"   Episodes: {episode_count}\")\n",
+    "    print(f\"   Best train reward (DSR): {best_reward:.4f}\")\n",
+    "    print(f\"   Best eval reward (DSR): {best_eval:.4f}\")\n",
+    "    print(f\"   Avg speed: {total_timesteps/total_time:.0f} steps/sec\")\n",
+    "    \n",
+    "    return episode_rewards, eval_rewards\n",
+    "\n",
+    "\n",
+    "def evaluate_agent(agent, env, n_episodes=1):\n",
+    "    \"\"\"Roll out ``n_episodes`` deterministic episodes on ``env`` and average the results.\n",
+    "\n",
+    "    Returns:\n",
+    "        ``(mean_reward, mean_pnl_pct, mean_long_pct)``. PnL is measured against a\n",
+    "        value of 10000; the long percentage is long steps over long + short steps\n",
+    "        as reported in the last ``info`` dict of each episode (0 if neither occurred).\n",
+    "    \"\"\"\n",
+    "    reward_sum = 0\n",
+    "    pnl_sum = 0\n",
+    "    long_share_sum = 0\n",
+    "\n",
+    "    for _ in range(n_episodes):\n",
+    "        obs = env.reset()\n",
+    "        ep_return = 0\n",
+    "        finished = False\n",
+    "\n",
+    "        while not finished:\n",
+    "            chosen = agent.select_action(obs, deterministic=True)\n",
+    "            obs, step_reward, finished, info = env.step(chosen)\n",
+    "            ep_return += step_reward\n",
+    "\n",
+    "        reward_sum += ep_return\n",
+    "        pnl_sum += (info.get('total_value', 10000) / 10000 - 1) * 100\n",
+    "\n",
+    "        # Share of active (long or short) steps that were long\n",
+    "        n_long = info.get('long_steps', 0)\n",
+    "        n_active = n_long + info.get('short_steps', 0)\n",
+    "        if n_active > 0:\n",
+    "            long_share_sum += n_long / n_active * 100\n",
+    "\n",
+    "    return reward_sum / n_episodes, pnl_sum / n_episodes, long_share_sum / n_episodes\n",
+    "\n",
+    "\n",
+    "# Console summary of what the training helpers in this cell provide.\n",
+    "print(\n",
+    "    \"✅ Training function ready:\",\n",
+    "    \"   - Per-episode eval + position tracking\",\n",
+    "    \"   - DSR reward (risk-adjusted)\",\n",
+    "    \"   - Fee ramping: 0% → 0.1% after 100k steps\",\n",
+    "    \"   - Model checkpointing\",\n",
+    "    \"=\" * 70,\n",
+    "    sep=\"\\n\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0097e547",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 9: START TRAINING\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\" * 70, \" STARTING SAC TRAINING\", \"=\" * 70, sep=\"\\n\")\n",
+    "\n",
+    "# Run configuration\n",
+    "TOTAL_STEPS = 500_000      # environment steps in total\n",
+    "WARMUP_STEPS = 10_000      # random-action steps before learning starts\n",
+    "BATCH_SIZE = 256           # transitions per gradient update\n",
+    "UPDATE_FREQ = 1            # one update per environment step\n",
+    "FEE_WARMUP = 100_000       # steps trained without fees\n",
+    "FEE_RAMP = 100_000         # steps over which the fee then ramps up (0 → 0.1%)\n",
+    "\n",
+    "# Human-readable recap of the run, printed as one block.\n",
+    "run_summary = [\n",
+    "    \"\\n📋 Configuration:\",\n",
+    "    f\"   Steps: {TOTAL_STEPS:,}\",\n",
+    "    f\"   Batch: {BATCH_SIZE}\",\n",
+    "    f\"   Train env: {len(train_data):,} candles\",\n",
+    "    f\"   Valid env: {len(valid_data):,} candles\",\n",
+    "    f\"   Device: {device}\",\n",
+    "    \"\\n💰 Fee Curriculum:\",\n",
+    "    f\"   Steps 0-{FEE_WARMUP:,}: 0% fee (learn basic trading)\",\n",
+    "    f\"   Steps {FEE_WARMUP:,}-{FEE_WARMUP+FEE_RAMP:,}: Ramp 0%→0.1%\",\n",
+    "    f\"   Steps {FEE_WARMUP+FEE_RAMP:,}+: Full 0.1% fee\",\n",
+    "    \"\\n🎯 Reward: Differential Sharpe Ratio (DSR)\",\n",
+    "    \"   - Risk-adjusted returns (not just PnL)\",\n",
+    "    \"   - Small values (-0.5 to 0.5) are normal\",\n",
+    "    \"   - NOT normalized further\",\n",
+    "]\n",
+    "print(\"\\n\".join(run_summary))\n",
+    "\n",
+    "# Train on the training environment; the validation environment is evaluated\n",
+    "# after every training episode inside train_sac.\n",
+    "episode_rewards, eval_rewards = train_sac(\n",
+    "    agent, train_env, valid_env, buffer,\n",
+    "    total_timesteps=TOTAL_STEPS,\n",
+    "    warmup_steps=WARMUP_STEPS,\n",
+    "    batch_size=BATCH_SIZE,\n",
+    "    update_freq=UPDATE_FREQ,\n",
+    "    fee_warmup_steps=FEE_WARMUP,\n",
+    "    fee_ramp_steps=FEE_RAMP,\n",
+    "    save_path=\"sac_v9_pytorch\",\n",
+    ")\n",
+    "\n",
+    "print(\"\\n\" + \"=\" * 70, \" TRAINING COMPLETE\", \"=\" * 70, sep=\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "712fb0b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 10: LOAD TRAINED MODELS\n",
+    "# ============================================================================\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.patches as mpatches\n",
+    "from matplotlib.gridspec import GridSpec\n",
+    "import seaborn as sns\n",
+    "\n",
+    "# Chart styling: matplotlib style sheet first, then the seaborn palette (order kept as-is)\n",
+    "plt.style.use('dark_background')\n",
+    "sns.set_palette('husl')\n",
+    "\n",
+    "_rule = '=' * 70\n",
+    "print(_rule, ' LOADING TRAINED MODELS', _rule, sep='\\n')\n",
+    "\n",
+    "# Model paths from Kaggle\n",
+    "MODEL_PATH = '/kaggle/input/models/'\n",
+    "FINAL_MODEL = f'{MODEL_PATH}sac_v9_pytorch_final (1).pt'\n",
+    "BEST_TRAIN_MODEL = f'{MODEL_PATH}sac_v9_pytorch_best_train (1).pt'\n",
+    "BEST_EVAL_MODEL = f'{MODEL_PATH}sac_v9_pytorch_best_eval (1).pt'\n",
+    "\n",
+    "def load_model(agent, checkpoint_path, name=\"model\"):\n",
+    "    \"\"\"Load network weights from a checkpoint file into ``agent``.\n",
+    "\n",
+    "    The checkpoint is read with ``torch.load`` (tensors mapped onto the\n",
+    "    notebook-level ``device``) and its ``'actor'``, ``'critic'`` and\n",
+    "    ``'critic_target'`` entries are loaded into the matching agent modules.\n",
+    "    A ``'log_alpha'`` entry, when present, is restored as well.\n",
+    "\n",
+    "    Args:\n",
+    "        agent: Object exposing ``actor``, ``critic`` and ``critic_target``\n",
+    "            modules and a ``log_alpha`` attribute.\n",
+    "        checkpoint_path: Path of the checkpoint file.\n",
+    "        name: Label used in the printed status message.\n",
+    "\n",
+    "    Returns:\n",
+    "        bool: True when everything loaded, False when any step raised. The\n",
+    "        error is printed, not re-raised, and modules loaded before the\n",
+    "        failure keep their new weights (the agent may be partially updated).\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        # NOTE(review): torch.load unpickles the file - only open trusted checkpoints.\n",
+    "        checkpoint = torch.load(checkpoint_path, map_location=device)\n",
+    "        agent.actor.load_state_dict(checkpoint['actor'])\n",
+    "        agent.critic.load_state_dict(checkpoint['critic'])\n",
+    "        agent.critic_target.load_state_dict(checkpoint['critic_target'])\n",
+    "        if 'log_alpha' in checkpoint:\n",
+    "            saved_log_alpha = checkpoint['log_alpha']\n",
+    "            current = getattr(agent, 'log_alpha', None)\n",
+    "            if (torch.is_tensor(current) and torch.is_tensor(saved_log_alpha)\n",
+    "                    and current.numel() == saved_log_alpha.numel()):\n",
+    "                # Copy the value into the agent's existing tensor instead of rebinding\n",
+    "                # the attribute, so anything already holding a reference to it\n",
+    "                # (presumably an alpha optimizer - TODO confirm in SACAgent) stays valid.\n",
+    "                with torch.no_grad():\n",
+    "                    current.copy_(\n",
+    "                        saved_log_alpha.detach()\n",
+    "                        .to(device=current.device, dtype=current.dtype)\n",
+    "                        .reshape(current.shape)\n",
+    "                    )\n",
+    "            else:\n",
+    "                # Non-tensor or differently sized value: fall back to plain assignment.\n",
+    "                agent.log_alpha = saved_log_alpha\n",
+    "        print(f\"✅ {name} loaded successfully!\")\n",
+    "        return True\n",
+    "    except Exception as e:\n",
+    "        # Deliberately best-effort: callers branch on the boolean result.\n",
+    "        print(f\"❌ Error loading {name}: {e}\")\n",
+    "        return False\n",
+    "\n",
+    "# Create fresh agent for evaluation\n",
+    "eval_agent = SACAgent(\n",
+    "    state_dim=state_dim,\n",
+    "    action_dim=action_dim,\n",
+    "    device=device\n",
+    ")\n",
+    "\n",
+    "# Load best eval model (most generalizable). load_model reports failure through\n",
+    "# its return value, so check it: otherwise eval_agent would be used below\n",
+    "# without the trained weights and nothing would flag it.\n",
+    "if not load_model(eval_agent, BEST_EVAL_MODEL, \"Best Eval Model\"):\n",
+    "    print(f\"⚠️  eval_agent does NOT hold the trained weights - check {BEST_EVAL_MODEL}\")\n",
+    "\n",
+    "print(\"=\"*70)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ec761346",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 11: TRAINING SUMMARY VISUALIZATION\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" TRAINING SUMMARY VISUALIZATION\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "# Results of the finished training run, entered by hand. The run-specific\n",
+    "# numbers on the cards and the timeline length are read from this dict, so it\n",
+    "# is the single place to edit them.\n",
+    "training_stats = {\n",
+    "    'total_time_min': 131.0,\n",
+    "    'total_episodes': 1000,\n",
+    "    'total_steps': 500_000,\n",
+    "    'best_train_dsr': 0.5949,\n",
+    "    'best_eval_dsr': 0.2125,\n",
+    "    'avg_speed': 64,  # steps/sec\n",
+    "}\n",
+    "\n",
+    "# Create summary figure: three text cards on the top row, timeline on the bottom row\n",
+    "fig = plt.figure(figsize=(16, 10))\n",
+    "gs = GridSpec(2, 3, figure=fig, hspace=0.3, wspace=0.3)\n",
+    "\n",
+    "# Colors\n",
+    "colors = {\n",
+    "    'primary': '#00D4AA',\n",
+    "    'secondary': '#FF6B6B', \n",
+    "    'accent': '#4ECDC4',\n",
+    "    'warning': '#FFE66D',\n",
+    "    'bg': '#1a1a2e',\n",
+    "    'grid': '#333355'\n",
+    "}\n",
+    "\n",
+    "fig.patch.set_facecolor(colors['bg'])\n",
+    "\n",
+    "\n",
+    "def _draw_text_card(ax, text, edge_color):\n",
+    "    \"\"\"Render ``text`` as a centred monospace box with an ``edge_color`` border on ``ax``.\"\"\"\n",
+    "    ax.set_facecolor(colors['bg'])\n",
+    "    ax.axis('off')\n",
+    "    ax.text(0.5, 0.5, text, transform=ax.transAxes, fontsize=10,\n",
+    "            verticalalignment='center', horizontalalignment='center',\n",
+    "            fontfamily='monospace', color='white',\n",
+    "            bbox=dict(boxstyle='round', facecolor=colors['bg'], edgecolor=edge_color, linewidth=2))\n",
+    "\n",
+    "\n",
+    "# Values are left-aligned and padded to a fixed width so the right-hand box\n",
+    "# border stays in the same column whatever numbers are in training_stats.\n",
+    "_total_steps = training_stats['total_steps']\n",
+    "_time_label = f\"{training_stats['total_time_min']:.1f} min\"\n",
+    "_speed_label = f\"{training_stats['avg_speed']} steps/sec\"\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 1. Training Configuration Card\n",
+    "# ============================================================================\n",
+    "ax1 = fig.add_subplot(gs[0, 0])\n",
+    "\n",
+    "config_text = f\"\"\"\n",
+    "╔══════════════════════════════════════╗\n",
+    "║     🎯 TRAINING CONFIGURATION        ║\n",
+    "╠══════════════════════════════════════╣\n",
+    "║                                      ║\n",
+    "║  Total Steps:     {_total_steps:<19,}║\n",
+    "║  Episodes:        {training_stats['total_episodes']:<19,}║\n",
+    "║  Batch Size:      256                ║\n",
+    "║  Episode Length:  500 steps          ║\n",
+    "║                                      ║\n",
+    "║  📊 Network Architecture             ║\n",
+    "║  Actor:  512 → 512 → 256 → 1        ║\n",
+    "║  Critic: 512 → 512 → 256 → 1 (x2)   ║\n",
+    "║                                      ║\n",
+    "║  💰 Fee Curriculum                   ║\n",
+    "║  0-100k:    0% fee                   ║\n",
+    "║  100k-200k: Ramp to 0.1%             ║\n",
+    "║  200k+:     Full 0.1% fee            ║\n",
+    "║                                      ║\n",
+    "║  🎲 Data Augmentation                ║\n",
+    "║  Random Flip: 50% probability        ║\n",
+    "║  DSR Warmup:  100 steps              ║\n",
+    "╚══════════════════════════════════════╝\n",
+    "\"\"\"\n",
+    "_draw_text_card(ax1, config_text, colors['primary'])\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 2. Key Metrics Card\n",
+    "# ============================================================================\n",
+    "ax2 = fig.add_subplot(gs[0, 1])\n",
+    "\n",
+    "metrics_text = f\"\"\"\n",
+    "╔══════════════════════════════════════╗\n",
+    "║        📈 TRAINING RESULTS           ║\n",
+    "╠══════════════════════════════════════╣\n",
+    "║                                      ║\n",
+    "║  ⏱️  Total Time:     {_time_label:<16}║\n",
+    "║  ⚡ Avg Speed:       {_speed_label:<16}║\n",
+    "║                                      ║\n",
+    "║  🏆 BEST REWARDS (DSR)               ║\n",
+    "║  ┌────────────────────────────┐      ║\n",
+    "║  │ Train:  {training_stats['best_train_dsr']:<19.4f}│      ║\n",
+    "║  │ Eval:   {training_stats['best_eval_dsr']:<19.4f}│      ║\n",
+    "║  └────────────────────────────┘      ║\n",
+    "║                                      ║\n",
+    "║  📊 Multi-Timeframe Features         ║\n",
+    "║  15m: 26 features                    ║\n",
+    "║  1h:  26 features                    ║\n",
+    "║  4h:  26 features                    ║\n",
+    "║  Other: ~10 features                 ║\n",
+    "║  TOTAL: ~88 features                 ║\n",
+    "║                                      ║\n",
+    "╚══════════════════════════════════════╝\n",
+    "\"\"\"\n",
+    "_draw_text_card(ax2, metrics_text, colors['accent'])\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 3. Reward Type Explanation\n",
+    "# ============================================================================\n",
+    "ax3 = fig.add_subplot(gs[0, 2])\n",
+    "\n",
+    "# Static text (no placeholders), so a plain string is enough here\n",
+    "dsr_text = \"\"\"\n",
+    "╔══════════════════════════════════════╗\n",
+    "║   🧮 DIFFERENTIAL SHARPE RATIO       ║\n",
+    "╠══════════════════════════════════════╣\n",
+    "║                                      ║\n",
+    "║  Formula:                            ║\n",
+    "║                                      ║\n",
+    "║       B·ΔA - 0.5·A·ΔB                ║\n",
+    "║  DSR = ─────────────────             ║\n",
+    "║         (B - A²)^1.5                 ║\n",
+    "║                                      ║\n",
+    "║  Where:                              ║\n",
+    "║  A = EMA of returns                  ║\n",
+    "║  B = EMA of squared returns          ║\n",
+    "║                                      ║\n",
+    "║  ✅ Benefits:                        ║\n",
+    "║  • Risk-adjusted (Sharpe-like)       ║\n",
+    "║  • Penalizes volatility              ║\n",
+    "║  • Rewards consistency               ║\n",
+    "║  • Scale: -0.5 to +0.5               ║\n",
+    "║                                      ║\n",
+    "║  ⚠️ Note: Small values are normal!   ║\n",
+    "╚══════════════════════════════════════╝\n",
+    "\"\"\"\n",
+    "_draw_text_card(ax3, dsr_text, colors['warning'])\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 4. Training Progress Bar (Visual)\n",
+    "# ============================================================================\n",
+    "ax4 = fig.add_subplot(gs[1, :])\n",
+    "ax4.set_facecolor(colors['bg'])\n",
+    "\n",
+    "# Curriculum phases as (label, first step, last step, bar colour); the final\n",
+    "# phase runs to the end of training.\n",
+    "phases = [\n",
+    "    ('Random Warmup', 0, 10000, '#666699'),\n",
+    "    ('No Fees (Learning)', 10000, 100000, colors['primary']),\n",
+    "    ('Fee Ramping', 100000, 200000, colors['warning']),\n",
+    "    ('Full Fees', 200000, _total_steps, colors['secondary']),\n",
+    "]\n",
+    "\n",
+    "for name, start, end, color in phases:\n",
+    "    ax4.barh(0, end-start, left=start, height=0.4, color=color, edgecolor='white', linewidth=0.5)\n",
+    "    mid = (start + end) / 2\n",
+    "    ax4.text(mid, 0, name, ha='center', va='center', fontsize=9, color='white', fontweight='bold')\n",
+    "\n",
+    "# Dashed marker at every boundary between two phases\n",
+    "for _, boundary, _, _ in phases[1:]:\n",
+    "    ax4.axvline(x=boundary, color='white', linestyle='--', alpha=0.5, linewidth=1)\n",
+    "\n",
+    "ax4.set_xlim(0, _total_steps)\n",
+    "ax4.set_ylim(-0.5, 0.5)\n",
+    "ax4.set_xlabel('Training Steps', fontsize=12, color='white')\n",
+    "ax4.set_title('📊 Training Curriculum Timeline', fontsize=14, color='white', fontweight='bold', pad=20)\n",
+    "ax4.set_yticks([])\n",
+    "ax4.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x/1000:.0f}K'))\n",
+    "ax4.tick_params(colors='white')\n",
+    "ax4.spines['top'].set_visible(False)\n",
+    "ax4.spines['right'].set_visible(False)\n",
+    "ax4.spines['left'].set_visible(False)\n",
+    "ax4.spines['bottom'].set_color('white')\n",
+    "\n",
+    "# Add step markers every 100K steps\n",
+    "for step in range(0, _total_steps + 1, 100_000):\n",
+    "    ax4.text(step, -0.35, f'{step//1000}K', ha='center', va='top', fontsize=8, color='gray')\n",
+    "\n",
+    "plt.suptitle('🚀 SAC Bitcoin Trading Agent - Training Summary', fontsize=18, color='white', fontweight='bold', y=0.98)\n",
+    "plt.tight_layout()\n",
+    "plt.savefig('training_summary.png', dpi=150, facecolor=colors['bg'], bbox_inches='tight')\n",
+    "plt.show()\n",
+    "\n",
+    "print(\"\\n✅ Training summary visualization saved!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "46d509d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 12: COMPREHENSIVE BACKTESTING FUNCTION\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" BACKTESTING ENGINE\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "def run_backtest(agent, env, name=\"Test\", verbose=True, periods_per_year=252 * 96):\n",
+    "    \"\"\"\n",
+    "    Run one deterministic backtest episode and collect detailed metrics.\n",
+    "\n",
+    "    The environment is first switched to evaluation settings (training mode\n",
+    "    off, fee multiplier 1.0) and is left in that state when the function returns.\n",
+    "\n",
+    "    Args:\n",
+    "        agent: Object with ``select_action(state, deterministic=True)``; element 0\n",
+    "            of the returned action is recorded in the history.\n",
+    "        env: Environment providing ``reset()``, ``step(action)`` returning\n",
+    "            ``(next_state, reward, done, info)`` with ``info['position']`` and\n",
+    "            ``info['total_value']``, plus ``set_training_mode``,\n",
+    "            ``set_fee_multiplier``, ``start_idx``, ``current_step``, ``df`` (with a\n",
+    "            ``'close'`` column), ``balance`` and ``initial_balance``.\n",
+    "        name: Label stored in the metrics and shown in the printed report.\n",
+    "        verbose: When True, print a summary of the metrics.\n",
+    "        periods_per_year: Bars per year used to annualise the Sharpe ratio. The\n",
+    "            default (252 days x 96 fifteen-minute bars) is the value that was\n",
+    "            previously hard-coded; pass ``365 * 96`` for a market that trades\n",
+    "            every day of the year.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict: ``name``, ``total_reward``, ``total_pnl_pct``, ``final_value``,\n",
+    "        ``sharpe_ratio``, ``max_drawdown``, ``num_trades``, ``long_pct``,\n",
+    "        ``short_pct``, ``neutral_pct``, ``win_rate``, ``avg_trade_pnl`` and the\n",
+    "        per-step ``history`` dict.\n",
+    "    \"\"\"\n",
+    "    # |position| at or below this is counted as neutral; a position change\n",
+    "    # larger than this is logged as a trade.\n",
+    "    position_eps = 0.1\n",
+    "\n",
+    "    env.set_training_mode(False)  # No augmentation during testing\n",
+    "    env.set_fee_multiplier(1.0)   # Full 0.1% fees\n",
+    "    \n",
+    "    # Run full episode\n",
+    "    state = env.reset()\n",
+    "    done = False\n",
+    "    \n",
+    "    # Track everything\n",
+    "    history = {\n",
+    "        'step': [],\n",
+    "        'price': [],\n",
+    "        'position': [],\n",
+    "        'action': [],\n",
+    "        'balance': [],\n",
+    "        'total_value': [],\n",
+    "        'pnl_pct': [],\n",
+    "        'reward': [],\n",
+    "        'trades': []\n",
+    "    }\n",
+    "    \n",
+    "    step = 0\n",
+    "    total_reward = 0\n",
+    "    prev_position = 0\n",
+    "    \n",
+    "    while not done:\n",
+    "        action = agent.select_action(state, deterministic=True)\n",
+    "        next_state, reward, done, info = env.step(action)\n",
+    "        \n",
+    "        # Close price of the bar that was just stepped through\n",
+    "        idx = env.start_idx + env.current_step - 1\n",
+    "        price = env.df.loc[idx, 'close']\n",
+    "        position = info['position']\n",
+    "        \n",
+    "        # Track trade: the label reflects only the sign of the new position\n",
+    "        if abs(position - prev_position) > position_eps:\n",
+    "            history['trades'].append({\n",
+    "                'step': step,\n",
+    "                'price': price,\n",
+    "                'from_pos': prev_position,\n",
+    "                'to_pos': position,\n",
+    "                'type': 'LONG' if position > 0 else ('SHORT' if position < 0 else 'CLOSE')\n",
+    "            })\n",
+    "        \n",
+    "        history['step'].append(step)\n",
+    "        history['price'].append(price)\n",
+    "        history['position'].append(position)\n",
+    "        history['action'].append(action[0])\n",
+    "        history['balance'].append(env.balance)\n",
+    "        history['total_value'].append(info['total_value'])\n",
+    "        history['pnl_pct'].append((info['total_value'] / env.initial_balance - 1) * 100)\n",
+    "        history['reward'].append(reward)\n",
+    "        \n",
+    "        prev_position = position\n",
+    "        total_reward += reward\n",
+    "        state = next_state\n",
+    "        step += 1\n",
+    "    \n",
+    "    # Calculate final metrics\n",
+    "    values = np.asarray(history['total_value'])\n",
+    "    final_value = history['total_value'][-1]\n",
+    "    initial_value = env.initial_balance\n",
+    "    total_pnl_pct = (final_value / initial_value - 1) * 100\n",
+    "    \n",
+    "    # Annualised Sharpe ratio of per-step returns. A one-step episode has no\n",
+    "    # returns at all; report 0.0 instead of the NaN a mean over nothing gives.\n",
+    "    returns = np.diff(values) / values[:-1]\n",
+    "    if returns.size:\n",
+    "        sharpe = np.mean(returns) / (np.std(returns) + 1e-8) * np.sqrt(periods_per_year)\n",
+    "    else:\n",
+    "        sharpe = 0.0\n",
+    "    \n",
+    "    # Max drawdown: largest percentage drop below the running peak\n",
+    "    peak = np.maximum.accumulate(values)\n",
+    "    drawdowns = (peak - values) / peak * 100\n",
+    "    max_drawdown = np.max(drawdowns)\n",
+    "    \n",
+    "    # Position distribution\n",
+    "    positions = np.array(history['position'])\n",
+    "    long_pct = np.mean(positions > position_eps) * 100\n",
+    "    short_pct = np.mean(positions < -position_eps) * 100\n",
+    "    neutral_pct = np.mean(np.abs(positions) <= position_eps) * 100\n",
+    "    \n",
+    "    # Win rate: each trade event is paired with the next one and scored by the\n",
+    "    # price change between them (sign flipped for shorts, position size ignored).\n",
+    "    trades = history['trades']\n",
+    "    trade_pnls = []\n",
+    "    for entry, exit_trade in zip(trades, trades[1:]):\n",
+    "        if entry['type'] == 'CLOSE':\n",
+    "            continue\n",
+    "        pnl = (exit_trade['price'] - entry['price']) / entry['price'] * 100\n",
+    "        trade_pnls.append(-pnl if entry['type'] == 'SHORT' else pnl)\n",
+    "    win_rate = np.mean(np.array(trade_pnls) > 0) * 100 if trade_pnls else 0\n",
+    "    \n",
+    "    metrics = {\n",
+    "        'name': name,\n",
+    "        'total_reward': total_reward,\n",
+    "        'total_pnl_pct': total_pnl_pct,\n",
+    "        'final_value': final_value,\n",
+    "        'sharpe_ratio': sharpe,\n",
+    "        'max_drawdown': max_drawdown,\n",
+    "        'num_trades': len(trades),\n",
+    "        'long_pct': long_pct,\n",
+    "        'short_pct': short_pct,\n",
+    "        'neutral_pct': neutral_pct,\n",
+    "        'win_rate': win_rate,\n",
+    "        'avg_trade_pnl': np.mean(trade_pnls) if trade_pnls else 0,\n",
+    "        'history': history\n",
+    "    }\n",
+    "    \n",
+    "    if verbose:\n",
+    "        print(f\"\\n{'='*50}\")\n",
+    "        print(f\"📊 {name} Results\")\n",
+    "        print(f\"{'='*50}\")\n",
+    "        print(f\"   💰 Total PnL:      {total_pnl_pct:+.2f}%\")\n",
+    "        print(f\"   📈 Final Value:    ${final_value:,.2f}\")\n",
+    "        print(f\"   🎯 DSR Reward:     {total_reward:.4f}\")\n",
+    "        print(f\"   📉 Max Drawdown:   {max_drawdown:.2f}%\")\n",
+    "        print(f\"   📊 Sharpe Ratio:   {sharpe:.3f}\")\n",
+    "        print(f\"   🔄 Num Trades:     {len(trades)}\")\n",
+    "        print(f\"   ✅ Win Rate:       {win_rate:.1f}%\")\n",
+    "        print(f\"   📊 Position Mix:   L:{long_pct:.0f}% | S:{short_pct:.0f}% | N:{neutral_pct:.0f}%\")\n",
+    "    \n",
+    "    return metrics\n",
+    "\n",
+    "print(\"✅ Backtesting engine ready!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "28f0c4d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 13: TEST ON UNSEEN DATA (TEST SET)\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" TESTING ON UNSEEN DATA\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "# Backtests below run on test_env, which must already exist from an earlier\n",
+    "# cell (presumably built on the held-out test split - verify where it is created).\n",
+    "print(f\"📊 Test Data: {len(test_data):,} candles (unseen during training)\")\n",
+    "print(f\"   Date range: {test_data.index[0]} to {test_data.index[-1]}\")\n",
+    "\n",
+    "# Backtest episodes per checkpoint (used by the loop and by the printed report)\n",
+    "N_TEST_EPISODES = 5\n",
+    "\n",
+    "# Test with all three models\n",
+    "models_to_test = [\n",
+    "    ('Best Eval', BEST_EVAL_MODEL),\n",
+    "    ('Best Train', BEST_TRAIN_MODEL),\n",
+    "    ('Final', FINAL_MODEL),\n",
+    "]\n",
+    "\n",
+    "all_results = []\n",
+    "\n",
+    "for model_name, model_path in models_to_test:\n",
+    "    print(f\"\\n🔄 Loading {model_name} model...\")\n",
+    "    \n",
+    "    # Create fresh agent so no weights carry over from the previous checkpoint\n",
+    "    test_agent = SACAgent(\n",
+    "        state_dim=state_dim,\n",
+    "        action_dim=action_dim,\n",
+    "        device=device\n",
+    "    )\n",
+    "    \n",
+    "    # Skip this checkpoint if it cannot be loaded (load_model already printed why)\n",
+    "    if not load_model(test_agent, model_path, model_name):\n",
+    "        continue\n",
+    "    \n",
+    "    # Run multiple test episodes for robustness\n",
+    "    episode_results = []\n",
+    "    for ep in range(N_TEST_EPISODES):\n",
+    "        metrics = run_backtest(test_agent, test_env, f\"{model_name} (Ep {ep+1})\", verbose=False)\n",
+    "        episode_results.append(metrics)\n",
+    "    \n",
+    "    # Average results\n",
+    "    avg_pnl = np.mean([r['total_pnl_pct'] for r in episode_results])\n",
+    "    avg_sharpe = np.mean([r['sharpe_ratio'] for r in episode_results])\n",
+    "    avg_drawdown = np.mean([r['max_drawdown'] for r in episode_results])\n",
+    "    avg_trades = np.mean([r['num_trades'] for r in episode_results])\n",
+    "    \n",
+    "    print(f\"\\n📊 {model_name} Model - Average over {N_TEST_EPISODES} episodes:\")\n",
+    "    print(f\"   💰 Avg PnL:        {avg_pnl:+.2f}%\")\n",
+    "    print(f\"   📊 Avg Sharpe:     {avg_sharpe:.3f}\")\n",
+    "    print(f\"   📉 Avg Drawdown:   {avg_drawdown:.2f}%\")\n",
+    "    print(f\"   🔄 Avg Trades:     {avg_trades:.0f}\")\n",
+    "    \n",
+    "    # Store best episode for visualization.\n",
+    "    # NOTE(review): this keeps the highest-PnL episode, so charts built from it\n",
+    "    # show the most favourable run; the avg_* fields carry the unbiased numbers.\n",
+    "    best_ep = max(episode_results, key=lambda x: x['total_pnl_pct'])\n",
+    "    best_ep['model_name'] = model_name\n",
+    "    best_ep['avg_pnl'] = avg_pnl\n",
+    "    best_ep['avg_sharpe'] = avg_sharpe\n",
+    "    best_ep['avg_drawdown'] = avg_drawdown\n",
+    "    best_ep['avg_trades'] = avg_trades\n",
+    "    all_results.append(best_ep)\n",
+    "\n",
+    "print(\"\\n\" + \"=\"*70)\n",
+    "print(\" ALL MODELS TESTED\")\n",
+    "print(\"=\"*70)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d3209ba1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 14: DETAILED PERFORMANCE VISUALIZATION\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" DETAILED PERFORMANCE CHARTS\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "# Use best eval model results\n",
+    "if all_results:\n",
+    "    best_result = all_results[0]  # Best Eval model\n",
+    "    history = best_result['history']\n",
+    "    \n",
+    "    # Create comprehensive visualization\n",
+    "    fig = plt.figure(figsize=(20, 16))\n",
+    "    gs = GridSpec(4, 3, figure=fig, hspace=0.35, wspace=0.25)\n",
+    "    fig.patch.set_facecolor('#1a1a2e')\n",
+    "    \n",
+    "    # ============================================================================\n",
+    "    # 1. Portfolio Value Over Time\n",
+    "    # ============================================================================\n",
+    "    ax1 = fig.add_subplot(gs[0, :2])\n",
+    "    ax1.set_facecolor('#1a1a2e')\n",
+    "    \n",
+    "    steps = history['step']\n",
+    "    portfolio = history['total_value']\n",
+    "    \n",
+    "    # Color based on profit/loss\n",
+    "    colors_line = ['#00D4AA' if v >= 10000 else '#FF6B6B' for v in portfolio]\n",
+    "    \n",
+    "    ax1.fill_between(steps, 10000, portfolio, where=np.array(portfolio) >= 10000, \n",
+    "                     color='#00D4AA', alpha=0.3, label='Profit')\n",
+    "    ax1.fill_between(steps, 10000, portfolio, where=np.array(portfolio) < 10000,\n",
+    "                     color='#FF6B6B', alpha=0.3, label='Loss')\n",
+    "    ax1.plot(steps, portfolio, color='white', linewidth=1.5, alpha=0.9)\n",
+    "    ax1.axhline(y=10000, color='gray', linestyle='--', alpha=0.5, label='Initial')\n",
+    "    \n",
+    "    ax1.set_xlabel('Step', fontsize=11, color='white')\n",
+    "    ax1.set_ylabel('Portfolio Value ($)', fontsize=11, color='white')\n",
+    "    ax1.set_title('💰 Portfolio Value Over Time', fontsize=14, color='white', fontweight='bold')\n",
+    "    ax1.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')\n",
+    "    ax1.tick_params(colors='white')\n",
+    "    ax1.grid(True, alpha=0.2, color='gray')\n",
+    "    for spine in ax1.spines.values():\n",
+    "        spine.set_color('gray')\n",
+    "    \n",
+    "    # Final value annotation\n",
+    "    final_val = portfolio[-1]\n",
+    "    pnl_pct = (final_val / 10000 - 1) * 100\n",
+    "    color = '#00D4AA' if pnl_pct >= 0 else '#FF6B6B'\n",
+    "    ax1.annotate(f'${final_val:,.0f}\\n({pnl_pct:+.1f}%)', \n",
+    "                 xy=(steps[-1], final_val), \n",
+    "                 fontsize=12, color=color, fontweight='bold',\n",
+    "                 ha='right', va='bottom')\n",
+    "    \n",
+    "    # ============================================================================\n",
+    "    # 2. Metrics Summary Box\n",
+    "    # ============================================================================\n",
+    "    ax2 = fig.add_subplot(gs[0, 2])\n",
+    "    ax2.set_facecolor('#1a1a2e')\n",
+    "    ax2.axis('off')\n",
+    "    \n",
+    "    metrics_text = f\"\"\"\n",
+    "    ╔═══════════════════════════╗\n",
+    "    ║   📊 TEST PERFORMANCE     ║\n",
+    "    ╠═══════════════════════════╣\n",
+    "    ║                           ║\n",
+    "    ║   PnL:     {best_result['total_pnl_pct']:+.2f}%          ║\n",
+    "    ║   Sharpe:  {best_result['sharpe_ratio']:.3f}          ║\n",
+    "    ║   Max DD:  {best_result['max_drawdown']:.2f}%          ║\n",
+    "    ║   Trades:  {best_result['num_trades']}              ║\n",
+    "    ║   Win%:    {best_result['win_rate']:.1f}%           ║\n",
+    "    ║                           ║\n",
+    "    ║   Long:    {best_result['long_pct']:.0f}%            ║\n",
+    "    ║   Short:   {best_result['short_pct']:.0f}%            ║\n",
+    "    ║   Neutral: {best_result['neutral_pct']:.0f}%            ║\n",
+    "    ╚═══════════════════════════╝\n",
+    "    \"\"\"\n",
+    "    ax2.text(0.5, 0.5, metrics_text, transform=ax2.transAxes, fontsize=11,\n",
+    "             verticalalignment='center', horizontalalignment='center',\n",
+    "             fontfamily='monospace', color='white',\n",
+    "             bbox=dict(boxstyle='round', facecolor='#1a1a2e', edgecolor='#00D4AA', linewidth=2))\n",
+    "    \n",
+    "    # ============================================================================\n",
+    "    # 3. Price with Position Overlay\n",
+    "    # ============================================================================\n",
+    "    ax3 = fig.add_subplot(gs[1, :])\n",
+    "    ax3.set_facecolor('#1a1a2e')\n",
+    "    \n",
+    "    prices = history['price']\n",
+    "    positions = history['position']\n",
+    "    \n",
+    "    # Normalize price for display\n",
+    "    price_norm = (np.array(prices) - np.min(prices)) / (np.max(prices) - np.min(prices))\n",
+    "    \n",
+    "    ax3.plot(steps, prices, color='white', linewidth=1, alpha=0.8, label='BTC Price')\n",
+    "    \n",
+    "    # Color background by position\n",
+    "    for i in range(len(steps)-1):\n",
+    "        if positions[i] > 0.1:\n",
+    "            ax3.axvspan(steps[i], steps[i+1], alpha=0.2, color='#00D4AA')\n",
+    "        elif positions[i] < -0.1:\n",
+    "            ax3.axvspan(steps[i], steps[i+1], alpha=0.2, color='#FF6B6B')\n",
+    "    \n",
+    "    # Add trade markers\n",
+    "    for trade in history['trades'][:50]:  # Limit markers for clarity\n",
+    "        step_idx = trade['step']\n",
+    "        if step_idx < len(prices):\n",
+    "            marker = '^' if trade['type'] == 'LONG' else ('v' if trade['type'] == 'SHORT' else 'o')\n",
+    "            color = '#00D4AA' if trade['type'] == 'LONG' else ('#FF6B6B' if trade['type'] == 'SHORT' else 'yellow')\n",
+    "            ax3.scatter(step_idx, prices[step_idx], marker=marker, color=color, s=80, zorder=5, edgecolors='white')\n",
+    "    \n",
+    "    ax3.set_xlabel('Step', fontsize=11, color='white')\n",
+    "    ax3.set_ylabel('BTC Price ($)', fontsize=11, color='white')\n",
+    "    ax3.set_title('📈 Price Chart with Agent Positions (Green=Long, Red=Short)', fontsize=14, color='white', fontweight='bold')\n",
+    "    ax3.tick_params(colors='white')\n",
+    "    ax3.grid(True, alpha=0.2, color='gray')\n",
+    "    for spine in ax3.spines.values():\n",
+    "        spine.set_color('gray')\n",
+    "    ax3.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))\n",
+    "    \n",
+    "    # Legend for trades\n",
+    "    long_patch = mpatches.Patch(color='#00D4AA', alpha=0.5, label='Long Position')\n",
+    "    short_patch = mpatches.Patch(color='#FF6B6B', alpha=0.5, label='Short Position')\n",
+    "    ax3.legend(handles=[long_patch, short_patch], loc='upper left', facecolor='#1a1a2e', edgecolor='gray')\n",
+    "    \n",
+    "    # ============================================================================\n",
+    "    # 4. Position Distribution\n",
+    "    # ============================================================================\n",
+    "    ax4 = fig.add_subplot(gs[2, 0])\n",
+    "    ax4.set_facecolor('#1a1a2e')\n",
+    "    \n",
+    "    pos_labels = ['Long', 'Short', 'Neutral']\n",
+    "    pos_values = [best_result['long_pct'], best_result['short_pct'], best_result['neutral_pct']]\n",
+    "    pos_colors = ['#00D4AA', '#FF6B6B', '#FFE66D']\n",
+    "    \n",
+    "    wedges, texts, autotexts = ax4.pie(pos_values, labels=pos_labels, colors=pos_colors,\n",
+    "                                        autopct='%1.1f%%', startangle=90,\n",
+    "                                        explode=(0.05, 0.05, 0.05),\n",
+    "                                        textprops={'color': 'white', 'fontsize': 10})\n",
+    "    ax4.set_title('📊 Position Distribution', fontsize=12, color='white', fontweight='bold')\n",
+    "    \n",
+    "    # ============================================================================\n",
+    "    # 5. Drawdown Chart\n",
+    "    # ============================================================================\n",
+    "    ax5 = fig.add_subplot(gs[2, 1])\n",
+    "    ax5.set_facecolor('#1a1a2e')\n",
+    "    \n",
+    "    peak = np.maximum.accumulate(portfolio)\n",
+    "    drawdown = (peak - np.array(portfolio)) / peak * 100\n",
+    "    \n",
+    "    ax5.fill_between(steps, 0, drawdown, color='#FF6B6B', alpha=0.5)\n",
+    "    ax5.plot(steps, drawdown, color='#FF6B6B', linewidth=1)\n",
+    "    ax5.axhline(y=best_result['max_drawdown'], color='yellow', linestyle='--', \n",
+    "                label=f'Max DD: {best_result[\"max_drawdown\"]:.1f}%')\n",
+    "    \n",
+    "    ax5.set_xlabel('Step', fontsize=11, color='white')\n",
+    "    ax5.set_ylabel('Drawdown (%)', fontsize=11, color='white')\n",
+    "    ax5.set_title('📉 Drawdown Over Time', fontsize=12, color='white', fontweight='bold')\n",
+    "    ax5.legend(loc='upper right', facecolor='#1a1a2e', edgecolor='gray')\n",
+    "    ax5.tick_params(colors='white')\n",
+    "    ax5.grid(True, alpha=0.2, color='gray')\n",
+    "    ax5.invert_yaxis()\n",
+    "    for spine in ax5.spines.values():\n",
+    "        spine.set_color('gray')\n",
+    "    \n",
+    "    # ============================================================================\n",
+    "    # 6. Action Distribution\n",
+    "    # ============================================================================\n",
+    "    ax6 = fig.add_subplot(gs[2, 2])\n",
+    "    ax6.set_facecolor('#1a1a2e')\n",
+    "    \n",
+    "    actions = history['action']\n",
+    "    ax6.hist(actions, bins=50, color='#4ECDC4', alpha=0.7, edgecolor='white', linewidth=0.5)\n",
+    "    ax6.axvline(x=0, color='yellow', linestyle='--', alpha=0.7, label='Neutral')\n",
+    "    ax6.axvline(x=np.mean(actions), color='#00D4AA', linestyle='-', linewidth=2, label=f'Mean: {np.mean(actions):.2f}')\n",
+    "    \n",
+    "    ax6.set_xlabel('Action Value', fontsize=11, color='white')\n",
+    "    ax6.set_ylabel('Frequency', fontsize=11, color='white')\n",
+    "    ax6.set_title('🎯 Action Distribution', fontsize=12, color='white', fontweight='bold')\n",
+    "    ax6.legend(loc='upper right', facecolor='#1a1a2e', edgecolor='gray')\n",
+    "    ax6.tick_params(colors='white')\n",
+    "    ax6.grid(True, alpha=0.2, color='gray')\n",
+    "    for spine in ax6.spines.values():\n",
+    "        spine.set_color('gray')\n",
+    "    \n",
+    "    # ============================================================================\n",
+    "    # 7. Cumulative Reward\n",
+    "    # ============================================================================\n",
+    "    ax7 = fig.add_subplot(gs[3, 0])\n",
+    "    ax7.set_facecolor('#1a1a2e')\n",
+    "    \n",
+    "    cum_rewards = np.cumsum(history['reward'])\n",
+    "    ax7.plot(steps, cum_rewards, color='#00D4AA', linewidth=1.5)\n",
+    "    ax7.fill_between(steps, 0, cum_rewards, where=cum_rewards >= 0, color='#00D4AA', alpha=0.3)\n",
+    "    ax7.fill_between(steps, 0, cum_rewards, where=cum_rewards < 0, color='#FF6B6B', alpha=0.3)\n",
+    "    ax7.axhline(y=0, color='gray', linestyle='--', alpha=0.5)\n",
+    "    \n",
+    "    ax7.set_xlabel('Step', fontsize=11, color='white')\n",
+    "    ax7.set_ylabel('Cumulative DSR', fontsize=11, color='white')\n",
+    "    ax7.set_title('🎯 Cumulative DSR Reward', fontsize=12, color='white', fontweight='bold')\n",
+    "    ax7.tick_params(colors='white')\n",
+    "    ax7.grid(True, alpha=0.2, color='gray')\n",
+    "    for spine in ax7.spines.values():\n",
+    "        spine.set_color('gray')\n",
+    "    \n",
+    "    # ============================================================================\n",
+    "    # 8. Model Comparison\n",
+    "    # ============================================================================\n",
+    "    ax8 = fig.add_subplot(gs[3, 1:])\n",
+    "    ax8.set_facecolor('#1a1a2e')\n",
+    "    \n",
+    "    if len(all_results) >= 3:\n",
+    "        model_names = [r['model_name'] for r in all_results]\n",
+    "        pnls = [r['total_pnl_pct'] for r in all_results]\n",
+    "        sharpes = [r['sharpe_ratio'] for r in all_results]\n",
+    "        \n",
+    "        x = np.arange(len(model_names))\n",
+    "        width = 0.35\n",
+    "        \n",
+    "        bars1 = ax8.bar(x - width/2, pnls, width, label='PnL %', color='#00D4AA', alpha=0.8)\n",
+    "        \n",
+    "        ax8_twin = ax8.twinx()\n",
+    "        bars2 = ax8_twin.bar(x + width/2, sharpes, width, label='Sharpe', color='#4ECDC4', alpha=0.8)\n",
+    "        \n",
+    "        ax8.set_xlabel('Model', fontsize=11, color='white')\n",
+    "        ax8.set_ylabel('PnL (%)', fontsize=11, color='#00D4AA')\n",
+    "        ax8_twin.set_ylabel('Sharpe Ratio', fontsize=11, color='#4ECDC4')\n",
+    "        ax8.set_title('📊 Model Comparison (Test Set)', fontsize=12, color='white', fontweight='bold')\n",
+    "        ax8.set_xticks(x)\n",
+    "        ax8.set_xticklabels(model_names, color='white')\n",
+    "        ax8.tick_params(colors='white')\n",
+    "        ax8_twin.tick_params(colors='white')\n",
+    "        ax8.axhline(y=0, color='gray', linestyle='--', alpha=0.5)\n",
+    "        \n",
+    "        # Add value labels\n",
+    "        for bar, val in zip(bars1, pnls):\n",
+    "            ax8.text(bar.get_x() + bar.get_width()/2, bar.get_height(), f'{val:+.1f}%',\n",
+    "                    ha='center', va='bottom', color='white', fontsize=9)\n",
+    "        \n",
+    "        for bar, val in zip(bars2, sharpes):\n",
+    "            ax8_twin.text(bar.get_x() + bar.get_width()/2, bar.get_height(), f'{val:.2f}',\n",
+    "                         ha='center', va='bottom', color='white', fontsize=9)\n",
+    "        \n",
+    "        ax8.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')\n",
+    "        ax8_twin.legend(loc='upper right', facecolor='#1a1a2e', edgecolor='gray')\n",
+    "        \n",
+    "        for spine in ax8.spines.values():\n",
+    "            spine.set_color('gray')\n",
+    "    \n",
+    "    plt.suptitle('🚀 SAC Bitcoin Trading Agent - Test Performance Analysis', \n",
+    "                 fontsize=18, color='white', fontweight='bold', y=0.98)\n",
+    "    plt.tight_layout()\n",
+    "    plt.savefig('test_performance.png', dpi=150, facecolor='#1a1a2e', bbox_inches='tight')\n",
+    "    plt.show()\n",
+    "    \n",
+    "    print(\"\\n✅ Performance visualization saved!\")\n",
+    "else:\n",
+    "    print(\"⚠️ No results to visualize. Run the test cells first.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dee9c95f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 15: EXTENDED BACKTEST (FULL TEST PERIOD)\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" EXTENDED BACKTEST - FULL TEST PERIOD\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "def run_extended_backtest(agent, df, initial_balance=10000, fee=0.001):\n",
+    "    \"\"\"\n",
+    "    Run one continuous backtest over the ENTIRE dataset (not just one episode).\n",
+    "\n",
+    "    Positions are a signed fraction of ``initial_balance``. Whenever the agent's\n",
+    "    target differs from the current position by more than 0.1, the open position\n",
+    "    is closed in full and (if |target| > 0.1) a new one is opened at the target.\n",
+    "\n",
+    "    Args:\n",
+    "        agent: Object exposing ``select_action(obs, deterministic=True)``; element 0\n",
+    "            of the returned action is used as the target position.\n",
+    "        df: Frame with 'close', 'returns_1_15m' and 'rsi_14_15m' columns. Every\n",
+    "            column except open/high/low/close/volume/fgi/fgi_ma7 is a model input.\n",
+    "        initial_balance: Starting capital, also the base for position sizing.\n",
+    "        fee: Proportional fee charged on traded notional, on entry and on exit.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with per-step lists 'timestamp', 'price', 'position',\n",
+    "        'portfolio_value', 'pnl_pct' and a 'trades' list of trade dicts. The\n",
+    "        per-step lists are empty when ``df`` has 101 rows or fewer.\n",
+    "    \"\"\"\n",
+    "    def unrealized_pnl(size, entry, mark):\n",
+    "        # A signed size makes one formula valid for both longs and shorts.\n",
+    "        if size == 0:\n",
+    "            return 0.0\n",
+    "        return size * initial_balance * (mark / entry - 1)\n",
+    "\n",
+    "    balance = initial_balance\n",
+    "    position = 0.0\n",
+    "    entry_price = 0.0\n",
+    "    prev_action = 0.0\n",
+    "    # Highest portfolio value recorded so far. Tracking it incrementally avoids\n",
+    "    # calling max() over the whole history on every step (O(n^2) overall).\n",
+    "    running_peak = None\n",
+    "\n",
+    "    history = {\n",
+    "        'timestamp': [],\n",
+    "        'price': [],\n",
+    "        'position': [],\n",
+    "        'portfolio_value': [],\n",
+    "        'pnl_pct': [],\n",
+    "        'trades': []\n",
+    "    }\n",
+    "\n",
+    "    # Get feature columns (everything that is not a raw market / FGI column)\n",
+    "    feature_cols = [col for col in df.columns\n",
+    "                   if col not in ['open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7']]\n",
+    "\n",
+    "    # Step through entire dataset; start at 100 to have lookback, and never\n",
+    "    # act on the final row.\n",
+    "    for i in range(100, len(df) - 1):\n",
+    "        row = df.iloc[i]\n",
+    "        price = row['close']\n",
+    "        timestamp = df.index[i]\n",
+    "\n",
+    "        # Mark-to-market before acting\n",
+    "        portfolio_value = balance + unrealized_pnl(position, entry_price, price)\n",
+    "\n",
+    "        # Build state (simplified observation: features + portfolio info)\n",
+    "        total_return = (portfolio_value / initial_balance) - 1\n",
+    "        max_val = initial_balance if running_peak is None else running_peak\n",
+    "        drawdown = (max_val - portfolio_value) / max_val if max_val > 0 else 0\n",
+    "\n",
+    "        portfolio_info = np.array([\n",
+    "            position,\n",
+    "            total_return,\n",
+    "            drawdown,\n",
+    "            row['returns_1_15m'],\n",
+    "            row['rsi_14_15m'],\n",
+    "            prev_action\n",
+    "        ], dtype=np.float32)\n",
+    "\n",
+    "        obs = np.concatenate([row[feature_cols].values, portfolio_info])\n",
+    "        obs = np.clip(obs, -10, 10).astype(np.float32)\n",
+    "\n",
+    "        # Get action from agent. Cast to a Python float so the balance accounting\n",
+    "        # stays in double precision even when the agent returns float32.\n",
+    "        action = agent.select_action(obs, deterministic=True)\n",
+    "        target_position = float(np.clip(action[0], -1.0, 1.0))\n",
+    "\n",
+    "        # Execute trade if position changes significantly\n",
+    "        if abs(target_position - position) > 0.1:\n",
+    "            # Close existing position\n",
+    "            if position != 0:\n",
+    "                close_pnl = unrealized_pnl(position, entry_price, price)\n",
+    "                # The fee applies to the notional being closed, not to the PnL:\n",
+    "                # a break-even exit still pays the transaction fee.\n",
+    "                exit_notional = abs(position) * initial_balance * (price / entry_price)\n",
+    "                fee_cost = exit_notional * fee\n",
+    "                balance += close_pnl - fee_cost\n",
+    "\n",
+    "                history['trades'].append({\n",
+    "                    'timestamp': timestamp,\n",
+    "                    'price': price,\n",
+    "                    'type': 'CLOSE',\n",
+    "                    'pnl': close_pnl - fee_cost\n",
+    "                })\n",
+    "                position = 0.0\n",
+    "\n",
+    "            # Open new position\n",
+    "            if abs(target_position) > 0.1:\n",
+    "                position = target_position\n",
+    "                entry_price = price\n",
+    "                fee_cost = abs(position) * initial_balance * fee\n",
+    "                balance -= fee_cost\n",
+    "\n",
+    "                history['trades'].append({\n",
+    "                    'timestamp': timestamp,\n",
+    "                    'price': price,\n",
+    "                    'type': 'LONG' if position > 0 else 'SHORT',\n",
+    "                    'size': position\n",
+    "                })\n",
+    "\n",
+    "        # Update portfolio value after any trade\n",
+    "        portfolio_value = balance + unrealized_pnl(position, entry_price, price)\n",
+    "\n",
+    "        # Record history\n",
+    "        history['timestamp'].append(timestamp)\n",
+    "        history['price'].append(price)\n",
+    "        history['position'].append(position)\n",
+    "        history['portfolio_value'].append(portfolio_value)\n",
+    "        history['pnl_pct'].append((portfolio_value / initial_balance - 1) * 100)\n",
+    "\n",
+    "        if running_peak is None or portfolio_value > running_peak:\n",
+    "            running_peak = portfolio_value\n",
+    "        prev_action = target_position\n",
+    "\n",
+    "    return history\n",
+    "\n",
+    "# Load the 'Best Eval' checkpoint into a freshly constructed agent\n",
+    "print(\"🔄 Loading Best Eval model for extended backtest...\")\n",
+    "best_agent = SACAgent(state_dim=state_dim, action_dim=action_dim, device=device)\n",
+    "load_model(best_agent, BEST_EVAL_MODEL, \"Best Eval\")\n",
+    "\n",
+    "# One continuous pass over the normalised test frame\n",
+    "print(f\"\\n📊 Running extended backtest on {len(test_data_norm):,} candles...\")\n",
+    "extended_history = run_extended_backtest(best_agent, test_data_norm)\n",
+    "\n",
+    "# Headline numbers (10000 is the backtest's default starting balance)\n",
+    "equity_curve = np.array(extended_history['portfolio_value'])\n",
+    "price_path = extended_history['price']\n",
+    "final_portfolio = extended_history['portfolio_value'][-1]\n",
+    "total_pnl = (final_portfolio / 10000 - 1) * 100\n",
+    "num_trades = len(extended_history['trades'])\n",
+    "\n",
+    "# Per-candle simple returns, annualised with 252 days x 96 candles per day\n",
+    "returns = np.diff(equity_curve) / equity_curve[:-1]\n",
+    "sharpe = returns.mean() / (returns.std() + 1e-8) * np.sqrt(252 * 96)\n",
+    "\n",
+    "# Drawdown in percent relative to the running peak of the equity curve\n",
+    "peak = np.maximum.accumulate(equity_curve)\n",
+    "drawdown = (peak - equity_curve) / peak * 100\n",
+    "max_dd = drawdown.max()\n",
+    "\n",
+    "# Passive benchmark: first recorded price to last recorded price\n",
+    "buy_hold_return = (price_path[-1] / price_path[0] - 1) * 100\n",
+    "\n",
+    "report_lines = [\n",
+    "    f\"\\n{'='*60}\",\n",
+    "    f\"📊 EXTENDED BACKTEST RESULTS\",\n",
+    "    f\"{'='*60}\",\n",
+    "    f\"   📅 Period: {extended_history['timestamp'][0].strftime('%Y-%m-%d')} to {extended_history['timestamp'][-1].strftime('%Y-%m-%d')}\",\n",
+    "    f\"   📊 Candles: {len(extended_history['portfolio_value']):,}\",\n",
+    "    f\"\\n   💰 AGENT PERFORMANCE:\",\n",
+    "    f\"      Final Value:   ${final_portfolio:,.2f}\",\n",
+    "    f\"      Total PnL:     {total_pnl:+.2f}%\",\n",
+    "    f\"      Sharpe Ratio:  {sharpe:.3f}\",\n",
+    "    f\"      Max Drawdown:  {max_dd:.2f}%\",\n",
+    "    f\"      Num Trades:    {num_trades}\",\n",
+    "    f\"\\n   📈 BUY & HOLD COMPARISON:\",\n",
+    "    f\"      B&H Return:    {buy_hold_return:+.2f}%\",\n",
+    "    f\"      Alpha:         {total_pnl - buy_hold_return:+.2f}%\",\n",
+    "    f\"{'='*60}\",\n",
+    "]\n",
+    "print('\\n'.join(report_lines))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6b20eb2e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 16: EXTENDED BACKTEST VISUALIZATION\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" EXTENDED BACKTEST VISUALIZATION\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "\n",
+    "def _style_dark_axes(ax, xlabel, ylabel, title, label_size=11, title_size=13):\n",
+    "    \"\"\"Apply the shared dark look (labels, title, ticks, grid, spines) to ``ax``.\"\"\"\n",
+    "    ax.set_xlabel(xlabel, fontsize=label_size, color='white')\n",
+    "    ax.set_ylabel(ylabel, fontsize=label_size, color='white')\n",
+    "    ax.set_title(title, fontsize=title_size, color='white', fontweight='bold')\n",
+    "    ax.tick_params(colors='white')\n",
+    "    ax.grid(True, alpha=0.2, color='gray')\n",
+    "    for spine in ax.spines.values():\n",
+    "        spine.set_color('gray')\n",
+    "\n",
+    "\n",
+    "fig = plt.figure(figsize=(20, 14))\n",
+    "gs = GridSpec(3, 2, figure=fig, hspace=0.3, wspace=0.2)\n",
+    "fig.patch.set_facecolor('#1a1a2e')\n",
+    "\n",
+    "timestamps = extended_history['timestamp']\n",
+    "portfolio = extended_history['portfolio_value']\n",
+    "prices = extended_history['price']\n",
+    "equity = np.asarray(portfolio, dtype=float)\n",
+    "\n",
+    "# Normalize buy & hold to start at 10000\n",
+    "buy_hold = np.array(prices) / prices[0] * 10000\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 1. Portfolio Value vs Buy & Hold (Main Chart)\n",
+    "# ============================================================================\n",
+    "ax1 = fig.add_subplot(gs[0, :])\n",
+    "ax1.set_facecolor('#1a1a2e')\n",
+    "\n",
+    "ax1.plot(timestamps, portfolio, color='#00D4AA', linewidth=2, label=f'SAC Agent ({total_pnl:+.1f}%)', zorder=3)\n",
+    "ax1.plot(timestamps, buy_hold, color='#4ECDC4', linewidth=1.5, alpha=0.7, label=f'Buy & Hold ({buy_hold_return:+.1f}%)', zorder=2)\n",
+    "ax1.axhline(y=10000, color='gray', linestyle='--', alpha=0.5, label='Initial Capital')\n",
+    "\n",
+    "# Shade the gap between the two curves by who is ahead\n",
+    "ax1.fill_between(timestamps, buy_hold, portfolio, where=equity > buy_hold,\n",
+    "                 color='#00D4AA', alpha=0.2, label='Outperformance')\n",
+    "ax1.fill_between(timestamps, buy_hold, portfolio, where=equity <= buy_hold,\n",
+    "                 color='#FF6B6B', alpha=0.2, label='Underperformance')\n",
+    "\n",
+    "_style_dark_axes(ax1, 'Date', 'Portfolio Value ($)', '💰 Agent Performance vs Buy & Hold',\n",
+    "                 label_size=12, title_size=16)\n",
+    "ax1.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray', fontsize=10)\n",
+    "ax1.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))\n",
+    "\n",
+    "# Add final value annotations\n",
+    "ax1.annotate(f'Agent: ${portfolio[-1]:,.0f}', xy=(timestamps[-1], portfolio[-1]),\n",
+    "             xytext=(10, 10), textcoords='offset points',\n",
+    "             fontsize=11, color='#00D4AA', fontweight='bold')\n",
+    "ax1.annotate(f'B&H: ${buy_hold[-1]:,.0f}', xy=(timestamps[-1], buy_hold[-1]),\n",
+    "             xytext=(10, -10), textcoords='offset points',\n",
+    "             fontsize=11, color='#4ECDC4', fontweight='bold')\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 2. BTC Price with Trade Markers\n",
+    "# ============================================================================\n",
+    "ax2 = fig.add_subplot(gs[1, :])\n",
+    "ax2.set_facecolor('#1a1a2e')\n",
+    "\n",
+    "ax2.plot(timestamps, prices, color='white', linewidth=1, alpha=0.8)\n",
+    "\n",
+    "# Entry markers only: up-triangles for longs, down-triangles for shorts\n",
+    "for trade_type, marker, marker_color, legend_name in [\n",
+    "    ('LONG', '^', '#00D4AA', 'Long'),\n",
+    "    ('SHORT', 'v', '#FF6B6B', 'Short'),\n",
+    "]:\n",
+    "    entries = [t for t in extended_history['trades'] if t['type'] == trade_type]\n",
+    "    if entries:\n",
+    "        ax2.scatter([t['timestamp'] for t in entries], [t['price'] for t in entries],\n",
+    "                    marker=marker, color=marker_color, s=100,\n",
+    "                    label=f'{legend_name} ({len(entries)})', zorder=5, edgecolors='white')\n",
+    "\n",
+    "_style_dark_axes(ax2, 'Date', 'BTC Price ($)', '📈 BTC Price with Trade Entries',\n",
+    "                 label_size=12, title_size=14)\n",
+    "# Skip the legend when nothing was traded (avoids an empty-legend warning)\n",
+    "if ax2.get_legend_handles_labels()[0]:\n",
+    "    ax2.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')\n",
+    "ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 3. Drawdown Chart\n",
+    "# ============================================================================\n",
+    "ax3 = fig.add_subplot(gs[2, 0])\n",
+    "ax3.set_facecolor('#1a1a2e')\n",
+    "\n",
+    "# Recomputed from the plotted equity curve so the panel cannot pick up a stale\n",
+    "# `drawdown` global left behind by another cell.\n",
+    "running_peak = np.maximum.accumulate(equity)\n",
+    "drawdown_pct = (running_peak - equity) / running_peak * 100\n",
+    "worst_drawdown = drawdown_pct.max()\n",
+    "\n",
+    "ax3.fill_between(timestamps, 0, drawdown_pct, color='#FF6B6B', alpha=0.5)\n",
+    "ax3.plot(timestamps, drawdown_pct, color='#FF6B6B', linewidth=1)\n",
+    "ax3.axhline(y=worst_drawdown, color='yellow', linestyle='--', linewidth=2, label=f'Max DD: {worst_drawdown:.1f}%')\n",
+    "\n",
+    "_style_dark_axes(ax3, 'Date', 'Drawdown (%)', '📉 Drawdown Over Time')\n",
+    "ax3.legend(loc='lower right', facecolor='#1a1a2e', edgecolor='gray')\n",
+    "ax3.invert_yaxis()\n",
+    "\n",
+    "# ============================================================================\n",
+    "# 4. Rolling Returns Comparison\n",
+    "# ============================================================================\n",
+    "ax4 = fig.add_subplot(gs[2, 1])\n",
+    "ax4.set_facecolor('#1a1a2e')\n",
+    "\n",
+    "# Rolling 7-day change (672 = 7 days of 15m candles). A window of N candles\n",
+    "# spans N-1 steps between its first and last element, so last-minus-first over\n",
+    "# the window is a plain lagged difference - no per-window Python callback needed.\n",
+    "window = 672\n",
+    "agent_rolling = pd.Series(extended_history['pnl_pct']).diff(window - 1)\n",
+    "bh_returns = (np.array(prices) / prices[0] - 1) * 100\n",
+    "bh_rolling = pd.Series(bh_returns).diff(window - 1)\n",
+    "\n",
+    "# Plot only the positions where the agent series has a full window behind it\n",
+    "valid = agent_rolling.notna().to_numpy()\n",
+    "timestamps_arr = np.array(timestamps)\n",
+    "ax4.plot(timestamps_arr[valid], agent_rolling.to_numpy()[valid], color='#00D4AA', linewidth=1.5, label='Agent', alpha=0.8)\n",
+    "ax4.plot(timestamps_arr[valid], bh_rolling.to_numpy()[valid], color='#4ECDC4', linewidth=1.5, label='Buy & Hold', alpha=0.8)\n",
+    "ax4.axhline(y=0, color='gray', linestyle='--', alpha=0.5)\n",
+    "\n",
+    "_style_dark_axes(ax4, 'Date', '7-Day Rolling Return (%)', '📊 7-Day Rolling Returns Comparison')\n",
+    "ax4.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')\n",
+    "\n",
+    "plt.suptitle('🚀 SAC Bitcoin Agent - Extended Backtest Analysis', \n",
+    "             fontsize=18, color='white', fontweight='bold', y=0.98)\n",
+    "plt.tight_layout()\n",
+    "plt.savefig('extended_backtest.png', dpi=150, facecolor='#1a1a2e', bbox_inches='tight')\n",
+    "plt.show()\n",
+    "\n",
+    "print(\"\\n✅ Extended backtest visualization saved!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "027f6534",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 17: FINAL SUMMARY DASHBOARD\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" FINAL SUMMARY DASHBOARD\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "# Dashboard palette; 'bg' is the shared dark background colour\n",
+    "colors = dict(\n",
+    "    green='#00D4AA',\n",
+    "    red='#FF6B6B',\n",
+    "    blue='#4ECDC4',\n",
+    "    yellow='#FFE66D',\n",
+    "    purple='#9B59B6',\n",
+    "    bg='#1a1a2e',\n",
+    ")\n",
+    "\n",
+    "# Dark canvas holding a 2 x 4 grid of metric cards\n",
+    "fig = plt.figure(figsize=(20, 10), facecolor=colors['bg'])\n",
+    "gs = GridSpec(2, 4, figure=fig, hspace=0.4, wspace=0.3)\n",
+    "\n",
+    "# ============================================================================\n",
+    "# Helper function for metric cards\n",
+    "# ============================================================================\n",
+    "def create_metric_card(ax, title, value, subtitle=\"\", color='#00D4AA', icon=\"📊\"):\n",
+    "    \"\"\"\n",
+    "    Render one dashboard card on ``ax``: icon, headline value, title and an\n",
+    "    optional subtitle, framed by a border drawn in ``color``.\n",
+    "\n",
+    "    Args:\n",
+    "        ax: Matplotlib axes to draw the card on.\n",
+    "        title: Caption shown below the value.\n",
+    "        value: Headline metric, rendered via f-string formatting.\n",
+    "        subtitle: Optional small grey note; skipped when empty.\n",
+    "        color: Colour of the value text and of the card border.\n",
+    "        icon: Glyph shown above the value.\n",
+    "    \"\"\"\n",
+    "    ax.set_facecolor(colors['bg'])\n",
+    "    # Hide the ticks only. ax.axis('off') would also suppress the spines, so\n",
+    "    # the coloured border configured below would never be rendered.\n",
+    "    ax.set_xticks([])\n",
+    "    ax.set_yticks([])\n",
+    "\n",
+    "    # All text is positioned in axes coordinates and centred on its anchor\n",
+    "    centred = dict(transform=ax.transAxes, ha='center', va='center')\n",
+    "    ax.text(0.5, 0.6, f\"{icon}\", fontsize=30, **centred)\n",
+    "    ax.text(0.5, 0.35, f\"{value}\", fontsize=24, color=color, fontweight='bold', **centred)\n",
+    "    ax.text(0.5, 0.15, f\"{title}\", fontsize=11, color='white', **centred)\n",
+    "    if subtitle:\n",
+    "        ax.text(0.5, 0.02, f\"{subtitle}\", fontsize=9, color='gray', **centred)\n",
+    "\n",
+    "    # Border\n",
+    "    for spine in ax.spines.values():\n",
+    "        spine.set_visible(True)\n",
+    "        spine.set_color(color)\n",
+    "        spine.set_linewidth(2)\n",
+    "\n",
+    "# ============================================================================\n",
+    "# Create metric cards\n",
+    "# ============================================================================\n",
+    "# Colours for the test-performance cards depend on the sign / level of each metric\n",
+    "pnl_color = colors['green'] if total_pnl >= 0 else colors['red']\n",
+    "if sharpe > 0.5:\n",
+    "    sharpe_color = colors['green']\n",
+    "elif sharpe > 0:\n",
+    "    sharpe_color = colors['yellow']\n",
+    "else:\n",
+    "    sharpe_color = colors['red']\n",
+    "alpha = total_pnl - buy_hold_return\n",
+    "alpha_color = colors['green'] if alpha >= 0 else colors['red']\n",
+    "\n",
+    "# (grid cell, title, value, subtitle, color, icon)\n",
+    "# NOTE: the four training-metric values in row 0 are hard-coded literals.\n",
+    "card_specs = [\n",
+    "    # Row 1: Training Metrics\n",
+    "    ((0, 0), \"Training Time\", \"131 min\", \"1000 episodes\", colors['blue'], \"⏱️\"),\n",
+    "    ((0, 1), \"Best Train DSR\", \"0.5949\", \"Risk-adjusted reward\", colors['green'], \"🎯\"),\n",
+    "    ((0, 2), \"Best Eval DSR\", \"0.2125\", \"Validation set\", colors['yellow'], \"📈\"),\n",
+    "    ((0, 3), \"Training Speed\", \"64 sps\", \"steps per second\", colors['purple'], \"⚡\"),\n",
+    "    # Row 2: Test Performance Metrics\n",
+    "    ((1, 0), \"Test PnL\", f\"{total_pnl:+.2f}%\", \"Extended backtest\", pnl_color, \"💰\"),\n",
+    "    ((1, 1), \"Sharpe Ratio\", f\"{sharpe:.3f}\", \"Annualized\", sharpe_color, \"📊\"),\n",
+    "    ((1, 2), \"Max Drawdown\", f\"{max_dd:.1f}%\", \"Peak to trough\", colors['red'], \"📉\"),\n",
+    "    ((1, 3), \"Alpha vs B&H\", f\"{alpha:+.2f}%\", \"Excess return\", alpha_color, \"🏆\"),\n",
+    "]\n",
+    "for grid_cell, *card_args in card_specs:\n",
+    "    create_metric_card(fig.add_subplot(gs[grid_cell]), *card_args)\n",
+    "\n",
+    "plt.suptitle('🚀 SAC Bitcoin Trading Agent - Performance Dashboard', \n",
+    "             fontsize=20, color='white', fontweight='bold', y=0.98)\n",
+    "\n",
+    "# Footer: test period, trade count and a fixed description of the setup\n",
+    "period_start = extended_history['timestamp'][0].strftime('%Y-%m-%d')\n",
+    "period_end = extended_history['timestamp'][-1].strftime('%Y-%m-%d')\n",
+    "fig.text(0.5, 0.02, \n",
+    "         f\"Test Period: {period_start} to {period_end} | \"\n",
+    "         f\"Trades: {num_trades} | Multi-timeframe: 15m/1h/4h | DSR Reward | 0.1% Transaction Fee\",\n",
+    "         ha='center', fontsize=10, color='gray')\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.savefig('final_dashboard.png', dpi=150, facecolor=colors['bg'], bbox_inches='tight')\n",
+    "plt.show()\n",
+    "\n",
+    "print(\"\\n\" + \"=\"*70)\n",
+    "print(\" ✅ ALL VISUALIZATIONS COMPLETE!\")\n",
+    "print(\"=\"*70)\n",
+    "print(\"\\n📁 Saved files:\")\n",
+    "for saved_png in ('training_summary.png', 'test_performance.png',\n",
+    "                  'extended_backtest.png', 'final_dashboard.png'):\n",
+    "    print(f\"   • {saved_png}\")\n",
+    "print(\"\\n🎉 Analysis complete!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7d777375",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============================================================================\n",
+    "# CELL 18: TRADE ANALYSIS & STATISTICS\n",
+    "# ============================================================================\n",
+    "\n",
+    "print(\"=\"*70)\n",
+    "print(\" TRADE ANALYSIS & STATISTICS\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "# Analyze trades\n",
+    "trades = extended_history['trades']\n",
+    "\n",
+    "if trades:\n",
+    "    # Separate trade types\n",
+    "    long_entries = [t for t in trades if t['type'] == 'LONG']\n",
+    "    short_entries = [t for t in trades if t['type'] == 'SHORT']\n",
+    "    closes = [t for t in trades if t['type'] == 'CLOSE']\n",
+    "    \n",
+    "    # Calculate trade PnLs from close trades\n",
+    "    trade_pnls = [t.get('pnl', 0) for t in closes if 'pnl' in t]\n",
+    "    \n",
+    "    if trade_pnls:\n",
+    "        winning_trades = [p for p in trade_pnls if p > 0]\n",
+    "        losing_trades = [p for p in trade_pnls if p <= 0]\n",
+    "        \n",
+    "        win_rate = len(winning_trades) / len(trade_pnls) * 100\n",
+    "        avg_win = np.mean(winning_trades) if winning_trades else 0\n",
+    "        avg_loss = np.mean(losing_trades) if losing_trades else 0\n",
+    "        profit_factor = abs(sum(winning_trades) / sum(losing_trades)) if losing_trades and sum(losing_trades) != 0 else float('inf')\n",
+    "        \n",
+    "        print(f\"\\n📊 TRADE STATISTICS:\")\n",
+    "        print(f\"   Total Trades:    {len(trade_pnls)}\")\n",
+    "        print(f\"   Long Entries:    {len(long_entries)}\")\n",
+    "        print(f\"   Short Entries:   {len(short_entries)}\")\n",
+    "        print(f\"\\n📈 PERFORMANCE:\")\n",
+    "        print(f\"   Win Rate:        {win_rate:.1f}%\")\n",
+    "        print(f\"   Winning Trades:  {len(winning_trades)}\")\n",
+    "        print(f\"   Losing Trades:   {len(losing_trades)}\")\n",
+    "        print(f\"   Avg Win:         ${avg_win:.2f}\")\n",
+    "        print(f\"   Avg Loss:        ${avg_loss:.2f}\")\n",
+    "        print(f\"   Profit Factor:   {profit_factor:.2f}\")\n",
+    "        print(f\"   Total P&L:       ${sum(trade_pnls):.2f}\")\n",
+    "        \n",
+    "        # Create trade analysis visualization\n",
+    "        fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n",
+    "        fig.patch.set_facecolor('#1a1a2e')\n",
+    "        \n",
+    "        # 1. Trade P&L Distribution\n",
+    "        ax1 = axes[0, 0]\n",
+    "        ax1.set_facecolor('#1a1a2e')\n",
+    "        \n",
+    "        bins = np.linspace(min(trade_pnls), max(trade_pnls), 30)\n",
+    "        ax1.hist([p for p in trade_pnls if p > 0], bins=bins, color='#00D4AA', alpha=0.7, label='Wins')\n",
+    "        ax1.hist([p for p in trade_pnls if p <= 0], bins=bins, color='#FF6B6B', alpha=0.7, label='Losses')\n",
+    "        ax1.axvline(x=0, color='white', linestyle='--', alpha=0.7)\n",
+    "        ax1.axvline(x=np.mean(trade_pnls), color='#FFE66D', linestyle='-', linewidth=2, \n",
+    "                    label=f'Mean: ${np.mean(trade_pnls):.2f}')\n",
+    "        \n",
+    "        ax1.set_xlabel('Trade P&L ($)', fontsize=11, color='white')\n",
+    "        ax1.set_ylabel('Frequency', fontsize=11, color='white')\n",
+    "        ax1.set_title('📊 Trade P&L Distribution', fontsize=13, color='white', fontweight='bold')\n",
+    "        ax1.legend(facecolor='#1a1a2e', edgecolor='gray')\n",
+    "        ax1.tick_params(colors='white')\n",
+    "        ax1.grid(True, alpha=0.2, color='gray')\n",
+    "        for spine in ax1.spines.values():\n",
+    "            spine.set_color('gray')\n",
+    "        \n",
+    "        # 2. Cumulative Trade P&L\n",
+    "        ax2 = axes[0, 1]\n",
+    "        ax2.set_facecolor('#1a1a2e')\n",
+    "        \n",
+    "        cum_pnl = np.cumsum(trade_pnls)\n",
+    "        trade_nums = range(1, len(trade_pnls) + 1)\n",
+    "        \n",
+    "        ax2.plot(trade_nums, cum_pnl, color='#00D4AA', linewidth=2)\n",
+    "        ax2.fill_between(trade_nums, 0, cum_pnl, where=cum_pnl >= 0, color='#00D4AA', alpha=0.3)\n",
+    "        ax2.fill_between(trade_nums, 0, cum_pnl, where=cum_pnl < 0, color='#FF6B6B', alpha=0.3)\n",
+    "        ax2.axhline(y=0, color='gray', linestyle='--', alpha=0.5)\n",
+    "        \n",
+    "        ax2.set_xlabel('Trade Number', fontsize=11, color='white')\n",
+    "        ax2.set_ylabel('Cumulative P&L ($)', fontsize=11, color='white')\n",
+    "        ax2.set_title('📈 Cumulative Trade P&L', fontsize=13, color='white', fontweight='bold')\n",
+    "        ax2.tick_params(colors='white')\n",
+    "        ax2.grid(True, alpha=0.2, color='gray')\n",
+    "        for spine in ax2.spines.values():\n",
+    "            spine.set_color('gray')\n",
+    "        \n",
+    "        # 3. Win/Loss Ratio Pie Chart\n",
+    "        ax3 = axes[1, 0]\n",
+    "        ax3.set_facecolor('#1a1a2e')\n",
+    "        \n",
+    "        sizes = [len(winning_trades), len(losing_trades)]\n",
+    "        labels = [f'Wins ({len(winning_trades)})', f'Losses ({len(losing_trades)})']\n",
+    "        colors_pie = ['#00D4AA', '#FF6B6B']\n",
+    "        explode = (0.05, 0.05)\n",
+    "        \n",
+    "        wedges, texts, autotexts = ax3.pie(sizes, labels=labels, colors=colors_pie,\n",
+    "                                           autopct='%1.1f%%', startangle=90, explode=explode,\n",
+    "                                           textprops={'color': 'white', 'fontsize': 11})\n",
+    "        ax3.set_title('🎯 Win/Loss Distribution', fontsize=13, color='white', fontweight='bold')\n",
+    "        \n",
+    "        # 4. Trade Size Distribution\n",
+    "        # Histogram of absolute position values taken from extended_history['position'].\n",
+    "        ax4 = axes[1, 1]\n",
+    "        ax4.set_facecolor('#1a1a2e')\n",
+    "        \n",
+    "        # Position sizes from history; entries with magnitude <= 0.1 are left out.\n",
+    "        positions = [abs(p) for p in extended_history['position'] if abs(p) > 0.1]\n",
+    "        \n",
+    "        if positions:\n",
+    "            mean_position = np.mean(positions)\n",
+    "            ax4.hist(positions, bins=20, color='#4ECDC4', alpha=0.7, edgecolor='white', linewidth=0.5)\n",
+    "            ax4.axvline(x=mean_position, color='#FFE66D', linestyle='-', linewidth=2,\n",
+    "                       label=f'Mean: {mean_position:.2f}')\n",
+    "            # Only build the legend when the labelled mean line exists; calling\n",
+    "            # legend() with no labelled artists would draw an empty legend box.\n",
+    "            ax4.legend(facecolor='#1a1a2e', edgecolor='gray')\n",
+    "        \n",
+    "        ax4.set_xlabel('Position Size', fontsize=11, color='white')\n",
+    "        ax4.set_ylabel('Frequency', fontsize=11, color='white')\n",
+    "        ax4.set_title('📊 Position Size Distribution', fontsize=13, color='white', fontweight='bold')\n",
+    "        ax4.tick_params(colors='white')\n",
+    "        ax4.grid(True, alpha=0.2, color='gray')\n",
+    "        for spine in ax4.spines.values():\n",
+    "            spine.set_color('gray')\n",
+    "        \n",
+    "        plt.suptitle('🔍 Trade Analysis Deep Dive', fontsize=16, color='white', fontweight='bold', y=0.98)\n",
+    "        plt.tight_layout()\n",
+    "        plt.savefig('trade_analysis.png', dpi=150, facecolor='#1a1a2e', bbox_inches='tight')\n",
+    "        plt.show()\n",
+    "        \n",
+    "        print(\"\\n✅ Trade analysis visualization saved!\")\n",
+    "    else:\n",
+    "        print(\"⚠️ No trade P&L data available\")\n",
+    "else:\n",
+    "    print(\"⚠️ No trades recorded\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kaggle": {
+   "accelerator": "nvidiaTeslaT4",
+   "dataSources": [
+    {
+     "datasetId": 7097204,
+     "sourceId": 11420269,
+     "sourceType": "datasetVersion"
+    },
+    {
+     "datasetId": 5656419,
+     "sourceId": 13492684,
+     "sourceType": "datasetVersion"
+    },
+    {
+     "datasetId": 7608804,
+     "sourceId": 13495502,
+     "sourceType": "datasetVersion"
+    },
+    {
+     "datasetId": 8569093,
+     "sourceId": 13496378,
+     "sourceType": "datasetVersion"
+    }
+   ],
+   "dockerImageVersionId": 31153,
+   "isGpuEnabled": true,
+   "isInternetEnabled": true,
+   "language": "python",
+   "sourceType": "notebook"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": null,
+   "end_time": null,
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "__notebook__.ipynb",
+   "output_path": "__notebook__.ipynb",
+   "parameters": {},
+   "start_time": "2025-10-25T11:42:30.221950",
+   "version": "2.6.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}