# ============================================================================
# CELL 1: PYTORCH GPU SETUP (KAGGLE 30GB GPU)
# ============================================================================

# NOTE(review): prefer `%pip install -q ta` so the install targets the active
# kernel's environment; also consider pinning the version for reproducibility.
!pip install -q ta

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')  # silences all warnings, including real ones

print("="*70)
print(" PYTORCH GPU SETUP (30GB GPU)")
print("="*70)

# ============================================================================
# GPU CONFIGURATION FOR MAXIMUM PERFORMANCE
# ============================================================================

# Single device object used by the rest of the notebook.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if torch.cuda.is_available():
    # Get GPU info
    gpu_name = torch.cuda.get_device_name(0)
    gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9  # bytes -> GB

    print(f"✅ GPU: {gpu_name}")
    print(f"✅ GPU Memory: {gpu_mem:.1f} GB")

    # Enable TF32 for faster matmul (Ampere GPUs: A100, RTX 30xx, 40xx).
    # TF32 trades a little matmul precision for throughput; no-op pre-Ampere.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    print("✅ TF32: Enabled (2-3x speedup on Ampere)")

    # Enable cuDNN autotuner — benchmarks kernels per input shape; helps when
    # shapes are static, can hurt when they vary between batches.
    torch.backends.cudnn.benchmark = True
    print("✅ cuDNN benchmark: Enabled")

    # Set default tensor type to CUDA
    # NOTE(review): set_default_device('cuda') makes EVERY newly created tensor
    # live on the GPU, including tensors created inside third-party code — this
    # can break numpy interop in libraries that assume CPU tensors. Confirm
    # this is intended rather than moving tensors explicitly with .to(device).
    torch.set_default_device('cuda')
    print("✅ Default device: CUDA")

else:
    print("⚠️ No GPU detected, using CPU")

print(f"\n✅ PyTorch: {torch.__version__}")
print(f"✅ Device: {device}")
print("="*70)

# ============================================================================
# CELL 2: LOAD DATA + FEATURES + ENVIRONMENT (MULTI-TIMEFRAME)
# ============================================================================

# Note: numpy/pandas are re-imported per notebook convention (harmless).
import numpy as np
import pandas as pd
import gym
from gym import spaces
from ta.momentum import RSIIndicator, StochasticOscillator, ROCIndicator, WilliamsRIndicator
from ta.trend import MACD, EMAIndicator, SMAIndicator, ADXIndicator, CCIIndicator
from ta.volatility import BollingerBands, AverageTrueRange
from ta.volume import OnBalanceVolumeIndicator
import os

print("="*70)
print(" LOADING MULTI-TIMEFRAME DATA + FEATURES")
print("="*70)
# ============================================================================
# HELPER: CALCULATE INDICATORS FOR ANY TIMEFRAME
# ============================================================================
def calculate_indicators(df, suffix=''):
    """Compute the technical-indicator feature set for one OHLCV timeframe.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain 'open', 'high', 'low', 'close', 'volume' columns.
    suffix : str
        Appended to every feature name as '_<suffix>' so features from
        different timeframes can coexist after merging (e.g. 'rsi_14_1h').

    Returns a copy of `df` with bounded/normalized indicator columns added and
    the intermediate helper columns (raw SMAs/EMAs, rolling extremes) dropped.
    Leading rows contain NaNs until the longest window (50) has filled.
    """
    data = df.copy()
    s = f'_{suffix}' if suffix else ''

    # Momentum — rescaled to roughly [0, 1] or tanh-squashed so the RL state
    # stays bounded regardless of price level.
    data[f'rsi_14{s}'] = RSIIndicator(close=data['close'], window=14).rsi() / 100
    data[f'rsi_7{s}'] = RSIIndicator(close=data['close'], window=7).rsi() / 100

    stoch = StochasticOscillator(high=data['high'], low=data['low'], close=data['close'], window=14)
    data[f'stoch_k{s}'] = stoch.stoch() / 100
    data[f'stoch_d{s}'] = stoch.stoch_signal() / 100

    roc = ROCIndicator(close=data['close'], window=12)
    data[f'roc_12{s}'] = np.tanh(roc.roc() / 100)

    williams = WilliamsRIndicator(high=data['high'], low=data['low'], close=data['close'], lbp=14)
    # Williams %R lives in [-100, 0]; shift/scale into [0, 1].
    data[f'williams_r{s}'] = (williams.williams_r() + 100) / 100

    macd = MACD(close=data['close'])
    # MACD values are price-scaled, so normalize by close before squashing.
    data[f'macd{s}'] = np.tanh(macd.macd() / data['close'] * 100)
    data[f'macd_signal{s}'] = np.tanh(macd.macd_signal() / data['close'] * 100)
    data[f'macd_diff{s}'] = np.tanh(macd.macd_diff() / data['close'] * 100)

    # Trend
    data[f'sma_20{s}'] = SMAIndicator(close=data['close'], window=20).sma_indicator()
    data[f'sma_50{s}'] = SMAIndicator(close=data['close'], window=50).sma_indicator()
    # NOTE(review): ema_12/ema_26 are computed and then dropped below without
    # ever feeding a derived feature — confirm they are needed at all.
    data[f'ema_12{s}'] = EMAIndicator(close=data['close'], window=12).ema_indicator()
    data[f'ema_26{s}'] = EMAIndicator(close=data['close'], window=26).ema_indicator()

    data[f'price_vs_sma20{s}'] = (data['close'] - data[f'sma_20{s}']) / data[f'sma_20{s}']
    data[f'price_vs_sma50{s}'] = (data['close'] - data[f'sma_50{s}']) / data[f'sma_50{s}']

    adx = ADXIndicator(high=data['high'], low=data['low'], close=data['close'], window=14)
    data[f'adx{s}'] = adx.adx() / 100
    data[f'adx_pos{s}'] = adx.adx_pos() / 100
    data[f'adx_neg{s}'] = adx.adx_neg() / 100

    cci = CCIIndicator(high=data['high'], low=data['low'], close=data['close'], window=20)
    data[f'cci{s}'] = np.tanh(cci.cci() / 100)

    # Volatility
    bb = BollingerBands(close=data['close'], window=20, window_dev=2)
    data[f'bb_width{s}'] = (bb.bollinger_hband() - bb.bollinger_lband()) / bb.bollinger_mavg()
    # NOTE(review): no epsilon in this denominator — a perfectly flat 20-bar
    # window (hband == lband) would divide by zero; verify against the data.
    data[f'bb_position{s}'] = (data['close'] - bb.bollinger_lband()) / (bb.bollinger_hband() - bb.bollinger_lband())

    atr = AverageTrueRange(high=data['high'], low=data['low'], close=data['close'], window=14)
    data[f'atr_percent{s}'] = atr.average_true_range() / data['close']

    # Volume
    data[f'volume_ma_20{s}'] = data['volume'].rolling(20).mean()
    data[f'volume_ratio{s}'] = data['volume'] / (data[f'volume_ma_20{s}'] + 1e-8)

    obv = OnBalanceVolumeIndicator(close=data['close'], volume=data['volume'])
    # 5-bar relative OBV change; epsilon guards division on flat OBV.
    data[f'obv_slope{s}'] = (obv.on_balance_volume().diff(5) / (obv.on_balance_volume().shift(5).abs() + 1e-8))

    # Price action
    data[f'returns_1{s}'] = data['close'].pct_change()
    data[f'returns_5{s}'] = data['close'].pct_change(5)
    data[f'returns_20{s}'] = data['close'].pct_change(20)
    data[f'volatility_20{s}'] = data[f'returns_1{s}'].rolling(20).std()

    data[f'body_size{s}'] = abs(data['close'] - data['open']) / (data['open'] + 1e-8)
    data[f'high_20{s}'] = data['high'].rolling(20).max()
    data[f'low_20{s}'] = data['low'].rolling(20).min()
    # Where close sits inside the 20-bar range: 0 = at the low, 1 = at the high.
    data[f'price_position{s}'] = (data['close'] - data[f'low_20{s}']) / (data[f'high_20{s}'] - data[f'low_20{s}'] + 1e-8)

    # Drop intermediate columns (kept only to derive the ratios above).
    cols_to_drop = [c for c in [f'sma_20{s}', f'sma_50{s}', f'ema_12{s}', f'ema_26{s}',
                                f'volume_ma_20{s}', f'high_20{s}', f'low_20{s}'] if c in data.columns]
    data = data.drop(columns=cols_to_drop)

    return data

def load_and_clean_btc(filepath):
    """Load one Binance OHLCV CSV and return a cleaned, 2021+ DataFrame.

    - Renames Binance headers to lowercase OHLCV names.
    - Indexes by parsed timestamp, keeps only OHLCV, coerces to numeric.
    - Drops pre-2021 rows, duplicate timestamps, and any row containing a
      zero or NaN.
      NOTE(review): replace(0, np.nan).dropna() also removes legitimate
      zero-volume candles — confirm that is intended.
    """
    df = pd.read_csv(filepath)
    column_mapping = {'Open time': 'timestamp', 'Open': 'open', 'High': 'high',
                      'Low': 'low', 'Close': 'close', 'Volume': 'volume'}
    df = df.rename(columns=column_mapping)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)
    df = df[['open', 'high', 'low', 'close', 'volume']]

    for col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Hard-coded training-era cutoff; earlier history is discarded.
    df = df[df.index >= '2021-01-01']
    df = df[~df.index.duplicated(keep='first')]
    df = df.replace(0, np.nan).dropna().sort_index()
    return df
# ============================================================================
# 1. LOAD ALL TIMEFRAMES
# ============================================================================
# Kaggle dataset path — all three CSVs come from the same dataset version.
data_path = '/kaggle/input/bitcoin-historical-datasets-2018-2024/'

print("📊 Loading 15-minute data...")
btc_15m = load_and_clean_btc(data_path + 'btc_15m_data_2018_to_2025.csv')
print(f" ✅ 15m: {len(btc_15m):,} candles")

print("📊 Loading 1-hour data...")
btc_1h = load_and_clean_btc(data_path + 'btc_1h_data_2018_to_2025.csv')
print(f" ✅ 1h: {len(btc_1h):,} candles")

print("📊 Loading 4-hour data...")
btc_4h = load_and_clean_btc(data_path + 'btc_4h_data_2018_to_2025.csv')
print(f" ✅ 4h: {len(btc_4h):,} candles")
# ============================================================================
# 2. LOAD FEAR & GREED INDEX
# ============================================================================
# Scans the dataset directory for a CSV containing an FGI-like column; falls
# back to a constant neutral value (50) when nothing usable is found.
fgi_loaded = False

try:
    fgi_path = '/kaggle/input/btc-usdt-4h-ohlc-fgi-daily-2020/'
    files = os.listdir(fgi_path)

    for filename in files:
        if filename.endswith('.csv'):
            fgi_data = pd.read_csv(fgi_path + filename)

            # Find a timestamp-like column; fall back to the first column.
            time_col = [c for c in fgi_data.columns if 'time' in c.lower() or 'date' in c.lower()]
            if time_col:
                fgi_data['timestamp'] = pd.to_datetime(fgi_data[time_col[0]])
            else:
                fgi_data['timestamp'] = pd.to_datetime(fgi_data.iloc[:, 0])

            fgi_data.set_index('timestamp', inplace=True)

            # Accept any column whose name mentions fgi/fear/greed.
            fgi_col = [c for c in fgi_data.columns if 'fgi' in c.lower() or 'fear' in c.lower() or 'greed' in c.lower()]
            if fgi_col:
                fgi_data = fgi_data[[fgi_col[0]]].rename(columns={fgi_col[0]: 'fgi'})
                fgi_loaded = True
                print(f"✅ Fear & Greed loaded: {len(fgi_data):,} values")
                break
except Exception as e:
    # FIX: was a bare `except: pass`, which silently swallowed every error
    # (including typos and KeyboardInterrupt). Surface the reason; the neutral
    # fallback below still keeps the notebook running.
    print(f"⚠️ Fear & Greed load failed: {e}")

if not fgi_loaded:
    # Neutral placeholder so downstream FGI features are still defined.
    fgi_data = pd.DataFrame(index=btc_15m.index)
    fgi_data['fgi'] = 50
    print("⚠️ Using neutral FGI values")

# ============================================================================
# 3. CALCULATE INDICATORS FOR EACH TIMEFRAME
# ============================================================================
print("\n🔧 Calculating indicators for 15m...")
data_15m = calculate_indicators(btc_15m, suffix='15m')

print("🔧 Calculating indicators for 1h...")
data_1h = calculate_indicators(btc_1h, suffix='1h')

print("🔧 Calculating indicators for 4h...")
data_4h = calculate_indicators(btc_4h, suffix='4h')
# ============================================================================
# 4. MERGE HIGHER TIMEFRAMES INTO 15M (FORWARD FILL)
# ============================================================================
print("\n🔗 Merging timeframes...")

# Only indicator columns from the higher timeframes; OHLCV stays 15m.
cols_1h = [c for c in data_1h.columns if c not in ['open', 'high', 'low', 'close', 'volume']]
cols_4h = [c for c in data_4h.columns if c not in ['open', 'high', 'low', 'close', 'volume']]

data = data_15m.copy()
data = data.join(data_1h[cols_1h], how='left')
data = data.join(data_4h[cols_4h], how='left')

# Forward-fill so each 15m row sees the most recent higher-timeframe value.
# FIX: fillna(method='ffill') has been deprecated since pandas 2.1 (removed in
# pandas 3.0); ffill()/bfill() are the supported, behavior-identical forms.
for col in cols_1h + cols_4h:
    data[col] = data[col].ffill()

# Merge FGI (daily) onto the 15m grid, then fill gaps; 50 = neutral.
data = data.join(fgi_data, how='left')
data['fgi'] = data['fgi'].ffill().bfill().fillna(50)

# Fear & Greed derived features, scaled to roughly [-1, 1].
data['fgi_normalized'] = (data['fgi'] - 50) / 50
data['fgi_change'] = data['fgi'].diff() / 50
data['fgi_ma7'] = data['fgi'].rolling(7).mean()
data['fgi_vs_ma'] = (data['fgi'] - data['fgi_ma7']) / 50

# Time features. hour in [0, 23/24]; day_of_week in [0, 6/7];
# us_session flags 14:00-21:00 (index timezone — presumably UTC; confirm).
data['hour'] = data.index.hour / 24
data['day_of_week'] = data.index.dayofweek / 7
data['us_session'] = ((data.index.hour >= 14) & (data.index.hour < 21)).astype(float)

btc_features = data.dropna()

feature_cols = [col for col in btc_features.columns
                if col not in ['open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7']]

print(f"\n✅ Multi-timeframe features complete!")
print(f" 15m features: {len([c for c in feature_cols if '15m' in c])}")
print(f" 1h features: {len([c for c in feature_cols if '1h' in c])}")
print(f" 4h features: {len([c for c in feature_cols if '4h' in c])}")
print(f" Other features: {len([c for c in feature_cols if '15m' not in c and '1h' not in c and '4h' not in c])}")
print(f" TOTAL features: {len(feature_cols)}")
print(f" Clean data: {len(btc_features):,} candles")
# ============================================================================
# 5. TRAIN/VALID/TEST SPLITS
# ============================================================================
print("\n📊 Creating Data Splits...")

# Chronological 70/15/15 split — never shuffle a time series.
train_size = int(len(btc_features) * 0.70)
valid_size = int(len(btc_features) * 0.15)

train_data = btc_features.iloc[:train_size].copy()
valid_data = btc_features.iloc[train_size:train_size+valid_size].copy()
test_data = btc_features.iloc[train_size+valid_size:].copy()

print(f" Train: {len(train_data):,} | Valid: {len(valid_data):,} | Test: {len(test_data):,}")

# Store full data for walk-forward
full_data = btc_features.copy()

# ============================================================================
# 6. ROLLING NORMALIZATION CLASS
# ============================================================================
class RollingNormalizer:
    """
    Rolling z-score normalization to prevent look-ahead bias.

    Each value is standardized against the mean/std of the trailing
    `window_size` observations only, so no future information leaks into the
    features. Despite the sklearn-style name, `fit_transform` is stateless
    apart from remembering `feature_cols`.
    """
    def __init__(self, window_size=2880, min_periods=100):
        # window_size=2880 = 30 days of 15m candles.
        # min_periods (generalized from a hard-coded 100; default preserves the
        # old behavior): rows with fewer trailing observations yield NaN and
        # are zero-filled in fit_transform.
        self.window_size = window_size
        self.min_periods = min_periods
        self.feature_cols = None

    def fit_transform(self, df, feature_cols):
        """Return a copy of `df` with `feature_cols` rolling-z-scored,
        clipped to [-5, 5], and warm-up NaNs filled with 0 (neutral)."""
        self.feature_cols = feature_cols
        result = df.copy()

        for col in feature_cols:
            rolling_mean = df[col].rolling(window=self.window_size, min_periods=self.min_periods).mean()
            rolling_std = df[col].rolling(window=self.window_size, min_periods=self.min_periods).std()
            # Epsilon keeps constant columns finite instead of dividing by 0.
            result[col] = (df[col] - rolling_mean) / (rolling_std + 1e-8)

        # Clip extreme values so a single outlier cannot dominate the state.
        result[feature_cols] = result[feature_cols].clip(-5, 5)

        # Fill NaN at start with 0 (neutral)
        result[feature_cols] = result[feature_cols].fillna(0)

        return result

print("✅ RollingNormalizer class defined")
# ============================================================================
# 7. TRADING ENVIRONMENT WITH DSR + RANDOM FLIP AUGMENTATION
# ============================================================================
class BitcoinTradingEnv(gym.Env):
    """
    Trading environment with:
    - Differential Sharpe Ratio (DSR) reward with warmup
    - Previous action in state (to learn cost of switching)
    - Transaction fee ramping (0 -> 0.1% after warmup)
    - Random flip data augmentation (50% chance to invert market)

    Uses the classic gym API: reset() -> obs, step() -> (obs, reward, done,
    info). Position is a signed fraction in [-1, 1] (negative = short);
    position sizing is always relative to the INITIAL balance, not equity.
    """

    def __init__(self, df, initial_balance=10000, episode_length=500,
                 base_transaction_fee=0.001,  # 0.1% max fee
                 dsr_eta=0.01):  # DSR adaptation rate (EMA decay for A_t/B_t)
        super().__init__()
        # reset_index so rows can be addressed positionally via .loc[idx].
        self.df = df.reset_index(drop=True)
        self.initial_balance = initial_balance
        self.episode_length = episode_length
        self.base_transaction_fee = base_transaction_fee
        self.dsr_eta = dsr_eta

        # Fee ramping (controlled externally via set_fee_multiplier)
        self.fee_multiplier = 0.0

        # Training mode for data augmentation (random flips)
        self.training_mode = True
        self.flip_sign = 1.0  # Will be -1 or +1 for augmentation

        # DSR warmup period (return 0 reward until EMAs settle)
        self.dsr_warmup_steps = 100

        self.feature_cols = [col for col in df.columns
                             if col not in ['open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7']]

        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)
        # +6 for: position, total_return, drawdown, returns_1, rsi_14, PREVIOUS_ACTION
        self.observation_space = spaces.Box(
            low=-10, high=10,
            shape=(len(self.feature_cols) + 6,),
            dtype=np.float32
        )
        self.reset()

    def set_fee_multiplier(self, multiplier):
        """Set fee multiplier (0.0 to 1.0) for fee ramping"""
        self.fee_multiplier = np.clip(multiplier, 0.0, 1.0)

    def set_training_mode(self, training=True):
        """Set training mode (enables random flips for augmentation)"""
        self.training_mode = training

    @property
    def current_fee(self):
        """Current transaction fee based on multiplier"""
        return self.base_transaction_fee * self.fee_multiplier

    def reset(self):
        """Start a new episode at a random offset and reset all bookkeeping."""
        # Random episode start; lower bound 100 leaves history for warm-up
        # features, max() guards tiny dataframes.
        max_start = len(self.df) - self.episode_length - 1
        self.start_idx = np.random.randint(100, max(101, max_start))

        self.current_step = 0
        self.balance = self.initial_balance
        self.position = 0.0
        self.entry_price = 0.0
        self.total_value = self.initial_balance
        self.prev_total_value = self.initial_balance
        self.max_value = self.initial_balance

        # Previous action for state
        self.prev_action = 0.0

        # DSR variables (Differential Sharpe Ratio)
        self.A_t = 0.0  # EMA of returns
        self.B_t = 0.0  # EMA of squared returns

        # Position tracking
        self.long_steps = 0
        self.short_steps = 0
        self.neutral_steps = 0
        self.num_trades = 0

        # Random flip for data augmentation (50% chance during training)
        # This inverts price movements: what was bullish becomes bearish
        if self.training_mode:
            self.flip_sign = -1.0 if np.random.random() < 0.5 else 1.0
        else:
            self.flip_sign = 1.0  # No flip during eval

        return self._get_obs()

    def _get_obs(self):
        """Build the observation: flipped features + 6 portfolio scalars."""
        idx = self.start_idx + self.current_step
        features = self.df.loc[idx, self.feature_cols].values.copy()

        # Apply random flip augmentation to return-based features
        # This inverts bullish/bearish signals when flip_sign = -1
        # NOTE(review): only columns matching these substrings are flipped;
        # RSI/stochastic/ADX etc. are direction-sensitive too but are left
        # unflipped — confirm that is intentional.
        if self.flip_sign < 0:
            for i, col in enumerate(self.feature_cols):
                if any(x in col.lower() for x in ['returns', 'roc', 'macd', 'cci', 'obv', 'sentiment']):
                    features[i] *= self.flip_sign

        total_return = (self.total_value / self.initial_balance) - 1
        drawdown = (self.max_value - self.total_value) / self.max_value if self.max_value > 0 else 0

        # Apply flip to market returns shown in portfolio info
        market_return = self.df.loc[idx, 'returns_1_15m'] * self.flip_sign

        portfolio_info = np.array([
            self.position,
            total_return,
            drawdown,
            market_return,
            self.df.loc[idx, 'rsi_14_15m'],
            self.prev_action
        ], dtype=np.float32)

        obs = np.concatenate([features, portfolio_info])
        return np.clip(obs, -10, 10).astype(np.float32)

    def _calculate_dsr(self, return_t):
        """
        Calculate Differential Sharpe Ratio reward.
        DSR = (B_{t-1} * ΔA_t - 0.5 * A_{t-1} * ΔB_t) / (B_{t-1} - A_{t-1}^2)^1.5

        Updates the A_t/B_t EMAs as a side effect. Falls back to the raw
        return while the variance estimate is still degenerate.
        """
        eta = self.dsr_eta

        A_prev = self.A_t
        B_prev = self.B_t

        delta_A = eta * (return_t - A_prev)
        delta_B = eta * (return_t**2 - B_prev)

        self.A_t = A_prev + delta_A
        self.B_t = B_prev + delta_B

        variance = B_prev - A_prev**2

        if variance <= 1e-8:
            return return_t

        dsr = (B_prev * delta_A - 0.5 * A_prev * delta_B) / (variance ** 1.5 + 1e-8)
        # Clip so one step cannot dominate the learning signal.
        return np.clip(dsr, -0.5, 0.5)

    def step(self, action):
        """Apply the target position, advance one candle, return DSR reward."""
        idx = self.start_idx + self.current_step
        current_price = self.df.loc[idx, 'close']
        target_position = np.clip(action[0], -1.0, 1.0)

        self.prev_total_value = self.total_value

        # Position change logic with transaction costs.
        # 0.1 dead-band: small adjustments are ignored, which limits churn.
        if abs(target_position - self.position) > 0.1:
            if self.position != 0:
                self._close_position(current_price)
            if abs(target_position) > 0.1:
                self._open_position(target_position, current_price)
                self.num_trades += 1

        self._update_total_value(current_price)
        self.max_value = max(self.max_value, self.total_value)

        # Track position type
        if self.position > 0.1:
            self.long_steps += 1
        elif self.position < -0.1:
            self.short_steps += 1
        else:
            self.neutral_steps += 1

        self.current_step += 1
        # Episode ends on length OR a 50% drawdown from the initial balance.
        done = (self.current_step >= self.episode_length) or (self.total_value <= self.initial_balance * 0.5)

        # ============ DSR REWARD WITH WARMUP ============
        raw_return = (self.total_value - self.prev_total_value) / self.initial_balance

        # Apply flip_sign to reward (if we flipped the market, flip what "good" means)
        # NOTE(review): prices themselves are NOT flipped — only features and
        # the reward sign — so P&L accounting still follows the real market.
        # Confirm this matches the augmentation intent.
        raw_return *= self.flip_sign

        # DSR Warmup: Return tiny penalty for first N steps to let EMAs settle
        if self.current_step < self.dsr_warmup_steps:
            reward = -0.0001  # Tiny constant penalty during warmup
        else:
            reward = self._calculate_dsr(raw_return)

        self.prev_action = target_position

        obs = self._get_obs()
        info = {
            'total_value': self.total_value,
            'position': self.position,
            'long_steps': self.long_steps,
            'short_steps': self.short_steps,
            'neutral_steps': self.neutral_steps,
            'num_trades': self.num_trades,
            'current_fee': self.current_fee,
            'flip_sign': self.flip_sign,
            'raw_return': raw_return,
            'dsr_reward': reward
        }

        return obs, reward, done, info

    def _update_total_value(self, current_price):
        """Mark the open position to market; equity = balance + unrealized P&L."""
        if self.position != 0:
            if self.position > 0:
                pnl = self.position * self.initial_balance * (current_price / self.entry_price - 1)
            else:
                pnl = abs(self.position) * self.initial_balance * (1 - current_price / self.entry_price)
            self.total_value = self.balance + pnl
        else:
            self.total_value = self.balance

    def _open_position(self, size, price):
        """Open a position of signed `size`; fee charged on notional."""
        self.position = size
        self.entry_price = price
        fee_cost = abs(size) * self.initial_balance * self.current_fee
        self.balance -= fee_cost

    def _close_position(self, price):
        """Realize P&L of the open position into balance and flatten."""
        if self.position > 0:
            pnl = self.position * self.initial_balance * (price / self.entry_price - 1)
        else:
            pnl = abs(self.position) * self.initial_balance * (1 - price / self.entry_price)

        # NOTE(review): closing fee is charged on |pnl|, while the opening fee
        # is charged on notional — asymmetric and cheaper than a real exchange
        # close; confirm this is intended.
        fee_cost = abs(pnl) * self.current_fee
        self.balance += pnl - fee_cost
        self.position = 0.0

print("✅ Environment class ready:")
print(" - DSR reward with 100-step warmup")
print(" - Random flip augmentation (50% probability)")
print(" - Previous action in state")
print(" - Transaction fee ramping")
print("="*70)

# ============================================================================
# CELL 3: LOAD SENTIMENT DATA
# ============================================================================

print("="*70)
print(" LOADING SENTIMENT DATA")
print("="*70)

sentiment_file = '/kaggle/input/bitcoin-news-with-sentimen/bitcoin_news_3hour_intervals_with_sentiment.csv'

try:
    sentiment_raw = pd.read_csv(sentiment_file)

    def parse_time_range(time_str):
        # "YYYY-MM-DD HH-HH" interval label -> "YYYY-MM-DD HH:00" start time.
        parts = str(time_str).split(' ')
        if len(parts) >= 2:
            date = parts[0]
            time_range = parts[1]
            start_time = time_range.split('-')[0]
            return f"{date} {start_time}:00"
        return time_str

    sentiment_raw['timestamp'] = sentiment_raw['time_interval'].apply(parse_time_range)
    sentiment_raw['timestamp'] = pd.to_datetime(sentiment_raw['timestamp'])
    sentiment_raw = sentiment_raw.set_index('timestamp').sort_index()

    sentiment_clean = pd.DataFrame(index=sentiment_raw.index)
    sentiment_clean['prob_bullish'] = pd.to_numeric(sentiment_raw['prob_bullish'], errors='coerce')
    sentiment_clean['prob_bearish'] = pd.to_numeric(sentiment_raw['prob_bearish'], errors='coerce')
errors='coerce')\n", " sentiment_clean['prob_neutral'] = pd.to_numeric(sentiment_raw['prob_neutral'], errors='coerce')\n", " sentiment_clean['confidence'] = pd.to_numeric(sentiment_raw['sentiment_confidence'], errors='coerce')\n", " sentiment_clean = sentiment_clean.dropna()\n", " \n", " # Merge with data\n", " for df in [train_data, valid_data, test_data]:\n", " df_temp = df.join(sentiment_clean, how='left')\n", " for col in ['prob_bullish', 'prob_bearish', 'prob_neutral', 'confidence']:\n", " df[col] = df_temp[col].fillna(method='ffill').fillna(method='bfill').fillna(0.33 if col != 'confidence' else 0.5)\n", " \n", " df['sentiment_net'] = df['prob_bullish'] - df['prob_bearish']\n", " df['sentiment_strength'] = (df['prob_bullish'] - df['prob_bearish']).abs()\n", " df['sentiment_weighted'] = df['sentiment_net'] * df['confidence']\n", " \n", " print(f\"✅ Sentiment loaded: {len(sentiment_clean):,} records\")\n", " print(f\"✅ Features added: 7 sentiment features\")\n", " \n", "except Exception as e:\n", " print(f\"⚠️ Sentiment not loaded: {e}\")\n", " for df in [train_data, valid_data, test_data]:\n", " df['sentiment_net'] = 0\n", " df['sentiment_strength'] = 0\n", " df['sentiment_weighted'] = 0\n", "\n", "print(\"=\"*70)" ] }, { "cell_type": "code", "execution_count": null, "id": "4640182f", "metadata": {}, "outputs": [], "source": [ "# ============================================================================\n", "# CELL 4: ROLLING NORMALIZATION + CREATE ENVIRONMENTS\n", "# ============================================================================\n", "\n", "print(\"=\"*70)\n", "print(\" ROLLING NORMALIZATION + CREATING ENVIRONMENTS\")\n", "print(\"=\"*70)\n", "\n", "# Get feature columns (all except OHLCV and intermediate columns)\n", "feature_cols = [col for col in train_data.columns \n", " if col not in ['open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7']]\n", "\n", "print(f\"📊 Total features: {len(feature_cols)}\")\n", "\n", "# 
# ============================================================================
# ROLLING NORMALIZATION (Prevents look-ahead bias!)
# Uses only past data for normalization at each point
# ============================================================================
rolling_normalizer = RollingNormalizer(window_size=2880)  # 30 days of 15m data

print("🔄 Applying rolling normalization (window=2880)...")

# Apply rolling normalization to each split
# NOTE(review): each split is normalized independently, so valid/test begin
# with a cold rolling window (their first rows rely on min_periods and
# zero-filled NaNs) — confirm this warm-up gap at each boundary is acceptable.
train_data_norm = rolling_normalizer.fit_transform(train_data, feature_cols)
valid_data_norm = rolling_normalizer.fit_transform(valid_data, feature_cols)
test_data_norm = rolling_normalizer.fit_transform(test_data, feature_cols)

print("✅ Rolling normalization complete (no look-ahead bias!)")

# Create environments (one per split).
# NOTE(review): valid/test envs default to training_mode=True, which enables
# flip augmentation — presumably set_training_mode(False) is called before
# evaluation; verify in the training loop.
train_env = BitcoinTradingEnv(train_data_norm, episode_length=500)
valid_env = BitcoinTradingEnv(valid_data_norm, episode_length=500)
test_env = BitcoinTradingEnv(test_data_norm, episode_length=500)

state_dim = train_env.observation_space.shape[0]
action_dim = 1

print(f"\n✅ Environments created:")
print(f" State dim: {state_dim} (features={len(feature_cols)} + portfolio=6)")
print(f" Action dim: {action_dim}")
print(f" Train samples: {len(train_data):,}")
print(f" Fee starts at: 0% (ramps to 0.1% after warmup)")
print("="*70)
# ============================================================================
# CELL 5: PYTORCH SAC AGENT (GPU OPTIMIZED)
# ============================================================================

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Normal

print("="*70)
print(" PYTORCH SAC AGENT")
print("="*70)

# ============================================================================
# ACTOR NETWORK (Policy)
# ============================================================================
class Actor(nn.Module):
    """Squashed-Gaussian policy network for SAC.

    Trunk: state_dim -> hidden -> hidden -> hidden/2 with ReLU, then separate
    linear heads for the Gaussian mean and (clamped) log-std. Sampled actions
    are tanh-squashed into [-1, 1].
    """

    # Clamp bounds for the log standard deviation head.
    LOG_STD_MIN = -20
    LOG_STD_MAX = 2

    def __init__(self, state_dim, action_dim, hidden_dim=512):
        super().__init__()
        # Wide trunk for the ~100-feature state; tapers on the final layer.
        self.fc1 = nn.Linear(state_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim // 2)

        self.mean = nn.Linear(hidden_dim // 2, action_dim)
        self.log_std = nn.Linear(hidden_dim // 2, action_dim)

    def forward(self, state):
        """Return (mean, clamped log_std) of the pre-squash Gaussian."""
        hidden = state
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))

        mu = self.mean(hidden)
        log_sigma = self.log_std(hidden).clamp(self.LOG_STD_MIN, self.LOG_STD_MAX)
        return mu, log_sigma

    def sample(self, state):
        """Reparameterized sample; returns (action, log_prob, pre-squash mean)."""
        mu, log_sigma = self(state)
        dist = Normal(mu, log_sigma.exp())

        pre_squash = dist.rsample()  # reparameterization trick
        action = torch.tanh(pre_squash)

        # Change-of-variables correction for the tanh squash.
        log_prob = dist.log_prob(pre_squash) - torch.log(1 - action.pow(2) + 1e-6)
        log_prob = log_prob.sum(dim=-1, keepdim=True)

        return action, log_prob, mu
# ============================================================================
# CRITIC NETWORK (Twin Q-functions)
# ============================================================================
class Critic(nn.Module):
    """Twin Q-networks for SAC (clipped double-Q).

    Each head maps concat(state, action) through
    (state+action) -> hidden -> hidden -> hidden/2 -> 1.
    Layer attribute names are kept so checkpoints stay compatible.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=512):
        super().__init__()
        in_dim = state_dim + action_dim

        # Q1 head
        self.fc1_1 = nn.Linear(in_dim, hidden_dim)
        self.fc1_2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc1_3 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.fc1_out = nn.Linear(hidden_dim // 2, 1)

        # Q2 head
        self.fc2_1 = nn.Linear(in_dim, hidden_dim)
        self.fc2_2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc2_3 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.fc2_out = nn.Linear(hidden_dim // 2, 1)

    @staticmethod
    def _head(x, l1, l2, l3, out):
        """Run one Q-head on the concatenated state-action input."""
        for layer in (l1, l2, l3):
            x = F.relu(layer(x))
        return out(x)

    def forward(self, state, action):
        """Return (Q1, Q2) estimates for a state-action batch."""
        x = torch.cat([state, action], dim=-1)
        q1_val = self._head(x, self.fc1_1, self.fc1_2, self.fc1_3, self.fc1_out)
        q2_val = self._head(x, self.fc2_1, self.fc2_2, self.fc2_3, self.fc2_out)
        return q1_val, q2_val

    def q1(self, state, action):
        """Q1 only — cheaper when a single estimate suffices (actor loss)."""
        x = torch.cat([state, action], dim=-1)
        return self._head(x, self.fc1_1, self.fc1_2, self.fc1_3, self.fc1_out)
self.gamma = gamma\n", " self.tau = tau\n", " self.action_dim = action_dim\n", " \n", " # Networks\n", " self.actor = Actor(state_dim, action_dim).to(device)\n", " self.critic = Critic(state_dim, action_dim).to(device)\n", " self.critic_target = Critic(state_dim, action_dim).to(device)\n", " self.critic_target.load_state_dict(self.critic.state_dict())\n", " \n", " # Optimizers\n", " self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=actor_lr)\n", " self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=critic_lr)\n", " \n", " # Entropy (auto-tuning alpha)\n", " self.target_entropy = -action_dim\n", " self.log_alpha = torch.tensor(np.log(initial_alpha), requires_grad=True, device=device)\n", " self.alpha_optimizer = optim.Adam([self.log_alpha], lr=alpha_lr)\n", " \n", " @property\n", " def alpha(self):\n", " return self.log_alpha.exp()\n", " \n", " def select_action(self, state, deterministic=False):\n", " with torch.no_grad():\n", " state = torch.FloatTensor(state).unsqueeze(0).to(self.device)\n", " if deterministic:\n", " mean, _ = self.actor(state)\n", " action = torch.tanh(mean)\n", " else:\n", " action, _, _ = self.actor.sample(state)\n", " return action.cpu().numpy()[0]\n", " \n", " def update(self, batch):\n", " states, actions, rewards, next_states, dones = batch\n", " \n", " states = torch.FloatTensor(states).to(self.device)\n", " actions = torch.FloatTensor(actions).to(self.device)\n", " rewards = torch.FloatTensor(rewards).unsqueeze(1).to(self.device)\n", " next_states = torch.FloatTensor(next_states).to(self.device)\n", " dones = torch.FloatTensor(dones).unsqueeze(1).to(self.device)\n", " \n", " # ============ Update Critic ============\n", " with torch.no_grad():\n", " next_actions, next_log_probs, _ = self.actor.sample(next_states)\n", " q1_target, q2_target = self.critic_target(next_states, next_actions)\n", " q_target = torch.min(q1_target, q2_target)\n", " target_q = rewards + (1 - dones) * self.gamma * (q_target - self.alpha * 
next_log_probs)\n", " \n", " q1, q2 = self.critic(states, actions)\n", " critic_loss = F.mse_loss(q1, target_q) + F.mse_loss(q2, target_q)\n", " \n", " self.critic_optimizer.zero_grad()\n", " critic_loss.backward()\n", " self.critic_optimizer.step()\n", " \n", " # ============ Update Actor ============\n", " new_actions, log_probs, _ = self.actor.sample(states)\n", " q1_new, q2_new = self.critic(states, new_actions)\n", " q_new = torch.min(q1_new, q2_new)\n", " actor_loss = (self.alpha * log_probs - q_new).mean()\n", " \n", " self.actor_optimizer.zero_grad()\n", " actor_loss.backward()\n", " self.actor_optimizer.step()\n", " \n", " # ============ Update Alpha ============\n", " alpha_loss = -(self.log_alpha * (log_probs.detach() + self.target_entropy)).mean()\n", " \n", " self.alpha_optimizer.zero_grad()\n", " alpha_loss.backward()\n", " self.alpha_optimizer.step()\n", " \n", " # ============ Update Target Network ============\n", " for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):\n", " target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)\n", " \n", " return {\n", " 'critic_loss': critic_loss.item(),\n", " 'actor_loss': actor_loss.item(),\n", " 'alpha': self.alpha.item()\n", " }\n", "\n", "print(\"✅ Actor: 512→512→256→1\")\n", "print(\"✅ Critic: Twin Q (512→512→256→1)\")\n", "print(\"✅ SAC Agent with auto-tuning alpha\")\n", "print(\"=\"*70)" ] }, { "cell_type": "code", "execution_count": null, "id": "e7f72357", "metadata": { "execution": { "iopub.execute_input": "2025-10-25T11:43:16.531841Z", "iopub.status.busy": "2025-10-25T11:43:16.531619Z", "iopub.status.idle": "2025-10-25T11:43:16.549706Z", "shell.execute_reply": "2025-10-25T11:43:16.548781Z" }, "papermill": { "duration": 0.026952, "end_time": "2025-10-25T11:43:16.550849", "exception": false, "start_time": "2025-10-25T11:43:16.523897", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# 
# ============================================================================
# CELL 6: REPLAY BUFFER (GPU-FRIENDLY)
# ============================================================================

print("="*70)
print(" REPLAY BUFFER")
print("="*70)

class ReplayBuffer:
    """Fixed-capacity ring buffer of (s, a, r, s', done) transitions.

    Storage is preallocated float32 numpy arrays; once full, the oldest
    transition is overwritten. `size` grows monotonically up to `max_size`.
    """

    def __init__(self, state_dim, action_dim, max_size=1_000_000):
        self.max_size = max_size
        self.ptr = 0    # next write slot (wraps around)
        self.size = 0   # number of valid transitions stored
        
        self.states = np.zeros((max_size, state_dim), dtype=np.float32)
        self.actions = np.zeros((max_size, action_dim), dtype=np.float32)
        self.rewards = np.zeros((max_size, 1), dtype=np.float32)
        self.next_states = np.zeros((max_size, state_dim), dtype=np.float32)
        self.dones = np.zeros((max_size, 1), dtype=np.float32)
        
        # Report the preallocated footprint so the user can size the buffer.
        mem_gb = sum(a.nbytes for a in self._columns()) / 1e9
        print(f"📦 Buffer capacity: {max_size:,} | Memory: {mem_gb:.2f} GB")
    
    def _columns(self):
        """All storage arrays, in canonical (s, a, r, s', done) order."""
        return (self.states, self.actions, self.rewards, self.next_states, self.dones)
    
    def add(self, state, action, reward, next_state, done):
        """Write one transition at the current slot, overwriting when full."""
        for column, value in zip(self._columns(), (state, action, reward, next_state, done)):
            column[self.ptr] = value
        
        self.ptr = (self.ptr + 1) % self.max_size
        self.size = min(self.size + 1, self.max_size)
    
    def sample(self, batch_size):
        """Uniformly sample `batch_size` transitions (with replacement).

        Returns a tuple of arrays in (s, a, r, s', done) order.
        """
        idx = np.random.randint(0, self.size, size=batch_size)
        return tuple(column[idx] for column in self._columns())

print("✅ ReplayBuffer defined")
print("="*70)
# ============================================================================
# CELL 7: CREATE AGENT + BUFFER
# ============================================================================

print("="*70)
print(" CREATING AGENT + BUFFER")
print("="*70)

# Instantiate the SAC agent with the default hyperparameters used throughout
# this notebook (lr 3e-4 everywhere, gamma 0.99, tau 0.005, alpha 0.2).
agent = SACAgent(
    state_dim=state_dim,
    action_dim=action_dim,
    device=device,
    actor_lr=3e-4,
    critic_lr=3e-4,
    alpha_lr=3e-4,
    gamma=0.99,
    tau=0.005,
    initial_alpha=0.2
)

# Preallocate the 1M-transition replay buffer.
buffer = ReplayBuffer(
    state_dim=state_dim,
    action_dim=action_dim,
    max_size=1_000_000
)

# Parameter counts (computed once, reused in the summary below).
actor_params = sum(p.numel() for p in agent.actor.parameters())
critic_params = sum(p.numel() for p in agent.critic.parameters())
total_params = actor_params + critic_params

print(f"\n✅ Agent created on {device}")
print(f"   Actor params: {actor_params:,}")
print(f"   Critic params: {critic_params:,}")
print(f"   Total params: {total_params:,}")
print("="*70)
# ============================================================================
# CELL 8: TRAINING FUNCTION (GPU OPTIMIZED + FEE RAMPING)
# ============================================================================

from tqdm.notebook import tqdm
import time

print("="*70)
print(" TRAINING FUNCTION")
print("="*70)

def train_sac(agent, env, valid_env, buffer, 
              total_timesteps=700_000,
              warmup_steps=10_000,
              batch_size=1024,
              update_freq=1,
              fee_warmup_steps=100_000,  # When to start fee ramping
              fee_ramp_steps=100_000,    # Steps to ramp from 0 to max fee
              save_path="sac_v9"):
    """Main SAC training loop with a fee curriculum and per-episode eval.

    Collects transitions from `env` into `buffer`, updates `agent` every
    `update_freq` steps after `warmup_steps` of random actions, and runs one
    deterministic evaluation episode on `valid_env` at the end of every
    training episode. Checkpoints are written to
    "{save_path}_best_train.pt" / "_best_eval.pt" / "_final.pt".

    Returns (episode_rewards, eval_rewards) lists.

    NOTE(review): env is assumed to expose set_training_mode(),
    set_fee_multiplier(), action_space.sample(), reset() and a gym-style
    step() returning (obs, reward, done, info) — confirm against the env
    definition (not visible in this chunk).
    """
    
    print(f"\n🚀 Training Configuration:")
    print(f"   Total steps: {total_timesteps:,}")
    print(f"   Warmup: {warmup_steps:,}")
    print(f"   Batch size: {batch_size}")
    print(f"   Fee warmup: {fee_warmup_steps:,} steps (then ramp over {fee_ramp_steps:,})")
    print(f"   Data augmentation: Random flips (50% probability)")
    print(f"   DSR warmup: 100 steps per episode (0 reward)")
    print(f"   Device: {agent.device}")
    
    # Set training modes for augmentation
    env.set_training_mode(True)        # Enable random flips
    valid_env.set_training_mode(False) # No augmentation for validation
    
    # Stats tracking
    episode_rewards = []
    episode_lengths = []
    eval_rewards = []
    best_reward = -np.inf
    best_eval = -np.inf
    
    # Training stats
    critic_losses = []
    actor_losses = []
    
    state = env.reset()
    episode_reward = 0
    episode_length = 0
    episode_count = 0
    
    start_time = time.time()
    
    pbar = tqdm(range(total_timesteps), desc="Training")
    
    for step in pbar:
        # ============ FEE RAMPING CURRICULUM ============
        # 0 fees until fee_warmup_steps, then linearly ramp to 1.0 over
        # fee_ramp_steps (clamped at 1.0 afterwards).
        if step < fee_warmup_steps:
            fee_multiplier = 0.0
        else:
            progress = (step - fee_warmup_steps) / fee_ramp_steps
            fee_multiplier = min(1.0, progress)
        
        # The validation env tracks the same fee schedule so eval reflects
        # the current curriculum stage.
        env.set_fee_multiplier(fee_multiplier)
        valid_env.set_fee_multiplier(fee_multiplier)
        
        # Select action: uniform-random during warmup, stochastic policy after.
        if step < warmup_steps:
            action = env.action_space.sample()
        else:
            action = agent.select_action(state, deterministic=False)
        
        # Step environment
        next_state, reward, done, info = env.step(action)
        
        # Store transition
        buffer.add(state, action, reward, next_state, float(done))
        
        state = next_state
        episode_reward += reward
        episode_length += 1
        
        # Update agent (one gradient step per env step once past warmup).
        stats = None
        if step >= warmup_steps and step % update_freq == 0:
            batch = buffer.sample(batch_size)
            stats = agent.update(batch)
            critic_losses.append(stats['critic_loss'])
            actor_losses.append(stats['actor_loss'])
        
        # Episode end: log, evaluate, checkpoint, reset.
        if done:
            episode_rewards.append(episode_reward)
            episode_lengths.append(episode_length)
            episode_count += 1
            
            # Calculate episode stats from the terminal `info` dict.
            # NOTE(review): 10000 is assumed to be the initial balance — it is
            # hard-coded here; confirm it matches env.initial_balance.
            final_value = info.get('total_value', 10000)
            pnl_pct = (final_value / 10000 - 1) * 100
            num_trades = info.get('num_trades', 0)
            current_fee = info.get('current_fee', 0) * 100  # Convert to %
            
            # Get position distribution (long/short split of active steps).
            long_steps = info.get('long_steps', 0)
            short_steps = info.get('short_steps', 0)
            neutral_steps = info.get('neutral_steps', 0)
            total_active = long_steps + short_steps
            long_pct = (long_steps / total_active * 100) if total_active > 0 else 0
            short_pct = (short_steps / total_active * 100) if total_active > 0 else 0
            
            # Update progress bar with detailed info
            avg_reward = np.mean(episode_rewards[-10:]) if len(episode_rewards) >= 10 else episode_reward
            avg_critic = np.mean(critic_losses[-100:]) if critic_losses else 0
            
            pbar.set_postfix({
                'ep': episode_count,
                'R': f'{episode_reward:.4f}',
                'avg10': f'{avg_reward:.4f}',
                'PnL%': f'{pnl_pct:+.2f}',
                'L/S': f'{long_pct:.0f}/{short_pct:.0f}',
                'fee%': f'{current_fee:.3f}',
                'α': f'{agent.alpha.item():.3f}',
            })
            
            # ============ EVAL EVERY EPISODE ============
            eval_reward, eval_pnl, eval_long_pct = evaluate_agent(agent, valid_env, n_episodes=1)
            eval_rewards.append(eval_reward)
            
            # Print detailed episode summary
            # NOTE(review): steps_per_sec includes evaluation wall time, so it
            # understates pure training throughput.
            elapsed = time.time() - start_time
            steps_per_sec = (step + 1) / elapsed
            
            print(f"\n{'='*60}")
            print(f"📊 Episode {episode_count} Complete | Step {step+1:,}/{total_timesteps:,}")
            print(f"{'='*60}")
            print(f"  🎮 TRAIN:")
            print(f"     Reward (DSR): {episode_reward:.4f} | PnL: {pnl_pct:+.2f}%")
            print(f"     Length: {episode_length} steps | Trades: {num_trades}")
            print(f"     Avg (last 10): {avg_reward:.4f}")
            print(f"  📊 POSITION BALANCE:")
            print(f"     Long: {long_steps} steps ({long_pct:.1f}%)")
            print(f"     Short: {short_steps} steps ({short_pct:.1f}%)")
            print(f"     Neutral: {neutral_steps} steps")
            print(f"  💰 FEE CURRICULUM:")
            print(f"     Current fee: {current_fee:.4f}% (multiplier: {fee_multiplier:.2f})")
            print(f"  📈 EVAL (validation):")
            print(f"     Reward: {eval_reward:.4f} | PnL: {eval_pnl:+.2f}%")
            print(f"     Long%: {eval_long_pct:.1f}%")
            print(f"     Avg (last 5): {np.mean(eval_rewards[-5:]):.4f}")
            print(f"  🧠 AGENT:")
            print(f"     Alpha: {agent.alpha.item():.4f}")
            print(f"     Critic loss: {avg_critic:.5f}")
            print(f"  ⚡ Speed: {steps_per_sec:.0f} steps/sec")
            print(f"  💾 Buffer: {buffer.size:,} transitions")
            
            # Save best train checkpoint
            if episode_reward > best_reward:
                best_reward = episode_reward
                torch.save({
                    'actor': agent.actor.state_dict(),
                    'critic': agent.critic.state_dict(),
                    'critic_target': agent.critic_target.state_dict(),
                    'log_alpha': agent.log_alpha,
                }, f"{save_path}_best_train.pt")
                print(f"  🏆 NEW BEST TRAIN: {best_reward:.4f}")
            
            # Save best eval checkpoint
            if eval_reward > best_eval:
                best_eval = eval_reward
                torch.save({
                    'actor': agent.actor.state_dict(),
                    'critic': agent.critic.state_dict(),
                    'critic_target': agent.critic_target.state_dict(),
                    'log_alpha': agent.log_alpha,
                }, f"{save_path}_best_eval.pt")
                print(f"  🏆 NEW BEST EVAL: {best_eval:.4f}")
            
            # Reset for the next episode
            state = env.reset()
            episode_reward = 0
            episode_length = 0
    
    # Final save (last weights regardless of performance)
    torch.save({
        'actor': agent.actor.state_dict(),
        'critic': agent.critic.state_dict(),
        'critic_target': agent.critic_target.state_dict(),
        'log_alpha': agent.log_alpha,
    }, f"{save_path}_final.pt")
    
    total_time = time.time() - start_time
    print(f"\n{'='*70}")
    print(f" TRAINING COMPLETE")
    print(f"{'='*70}")
    print(f"  Total time: {total_time/60:.1f} min")
    print(f"  Episodes: {episode_count}")
    print(f"  Best train reward (DSR): {best_reward:.4f}")
    print(f"  Best eval reward (DSR): {best_eval:.4f}")
    print(f"  Avg speed: {total_timesteps/total_time:.0f} steps/sec")
    
    return episode_rewards, eval_rewards


def evaluate_agent(agent, env, n_episodes=1):
    """Run `n_episodes` deterministic episodes on `env`.

    Returns (mean_reward, mean_pnl_pct, mean_long_pct) averaged over
    episodes. PnL% assumes a 10,000 initial balance (same hard-coded
    constant as train_sac).
    """
    total_reward = 0
    total_pnl = 0
    total_long_pct = 0
    
    for _ in range(n_episodes):
        state = env.reset()
        episode_reward = 0
        done = False
        
        while not done:
            action = agent.select_action(state, deterministic=True)
            state, reward, done, info = env.step(action)
            episode_reward += reward
        
        total_reward += episode_reward
        final_value = info.get('total_value', 10000)
        total_pnl += (final_value / 10000 - 1) * 100
        
        # Calculate long percentage of the active (non-neutral) steps.
        long_steps = info.get('long_steps', 0)
        short_steps = info.get('short_steps', 0)
        total_active = long_steps + short_steps
        total_long_pct += (long_steps / total_active * 100) if total_active > 0 else 0
    
    return total_reward / n_episodes, total_pnl / n_episodes, total_long_pct / n_episodes


print("✅ Training function ready:")
print("   - Per-episode eval + position tracking")
print("   - DSR reward (risk-adjusted)")
print("   - Fee ramping: 0% → 0.1% after 100k steps")
print("   - Model checkpointing")
print("="*70)

# ============================================================================
# CELL 9: START TRAINING
# ============================================================================

print("="*70)
print(" STARTING SAC TRAINING")
print("="*70)

# Training parameters
TOTAL_STEPS = 500_000   # 500K steps
WARMUP_STEPS = 10_000   # 10K random warmup
BATCH_SIZE = 256        # Standard batch size
UPDATE_FREQ = 1         # Update every step
FEE_WARMUP = 100_000    # Start fee ramping after 100k steps
FEE_RAMP = 100_000      # Ramp fees over 100k steps (0 → 0.1%)

print(f"\n📋 Configuration:")
print(f"   Steps: {TOTAL_STEPS:,}")
print(f"   Batch: {BATCH_SIZE}")
print(f"   Train env: {len(train_data):,} candles")
print(f"   Valid env: {len(valid_data):,} candles")
print(f"   Device: {device}")
print(f"\n💰 Fee Curriculum:")
print(f"   Steps 0-{FEE_WARMUP:,}: 0% fee (learn basic trading)")
print(f"   Steps {FEE_WARMUP:,}-{FEE_WARMUP+FEE_RAMP:,}: Ramp 0%→0.1%")
print(f"   Steps {FEE_WARMUP+FEE_RAMP:,}+: Full 0.1% fee")
print(f"\n🎯 Reward: Differential Sharpe Ratio (DSR)")
print(f"  - Risk-adjusted returns (not just PnL)")
print(f"  - Small values (-0.5 to 0.5) are normal")
print(f"  - NOT normalized further")

# Run training with validation eval every episode
episode_rewards, eval_rewards = train_sac(
    agent=agent,
    env=train_env,
    valid_env=valid_env,
    buffer=buffer,
    total_timesteps=TOTAL_STEPS,
    warmup_steps=WARMUP_STEPS,
    batch_size=BATCH_SIZE,
    update_freq=UPDATE_FREQ,
    fee_warmup_steps=FEE_WARMUP,
    fee_ramp_steps=FEE_RAMP,
    save_path="sac_v9_pytorch"
)

print("\n" + "="*70)
print(" TRAINING COMPLETE")
print("="*70)

# ============================================================================
# CELL 10: LOAD TRAINED MODELS
# ============================================================================

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.gridspec import GridSpec
import seaborn as sns

# Set style for beautiful charts
plt.style.use('dark_background')
sns.set_palette("husl")

print("="*70)
print(" LOADING TRAINED MODELS")
print("="*70)

# Model paths from Kaggle
MODEL_PATH = '/kaggle/input/models/'
FINAL_MODEL = MODEL_PATH + 'sac_v9_pytorch_final (1).pt'
BEST_TRAIN_MODEL = MODEL_PATH + 'sac_v9_pytorch_best_train (1).pt'
BEST_EVAL_MODEL = MODEL_PATH + 'sac_v9_pytorch_best_eval (1).pt'

def load_model(agent, checkpoint_path, name="model"):
    """Load actor/critic/target weights (and log_alpha) into `agent`.

    Parameters
    ----------
    agent : SACAgent
        Agent whose networks are updated in place.
    checkpoint_path : str
        Path to a torch.save() checkpoint with keys 'actor', 'critic',
        'critic_target' and optionally 'log_alpha'.
    name : str
        Label used only in log messages.

    Returns
    -------
    bool
        True on success, False on any load error (best-effort: the error is
        printed and swallowed so the notebook keeps running).
    """
    try:
        checkpoint = torch.load(checkpoint_path, map_location=device)
        agent.actor.load_state_dict(checkpoint['actor'])
        agent.critic.load_state_dict(checkpoint['critic'])
        agent.critic_target.load_state_dict(checkpoint['critic_target'])
        if 'log_alpha' in checkpoint:
            # BUG FIX: the original code rebound `agent.log_alpha` to the
            # checkpoint tensor, but `agent.alpha_optimizer` was constructed
            # with a reference to the ORIGINAL tensor — after rebinding, alpha
            # updates would silently stop affecting the agent. Copy the value
            # into the existing optimizer-registered tensor instead.
            with torch.no_grad():
                agent.log_alpha.copy_(checkpoint['log_alpha'])
        print(f"✅ {name} loaded successfully!")
        return True
    except Exception as e:
        # Deliberate best-effort: report and continue (e.g. missing file on
        # a fresh run before any checkpoint exists).
        print(f"❌ Error loading {name}: {e}")
        return False

# Create fresh agent for evaluation
eval_agent = SACAgent(
    state_dim=state_dim,
    action_dim=action_dim,
    device=device
)

# Load best eval model (most generalizable)
load_model(eval_agent, BEST_EVAL_MODEL, "Best Eval Model")

print("="*70)

# ============================================================================
# CELL 11: TRAINING SUMMARY VISUALIZATION
# ============================================================================

print("="*70)
print(" TRAINING SUMMARY VISUALIZATION")
print("="*70)

# Training results from your run (hand-recorded; not read from disk)
training_stats = {
    'total_time_min': 131.0,
    'total_episodes': 1000,
    'total_steps': 500_000,
    'best_train_dsr': 0.5949,
    'best_eval_dsr': 0.2125,
    'avg_speed': 64,  # steps/sec
}

# Create summary figure
fig = plt.figure(figsize=(16, 10))
gs = GridSpec(2, 3, figure=fig, hspace=0.3, wspace=0.3)

# Colors
colors = {
    'primary': '#00D4AA',
    'secondary': '#FF6B6B', 
    'accent': '#4ECDC4',
    'warning': '#FFE66D',
    'bg': '#1a1a2e',
    'grid': '#333355'
}

fig.patch.set_facecolor(colors['bg'])

# ============================================================================
# 1. Training Configuration Card
# ============================================================================
# Static monospace "card" rendered as text inside an axes with no spines.
ax1 = fig.add_subplot(gs[0, 0])
ax1.set_facecolor(colors['bg'])
ax1.axis('off')

config_text = f"""
╔══════════════════════════════════════╗
║ 🎯 TRAINING CONFIGURATION ║
╠══════════════════════════════════════╣
║ ║
║ Total Steps: 500,000 ║
║ Episodes: 1,000 ║
║ Batch Size: 256 ║
║ Episode Length: 500 steps ║
║ ║
║ 📊 Network Architecture ║
║ Actor: 512 → 512 → 256 → 1 ║
║ Critic: 512 → 512 → 256 → 1 (x2) ║
║ ║
║ 💰 Fee Curriculum ║
║ 0-100k: 0% fee ║
║ 100k-200k: Ramp to 0.1% ║
║ 200k+: Full 0.1% fee ║
║ ║
║ 🎲 Data Augmentation ║
║ Random Flip: 50% probability ║
║ DSR Warmup: 100 steps ║
╚══════════════════════════════════════╝
"""
ax1.text(0.5, 0.5, config_text, transform=ax1.transAxes, fontsize=10,
         verticalalignment='center', horizontalalignment='center',
         fontfamily='monospace', color='white',
         bbox=dict(boxstyle='round', facecolor=colors['bg'], edgecolor=colors['primary'], linewidth=2))

# ============================================================================
# 2. Key Metrics Card
# ============================================================================
# NOTE(review): the numbers in this card are hard-coded from one training run
# (they mirror `training_stats`); they are not recomputed on re-run.
ax2 = fig.add_subplot(gs[0, 1])
ax2.set_facecolor(colors['bg'])
ax2.axis('off')

metrics_text = f"""
╔══════════════════════════════════════╗
║ 📈 TRAINING RESULTS ║
╠══════════════════════════════════════╣
║ ║
║ ⏱️ Total Time: 131.0 min ║
║ ⚡ Avg Speed: 64 steps/sec ║
║ ║
║ 🏆 BEST REWARDS (DSR) ║
║ ┌────────────────────────────┐ ║
║ │ Train: 0.5949 │ ║
║ │ Eval: 0.2125 │ ║
║ └────────────────────────────┘ ║
║ ║
║ 📊 Multi-Timeframe Features ║
║ 15m: 26 features ║
║ 1h: 26 features ║
║ 4h: 26 features ║
║ Other: ~10 features ║
║ TOTAL: ~88 features ║
║ ║
╚══════════════════════════════════════╝
"""
ax2.text(0.5, 0.5, metrics_text, transform=ax2.transAxes, fontsize=10,
         verticalalignment='center', horizontalalignment='center',
         fontfamily='monospace', color='white',
         bbox=dict(boxstyle='round', facecolor=colors['bg'], edgecolor=colors['accent'], linewidth=2))

# ============================================================================
# 3. Reward Type Explanation
# ============================================================================
ax3 = fig.add_subplot(gs[0, 2])
ax3.set_facecolor(colors['bg'])
ax3.axis('off')

dsr_text = f"""
╔══════════════════════════════════════╗
║ 🧮 DIFFERENTIAL SHARPE RATIO ║
╠══════════════════════════════════════╣
║ ║
║ Formula: ║
║ ║
║ B·ΔA - 0.5·A·ΔB ║
║ DSR = ───────────────── ║
║ (B - A²)^1.5 ║
║ ║
║ Where: ║
║ A = EMA of returns ║
║ B = EMA of squared returns ║
║ ║
║ ✅ Benefits: ║
║ • Risk-adjusted (Sharpe-like) ║
║ • Penalizes volatility ║
║ • Rewards consistency ║
║ • Scale: -0.5 to +0.5 ║
║ ║
║ ⚠️ Note: Small values are normal! ║
╚══════════════════════════════════════╝
"""
ax3.text(0.5, 0.5, dsr_text, transform=ax3.transAxes, fontsize=10,
         verticalalignment='center', horizontalalignment='center',
         fontfamily='monospace', color='white',
         bbox=dict(boxstyle='round', facecolor=colors['bg'], edgecolor=colors['warning'], linewidth=2))

# ============================================================================
# 4. Training Progress Bar (Visual)
# ============================================================================
# Horizontal stacked bar showing the four curriculum phases on a step axis.
ax4 = fig.add_subplot(gs[1, :])
ax4.set_facecolor(colors['bg'])

# Create timeline visualization: (label, start_step, end_step, color)
phases = [
    ('Random Warmup', 0, 10000, '#666699'),
    ('No Fees (Learning)', 10000, 100000, colors['primary']),
    ('Fee Ramping', 100000, 200000, colors['warning']),
    ('Full Fees', 200000, 500000, colors['secondary']),
]

for name, start, end, color in phases:
    ax4.barh(0, end-start, left=start, height=0.4, color=color, edgecolor='white', linewidth=0.5)
    mid = (start + end) / 2
    ax4.text(mid, 0, name, ha='center', va='center', fontsize=9, color='white', fontweight='bold')

# Add markers at the phase boundaries
ax4.axvline(x=10000, color='white', linestyle='--', alpha=0.5, linewidth=1)
ax4.axvline(x=100000, color='white', linestyle='--', alpha=0.5, linewidth=1)
ax4.axvline(x=200000, color='white', linestyle='--', alpha=0.5, linewidth=1)

ax4.set_xlim(0, 500000)
ax4.set_ylim(-0.5, 0.5)
ax4.set_xlabel('Training Steps', fontsize=12, color='white')
ax4.set_title('📊 Training Curriculum Timeline', fontsize=14, color='white', fontweight='bold', pad=20)
ax4.set_yticks([])
# Format the x axis as thousands (e.g. 100K)
ax4.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x/1000:.0f}K'))
ax4.tick_params(colors='white')
ax4.spines['top'].set_visible(False)
ax4.spines['right'].set_visible(False)
ax4.spines['left'].set_visible(False)
ax4.spines['bottom'].set_color('white')

# Add step markers under the bar
for step in [0, 100000, 200000, 300000, 400000, 500000]:
    ax4.text(step, -0.35, f'{step//1000}K', ha='center', va='top', fontsize=8, color='gray')

plt.suptitle('🚀 SAC Bitcoin Trading Agent - Training Summary', fontsize=18, color='white', fontweight='bold', y=0.98)
plt.tight_layout()
plt.savefig('training_summary.png', dpi=150, 
            facecolor=colors['bg'], bbox_inches='tight')
plt.show()

print("\n✅ Training summary visualization saved!")

# ============================================================================
# CELL 12: COMPREHENSIVE BACKTESTING FUNCTION
# ============================================================================

print("="*70)
print(" BACKTESTING ENGINE")
print("="*70)

def run_backtest(agent, env, name="Test", verbose=True):
    """
    Run comprehensive backtest and collect detailed metrics.

    Runs one full deterministic episode on `env` with augmentation off and
    full fees, recording per-step price/position/value, and returns a
    metrics dict (PnL, Sharpe, drawdown, trade stats) with the raw
    per-step `history` attached under 'history'.

    NOTE(review): reads env internals (start_idx, current_step, df, balance,
    initial_balance) directly — confirm these attribute names against the
    env class, which is not visible in this chunk.
    """
    env.set_training_mode(False)  # No augmentation during testing
    env.set_fee_multiplier(1.0)   # Full 0.1% fees
    
    # Run full episode
    state = env.reset()
    done = False
    
    # Track everything (one list entry per env step; 'trades' only on flips)
    history = {
        'step': [],
        'price': [],
        'position': [],
        'action': [],
        'balance': [],
        'total_value': [],
        'pnl_pct': [],
        'reward': [],
        'trades': []
    }
    
    step = 0
    total_reward = 0
    prev_position = 0
    
    while not done:
        action = agent.select_action(state, deterministic=True)
        next_state, reward, done, info = env.step(action)
        
        # Price of the candle just acted on (env has already advanced).
        idx = env.start_idx + env.current_step - 1
        price = env.df.loc[idx, 'close']
        
        # Track trade: any position change larger than 0.1 counts as a trade
        # event.
        if abs(info['position'] - prev_position) > 0.1:
            history['trades'].append({
                'step': step,
                'price': price,
                'from_pos': prev_position,
                'to_pos': info['position'],
                'type': 'LONG' if info['position'] > 0 else ('SHORT' if info['position'] < 0 else 'CLOSE')
            })
        
        history['step'].append(step)
        history['price'].append(price)
        history['position'].append(info['position'])
        history['action'].append(action[0])
        history['balance'].append(env.balance)
        history['total_value'].append(info['total_value'])
        history['pnl_pct'].append((info['total_value'] / env.initial_balance - 1) * 100)
        history['reward'].append(reward)
        
        prev_position = info['position']
        total_reward += reward
        state = next_state
        step += 1
    
    # Calculate final metrics
    final_value = history['total_value'][-1]
    initial_value = env.initial_balance
    total_pnl_pct = (final_value / initial_value - 1) * 100
    
    # Calculate Sharpe ratio from per-step portfolio returns.
    returns = np.diff(history['total_value']) / np.array(history['total_value'][:-1])
    sharpe = np.mean(returns) / (np.std(returns) + 1e-8) * np.sqrt(252 * 96)  # Annualized (96 = 15m candles per day)
    
    # Max drawdown (% below the running peak of portfolio value)
    peak = np.maximum.accumulate(history['total_value'])
    drawdowns = (peak - history['total_value']) / peak * 100
    max_drawdown = np.max(drawdowns)
    
    # Position distribution (fraction of steps long / short / flat-ish)
    positions = np.array(history['position'])
    long_pct = np.mean(positions > 0.1) * 100
    short_pct = np.mean(positions < -0.1) * 100
    neutral_pct = np.mean(np.abs(positions) <= 0.1) * 100
    
    # Win rate (for trades)
    # NOTE(review): this pairs CONSECUTIVE trade events as entry/exit, which
    # is only an approximation when positions are scaled in/out rather than
    # fully reversed — treat win_rate/avg_trade_pnl as indicative.
    if len(history['trades']) > 1:
        trade_pnls = []
        for i in range(1, len(history['trades'])):
            entry = history['trades'][i-1]
            exit_trade = history['trades'][i]
            if entry['type'] != 'CLOSE':
                pnl = (exit_trade['price'] - entry['price']) / entry['price'] * 100
                if entry['type'] == 'SHORT':
                    pnl = -pnl
                trade_pnls.append(pnl)
        win_rate = np.mean(np.array(trade_pnls) > 0) * 100 if trade_pnls else 0
    else:
        win_rate = 0
        trade_pnls = []
    
    metrics = {
        'name': name,
        'total_reward': total_reward,
        'total_pnl_pct': total_pnl_pct,
        'final_value': final_value,
        'sharpe_ratio': sharpe,
        'max_drawdown': max_drawdown,
        'num_trades': len(history['trades']),
        'long_pct': long_pct,
        'short_pct': short_pct,
        'neutral_pct': neutral_pct,
        'win_rate': win_rate,
        'avg_trade_pnl': np.mean(trade_pnls) if trade_pnls else 0,
        'history': history
    }
    
    if verbose:
        print(f"\n{'='*50}")
        print(f"📊 {name} Results")
        print(f"{'='*50}")
        print(f"  💰 Total PnL: {total_pnl_pct:+.2f}%")
        print(f"  📈 Final Value: ${final_value:,.2f}")
        print(f"  🎯 DSR Reward: {total_reward:.4f}")
        print(f"  📉 Max Drawdown: {max_drawdown:.2f}%")
        print(f"  📊 Sharpe Ratio: {sharpe:.3f}")
        print(f"  🔄 Num Trades: {len(history['trades'])}")
        print(f"  ✅ Win Rate: {win_rate:.1f}%")
        print(f"  📊 Position Mix: L:{long_pct:.0f}% | S:{short_pct:.0f}% | N:{neutral_pct:.0f}%")
    
    return metrics

print("✅ Backtesting engine ready!")

# ============================================================================
# CELL 13: TEST ON UNSEEN DATA (TEST SET)
# ============================================================================

print("="*70)
print(" TESTING ON UNSEEN DATA")
print("="*70)

# Create test environment with UNSEEN data (test_data_norm)
print(f"📊 Test Data: {len(test_data):,} candles (unseen during training)")
print(f"   Date range: {test_data.index[0]} to {test_data.index[-1]}")

# Test with all three models
models_to_test = [
    ('Best Eval', BEST_EVAL_MODEL),
    ('Best Train', BEST_TRAIN_MODEL),
    ('Final', FINAL_MODEL),
]

all_results = []

for model_name, model_path in models_to_test:
    print(f"\n🔄 Loading {model_name} model...")
    
    # Create fresh agent
    test_agent = SACAgent(
        state_dim=state_dim,
        action_dim=action_dim,
        device=device
    )
    
    # Load model; skip this entry entirely if loading fails
    if load_model(test_agent, model_path, model_name):
        # Run multiple test episodes for robustness
        # NOTE(review): with a deterministic policy, these 5 episodes only
        # differ if the env randomizes its start index on reset — confirm.
        episode_results = []
        
        for ep in range(5):  # 5 test episodes
            metrics = run_backtest(test_agent, test_env, f"{model_name} (Ep {ep+1})", verbose=False)
            episode_results.append(metrics)
        
        # Average results across episodes
        avg_pnl = np.mean([r['total_pnl_pct'] for r in episode_results])
        avg_sharpe = np.mean([r['sharpe_ratio'] for r in episode_results])
        avg_drawdown = np.mean([r['max_drawdown'] for r in episode_results])
        avg_trades = np.mean([r['num_trades'] for r in episode_results])
        
        print(f"\n📊 {model_name} Model - Average over 5 episodes:")
        print(f"   💰 Avg PnL: {avg_pnl:+.2f}%")
        print(f"   📊 Avg Sharpe: {avg_sharpe:.3f}")
        print(f"   📉 Avg Drawdown: {avg_drawdown:.2f}%")
        print(f"   🔄 Avg Trades: {avg_trades:.0f}")
        
        # Store best episode for visualization
        best_ep = max(episode_results, key=lambda x: x['total_pnl_pct'])
        best_ep['model_name'] = model_name
        best_ep['avg_pnl'] = avg_pnl
        best_ep['avg_sharpe'] = avg_sharpe
        all_results.append(best_ep)

print("\n" + "="*70)
print(" ALL MODELS TESTED")
print("="*70)

# ============================================================================
# CELL 14: DETAILED PERFORMANCE VISUALIZATION
# ============================================================================

print("="*70)
print(" DETAILED PERFORMANCE CHARTS")
print("="*70)

# Use best eval model results (first entry appended above)
if all_results:
    best_result = all_results[0]  # Best Eval model
    history = best_result['history']
    
    # Create comprehensive visualization
    fig = plt.figure(figsize=(20, 16))
    gs = GridSpec(4, 3, figure=fig, hspace=0.35, 
wspace=0.25)\n", " fig.patch.set_facecolor('#1a1a2e')\n", " \n", " # ============================================================================\n", " # 1. Portfolio Value Over Time\n", " # ============================================================================\n", " ax1 = fig.add_subplot(gs[0, :2])\n", " ax1.set_facecolor('#1a1a2e')\n", " \n", " steps = history['step']\n", " portfolio = history['total_value']\n", " \n", " # Color based on profit/loss\n", " colors_line = ['#00D4AA' if v >= 10000 else '#FF6B6B' for v in portfolio]\n", " \n", " ax1.fill_between(steps, 10000, portfolio, where=np.array(portfolio) >= 10000, \n", " color='#00D4AA', alpha=0.3, label='Profit')\n", " ax1.fill_between(steps, 10000, portfolio, where=np.array(portfolio) < 10000,\n", " color='#FF6B6B', alpha=0.3, label='Loss')\n", " ax1.plot(steps, portfolio, color='white', linewidth=1.5, alpha=0.9)\n", " ax1.axhline(y=10000, color='gray', linestyle='--', alpha=0.5, label='Initial')\n", " \n", " ax1.set_xlabel('Step', fontsize=11, color='white')\n", " ax1.set_ylabel('Portfolio Value ($)', fontsize=11, color='white')\n", " ax1.set_title('💰 Portfolio Value Over Time', fontsize=14, color='white', fontweight='bold')\n", " ax1.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')\n", " ax1.tick_params(colors='white')\n", " ax1.grid(True, alpha=0.2, color='gray')\n", " for spine in ax1.spines.values():\n", " spine.set_color('gray')\n", " \n", " # Final value annotation\n", " final_val = portfolio[-1]\n", " pnl_pct = (final_val / 10000 - 1) * 100\n", " color = '#00D4AA' if pnl_pct >= 0 else '#FF6B6B'\n", " ax1.annotate(f'${final_val:,.0f}\\n({pnl_pct:+.1f}%)', \n", " xy=(steps[-1], final_val), \n", " fontsize=12, color=color, fontweight='bold',\n", " ha='right', va='bottom')\n", " \n", " # ============================================================================\n", " # 2. 
Metrics Summary Box\n", " # ============================================================================\n", " ax2 = fig.add_subplot(gs[0, 2])\n", " ax2.set_facecolor('#1a1a2e')\n", " ax2.axis('off')\n", " \n", " metrics_text = f\"\"\"\n", " ╔═══════════════════════════╗\n", " ║ 📊 TEST PERFORMANCE ║\n", " ╠═══════════════════════════╣\n", " ║ ║\n", " ║ PnL: {best_result['total_pnl_pct']:+.2f}% ║\n", " ║ Sharpe: {best_result['sharpe_ratio']:.3f} ║\n", " ║ Max DD: {best_result['max_drawdown']:.2f}% ║\n", " ║ Trades: {best_result['num_trades']} ║\n", " ║ Win%: {best_result['win_rate']:.1f}% ║\n", " ║ ║\n", " ║ Long: {best_result['long_pct']:.0f}% ║\n", " ║ Short: {best_result['short_pct']:.0f}% ║\n", " ║ Neutral: {best_result['neutral_pct']:.0f}% ║\n", " ╚═══════════════════════════╝\n", " \"\"\"\n", " ax2.text(0.5, 0.5, metrics_text, transform=ax2.transAxes, fontsize=11,\n", " verticalalignment='center', horizontalalignment='center',\n", " fontfamily='monospace', color='white',\n", " bbox=dict(boxstyle='round', facecolor='#1a1a2e', edgecolor='#00D4AA', linewidth=2))\n", " \n", " # ============================================================================\n", " # 3. 
Price with Position Overlay\n", " # ============================================================================\n", " ax3 = fig.add_subplot(gs[1, :])\n", " ax3.set_facecolor('#1a1a2e')\n", " \n", " prices = history['price']\n", " positions = history['position']\n", " \n", " # Normalize price for display\n", " price_norm = (np.array(prices) - np.min(prices)) / (np.max(prices) - np.min(prices))\n", " \n", " ax3.plot(steps, prices, color='white', linewidth=1, alpha=0.8, label='BTC Price')\n", " \n", " # Color background by position\n", " for i in range(len(steps)-1):\n", " if positions[i] > 0.1:\n", " ax3.axvspan(steps[i], steps[i+1], alpha=0.2, color='#00D4AA')\n", " elif positions[i] < -0.1:\n", " ax3.axvspan(steps[i], steps[i+1], alpha=0.2, color='#FF6B6B')\n", " \n", " # Add trade markers\n", " for trade in history['trades'][:50]: # Limit markers for clarity\n", " step_idx = trade['step']\n", " if step_idx < len(prices):\n", " marker = '^' if trade['type'] == 'LONG' else ('v' if trade['type'] == 'SHORT' else 'o')\n", " color = '#00D4AA' if trade['type'] == 'LONG' else ('#FF6B6B' if trade['type'] == 'SHORT' else 'yellow')\n", " ax3.scatter(step_idx, prices[step_idx], marker=marker, color=color, s=80, zorder=5, edgecolors='white')\n", " \n", " ax3.set_xlabel('Step', fontsize=11, color='white')\n", " ax3.set_ylabel('BTC Price ($)', fontsize=11, color='white')\n", " ax3.set_title('📈 Price Chart with Agent Positions (Green=Long, Red=Short)', fontsize=14, color='white', fontweight='bold')\n", " ax3.tick_params(colors='white')\n", " ax3.grid(True, alpha=0.2, color='gray')\n", " for spine in ax3.spines.values():\n", " spine.set_color('gray')\n", " ax3.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))\n", " \n", " # Legend for trades\n", " long_patch = mpatches.Patch(color='#00D4AA', alpha=0.5, label='Long Position')\n", " short_patch = mpatches.Patch(color='#FF6B6B', alpha=0.5, label='Short Position')\n", " ax3.legend(handles=[long_patch, 
short_patch], loc='upper left', facecolor='#1a1a2e', edgecolor='gray')\n", " \n", " # ============================================================================\n", " # 4. Position Distribution\n", " # ============================================================================\n", " ax4 = fig.add_subplot(gs[2, 0])\n", " ax4.set_facecolor('#1a1a2e')\n", " \n", " pos_labels = ['Long', 'Short', 'Neutral']\n", " pos_values = [best_result['long_pct'], best_result['short_pct'], best_result['neutral_pct']]\n", " pos_colors = ['#00D4AA', '#FF6B6B', '#FFE66D']\n", " \n", " wedges, texts, autotexts = ax4.pie(pos_values, labels=pos_labels, colors=pos_colors,\n", " autopct='%1.1f%%', startangle=90,\n", " explode=(0.05, 0.05, 0.05),\n", " textprops={'color': 'white', 'fontsize': 10})\n", " ax4.set_title('📊 Position Distribution', fontsize=12, color='white', fontweight='bold')\n", " \n", " # ============================================================================\n", " # 5. Drawdown Chart\n", " # ============================================================================\n", " ax5 = fig.add_subplot(gs[2, 1])\n", " ax5.set_facecolor('#1a1a2e')\n", " \n", " peak = np.maximum.accumulate(portfolio)\n", " drawdown = (peak - np.array(portfolio)) / peak * 100\n", " \n", " ax5.fill_between(steps, 0, drawdown, color='#FF6B6B', alpha=0.5)\n", " ax5.plot(steps, drawdown, color='#FF6B6B', linewidth=1)\n", " ax5.axhline(y=best_result['max_drawdown'], color='yellow', linestyle='--', \n", " label=f'Max DD: {best_result[\"max_drawdown\"]:.1f}%')\n", " \n", " ax5.set_xlabel('Step', fontsize=11, color='white')\n", " ax5.set_ylabel('Drawdown (%)', fontsize=11, color='white')\n", " ax5.set_title('📉 Drawdown Over Time', fontsize=12, color='white', fontweight='bold')\n", " ax5.legend(loc='upper right', facecolor='#1a1a2e', edgecolor='gray')\n", " ax5.tick_params(colors='white')\n", " ax5.grid(True, alpha=0.2, color='gray')\n", " ax5.invert_yaxis()\n", " for spine in ax5.spines.values():\n", " 
spine.set_color('gray')\n", " \n", " # ============================================================================\n", " # 6. Action Distribution\n", " # ============================================================================\n", " ax6 = fig.add_subplot(gs[2, 2])\n", " ax6.set_facecolor('#1a1a2e')\n", " \n", " actions = history['action']\n", " ax6.hist(actions, bins=50, color='#4ECDC4', alpha=0.7, edgecolor='white', linewidth=0.5)\n", " ax6.axvline(x=0, color='yellow', linestyle='--', alpha=0.7, label='Neutral')\n", " ax6.axvline(x=np.mean(actions), color='#00D4AA', linestyle='-', linewidth=2, label=f'Mean: {np.mean(actions):.2f}')\n", " \n", " ax6.set_xlabel('Action Value', fontsize=11, color='white')\n", " ax6.set_ylabel('Frequency', fontsize=11, color='white')\n", " ax6.set_title('🎯 Action Distribution', fontsize=12, color='white', fontweight='bold')\n", " ax6.legend(loc='upper right', facecolor='#1a1a2e', edgecolor='gray')\n", " ax6.tick_params(colors='white')\n", " ax6.grid(True, alpha=0.2, color='gray')\n", " for spine in ax6.spines.values():\n", " spine.set_color('gray')\n", " \n", " # ============================================================================\n", " # 7. 
Cumulative Reward\n", " # ============================================================================\n", " ax7 = fig.add_subplot(gs[3, 0])\n", " ax7.set_facecolor('#1a1a2e')\n", " \n", " cum_rewards = np.cumsum(history['reward'])\n", " ax7.plot(steps, cum_rewards, color='#00D4AA', linewidth=1.5)\n", " ax7.fill_between(steps, 0, cum_rewards, where=cum_rewards >= 0, color='#00D4AA', alpha=0.3)\n", " ax7.fill_between(steps, 0, cum_rewards, where=cum_rewards < 0, color='#FF6B6B', alpha=0.3)\n", " ax7.axhline(y=0, color='gray', linestyle='--', alpha=0.5)\n", " \n", " ax7.set_xlabel('Step', fontsize=11, color='white')\n", " ax7.set_ylabel('Cumulative DSR', fontsize=11, color='white')\n", " ax7.set_title('🎯 Cumulative DSR Reward', fontsize=12, color='white', fontweight='bold')\n", " ax7.tick_params(colors='white')\n", " ax7.grid(True, alpha=0.2, color='gray')\n", " for spine in ax7.spines.values():\n", " spine.set_color('gray')\n", " \n", " # ============================================================================\n", " # 8. 
Model Comparison\n", " # ============================================================================\n", " ax8 = fig.add_subplot(gs[3, 1:])\n", " ax8.set_facecolor('#1a1a2e')\n", " \n", " if len(all_results) >= 3:\n", " model_names = [r['model_name'] for r in all_results]\n", " pnls = [r['total_pnl_pct'] for r in all_results]\n", " sharpes = [r['sharpe_ratio'] for r in all_results]\n", " \n", " x = np.arange(len(model_names))\n", " width = 0.35\n", " \n", " bars1 = ax8.bar(x - width/2, pnls, width, label='PnL %', color='#00D4AA', alpha=0.8)\n", " \n", " ax8_twin = ax8.twinx()\n", " bars2 = ax8_twin.bar(x + width/2, sharpes, width, label='Sharpe', color='#4ECDC4', alpha=0.8)\n", " \n", " ax8.set_xlabel('Model', fontsize=11, color='white')\n", " ax8.set_ylabel('PnL (%)', fontsize=11, color='#00D4AA')\n", " ax8_twin.set_ylabel('Sharpe Ratio', fontsize=11, color='#4ECDC4')\n", " ax8.set_title('📊 Model Comparison (Test Set)', fontsize=12, color='white', fontweight='bold')\n", " ax8.set_xticks(x)\n", " ax8.set_xticklabels(model_names, color='white')\n", " ax8.tick_params(colors='white')\n", " ax8_twin.tick_params(colors='white')\n", " ax8.axhline(y=0, color='gray', linestyle='--', alpha=0.5)\n", " \n", " # Add value labels\n", " for bar, val in zip(bars1, pnls):\n", " ax8.text(bar.get_x() + bar.get_width()/2, bar.get_height(), f'{val:+.1f}%',\n", " ha='center', va='bottom', color='white', fontsize=9)\n", " \n", " for bar, val in zip(bars2, sharpes):\n", " ax8_twin.text(bar.get_x() + bar.get_width()/2, bar.get_height(), f'{val:.2f}',\n", " ha='center', va='bottom', color='white', fontsize=9)\n", " \n", " ax8.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')\n", " ax8_twin.legend(loc='upper right', facecolor='#1a1a2e', edgecolor='gray')\n", " \n", " for spine in ax8.spines.values():\n", " spine.set_color('gray')\n", " \n", " plt.suptitle('🚀 SAC Bitcoin Trading Agent - Test Performance Analysis', \n", " fontsize=18, color='white', fontweight='bold', 
y=0.98)\n", " plt.tight_layout()\n", " plt.savefig('test_performance.png', dpi=150, facecolor='#1a1a2e', bbox_inches='tight')\n", " plt.show()\n", " \n", " print(\"\\n✅ Performance visualization saved!\")\n", "else:\n", " print(\"⚠️ No results to visualize. Run the test cells first.\")" ] }, { "cell_type": "code", "execution_count": null, "id": "dee9c95f", "metadata": {}, "outputs": [], "source": [ "# ============================================================================\n", "# CELL 15: EXTENDED BACKTEST (FULL TEST PERIOD)\n", "# ============================================================================\n", "\n", "print(\"=\"*70)\n", "print(\" EXTENDED BACKTEST - FULL TEST PERIOD\")\n", "print(\"=\"*70)\n", "\n", "def run_extended_backtest(agent, df, initial_balance=10000, fee=0.001):\n", " \"\"\"\n", " Run backtest over the ENTIRE test dataset (not just one episode)\n", " \"\"\"\n", " agent_copy = agent\n", " \n", " # We'll manually step through the entire dataset\n", " balance = initial_balance\n", " position = 0.0\n", " entry_price = 0.0\n", " \n", " history = {\n", " 'timestamp': [],\n", " 'price': [],\n", " 'position': [],\n", " 'portfolio_value': [],\n", " 'pnl_pct': [],\n", " 'trades': []\n", " }\n", " \n", " # Get feature columns\n", " feature_cols = [col for col in df.columns \n", " if col not in ['open', 'high', 'low', 'close', 'volume', 'fgi', 'fgi_ma7']]\n", " \n", " prev_action = 0.0\n", " \n", " # Step through entire dataset\n", " for i in range(100, len(df) - 1): # Start at 100 to have lookback\n", " row = df.iloc[i]\n", " price = row['close']\n", " \n", " # Build observation (simplified)\n", " features = row[feature_cols].values\n", " \n", " # Calculate portfolio value\n", " if position > 0:\n", " pnl = position * initial_balance * (price / entry_price - 1)\n", " elif position < 0:\n", " pnl = abs(position) * initial_balance * (1 - price / entry_price)\n", " else:\n", " pnl = 0\n", " portfolio_value = balance + pnl\n", " \n", " # Build 
state\n", " total_return = (portfolio_value / initial_balance) - 1\n", " max_val = max(history['portfolio_value']) if history['portfolio_value'] else initial_balance\n", " drawdown = (max_val - portfolio_value) / max_val if max_val > 0 else 0\n", " \n", " portfolio_info = np.array([\n", " position,\n", " total_return,\n", " drawdown,\n", " row['returns_1_15m'],\n", " row['rsi_14_15m'],\n", " prev_action\n", " ], dtype=np.float32)\n", " \n", " obs = np.concatenate([features, portfolio_info])\n", " obs = np.clip(obs, -10, 10).astype(np.float32)\n", " \n", " # Get action from agent\n", " action = agent.select_action(obs, deterministic=True)\n", " target_position = np.clip(action[0], -1.0, 1.0)\n", " \n", " # Execute trade if position changes significantly\n", " if abs(target_position - position) > 0.1:\n", " # Close existing position\n", " if position != 0:\n", " if position > 0:\n", " close_pnl = position * initial_balance * (price / entry_price - 1)\n", " else:\n", " close_pnl = abs(position) * initial_balance * (1 - price / entry_price)\n", " # BUGFIX: charge the exit fee on the traded notional (|position| * capital),\n", " # not on the realized PnL. This matches the entry fee a few lines below and the\n", " # notebook's stated 0.1% transaction-fee model; fee-on-PnL made breakeven trades\n", " # free and over-taxed large winners, biasing PnL/Sharpe/alpha downstream.\n", " fee_cost = abs(position) * initial_balance * fee\n", " balance += close_pnl - fee_cost\n", " \n", " history['trades'].append({\n", " 'timestamp': df.index[i],\n", " 'price': price,\n", " 'type': 'CLOSE',\n", " 'pnl': close_pnl - fee_cost\n", " })\n", " position = 0.0\n", " \n", " # Open new position\n", " if abs(target_position) > 0.1:\n", " position = target_position\n", " entry_price = price\n", " fee_cost = abs(position) * initial_balance * fee\n", " balance -= fee_cost\n", " \n", " history['trades'].append({\n", " 'timestamp': df.index[i],\n", " 'price': price,\n", " 'type': 'LONG' if position > 0 else 'SHORT',\n", " 'size': position\n", " })\n", " \n", " # Update portfolio value\n", " if position > 0:\n", " pnl = position * initial_balance * (price / entry_price - 1)\n", " elif position < 0:\n", " pnl = abs(position) * initial_balance * (1 - price / entry_price)\n", " else:\n", " pnl = 0\n", " portfolio_value = balance + pnl\n", " \n", " 
# Record history\n", " history['timestamp'].append(df.index[i])\n", " history['price'].append(price)\n", " history['position'].append(position)\n", " history['portfolio_value'].append(portfolio_value)\n", " history['pnl_pct'].append((portfolio_value / initial_balance - 1) * 100)\n", " \n", " prev_action = target_position\n", " \n", " return history\n", "\n", "# Load best eval model\n", "print(\"🔄 Loading Best Eval model for extended backtest...\")\n", "best_agent = SACAgent(state_dim=state_dim, action_dim=action_dim, device=device)\n", "load_model(best_agent, BEST_EVAL_MODEL, \"Best Eval\")\n", "\n", "# Run extended backtest on test data\n", "print(f\"\\n📊 Running extended backtest on {len(test_data_norm):,} candles...\")\n", "extended_history = run_extended_backtest(best_agent, test_data_norm)\n", "\n", "# Calculate final metrics\n", "final_portfolio = extended_history['portfolio_value'][-1]\n", "total_pnl = (final_portfolio / 10000 - 1) * 100\n", "num_trades = len(extended_history['trades'])\n", "\n", "# Calculate returns for Sharpe\n", "returns = np.diff(extended_history['portfolio_value']) / np.array(extended_history['portfolio_value'][:-1])\n", "sharpe = np.mean(returns) / (np.std(returns) + 1e-8) * np.sqrt(252 * 96)\n", "\n", "# Max drawdown\n", "peak = np.maximum.accumulate(extended_history['portfolio_value'])\n", "drawdown = (peak - np.array(extended_history['portfolio_value'])) / peak * 100\n", "max_dd = np.max(drawdown)\n", "\n", "# Buy and hold comparison\n", "buy_hold_return = (extended_history['price'][-1] / extended_history['price'][0] - 1) * 100\n", "\n", "print(f\"\\n{'='*60}\")\n", "print(f\"📊 EXTENDED BACKTEST RESULTS\")\n", "print(f\"{'='*60}\")\n", "print(f\" 📅 Period: {extended_history['timestamp'][0].strftime('%Y-%m-%d')} to {extended_history['timestamp'][-1].strftime('%Y-%m-%d')}\")\n", "print(f\" 📊 Candles: {len(extended_history['portfolio_value']):,}\")\n", "print(f\"\\n 💰 AGENT PERFORMANCE:\")\n", "print(f\" Final Value: 
${final_portfolio:,.2f}\")\n", "print(f\" Total PnL: {total_pnl:+.2f}%\")\n", "print(f\" Sharpe Ratio: {sharpe:.3f}\")\n", "print(f\" Max Drawdown: {max_dd:.2f}%\")\n", "print(f\" Num Trades: {num_trades}\")\n", "print(f\"\\n 📈 BUY & HOLD COMPARISON:\")\n", "print(f\" B&H Return: {buy_hold_return:+.2f}%\")\n", "print(f\" Alpha: {total_pnl - buy_hold_return:+.2f}%\")\n", "print(f\"{'='*60}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "6b20eb2e", "metadata": {}, "outputs": [], "source": [ "# ============================================================================\n", "# CELL 16: EXTENDED BACKTEST VISUALIZATION\n", "# ============================================================================\n", "\n", "print(\"=\"*70)\n", "print(\" EXTENDED BACKTEST VISUALIZATION\")\n", "print(\"=\"*70)\n", "\n", "fig = plt.figure(figsize=(20, 14))\n", "gs = GridSpec(3, 2, figure=fig, hspace=0.3, wspace=0.2)\n", "fig.patch.set_facecolor('#1a1a2e')\n", "\n", "# ============================================================================\n", "# 1. 
Portfolio Value vs Buy & Hold (Main Chart)\n", "# ============================================================================\n", "ax1 = fig.add_subplot(gs[0, :])\n", "ax1.set_facecolor('#1a1a2e')\n", "\n", "timestamps = extended_history['timestamp']\n", "portfolio = extended_history['portfolio_value']\n", "prices = extended_history['price']\n", "\n", "# Normalize buy & hold to start at 10000\n", "buy_hold = np.array(prices) / prices[0] * 10000\n", "\n", "# Plot\n", "ax1.plot(timestamps, portfolio, color='#00D4AA', linewidth=2, label=f'SAC Agent ({total_pnl:+.1f}%)', zorder=3)\n", "ax1.plot(timestamps, buy_hold, color='#4ECDC4', linewidth=1.5, alpha=0.7, label=f'Buy & Hold ({buy_hold_return:+.1f}%)', zorder=2)\n", "ax1.axhline(y=10000, color='gray', linestyle='--', alpha=0.5, label='Initial Capital')\n", "\n", "# Fill between\n", "ax1.fill_between(timestamps, buy_hold, portfolio, where=np.array(portfolio) > buy_hold,\n", " color='#00D4AA', alpha=0.2, label='Outperformance')\n", "ax1.fill_between(timestamps, buy_hold, portfolio, where=np.array(portfolio) <= buy_hold,\n", " color='#FF6B6B', alpha=0.2, label='Underperformance')\n", "\n", "ax1.set_xlabel('Date', fontsize=12, color='white')\n", "ax1.set_ylabel('Portfolio Value ($)', fontsize=12, color='white')\n", "ax1.set_title('💰 Agent Performance vs Buy & Hold', fontsize=16, color='white', fontweight='bold')\n", "ax1.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray', fontsize=10)\n", "ax1.tick_params(colors='white')\n", "ax1.grid(True, alpha=0.2, color='gray')\n", "for spine in ax1.spines.values():\n", " spine.set_color('gray')\n", "ax1.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))\n", "\n", "# Add final value annotations\n", "ax1.annotate(f'Agent: ${portfolio[-1]:,.0f}', xy=(timestamps[-1], portfolio[-1]),\n", " xytext=(10, 10), textcoords='offset points',\n", " fontsize=11, color='#00D4AA', fontweight='bold')\n", "ax1.annotate(f'B&H: ${buy_hold[-1]:,.0f}', xy=(timestamps[-1], 
buy_hold[-1]),\n", " xytext=(10, -10), textcoords='offset points',\n", " fontsize=11, color='#4ECDC4', fontweight='bold')\n", "\n", "# ============================================================================\n", "# 2. BTC Price with Trade Markers\n", "# ============================================================================\n", "ax2 = fig.add_subplot(gs[1, :])\n", "ax2.set_facecolor('#1a1a2e')\n", "\n", "ax2.plot(timestamps, prices, color='white', linewidth=1, alpha=0.8)\n", "\n", "# Add trade markers\n", "long_trades = [t for t in extended_history['trades'] if t['type'] == 'LONG']\n", "short_trades = [t for t in extended_history['trades'] if t['type'] == 'SHORT']\n", "close_trades = [t for t in extended_history['trades'] if t['type'] == 'CLOSE']\n", "\n", "if long_trades:\n", " ax2.scatter([t['timestamp'] for t in long_trades], [t['price'] for t in long_trades],\n", " marker='^', color='#00D4AA', s=100, label=f'Long ({len(long_trades)})', zorder=5, edgecolors='white')\n", "if short_trades:\n", " ax2.scatter([t['timestamp'] for t in short_trades], [t['price'] for t in short_trades],\n", " marker='v', color='#FF6B6B', s=100, label=f'Short ({len(short_trades)})', zorder=5, edgecolors='white')\n", "\n", "ax2.set_xlabel('Date', fontsize=12, color='white')\n", "ax2.set_ylabel('BTC Price ($)', fontsize=12, color='white')\n", "ax2.set_title('📈 BTC Price with Trade Entries', fontsize=14, color='white', fontweight='bold')\n", "ax2.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')\n", "ax2.tick_params(colors='white')\n", "ax2.grid(True, alpha=0.2, color='gray')\n", "for spine in ax2.spines.values():\n", " spine.set_color('gray')\n", "ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))\n", "\n", "# ============================================================================\n", "# 3. 
Drawdown Chart\n", "# ============================================================================\n", "ax3 = fig.add_subplot(gs[2, 0])\n", "ax3.set_facecolor('#1a1a2e')\n", "\n", "ax3.fill_between(timestamps, 0, drawdown, color='#FF6B6B', alpha=0.5)\n", "ax3.plot(timestamps, drawdown, color='#FF6B6B', linewidth=1)\n", "ax3.axhline(y=max_dd, color='yellow', linestyle='--', linewidth=2, label=f'Max DD: {max_dd:.1f}%')\n", "\n", "ax3.set_xlabel('Date', fontsize=11, color='white')\n", "ax3.set_ylabel('Drawdown (%)', fontsize=11, color='white')\n", "ax3.set_title('📉 Drawdown Over Time', fontsize=13, color='white', fontweight='bold')\n", "ax3.legend(loc='lower right', facecolor='#1a1a2e', edgecolor='gray')\n", "ax3.tick_params(colors='white')\n", "ax3.grid(True, alpha=0.2, color='gray')\n", "ax3.invert_yaxis()\n", "for spine in ax3.spines.values():\n", " spine.set_color('gray')\n", "\n", "# ============================================================================\n", "# 4. Rolling Returns Comparison\n", "# ============================================================================\n", "ax4 = fig.add_subplot(gs[2, 1])\n", "ax4.set_facecolor('#1a1a2e')\n", "\n", "# Calculate rolling 7-day returns (672 = 7 days of 15m candles)\n", "window = 672\n", "agent_rolling = pd.Series(extended_history['pnl_pct']).rolling(window).apply(lambda x: x.iloc[-1] - x.iloc[0])\n", "bh_returns = (np.array(prices) / prices[0] - 1) * 100\n", "bh_rolling = pd.Series(bh_returns).rolling(window).apply(lambda x: x.iloc[-1] - x.iloc[0])\n", "\n", "# Align timestamps with rolling data (use iloc to ensure same length)\n", "valid_idx = agent_rolling.dropna().index\n", "timestamps_arr = np.array(timestamps)\n", "ax4.plot(timestamps_arr[valid_idx], agent_rolling.dropna().values, color='#00D4AA', linewidth=1.5, label='Agent', alpha=0.8)\n", "ax4.plot(timestamps_arr[valid_idx], bh_rolling.iloc[valid_idx].values, color='#4ECDC4', linewidth=1.5, label='Buy & Hold', alpha=0.8)\n", "ax4.axhline(y=0, 
color='gray', linestyle='--', alpha=0.5)\n", "\n", "ax4.set_xlabel('Date', fontsize=11, color='white')\n", "ax4.set_ylabel('7-Day Rolling Return (%)', fontsize=11, color='white')\n", "ax4.set_title('📊 7-Day Rolling Returns Comparison', fontsize=13, color='white', fontweight='bold')\n", "ax4.legend(loc='upper left', facecolor='#1a1a2e', edgecolor='gray')\n", "ax4.tick_params(colors='white')\n", "ax4.grid(True, alpha=0.2, color='gray')\n", "for spine in ax4.spines.values():\n", " spine.set_color('gray')\n", "\n", "plt.suptitle('🚀 SAC Bitcoin Agent - Extended Backtest Analysis', \n", " fontsize=18, color='white', fontweight='bold', y=0.98)\n", "plt.tight_layout()\n", "plt.savefig('extended_backtest.png', dpi=150, facecolor='#1a1a2e', bbox_inches='tight')\n", "plt.show()\n", "\n", "print(\"\\n✅ Extended backtest visualization saved!\")" ] }, { "cell_type": "code", "execution_count": null, "id": "027f6534", "metadata": {}, "outputs": [], "source": [ "# ============================================================================\n", "# CELL 17: FINAL SUMMARY DASHBOARD\n", "# ============================================================================\n", "\n", "print(\"=\"*70)\n", "print(\" FINAL SUMMARY DASHBOARD\")\n", "print(\"=\"*70)\n", "\n", "fig = plt.figure(figsize=(20, 10))\n", "fig.patch.set_facecolor('#1a1a2e')\n", "\n", "# Create grid\n", "gs = GridSpec(2, 4, figure=fig, hspace=0.4, wspace=0.3)\n", "\n", "# Color palette\n", "colors = {\n", " 'green': '#00D4AA',\n", " 'red': '#FF6B6B',\n", " 'blue': '#4ECDC4',\n", " 'yellow': '#FFE66D',\n", " 'purple': '#9B59B6',\n", " 'bg': '#1a1a2e'\n", "}\n", "\n", "# ============================================================================\n", "# Helper function for metric cards\n", "# ============================================================================\n", "def create_metric_card(ax, title, value, subtitle=\"\", color='#00D4AA', icon=\"📊\"):\n", " ax.set_facecolor(colors['bg'])\n", " ax.axis('off')\n", " \n", 
" # Main value\n", " ax.text(0.5, 0.6, f\"{icon}\", transform=ax.transAxes, fontsize=30,\n", " ha='center', va='center')\n", " ax.text(0.5, 0.35, f\"{value}\", transform=ax.transAxes, fontsize=24,\n", " ha='center', va='center', color=color, fontweight='bold')\n", " ax.text(0.5, 0.15, f\"{title}\", transform=ax.transAxes, fontsize=11,\n", " ha='center', va='center', color='white')\n", " if subtitle:\n", " ax.text(0.5, 0.02, f\"{subtitle}\", transform=ax.transAxes, fontsize=9,\n", " ha='center', va='center', color='gray')\n", " \n", " # Border\n", " for spine in ax.spines.values():\n", " spine.set_visible(True)\n", " spine.set_color(color)\n", " spine.set_linewidth(2)\n", "\n", "# ============================================================================\n", "# Create metric cards\n", "# ============================================================================\n", "# Row 1: Training Metrics\n", "ax1 = fig.add_subplot(gs[0, 0])\n", "create_metric_card(ax1, \"Training Time\", \"131 min\", \"1000 episodes\", colors['blue'], \"⏱️\")\n", "\n", "ax2 = fig.add_subplot(gs[0, 1])\n", "create_metric_card(ax2, \"Best Train DSR\", \"0.5949\", \"Risk-adjusted reward\", colors['green'], \"🎯\")\n", "\n", "ax3 = fig.add_subplot(gs[0, 2])\n", "create_metric_card(ax3, \"Best Eval DSR\", \"0.2125\", \"Validation set\", colors['yellow'], \"📈\")\n", "\n", "ax4 = fig.add_subplot(gs[0, 3])\n", "create_metric_card(ax4, \"Training Speed\", \"64 sps\", \"steps per second\", colors['purple'], \"⚡\")\n", "\n", "# Row 2: Test Performance Metrics\n", "ax5 = fig.add_subplot(gs[1, 0])\n", "pnl_color = colors['green'] if total_pnl >= 0 else colors['red']\n", "create_metric_card(ax5, \"Test PnL\", f\"{total_pnl:+.2f}%\", \"Extended backtest\", pnl_color, \"💰\")\n", "\n", "ax6 = fig.add_subplot(gs[1, 1])\n", "sharpe_color = colors['green'] if sharpe > 0.5 else (colors['yellow'] if sharpe > 0 else colors['red'])\n", "create_metric_card(ax6, \"Sharpe Ratio\", f\"{sharpe:.3f}\", \"Annualized\", 
sharpe_color, \"📊\")\n", "\n", "ax7 = fig.add_subplot(gs[1, 2])\n", "create_metric_card(ax7, \"Max Drawdown\", f\"{max_dd:.1f}%\", \"Peak to trough\", colors['red'], \"📉\")\n", "\n", "ax8 = fig.add_subplot(gs[1, 3])\n", "alpha = total_pnl - buy_hold_return\n", "alpha_color = colors['green'] if alpha >= 0 else colors['red']\n", "create_metric_card(ax8, \"Alpha vs B&H\", f\"{alpha:+.2f}%\", \"Excess return\", alpha_color, \"🏆\")\n", "\n", "plt.suptitle('🚀 SAC Bitcoin Trading Agent - Performance Dashboard', \n", " fontsize=20, color='white', fontweight='bold', y=0.98)\n", "\n", "# Add footer\n", "fig.text(0.5, 0.02, \n", " f\"Test Period: {extended_history['timestamp'][0].strftime('%Y-%m-%d')} to {extended_history['timestamp'][-1].strftime('%Y-%m-%d')} | \"\n", " f\"Trades: {num_trades} | Multi-timeframe: 15m/1h/4h | DSR Reward | 0.1% Transaction Fee\",\n", " ha='center', fontsize=10, color='gray')\n", "\n", "plt.tight_layout()\n", "plt.savefig('final_dashboard.png', dpi=150, facecolor=colors['bg'], bbox_inches='tight')\n", "plt.show()\n", "\n", "print(\"\\n\" + \"=\"*70)\n", "print(\" ✅ ALL VISUALIZATIONS COMPLETE!\")\n", "print(\"=\"*70)\n", "print(\"\\n📁 Saved files:\")\n", "print(\" • training_summary.png\")\n", "print(\" • test_performance.png\")\n", "print(\" • extended_backtest.png\")\n", "print(\" • final_dashboard.png\")\n", "print(\"\\n🎉 Analysis complete!\")" ] }, { "cell_type": "code", "execution_count": null, "id": "7d777375", "metadata": {}, "outputs": [], "source": [ "# ============================================================================\n", "# CELL 18: TRADE ANALYSIS & STATISTICS\n", "# ============================================================================\n", "\n", "print(\"=\"*70)\n", "print(\" TRADE ANALYSIS & STATISTICS\")\n", "print(\"=\"*70)\n", "\n", "# Analyze trades\n", "trades = extended_history['trades']\n", "\n", "if trades:\n", " # Separate trade types\n", " long_entries = [t for t in trades if t['type'] == 'LONG']\n", " 
short_entries = [t for t in trades if t['type'] == 'SHORT']\n", " closes = [t for t in trades if t['type'] == 'CLOSE']\n", " \n", " # Calculate trade PnLs from close trades\n", " trade_pnls = [t.get('pnl', 0) for t in closes if 'pnl' in t]\n", " \n", " if trade_pnls:\n", " winning_trades = [p for p in trade_pnls if p > 0]\n", " losing_trades = [p for p in trade_pnls if p <= 0]\n", " \n", " win_rate = len(winning_trades) / len(trade_pnls) * 100\n", " avg_win = np.mean(winning_trades) if winning_trades else 0\n", " avg_loss = np.mean(losing_trades) if losing_trades else 0\n", " profit_factor = abs(sum(winning_trades) / sum(losing_trades)) if losing_trades and sum(losing_trades) != 0 else float('inf')\n", " \n", " print(f\"\\n📊 TRADE STATISTICS:\")\n", " print(f\" Total Trades: {len(trade_pnls)}\")\n", " print(f\" Long Entries: {len(long_entries)}\")\n", " print(f\" Short Entries: {len(short_entries)}\")\n", " print(f\"\\n📈 PERFORMANCE:\")\n", " print(f\" Win Rate: {win_rate:.1f}%\")\n", " print(f\" Winning Trades: {len(winning_trades)}\")\n", " print(f\" Losing Trades: {len(losing_trades)}\")\n", " print(f\" Avg Win: ${avg_win:.2f}\")\n", " print(f\" Avg Loss: ${avg_loss:.2f}\")\n", " print(f\" Profit Factor: {profit_factor:.2f}\")\n", " print(f\" Total P&L: ${sum(trade_pnls):.2f}\")\n", " \n", " # Create trade analysis visualization\n", " fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n", " fig.patch.set_facecolor('#1a1a2e')\n", " \n", " # 1. 
Trade P&L Distribution\n", " ax1 = axes[0, 0]\n", " ax1.set_facecolor('#1a1a2e')\n", " \n", " bins = np.linspace(min(trade_pnls), max(trade_pnls), 30)\n", " ax1.hist([p for p in trade_pnls if p > 0], bins=bins, color='#00D4AA', alpha=0.7, label='Wins')\n", " ax1.hist([p for p in trade_pnls if p <= 0], bins=bins, color='#FF6B6B', alpha=0.7, label='Losses')\n", " ax1.axvline(x=0, color='white', linestyle='--', alpha=0.7)\n", " ax1.axvline(x=np.mean(trade_pnls), color='#FFE66D', linestyle='-', linewidth=2, \n", " label=f'Mean: ${np.mean(trade_pnls):.2f}')\n", " \n", " ax1.set_xlabel('Trade P&L ($)', fontsize=11, color='white')\n", " ax1.set_ylabel('Frequency', fontsize=11, color='white')\n", " ax1.set_title('📊 Trade P&L Distribution', fontsize=13, color='white', fontweight='bold')\n", " ax1.legend(facecolor='#1a1a2e', edgecolor='gray')\n", " ax1.tick_params(colors='white')\n", " ax1.grid(True, alpha=0.2, color='gray')\n", " for spine in ax1.spines.values():\n", " spine.set_color('gray')\n", " \n", " # 2. Cumulative Trade P&L\n", " ax2 = axes[0, 1]\n", " ax2.set_facecolor('#1a1a2e')\n", " \n", " cum_pnl = np.cumsum(trade_pnls)\n", " trade_nums = range(1, len(trade_pnls) + 1)\n", " \n", " ax2.plot(trade_nums, cum_pnl, color='#00D4AA', linewidth=2)\n", " ax2.fill_between(trade_nums, 0, cum_pnl, where=cum_pnl >= 0, color='#00D4AA', alpha=0.3)\n", " ax2.fill_between(trade_nums, 0, cum_pnl, where=cum_pnl < 0, color='#FF6B6B', alpha=0.3)\n", " ax2.axhline(y=0, color='gray', linestyle='--', alpha=0.5)\n", " \n", " ax2.set_xlabel('Trade Number', fontsize=11, color='white')\n", " ax2.set_ylabel('Cumulative P&L ($)', fontsize=11, color='white')\n", " ax2.set_title('📈 Cumulative Trade P&L', fontsize=13, color='white', fontweight='bold')\n", " ax2.tick_params(colors='white')\n", " ax2.grid(True, alpha=0.2, color='gray')\n", " for spine in ax2.spines.values():\n", " spine.set_color('gray')\n", " \n", " # 3. 
Win/Loss Ratio Pie Chart\n", " ax3 = axes[1, 0]\n", " ax3.set_facecolor('#1a1a2e')\n", " \n", " sizes = [len(winning_trades), len(losing_trades)]\n", " labels = [f'Wins ({len(winning_trades)})', f'Losses ({len(losing_trades)})']\n", " colors_pie = ['#00D4AA', '#FF6B6B']\n", " explode = (0.05, 0.05)\n", " \n", " wedges, texts, autotexts = ax3.pie(sizes, labels=labels, colors=colors_pie,\n", " autopct='%1.1f%%', startangle=90, explode=explode,\n", " textprops={'color': 'white', 'fontsize': 11})\n", " ax3.set_title('🎯 Win/Loss Distribution', fontsize=13, color='white', fontweight='bold')\n", " \n", " # 4. Trade Size Distribution \n", " ax4 = axes[1, 1]\n", " ax4.set_facecolor('#1a1a2e')\n", " \n", " # Position sizes from history\n", " positions = [abs(p) for p in extended_history['position'] if abs(p) > 0.1]\n", " \n", " if positions:\n", " ax4.hist(positions, bins=20, color='#4ECDC4', alpha=0.7, edgecolor='white', linewidth=0.5)\n", " ax4.axvline(x=np.mean(positions), color='#FFE66D', linestyle='-', linewidth=2,\n", " label=f'Mean: {np.mean(positions):.2f}')\n", " \n", " ax4.set_xlabel('Position Size', fontsize=11, color='white')\n", " ax4.set_ylabel('Frequency', fontsize=11, color='white')\n", " ax4.set_title('📊 Position Size Distribution', fontsize=13, color='white', fontweight='bold')\n", " ax4.legend(facecolor='#1a1a2e', edgecolor='gray')\n", " ax4.tick_params(colors='white')\n", " ax4.grid(True, alpha=0.2, color='gray')\n", " for spine in ax4.spines.values():\n", " spine.set_color('gray')\n", " \n", " plt.suptitle('🔍 Trade Analysis Deep Dive', fontsize=16, color='white', fontweight='bold', y=0.98)\n", " plt.tight_layout()\n", " plt.savefig('trade_analysis.png', dpi=150, facecolor='#1a1a2e', bbox_inches='tight')\n", " plt.show()\n", " \n", " print(\"\\n✅ Trade analysis visualization saved!\")\n", " else:\n", " print(\"⚠️ No trade P&L data available\")\n", "else:\n", " print(\"⚠️ No trades recorded\")" ] } ], "metadata": { "kaggle": { "accelerator": 
"nvidiaTeslaT4", "dataSources": [ { "datasetId": 7097204, "sourceId": 11420269, "sourceType": "datasetVersion" }, { "datasetId": 5656419, "sourceId": 13492684, "sourceType": "datasetVersion" }, { "datasetId": 7608804, "sourceId": 13495502, "sourceType": "datasetVersion" }, { "datasetId": 8569093, "sourceId": 13496378, "sourceType": "datasetVersion" } ], "dockerImageVersionId": 31153, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" }, "papermill": { "default_parameters": {}, "duration": null, "end_time": null, "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2025-10-25T11:42:30.221950", "version": "2.6.0" } }, "nbformat": 4, "nbformat_minor": 5 }