|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!pip install -q ta
|
|
|
|
|
|
import torch
|
|
|
import torch.nn as nn
|
|
|
import torch.nn.functional as F
|
|
|
import torch.optim as optim
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
import warnings
|
|
|
warnings.filterwarnings('ignore')
|
|
|
|
|
|
print("="*70)
|
|
|
print(" PYTORCH GPU SETUP (30GB GPU)")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
if torch.cuda.is_available():
|
|
|
|
|
|
gpu_name = torch.cuda.get_device_name(0)
|
|
|
gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
|
|
|
|
|
|
print(f"✅ GPU: {gpu_name}")
|
|
|
print(f"✅ GPU Memory: {gpu_mem:.1f} GB")
|
|
|
|
|
|
|
|
|
torch.backends.cuda.matmul.allow_tf32 = True
|
|
|
torch.backends.cudnn.allow_tf32 = True
|
|
|
print("✅ TF32: Enabled (2-3x speedup on Ampere)")
|
|
|
|
|
|
|
|
|
torch.backends.cudnn.benchmark = True
|
|
|
print("✅ cuDNN benchmark: Enabled")
|
|
|
|
|
|
|
|
|
torch.set_default_device('cuda')
|
|
|
print("✅ Default device: CUDA")
|
|
|
|
|
|
else:
|
|
|
print("⚠️ No GPU detected, using CPU")
|
|
|
|
|
|
print(f"\n✅ PyTorch: {torch.__version__}")
|
|
|
print(f"✅ Device: {device}")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
import gym
|
|
|
from gym import spaces
|
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
from ta.momentum import RSIIndicator, StochasticOscillator, ROCIndicator, WilliamsRIndicator
|
|
|
from ta.trend import MACD, EMAIndicator, SMAIndicator, ADXIndicator, CCIIndicator
|
|
|
from ta.volatility import BollingerBands, AverageTrueRange
|
|
|
from ta.volume import OnBalanceVolumeIndicator
|
|
|
import os
|
|
|
|
|
|
print("="*70)
|
|
|
print(" LOADING DATA + FEATURES")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data_path = '/kaggle/input/bitcoin-historical-datasets-2018-2024/'
|
|
|
btc_data = pd.read_csv(data_path + 'btc_15m_data_2018_to_2025.csv')
|
|
|
|
|
|
column_mapping = {'Open time': 'timestamp', 'Open': 'open', 'High': 'high',
|
|
|
'Low': 'low', 'Close': 'close', 'Volume': 'volume'}
|
|
|
btc_data = btc_data.rename(columns=column_mapping)
|
|
|
btc_data['timestamp'] = pd.to_datetime(btc_data['timestamp'])
|
|
|
btc_data.set_index('timestamp', inplace=True)
|
|
|
btc_data = btc_data[['open', 'high', 'low', 'close', 'volume']]
|
|
|
|
|
|
for col in btc_data.columns:
|
|
|
btc_data[col] = pd.to_numeric(btc_data[col], errors='coerce')
|
|
|
|
|
|
btc_data = btc_data[btc_data.index >= '2021-01-01']
|
|
|
btc_data = btc_data[~btc_data.index.duplicated(keep='first')]
|
|
|
btc_data = btc_data.replace(0, np.nan).dropna().sort_index()
|
|
|
|
|
|
print(f"✅ BTC Data: {len(btc_data):,} candles")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fgi_loaded = False
|
|
|
|
|
|
try:
|
|
|
fgi_path = '/kaggle/input/btc-usdt-4h-ohlc-fgi-daily-2020/'
|
|
|
files = os.listdir(fgi_path)
|
|
|
|
|
|
for filename in files:
|
|
|
if filename.endswith('.csv'):
|
|
|
fgi_data = pd.read_csv(fgi_path + filename)
|
|
|
|
|
|
|
|
|
time_col = [c for c in fgi_data.columns if 'time' in c.lower() or 'date' in c.lower()]
|
|
|
if time_col:
|
|
|
fgi_data['timestamp'] = pd.to_datetime(fgi_data[time_col[0]])
|
|
|
else:
|
|
|
fgi_data['timestamp'] = pd.to_datetime(fgi_data.iloc[:, 0])
|
|
|
|
|
|
fgi_data.set_index('timestamp', inplace=True)
|
|
|
|
|
|
|
|
|
fgi_col = [c for c in fgi_data.columns if 'fgi' in c.lower() or 'fear' in c.lower() or 'greed' in c.lower()]
|
|
|
if fgi_col:
|
|
|
fgi_data = fgi_data[[fgi_col[0]]].rename(columns={fgi_col[0]: 'fgi'})
|
|
|
fgi_loaded = True
|
|
|
print(f"✅ Fear & Greed loaded: {len(fgi_data):,} values")
|
|
|
break
|
|
|
except:
|
|
|
pass
|
|
|
|
|
|
if not fgi_loaded:
|
|
|
fgi_data = pd.DataFrame(index=btc_data.index)
|
|
|
fgi_data['fgi'] = 50
|
|
|
print("⚠️ Using neutral FGI values")
|
|
|
|
|
|
|
|
|
btc_data = btc_data.join(fgi_data, how='left')
|
|
|
btc_data['fgi'] = btc_data['fgi'].fillna(method='ffill').fillna(method='bfill').fillna(50)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("🔧 Calculating indicators...")
|
|
|
data = btc_data.copy()
|
|
|
|
|
|
|
|
|
data['rsi_14'] = RSIIndicator(close=data['close'], window=14).rsi() / 100
|
|
|
data['rsi_7'] = RSIIndicator(close=data['close'], window=7).rsi() / 100
|
|
|
|
|
|
stoch = StochasticOscillator(high=data['high'], low=data['low'], close=data['close'], window=14)
|
|
|
data['stoch_k'] = stoch.stoch() / 100
|
|
|
data['stoch_d'] = stoch.stoch_signal() / 100
|
|
|
|
|
|
roc = ROCIndicator(close=data['close'], window=12)
|
|
|
data['roc_12'] = np.tanh(roc.roc() / 100)
|
|
|
|
|
|
williams = WilliamsRIndicator(high=data['high'], low=data['low'], close=data['close'], lbp=14)
|
|
|
data['williams_r'] = (williams.williams_r() + 100) / 100
|
|
|
|
|
|
macd = MACD(close=data['close'])
|
|
|
data['macd'] = np.tanh(macd.macd() / data['close'] * 100)
|
|
|
data['macd_signal'] = np.tanh(macd.macd_signal() / data['close'] * 100)
|
|
|
data['macd_diff'] = np.tanh(macd.macd_diff() / data['close'] * 100)
|
|
|
|
|
|
|
|
|
data['sma_20'] = SMAIndicator(close=data['close'], window=20).sma_indicator()
|
|
|
data['sma_50'] = SMAIndicator(close=data['close'], window=50).sma_indicator()
|
|
|
data['ema_12'] = EMAIndicator(close=data['close'], window=12).ema_indicator()
|
|
|
data['ema_26'] = EMAIndicator(close=data['close'], window=26).ema_indicator()
|
|
|
|
|
|
data['price_vs_sma20'] = (data['close'] - data['sma_20']) / data['sma_20']
|
|
|
data['price_vs_sma50'] = (data['close'] - data['sma_50']) / data['sma_50']
|
|
|
|
|
|
adx = ADXIndicator(high=data['high'], low=data['low'], close=data['close'], window=14)
|
|
|
data['adx'] = adx.adx() / 100
|
|
|
data['adx_pos'] = adx.adx_pos() / 100
|
|
|
data['adx_neg'] = adx.adx_neg() / 100
|
|
|
|
|
|
cci = CCIIndicator(high=data['high'], low=data['low'], close=data['close'], window=20)
|
|
|
data['cci'] = np.tanh(cci.cci() / 100)
|
|
|
|
|
|
|
|
|
bb = BollingerBands(close=data['close'], window=20, window_dev=2)
|
|
|
data['bb_width'] = (bb.bollinger_hband() - bb.bollinger_lband()) / bb.bollinger_mavg()
|
|
|
data['bb_position'] = (data['close'] - bb.bollinger_lband()) / (bb.bollinger_hband() - bb.bollinger_lband())
|
|
|
|
|
|
atr = AverageTrueRange(high=data['high'], low=data['low'], close=data['close'], window=14)
|
|
|
data['atr_percent'] = atr.average_true_range() / data['close']
|
|
|
|
|
|
|
|
|
data['volume_ma_20'] = data['volume'].rolling(20).mean()
|
|
|
data['volume_ratio'] = data['volume'] / (data['volume_ma_20'] + 1e-8)
|
|
|
|
|
|
obv = OnBalanceVolumeIndicator(close=data['close'], volume=data['volume'])
|
|
|
data['obv_slope'] = (obv.on_balance_volume().diff(5) / (obv.on_balance_volume().shift(5).abs() + 1e-8))
|
|
|
|
|
|
|
|
|
data['returns_1'] = data['close'].pct_change()
|
|
|
data['returns_5'] = data['close'].pct_change(5)
|
|
|
data['returns_20'] = data['close'].pct_change(20)
|
|
|
data['volatility_20'] = data['returns_1'].rolling(20).std()
|
|
|
|
|
|
data['body_size'] = abs(data['close'] - data['open']) / (data['open'] + 1e-8)
|
|
|
data['high_20'] = data['high'].rolling(20).max()
|
|
|
data['low_20'] = data['low'].rolling(20).min()
|
|
|
data['price_position'] = (data['close'] - data['low_20']) / (data['high_20'] - data['low_20'] + 1e-8)
|
|
|
|
|
|
|
|
|
data['fgi_normalized'] = (data['fgi'] - 50) / 50
|
|
|
data['fgi_change'] = data['fgi'].diff() / 50
|
|
|
data['fgi_ma7'] = data['fgi'].rolling(7).mean()
|
|
|
data['fgi_vs_ma'] = (data['fgi'] - data['fgi_ma7']) / 50
|
|
|
|
|
|
|
|
|
data['hour'] = data.index.hour / 24
|
|
|
data['day_of_week'] = data.index.dayofweek / 7
|
|
|
data['us_session'] = ((data.index.hour >= 14) & (data.index.hour < 21)).astype(float)
|
|
|
|
|
|
btc_features = data.dropna()
|
|
|
feature_cols = [col for col in btc_features.columns if col not in ['open', 'high', 'low', 'close', 'volume']]
|
|
|
|
|
|
print(f"✅ Features: {len(feature_cols)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
train_size = int(len(btc_features) * 0.70)
|
|
|
valid_size = int(len(btc_features) * 0.15)
|
|
|
|
|
|
train_data = btc_features.iloc[:train_size].copy()
|
|
|
valid_data = btc_features.iloc[train_size:train_size+valid_size].copy()
|
|
|
test_data = btc_features.iloc[train_size+valid_size:].copy()
|
|
|
|
|
|
print(f"\n📊 Train: {len(train_data):,} | Valid: {len(valid_data):,} | Test: {len(test_data):,}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BitcoinTradingEnv(gym.Env):
|
|
|
def __init__(self, df, initial_balance=10000, episode_length=500, transaction_fee=0.0,
|
|
|
long_bonus=0.0001, short_penalty_threshold=0.8, short_penalty=0.05):
|
|
|
super().__init__()
|
|
|
self.df = df.reset_index(drop=True)
|
|
|
self.initial_balance = initial_balance
|
|
|
self.episode_length = episode_length
|
|
|
self.transaction_fee = transaction_fee
|
|
|
|
|
|
|
|
|
self.long_bonus = long_bonus
|
|
|
self.short_penalty_threshold = short_penalty_threshold
|
|
|
self.short_penalty = short_penalty
|
|
|
|
|
|
self.feature_cols = [col for col in df.columns
|
|
|
if col not in ['open', 'high', 'low', 'close', 'volume']]
|
|
|
|
|
|
self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)
|
|
|
self.observation_space = spaces.Box(
|
|
|
low=-10, high=10,
|
|
|
shape=(len(self.feature_cols) + 5,),
|
|
|
dtype=np.float32
|
|
|
)
|
|
|
self.reset()
|
|
|
|
|
|
def reset(self):
|
|
|
max_start = len(self.df) - self.episode_length - 1
|
|
|
self.start_idx = np.random.randint(100, max(101, max_start))
|
|
|
|
|
|
self.current_step = 0
|
|
|
self.balance = self.initial_balance
|
|
|
self.position = 0.0
|
|
|
self.entry_price = 0.0
|
|
|
self.total_value = self.initial_balance
|
|
|
self.prev_total_value = self.initial_balance
|
|
|
self.max_value = self.initial_balance
|
|
|
|
|
|
|
|
|
self.long_steps = 0
|
|
|
self.short_steps = 0
|
|
|
self.neutral_steps = 0
|
|
|
|
|
|
return self._get_obs()
|
|
|
|
|
|
def _get_obs(self):
|
|
|
idx = self.start_idx + self.current_step
|
|
|
features = self.df.loc[idx, self.feature_cols].values
|
|
|
|
|
|
total_return = (self.total_value / self.initial_balance) - 1
|
|
|
drawdown = (self.max_value - self.total_value) / self.max_value if self.max_value > 0 else 0
|
|
|
|
|
|
portfolio_info = np.array([
|
|
|
self.position,
|
|
|
total_return,
|
|
|
drawdown,
|
|
|
self.df.loc[idx, 'returns_1'],
|
|
|
self.df.loc[idx, 'rsi_14']
|
|
|
], dtype=np.float32)
|
|
|
|
|
|
obs = np.concatenate([features, portfolio_info])
|
|
|
return np.clip(obs, -10, 10).astype(np.float32)
|
|
|
|
|
|
def step(self, action):
|
|
|
idx = self.start_idx + self.current_step
|
|
|
current_price = self.df.loc[idx, 'close']
|
|
|
target_position = np.clip(action[0], -1.0, 1.0)
|
|
|
|
|
|
self.prev_total_value = self.total_value
|
|
|
|
|
|
if abs(target_position - self.position) > 0.1:
|
|
|
if self.position != 0:
|
|
|
self._close_position(current_price)
|
|
|
if abs(target_position) > 0.1:
|
|
|
self._open_position(target_position, current_price)
|
|
|
|
|
|
self._update_total_value(current_price)
|
|
|
self.max_value = max(self.max_value, self.total_value)
|
|
|
|
|
|
|
|
|
if self.position > 0.1:
|
|
|
self.long_steps += 1
|
|
|
elif self.position < -0.1:
|
|
|
self.short_steps += 1
|
|
|
else:
|
|
|
self.neutral_steps += 1
|
|
|
|
|
|
self.current_step += 1
|
|
|
done = (self.current_step >= self.episode_length) or (self.total_value <= self.initial_balance * 0.5)
|
|
|
|
|
|
|
|
|
|
|
|
reward = (self.total_value - self.prev_total_value) / self.initial_balance
|
|
|
|
|
|
|
|
|
if self.position > 0.1:
|
|
|
reward += self.long_bonus
|
|
|
|
|
|
|
|
|
if done:
|
|
|
total_active_steps = self.long_steps + self.short_steps
|
|
|
if total_active_steps > 0:
|
|
|
short_ratio = self.short_steps / total_active_steps
|
|
|
if short_ratio > self.short_penalty_threshold:
|
|
|
|
|
|
reward -= self.short_penalty * (short_ratio - self.short_penalty_threshold) / (1 - self.short_penalty_threshold)
|
|
|
|
|
|
obs = self._get_obs()
|
|
|
info = {
|
|
|
'total_value': self.total_value,
|
|
|
'position': self.position,
|
|
|
'long_steps': self.long_steps,
|
|
|
'short_steps': self.short_steps,
|
|
|
'neutral_steps': self.neutral_steps
|
|
|
}
|
|
|
|
|
|
return obs, reward, done, info
|
|
|
|
|
|
def _update_total_value(self, current_price):
|
|
|
if self.position != 0:
|
|
|
if self.position > 0:
|
|
|
pnl = self.position * self.initial_balance * (current_price / self.entry_price - 1)
|
|
|
else:
|
|
|
pnl = abs(self.position) * self.initial_balance * (1 - current_price / self.entry_price)
|
|
|
self.total_value = self.balance + pnl
|
|
|
else:
|
|
|
self.total_value = self.balance
|
|
|
|
|
|
def _open_position(self, size, price):
|
|
|
self.position = size
|
|
|
self.entry_price = price
|
|
|
|
|
|
def _close_position(self, price):
|
|
|
if self.position > 0:
|
|
|
pnl = self.position * self.initial_balance * (price / self.entry_price - 1)
|
|
|
else:
|
|
|
pnl = abs(self.position) * self.initial_balance * (1 - price / self.entry_price)
|
|
|
|
|
|
pnl -= abs(pnl) * self.transaction_fee
|
|
|
self.balance += pnl
|
|
|
self.position = 0.0
|
|
|
|
|
|
print("✅ Environment class ready (with anti-short bias)")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("="*70)
|
|
|
print(" LOADING SENTIMENT DATA")
|
|
|
print("="*70)
|
|
|
|
|
|
sentiment_file = '/kaggle/input/bitcoin-news-with-sentimen/bitcoin_news_3hour_intervals_with_sentiment.csv'
|
|
|
|
|
|
try:
|
|
|
sentiment_raw = pd.read_csv(sentiment_file)
|
|
|
|
|
|
def parse_time_range(time_str):
|
|
|
parts = str(time_str).split(' ')
|
|
|
if len(parts) >= 2:
|
|
|
date = parts[0]
|
|
|
time_range = parts[1]
|
|
|
start_time = time_range.split('-')[0]
|
|
|
return f"{date} {start_time}:00"
|
|
|
return time_str
|
|
|
|
|
|
sentiment_raw['timestamp'] = sentiment_raw['time_interval'].apply(parse_time_range)
|
|
|
sentiment_raw['timestamp'] = pd.to_datetime(sentiment_raw['timestamp'])
|
|
|
sentiment_raw = sentiment_raw.set_index('timestamp').sort_index()
|
|
|
|
|
|
sentiment_clean = pd.DataFrame(index=sentiment_raw.index)
|
|
|
sentiment_clean['prob_bullish'] = pd.to_numeric(sentiment_raw['prob_bullish'], errors='coerce')
|
|
|
sentiment_clean['prob_bearish'] = pd.to_numeric(sentiment_raw['prob_bearish'], errors='coerce')
|
|
|
sentiment_clean['prob_neutral'] = pd.to_numeric(sentiment_raw['prob_neutral'], errors='coerce')
|
|
|
sentiment_clean['confidence'] = pd.to_numeric(sentiment_raw['sentiment_confidence'], errors='coerce')
|
|
|
sentiment_clean = sentiment_clean.dropna()
|
|
|
|
|
|
|
|
|
for df in [train_data, valid_data, test_data]:
|
|
|
df_temp = df.join(sentiment_clean, how='left')
|
|
|
for col in ['prob_bullish', 'prob_bearish', 'prob_neutral', 'confidence']:
|
|
|
df[col] = df_temp[col].fillna(method='ffill').fillna(method='bfill').fillna(0.33 if col != 'confidence' else 0.5)
|
|
|
|
|
|
df['sentiment_net'] = df['prob_bullish'] - df['prob_bearish']
|
|
|
df['sentiment_strength'] = (df['prob_bullish'] - df['prob_bearish']).abs()
|
|
|
df['sentiment_weighted'] = df['sentiment_net'] * df['confidence']
|
|
|
|
|
|
print(f"✅ Sentiment loaded: {len(sentiment_clean):,} records")
|
|
|
print(f"✅ Features added: 7 sentiment features")
|
|
|
|
|
|
except Exception as e:
|
|
|
print(f"⚠️ Sentiment not loaded: {e}")
|
|
|
for df in [train_data, valid_data, test_data]:
|
|
|
df['sentiment_net'] = 0
|
|
|
df['sentiment_strength'] = 0
|
|
|
df['sentiment_weighted'] = 0
|
|
|
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
|
|
|
print("="*70)
|
|
|
print(" NORMALIZING DATA + CREATING ENVIRONMENTS")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
feature_cols = [col for col in train_data.columns
|
|
|
if col not in ['open', 'high', 'low', 'close', 'volume']]
|
|
|
|
|
|
print(f"📊 Total features: {len(feature_cols)}")
|
|
|
|
|
|
|
|
|
scaler = StandardScaler()
|
|
|
train_data[feature_cols] = scaler.fit_transform(train_data[feature_cols])
|
|
|
valid_data[feature_cols] = scaler.transform(valid_data[feature_cols])
|
|
|
test_data[feature_cols] = scaler.transform(test_data[feature_cols])
|
|
|
|
|
|
|
|
|
for df in [train_data, valid_data, test_data]:
|
|
|
df[feature_cols] = df[feature_cols].clip(-5, 5)
|
|
|
|
|
|
print("✅ Normalization complete (fitted on train only)")
|
|
|
|
|
|
|
|
|
train_env = BitcoinTradingEnv(train_data, episode_length=500)
|
|
|
valid_env = BitcoinTradingEnv(valid_data, episode_length=500)
|
|
|
test_env = BitcoinTradingEnv(test_data, episode_length=500)
|
|
|
|
|
|
state_dim = train_env.observation_space.shape[0]
|
|
|
action_dim = 1
|
|
|
|
|
|
print(f"\n✅ Environments created:")
|
|
|
print(f" State dim: {state_dim}")
|
|
|
print(f" Action dim: {action_dim}")
|
|
|
print(f" Train episodes: ~{len(train_data)//500}")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
|
|
|
import torch.nn as nn
|
|
|
import torch.nn.functional as F
|
|
|
import torch.optim as optim
|
|
|
from torch.distributions import Normal
|
|
|
|
|
|
print("="*70)
|
|
|
print(" PYTORCH SAC AGENT")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Actor(nn.Module):
|
|
|
def __init__(self, state_dim, action_dim, hidden_dim=256):
|
|
|
super().__init__()
|
|
|
self.fc1 = nn.Linear(state_dim, hidden_dim)
|
|
|
self.fc2 = nn.Linear(hidden_dim, hidden_dim)
|
|
|
self.fc3 = nn.Linear(hidden_dim, hidden_dim)
|
|
|
|
|
|
self.mean = nn.Linear(hidden_dim, action_dim)
|
|
|
self.log_std = nn.Linear(hidden_dim, action_dim)
|
|
|
|
|
|
self.LOG_STD_MIN = -20
|
|
|
self.LOG_STD_MAX = 2
|
|
|
|
|
|
def forward(self, state):
|
|
|
x = F.relu(self.fc1(state))
|
|
|
x = F.relu(self.fc2(x))
|
|
|
x = F.relu(self.fc3(x))
|
|
|
|
|
|
mean = self.mean(x)
|
|
|
log_std = self.log_std(x)
|
|
|
log_std = torch.clamp(log_std, self.LOG_STD_MIN, self.LOG_STD_MAX)
|
|
|
|
|
|
return mean, log_std
|
|
|
|
|
|
def sample(self, state):
|
|
|
mean, log_std = self.forward(state)
|
|
|
std = log_std.exp()
|
|
|
|
|
|
normal = Normal(mean, std)
|
|
|
x_t = normal.rsample()
|
|
|
action = torch.tanh(x_t)
|
|
|
|
|
|
|
|
|
log_prob = normal.log_prob(x_t)
|
|
|
log_prob -= torch.log(1 - action.pow(2) + 1e-6)
|
|
|
log_prob = log_prob.sum(dim=-1, keepdim=True)
|
|
|
|
|
|
return action, log_prob, mean
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Critic(nn.Module):
|
|
|
def __init__(self, state_dim, action_dim, hidden_dim=256):
|
|
|
super().__init__()
|
|
|
|
|
|
self.fc1_1 = nn.Linear(state_dim + action_dim, hidden_dim)
|
|
|
self.fc1_2 = nn.Linear(hidden_dim, hidden_dim)
|
|
|
self.fc1_3 = nn.Linear(hidden_dim, hidden_dim)
|
|
|
self.fc1_out = nn.Linear(hidden_dim, 1)
|
|
|
|
|
|
|
|
|
self.fc2_1 = nn.Linear(state_dim + action_dim, hidden_dim)
|
|
|
self.fc2_2 = nn.Linear(hidden_dim, hidden_dim)
|
|
|
self.fc2_3 = nn.Linear(hidden_dim, hidden_dim)
|
|
|
self.fc2_out = nn.Linear(hidden_dim, 1)
|
|
|
|
|
|
def forward(self, state, action):
|
|
|
x = torch.cat([state, action], dim=-1)
|
|
|
|
|
|
q1 = F.relu(self.fc1_1(x))
|
|
|
q1 = F.relu(self.fc1_2(q1))
|
|
|
q1 = F.relu(self.fc1_3(q1))
|
|
|
q1 = self.fc1_out(q1)
|
|
|
|
|
|
q2 = F.relu(self.fc2_1(x))
|
|
|
q2 = F.relu(self.fc2_2(q2))
|
|
|
q2 = F.relu(self.fc2_3(q2))
|
|
|
q2 = self.fc2_out(q2)
|
|
|
|
|
|
return q1, q2
|
|
|
|
|
|
def q1(self, state, action):
|
|
|
x = torch.cat([state, action], dim=-1)
|
|
|
q1 = F.relu(self.fc1_1(x))
|
|
|
q1 = F.relu(self.fc1_2(q1))
|
|
|
q1 = F.relu(self.fc1_3(q1))
|
|
|
return self.fc1_out(q1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SACAgent:
|
|
|
def __init__(self, state_dim, action_dim, device,
|
|
|
actor_lr=3e-4, critic_lr=3e-4, alpha_lr=3e-4,
|
|
|
gamma=0.99, tau=0.005, initial_alpha=0.2):
|
|
|
|
|
|
self.device = device
|
|
|
self.gamma = gamma
|
|
|
self.tau = tau
|
|
|
self.action_dim = action_dim
|
|
|
|
|
|
|
|
|
self.actor = Actor(state_dim, action_dim).to(device)
|
|
|
self.critic = Critic(state_dim, action_dim).to(device)
|
|
|
self.critic_target = Critic(state_dim, action_dim).to(device)
|
|
|
self.critic_target.load_state_dict(self.critic.state_dict())
|
|
|
|
|
|
|
|
|
self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=actor_lr)
|
|
|
self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=critic_lr)
|
|
|
|
|
|
|
|
|
self.target_entropy = -action_dim
|
|
|
self.log_alpha = torch.tensor(np.log(initial_alpha), requires_grad=True, device=device)
|
|
|
self.alpha_optimizer = optim.Adam([self.log_alpha], lr=alpha_lr)
|
|
|
|
|
|
@property
|
|
|
def alpha(self):
|
|
|
return self.log_alpha.exp()
|
|
|
|
|
|
def select_action(self, state, deterministic=False):
|
|
|
with torch.no_grad():
|
|
|
state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
|
|
|
if deterministic:
|
|
|
mean, _ = self.actor(state)
|
|
|
action = torch.tanh(mean)
|
|
|
else:
|
|
|
action, _, _ = self.actor.sample(state)
|
|
|
return action.cpu().numpy()[0]
|
|
|
|
|
|
def update(self, batch):
|
|
|
states, actions, rewards, next_states, dones = batch
|
|
|
|
|
|
states = torch.FloatTensor(states).to(self.device)
|
|
|
actions = torch.FloatTensor(actions).to(self.device)
|
|
|
rewards = torch.FloatTensor(rewards).to(self.device)
|
|
|
next_states = torch.FloatTensor(next_states).to(self.device)
|
|
|
dones = torch.FloatTensor(dones).to(self.device)
|
|
|
|
|
|
|
|
|
with torch.no_grad():
|
|
|
next_actions, next_log_probs, _ = self.actor.sample(next_states)
|
|
|
q1_target, q2_target = self.critic_target(next_states, next_actions)
|
|
|
q_target = torch.min(q1_target, q2_target)
|
|
|
target_q = rewards + (1 - dones) * self.gamma * (q_target - self.alpha * next_log_probs)
|
|
|
|
|
|
q1, q2 = self.critic(states, actions)
|
|
|
critic_loss = F.mse_loss(q1, target_q) + F.mse_loss(q2, target_q)
|
|
|
|
|
|
self.critic_optimizer.zero_grad()
|
|
|
critic_loss.backward()
|
|
|
torch.nn.utils.clip_grad_norm_(self.critic.parameters(), 1.0)
|
|
|
self.critic_optimizer.step()
|
|
|
|
|
|
|
|
|
new_actions, log_probs, _ = self.actor.sample(states)
|
|
|
q1_new, q2_new = self.critic(states, new_actions)
|
|
|
q_new = torch.min(q1_new, q2_new)
|
|
|
|
|
|
actor_loss = (self.alpha.detach() * log_probs - q_new).mean()
|
|
|
|
|
|
self.actor_optimizer.zero_grad()
|
|
|
actor_loss.backward()
|
|
|
torch.nn.utils.clip_grad_norm_(self.actor.parameters(), 1.0)
|
|
|
self.actor_optimizer.step()
|
|
|
|
|
|
|
|
|
alpha_loss = -(self.log_alpha * (log_probs + self.target_entropy).detach()).mean()
|
|
|
|
|
|
self.alpha_optimizer.zero_grad()
|
|
|
alpha_loss.backward()
|
|
|
self.alpha_optimizer.step()
|
|
|
|
|
|
|
|
|
for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
|
|
|
target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)
|
|
|
|
|
|
return {
|
|
|
'critic_loss': critic_loss.item(),
|
|
|
'actor_loss': actor_loss.item(),
|
|
|
'alpha': self.alpha.item(),
|
|
|
'q_value': q1.mean().item()
|
|
|
}
|
|
|
|
|
|
def save(self, path):
|
|
|
torch.save({
|
|
|
'actor': self.actor.state_dict(),
|
|
|
'critic': self.critic.state_dict(),
|
|
|
'critic_target': self.critic_target.state_dict(),
|
|
|
'log_alpha': self.log_alpha,
|
|
|
}, path)
|
|
|
|
|
|
def load(self, path):
|
|
|
checkpoint = torch.load(path)
|
|
|
self.actor.load_state_dict(checkpoint['actor'])
|
|
|
self.critic.load_state_dict(checkpoint['critic'])
|
|
|
self.critic_target.load_state_dict(checkpoint['critic_target'])
|
|
|
self.log_alpha = checkpoint['log_alpha']
|
|
|
|
|
|
print("✅ SACAgent class defined (PyTorch)")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("="*70)
|
|
|
print(" REPLAY BUFFER")
|
|
|
print("="*70)
|
|
|
|
|
|
class ReplayBuffer:
|
|
|
def __init__(self, state_dim, action_dim, max_size=1_000_000):
|
|
|
self.max_size = max_size
|
|
|
self.ptr = 0
|
|
|
self.size = 0
|
|
|
|
|
|
self.states = np.zeros((max_size, state_dim), dtype=np.float32)
|
|
|
self.actions = np.zeros((max_size, action_dim), dtype=np.float32)
|
|
|
self.rewards = np.zeros((max_size, 1), dtype=np.float32)
|
|
|
self.next_states = np.zeros((max_size, state_dim), dtype=np.float32)
|
|
|
self.dones = np.zeros((max_size, 1), dtype=np.float32)
|
|
|
|
|
|
mem_gb = (self.states.nbytes + self.actions.nbytes + self.rewards.nbytes +
|
|
|
self.next_states.nbytes + self.dones.nbytes) / 1e9
|
|
|
print(f"📦 Buffer capacity: {max_size:,} | Memory: {mem_gb:.2f} GB")
|
|
|
|
|
|
def add(self, state, action, reward, next_state, done):
|
|
|
self.states[self.ptr] = state
|
|
|
self.actions[self.ptr] = action
|
|
|
self.rewards[self.ptr] = reward
|
|
|
self.next_states[self.ptr] = next_state
|
|
|
self.dones[self.ptr] = done
|
|
|
|
|
|
self.ptr = (self.ptr + 1) % self.max_size
|
|
|
self.size = min(self.size + 1, self.max_size)
|
|
|
|
|
|
def sample(self, batch_size):
|
|
|
idx = np.random.randint(0, self.size, size=batch_size)
|
|
|
return (
|
|
|
self.states[idx],
|
|
|
self.actions[idx],
|
|
|
self.rewards[idx],
|
|
|
self.next_states[idx],
|
|
|
self.dones[idx]
|
|
|
)
|
|
|
|
|
|
print("✅ ReplayBuffer defined")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from tqdm.notebook import tqdm
|
|
|
import time
|
|
|
|
|
|
print("="*70)
|
|
|
print(" TRAINING FUNCTION")
|
|
|
print("="*70)
|
|
|
|
|
|
def train_sac(agent, env, valid_env, buffer,
|
|
|
total_timesteps=700_000,
|
|
|
warmup_steps=10_000,
|
|
|
batch_size=1024,
|
|
|
update_freq=1,
|
|
|
save_path="sac_v9"):
|
|
|
|
|
|
print(f"\n🚀 Training Configuration:")
|
|
|
print(f" Total steps: {total_timesteps:,}")
|
|
|
print(f" Warmup: {warmup_steps:,}")
|
|
|
print(f" Batch size: {batch_size}")
|
|
|
print(f" Device: {agent.device}")
|
|
|
|
|
|
|
|
|
episode_rewards = []
|
|
|
episode_lengths = []
|
|
|
eval_rewards = []
|
|
|
best_reward = -np.inf
|
|
|
best_eval = -np.inf
|
|
|
|
|
|
|
|
|
critic_losses = []
|
|
|
actor_losses = []
|
|
|
q_values = []
|
|
|
|
|
|
state = env.reset()
|
|
|
episode_reward = 0
|
|
|
episode_length = 0
|
|
|
episode_count = 0
|
|
|
total_trades = 0
|
|
|
|
|
|
start_time = time.time()
|
|
|
|
|
|
pbar = tqdm(range(total_timesteps), desc="Training")
|
|
|
|
|
|
for step in pbar:
|
|
|
|
|
|
if step < warmup_steps:
|
|
|
action = env.action_space.sample()
|
|
|
else:
|
|
|
action = agent.select_action(state, deterministic=False)
|
|
|
|
|
|
|
|
|
next_state, reward, done, info = env.step(action)
|
|
|
|
|
|
|
|
|
buffer.add(state, action, reward, next_state, float(done))
|
|
|
|
|
|
state = next_state
|
|
|
episode_reward += reward
|
|
|
episode_length += 1
|
|
|
|
|
|
|
|
|
stats = None
|
|
|
if step >= warmup_steps and step % update_freq == 0:
|
|
|
batch = buffer.sample(batch_size)
|
|
|
stats = agent.update(batch)
|
|
|
critic_losses.append(stats['critic_loss'])
|
|
|
actor_losses.append(stats['actor_loss'])
|
|
|
q_values.append(stats['q_value'])
|
|
|
|
|
|
|
|
|
if done:
|
|
|
episode_rewards.append(episode_reward)
|
|
|
episode_lengths.append(episode_length)
|
|
|
episode_count += 1
|
|
|
|
|
|
|
|
|
final_value = info.get('total_value', 10000)
|
|
|
pnl_pct = (final_value / 10000 - 1) * 100
|
|
|
|
|
|
|
|
|
long_steps = info.get('long_steps', 0)
|
|
|
short_steps = info.get('short_steps', 0)
|
|
|
neutral_steps = info.get('neutral_steps', 0)
|
|
|
total_active = long_steps + short_steps
|
|
|
long_pct = (long_steps / total_active * 100) if total_active > 0 else 0
|
|
|
short_pct = (short_steps / total_active * 100) if total_active > 0 else 0
|
|
|
|
|
|
|
|
|
avg_reward = np.mean(episode_rewards[-10:]) if len(episode_rewards) >= 10 else episode_reward
|
|
|
avg_q = np.mean(q_values[-100:]) if q_values else 0
|
|
|
avg_critic = np.mean(critic_losses[-100:]) if critic_losses else 0
|
|
|
|
|
|
pbar.set_postfix({
|
|
|
'ep': episode_count,
|
|
|
'R': f'{episode_reward:.4f}',
|
|
|
'avg10': f'{avg_reward:.4f}',
|
|
|
'PnL%': f'{pnl_pct:+.2f}',
|
|
|
'L/S': f'{long_pct:.0f}/{short_pct:.0f}',
|
|
|
'α': f'{agent.alpha.item():.3f}',
|
|
|
})
|
|
|
|
|
|
|
|
|
eval_reward, eval_pnl, eval_long_pct = evaluate_agent(agent, valid_env, n_episodes=1)
|
|
|
eval_rewards.append(eval_reward)
|
|
|
|
|
|
|
|
|
elapsed = time.time() - start_time
|
|
|
steps_per_sec = (step + 1) / elapsed
|
|
|
|
|
|
print(f"\n{'='*60}")
|
|
|
print(f"📊 Episode {episode_count} Complete | Step {step+1:,}/{total_timesteps:,}")
|
|
|
print(f"{'='*60}")
|
|
|
print(f" 🎮 TRAIN:")
|
|
|
print(f" Reward: {episode_reward:.4f} | PnL: {pnl_pct:+.2f}%")
|
|
|
print(f" Length: {episode_length} steps")
|
|
|
print(f" Avg (last 10): {avg_reward:.4f}")
|
|
|
print(f" 📊 POSITION BALANCE:")
|
|
|
print(f" Long: {long_steps} steps ({long_pct:.1f}%)")
|
|
|
print(f" Short: {short_steps} steps ({short_pct:.1f}%)")
|
|
|
print(f" Neutral: {neutral_steps} steps")
|
|
|
if short_pct > 80:
|
|
|
print(f" ⚠️ EXCESSIVE SHORTING - PENALTY APPLIED")
|
|
|
print(f" 📈 EVAL (validation):")
|
|
|
print(f" Reward: {eval_reward:.4f} | PnL: {eval_pnl:+.2f}%")
|
|
|
print(f" Long%: {eval_long_pct:.1f}%")
|
|
|
print(f" Avg (last 5): {np.mean(eval_rewards[-5:]):.4f}")
|
|
|
print(f" 🧠 AGENT:")
|
|
|
print(f" Alpha: {agent.alpha.item():.4f}")
|
|
|
print(f" Q-value: {avg_q:.3f}")
|
|
|
print(f" Critic loss: {avg_critic:.5f}")
|
|
|
print(f" ⚡ Speed: {steps_per_sec:.0f} steps/sec")
|
|
|
print(f" 💾 Buffer: {buffer.size:,} transitions")
|
|
|
|
|
|
|
|
|
if episode_reward > best_reward:
|
|
|
best_reward = episode_reward
|
|
|
agent.save(f"{save_path}_best_train.pt")
|
|
|
print(f" 🏆 NEW BEST TRAIN: {best_reward:.4f}")
|
|
|
|
|
|
|
|
|
if eval_reward > best_eval:
|
|
|
best_eval = eval_reward
|
|
|
agent.save(f"{save_path}_best_eval.pt")
|
|
|
print(f" 🏆 NEW BEST EVAL: {best_eval:.4f}")
|
|
|
|
|
|
|
|
|
state = env.reset()
|
|
|
episode_reward = 0
|
|
|
episode_length = 0
|
|
|
|
|
|
|
|
|
agent.save(f"{save_path}_final.pt")
|
|
|
|
|
|
total_time = time.time() - start_time
|
|
|
print(f"\n{'='*70}")
|
|
|
print(f" TRAINING COMPLETE")
|
|
|
print(f"{'='*70}")
|
|
|
print(f" Total time: {total_time/60:.1f} min")
|
|
|
print(f" Episodes: {episode_count}")
|
|
|
print(f" Best train reward: {best_reward:.4f}")
|
|
|
print(f" Best eval reward: {best_eval:.4f}")
|
|
|
print(f" Avg speed: {total_timesteps/total_time:.0f} steps/sec")
|
|
|
|
|
|
return episode_rewards, eval_rewards
|
|
|
|
|
|
|
|
|
def evaluate_agent(agent, env, n_episodes=1):
|
|
|
"""Run evaluation episodes"""
|
|
|
total_reward = 0
|
|
|
total_pnl = 0
|
|
|
total_long_pct = 0
|
|
|
|
|
|
for _ in range(n_episodes):
|
|
|
state = env.reset()
|
|
|
episode_reward = 0
|
|
|
done = False
|
|
|
|
|
|
while not done:
|
|
|
action = agent.select_action(state, deterministic=True)
|
|
|
state, reward, done, info = env.step(action)
|
|
|
episode_reward += reward
|
|
|
|
|
|
total_reward += episode_reward
|
|
|
final_value = info.get('total_value', 10000)
|
|
|
total_pnl += (final_value / 10000 - 1) * 100
|
|
|
|
|
|
|
|
|
long_steps = info.get('long_steps', 0)
|
|
|
short_steps = info.get('short_steps', 0)
|
|
|
total_active = long_steps + short_steps
|
|
|
total_long_pct += (long_steps / total_active * 100) if total_active > 0 else 0
|
|
|
|
|
|
return total_reward / n_episodes, total_pnl / n_episodes, total_long_pct / n_episodes
|
|
|
|
|
|
|
|
|
print("✅ Training function ready (with per-episode eval + position tracking)")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("="*70)
|
|
|
print(" CREATING AGENT + BUFFER")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
agent = SACAgent(
|
|
|
state_dim=state_dim,
|
|
|
action_dim=action_dim,
|
|
|
device=device,
|
|
|
actor_lr=3e-4,
|
|
|
critic_lr=3e-4,
|
|
|
alpha_lr=3e-4,
|
|
|
gamma=0.99,
|
|
|
tau=0.005,
|
|
|
initial_alpha=0.2
|
|
|
)
|
|
|
|
|
|
|
|
|
buffer = ReplayBuffer(
|
|
|
state_dim=state_dim,
|
|
|
action_dim=action_dim,
|
|
|
max_size=1_000_000
|
|
|
)
|
|
|
|
|
|
|
|
|
total_params = sum(p.numel() for p in agent.actor.parameters()) + \
|
|
|
sum(p.numel() for p in agent.critic.parameters())
|
|
|
|
|
|
print(f"\n✅ Agent created on {device}")
|
|
|
print(f" Actor params: {sum(p.numel() for p in agent.actor.parameters()):,}")
|
|
|
print(f" Critic params: {sum(p.numel() for p in agent.critic.parameters()):,}")
|
|
|
print(f" Total params: {total_params:,}")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("="*70)
|
|
|
print(" STARTING SAC TRAINING")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
TOTAL_STEPS = 700_000
|
|
|
WARMUP_STEPS = 10_000
|
|
|
BATCH_SIZE = 1024
|
|
|
UPDATE_FREQ = 1
|
|
|
|
|
|
print(f"\n📋 Configuration:")
|
|
|
print(f" Steps: {TOTAL_STEPS:,}")
|
|
|
print(f" Batch: {BATCH_SIZE}")
|
|
|
print(f" Train env: {len(train_data):,} candles")
|
|
|
print(f" Valid env: {len(valid_data):,} candles")
|
|
|
print(f" Device: {device}")
|
|
|
|
|
|
|
|
|
episode_rewards, eval_rewards = train_sac(
|
|
|
agent=agent,
|
|
|
env=train_env,
|
|
|
valid_env=valid_env,
|
|
|
buffer=buffer,
|
|
|
total_timesteps=TOTAL_STEPS,
|
|
|
warmup_steps=WARMUP_STEPS,
|
|
|
batch_size=BATCH_SIZE,
|
|
|
update_freq=UPDATE_FREQ,
|
|
|
save_path="sac_v9_pytorch"
|
|
|
)
|
|
|
|
|
|
print("\n" + "="*70)
|
|
|
print(" TRAINING COMPLETE")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
import matplotlib.patches as mpatches
|
|
|
from matplotlib.gridspec import GridSpec
|
|
|
import seaborn as sns
|
|
|
|
|
|
|
|
|
plt.style.use('dark_background')
|
|
|
sns.set_palette("husl")
|
|
|
|
|
|
print("="*70)
|
|
|
print(" LOADING TRAINED MODELS")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
MODEL_PATH = '/kaggle/input/sac1/pytorch/default/1/'
|
|
|
FINAL_MODEL = MODEL_PATH + 'sac_v9_pytorch_final.pt'
|
|
|
BEST_TRAIN_MODEL = MODEL_PATH + 'sac_v9_pytorch_best_train.pt'
|
|
|
BEST_EVAL_MODEL = MODEL_PATH + 'sac_v9_pytorch_best_eval.pt'
|
|
|
|
|
|
def load_model(agent, checkpoint_path, name="model"):
|
|
|
"""Load model weights from checkpoint"""
|
|
|
try:
|
|
|
checkpoint = torch.load(checkpoint_path, map_location=device)
|
|
|
agent.actor.load_state_dict(checkpoint['actor'])
|
|
|
agent.critic.load_state_dict(checkpoint['critic'])
|
|
|
agent.critic_target.load_state_dict(checkpoint['critic_target'])
|
|
|
if 'log_alpha' in checkpoint:
|
|
|
agent.log_alpha = checkpoint['log_alpha']
|
|
|
print(f"✅ {name} loaded successfully!")
|
|
|
return True
|
|
|
except Exception as e:
|
|
|
print(f"❌ Error loading {name}: {e}")
|
|
|
return False
|
|
|
|
|
|
|
|
|
eval_agent = SACAgent(
|
|
|
state_dim=state_dim,
|
|
|
action_dim=action_dim,
|
|
|
device=device
|
|
|
)
|
|
|
|
|
|
|
|
|
load_model(eval_agent, BEST_EVAL_MODEL, "Best Eval Model")
|
|
|
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("="*70)
|
|
|
print(" TRAINING SUMMARY DASHBOARD")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
fig = plt.figure(figsize=(16, 10))
|
|
|
fig.suptitle('SAC Bitcoin Agent - Training Summary', fontsize=20, fontweight='bold', color='white')
|
|
|
|
|
|
|
|
|
gs = GridSpec(3, 3, figure=fig, hspace=0.4, wspace=0.3)
|
|
|
|
|
|
|
|
|
ax_config = fig.add_subplot(gs[0, 0])
|
|
|
ax_config.axis('off')
|
|
|
config_text = """
|
|
|
📋 CONFIGURATION
|
|
|
─────────────────────
|
|
|
Architecture: SAC
|
|
|
Hidden Dim: 256
|
|
|
Learning Rate: 3e-4
|
|
|
Buffer Size: 1,000,000
|
|
|
Batch Size: 1,024
|
|
|
Total Steps: 700,000
|
|
|
Gamma: 0.99
|
|
|
Tau: 0.005
|
|
|
Auto Alpha: True
|
|
|
"""
|
|
|
ax_config.text(0.1, 0.5, config_text, fontsize=11, verticalalignment='center',
|
|
|
fontfamily='monospace', color='cyan',
|
|
|
bbox=dict(boxstyle='round', facecolor='#1a1a2e', edgecolor='cyan', alpha=0.8))
|
|
|
|
|
|
|
|
|
ax_features = fig.add_subplot(gs[0, 1])
|
|
|
ax_features.axis('off')
|
|
|
features_text = """
|
|
|
🎯 TRAINING FEATURES
|
|
|
─────────────────────────
|
|
|
✅ Single Timeframe (15m)
|
|
|
✅ Technical Indicators
|
|
|
✅ Sentiment Features
|
|
|
✅ Standard Normalization
|
|
|
✅ Action Scaling [-1, 1]
|
|
|
✅ Fee: 0.1%
|
|
|
"""
|
|
|
ax_features.text(0.1, 0.5, features_text, fontsize=11, verticalalignment='center',
|
|
|
fontfamily='monospace', color='lime',
|
|
|
bbox=dict(boxstyle='round', facecolor='#1a1a2e', edgecolor='lime', alpha=0.8))
|
|
|
|
|
|
|
|
|
ax_data = fig.add_subplot(gs[0, 2])
|
|
|
ax_data.axis('off')
|
|
|
data_text = """
|
|
|
📊 DATA SPLIT
|
|
|
─────────────────────
|
|
|
Training: 70%
|
|
|
Validation: 15%
|
|
|
Test: 15%
|
|
|
Total Samples: ~35k
|
|
|
"""
|
|
|
ax_data.text(0.1, 0.5, data_text, fontsize=11, verticalalignment='center',
|
|
|
fontfamily='monospace', color='orange',
|
|
|
bbox=dict(boxstyle='round', facecolor='#1a1a2e', edgecolor='orange', alpha=0.8))
|
|
|
|
|
|
|
|
|
ax_timeline = fig.add_subplot(gs[1, :])
|
|
|
ax_timeline.set_title('Training Progress Timeline', fontsize=14, fontweight='bold')
|
|
|
steps = np.linspace(0, 700000, 100)
|
|
|
progress = 100 * (1 - np.exp(-steps/200000))
|
|
|
ax_timeline.fill_between(steps/1000, progress, alpha=0.3, color='cyan')
|
|
|
ax_timeline.plot(steps/1000, progress, 'cyan', linewidth=2)
|
|
|
ax_timeline.set_xlabel('Steps (thousands)', fontsize=12)
|
|
|
ax_timeline.set_ylabel('Estimated Progress %', fontsize=12)
|
|
|
ax_timeline.set_ylim(0, 105)
|
|
|
ax_timeline.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax_model = fig.add_subplot(gs[2, :])
|
|
|
ax_model.axis('off')
|
|
|
model_info = f"""
|
|
|
🤖 LOADED MODEL INFO
|
|
|
════════════════════════════════════════════════════════════════════════════════
|
|
|
📁 Model Path: {MODEL_PATH}
|
|
|
🎯 Best Eval Model: sac_v9_pytorch_best_eval.pt
|
|
|
🏋️ Best Train Model: sac_v9_pytorch_best_train.pt
|
|
|
🏁 Final Model: sac_v9_pytorch_final.pt
|
|
|
|
|
|
💡 Actor Parameters: {sum(p.numel() for p in eval_agent.actor.parameters()):,}
|
|
|
💡 Critic Parameters: {sum(p.numel() for p in eval_agent.critic.parameters()):,}
|
|
|
════════════════════════════════════════════════════════════════════════════════
|
|
|
"""
|
|
|
ax_model.text(0.5, 0.5, model_info, fontsize=11, verticalalignment='center',
|
|
|
horizontalalignment='center', fontfamily='monospace', color='white',
|
|
|
bbox=dict(boxstyle='round', facecolor='#0d1117', edgecolor='white', alpha=0.9))
|
|
|
|
|
|
plt.tight_layout()
|
|
|
plt.show()
|
|
|
|
|
|
print("\n✅ Training summary visualization complete!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_backtest(agent, env, df, name="Agent", verbose=True):
|
|
|
"""
|
|
|
Run comprehensive backtest and collect detailed metrics.
|
|
|
|
|
|
Returns:
|
|
|
dict: Complete backtest results including all metrics and history
|
|
|
"""
|
|
|
state = env.reset()
|
|
|
|
|
|
if isinstance(state, tuple):
|
|
|
state = state[0]
|
|
|
done = False
|
|
|
|
|
|
|
|
|
positions = []
|
|
|
portfolio_values = [env.initial_balance]
|
|
|
actions = []
|
|
|
rewards = []
|
|
|
prices = []
|
|
|
timestamps = []
|
|
|
|
|
|
step = 0
|
|
|
total_reward = 0
|
|
|
|
|
|
while not done:
|
|
|
|
|
|
action = agent.select_action(state, deterministic=True)
|
|
|
result = env.step(action)
|
|
|
|
|
|
if len(result) == 5:
|
|
|
next_state, reward, terminated, truncated, info = result
|
|
|
done = terminated or truncated
|
|
|
else:
|
|
|
next_state, reward, done, info = result
|
|
|
|
|
|
|
|
|
positions.append(env.position)
|
|
|
portfolio_values.append(env.total_value)
|
|
|
actions.append(action[0] if isinstance(action, np.ndarray) else action)
|
|
|
rewards.append(reward)
|
|
|
|
|
|
if step < len(df):
|
|
|
prices.append(df['close'].iloc[step])
|
|
|
if 'timestamp' in df.columns:
|
|
|
timestamps.append(df['timestamp'].iloc[step])
|
|
|
else:
|
|
|
timestamps.append(step)
|
|
|
|
|
|
state = next_state
|
|
|
total_reward += reward
|
|
|
step += 1
|
|
|
|
|
|
|
|
|
portfolio_values = np.array(portfolio_values)
|
|
|
positions = np.array(positions)
|
|
|
actions = np.array(actions)
|
|
|
rewards = np.array(rewards)
|
|
|
prices = np.array(prices[:len(portfolio_values)-1])
|
|
|
|
|
|
|
|
|
portfolio_returns = np.diff(portfolio_values) / portfolio_values[:-1]
|
|
|
portfolio_returns = np.nan_to_num(portfolio_returns, nan=0.0, posinf=0.0, neginf=0.0)
|
|
|
|
|
|
|
|
|
total_return = (portfolio_values[-1] / portfolio_values[0] - 1) * 100
|
|
|
|
|
|
|
|
|
bars_per_year = 4 * 24 * 365
|
|
|
mean_return = np.mean(portfolio_returns)
|
|
|
std_return = np.std(portfolio_returns)
|
|
|
sharpe = np.sqrt(bars_per_year) * mean_return / (std_return + 1e-10)
|
|
|
|
|
|
|
|
|
downside_returns = portfolio_returns[portfolio_returns < 0]
|
|
|
downside_std = np.std(downside_returns) if len(downside_returns) > 0 else 1e-10
|
|
|
sortino = np.sqrt(bars_per_year) * mean_return / (downside_std + 1e-10)
|
|
|
|
|
|
|
|
|
running_max = np.maximum.accumulate(portfolio_values)
|
|
|
drawdowns = (portfolio_values - running_max) / running_max
|
|
|
max_drawdown = np.min(drawdowns) * 100
|
|
|
|
|
|
|
|
|
n_bars = len(portfolio_values)
|
|
|
annualized_return = ((portfolio_values[-1] / portfolio_values[0]) ** (bars_per_year / n_bars) - 1) * 100
|
|
|
calmar = annualized_return / (abs(max_drawdown) + 1e-10)
|
|
|
|
|
|
|
|
|
winning_steps = np.sum(portfolio_returns > 0)
|
|
|
total_trades = np.sum(portfolio_returns != 0)
|
|
|
win_rate = (winning_steps / total_trades * 100) if total_trades > 0 else 0
|
|
|
|
|
|
|
|
|
gross_profit = np.sum(portfolio_returns[portfolio_returns > 0])
|
|
|
gross_loss = abs(np.sum(portfolio_returns[portfolio_returns < 0]))
|
|
|
profit_factor = gross_profit / (gross_loss + 1e-10)
|
|
|
|
|
|
|
|
|
long_pct = np.sum(positions > 0.1) / len(positions) * 100 if len(positions) > 0 else 0
|
|
|
short_pct = np.sum(positions < -0.1) / len(positions) * 100 if len(positions) > 0 else 0
|
|
|
neutral_pct = 100 - long_pct - short_pct
|
|
|
|
|
|
results = {
|
|
|
'name': name,
|
|
|
'total_return': total_return,
|
|
|
'sharpe': sharpe,
|
|
|
'sortino': sortino,
|
|
|
'max_drawdown': max_drawdown,
|
|
|
'calmar': calmar,
|
|
|
'win_rate': win_rate,
|
|
|
'profit_factor': profit_factor,
|
|
|
'total_reward': total_reward,
|
|
|
'portfolio_values': portfolio_values,
|
|
|
'positions': positions,
|
|
|
'actions': actions,
|
|
|
'rewards': rewards,
|
|
|
'prices': prices,
|
|
|
'timestamps': timestamps,
|
|
|
'portfolio_returns': portfolio_returns,
|
|
|
'drawdowns': drawdowns,
|
|
|
'long_pct': long_pct,
|
|
|
'short_pct': short_pct,
|
|
|
'neutral_pct': neutral_pct,
|
|
|
'n_steps': step
|
|
|
}
|
|
|
|
|
|
if verbose:
|
|
|
print(f"\n{'='*60}")
|
|
|
print(f" {name} BACKTEST RESULTS")
|
|
|
print(f"{'='*60}")
|
|
|
print(f"📈 Total Return: {total_return:>10.2f}%")
|
|
|
print(f"📊 Sharpe Ratio: {sharpe:>10.3f}")
|
|
|
print(f"📊 Sortino Ratio: {sortino:>10.3f}")
|
|
|
print(f"📉 Max Drawdown: {max_drawdown:>10.2f}%")
|
|
|
print(f"📊 Calmar Ratio: {calmar:>10.3f}")
|
|
|
print(f"🎯 Win Rate: {win_rate:>10.1f}%")
|
|
|
print(f"💰 Profit Factor: {profit_factor:>10.2f}")
|
|
|
print(f"🔄 Total Steps: {step:>10,}")
|
|
|
print(f"{'='*60}")
|
|
|
|
|
|
return results
|
|
|
|
|
|
print("✅ Backtesting function defined!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("="*70)
|
|
|
print(" TESTING ON UNSEEN DATA (Test Split)")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
print(f"\n📊 Test Data: {len(test_data):,} samples")
|
|
|
if 'timestamp' in test_data.columns:
|
|
|
print(f"📅 Period: {test_data['timestamp'].iloc[0]} to {test_data['timestamp'].iloc[-1]}")
|
|
|
|
|
|
|
|
|
class SequentialBacktestEnv(BitcoinTradingEnv):
|
|
|
"""Environment for sequential backtesting - starts from index 0"""
|
|
|
def reset(self):
|
|
|
self.start_idx = 0
|
|
|
self.current_step = 0
|
|
|
self.balance = self.initial_balance
|
|
|
self.position = 0.0
|
|
|
self.entry_price = 0.0
|
|
|
self.total_value = self.initial_balance
|
|
|
self.prev_total_value = self.initial_balance
|
|
|
self.max_value = self.initial_balance
|
|
|
self.long_steps = 0
|
|
|
self.short_steps = 0
|
|
|
self.neutral_steps = 0
|
|
|
return self._get_obs()
|
|
|
|
|
|
|
|
|
models_to_test = [
|
|
|
(BEST_EVAL_MODEL, "Best Eval Model"),
|
|
|
(BEST_TRAIN_MODEL, "Best Train Model"),
|
|
|
(FINAL_MODEL, "Final Model")
|
|
|
]
|
|
|
|
|
|
all_results = {}
|
|
|
|
|
|
for model_path, model_name in models_to_test:
|
|
|
print(f"\n🔄 Testing {model_name}...")
|
|
|
|
|
|
|
|
|
test_agent = SACAgent(state_dim=state_dim, action_dim=action_dim, device=device)
|
|
|
if load_model(test_agent, model_path, model_name):
|
|
|
|
|
|
model_test_env = SequentialBacktestEnv(
|
|
|
df=test_data,
|
|
|
initial_balance=100000,
|
|
|
episode_length=len(test_data) - 10,
|
|
|
transaction_fee=0.001
|
|
|
)
|
|
|
results = run_backtest(test_agent, model_test_env, test_data, name=model_name, verbose=True)
|
|
|
all_results[model_name] = results
|
|
|
|
|
|
|
|
|
print("\n🔄 Calculating Buy & Hold baseline...")
|
|
|
bh_initial_price = test_data['close'].iloc[0]
|
|
|
bh_final_price = test_data['close'].iloc[-1]
|
|
|
bh_return = (bh_final_price / bh_initial_price - 1) * 100
|
|
|
bh_prices = test_data['close'].values
|
|
|
bh_returns = np.diff(bh_prices) / bh_prices[:-1]
|
|
|
bh_cumulative = 100000 * np.cumprod(1 + bh_returns)
|
|
|
bh_cumulative = np.insert(bh_cumulative, 0, 100000)
|
|
|
bh_max_dd = (np.min(bh_cumulative / np.maximum.accumulate(bh_cumulative)) - 1) * 100
|
|
|
|
|
|
print(f"\n{'='*60}")
|
|
|
print(f" BUY & HOLD BASELINE")
|
|
|
print(f"{'='*60}")
|
|
|
print(f"📈 Total Return: {bh_return:>10.2f}%")
|
|
|
print(f"📉 Max Drawdown: {bh_max_dd:>10.2f}%")
|
|
|
print(f"{'='*60}")
|
|
|
|
|
|
|
|
|
all_results['Buy & Hold'] = {
|
|
|
'name': 'Buy & Hold',
|
|
|
'total_return': bh_return,
|
|
|
'max_drawdown': bh_max_dd,
|
|
|
'portfolio_values': bh_cumulative,
|
|
|
'sharpe': 0,
|
|
|
'sortino': 0
|
|
|
}
|
|
|
|
|
|
print("\n✅ All models tested!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
best_results = all_results.get('Best Eval Model', list(all_results.values())[0])
|
|
|
|
|
|
fig = plt.figure(figsize=(20, 16))
|
|
|
fig.suptitle(f'SAC Agent Performance Analysis - {best_results["name"]}',
|
|
|
fontsize=20, fontweight='bold', color='white')
|
|
|
|
|
|
gs = GridSpec(4, 2, figure=fig, hspace=0.35, wspace=0.25)
|
|
|
|
|
|
|
|
|
ax1 = fig.add_subplot(gs[0, :])
|
|
|
portfolio_vals = best_results['portfolio_values']
|
|
|
timestamps = best_results.get('timestamps', range(len(portfolio_vals)))
|
|
|
|
|
|
|
|
|
bh_vals = all_results['Buy & Hold']['portfolio_values']
|
|
|
min_len = min(len(portfolio_vals), len(bh_vals))
|
|
|
|
|
|
ax1.plot(range(min_len), portfolio_vals[:min_len], 'cyan', linewidth=2, label='SAC Agent')
|
|
|
ax1.plot(range(min_len), bh_vals[:min_len], 'orange', linewidth=2, alpha=0.7, label='Buy & Hold')
|
|
|
ax1.fill_between(range(min_len), portfolio_vals[:min_len], bh_vals[:min_len],
|
|
|
where=portfolio_vals[:min_len] > bh_vals[:min_len],
|
|
|
color='green', alpha=0.3, label='Outperformance')
|
|
|
ax1.fill_between(range(min_len), portfolio_vals[:min_len], bh_vals[:min_len],
|
|
|
where=portfolio_vals[:min_len] <= bh_vals[:min_len],
|
|
|
color='red', alpha=0.3, label='Underperformance')
|
|
|
ax1.set_title('Portfolio Value Comparison', fontsize=14, fontweight='bold')
|
|
|
ax1.set_xlabel('Time Steps')
|
|
|
ax1.set_ylabel('Portfolio Value ($)')
|
|
|
ax1.legend(loc='upper left')
|
|
|
ax1.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax2 = fig.add_subplot(gs[1, 0])
|
|
|
drawdowns = best_results['drawdowns'] * 100
|
|
|
ax2.fill_between(range(len(drawdowns)), drawdowns, 0, color='red', alpha=0.5)
|
|
|
ax2.plot(drawdowns, 'red', linewidth=1)
|
|
|
ax2.axhline(y=best_results['max_drawdown'], color='yellow', linestyle='--',
|
|
|
label=f'Max DD: {best_results["max_drawdown"]:.1f}%')
|
|
|
ax2.set_title('Drawdown Analysis', fontsize=14, fontweight='bold')
|
|
|
ax2.set_xlabel('Time Steps')
|
|
|
ax2.set_ylabel('Drawdown (%)')
|
|
|
ax2.legend()
|
|
|
ax2.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax3 = fig.add_subplot(gs[1, 1])
|
|
|
positions = best_results['positions']
|
|
|
colors = ['green' if p > 0.1 else 'red' if p < -0.1 else 'gray' for p in positions]
|
|
|
ax3.bar(range(len(positions)), positions, color=colors, alpha=0.7, width=1)
|
|
|
ax3.axhline(y=0, color='white', linestyle='-', linewidth=1)
|
|
|
ax3.axhline(y=1, color='green', linestyle='--', alpha=0.5)
|
|
|
ax3.axhline(y=-1, color='red', linestyle='--', alpha=0.5)
|
|
|
ax3.set_title('Position Over Time', fontsize=14, fontweight='bold')
|
|
|
ax3.set_xlabel('Time Steps')
|
|
|
ax3.set_ylabel('Position (Long/Short)')
|
|
|
ax3.set_ylim(-1.2, 1.2)
|
|
|
ax3.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax4 = fig.add_subplot(gs[2, 0])
|
|
|
actions = best_results['actions']
|
|
|
ax4.hist(actions, bins=50, color='cyan', alpha=0.7, edgecolor='white')
|
|
|
ax4.axvline(x=0, color='yellow', linestyle='--', linewidth=2)
|
|
|
ax4.set_title('Action Distribution', fontsize=14, fontweight='bold')
|
|
|
ax4.set_xlabel('Action Value')
|
|
|
ax4.set_ylabel('Frequency')
|
|
|
ax4.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax5 = fig.add_subplot(gs[2, 1])
|
|
|
returns = best_results['portfolio_returns'] * 100
|
|
|
ax5.hist(returns, bins=100, color='lime', alpha=0.7, edgecolor='white')
|
|
|
ax5.axvline(x=0, color='yellow', linestyle='--', linewidth=2)
|
|
|
ax5.axvline(x=np.mean(returns), color='cyan', linestyle='-', linewidth=2,
|
|
|
label=f'Mean: {np.mean(returns):.4f}%')
|
|
|
ax5.set_title('Returns Distribution', fontsize=14, fontweight='bold')
|
|
|
ax5.set_xlabel('Return (%)')
|
|
|
ax5.set_ylabel('Frequency')
|
|
|
ax5.legend()
|
|
|
ax5.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax6 = fig.add_subplot(gs[3, 0])
|
|
|
rewards = best_results['rewards']
|
|
|
window = min(500, len(rewards) // 10)
|
|
|
rewards_smooth = np.convolve(rewards, np.ones(window)/window, mode='valid')
|
|
|
ax6.plot(rewards_smooth, 'magenta', linewidth=1)
|
|
|
ax6.axhline(y=0, color='white', linestyle='--', alpha=0.5)
|
|
|
ax6.set_title(f'Reward Over Time (Rolling {window})', fontsize=14, fontweight='bold')
|
|
|
ax6.set_xlabel('Time Steps')
|
|
|
ax6.set_ylabel('Reward')
|
|
|
ax6.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax7 = fig.add_subplot(gs[3, 1])
|
|
|
cumulative_reward = np.cumsum(rewards)
|
|
|
ax7.plot(cumulative_reward, 'gold', linewidth=2)
|
|
|
ax7.fill_between(range(len(cumulative_reward)), cumulative_reward, 0,
|
|
|
where=cumulative_reward > 0, color='green', alpha=0.3)
|
|
|
ax7.fill_between(range(len(cumulative_reward)), cumulative_reward, 0,
|
|
|
where=cumulative_reward <= 0, color='red', alpha=0.3)
|
|
|
ax7.set_title('Cumulative Reward', fontsize=14, fontweight='bold')
|
|
|
ax7.set_xlabel('Time Steps')
|
|
|
ax7.set_ylabel('Cumulative Reward')
|
|
|
ax7.grid(True, alpha=0.3)
|
|
|
|
|
|
plt.tight_layout()
|
|
|
plt.show()
|
|
|
|
|
|
print("\n✅ Detailed performance charts generated!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("="*70)
|
|
|
print(" EXTENDED BACKTEST ON FULL TEST PERIOD")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
extended_test_env = SequentialBacktestEnv(
|
|
|
df=test_data,
|
|
|
initial_balance=100000,
|
|
|
episode_length=len(test_data) - 10,
|
|
|
transaction_fee=0.001
|
|
|
)
|
|
|
|
|
|
|
|
|
extended_results = run_backtest(
|
|
|
eval_agent,
|
|
|
extended_test_env,
|
|
|
test_data,
|
|
|
name="Extended Backtest (Best Eval)",
|
|
|
verbose=True
|
|
|
)
|
|
|
|
|
|
|
|
|
print(f"\n📊 Additional Statistics:")
|
|
|
print(f" 📈 Long Positions: {extended_results['long_pct']:.1f}%")
|
|
|
print(f" 📉 Short Positions: {extended_results['short_pct']:.1f}%")
|
|
|
print(f" ⏸️ Neutral Positions: {extended_results['neutral_pct']:.1f}%")
|
|
|
print(f" 📊 Total Reward: {extended_results['total_reward']:.2f}")
|
|
|
|
|
|
|
|
|
print(f"\n📊 vs Buy & Hold:")
|
|
|
agent_return = extended_results['total_return']
|
|
|
bh_return_val = all_results['Buy & Hold']['total_return']
|
|
|
outperformance = agent_return - bh_return_val
|
|
|
print(f" Agent Return: {agent_return:+.2f}%")
|
|
|
print(f" B&H Return: {bh_return_val:+.2f}%")
|
|
|
print(f" Outperformance: {outperformance:+.2f}%")
|
|
|
|
|
|
if outperformance > 0:
|
|
|
print(f"\n ✅ Agent OUTPERFORMS Buy & Hold by {outperformance:.2f}%")
|
|
|
else:
|
|
|
print(f"\n ⚠️ Agent UNDERPERFORMS Buy & Hold by {abs(outperformance):.2f}%")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
fig = plt.figure(figsize=(20, 14))
|
|
|
fig.suptitle('Extended Backtest Analysis', fontsize=20, fontweight='bold', color='white')
|
|
|
|
|
|
gs = GridSpec(3, 2, figure=fig, hspace=0.35, wspace=0.25)
|
|
|
|
|
|
|
|
|
portfolio_vals = extended_results['portfolio_values']
|
|
|
prices = extended_results['prices']
|
|
|
positions = extended_results['positions']
|
|
|
timestamps = extended_results['timestamps']
|
|
|
|
|
|
|
|
|
min_len = min(len(portfolio_vals)-1, len(prices), len(positions))
|
|
|
|
|
|
|
|
|
ax1 = fig.add_subplot(gs[0, :])
|
|
|
ax1_twin = ax1.twinx()
|
|
|
|
|
|
ax1.plot(range(min_len), portfolio_vals[:min_len], 'cyan', linewidth=2, label='Portfolio Value')
|
|
|
ax1_twin.plot(range(min_len), prices[:min_len], 'orange', linewidth=1, alpha=0.7, label='BTC Price')
|
|
|
|
|
|
ax1.set_xlabel('Time Steps')
|
|
|
ax1.set_ylabel('Portfolio Value ($)', color='cyan')
|
|
|
ax1_twin.set_ylabel('BTC Price ($)', color='orange')
|
|
|
ax1.set_title('Portfolio Value vs BTC Price', fontsize=14, fontweight='bold')
|
|
|
ax1.tick_params(axis='y', labelcolor='cyan')
|
|
|
ax1_twin.tick_params(axis='y', labelcolor='orange')
|
|
|
|
|
|
|
|
|
lines1, labels1 = ax1.get_legend_handles_labels()
|
|
|
lines2, labels2 = ax1_twin.get_legend_handles_labels()
|
|
|
ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left')
|
|
|
ax1.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax2 = fig.add_subplot(gs[1, 0])
|
|
|
pos_data = positions[:min_len].reshape(1, -1)
|
|
|
cax = ax2.imshow(pos_data, aspect='auto', cmap='RdYlGn', vmin=-1, vmax=1)
|
|
|
ax2.set_title('Position Heatmap Over Time', fontsize=14, fontweight='bold')
|
|
|
ax2.set_xlabel('Time Steps')
|
|
|
ax2.set_yticks([])
|
|
|
plt.colorbar(cax, ax=ax2, label='Position', orientation='horizontal', pad=0.2)
|
|
|
|
|
|
|
|
|
ax3 = fig.add_subplot(gs[1, 1])
|
|
|
position_changes = np.abs(np.diff(positions[:min_len]))
|
|
|
change_threshold = 0.1
|
|
|
significant_changes = position_changes > change_threshold
|
|
|
change_rate = np.convolve(significant_changes.astype(float),
|
|
|
np.ones(100)/100, mode='valid') * 100
|
|
|
|
|
|
ax3.plot(change_rate, 'lime', linewidth=1)
|
|
|
ax3.set_title('Position Change Rate (Rolling 100 Steps)', fontsize=14, fontweight='bold')
|
|
|
ax3.set_xlabel('Time Steps')
|
|
|
ax3.set_ylabel('Change Rate (%)')
|
|
|
ax3.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax4 = fig.add_subplot(gs[2, 0])
|
|
|
window = 500
|
|
|
agent_returns = extended_results['portfolio_returns'][:min_len-1]
|
|
|
bh_returns = np.diff(prices[:min_len]) / prices[:min_len-1]
|
|
|
|
|
|
|
|
|
agent_rolling = pd.Series(agent_returns).rolling(window=window).mean() * 100
|
|
|
bh_rolling = pd.Series(bh_returns).rolling(window=window).mean() * 100
|
|
|
|
|
|
|
|
|
valid_idx = agent_rolling.dropna().index
|
|
|
|
|
|
timestamps_arr = np.arange(len(agent_returns))
|
|
|
|
|
|
ax4.plot(timestamps_arr[valid_idx], agent_rolling.dropna().values, 'cyan', linewidth=1, label='Agent')
|
|
|
ax4.plot(timestamps_arr[valid_idx], bh_rolling.iloc[valid_idx].values, 'orange', linewidth=1, alpha=0.7, label='Buy & Hold')
|
|
|
ax4.axhline(y=0, color='white', linestyle='--', alpha=0.5)
|
|
|
ax4.set_title(f'Rolling Mean Return (Window={window})', fontsize=14, fontweight='bold')
|
|
|
ax4.set_xlabel('Time Steps')
|
|
|
ax4.set_ylabel('Mean Return (%)')
|
|
|
ax4.legend()
|
|
|
ax4.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax5 = fig.add_subplot(gs[2, 1])
|
|
|
|
|
|
rolling_sharpe = (agent_rolling / (pd.Series(agent_returns).rolling(window=window).std() * 100 + 1e-10))
|
|
|
valid_sharpe_idx = rolling_sharpe.dropna().index
|
|
|
|
|
|
ax5.plot(timestamps_arr[valid_sharpe_idx], rolling_sharpe.iloc[valid_sharpe_idx].values, 'gold', linewidth=1)
|
|
|
ax5.axhline(y=0, color='white', linestyle='--', alpha=0.5)
|
|
|
ax5.set_title(f'Rolling Sharpe-like Ratio (Window={window})', fontsize=14, fontweight='bold')
|
|
|
ax5.set_xlabel('Time Steps')
|
|
|
ax5.set_ylabel('Sharpe-like Ratio')
|
|
|
ax5.grid(True, alpha=0.3)
|
|
|
|
|
|
plt.tight_layout()
|
|
|
plt.show()
|
|
|
|
|
|
print("\n✅ Extended backtest visualization complete!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("="*70)
|
|
|
print(" FINAL PERFORMANCE SUMMARY")
|
|
|
print("="*70)
|
|
|
|
|
|
fig = plt.figure(figsize=(18, 12))
|
|
|
fig.suptitle('🎯 SAC Bitcoin Trading Agent - Final Summary Dashboard',
|
|
|
fontsize=22, fontweight='bold', color='white', y=0.98)
|
|
|
|
|
|
gs = GridSpec(3, 4, figure=fig, hspace=0.4, wspace=0.3)
|
|
|
|
|
|
|
|
|
def create_metric_card(ax, title, value, unit="", color='white', icon=""):
|
|
|
ax.axis('off')
|
|
|
ax.text(0.5, 0.7, f"{icon}", fontsize=30, ha='center', va='center',
|
|
|
color=color, transform=ax.transAxes)
|
|
|
ax.text(0.5, 0.4, f"{value}{unit}", fontsize=24, ha='center', va='center',
|
|
|
fontweight='bold', color=color, transform=ax.transAxes)
|
|
|
ax.text(0.5, 0.15, title, fontsize=11, ha='center', va='center',
|
|
|
color='gray', transform=ax.transAxes)
|
|
|
ax.add_patch(mpatches.FancyBboxPatch((0.05, 0.05), 0.9, 0.9,
|
|
|
boxstyle="round,pad=0.02,rounding_size=0.1",
|
|
|
facecolor='#1a1a2e', edgecolor=color, linewidth=2,
|
|
|
transform=ax.transAxes))
|
|
|
|
|
|
|
|
|
best = extended_results
|
|
|
|
|
|
ax1 = fig.add_subplot(gs[0, 0])
|
|
|
color1 = 'lime' if best['total_return'] > 0 else 'red'
|
|
|
create_metric_card(ax1, "Total Return", f"{best['total_return']:+.2f}", "%", color1, "📈")
|
|
|
|
|
|
ax2 = fig.add_subplot(gs[0, 1])
|
|
|
color2 = 'lime' if best['sharpe'] > 1 else 'yellow' if best['sharpe'] > 0 else 'red'
|
|
|
create_metric_card(ax2, "Sharpe Ratio", f"{best['sharpe']:.3f}", "", color2, "📊")
|
|
|
|
|
|
ax3 = fig.add_subplot(gs[0, 2])
|
|
|
color3 = 'lime' if best['max_drawdown'] > -20 else 'yellow' if best['max_drawdown'] > -40 else 'red'
|
|
|
create_metric_card(ax3, "Max Drawdown", f"{best['max_drawdown']:.1f}", "%", color3, "📉")
|
|
|
|
|
|
ax4 = fig.add_subplot(gs[0, 3])
|
|
|
color4 = 'lime' if best['win_rate'] > 50 else 'yellow' if best['win_rate'] > 40 else 'red'
|
|
|
create_metric_card(ax4, "Win Rate", f"{best['win_rate']:.1f}", "%", color4, "🎯")
|
|
|
|
|
|
|
|
|
ax5 = fig.add_subplot(gs[1, 0])
|
|
|
create_metric_card(ax5, "Sortino Ratio", f"{best['sortino']:.3f}", "", 'cyan', "📊")
|
|
|
|
|
|
ax6 = fig.add_subplot(gs[1, 1])
|
|
|
color6 = 'lime' if best['calmar'] > 1 else 'yellow' if best['calmar'] > 0 else 'red'
|
|
|
create_metric_card(ax6, "Calmar Ratio", f"{best['calmar']:.3f}", "", color6, "⚖️")
|
|
|
|
|
|
ax7 = fig.add_subplot(gs[1, 2])
|
|
|
color7 = 'lime' if best['profit_factor'] > 1.5 else 'yellow' if best['profit_factor'] > 1 else 'red'
|
|
|
create_metric_card(ax7, "Profit Factor", f"{best['profit_factor']:.2f}", "", color7, "💰")
|
|
|
|
|
|
ax8 = fig.add_subplot(gs[1, 3])
|
|
|
create_metric_card(ax8, "Total Steps", f"{best['n_steps']:,}", "", 'white', "🔄")
|
|
|
|
|
|
|
|
|
ax_compare = fig.add_subplot(gs[2, :2])
|
|
|
models = [r['name'] for r in all_results.values() if 'total_return' in r]
|
|
|
returns = [r['total_return'] for r in all_results.values() if 'total_return' in r]
|
|
|
colors_bar = ['lime' if r > 0 else 'red' for r in returns]
|
|
|
|
|
|
bars = ax_compare.barh(models, returns, color=colors_bar, alpha=0.7, edgecolor='white')
|
|
|
ax_compare.axvline(x=0, color='white', linestyle='-', linewidth=1)
|
|
|
ax_compare.set_xlabel('Total Return (%)', fontsize=12)
|
|
|
ax_compare.set_title('Model Comparison - Total Returns', fontsize=14, fontweight='bold')
|
|
|
ax_compare.grid(True, alpha=0.3, axis='x')
|
|
|
|
|
|
|
|
|
for bar, val in zip(bars, returns):
|
|
|
width = bar.get_width()
|
|
|
ax_compare.text(width + 0.5 if width > 0 else width - 0.5, bar.get_y() + bar.get_height()/2,
|
|
|
f'{val:.2f}%', ha='left' if width > 0 else 'right', va='center', fontsize=10)
|
|
|
|
|
|
|
|
|
ax_pie = fig.add_subplot(gs[2, 2:])
|
|
|
position_labels = ['Long', 'Short', 'Neutral']
|
|
|
position_sizes = [best['long_pct'], best['short_pct'], best['neutral_pct']]
|
|
|
position_colors = ['green', 'red', 'gray']
|
|
|
explode = (0.05, 0.05, 0)
|
|
|
|
|
|
wedges, texts, autotexts = ax_pie.pie(position_sizes, explode=explode, labels=position_labels,
|
|
|
colors=position_colors, autopct='%1.1f%%',
|
|
|
shadow=True, startangle=90)
|
|
|
ax_pie.set_title('Position Distribution', fontsize=14, fontweight='bold')
|
|
|
for autotext in autotexts:
|
|
|
autotext.set_color('white')
|
|
|
autotext.set_fontweight('bold')
|
|
|
|
|
|
plt.tight_layout()
|
|
|
plt.show()
|
|
|
|
|
|
print("\n✅ Final summary dashboard generated!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("="*70)
|
|
|
print(" DETAILED TRADE ANALYSIS")
|
|
|
print("="*70)
|
|
|
|
|
|
|
|
|
positions = extended_results['positions']
|
|
|
actions = extended_results['actions']
|
|
|
rewards = extended_results['rewards']
|
|
|
portfolio_returns = extended_results['portfolio_returns']
|
|
|
|
|
|
|
|
|
position_changes = np.diff(positions)
|
|
|
significant_trades = np.abs(position_changes) > 0.1
|
|
|
trade_indices = np.where(significant_trades)[0]
|
|
|
n_trades = len(trade_indices)
|
|
|
|
|
|
|
|
|
trade_sizes = np.abs(position_changes[significant_trades])
|
|
|
|
|
|
print(f"\n📊 TRADING STATISTICS")
|
|
|
print(f" Total Position Changes: {n_trades:,}")
|
|
|
print(f" Average Trade Size: {np.mean(trade_sizes):.3f}")
|
|
|
print(f" Max Trade Size: {np.max(trade_sizes):.3f}")
|
|
|
print(f" Trades per 1000 Steps: {n_trades / len(positions) * 1000:.1f}")
|
|
|
|
|
|
|
|
|
print(f"\n📊 ACTION STATISTICS")
|
|
|
print(f" Mean Action: {np.mean(actions):+.4f}")
|
|
|
print(f" Std Action: {np.std(actions):.4f}")
|
|
|
print(f" Min Action: {np.min(actions):+.4f}")
|
|
|
print(f" Max Action: {np.max(actions):+.4f}")
|
|
|
print(f" Actions > 0: {np.sum(actions > 0) / len(actions) * 100:.1f}%")
|
|
|
print(f" Actions < 0: {np.sum(actions < 0) / len(actions) * 100:.1f}%")
|
|
|
|
|
|
|
|
|
print(f"\n📊 REWARD STATISTICS")
|
|
|
print(f" Total Reward: {np.sum(rewards):.2f}")
|
|
|
print(f" Mean Reward: {np.mean(rewards):.6f}")
|
|
|
print(f" Std Reward: {np.std(rewards):.6f}")
|
|
|
print(f" Max Reward: {np.max(rewards):.4f}")
|
|
|
print(f" Min Reward: {np.min(rewards):.4f}")
|
|
|
print(f" Positive Rewards:{np.sum(rewards > 0) / len(rewards) * 100:.1f}%")
|
|
|
|
|
|
|
|
|
print(f"\n📊 RETURN STATISTICS")
|
|
|
print(f" Mean Return: {np.mean(portfolio_returns) * 100:.6f}%")
|
|
|
print(f" Std Return: {np.std(portfolio_returns) * 100:.4f}%")
|
|
|
print(f" Skewness: {pd.Series(portfolio_returns).skew():.4f}")
|
|
|
print(f" Kurtosis: {pd.Series(portfolio_returns).kurtosis():.4f}")
|
|
|
|
|
|
|
|
|
print(f"\n📊 BEST/WORST PERIODS")
|
|
|
window = 100
|
|
|
rolling_returns = pd.Series(portfolio_returns).rolling(window).sum() * 100
|
|
|
best_period_end = rolling_returns.idxmax()
|
|
|
worst_period_end = rolling_returns.idxmin()
|
|
|
print(f" Best {window}-step Return: {rolling_returns.max():.2f}% (ending at step {best_period_end})")
|
|
|
print(f" Worst {window}-step Return: {rolling_returns.min():.2f}% (ending at step {worst_period_end})")
|
|
|
|
|
|
|
|
|
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
|
|
|
fig.suptitle('Trade Analysis Details', fontsize=16, fontweight='bold', color='white')
|
|
|
|
|
|
|
|
|
ax1 = axes[0, 0]
|
|
|
ax1.hist(trade_sizes, bins=30, color='cyan', alpha=0.7, edgecolor='white')
|
|
|
ax1.axvline(x=np.mean(trade_sizes), color='yellow', linestyle='--',
|
|
|
label=f'Mean: {np.mean(trade_sizes):.3f}')
|
|
|
ax1.set_title('Trade Size Distribution', fontsize=12, fontweight='bold')
|
|
|
ax1.set_xlabel('Trade Size (Position Change)')
|
|
|
ax1.set_ylabel('Frequency')
|
|
|
ax1.legend()
|
|
|
ax1.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax2 = axes[0, 1]
|
|
|
sample_size = min(5000, len(actions))
|
|
|
sample_idx = np.random.choice(len(actions), sample_size, replace=False)
|
|
|
ax2.scatter(actions[sample_idx], rewards[sample_idx], alpha=0.3, c='lime', s=5)
|
|
|
ax2.axhline(y=0, color='white', linestyle='--', alpha=0.5)
|
|
|
ax2.axvline(x=0, color='white', linestyle='--', alpha=0.5)
|
|
|
ax2.set_title('Action vs Reward (Sample)', fontsize=12, fontweight='bold')
|
|
|
ax2.set_xlabel('Action')
|
|
|
ax2.set_ylabel('Reward')
|
|
|
ax2.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax3 = axes[1, 0]
|
|
|
window_sizes = [100, 500, 1000]
|
|
|
for w in window_sizes:
|
|
|
if w < len(portfolio_returns):
|
|
|
rolling_ret = pd.Series(portfolio_returns).rolling(w).sum() * 100
|
|
|
ax3.hist(rolling_ret.dropna(), bins=50, alpha=0.5, label=f'{w}-step')
|
|
|
ax3.axvline(x=0, color='white', linestyle='--')
|
|
|
ax3.set_title('Rolling Return Distributions', fontsize=12, fontweight='bold')
|
|
|
ax3.set_xlabel('Cumulative Return (%)')
|
|
|
ax3.set_ylabel('Frequency')
|
|
|
ax3.legend()
|
|
|
ax3.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
ax4 = axes[1, 1]
|
|
|
wins = portfolio_returns > 0
|
|
|
win_streaks = []
|
|
|
loss_streaks = []
|
|
|
current_streak = 0
|
|
|
is_winning = None
|
|
|
|
|
|
for w in wins:
|
|
|
if is_winning is None:
|
|
|
is_winning = w
|
|
|
current_streak = 1
|
|
|
elif w == is_winning:
|
|
|
current_streak += 1
|
|
|
else:
|
|
|
if is_winning:
|
|
|
win_streaks.append(current_streak)
|
|
|
else:
|
|
|
loss_streaks.append(current_streak)
|
|
|
is_winning = w
|
|
|
current_streak = 1
|
|
|
|
|
|
|
|
|
if is_winning:
|
|
|
win_streaks.append(current_streak)
|
|
|
else:
|
|
|
loss_streaks.append(current_streak)
|
|
|
|
|
|
ax4.hist(win_streaks, bins=30, alpha=0.6, color='green', label='Win Streaks')
|
|
|
ax4.hist(loss_streaks, bins=30, alpha=0.6, color='red', label='Loss Streaks')
|
|
|
ax4.set_title('Win/Loss Streak Distribution', fontsize=12, fontweight='bold')
|
|
|
ax4.set_xlabel('Streak Length')
|
|
|
ax4.set_ylabel('Frequency')
|
|
|
ax4.legend()
|
|
|
ax4.grid(True, alpha=0.3)
|
|
|
|
|
|
plt.tight_layout()
|
|
|
plt.show()
|
|
|
|
|
|
print(f"\n{'='*70}")
|
|
|
print(f" ANALYSIS COMPLETE")
|
|
|
print(f"{'='*70}")
|
|
|
print(f"\n🎉 All visualization and testing cells executed successfully!")
|
|
|
print(f"📊 Models tested: {len(all_results)}")
|
|
|
print(f"📈 Best performing model: {extended_results['name']}")
|
|
|
print(f"💰 Final return: {extended_results['total_return']:+.2f}%")
|
|
|
|
|
|
|
|
|
|