Spaces:
Sleeping
Sleeping
File size: 1,671 Bytes
3004d7a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import torch
import torch.nn as nn
import torch.nn.functional as F
class PPOActorCritic(nn.Module):
    """
    Execution Agent using PPO (Proximal Policy Optimization).

    Input: [Signal (1), L2_Imbalance (1), Spread (1), Position_Net (1), Volatility (1)] -> 5 Dim

    Action Space:
        - Type: Limit (0) vs Market (1) -> Categorical(2)
        - Price Offset: Continuous (Gaussian)
        - Size: Continuous (Gaussian, 0-1 ratio)
    """

    def __init__(self, input_dim=5, action_dim=3):
        """
        Args:
            input_dim: Size of the observation vector (default 5, see class docstring).
            action_dim: Nominal number of action components. Kept for interface
                compatibility; the heads below define the actual action space.
        """
        super(PPOActorCritic, self).__init__()
        self.input_dim = input_dim
        self.action_dim = action_dim

        # Shared feature extractor.
        self.common = nn.Linear(input_dim, 64)

        # --- Actor heads ---
        # 1. Order type (discrete): Limit (0) vs Market (1).
        # BUGFIX: the original stacked two Linear layers with no activation in
        # between, which collapses to a single affine map; insert ReLU so the
        # 32-unit hidden layer actually adds capacity. Same fix applied to every
        # head below.
        self.actor_type = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2),
            nn.Softmax(dim=-1),
        )

        # 2. Price offset (continuous Gaussian): mean squashed to [-1, 1] by
        # Tanh, with a state-independent learnable log-std (initialized to 0,
        # i.e. std = 1).
        self.actor_offset_mu = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Tanh(),
        )
        self.actor_offset_sigma = nn.Parameter(torch.zeros(1))

        # 3. Size (continuous Gaussian): mean squashed to a [0, 1] ratio by
        # Sigmoid.
        # BUGFIX: the docstring declares this head Gaussian, but the original
        # defined no sigma for it; add a learnable log-std to match the offset
        # head.
        self.actor_size_mu = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        )
        self.actor_size_sigma = nn.Parameter(torch.zeros(1))

        # Critic head: state-value function V(s).
        self.critic = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        """
        Run the actor and critic heads on a batch of observations.

        Args:
            x: Tensor of shape (batch, input_dim).

        Returns:
            Tuple (probs_type, mu_offset, mu_size, value):
                probs_type: (batch, 2) softmax probabilities over order type.
                mu_offset:  (batch, 1) mean price offset in [-1, 1].
                mu_size:    (batch, 1) mean size ratio in [0, 1].
                value:      (batch, 1) state-value estimate.
        """
        features = F.relu(self.common(x))
        probs_type = self.actor_type(features)
        mu_offset = self.actor_offset_mu(features)
        mu_size = self.actor_size_mu(features)
        value = self.critic(features)
        return probs_type, mu_offset, mu_size, value
|