Spaces:
Sleeping
Sleeping
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
class PPOActorCritic(nn.Module):
    """Actor-critic network for an order-execution agent using PPO.

    Input features (5 by default): signal, L2 imbalance, spread, net
    position and volatility.

    The action is factored into three heads on top of one shared layer:

    * order type -- categorical over limit (0) / market (1);
    * price offset -- mean of a Gaussian, squashed to (-1, 1) by ``tanh``;
    * size -- mean of a Gaussian, squashed to (0, 1) by ``sigmoid``.

    A fourth head (the critic) outputs a scalar state-value estimate.

    Args:
        input_dim: Width of the last dimension of the observation tensor.
        action_dim: Accepted for signature compatibility and stored on the
            instance; it does not size any layer (head widths are fixed at
            2, 1 and 1).

    Attributes:
        actor_offset_sigma: Learnable, state-independent spread parameter for
            the price-offset Gaussian, initialised to zero. ``forward`` does
            not use it; code that builds the distribution must read it from
            the module. Presumably a log-std -- TODO confirm against the
            training loop.
    """

    def __init__(self, input_dim: int = 5, action_dim: int = 3) -> None:
        super().__init__()
        self.input_dim = input_dim
        # Recorded rather than silently discarded; no layer depends on it.
        self.action_dim = action_dim

        # NOTE: submodules are registered in the same order as before so that
        # parameter ordering and state_dict keys stay checkpoint-compatible.

        # Shared feature extractor (ReLU is applied in forward()).
        self.common = nn.Linear(input_dim, 64)

        # NOTE(review): every head stacks two Linear layers with no activation
        # in between, so each head is an affine map of the shared features
        # followed by its output squashing. Adding a non-linearity would
        # change the outputs of already-trained weights, so it is left as-is.

        # 1. Order type (discrete): probabilities over {limit, market}.
        self.actor_type = nn.Sequential(
            nn.Linear(64, 32),
            nn.Linear(32, 2),
            nn.Softmax(dim=-1),
        )

        # 2. Price offset (continuous): state-dependent mean + global sigma.
        self.actor_offset_mu = nn.Sequential(
            nn.Linear(64, 32),
            nn.Linear(32, 1),
            nn.Tanh(),
        )
        self.actor_offset_sigma = nn.Parameter(torch.zeros(1))

        # 3. Size (continuous): state-dependent mean only.
        # NOTE(review): unlike the offset head there is no sigma parameter for
        # size. Adding one would change the state_dict, so confirm how the
        # size distribution's spread is chosen before introducing it.
        self.actor_size_mu = nn.Sequential(
            nn.Linear(64, 32),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        )

        # Critic head (value function).
        self.critic = nn.Sequential(
            nn.Linear(64, 32),
            nn.Linear(32, 1),
        )

    def forward(self, x: torch.Tensor):
        """Run the shared layer and all four heads on ``x``.

        Args:
            x: Observation tensor whose last dimension has size
                ``input_dim``; any leading dimensions are carried through.

        Returns:
            A 4-tuple ``(probs_type, mu_offset, mu_size, value)`` with last
            dimensions of size 2, 1, 1 and 1 respectively. ``probs_type``
            sums to 1 along the last dimension, ``mu_offset`` lies in
            (-1, 1) and ``mu_size`` in (0, 1).
        """
        features = F.relu(self.common(x))
        probs_type = self.actor_type(features)
        mu_offset = self.actor_offset_mu(features)
        mu_size = self.actor_size_mu(features)
        value = self.critic(features)
        return probs_type, mu_offset, mu_size, value