import torch
import torch.nn as nn
import torch.nn.functional as F


class PPOActorCritic(nn.Module):
    """
    Execution Agent using PPO (Proximal Policy Optimization).

    Input (5-dim): [Signal (1), L2_Imbalance (1), Spread (1),
                    Position_Net (1), Volatility (1)]

    Action Space:
        - Type: Limit (0) vs Market (1) -> Categorical(2)
        - Price Offset: Continuous (Gaussian)
        - Size: Continuous (Gaussian, 0-1 ratio)
    """

    def __init__(self, input_dim=5, action_dim=3):
        super().__init__()
        self.input_dim = input_dim
        self.action_dim = action_dim

        # Shared Feature Extractor
        self.common = nn.Linear(input_dim, 64)

        # Actor Heads
        # 1. Order Type (Discrete): probabilities over {Limit, Market}.
        #    A ReLU between the two linear layers keeps the head non-linear;
        #    without it, stacked linear layers collapse into a single one.
        self.actor_type = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2),
            nn.Softmax(dim=-1),
        )

        # 2. Price Offset (Continuous) - Mu, Sigma. Tanh bounds the mean to
        #    [-1, 1]. The std is a state-independent learnable parameter
        #    stored as a log-std (zero-init -> sigma = 1), so exp() keeps
        #    it positive.
        self.actor_offset_mu = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Tanh(),
        )
        self.actor_offset_log_sigma = nn.Parameter(torch.zeros(1))

        # 3. Size (Continuous) - Mu, Sigma. Sigmoid bounds the mean to [0, 1].
        self.actor_size_mu = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        )
        self.actor_size_log_sigma = nn.Parameter(torch.zeros(1))

        # Critic Head (Value Function)
        self.critic = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        x = F.relu(self.common(x))
        probs_type = self.actor_type(x)
        mu_offset = self.actor_offset_mu(x)
        sigma_offset = self.actor_offset_log_sigma.exp()
        mu_size = self.actor_size_mu(x)
        sigma_size = self.actor_size_log_sigma.exp()
        value = self.critic(x)
        # Sigmas are returned alongside the means so the caller can build
        # the Gaussian action distributions described in the docstring.
        return probs_type, mu_offset, sigma_offset, mu_size, sigma_size, value
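
# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how these heads are commonly turned into PPO action
# distributions: a Categorical over the order type and independent Normals
# over price offset and size. The state vector below is a hypothetical sample.
if __name__ == "__main__":
    from torch.distributions import Categorical, Normal

    model = PPOActorCritic()
    # [Signal, L2_Imbalance, Spread, Position_Net, Volatility]
    state = torch.tensor([[0.4, -0.1, 0.02, 0.0, 0.15]])

    probs_type, mu_offset, sigma_offset, mu_size, sigma_size, value = model(state)

    dist_type = Categorical(probs=probs_type)
    dist_offset = Normal(mu_offset, sigma_offset)
    dist_size = Normal(mu_size, sigma_size)

    order_type = dist_type.sample()      # 0 = Limit, 1 = Market
    price_offset = dist_offset.sample()
    size_raw = dist_size.sample()
    size = size_raw.clamp(0.0, 1.0)      # enforce the 0-1 size ratio

    # Joint log-prob of the composite action, as used in the PPO ratio.
    # log_prob is taken on the unclamped sample so it stays consistent
    # with the sampling distribution.
    log_prob = (
        dist_type.log_prob(order_type)
        + dist_offset.log_prob(price_offset).squeeze(-1)
        + dist_size.log_prob(size_raw).squeeze(-1)
    )
    print(order_type.item(), price_offset.item(), size.item(),
          log_prob.item(), value.item())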