""" Differentiable Portfolio Optimization Layer ============================================ Wraps a mean-variance CVXPY program as a differentiable PyTorch layer via cvxpylayers. Gradients flow backward through the KKT conditions of the convex program, allowing upstream neural-network parameters to be trained on portfolio-performance metrics rather than forecast accuracy. Implements the cvxpylayers approach from: Agrawal et al. (2019), "Differentiable Convex Optimization Layers", NeurIPS. Design note ----------- The layer enforces only the *minimal* constraint set required for differentiability (fully-invested, long-only). The full production constraints (sector caps, beta bounds, duration limits, turnover, CVaR) are applied downstream in ``solver.py`` using the E2E weights as a warm-start. This separation is necessary because cvxpylayers requires a disciplined parametric program -- no SOCP cones, no integer variables. """ import torch import torch.nn as nn import numpy as np try: from cvxpylayers.torch import CvxpyLayer import cvxpy as cp _HAS_CVXPYLAYERS = True except ImportError: _HAS_CVXPYLAYERS = False from config import logger class DifferentiablePortfolioLayer(nn.Module): """ A CVXPY mean-variance program wrapped as a differentiable PyTorch layer. Parameters (differentiable inputs) ----------------------------------- mu : (batch, n_assets) predicted expected returns L : (batch, n, n) lower-Cholesky of covariance matrix Variables (differentiable outputs) ----------------------------------- w : (batch, n_assets) optimal portfolio weights """ def __init__(self, n_assets: int, risk_factor: float = 3.0, allow_short: bool = False): super().__init__() self.n = n_assets self.risk_factor = risk_factor self.allow_short = allow_short if _HAS_CVXPYLAYERS: self._build_layer() else: self.layer = None logger.warning("cvxpylayers not installed. Differentiable layer will raise an error if called.") # ------------------------------------------------------------------ # # Layer construction (called once) # # ------------------------------------------------------------------ # def _build_layer(self): n = self.n # CVXPY parameters — these become the differentiable inputs mu_param = cp.Parameter(n, name="mu") L_scaled_param = cp.Parameter((n, n), name="L_scaled") w = cp.Variable(n) # Objective: maximise μᵀw − ‖L_scaledᵀw‖² portfolio_variance = cp.sum_squares(L_scaled_param.T @ w) objective = cp.Maximize( mu_param @ w - portfolio_variance ) constraints = [cp.sum(w) == 1.0] if not self.allow_short: constraints.append(w >= 0.0) prob = cp.Problem(objective, constraints) self.layer = CvxpyLayer( prob, parameters=[mu_param, L_scaled_param], variables=[w], ) # ------------------------------------------------------------------ # # Forward pass # # ------------------------------------------------------------------ # def forward(self, mu: torch.Tensor, L: torch.Tensor, risk_factor: torch.Tensor = None) -> torch.Tensor: """ Args ---- mu : (batch, n_assets) predicted expected returns L : (batch, n_assets, n_assets) lower-Cholesky of covariance risk_factor: (batch,) risk aversion scalar per sample Returns ------- w : (batch, n_assets) optimal weights, with gradients attached """ if self.layer is None: raise ImportError("cvxpylayers is required for Model 6. Please install it.") if mu.ndim == 1: mu = mu.unsqueeze(0) if L.ndim == 2: L = L.unsqueeze(0) if risk_factor is None: risk_factor = torch.full((mu.shape[0],), self.risk_factor, dtype=mu.dtype, device=mu.device) # Scale L to bake in the risk parameter before CVXPY scale = torch.sqrt(risk_factor / 2.0).view(-1, 1, 1) L_scaled = L * scale weights, = self.layer( mu, L_scaled, solver_args={ "solve_method": "SCS", "eps": 1e-4, "max_iters": 5000, }, ) return weights