math-backend / differentiable_optimizer.py
engineportf's picture
Upload folder using huggingface_hub
558db1e verified
Raw
History Blame Contribute Delete
4.66 kB
"""
Differentiable Portfolio Optimization Layer
============================================
Wraps a mean-variance CVXPY program as a differentiable PyTorch layer via
cvxpylayers. Gradients flow backward through the KKT conditions of the
convex program, allowing upstream neural-network parameters to be trained
on portfolio-performance metrics rather than forecast accuracy.
Implements the cvxpylayers approach from:
Agrawal et al. (2019), "Differentiable Convex Optimization Layers", NeurIPS.
Design note
-----------
The layer enforces only the *minimal* constraint set required for
differentiability (fully-invested, long-only). The full production
constraints (sector caps, beta bounds, duration limits, turnover, CVaR)
are applied downstream in ``solver.py`` using the E2E weights as a
warm-start. This separation is necessary because cvxpylayers requires
a disciplined parametrized program (DPP) -- the problem must stay convex
and DPP-compliant in its parameters, with no integer variables.
"""
import torch
import torch.nn as nn
import numpy as np
try:
from cvxpylayers.torch import CvxpyLayer
import cvxpy as cp
_HAS_CVXPYLAYERS = True
except ImportError:
_HAS_CVXPYLAYERS = False
from config import logger
class DifferentiablePortfolioLayer(nn.Module):
    """
    A CVXPY mean-variance program wrapped as a differentiable PyTorch layer.

    For every sample the layer solves

        maximise    mu^T w - (risk_factor / 2) * ||L^T w||^2
        subject to  sum(w) == 1            (always)
                    w >= 0                 (unless ``allow_short``)

    The risk-aversion scalar is folded into ``L`` before the solve
    (``L_scaled = L * sqrt(risk_factor / 2)``) so the CVXPY problem only has
    two parameters, ``mu`` and ``L_scaled``.

    Parameters (differentiable inputs)
    -----------------------------------
    mu : (batch, n_assets) predicted expected returns
    L : (batch, n, n) lower-Cholesky of covariance matrix

    Variables (differentiable outputs)
    -----------------------------------
    w : (batch, n_assets) optimal portfolio weights

    Constructor arguments
    ---------------------
    n_assets    : number of assets (size of ``w``)
    risk_factor : default risk aversion used when ``forward`` gets none
    allow_short : if True, the long-only constraint ``w >= 0`` is dropped
    solver_args : optional dict forwarded to the cvxpylayers call as
                  ``solver_args``; defaults to ``DEFAULT_SOLVER_ARGS``
    """

    # Solver settings used when the caller does not supply ``solver_args``.
    DEFAULT_SOLVER_ARGS = {
        "solve_method": "SCS",
        "eps": 1e-4,
        "max_iters": 5000,
    }

    def __init__(self, n_assets: int, risk_factor: float = 3.0,
                 allow_short: bool = False,
                 solver_args: "dict | None" = None):
        super().__init__()
        self.n = n_assets
        self.risk_factor = risk_factor
        self.allow_short = allow_short
        # Copy so later mutation of the caller's dict (or of the class-level
        # default) cannot change this instance's solver configuration.
        self.solver_args = dict(
            self.DEFAULT_SOLVER_ARGS if solver_args is None else solver_args
        )
        if _HAS_CVXPYLAYERS:
            self._build_layer()
        else:
            # Construction still succeeds without cvxpylayers; the error is
            # deferred to ``forward``.
            self.layer = None
            logger.warning("cvxpylayers not installed. Differentiable layer will raise an error if called.")

    # ------------------------------------------------------------------ #
    # Layer construction (called once)                                   #
    # ------------------------------------------------------------------ #
    def _build_layer(self):
        """Build the parametrised CVXPY problem and wrap it in a CvxpyLayer."""
        n = self.n
        # CVXPY parameters — these become the differentiable inputs
        mu_param = cp.Parameter(n, name="mu")
        L_scaled_param = cp.Parameter((n, n), name="L_scaled")
        w = cp.Variable(n)
        # Objective: maximise μᵀw − ‖L_scaledᵀw‖²
        # (risk aversion is already baked into L_scaled by ``forward``)
        portfolio_variance = cp.sum_squares(L_scaled_param.T @ w)
        objective = cp.Maximize(
            mu_param @ w - portfolio_variance
        )
        constraints = [cp.sum(w) == 1.0]
        if not self.allow_short:
            constraints.append(w >= 0.0)
        prob = cp.Problem(objective, constraints)
        self.layer = CvxpyLayer(
            prob,
            parameters=[mu_param, L_scaled_param],
            variables=[w],
        )

    # ------------------------------------------------------------------ #
    # Forward pass                                                       #
    # ------------------------------------------------------------------ #
    def forward(self, mu: torch.Tensor,
                L: torch.Tensor,
                risk_factor: "torch.Tensor | float | None" = None) -> torch.Tensor:
        """
        Solve the mean-variance program for a batch of inputs.

        Args
        ----
        mu         : (batch, n_assets) or (n_assets,) predicted expected returns
        L          : (batch, n_assets, n_assets) or (n_assets, n_assets)
                     lower-Cholesky of covariance
        risk_factor: (batch,) risk aversion scalar per sample, a 0-dim/1-element
                     tensor, or a plain Python number. ``None`` uses the
                     value given to the constructor.

        Inputs whose batch size is 1 (including unbatched ones) are broadcast
        to the largest batch size among ``mu``, ``L`` and ``risk_factor``.

        Returns
        -------
        w : (batch, n_assets) optimal weights, with gradients attached.
            Fully unbatched input yields ``batch == 1``.

        Raises
        ------
        ImportError : cvxpylayers is not installed.
        ValueError  : inconsistent shapes / batch sizes, or a negative
                      ``risk_factor``.
        """
        if self.layer is None:
            raise ImportError("cvxpylayers is required for Model 6. Please install it.")

        # Promote unbatched inputs to batch size 1.
        if mu.ndim == 1:
            mu = mu.unsqueeze(0)
        if L.ndim == 2:
            L = L.unsqueeze(0)

        # Fail early with a readable message instead of deep inside the solver.
        if mu.ndim != 2 or mu.shape[-1] != self.n:
            raise ValueError(
                f"mu must have shape (n_assets,) or (batch, n_assets) with "
                f"n_assets={self.n}; got batched shape {tuple(mu.shape)}"
            )
        if L.ndim != 3 or tuple(L.shape[-2:]) != (self.n, self.n):
            raise ValueError(
                f"L must have shape (n_assets, n_assets) or "
                f"(batch, n_assets, n_assets) with n_assets={self.n}; "
                f"got batched shape {tuple(L.shape)}"
            )

        if risk_factor is None:
            risk_factor = self.risk_factor
        if not torch.is_tensor(risk_factor):
            # Plain numbers would make torch.sqrt raise; tensors are left
            # untouched so their dtype/device/grad history is preserved.
            risk_factor = torch.as_tensor(
                risk_factor, dtype=mu.dtype, device=mu.device
            )
        gamma = risk_factor.reshape(-1)
        if bool((gamma < 0).any()):
            # sqrt of a negative value would silently inject NaNs into L_scaled.
            raise ValueError("risk_factor must be non-negative")

        # Broadcast every input to one common batch size so the layer never
        # sees parameters with different leading dimensions.
        batch = max(mu.shape[0], L.shape[0], gamma.shape[0])
        for label, size in (("mu", mu.shape[0]),
                            ("L", L.shape[0]),
                            ("risk_factor", gamma.shape[0])):
            if size not in (1, batch):
                raise ValueError(
                    f"Inconsistent batch sizes: {label} has batch size {size}, "
                    f"expected 1 or {batch}"
                )
        mu = mu.expand(batch, -1)

        # Scale L to bake in the risk parameter before CVXPY:
        # ||(L * sqrt(g/2))^T w||^2 == (g/2) * ||L^T w||^2
        scale = torch.sqrt(gamma / 2.0).view(-1, 1, 1)
        L_scaled = (L * scale).expand(batch, -1, -1)

        weights, = self.layer(
            mu, L_scaled,
            # Fresh copy per call so the callee cannot mutate instance config.
            solver_args=dict(self.solver_args),
        )
        return weights