"""
Differentiable Portfolio Optimization Layer
============================================
Wraps a mean-variance CVXPY program as a differentiable PyTorch layer via
cvxpylayers. Gradients flow backward through the KKT conditions of the
convex program, allowing upstream neural-network parameters to be trained
on portfolio-performance metrics rather than forecast accuracy.
Implements the cvxpylayers approach from:
Agrawal et al. (2019), "Differentiable Convex Optimization Layers", NeurIPS.
Design note
-----------
The layer enforces only the *minimal* constraint set required for
differentiability (fully-invested, long-only). The full production
constraints (sector caps, beta bounds, duration limits, turnover, CVaR)
are applied downstream in ``solver.py`` using the E2E weights as a
warm-start. This separation is necessary because cvxpylayers requires
a disciplined parametric program -- no SOCP cones, no integer variables.
"""
from typing import Optional, Union

import numpy as np
import torch
import torch.nn as nn

try:
    from cvxpylayers.torch import CvxpyLayer
    import cvxpy as cp

    _HAS_CVXPYLAYERS = True
except ImportError:
    _HAS_CVXPYLAYERS = False

from config import logger
class DifferentiablePortfolioLayer(nn.Module):
    """
    A CVXPY mean-variance program wrapped as a differentiable PyTorch layer.

    The embedded program is::

        maximise    mu^T w - (risk_factor / 2) * ||L^T w||^2
        subject to  sum(w) == 1
                    w >= 0            (omitted when ``allow_short`` is True)

    With ``L`` the lower-Cholesky factor of the covariance ``Sigma = L L^T``,
    the quadratic term equals ``(risk_factor / 2) * w^T Sigma w``.

    Parameters (differentiable inputs)
    -----------------------------------
    mu : (batch, n_assets)   predicted expected returns
    L  : (batch, n, n)       lower-Cholesky of covariance matrix

    Variables (differentiable outputs)
    -----------------------------------
    w  : (batch, n_assets)   optimal portfolio weights
    """

    def __init__(self, n_assets: int, risk_factor: float = 3.0,
                 allow_short: bool = False):
        """
        Args
        ----
        n_assets    : number of assets, i.e. the length of the weight vector
        risk_factor : default risk aversion, used by ``forward`` whenever no
                      per-call value is supplied
        allow_short : when False (default) the long-only constraint
                      ``w >= 0`` is added to the program
        """
        super().__init__()
        self.n = n_assets
        self.risk_factor = risk_factor
        self.allow_short = allow_short

        if _HAS_CVXPYLAYERS:
            self._build_layer()
        else:
            # Construction still succeeds without the optional dependency;
            # the failure is deferred to ``forward``.
            self.layer = None
            logger.warning("cvxpylayers not installed. Differentiable layer will raise an error if called.")

    # ------------------------------------------------------------------ #
    #  Layer construction (called once)                                    #
    # ------------------------------------------------------------------ #
    def _build_layer(self):
        """Build the parametrised CVXPY problem and store it in ``self.layer``."""
        n = self.n

        # CVXPY parameters — these become the differentiable inputs
        mu_param = cp.Parameter(n, name="mu")
        L_scaled_param = cp.Parameter((n, n), name="L_scaled")

        w = cp.Variable(n)

        # Objective: maximise  μᵀw − ‖L_scaledᵀw‖²
        # The risk aversion is not a CVXPY parameter: ``forward`` folds it
        # into ``L_scaled`` before the solve.
        portfolio_variance = cp.sum_squares(L_scaled_param.T @ w)
        objective = cp.Maximize(
            mu_param @ w - portfolio_variance
        )

        # Fully invested; long-only unless shorting is explicitly allowed.
        constraints = [cp.sum(w) == 1.0]
        if not self.allow_short:
            constraints.append(w >= 0.0)

        prob = cp.Problem(objective, constraints)

        self.layer = CvxpyLayer(
            prob,
            parameters=[mu_param, L_scaled_param],
            variables=[w],
        )

    # ------------------------------------------------------------------ #
    #  Forward pass                                                        #
    # ------------------------------------------------------------------ #
    def forward(self, mu: torch.Tensor,
                L: torch.Tensor,
                risk_factor: Optional[Union[torch.Tensor, float]] = None) -> torch.Tensor:
        """
        Solve the portfolio program for a batch of inputs.

        Args
        ----
        mu         : (batch, n_assets) or (n_assets,) predicted expected returns
        L          : (batch, n_assets, n_assets) or (n_assets, n_assets)
                     lower-Cholesky of covariance
        risk_factor: risk aversion; either a (batch,) tensor with one value
                     per sample, or a single scalar (Python number or 0-dim
                     tensor) applied to every sample. Defaults to the value
                     given at construction.

        Returns
        -------
        w : (batch, n_assets) optimal weights, with gradients attached.
            A leading batch dimension is always passed to the solver, even
            for unbatched inputs.

        Raises
        ------
        ImportError : cvxpylayers is not installed.
        ValueError  : ``risk_factor`` contains a negative value.
        """
        if self.layer is None:
            raise ImportError("cvxpylayers is required for Model 6. Please install it.")

        if mu.ndim == 1:
            mu = mu.unsqueeze(0)
        if L.ndim == 2:
            L = L.unsqueeze(0)

        if risk_factor is None:
            risk_factor = self.risk_factor
        # Accept plain Python numbers as well as tensors, and align dtype and
        # device with ``mu``. For tensor inputs ``as_tensor`` preserves the
        # autograd history.
        risk_factor = torch.as_tensor(risk_factor, dtype=mu.dtype, device=mu.device)
        if risk_factor.ndim == 0:
            # One scalar shared by every sample in the batch.
            risk_factor = risk_factor.expand(mu.shape[0])
        if bool((risk_factor < 0).any()):
            # sqrt of a negative value would silently turn L_scaled into NaN.
            raise ValueError("risk_factor must be non-negative.")

        # Scale L to bake in the risk parameter before CVXPY:
        # ||(sqrt(g/2) * L)^T w||^2 == (g/2) * ||L^T w||^2
        scale = torch.sqrt(risk_factor / 2.0).reshape(-1, 1, 1)
        L_scaled = L * scale

        # Both layer inputs now carry a batch dimension; broadcast a size-1
        # batch up to the common size so the two agree. Genuinely mismatched
        # sizes are left untouched for the layer itself to reject.
        batch = max(mu.shape[0], L_scaled.shape[0])
        if batch > 1:
            if mu.shape[0] == 1:
                mu = mu.expand(batch, -1).contiguous()
            if L_scaled.shape[0] == 1:
                L_scaled = L_scaled.expand(batch, -1, -1).contiguous()

        weights, = self.layer(
            mu, L_scaled,
            solver_args={
                "solve_method": "SCS",
                "eps": 1e-4,
                "max_iters": 5000,
            },
        )
        return weights
|