File size: 4,661 Bytes
558db1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""
Differentiable Portfolio Optimization Layer
============================================
Wraps a mean-variance CVXPY program as a differentiable PyTorch layer via
cvxpylayers.  Gradients flow backward through the KKT conditions of the
convex program, allowing upstream neural-network parameters to be trained
on portfolio-performance metrics rather than forecast accuracy.

Implements the cvxpylayers approach from:
    Agrawal et al. (2019), "Differentiable Convex Optimization Layers", NeurIPS.

Design note
-----------
The layer enforces only the *minimal* constraint set required for
differentiability (fully-invested, long-only).  The full production
constraints (sector caps, beta bounds, duration limits, turnover, CVaR)
are applied downstream in ``solver.py`` using the E2E weights as a
warm-start.  This separation keeps the layer a small disciplined
parametrized program (DPP), which cvxpylayers requires: every parameter
must enter the problem in a DPP-compliant way, and integer variables are
not supported.
"""

import torch
import torch.nn as nn
import numpy as np

try:
    from cvxpylayers.torch import CvxpyLayer
    import cvxpy as cp
    _HAS_CVXPYLAYERS = True
except ImportError:
    _HAS_CVXPYLAYERS = False

from config import logger


class DifferentiablePortfolioLayer(nn.Module):
    """
    A CVXPY mean-variance program wrapped as a differentiable PyTorch layer.

    For every sample in the batch the layer solves::

        maximise    mu @ w  -  (risk_factor / 2) * ||L.T @ w||^2
        subject to  sum(w) == 1
                    w >= 0          (dropped when ``allow_short`` is True)

    The risk-aversion scalar is not a CVXPY parameter of its own; it is
    folded into ``L`` (``L * sqrt(risk_factor / 2)``) before the solve, so
    the program only has two parameters.

    Parameters (differentiable inputs)
    -----------------------------------
    mu : (batch, n_assets)   predicted expected returns
    L  : (batch, n, n)       lower-Cholesky of covariance matrix

    Variables (differentiable outputs)
    -----------------------------------
    w  : (batch, n_assets)   optimal portfolio weights

    Constructor arguments
    ---------------------
    n_assets    : number of assets (size of ``mu`` and of each side of ``L``)
    risk_factor : default risk aversion, used when ``forward`` gets none
    allow_short : if True, the long-only constraint ``w >= 0`` is omitted

    If cvxpylayers / cvxpy cannot be imported, construction still succeeds
    (with a logged warning) and ``forward`` raises ``ImportError``.
    """

    def __init__(self, n_assets: int, risk_factor: float = 3.0,
                 allow_short: bool = False):
        super().__init__()
        self.n = n_assets
        self.risk_factor = risk_factor
        self.allow_short = allow_short
        if _HAS_CVXPYLAYERS:
            self._build_layer()
        else:
            # Defer the failure to forward() so that merely importing or
            # instantiating the model does not require the optional dependency.
            self.layer = None
            logger.warning("cvxpylayers not installed. Differentiable layer will raise an error if called.")

    # ------------------------------------------------------------------ #
    #  Layer construction (called once)                                    #
    # ------------------------------------------------------------------ #
    def _build_layer(self):
        """Build the parametrised CVXPY problem and wrap it in a CvxpyLayer.

        Sets ``self.layer``; the layer takes ``(mu, L_scaled)`` and returns a
        one-element tuple containing the optimal weights.
        """
        n = self.n

        # CVXPY parameters — these become the differentiable inputs
        mu_param = cp.Parameter(n, name="mu")
        L_scaled_param = cp.Parameter((n, n), name="L_scaled")

        w = cp.Variable(n)

        # Objective: maximise  μᵀw  −  ‖L_scaledᵀw‖²
        # Written via the Cholesky factor (rather than quad_form with a
        # covariance parameter) so the parameter enters affinely.
        portfolio_variance = cp.sum_squares(L_scaled_param.T @ w)
        objective = cp.Maximize(mu_param @ w - portfolio_variance)

        constraints = [cp.sum(w) == 1.0]  # fully invested
        if not self.allow_short:
            constraints.append(w >= 0.0)  # long-only

        prob = cp.Problem(objective, constraints)

        self.layer = CvxpyLayer(
            prob,
            parameters=[mu_param, L_scaled_param],
            variables=[w],
        )

    # ------------------------------------------------------------------ #
    #  Forward pass                                                        #
    # ------------------------------------------------------------------ #
    def forward(self, mu: torch.Tensor,
                L: torch.Tensor,
                risk_factor: "torch.Tensor | float | None" = None) -> torch.Tensor:
        """
        Solve the mean-variance program for each sample.

        Args
        ----
        mu : (batch, n_assets) or (n_assets,)
            Predicted expected returns.
        L  : (batch, n_assets, n_assets) or (n_assets, n_assets)
            Lower-Cholesky factor of the covariance matrix.
        risk_factor : (batch,) tensor, scalar tensor, Python number or None
            Risk aversion per sample.  A scalar applies to every sample;
            ``None`` falls back to the value given to the constructor.

        Inputs whose batch dimension is missing or equal to 1 are broadcast
        to the common batch size.

        Returns
        -------
        w  : (batch, n_assets)  optimal weights, with gradients attached.
             The batch dimension is kept even when the inputs were unbatched.

        Raises
        ------
        ImportError
            If cvxpylayers is not installed.
        ValueError
            If shapes do not match ``n_assets``, batch sizes cannot be
            broadcast together, or ``risk_factor`` is negative.
        """
        if self.layer is None:
            raise ImportError("cvxpylayers is required for Model 6. Please install it.")

        n = self.n

        # Promote unbatched inputs to a batch of one.
        if mu.ndim == 1:
            mu = mu.unsqueeze(0)
        if L.ndim == 2:
            L = L.unsqueeze(0)

        # Fail early with a readable message instead of deep inside the solver.
        if mu.ndim != 2 or mu.shape[-1] != n:
            raise ValueError(
                f"mu must have shape (batch, {n}) or ({n},); got {tuple(mu.shape)}"
            )
        if L.ndim != 3 or tuple(L.shape[-2:]) != (n, n):
            raise ValueError(
                f"L must have shape (batch, {n}, {n}) or ({n}, {n}); got {tuple(L.shape)}"
            )

        if risk_factor is None:
            risk_factor = self.risk_factor
        # Accept Python numbers and 0-d tensors, and align dtype/device with
        # mu so that both layer parameters end up with the same dtype.
        # as_tensor keeps the autograd graph when given a tensor.
        gamma = torch.as_tensor(
            risk_factor, dtype=mu.dtype, device=mu.device
        ).reshape(-1)
        if bool((gamma < 0).any()):
            # sqrt of a negative value would silently feed NaNs to the solver.
            raise ValueError("risk_factor must be non-negative")

        # The layer needs every batched parameter to share one batch size.
        sizes = {"mu": mu.shape[0], "L": L.shape[0], "risk_factor": gamma.shape[0]}
        batch = max(sizes.values())
        mismatched = {k: v for k, v in sizes.items() if v not in (1, batch)}
        if mismatched:
            raise ValueError(
                f"Inconsistent batch sizes {sizes}; each must be 1 or {batch}"
            )

        # Scale L to bake in the risk parameter before CVXPY:
        # ||(sqrt(g/2) * L).T @ w||^2 == (g/2) * w.T @ (L @ L.T) @ w
        scale = torch.sqrt(gamma / 2.0).view(-1, 1, 1)
        L_scaled = (L * scale).expand(batch, n, n)
        mu = mu.expand(batch, n)

        weights, = self.layer(
            mu, L_scaled,
            solver_args={
                "solve_method": "SCS",
                "eps": 1e-4,
                "max_iters": 5000,
            },
        )
        return weights