"""
Differentiable Portfolio Optimization Layer
============================================
Wraps a mean-variance CVXPY program as a differentiable PyTorch layer via
cvxpylayers.  Gradients flow backward through the KKT conditions of the
convex program, allowing upstream neural-network parameters to be trained
on portfolio-performance metrics rather than forecast accuracy.

Implements the cvxpylayers approach from:
    Agrawal et al. (2019), "Differentiable Convex Optimization Layers", NeurIPS.

Design note
-----------
The layer enforces only the *minimal* constraint set required for
differentiability (fully-invested, long-only).  The full production
constraints (sector caps, beta bounds, duration limits, turnover, CVaR)
are applied downstream in ``solver.py`` using the E2E weights as a
warm-start.  This separation is necessary because cvxpylayers requires
a disciplined parametrized program (DPP) -- no SOCP cones, no integer
variables.
"""

import torch
import torch.nn as nn
import numpy as np

try:
    from cvxpylayers.torch import CvxpyLayer
    import cvxpy as cp
    _HAS_CVXPYLAYERS = True
except ImportError:
    _HAS_CVXPYLAYERS = False

from config import logger


class DifferentiablePortfolioLayer(nn.Module):
    """
    A CVXPY mean-variance program wrapped as a differentiable PyTorch layer.

    The underlying program is

        maximise   mu^T w  -  (risk_factor / 2) * || L^T w ||^2
        subject to sum(w) == 1,  and  w >= 0 unless ``allow_short`` is set.

    Constructor arguments
    ---------------------
    n_assets    : number of assets (length of ``mu`` / side of ``L``)
    risk_factor : default risk-aversion scalar used when ``forward`` is
                  called without an explicit ``risk_factor``
    allow_short : if True the long-only constraint ``w >= 0`` is dropped

    Parameters (differentiable inputs)
    -----------------------------------
    mu : (batch, n_assets)   predicted expected returns
    L  : (batch, n, n)       lower-Cholesky of covariance matrix

    Variables (differentiable outputs)
    -----------------------------------
    w  : (batch, n_assets)   optimal portfolio weights

    If cvxpylayers could not be imported, construction still succeeds
    (``self.layer`` is ``None`` and a warning is logged) but ``forward``
    raises ``ImportError``.
    """

    # Solver settings forwarded to the cvxpylayers call on every forward pass.
    _SOLVER_ARGS = {
        "solve_method": "SCS",
        "eps": 1e-4,
        "max_iters": 5000,
    }

    def __init__(self, n_assets: int, risk_factor: float = 3.0,
                 allow_short: bool = False):
        super().__init__()
        self.n = n_assets
        self.risk_factor = risk_factor
        self.allow_short = allow_short
        if _HAS_CVXPYLAYERS:
            self._build_layer()
        else:
            # Defer the failure to call time so that importing/constructing
            # the model does not require the optional dependency.
            self.layer = None
            logger.warning("cvxpylayers not installed. Differentiable layer will raise an error if called.")

    # ------------------------------------------------------------------ #
    #  Layer construction (called once)                                    #
    # ------------------------------------------------------------------ #
    def _build_layer(self):
        """Build the parametrized CVXPY problem and store it as ``self.layer``.

        The problem has exactly two parameters, ``mu`` (n,) and ``L_scaled``
        (n, n), and one variable ``w`` (n,).  The risk-aversion scalar is
        not a CVXPY parameter: ``forward`` folds it into ``L_scaled``.
        """
        n = self.n

        # CVXPY parameters — these become the differentiable inputs
        mu_param = cp.Parameter(n, name="mu")
        L_scaled_param = cp.Parameter((n, n), name="L_scaled")

        w = cp.Variable(n)

        # Objective: maximise  μᵀw  −  ‖L_scaledᵀw‖²
        # With L_scaled = sqrt(γ/2)·L this equals μᵀw − (γ/2)·wᵀ(L Lᵀ)w.
        portfolio_variance = cp.sum_squares(L_scaled_param.T @ w)
        objective = cp.Maximize(
            mu_param @ w - portfolio_variance
        )

        # Fully invested; long-only unless shorting is explicitly allowed.
        constraints = [cp.sum(w) == 1.0]
        if not self.allow_short:
            constraints.append(w >= 0.0)

        prob = cp.Problem(objective, constraints)

        # The order of `parameters` fixes the positional argument order
        # used when the layer is called in `forward`.
        self.layer = CvxpyLayer(
            prob,
            parameters=[mu_param, L_scaled_param],
            variables=[w],
        )

    # ------------------------------------------------------------------ #
    #  Forward pass                                                        #
    # ------------------------------------------------------------------ #
    def forward(self, mu: torch.Tensor,
                L: torch.Tensor,
                risk_factor: torch.Tensor = None) -> torch.Tensor:
        """
        Solve the mean-variance program for each sample in the batch.

        Args
        ----
        mu : (batch, n_assets) or (n_assets,)
            predicted expected returns
        L  : (batch, n_assets, n_assets) or (n_assets, n_assets)
            lower-Cholesky of covariance
        risk_factor : (batch,) tensor, scalar tensor, Python number, or None
            risk aversion per sample; ``None`` uses the value given to
            the constructor.  It is cast to ``mu``'s dtype and device.
            Must be non-negative.  Zero is accepted, but the square root
            taken here has an unbounded derivative at zero, so gradients
            with respect to ``risk_factor`` are not meaningful there.

        Inputs whose leading (batch) dimension is 1 or absent are
        broadcast to the largest batch size among the three inputs.

        Returns
        -------
        w  : (batch, n_assets)  optimal weights, with gradients attached

        Raises
        ------
        ImportError
            if cvxpylayers is not installed.
        ValueError
            if an input has the wrong shape, the batch sizes cannot be
            broadcast together, or ``risk_factor`` is negative or NaN.
        """
        if self.layer is None:
            raise ImportError("cvxpylayers is required for Model 6. Please install it.")

        # Promote unbatched inputs to a batch of one.
        if mu.ndim == 1:
            mu = mu.unsqueeze(0)
        if L.ndim == 2:
            L = L.unsqueeze(0)

        if mu.ndim != 2 or mu.shape[-1] != self.n:
            raise ValueError(
                f"mu must have shape (batch, {self.n}) or ({self.n},), "
                f"got {tuple(mu.shape)}"
            )
        if L.ndim != 3 or tuple(L.shape[-2:]) != (self.n, self.n):
            raise ValueError(
                f"L must have shape (batch, {self.n}, {self.n}) or "
                f"({self.n}, {self.n}), got {tuple(L.shape)}"
            )

        # Normalise the risk aversion to a 1-D tensor matching mu's dtype
        # and device, so that scaling L does not promote its dtype or mix
        # devices.
        if risk_factor is None:
            risk_factor = self.risk_factor
        if isinstance(risk_factor, torch.Tensor):
            gamma = risk_factor.to(dtype=mu.dtype, device=mu.device)
        else:
            gamma = torch.tensor(risk_factor, dtype=mu.dtype, device=mu.device)
        gamma = gamma.reshape(-1)

        # `not all(>= 0)` also rejects NaN, which would otherwise reach
        # the solver through the square root below.
        if not bool(torch.all(gamma >= 0)):
            raise ValueError("risk_factor must be non-negative")

        # Every parameter handed to the layer gets the same batch size;
        # size-1 inputs are broadcast up to it.
        batch = max(mu.shape[0], L.shape[0], gamma.shape[0])
        for label, size in (("mu", mu.shape[0]),
                            ("L", L.shape[0]),
                            ("risk_factor", gamma.shape[0])):
            if size not in (1, batch):
                raise ValueError(
                    f"{label} has batch size {size}, which cannot be "
                    f"broadcast to batch size {batch}"
                )

        # Scale L to bake in the risk parameter before CVXPY:
        # ‖(sqrt(γ/2)·L)ᵀw‖² = (γ/2)·‖Lᵀw‖², so the CVXPY problem needs no
        # separate risk-aversion parameter.
        scale = torch.sqrt(gamma / 2.0).view(-1, 1, 1)
        L_scaled = (L * scale).expand(batch, -1, -1).contiguous()
        mu = mu.expand(batch, -1).contiguous()

        # Pass a copy so the shared class-level settings cannot be mutated.
        weights, = self.layer(
            mu, L_scaled,
            solver_args=dict(self._SOLVER_ARGS),
        )
        return weights