"""
Statistical analysis of option-implied probability density functions.

Calculate key statistics from the PDF:
- Expected value (mean)
- Standard deviation
- Skewness
- Excess kurtosis
- Implied move
- Tail probabilities
"""

import numpy as np
from typing import Dict, Optional
try:
    from scipy.integrate import trapezoid, cumulative_trapezoid
except ImportError:
    # Fallback for older scipy versions
    from scipy.integrate import trapz as trapezoid, cumtrapz as cumulative_trapezoid
from scipy.stats import skew, kurtosis


class PDFStatistics:
    """Calculate and store summary statistics of an implied PDF.

    The density is given as samples ``pdf`` on an ascending ``strikes`` grid
    and every integral is evaluated with the trapezoidal rule.

    All statistics describe the density renormalized to unit mass over the
    supplied grid. Moments, percentiles and tail probabilities therefore stay
    mutually consistent even when the input does not integrate to exactly one.
    When the integrated mass is not positive, the affected statistics are NaN.

    Attributes:
        strikes: Strike grid as a 1-D float array.
        pdf: Density samples aligned with ``strikes``.
        spot: Current spot price.
        T: Time to expiration in years.
        stats: Dictionary of statistics computed at construction time.
    """

    def __init__(
        self,
        strikes: np.ndarray,
        pdf: np.ndarray,
        spot_price: float,
        time_to_expiry: float
    ):
        """
        Initialize PDF statistics calculator.

        Args:
            strikes: Array of strike prices, sorted in ascending order
            pdf: Array of PDF values, one per strike
            spot_price: Current spot price
            time_to_expiry: Time to expiration in years

        Raises:
            ValueError: If the inputs are not 1-D arrays of equal length,
                contain fewer than two points, or the strikes are not sorted
                in ascending order.
        """
        strikes_arr = np.asarray(strikes, dtype=float)
        pdf_arr = np.asarray(pdf, dtype=float)

        if strikes_arr.ndim != 1 or strikes_arr.shape != pdf_arr.shape:
            raise ValueError("strikes and pdf must be 1-D arrays of equal length")
        if strikes_arr.size < 2:
            raise ValueError("at least two grid points are required")
        # The trapezoidal integrals and np.interp both assume an ascending grid.
        if np.any(np.diff(strikes_arr) < 0):
            raise ValueError("strikes must be sorted in ascending order")

        self.strikes = strikes_arr
        self.pdf = pdf_arr
        self.spot = spot_price
        self.T = time_to_expiry

        # Calculate all statistics
        self.stats = self._calculate_all_statistics()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _expectation(self, values: np.ndarray) -> float:
        """Return E[values] under the density renormalized to unit mass."""
        mass = trapezoid(self.pdf, self.strikes)
        if not mass > 0:
            # Zero, negative or NaN mass: no meaningful expectation exists.
            return np.float64(np.nan)
        return trapezoid(values * self.pdf, self.strikes) / mass

    def _normalized_cdf(self) -> Optional[np.ndarray]:
        """Return the CDF on the strike grid, or None if the mass is not positive."""
        raw_cdf = cumulative_trapezoid(self.pdf, self.strikes, initial=0)
        mass = raw_cdf[-1]
        if not mass > 0:
            return None
        return raw_cdf / mass

    def _calculate_all_statistics(self) -> Dict[str, float]:
        """Calculate all statistical measures."""
        stats = {}

        # Expected value (mean)
        stats['mean'] = self.calculate_mean()

        # Variance and standard deviation
        stats['variance'] = self.calculate_variance(stats['mean'])
        stats['std'] = np.sqrt(stats['variance'])

        # Skewness (measure of asymmetry)
        stats['skewness'] = self.calculate_skewness(stats['mean'], stats['std'])

        # Excess kurtosis (measure of tail heaviness)
        stats['excess_kurtosis'] = self.calculate_kurtosis(stats['mean'], stats['std'])

        # Implied move (expected percentage change)
        stats['implied_move_pct'] = (stats['std'] / self.spot) * 100

        # Annualized volatility; undefined when no time remains to expiry
        if self.T > 0:
            stats['implied_volatility'] = stats['std'] / (self.spot * np.sqrt(self.T))
        else:
            stats['implied_volatility'] = np.float64(np.nan)

        # Median (50th percentile)
        stats['median'] = self.calculate_percentile(50)

        # Mode (most likely value - peak of PDF)
        stats['mode'] = self.strikes[np.argmax(self.pdf)]

        # Tail probabilities
        stats['prob_down_5pct'] = self.calculate_tail_probability(-5)
        stats['prob_up_5pct'] = self.calculate_tail_probability(5)
        stats['prob_down_10pct'] = self.calculate_tail_probability(-10)
        stats['prob_up_10pct'] = self.calculate_tail_probability(10)

        # Risk-neutral drift
        stats['risk_neutral_drift_pct'] = ((stats['mean'] - self.spot) / self.spot) * 100

        # Confidence intervals
        stats['ci_95_lower'] = self.calculate_percentile(2.5)
        stats['ci_95_upper'] = self.calculate_percentile(97.5)
        stats['ci_68_lower'] = self.calculate_percentile(16)
        stats['ci_68_upper'] = self.calculate_percentile(84)

        return stats

    # ------------------------------------------------------------------
    # Moments
    # ------------------------------------------------------------------

    def calculate_mean(self) -> float:
        """
        Calculate expected value (mean) of the distribution.

        E[S] = ∫ S × f(S) dS / ∫ f(S) dS
        """
        return self._expectation(self.strikes)

    def calculate_variance(self, mean: Optional[float] = None) -> float:
        """
        Calculate variance.

        Var[S] = E[(S - E[S])²] = ∫ (S - μ)² × f(S) dS / ∫ f(S) dS

        Args:
            mean: Precomputed mean; calculated on demand when omitted
        """
        if mean is None:
            mean = self.calculate_mean()

        return self._expectation((self.strikes - mean) ** 2)

    def calculate_skewness(
        self,
        mean: Optional[float] = None,
        std: Optional[float] = None
    ) -> float:
        """
        Calculate skewness (measure of asymmetry).

        Skew = E[(S - μ)³] / σ³

        Negative skew: left tail is heavier (more downside risk)
        Positive skew: right tail is heavier (more upside potential)

        Args:
            mean: Precomputed mean; calculated on demand when omitted
            std: Precomputed standard deviation; calculated on demand when omitted

        Returns:
            Skewness, or 0.0 when the standard deviation is exactly zero
        """
        if mean is None:
            mean = self.calculate_mean()
        if std is None:
            std = np.sqrt(self.calculate_variance(mean))

        if std == 0:
            return 0.0

        third_moment = self._expectation((self.strikes - mean) ** 3)
        return third_moment / (std ** 3)

    def calculate_kurtosis(
        self,
        mean: Optional[float] = None,
        std: Optional[float] = None
    ) -> float:
        """
        Calculate excess kurtosis (measure of tail heaviness).

        Kurtosis = E[(S - μ)⁴] / σ⁴ - 3

        Excess kurtosis > 0: fat tails (more extreme events than normal distribution)
        Excess kurtosis < 0: thin tails (fewer extreme events)

        Args:
            mean: Precomputed mean; calculated on demand when omitted
            std: Precomputed standard deviation; calculated on demand when omitted

        Returns:
            Excess kurtosis, or 0.0 when the standard deviation is exactly zero
        """
        if mean is None:
            mean = self.calculate_mean()
        if std is None:
            std = np.sqrt(self.calculate_variance(mean))

        if std == 0:
            return 0.0

        fourth_moment = self._expectation((self.strikes - mean) ** 4)
        return (fourth_moment / (std ** 4)) - 3

    # ------------------------------------------------------------------
    # Quantiles and tail probabilities
    # ------------------------------------------------------------------

    def calculate_percentile(self, percentile: float) -> float:
        """
        Calculate percentile of the distribution.

        The normalized CDF is inverted by linear interpolation. Percentiles
        outside 0-100 are clamped to the first/last strike by ``np.interp``.

        Args:
            percentile: Percentile to calculate (0-100)

        Returns:
            Strike level at that percentile, or NaN when the density has no
            positive mass
        """
        cdf = self._normalized_cdf()
        if cdf is None:
            return np.float64(np.nan)

        # np.interp requires increasing x-coordinates. Numerically extracted
        # densities can dip below zero and make the raw CDF non-monotone, so
        # take the running maximum (a no-op for a non-negative density).
        monotone_cdf = np.maximum.accumulate(cdf)

        # Find strike where CDF = percentile / 100
        return np.interp(percentile / 100, monotone_cdf, self.strikes)

    def calculate_tail_probability(self, percent_move: float) -> float:
        """
        Calculate probability of move greater than percent_move.

        The CDF is interpolated at the exact target price, so the partial
        grid cell between the target and its neighbouring strike is included
        rather than dropped.

        Args:
            percent_move: Percentage move (positive for upside, negative for downside)
                         e.g., -5 for 5% down, 5 for 5% up

        Returns:
            Probability as decimal (0 to 1), or NaN when the density has no
            positive mass
        """
        target_price = self.spot * (1 + percent_move / 100)

        cdf = self._normalized_cdf()
        if cdf is None:
            return np.float64(np.nan)

        # Mass at or below the target; clamps to 0 / 1 outside the grid.
        prob_below = np.interp(target_price, self.strikes, cdf)

        if percent_move < 0:
            # Probability of moving down more than percent_move
            return prob_below
        # Probability of moving up more than percent_move
        return 1.0 - prob_below

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------

    def get_summary(self) -> Dict[str, float]:
        """Get all statistics as dictionary (a shallow copy of ``stats``)."""
        return self.stats.copy()

    def print_summary(self) -> None:
        """Print formatted summary of statistics."""
        s = self.stats

        print("\n" + "="*60)
        print("PDF STATISTICS SUMMARY")
        print("="*60)

        print(f"\nCurrent Spot Price: ${self.spot:.2f}")
        print(f"Time to Expiry: {self.T*365:.0f} days")

        print("\n--- Central Tendency ---")
        print(f"Expected Price (Mean):  ${s['mean']:.2f}")
        print(f"Median:                 ${s['median']:.2f}")
        print(f"Mode (Most Likely):     ${s['mode']:.2f}")

        print("\n--- Dispersion ---")
        print(f"Standard Deviation:     ${s['std']:.2f}")
        print(f"Implied Move:           ±{s['implied_move_pct']:.2f}%")
        print(f"Implied Volatility:     {s['implied_volatility']*100:.2f}%")

        print("\n--- Shape ---")
        print(f"Skewness:               {s['skewness']:.3f}", end="")
        if s['skewness'] < -0.5:
            print("  (strong negative skew - heavy left tail)")
        elif s['skewness'] > 0.5:
            print("  (strong positive skew - heavy right tail)")
        else:
            print("  (approximately symmetric)")

        print(f"Excess Kurtosis:        {s['excess_kurtosis']:.3f}", end="")
        if s['excess_kurtosis'] > 0:
            print("  (fat tails - more extreme events)")
        else:
            print("  (thin tails - fewer extreme events)")

        print("\n--- Confidence Intervals ---")
        print(f"68% CI:  ${s['ci_68_lower']:.2f} - ${s['ci_68_upper']:.2f}")
        print(f"95% CI:  ${s['ci_95_lower']:.2f} - ${s['ci_95_upper']:.2f}")

        print("\n--- Tail Probabilities ---")
        print(f"P(Down >5%):  {s['prob_down_5pct']*100:.2f}%")
        print(f"P(Up >5%):    {s['prob_up_5pct']*100:.2f}%")
        print(f"P(Down >10%): {s['prob_down_10pct']*100:.2f}%")
        print(f"P(Up >10%):   {s['prob_up_10pct']*100:.2f}%")

        print("\n--- Risk-Neutral Drift ---")
        print(f"Drift from Spot: {s['risk_neutral_drift_pct']:+.2f}%")

        print("="*60 + "\n")


def calculate_pdf_statistics(
    strikes: np.ndarray,
    pdf: np.ndarray,
    spot_price: float,
    time_to_expiry: float
) -> Dict[str, float]:
    """
    Compute the statistics dictionary for a PDF in a single call.

    Builds a ``PDFStatistics`` from the arguments and returns the result of
    its ``get_summary()``.

    Args:
        strikes: Strike prices
        pdf: PDF values
        spot_price: Current spot price
        time_to_expiry: Time to expiration in years

    Returns:
        Dictionary of statistics
    """
    return PDFStatistics(
        strikes=strikes,
        pdf=pdf,
        spot_price=spot_price,
        time_to_expiry=time_to_expiry,
    ).get_summary()


if __name__ == "__main__":
    # Self-test on a synthetic lognormal distribution centred on spot.
    print("Testing PDF statistics...")

    spot = 450.0
    T = 30 / 365

    # 15% annualized volatility scaled to the horizon (log-space sigma)
    sigma = 0.15 * np.sqrt(T)

    # Lognormal PDF with median equal to spot
    strikes = np.linspace(350, 550, 500)
    log_moneyness = np.log(strikes / spot)
    pdf = np.exp(-(log_moneyness ** 2) / (2 * sigma ** 2)) / \
          (strikes * sigma * np.sqrt(2 * np.pi))

    # Normalize over the truncated strike grid
    pdf = pdf / trapezoid(pdf, strikes)

    # Calculate statistics
    stats_calc = PDFStatistics(strikes, pdf, spot, T)

    # Print summary
    stats_calc.print_summary()

    # Validate
    results = stats_calc.stats
    assert abs(results['mean'] - spot) < 10, "Mean should be close to spot"
    assert results['std'] > 0, "Standard deviation should be positive"
    assert results['ci_95_lower'] < results['ci_68_lower'] < results['median'], \
        "Lower percentiles should be ordered below the median"
    assert results['median'] < results['ci_68_upper'] < results['ci_95_upper'], \
        "Upper percentiles should be ordered above the median"

    # The 16th/84th percentiles of a lognormal lie roughly one sigma either
    # side of the median in log space, so their ratio is close to exp(2*sigma).
    ci_68_ratio = results['ci_68_upper'] / results['ci_68_lower']
    assert abs(ci_68_ratio - np.exp(2 * sigma)) < 0.01, \
        "68% CI width should match the lognormal sigma"

    print("✅ PDF statistics test passed!")