# option-pdf-vis / src/core/statistics.py
# Arjit
# Production-ready Option-Implied PDF Visualizer
# 8e1643b
"""
Statistical analysis of option-implied probability density functions.
Calculate key statistics from the PDF:
- Expected value (mean)
- Standard deviation
- Skewness
- Excess kurtosis
- Implied move
- Tail probabilities
"""
import numpy as np
from typing import Dict, Optional
try:
from scipy.integrate import trapezoid, cumulative_trapezoid
except ImportError:
# Fallback for older scipy versions
from scipy.integrate import trapz as trapezoid, cumtrapz as cumulative_trapezoid
from scipy.stats import skew, kurtosis
class PDFStatistics:
    """Calculate and store summary statistics of an option-implied PDF.

    Every statistic is obtained by trapezoidal integration over the strike
    grid. All of them are computed once, at construction time, and kept in
    ``self.stats``.

    The moments (mean, variance, skewness, kurtosis) and the tail
    probabilities integrate ``pdf`` exactly as supplied; only the percentile
    calculation rescales the cumulative integral to end at 1. Pass a PDF that
    already integrates to 1 over ``strikes`` if the moments are to be read as
    true expectations.

    ``strikes`` is expected to be sorted in ascending order: the percentile
    and tail-probability calculations interpolate along the grid.
    """

    def __init__(
        self,
        strikes: np.ndarray,
        pdf: np.ndarray,
        spot_price: float,
        time_to_expiry: float
    ):
        """
        Initialize PDF statistics calculator.

        Args:
            strikes: Array of strike prices (ascending)
            pdf: Array of PDF values, one per strike
            spot_price: Current spot price
            time_to_expiry: Time to expiration in years
        """
        # Coerce to float arrays so lists and other array-likes work with the
        # element-wise arithmetic and positional indexing used below.
        self.strikes = np.asarray(strikes, dtype=float)
        self.pdf = np.asarray(pdf, dtype=float)
        self.spot = spot_price
        self.T = time_to_expiry
        # Calculate all statistics
        self.stats = self._calculate_all_statistics()

    def _calculate_all_statistics(self) -> Dict[str, float]:
        """Calculate all statistical measures and return them keyed by name."""
        stats = {}
        # Expected value (mean)
        stats['mean'] = self.calculate_mean()
        # Variance and standard deviation
        stats['variance'] = self.calculate_variance(stats['mean'])
        stats['std'] = np.sqrt(stats['variance'])
        # Skewness (measure of asymmetry)
        stats['skewness'] = self.calculate_skewness(stats['mean'], stats['std'])
        # Excess kurtosis (measure of tail heaviness)
        stats['excess_kurtosis'] = self.calculate_kurtosis(stats['mean'], stats['std'])
        # Implied move: one standard deviation as a percentage of spot
        stats['implied_move_pct'] = (stats['std'] / self.spot) * 100
        # Annualized volatility: relative standard deviation scaled by 1/sqrt(T)
        stats['implied_volatility'] = stats['std'] / (self.spot * np.sqrt(self.T))
        # Median (50th percentile)
        stats['median'] = self.calculate_percentile(50)
        # Mode (most likely value - strike at the peak of the PDF)
        max_idx = np.argmax(self.pdf)
        stats['mode'] = self.strikes[max_idx]
        # Tail probabilities
        stats['prob_down_5pct'] = self.calculate_tail_probability(-5)
        stats['prob_up_5pct'] = self.calculate_tail_probability(5)
        stats['prob_down_10pct'] = self.calculate_tail_probability(-10)
        stats['prob_up_10pct'] = self.calculate_tail_probability(10)
        # Risk-neutral drift: mean relative to spot, in percent
        stats['risk_neutral_drift_pct'] = ((stats['mean'] - self.spot) / self.spot) * 100
        # Confidence intervals
        stats['ci_95_lower'] = self.calculate_percentile(2.5)
        stats['ci_95_upper'] = self.calculate_percentile(97.5)
        stats['ci_68_lower'] = self.calculate_percentile(16)
        stats['ci_68_upper'] = self.calculate_percentile(84)
        return stats

    def calculate_mean(self) -> float:
        """
        Calculate expected value (mean) of the distribution.

        E[S] = ∫ S × f(S) dS
        """
        return trapezoid(self.strikes * self.pdf, self.strikes)

    def calculate_variance(self, mean: Optional[float] = None) -> float:
        """
        Calculate variance.

        Var[S] = E[(S - E[S])²] = ∫ (S - μ)² × f(S) dS

        Args:
            mean: Precomputed mean; calculated from the PDF when omitted
        """
        if mean is None:
            mean = self.calculate_mean()
        return trapezoid((self.strikes - mean)**2 * self.pdf, self.strikes)

    def calculate_skewness(
        self,
        mean: Optional[float] = None,
        std: Optional[float] = None
    ) -> float:
        """
        Calculate skewness (measure of asymmetry).

        Skew = E[(S - μ)³] / σ³
        Negative skew: left tail is heavier (more downside risk)
        Positive skew: right tail is heavier (more upside potential)

        Args:
            mean: Precomputed mean; calculated when omitted
            std: Precomputed standard deviation; calculated when omitted

        Returns:
            Skewness, or 0.0 when the standard deviation is zero
        """
        if mean is None:
            mean = self.calculate_mean()
        if std is None:
            std = np.sqrt(self.calculate_variance(mean))
        if std == 0:
            # Degenerate distribution: avoid dividing by zero.
            return 0.0
        third_moment = trapezoid((self.strikes - mean)**3 * self.pdf, self.strikes)
        return third_moment / (std**3)

    def calculate_kurtosis(
        self,
        mean: Optional[float] = None,
        std: Optional[float] = None
    ) -> float:
        """
        Calculate excess kurtosis (measure of tail heaviness).

        Kurtosis = E[(S - μ)⁴] / σ⁴ - 3
        Excess kurtosis > 0: fat tails (more extreme events than normal distribution)
        Excess kurtosis < 0: thin tails (fewer extreme events)

        Args:
            mean: Precomputed mean; calculated when omitted
            std: Precomputed standard deviation; calculated when omitted

        Returns:
            Excess kurtosis, or 0.0 when the standard deviation is zero
        """
        if mean is None:
            mean = self.calculate_mean()
        if std is None:
            std = np.sqrt(self.calculate_variance(mean))
        if std == 0:
            # Degenerate distribution: avoid dividing by zero.
            return 0.0
        fourth_moment = trapezoid((self.strikes - mean)**4 * self.pdf, self.strikes)
        return (fourth_moment / (std**4)) - 3

    def calculate_percentile(self, percentile: float) -> float:
        """
        Calculate percentile of the distribution.

        Args:
            percentile: Percentile to calculate (0-100)

        Returns:
            Strike level at that percentile
        """
        # Cumulative integral of the PDF, rescaled so that it ends at 1.
        cdf = cumulative_trapezoid(self.pdf, self.strikes, initial=0)
        cdf = cdf / cdf[-1]
        # Invert the CDF by linear interpolation: strike where CDF = target.
        return np.interp(percentile / 100, cdf, self.strikes)

    def calculate_tail_probability(self, percent_move: float) -> float:
        """
        Calculate probability of move greater than percent_move.

        The PDF is integrated from the target price itself, with the density
        at the target obtained by linear interpolation, so the partial grid
        interval next to the target is included in the result.

        Args:
            percent_move: Percentage move (positive for upside, negative for downside)
                e.g., -5 for 5% down, 5 for 5% up

        Returns:
            Probability as decimal (0 to 1 for a normalized PDF)
        """
        strikes, pdf = self.strikes, self.pdf
        if strikes.size < 2:
            # Nothing to integrate over.
            return 0.0

        target_price = self.spot * (1 + percent_move / 100)
        lowest, highest = strikes[0], strikes[-1]

        if percent_move < 0:
            # Probability of moving down more than percent_move.
            if target_price <= lowest:
                return 0.0
            if target_price >= highest:
                return trapezoid(pdf, strikes)
            inside = strikes < target_price
            density_at_target = np.interp(target_price, strikes, pdf)
            xs = np.append(strikes[inside], target_price)
            ys = np.append(pdf[inside], density_at_target)
        else:
            # Probability of moving up more than percent_move.
            if target_price >= highest:
                return 0.0
            if target_price <= lowest:
                return trapezoid(pdf, strikes)
            inside = strikes > target_price
            density_at_target = np.interp(target_price, strikes, pdf)
            xs = np.concatenate(([target_price], strikes[inside]))
            ys = np.concatenate(([density_at_target], pdf[inside]))

        return trapezoid(ys, xs)

    def get_summary(self) -> Dict[str, float]:
        """Get all statistics as dictionary (a shallow copy of ``self.stats``)."""
        return self.stats.copy()

    def print_summary(self) -> None:
        """Print formatted summary of statistics to standard output."""
        s = self.stats
        print("\n" + "="*60)
        print("PDF STATISTICS SUMMARY")
        print("="*60)
        print(f"\nCurrent Spot Price: ${self.spot:.2f}")
        print(f"Time to Expiry: {self.T*365:.0f} days")
        print("\n--- Central Tendency ---")
        print(f"Expected Price (Mean): ${s['mean']:.2f}")
        print(f"Median: ${s['median']:.2f}")
        print(f"Mode (Most Likely): ${s['mode']:.2f}")
        print("\n--- Dispersion ---")
        print(f"Standard Deviation: ${s['std']:.2f}")
        print(f"Implied Move: ±{s['implied_move_pct']:.2f}%")
        print(f"Implied Volatility: {s['implied_volatility']*100:.2f}%")
        print("\n--- Shape ---")
        skewness = s['skewness']
        if skewness < -0.5:
            skew_note = " (strong negative skew - heavy left tail)"
        elif skewness > 0.5:
            skew_note = " (strong positive skew - heavy right tail)"
        else:
            skew_note = " (approximately symmetric)"
        print(f"Skewness: {skewness:.3f}{skew_note}")
        excess_kurtosis = s['excess_kurtosis']
        if excess_kurtosis > 0:
            kurt_note = " (fat tails - more extreme events)"
        else:
            kurt_note = " (thin tails - fewer extreme events)"
        print(f"Excess Kurtosis: {excess_kurtosis:.3f}{kurt_note}")
        print("\n--- Confidence Intervals ---")
        print(f"68% CI: ${s['ci_68_lower']:.2f} - ${s['ci_68_upper']:.2f}")
        print(f"95% CI: ${s['ci_95_lower']:.2f} - ${s['ci_95_upper']:.2f}")
        print("\n--- Tail Probabilities ---")
        print(f"P(Down >5%): {s['prob_down_5pct']*100:.2f}%")
        print(f"P(Up >5%): {s['prob_up_5pct']*100:.2f}%")
        print(f"P(Down >10%): {s['prob_down_10pct']*100:.2f}%")
        print(f"P(Up >10%): {s['prob_up_10pct']*100:.2f}%")
        print("\n--- Risk-Neutral Drift ---")
        print(f"Drift from Spot: {s['risk_neutral_drift_pct']:+.2f}%")
        print("="*60 + "\n")
def calculate_pdf_statistics(
    strikes: np.ndarray,
    pdf: np.ndarray,
    spot_price: float,
    time_to_expiry: float
) -> Dict[str, float]:
    """
    Compute the full set of PDF statistics in a single call.

    Builds a PDFStatistics instance from the arguments and returns the
    dictionary produced by its get_summary() method.

    Args:
        strikes: Strike prices
        pdf: PDF values
        spot_price: Current spot price
        time_to_expiry: Time to expiration in years

    Returns:
        Dictionary of statistics
    """
    return PDFStatistics(
        strikes=strikes,
        pdf=pdf,
        spot_price=spot_price,
        time_to_expiry=time_to_expiry,
    ).get_summary()
if __name__ == "__main__":
    # Self-test against a synthetic lognormal-shaped distribution centred on spot.
    print("Testing PDF statistics...")
    spot = 450.0
    T = 30 / 365
    # Create synthetic PDF (lognormal-like)
    strikes = np.linspace(350, 550, 500)
    mean = spot
    std = spot * 0.15 * np.sqrt(T)
    # Lognormal PDF. The exponent divides by 2 * (std / spot)**2, i.e. the
    # standard deviation expressed in log-return terms. The prefactor's scale
    # is irrelevant because the curve is normalized right after.
    pdf = (1 / (strikes * std * np.sqrt(2 * np.pi))) * \
        np.exp(-((np.log(strikes) - np.log(mean))**2) / (2 * std**2 / (spot**2)))
    # Normalize so the PDF integrates to 1 over the strike grid
    pdf = pdf / trapezoid(pdf, strikes)
    # Calculate statistics
    stats_calc = PDFStatistics(strikes, pdf, spot, T)
    # Print summary
    stats_calc.print_summary()
    # Validate
    assert abs(stats_calc.stats['mean'] - spot) < 10, "Mean should be close to spot"
    assert stats_calc.stats['std'] > 0, "Standard deviation should be positive"
    # The previous check here ended in "or True" and could never fail; verify
    # instead that the median lies inside the 68% interval.
    assert (
        stats_calc.stats['ci_68_lower']
        < stats_calc.stats['median']
        < stats_calc.stats['ci_68_upper']
    ), "Median should lie inside the 68% confidence interval"
    print("✅ PDF statistics test passed!")