# financial_analyst / formula_generator.py
# Dmitry Beresnev
# add core modules
# e6b8a0f
"""
LaTeX formula generation module using sympy.
Handles:
- Generating symbolic mathematical formulas
- Creating LaTeX representations for all calculations
- Detailed variance expansion with smart truncation
- Both symbolic and numerical formula variants
"""
from typing import Dict, List, Tuple
import pandas as pd
import numpy as np
from sympy import symbols, Matrix, sqrt, latex
def generate_weight_formulas(
    weights: Dict[str, float],
    amounts: Dict[str, float]
) -> Tuple[str, str]:
    """
    Build the LaTeX for the portfolio weight calculation.

    Two strings are produced: the general definition of a weight, and an
    ``aligned`` environment with one line per ticker showing the amount,
    the portfolio total and the resulting weight.

    Args:
        weights: Pre-computed weights {ticker: weight}; its key order
            determines the order of the numerical lines.
        amounts: Invested amounts {ticker: amount}. Every ticker in
            ``weights`` must be present; the total is the sum of ALL
            values in this mapping.

    Returns:
        Tuple of (symbolic_latex, numerical_latex)
    """
    # Common denominator for every weight line.
    portfolio_total = sum(amounts.values())

    general_form = r"w_i = \frac{\text{amount}_i}{\sum_j \text{amount}_j}"

    # One "w_X = amount / total = weight" row per ticker.
    rows = [
        f"w_{{{name}}} = \\frac{{{amounts[name]:.2f}}}{{{portfolio_total:.2f}}} = {weight:.4f}"
        for name, weight in weights.items()
    ]

    substituted = "\n".join(
        ("\\begin{aligned}", " \\\\\n".join(rows), "\\end{aligned}")
    )
    return general_form, substituted
def generate_covariance_matrix_latex(
    cov_matrix: pd.DataFrame,
    annualized: bool = True
) -> str:
    """
    Render a covariance matrix as a LaTeX ``bmatrix`` labelled with Sigma.

    Args:
        cov_matrix: Covariance matrix DataFrame. The number of columns
            sets the size of the rendered (square) matrix.
        annualized: When True every entry is multiplied by 252 before
            formatting (presumably trading days per year, i.e. the input
            is expected to be daily covariance -- confirm with caller).

    Returns:
        LaTeX string for the matrix, entries formatted to 6 decimals
    """
    size = len(list(cov_matrix.columns))
    raw = cov_matrix.values
    scaled = raw * 252 if annualized else raw

    # Rows are separated by a LaTeX line break; cells within a row by "&".
    row_break = r" \\" + "\n"
    body = row_break.join(
        " & ".join(f"{scaled[r, c]:.6f}" for c in range(size))
        for r in range(size)
    )

    return r"\Sigma = \begin{bmatrix}" + "\n" + body + "\n" + r"\end{bmatrix}"
def generate_variance_formula_symbolic(tickers: List[str]) -> str:
    """
    Return the symbolic portfolio-variance formula as LaTeX.

    The result is an ``aligned`` environment with two lines: the matrix
    form (w^T Sigma w) followed by the double-sum expansion.

    Args:
        tickers: List of ticker symbols. Not used by the current
            implementation; the formula is generic.

    Returns:
        LaTeX string for symbolic variance formula
    """
    formula_lines = (
        # Compact matrix notation
        r"\sigma_p^2 = \mathbf{w}^T \Sigma \mathbf{w}",
        # Equivalent element-wise double sum
        r"\sigma_p^2 = \sum_{i=1}^{n} \sum_{j=1}^{n} w_i w_j \sigma_{ij}",
    )
    body = (r" \\" + "\n").join(formula_lines)
    return "\\begin{aligned}\n" + body + "\n\\end{aligned}"
def _truncate_terms(terms: List[str], head: int = 3, tail: int = 2) -> List[str]:
    """Keep the first `head` and last `tail` terms around a LaTeX cdots marker.

    The list is returned unchanged when it has no more than head + tail
    entries, because truncating then would hide nothing and the head and
    tail slices could overlap (repeating terms).
    """
    if len(terms) <= head + tail:
        return terms
    return terms[:head] + [r"\cdots"] + terms[-tail:]


def generate_variance_formula_expanded(
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]],
    smart_truncation: bool = True,
    truncation_threshold: int = 4
) -> str:
    """
    Generate detailed variance expansion showing all terms.

    The result is an ``aligned`` environment with four lines:
    1. Symbolic expansion term by term
    2. Numerical substitution
    3. Intermediate calculations (the contribution of each term)
    4. Final result (sum of all contributions)

    With smart truncation, lines 1-3 show the first 3 terms, "...", and
    the last 2 terms. Truncation is applied only when the portfolio has at
    least ``truncation_threshold`` tickers AND the breakdown has more than
    5 terms, so that no term is ever shown twice. The final result always
    sums every term of the breakdown, whether displayed or not.

    Args:
        weights: Portfolio weights; only the number of tickers is used
        cov_matrix: Covariance matrix (unused; kept for interface
            compatibility, covariances are read from variance_breakdown)
        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)
        smart_truncation: Whether to truncate long expansions
        truncation_threshold: Number of tickers before truncation kicks in

    Returns:
        LaTeX string with full variance expansion
    """
    should_truncate = smart_truncation and len(weights) >= truncation_threshold

    # Build the three parallel term lists in a single pass.
    symbolic_terms = []
    numerical_terms = []
    intermediate_values = []
    for ticker_i, ticker_j, w_i, w_j, cov_ij, contrib in variance_breakdown:
        if ticker_i == ticker_j:
            # Diagonal term: w_i^2 x sigma_ii
            symbolic_terms.append(
                f"w_{{{ticker_i}}}^2 \\sigma_{{{ticker_i}{ticker_j}}}"
            )
            numerical_terms.append(f"({w_i:.4f})^2 \\times {cov_ij:.6f}")
        else:
            # Off-diagonal term: w_i x w_j x sigma_ij
            symbolic_terms.append(
                f"w_{{{ticker_i}}} w_{{{ticker_j}}} \\sigma_{{{ticker_i}{ticker_j}}}"
            )
            numerical_terms.append(
                f"({w_i:.4f}) \\times ({w_j:.4f}) \\times {cov_ij:.6f}"
            )
        intermediate_values.append(f"{contrib:.6f}")

    # The total always covers every term, even those hidden by truncation.
    total_variance = sum(entry[5] for entry in variance_breakdown)

    if should_truncate:
        symbolic_display = _truncate_terms(symbolic_terms)
        numerical_display = _truncate_terms(numerical_terms)
        intermediate_display = _truncate_terms(intermediate_values)
    else:
        symbolic_display = symbolic_terms
        numerical_display = numerical_terms
        intermediate_display = intermediate_values

    # Assemble the aligned environment; "&=" aligns the equals signs.
    line_break = r" \\" + "\n"
    latex_str = "\\begin{aligned}\n"
    latex_str += r"\sigma_p^2 &= " + " + ".join(symbolic_display) + line_break
    latex_str += r" &= " + " + ".join(numerical_display) + line_break
    latex_str += r" &= " + " + ".join(intermediate_display) + line_break
    latex_str += f" &= {total_variance:.6f}\n"
    latex_str += "\\end{aligned}"
    return latex_str
def generate_variance_formula_expanded_full(
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
) -> str:
    """
    Produce the variance expansion with every term shown.

    Convenience wrapper around ``generate_variance_formula_expanded`` with
    truncation switched off; intended for a "Show all terms" toggle.

    Args:
        weights: Portfolio weights
        cov_matrix: Covariance matrix
        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)

    Returns:
        LaTeX string with complete variance expansion
    """
    return generate_variance_formula_expanded(
        weights=weights,
        cov_matrix=cov_matrix,
        variance_breakdown=variance_breakdown,
        smart_truncation=False,
    )
def generate_volatility_formulas(
    variance: float,
    volatility: float
) -> Tuple[str, str]:
    """
    Build the LaTeX showing volatility as the square root of variance.

    The numerical string only formats the supplied values; it does not
    recompute the volatility from the variance.

    Args:
        variance: Calculated portfolio variance (shown with 6 decimals)
        volatility: Calculated portfolio volatility (shown with 6
            decimals and again as a percentage with 2 decimals)

    Returns:
        Tuple of (symbolic_latex, numerical_latex)
    """
    general_form = r"\sigma_p = \sqrt{\sigma_p^2}"

    radicand = f"{variance:.6f}"
    as_decimal = f"{volatility:.6f}"
    as_percent = f"{volatility*100:.2f}"
    substituted = (
        "\\sigma_p = \\sqrt{" + radicand + "} = " + as_decimal
        + " = " + as_percent + "\\%"
    )

    return general_form, substituted
def generate_correlation_matrix_latex(cov_matrix: pd.DataFrame) -> str:
    """
    Derive the correlation matrix from a covariance matrix and render it.

    Each entry is rho_ij = sigma_ij / (sigma_i * sigma_j), where sigma_i is
    the square root of the i-th diagonal element of the covariance matrix.

    Args:
        cov_matrix: Covariance matrix

    Returns:
        LaTeX string for correlation matrix (``bmatrix``, 4 decimals)
    """
    # Per-asset standard deviations come from the diagonal (variances).
    sigma = np.sqrt(np.diag(cov_matrix))
    correlations = cov_matrix / np.outer(sigma, sigma)

    size = len(list(cov_matrix.columns))

    # Rows are separated by a LaTeX line break; cells within a row by "&".
    row_break = r" \\" + "\n"
    body = row_break.join(
        " & ".join(f"{correlations.iloc[r, c]:.4f}" for c in range(size))
        for r in range(size)
    )

    return (
        r"\text{Correlation Matrix} = \begin{bmatrix}" + "\n"
        + body
        + "\n" + r"\end{bmatrix}"
    )
def generate_all_formulas(
    amounts: Dict[str, float],
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance: float,
    volatility: float,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
) -> Dict[str, str]:
    """
    Run every formula generator and collect the results in one dictionary.

    The covariance matrix is rendered with ``annualized=True``; the
    correlation matrix is derived from ``cov_matrix`` as passed in. The
    variance expansion is produced twice: truncated and in full.

    Args:
        amounts: Portfolio amounts {ticker: amount}
        weights: Portfolio weights {ticker: weight}
        cov_matrix: Covariance matrix
        variance: Portfolio variance
        volatility: Portfolio volatility
        variance_breakdown: Detailed variance breakdown

    Returns:
        Dictionary of LaTeX strings with the keys:
            'weights_symbolic', 'weights_numerical',
            'covariance_matrix', 'correlation_matrix',
            'variance_symbolic', 'variance_expanded',
            'variance_expanded_full',
            'volatility_symbolic', 'volatility_numerical'
    """
    ticker_list = list(weights.keys())
    formulas: Dict[str, str] = {}

    # Weights (symbolic definition + per-ticker numbers)
    (
        formulas['weights_symbolic'],
        formulas['weights_numerical'],
    ) = generate_weight_formulas(weights, amounts)

    # Matrices
    formulas['covariance_matrix'] = generate_covariance_matrix_latex(
        cov_matrix, annualized=True
    )
    formulas['correlation_matrix'] = generate_correlation_matrix_latex(cov_matrix)

    # Variance: generic formula, truncated expansion, full expansion
    formulas['variance_symbolic'] = generate_variance_formula_symbolic(ticker_list)
    formulas['variance_expanded'] = generate_variance_formula_expanded(
        weights, cov_matrix, variance_breakdown, smart_truncation=True
    )
    formulas['variance_expanded_full'] = generate_variance_formula_expanded_full(
        weights, cov_matrix, variance_breakdown
    )

    # Volatility (symbolic definition + numbers)
    (
        formulas['volatility_symbolic'],
        formulas['volatility_numerical'],
    ) = generate_volatility_formulas(variance, volatility)

    return formulas