"""
LaTeX formula generation module.

Handles:
- Generating symbolic mathematical formulas
- Creating LaTeX representations for all calculations
- Detailed variance expansion with smart truncation
- Both symbolic and numerical formula variants

Every generator in this module assembles its LaTeX with plain string
formatting; the sympy names imported below are not called by any of them.
"""

from typing import Dict, List, Sequence, Tuple

import numpy as np
import pandas as pd

try:
    from sympy import symbols, Matrix, sqrt, latex  # noqa: F401
except ImportError:
    # No function in this module calls sympy, so a missing installation must
    # not make the string-based generators unusable. The names stay defined
    # so that attribute access on the module keeps working.
    symbols = Matrix = sqrt = latex = None


# One entry of the variance breakdown:
# (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)
_VarianceTerm = Tuple[str, str, float, float, float, float]

# Factor applied to the covariance values when ``annualized=True``
# (presumably the number of trading days per year -- confirm with the caller).
_ANNUALIZATION_FACTOR = 252

# Shape of a truncated expansion: leading terms, an ellipsis, trailing terms.
_TRUNCATED_HEAD_TERMS = 3
_TRUNCATED_TAIL_TERMS = 2
_ELLIPSIS = r"\cdots"


def _aligned(lines: Sequence[str]) -> str:
    """Wrap *lines* in an ``aligned`` environment, one LaTeX row per entry."""
    return "\\begin{aligned}\n" + " \\\\\n".join(lines) + "\n\\end{aligned}"


def _bmatrix(lhs: str, values, size: int, precision: int) -> str:
    """Render the leading ``size`` x ``size`` part of *values* as ``lhs = bmatrix``."""
    rows = [
        " & ".join(f"{values[i, j]:.{precision}f}" for j in range(size))
        for i in range(size)
    ]
    return lhs + " = \\begin{bmatrix}\n" + " \\\\\n".join(rows) + "\n\\end{bmatrix}"


def _truncate_terms(terms: List[str]) -> List[str]:
    """Return the head terms, an ellipsis and the tail terms of *terms*.

    Lists that are not longer than head + tail are returned in full:
    slicing them would make the head and tail overlap, so the same term
    would be printed twice next to a misleading ellipsis.
    """
    head, tail = _TRUNCATED_HEAD_TERMS, _TRUNCATED_TAIL_TERMS
    if len(terms) <= head + tail:
        return list(terms)
    return terms[:head] + [_ELLIPSIS] + terms[len(terms) - tail:]


def generate_weight_formulas(
    weights: Dict[str, float],
    amounts: Dict[str, float]
) -> Tuple[str, str]:
    """
    Generate weight calculation formulas.

    Returns both symbolic and numerical versions. The numerical version has
    one row per ticker of ``weights``, showing that ticker's amount over the
    sum of all amounts and the supplied weight (the weight is displayed as
    given, it is not recomputed here).

    Args:
        weights: Calculated weights {ticker: weight}
        amounts: Original amounts {ticker: amount}

    Returns:
        Tuple of (symbolic_latex, numerical_latex)

    Raises:
        KeyError: If a ticker of ``weights`` is missing from ``amounts``.
    """
    symbolic = r"w_i = \frac{\text{amount}_i}{\sum_j \text{amount}_j}"

    total = sum(amounts.values())
    rows = [
        f"w_{{{ticker}}} = \\frac{{{amounts[ticker]:.2f}}}{{{total:.2f}}} = {weight:.4f}"
        for ticker, weight in weights.items()
    ]
    return symbolic, _aligned(rows)


def generate_covariance_matrix_latex(
    cov_matrix: pd.DataFrame,
    annualized: bool = True
) -> str:
    """
    Generate LaTeX representation of covariance matrix.

    Args:
        cov_matrix: Covariance matrix DataFrame
        annualized: Whether to show annualized values (the stored values
            multiplied by 252)

    Returns:
        LaTeX string for the matrix, entries formatted with 6 decimals
    """
    values = cov_matrix.to_numpy()
    if annualized:
        values = values * _ANNUALIZATION_FACTOR
    return _bmatrix(r"\Sigma", values, len(cov_matrix.columns), 6)


def generate_variance_formula_symbolic(tickers: List[str]) -> str:
    """
    Generate symbolic variance formula using matrix notation.

    Formula: σ²_p = w^T × Σ × w

    Args:
        tickers: List of ticker symbols (not read; the formula is generic)

    Returns:
        LaTeX string showing the matrix form followed by the double-sum form
    """
    return _aligned([
        r"\sigma_p^2 = \mathbf{w}^T \Sigma \mathbf{w}",
        r"\sigma_p^2 = \sum_{i=1}^{n} \sum_{j=1}^{n} w_i w_j \sigma_{ij}",
    ])


def generate_variance_formula_expanded(
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]],
    smart_truncation: bool = True,
    truncation_threshold: int = 4
) -> str:
    """
    Generate detailed variance expansion showing all terms.

    Shows, one aligned row each:
    1. Symbolic expansion term by term
    2. Numerical substitution
    3. Intermediate calculations (each term's contribution)
    4. Final result (the sum of all contributions)

    With smart truncation: shows the first 3 terms + "..." + the last 2 terms
    for readability. Truncation only happens when there are more than 5
    terms; shorter expansions are always shown in full so that no term is
    repeated. The final result always sums every term, shown or not.

    Args:
        weights: Portfolio weights (only the number of tickers is used)
        cov_matrix: Covariance matrix (not read; the covariances come from
            ``variance_breakdown``)
        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)
        smart_truncation: Whether to truncate long expansions
        truncation_threshold: Number of tickers before truncation kicks in

    Returns:
        LaTeX string with full variance expansion
    """
    # Materialise once so that a one-shot iterable is handled correctly.
    breakdown: List[_VarianceTerm] = list(variance_breakdown)

    symbolic_terms: List[str] = []
    numerical_terms: List[str] = []
    contributions: List[float] = []
    for ticker_i, ticker_j, w_i, w_j, cov_ij, contrib in breakdown:
        sigma = f"\\sigma_{{{ticker_i}{ticker_j}}}"
        if ticker_i == ticker_j:
            # Diagonal term: w_i^2 × σ_ii
            symbolic_terms.append(f"w_{{{ticker_i}}}^2 {sigma}")
            numerical_terms.append(f"({w_i:.4f})^2 \\times {cov_ij:.6f}")
        else:
            # Off-diagonal term: w_i × w_j × σ_ij
            symbolic_terms.append(f"w_{{{ticker_i}}} w_{{{ticker_j}}} {sigma}")
            numerical_terms.append(
                f"({w_i:.4f}) \\times ({w_j:.4f}) \\times {cov_ij:.6f}"
            )
        contributions.append(contrib)

    intermediate_values = [f"{contrib:.6f}" for contrib in contributions]
    total_variance = sum(contributions)

    if smart_truncation and len(weights) >= truncation_threshold:
        symbolic_terms = _truncate_terms(symbolic_terms)
        numerical_terms = _truncate_terms(numerical_terms)
        intermediate_values = _truncate_terms(intermediate_values)

    return _aligned([
        r"\sigma_p^2 &= " + " + ".join(symbolic_terms),
        r" &= " + " + ".join(numerical_terms),
        r" &= " + " + ".join(intermediate_values),
        f" &= {total_variance:.6f}",
    ])


def generate_variance_formula_expanded_full(
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
) -> str:
    """
    Generate FULL variance expansion without truncation.

    Use this for "Show all terms" toggle. Delegates to
    ``generate_variance_formula_expanded`` with truncation disabled.

    Args:
        weights: Portfolio weights
        cov_matrix: Covariance matrix
        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)

    Returns:
        LaTeX string with complete variance expansion
    """
    return generate_variance_formula_expanded(
        weights,
        cov_matrix,
        variance_breakdown,
        smart_truncation=False,
    )


def generate_volatility_formulas(
    variance: float,
    volatility: float
) -> Tuple[str, str]:
    """
    Generate volatility calculation formulas.

    Returns both symbolic and numerical versions. Both inputs are displayed
    as given; the square root is not recomputed here.

    Args:
        variance: Calculated portfolio variance
        volatility: Calculated portfolio volatility

    Returns:
        Tuple of (symbolic_latex, numerical_latex)
    """
    symbolic = r"\sigma_p = \sqrt{\sigma_p^2}"
    percent = volatility * 100
    numerical = (
        f"\\sigma_p = \\sqrt{{{variance:.6f}}} = {volatility:.6f} = {percent:.2f}\\%"
    )
    return symbolic, numerical


def generate_correlation_matrix_latex(cov_matrix: pd.DataFrame) -> str:
    """
    Generate correlation matrix from covariance matrix.

    Correlation: ρ_ij = σ_ij / (σ_i × σ_j)

    Args:
        cov_matrix: Covariance matrix

    Returns:
        LaTeX string for correlation matrix, entries formatted with 4 decimals
    """
    # σ_i is the square root of the i-th diagonal entry; the outer product
    # gives the σ_i × σ_j denominator for every cell at once.
    std_devs = np.sqrt(np.diag(cov_matrix))
    corr_frame = cov_matrix / np.outer(std_devs, std_devs)
    return _bmatrix(
        r"\text{Correlation Matrix}",
        corr_frame.to_numpy(),
        len(cov_matrix.columns),
        4,
    )


def generate_all_formulas(
    amounts: Dict[str, float],
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance: float,
    volatility: float,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
) -> Dict[str, str]:
    """
    Generate all LaTeX formulas for the portfolio analysis.

    This is the orchestrator function that generates all formula variants.
    The covariance matrix is rendered annualized; the correlation matrix is
    derived from the covariance matrix as passed in.

    Args:
        amounts: Portfolio amounts {ticker: amount}
        weights: Portfolio weights {ticker: weight}
        cov_matrix: Covariance matrix
        variance: Portfolio variance
        volatility: Portfolio volatility
        variance_breakdown: Detailed variance breakdown

    Returns:
        Dictionary of LaTeX strings:
        {
            'weights_symbolic': str,
            'weights_numerical': str,
            'covariance_matrix': str,
            'correlation_matrix': str,
            'variance_symbolic': str,
            'variance_expanded': str,
            'variance_expanded_full': str,
            'volatility_symbolic': str,
            'volatility_numerical': str
        }
    """
    # Materialise once: the breakdown is consumed by two generators below.
    breakdown = list(variance_breakdown)

    weights_symbolic, weights_numerical = generate_weight_formulas(weights, amounts)
    volatility_symbolic, volatility_numerical = generate_volatility_formulas(
        variance, volatility
    )

    return {
        'weights_symbolic': weights_symbolic,
        'weights_numerical': weights_numerical,
        'covariance_matrix': generate_covariance_matrix_latex(
            cov_matrix, annualized=True
        ),
        'correlation_matrix': generate_correlation_matrix_latex(cov_matrix),
        'variance_symbolic': generate_variance_formula_symbolic(list(weights)),
        'variance_expanded': generate_variance_formula_expanded(
            weights, cov_matrix, breakdown, smart_truncation=True
        ),
        'variance_expanded_full': generate_variance_formula_expanded_full(
            weights, cov_matrix, breakdown
        ),
        'volatility_symbolic': volatility_symbolic,
        'volatility_numerical': volatility_numerical,
    }