Spaces:

ResearchEngineering
/

financial_analyst

Running

File size: 11,443 Bytes

e6b8a0f

"""
LaTeX formula generation module using sympy.

Handles:
- Generating symbolic mathematical formulas
- Creating LaTeX representations for all calculations
- Detailed variance expansion with smart truncation
- Both symbolic and numerical formula variants
"""

from typing import Dict, List, Tuple
import pandas as pd
import numpy as np
from sympy import symbols, Matrix, sqrt, latex


def generate_weight_formulas(
    weights: Dict[str, float],
    amounts: Dict[str, float]
) -> Tuple[str, str]:
    """
    Build the LaTeX for the portfolio weight calculation.

    The symbolic variant is the generic definition of a weight; the
    numerical variant has one aligned row per ticker showing
    amount / total = weight with the supplied values.

    Args:
        weights: Calculated weights {ticker: weight}; its key order decides
            the row order of the numerical variant
        amounts: Original amounts {ticker: amount}; must contain every
            ticker present in ``weights``

    Returns:
        Tuple of (symbolic_latex, numerical_latex)
    """
    # The denominator shown on every row is the sum of all supplied amounts.
    portfolio_total = sum(amounts.values())

    symbolic = r"w_i = \frac{\text{amount}_i}{\sum_j \text{amount}_j}"

    # One "w_{T} = \frac{amount}{total} = weight" row per ticker.
    rows = [
        f"w_{{{name}}} = \\frac{{{amounts[name]:.2f}}}{{{portfolio_total:.2f}}} = {share:.4f}"
        for name, share in weights.items()
    ]

    # Rows are separated by a LaTeX line break inside the aligned environment.
    body = " \\\\\n".join(rows)
    numerical = f"\\begin{{aligned}}\n{body}\n\\end{{aligned}}"

    return symbolic, numerical


def generate_covariance_matrix_latex(
    cov_matrix: pd.DataFrame,
    annualized: bool = True,
    periods_per_year: int = 252
) -> str:
    """
    Generate LaTeX representation of covariance matrix.

    Every entry is printed with six decimals inside a ``bmatrix``
    environment. When ``annualized`` is true the entries are first
    multiplied by ``periods_per_year``.

    Args:
        cov_matrix: Covariance matrix DataFrame; the number of columns
            decides how many rows and columns are rendered
        annualized: Whether to show annualized values
        periods_per_year: Scaling factor applied when ``annualized`` is
            true. Defaults to 252, the value that was previously
            hard-coded; pass e.g. 12 for monthly or 52 for weekly data.
            Ignored when ``annualized`` is false.

    Returns:
        LaTeX string for the matrix
    """
    n = len(cov_matrix.columns)

    # Scale only on request so the non-annualized path shows the raw values.
    if annualized:
        cov_values = cov_matrix.values * periods_per_year
    else:
        cov_values = cov_matrix.values

    # Cells within a row are separated by "&"; rows by a LaTeX line break.
    rows = [
        " & ".join(f"{cov_values[i, j]:.6f}" for j in range(n))
        for i in range(n)
    ]
    row_break = r" \\" + "\n"

    return (
        r"\Sigma = \begin{bmatrix}" + "\n"
        + row_break.join(rows)
        + "\n" + r"\end{bmatrix}"
    )


def generate_variance_formula_symbolic(tickers: List[str]) -> str:
    """
    Return the symbolic portfolio variance formula as aligned LaTeX.

    Two lines are produced: the matrix form σ²_p = w^T × Σ × w followed
    by the equivalent double-sum expansion.

    Args:
        tickers: List of ticker symbols (accepted for interface
            compatibility; the generic formula does not depend on it)

    Returns:
        LaTeX string for symbolic variance formula
    """
    # First row carries a trailing LaTeX line break; the last row does not.
    formula_rows = [
        r"\sigma_p^2 = \mathbf{w}^T \Sigma \mathbf{w}" + r" \\",
        r"\sigma_p^2 = \sum_{i=1}^{n} \sum_{j=1}^{n} w_i w_j \sigma_{ij}",
    ]

    return "\n".join(["\\begin{aligned}", *formula_rows, "\\end{aligned}"])


def _elide_middle(terms: List[str], head: int, tail: int) -> List[str]:
    """Keep the first ``head`` and last ``tail`` terms with a \\cdots marker between them."""
    return terms[:head] + [r"\cdots"] + terms[len(terms) - tail:]


def generate_variance_formula_expanded(
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]],
    smart_truncation: bool = True,
    truncation_threshold: int = 4
) -> str:
    """
    Generate detailed variance expansion showing all terms.

    The result is an ``aligned`` environment with four rows:
    1. Symbolic expansion term by term
    2. Numerical substitution
    3. Intermediate calculations (the per-term contributions)
    4. Final result (the sum of all contributions)

    With smart truncation the first 3 terms, a ``\\cdots`` marker and the
    last 2 terms are shown on rows 1-3. Truncation is applied only when it
    actually hides something, i.e. when the breakdown has more than 5
    terms; shorter breakdowns are always printed in full so that no term
    is repeated and no spurious ``\\cdots`` appears.

    Args:
        weights: Portfolio weights; only the number of tickers is used, to
            compare against ``truncation_threshold``
        cov_matrix: Covariance matrix (not read here; the covariances come
            from ``variance_breakdown``)
        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)
        smart_truncation: Whether to truncate long expansions
        truncation_threshold: Number of tickers before truncation kicks in

    Returns:
        LaTeX string with full variance expansion
    """
    n = len(weights)

    # How many terms stay visible on each side of the ellipsis.
    num_show_start = 3
    num_show_end = 2

    # Truncate only if the ticker count asks for it AND there are more terms
    # than would be displayed anyway; otherwise the head and tail slices
    # overlap and the same term would be printed twice.
    should_truncate = (
        smart_truncation
        and n >= truncation_threshold
        and len(variance_breakdown) > num_show_start + num_show_end
    )

    # Build the three per-term representations in a single pass.
    symbolic_terms = []
    numerical_terms = []
    intermediate_values = []
    for ticker_i, ticker_j, w_i, w_j, cov_ij, contrib in variance_breakdown:
        if ticker_i == ticker_j:
            # Diagonal term: w_i^2 × σ_ii
            symbolic_terms.append(
                f"w_{{{ticker_i}}}^2 \\sigma_{{{ticker_i}{ticker_j}}}"
            )
            numerical_terms.append(f"({w_i:.4f})^2 \\times {cov_ij:.6f}")
        else:
            # Off-diagonal term: w_i × w_j × σ_ij
            symbolic_terms.append(
                f"w_{{{ticker_i}}} w_{{{ticker_j}}} \\sigma_{{{ticker_i}{ticker_j}}}"
            )
            numerical_terms.append(
                f"({w_i:.4f}) \\times ({w_j:.4f}) \\times {cov_ij:.6f}"
            )
        intermediate_values.append(f"{contrib:.6f}")

    # The final result always sums every contribution, truncated or not.
    total_variance = sum(entry[5] for entry in variance_breakdown)

    if should_truncate:
        symbolic_display = _elide_middle(symbolic_terms, num_show_start, num_show_end)
        numerical_display = _elide_middle(numerical_terms, num_show_start, num_show_end)
        intermediate_display = _elide_middle(intermediate_values, num_show_start, num_show_end)
    else:
        symbolic_display = symbolic_terms
        numerical_display = numerical_terms
        intermediate_display = intermediate_values

    # Build the aligned LaTeX; continuation rows are padded so the "&="
    # markers line up in the generated source as well.
    row_break = r" \\" + "\n"
    latex_str = "\\begin{aligned}\n"
    latex_str += r"\sigma_p^2 &= " + " + ".join(symbolic_display) + row_break
    latex_str += r"          &= " + " + ".join(numerical_display) + row_break
    latex_str += r"          &= " + " + ".join(intermediate_display) + row_break
    latex_str += f"          &= {total_variance:.6f}\n"
    latex_str += "\\end{aligned}"

    return latex_str


def generate_variance_formula_expanded_full(
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
) -> str:
    """
    Produce the complete variance expansion with every term shown.

    Intended for the "Show all terms" toggle. This is a convenience
    wrapper: it forwards its arguments to
    generate_variance_formula_expanded with smart truncation switched off.

    Args:
        weights: Portfolio weights
        cov_matrix: Covariance matrix
        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)

    Returns:
        LaTeX string with complete variance expansion
    """
    untruncated = generate_variance_formula_expanded(
        weights,
        cov_matrix,
        variance_breakdown,
        smart_truncation=False,
    )
    return untruncated


def generate_volatility_formulas(
    variance: float,
    volatility: float
) -> Tuple[str, str]:
    """
    Build the LaTeX for the volatility (standard deviation) step.

    The symbolic variant states that volatility is the square root of the
    variance. The numerical variant chains the supplied variance under the
    root, the supplied volatility as a decimal, and the same volatility as
    a percentage. Both inputs are formatted as given; nothing is recomputed.

    Args:
        variance: Calculated portfolio variance
        volatility: Calculated portfolio volatility

    Returns:
        Tuple of (symbolic_latex, numerical_latex)
    """
    symbolic = r"\sigma_p = \sqrt{\sigma_p^2}"

    # Three pieces chained with "=": root of variance, decimal, percentage.
    root_part = "\\sigma_p = \\sqrt{" + format(variance, ".6f") + "}"
    decimal_part = format(volatility, ".6f")
    percent_part = format(volatility * 100, ".2f") + "\\%"
    numerical = " = ".join((root_part, decimal_part, percent_part))

    return symbolic, numerical


def generate_correlation_matrix_latex(cov_matrix: pd.DataFrame) -> str:
    """
    Render the correlation matrix implied by a covariance matrix.

    Correlation: ρ_ij = σ_ij / (σ_i × σ_j), where σ_i is the square root
    of the i-th diagonal entry of the covariance matrix. Entries are
    printed with four decimals inside a ``bmatrix`` environment.

    Args:
        cov_matrix: Covariance matrix

    Returns:
        LaTeX string for correlation matrix
    """
    # Per-asset standard deviations come from the diagonal (the variances).
    sigma = np.sqrt(np.diag(cov_matrix))

    # Dividing by the outer product normalises every σ_ij by σ_i × σ_j.
    correlations = cov_matrix / np.outer(sigma, sigma)

    size = len(cov_matrix.columns)

    # Cells within a row are separated by "&"; rows by a LaTeX line break.
    rendered_rows = [
        " & ".join(f"{correlations.iloc[r, c]:.4f}" for c in range(size))
        for r in range(size)
    ]
    row_break = r" \\" + "\n"

    return "\n".join([
        r"\text{Correlation Matrix} = \begin{bmatrix}",
        row_break.join(rendered_rows),
        r"\end{bmatrix}",
    ])


def generate_all_formulas(
    amounts: Dict[str, float],
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance: float,
    volatility: float,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
) -> Dict[str, str]:
    """
    Collect every LaTeX formula needed for the portfolio analysis.

    Orchestrator: delegates to the individual generator functions of this
    module and gathers their output in one dictionary. The covariance
    matrix is requested in annualized form; the variance expansion is
    produced both truncated and in full.

    Args:
        amounts: Portfolio amounts {ticker: amount}
        weights: Portfolio weights {ticker: weight}
        cov_matrix: Covariance matrix
        variance: Portfolio variance
        volatility: Portfolio volatility
        variance_breakdown: Detailed variance breakdown

    Returns:
        Dictionary of LaTeX strings:
        {
            'weights_symbolic': str,
            'weights_numerical': str,
            'covariance_matrix': str,
            'correlation_matrix': str,
            'variance_symbolic': str,
            'variance_expanded': str,
            'variance_expanded_full': str,
            'volatility_symbolic': str,
            'volatility_numerical': str
        }
    """
    formulas: Dict[str, str] = {}

    # Weights: generic definition plus per-ticker substitution.
    (
        formulas['weights_symbolic'],
        formulas['weights_numerical'],
    ) = generate_weight_formulas(weights, amounts)

    # Matrices derived from the covariance input.
    formulas['covariance_matrix'] = generate_covariance_matrix_latex(
        cov_matrix, annualized=True
    )
    formulas['correlation_matrix'] = generate_correlation_matrix_latex(cov_matrix)

    # Variance: symbolic form, truncated expansion, and complete expansion.
    formulas['variance_symbolic'] = generate_variance_formula_symbolic(list(weights))
    formulas['variance_expanded'] = generate_variance_formula_expanded(
        weights, cov_matrix, variance_breakdown, smart_truncation=True
    )
    formulas['variance_expanded_full'] = generate_variance_formula_expanded_full(
        weights, cov_matrix, variance_breakdown
    )

    # Volatility: square root of the variance, symbolic and numerical.
    (
        formulas['volatility_symbolic'],
        formulas['volatility_numerical'],
    ) = generate_volatility_formulas(variance, volatility)

    return formulas