Spaces:

ResearchEngineering
/

financial_analyst

Running

financial_analyst / formula_generator.py

Dmitry Beresnev

add core modules

e6b8a0f 4 days ago

11.4 kB

	"""
	LaTeX formula generation module using sympy.

	Handles:
	- Generating symbolic mathematical formulas
	- Creating LaTeX representations for all calculations
	- Detailed variance expansion with smart truncation
	- Both symbolic and numerical formula variants
	"""

	from typing import Dict, List, Tuple
	import pandas as pd
	import numpy as np
	from sympy import symbols, Matrix, sqrt, latex


	def generate_weight_formulas(
	    weights: Dict[str, float],
	    amounts: Dict[str, float]
	) -> Tuple[str, str]:
	    """
	    Build the LaTeX for the portfolio weight calculation.

	    The symbolic variant is a fixed formula. The numerical variant has one
	    aligned row per ticker in ``weights`` showing
	    ``amount / total = weight``, where ``total`` is the sum of every value
	    in ``amounts``.

	    Args:
	        weights: Calculated weights {ticker: weight}
	        amounts: Original amounts {ticker: amount}; must contain every
	            ticker present in ``weights``

	    Returns:
	        Tuple of (symbolic_latex, numerical_latex)

	    Raises:
	        KeyError: If a ticker in ``weights`` is missing from ``amounts``.
	    """
	    portfolio_total = sum(amounts.values())

	    # One row per ticker, in the iteration order of ``weights``
	    rows = [
	        f"w_{{{name}}} = \\frac{{{amounts[name]:.2f}}}{{{portfolio_total:.2f}}} = {share:.4f}"
	        for name, share in weights.items()
	    ]

	    # Rows are separated by a LaTeX line break inside the aligned block
	    numerical = "\n".join(
	        ["\\begin{aligned}", " \\\\\n".join(rows), "\\end{aligned}"]
	    )

	    symbolic = r"w_i = \frac{\text{amount}_i}{\sum_j \text{amount}_j}"
	    return symbolic, numerical


	def generate_covariance_matrix_latex(
	    cov_matrix: pd.DataFrame,
	    annualized: bool = True,
	    periods_per_year: int = 252
	) -> str:
	    """
	    Generate LaTeX representation of covariance matrix.

	    Every entry is printed with six decimal places inside a ``bmatrix``
	    environment prefixed with ``\\Sigma =``.

	    Args:
	        cov_matrix: Covariance matrix DataFrame
	        annualized: Whether to scale the values by ``periods_per_year``
	            before printing
	        periods_per_year: Scaling factor applied when ``annualized`` is
	            True. Defaults to 252, the value that was previously
	            hard-coded.

	    Returns:
	        LaTeX string for the matrix
	    """
	    cov_values = cov_matrix.values
	    if annualized:
	        cov_values = cov_values * periods_per_year

	    # Iterate the rows that actually exist rather than assuming the number
	    # of rows equals the number of columns.
	    rows = [
	        " & ".join(f"{value:.6f}" for value in row)
	        for row in cov_values
	    ]

	    # Rows are separated by " \\" + newline; the last row has no separator
	    body = (r" \\" + "\n").join(rows)

	    return r"\Sigma = \begin{bmatrix}" + "\n" + body + "\n" + r"\end{bmatrix}"


	def generate_variance_formula_symbolic(tickers: List[str]) -> str:
	    """
	    Return the symbolic portfolio-variance formula as aligned LaTeX.

	    Two rows are produced: the matrix form (σ²_p = w^T Σ w) followed by the
	    double-sum expansion over w_i w_j σ_ij.

	    Args:
	        tickers: List of ticker symbols. Accepted for interface
	            compatibility; the generated formula does not depend on it.

	    Returns:
	        LaTeX string for symbolic variance formula
	    """
	    rows = (
	        # Matrix form
	        r"\sigma_p^2 = \mathbf{w}^T \Sigma \mathbf{w}",
	        # Expanded double-sum form
	        r"\sigma_p^2 = \sum_{i=1}^{n} \sum_{j=1}^{n} w_i w_j \sigma_{ij}",
	    )
	    body = (r" \\" + "\n").join(rows)
	    return "\\begin{aligned}\n" + body + "\n\\end{aligned}"


	def generate_variance_formula_expanded(
	    weights: Dict[str, float],
	    cov_matrix: pd.DataFrame,
	    variance_breakdown: List[Tuple[str, str, float, float, float, float]],
	    smart_truncation: bool = True,
	    truncation_threshold: int = 4
	) -> str:
	    """
	    Generate detailed variance expansion showing all terms.

	    The result is an ``aligned`` block with four rows:
	    1. Symbolic expansion term by term
	    2. Numerical substitution
	    3. Intermediate calculations (each term's contribution)
	    4. Final result (sum of all contributions)

	    With smart truncation, rows 1-3 show the first 3 terms, ``\\cdots``,
	    then the last 2 terms. Truncation is applied only when the number of
	    tickers reaches ``truncation_threshold`` AND the breakdown has more
	    than 5 terms; otherwise the head and tail slices would overlap and
	    repeat terms or add a ``\\cdots`` that hides nothing.

	    Args:
	        weights: Portfolio weights; only the number of entries is used
	        cov_matrix: Covariance matrix (not read by this function; kept for
	            interface compatibility)
	        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)
	        smart_truncation: Whether to truncate long expansions
	        truncation_threshold: Number of tickers before truncation kicks in

	    Returns:
	        LaTeX string with full variance expansion
	    """
	    # Number of terms kept on each side of the ellipsis when truncating
	    num_show_start = 3
	    num_show_end = 2

	    # Steps 1-3: build symbolic, numerical and intermediate terms in one pass
	    symbolic_terms = []
	    numerical_terms = []
	    intermediate_values = []
	    for ticker_i, ticker_j, w_i, w_j, cov_ij, contrib in variance_breakdown:
	        if ticker_i == ticker_j:
	            # Diagonal term: w_i^2 × σ_ii
	            symbolic_terms.append(
	                f"w_{{{ticker_i}}}^2 \\sigma_{{{ticker_i}{ticker_j}}}"
	            )
	            numerical_terms.append(f"({w_i:.4f})^2 \\times {cov_ij:.6f}")
	        else:
	            # Off-diagonal term: w_i × w_j × σ_ij
	            symbolic_terms.append(
	                f"w_{{{ticker_i}}} w_{{{ticker_j}}} \\sigma_{{{ticker_i}{ticker_j}}}"
	            )
	            numerical_terms.append(
	                f"({w_i:.4f}) \\times ({w_j:.4f}) \\times {cov_ij:.6f}"
	            )
	        intermediate_values.append(f"{contrib:.6f}")

	    # Step 4: total is always the sum of ALL contributions, truncated or not
	    total_variance = sum(term[5] for term in variance_breakdown)

	    # Truncate only if something would actually be hidden
	    should_truncate = (
	        smart_truncation
	        and len(weights) >= truncation_threshold
	        and len(symbolic_terms) > num_show_start + num_show_end
	    )

	    def _shorten(terms: List[str]) -> List[str]:
	        """Return head + ellipsis + tail when truncating, else the terms unchanged."""
	        if not should_truncate:
	            return terms
	        return terms[:num_show_start] + [r"\cdots"] + terms[-num_show_end:]

	    # Build the aligned LaTeX, one row per step
	    lines = [
	        r"\sigma_p^2 &= " + " + ".join(_shorten(symbolic_terms)) + r" \\",
	        r" &= " + " + ".join(_shorten(numerical_terms)) + r" \\",
	        r" &= " + " + ".join(_shorten(intermediate_values)) + r" \\",
	        f" &= {total_variance:.6f}",
	    ]

	    return "\\begin{aligned}\n" + "\n".join(lines) + "\n\\end{aligned}"


	def generate_variance_formula_expanded_full(
	    weights: Dict[str, float],
	    cov_matrix: pd.DataFrame,
	    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
	) -> str:
	    """
	    Generate the complete variance expansion with every term shown.

	    Intended for the "Show all terms" toggle: delegates to
	    ``generate_variance_formula_expanded`` with smart truncation switched
	    off.

	    Args:
	        weights: Portfolio weights
	        cov_matrix: Covariance matrix
	        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)

	    Returns:
	        LaTeX string with complete variance expansion
	    """
	    full_expansion = generate_variance_formula_expanded(
	        weights=weights,
	        cov_matrix=cov_matrix,
	        variance_breakdown=variance_breakdown,
	        smart_truncation=False,
	    )
	    return full_expansion


	def generate_volatility_formulas(
	    variance: float,
	    volatility: float
	) -> Tuple[str, str]:
	    """
	    Build the LaTeX for the volatility (square root of variance) step.

	    The numerical variant prints the variance and volatility with six
	    decimals, followed by the volatility as a percentage with two
	    decimals. Both inputs are formatted as given; the square root is not
	    recomputed here.

	    Args:
	        variance: Calculated portfolio variance
	        volatility: Calculated portfolio volatility

	    Returns:
	        Tuple of (symbolic_latex, numerical_latex)
	    """
	    volatility_percent = volatility * 100

	    # Assemble the numerical chain piece by piece: sqrt(var) = vol = vol%
	    numerical = (
	        "\\sigma_p = "
	        f"\\sqrt{{{variance:.6f}}}"
	        f" = {volatility:.6f}"
	        f" = {volatility_percent:.2f}\\%"
	    )

	    return r"\sigma_p = \sqrt{\sigma_p^2}", numerical


	def generate_correlation_matrix_latex(cov_matrix: pd.DataFrame) -> str:
	    """
	    Generate correlation matrix from covariance matrix.

	    Correlation: ρ_ij = σ_ij / (σ_i × σ_j), where σ_i is the square root of
	    the i-th diagonal entry of ``cov_matrix``. Entries are printed with
	    four decimal places inside a ``bmatrix`` environment.

	    If a diagonal entry is zero or negative the affected correlations are
	    undefined (NaN or infinite); these are rendered as ``\\text{N/A}``
	    instead of leaking ``nan``/``inf`` into the LaTeX.

	    Args:
	        cov_matrix: Covariance matrix

	    Returns:
	        LaTeX string for correlation matrix
	    """
	    cov_values = np.asarray(cov_matrix, dtype=float)

	    # Division by a zero standard deviation (or sqrt of a negative variance)
	    # is expected here and handled below, so silence NumPy's warnings.
	    with np.errstate(divide="ignore", invalid="ignore"):
	        std_devs = np.sqrt(np.diag(cov_values))
	        corr_values = cov_values / np.outer(std_devs, std_devs)

	    def _format_entry(value: float) -> str:
	        """Format one correlation, marking undefined values explicitly."""
	        if np.isfinite(value):
	            return f"{value:.4f}"
	        return r"\text{N/A}"

	    rows = [
	        " & ".join(_format_entry(value) for value in row)
	        for row in corr_values
	    ]

	    # Rows are separated by " \\" + newline; the last row has no separator
	    body = (r" \\" + "\n").join(rows)

	    return (
	        r"\text{Correlation Matrix} = \begin{bmatrix}"
	        + "\n"
	        + body
	        + "\n"
	        + r"\end{bmatrix}"
	    )


	def generate_all_formulas(
	    amounts: Dict[str, float],
	    weights: Dict[str, float],
	    cov_matrix: pd.DataFrame,
	    variance: float,
	    volatility: float,
	    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
	) -> Dict[str, str]:
	    """
	    Generate all LaTeX formulas for the portfolio analysis.

	    Orchestrator: calls each formula generator in this module once and
	    collects the results under fixed keys. The covariance matrix is
	    requested with ``annualized=True``; the correlation matrix is derived
	    from the same ``cov_matrix``. The variance expansion is produced twice,
	    once with smart truncation and once in full.

	    Args:
	        amounts: Portfolio amounts {ticker: amount}
	        weights: Portfolio weights {ticker: weight}
	        cov_matrix: Covariance matrix
	        variance: Portfolio variance
	        volatility: Portfolio volatility
	        variance_breakdown: Detailed variance breakdown

	    Returns:
	        Dictionary of LaTeX strings with the keys:
	        'weights_symbolic', 'weights_numerical', 'covariance_matrix',
	        'correlation_matrix', 'variance_symbolic', 'variance_expanded',
	        'variance_expanded_full', 'volatility_symbolic',
	        'volatility_numerical'
	    """
	    formulas: Dict[str, str] = {}

	    # Weights
	    symbolic_w, numerical_w = generate_weight_formulas(weights, amounts)
	    formulas['weights_symbolic'] = symbolic_w
	    formulas['weights_numerical'] = numerical_w

	    # Covariance (annualized) and correlation matrices
	    formulas['covariance_matrix'] = generate_covariance_matrix_latex(
	        cov_matrix, annualized=True
	    )
	    formulas['correlation_matrix'] = generate_correlation_matrix_latex(cov_matrix)

	    # Variance: symbolic, truncated expansion, full expansion
	    formulas['variance_symbolic'] = generate_variance_formula_symbolic(list(weights))
	    formulas['variance_expanded'] = generate_variance_formula_expanded(
	        weights, cov_matrix, variance_breakdown, smart_truncation=True
	    )
	    formulas['variance_expanded_full'] = generate_variance_formula_expanded_full(
	        weights, cov_matrix, variance_breakdown
	    )

	    # Volatility
	    symbolic_vol, numerical_vol = generate_volatility_formulas(variance, volatility)
	    formulas['volatility_symbolic'] = symbolic_vol
	    formulas['volatility_numerical'] = numerical_vol

	    return formulas