File size: 11,443 Bytes
e6b8a0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
"""
LaTeX formula generation module using sympy.

Handles:
- Generating symbolic mathematical formulas
- Creating LaTeX representations for all calculations
- Detailed variance expansion with smart truncation
- Both symbolic and numerical formula variants
"""

from typing import Dict, List, Tuple
import pandas as pd
import numpy as np
from sympy import symbols, Matrix, sqrt, latex


def generate_weight_formulas(
    weights: Dict[str, float],
    amounts: Dict[str, float]
) -> Tuple[str, str]:
    """
    Build the LaTeX describing how portfolio weights are derived.

    The numerical variant contains one row per ticker in ``weights``. Each
    row shows that ticker's amount over the sum of every value in
    ``amounts``, followed by the weight value taken from ``weights``.

    Args:
        weights: Calculated weights {ticker: weight}; key order sets row order
        amounts: Original amounts {ticker: amount}; looked up for every
            ticker in ``weights``

    Returns:
        Tuple of (symbolic_latex, numerical_latex)
    """
    # Shared denominator for every row of the numerical variant.
    grand_total = sum(amounts.values())

    rows = [
        f"w_{{{name}}} = \\frac{{{amounts[name]:.2f}}}{{{grand_total:.2f}}} = {share:.4f}"
        for name, share in weights.items()
    ]

    general_rule = r"w_i = \frac{\text{amount}_i}{\sum_j \text{amount}_j}"
    # Rows are separated by a LaTeX line break inside an aligned environment.
    worked_out = "\\begin{aligned}\n" + " \\\\\n".join(rows) + "\n\\end{aligned}"

    return general_rule, worked_out


def generate_covariance_matrix_latex(
    cov_matrix: pd.DataFrame,
    annualized: bool = True
) -> str:
    """
    Render a covariance matrix as a LaTeX ``bmatrix`` assigned to Sigma.

    Args:
        cov_matrix: Covariance matrix DataFrame; the number of columns
            determines how many rows and columns are rendered
        annualized: When True, every entry is multiplied by 252 before it is
            formatted; when False the values are rendered as given

    Returns:
        LaTeX string for the matrix, entries formatted to 6 decimal places
    """
    size = len(cov_matrix.columns)

    # Scale only when requested so the unscaled path uses the raw values.
    entries = cov_matrix.values * 252 if annualized else cov_matrix.values

    rendered_rows = [
        " & ".join(f"{entries[r, c]:.6f}" for c in range(size))
        for r in range(size)
    ]

    # Rows are separated by a LaTeX line break; the last row gets none.
    body = (r" \\" + "\n").join(rendered_rows)

    return r"\Sigma = \begin{bmatrix}" + "\n" + body + "\n" + r"\end{bmatrix}"


def generate_variance_formula_symbolic(tickers: List[str]) -> str:
    """
    Return the symbolic portfolio-variance formula in two equivalent forms.

    The first line is the matrix form (σ²_p = w^T × Σ × w) and the second is
    the double-sum expansion; both are wrapped in one aligned environment.

    Args:
        tickers: List of ticker symbols (accepted for interface
            compatibility; the generated formula does not depend on it)

    Returns:
        LaTeX string for symbolic variance formula
    """
    formula_lines = [
        # Compact matrix notation.
        r"\sigma_p^2 = \mathbf{w}^T \Sigma \mathbf{w}",
        # Same quantity written as a sum over all asset pairs.
        r"\sigma_p^2 = \sum_{i=1}^{n} \sum_{j=1}^{n} w_i w_j \sigma_{ij}",
    ]

    return "\\begin{aligned}\n" + " \\\\\n".join(formula_lines) + "\n\\end{aligned}"


def generate_variance_formula_expanded(
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]],
    smart_truncation: bool = True,
    truncation_threshold: int = 4
) -> str:
    """
    Generate detailed variance expansion showing all terms.

    The result is a four-line aligned environment:
    1. Symbolic expansion term by term
    2. Numerical substitution
    3. Intermediate calculations (each term's contribution)
    4. Final result (sum of all contributions)

    With smart truncation, lines 1-3 show the first 3 terms, an ellipsis,
    and the last 2 terms. The final result always sums every term.
    Truncation is applied only when the breakdown has more than 5 terms;
    with 5 or fewer, the slices would overlap and repeat terms, so the full
    expansion is shown instead.

    Args:
        weights: Portfolio weights; only the number of entries is used, to
            decide whether truncation applies
        cov_matrix: Covariance matrix (not read here; the covariances come
            from ``variance_breakdown``)
        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)
        smart_truncation: Whether to truncate long expansions
        truncation_threshold: Number of tickers before truncation kicks in

    Returns:
        LaTeX string with the variance expansion
    """
    # Number of terms kept on each side of the ellipsis when truncating.
    num_show_start = 3
    num_show_end = 2

    n = len(weights)

    # Truncate only if it actually hides something: with too few terms the
    # head and tail slices would overlap and duplicate entries.
    should_truncate = (
        smart_truncation
        and n >= truncation_threshold
        and len(variance_breakdown) > num_show_start + num_show_end
    )

    symbolic_terms = []
    numerical_terms = []
    intermediate_values = []
    for ticker_i, ticker_j, w_i, w_j, cov_ij, contrib in variance_breakdown:
        if ticker_i == ticker_j:
            # Diagonal term: w_i^2 × σ_ii
            symbolic_terms.append(
                f"w_{{{ticker_i}}}^2 \\sigma_{{{ticker_i}{ticker_j}}}"
            )
            numerical_terms.append(f"({w_i:.4f})^2 \\times {cov_ij:.6f}")
        else:
            # Off-diagonal term: w_i × w_j × σ_ij
            symbolic_terms.append(
                f"w_{{{ticker_i}}} w_{{{ticker_j}}} \\sigma_{{{ticker_i}{ticker_j}}}"
            )
            numerical_terms.append(
                f"({w_i:.4f}) \\times ({w_j:.4f}) \\times {cov_ij:.6f}"
            )
        intermediate_values.append(f"{contrib:.6f}")

    # The total always covers every term, even the ones hidden by truncation.
    total_variance = sum(contrib for (_, _, _, _, _, contrib) in variance_breakdown)

    def _for_display(terms: List[str]) -> List[str]:
        """Return the terms to render, collapsing the middle when truncating."""
        if not should_truncate:
            return terms
        return terms[:num_show_start] + [r"\cdots"] + terms[-num_show_end:]

    # Build the aligned LaTeX
    latex_str = "\\begin{aligned}\n"

    # Line 1: Symbolic expansion
    latex_str += r"\sigma_p^2 &= " + " + ".join(_for_display(symbolic_terms)) + r" \\" + "\n"

    # Line 2: Numerical substitution
    latex_str += r"          &= " + " + ".join(_for_display(numerical_terms)) + r" \\" + "\n"

    # Line 3: Intermediate calculations
    latex_str += r"          &= " + " + ".join(_for_display(intermediate_values)) + r" \\" + "\n"

    # Line 4: Final result
    latex_str += f"          &= {total_variance:.6f}\n"

    latex_str += "\\end{aligned}"

    return latex_str


def generate_variance_formula_expanded_full(
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
) -> str:
    """
    Produce the variance expansion with every term shown.

    Intended for the "Show all terms" toggle. This is a convenience wrapper
    that forwards to ``generate_variance_formula_expanded`` with smart
    truncation switched off.

    Args:
        weights: Portfolio weights
        cov_matrix: Covariance matrix
        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)

    Returns:
        LaTeX string with complete variance expansion
    """
    return generate_variance_formula_expanded(
        weights=weights,
        cov_matrix=cov_matrix,
        variance_breakdown=variance_breakdown,
        smart_truncation=False,
    )


def generate_volatility_formulas(
    variance: float,
    volatility: float
) -> Tuple[str, str]:
    """
    Build the LaTeX for the variance-to-volatility step.

    The numerical variant shows the square root of ``variance``, the given
    ``volatility`` as a decimal, and the same volatility as a percentage.
    Both inputs are formatted as supplied; nothing is recomputed here.

    Args:
        variance: Calculated portfolio variance
        volatility: Calculated portfolio volatility

    Returns:
        Tuple of (symbolic_latex, numerical_latex)
    """
    as_percent = volatility * 100

    worked_out = (
        f"\\sigma_p = \\sqrt{{{variance:.6f}}}"
        f" = {volatility:.6f}"
        f" = {as_percent:.2f}\\%"
    )

    return r"\sigma_p = \sqrt{\sigma_p^2}", worked_out


def generate_correlation_matrix_latex(cov_matrix: pd.DataFrame) -> str:
    """
    Derive the correlation matrix from a covariance matrix and render it.

    Correlation: ρ_ij = σ_ij / (σ_i × σ_j)

    Args:
        cov_matrix: Covariance matrix

    Returns:
        LaTeX string for correlation matrix, entries formatted to 4 decimals
    """
    # Standard deviations are the square roots of the diagonal variances.
    sigmas = np.sqrt(np.diag(cov_matrix))
    # Entry (i, j) of the outer product is σ_i × σ_j, the normalising factor.
    correlations = cov_matrix / np.outer(sigmas, sigmas)

    size = len(cov_matrix.columns)

    rendered_rows = [
        " & ".join(f"{correlations.iloc[r, c]:.4f}" for c in range(size))
        for r in range(size)
    ]

    # Rows are separated by a LaTeX line break; the last row gets none.
    body = (r" \\" + "\n").join(rendered_rows)

    return (
        r"\text{Correlation Matrix} = \begin{bmatrix}" + "\n"
        + body
        + "\n" + r"\end{bmatrix}"
    )


def generate_all_formulas(
    amounts: Dict[str, float],
    weights: Dict[str, float],
    cov_matrix: pd.DataFrame,
    variance: float,
    volatility: float,
    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
) -> Dict[str, str]:
    """
    Produce every LaTeX formula used by the portfolio analysis.

    Orchestrates the individual generators in this module and collects
    their output into a single dictionary.

    Args:
        amounts: Portfolio amounts {ticker: amount}
        weights: Portfolio weights {ticker: weight}
        cov_matrix: Covariance matrix
        variance: Portfolio variance
        volatility: Portfolio volatility
        variance_breakdown: Detailed variance breakdown

    Returns:
        Dictionary of LaTeX strings:
        {
            'weights_symbolic': str,
            'weights_numerical': str,
            'covariance_matrix': str,
            'correlation_matrix': str,
            'variance_symbolic': str,
            'variance_expanded': str,
            'variance_expanded_full': str,
            'volatility_symbolic': str,
            'volatility_numerical': str
        }
    """
    formulas: Dict[str, str] = {}

    # Weights: symbolic rule plus per-ticker numbers.
    (
        formulas['weights_symbolic'],
        formulas['weights_numerical'],
    ) = generate_weight_formulas(weights, amounts)

    # Matrices: covariance is rendered annualized, correlation is derived
    # from the covariance matrix as passed in.
    formulas['covariance_matrix'] = generate_covariance_matrix_latex(
        cov_matrix, annualized=True
    )
    formulas['correlation_matrix'] = generate_correlation_matrix_latex(cov_matrix)

    # Variance: symbolic form, truncated expansion, and full expansion.
    formulas['variance_symbolic'] = generate_variance_formula_symbolic(list(weights))
    formulas['variance_expanded'] = generate_variance_formula_expanded(
        weights, cov_matrix, variance_breakdown, smart_truncation=True
    )
    formulas['variance_expanded_full'] = generate_variance_formula_expanded_full(
        weights, cov_matrix, variance_breakdown
    )

    # Volatility: symbolic rule plus the numbers.
    (
        formulas['volatility_symbolic'],
        formulas['volatility_numerical'],
    ) = generate_volatility_formulas(variance, volatility)

    return formulas