Spaces:

ResearchEngineering
/

financial_analyst

Sleeping

App Files Files Community

Dmitry Beresnev committed on Jan 30

Commit

e6b8a0f

1 Parent(s): d848cbc

add core modules

Browse files

Files changed (9) hide show

.dockerignore +39 -0
.gitignore +45 -0
Dockerfile +34 -0
README.md +105 -3
app.py +360 -0
formula_generator.py +395 -0
ocr_parser.py +175 -0
portfolio_calculator.py +316 -0
requirements.txt +8 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,39 @@

+# Git
+.git
+.gitignore
+.gitattributes
+# Python cache
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+# Virtual environments
+.venv
+venv
+env
+# Logs
+*.log
+# OS files
+.DS_Store
+Thumbs.db
+# Documentation (not needed in Docker image)
+*.md
+README.md
+# Test files (optional - remove if you want to include test images)
+test_*.png
+# IDE
+.vscode
+.idea
+# Misc
+*.swp
+*.swo
+*~

.gitignore ADDED Viewed

	@@ -0,0 +1,45 @@

+# Node modules
+node_modules/
+npm-debug.log
+yarn-error.log
+# Python virtual environment and caches
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+venv/
+env/
+.venv/
+.Python
+# HF Space build artifacts
+*.log
+*.lock
+*.db
+*.sqlite
+*.cache
+/dist/
+.build/
+# Docker
+*.env
+Dockerfile.*.swp
+docker-compose.override.yml
+# Vault local changes (if you want only committed notes to stay)
+vault/*.md
+vault/**/*.md
+# VSCode / IDEs
+.vscode/
+.idea/
+*.sublime-workspace
+*.sublime-project
+# OS files
+.DS_Store
+Thumbs.db
+# Local test artifacts (NOTE: this pattern also ignores test_*.py source files)
+test_*

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+FROM python:3.12-slim
+# Install system dependencies for tesseract OCR and image processing.
+# curl is installed because the HEALTHCHECK below invokes it and the slim
+# base image does not ship it (without it the health check always fails).
+RUN apt-get update && apt-get install -y \
+    curl \
+    tesseract-ocr \
+    tesseract-ocr-eng \
+    libtesseract-dev \
+    && rm -rf /var/lib/apt/lists/*
+# Set working directory
+WORKDIR /app
+# Copy requirements first for better Docker layer caching
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application files
+COPY . .
+# Expose Streamlit port (HF Spaces default)
+EXPOSE 7860
+# Set environment variables for Streamlit
+ENV STREAMLIT_SERVER_PORT=7860
+ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
+ENV STREAMLIT_SERVER_HEADLESS=true
+# Health check (requires curl, installed above)
+HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health || exit 1
+# Run the application
+CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]

README.md CHANGED Viewed

@@ -1,11 +1,113 @@
 ---
-title: Financial Analyst
-emoji: 🐢
 colorFrom: blue
 colorTo: green
 sdk: docker
 pinned: false
-short_description: on the way to the financial analytics
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Portfolio Volatility Analyzer
+emoji: 📊
 colorFrom: blue
 colorTo: green
 sdk: docker
 pinned: false
+short_description: Investment portfolio risk analysis with OCR and LaTeX formulas
+---
+# 📊 Portfolio Volatility Analyzer
+Analyze your investment portfolio risk using **Modern Portfolio Theory** with OCR, interactive visualizations, and beautiful mathematical formulas.
+## Features
+- 📸 **OCR Portfolio Parsing**: Upload screenshots of your portfolio and automatically extract tickers and amounts
+- ✏️ **Editable JSON**: Correct OCR errors with an intuitive JSON editor
+- 📈 **Historical Data**: Automatically fetch 1 year of price data from Yahoo Finance
+- 🧮 **Full Calculations**:
+  - Portfolio weights
+  - Log returns
+  - Covariance matrix
+  - Portfolio variance and volatility
+- 📐 **Beautiful LaTeX Formulas**: See every calculation step with symbolic and numerical formulas
+- 📊 **Detailed Variance Expansion**: Step-by-step breakdown showing how each asset contributes to portfolio risk
+- 🎚️ **Interactive Rebalancing**: Adjust portfolio amounts with sliders and see volatility update in real-time
+## How to Use
+1. **Upload Portfolio Screenshot**: Take a screenshot of your portfolio (must show ticker symbols and dollar amounts)
+2. **Edit Portfolio JSON**: Review and correct any OCR errors in the JSON editor
+3. **Validate Portfolio**: Click "Validate Portfolio" to start analysis
+4. **View Results**: See historical data, covariance matrix, and detailed formulas
+5. **Rebalance**: Use interactive sliders to adjust positions and see impact on volatility
+## Technical Details
+### Formula Highlights
+**Portfolio Variance:**
+```
+σ²_p = w^T × Σ × w
+```
+Where:
+- `w` = vector of portfolio weights
+- `Σ` = covariance matrix (annualized)
+**Portfolio Volatility:**
+```
+σ_p = √(σ²_p)
+```
+### Architecture
+- **Frontend**: Streamlit
+- **OCR**: Tesseract (pytesseract)
+- **Financial Data**: yfinance (Yahoo Finance)
+- **Math**: NumPy, Pandas, SymPy
+- **Deployment**: Docker on Hugging Face Spaces
+## Local Development
+### Prerequisites
+- Python 3.11+
+- Tesseract OCR installed
+### Setup
+```bash
+# Install dependencies
+pip install -r requirements.txt
+# Run the app
+streamlit run app.py
+```
+### Docker Build
+```bash
+# Build
+docker build -t portfolio-analyzer .
+# Run
+docker run -p 7860:7860 portfolio-analyzer
+```
+## Example Portfolio
+Test the app with this JSON:
+```json
+{
+  "AAPL": 5000,
+  "GOOGL": 3000,
+  "MSFT": 2000
+}
+```
+## Notes
+- Uses 252 trading days for annualization
+- Calculates log returns: ln(P_t / P_{t-1})
+- Smart truncation of the displayed variance expansion for portfolios with 4+ tickers (the full expansion stays available via a toggle)
+- 1-hour cache for historical data to reduce API calls
+## Built With
+- Modern Portfolio Theory
+- LaTeX mathematical notation
+- Real-time financial data
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,360 @@

+"""
+Portfolio Volatility Analyzer - Main Streamlit Application
+Features:
+- OCR parsing of portfolio screenshots
+- Editable portfolio JSON
+- Financial calculations (weights, returns, covariance, variance, volatility)
+- Beautiful LaTeX formula displays for all calculations
+- Interactive sliders for portfolio rebalancing
+- Real-time recalculation
+"""
+import streamlit as st
+from PIL import Image
+import json
+# Import our modules
+import ocr_parser
+import portfolio_calculator
+import formula_generator
+# Page configuration
+st.set_page_config(
+    page_title="Portfolio Volatility Analyzer",
+    page_icon="📊",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Initialize session state
+if 'portfolio_data' not in st.session_state:
+    st.session_state.portfolio_data = None
+if 'portfolio_validated' not in st.session_state:
+    st.session_state.portfolio_validated = False
+if 'metrics' not in st.session_state:
+    st.session_state.metrics = None
+if 'show_all_terms' not in st.session_state:
+    st.session_state.show_all_terms = False
+# Main title and description
+st.title("📊 Portfolio Volatility Analyzer with OCR")
+st.markdown("""
+Analyze your investment portfolio risk using **modern portfolio theory**.
+**Features:**
+- 📸 Upload portfolio screenshot for automatic OCR parsing
+- ✏️ Edit portfolio data as JSON
+- 📈 Fetch historical price data automatically
+- 🧮 Calculate portfolio volatility with detailed mathematical formulas
+- 🎚️ Interactive sliders for real-time portfolio rebalancing
+""")
+st.divider()
+# ========================================
+# Section 1: Portfolio Input
+# ========================================
+st.header("1️⃣ Portfolio Input")
+# Create two columns for upload and manual entry
+col1, col2 = st.columns([1, 1])
+with col1:
+    st.subheader("📸 Upload Screenshot")
+    uploaded_file = st.file_uploader(
+        "Upload portfolio screenshot (PNG, JPG, JPEG)",
+        type=["png", "jpg", "jpeg"],
+        help="Upload a screenshot of your portfolio with ticker symbols and amounts"
+    )
+    if uploaded_file:
+        # Display uploaded image
+        image = Image.open(uploaded_file)
+        st.image(image, caption="Uploaded Portfolio Screenshot", use_container_width=True)
+        # OCR processing
+        with st.spinner("Extracting text from image..."):
+            text, error = ocr_parser.extract_text_from_image(image)
+        if error:
+            st.error(f"❌ {error}")
+        else:
+            # Show extracted text
+            with st.expander("📄 Extracted Text"):
+                st.text(text)
+            # Parse portfolio
+            portfolio = ocr_parser.parse_portfolio(text)
+            if portfolio:
+                st.success(f"✅ Found {len(portfolio)} tickers")
+                st.session_state.portfolio_data = portfolio
+            else:
+                st.warning("⚠️ No valid tickers found. Please edit manually below.")
+                st.session_state.portfolio_data = {}
+with col2:
+    st.subheader("✏️ Edit Portfolio (JSON)")
+    # Get initial JSON value
+    if st.session_state.portfolio_data is not None:
+        initial_json = ocr_parser.format_portfolio_json(st.session_state.portfolio_data)
+    else:
+        # Default example
+        initial_json = json.dumps({
+            "AAPL": 5000,
+            "GOOGL": 3000,
+            "MSFT": 2000
+        }, indent=2)
+    # Editable text area
+    edited_json = st.text_area(
+        "Portfolio (JSON format)",
+        value=initial_json,
+        height=300,
+        help="Edit the portfolio in JSON format: {\"TICKER\": amount, ...}"
+    )
+    # Validate button
+    if st.button("✅ Validate Portfolio", type="primary"):
+        is_valid, portfolio, error = ocr_parser.validate_portfolio_json(edited_json)
+        if is_valid:
+            st.session_state.portfolio_data = portfolio
+            st.session_state.portfolio_validated = True
+            st.success(f"✅ Portfolio validated! {len(portfolio)} tickers ready for analysis.")
+        else:
+            st.error(f"❌ {error}")
+            st.session_state.portfolio_validated = False
+st.divider()
+# ========================================
+# Section 2: Portfolio Analysis
+# ========================================
+# Everything in this branch runs only while a validated, non-empty portfolio is
+# held in session state; otherwise the instructions in the else-branch are shown.
+if st.session_state.portfolio_validated and st.session_state.portfolio_data:
+    st.header("2️⃣ Portfolio Analysis")
+    portfolio = st.session_state.portfolio_data
+    tickers = list(portfolio.keys())
+    # Display current portfolio
+    st.subheader("Current Portfolio")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.metric("Tickers", len(tickers))
+    with col2:
+        total_value = sum(portfolio.values())
+        st.metric("Total Value", f"${total_value:,.2f}")
+    with col3:
+        st.metric("Data Period", "1 year")
+    # Fetch data and calculate metrics
+    with st.spinner("🔄 Fetching historical data and calculating metrics..."):
+        metrics, error = portfolio_calculator.get_portfolio_metrics(portfolio, period="1y")
+    if error:
+        st.error(f"❌ {error}")
+        # Halt this script run: nothing below (including the footer) is rendered
+        st.stop()
+    # Store metrics in session state
+    st.session_state.metrics = metrics
+    st.success("✅ Analysis complete!")
+    st.divider()
+    # ========================================
+    # Section 3: Data Display
+    # ========================================
+    st.header("3️⃣ Historical Data")
+    # Portfolio Weights
+    st.subheader("📊 Portfolio Weights")
+    # List of (ticker, formatted percentage) pairs rendered as a static table
+    weights_df = [(ticker, f"{weight*100:.2f}%") for ticker, weight in metrics['weights'].items()]
+    st.table(weights_df)
+    # Historical Prices
+    st.subheader("📈 Historical Prices (Last 5 Days)")
+    st.dataframe(metrics['prices'].tail(), use_container_width=True)
+    # Returns
+    with st.expander("📉 Daily Log Returns (Last 5 Days)"):
+        st.dataframe(metrics['returns'].tail(), use_container_width=True)
+    # Covariance Matrix
+    st.subheader("🔢 Covariance Matrix (Annualized)")
+    # Scaled by 252 for display — presumably metrics['cov_matrix'] holds the daily
+    # covariance; confirm against portfolio_calculator.get_portfolio_metrics
+    st.dataframe(metrics['cov_matrix'] * 252, use_container_width=True)
+    st.divider()
+    # ========================================
+    # Section 4: Mathematical Formulas
+    # ========================================
+    st.header("4️⃣ Mathematical Formulas")
+    # Generate all formulas
+    formulas = formula_generator.generate_all_formulas(
+        amounts=portfolio,
+        weights=metrics['weights'],
+        cov_matrix=metrics['cov_matrix'],
+        variance=metrics['variance'],
+        volatility=metrics['volatility'],
+        variance_breakdown=metrics['variance_breakdown']
+    )
+    # Weight Formulas
+    st.subheader("⚖️ Portfolio Weights")
+    st.markdown("**Symbolic Formula:**")
+    st.latex(formulas['weights_symbolic'])
+    st.markdown("**Numerical Calculation:**")
+    st.latex(formulas['weights_numerical'])
+    # Covariance Matrix
+    st.subheader("📊 Covariance Matrix (Annualized)")
+    st.latex(formulas['covariance_matrix'])
+    # Correlation Matrix
+    with st.expander("🔗 Correlation Matrix"):
+        st.latex(formulas['correlation_matrix'])
+    # Variance Formula
+    st.subheader("📐 Portfolio Variance")
+    st.markdown("**Symbolic Formula:**")
+    st.latex(formulas['variance_symbolic'])
+    st.markdown("**Detailed Expansion:**")
+    st.latex(formulas['variance_expanded'])
+    # Toggle for full expansion
+    if st.checkbox("🔍 Show all variance terms (no truncation)", value=False):
+        st.markdown("**Complete Expansion (All Terms):**")
+        st.latex(formulas['variance_expanded_full'])
+    # Volatility Formula
+    st.subheader("📊 Portfolio Volatility")
+    st.markdown("**Symbolic Formula:**")
+    st.latex(formulas['volatility_symbolic'])
+    st.markdown("**Numerical Result:**")
+    st.latex(formulas['volatility_numerical'])
+    st.divider()
+    # ========================================
+    # Section 5: Final Results
+    # ========================================
+    st.header("5️⃣ Final Results")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.metric(
+            label="Portfolio Variance",
+            value=f"{metrics['variance']:.6f}",
+            help="Annualized portfolio variance"
+        )
+    with col2:
+        st.metric(
+            label="Portfolio Volatility",
+            value=f"{metrics['volatility']:.4f}",
+            help="Annualized portfolio standard deviation (σ)"
+        )
+    with col3:
+        st.metric(
+            label="Volatility (%)",
+            value=f"{metrics['volatility']*100:.2f}%",
+            help="Annualized volatility as percentage"
+        )
+    st.divider()
+    # ========================================
+    # Section 6: Interactive Rebalancing
+    # ========================================
+    st.header("6️⃣ Interactive Portfolio Rebalancing")
+    st.markdown("""
+    **Adjust portfolio amounts** using the sliders below to see how volatility changes in real-time.
+    """)
+    # Create sliders for each ticker
+    new_amounts = {}
+    slider_cols = st.columns(min(len(tickers), 3))  # Max 3 columns
+    for idx, ticker in enumerate(tickers):
+        # Distribute sliders round-robin across the available columns
+        col_idx = idx % len(slider_cols)
+        with slider_cols[col_idx]:
+            original_amount = portfolio[ticker]
+            new_amount = st.slider(
+                f"{ticker}",
+                min_value=0.0,
+                max_value=original_amount * 3,  # Allow up to 3x original
+                value=original_amount,
+                step=100.0,
+                format="$%.0f",
+                key=f"slider_{ticker}"
+            )
+            new_amounts[ticker] = new_amount
+    # Check if amounts changed
+    amounts_changed = any(new_amounts[t] != portfolio[t] for t in tickers)
+    if amounts_changed:
+        st.subheader("🔄 Recalculated Metrics")
+        # Recalculate with new amounts
+        with st.spinner("Recalculating..."):
+            new_metrics, error = portfolio_calculator.get_portfolio_metrics(new_amounts, period="1y")
+        if error:
+            st.error(f"❌ {error}")
+        else:
+            # Display new results
+            col1, col2 = st.columns(2)
+            with col1:
+                st.markdown("**New Portfolio Weights:**")
+                for ticker, weight in new_metrics['weights'].items():
+                    st.write(f"{ticker}: {weight*100:.2f}%")
+            with col2:
+                st.markdown("**New Volatility:**")
+                # Delta is the change relative to the volatility of the validated portfolio
+                st.metric(
+                    label="Updated Volatility",
+                    value=f"{new_metrics['volatility']*100:.2f}%",
+                    delta=f"{(new_metrics['volatility'] - metrics['volatility'])*100:.2f}%",
+                    delta_color="inverse"  # Lower volatility is better
+                )
+else:
+    # Show instructions if portfolio not validated
+    st.info("👆 Please upload a portfolio screenshot or enter portfolio data above, then click 'Validate Portfolio' to begin analysis.")
+st.divider()
+# ========================================
+# Footer
+# ========================================
+st.markdown("---")
+st.markdown("""
+<div style='text-align: center; color: gray;'>
+    <p>Built with ❤️ using Streamlit | Powered by Modern Portfolio Theory</p>
+    <p><small>Data source: Yahoo Finance (yfinance) | OCR: Tesseract</small></p>
+</div>
+""", unsafe_allow_html=True)

formula_generator.py ADDED Viewed

	@@ -0,0 +1,395 @@

+"""
+LaTeX formula generation module using sympy.
+Handles:
+- Generating symbolic mathematical formulas
+- Creating LaTeX representations for all calculations
+- Detailed variance expansion with smart truncation
+- Both symbolic and numerical formula variants
+"""
+from typing import Dict, List, Tuple
+import pandas as pd
+import numpy as np
+from sympy import symbols, Matrix, sqrt, latex
+def generate_weight_formulas(
+    weights: Dict[str, float],
+    amounts: Dict[str, float]
+) -> Tuple[str, str]:
+    """
+    Generate weight calculation formulas.
+    Returns both symbolic and numerical versions.
+    Args:
+        weights: Calculated weights {ticker: weight}
+        amounts: Original amounts {ticker: amount}
+    Returns:
+        Tuple of (symbolic_latex, numerical_latex)
+    """
+    tickers = list(weights.keys())
+    total = sum(amounts.values())
+    # Symbolic formula
+    symbolic = r"w_i = \frac{\text{amount}_i}{\sum_j \text{amount}_j}"
+    # Numerical formula with actual values
+    numerical_lines = []
+    for ticker in tickers:
+        amt = amounts[ticker]
+        wt = weights[ticker]
+        line = f"w_{{{ticker}}} = \\frac{{{amt:.2f}}}{{{total:.2f}}} = {wt:.4f}"
+        numerical_lines.append(line)
+    numerical = "\\begin{aligned}\n"
+    numerical += " \\\\\n".join(numerical_lines)
+    numerical += "\n\\end{aligned}"
+    return symbolic, numerical
+def generate_covariance_matrix_latex(
+    cov_matrix: pd.DataFrame,
+    annualized: bool = True
+) -> str:
+    """
+    Generate LaTeX representation of covariance matrix.
+    Args:
+        cov_matrix: Covariance matrix DataFrame
+        annualized: Whether to show annualized values
+    Returns:
+        LaTeX string for the matrix
+    """
+    tickers = list(cov_matrix.columns)
+    n = len(tickers)
+    # Multiply by 252 if annualized
+    if annualized:
+        cov_values = cov_matrix.values * 252
+    else:
+        cov_values = cov_matrix.values
+    # Build LaTeX matrix
+    latex_str = r"\Sigma = \begin{bmatrix}" + "\n"
+    for i in range(n):
+        row_values = []
+        for j in range(n):
+            value = cov_values[i, j]
+            row_values.append(f"{value:.6f}")
+        latex_str += " & ".join(row_values)
+        if i < n - 1:
+            latex_str += r" \\" + "\n"
+    latex_str += "\n" + r"\end{bmatrix}"
+    return latex_str
+def generate_variance_formula_symbolic(tickers: List[str]) -> str:
+    """
+    Generate symbolic variance formula using matrix notation.
+    Formula: σ²_p = w^T × Σ × w
+    Args:
+        tickers: List of ticker symbols
+    Returns:
+        LaTeX string for symbolic variance formula
+    """
+    # Matrix form
+    matrix_form = r"\sigma_p^2 = \mathbf{w}^T \Sigma \mathbf{w}"
+    # Expanded form
+    expanded_form = r"\sigma_p^2 = \sum_{i=1}^{n} \sum_{j=1}^{n} w_i w_j \sigma_{ij}"
+    # Combine both
+    latex_str = "\\begin{aligned}\n"
+    latex_str += matrix_form + r" \\" + "\n"
+    latex_str += expanded_form + "\n"
+    latex_str += "\\end{aligned}"
+    return latex_str
+def generate_variance_formula_expanded(
+    weights: Dict[str, float],
+    cov_matrix: pd.DataFrame,
+    variance_breakdown: List[Tuple[str, str, float, float, float, float]],
+    smart_truncation: bool = True,
+    truncation_threshold: int = 4
+) -> str:
+    """
+    Generate detailed variance expansion showing all terms.
+    This is the most complex formula generation function.
+    Shows:
+    1. Symbolic expansion term by term
+    2. Numerical substitution
+    3. Intermediate calculations
+    4. Final result
+    With smart truncation: shows first 3-4 terms + "..." + last 2 terms for readability
+    Args:
+        weights: Portfolio weights
+        cov_matrix: Covariance matrix
+        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)
+        smart_truncation: Whether to truncate long expansions
+        truncation_threshold: Number of tickers before truncation kicks in
+    Returns:
+        LaTeX string with full variance expansion
+    """
+    tickers = list(weights.keys())
+    n = len(tickers)
+    # Determine if we should truncate
+    should_truncate = smart_truncation and n >= truncation_threshold
+    # Step 1: Build symbolic terms
+    symbolic_terms = []
+    for ticker_i, ticker_j, w_i, w_j, cov_ij, contrib in variance_breakdown:
+        if ticker_i == ticker_j:
+            # Diagonal term: w_i^2 × σ_ii
+            term = f"w_{{{ticker_i}}}^2 \\sigma_{{{ticker_i}{ticker_j}}}"
+        else:
+            # Off-diagonal term: w_i × w_j × σ_ij
+            term = f"w_{{{ticker_i}}} w_{{{ticker_j}}} \\sigma_{{{ticker_i}{ticker_j}}}"
+        symbolic_terms.append(term)
+    # Step 2: Build numerical substitution terms
+    numerical_terms = []
+    for ticker_i, ticker_j, w_i, w_j, cov_ij, contrib in variance_breakdown:
+        if ticker_i == ticker_j:
+            # Diagonal: (w_i)^2 × cov_ij
+            num = f"({w_i:.4f})^2 \\times {cov_ij:.6f}"
+        else:
+            # Off-diagonal: w_i × w_j × cov_ij
+            num = f"({w_i:.4f}) \\times ({w_j:.4f}) \\times {cov_ij:.6f}"
+        numerical_terms.append(num)
+    # Step 3: Build intermediate values
+    intermediate_values = [f"{contrib:.6f}" for (_, _, _, _, _, contrib) in variance_breakdown]
+    # Step 4: Calculate total
+    total_variance = sum(contrib for (_, _, _, _, _, contrib) in variance_breakdown)
+    # Apply smart truncation if needed
+    if should_truncate:
+        # Show first 3-4 terms, ..., last 2 terms
+        num_show_start = 3
+        num_show_end = 2
+        symbolic_display = (
+            symbolic_terms[:num_show_start]
+            + [r"\cdots"]
+            + symbolic_terms[-num_show_end:]
+        )
+        numerical_display = (
+            numerical_terms[:num_show_start]
+            + [r"\cdots"]
+            + numerical_terms[-num_show_end:]
+        )
+        intermediate_display = (
+            intermediate_values[:num_show_start]
+            + [r"\cdots"]
+            + intermediate_values[-num_show_end:]
+        )
+    else:
+        symbolic_display = symbolic_terms
+        numerical_display = numerical_terms
+        intermediate_display = intermediate_values
+    # Build the aligned LaTeX
+    latex_str = "\\begin{aligned}\n"
+    # Line 1: Symbolic expansion
+    latex_str += r"\sigma_p^2 &= " + " + ".join(symbolic_display) + r" \\" + "\n"
+    # Line 2: Numerical substitution
+    latex_str += r"          &= " + " + ".join(numerical_display) + r" \\" + "\n"
+    # Line 3: Intermediate calculations
+    latex_str += r"          &= " + " + ".join(intermediate_display) + r" \\" + "\n"
+    # Line 4: Final result
+    latex_str += f"          &= {total_variance:.6f}\n"
+    latex_str += "\\end{aligned}"
+    return latex_str
+def generate_variance_formula_expanded_full(
+    weights: Dict[str, float],
+    cov_matrix: pd.DataFrame,
+    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
+) -> str:
+    """
+    Generate FULL variance expansion without truncation.
+    Use this for "Show all terms" toggle.
+    Args:
+        weights: Portfolio weights
+        cov_matrix: Covariance matrix
+        variance_breakdown: List of (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)
+    Returns:
+        LaTeX string with complete variance expansion
+    """
+    # Just call the main function with truncation disabled
+    return generate_variance_formula_expanded(
+        weights,
+        cov_matrix,
+        variance_breakdown,
+        smart_truncation=False
+    )
+def generate_volatility_formulas(
+    variance: float,
+    volatility: float
+) -> Tuple[str, str]:
+    """
+    Generate volatility calculation formulas.
+    Returns both symbolic and numerical versions.
+    Args:
+        variance: Calculated portfolio variance
+        volatility: Calculated portfolio volatility
+    Returns:
+        Tuple of (symbolic_latex, numerical_latex)
+    """
+    # Symbolic formula
+    symbolic = r"\sigma_p = \sqrt{\sigma_p^2}"
+    # Numerical formula
+    numerical = f"\\sigma_p = \\sqrt{{{variance:.6f}}} = {volatility:.6f} = {volatility*100:.2f}\\%"
+    return symbolic, numerical
+def generate_correlation_matrix_latex(cov_matrix: pd.DataFrame) -> str:
+    """
+    Generate correlation matrix from covariance matrix.
+    Correlation: ρ_ij = σ_ij / (σ_i × σ_j)
+    Args:
+        cov_matrix: Covariance matrix
+    Returns:
+        LaTeX string for correlation matrix
+    """
+    # Calculate correlation matrix
+    std_devs = np.sqrt(np.diag(cov_matrix))
+    corr_matrix = cov_matrix / np.outer(std_devs, std_devs)
+    tickers = list(cov_matrix.columns)
+    n = len(tickers)
+    # Build LaTeX matrix
+    latex_str = r"\text{Correlation Matrix} = \begin{bmatrix}" + "\n"
+    for i in range(n):
+        row_values = []
+        for j in range(n):
+            value = corr_matrix.iloc[i, j]
+            row_values.append(f"{value:.4f}")
+        latex_str += " & ".join(row_values)
+        if i < n - 1:
+            latex_str += r" \\" + "\n"
+    latex_str += "\n" + r"\end{bmatrix}"
+    return latex_str
+def generate_all_formulas(
+    amounts: Dict[str, float],
+    weights: Dict[str, float],
+    cov_matrix: pd.DataFrame,
+    variance: float,
+    volatility: float,
+    variance_breakdown: List[Tuple[str, str, float, float, float, float]]
+) -> Dict[str, str]:
+    """
+    Generate all LaTeX formulas for the portfolio analysis.
+    This is the orchestrator function that generates all formula variants.
+    Args:
+        amounts: Portfolio amounts {ticker: amount}
+        weights: Portfolio weights {ticker: weight}
+        cov_matrix: Covariance matrix
+        variance: Portfolio variance
+        volatility: Portfolio volatility
+        variance_breakdown: Detailed variance breakdown
+    Returns:
+        Dictionary of LaTeX strings:
+        {
+            'weights_symbolic': str,
+            'weights_numerical': str,
+            'covariance_matrix': str,
+            'correlation_matrix': str,
+            'variance_symbolic': str,
+            'variance_expanded': str,
+            'variance_expanded_full': str,
+            'volatility_symbolic': str,
+            'volatility_numerical': str
+        }
+    """
+    tickers = list(weights.keys())
+    # Generate all formula components
+    weights_symbolic, weights_numerical = generate_weight_formulas(weights, amounts)
+    covariance_matrix = generate_covariance_matrix_latex(cov_matrix, annualized=True)
+    correlation_matrix = generate_correlation_matrix_latex(cov_matrix)
+    variance_symbolic = generate_variance_formula_symbolic(tickers)
+    variance_expanded = generate_variance_formula_expanded(
+        weights,
+        cov_matrix,
+        variance_breakdown,
+        smart_truncation=True
+    )
+    variance_expanded_full = generate_variance_formula_expanded_full(
+        weights,
+        cov_matrix,
+        variance_breakdown
+    )
+    volatility_symbolic, volatility_numerical = generate_volatility_formulas(
+        variance,
+        volatility
+    )
+    return {
+        'weights_symbolic': weights_symbolic,
+        'weights_numerical': weights_numerical,
+        'covariance_matrix': covariance_matrix,
+        'correlation_matrix': correlation_matrix,
+        'variance_symbolic': variance_symbolic,
+        'variance_expanded': variance_expanded,
+        'variance_expanded_full': variance_expanded_full,
+        'volatility_symbolic': volatility_symbolic,
+        'volatility_numerical': volatility_numerical,
+    }

ocr_parser.py ADDED Viewed

	@@ -0,0 +1,175 @@

+"""
+OCR and portfolio parsing module.
+Handles:
+- Text extraction from portfolio screenshots using Tesseract OCR
+- Parsing tickers and amounts using regex
+- JSON validation for user-edited portfolio data
+"""
+import json
+import math
+import re
+from typing import Dict, Tuple, Optional
+from PIL import Image
+import pytesseract
+# Regex pattern for ticker extraction: ([A-Z]{1,5})\s+([\d,.]+)
+# Matches: 1-5 uppercase letters followed by whitespace and a number (with optional commas)
+TICKER_PATTERN = r'([A-Z]{1,5})\s+([\d,.]+)'
+def extract_text_from_image(image: Image.Image) -> Tuple[Optional[str], Optional[str]]:
+    """
+    Extract text from uploaded portfolio screenshot using Tesseract OCR.
+    Args:
+        image: PIL Image object
+    Returns:
+        Tuple of (extracted_text, error_message)
+        - If successful: (text, None)
+        - If failed: (None, error_message)
+    """
+    try:
+        # Verify tesseract is available
+        pytesseract.get_tesseract_version()
+        # Extract text
+        text = pytesseract.image_to_string(image)
+        # Check if any text was detected
+        if not text.strip():
+            return None, "No text detected in image. Please upload a clearer screenshot."
+        return text, None
+    except pytesseract.TesseractNotFoundError:
+        return None, "OCR engine (Tesseract) not available. Please check installation."
+    except Exception as e:
+        return None, f"OCR failed: {str(e)}"
+def parse_portfolio(text: str) -> Dict[str, float]:
+    """
+    Parse portfolio from extracted text using regex.
+    Pattern: ([A-Z]{1,5})\\s+([\\d,.]+)
+    Extracts ticker symbols (1-5 uppercase letters) and amounts (numbers with optional commas).
+    Args:
+        text: Extracted text from OCR
+    Returns:
+        Dictionary mapping tickers to amounts: {ticker: amount}
+        Returns empty dict if no valid tickers found
+    """
+    if not text:
+        return {}
+    # Find all matches of pattern
+    matches = re.findall(TICKER_PATTERN, text)
+    if not matches:
+        return {}
+    portfolio = {}
+    for ticker, amount_str in matches:
+        try:
+            # Remove commas from numbers (e.g., "1,234.56" -> "1234.56")
+            clean_amount = amount_str.replace(",", "")
+            amount = float(clean_amount)
+            # Only include positive amounts
+            if amount > 0:
+                portfolio[ticker] = amount
+        except ValueError:
+            # Skip invalid number formats
+            continue
+    return portfolio
+def validate_portfolio_json(json_str: str) -> Tuple[bool, Optional[Dict[str, float]], str]:
+    """
+    Validate user-edited portfolio JSON.
+    Expected format: {"AAPL": 5000, "GOOGL": 3000, ...}
+    Args:
+        json_str: JSON string to validate
+    Returns:
+        Tuple of (is_valid, parsed_dict, error_message)
+        - If valid: (True, portfolio_dict, "")
+        - If invalid: (False, None, error_message)
+    """
+    if not json_str or not json_str.strip():
+        return False, None, "JSON is empty"
+    try:
+        # Parse JSON
+        data = json.loads(json_str)
+        # Validate it's a dictionary
+        if not isinstance(data, dict):
+            return False, None, "JSON must be a dictionary/object, not a list or other type"
+        # Validate all keys are strings and all values are numbers
+        portfolio = {}
+        for ticker, amount in data.items():
+            # Check ticker is string
+            if not isinstance(ticker, str):
+                return False, None, f"Ticker '{ticker}' must be a string"
+            # Check ticker is uppercase (optional validation)
+            if not ticker.isupper():
+                return False, None, f"Ticker '{ticker}' should be uppercase (e.g., 'AAPL' not 'aapl')"
+            # Check ticker length (1-5 characters is typical)
+            if len(ticker) < 1 or len(ticker) > 10:
+                return False, None, f"Ticker '{ticker}' length should be between 1-10 characters"
+            # Check amount is numeric
+            try:
+                amount_float = float(amount)
+            except (TypeError, ValueError):
+                return False, None, f"Amount for {ticker} must be a number, got: {amount}"
+            # Check amount is positive
+            if amount_float <= 0:
+                return False, None, f"Amount for {ticker} must be positive, got: {amount_float}"
+            portfolio[ticker] = amount_float
+        # Check we have at least one ticker
+        if len(portfolio) == 0:
+            return False, None, "Portfolio must contain at least one ticker"
+        # Check we don't exceed maximum tickers (optional limit)
+        MAX_TICKERS = 20
+        if len(portfolio) > MAX_TICKERS:
+            return False, None, f"Portfolio exceeds maximum of {MAX_TICKERS} tickers"
+        return True, portfolio, ""
+    except json.JSONDecodeError as e:
+        return False, None, f"Invalid JSON format: {str(e)}"
+    except Exception as e:
+        return False, None, f"Validation error: {str(e)}"
+def format_portfolio_json(portfolio: Dict[str, float], indent: int = 2) -> str:
+    """
+    Format portfolio dictionary as pretty-printed JSON.
+    Args:
+        portfolio: Dictionary of {ticker: amount}
+        indent: Number of spaces for indentation
+    Returns:
+        Formatted JSON string
+    """
+    return json.dumps(portfolio, indent=indent, sort_keys=True)

portfolio_calculator.py ADDED Viewed

	@@ -0,0 +1,316 @@

+"""
+Portfolio financial calculations module.
+Handles:
+- Fetching historical price data from yfinance
+- Calculating portfolio weights
+- Calculating log returns
+- Computing covariance matrix
+- Calculating portfolio variance and volatility
+- Generating variance breakdown for detailed formulas
+"""
+from typing import Dict, List, Tuple, Optional
+import numpy as np
+import pandas as pd
+import yfinance as yf
+import streamlit as st
+# Constants
+# Factor used by the functions below to scale daily covariance to an annual figure.
+TRADING_DAYS_PER_YEAR = 252
+# fetch_historical_data rejects a price history with fewer rows than this
+# (counted after rows containing NaN have been dropped).
+MIN_DATA_POINTS = 30
+# NOTE(review): not referenced by any function in this module as shown; the JSON
+# validator declares its own local MAX_TICKERS = 20 — confirm which one is meant
+# to be authoritative.
+MAX_TICKERS = 20
+@st.cache_data(ttl=3600)  # Cache for 1 hour
+def fetch_historical_data(
+    tickers: Tuple[str, ...],  # Tuple for hashability (caching requirement)
+    period: str = "1y"
+) -> Tuple[Optional[pd.DataFrame], Optional[str]]:
+    """
+    Fetch historical price data using yfinance.
+    Args:
+        tickers: Tuple of ticker symbols (e.g., ('AAPL', 'GOOGL', 'MSFT'))
+        period: Time period for historical data (default: '1y')
+    Returns:
+        Tuple of (prices_dataframe, error_message)
+        - If successful: (DataFrame, None)
+        - If failed: (None, error_message)
+    """
+    try:
+        # Convert tuple back to list for yfinance
+        ticker_list = list(tickers)
+        # Download data (progress=False to avoid console output in Streamlit)
+        data = yf.download(ticker_list, period=period, progress=False)
+        # Check if data was returned
+        if data.empty:
+            return None, "No data returned from yfinance. Please check ticker symbols."
+        # Extract 'Adj Close' prices
+        if len(ticker_list) == 1:
+            # Single ticker: yfinance returns different structure
+            prices = data[['Adj Close']].copy()
+            prices.columns = ticker_list
+        else:
+            # Multiple tickers
+            prices = data['Adj Close'].copy()
+        # Check for missing data
+        missing_count = prices.isnull().sum()
+        if missing_count.sum() > 0:
+            missing_tickers = missing_count[missing_count > 0]
+            warning = f"Warning: Missing data detected - {dict(missing_tickers)}"
+            # Don't fail, just warn
+            st.warning(warning)
+        # Drop rows with NaN values
+        prices = prices.dropna()
+        # Check we have enough data points
+        if len(prices) < MIN_DATA_POINTS:
+            return None, f"Insufficient data: only {len(prices)} days available (minimum {MIN_DATA_POINTS} required)"
+        return prices, None
+    except Exception as e:
+        return None, f"Failed to fetch data: {str(e)}"
+def calculate_log_returns(prices: pd.DataFrame) -> pd.DataFrame:
+    """
+    Calculate log returns from price data.
+    Formula: r_t = ln(P_t / P_{t-1})
+    Args:
+        prices: DataFrame of historical prices (columns = tickers, index = dates)
+    Returns:
+        DataFrame of log returns (first row will be dropped due to NaN)
+    """
+    # Calculate log returns: ln(price_t / price_{t-1})
+    returns = np.log(prices / prices.shift(1))
+    # Drop the first row (NaN)
+    returns = returns.dropna()
+    return returns
+def calculate_portfolio_weights(amounts: Dict[str, float]) -> Dict[str, float]:
+    """
+    Calculate portfolio weights from position amounts.
+    Formula: w_i = amount_i / sum(amounts)
+    Args:
+        amounts: Dictionary mapping tickers to dollar amounts
+    Returns:
+        Dictionary mapping tickers to weights (percentages as decimals)
+    """
+    total = sum(amounts.values())
+    if total <= 0:
+        raise ValueError("Total portfolio amount must be positive")
+    weights = {ticker: amount / total for ticker, amount in amounts.items()}
+    # Validate weights sum to 1.0 (accounting for floating point errors)
+    weight_sum = sum(weights.values())
+    if not np.isclose(weight_sum, 1.0, atol=1e-6):
+        # Normalize to ensure exact sum = 1.0
+        weights = {ticker: w / weight_sum for ticker, w in weights.items()}
+    return weights
+def calculate_covariance_matrix(returns: pd.DataFrame, annualized: bool = False) -> pd.DataFrame:
+    """
+    Calculate covariance matrix of returns.
+    Args:
+        returns: DataFrame of log returns
+        annualized: If True, multiply by TRADING_DAYS_PER_YEAR (default: False)
+    Returns:
+        DataFrame of covariance matrix (tickers × tickers)
+    """
+    cov_matrix = returns.cov()
+    if annualized:
+        cov_matrix = cov_matrix * TRADING_DAYS_PER_YEAR
+    return cov_matrix
+def calculate_portfolio_variance(
+    weights: Dict[str, float],
+    cov_matrix: pd.DataFrame,
+    annualized: bool = True
+) -> float:
+    """
+    Calculate portfolio variance.
+    Formula: σ²_p = w^T × Σ × w
+    Where:
+    - w = vector of weights
+    - Σ = covariance matrix (annualized)
+    Args:
+        weights: Dictionary of portfolio weights
+        cov_matrix: Covariance matrix (daily, will be annualized if annualized=True)
+        annualized: If True, annualize the covariance matrix (default: True)
+    Returns:
+        Portfolio variance (annualized if annualized=True)
+    """
+    # Ensure tickers are in same order
+    tickers = list(weights.keys())
+    # Create weight vector (as numpy array)
+    w = np.array([weights[ticker] for ticker in tickers])
+    # Get covariance matrix for these tickers
+    cov = cov_matrix.loc[tickers, tickers].values
+    # Annualize if requested
+    if annualized:
+        cov = cov * TRADING_DAYS_PER_YEAR
+    # Calculate variance: w^T × Σ × w
+    variance = w @ cov @ w
+    return float(variance)
+def calculate_portfolio_volatility(variance: float) -> float:
+    """
+    Calculate portfolio volatility (standard deviation).
+    Formula: σ_p = √(σ²_p)
+    Args:
+        variance: Portfolio variance
+    Returns:
+        Portfolio volatility (standard deviation)
+    """
+    return float(np.sqrt(variance))
+def get_variance_breakdown(
+    weights: Dict[str, float],
+    cov_matrix: pd.DataFrame,
+    annualized: bool = True
+) -> List[Tuple[str, str, float, float, float, float]]:
+    """
+    Generate detailed breakdown of variance calculation.
+    Returns a list of all variance components for the detailed formula expansion.
+    Args:
+        weights: Dictionary of portfolio weights
+        cov_matrix: Covariance matrix (daily)
+        annualized: If True, use annualized covariance (default: True)
+    Returns:
+        List of tuples: (ticker_i, ticker_j, w_i, w_j, cov_ij, contribution)
+        where contribution = w_i × w_j × cov_ij
+    """
+    tickers = list(weights.keys())
+    n = len(tickers)
+    breakdown = []
+    for i, ticker_i in enumerate(tickers):
+        for j, ticker_j in enumerate(tickers):
+            w_i = weights[ticker_i]
+            w_j = weights[ticker_j]
+            # Get covariance value
+            cov_ij = cov_matrix.loc[ticker_i, ticker_j]
+            # Annualize if requested
+            if annualized:
+                cov_ij = cov_ij * TRADING_DAYS_PER_YEAR
+            # Calculate contribution to total variance
+            contribution = w_i * w_j * cov_ij
+            breakdown.append((ticker_i, ticker_j, w_i, w_j, cov_ij, contribution))
+    return breakdown
+def get_portfolio_metrics(
+    amounts: Dict[str, float],
+    period: str = "1y"
+) -> Tuple[Optional[Dict], Optional[str]]:
+    """
+    Calculate all portfolio metrics in one go.
+    This is a convenience function that orchestrates all calculations.
+    Args:
+        amounts: Dictionary of {ticker: amount}
+        period: Historical data period (default: '1y')
+    Returns:
+        Tuple of (metrics_dict, error_message)
+        metrics_dict contains:
+        - weights: Dict[str, float]
+        - prices: pd.DataFrame
+        - returns: pd.DataFrame
+        - cov_matrix: pd.DataFrame
+        - variance: float
+        - volatility: float
+        - variance_breakdown: List[Tuple]
+    """
+    try:
+        tickers = list(amounts.keys())
+        # 1. Calculate weights
+        weights = calculate_portfolio_weights(amounts)
+        # 2. Fetch historical data (convert to tuple for caching)
+        prices, error = fetch_historical_data(tuple(tickers), period)
+        if error:
+            return None, error
+        # 3. Calculate returns
+        returns = calculate_log_returns(prices)
+        # 4. Calculate covariance matrix
+        cov_matrix = calculate_covariance_matrix(returns, annualized=False)
+        # 5. Calculate variance
+        variance = calculate_portfolio_variance(weights, cov_matrix, annualized=True)
+        # 6. Calculate volatility
+        volatility = calculate_portfolio_volatility(variance)
+        # 7. Get variance breakdown
+        variance_breakdown = get_variance_breakdown(weights, cov_matrix, annualized=True)
+        metrics = {
+            'weights': weights,
+            'prices': prices,
+            'returns': returns,
+            'cov_matrix': cov_matrix,
+            'variance': variance,
+            'volatility': volatility,
+            'variance_breakdown': variance_breakdown,
+        }
+        return metrics, None
+    except Exception as e:
+        return None, f"Error calculating portfolio metrics: {str(e)}"

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit==1.32.0
+pytesseract==0.3.10
+Pillow==10.2.0
+yfinance==0.2.36
+pandas==2.2.0
+numpy==1.26.3
+sympy==1.12
+matplotlib==3.8.2