Spaces:

MLBench
/

RealState_OCR

Sleeping

File size: 113,556 Bytes

import gradio as gr
import tempfile
import shutil
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import FileResponse,JSONResponse
from typing import List
import uvicorn
import os
import base64
"""
Real Estate Financial Model Pipeline
Extracts data from PDFs, solves formulas with Gemini API, generates Excel
"""
from fastapi.middleware.cors import CORSMiddleware

import re
import json
from pathlib import Path
from typing import Dict, Any, List, Optional
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from pdfminer.high_level import extract_text
import google.generativeai as genai

# Add logging configuration
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class RealEstateModelPipeline:
    def __init__(self, gemini_api_key: str):
        """Initialize pipeline with Gemini API key"""
        genai.configure(api_key=gemini_api_key)
        self.model = genai.GenerativeModel('gemini-2.0-flash')
        self.extracted_data = {}
        self.formula_results = {}
        self.structured_data = {}
    
    def safe_divide(self, numerator: float, denominator: float, default: float = 0) -> float:
        """Safe division that returns default instead of error"""
        try:
            if denominator == 0 or denominator is None:
                return default
            return numerator / denominator
        except:
            return default
        
    def extract_pdf_text(self, pdf_path: str) -> str:
        """Extract text from PDF using pdfminer"""
        try:
            text = extract_text(pdf_path)
            return text.strip()
        except Exception as e:
            print(f"Error extracting {pdf_path}: {e}")
            return ""

    def extract_xlsx_text(self, xlsx_path: str) -> str:
        """Extract text from XLSX using pandas and openpyxl"""
        try:
            extracted_content = []
            
            # Try pandas first for data extraction
            try:
                xlsx = pd.ExcelFile(xlsx_path)
                for sheet_name in xlsx.sheet_names:
                    df = pd.read_excel(xlsx, sheet_name=sheet_name)
                    extracted_content.append(f"=== Sheet: {sheet_name} ===")
                    extracted_content.append(df.to_string(index=False))
                    extracted_content.append("\n")
            except:
                pass
            
            # Also try openpyxl for cell-level data
            try:
                wb = load_workbook(xlsx_path, data_only=True)
                for sheet in wb.worksheets:
                    extracted_content.append(f"\n=== Sheet: {sheet.title} (Raw) ===")
                    for row in sheet.iter_rows(values_only=True):
                        row_text = " | ".join([str(cell) if cell is not None else "" for cell in row])
                        if row_text.strip():
                            extracted_content.append(row_text)
            except:
                pass
            
            return "\n".join(extracted_content)
        except Exception as e:
            print(f"Error extracting {xlsx_path}: {e}")
            return ""
            
    def extract_all_pdfs(self, pdf_directory: str) -> Dict[str, str]:
        """Extract text from all PDFs and XLSX files in directory"""
        pdf_dir = Path(pdf_directory)
        extracted_texts = {}
        
        with open('output_file_3.txt', "w", encoding="utf-8") as f:
            # Process PDFs
            for pdf_file in pdf_dir.glob("*.pdf"):
                print(f"Extracting PDF: {pdf_file.name}")
                text = self.extract_pdf_text(str(pdf_file))
                extracted_texts[pdf_file.stem] = text
    
                f.write(f"=== {pdf_file.name} ===\n")
                f.write(text)
                f.write("\n\n" + "="*80 + "\n\n")
            
            # Process XLSX files
            for xlsx_file in pdf_dir.glob("*.xlsx"):
                print(f"Extracting XLSX: {xlsx_file.name}")
                text = self.extract_xlsx_text(str(xlsx_file))
                extracted_texts[xlsx_file.stem] = text
    
                f.write(f"=== {xlsx_file.name} ===\n")
                f.write(text)
                f.write("\n\n" + "="*80 + "\n\n")
    
        self.extracted_data = extracted_texts
        
        return extracted_texts
    
    def extract_address_fallback(self, pdf_texts: Dict[str, str]) -> Optional[str]:
        """Extract address using simple pattern matching as fallback"""
        for name, text in pdf_texts.items():
            if 'Offering_Memorandum' in name or 'offering' in name.lower():
                # Pattern: "Address: <address text>"
                match = re.search(r'Address:\s*(.+?)(?:\n|Property Type:)', text, re.IGNORECASE)
                if match:
                    address = match.group(1).strip()
                    print(f"  ✓ Extracted address via fallback: {address}")
                    return address
        return None
        
    def create_gemini_prompt(self, pdf_texts: Dict[str, str]) -> str:
        """Create comprehensive prompt for Gemini to extract structured data"""
        
        # Build a clear summary of what's in each PDF
        pdf_summary = "\n".join([f"- {name}: {len(text)} characters" for name, text in pdf_texts.items()])
        
        prompt = f"""You are a real estate financial analyst. Extract ALL numerical data from the following PDF texts and return it as a JSON object.

        CRITICAL INSTRUCTIONS:
        1. ONLY extract data that is EXPLICITLY stated in the PDFs - DO NOT estimate or make up values
        2. For missing values, use null (not 0)
        3. Pay close attention to the specific document names - each contains different information
        4. Extract exact numbers as they appear in the documents
    
        AVAILABLE DOCUMENTS:
        {pdf_summary}
    
        PDF CONTENTS:
        """
        for name, text in pdf_texts.items():
            prompt += f"\n{'='*60}\n=== {name} ===\n{'='*60}\n{text}\n"
        
        prompt += """

        EXTRACTION INSTRUCTIONS BY DOCUMENT:
    
        FROM "Offering_Memorandum.pdf":
        - Extract: Address (full address after "Address:")
        - Extract: Property Type (after "Property Type:")
        - Extract: Units (number after "Units:")
    
        FROM "Operating_Expenses_Summary.pdf" (if present):
        - Extract EXACT annual amounts for:
        * Real Estate Taxes
        * Insurance
        * Utilities
        * Repairs & Maint. (or Repairs & Maintenance)
        * Management Fee
        * Payroll
        * Administrative (if listed)
        * Professional Fees (if listed)
    
        FROM "Sales_Comps.pdf":
        - Extract all Price/SF values
        - Calculate average_price_per_sf = average of all Price/SF values
        - Count total number of comps
    
        FROM "Rent_Comps.pdf" (if present):
        - Extract all rent values (numbers before @ symbol)
        - Calculate average_rent = average of all rent values
        - Count total number of rent comps
    
        FROM "Market_Report.pdf":
        - Extract: Vacancy Rate (percentage)
        - Extract: Rent Growth (YoY) (percentage)
    
        FROM "Demographics_Overview.pdf":
        - Extract: Population (3-mi) - the number
        - Extract: Median HH Income - the dollar amount
        - Extract: Transit Score - the number
    
        REQUIRED JSON OUTPUT STRUCTURE:
        {
        "property_info": {
            "address": "EXTRACT FROM Offering_Memorandum.pdf",
            "property_type": "EXTRACT FROM Offering_Memorandum.pdf",
            "units": EXTRACT_NUMBER_FROM_Offering_Memorandum.pdf,
            "gross_sf": null,
            "rentable_sf": null,
            "retail_sf": null
        },
        "acquisition": {
            "land_value": null,
            "price": null,
            "closing_costs": null
        },
        "construction": {
            "construction_cost_per_gsf": null,
            "construction_months": null
        },
        "soft_costs": {
            "architecture_and_interior_cost": null,
            "structural_engineering_cost": null,
            "mep_engineering_cost": null,
            "civil_engineering_cost": null,
            "controlled_inspections_cost": null,
            "surveying_cost": null,
            "utilities_connection_cost": null,
            "advertising_and_marketing_cost": null,
            "accounting_cost": null,
            "monitoring_cost": null,
            "ff_and_e_cost": null,
            "environmental_consultant_fee": null,
            "miscellaneous_consultants_fee": null,
            "general_legal_cost": null,
            "real_estate_taxes_during_construction": null,
            "miscellaneous_admin_cost": null,
            "ibr_cost": null,
            "project_team_cost": null,
            "pem_fees": null,
            "bank_fees": null
        },
        "financing": {
            "ltc_ratio": null,
            "financing_percentage": null,
            "interest_rate_basis_points": null,
            "financing_cost": null,
            "interest_reserve": null
        },
        "operating_expenses": {
            "payroll": EXTRACT_FROM_Operating_Expenses_Summary.pdf,
            "repairs_and_maintenance": EXTRACT_FROM_Operating_Expenses_Summary.pdf,
            "utilities": EXTRACT_FROM_Operating_Expenses_Summary.pdf,
            "administrative": EXTRACT_FROM_Operating_Expenses_Summary.pdf_OR_null,
            "professional_fees": EXTRACT_FROM_Operating_Expenses_Summary.pdf_OR_null,
            "insurance": EXTRACT_FROM_Operating_Expenses_Summary.pdf,
            "property_taxes": EXTRACT_FROM_Operating_Expenses_Summary.pdf,
            "management_fee_percentage": null
        },
        "revenue": {
            "free_market_rent_psf": null,
            "affordable_rent_psf": null,
            "other_income_per_unit": null,
            "vacancy_rate": null,
            "retail_rent_psf": null,
            "parking_income": null
        },
        "sales_comps": {
            "average_price_per_sf": CALCULATE_AVERAGE_FROM_Sales_Comps.pdf,
            "comp_count": COUNT_FROM_Sales_Comps.pdf
        },
        "rent_comps": {
            "average_rent": CALCULATE_AVERAGE_FROM_Rent_Comps.pdf_IF_EXISTS,
            "comp_count": COUNT_FROM_Rent_Comps.pdf_IF_EXISTS
        },
        "market_data": {
            "vacancy_rate": EXTRACT_FROM_Market_Report.pdf,
            "rent_growth_yoy": EXTRACT_FROM_Market_Report.pdf,
            "median_hh_income": EXTRACT_FROM_Demographics_Overview.pdf,
            "population_3mi": EXTRACT_FROM_Demographics_Overview.pdf,
            "transit_score": EXTRACT_FROM_Demographics_Overview.pdf
        },
        "projections": {
            "lease_up_months": null,
            "stabilization_months": null,
            "revenue_inflation_rate": null,
            "expense_inflation_rate": null,
            "hold_period_months": null,
            "exit_cap_rate_decimal": null,
            "sale_cost_percentage": null
        },
        "equity_structure": {
            "gp_pref_rate": null,
            "lp_pref_rate": null,
            "promote_percentage": null
        }
        }
    
        EXAMPLES OF CORRECT EXTRACTION:
    
        Example 1 - From your Offering_Memorandum.pdf:
        "Address: 455 Atlantic Ave, Brooklyn, NY" 
        → "address": "455 Atlantic Ave, Brooklyn, NY"
    
        "Property Type: Retail"
        → "property_type": "Retail"
    
        "Units: 7"
        → "units": 7
    
        Example 2 - From your Operating_Expenses_Summary.pdf:
        "Real Estate Taxes    $91940.2"
        → "property_taxes": 91940.2
    
        "Insurance    $16778.94"
        → "insurance": 16778.94
    
        "Payroll    $44948.21"
        → "payroll": 44948.21
    
        Example 3 - From your Sales_Comps.pdf:
        "Price/SF" column shows: $880, $919, $673, $894
        → "average_price_per_sf": 841.5 (average of these 4 values)
        → "comp_count": 4
    
        Example 4 - From your Market_Report.pdf:
        "Vacancy Rate: 5.71%"
        → "vacancy_rate": 0.0571
    
        "Rent Growth (YoY): 4.18%"
        → "rent_growth_yoy": 0.0418
    
        CRITICAL RULES:
        1. Use EXACT numbers from the PDFs - don't round or modify
        2. Convert percentages to decimals (5.71% → 0.0571)
        3. Remove dollar signs and commas from numbers ($91,940.2 → 91940.2)
        4. If a field is not in ANY PDF, use null
        5. Double-check the document name before extracting - make sure you're looking at the right PDF
    
        Return ONLY valid JSON with no explanations, comments, or markdown formatting."""

        prompt += """
        
        NOTE: Documents may be in PDF or XLSX format. For XLSX files, data is extracted sheet-by-sheet.
        Look for numerical data in tables, columns, and labeled cells.
        
        PDF AND XLSX CONTENTS:
        """
        
        return prompt
    
    def extract_structured_data(self) -> Dict[str, Any]:
        """Use Gemini to extract structured data from PDFs"""
        print("\nProcessing with Gemini API...")
        
        # NEW: Try simple extraction first
        fallback_address = self.extract_address_fallback(self.extracted_data)
        
        prompt = self.create_gemini_prompt(self.extracted_data)
        
        try:
            response = self.model.generate_content(prompt)
            response_text = response.text.strip()
            
            # Clean JSON if wrapped in markdown
            if "```json" in response_text:
                response_text = response_text.split("```json")[1].split("```")[0].strip()
            elif "```" in response_text:
                response_text = response_text.split("```")[1].split("```")[0].strip()
            
            data = json.loads(response_text)
            
            # NEW: Override with fallback if Gemini failed
            if fallback_address and (not data.get('property_info', {}).get('address') or 
                                    data['property_info']['address'] == 'adress'):
                data['property_info']['address'] = fallback_address
                print(f"  ✓ Used fallback address: {fallback_address}")
            
            print("✓ Successfully extracted structured data")
            return data
            
        except Exception as e:
            print(f"Error with Gemini API: {e}")
            data = self.get_default_data_structure()
            # Use fallback even in error case
            if fallback_address:
                data['property_info']['address'] = fallback_address
            return data
    

    def post_process_extracted_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Fill in missing values with intelligent estimates"""
        
        # Get units
        units = data.get('property_info', {}).get('units', 32)
        
        # Estimate SF if missing
        if not data['property_info'].get('gross_sf'):
            data['property_info']['gross_sf'] = units * 1000
        
        if not data['property_info'].get('rentable_sf'):
            data['property_info']['rentable_sf'] = int(data['property_info']['gross_sf'] * 0.85)
        
        # Set retail_sf to 0 if None (most residential projects don't have retail)
        if data['property_info'].get('retail_sf') is None:
            data['property_info']['retail_sf'] = 0
        
        # Get gross_sf for calculations
        gross_sf = data['property_info']['gross_sf']
        
        # Set default construction cost if missing
        if not data['construction'].get('construction_cost_per_gsf'):
            data['construction']['construction_cost_per_gsf'] = 338
        
        if not data['construction'].get('construction_months'):
            data['construction']['construction_months'] = 18
        
        # Estimate land value from sales comps if available
        if not data['acquisition'].get('land_value'):
            sales_comps = data.get('sales_comps', {})
            avg_psf = sales_comps.get('average_price_per_sf')
            if avg_psf:
                data['acquisition']['land_value'] = avg_psf * gross_sf
            else:
                # Use default based on typical Manhattan pricing
                data['acquisition']['land_value'] = 6000000
        
        if not data['acquisition'].get('price'):
            data['acquisition']['price'] = data['acquisition']['land_value']
        
        if not data['acquisition'].get('closing_costs'):
            data['acquisition']['closing_costs'] = 150000
        
        # Estimate soft costs as percentages if null
        total_hard_cost = data['construction']['construction_cost_per_gsf'] * gross_sf
        soft_cost_estimate = total_hard_cost * 0.15  # 15% of hard costs
        
        soft_costs = data.get('soft_costs', {})
        default_soft_cost_values = {
            'architecture_and_interior_cost': soft_cost_estimate * 0.15,
            'structural_engineering_cost': soft_cost_estimate * 0.08,
            'mep_engineering_cost': soft_cost_estimate * 0.10,
            'civil_engineering_cost': soft_cost_estimate * 0.05,
            'controlled_inspections_cost': soft_cost_estimate * 0.03,
            'surveying_cost': soft_cost_estimate * 0.02,
            'utilities_connection_cost': soft_cost_estimate * 0.05,
            'advertising_and_marketing_cost': soft_cost_estimate * 0.06,
            'accounting_cost': soft_cost_estimate * 0.03,
            'monitoring_cost': soft_cost_estimate * 0.02,
            'ff_and_e_cost': soft_cost_estimate * 0.10,
            'environmental_consultant_fee': soft_cost_estimate * 0.02,
            'miscellaneous_consultants_fee': soft_cost_estimate * 0.03,
            'general_legal_cost': soft_cost_estimate * 0.06,
            'real_estate_taxes_during_construction': soft_cost_estimate * 0.10,
            'miscellaneous_admin_cost': soft_cost_estimate * 0.04,
            'ibr_cost': soft_cost_estimate * 0.03,
            'project_team_cost': soft_cost_estimate * 0.15,
            'pem_fees': soft_cost_estimate * 0.08,
            'bank_fees': soft_cost_estimate * 0.05
        }
        
        for key, default_value in default_soft_cost_values.items():
            if soft_costs.get(key) is None:
                soft_costs[key] = default_value
        
        # Set financing defaults if missing
        financing = data.get('financing', {})
        if not financing.get('ltc_ratio'):
            financing['ltc_ratio'] = 0.75
        if not financing.get('financing_percentage'):
            financing['financing_percentage'] = 0.03
        if not financing.get('interest_rate_basis_points'):
            financing['interest_rate_basis_points'] = 350
        if not financing.get('financing_cost'):
            financing['financing_cost'] = 200000
        if not financing.get('interest_reserve'):
            financing['interest_reserve'] = 500000
        
        # Set revenue defaults if missing
        revenue = data.get('revenue', {})
        if not revenue.get('free_market_rent_psf'):
            revenue['free_market_rent_psf'] = 60
        if not revenue.get('affordable_rent_psf'):
            revenue['affordable_rent_psf'] = 35
        if not revenue.get('other_income_per_unit'):
            revenue['other_income_per_unit'] = 100
        if not revenue.get('vacancy_rate'):
            revenue['vacancy_rate'] = 0.05
        if not revenue.get('retail_rent_psf'):
            revenue['retail_rent_psf'] = 45
        if not revenue.get('parking_income'):
            revenue['parking_income'] = 50000
        
        # Ensure operating expenses have defaults
        op_expenses = data.get('operating_expenses', {})
        if not op_expenses.get('payroll'):
            op_expenses['payroll'] = 31136.07
        if not op_expenses.get('repairs_and_maintenance'):
            op_expenses['repairs_and_maintenance'] = 44418.61
        if not op_expenses.get('utilities'):
            op_expenses['utilities'] = 12535.90
        if not op_expenses.get('administrative'):
            op_expenses['administrative'] = 0
        if not op_expenses.get('professional_fees'):
            op_expenses['professional_fees'] = 18789.84
        if not op_expenses.get('insurance'):
            op_expenses['insurance'] = 9341.33
        if not op_expenses.get('property_taxes'):
            op_expenses['property_taxes'] = 118832.22
        if not op_expenses.get('management_fee_percentage'):
            op_expenses['management_fee_percentage'] = 0.03
        
        # Ensure projections have defaults
        projections = data.get('projections', {})
        if not projections.get('lease_up_months'):
            projections['lease_up_months'] = 12
        if not projections.get('stabilization_months'):
            projections['stabilization_months'] = 6
        if not projections.get('revenue_inflation_rate'):
            projections['revenue_inflation_rate'] = 0.03
        if not projections.get('expense_inflation_rate'):
            projections['expense_inflation_rate'] = 0.025
        if not projections.get('hold_period_months'):
            projections['hold_period_months'] = 60
        if not projections.get('exit_cap_rate_decimal'):
            projections['exit_cap_rate_decimal'] = 0.045
        if not projections.get('sale_cost_percentage'):
            projections['sale_cost_percentage'] = 0.02
        
        # Ensure equity structure has defaults
        equity = data.get('equity_structure', {})
        if not equity.get('gp_pref_rate'):
            equity['gp_pref_rate'] = 0.08
        if not equity.get('lp_pref_rate'):
            equity['lp_pref_rate'] = 0.08
        if not equity.get('promote_percentage'):
            equity['promote_percentage'] = 0.20
        
        return data

    def get_default_data_structure(self) -> Dict[str, Any]:
        """Return default data structure with known values from PDFs"""
        # Try to get basic info from extracted text
        units = 32  # Default from your PDFs
        
        # Smart estimation
        gross_sf = units * 1000  # Typical 1000 SF per unit
        rentable_sf = int(gross_sf * 0.85)  # 85% efficiency
        
        return {
            "property_info": {
                "address": "adress",
                "units": units,
                "gross_sf": gross_sf,
                "rentable_sf": rentable_sf,
                "retail_sf": 0  # No retail in this project
            },
            "acquisition": {
                "land_value": None,  # Will be estimated from comps
                "price": None,
                "closing_costs": 150000
            },
            "construction": {
                "construction_cost_per_gsf": 338,
                "construction_months": 18
            },
            "soft_costs": {
                "architecture_and_interior_cost": None,
                "structural_engineering_cost": None,
                "mep_engineering_cost": None,
                "civil_engineering_cost": None,
                "controlled_inspections_cost": None,
                "surveying_cost": None,
                "utilities_connection_cost": None,
                "advertising_and_marketing_cost": None,
                "accounting_cost": None,
                "monitoring_cost": None,
                "ff_and_e_cost": None,
                "environmental_consultant_fee": None,
                "miscellaneous_consultants_fee": None,
                "general_legal_cost": None,
                "real_estate_taxes_during_construction": None,
                "miscellaneous_admin_cost": None,
                "ibr_cost": None,
                "project_team_cost": None,
                "pem_fees": None,
                "bank_fees": None
            },
            "financing": {
                "ltc_ratio": 0.75,
                "financing_percentage": 0.03,
                "interest_rate_basis_points": 350,
                "financing_cost": None,
                "interest_reserve": None
            },
            "operating_expenses": {
                "payroll": 31136.07,  # From PDF
                "repairs_and_maintenance": 44418.61,
                "utilities": 12535.90,
                "administrative": 0,
                "professional_fees": 18789.84,
                "insurance": 9341.33,
                "property_taxes": 118832.22,
                "management_fee_percentage": 0.03
            },
            "revenue": {
                "free_market_rent_psf": 60,
                "affordable_rent_psf": 35,
                "other_income_per_unit": 100,
                "vacancy_rate": 0.05,
                "retail_rent_psf": 45,
                "parking_income": 50000
            },
            "projections": {
                "lease_up_months": 12,
                "stabilization_months": 6,
                "revenue_inflation_rate": 0.03,
                "expense_inflation_rate": 0.025,
                "hold_period_months": 60,
                "exit_cap_rate_decimal": 0.045,
                "sale_cost_percentage": 0.02
            },
            "equity_structure": {
                "gp_pref_rate": 0.08,
                "lp_pref_rate": 0.08,
                "promote_percentage": 0.20
            }
        }
    
    def calculate_all_formulas(self, data: Dict[str, Any]) -> Dict[str, float]:
        """Calculate all formulas in correct dependency order"""
        results = {}
        self.structured_data = data
        # Flatten data for easier access
        d = self.flatten_dict(data)
        
        # Helper function to get value
        def get(key, default=0):
            value = d.get(key, default)
            # CRITICAL: Convert None to default
            return default if value is None else value
        # def safe_multiply(a, b, default=0):
        #     """Safely multiply two values, handling None"""
        #     if a is None or b is None:
        #         return default
        #     try:
        #         return a * b
        #     except:
        #         return default
        
        # BASIC PROPERTY METRICS
        results['UNITS'] = get('property_info.units')
        results['GROSS_SF'] = get('property_info.gross_sf')
        results['RENTABLE_SF'] = get('property_info.rentable_sf')
        results['RETAIL_SF'] = get('property_info.retail_sf')
        results['BUILDING_EFFICIENCY'] = self.safe_divide(results['RENTABLE_SF'], results['GROSS_SF'])
        
        # ACQUISITION COSTS
        results['LAND_VALUE'] = get('acquisition.land_value')
        results['PRICE'] = get('acquisition.price')
        results['CLOSING_COSTS'] = get('acquisition.closing_costs')
        results['ACQUISITION_FEE'] = results['LAND_VALUE'] * 0.02
        results['TOTAL_ACQUISITION_COST'] = results['LAND_VALUE'] + results['CLOSING_COSTS'] + results['ACQUISITION_FEE']
        
        # Per unit/SF metrics for acquisition
        results['LAND_VALUE_PER_GSF'] = self.safe_divide(results['LAND_VALUE'], results['GROSS_SF'])
        results['LAND_VALUE_PER_RSF'] = self.safe_divide(results['LAND_VALUE'], results['RENTABLE_SF'])
        results['LAND_VALUE_PER_UNIT'] = self.safe_divide(results['LAND_VALUE'], results['UNITS'])
        results['TOTAL_ACQUISITION_COST_PER_GSF'] = self.safe_divide(results['TOTAL_ACQUISITION_COST'], results['GROSS_SF'])
        results['TOTAL_ACQUISITION_COST_PER_RSF'] = self.safe_divide(results['TOTAL_ACQUISITION_COST'], results['RENTABLE_SF'])
        results['TOTAL_ACQUISITION_COST_PER_UNIT'] = self.safe_divide(results['TOTAL_ACQUISITION_COST'], results['UNITS'])
        
        # CONSTRUCTION COSTS
        results['CONSTRUCTION_COST_PER_GSF'] = get('construction.construction_cost_per_gsf')
        results['CONSTRUCTION_MONTHS'] = get('construction.construction_months')
        results['TOTAL_CONSTRUCTION_GMP'] = results['CONSTRUCTION_COST_PER_GSF'] * results['GROSS_SF']
        results['CONSTRUCTION_GMP_PER_GSF'] = self.safe_divide(results['TOTAL_CONSTRUCTION_GMP'], results['GROSS_SF'])
        results['CONSTRUCTION_GMP_PER_RSF'] = self.safe_divide(results['TOTAL_CONSTRUCTION_GMP'], results['RENTABLE_SF'])
        results['CONSTRUCTION_GMP_PER_UNIT'] = self.safe_divide(results['TOTAL_CONSTRUCTION_GMP'], results['UNITS'])
        
        # SOFT COSTS (individual items)
        soft_cost_items = [
            'architecture_and_interior_cost', 'structural_engineering_cost', 'mep_engineering_cost',
            'civil_engineering_cost', 'controlled_inspections_cost', 'surveying_cost',
            'utilities_connection_cost', 'advertising_and_marketing_cost', 'accounting_cost',
            'monitoring_cost', 'ff_and_e_cost', 'environmental_consultant_fee',
            'miscellaneous_consultants_fee', 'general_legal_cost', 'real_estate_taxes_during_construction',
            'miscellaneous_admin_cost', 'ibr_cost', 'project_team_cost', 'pem_fees', 'bank_fees'
        ]
        
        for item in soft_cost_items:
            key = item.upper()
            results[key] = get(f'soft_costs.{item}')
        
        # REVENUE SETUP (needed for some soft costs)
        results['FREE_MARKET_RENT_PSF'] = get('revenue.free_market_rent_psf')
        results['AFFORDABLE_RENT_PSF'] = get('revenue.affordable_rent_psf')
        results['OTHER_INCOME_PER_UNIT'] = get('revenue.other_income_per_unit')
        results['VACANCY_RATE'] = get('revenue.vacancy_rate')
        results['RETAIL_RENT_PSF'] = get('revenue.retail_rent_psf')
        results['PARKING_INCOME'] = get('revenue.parking_income')
        
        # Calculate retail revenue (needed for soft costs)
        results['RETAIL_REVENUE'] = results['RETAIL_RENT_PSF'] * results['RETAIL_SF']
        
        # HPD & IH COST
        results['HPD_AND_IH_COST'] = (3500 * results['UNITS'] * 0.75) + (5000 * results['UNITS'] * 0.25)
        
        # RETAIL TI & LC COST
        results['RETAIL_TI_AND_LC_COST'] = (results['RETAIL_REVENUE'] * 0.3) + (50 * results['RETAIL_SF'])
        
        # TOTAL SOFT COSTS
        soft_cost_sum = sum([results[item.upper()] for item in soft_cost_items])
        results['TOTAL_SOFT_COST'] = soft_cost_sum + results['HPD_AND_IH_COST'] + results['RETAIL_TI_AND_LC_COST']
        results['TOTAL_SOFT_COST_PER_GSF'] = self.safe_divide(results['TOTAL_SOFT_COST'],results['GROSS_SF'])
        
        # OPERATING EXPENSES (for reserves calculation)
        results['PAYROLL'] = get('operating_expenses.payroll')
        results['REPAIRS_AND_MAINTENANCE'] = get('operating_expenses.repairs_and_maintenance')
        results['UTILITIES'] = get('operating_expenses.utilities')
        results['ADMINISTRATIVE'] = get('operating_expenses.administrative')
        results['PROFESSIONAL_FEES'] = get('operating_expenses.professional_fees')
        results['INSURANCE'] = get('operating_expenses.insurance')
        results['PROPERTY_TAXES'] = get('operating_expenses.property_taxes')
        results['MANAGEMENT_FEE_PERCENTAGE'] = get('operating_expenses.management_fee_percentage')
        
        results['TOTAL_OPERATING_EXPENSES'] = (results['PAYROLL'] + results['REPAIRS_AND_MAINTENANCE'] + 
                                               results['UTILITIES'] + results['ADMINISTRATIVE'] + 
                                               results['PROFESSIONAL_FEES'] + results['INSURANCE'] + 
                                               results['PROPERTY_TAXES'])
        
        # CONTINGENCY & RESERVES
        results['CONTINGENCY_COST'] = (results['TOTAL_CONSTRUCTION_GMP'] + results['TOTAL_SOFT_COST']) * 0.05
        results['DEVELOPMENT_FEE'] = (results['TOTAL_CONSTRUCTION_GMP'] + results['TOTAL_SOFT_COST']) * 0.04
        results['OPERATING_RESERVE'] = results['TOTAL_OPERATING_EXPENSES'] * 0.2
        
        results['FINANCING_COST'] = get('financing.financing_cost')
        results['INTEREST_RESERVE'] = get('financing.interest_reserve')
        
        # TOTAL PROJECT COST (before financing)
        results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] = (
            results['TOTAL_SOFT_COST'] + 
            results['TOTAL_CONSTRUCTION_GMP'] + 
            results['TOTAL_ACQUISITION_COST'] + 
            results['CONTINGENCY_COST'] + 
            results['DEVELOPMENT_FEE'] + 
            results['FINANCING_COST'] + 
            results['INTEREST_RESERVE'] + 
            results['OPERATING_RESERVE']
        )
        
        results['TOTAL_PROJECT_COST_PER_GSF'] = self.safe_divide(results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'], results['GROSS_SF'])
        results['TOTAL_PROJECT_COST_PER_RSF'] = self.safe_divide(results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'], results['RENTABLE_SF'])
        results['TOTAL_PROJECT_COST_PER_UNIT'] = self.safe_divide(results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'], results['UNITS'])
        
        # FINANCING CALCULATIONS
        results['LTC_RATIO'] = get('financing.ltc_ratio')
        results['FINANCING_PERCENTAGE'] = get('financing.financing_percentage')
        results['INTEREST_RATE_BASIS_POINTS'] = get('financing.interest_rate_basis_points')
        
        results['PRE_LTC_BUDGET'] = (results['TOTAL_SOFT_COST'] + results['CONTINGENCY_COST'] + 
                                     results['DEVELOPMENT_FEE'] + results['OPERATING_RESERVE'] + 
                                     results['TOTAL_CONSTRUCTION_GMP'] + results['TOTAL_ACQUISITION_COST'])
        
        results['LOAN_AMOUNT'] = results['LTC_RATIO'] * results['PRE_LTC_BUDGET']
        results['FINANCING_AMOUNT'] = results['FINANCING_PERCENTAGE'] * results['LOAN_AMOUNT']
        results['INTEREST_RATE_DECIMAL'] = (results['INTEREST_RATE_BASIS_POINTS'] + 430) / 10000
        results['CONSTRUCTION_INTEREST'] = results['LOAN_AMOUNT'] * 0.7 * (results['INTEREST_RATE_DECIMAL'] / 12) * results['CONSTRUCTION_MONTHS']
        
        # DEBT & EQUITY
        results['TOTAL_DEBT'] = results['CONSTRUCTION_INTEREST'] + results['LOAN_AMOUNT'] + results['FINANCING_AMOUNT']
        results['TOTAL_EQUITY'] = results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] - results['TOTAL_DEBT']
        results['DEBT_PERCENTAGE'] = results['TOTAL_DEBT'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0
        results['EQUITY_PERCENTAGE'] = results['TOTAL_EQUITY'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0
        results['TOTAL_CAPITAL_STACK'] = results['TOTAL_DEBT'] + results['TOTAL_EQUITY']
        
        results['DEBT_PER_GSF'] = self.safe_divide(results['TOTAL_DEBT'],results['GROSS_SF'])
        results['EQUITY_PER_GSF'] = self.safe_divide(results['TOTAL_EQUITY'],results['GROSS_SF'])
        results['DEBT_PER_UNIT'] = self.safe_divide(results['TOTAL_DEBT'], results['UNITS'])

        results['EQUITY_PER_UNIT'] = self.safe_divide(results['TOTAL_EQUITY'], results['UNITS'])
        
        # OPERATING EXPENSE METRICS
        results['PAYROLL_PER_UNIT'] = self.safe_divide(results['PAYROLL'], results['UNITS'])
        results['REPAIRS_AND_MAINTENANCE_PER_UNIT'] = self.safe_divide(results['REPAIRS_AND_MAINTENANCE'], results['UNITS'])
        results['UTILITIES_PER_UNIT'] = self.safe_divide(results['UTILITIES'], results['UNITS'])
        results['ADMIN_AND_PROFESSIONAL_PER_UNIT'] = self.safe_divide((results['ADMINISTRATIVE'] + results['PROFESSIONAL_FEES']), results['UNITS'])
        results['INSURANCE_PER_UNIT'] = self.safe_divide(results['INSURANCE'], results['UNITS'])
        results['OPERATING_EXPENSES_PER_UNIT'] = self.safe_divide(results['TOTAL_OPERATING_EXPENSES'], results['UNITS'])
        results['OPERATING_EXPENSES_PER_GSF'] = self.safe_divide(results['TOTAL_OPERATING_EXPENSES'],results['GROSS_SF'])
        
        # REVENUE CALCULATIONS
        results['LEASE_UP_MONTHS'] = get('projections.lease_up_months')
        results['STABILIZATION_MONTHS'] = get('projections.stabilization_months')
        results['REVENUE_INFLATION_RATE'] = get('projections.revenue_inflation_rate')
        results['EXPENSE_INFLATION_RATE'] = get('projections.expense_inflation_rate')
        
        results['TRENDING_TERM'] = results['LEASE_UP_MONTHS'] + results['STABILIZATION_MONTHS']
        results['TERM_REVENUE_INFLATION'] = (1 + results['REVENUE_INFLATION_RATE']) ** (results['TRENDING_TERM'] / 12)
        results['TERM_EXPENSE_INFLATION'] = (1 + results['EXPENSE_INFLATION_RATE']) ** (results['TRENDING_TERM'] / 12)
        
        results['GROSS_POTENTIAL_FREE_MARKET_RENT'] = results['FREE_MARKET_RENT_PSF'] * 0.75 * results['RENTABLE_SF']
        results['GROSS_POTENTIAL_AFFORDABLE_RENT'] = results['AFFORDABLE_RENT_PSF'] * 0.25 * results['RENTABLE_SF']
        results['OTHER_INCOME'] = results['OTHER_INCOME_PER_UNIT'] * results['UNITS'] * 12 * 0.75
        results['VACANCY_LOSS'] = results['VACANCY_RATE'] * (results['OTHER_INCOME'] + results['GROSS_POTENTIAL_FREE_MARKET_RENT'] + results['GROSS_POTENTIAL_AFFORDABLE_RENT'])
        results['EFFECTIVE_GROSS_INCOME'] = results['GROSS_POTENTIAL_FREE_MARKET_RENT'] - results['VACANCY_LOSS'] + results['OTHER_INCOME'] + results['GROSS_POTENTIAL_AFFORDABLE_RENT']
        
        results['MANAGEMENT_FEE'] = results['MANAGEMENT_FEE_PERCENTAGE'] * results['EFFECTIVE_GROSS_INCOME']
        results['REAL_ESTATE_TAXES'] = results['GROSS_SF'] * 30 * 0.1
        results['TOTAL_EXPENSES'] = results['PAYROLL'] + results['REPAIRS_AND_MAINTENANCE'] + results['UTILITIES'] + results['REAL_ESTATE_TAXES'] + results['MANAGEMENT_FEE']
        
        # NOI & RETURNS
        results['NET_OPERATING_INCOME'] = results['EFFECTIVE_GROSS_INCOME'] - results['TOTAL_EXPENSES'] + results['PARKING_INCOME'] + results['RETAIL_REVENUE']
        results['NOI_PER_UNIT'] = self.safe_divide(results['NET_OPERATING_INCOME'], results['UNITS'])
        results['NOI_PER_GSF'] = self.safe_divide(results['NET_OPERATING_INCOME'],results['GROSS_SF'])
        results['CAP_RATE'] = (results['NET_OPERATING_INCOME'] / results['PRICE']) * 100 if results['PRICE'] > 0 else 0
        
        results['STABILIZED_YIELD_ON_COST'] = (((results['EFFECTIVE_GROSS_INCOME'] + results['RETAIL_REVENUE'] - results['GROSS_POTENTIAL_AFFORDABLE_RENT']) * results['TERM_REVENUE_INFLATION']) - (results['TOTAL_EXPENSES'] * results['TERM_EXPENSE_INFLATION'])) + results['GROSS_POTENTIAL_AFFORDABLE_RENT']
        
        results['YIELD_ON_COST_PERCENTAGE'] = results['NET_OPERATING_INCOME'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0
        results['STABILIZED_YIELD_ON_COST_PERCENTAGE'] = results['STABILIZED_YIELD_ON_COST'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0
        
        results['ANNUAL_DEBT_SERVICE'] = results['LOAN_AMOUNT'] * results['INTEREST_RATE_DECIMAL']
        results['CASH_ON_CASH_RETURN'] = ((results['NET_OPERATING_INCOME'] - results['ANNUAL_DEBT_SERVICE']) / results['TOTAL_EQUITY']) * 100 if results['TOTAL_EQUITY'] > 0 else 0
        results['DEBT_SERVICE_COVERAGE_RATIO'] = results['NET_OPERATING_INCOME'] / results['ANNUAL_DEBT_SERVICE'] if results['ANNUAL_DEBT_SERVICE'] > 0 else 0
        
        # EXIT & EQUITY WATERFALL
        results['EXIT_CAP_RATE_DECIMAL'] = get('projections.exit_cap_rate_decimal')
        results['SALE_COST_PERCENTAGE'] = get('projections.sale_cost_percentage')
        results['HOLD_PERIOD_MONTHS'] = get('projections.hold_period_months')
        
        results['PROPERTY_VALUE_ON_SALE'] = (results['STABILIZED_YIELD_ON_COST'] / results['EXIT_CAP_RATE_DECIMAL']) + (results['STABILIZED_YIELD_ON_COST'] * 0.25) if results['EXIT_CAP_RATE_DECIMAL'] > 0 else 0
        results['SALE_COST'] = results['SALE_COST_PERCENTAGE'] * results['PROPERTY_VALUE_ON_SALE']
        results['NET_SALE_PROCEEDS'] = results['PROPERTY_VALUE_ON_SALE'] - results['SALE_COST']
        results['CASH_REMAINING_AFTER_LOAN_PAYBACK'] = results['NET_SALE_PROCEEDS'] - results['TOTAL_DEBT']
        
        results['GP_PREF_RATE'] = get('equity_structure.gp_pref_rate')
        results['LP_PREF_RATE'] = get('equity_structure.lp_pref_rate')
        results['PROMOTE_PERCENTAGE'] = get('equity_structure.promote_percentage')
        
        results['GP_INVESTMENT'] = results['TOTAL_EQUITY'] * 0.2
        results['LP_INVESTMENT'] = results['TOTAL_EQUITY'] - results['GP_INVESTMENT']
        results['GP_PREFERRED_RETURN_WITH_PRINCIPAL'] = (1 + results['GP_PREF_RATE'] / 12) ** results['HOLD_PERIOD_MONTHS'] * results['GP_INVESTMENT']
        results['LP_PREFERRED_RETURN_WITH_PRINCIPAL'] = (1 + results['LP_PREF_RATE'] / 12) ** results['HOLD_PERIOD_MONTHS'] * results['LP_INVESTMENT']
        results['CASH_REMAINING_AFTER_PREFERRED'] = results['CASH_REMAINING_AFTER_LOAN_PAYBACK'] - results['LP_PREFERRED_RETURN_WITH_PRINCIPAL'] - results['GP_PREFERRED_RETURN_WITH_PRINCIPAL']
        results['PROMOTE_ON_JOINT_VENTURE'] = results['PROMOTE_PERCENTAGE'] * results['CASH_REMAINING_AFTER_PREFERRED']
        results['CASH_TO_LP'] = (results['CASH_REMAINING_AFTER_PREFERRED'] - results['PROMOTE_ON_JOINT_VENTURE']) * (results['LP_INVESTMENT'] / (results['LP_INVESTMENT'] + results['GP_INVESTMENT'])) if (results['LP_INVESTMENT'] + results['GP_INVESTMENT']) > 0 else 0
        results['NET_TO_LP_INVESTOR'] = results['CASH_TO_LP'] + results['LP_PREFERRED_RETURN_WITH_PRINCIPAL']
        results['LP_MULTIPLE'] = results['NET_TO_LP_INVESTOR'] / results['LP_INVESTMENT'] if results['LP_INVESTMENT'] > 0 else 0
        # results['IRR_TO_LP'] = ((results['NET_TO_LP_INVESTOR'] / results['LP_INVESTMENT']) ** (12 / results['HOLD_PERIOD_MONTHS']) - 1) * 100 if results['LP_INVESTMENT'] > 0 and results['HOLD_PERIOD_MONTHS'] > 0 else 0
        # IRR calculation with complex number handling
        if results['LP_INVESTMENT'] > 0 and results['HOLD_PERIOD_MONTHS'] > 0:
            irr_base = results['NET_TO_LP_INVESTOR'] / results['LP_INVESTMENT']
            if irr_base > 0:
                results['IRR_TO_LP'] = ((irr_base) ** (12 / results['HOLD_PERIOD_MONTHS']) - 1) * 100
            else:
                results['IRR_TO_LP'] = -100  # Total loss
        else:
            results['IRR_TO_LP'] = 0

        # BLENDED RENT CALCULATIONS
        results['BLENDED_RENT_PER_RSF'] = (results['FREE_MARKET_RENT_PSF'] * 0.75) + (results['AFFORDABLE_RENT_PSF'] * 0.25)
        results['TOTAL_FREE_MARKET_RENT'] = results['FREE_MARKET_RENT_PSF'] * 425 / 12
        results['TOTAL_BLENDED_RENT'] = results['BLENDED_RENT_PER_RSF'] * 750 / 12
        results['FREE_MARKET_RENT_PER_SF'] = results['TOTAL_FREE_MARKET_RENT'] * 110 / 12
        results['AFFORDABLE_RENT_PER_SF'] = results['AFFORDABLE_RENT_PSF'] * 110 / 12
        results['BLENDED_RENT_PER_SF'] = results['TOTAL_BLENDED_RENT'] * 110 / 12
        results['AVERAGE_RENT_PER_UNIT'] = self.safe_divide((results['GROSS_POTENTIAL_FREE_MARKET_RENT']+results['GROSS_POTENTIAL_AFFORDABLE_RENT']), results['UNITS'])
        results['RENT_PER_UNIT_PER_MONTH'] = results['AVERAGE_RENT_PER_UNIT'] / 12
        
        # EGI PERCENTAGES
        if results['EFFECTIVE_GROSS_INCOME'] > 0:
            results['PAYROLL_PERCENTAGE_OF_EGI'] = results['PAYROLL'] / results['EFFECTIVE_GROSS_INCOME']
            results['REPAIRS_AND_MAINTENANCE_PERCENTAGE_OF_EGI'] = results['REPAIRS_AND_MAINTENANCE'] / results['EFFECTIVE_GROSS_INCOME']
            results['UTILITIES_PERCENTAGE_OF_EGI'] = results['UTILITIES'] / results['EFFECTIVE_GROSS_INCOME']
            results['ADMIN_AND_PROFESSIONAL_PERCENTAGE_OF_EGI'] = (results['ADMINISTRATIVE'] + results['PROFESSIONAL_FEES']) / results['EFFECTIVE_GROSS_INCOME']
            results['INSURANCE_PERCENTAGE_OF_EGI'] = results['INSURANCE'] / results['EFFECTIVE_GROSS_INCOME']
            results['PROFESSIONAL_FEES_PERCENTAGE_OF_EGI'] = results['PROFESSIONAL_FEES'] / results['EFFECTIVE_GROSS_INCOME']
            results['TOTAL_OPERATING_EXPENSES_PERCENTAGE_OF_EGI'] = results['TOTAL_OPERATING_EXPENSES'] / results['EFFECTIVE_GROSS_INCOME']
        else:
            results['PAYROLL_PERCENTAGE_OF_EGI'] = 0
            results['REPAIRS_AND_MAINTENANCE_PERCENTAGE_OF_EGI'] = 0
            results['UTILITIES_PERCENTAGE_OF_EGI'] = 0
            results['ADMIN_AND_PROFESSIONAL_PERCENTAGE_OF_EGI'] = 0
            results['INSURANCE_PERCENTAGE_OF_EGI'] = 0
            results['PROFESSIONAL_FEES_PERCENTAGE_OF_EGI'] = 0
            results['TOTAL_OPERATING_EXPENSES_PERCENTAGE_OF_EGI'] = 0
        
        self.formula_results = results
        return results
    
    def flatten_dict(self, d: Dict[str, Any], parent_key: str = '', sep: str = '.') -> Dict[str, Any]:
        """Flatten nested dictionary"""
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(self.flatten_dict(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)
    
    def generate_excel(self, output_path: str = "Real_Estate_Financial_Model.xlsx"):
        """Generate professional Excel file with all calculations"""
        try:
            # Validate critical values before Excel generation
            r = self.formula_results
            
            print("  Validating calculations...")
            critical_values = {
                'UNITS': r.get('UNITS', 0),
                'GROSS_SF': r.get('GROSS_SF', 0),
                'RENTABLE_SF': r.get('RENTABLE_SF', 0),
                'EFFECTIVE_GROSS_INCOME': r.get('EFFECTIVE_GROSS_INCOME', 0),
                'TOTAL_PROJECT_COST': r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0)
            }
            
            warnings = []
            for key, value in critical_values.items():
                if value == 0:
                    warnings.append(f"    WARNING: {key} is zero or missing")
            
            if warnings:
                print("\n".join(warnings))
                print("    Continuing with available data...\n")
            
            wb = openpyxl.Workbook()
            
            # Remove default sheet
            if 'Sheet' in wb.sheetnames:
                wb.remove(wb['Sheet'])
            
            # Create sheets with error handling
            print("  Creating Executive Summary...")
            self.create_summary_sheet(wb)
            
            print("  Creating Acquisition sheet...")
            self.create_acquisition_sheet(wb)
            
            print("  Creating Construction sheet...")
            self.create_construction_sheet(wb)
            
            print("  Creating Soft Costs sheet...")
            self.create_soft_costs_sheet(wb)
            
            print("  Creating Financing sheet...")
            self.create_financing_sheet(wb)
            
            print("  Creating Operations sheet...")
            self.create_operations_sheet(wb)
            
            print("  Creating Returns sheet...")
            self.create_returns_sheet(wb)
            
            # Save workbook
            wb.save(output_path)
            print(f"✓ Excel file generated: {output_path}")
            return output_path
        except Exception as e:
            print(f"ERROR generating Excel: {e}")
            import traceback
            traceback.print_exc()
            raise
    
    def create_summary_sheet(self, wb):
        """Create executive summary sheet"""
        ws = wb.create_sheet("Executive Summary", 0)
        
        # Styles
        header_fill = PatternFill(start_color="1F4E78", end_color="1F4E78", fill_type="solid")
        header_font = Font(color="FFFFFF", bold=True, size=12)
        subheader_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
        subheader_font = Font(color="FFFFFF", bold=True, size=11)
        
        r = self.formula_results
        
        # Title
        ws['A1'] = "REAL ESTATE DEVELOPMENT FINANCIAL MODEL"
        ws['A1'].font = Font(bold=True, size=16)
        ws.merge_cells('A1:D1')
        
        # Property Information
        row = 3
        ws[f'A{row}'] = "PROPERTY INFORMATION"
        ws[f'A{row}'].fill = header_fill
        ws[f'A{row}'].font = header_font
        ws.merge_cells(f'A{row}:D{row}')

        address = self.structured_data.get('property_info', {}).get('address', 'N/A')

        row += 1
        data = [
            ("Address:", address),
            ("Units:", r.get('UNITS', 0)),
            ("Gross Square Feet:", f"{r.get('GROSS_SF', 0):,.0f}"),
            ("Rentable Square Feet:", f"{r.get('RENTABLE_SF', 0):,.0f}"),
            ("Building Efficiency:", f"{r.get('BUILDING_EFFICIENCY', 0):.2%}"),
        ]
        
        for label, value in data:
            ws[f'A{row}'] = label
            ws[f'A{row}'].font = Font(bold=True)
            ws[f'B{row}'] = value
            row += 1
        
        # Project Costs Summary
        row += 1
        ws[f'A{row}'] = "PROJECT COSTS SUMMARY"
        ws[f'A{row}'].fill = header_fill
        ws[f'A{row}'].font = header_font
        ws.merge_cells(f'A{row}:D{row}')
        
        row += 1
        ws[f'A{row}'] = "Category"
        ws[f'B{row}'] = "Total Cost"
        ws[f'C{row}'] = "Per GSF"
        ws[f'D{row}'] = "Per Unit"
        for col in ['A', 'B', 'C', 'D']:
            ws[f'{col}{row}'].fill = subheader_fill
            ws[f'{col}{row}'].font = subheader_font
        
        row += 1
        cost_summary = [
            ("Acquisition", r.get('TOTAL_ACQUISITION_COST', 0), r.get('TOTAL_ACQUISITION_COST_PER_GSF', 0), r.get('TOTAL_ACQUISITION_COST_PER_UNIT', 0)),
            ("Construction", r.get('TOTAL_CONSTRUCTION_GMP', 0), r.get('CONSTRUCTION_GMP_PER_GSF', 0), r.get('CONSTRUCTION_GMP_PER_UNIT', 0)),
            ("Soft Costs", r.get('TOTAL_SOFT_COST', 0), r.get('TOTAL_SOFT_COST_PER_GSF', 0), r.get('TOTAL_SOFT_COST_PER_GSF', 0) * r.get('GROSS_SF', 0) / r.get('UNITS', 1)),
            ("Contingency", r.get('CONTINGENCY_COST', 0), r.get('CONTINGENCY_COST', 0) / r.get('GROSS_SF', 1), r.get('CONTINGENCY_COST', 0) / r.get('UNITS', 1)),
            ("Development Fee", r.get('DEVELOPMENT_FEE', 0), r.get('DEVELOPMENT_FEE', 0) / r.get('GROSS_SF', 1), r.get('DEVELOPMENT_FEE', 0) / r.get('UNITS', 1)),
            ("Financing & Reserves", r.get('FINANCING_COST', 0) + r.get('INTEREST_RESERVE', 0) + r.get('OPERATING_RESERVE', 0), 0, 0),
        ]
        
        for label, total, per_gsf, per_unit in cost_summary:
            ws[f'A{row}'] = label
            ws[f'B{row}'] = total
            ws[f'B{row}'].number_format = '$#,##0'
            ws[f'C{row}'] = per_gsf
            ws[f'C{row}'].number_format = '$#,##0.00'
            ws[f'D{row}'] = per_unit
            ws[f'D{row}'].number_format = '$#,##0'
            row += 1
        
        # Total
        ws[f'A{row}'] = "TOTAL PROJECT COST"
        ws[f'A{row}'].font = Font(bold=True)
        ws[f'B{row}'] = r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'B{row}'].font = Font(bold=True)
        ws[f'C{row}'] = r.get('TOTAL_PROJECT_COST_PER_GSF', 0)
        ws[f'C{row}'].number_format = '$#,##0.00'
        ws[f'C{row}'].font = Font(bold=True)
        ws[f'D{row}'] = r.get('TOTAL_PROJECT_COST_PER_UNIT', 0)
        ws[f'D{row}'].number_format = '$#,##0'
        ws[f'D{row}'].font = Font(bold=True)
        
        # Capital Stack
        row += 2
        ws[f'A{row}'] = "CAPITAL STACK"
        ws[f'A{row}'].fill = header_fill
        ws[f'A{row}'].font = header_font
        ws.merge_cells(f'A{row}:D{row}')
        
        row += 1
        ws[f'A{row}'] = "Source"
        ws[f'B{row}'] = "Amount"
        ws[f'C{row}'] = "Percentage"
        ws[f'D{row}'] = "Per Unit"
        for col in ['A', 'B', 'C', 'D']:
            ws[f'{col}{row}'].fill = subheader_fill
            ws[f'{col}{row}'].font = subheader_font
        
        row += 1
        ws[f'A{row}'] = "Total Debt"
        ws[f'B{row}'] = r.get('TOTAL_DEBT', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'C{row}'] = r.get('DEBT_PERCENTAGE', 0)
        ws[f'C{row}'].number_format = '0.00%'
        ws[f'D{row}'] = r.get('DEBT_PER_UNIT', 0)
        ws[f'D{row}'].number_format = '$#,##0'
        
        row += 1
        ws[f'A{row}'] = "Total Equity"
        ws[f'B{row}'] = r.get('TOTAL_EQUITY', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'C{row}'] = r.get('EQUITY_PERCENTAGE', 0)
        ws[f'C{row}'].number_format = '0.00%'
        ws[f'D{row}'] = r.get('EQUITY_PER_UNIT', 0)
        ws[f'D{row}'].number_format = '$#,##0'
        
        # Returns Summary
        row += 2
        ws[f'A{row}'] = "INVESTMENT RETURNS"
        ws[f'A{row}'].fill = header_fill
        ws[f'A{row}'].font = header_font
        ws.merge_cells(f'A{row}:D{row}')
        
        row += 1
        returns_data = [
            ("Stabilized NOI:", f"${r.get('NET_OPERATING_INCOME', 0):,.0f}"),
            ("Yield on Cost:", f"{r.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}"),
            ("Stabilized Yield on Cost:", f"{r.get('STABILIZED_YIELD_ON_COST_PERCENTAGE', 0):.2%}"),
            ("Cash-on-Cash Return:", f"{r.get('CASH_ON_CASH_RETURN', 0):.2f}%"),
            ("DSCR:", f"{r.get('DEBT_SERVICE_COVERAGE_RATIO', 0):.2f}x"),
            ("LP IRR:", f"{float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)):.2f}%"),
            ("LP Multiple:", f"{r.get('LP_MULTIPLE', 0):.2f}x"),
        ]
        
        for label, value in returns_data:
            ws[f'A{row}'] = label
            ws[f'A{row}'].font = Font(bold=True)
            ws[f'B{row}'] = value
            row += 1
        
        # Adjust column widths
        ws.column_dimensions['A'].width = 25
        ws.column_dimensions['B'].width = 18
        ws.column_dimensions['C'].width = 15
        ws.column_dimensions['D'].width = 15
    
    def create_acquisition_sheet(self, wb):
        """Create acquisition costs detail sheet"""
        ws = wb.create_sheet("Acquisition")
        r = self.formula_results
        
        # Header
        ws['A1'] = "ACQUISITION COSTS"
        ws['A1'].font = Font(bold=True, size=14)
        ws.merge_cells('A1:E1')
        
        # Column headers
        row = 3
        headers = ["Item", "Total Cost", "Per GSF", "Per RSF", "Per Unit"]
        for col_idx, header in enumerate(headers, start=1):
            cell = ws.cell(row=row, column=col_idx, value=header)
            cell.fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
            cell.font = Font(color="FFFFFF", bold=True)
        
        # Data
        row += 1
        data = [
            ("Land Value", r.get('LAND_VALUE', 0), r.get('LAND_VALUE_PER_GSF', 0), r.get('LAND_VALUE_PER_RSF', 0), r.get('LAND_VALUE_PER_UNIT', 0)),
            ("Closing Costs", r.get('CLOSING_COSTS', 0), r.get('CLOSING_COSTS', 0) / r.get('GROSS_SF', 1), r.get('CLOSING_COSTS', 0) / r.get('RENTABLE_SF', 1), r.get('CLOSING_COSTS', 0) / r.get('UNITS', 1)),
            ("Acquisition Fee (2%)", r.get('ACQUISITION_FEE', 0), r.get('ACQUISITION_FEE', 0) / r.get('GROSS_SF', 1), r.get('ACQUISITION_FEE', 0) / r.get('RENTABLE_SF', 1), r.get('ACQUISITION_FEE', 0) / r.get('UNITS', 1)),
        ]
        
        for item, total, per_gsf, per_rsf, per_unit in data:
            ws.cell(row=row, column=1, value=item)
            ws.cell(row=row, column=2, value=total).number_format = '$#,##0'
            ws.cell(row=row, column=3, value=per_gsf).number_format = '$#,##0.00'
            ws.cell(row=row, column=4, value=per_rsf).number_format = '$#,##0.00'
            ws.cell(row=row, column=5, value=per_unit).number_format = '$#,##0'
            row += 1
        
        # Total
        ws.cell(row=row, column=1, value="TOTAL ACQUISITION COST").font = Font(bold=True)
        ws.cell(row=row, column=2, value=r.get('TOTAL_ACQUISITION_COST', 0)).number_format = '$#,##0'
        ws.cell(row=row, column=2).font = Font(bold=True)
        ws.cell(row=row, column=3, value=r.get('TOTAL_ACQUISITION_COST_PER_GSF', 0)).number_format = '$#,##0.00'
        ws.cell(row=row, column=3).font = Font(bold=True)
        ws.cell(row=row, column=4, value=r.get('TOTAL_ACQUISITION_COST_PER_RSF', 0)).number_format = '$#,##0.00'
        ws.cell(row=row, column=4).font = Font(bold=True)
        ws.cell(row=row, column=5, value=r.get('TOTAL_ACQUISITION_COST_PER_UNIT', 0)).number_format = '$#,##0'
        ws.cell(row=row, column=5).font = Font(bold=True)
        
        # Adjust widths
        for col in range(1, 6):
            ws.column_dimensions[get_column_letter(col)].width = 20
    
    def create_construction_sheet(self, wb):
        """Create construction costs sheet"""
        ws = wb.create_sheet("Construction")
        r = self.formula_results
        
        ws['A1'] = "CONSTRUCTION COSTS"
        ws['A1'].font = Font(bold=True, size=14)
        
        row = 3
        ws[f'A{row}'] = "Construction Cost per GSF:"
        ws[f'B{row}'] = r.get('CONSTRUCTION_COST_PER_GSF', 0)
        ws[f'B{row}'].number_format = '$#,##0.00'
        
        row += 1
        ws[f'A{row}'] = "Gross Square Feet:"
        ws[f'B{row}'] = r.get('GROSS_SF', 0)
        ws[f'B{row}'].number_format = '#,##0'
        
        row += 2
        ws[f'A{row}'] = "Total Construction GMP:"
        ws[f'A{row}'].font = Font(bold=True)
        ws[f'B{row}'] = r.get('TOTAL_CONSTRUCTION_GMP', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'B{row}'].font = Font(bold=True)
        
        row += 2
        ws[f'A{row}'] = "Construction Duration:"
        ws[f'B{row}'] = f"{r.get('CONSTRUCTION_MONTHS', 0)} months"
        
        ws.column_dimensions['A'].width = 30
        ws.column_dimensions['B'].width = 20
    
    def create_soft_costs_sheet(self, wb):
        """Create soft costs detail sheet"""
        ws = wb.create_sheet("Soft Costs")
        r = self.formula_results
        
        ws['A1'] = "SOFT COSTS BUDGET"
        ws['A1'].font = Font(bold=True, size=14)
        
        row = 3
        headers = ["Category", "Total Cost", "Per GSF"]
        for col_idx, header in enumerate(headers, start=1):
            cell = ws.cell(row=row, column=col_idx, value=header)
            cell.fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
            cell.font = Font(color="FFFFFF", bold=True)
        
        row += 1
        soft_cost_items = [
            ("Architecture & Interior Design", 'ARCHITECTURE_AND_INTERIOR_COST'),
            ("Structural Engineering", 'STRUCTURAL_ENGINEERING_COST'),
            ("MEP Engineering", 'MEP_ENGINEERING_COST'),
            ("Civil Engineering", 'CIVIL_ENGINEERING_COST'),
            ("Controlled Inspections", 'CONTROLLED_INSPECTIONS_COST'),
            ("Surveying", 'SURVEYING_COST'),
            ("Utilities Connection", 'UTILITIES_CONNECTION_COST'),
            ("Advertising & Marketing", 'ADVERTISING_AND_MARKETING_COST'),
            ("Accounting", 'ACCOUNTING_COST'),
            ("Monitoring", 'MONITORING_COST'),
            ("FF&E", 'FF_AND_E_COST'),
            ("Environmental Consultant", 'ENVIRONMENTAL_CONSULTANT_FEE'),
            ("Miscellaneous Consultants", 'MISCELLANEOUS_CONSULTANTS_FEE'),
            ("General Legal", 'GENERAL_LEGAL_COST'),
            ("RE Taxes During Construction", 'REAL_ESTATE_TAXES_DURING_CONSTRUCTION'),
            ("Miscellaneous Admin", 'MISCELLANEOUS_ADMIN_COST'),
            ("IBR Cost", 'IBR_COST'),
            ("Project Team", 'PROJECT_TEAM_COST'),
            ("PEM Fees", 'PEM_FEES'),
            ("Bank Fees", 'BANK_FEES'),
            ("HPD & IH Costs", 'HPD_AND_IH_COST'),
            ("Retail TI & LC", 'RETAIL_TI_AND_LC_COST'),
        ]
        
        for label, key in soft_cost_items:
            cost = r.get(key, 0)
            per_gsf = cost / r.get('GROSS_SF', 1) if r.get('GROSS_SF', 0) > 0 else 0
            ws.cell(row=row, column=1, value=label)
            ws.cell(row=row, column=2, value=cost).number_format = '$#,##0'
            ws.cell(row=row, column=3, value=per_gsf).number_format = '$#,##0.00'
            row += 1
        
        # Total
        ws.cell(row=row, column=1, value="TOTAL SOFT COSTS").font = Font(bold=True)
        ws.cell(row=row, column=2, value=r.get('TOTAL_SOFT_COST', 0)).number_format = '$#,##0'
        ws.cell(row=row, column=2).font = Font(bold=True)
        ws.cell(row=row, column=3, value=r.get('TOTAL_SOFT_COST_PER_GSF', 0)).number_format = '$#,##0.00'
        ws.cell(row=row, column=3).font = Font(bold=True)
        
        ws.column_dimensions['A'].width = 35
        ws.column_dimensions['B'].width = 18
        ws.column_dimensions['C'].width = 15
    
    def create_financing_sheet(self, wb):
        """Create financing structure sheet"""
        ws = wb.create_sheet("Financing")
        r = self.formula_results
        
        ws['A1'] = "FINANCING STRUCTURE"
        ws['A1'].font = Font(bold=True, size=14)
        
        row = 3
        ws[f'A{row}'] = "Pre-LTC Budget:"
        ws[f'B{row}'] = r.get('PRE_LTC_BUDGET', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 1
        ws[f'A{row}'] = "LTC Ratio:"
        ws[f'B{row}'] = r.get('LTC_RATIO', 0)
        ws[f'B{row}'].number_format = '0.00%'
        
        row += 1
        ws[f'A{row}'] = "Loan Amount:"
        ws[f'A{row}'].font = Font(bold=True)
        ws[f'B{row}'] = r.get('LOAN_AMOUNT', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'B{row}'].font = Font(bold=True)
        
        row += 2
        ws[f'A{row}'] = "Financing Percentage:"
        ws[f'B{row}'] = r.get('FINANCING_PERCENTAGE', 0)
        ws[f'B{row}'].number_format = '0.00%'
        
        row += 1
        ws[f'A{row}'] = "Financing Amount:"
        ws[f'B{row}'] = r.get('FINANCING_AMOUNT', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 2
        ws[f'A{row}'] = "Interest Rate (bps + spread):"
        ws[f'B{row}'] = r.get('INTEREST_RATE_DECIMAL', 0)
        ws[f'B{row}'].number_format = '0.00%'
        
        row += 1
        ws[f'A{row}'] = "Construction Interest:"
        ws[f'B{row}'] = r.get('CONSTRUCTION_INTEREST', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 2
        ws[f'A{row}'] = "TOTAL DEBT"
        ws[f'A{row}'].font = Font(bold=True, size=12)
        ws[f'B{row}'] = r.get('TOTAL_DEBT', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'B{row}'].font = Font(bold=True, size=12)
        
        row += 1
        ws[f'A{row}'] = "TOTAL EQUITY"
        ws[f'A{row}'].font = Font(bold=True, size=12)
        ws[f'B{row}'] = r.get('TOTAL_EQUITY', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'B{row}'].font = Font(bold=True, size=12)
        
        row += 2
        ws[f'A{row}'] = "Debt Percentage:"
        ws[f'B{row}'] = r.get('DEBT_PERCENTAGE', 0)
        ws[f'B{row}'].number_format = '0.00%'
        
        row += 1
        ws[f'A{row}'] = "Equity Percentage:"
        ws[f'B{row}'] = r.get('EQUITY_PERCENTAGE', 0)
        ws[f'B{row}'].number_format = '0.00%'
        
        ws.column_dimensions['A'].width = 35
        ws.column_dimensions['B'].width = 20
    
    def create_operations_sheet(self, wb):
        """Create operations and revenue sheet"""
        ws = wb.create_sheet("Operations")
        r = self.formula_results
        
        ws['A1'] = "OPERATIONS & REVENUE"
        ws['A1'].font = Font(bold=True, size=14)
        
        # Revenue Section
        row = 3
        ws[f'A{row}'] = "REVENUE"
        ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
        ws[f'A{row}'].font = Font(color="FFFFFF", bold=True)
        ws.merge_cells(f'A{row}:B{row}')
        
        row += 1
        revenue_items = [
            ("Gross Potential Free Market Rent", r.get('GROSS_POTENTIAL_FREE_MARKET_RENT', 0)),
            ("Gross Potential Affordable Rent", r.get('GROSS_POTENTIAL_AFFORDABLE_RENT', 0)),
            ("Other Income", r.get('OTHER_INCOME', 0)),
            ("Less: Vacancy Loss", -r.get('VACANCY_LOSS', 0)),
            ("Effective Gross Income", r.get('EFFECTIVE_GROSS_INCOME', 0)),
            ("Parking Income", r.get('PARKING_INCOME', 0)),
            ("Retail Revenue", r.get('RETAIL_REVENUE', 0)),
        ]
        
        for label, value in revenue_items:
            ws[f'A{row}'] = label
            if "Effective Gross" in label:
                ws[f'A{row}'].font = Font(bold=True)
            ws[f'B{row}'] = value
            ws[f'B{row}'].number_format = '$#,##0'
            row += 1
        
        # Expense Section
        row += 1
        ws[f'A{row}'] = "OPERATING EXPENSES"
        ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
        ws[f'A{row}'].font = Font(color="FFFFFF", bold=True)
        ws.merge_cells(f'A{row}:C{row}')
        
        row += 1
        ws[f'A{row}'] = "Expense Category"
        ws[f'B{row}'] = "Annual Amount"
        ws[f'C{row}'] = "% of EGI"
        for col in ['A', 'B', 'C']:
            ws[f'{col}{row}'].font = Font(bold=True)
        
        row += 1
        # Safe division helper
        egi = r.get('EFFECTIVE_GROSS_INCOME', 0)
        def safe_pct(value):
            return value / egi if egi > 0 else 0

        expense_items = [
            ("Payroll", r.get('PAYROLL', 0), r.get('PAYROLL_PERCENTAGE_OF_EGI', 0)),
            ("Repairs & Maintenance", r.get('REPAIRS_AND_MAINTENANCE', 0), r.get('REPAIRS_AND_MAINTENANCE_PERCENTAGE_OF_EGI', 0)),
            ("Utilities", r.get('UTILITIES', 0), r.get('UTILITIES_PERCENTAGE_OF_EGI', 0)),
            ("Insurance", r.get('INSURANCE', 0), r.get('INSURANCE_PERCENTAGE_OF_EGI', 0)),
            ("Management Fee", r.get('MANAGEMENT_FEE', 0), safe_pct(r.get('MANAGEMENT_FEE', 0))),
            ("Real Estate Taxes", r.get('REAL_ESTATE_TAXES', 0), safe_pct(r.get('REAL_ESTATE_TAXES', 0))),
        ]
        
        for label, amount, pct in expense_items:
            ws[f'A{row}'] = label
            ws[f'B{row}'] = amount
            ws[f'B{row}'].number_format = '$#,##0'
            ws[f'C{row}'] = pct
            ws[f'C{row}'].number_format = '0.00%'
            row += 1
        
        ws[f'A{row}'] = "TOTAL EXPENSES"
        ws[f'A{row}'].font = Font(bold=True)
        ws[f'B{row}'] = r.get('TOTAL_EXPENSES', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'B{row}'].font = Font(bold=True)
        total_exp_pct = safe_pct(r.get('TOTAL_EXPENSES', 0))
        ws[f'C{row}'] = total_exp_pct
        ws[f'C{row}'].number_format = '0.00%'
        ws[f'C{row}'].font = Font(bold=True)
        
        row += 2
        ws[f'A{row}'] = "NET OPERATING INCOME"
        ws[f'A{row}'].font = Font(bold=True, size=12)
        ws[f'B{row}'] = r.get('NET_OPERATING_INCOME', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'B{row}'].font = Font(bold=True, size=12)
        
        row += 2
        ws[f'A{row}'] = "NOI per Unit:"
        ws[f'B{row}'] = r.get('NOI_PER_UNIT', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 1
        ws[f'A{row}'] = "NOI per GSF:"
        ws[f'B{row}'] = r.get('NOI_PER_GSF', 0)
        ws[f'B{row}'].number_format = '$#,##0.00'
        
        ws.column_dimensions['A'].width = 35
        ws.column_dimensions['B'].width = 20
        ws.column_dimensions['C'].width = 15
    
    def create_returns_sheet(self, wb):
        """Create investment returns and waterfall sheet"""
        ws = wb.create_sheet("Returns")
        r = self.formula_results
        
        ws['A1'] = "INVESTMENT RETURNS & EXIT ANALYSIS"
        ws['A1'].font = Font(bold=True, size=14)
        
        # Current Returns
        row = 3
        ws[f'A{row}'] = "STABILIZED RETURNS"
        ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
        ws[f'A{row}'].font = Font(color="FFFFFF", bold=True)
        ws.merge_cells(f'A{row}:B{row}')
        
        row += 1
        returns_data = [
            ("Net Operating Income", f"${r.get('NET_OPERATING_INCOME', 0):,.0f}"),
            ("Stabilized Yield on Cost", f"${r.get('STABILIZED_YIELD_ON_COST', 0):,.0f}"),
            ("Yield on Cost %", f"{r.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}"),
            ("Stabilized Yield on Cost %", f"{r.get('STABILIZED_YIELD_ON_COST_PERCENTAGE', 0):.2%}"),
            ("Annual Debt Service", f"${r.get('ANNUAL_DEBT_SERVICE', 0):,.0f}"),
            ("Cash-on-Cash Return", f"{r.get('CASH_ON_CASH_RETURN', 0):.2f}%"),
            ("Debt Service Coverage Ratio", f"{r.get('DEBT_SERVICE_COVERAGE_RATIO', 0):.2f}x"),
        ]
        
        for label, value in returns_data:
            ws[f'A{row}'] = label
            ws[f'A{row}'].font = Font(bold=True)
            ws[f'B{row}'] = value
            row += 1
        
        # Exit Analysis
        row += 2
        ws[f'A{row}'] = "EXIT ANALYSIS"
        ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
        ws[f'A{row}'].font = Font(color="FFFFFF", bold=True)
        ws.merge_cells(f'A{row}:B{row}')
        
        row += 1
        ws[f'A{row}'] = "Hold Period (months):"
        ws[f'B{row}'] = r.get('HOLD_PERIOD_MONTHS', 0)
        
        row += 1
        ws[f'A{row}'] = "Exit Cap Rate:"
        ws[f'B{row}'] = r.get('EXIT_CAP_RATE_DECIMAL', 0)
        ws[f'B{row}'].number_format = '0.00%'
        
        row += 1
        ws[f'A{row}'] = "Property Value on Sale:"
        ws[f'B{row}'] = r.get('PROPERTY_VALUE_ON_SALE', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 1
        ws[f'A{row}'] = "Less: Sale Costs (2%):"
        ws[f'B{row}'] = -r.get('SALE_COST', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 1
        ws[f'A{row}'] = "Net Sale Proceeds:"
        ws[f'A{row}'].font = Font(bold=True)
        ws[f'B{row}'] = r.get('NET_SALE_PROCEEDS', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'B{row}'].font = Font(bold=True)
        
        row += 1
        ws[f'A{row}'] = "Less: Loan Payoff:"
        ws[f'B{row}'] = -r.get('TOTAL_DEBT', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 1
        ws[f'A{row}'] = "Cash After Loan Payback:"
        ws[f'A{row}'].font = Font(bold=True)
        ws[f'B{row}'] = r.get('CASH_REMAINING_AFTER_LOAN_PAYBACK', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'B{row}'].font = Font(bold=True)
        
        # Equity Waterfall
        row += 2
        ws[f'A{row}'] = "EQUITY WATERFALL"
        ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
        ws[f'A{row}'].font = Font(color="FFFFFF", bold=True)
        ws.merge_cells(f'A{row}:B{row}')
        
        row += 1
        ws[f'A{row}'] = "GP Investment (20%):"
        ws[f'B{row}'] = r.get('GP_INVESTMENT', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 1
        ws[f'A{row}'] = "LP Investment (80%):"
        ws[f'B{row}'] = r.get('LP_INVESTMENT', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 2
        ws[f'A{row}'] = "GP Preferred Return + Principal:"
        ws[f'B{row}'] = r.get('GP_PREFERRED_RETURN_WITH_PRINCIPAL', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 1
        ws[f'A{row}'] = "LP Preferred Return + Principal:"
        ws[f'B{row}'] = r.get('LP_PREFERRED_RETURN_WITH_PRINCIPAL', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 1
        ws[f'A{row}'] = "Cash After Preferred:"
        ws[f'B{row}'] = r.get('CASH_REMAINING_AFTER_PREFERRED', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 2
        ws[f'A{row}'] = "GP Promote (20%):"
        ws[f'B{row}'] = r.get('PROMOTE_ON_JOINT_VENTURE', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 1
        ws[f'A{row}'] = "Cash to LP:"
        ws[f'B{row}'] = r.get('CASH_TO_LP', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        
        row += 2
        ws[f'A{row}'] = "NET TO LP INVESTOR"
        ws[f'A{row}'].font = Font(bold=True, size=12)
        ws[f'B{row}'] = r.get('NET_TO_LP_INVESTOR', 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'B{row}'].font = Font(bold=True, size=12)
        
        row += 2
        ws[f'A{row}'] = "LP Multiple:"
        ws[f'A{row}'].font = Font(bold=True)
        ws[f'B{row}'] = r.get('LP_MULTIPLE', 0)
        ws[f'B{row}'].number_format = '0.00x'
        ws[f'B{row}'].font = Font(bold=True)
        
        row += 1
        ws[f'A{row}'] = "LP IRR:"
        ws[f'A{row}'].font = Font(bold=True)

        irr_value = r.get('IRR_TO_LP', 0)
        # Handle complex numbers or invalid values
        if isinstance(irr_value, complex):
            irr_value = 0  # or use irr_value.real if you want the real component
        ws[f'B{row}'] = irr_value / 100

        ws[f'B{row}'].number_format = '0.00%'
        ws[f'B{row}'].font = Font(bold=True)
        
        ws.column_dimensions['A'].width = 35
        ws.column_dimensions['B'].width = 20
    
    def run_full_pipeline(self, pdf_directory: str, output_excel: str = "Real_Estate_Financial_Model.xlsx"):
        """Execute complete pipeline"""
        print("=" * 60)
        print("REAL ESTATE FINANCIAL MODEL PIPELINE")
        print("=" * 60)
        
        # Step 1: Extract PDFs
        print("\n[Step 1/4] Extracting text from PDFs...")
        self.extract_all_pdfs(pdf_directory)
        print(f"✓ Extracted {len(self.extracted_data)} PDF files")
        
        # Step 2: Process with Gemini
        print("\n[Step 2/4] Extracting structured data with Gemini API...")
        structured_data = self.extract_structured_data()
        
        # NEW: Post-process to fill gaps
        print("\n[Step 2.5/4] Post-processing and filling estimates...")
        structured_data = self.post_process_extracted_data(structured_data)

        # Step 3: Calculate formulas
        print("\n[Step 3/4] Calculating all formulas...")
        self.calculate_all_formulas(structured_data)
        print(f"✓ Calculated {len(self.formula_results)} formula values")
        
        # Step 4: Generate Excel
        print("\n[Step 4/4] Generating Excel file...")
        self.generate_excel(output_excel)
        
        print("\n" + "=" * 60)
        print("PIPELINE COMPLETE!")
        print("=" * 60)
        print(f"\nKey Metrics:")
        print(f"  Total Project Cost: ${self.formula_results.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0):,.0f}")
        print(f"  Total Debt: ${self.formula_results.get('TOTAL_DEBT', 0):,.0f}")
        print(f"  Total Equity: ${self.formula_results.get('TOTAL_EQUITY', 0):,.0f}")
        print(f"  NOI: ${self.formula_results.get('NET_OPERATING_INCOME', 0):,.0f}")
        print(f"  Yield on Cost: {self.formula_results.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}")
        irr_val = self.formula_results.get('IRR_TO_LP', 0)
        if isinstance(irr_val, complex):
            irr_val = irr_val.real
        print(f"  LP IRR: {irr_val:.2f}%")

        print(f"\nExcel file: {output_excel}")
        
        return output_excel

# ============= FastAPI Setup =============
app = FastAPI(title="Real Estate Financial Model API")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Hardcoded API Key
GEMINI_API_KEY = "AIzaSyCy6GoBR724Hj9VyuW3hKM4N0P6liBOlDo"

@app.post("/api/generate-model")
async def generate_model(files: List[UploadFile] = File(...)):
    """
    API endpoint to process uploaded files and return Excel model
    
    Parameters:
    - files: List of PDF/XLSX files to process
    
    Returns:
    - Excel file with financial model
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded")
    
    try:
        # Create temporary directory
        temp_dir = tempfile.mkdtemp()
        
        # Save uploaded files
        saved_files = []
        for upload_file in files:
            file_path = Path(temp_dir) / upload_file.filename
            
            # Write file content
            with open(file_path, "wb") as f:
                content = await upload_file.read()
                f.write(content)
            
            saved_files.append(str(file_path))
        
        # Initialize pipeline
        pipeline = RealEstateModelPipeline(GEMINI_API_KEY)
        
        # Create output file
        output_file = Path(temp_dir) / "Real_Estate_Financial_Model.xlsx"
        
        # Run pipeline
        pipeline.run_full_pipeline(temp_dir, str(output_file))
        
        # Return Excel file
        return FileResponse(
            path=str(output_file),
            filename="Real_Estate_Financial_Model.xlsx",
            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )
        
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
    
    finally:
        # Cleanup will happen when temp directory is garbage collected
        pass


@app.get("/api/health")
async def health_check():
    """Health check endpoint"""
    return {"status": "healthy", "service": "Real Estate Financial Model API"}
    

@app.post("/api/generate-model-with-summary")
async def generate_model_with_summary(files: List[UploadFile] = File(...)):
    """
    API endpoint that returns JSON summary with key metrics
    AND includes base64 encoded Excel file
    
    Returns JSON with:
    - Key financial metrics
    - Deal summary
    - Market analysis
    - Excel file as base64
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded")
    
    temp_dir = None
    try:
        # Create temporary directory
        temp_dir = tempfile.mkdtemp()
        print(f"📁 Created temp directory: {temp_dir}")
        
        # Save uploaded files
        for upload_file in files:
            file_path = Path(temp_dir) / upload_file.filename
            print(f"💾 Saving file: {upload_file.filename}")
            
            with open(file_path, "wb") as f:
                content = await upload_file.read()
                f.write(content)
        
        print(f"✅ Saved {len(files)} files")
        
        # Initialize pipeline
        print("🔧 Initializing pipeline...")
        pipeline = RealEstateModelPipeline(GEMINI_API_KEY)
        
        # Create output file
        output_file = Path(temp_dir) / "Real_Estate_Financial_Model.xlsx"
        
        # Run pipeline
        print("🚀 Running pipeline...")
        pipeline.run_full_pipeline(temp_dir, str(output_file))
        
        # Check if file was created
        if not output_file.exists():
            raise Exception("Excel file was not generated")
        
        print(f"✅ Excel file created: {output_file}")
        
        # Get formula results
        r = pipeline.formula_results
        d = pipeline.structured_data
        
        # Build comprehensive JSON response
        response_data = {
            "status": "success",
            "message": "Financial model generated successfully",
            
            # KEY METRICS (Dashboard Top Cards)
            "key_metrics": {
                "purchase_price": {
                    "value": r.get('PRICE', 0),
                    "formatted": f"${r.get('PRICE', 0):,.0f}",
                    "label": "Purchase Price"
                },
                "noi": {
                    "value": r.get('NET_OPERATING_INCOME', 0),
                    "formatted": f"${r.get('NET_OPERATING_INCOME', 0):,.0f}",
                    "label": "NOI"
                },
                "cap_rate": {
                    "value": r.get('CAP_RATE', 0),
                    "formatted": f"{r.get('CAP_RATE', 0):.1f}%",
                    "label": "Cap Rate"
                },
                "irr_10yr": {
                    "value": float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)),
                    "formatted": f"{float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)):.1f}%",
                    "label": "IRR (10 yr)"
                },
                "dscr": {
                    "value": r.get('DEBT_SERVICE_COVERAGE_RATIO', 0),
                    "formatted": f"{r.get('DEBT_SERVICE_COVERAGE_RATIO', 0):.2f}x",
                    "label": "DSCR"
                }
            },
            
            # T12 SUMMARY (Extracted Summary)
            "t12_summary": {
                "gross_income": {
                    "annual": r.get('EFFECTIVE_GROSS_INCOME', 0),
                    "monthly": r.get('EFFECTIVE_GROSS_INCOME', 0) / 12,
                    "source": "Calculated from rent roll"
                },
                "operating_expenses": {
                    "annual": r.get('TOTAL_OPERATING_EXPENSES', 0),
                    "monthly": r.get('TOTAL_OPERATING_EXPENSES', 0) / 12,
                    "expense_ratio": f"{(r.get('TOTAL_OPERATING_EXPENSES', 0) / r.get('EFFECTIVE_GROSS_INCOME', 1) if r.get('EFFECTIVE_GROSS_INCOME', 0) > 0 else 0):.1%}"
                },
                "noi": {
                    "annual": r.get('NET_OPERATING_INCOME', 0),
                    "monthly": r.get('NET_OPERATING_INCOME', 0) / 12,
                    "verified": True
                },
                "occupancy": {
                    "rate": f"{(1 - r.get('VACANCY_RATE', 0)):.0%}",
                    "value": 1 - r.get('VACANCY_RATE', 0)
                }
            },
            
            # PROPERTY INFO
            "property_info": {
                "address": d.get('property_info', {}).get('address', 'N/A'),
                "property_type": d.get('property_info', {}).get('property_type', 'N/A'),
                "units": r.get('UNITS', 0),
                "gross_sf": r.get('GROSS_SF', 0),
                "rentable_sf": r.get('RENTABLE_SF', 0),
                "building_efficiency": f"{r.get('BUILDING_EFFICIENCY', 0):.1%}"
            },
            
            # DEAL VS MARKET AVERAGES
            "market_comparison": {
                "your_deal": {
                    "cap_rate": r.get('CAP_RATE', 0) / 100,
                    "irr": float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)) / 100,
                    "dscr": r.get('DEBT_SERVICE_COVERAGE_RATIO', 0)
                },
                "market_average": {
                    "cap_rate": 0.055,  # Example market average
                    "irr": 0.12,
                    "dscr": 1.25
                }
            },
            
            # AI OBSERVATIONS
            "ai_observations": [
                {
                    "observation": f"Cap Rate {'above' if r.get('CAP_RATE', 0) > 5.5 else 'below'} area average",
                    "recommendation": "undervalued potential" if r.get('CAP_RATE', 0) > 5.5 else "premium pricing"
                },
                {
                    "observation": f"Expense ratio at {(r.get('TOTAL_OPERATING_EXPENSES', 0) / r.get('EFFECTIVE_GROSS_INCOME', 1) if r.get('EFFECTIVE_GROSS_INCOME', 0) > 0 else 0):.1%}",
                    "recommendation": "within normal range" if 0.3 <= (r.get('TOTAL_OPERATING_EXPENSES', 0) / r.get('EFFECTIVE_GROSS_INCOME', 1)) <= 0.45 else "review expenses"
                },
                {
                    "observation": "DSCR meets financing standards" if r.get('DEBT_SERVICE_COVERAGE_RATIO', 0) >= 1.2 else "DSCR below standards",
                    "recommendation": "approved" if r.get('DEBT_SERVICE_COVERAGE_RATIO', 0) >= 1.2 else "increase equity"
                },
                {
                    "observation": "NOI verification",
                    "recommendation": "Full underwriting review recommended"
                }
            ],
            
            # CAPITAL STACK
            "capital_stack": {
                "total_project_cost": r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0),
                "total_debt": r.get('TOTAL_DEBT', 0),
                "total_equity": r.get('TOTAL_EQUITY', 0),
                "debt_percentage": r.get('DEBT_PERCENTAGE', 0),
                "equity_percentage": r.get('EQUITY_PERCENTAGE', 0),
                "ltc_ratio": r.get('LTC_RATIO', 0)
            },
            
            # RETURNS ANALYSIS
            "returns": {
                "yield_on_cost": r.get('YIELD_ON_COST_PERCENTAGE', 0),
                "stabilized_yield_on_cost": r.get('STABILIZED_YIELD_ON_COST_PERCENTAGE', 0),
                "cash_on_cash_return": r.get('CASH_ON_CASH_RETURN', 0),
                "lp_multiple": r.get('LP_MULTIPLE', 0),
                "lp_irr": float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)),
                "hold_period_years": r.get('HOLD_PERIOD_MONTHS', 0) / 12
            },
            
            # OPERATING METRICS
            "operating_metrics": {
                "gross_potential_rent": r.get('GROSS_POTENTIAL_FREE_MARKET_RENT', 0) + r.get('GROSS_POTENTIAL_AFFORDABLE_RENT', 0),
                "vacancy_loss": r.get('VACANCY_LOSS', 0),
                "effective_gross_income": r.get('EFFECTIVE_GROSS_INCOME', 0),
                "operating_expenses": r.get('TOTAL_OPERATING_EXPENSES', 0),
                "noi": r.get('NET_OPERATING_INCOME', 0),
                "noi_per_unit": r.get('NOI_PER_UNIT', 0),
                "noi_per_sf": r.get('NOI_PER_GSF', 0)
            },
            
            # COST BREAKDOWN
            "cost_breakdown": {
                "acquisition": {
                    "total": r.get('TOTAL_ACQUISITION_COST', 0),
                    "per_unit": r.get('TOTAL_ACQUISITION_COST_PER_UNIT', 0),
                    "per_sf": r.get('TOTAL_ACQUISITION_COST_PER_GSF', 0)
                },
                "construction": {
                    "total": r.get('TOTAL_CONSTRUCTION_GMP', 0),
                    "per_unit": r.get('CONSTRUCTION_GMP_PER_UNIT', 0),
                    "per_sf": r.get('CONSTRUCTION_GMP_PER_GSF', 0)
                },
                "soft_costs": {
                    "total": r.get('TOTAL_SOFT_COST', 0),
                    "per_sf": r.get('TOTAL_SOFT_COST_PER_GSF', 0)
                },
                "contingency": r.get('CONTINGENCY_COST', 0),
                "reserves": r.get('OPERATING_RESERVE', 0)
            },
            
            # SOURCE FILES
            "source_files": [
                {
                    "filename": file.filename,
                    "size": len(await file.read()) if hasattr(file, 'read') else 0,
                    "type": file.filename.split('.')[-1].upper()
                }
                for file in files
            ],
            
            # EXCEL FILE (BASE64 ENCODED)
            "excel_file": {
                "filename": "Real_Estate_Financial_Model.xlsx",
                "size_bytes": output_file.stat().st_size,
                "base64_data": base64.b64encode(output_file.read_bytes()).decode('utf-8')
            }
        }
        
        print("✅ Response JSON created")
        
        return JSONResponse(content=response_data)
        
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print("❌ ERROR OCCURRED:")
        print(error_details)
        
        raise HTTPException(
            status_code=500, 
            detail={
                "error": str(e),
                "type": type(e).__name__,
                "traceback": error_details
            }
        )



# NEW: Endpoint that returns ONLY JSON (no Excel file)
@app.post("/api/analyze-only")
async def analyze_only(files: List[UploadFile] = File(...)):
    """
    Returns only JSON summary without Excel file
    Faster response, good for dashboards
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded")
    
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        
        for upload_file in files:
            file_path = Path(temp_dir) / upload_file.filename
            with open(file_path, "wb") as f:
                content = await upload_file.read()
                f.write(content)
        
        pipeline = RealEstateModelPipeline(GEMINI_API_KEY)
        
        # Extract and process data (no Excel generation)
        pipeline.extract_all_pdfs(temp_dir)
        structured_data = pipeline.extract_structured_data()
        structured_data = pipeline.post_process_extracted_data(structured_data)
        pipeline.calculate_all_formulas(structured_data)
        
        r = pipeline.formula_results
        d = pipeline.structured_data
        
        # Return lightweight JSON
        return JSONResponse(content={
            "status": "success",
            "key_metrics": {
                "purchase_price": r.get('PRICE', 0),
                "noi": r.get('NET_OPERATING_INCOME', 0),
                "cap_rate": r.get('CAP_RATE', 0),
                "irr": float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)),
                "dscr": r.get('DEBT_SERVICE_COVERAGE_RATIO', 0)
            },
            "property_info": {
                "address": d.get('property_info', {}).get('address', 'N/A'),
                "units": r.get('UNITS', 0),
                "gross_sf": r.get('GROSS_SF', 0)
            },
            "capital_stack": {
                "total_project_cost": r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0),
                "total_debt": r.get('TOTAL_DEBT', 0),
                "total_equity": r.get('TOTAL_EQUITY', 0)
            },
            "returns": {
                "yield_on_cost": r.get('YIELD_ON_COST_PERCENTAGE', 0),
                "cash_on_cash": r.get('CASH_ON_CASH_RETURN', 0),
                "lp_multiple": r.get('LP_MULTIPLE', 0)
            }
        })
        
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/analyze-documents")
async def analyze_documents(
    files: List[UploadFile] = File(...),
    max_pages_per_doc: int = 2,
    confidence_threshold: float = 0.7
):
    """
    Industrial-scale document relevance analysis endpoint
    
    Analyzes uploaded documents to determine if they are relevant to real estate
    and metrics calculation. Uses first few pages for efficiency.
    
    Parameters:
    - files: List of document files (PDF, XLSX, DOCX, etc.)
    - max_pages_per_doc: Maximum number of pages to analyze per document (default: 2)
    - confidence_threshold: Minimum confidence score to mark as relevant (default: 0.7)
    
    Returns:
    - JSON with relevance analysis for each file
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded")
    
    # Validate input parameters
    if max_pages_per_doc < 1 or max_pages_per_doc > 10:
        raise HTTPException(status_code=400, detail="max_pages_per_doc must be between 1 and 10")
    
    if confidence_threshold < 0.1 or confidence_threshold > 1.0:
        raise HTTPException(status_code=400, detail="confidence_threshold must be between 0.1 and 1.0")
    
    temp_dir = None
    try:
        # Create temporary directory with unique name
        temp_dir = tempfile.mkdtemp(prefix="doc_analysis_")
        logger.info(f"Created temp directory: {temp_dir}")
        
        # Process files in parallel for better performance
        analysis_results = await process_documents_parallel(
            files, temp_dir, max_pages_per_doc, confidence_threshold
        )
        
        # Generate overall summary
        summary = generate_analysis_summary(analysis_results)
        
        response = {
            "status": "success",
            "summary": summary,
            "analysis": analysis_results,
            "metadata": {
                "total_files": len(files),
                "relevant_files": summary["relevant_count"],
                "non_relevant_files": summary["non_relevant_count"],
                "confidence_threshold": confidence_threshold,
                "max_pages_analyzed": max_pages_per_doc,
                "processing_time_seconds": summary["processing_time_seconds"]
            }
        }
        
        logger.info(f"Document analysis completed: {summary['relevant_count']}/{len(files)} relevant files")
        return JSONResponse(content=response)
        
    except Exception as e:
        logger.error(f"Document analysis error: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500, 
            detail=f"Document analysis failed: {str(e)}"
        )
    finally:
        # Cleanup temporary directory
        if temp_dir and os.path.exists(temp_dir):
            try:
                shutil.rmtree(temp_dir)
                logger.info(f"Cleaned up temp directory: {temp_dir}")
            except Exception as e:
                logger.warning(f"Failed to cleanup temp directory: {str(e)}")


async def process_documents_parallel(
    files: List[UploadFile], 
    temp_dir: str, 
    max_pages: int, 
    confidence_threshold: float
) -> List[Dict]:
    """Process documents in parallel for better performance"""
    import asyncio
    
    # Save all files first
    saved_paths = []
    for upload_file in files:
        file_path = Path(temp_dir) / secure_filename(upload_file.filename)
        with open(file_path, "wb") as f:
            content = await upload_file.read()
            f.write(content)
        saved_paths.append((file_path, upload_file.filename, upload_file.content_type))
    
    # Process files concurrently
    tasks = []
    for file_path, filename, content_type in saved_paths:
        task = analyze_single_document(
            file_path, filename, content_type, max_pages, confidence_threshold
        )
        tasks.append(task)
    
    # Use asyncio.gather for concurrent processing
    results = await asyncio.gather(*tasks, return_exceptions=True)
    
    # Handle exceptions in individual file processing
    processed_results = []
    for i, result in enumerate(results):
        filename = saved_paths[i][1]
        if isinstance(result, Exception):
            logger.error(f"Error processing {filename}: {str(result)}")
            processed_results.append({
                "filename": filename,
                "relevant": False,
                "confidence": 0.0,
                "error": str(result),
                "reason": "Processing failed",
                "key_indicators": []
            })
        else:
            processed_results.append(result)
    
    return processed_results


async def analyze_single_document(
    file_path: Path,
    filename: str,
    content_type: str,
    max_pages: int,
    confidence_threshold: float
) -> Dict:
    """Analyze a single document for real estate relevance"""
    
    start_time = time.time()
    
    try:
        # Extract text from document (first N pages)
        extracted_text = await extract_document_text(
            file_path, content_type, max_pages
        )
        
        if not extracted_text or len(extracted_text.strip()) < 50:
            return {
                "filename": filename,
                "relevant": False,
                "confidence": 0.0,
                "reason": "Insufficient or unreadable text content",
                "key_indicators": [],
                "text_sample": extracted_text[:200] if extracted_text else ""
            }
        
        # Analyze with Gemini
        analysis_result = await analyze_with_gemini(extracted_text, confidence_threshold)
        
        processing_time = time.time() - start_time
        
        return {
            "filename": filename,
            "relevant": analysis_result["relevant"],
            "confidence": analysis_result["confidence"],
            "reason": analysis_result["reason"],
            "key_indicators": analysis_result["key_indicators"],
            "document_type": analysis_result.get("document_type", "unknown"),
            "text_sample": extracted_text[:500],  # First 500 chars for debugging
            "processing_time_seconds": round(processing_time, 2),
            "pages_analyzed": min(max_pages, estimate_page_count(file_path, content_type))
        }
        
    except Exception as e:
        logger.error(f"Error analyzing {filename}: {str(e)}")
        return {
            "filename": filename,
            "relevant": False,
            "confidence": 0.0,
            "error": str(e),
            "reason": "Analysis error",
            "key_indicators": []
        }


async def extract_document_text(file_path: Path, content_type: str, max_pages: int) -> str:
    """Extract text from document with page limit"""
    
    file_extension = file_path.suffix.lower()
    
    try:
        if file_extension == '.pdf':
            return extract_pdf_text_limited(file_path, max_pages)
        elif file_extension in ['.xlsx', '.xls']:
            return extract_excel_text_limited(file_path, max_pages)
        elif file_extension in ['.docx', '.doc']:
            return extract_docx_text_limited(file_path, max_pages)
        elif file_extension in ['.txt', '.csv']:
            return extract_text_file_limited(file_path, max_pages)
        else:
            # Fallback: try to read as text
            return extract_text_file_limited(file_path, max_pages)
            
    except Exception as e:
        logger.warning(f"Text extraction failed for {file_path}: {str(e)}")
        return ""


def extract_pdf_text_limited(pdf_path: Path, max_pages: int) -> str:
    """Extract text from first N pages of PDF"""
    try:
        from pdfminer.high_level import extract_text
        from pdfminer.layout import LAParams
        
        # Extract only first N pages
        text = extract_text(
            str(pdf_path), 
            laparams=LAParams(),
            maxpages=max_pages
        )
        return text.strip()
    except Exception as e:
        logger.error(f"PDF extraction error: {str(e)}")
        return ""


def extract_excel_text_limited(excel_path: Path, max_sheets: int) -> str:
    """Extract text from first N sheets of Excel file"""
    try:
        import pandas as pd
        
        extracted_content = []
        xlsx = pd.ExcelFile(excel_path)
        
        # Limit number of sheets processed
        sheets_to_process = xlsx.sheet_names[:max_sheets]
        
        for sheet_name in sheets_to_process:
            try:
                df = pd.read_excel(xlsx, sheet_name=sheet_name, nrows=50)  # First 50 rows
                extracted_content.append(f"=== Sheet: {sheet_name} ===")
                extracted_content.append(df.to_string(index=False, max_rows=20))
                extracted_content.append("\n")
            except Exception as e:
                logger.warning(f"Could not read sheet {sheet_name}: {str(e)}")
                continue
        
        return "\n".join(extracted_content)
    except Exception as e:
        logger.error(f"Excel extraction error: {str(e)}")
        return ""


def extract_docx_text_limited(docx_path: Path, max_pages: int) -> str:
    """Extract text from first N pages of DOCX (estimated)"""
    try:
        import docx
        
        doc = docx.Document(str(docx_path))
        full_text = []
        
        # Estimate pages by paragraphs (rough approximation)
        paragraphs_processed = 0
        paragraphs_per_page = 10  # Rough estimate
        
        for paragraph in doc.paragraphs:
            if paragraphs_processed >= max_pages * paragraphs_per_page:
                break
            if paragraph.text.strip():
                full_text.append(paragraph.text)
                paragraphs_processed += 1
        
        return "\n".join(full_text)
    except Exception as e:
        logger.error(f"DOCX extraction error: {str(e)}")
        return ""


def extract_text_file_limited(file_path: Path, max_pages: int) -> str:
    """Extract limited text from text file"""
    try:
        lines_per_page = 50
        max_lines = max_pages * lines_per_page
        
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = []
            for i, line in enumerate(f):
                if i >= max_lines:
                    break
                lines.append(line)
        
        return "".join(lines)
    except Exception as e:
        logger.error(f"Text file extraction error: {str(e)}")
        return ""


def estimate_page_count(file_path: Path, content_type: str) -> int:
    """Estimate number of pages in document"""
    # Simple estimation - can be enhanced based on file type
    return 1


async def analyze_with_gemini(text: str, confidence_threshold: float) -> Dict:
    """Use Gemini to analyze document relevance"""
    
    prompt = f"""
    Analyze this document text and determine if it's relevant to REAL ESTATE and METRICS CALCULATION.
    
    CRITICAL: You must respond with ONLY a JSON object, no other text.
    
    DOCUMENT TEXT (first few pages):
    {text[:8000]}  # Limit text to avoid token limits
    
    ANALYSIS INSTRUCTIONS:
    1. Determine if this document is relevant to real estate business, investments, or metrics
    2. Identify key indicators that support your decision
    3. Provide a confidence score (0.0 to 1.0)
    4. Classify the document type if possible
    
    RELEVANCE CRITERIA:
    - Real estate related: property listings, financial models, market analysis, offering memorandums, rent rolls, operating statements
    - Metrics calculation: financial projections, ROI analysis, cap rates, NOI calculations, cash flow analysis
    - Real estate development: construction costs, pro formas, feasibility studies
    
    NON-RELEVANT EXAMPLES:
    - Resumes, personal documents, marketing brochures for non-real estate
    - Academic papers unrelated to real estate
    - General business documents without real estate focus
    
    REQUIRED JSON RESPONSE FORMAT:
    {{
        "relevant": true/false,
        "confidence": 0.85,
        "reason": "Brief explanation of relevance decision",
        "key_indicators": ["indicator1", "indicator2", ...],
        "document_type": "offering_memorandum|financial_statement|market_report|rent_roll|unknown"
    }}
    
    Confidence threshold for relevance: {confidence_threshold}
    """
    
    try:
        # Initialize Gemini
        genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel('gemini-2.0-flash')
        
        response = await asyncio.get_event_loop().run_in_executor(
            None, 
            lambda: model.generate_content(prompt)
        )
        
        response_text = response.text.strip()
        
        # Clean JSON response
        if "```json" in response_text:
            response_text = response_text.split("```json")[1].split("```")[0].strip()
        elif "```" in response_text:
            response_text = response_text.split("```")[1].split("```")[0].strip()
        
        result = json.loads(response_text)
        
        # Validate response structure
        required_fields = ["relevant", "confidence", "reason", "key_indicators"]
        for field in required_fields:
            if field not in result:
                raise ValueError(f"Missing field in Gemini response: {field}")
        
        # Apply confidence threshold
        if result["confidence"] < confidence_threshold:
            result["relevant"] = False
            result["reason"] = f"Confidence ({result['confidence']}) below threshold ({confidence_threshold})"
        
        return result
        
    except Exception as e:
        logger.error(f"Gemini analysis failed: {str(e)}")
        # Fallback: simple keyword-based analysis
        return perform_fallback_analysis(text, confidence_threshold)


def perform_fallback_analysis(text: str, confidence_threshold: float) -> Dict:
    """Fallback analysis using keyword matching when Gemini fails"""
    
    real_estate_keywords = [
        'real estate', 'property', 'rent', 'lease', 'mortgage', 'cap rate', 
        'noi', 'net operating income', 'cash flow', 'pro forma', 'offering memorandum',
        'rent roll', 'operating expenses', 'vacancy rate', 'occupancy', 'square feet',
        'acquisition', 'disposition', 'broker', 'listing', 'appraisal', 'valuation',
        'construction', 'development', 'zoning', 'permit', 'tenant', 'landlord'
    ]
    
    metrics_keywords = [
        'metrics', 'kpi', 'key performance indicator', 'roi', 'return on investment',
        'irr', 'internal rate of return', 'dscr', 'debt service coverage ratio',
        'ltv', 'loan to value', 'calculation', 'analysis', 'projection', 'forecast',
        'financial model', 'spreadsheet', 'excel', 'numbers', 'data', 'statistics'
    ]
    
    text_lower = text.lower()
    
    # Count keyword matches
    re_matches = sum(1 for keyword in real_estate_keywords if keyword in text_lower)
    metrics_matches = sum(1 for keyword in metrics_keywords if keyword in text_lower)
    
    total_matches = re_matches + metrics_matches
    
    # Calculate confidence based on matches
    confidence = min(1.0, total_matches / 10)  # Normalize
    
    relevant = confidence >= confidence_threshold and (re_matches >= 2 or metrics_matches >= 2)
    
    key_indicators = []
    if re_matches > 0:
        key_indicators.append(f"Real estate terms found: {re_matches}")
    if metrics_matches > 0:
        key_indicators.append(f"Metrics terms found: {metrics_matches}")
    
    return {
        "relevant": relevant,
        "confidence": round(confidence, 2),
        "reason": f"Keyword analysis: {re_matches} real estate terms, {metrics_matches} metrics terms",
        "key_indicators": key_indicators,
        "document_type": "unknown"
    }


def generate_analysis_summary(analysis_results: List[Dict]) -> Dict:
    """Generate summary of document analysis"""
    
    relevant_files = [r for r in analysis_results if r.get('relevant', False)]
    non_relevant_files = [r for r in analysis_results if not r.get('relevant', False)]
    
    # Calculate average confidence
    confidences = [r.get('confidence', 0) for r in analysis_results if r.get('confidence') is not None]
    avg_confidence = sum(confidences) / len(confidences) if confidences else 0
    
    # Document type distribution
    doc_types = {}
    for result in analysis_results:
        doc_type = result.get('document_type', 'unknown')
        doc_types[doc_type] = doc_types.get(doc_type, 0) + 1
    
    return {
        "relevant_count": len(relevant_files),
        "non_relevant_count": len(non_relevant_files),
        "relevance_rate": len(relevant_files) / len(analysis_results) if analysis_results else 0,
        "average_confidence": round(avg_confidence, 3),
        "document_type_breakdown": doc_types,
        "processing_time_seconds": sum(r.get('processing_time_seconds', 0) for r in analysis_results)
    }


def secure_filename(filename: str) -> str:
    """Sanitize filename for security"""
    import re
    filename = re.sub(r'[^a-zA-Z0-9_.-]', '_', filename)
    return filename




def process_pdfs(pdf_files):
    """Process uploaded PDFs and return Excel file"""
    if not pdf_files:
        return None, "⚠️ Please upload at least one PDF/XLSX file"
    
    try:
        # Create temporary directory for PDFs
        temp_dir = tempfile.mkdtemp()
        
        # Save uploaded PDFs to temp directory
        for pdf_file in pdf_files:
            if pdf_file is None:
                continue
            dest_path = Path(temp_dir) / Path(pdf_file.name).name
            shutil.copy(pdf_file.name, dest_path)
        
        # Initialize pipeline with hardcoded API key
        pipeline = RealEstateModelPipeline(GEMINI_API_KEY)
        
        # Create output file in temp directory
        output_file = Path(temp_dir) / "Real_Estate_Financial_Model.xlsx"
        
        # Run pipeline
        result = pipeline.run_full_pipeline(temp_dir, str(output_file))
        
        # Generate summary text
        r = pipeline.formula_results
        irr_val = r.get('IRR_TO_LP', 0)
        if isinstance(irr_val, complex):
            irr_val = irr_val.real
        
        summary = f"""✅ **Processing Complete!**

📊 **Key Metrics:**
• Total Project Cost: ${r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0):,.0f}
• Total Debt: ${r.get('TOTAL_DEBT', 0):,.0f}
• Total Equity: ${r.get('TOTAL_EQUITY', 0):,.0f}
• NOI: ${r.get('NET_OPERATING_INCOME', 0):,.0f}
• Yield on Cost: {r.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}
• LP IRR: {float(irr_val):.2f}%

✨ Download your Excel file below ⬇️
"""
        
        return str(output_file), summary
        
    except Exception as e:
        import traceback
        error_msg = f"❌ **Error occurred:**\n\n{str(e)}\n\n**Details:**\n{traceback.format_exc()}"
        return None, error_msg

# Create Gradio interface with better styling
with gr.Blocks(
    title="Real Estate Financial Model Generator",
    theme=gr.themes.Soft(),
    css="""
        .gradio-container {
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
        }
    """
) as demo:
    
    gr.Markdown("""
    # 🏢 Real Estate Financial Model Generator
    
    Upload your PDF/XLSX documents and generate a comprehensive financial model in Excel format.
    """)
    
    with gr.Row():
        with gr.Column(scale=2):
            pdf_input = gr.File(
                label="Upload PDF/XLSX Files",
                file_count="multiple",
                file_types=[".pdf", ".xlsx", ".xls"],
                type="filepath",
                elem_classes=["file-upload"]
            )
            
            process_btn = gr.Button(
                "Generate Financial Model",
                variant="primary",
                size="lg"
            )
        
        with gr.Column(scale=1):
            gr.Markdown("""
            ### Supported Formats
            - **PDF**: Offering Memorandum, Reports
            - **XLSX/XLS**: Financial statements, data tables
            
            ### Required Documents
            - Offering Memorandum
            - Operating Expenses Summary
            - Sales Comps
            - Rent Comps
            - Market Report
            - Demographics Overview
            
            ### Features
            - Automated data extraction
            - Formula calculations
            - Professional Excel output
            - Multiple analysis sheets
            """)
    
    with gr.Row():
        output_text = gr.Textbox(
            label="Processing Results",
            lines=15,
            interactive=False,
            elem_classes=["output-text"]
        )
    
    with gr.Row():
        excel_output = gr.File(
            label="Download Excel File",
            interactive=False
        )
    
    # Connect the button
    process_btn.click(
        fn=process_pdfs,
        inputs=[pdf_input],
        outputs=[excel_output, output_text],
        api_name="process"
    )
    
    gr.Markdown("""
    ---
    ### Tips
    - Ensure PDF files are readable and not scanned images
    - Use descriptive filenames (e.g., "Offering_Memorandum.pdf")
    - Processing may take 30-60 seconds depending on file sizes
    - Check the **Processing Results** section for detailed feedback
    
    ### API Endpoints
    - `POST /api/generate-model` - Generate Excel only
    - `POST /api/generate-model-with-summary` - Generate Excel + JSON summary
    - `POST /api/analyze-only` - Generate JSON summary only
    - `GET /api/health` - Health check
    """)


# For Hugging Face Spaces, we need to expose the app
if __name__ == "__main__":
    # For Hugging Face Spaces deployment
    import os
    
    # Check if running on Hugging Face Spaces
    is_hf_space = os.environ.get('SPACE_ID') is not None
    
    if is_hf_space:
        # On HF Spaces: Mount Gradio to FastAPI and launch together
        app = gr.mount_gradio_app(app, demo, path="/")
        uvicorn.run(
            app,
            host="0.0.0.0",
            port=7860,
            log_level="info"
        )
    else:
        # Local development: Launch separately
        import threading
        
        # Start FastAPI in background thread
        def run_fastapi():
            uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
        
        api_thread = threading.Thread(target=run_fastapi, daemon=True)
        api_thread.start()
        
        # Launch Gradio on main thread
        demo.launch(
            server_name="0.0.0.0",
            server_port=7861,
            share=False
        )