Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import tempfile | |
| import shutil | |
| from pathlib import Path | |
| import pandas as pd | |
| from openpyxl import load_workbook | |
| from fastapi import FastAPI, UploadFile, File, HTTPException | |
| from fastapi.responses import FileResponse,JSONResponse | |
| from typing import List | |
| import uvicorn | |
| import os | |
| import base64 | |
| """ | |
| Real Estate Financial Model Pipeline | |
| Extracts data from PDFs, solves formulas with Gemini API, generates Excel | |
| """ | |
| from fastapi.middleware.cors import CORSMiddleware | |
| import re | |
| import json | |
| from pathlib import Path | |
| from typing import Dict, Any, List, Optional | |
| import openpyxl | |
| from openpyxl.styles import Font, PatternFill, Alignment, Border, Side | |
| from openpyxl.utils import get_column_letter | |
| from pdfminer.high_level import extract_text | |
| import google.generativeai as genai | |
| # Add logging configuration | |
| import logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
| class RealEstateModelPipeline: | |
| def __init__(self, gemini_api_key: str): | |
| """Initialize pipeline with Gemini API key""" | |
| genai.configure(api_key=gemini_api_key) | |
| self.model = genai.GenerativeModel('gemini-2.0-flash') | |
| self.extracted_data = {} | |
| self.formula_results = {} | |
| self.structured_data = {} | |
| def safe_divide(self, numerator: float, denominator: float, default: float = 0) -> float: | |
| """Safe division that returns default instead of error""" | |
| try: | |
| if denominator == 0 or denominator is None: | |
| return default | |
| return numerator / denominator | |
| except: | |
| return default | |
| def extract_pdf_text(self, pdf_path: str) -> str: | |
| """Extract text from PDF using pdfminer""" | |
| try: | |
| text = extract_text(pdf_path) | |
| return text.strip() | |
| except Exception as e: | |
| print(f"Error extracting {pdf_path}: {e}") | |
| return "" | |
| def extract_xlsx_text(self, xlsx_path: str) -> str: | |
| """Extract text from XLSX using pandas and openpyxl""" | |
| try: | |
| extracted_content = [] | |
| # Try pandas first for data extraction | |
| try: | |
| xlsx = pd.ExcelFile(xlsx_path) | |
| for sheet_name in xlsx.sheet_names: | |
| df = pd.read_excel(xlsx, sheet_name=sheet_name) | |
| extracted_content.append(f"=== Sheet: {sheet_name} ===") | |
| extracted_content.append(df.to_string(index=False)) | |
| extracted_content.append("\n") | |
| except: | |
| pass | |
| # Also try openpyxl for cell-level data | |
| try: | |
| wb = load_workbook(xlsx_path, data_only=True) | |
| for sheet in wb.worksheets: | |
| extracted_content.append(f"\n=== Sheet: {sheet.title} (Raw) ===") | |
| for row in sheet.iter_rows(values_only=True): | |
| row_text = " | ".join([str(cell) if cell is not None else "" for cell in row]) | |
| if row_text.strip(): | |
| extracted_content.append(row_text) | |
| except: | |
| pass | |
| return "\n".join(extracted_content) | |
| except Exception as e: | |
| print(f"Error extracting {xlsx_path}: {e}") | |
| return "" | |
| def extract_all_pdfs(self, pdf_directory: str) -> Dict[str, str]: | |
| """Extract text from all PDFs and XLSX files in directory""" | |
| pdf_dir = Path(pdf_directory) | |
| extracted_texts = {} | |
| with open('output_file_3.txt', "w", encoding="utf-8") as f: | |
| # Process PDFs | |
| for pdf_file in pdf_dir.glob("*.pdf"): | |
| print(f"Extracting PDF: {pdf_file.name}") | |
| text = self.extract_pdf_text(str(pdf_file)) | |
| extracted_texts[pdf_file.stem] = text | |
| f.write(f"=== {pdf_file.name} ===\n") | |
| f.write(text) | |
| f.write("\n\n" + "="*80 + "\n\n") | |
| # Process XLSX files | |
| for xlsx_file in pdf_dir.glob("*.xlsx"): | |
| print(f"Extracting XLSX: {xlsx_file.name}") | |
| text = self.extract_xlsx_text(str(xlsx_file)) | |
| extracted_texts[xlsx_file.stem] = text | |
| f.write(f"=== {xlsx_file.name} ===\n") | |
| f.write(text) | |
| f.write("\n\n" + "="*80 + "\n\n") | |
| self.extracted_data = extracted_texts | |
| return extracted_texts | |
| def extract_address_fallback(self, pdf_texts: Dict[str, str]) -> Optional[str]: | |
| """Extract address using simple pattern matching as fallback""" | |
| for name, text in pdf_texts.items(): | |
| if 'Offering_Memorandum' in name or 'offering' in name.lower(): | |
| # Pattern: "Address: <address text>" | |
| match = re.search(r'Address:\s*(.+?)(?:\n|Property Type:)', text, re.IGNORECASE) | |
| if match: | |
| address = match.group(1).strip() | |
| print(f" ✓ Extracted address via fallback: {address}") | |
| return address | |
| return None | |
| def create_gemini_prompt(self, pdf_texts: Dict[str, str]) -> str: | |
| """Create comprehensive prompt for Gemini to extract structured data""" | |
| # Build a clear summary of what's in each PDF | |
| pdf_summary = "\n".join([f"- {name}: {len(text)} characters" for name, text in pdf_texts.items()]) | |
| prompt = f"""You are a real estate financial analyst. Extract ALL numerical data from the following PDF texts and return it as a JSON object. | |
| CRITICAL INSTRUCTIONS: | |
| 1. ONLY extract data that is EXPLICITLY stated in the PDFs - DO NOT estimate or make up values | |
| 2. For missing values, use null (not 0) | |
| 3. Pay close attention to the specific document names - each contains different information | |
| 4. Extract exact numbers as they appear in the documents | |
| AVAILABLE DOCUMENTS: | |
| {pdf_summary} | |
| PDF CONTENTS: | |
| """ | |
| for name, text in pdf_texts.items(): | |
| prompt += f"\n{'='*60}\n=== {name} ===\n{'='*60}\n{text}\n" | |
| prompt += """ | |
| EXTRACTION INSTRUCTIONS BY DOCUMENT: | |
| FROM "Offering_Memorandum.pdf": | |
| - Extract: Address (full address after "Address:") | |
| - Extract: Property Type (after "Property Type:") | |
| - Extract: Units (number after "Units:") | |
| FROM "Operating_Expenses_Summary.pdf" (if present): | |
| - Extract EXACT annual amounts for: | |
| * Real Estate Taxes | |
| * Insurance | |
| * Utilities | |
| * Repairs & Maint. (or Repairs & Maintenance) | |
| * Management Fee | |
| * Payroll | |
| * Administrative (if listed) | |
| * Professional Fees (if listed) | |
| FROM "Sales_Comps.pdf": | |
| - Extract all Price/SF values | |
| - Calculate average_price_per_sf = average of all Price/SF values | |
| - Count total number of comps | |
| FROM "Rent_Comps.pdf" (if present): | |
| - Extract all rent values (numbers before @ symbol) | |
| - Calculate average_rent = average of all rent values | |
| - Count total number of rent comps | |
| FROM "Market_Report.pdf": | |
| - Extract: Vacancy Rate (percentage) | |
| - Extract: Rent Growth (YoY) (percentage) | |
| FROM "Demographics_Overview.pdf": | |
| - Extract: Population (3-mi) - the number | |
| - Extract: Median HH Income - the dollar amount | |
| - Extract: Transit Score - the number | |
| REQUIRED JSON OUTPUT STRUCTURE: | |
| { | |
| "property_info": { | |
| "address": "EXTRACT FROM Offering_Memorandum.pdf", | |
| "property_type": "EXTRACT FROM Offering_Memorandum.pdf", | |
| "units": EXTRACT_NUMBER_FROM_Offering_Memorandum.pdf, | |
| "gross_sf": null, | |
| "rentable_sf": null, | |
| "retail_sf": null | |
| }, | |
| "acquisition": { | |
| "land_value": null, | |
| "price": null, | |
| "closing_costs": null | |
| }, | |
| "construction": { | |
| "construction_cost_per_gsf": null, | |
| "construction_months": null | |
| }, | |
| "soft_costs": { | |
| "architecture_and_interior_cost": null, | |
| "structural_engineering_cost": null, | |
| "mep_engineering_cost": null, | |
| "civil_engineering_cost": null, | |
| "controlled_inspections_cost": null, | |
| "surveying_cost": null, | |
| "utilities_connection_cost": null, | |
| "advertising_and_marketing_cost": null, | |
| "accounting_cost": null, | |
| "monitoring_cost": null, | |
| "ff_and_e_cost": null, | |
| "environmental_consultant_fee": null, | |
| "miscellaneous_consultants_fee": null, | |
| "general_legal_cost": null, | |
| "real_estate_taxes_during_construction": null, | |
| "miscellaneous_admin_cost": null, | |
| "ibr_cost": null, | |
| "project_team_cost": null, | |
| "pem_fees": null, | |
| "bank_fees": null | |
| }, | |
| "financing": { | |
| "ltc_ratio": null, | |
| "financing_percentage": null, | |
| "interest_rate_basis_points": null, | |
| "financing_cost": null, | |
| "interest_reserve": null | |
| }, | |
| "operating_expenses": { | |
| "payroll": EXTRACT_FROM_Operating_Expenses_Summary.pdf, | |
| "repairs_and_maintenance": EXTRACT_FROM_Operating_Expenses_Summary.pdf, | |
| "utilities": EXTRACT_FROM_Operating_Expenses_Summary.pdf, | |
| "administrative": EXTRACT_FROM_Operating_Expenses_Summary.pdf_OR_null, | |
| "professional_fees": EXTRACT_FROM_Operating_Expenses_Summary.pdf_OR_null, | |
| "insurance": EXTRACT_FROM_Operating_Expenses_Summary.pdf, | |
| "property_taxes": EXTRACT_FROM_Operating_Expenses_Summary.pdf, | |
| "management_fee_percentage": null | |
| }, | |
| "revenue": { | |
| "free_market_rent_psf": null, | |
| "affordable_rent_psf": null, | |
| "other_income_per_unit": null, | |
| "vacancy_rate": null, | |
| "retail_rent_psf": null, | |
| "parking_income": null | |
| }, | |
| "sales_comps": { | |
| "average_price_per_sf": CALCULATE_AVERAGE_FROM_Sales_Comps.pdf, | |
| "comp_count": COUNT_FROM_Sales_Comps.pdf | |
| }, | |
| "rent_comps": { | |
| "average_rent": CALCULATE_AVERAGE_FROM_Rent_Comps.pdf_IF_EXISTS, | |
| "comp_count": COUNT_FROM_Rent_Comps.pdf_IF_EXISTS | |
| }, | |
| "market_data": { | |
| "vacancy_rate": EXTRACT_FROM_Market_Report.pdf, | |
| "rent_growth_yoy": EXTRACT_FROM_Market_Report.pdf, | |
| "median_hh_income": EXTRACT_FROM_Demographics_Overview.pdf, | |
| "population_3mi": EXTRACT_FROM_Demographics_Overview.pdf, | |
| "transit_score": EXTRACT_FROM_Demographics_Overview.pdf | |
| }, | |
| "projections": { | |
| "lease_up_months": null, | |
| "stabilization_months": null, | |
| "revenue_inflation_rate": null, | |
| "expense_inflation_rate": null, | |
| "hold_period_months": null, | |
| "exit_cap_rate_decimal": null, | |
| "sale_cost_percentage": null | |
| }, | |
| "equity_structure": { | |
| "gp_pref_rate": null, | |
| "lp_pref_rate": null, | |
| "promote_percentage": null | |
| } | |
| } | |
| EXAMPLES OF CORRECT EXTRACTION: | |
| Example 1 - From your Offering_Memorandum.pdf: | |
| "Address: 455 Atlantic Ave, Brooklyn, NY" | |
| → "address": "455 Atlantic Ave, Brooklyn, NY" | |
| "Property Type: Retail" | |
| → "property_type": "Retail" | |
| "Units: 7" | |
| → "units": 7 | |
| Example 2 - From your Operating_Expenses_Summary.pdf: | |
| "Real Estate Taxes $91940.2" | |
| → "property_taxes": 91940.2 | |
| "Insurance $16778.94" | |
| → "insurance": 16778.94 | |
| "Payroll $44948.21" | |
| → "payroll": 44948.21 | |
| Example 3 - From your Sales_Comps.pdf: | |
| "Price/SF" column shows: $880, $919, $673, $894 | |
| → "average_price_per_sf": 841.5 (average of these 4 values) | |
| → "comp_count": 4 | |
| Example 4 - From your Market_Report.pdf: | |
| "Vacancy Rate: 5.71%" | |
| → "vacancy_rate": 0.0571 | |
| "Rent Growth (YoY): 4.18%" | |
| → "rent_growth_yoy": 0.0418 | |
| CRITICAL RULES: | |
| 1. Use EXACT numbers from the PDFs - don't round or modify | |
| 2. Convert percentages to decimals (5.71% → 0.0571) | |
| 3. Remove dollar signs and commas from numbers ($91,940.2 → 91940.2) | |
| 4. If a field is not in ANY PDF, use null | |
| 5. Double-check the document name before extracting - make sure you're looking at the right PDF | |
| Return ONLY valid JSON with no explanations, comments, or markdown formatting.""" | |
| prompt += """ | |
| NOTE: Documents may be in PDF or XLSX format. For XLSX files, data is extracted sheet-by-sheet. | |
| Look for numerical data in tables, columns, and labeled cells. | |
| PDF AND XLSX CONTENTS: | |
| """ | |
| return prompt | |
| def extract_structured_data(self) -> Dict[str, Any]: | |
| """Use Gemini to extract structured data from PDFs""" | |
| print("\nProcessing with Gemini API...") | |
| # NEW: Try simple extraction first | |
| fallback_address = self.extract_address_fallback(self.extracted_data) | |
| prompt = self.create_gemini_prompt(self.extracted_data) | |
| try: | |
| response = self.model.generate_content(prompt) | |
| response_text = response.text.strip() | |
| # Clean JSON if wrapped in markdown | |
| if "```json" in response_text: | |
| response_text = response_text.split("```json")[1].split("```")[0].strip() | |
| elif "```" in response_text: | |
| response_text = response_text.split("```")[1].split("```")[0].strip() | |
| data = json.loads(response_text) | |
| # NEW: Override with fallback if Gemini failed | |
| if fallback_address and (not data.get('property_info', {}).get('address') or | |
| data['property_info']['address'] == 'adress'): | |
| data['property_info']['address'] = fallback_address | |
| print(f" ✓ Used fallback address: {fallback_address}") | |
| print("✓ Successfully extracted structured data") | |
| return data | |
| except Exception as e: | |
| print(f"Error with Gemini API: {e}") | |
| data = self.get_default_data_structure() | |
| # Use fallback even in error case | |
| if fallback_address: | |
| data['property_info']['address'] = fallback_address | |
| return data | |
| def post_process_extracted_data(self, data: Dict[str, Any]) -> Dict[str, Any]: | |
| """Fill in missing values with intelligent estimates""" | |
| # Get units | |
| units = data.get('property_info', {}).get('units', 32) | |
| # Estimate SF if missing | |
| if not data['property_info'].get('gross_sf'): | |
| data['property_info']['gross_sf'] = units * 1000 | |
| if not data['property_info'].get('rentable_sf'): | |
| data['property_info']['rentable_sf'] = int(data['property_info']['gross_sf'] * 0.85) | |
| # Set retail_sf to 0 if None (most residential projects don't have retail) | |
| if data['property_info'].get('retail_sf') is None: | |
| data['property_info']['retail_sf'] = 0 | |
| # Get gross_sf for calculations | |
| gross_sf = data['property_info']['gross_sf'] | |
| # Set default construction cost if missing | |
| if not data['construction'].get('construction_cost_per_gsf'): | |
| data['construction']['construction_cost_per_gsf'] = 338 | |
| if not data['construction'].get('construction_months'): | |
| data['construction']['construction_months'] = 18 | |
| # Estimate land value from sales comps if available | |
| if not data['acquisition'].get('land_value'): | |
| sales_comps = data.get('sales_comps', {}) | |
| avg_psf = sales_comps.get('average_price_per_sf') | |
| if avg_psf: | |
| data['acquisition']['land_value'] = avg_psf * gross_sf | |
| else: | |
| # Use default based on typical Manhattan pricing | |
| data['acquisition']['land_value'] = 6000000 | |
| if not data['acquisition'].get('price'): | |
| data['acquisition']['price'] = data['acquisition']['land_value'] | |
| if not data['acquisition'].get('closing_costs'): | |
| data['acquisition']['closing_costs'] = 150000 | |
| # Estimate soft costs as percentages if null | |
| total_hard_cost = data['construction']['construction_cost_per_gsf'] * gross_sf | |
| soft_cost_estimate = total_hard_cost * 0.15 # 15% of hard costs | |
| soft_costs = data.get('soft_costs', {}) | |
| default_soft_cost_values = { | |
| 'architecture_and_interior_cost': soft_cost_estimate * 0.15, | |
| 'structural_engineering_cost': soft_cost_estimate * 0.08, | |
| 'mep_engineering_cost': soft_cost_estimate * 0.10, | |
| 'civil_engineering_cost': soft_cost_estimate * 0.05, | |
| 'controlled_inspections_cost': soft_cost_estimate * 0.03, | |
| 'surveying_cost': soft_cost_estimate * 0.02, | |
| 'utilities_connection_cost': soft_cost_estimate * 0.05, | |
| 'advertising_and_marketing_cost': soft_cost_estimate * 0.06, | |
| 'accounting_cost': soft_cost_estimate * 0.03, | |
| 'monitoring_cost': soft_cost_estimate * 0.02, | |
| 'ff_and_e_cost': soft_cost_estimate * 0.10, | |
| 'environmental_consultant_fee': soft_cost_estimate * 0.02, | |
| 'miscellaneous_consultants_fee': soft_cost_estimate * 0.03, | |
| 'general_legal_cost': soft_cost_estimate * 0.06, | |
| 'real_estate_taxes_during_construction': soft_cost_estimate * 0.10, | |
| 'miscellaneous_admin_cost': soft_cost_estimate * 0.04, | |
| 'ibr_cost': soft_cost_estimate * 0.03, | |
| 'project_team_cost': soft_cost_estimate * 0.15, | |
| 'pem_fees': soft_cost_estimate * 0.08, | |
| 'bank_fees': soft_cost_estimate * 0.05 | |
| } | |
| for key, default_value in default_soft_cost_values.items(): | |
| if soft_costs.get(key) is None: | |
| soft_costs[key] = default_value | |
| # Set financing defaults if missing | |
| financing = data.get('financing', {}) | |
| if not financing.get('ltc_ratio'): | |
| financing['ltc_ratio'] = 0.75 | |
| if not financing.get('financing_percentage'): | |
| financing['financing_percentage'] = 0.03 | |
| if not financing.get('interest_rate_basis_points'): | |
| financing['interest_rate_basis_points'] = 350 | |
| if not financing.get('financing_cost'): | |
| financing['financing_cost'] = 200000 | |
| if not financing.get('interest_reserve'): | |
| financing['interest_reserve'] = 500000 | |
| # Set revenue defaults if missing | |
| revenue = data.get('revenue', {}) | |
| if not revenue.get('free_market_rent_psf'): | |
| revenue['free_market_rent_psf'] = 60 | |
| if not revenue.get('affordable_rent_psf'): | |
| revenue['affordable_rent_psf'] = 35 | |
| if not revenue.get('other_income_per_unit'): | |
| revenue['other_income_per_unit'] = 100 | |
| if not revenue.get('vacancy_rate'): | |
| revenue['vacancy_rate'] = 0.05 | |
| if not revenue.get('retail_rent_psf'): | |
| revenue['retail_rent_psf'] = 45 | |
| if not revenue.get('parking_income'): | |
| revenue['parking_income'] = 50000 | |
| # Ensure operating expenses have defaults | |
| op_expenses = data.get('operating_expenses', {}) | |
| if not op_expenses.get('payroll'): | |
| op_expenses['payroll'] = 31136.07 | |
| if not op_expenses.get('repairs_and_maintenance'): | |
| op_expenses['repairs_and_maintenance'] = 44418.61 | |
| if not op_expenses.get('utilities'): | |
| op_expenses['utilities'] = 12535.90 | |
| if not op_expenses.get('administrative'): | |
| op_expenses['administrative'] = 0 | |
| if not op_expenses.get('professional_fees'): | |
| op_expenses['professional_fees'] = 18789.84 | |
| if not op_expenses.get('insurance'): | |
| op_expenses['insurance'] = 9341.33 | |
| if not op_expenses.get('property_taxes'): | |
| op_expenses['property_taxes'] = 118832.22 | |
| if not op_expenses.get('management_fee_percentage'): | |
| op_expenses['management_fee_percentage'] = 0.03 | |
| # Ensure projections have defaults | |
| projections = data.get('projections', {}) | |
| if not projections.get('lease_up_months'): | |
| projections['lease_up_months'] = 12 | |
| if not projections.get('stabilization_months'): | |
| projections['stabilization_months'] = 6 | |
| if not projections.get('revenue_inflation_rate'): | |
| projections['revenue_inflation_rate'] = 0.03 | |
| if not projections.get('expense_inflation_rate'): | |
| projections['expense_inflation_rate'] = 0.025 | |
| if not projections.get('hold_period_months'): | |
| projections['hold_period_months'] = 60 | |
| if not projections.get('exit_cap_rate_decimal'): | |
| projections['exit_cap_rate_decimal'] = 0.045 | |
| if not projections.get('sale_cost_percentage'): | |
| projections['sale_cost_percentage'] = 0.02 | |
| # Ensure equity structure has defaults | |
| equity = data.get('equity_structure', {}) | |
| if not equity.get('gp_pref_rate'): | |
| equity['gp_pref_rate'] = 0.08 | |
| if not equity.get('lp_pref_rate'): | |
| equity['lp_pref_rate'] = 0.08 | |
| if not equity.get('promote_percentage'): | |
| equity['promote_percentage'] = 0.20 | |
| return data | |
| def get_default_data_structure(self) -> Dict[str, Any]: | |
| """Return default data structure with known values from PDFs""" | |
| # Try to get basic info from extracted text | |
| units = 32 # Default from your PDFs | |
| # Smart estimation | |
| gross_sf = units * 1000 # Typical 1000 SF per unit | |
| rentable_sf = int(gross_sf * 0.85) # 85% efficiency | |
| return { | |
| "property_info": { | |
| "address": "adress", | |
| "units": units, | |
| "gross_sf": gross_sf, | |
| "rentable_sf": rentable_sf, | |
| "retail_sf": 0 # No retail in this project | |
| }, | |
| "acquisition": { | |
| "land_value": None, # Will be estimated from comps | |
| "price": None, | |
| "closing_costs": 150000 | |
| }, | |
| "construction": { | |
| "construction_cost_per_gsf": 338, | |
| "construction_months": 18 | |
| }, | |
| "soft_costs": { | |
| "architecture_and_interior_cost": None, | |
| "structural_engineering_cost": None, | |
| "mep_engineering_cost": None, | |
| "civil_engineering_cost": None, | |
| "controlled_inspections_cost": None, | |
| "surveying_cost": None, | |
| "utilities_connection_cost": None, | |
| "advertising_and_marketing_cost": None, | |
| "accounting_cost": None, | |
| "monitoring_cost": None, | |
| "ff_and_e_cost": None, | |
| "environmental_consultant_fee": None, | |
| "miscellaneous_consultants_fee": None, | |
| "general_legal_cost": None, | |
| "real_estate_taxes_during_construction": None, | |
| "miscellaneous_admin_cost": None, | |
| "ibr_cost": None, | |
| "project_team_cost": None, | |
| "pem_fees": None, | |
| "bank_fees": None | |
| }, | |
| "financing": { | |
| "ltc_ratio": 0.75, | |
| "financing_percentage": 0.03, | |
| "interest_rate_basis_points": 350, | |
| "financing_cost": None, | |
| "interest_reserve": None | |
| }, | |
| "operating_expenses": { | |
| "payroll": 31136.07, # From PDF | |
| "repairs_and_maintenance": 44418.61, | |
| "utilities": 12535.90, | |
| "administrative": 0, | |
| "professional_fees": 18789.84, | |
| "insurance": 9341.33, | |
| "property_taxes": 118832.22, | |
| "management_fee_percentage": 0.03 | |
| }, | |
| "revenue": { | |
| "free_market_rent_psf": 60, | |
| "affordable_rent_psf": 35, | |
| "other_income_per_unit": 100, | |
| "vacancy_rate": 0.05, | |
| "retail_rent_psf": 45, | |
| "parking_income": 50000 | |
| }, | |
| "projections": { | |
| "lease_up_months": 12, | |
| "stabilization_months": 6, | |
| "revenue_inflation_rate": 0.03, | |
| "expense_inflation_rate": 0.025, | |
| "hold_period_months": 60, | |
| "exit_cap_rate_decimal": 0.045, | |
| "sale_cost_percentage": 0.02 | |
| }, | |
| "equity_structure": { | |
| "gp_pref_rate": 0.08, | |
| "lp_pref_rate": 0.08, | |
| "promote_percentage": 0.20 | |
| } | |
| } | |
| def calculate_all_formulas(self, data: Dict[str, Any]) -> Dict[str, float]: | |
| """Calculate all formulas in correct dependency order""" | |
| results = {} | |
| self.structured_data = data | |
| # Flatten data for easier access | |
| d = self.flatten_dict(data) | |
| # Helper function to get value | |
| def get(key, default=0): | |
| value = d.get(key, default) | |
| # CRITICAL: Convert None to default | |
| return default if value is None else value | |
| # def safe_multiply(a, b, default=0): | |
| # """Safely multiply two values, handling None""" | |
| # if a is None or b is None: | |
| # return default | |
| # try: | |
| # return a * b | |
| # except: | |
| # return default | |
| # BASIC PROPERTY METRICS | |
| results['UNITS'] = get('property_info.units') | |
| results['GROSS_SF'] = get('property_info.gross_sf') | |
| results['RENTABLE_SF'] = get('property_info.rentable_sf') | |
| results['RETAIL_SF'] = get('property_info.retail_sf') | |
| results['BUILDING_EFFICIENCY'] = self.safe_divide(results['RENTABLE_SF'], results['GROSS_SF']) | |
| # ACQUISITION COSTS | |
| results['LAND_VALUE'] = get('acquisition.land_value') | |
| results['PRICE'] = get('acquisition.price') | |
| results['CLOSING_COSTS'] = get('acquisition.closing_costs') | |
| results['ACQUISITION_FEE'] = results['LAND_VALUE'] * 0.02 | |
| results['TOTAL_ACQUISITION_COST'] = results['LAND_VALUE'] + results['CLOSING_COSTS'] + results['ACQUISITION_FEE'] | |
| # Per unit/SF metrics for acquisition | |
| results['LAND_VALUE_PER_GSF'] = self.safe_divide(results['LAND_VALUE'], results['GROSS_SF']) | |
| results['LAND_VALUE_PER_RSF'] = self.safe_divide(results['LAND_VALUE'], results['RENTABLE_SF']) | |
| results['LAND_VALUE_PER_UNIT'] = self.safe_divide(results['LAND_VALUE'], results['UNITS']) | |
| results['TOTAL_ACQUISITION_COST_PER_GSF'] = self.safe_divide(results['TOTAL_ACQUISITION_COST'], results['GROSS_SF']) | |
| results['TOTAL_ACQUISITION_COST_PER_RSF'] = self.safe_divide(results['TOTAL_ACQUISITION_COST'], results['RENTABLE_SF']) | |
| results['TOTAL_ACQUISITION_COST_PER_UNIT'] = self.safe_divide(results['TOTAL_ACQUISITION_COST'], results['UNITS']) | |
| # CONSTRUCTION COSTS | |
| results['CONSTRUCTION_COST_PER_GSF'] = get('construction.construction_cost_per_gsf') | |
| results['CONSTRUCTION_MONTHS'] = get('construction.construction_months') | |
| results['TOTAL_CONSTRUCTION_GMP'] = results['CONSTRUCTION_COST_PER_GSF'] * results['GROSS_SF'] | |
| results['CONSTRUCTION_GMP_PER_GSF'] = self.safe_divide(results['TOTAL_CONSTRUCTION_GMP'], results['GROSS_SF']) | |
| results['CONSTRUCTION_GMP_PER_RSF'] = self.safe_divide(results['TOTAL_CONSTRUCTION_GMP'], results['RENTABLE_SF']) | |
| results['CONSTRUCTION_GMP_PER_UNIT'] = self.safe_divide(results['TOTAL_CONSTRUCTION_GMP'], results['UNITS']) | |
| # SOFT COSTS (individual items) | |
| soft_cost_items = [ | |
| 'architecture_and_interior_cost', 'structural_engineering_cost', 'mep_engineering_cost', | |
| 'civil_engineering_cost', 'controlled_inspections_cost', 'surveying_cost', | |
| 'utilities_connection_cost', 'advertising_and_marketing_cost', 'accounting_cost', | |
| 'monitoring_cost', 'ff_and_e_cost', 'environmental_consultant_fee', | |
| 'miscellaneous_consultants_fee', 'general_legal_cost', 'real_estate_taxes_during_construction', | |
| 'miscellaneous_admin_cost', 'ibr_cost', 'project_team_cost', 'pem_fees', 'bank_fees' | |
| ] | |
| for item in soft_cost_items: | |
| key = item.upper() | |
| results[key] = get(f'soft_costs.{item}') | |
| # REVENUE SETUP (needed for some soft costs) | |
| results['FREE_MARKET_RENT_PSF'] = get('revenue.free_market_rent_psf') | |
| results['AFFORDABLE_RENT_PSF'] = get('revenue.affordable_rent_psf') | |
| results['OTHER_INCOME_PER_UNIT'] = get('revenue.other_income_per_unit') | |
| results['VACANCY_RATE'] = get('revenue.vacancy_rate') | |
| results['RETAIL_RENT_PSF'] = get('revenue.retail_rent_psf') | |
| results['PARKING_INCOME'] = get('revenue.parking_income') | |
| # Calculate retail revenue (needed for soft costs) | |
| results['RETAIL_REVENUE'] = results['RETAIL_RENT_PSF'] * results['RETAIL_SF'] | |
| # HPD & IH COST | |
| results['HPD_AND_IH_COST'] = (3500 * results['UNITS'] * 0.75) + (5000 * results['UNITS'] * 0.25) | |
| # RETAIL TI & LC COST | |
| results['RETAIL_TI_AND_LC_COST'] = (results['RETAIL_REVENUE'] * 0.3) + (50 * results['RETAIL_SF']) | |
| # TOTAL SOFT COSTS | |
| soft_cost_sum = sum([results[item.upper()] for item in soft_cost_items]) | |
| results['TOTAL_SOFT_COST'] = soft_cost_sum + results['HPD_AND_IH_COST'] + results['RETAIL_TI_AND_LC_COST'] | |
| results['TOTAL_SOFT_COST_PER_GSF'] = self.safe_divide(results['TOTAL_SOFT_COST'],results['GROSS_SF']) | |
| # OPERATING EXPENSES (for reserves calculation) | |
| results['PAYROLL'] = get('operating_expenses.payroll') | |
| results['REPAIRS_AND_MAINTENANCE'] = get('operating_expenses.repairs_and_maintenance') | |
| results['UTILITIES'] = get('operating_expenses.utilities') | |
| results['ADMINISTRATIVE'] = get('operating_expenses.administrative') | |
| results['PROFESSIONAL_FEES'] = get('operating_expenses.professional_fees') | |
| results['INSURANCE'] = get('operating_expenses.insurance') | |
| results['PROPERTY_TAXES'] = get('operating_expenses.property_taxes') | |
| results['MANAGEMENT_FEE_PERCENTAGE'] = get('operating_expenses.management_fee_percentage') | |
| results['TOTAL_OPERATING_EXPENSES'] = (results['PAYROLL'] + results['REPAIRS_AND_MAINTENANCE'] + | |
| results['UTILITIES'] + results['ADMINISTRATIVE'] + | |
| results['PROFESSIONAL_FEES'] + results['INSURANCE'] + | |
| results['PROPERTY_TAXES']) | |
| # CONTINGENCY & RESERVES | |
| results['CONTINGENCY_COST'] = (results['TOTAL_CONSTRUCTION_GMP'] + results['TOTAL_SOFT_COST']) * 0.05 | |
| results['DEVELOPMENT_FEE'] = (results['TOTAL_CONSTRUCTION_GMP'] + results['TOTAL_SOFT_COST']) * 0.04 | |
| results['OPERATING_RESERVE'] = results['TOTAL_OPERATING_EXPENSES'] * 0.2 | |
| results['FINANCING_COST'] = get('financing.financing_cost') | |
| results['INTEREST_RESERVE'] = get('financing.interest_reserve') | |
| # TOTAL PROJECT COST (before financing) | |
| results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] = ( | |
| results['TOTAL_SOFT_COST'] + | |
| results['TOTAL_CONSTRUCTION_GMP'] + | |
| results['TOTAL_ACQUISITION_COST'] + | |
| results['CONTINGENCY_COST'] + | |
| results['DEVELOPMENT_FEE'] + | |
| results['FINANCING_COST'] + | |
| results['INTEREST_RESERVE'] + | |
| results['OPERATING_RESERVE'] | |
| ) | |
| results['TOTAL_PROJECT_COST_PER_GSF'] = self.safe_divide(results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'], results['GROSS_SF']) | |
| results['TOTAL_PROJECT_COST_PER_RSF'] = self.safe_divide(results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'], results['RENTABLE_SF']) | |
| results['TOTAL_PROJECT_COST_PER_UNIT'] = self.safe_divide(results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'], results['UNITS']) | |
| # FINANCING CALCULATIONS | |
| results['LTC_RATIO'] = get('financing.ltc_ratio') | |
| results['FINANCING_PERCENTAGE'] = get('financing.financing_percentage') | |
| results['INTEREST_RATE_BASIS_POINTS'] = get('financing.interest_rate_basis_points') | |
| results['PRE_LTC_BUDGET'] = (results['TOTAL_SOFT_COST'] + results['CONTINGENCY_COST'] + | |
| results['DEVELOPMENT_FEE'] + results['OPERATING_RESERVE'] + | |
| results['TOTAL_CONSTRUCTION_GMP'] + results['TOTAL_ACQUISITION_COST']) | |
| results['LOAN_AMOUNT'] = results['LTC_RATIO'] * results['PRE_LTC_BUDGET'] | |
| results['FINANCING_AMOUNT'] = results['FINANCING_PERCENTAGE'] * results['LOAN_AMOUNT'] | |
| results['INTEREST_RATE_DECIMAL'] = (results['INTEREST_RATE_BASIS_POINTS'] + 430) / 10000 | |
| results['CONSTRUCTION_INTEREST'] = results['LOAN_AMOUNT'] * 0.7 * (results['INTEREST_RATE_DECIMAL'] / 12) * results['CONSTRUCTION_MONTHS'] | |
| # DEBT & EQUITY | |
| results['TOTAL_DEBT'] = results['CONSTRUCTION_INTEREST'] + results['LOAN_AMOUNT'] + results['FINANCING_AMOUNT'] | |
| results['TOTAL_EQUITY'] = results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] - results['TOTAL_DEBT'] | |
| results['DEBT_PERCENTAGE'] = results['TOTAL_DEBT'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0 | |
| results['EQUITY_PERCENTAGE'] = results['TOTAL_EQUITY'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0 | |
| results['TOTAL_CAPITAL_STACK'] = results['TOTAL_DEBT'] + results['TOTAL_EQUITY'] | |
| results['DEBT_PER_GSF'] = self.safe_divide(results['TOTAL_DEBT'],results['GROSS_SF']) | |
| results['EQUITY_PER_GSF'] = self.safe_divide(results['TOTAL_EQUITY'],results['GROSS_SF']) | |
| results['DEBT_PER_UNIT'] = self.safe_divide(results['TOTAL_DEBT'], results['UNITS']) | |
| results['EQUITY_PER_UNIT'] = self.safe_divide(results['TOTAL_EQUITY'], results['UNITS']) | |
| # OPERATING EXPENSE METRICS | |
| results['PAYROLL_PER_UNIT'] = self.safe_divide(results['PAYROLL'], results['UNITS']) | |
| results['REPAIRS_AND_MAINTENANCE_PER_UNIT'] = self.safe_divide(results['REPAIRS_AND_MAINTENANCE'], results['UNITS']) | |
| results['UTILITIES_PER_UNIT'] = self.safe_divide(results['UTILITIES'], results['UNITS']) | |
| results['ADMIN_AND_PROFESSIONAL_PER_UNIT'] = self.safe_divide((results['ADMINISTRATIVE'] + results['PROFESSIONAL_FEES']), results['UNITS']) | |
| results['INSURANCE_PER_UNIT'] = self.safe_divide(results['INSURANCE'], results['UNITS']) | |
| results['OPERATING_EXPENSES_PER_UNIT'] = self.safe_divide(results['TOTAL_OPERATING_EXPENSES'], results['UNITS']) | |
| results['OPERATING_EXPENSES_PER_GSF'] = self.safe_divide(results['TOTAL_OPERATING_EXPENSES'],results['GROSS_SF']) | |
| # REVENUE CALCULATIONS | |
| results['LEASE_UP_MONTHS'] = get('projections.lease_up_months') | |
| results['STABILIZATION_MONTHS'] = get('projections.stabilization_months') | |
| results['REVENUE_INFLATION_RATE'] = get('projections.revenue_inflation_rate') | |
| results['EXPENSE_INFLATION_RATE'] = get('projections.expense_inflation_rate') | |
| results['TRENDING_TERM'] = results['LEASE_UP_MONTHS'] + results['STABILIZATION_MONTHS'] | |
| results['TERM_REVENUE_INFLATION'] = (1 + results['REVENUE_INFLATION_RATE']) ** (results['TRENDING_TERM'] / 12) | |
| results['TERM_EXPENSE_INFLATION'] = (1 + results['EXPENSE_INFLATION_RATE']) ** (results['TRENDING_TERM'] / 12) | |
| results['GROSS_POTENTIAL_FREE_MARKET_RENT'] = results['FREE_MARKET_RENT_PSF'] * 0.75 * results['RENTABLE_SF'] | |
| results['GROSS_POTENTIAL_AFFORDABLE_RENT'] = results['AFFORDABLE_RENT_PSF'] * 0.25 * results['RENTABLE_SF'] | |
| results['OTHER_INCOME'] = results['OTHER_INCOME_PER_UNIT'] * results['UNITS'] * 12 * 0.75 | |
| results['VACANCY_LOSS'] = results['VACANCY_RATE'] * (results['OTHER_INCOME'] + results['GROSS_POTENTIAL_FREE_MARKET_RENT'] + results['GROSS_POTENTIAL_AFFORDABLE_RENT']) | |
| results['EFFECTIVE_GROSS_INCOME'] = results['GROSS_POTENTIAL_FREE_MARKET_RENT'] - results['VACANCY_LOSS'] + results['OTHER_INCOME'] + results['GROSS_POTENTIAL_AFFORDABLE_RENT'] | |
| results['MANAGEMENT_FEE'] = results['MANAGEMENT_FEE_PERCENTAGE'] * results['EFFECTIVE_GROSS_INCOME'] | |
| results['REAL_ESTATE_TAXES'] = results['GROSS_SF'] * 30 * 0.1 | |
| results['TOTAL_EXPENSES'] = results['PAYROLL'] + results['REPAIRS_AND_MAINTENANCE'] + results['UTILITIES'] + results['REAL_ESTATE_TAXES'] + results['MANAGEMENT_FEE'] | |
| # NOI & RETURNS | |
| results['NET_OPERATING_INCOME'] = results['EFFECTIVE_GROSS_INCOME'] - results['TOTAL_EXPENSES'] + results['PARKING_INCOME'] + results['RETAIL_REVENUE'] | |
| results['NOI_PER_UNIT'] = self.safe_divide(results['NET_OPERATING_INCOME'], results['UNITS']) | |
| results['NOI_PER_GSF'] = self.safe_divide(results['NET_OPERATING_INCOME'],results['GROSS_SF']) | |
| results['CAP_RATE'] = (results['NET_OPERATING_INCOME'] / results['PRICE']) * 100 if results['PRICE'] > 0 else 0 | |
| results['STABILIZED_YIELD_ON_COST'] = (((results['EFFECTIVE_GROSS_INCOME'] + results['RETAIL_REVENUE'] - results['GROSS_POTENTIAL_AFFORDABLE_RENT']) * results['TERM_REVENUE_INFLATION']) - (results['TOTAL_EXPENSES'] * results['TERM_EXPENSE_INFLATION'])) + results['GROSS_POTENTIAL_AFFORDABLE_RENT'] | |
| results['YIELD_ON_COST_PERCENTAGE'] = results['NET_OPERATING_INCOME'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0 | |
| results['STABILIZED_YIELD_ON_COST_PERCENTAGE'] = results['STABILIZED_YIELD_ON_COST'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0 | |
| results['ANNUAL_DEBT_SERVICE'] = results['LOAN_AMOUNT'] * results['INTEREST_RATE_DECIMAL'] | |
| results['CASH_ON_CASH_RETURN'] = ((results['NET_OPERATING_INCOME'] - results['ANNUAL_DEBT_SERVICE']) / results['TOTAL_EQUITY']) * 100 if results['TOTAL_EQUITY'] > 0 else 0 | |
| results['DEBT_SERVICE_COVERAGE_RATIO'] = results['NET_OPERATING_INCOME'] / results['ANNUAL_DEBT_SERVICE'] if results['ANNUAL_DEBT_SERVICE'] > 0 else 0 | |
| # EXIT & EQUITY WATERFALL | |
| results['EXIT_CAP_RATE_DECIMAL'] = get('projections.exit_cap_rate_decimal') | |
| results['SALE_COST_PERCENTAGE'] = get('projections.sale_cost_percentage') | |
| results['HOLD_PERIOD_MONTHS'] = get('projections.hold_period_months') | |
| results['PROPERTY_VALUE_ON_SALE'] = (results['STABILIZED_YIELD_ON_COST'] / results['EXIT_CAP_RATE_DECIMAL']) + (results['STABILIZED_YIELD_ON_COST'] * 0.25) if results['EXIT_CAP_RATE_DECIMAL'] > 0 else 0 | |
| results['SALE_COST'] = results['SALE_COST_PERCENTAGE'] * results['PROPERTY_VALUE_ON_SALE'] | |
| results['NET_SALE_PROCEEDS'] = results['PROPERTY_VALUE_ON_SALE'] - results['SALE_COST'] | |
| results['CASH_REMAINING_AFTER_LOAN_PAYBACK'] = results['NET_SALE_PROCEEDS'] - results['TOTAL_DEBT'] | |
| results['GP_PREF_RATE'] = get('equity_structure.gp_pref_rate') | |
| results['LP_PREF_RATE'] = get('equity_structure.lp_pref_rate') | |
| results['PROMOTE_PERCENTAGE'] = get('equity_structure.promote_percentage') | |
| results['GP_INVESTMENT'] = results['TOTAL_EQUITY'] * 0.2 | |
| results['LP_INVESTMENT'] = results['TOTAL_EQUITY'] - results['GP_INVESTMENT'] | |
| results['GP_PREFERRED_RETURN_WITH_PRINCIPAL'] = (1 + results['GP_PREF_RATE'] / 12) ** results['HOLD_PERIOD_MONTHS'] * results['GP_INVESTMENT'] | |
| results['LP_PREFERRED_RETURN_WITH_PRINCIPAL'] = (1 + results['LP_PREF_RATE'] / 12) ** results['HOLD_PERIOD_MONTHS'] * results['LP_INVESTMENT'] | |
| results['CASH_REMAINING_AFTER_PREFERRED'] = results['CASH_REMAINING_AFTER_LOAN_PAYBACK'] - results['LP_PREFERRED_RETURN_WITH_PRINCIPAL'] - results['GP_PREFERRED_RETURN_WITH_PRINCIPAL'] | |
| results['PROMOTE_ON_JOINT_VENTURE'] = results['PROMOTE_PERCENTAGE'] * results['CASH_REMAINING_AFTER_PREFERRED'] | |
| results['CASH_TO_LP'] = (results['CASH_REMAINING_AFTER_PREFERRED'] - results['PROMOTE_ON_JOINT_VENTURE']) * (results['LP_INVESTMENT'] / (results['LP_INVESTMENT'] + results['GP_INVESTMENT'])) if (results['LP_INVESTMENT'] + results['GP_INVESTMENT']) > 0 else 0 | |
| results['NET_TO_LP_INVESTOR'] = results['CASH_TO_LP'] + results['LP_PREFERRED_RETURN_WITH_PRINCIPAL'] | |
| results['LP_MULTIPLE'] = results['NET_TO_LP_INVESTOR'] / results['LP_INVESTMENT'] if results['LP_INVESTMENT'] > 0 else 0 | |
| # results['IRR_TO_LP'] = ((results['NET_TO_LP_INVESTOR'] / results['LP_INVESTMENT']) ** (12 / results['HOLD_PERIOD_MONTHS']) - 1) * 100 if results['LP_INVESTMENT'] > 0 and results['HOLD_PERIOD_MONTHS'] > 0 else 0 | |
| # IRR calculation with complex number handling | |
| if results['LP_INVESTMENT'] > 0 and results['HOLD_PERIOD_MONTHS'] > 0: | |
| irr_base = results['NET_TO_LP_INVESTOR'] / results['LP_INVESTMENT'] | |
| if irr_base > 0: | |
| results['IRR_TO_LP'] = ((irr_base) ** (12 / results['HOLD_PERIOD_MONTHS']) - 1) * 100 | |
| else: | |
| results['IRR_TO_LP'] = -100 # Total loss | |
| else: | |
| results['IRR_TO_LP'] = 0 | |
| # BLENDED RENT CALCULATIONS | |
| results['BLENDED_RENT_PER_RSF'] = (results['FREE_MARKET_RENT_PSF'] * 0.75) + (results['AFFORDABLE_RENT_PSF'] * 0.25) | |
| results['TOTAL_FREE_MARKET_RENT'] = results['FREE_MARKET_RENT_PSF'] * 425 / 12 | |
| results['TOTAL_BLENDED_RENT'] = results['BLENDED_RENT_PER_RSF'] * 750 / 12 | |
| results['FREE_MARKET_RENT_PER_SF'] = results['TOTAL_FREE_MARKET_RENT'] * 110 / 12 | |
| results['AFFORDABLE_RENT_PER_SF'] = results['AFFORDABLE_RENT_PSF'] * 110 / 12 | |
| results['BLENDED_RENT_PER_SF'] = results['TOTAL_BLENDED_RENT'] * 110 / 12 | |
| results['AVERAGE_RENT_PER_UNIT'] = self.safe_divide((results['GROSS_POTENTIAL_FREE_MARKET_RENT']+results['GROSS_POTENTIAL_AFFORDABLE_RENT']), results['UNITS']) | |
| results['RENT_PER_UNIT_PER_MONTH'] = results['AVERAGE_RENT_PER_UNIT'] / 12 | |
| # EGI PERCENTAGES | |
| if results['EFFECTIVE_GROSS_INCOME'] > 0: | |
| results['PAYROLL_PERCENTAGE_OF_EGI'] = results['PAYROLL'] / results['EFFECTIVE_GROSS_INCOME'] | |
| results['REPAIRS_AND_MAINTENANCE_PERCENTAGE_OF_EGI'] = results['REPAIRS_AND_MAINTENANCE'] / results['EFFECTIVE_GROSS_INCOME'] | |
| results['UTILITIES_PERCENTAGE_OF_EGI'] = results['UTILITIES'] / results['EFFECTIVE_GROSS_INCOME'] | |
| results['ADMIN_AND_PROFESSIONAL_PERCENTAGE_OF_EGI'] = (results['ADMINISTRATIVE'] + results['PROFESSIONAL_FEES']) / results['EFFECTIVE_GROSS_INCOME'] | |
| results['INSURANCE_PERCENTAGE_OF_EGI'] = results['INSURANCE'] / results['EFFECTIVE_GROSS_INCOME'] | |
| results['PROFESSIONAL_FEES_PERCENTAGE_OF_EGI'] = results['PROFESSIONAL_FEES'] / results['EFFECTIVE_GROSS_INCOME'] | |
| results['TOTAL_OPERATING_EXPENSES_PERCENTAGE_OF_EGI'] = results['TOTAL_OPERATING_EXPENSES'] / results['EFFECTIVE_GROSS_INCOME'] | |
| else: | |
| results['PAYROLL_PERCENTAGE_OF_EGI'] = 0 | |
| results['REPAIRS_AND_MAINTENANCE_PERCENTAGE_OF_EGI'] = 0 | |
| results['UTILITIES_PERCENTAGE_OF_EGI'] = 0 | |
| results['ADMIN_AND_PROFESSIONAL_PERCENTAGE_OF_EGI'] = 0 | |
| results['INSURANCE_PERCENTAGE_OF_EGI'] = 0 | |
| results['PROFESSIONAL_FEES_PERCENTAGE_OF_EGI'] = 0 | |
| results['TOTAL_OPERATING_EXPENSES_PERCENTAGE_OF_EGI'] = 0 | |
| self.formula_results = results | |
| return results | |
| def flatten_dict(self, d: Dict[str, Any], parent_key: str = '', sep: str = '.') -> Dict[str, Any]: | |
| """Flatten nested dictionary""" | |
| items = [] | |
| for k, v in d.items(): | |
| new_key = f"{parent_key}{sep}{k}" if parent_key else k | |
| if isinstance(v, dict): | |
| items.extend(self.flatten_dict(v, new_key, sep=sep).items()) | |
| else: | |
| items.append((new_key, v)) | |
| return dict(items) | |
| def generate_excel(self, output_path: str = "Real_Estate_Financial_Model.xlsx"): | |
| """Generate professional Excel file with all calculations""" | |
| try: | |
| # Validate critical values before Excel generation | |
| r = self.formula_results | |
| print(" Validating calculations...") | |
| critical_values = { | |
| 'UNITS': r.get('UNITS', 0), | |
| 'GROSS_SF': r.get('GROSS_SF', 0), | |
| 'RENTABLE_SF': r.get('RENTABLE_SF', 0), | |
| 'EFFECTIVE_GROSS_INCOME': r.get('EFFECTIVE_GROSS_INCOME', 0), | |
| 'TOTAL_PROJECT_COST': r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0) | |
| } | |
| warnings = [] | |
| for key, value in critical_values.items(): | |
| if value == 0: | |
| warnings.append(f" WARNING: {key} is zero or missing") | |
| if warnings: | |
| print("\n".join(warnings)) | |
| print(" Continuing with available data...\n") | |
| wb = openpyxl.Workbook() | |
| # Remove default sheet | |
| if 'Sheet' in wb.sheetnames: | |
| wb.remove(wb['Sheet']) | |
| # Create sheets with error handling | |
| print(" Creating Executive Summary...") | |
| self.create_summary_sheet(wb) | |
| print(" Creating Acquisition sheet...") | |
| self.create_acquisition_sheet(wb) | |
| print(" Creating Construction sheet...") | |
| self.create_construction_sheet(wb) | |
| print(" Creating Soft Costs sheet...") | |
| self.create_soft_costs_sheet(wb) | |
| print(" Creating Financing sheet...") | |
| self.create_financing_sheet(wb) | |
| print(" Creating Operations sheet...") | |
| self.create_operations_sheet(wb) | |
| print(" Creating Returns sheet...") | |
| self.create_returns_sheet(wb) | |
| # Save workbook | |
| wb.save(output_path) | |
| print(f"✓ Excel file generated: {output_path}") | |
| return output_path | |
| except Exception as e: | |
| print(f"ERROR generating Excel: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| raise | |
| def create_summary_sheet(self, wb): | |
| """Create executive summary sheet""" | |
| ws = wb.create_sheet("Executive Summary", 0) | |
| # Styles | |
| header_fill = PatternFill(start_color="1F4E78", end_color="1F4E78", fill_type="solid") | |
| header_font = Font(color="FFFFFF", bold=True, size=12) | |
| subheader_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") | |
| subheader_font = Font(color="FFFFFF", bold=True, size=11) | |
| r = self.formula_results | |
| # Title | |
| ws['A1'] = "REAL ESTATE DEVELOPMENT FINANCIAL MODEL" | |
| ws['A1'].font = Font(bold=True, size=16) | |
| ws.merge_cells('A1:D1') | |
| # Property Information | |
| row = 3 | |
| ws[f'A{row}'] = "PROPERTY INFORMATION" | |
| ws[f'A{row}'].fill = header_fill | |
| ws[f'A{row}'].font = header_font | |
| ws.merge_cells(f'A{row}:D{row}') | |
| address = self.structured_data.get('property_info', {}).get('address', 'N/A') | |
| row += 1 | |
| data = [ | |
| ("Address:", address), | |
| ("Units:", r.get('UNITS', 0)), | |
| ("Gross Square Feet:", f"{r.get('GROSS_SF', 0):,.0f}"), | |
| ("Rentable Square Feet:", f"{r.get('RENTABLE_SF', 0):,.0f}"), | |
| ("Building Efficiency:", f"{r.get('BUILDING_EFFICIENCY', 0):.2%}"), | |
| ] | |
| for label, value in data: | |
| ws[f'A{row}'] = label | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = value | |
| row += 1 | |
| # Project Costs Summary | |
| row += 1 | |
| ws[f'A{row}'] = "PROJECT COSTS SUMMARY" | |
| ws[f'A{row}'].fill = header_fill | |
| ws[f'A{row}'].font = header_font | |
| ws.merge_cells(f'A{row}:D{row}') | |
| row += 1 | |
| ws[f'A{row}'] = "Category" | |
| ws[f'B{row}'] = "Total Cost" | |
| ws[f'C{row}'] = "Per GSF" | |
| ws[f'D{row}'] = "Per Unit" | |
| for col in ['A', 'B', 'C', 'D']: | |
| ws[f'{col}{row}'].fill = subheader_fill | |
| ws[f'{col}{row}'].font = subheader_font | |
| row += 1 | |
| cost_summary = [ | |
| ("Acquisition", r.get('TOTAL_ACQUISITION_COST', 0), r.get('TOTAL_ACQUISITION_COST_PER_GSF', 0), r.get('TOTAL_ACQUISITION_COST_PER_UNIT', 0)), | |
| ("Construction", r.get('TOTAL_CONSTRUCTION_GMP', 0), r.get('CONSTRUCTION_GMP_PER_GSF', 0), r.get('CONSTRUCTION_GMP_PER_UNIT', 0)), | |
| ("Soft Costs", r.get('TOTAL_SOFT_COST', 0), r.get('TOTAL_SOFT_COST_PER_GSF', 0), r.get('TOTAL_SOFT_COST_PER_GSF', 0) * r.get('GROSS_SF', 0) / r.get('UNITS', 1)), | |
| ("Contingency", r.get('CONTINGENCY_COST', 0), r.get('CONTINGENCY_COST', 0) / r.get('GROSS_SF', 1), r.get('CONTINGENCY_COST', 0) / r.get('UNITS', 1)), | |
| ("Development Fee", r.get('DEVELOPMENT_FEE', 0), r.get('DEVELOPMENT_FEE', 0) / r.get('GROSS_SF', 1), r.get('DEVELOPMENT_FEE', 0) / r.get('UNITS', 1)), | |
| ("Financing & Reserves", r.get('FINANCING_COST', 0) + r.get('INTEREST_RESERVE', 0) + r.get('OPERATING_RESERVE', 0), 0, 0), | |
| ] | |
| for label, total, per_gsf, per_unit in cost_summary: | |
| ws[f'A{row}'] = label | |
| ws[f'B{row}'] = total | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'C{row}'] = per_gsf | |
| ws[f'C{row}'].number_format = '$#,##0.00' | |
| ws[f'D{row}'] = per_unit | |
| ws[f'D{row}'].number_format = '$#,##0' | |
| row += 1 | |
| # Total | |
| ws[f'A{row}'] = "TOTAL PROJECT COST" | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'B{row}'].font = Font(bold=True) | |
| ws[f'C{row}'] = r.get('TOTAL_PROJECT_COST_PER_GSF', 0) | |
| ws[f'C{row}'].number_format = '$#,##0.00' | |
| ws[f'C{row}'].font = Font(bold=True) | |
| ws[f'D{row}'] = r.get('TOTAL_PROJECT_COST_PER_UNIT', 0) | |
| ws[f'D{row}'].number_format = '$#,##0' | |
| ws[f'D{row}'].font = Font(bold=True) | |
| # Capital Stack | |
| row += 2 | |
| ws[f'A{row}'] = "CAPITAL STACK" | |
| ws[f'A{row}'].fill = header_fill | |
| ws[f'A{row}'].font = header_font | |
| ws.merge_cells(f'A{row}:D{row}') | |
| row += 1 | |
| ws[f'A{row}'] = "Source" | |
| ws[f'B{row}'] = "Amount" | |
| ws[f'C{row}'] = "Percentage" | |
| ws[f'D{row}'] = "Per Unit" | |
| for col in ['A', 'B', 'C', 'D']: | |
| ws[f'{col}{row}'].fill = subheader_fill | |
| ws[f'{col}{row}'].font = subheader_font | |
| row += 1 | |
| ws[f'A{row}'] = "Total Debt" | |
| ws[f'B{row}'] = r.get('TOTAL_DEBT', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'C{row}'] = r.get('DEBT_PERCENTAGE', 0) | |
| ws[f'C{row}'].number_format = '0.00%' | |
| ws[f'D{row}'] = r.get('DEBT_PER_UNIT', 0) | |
| ws[f'D{row}'].number_format = '$#,##0' | |
| row += 1 | |
| ws[f'A{row}'] = "Total Equity" | |
| ws[f'B{row}'] = r.get('TOTAL_EQUITY', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'C{row}'] = r.get('EQUITY_PERCENTAGE', 0) | |
| ws[f'C{row}'].number_format = '0.00%' | |
| ws[f'D{row}'] = r.get('EQUITY_PER_UNIT', 0) | |
| ws[f'D{row}'].number_format = '$#,##0' | |
| # Returns Summary | |
| row += 2 | |
| ws[f'A{row}'] = "INVESTMENT RETURNS" | |
| ws[f'A{row}'].fill = header_fill | |
| ws[f'A{row}'].font = header_font | |
| ws.merge_cells(f'A{row}:D{row}') | |
| row += 1 | |
| returns_data = [ | |
| ("Stabilized NOI:", f"${r.get('NET_OPERATING_INCOME', 0):,.0f}"), | |
| ("Yield on Cost:", f"{r.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}"), | |
| ("Stabilized Yield on Cost:", f"{r.get('STABILIZED_YIELD_ON_COST_PERCENTAGE', 0):.2%}"), | |
| ("Cash-on-Cash Return:", f"{r.get('CASH_ON_CASH_RETURN', 0):.2f}%"), | |
| ("DSCR:", f"{r.get('DEBT_SERVICE_COVERAGE_RATIO', 0):.2f}x"), | |
| ("LP IRR:", f"{float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)):.2f}%"), | |
| ("LP Multiple:", f"{r.get('LP_MULTIPLE', 0):.2f}x"), | |
| ] | |
| for label, value in returns_data: | |
| ws[f'A{row}'] = label | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = value | |
| row += 1 | |
| # Adjust column widths | |
| ws.column_dimensions['A'].width = 25 | |
| ws.column_dimensions['B'].width = 18 | |
| ws.column_dimensions['C'].width = 15 | |
| ws.column_dimensions['D'].width = 15 | |
| def create_acquisition_sheet(self, wb): | |
| """Create acquisition costs detail sheet""" | |
| ws = wb.create_sheet("Acquisition") | |
| r = self.formula_results | |
| # Header | |
| ws['A1'] = "ACQUISITION COSTS" | |
| ws['A1'].font = Font(bold=True, size=14) | |
| ws.merge_cells('A1:E1') | |
| # Column headers | |
| row = 3 | |
| headers = ["Item", "Total Cost", "Per GSF", "Per RSF", "Per Unit"] | |
| for col_idx, header in enumerate(headers, start=1): | |
| cell = ws.cell(row=row, column=col_idx, value=header) | |
| cell.fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") | |
| cell.font = Font(color="FFFFFF", bold=True) | |
| # Data | |
| row += 1 | |
| data = [ | |
| ("Land Value", r.get('LAND_VALUE', 0), r.get('LAND_VALUE_PER_GSF', 0), r.get('LAND_VALUE_PER_RSF', 0), r.get('LAND_VALUE_PER_UNIT', 0)), | |
| ("Closing Costs", r.get('CLOSING_COSTS', 0), r.get('CLOSING_COSTS', 0) / r.get('GROSS_SF', 1), r.get('CLOSING_COSTS', 0) / r.get('RENTABLE_SF', 1), r.get('CLOSING_COSTS', 0) / r.get('UNITS', 1)), | |
| ("Acquisition Fee (2%)", r.get('ACQUISITION_FEE', 0), r.get('ACQUISITION_FEE', 0) / r.get('GROSS_SF', 1), r.get('ACQUISITION_FEE', 0) / r.get('RENTABLE_SF', 1), r.get('ACQUISITION_FEE', 0) / r.get('UNITS', 1)), | |
| ] | |
| for item, total, per_gsf, per_rsf, per_unit in data: | |
| ws.cell(row=row, column=1, value=item) | |
| ws.cell(row=row, column=2, value=total).number_format = '$#,##0' | |
| ws.cell(row=row, column=3, value=per_gsf).number_format = '$#,##0.00' | |
| ws.cell(row=row, column=4, value=per_rsf).number_format = '$#,##0.00' | |
| ws.cell(row=row, column=5, value=per_unit).number_format = '$#,##0' | |
| row += 1 | |
| # Total | |
| ws.cell(row=row, column=1, value="TOTAL ACQUISITION COST").font = Font(bold=True) | |
| ws.cell(row=row, column=2, value=r.get('TOTAL_ACQUISITION_COST', 0)).number_format = '$#,##0' | |
| ws.cell(row=row, column=2).font = Font(bold=True) | |
| ws.cell(row=row, column=3, value=r.get('TOTAL_ACQUISITION_COST_PER_GSF', 0)).number_format = '$#,##0.00' | |
| ws.cell(row=row, column=3).font = Font(bold=True) | |
| ws.cell(row=row, column=4, value=r.get('TOTAL_ACQUISITION_COST_PER_RSF', 0)).number_format = '$#,##0.00' | |
| ws.cell(row=row, column=4).font = Font(bold=True) | |
| ws.cell(row=row, column=5, value=r.get('TOTAL_ACQUISITION_COST_PER_UNIT', 0)).number_format = '$#,##0' | |
| ws.cell(row=row, column=5).font = Font(bold=True) | |
| # Adjust widths | |
| for col in range(1, 6): | |
| ws.column_dimensions[get_column_letter(col)].width = 20 | |
| def create_construction_sheet(self, wb): | |
| """Create construction costs sheet""" | |
| ws = wb.create_sheet("Construction") | |
| r = self.formula_results | |
| ws['A1'] = "CONSTRUCTION COSTS" | |
| ws['A1'].font = Font(bold=True, size=14) | |
| row = 3 | |
| ws[f'A{row}'] = "Construction Cost per GSF:" | |
| ws[f'B{row}'] = r.get('CONSTRUCTION_COST_PER_GSF', 0) | |
| ws[f'B{row}'].number_format = '$#,##0.00' | |
| row += 1 | |
| ws[f'A{row}'] = "Gross Square Feet:" | |
| ws[f'B{row}'] = r.get('GROSS_SF', 0) | |
| ws[f'B{row}'].number_format = '#,##0' | |
| row += 2 | |
| ws[f'A{row}'] = "Total Construction GMP:" | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = r.get('TOTAL_CONSTRUCTION_GMP', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'B{row}'].font = Font(bold=True) | |
| row += 2 | |
| ws[f'A{row}'] = "Construction Duration:" | |
| ws[f'B{row}'] = f"{r.get('CONSTRUCTION_MONTHS', 0)} months" | |
| ws.column_dimensions['A'].width = 30 | |
| ws.column_dimensions['B'].width = 20 | |
| def create_soft_costs_sheet(self, wb): | |
| """Create soft costs detail sheet""" | |
| ws = wb.create_sheet("Soft Costs") | |
| r = self.formula_results | |
| ws['A1'] = "SOFT COSTS BUDGET" | |
| ws['A1'].font = Font(bold=True, size=14) | |
| row = 3 | |
| headers = ["Category", "Total Cost", "Per GSF"] | |
| for col_idx, header in enumerate(headers, start=1): | |
| cell = ws.cell(row=row, column=col_idx, value=header) | |
| cell.fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") | |
| cell.font = Font(color="FFFFFF", bold=True) | |
| row += 1 | |
| soft_cost_items = [ | |
| ("Architecture & Interior Design", 'ARCHITECTURE_AND_INTERIOR_COST'), | |
| ("Structural Engineering", 'STRUCTURAL_ENGINEERING_COST'), | |
| ("MEP Engineering", 'MEP_ENGINEERING_COST'), | |
| ("Civil Engineering", 'CIVIL_ENGINEERING_COST'), | |
| ("Controlled Inspections", 'CONTROLLED_INSPECTIONS_COST'), | |
| ("Surveying", 'SURVEYING_COST'), | |
| ("Utilities Connection", 'UTILITIES_CONNECTION_COST'), | |
| ("Advertising & Marketing", 'ADVERTISING_AND_MARKETING_COST'), | |
| ("Accounting", 'ACCOUNTING_COST'), | |
| ("Monitoring", 'MONITORING_COST'), | |
| ("FF&E", 'FF_AND_E_COST'), | |
| ("Environmental Consultant", 'ENVIRONMENTAL_CONSULTANT_FEE'), | |
| ("Miscellaneous Consultants", 'MISCELLANEOUS_CONSULTANTS_FEE'), | |
| ("General Legal", 'GENERAL_LEGAL_COST'), | |
| ("RE Taxes During Construction", 'REAL_ESTATE_TAXES_DURING_CONSTRUCTION'), | |
| ("Miscellaneous Admin", 'MISCELLANEOUS_ADMIN_COST'), | |
| ("IBR Cost", 'IBR_COST'), | |
| ("Project Team", 'PROJECT_TEAM_COST'), | |
| ("PEM Fees", 'PEM_FEES'), | |
| ("Bank Fees", 'BANK_FEES'), | |
| ("HPD & IH Costs", 'HPD_AND_IH_COST'), | |
| ("Retail TI & LC", 'RETAIL_TI_AND_LC_COST'), | |
| ] | |
| for label, key in soft_cost_items: | |
| cost = r.get(key, 0) | |
| per_gsf = cost / r.get('GROSS_SF', 1) if r.get('GROSS_SF', 0) > 0 else 0 | |
| ws.cell(row=row, column=1, value=label) | |
| ws.cell(row=row, column=2, value=cost).number_format = '$#,##0' | |
| ws.cell(row=row, column=3, value=per_gsf).number_format = '$#,##0.00' | |
| row += 1 | |
| # Total | |
| ws.cell(row=row, column=1, value="TOTAL SOFT COSTS").font = Font(bold=True) | |
| ws.cell(row=row, column=2, value=r.get('TOTAL_SOFT_COST', 0)).number_format = '$#,##0' | |
| ws.cell(row=row, column=2).font = Font(bold=True) | |
| ws.cell(row=row, column=3, value=r.get('TOTAL_SOFT_COST_PER_GSF', 0)).number_format = '$#,##0.00' | |
| ws.cell(row=row, column=3).font = Font(bold=True) | |
| ws.column_dimensions['A'].width = 35 | |
| ws.column_dimensions['B'].width = 18 | |
| ws.column_dimensions['C'].width = 15 | |
| def create_financing_sheet(self, wb): | |
| """Create financing structure sheet""" | |
| ws = wb.create_sheet("Financing") | |
| r = self.formula_results | |
| ws['A1'] = "FINANCING STRUCTURE" | |
| ws['A1'].font = Font(bold=True, size=14) | |
| row = 3 | |
| ws[f'A{row}'] = "Pre-LTC Budget:" | |
| ws[f'B{row}'] = r.get('PRE_LTC_BUDGET', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 1 | |
| ws[f'A{row}'] = "LTC Ratio:" | |
| ws[f'B{row}'] = r.get('LTC_RATIO', 0) | |
| ws[f'B{row}'].number_format = '0.00%' | |
| row += 1 | |
| ws[f'A{row}'] = "Loan Amount:" | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = r.get('LOAN_AMOUNT', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'B{row}'].font = Font(bold=True) | |
| row += 2 | |
| ws[f'A{row}'] = "Financing Percentage:" | |
| ws[f'B{row}'] = r.get('FINANCING_PERCENTAGE', 0) | |
| ws[f'B{row}'].number_format = '0.00%' | |
| row += 1 | |
| ws[f'A{row}'] = "Financing Amount:" | |
| ws[f'B{row}'] = r.get('FINANCING_AMOUNT', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 2 | |
| ws[f'A{row}'] = "Interest Rate (bps + spread):" | |
| ws[f'B{row}'] = r.get('INTEREST_RATE_DECIMAL', 0) | |
| ws[f'B{row}'].number_format = '0.00%' | |
| row += 1 | |
| ws[f'A{row}'] = "Construction Interest:" | |
| ws[f'B{row}'] = r.get('CONSTRUCTION_INTEREST', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 2 | |
| ws[f'A{row}'] = "TOTAL DEBT" | |
| ws[f'A{row}'].font = Font(bold=True, size=12) | |
| ws[f'B{row}'] = r.get('TOTAL_DEBT', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'B{row}'].font = Font(bold=True, size=12) | |
| row += 1 | |
| ws[f'A{row}'] = "TOTAL EQUITY" | |
| ws[f'A{row}'].font = Font(bold=True, size=12) | |
| ws[f'B{row}'] = r.get('TOTAL_EQUITY', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'B{row}'].font = Font(bold=True, size=12) | |
| row += 2 | |
| ws[f'A{row}'] = "Debt Percentage:" | |
| ws[f'B{row}'] = r.get('DEBT_PERCENTAGE', 0) | |
| ws[f'B{row}'].number_format = '0.00%' | |
| row += 1 | |
| ws[f'A{row}'] = "Equity Percentage:" | |
| ws[f'B{row}'] = r.get('EQUITY_PERCENTAGE', 0) | |
| ws[f'B{row}'].number_format = '0.00%' | |
| ws.column_dimensions['A'].width = 35 | |
| ws.column_dimensions['B'].width = 20 | |
| def create_operations_sheet(self, wb): | |
| """Create operations and revenue sheet""" | |
| ws = wb.create_sheet("Operations") | |
| r = self.formula_results | |
| ws['A1'] = "OPERATIONS & REVENUE" | |
| ws['A1'].font = Font(bold=True, size=14) | |
| # Revenue Section | |
| row = 3 | |
| ws[f'A{row}'] = "REVENUE" | |
| ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") | |
| ws[f'A{row}'].font = Font(color="FFFFFF", bold=True) | |
| ws.merge_cells(f'A{row}:B{row}') | |
| row += 1 | |
| revenue_items = [ | |
| ("Gross Potential Free Market Rent", r.get('GROSS_POTENTIAL_FREE_MARKET_RENT', 0)), | |
| ("Gross Potential Affordable Rent", r.get('GROSS_POTENTIAL_AFFORDABLE_RENT', 0)), | |
| ("Other Income", r.get('OTHER_INCOME', 0)), | |
| ("Less: Vacancy Loss", -r.get('VACANCY_LOSS', 0)), | |
| ("Effective Gross Income", r.get('EFFECTIVE_GROSS_INCOME', 0)), | |
| ("Parking Income", r.get('PARKING_INCOME', 0)), | |
| ("Retail Revenue", r.get('RETAIL_REVENUE', 0)), | |
| ] | |
| for label, value in revenue_items: | |
| ws[f'A{row}'] = label | |
| if "Effective Gross" in label: | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = value | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 1 | |
| # Expense Section | |
| row += 1 | |
| ws[f'A{row}'] = "OPERATING EXPENSES" | |
| ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") | |
| ws[f'A{row}'].font = Font(color="FFFFFF", bold=True) | |
| ws.merge_cells(f'A{row}:C{row}') | |
| row += 1 | |
| ws[f'A{row}'] = "Expense Category" | |
| ws[f'B{row}'] = "Annual Amount" | |
| ws[f'C{row}'] = "% of EGI" | |
| for col in ['A', 'B', 'C']: | |
| ws[f'{col}{row}'].font = Font(bold=True) | |
| row += 1 | |
| # Safe division helper | |
| egi = r.get('EFFECTIVE_GROSS_INCOME', 0) | |
| def safe_pct(value): | |
| return value / egi if egi > 0 else 0 | |
| expense_items = [ | |
| ("Payroll", r.get('PAYROLL', 0), r.get('PAYROLL_PERCENTAGE_OF_EGI', 0)), | |
| ("Repairs & Maintenance", r.get('REPAIRS_AND_MAINTENANCE', 0), r.get('REPAIRS_AND_MAINTENANCE_PERCENTAGE_OF_EGI', 0)), | |
| ("Utilities", r.get('UTILITIES', 0), r.get('UTILITIES_PERCENTAGE_OF_EGI', 0)), | |
| ("Insurance", r.get('INSURANCE', 0), r.get('INSURANCE_PERCENTAGE_OF_EGI', 0)), | |
| ("Management Fee", r.get('MANAGEMENT_FEE', 0), safe_pct(r.get('MANAGEMENT_FEE', 0))), | |
| ("Real Estate Taxes", r.get('REAL_ESTATE_TAXES', 0), safe_pct(r.get('REAL_ESTATE_TAXES', 0))), | |
| ] | |
| for label, amount, pct in expense_items: | |
| ws[f'A{row}'] = label | |
| ws[f'B{row}'] = amount | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'C{row}'] = pct | |
| ws[f'C{row}'].number_format = '0.00%' | |
| row += 1 | |
| ws[f'A{row}'] = "TOTAL EXPENSES" | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = r.get('TOTAL_EXPENSES', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'B{row}'].font = Font(bold=True) | |
| total_exp_pct = safe_pct(r.get('TOTAL_EXPENSES', 0)) | |
| ws[f'C{row}'] = total_exp_pct | |
| ws[f'C{row}'].number_format = '0.00%' | |
| ws[f'C{row}'].font = Font(bold=True) | |
| row += 2 | |
| ws[f'A{row}'] = "NET OPERATING INCOME" | |
| ws[f'A{row}'].font = Font(bold=True, size=12) | |
| ws[f'B{row}'] = r.get('NET_OPERATING_INCOME', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'B{row}'].font = Font(bold=True, size=12) | |
| row += 2 | |
| ws[f'A{row}'] = "NOI per Unit:" | |
| ws[f'B{row}'] = r.get('NOI_PER_UNIT', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 1 | |
| ws[f'A{row}'] = "NOI per GSF:" | |
| ws[f'B{row}'] = r.get('NOI_PER_GSF', 0) | |
| ws[f'B{row}'].number_format = '$#,##0.00' | |
| ws.column_dimensions['A'].width = 35 | |
| ws.column_dimensions['B'].width = 20 | |
| ws.column_dimensions['C'].width = 15 | |
| def create_returns_sheet(self, wb): | |
| """Create investment returns and waterfall sheet""" | |
| ws = wb.create_sheet("Returns") | |
| r = self.formula_results | |
| ws['A1'] = "INVESTMENT RETURNS & EXIT ANALYSIS" | |
| ws['A1'].font = Font(bold=True, size=14) | |
| # Current Returns | |
| row = 3 | |
| ws[f'A{row}'] = "STABILIZED RETURNS" | |
| ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") | |
| ws[f'A{row}'].font = Font(color="FFFFFF", bold=True) | |
| ws.merge_cells(f'A{row}:B{row}') | |
| row += 1 | |
| returns_data = [ | |
| ("Net Operating Income", f"${r.get('NET_OPERATING_INCOME', 0):,.0f}"), | |
| ("Stabilized Yield on Cost", f"${r.get('STABILIZED_YIELD_ON_COST', 0):,.0f}"), | |
| ("Yield on Cost %", f"{r.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}"), | |
| ("Stabilized Yield on Cost %", f"{r.get('STABILIZED_YIELD_ON_COST_PERCENTAGE', 0):.2%}"), | |
| ("Annual Debt Service", f"${r.get('ANNUAL_DEBT_SERVICE', 0):,.0f}"), | |
| ("Cash-on-Cash Return", f"{r.get('CASH_ON_CASH_RETURN', 0):.2f}%"), | |
| ("Debt Service Coverage Ratio", f"{r.get('DEBT_SERVICE_COVERAGE_RATIO', 0):.2f}x"), | |
| ] | |
| for label, value in returns_data: | |
| ws[f'A{row}'] = label | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = value | |
| row += 1 | |
| # Exit Analysis | |
| row += 2 | |
| ws[f'A{row}'] = "EXIT ANALYSIS" | |
| ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") | |
| ws[f'A{row}'].font = Font(color="FFFFFF", bold=True) | |
| ws.merge_cells(f'A{row}:B{row}') | |
| row += 1 | |
| ws[f'A{row}'] = "Hold Period (months):" | |
| ws[f'B{row}'] = r.get('HOLD_PERIOD_MONTHS', 0) | |
| row += 1 | |
| ws[f'A{row}'] = "Exit Cap Rate:" | |
| ws[f'B{row}'] = r.get('EXIT_CAP_RATE_DECIMAL', 0) | |
| ws[f'B{row}'].number_format = '0.00%' | |
| row += 1 | |
| ws[f'A{row}'] = "Property Value on Sale:" | |
| ws[f'B{row}'] = r.get('PROPERTY_VALUE_ON_SALE', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 1 | |
| ws[f'A{row}'] = "Less: Sale Costs (2%):" | |
| ws[f'B{row}'] = -r.get('SALE_COST', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 1 | |
| ws[f'A{row}'] = "Net Sale Proceeds:" | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = r.get('NET_SALE_PROCEEDS', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'B{row}'].font = Font(bold=True) | |
| row += 1 | |
| ws[f'A{row}'] = "Less: Loan Payoff:" | |
| ws[f'B{row}'] = -r.get('TOTAL_DEBT', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 1 | |
| ws[f'A{row}'] = "Cash After Loan Payback:" | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = r.get('CASH_REMAINING_AFTER_LOAN_PAYBACK', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'B{row}'].font = Font(bold=True) | |
| # Equity Waterfall | |
| row += 2 | |
| ws[f'A{row}'] = "EQUITY WATERFALL" | |
| ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid") | |
| ws[f'A{row}'].font = Font(color="FFFFFF", bold=True) | |
| ws.merge_cells(f'A{row}:B{row}') | |
| row += 1 | |
| ws[f'A{row}'] = "GP Investment (20%):" | |
| ws[f'B{row}'] = r.get('GP_INVESTMENT', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 1 | |
| ws[f'A{row}'] = "LP Investment (80%):" | |
| ws[f'B{row}'] = r.get('LP_INVESTMENT', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 2 | |
| ws[f'A{row}'] = "GP Preferred Return + Principal:" | |
| ws[f'B{row}'] = r.get('GP_PREFERRED_RETURN_WITH_PRINCIPAL', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 1 | |
| ws[f'A{row}'] = "LP Preferred Return + Principal:" | |
| ws[f'B{row}'] = r.get('LP_PREFERRED_RETURN_WITH_PRINCIPAL', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 1 | |
| ws[f'A{row}'] = "Cash After Preferred:" | |
| ws[f'B{row}'] = r.get('CASH_REMAINING_AFTER_PREFERRED', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 2 | |
| ws[f'A{row}'] = "GP Promote (20%):" | |
| ws[f'B{row}'] = r.get('PROMOTE_ON_JOINT_VENTURE', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 1 | |
| ws[f'A{row}'] = "Cash to LP:" | |
| ws[f'B{row}'] = r.get('CASH_TO_LP', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| row += 2 | |
| ws[f'A{row}'] = "NET TO LP INVESTOR" | |
| ws[f'A{row}'].font = Font(bold=True, size=12) | |
| ws[f'B{row}'] = r.get('NET_TO_LP_INVESTOR', 0) | |
| ws[f'B{row}'].number_format = '$#,##0' | |
| ws[f'B{row}'].font = Font(bold=True, size=12) | |
| row += 2 | |
| ws[f'A{row}'] = "LP Multiple:" | |
| ws[f'A{row}'].font = Font(bold=True) | |
| ws[f'B{row}'] = r.get('LP_MULTIPLE', 0) | |
| ws[f'B{row}'].number_format = '0.00x' | |
| ws[f'B{row}'].font = Font(bold=True) | |
| row += 1 | |
| ws[f'A{row}'] = "LP IRR:" | |
| ws[f'A{row}'].font = Font(bold=True) | |
| irr_value = r.get('IRR_TO_LP', 0) | |
| # Handle complex numbers or invalid values | |
| if isinstance(irr_value, complex): | |
| irr_value = 0 # or use irr_value.real if you want the real component | |
| ws[f'B{row}'] = irr_value / 100 | |
| ws[f'B{row}'].number_format = '0.00%' | |
| ws[f'B{row}'].font = Font(bold=True) | |
| ws.column_dimensions['A'].width = 35 | |
| ws.column_dimensions['B'].width = 20 | |
| def run_full_pipeline(self, pdf_directory: str, output_excel: str = "Real_Estate_Financial_Model.xlsx"): | |
| """Execute complete pipeline""" | |
| print("=" * 60) | |
| print("REAL ESTATE FINANCIAL MODEL PIPELINE") | |
| print("=" * 60) | |
| # Step 1: Extract PDFs | |
| print("\n[Step 1/4] Extracting text from PDFs...") | |
| self.extract_all_pdfs(pdf_directory) | |
| print(f"✓ Extracted {len(self.extracted_data)} PDF files") | |
| # Step 2: Process with Gemini | |
| print("\n[Step 2/4] Extracting structured data with Gemini API...") | |
| structured_data = self.extract_structured_data() | |
| # NEW: Post-process to fill gaps | |
| print("\n[Step 2.5/4] Post-processing and filling estimates...") | |
| structured_data = self.post_process_extracted_data(structured_data) | |
| # Step 3: Calculate formulas | |
| print("\n[Step 3/4] Calculating all formulas...") | |
| self.calculate_all_formulas(structured_data) | |
| print(f"✓ Calculated {len(self.formula_results)} formula values") | |
| # Step 4: Generate Excel | |
| print("\n[Step 4/4] Generating Excel file...") | |
| self.generate_excel(output_excel) | |
| print("\n" + "=" * 60) | |
| print("PIPELINE COMPLETE!") | |
| print("=" * 60) | |
| print(f"\nKey Metrics:") | |
| print(f" Total Project Cost: ${self.formula_results.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0):,.0f}") | |
| print(f" Total Debt: ${self.formula_results.get('TOTAL_DEBT', 0):,.0f}") | |
| print(f" Total Equity: ${self.formula_results.get('TOTAL_EQUITY', 0):,.0f}") | |
| print(f" NOI: ${self.formula_results.get('NET_OPERATING_INCOME', 0):,.0f}") | |
| print(f" Yield on Cost: {self.formula_results.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}") | |
| irr_val = self.formula_results.get('IRR_TO_LP', 0) | |
| if isinstance(irr_val, complex): | |
| irr_val = irr_val.real | |
| print(f" LP IRR: {irr_val:.2f}%") | |
| print(f"\nExcel file: {output_excel}") | |
| return output_excel | |
| # ============= FastAPI Setup ============= | |
| app = FastAPI(title="Real Estate Financial Model API") | |
| app.add_middleware( | |
| CORSMiddleware, | |
| allow_origins=["*"], | |
| allow_credentials=True, | |
| allow_methods=["*"], | |
| allow_headers=["*"], | |
| ) | |
| # Hardcoded API Key | |
| GEMINI_API_KEY = "AIzaSyCy6GoBR724Hj9VyuW3hKM4N0P6liBOlDo" | |
| async def generate_model(files: List[UploadFile] = File(...)): | |
| """ | |
| API endpoint to process uploaded files and return Excel model | |
| Parameters: | |
| - files: List of PDF/XLSX files to process | |
| Returns: | |
| - Excel file with financial model | |
| """ | |
| if not files: | |
| raise HTTPException(status_code=400, detail="No files uploaded") | |
| try: | |
| # Create temporary directory | |
| temp_dir = tempfile.mkdtemp() | |
| # Save uploaded files | |
| saved_files = [] | |
| for upload_file in files: | |
| file_path = Path(temp_dir) / upload_file.filename | |
| # Write file content | |
| with open(file_path, "wb") as f: | |
| content = await upload_file.read() | |
| f.write(content) | |
| saved_files.append(str(file_path)) | |
| # Initialize pipeline | |
| pipeline = RealEstateModelPipeline(GEMINI_API_KEY) | |
| # Create output file | |
| output_file = Path(temp_dir) / "Real_Estate_Financial_Model.xlsx" | |
| # Run pipeline | |
| pipeline.run_full_pipeline(temp_dir, str(output_file)) | |
| # Return Excel file | |
| return FileResponse( | |
| path=str(output_file), | |
| filename="Real_Estate_Financial_Model.xlsx", | |
| media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | |
| ) | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}") | |
| finally: | |
| # Cleanup will happen when temp directory is garbage collected | |
| pass | |
| async def health_check(): | |
| """Health check endpoint""" | |
| return {"status": "healthy", "service": "Real Estate Financial Model API"} | |
| async def generate_model_with_summary(files: List[UploadFile] = File(...)): | |
| """ | |
| API endpoint that returns JSON summary with key metrics | |
| AND includes base64 encoded Excel file | |
| Returns JSON with: | |
| - Key financial metrics | |
| - Deal summary | |
| - Market analysis | |
| - Excel file as base64 | |
| """ | |
| if not files: | |
| raise HTTPException(status_code=400, detail="No files uploaded") | |
| temp_dir = None | |
| try: | |
| # Create temporary directory | |
| temp_dir = tempfile.mkdtemp() | |
| print(f"📁 Created temp directory: {temp_dir}") | |
| # Save uploaded files | |
| for upload_file in files: | |
| file_path = Path(temp_dir) / upload_file.filename | |
| print(f"💾 Saving file: {upload_file.filename}") | |
| with open(file_path, "wb") as f: | |
| content = await upload_file.read() | |
| f.write(content) | |
| print(f"✅ Saved {len(files)} files") | |
| # Initialize pipeline | |
| print("🔧 Initializing pipeline...") | |
| pipeline = RealEstateModelPipeline(GEMINI_API_KEY) | |
| # Create output file | |
| output_file = Path(temp_dir) / "Real_Estate_Financial_Model.xlsx" | |
| # Run pipeline | |
| print("🚀 Running pipeline...") | |
| pipeline.run_full_pipeline(temp_dir, str(output_file)) | |
| # Check if file was created | |
| if not output_file.exists(): | |
| raise Exception("Excel file was not generated") | |
| print(f"✅ Excel file created: {output_file}") | |
| # Get formula results | |
| r = pipeline.formula_results | |
| d = pipeline.structured_data | |
| # Build comprehensive JSON response | |
| response_data = { | |
| "status": "success", | |
| "message": "Financial model generated successfully", | |
| # KEY METRICS (Dashboard Top Cards) | |
| "key_metrics": { | |
| "purchase_price": { | |
| "value": r.get('PRICE', 0), | |
| "formatted": f"${r.get('PRICE', 0):,.0f}", | |
| "label": "Purchase Price" | |
| }, | |
| "noi": { | |
| "value": r.get('NET_OPERATING_INCOME', 0), | |
| "formatted": f"${r.get('NET_OPERATING_INCOME', 0):,.0f}", | |
| "label": "NOI" | |
| }, | |
| "cap_rate": { | |
| "value": r.get('CAP_RATE', 0), | |
| "formatted": f"{r.get('CAP_RATE', 0):.1f}%", | |
| "label": "Cap Rate" | |
| }, | |
| "irr_10yr": { | |
| "value": float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)), | |
| "formatted": f"{float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)):.1f}%", | |
| "label": "IRR (10 yr)" | |
| }, | |
| "dscr": { | |
| "value": r.get('DEBT_SERVICE_COVERAGE_RATIO', 0), | |
| "formatted": f"{r.get('DEBT_SERVICE_COVERAGE_RATIO', 0):.2f}x", | |
| "label": "DSCR" | |
| } | |
| }, | |
| # T12 SUMMARY (Extracted Summary) | |
| "t12_summary": { | |
| "gross_income": { | |
| "annual": r.get('EFFECTIVE_GROSS_INCOME', 0), | |
| "monthly": r.get('EFFECTIVE_GROSS_INCOME', 0) / 12, | |
| "source": "Calculated from rent roll" | |
| }, | |
| "operating_expenses": { | |
| "annual": r.get('TOTAL_OPERATING_EXPENSES', 0), | |
| "monthly": r.get('TOTAL_OPERATING_EXPENSES', 0) / 12, | |
| "expense_ratio": f"{(r.get('TOTAL_OPERATING_EXPENSES', 0) / r.get('EFFECTIVE_GROSS_INCOME', 1) if r.get('EFFECTIVE_GROSS_INCOME', 0) > 0 else 0):.1%}" | |
| }, | |
| "noi": { | |
| "annual": r.get('NET_OPERATING_INCOME', 0), | |
| "monthly": r.get('NET_OPERATING_INCOME', 0) / 12, | |
| "verified": True | |
| }, | |
| "occupancy": { | |
| "rate": f"{(1 - r.get('VACANCY_RATE', 0)):.0%}", | |
| "value": 1 - r.get('VACANCY_RATE', 0) | |
| } | |
| }, | |
| # PROPERTY INFO | |
| "property_info": { | |
| "address": d.get('property_info', {}).get('address', 'N/A'), | |
| "property_type": d.get('property_info', {}).get('property_type', 'N/A'), | |
| "units": r.get('UNITS', 0), | |
| "gross_sf": r.get('GROSS_SF', 0), | |
| "rentable_sf": r.get('RENTABLE_SF', 0), | |
| "building_efficiency": f"{r.get('BUILDING_EFFICIENCY', 0):.1%}" | |
| }, | |
| # DEAL VS MARKET AVERAGES | |
| "market_comparison": { | |
| "your_deal": { | |
| "cap_rate": r.get('CAP_RATE', 0) / 100, | |
| "irr": float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)) / 100, | |
| "dscr": r.get('DEBT_SERVICE_COVERAGE_RATIO', 0) | |
| }, | |
| "market_average": { | |
| "cap_rate": 0.055, # Example market average | |
| "irr": 0.12, | |
| "dscr": 1.25 | |
| } | |
| }, | |
| # AI OBSERVATIONS | |
| "ai_observations": [ | |
| { | |
| "observation": f"Cap Rate {'above' if r.get('CAP_RATE', 0) > 5.5 else 'below'} area average", | |
| "recommendation": "undervalued potential" if r.get('CAP_RATE', 0) > 5.5 else "premium pricing" | |
| }, | |
| { | |
| "observation": f"Expense ratio at {(r.get('TOTAL_OPERATING_EXPENSES', 0) / r.get('EFFECTIVE_GROSS_INCOME', 1) if r.get('EFFECTIVE_GROSS_INCOME', 0) > 0 else 0):.1%}", | |
| "recommendation": "within normal range" if 0.3 <= (r.get('TOTAL_OPERATING_EXPENSES', 0) / r.get('EFFECTIVE_GROSS_INCOME', 1)) <= 0.45 else "review expenses" | |
| }, | |
| { | |
| "observation": "DSCR meets financing standards" if r.get('DEBT_SERVICE_COVERAGE_RATIO', 0) >= 1.2 else "DSCR below standards", | |
| "recommendation": "approved" if r.get('DEBT_SERVICE_COVERAGE_RATIO', 0) >= 1.2 else "increase equity" | |
| }, | |
| { | |
| "observation": "NOI verification", | |
| "recommendation": "Full underwriting review recommended" | |
| } | |
| ], | |
| # CAPITAL STACK | |
| "capital_stack": { | |
| "total_project_cost": r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0), | |
| "total_debt": r.get('TOTAL_DEBT', 0), | |
| "total_equity": r.get('TOTAL_EQUITY', 0), | |
| "debt_percentage": r.get('DEBT_PERCENTAGE', 0), | |
| "equity_percentage": r.get('EQUITY_PERCENTAGE', 0), | |
| "ltc_ratio": r.get('LTC_RATIO', 0) | |
| }, | |
| # RETURNS ANALYSIS | |
| "returns": { | |
| "yield_on_cost": r.get('YIELD_ON_COST_PERCENTAGE', 0), | |
| "stabilized_yield_on_cost": r.get('STABILIZED_YIELD_ON_COST_PERCENTAGE', 0), | |
| "cash_on_cash_return": r.get('CASH_ON_CASH_RETURN', 0), | |
| "lp_multiple": r.get('LP_MULTIPLE', 0), | |
| "lp_irr": float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)), | |
| "hold_period_years": r.get('HOLD_PERIOD_MONTHS', 0) / 12 | |
| }, | |
| # OPERATING METRICS | |
| "operating_metrics": { | |
| "gross_potential_rent": r.get('GROSS_POTENTIAL_FREE_MARKET_RENT', 0) + r.get('GROSS_POTENTIAL_AFFORDABLE_RENT', 0), | |
| "vacancy_loss": r.get('VACANCY_LOSS', 0), | |
| "effective_gross_income": r.get('EFFECTIVE_GROSS_INCOME', 0), | |
| "operating_expenses": r.get('TOTAL_OPERATING_EXPENSES', 0), | |
| "noi": r.get('NET_OPERATING_INCOME', 0), | |
| "noi_per_unit": r.get('NOI_PER_UNIT', 0), | |
| "noi_per_sf": r.get('NOI_PER_GSF', 0) | |
| }, | |
| # COST BREAKDOWN | |
| "cost_breakdown": { | |
| "acquisition": { | |
| "total": r.get('TOTAL_ACQUISITION_COST', 0), | |
| "per_unit": r.get('TOTAL_ACQUISITION_COST_PER_UNIT', 0), | |
| "per_sf": r.get('TOTAL_ACQUISITION_COST_PER_GSF', 0) | |
| }, | |
| "construction": { | |
| "total": r.get('TOTAL_CONSTRUCTION_GMP', 0), | |
| "per_unit": r.get('CONSTRUCTION_GMP_PER_UNIT', 0), | |
| "per_sf": r.get('CONSTRUCTION_GMP_PER_GSF', 0) | |
| }, | |
| "soft_costs": { | |
| "total": r.get('TOTAL_SOFT_COST', 0), | |
| "per_sf": r.get('TOTAL_SOFT_COST_PER_GSF', 0) | |
| }, | |
| "contingency": r.get('CONTINGENCY_COST', 0), | |
| "reserves": r.get('OPERATING_RESERVE', 0) | |
| }, | |
| # SOURCE FILES | |
| "source_files": [ | |
| { | |
| "filename": file.filename, | |
| "size": len(await file.read()) if hasattr(file, 'read') else 0, | |
| "type": file.filename.split('.')[-1].upper() | |
| } | |
| for file in files | |
| ], | |
| # EXCEL FILE (BASE64 ENCODED) | |
| "excel_file": { | |
| "filename": "Real_Estate_Financial_Model.xlsx", | |
| "size_bytes": output_file.stat().st_size, | |
| "base64_data": base64.b64encode(output_file.read_bytes()).decode('utf-8') | |
| } | |
| } | |
| print("✅ Response JSON created") | |
| return JSONResponse(content=response_data) | |
| except Exception as e: | |
| import traceback | |
| error_details = traceback.format_exc() | |
| print("❌ ERROR OCCURRED:") | |
| print(error_details) | |
| raise HTTPException( | |
| status_code=500, | |
| detail={ | |
| "error": str(e), | |
| "type": type(e).__name__, | |
| "traceback": error_details | |
| } | |
| ) | |
| # NEW: Endpoint that returns ONLY JSON (no Excel file) | |
| async def analyze_only(files: List[UploadFile] = File(...)): | |
| """ | |
| Returns only JSON summary without Excel file | |
| Faster response, good for dashboards | |
| """ | |
| if not files: | |
| raise HTTPException(status_code=400, detail="No files uploaded") | |
| temp_dir = None | |
| try: | |
| temp_dir = tempfile.mkdtemp() | |
| for upload_file in files: | |
| file_path = Path(temp_dir) / upload_file.filename | |
| with open(file_path, "wb") as f: | |
| content = await upload_file.read() | |
| f.write(content) | |
| pipeline = RealEstateModelPipeline(GEMINI_API_KEY) | |
| # Extract and process data (no Excel generation) | |
| pipeline.extract_all_pdfs(temp_dir) | |
| structured_data = pipeline.extract_structured_data() | |
| structured_data = pipeline.post_process_extracted_data(structured_data) | |
| pipeline.calculate_all_formulas(structured_data) | |
| r = pipeline.formula_results | |
| d = pipeline.structured_data | |
| # Return lightweight JSON | |
| return JSONResponse(content={ | |
| "status": "success", | |
| "key_metrics": { | |
| "purchase_price": r.get('PRICE', 0), | |
| "noi": r.get('NET_OPERATING_INCOME', 0), | |
| "cap_rate": r.get('CAP_RATE', 0), | |
| "irr": float(r.get('IRR_TO_LP', 0).real if isinstance(r.get('IRR_TO_LP', 0), complex) else r.get('IRR_TO_LP', 0)), | |
| "dscr": r.get('DEBT_SERVICE_COVERAGE_RATIO', 0) | |
| }, | |
| "property_info": { | |
| "address": d.get('property_info', {}).get('address', 'N/A'), | |
| "units": r.get('UNITS', 0), | |
| "gross_sf": r.get('GROSS_SF', 0) | |
| }, | |
| "capital_stack": { | |
| "total_project_cost": r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0), | |
| "total_debt": r.get('TOTAL_DEBT', 0), | |
| "total_equity": r.get('TOTAL_EQUITY', 0) | |
| }, | |
| "returns": { | |
| "yield_on_cost": r.get('YIELD_ON_COST_PERCENTAGE', 0), | |
| "cash_on_cash": r.get('CASH_ON_CASH_RETURN', 0), | |
| "lp_multiple": r.get('LP_MULTIPLE', 0) | |
| } | |
| }) | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=str(e)) | |
| async def analyze_documents( | |
| files: List[UploadFile] = File(...), | |
| max_pages_per_doc: int = 2, | |
| confidence_threshold: float = 0.7 | |
| ): | |
| """ | |
| Industrial-scale document relevance analysis endpoint | |
| Analyzes uploaded documents to determine if they are relevant to real estate | |
| and metrics calculation. Uses first few pages for efficiency. | |
| Parameters: | |
| - files: List of document files (PDF, XLSX, DOCX, etc.) | |
| - max_pages_per_doc: Maximum number of pages to analyze per document (default: 2) | |
| - confidence_threshold: Minimum confidence score to mark as relevant (default: 0.7) | |
| Returns: | |
| - JSON with relevance analysis for each file | |
| """ | |
| if not files: | |
| raise HTTPException(status_code=400, detail="No files uploaded") | |
| # Validate input parameters | |
| if max_pages_per_doc < 1 or max_pages_per_doc > 10: | |
| raise HTTPException(status_code=400, detail="max_pages_per_doc must be between 1 and 10") | |
| if confidence_threshold < 0.1 or confidence_threshold > 1.0: | |
| raise HTTPException(status_code=400, detail="confidence_threshold must be between 0.1 and 1.0") | |
| temp_dir = None | |
| try: | |
| # Create temporary directory with unique name | |
| temp_dir = tempfile.mkdtemp(prefix="doc_analysis_") | |
| logger.info(f"Created temp directory: {temp_dir}") | |
| # Process files in parallel for better performance | |
| analysis_results = await process_documents_parallel( | |
| files, temp_dir, max_pages_per_doc, confidence_threshold | |
| ) | |
| # Generate overall summary | |
| summary = generate_analysis_summary(analysis_results) | |
| response = { | |
| "status": "success", | |
| "summary": summary, | |
| "analysis": analysis_results, | |
| "metadata": { | |
| "total_files": len(files), | |
| "relevant_files": summary["relevant_count"], | |
| "non_relevant_files": summary["non_relevant_count"], | |
| "confidence_threshold": confidence_threshold, | |
| "max_pages_analyzed": max_pages_per_doc, | |
| "processing_time_seconds": summary["processing_time_seconds"] | |
| } | |
| } | |
| logger.info(f"Document analysis completed: {summary['relevant_count']}/{len(files)} relevant files") | |
| return JSONResponse(content=response) | |
| except Exception as e: | |
| logger.error(f"Document analysis error: {str(e)}", exc_info=True) | |
| raise HTTPException( | |
| status_code=500, | |
| detail=f"Document analysis failed: {str(e)}" | |
| ) | |
| finally: | |
| # Cleanup temporary directory | |
| if temp_dir and os.path.exists(temp_dir): | |
| try: | |
| shutil.rmtree(temp_dir) | |
| logger.info(f"Cleaned up temp directory: {temp_dir}") | |
| except Exception as e: | |
| logger.warning(f"Failed to cleanup temp directory: {str(e)}") | |
| async def process_documents_parallel( | |
| files: List[UploadFile], | |
| temp_dir: str, | |
| max_pages: int, | |
| confidence_threshold: float | |
| ) -> List[Dict]: | |
| """Process documents in parallel for better performance""" | |
| import asyncio | |
| # Save all files first | |
| saved_paths = [] | |
| for upload_file in files: | |
| file_path = Path(temp_dir) / secure_filename(upload_file.filename) | |
| with open(file_path, "wb") as f: | |
| content = await upload_file.read() | |
| f.write(content) | |
| saved_paths.append((file_path, upload_file.filename, upload_file.content_type)) | |
| # Process files concurrently | |
| tasks = [] | |
| for file_path, filename, content_type in saved_paths: | |
| task = analyze_single_document( | |
| file_path, filename, content_type, max_pages, confidence_threshold | |
| ) | |
| tasks.append(task) | |
| # Use asyncio.gather for concurrent processing | |
| results = await asyncio.gather(*tasks, return_exceptions=True) | |
| # Handle exceptions in individual file processing | |
| processed_results = [] | |
| for i, result in enumerate(results): | |
| filename = saved_paths[i][1] | |
| if isinstance(result, Exception): | |
| logger.error(f"Error processing {filename}: {str(result)}") | |
| processed_results.append({ | |
| "filename": filename, | |
| "relevant": False, | |
| "confidence": 0.0, | |
| "error": str(result), | |
| "reason": "Processing failed", | |
| "key_indicators": [] | |
| }) | |
| else: | |
| processed_results.append(result) | |
| return processed_results | |
| async def analyze_single_document( | |
| file_path: Path, | |
| filename: str, | |
| content_type: str, | |
| max_pages: int, | |
| confidence_threshold: float | |
| ) -> Dict: | |
| """Analyze a single document for real estate relevance""" | |
| start_time = time.time() | |
| try: | |
| # Extract text from document (first N pages) | |
| extracted_text = await extract_document_text( | |
| file_path, content_type, max_pages | |
| ) | |
| if not extracted_text or len(extracted_text.strip()) < 50: | |
| return { | |
| "filename": filename, | |
| "relevant": False, | |
| "confidence": 0.0, | |
| "reason": "Insufficient or unreadable text content", | |
| "key_indicators": [], | |
| "text_sample": extracted_text[:200] if extracted_text else "" | |
| } | |
| # Analyze with Gemini | |
| analysis_result = await analyze_with_gemini(extracted_text, confidence_threshold) | |
| processing_time = time.time() - start_time | |
| return { | |
| "filename": filename, | |
| "relevant": analysis_result["relevant"], | |
| "confidence": analysis_result["confidence"], | |
| "reason": analysis_result["reason"], | |
| "key_indicators": analysis_result["key_indicators"], | |
| "document_type": analysis_result.get("document_type", "unknown"), | |
| "text_sample": extracted_text[:500], # First 500 chars for debugging | |
| "processing_time_seconds": round(processing_time, 2), | |
| "pages_analyzed": min(max_pages, estimate_page_count(file_path, content_type)) | |
| } | |
| except Exception as e: | |
| logger.error(f"Error analyzing {filename}: {str(e)}") | |
| return { | |
| "filename": filename, | |
| "relevant": False, | |
| "confidence": 0.0, | |
| "error": str(e), | |
| "reason": "Analysis error", | |
| "key_indicators": [] | |
| } | |
| async def extract_document_text(file_path: Path, content_type: str, max_pages: int) -> str: | |
| """Extract text from document with page limit""" | |
| file_extension = file_path.suffix.lower() | |
| try: | |
| if file_extension == '.pdf': | |
| return extract_pdf_text_limited(file_path, max_pages) | |
| elif file_extension in ['.xlsx', '.xls']: | |
| return extract_excel_text_limited(file_path, max_pages) | |
| elif file_extension in ['.docx', '.doc']: | |
| return extract_docx_text_limited(file_path, max_pages) | |
| elif file_extension in ['.txt', '.csv']: | |
| return extract_text_file_limited(file_path, max_pages) | |
| else: | |
| # Fallback: try to read as text | |
| return extract_text_file_limited(file_path, max_pages) | |
| except Exception as e: | |
| logger.warning(f"Text extraction failed for {file_path}: {str(e)}") | |
| return "" | |
| def extract_pdf_text_limited(pdf_path: Path, max_pages: int) -> str: | |
| """Extract text from first N pages of PDF""" | |
| try: | |
| from pdfminer.high_level import extract_text | |
| from pdfminer.layout import LAParams | |
| # Extract only first N pages | |
| text = extract_text( | |
| str(pdf_path), | |
| laparams=LAParams(), | |
| maxpages=max_pages | |
| ) | |
| return text.strip() | |
| except Exception as e: | |
| logger.error(f"PDF extraction error: {str(e)}") | |
| return "" | |
| def extract_excel_text_limited(excel_path: Path, max_sheets: int) -> str: | |
| """Extract text from first N sheets of Excel file""" | |
| try: | |
| import pandas as pd | |
| extracted_content = [] | |
| xlsx = pd.ExcelFile(excel_path) | |
| # Limit number of sheets processed | |
| sheets_to_process = xlsx.sheet_names[:max_sheets] | |
| for sheet_name in sheets_to_process: | |
| try: | |
| df = pd.read_excel(xlsx, sheet_name=sheet_name, nrows=50) # First 50 rows | |
| extracted_content.append(f"=== Sheet: {sheet_name} ===") | |
| extracted_content.append(df.to_string(index=False, max_rows=20)) | |
| extracted_content.append("\n") | |
| except Exception as e: | |
| logger.warning(f"Could not read sheet {sheet_name}: {str(e)}") | |
| continue | |
| return "\n".join(extracted_content) | |
| except Exception as e: | |
| logger.error(f"Excel extraction error: {str(e)}") | |
| return "" | |
| def extract_docx_text_limited(docx_path: Path, max_pages: int) -> str: | |
| """Extract text from first N pages of DOCX (estimated)""" | |
| try: | |
| import docx | |
| doc = docx.Document(str(docx_path)) | |
| full_text = [] | |
| # Estimate pages by paragraphs (rough approximation) | |
| paragraphs_processed = 0 | |
| paragraphs_per_page = 10 # Rough estimate | |
| for paragraph in doc.paragraphs: | |
| if paragraphs_processed >= max_pages * paragraphs_per_page: | |
| break | |
| if paragraph.text.strip(): | |
| full_text.append(paragraph.text) | |
| paragraphs_processed += 1 | |
| return "\n".join(full_text) | |
| except Exception as e: | |
| logger.error(f"DOCX extraction error: {str(e)}") | |
| return "" | |
| def extract_text_file_limited(file_path: Path, max_pages: int) -> str: | |
| """Extract limited text from text file""" | |
| try: | |
| lines_per_page = 50 | |
| max_lines = max_pages * lines_per_page | |
| with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: | |
| lines = [] | |
| for i, line in enumerate(f): | |
| if i >= max_lines: | |
| break | |
| lines.append(line) | |
| return "".join(lines) | |
| except Exception as e: | |
| logger.error(f"Text file extraction error: {str(e)}") | |
| return "" | |
| def estimate_page_count(file_path: Path, content_type: str) -> int: | |
| """Estimate number of pages in document""" | |
| # Simple estimation - can be enhanced based on file type | |
| return 1 | |
| async def analyze_with_gemini(text: str, confidence_threshold: float) -> Dict: | |
| """Use Gemini to analyze document relevance""" | |
| prompt = f""" | |
| Analyze this document text and determine if it's relevant to REAL ESTATE and METRICS CALCULATION. | |
| CRITICAL: You must respond with ONLY a JSON object, no other text. | |
| DOCUMENT TEXT (first few pages): | |
| {text[:8000]} # Limit text to avoid token limits | |
| ANALYSIS INSTRUCTIONS: | |
| 1. Determine if this document is relevant to real estate business, investments, or metrics | |
| 2. Identify key indicators that support your decision | |
| 3. Provide a confidence score (0.0 to 1.0) | |
| 4. Classify the document type if possible | |
| RELEVANCE CRITERIA: | |
| - Real estate related: property listings, financial models, market analysis, offering memorandums, rent rolls, operating statements | |
| - Metrics calculation: financial projections, ROI analysis, cap rates, NOI calculations, cash flow analysis | |
| - Real estate development: construction costs, pro formas, feasibility studies | |
| NON-RELEVANT EXAMPLES: | |
| - Resumes, personal documents, marketing brochures for non-real estate | |
| - Academic papers unrelated to real estate | |
| - General business documents without real estate focus | |
| REQUIRED JSON RESPONSE FORMAT: | |
| {{ | |
| "relevant": true/false, | |
| "confidence": 0.85, | |
| "reason": "Brief explanation of relevance decision", | |
| "key_indicators": ["indicator1", "indicator2", ...], | |
| "document_type": "offering_memorandum|financial_statement|market_report|rent_roll|unknown" | |
| }} | |
| Confidence threshold for relevance: {confidence_threshold} | |
| """ | |
| try: | |
| # Initialize Gemini | |
| genai.configure(api_key=GEMINI_API_KEY) | |
| model = genai.GenerativeModel('gemini-2.0-flash') | |
| response = await asyncio.get_event_loop().run_in_executor( | |
| None, | |
| lambda: model.generate_content(prompt) | |
| ) | |
| response_text = response.text.strip() | |
| # Clean JSON response | |
| if "```json" in response_text: | |
| response_text = response_text.split("```json")[1].split("```")[0].strip() | |
| elif "```" in response_text: | |
| response_text = response_text.split("```")[1].split("```")[0].strip() | |
| result = json.loads(response_text) | |
| # Validate response structure | |
| required_fields = ["relevant", "confidence", "reason", "key_indicators"] | |
| for field in required_fields: | |
| if field not in result: | |
| raise ValueError(f"Missing field in Gemini response: {field}") | |
| # Apply confidence threshold | |
| if result["confidence"] < confidence_threshold: | |
| result["relevant"] = False | |
| result["reason"] = f"Confidence ({result['confidence']}) below threshold ({confidence_threshold})" | |
| return result | |
| except Exception as e: | |
| logger.error(f"Gemini analysis failed: {str(e)}") | |
| # Fallback: simple keyword-based analysis | |
| return perform_fallback_analysis(text, confidence_threshold) | |
| def perform_fallback_analysis(text: str, confidence_threshold: float) -> Dict: | |
| """Fallback analysis using keyword matching when Gemini fails""" | |
| real_estate_keywords = [ | |
| 'real estate', 'property', 'rent', 'lease', 'mortgage', 'cap rate', | |
| 'noi', 'net operating income', 'cash flow', 'pro forma', 'offering memorandum', | |
| 'rent roll', 'operating expenses', 'vacancy rate', 'occupancy', 'square feet', | |
| 'acquisition', 'disposition', 'broker', 'listing', 'appraisal', 'valuation', | |
| 'construction', 'development', 'zoning', 'permit', 'tenant', 'landlord' | |
| ] | |
| metrics_keywords = [ | |
| 'metrics', 'kpi', 'key performance indicator', 'roi', 'return on investment', | |
| 'irr', 'internal rate of return', 'dscr', 'debt service coverage ratio', | |
| 'ltv', 'loan to value', 'calculation', 'analysis', 'projection', 'forecast', | |
| 'financial model', 'spreadsheet', 'excel', 'numbers', 'data', 'statistics' | |
| ] | |
| text_lower = text.lower() | |
| # Count keyword matches | |
| re_matches = sum(1 for keyword in real_estate_keywords if keyword in text_lower) | |
| metrics_matches = sum(1 for keyword in metrics_keywords if keyword in text_lower) | |
| total_matches = re_matches + metrics_matches | |
| # Calculate confidence based on matches | |
| confidence = min(1.0, total_matches / 10) # Normalize | |
| relevant = confidence >= confidence_threshold and (re_matches >= 2 or metrics_matches >= 2) | |
| key_indicators = [] | |
| if re_matches > 0: | |
| key_indicators.append(f"Real estate terms found: {re_matches}") | |
| if metrics_matches > 0: | |
| key_indicators.append(f"Metrics terms found: {metrics_matches}") | |
| return { | |
| "relevant": relevant, | |
| "confidence": round(confidence, 2), | |
| "reason": f"Keyword analysis: {re_matches} real estate terms, {metrics_matches} metrics terms", | |
| "key_indicators": key_indicators, | |
| "document_type": "unknown" | |
| } | |
| def generate_analysis_summary(analysis_results: List[Dict]) -> Dict: | |
| """Generate summary of document analysis""" | |
| relevant_files = [r for r in analysis_results if r.get('relevant', False)] | |
| non_relevant_files = [r for r in analysis_results if not r.get('relevant', False)] | |
| # Calculate average confidence | |
| confidences = [r.get('confidence', 0) for r in analysis_results if r.get('confidence') is not None] | |
| avg_confidence = sum(confidences) / len(confidences) if confidences else 0 | |
| # Document type distribution | |
| doc_types = {} | |
| for result in analysis_results: | |
| doc_type = result.get('document_type', 'unknown') | |
| doc_types[doc_type] = doc_types.get(doc_type, 0) + 1 | |
| return { | |
| "relevant_count": len(relevant_files), | |
| "non_relevant_count": len(non_relevant_files), | |
| "relevance_rate": len(relevant_files) / len(analysis_results) if analysis_results else 0, | |
| "average_confidence": round(avg_confidence, 3), | |
| "document_type_breakdown": doc_types, | |
| "processing_time_seconds": sum(r.get('processing_time_seconds', 0) for r in analysis_results) | |
| } | |
| def secure_filename(filename: str) -> str: | |
| """Sanitize filename for security""" | |
| import re | |
| filename = re.sub(r'[^a-zA-Z0-9_.-]', '_', filename) | |
| return filename | |
| def process_pdfs(pdf_files): | |
| """Process uploaded PDFs and return Excel file""" | |
| if not pdf_files: | |
| return None, "⚠️ Please upload at least one PDF/XLSX file" | |
| try: | |
| # Create temporary directory for PDFs | |
| temp_dir = tempfile.mkdtemp() | |
| # Save uploaded PDFs to temp directory | |
| for pdf_file in pdf_files: | |
| if pdf_file is None: | |
| continue | |
| dest_path = Path(temp_dir) / Path(pdf_file.name).name | |
| shutil.copy(pdf_file.name, dest_path) | |
| # Initialize pipeline with hardcoded API key | |
| pipeline = RealEstateModelPipeline(GEMINI_API_KEY) | |
| # Create output file in temp directory | |
| output_file = Path(temp_dir) / "Real_Estate_Financial_Model.xlsx" | |
| # Run pipeline | |
| result = pipeline.run_full_pipeline(temp_dir, str(output_file)) | |
| # Generate summary text | |
| r = pipeline.formula_results | |
| irr_val = r.get('IRR_TO_LP', 0) | |
| if isinstance(irr_val, complex): | |
| irr_val = irr_val.real | |
| summary = f"""✅ **Processing Complete!** | |
| 📊 **Key Metrics:** | |
| • Total Project Cost: ${r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0):,.0f} | |
| • Total Debt: ${r.get('TOTAL_DEBT', 0):,.0f} | |
| • Total Equity: ${r.get('TOTAL_EQUITY', 0):,.0f} | |
| • NOI: ${r.get('NET_OPERATING_INCOME', 0):,.0f} | |
| • Yield on Cost: {r.get('YIELD_ON_COST_PERCENTAGE', 0):.2%} | |
| • LP IRR: {float(irr_val):.2f}% | |
| ✨ Download your Excel file below ⬇️ | |
| """ | |
| return str(output_file), summary | |
| except Exception as e: | |
| import traceback | |
| error_msg = f"❌ **Error occurred:**\n\n{str(e)}\n\n**Details:**\n{traceback.format_exc()}" | |
| return None, error_msg | |
| # Create Gradio interface with better styling | |
| with gr.Blocks( | |
| title="Real Estate Financial Model Generator", | |
| theme=gr.themes.Soft(), | |
| css=""" | |
| .gradio-container { | |
| max-width: 1200px; | |
| margin: 0 auto; | |
| padding: 20px; | |
| } | |
| """ | |
| ) as demo: | |
| gr.Markdown(""" | |
| # 🏢 Real Estate Financial Model Generator | |
| Upload your PDF/XLSX documents and generate a comprehensive financial model in Excel format. | |
| """) | |
| with gr.Row(): | |
| with gr.Column(scale=2): | |
| pdf_input = gr.File( | |
| label="Upload PDF/XLSX Files", | |
| file_count="multiple", | |
| file_types=[".pdf", ".xlsx", ".xls"], | |
| type="filepath", | |
| elem_classes=["file-upload"] | |
| ) | |
| process_btn = gr.Button( | |
| "Generate Financial Model", | |
| variant="primary", | |
| size="lg" | |
| ) | |
| with gr.Column(scale=1): | |
| gr.Markdown(""" | |
| ### Supported Formats | |
| - **PDF**: Offering Memorandum, Reports | |
| - **XLSX/XLS**: Financial statements, data tables | |
| ### Required Documents | |
| - Offering Memorandum | |
| - Operating Expenses Summary | |
| - Sales Comps | |
| - Rent Comps | |
| - Market Report | |
| - Demographics Overview | |
| ### Features | |
| - Automated data extraction | |
| - Formula calculations | |
| - Professional Excel output | |
| - Multiple analysis sheets | |
| """) | |
| with gr.Row(): | |
| output_text = gr.Textbox( | |
| label="Processing Results", | |
| lines=15, | |
| interactive=False, | |
| elem_classes=["output-text"] | |
| ) | |
| with gr.Row(): | |
| excel_output = gr.File( | |
| label="Download Excel File", | |
| interactive=False | |
| ) | |
| # Connect the button | |
| process_btn.click( | |
| fn=process_pdfs, | |
| inputs=[pdf_input], | |
| outputs=[excel_output, output_text], | |
| api_name="process" | |
| ) | |
| gr.Markdown(""" | |
| --- | |
| ### Tips | |
| - Ensure PDF files are readable and not scanned images | |
| - Use descriptive filenames (e.g., "Offering_Memorandum.pdf") | |
| - Processing may take 30-60 seconds depending on file sizes | |
| - Check the **Processing Results** section for detailed feedback | |
| ### API Endpoints | |
| - `POST /api/generate-model` - Generate Excel only | |
| - `POST /api/generate-model-with-summary` - Generate Excel + JSON summary | |
| - `POST /api/analyze-only` - Generate JSON summary only | |
| - `GET /api/health` - Health check | |
| """) | |
| # For Hugging Face Spaces, we need to expose the app | |
| if __name__ == "__main__": | |
| # For Hugging Face Spaces deployment | |
| import os | |
| # Check if running on Hugging Face Spaces | |
| is_hf_space = os.environ.get('SPACE_ID') is not None | |
| if is_hf_space: | |
| # On HF Spaces: Mount Gradio to FastAPI and launch together | |
| app = gr.mount_gradio_app(app, demo, path="/") | |
| uvicorn.run( | |
| app, | |
| host="0.0.0.0", | |
| port=7860, | |
| log_level="info" | |
| ) | |
| else: | |
| # Local development: Launch separately | |
| import threading | |
| # Start FastAPI in background thread | |
| def run_fastapi(): | |
| uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info") | |
| api_thread = threading.Thread(target=run_fastapi, daemon=True) | |
| api_thread.start() | |
| # Launch Gradio on main thread | |
| demo.launch( | |
| server_name="0.0.0.0", | |
| server_port=7861, | |
| share=False | |
| ) |