mlbench123 committed on
Commit
dbf6bf0
·
verified ·
1 Parent(s): ca324e4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1668 -0
app.py ADDED
@@ -0,0 +1,1668 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tempfile
3
+ import shutil
4
+ from pathlib import Path
5
+
6
+ """
7
+ Real Estate Financial Model Pipeline
8
+ Extracts data from PDFs, solves formulas with Gemini API, generates Excel
9
+ """
10
+
11
+ import re
12
+ import json
13
+ from pathlib import Path
14
+ from typing import Dict, Any, List, Optional
15
+ import openpyxl
16
+ from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
17
+ from openpyxl.utils import get_column_letter
18
+ from pdfminer.high_level import extract_text
19
+ import google.generativeai as genai
20
+
21
+ class RealEstateModelPipeline:
22
def __init__(self, gemini_api_key: str):
    """Set up the Gemini client and the pipeline's empty result stores.

    Args:
        gemini_api_key: API key passed to ``genai.configure``.
    """
    # Register the key with the SDK before building the model handle.
    genai.configure(api_key=gemini_api_key)
    self.model = genai.GenerativeModel('gemini-2.0-flash')

    # Per-run state; each attribute starts as its own empty dict.
    for attribute in ('extracted_data', 'formula_results', 'structured_data'):
        setattr(self, attribute, {})
29
+
30
def safe_divide(self, numerator: float, denominator: float, default: float = 0) -> float:
    """Divide ``numerator`` by ``denominator``, returning ``default`` on failure.

    Args:
        numerator: Value to divide.
        denominator: Value to divide by; ``0`` and ``None`` yield ``default``.
        default: Value returned when the division cannot be performed.

    Returns:
        ``numerator / denominator``, or ``default`` when the denominator is
        zero/``None`` or the operands cannot be divided (e.g. a ``None`` or
        string numerator).
    """
    try:
        if denominator is None or denominator == 0:
            return default
        return numerator / denominator
    except (ArithmeticError, TypeError, ValueError):
        # Only swallow errors that mean "these operands cannot be divided".
        # A bare ``except:`` would also hide KeyboardInterrupt/SystemExit.
        return default
38
+
39
def extract_pdf_text(self, pdf_path: str) -> str:
    """Return the stripped text of one PDF, or ``""`` if extraction fails.

    Args:
        pdf_path: Path handed to pdfminer's ``extract_text``.

    Returns:
        The extracted text with surrounding whitespace removed. Any exception
        raised while extracting is printed and an empty string is returned.
    """
    try:
        return extract_text(pdf_path).strip()
    except Exception as err:
        # Best-effort: one unreadable PDF must not abort the whole run.
        print(f"Error extracting {pdf_path}: {err}")
    return ""
47
+
48
def extract_all_pdfs(self, pdf_directory: str) -> Dict[str, str]:
    """Extract text from every ``*.pdf`` file directly inside a directory.

    Files are processed in sorted path order so that the returned mapping,
    the dump file and any prompt built from them are the same on every run
    (``Path.glob`` alone yields entries in filesystem-dependent order).

    Side effects:
        * Writes each file name and its text to ``output_file_3.txt`` in the
          current working directory (overwritten on every call).
        * Stores the result on ``self.extracted_data``.

    Args:
        pdf_directory: Directory to scan (non-recursive).

    Returns:
        Mapping of file stem (name without ``.pdf``) to extracted text.
    """
    pdf_dir = Path(pdf_directory)
    extracted_texts = {}
    separator = "\n\n" + "=" * 80 + "\n\n"

    with open('output_file_3.txt', "w", encoding="utf-8") as f:
        for pdf_file in sorted(pdf_dir.glob("*.pdf")):
            print(f"Extracting: {pdf_file.name}")
            text = self.extract_pdf_text(str(pdf_file))
            extracted_texts[pdf_file.stem] = text

            # Write each PDF's name and extracted text to the dump file.
            f.write(f"=== {pdf_file.name} ===\n")
            f.write(text)
            f.write(separator)

    self.extracted_data = extracted_texts
    return extracted_texts
67
+
68
def extract_address_fallback(self, pdf_texts: Dict[str, str]) -> Optional[str]:
    """Find the property address with a regex, without calling Gemini.

    Only documents whose name contains "offering" (case-insensitive) are
    searched. The address is the text following ``Address:`` up to the end
    of the line, a following ``Property Type:`` label, or the end of the
    document.

    Args:
        pdf_texts: Mapping of document name to extracted text.

    Returns:
        The first non-empty address found, or ``None`` if there is none.
    """
    for name, text in pdf_texts.items():
        if 'offering' not in name.lower():
            continue

        # ``$`` matters: extracted text is stripped upstream, so an address
        # on the last line has no trailing newline to terminate the match.
        match = re.search(
            r'Address:\s*(.+?)(?:\n|Property Type:|$)', text, re.IGNORECASE
        )
        if not match:
            continue

        address = match.group(1).strip()
        if address:
            print(f" ✓ Extracted address via fallback: {address}")
            return address
    return None
79
+
80
def create_gemini_prompt(self, pdf_texts: Dict[str, str]) -> str:
    """Build the prompt asking Gemini to return the PDFs' data as JSON.

    The prompt has three parts: a header listing each document with its
    character count, the full text of every document between ``=`` rulers,
    and static instructions describing the required JSON structure.

    Args:
        pdf_texts: Mapping of document name to extracted text.

    Returns:
        The complete prompt string.
    """
    # One summary line per document so the model knows which files exist.
    pdf_summary = "\n".join(
        f"- {name}: {len(text)} characters" for name, text in pdf_texts.items()
    )

    prompt = f"""You are a real estate financial analyst. Extract ALL numerical data from the following PDF texts and return it as a JSON object.

CRITICAL INSTRUCTIONS:
1. ONLY extract data that is EXPLICITLY stated in the PDFs - DO NOT estimate or make up values
2. For missing values, use null (not 0)
3. Pay close attention to the specific document names - each contains different information
4. Extract exact numbers as they appear in the documents

AVAILABLE DOCUMENTS:
{pdf_summary}

PDF CONTENTS:
"""
    for name, text in pdf_texts.items():
        prompt += f"\n{'='*60}\n=== {name} ===\n{'='*60}\n{text}\n"

    # Plain (non-f) string: the JSON template below contains literal braces.
    prompt += """

EXTRACTION INSTRUCTIONS BY DOCUMENT:

FROM "Offering_Memorandum.pdf":
- Extract: Address (full address after "Address:")
- Extract: Property Type (after "Property Type:")
- Extract: Units (number after "Units:")

FROM "Operating_Expenses_Summary.pdf" (if present):
- Extract EXACT annual amounts for:
* Real Estate Taxes
* Insurance
* Utilities
* Repairs & Maint. (or Repairs & Maintenance)
* Management Fee
* Payroll
* Administrative (if listed)
* Professional Fees (if listed)

FROM "Sales_Comps.pdf":
- Extract all Price/SF values
- Calculate average_price_per_sf = average of all Price/SF values
- Count total number of comps

FROM "Rent_Comps.pdf" (if present):
- Extract all rent values (numbers before @ symbol)
- Calculate average_rent = average of all rent values
- Count total number of rent comps

FROM "Market_Report.pdf":
- Extract: Vacancy Rate (percentage)
- Extract: Rent Growth (YoY) (percentage)

FROM "Demographics_Overview.pdf":
- Extract: Population (3-mi) - the number
- Extract: Median HH Income - the dollar amount
- Extract: Transit Score - the number

REQUIRED JSON OUTPUT STRUCTURE:
{
"property_info": {
"address": "EXTRACT FROM Offering_Memorandum.pdf",
"property_type": "EXTRACT FROM Offering_Memorandum.pdf",
"units": EXTRACT_NUMBER_FROM_Offering_Memorandum.pdf,
"gross_sf": null,
"rentable_sf": null,
"retail_sf": null
},
"acquisition": {
"land_value": null,
"price": null,
"closing_costs": null
},
"construction": {
"construction_cost_per_gsf": null,
"construction_months": null
},
"soft_costs": {
"architecture_and_interior_cost": null,
"structural_engineering_cost": null,
"mep_engineering_cost": null,
"civil_engineering_cost": null,
"controlled_inspections_cost": null,
"surveying_cost": null,
"utilities_connection_cost": null,
"advertising_and_marketing_cost": null,
"accounting_cost": null,
"monitoring_cost": null,
"ff_and_e_cost": null,
"environmental_consultant_fee": null,
"miscellaneous_consultants_fee": null,
"general_legal_cost": null,
"real_estate_taxes_during_construction": null,
"miscellaneous_admin_cost": null,
"ibr_cost": null,
"project_team_cost": null,
"pem_fees": null,
"bank_fees": null
},
"financing": {
"ltc_ratio": null,
"financing_percentage": null,
"interest_rate_basis_points": null,
"financing_cost": null,
"interest_reserve": null
},
"operating_expenses": {
"payroll": EXTRACT_FROM_Operating_Expenses_Summary.pdf,
"repairs_and_maintenance": EXTRACT_FROM_Operating_Expenses_Summary.pdf,
"utilities": EXTRACT_FROM_Operating_Expenses_Summary.pdf,
"administrative": EXTRACT_FROM_Operating_Expenses_Summary.pdf_OR_null,
"professional_fees": EXTRACT_FROM_Operating_Expenses_Summary.pdf_OR_null,
"insurance": EXTRACT_FROM_Operating_Expenses_Summary.pdf,
"property_taxes": EXTRACT_FROM_Operating_Expenses_Summary.pdf,
"management_fee_percentage": null
},
"revenue": {
"free_market_rent_psf": null,
"affordable_rent_psf": null,
"other_income_per_unit": null,
"vacancy_rate": null,
"retail_rent_psf": null,
"parking_income": null
},
"sales_comps": {
"average_price_per_sf": CALCULATE_AVERAGE_FROM_Sales_Comps.pdf,
"comp_count": COUNT_FROM_Sales_Comps.pdf
},
"rent_comps": {
"average_rent": CALCULATE_AVERAGE_FROM_Rent_Comps.pdf_IF_EXISTS,
"comp_count": COUNT_FROM_Rent_Comps.pdf_IF_EXISTS
},
"market_data": {
"vacancy_rate": EXTRACT_FROM_Market_Report.pdf,
"rent_growth_yoy": EXTRACT_FROM_Market_Report.pdf,
"median_hh_income": EXTRACT_FROM_Demographics_Overview.pdf,
"population_3mi": EXTRACT_FROM_Demographics_Overview.pdf,
"transit_score": EXTRACT_FROM_Demographics_Overview.pdf
},
"projections": {
"lease_up_months": null,
"stabilization_months": null,
"revenue_inflation_rate": null,
"expense_inflation_rate": null,
"hold_period_months": null,
"exit_cap_rate_decimal": null,
"sale_cost_percentage": null
},
"equity_structure": {
"gp_pref_rate": null,
"lp_pref_rate": null,
"promote_percentage": null
}
}

EXAMPLES OF CORRECT EXTRACTION:

Example 1 - From your Offering_Memorandum.pdf:
"Address: 455 Atlantic Ave, Brooklyn, NY"
→ "address": "455 Atlantic Ave, Brooklyn, NY"

"Property Type: Retail"
→ "property_type": "Retail"

"Units: 7"
→ "units": 7

Example 2 - From your Operating_Expenses_Summary.pdf:
"Real Estate Taxes $91940.2"
→ "property_taxes": 91940.2

"Insurance $16778.94"
→ "insurance": 16778.94

"Payroll $44948.21"
→ "payroll": 44948.21

Example 3 - From your Sales_Comps.pdf:
"Price/SF" column shows: $880, $919, $673, $894
→ "average_price_per_sf": 841.5 (average of these 4 values)
→ "comp_count": 4

Example 4 - From your Market_Report.pdf:
"Vacancy Rate: 5.71%"
→ "vacancy_rate": 0.0571

"Rent Growth (YoY): 4.18%"
→ "rent_growth_yoy": 0.0418

CRITICAL RULES:
1. Use EXACT numbers from the PDFs - don't round or modify
2. Convert percentages to decimals (5.71% → 0.0571)
3. Remove dollar signs and commas from numbers ($91,940.2 → 91940.2)
4. If a field is not in ANY PDF, use null
5. Double-check the document name before extracting - make sure you're looking at the right PDF

Return ONLY valid JSON with no explanations, comments, or markdown formatting."""

    return prompt
282
+
283
+ def extract_structured_data(self) -> Dict[str, Any]:
284
+ """Use Gemini to extract structured data from PDFs"""
285
+ print("\nProcessing with Gemini API...")
286
+
287
+ # NEW: Try simple extraction first
288
+ fallback_address = self.extract_address_fallback(self.extracted_data)
289
+
290
+ prompt = self.create_gemini_prompt(self.extracted_data)
291
+
292
+ try:
293
+ response = self.model.generate_content(prompt)
294
+ response_text = response.text.strip()
295
+
296
+ # Clean JSON if wrapped in markdown
297
+ if "```json" in response_text:
298
+ response_text = response_text.split("```json")[1].split("```")[0].strip()
299
+ elif "```" in response_text:
300
+ response_text = response_text.split("```")[1].split("```")[0].strip()
301
+
302
+ data = json.loads(response_text)
303
+
304
+ # NEW: Override with fallback if Gemini failed
305
+ if fallback_address and (not data.get('property_info', {}).get('address') or
306
+ data['property_info']['address'] == 'adress'):
307
+ data['property_info']['address'] = fallback_address
308
+ print(f" βœ“ Used fallback address: {fallback_address}")
309
+
310
+ print("βœ“ Successfully extracted structured data")
311
+ return data
312
+
313
+ except Exception as e:
314
+ print(f"Error with Gemini API: {e}")
315
+ data = self.get_default_data_structure()
316
+ # Use fallback even in error case
317
+ if fallback_address:
318
+ data['property_info']['address'] = fallback_address
319
+ return data
320
+
321
+
322
def post_process_extracted_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
    """Fill missing values in the extracted data with default estimates.

    ``data`` is modified in place and also returned. Sections that are
    missing or null are created as dicts attached to ``data``, so their
    defaults are kept. A missing or null ``units`` falls back to 32 and is
    written back to ``property_info``.

    Except for soft costs, ``retail_sf`` and ``units`` (which are replaced
    only when ``None``), a value is treated as missing when it is falsy, so
    an extracted ``0`` is replaced by the default as well.

    Args:
        data: Structured data as returned by ``extract_structured_data``.

    Returns:
        The same ``data`` object with all defaults applied.
    """

    def section(name: str) -> Dict[str, Any]:
        """Return ``data[name]`` as a dict, attaching a new one if absent/null."""
        current = data.get(name)
        if not isinstance(current, dict):
            current = {}
            data[name] = current
        return current

    def fill_missing(target: Dict[str, Any], defaults: Dict[str, Any]) -> None:
        """Set each default whose current value is missing or falsy."""
        for key, value in defaults.items():
            if not target.get(key):
                target[key] = value

    # --- Property info -----------------------------------------------------
    property_info = section('property_info')
    units = property_info.get('units')
    if units is None:
        # The prompt tells the model to use null for unknown values.
        units = 32
        property_info['units'] = units

    # Estimate square footage if missing: 1000 SF per unit, 85% rentable.
    if not property_info.get('gross_sf'):
        property_info['gross_sf'] = units * 1000
    if not property_info.get('rentable_sf'):
        property_info['rentable_sf'] = int(property_info['gross_sf'] * 0.85)
    if property_info.get('retail_sf') is None:
        property_info['retail_sf'] = 0

    gross_sf = property_info['gross_sf']

    # --- Construction ------------------------------------------------------
    construction = section('construction')
    fill_missing(construction, {
        'construction_cost_per_gsf': 338,
        'construction_months': 18,
    })

    # --- Acquisition -------------------------------------------------------
    acquisition = section('acquisition')
    if not acquisition.get('land_value'):
        # Prefer an estimate from the sales comps; otherwise a flat default.
        sales_comps = data.get('sales_comps')
        avg_psf = sales_comps.get('average_price_per_sf') if isinstance(sales_comps, dict) else None
        acquisition['land_value'] = avg_psf * gross_sf if avg_psf else 6000000
    if not acquisition.get('price'):
        acquisition['price'] = acquisition['land_value']
    if not acquisition.get('closing_costs'):
        acquisition['closing_costs'] = 150000

    # --- Soft costs --------------------------------------------------------
    total_hard_cost = construction['construction_cost_per_gsf'] * gross_sf
    soft_cost_estimate = total_hard_cost * 0.15  # 15% of hard costs

    # Share of ``soft_cost_estimate`` assigned to each line item.
    # NOTE(review): the shares add up to 1.25, so a fully defaulted soft-cost
    # budget is 18.75% of hard costs, not 15%. Values kept as-is; confirm.
    soft_cost_shares = {
        'architecture_and_interior_cost': 0.15,
        'structural_engineering_cost': 0.08,
        'mep_engineering_cost': 0.10,
        'civil_engineering_cost': 0.05,
        'controlled_inspections_cost': 0.03,
        'surveying_cost': 0.02,
        'utilities_connection_cost': 0.05,
        'advertising_and_marketing_cost': 0.06,
        'accounting_cost': 0.03,
        'monitoring_cost': 0.02,
        'ff_and_e_cost': 0.10,
        'environmental_consultant_fee': 0.02,
        'miscellaneous_consultants_fee': 0.03,
        'general_legal_cost': 0.06,
        'real_estate_taxes_during_construction': 0.10,
        'miscellaneous_admin_cost': 0.04,
        'ibr_cost': 0.03,
        'project_team_cost': 0.15,
        'pem_fees': 0.08,
        'bank_fees': 0.05,
    }
    soft_costs = section('soft_costs')
    for key, share in soft_cost_shares.items():
        # Only None counts as missing here: an explicit 0 soft cost is kept.
        if soft_costs.get(key) is None:
            soft_costs[key] = soft_cost_estimate * share

    # --- Financing ---------------------------------------------------------
    fill_missing(section('financing'), {
        'ltc_ratio': 0.75,
        'financing_percentage': 0.03,
        'interest_rate_basis_points': 350,
        'financing_cost': 200000,
        'interest_reserve': 500000,
    })

    # --- Revenue -----------------------------------------------------------
    fill_missing(section('revenue'), {
        'free_market_rent_psf': 60,
        'affordable_rent_psf': 35,
        'other_income_per_unit': 100,
        'vacancy_rate': 0.05,
        'retail_rent_psf': 45,
        'parking_income': 50000,
    })

    # --- Operating expenses ------------------------------------------------
    fill_missing(section('operating_expenses'), {
        'payroll': 31136.07,
        'repairs_and_maintenance': 44418.61,
        'utilities': 12535.90,
        'administrative': 0,
        'professional_fees': 18789.84,
        'insurance': 9341.33,
        'property_taxes': 118832.22,
        'management_fee_percentage': 0.03,
    })

    # --- Projections -------------------------------------------------------
    fill_missing(section('projections'), {
        'lease_up_months': 12,
        'stabilization_months': 6,
        'revenue_inflation_rate': 0.03,
        'expense_inflation_rate': 0.025,
        'hold_period_months': 60,
        'exit_cap_rate_decimal': 0.045,
        'sale_cost_percentage': 0.02,
    })

    # --- Equity structure --------------------------------------------------
    fill_missing(section('equity_structure'), {
        'gp_pref_rate': 0.08,
        'lp_pref_rate': 0.08,
        'promote_percentage': 0.20,
    })

    return data
471
+
472
def get_default_data_structure(self) -> Dict[str, Any]:
    """Build the hard-coded fallback data set used when extraction fails.

    Nothing is read from the instance or from any PDF; every value is a
    constant or derived from the default unit count. A new dict is built
    on each call.

    Returns:
        A dict with the sections ``property_info``, ``acquisition``,
        ``construction``, ``soft_costs``, ``financing``,
        ``operating_expenses``, ``revenue``, ``projections`` and
        ``equity_structure``.
    """
    default_units = 32
    default_gross_sf = default_units * 1000          # 1000 SF per unit
    default_rentable_sf = int(default_gross_sf * 0.85)  # 85% efficiency

    # Soft-cost line items all start as None.
    soft_cost_keys = (
        "architecture_and_interior_cost",
        "structural_engineering_cost",
        "mep_engineering_cost",
        "civil_engineering_cost",
        "controlled_inspections_cost",
        "surveying_cost",
        "utilities_connection_cost",
        "advertising_and_marketing_cost",
        "accounting_cost",
        "monitoring_cost",
        "ff_and_e_cost",
        "environmental_consultant_fee",
        "miscellaneous_consultants_fee",
        "general_legal_cost",
        "real_estate_taxes_during_construction",
        "miscellaneous_admin_cost",
        "ibr_cost",
        "project_team_cost",
        "pem_fees",
        "bank_fees",
    )

    defaults: Dict[str, Any] = {}

    # "adress" (sic) is a placeholder string; kept verbatim.
    defaults["property_info"] = dict(
        address="adress",
        units=default_units,
        gross_sf=default_gross_sf,
        rentable_sf=default_rentable_sf,
        retail_sf=0,
    )
    defaults["acquisition"] = dict(land_value=None, price=None, closing_costs=150000)
    defaults["construction"] = dict(construction_cost_per_gsf=338, construction_months=18)
    defaults["soft_costs"] = dict.fromkeys(soft_cost_keys)
    defaults["financing"] = dict(
        ltc_ratio=0.75,
        financing_percentage=0.03,
        interest_rate_basis_points=350,
        financing_cost=None,
        interest_reserve=None,
    )
    defaults["operating_expenses"] = dict(
        payroll=31136.07,
        repairs_and_maintenance=44418.61,
        utilities=12535.90,
        administrative=0,
        professional_fees=18789.84,
        insurance=9341.33,
        property_taxes=118832.22,
        management_fee_percentage=0.03,
    )
    defaults["revenue"] = dict(
        free_market_rent_psf=60,
        affordable_rent_psf=35,
        other_income_per_unit=100,
        vacancy_rate=0.05,
        retail_rent_psf=45,
        parking_income=50000,
    )
    defaults["projections"] = dict(
        lease_up_months=12,
        stabilization_months=6,
        revenue_inflation_rate=0.03,
        expense_inflation_rate=0.025,
        hold_period_months=60,
        exit_cap_rate_decimal=0.045,
        sale_cost_percentage=0.02,
    )
    defaults["equity_structure"] = dict(
        gp_pref_rate=0.08,
        lp_pref_rate=0.08,
        promote_percentage=0.20,
    )
    return defaults
560
+
561
+ def calculate_all_formulas(self, data: Dict[str, Any]) -> Dict[str, float]:
562
+ """Calculate all formulas in correct dependency order"""
563
+ results = {}
564
+ self.structured_data = data
565
+ # Flatten data for easier access
566
+ d = self.flatten_dict(data)
567
+
568
+ # Helper function to get value
569
+ def get(key, default=0):
570
+ return d.get(key, default)
571
+
572
+ # BASIC PROPERTY METRICS
573
+ results['UNITS'] = get('property_info.units')
574
+ results['GROSS_SF'] = get('property_info.gross_sf')
575
+ results['RENTABLE_SF'] = get('property_info.rentable_sf')
576
+ results['RETAIL_SF'] = get('property_info.retail_sf')
577
+ results['BUILDING_EFFICIENCY'] = self.safe_divide(results['RENTABLE_SF'], results['GROSS_SF'])
578
+
579
+ # ACQUISITION COSTS
580
+ results['LAND_VALUE'] = get('acquisition.land_value')
581
+ results['PRICE'] = get('acquisition.price')
582
+ results['CLOSING_COSTS'] = get('acquisition.closing_costs')
583
+ results['ACQUISITION_FEE'] = results['LAND_VALUE'] * 0.02
584
+ results['TOTAL_ACQUISITION_COST'] = results['LAND_VALUE'] + results['CLOSING_COSTS'] + results['ACQUISITION_FEE']
585
+
586
+ # Per unit/SF metrics for acquisition
587
+ results['LAND_VALUE_PER_GSF'] = self.safe_divide(results['LAND_VALUE'], results['GROSS_SF'])
588
+ results['LAND_VALUE_PER_RSF'] = self.safe_divide(results['LAND_VALUE'], results['RENTABLE_SF'])
589
+ results['LAND_VALUE_PER_UNIT'] = self.safe_divide(results['LAND_VALUE'], results['UNITS'])
590
+ results['TOTAL_ACQUISITION_COST_PER_GSF'] = self.safe_divide(results['TOTAL_ACQUISITION_COST'], results['GROSS_SF'])
591
+ results['TOTAL_ACQUISITION_COST_PER_RSF'] = self.safe_divide(results['TOTAL_ACQUISITION_COST'], results['RENTABLE_SF'])
592
+ results['TOTAL_ACQUISITION_COST_PER_UNIT'] = self.safe_divide(results['TOTAL_ACQUISITION_COST'], results['UNITS'])
593
+
594
+ # CONSTRUCTION COSTS
595
+ results['CONSTRUCTION_COST_PER_GSF'] = get('construction.construction_cost_per_gsf')
596
+ results['CONSTRUCTION_MONTHS'] = get('construction.construction_months')
597
+ results['TOTAL_CONSTRUCTION_GMP'] = results['CONSTRUCTION_COST_PER_GSF'] * results['GROSS_SF']
598
+ results['CONSTRUCTION_GMP_PER_GSF'] = self.safe_divide(results['TOTAL_CONSTRUCTION_GMP'], results['GROSS_SF'])
599
+ results['CONSTRUCTION_GMP_PER_RSF'] = self.safe_divide(results['TOTAL_CONSTRUCTION_GMP'], results['RENTABLE_SF'])
600
+ results['CONSTRUCTION_GMP_PER_UNIT'] = self.safe_divide(results['TOTAL_CONSTRUCTION_GMP'], results['UNITS'])
601
+
602
+ # SOFT COSTS (individual items)
603
+ soft_cost_items = [
604
+ 'architecture_and_interior_cost', 'structural_engineering_cost', 'mep_engineering_cost',
605
+ 'civil_engineering_cost', 'controlled_inspections_cost', 'surveying_cost',
606
+ 'utilities_connection_cost', 'advertising_and_marketing_cost', 'accounting_cost',
607
+ 'monitoring_cost', 'ff_and_e_cost', 'environmental_consultant_fee',
608
+ 'miscellaneous_consultants_fee', 'general_legal_cost', 'real_estate_taxes_during_construction',
609
+ 'miscellaneous_admin_cost', 'ibr_cost', 'project_team_cost', 'pem_fees', 'bank_fees'
610
+ ]
611
+
612
+ for item in soft_cost_items:
613
+ key = item.upper()
614
+ results[key] = get(f'soft_costs.{item}')
615
+
616
+ # REVENUE SETUP (needed for some soft costs)
617
+ results['FREE_MARKET_RENT_PSF'] = get('revenue.free_market_rent_psf')
618
+ results['AFFORDABLE_RENT_PSF'] = get('revenue.affordable_rent_psf')
619
+ results['OTHER_INCOME_PER_UNIT'] = get('revenue.other_income_per_unit')
620
+ results['VACANCY_RATE'] = get('revenue.vacancy_rate')
621
+ results['RETAIL_RENT_PSF'] = get('revenue.retail_rent_psf')
622
+ results['PARKING_INCOME'] = get('revenue.parking_income')
623
+
624
+ # Calculate retail revenue (needed for soft costs)
625
+ results['RETAIL_REVENUE'] = results['RETAIL_RENT_PSF'] * results['RETAIL_SF']
626
+
627
+ # HPD & IH COST
628
+ results['HPD_AND_IH_COST'] = (3500 * results['UNITS'] * 0.75) + (5000 * results['UNITS'] * 0.25)
629
+
630
+ # RETAIL TI & LC COST
631
+ results['RETAIL_TI_AND_LC_COST'] = (results['RETAIL_REVENUE'] * 0.3) + (50 * results['RETAIL_SF'])
632
+
633
+ # TOTAL SOFT COSTS
634
+ soft_cost_sum = sum([results[item.upper()] for item in soft_cost_items])
635
+ results['TOTAL_SOFT_COST'] = soft_cost_sum + results['HPD_AND_IH_COST'] + results['RETAIL_TI_AND_LC_COST']
636
+ results['TOTAL_SOFT_COST_PER_GSF'] = self.safe_divide(results['TOTAL_SOFT_COST'],results['GROSS_SF'])
637
+
638
+ # OPERATING EXPENSES (for reserves calculation)
639
+ results['PAYROLL'] = get('operating_expenses.payroll')
640
+ results['REPAIRS_AND_MAINTENANCE'] = get('operating_expenses.repairs_and_maintenance')
641
+ results['UTILITIES'] = get('operating_expenses.utilities')
642
+ results['ADMINISTRATIVE'] = get('operating_expenses.administrative')
643
+ results['PROFESSIONAL_FEES'] = get('operating_expenses.professional_fees')
644
+ results['INSURANCE'] = get('operating_expenses.insurance')
645
+ results['PROPERTY_TAXES'] = get('operating_expenses.property_taxes')
646
+ results['MANAGEMENT_FEE_PERCENTAGE'] = get('operating_expenses.management_fee_percentage')
647
+
648
+ results['TOTAL_OPERATING_EXPENSES'] = (results['PAYROLL'] + results['REPAIRS_AND_MAINTENANCE'] +
649
+ results['UTILITIES'] + results['ADMINISTRATIVE'] +
650
+ results['PROFESSIONAL_FEES'] + results['INSURANCE'] +
651
+ results['PROPERTY_TAXES'])
652
+
653
+ # CONTINGENCY & RESERVES
654
+ results['CONTINGENCY_COST'] = (results['TOTAL_CONSTRUCTION_GMP'] + results['TOTAL_SOFT_COST']) * 0.05
655
+ results['DEVELOPMENT_FEE'] = (results['TOTAL_CONSTRUCTION_GMP'] + results['TOTAL_SOFT_COST']) * 0.04
656
+ results['OPERATING_RESERVE'] = results['TOTAL_OPERATING_EXPENSES'] * 0.2
657
+
658
+ results['FINANCING_COST'] = get('financing.financing_cost')
659
+ results['INTEREST_RESERVE'] = get('financing.interest_reserve')
660
+
661
+ # TOTAL PROJECT COST (before financing)
662
+ results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] = (
663
+ results['TOTAL_SOFT_COST'] +
664
+ results['TOTAL_CONSTRUCTION_GMP'] +
665
+ results['TOTAL_ACQUISITION_COST'] +
666
+ results['CONTINGENCY_COST'] +
667
+ results['DEVELOPMENT_FEE'] +
668
+ results['FINANCING_COST'] +
669
+ results['INTEREST_RESERVE'] +
670
+ results['OPERATING_RESERVE']
671
+ )
672
+
673
+ results['TOTAL_PROJECT_COST_PER_GSF'] = self.safe_divide(results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'], results['GROSS_SF'])
674
+ results['TOTAL_PROJECT_COST_PER_RSF'] = self.safe_divide(results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'], results['RENTABLE_SF'])
675
+ results['TOTAL_PROJECT_COST_PER_UNIT'] = self.safe_divide(results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'], results['UNITS'])
676
+
677
+ # FINANCING CALCULATIONS
678
+ results['LTC_RATIO'] = get('financing.ltc_ratio')
679
+ results['FINANCING_PERCENTAGE'] = get('financing.financing_percentage')
680
+ results['INTEREST_RATE_BASIS_POINTS'] = get('financing.interest_rate_basis_points')
681
+
682
+ results['PRE_LTC_BUDGET'] = (results['TOTAL_SOFT_COST'] + results['CONTINGENCY_COST'] +
683
+ results['DEVELOPMENT_FEE'] + results['OPERATING_RESERVE'] +
684
+ results['TOTAL_CONSTRUCTION_GMP'] + results['TOTAL_ACQUISITION_COST'])
685
+
686
+ results['LOAN_AMOUNT'] = results['LTC_RATIO'] * results['PRE_LTC_BUDGET']
687
+ results['FINANCING_AMOUNT'] = results['FINANCING_PERCENTAGE'] * results['LOAN_AMOUNT']
688
+ results['INTEREST_RATE_DECIMAL'] = (results['INTEREST_RATE_BASIS_POINTS'] + 430) / 10000
689
+ results['CONSTRUCTION_INTEREST'] = results['LOAN_AMOUNT'] * 0.7 * (results['INTEREST_RATE_DECIMAL'] / 12) * results['CONSTRUCTION_MONTHS']
690
+
691
+ # DEBT & EQUITY
692
+ results['TOTAL_DEBT'] = results['CONSTRUCTION_INTEREST'] + results['LOAN_AMOUNT'] + results['FINANCING_AMOUNT']
693
+ results['TOTAL_EQUITY'] = results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] - results['TOTAL_DEBT']
694
+ results['DEBT_PERCENTAGE'] = results['TOTAL_DEBT'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0
695
+ results['EQUITY_PERCENTAGE'] = results['TOTAL_EQUITY'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0
696
+ results['TOTAL_CAPITAL_STACK'] = results['TOTAL_DEBT'] + results['TOTAL_EQUITY']
697
+
698
+ results['DEBT_PER_GSF'] = self.safe_divide(results['TOTAL_DEBT'],results['GROSS_SF'])
699
+ results['EQUITY_PER_GSF'] = self.safe_divide(results['TOTAL_EQUITY'],results['GROSS_SF'])
700
+ results['DEBT_PER_UNIT'] = self.safe_divide(results['TOTAL_DEBT'], results['UNITS'])
701
+
702
+ results['EQUITY_PER_UNIT'] = self.safe_divide(results['TOTAL_EQUITY'], results['UNITS'])
703
+
704
+ # OPERATING EXPENSE METRICS
705
+ results['PAYROLL_PER_UNIT'] = self.safe_divide(results['PAYROLL'], results['UNITS'])
706
+ results['REPAIRS_AND_MAINTENANCE_PER_UNIT'] = self.safe_divide(results['REPAIRS_AND_MAINTENANCE'], results['UNITS'])
707
+ results['UTILITIES_PER_UNIT'] = self.safe_divide(results['UTILITIES'], results['UNITS'])
708
+ results['ADMIN_AND_PROFESSIONAL_PER_UNIT'] = self.safe_divide((results['ADMINISTRATIVE'] + results['PROFESSIONAL_FEES']), results['UNITS'])
709
+ results['INSURANCE_PER_UNIT'] = self.safe_divide(results['INSURANCE'], results['UNITS'])
710
+ results['OPERATING_EXPENSES_PER_UNIT'] = self.safe_divide(results['TOTAL_OPERATING_EXPENSES'], results['UNITS'])
711
+ results['OPERATING_EXPENSES_PER_GSF'] = self.safe_divide(results['TOTAL_OPERATING_EXPENSES'],results['GROSS_SF'])
712
+
713
+ # REVENUE CALCULATIONS
714
+ results['LEASE_UP_MONTHS'] = get('projections.lease_up_months')
715
+ results['STABILIZATION_MONTHS'] = get('projections.stabilization_months')
716
+ results['REVENUE_INFLATION_RATE'] = get('projections.revenue_inflation_rate')
717
+ results['EXPENSE_INFLATION_RATE'] = get('projections.expense_inflation_rate')
718
+
719
+ results['TRENDING_TERM'] = results['LEASE_UP_MONTHS'] + results['STABILIZATION_MONTHS']
720
+ results['TERM_REVENUE_INFLATION'] = (1 + results['REVENUE_INFLATION_RATE']) ** (results['TRENDING_TERM'] / 12)
721
+ results['TERM_EXPENSE_INFLATION'] = (1 + results['EXPENSE_INFLATION_RATE']) ** (results['TRENDING_TERM'] / 12)
722
+
723
+ results['GROSS_POTENTIAL_FREE_MARKET_RENT'] = results['FREE_MARKET_RENT_PSF'] * 0.75 * results['RENTABLE_SF']
724
+ results['GROSS_POTENTIAL_AFFORDABLE_RENT'] = results['AFFORDABLE_RENT_PSF'] * 0.25 * results['RENTABLE_SF']
725
+ results['OTHER_INCOME'] = results['OTHER_INCOME_PER_UNIT'] * results['UNITS'] * 12 * 0.75
726
+ results['VACANCY_LOSS'] = results['VACANCY_RATE'] * (results['OTHER_INCOME'] + results['GROSS_POTENTIAL_FREE_MARKET_RENT'] + results['GROSS_POTENTIAL_AFFORDABLE_RENT'])
727
+ results['EFFECTIVE_GROSS_INCOME'] = results['GROSS_POTENTIAL_FREE_MARKET_RENT'] - results['VACANCY_LOSS'] + results['OTHER_INCOME'] + results['GROSS_POTENTIAL_AFFORDABLE_RENT']
728
+
729
+ results['MANAGEMENT_FEE'] = results['MANAGEMENT_FEE_PERCENTAGE'] * results['EFFECTIVE_GROSS_INCOME']
730
+ results['REAL_ESTATE_TAXES'] = results['GROSS_SF'] * 30 * 0.1
731
+ results['TOTAL_EXPENSES'] = results['PAYROLL'] + results['REPAIRS_AND_MAINTENANCE'] + results['UTILITIES'] + results['REAL_ESTATE_TAXES'] + results['MANAGEMENT_FEE']
732
+
733
+ # NOI & RETURNS
734
+ results['NET_OPERATING_INCOME'] = results['EFFECTIVE_GROSS_INCOME'] - results['TOTAL_EXPENSES'] + results['PARKING_INCOME'] + results['RETAIL_REVENUE']
735
+ results['NOI_PER_UNIT'] = self.safe_divide(results['NET_OPERATING_INCOME'], results['UNITS'])
736
+ results['NOI_PER_GSF'] = self.safe_divide(results['NET_OPERATING_INCOME'],results['GROSS_SF'])
737
+ results['CAP_RATE'] = (results['NET_OPERATING_INCOME'] / results['PRICE']) * 100 if results['PRICE'] > 0 else 0
738
+
739
+ results['STABILIZED_YIELD_ON_COST'] = (((results['EFFECTIVE_GROSS_INCOME'] + results['RETAIL_REVENUE'] - results['GROSS_POTENTIAL_AFFORDABLE_RENT']) * results['TERM_REVENUE_INFLATION']) - (results['TOTAL_EXPENSES'] * results['TERM_EXPENSE_INFLATION'])) + results['GROSS_POTENTIAL_AFFORDABLE_RENT']
740
+
741
+ results['YIELD_ON_COST_PERCENTAGE'] = results['NET_OPERATING_INCOME'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0
742
+ results['STABILIZED_YIELD_ON_COST_PERCENTAGE'] = results['STABILIZED_YIELD_ON_COST'] / results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] if results['TOTAL_FINANCING_CONTINGENCY_AND_RESERVES'] > 0 else 0
743
+
744
+ results['ANNUAL_DEBT_SERVICE'] = results['LOAN_AMOUNT'] * results['INTEREST_RATE_DECIMAL']
745
+ results['CASH_ON_CASH_RETURN'] = ((results['NET_OPERATING_INCOME'] - results['ANNUAL_DEBT_SERVICE']) / results['TOTAL_EQUITY']) * 100 if results['TOTAL_EQUITY'] > 0 else 0
746
+ results['DEBT_SERVICE_COVERAGE_RATIO'] = results['NET_OPERATING_INCOME'] / results['ANNUAL_DEBT_SERVICE'] if results['ANNUAL_DEBT_SERVICE'] > 0 else 0
747
+
748
+ # EXIT & EQUITY WATERFALL
749
+ results['EXIT_CAP_RATE_DECIMAL'] = get('projections.exit_cap_rate_decimal')
750
+ results['SALE_COST_PERCENTAGE'] = get('projections.sale_cost_percentage')
751
+ results['HOLD_PERIOD_MONTHS'] = get('projections.hold_period_months')
752
+
753
+ results['PROPERTY_VALUE_ON_SALE'] = (results['STABILIZED_YIELD_ON_COST'] / results['EXIT_CAP_RATE_DECIMAL']) + (results['STABILIZED_YIELD_ON_COST'] * 0.25) if results['EXIT_CAP_RATE_DECIMAL'] > 0 else 0
754
+ results['SALE_COST'] = results['SALE_COST_PERCENTAGE'] * results['PROPERTY_VALUE_ON_SALE']
755
+ results['NET_SALE_PROCEEDS'] = results['PROPERTY_VALUE_ON_SALE'] - results['SALE_COST']
756
+ results['CASH_REMAINING_AFTER_LOAN_PAYBACK'] = results['NET_SALE_PROCEEDS'] - results['TOTAL_DEBT']
757
+
758
+ results['GP_PREF_RATE'] = get('equity_structure.gp_pref_rate')
759
+ results['LP_PREF_RATE'] = get('equity_structure.lp_pref_rate')
760
+ results['PROMOTE_PERCENTAGE'] = get('equity_structure.promote_percentage')
761
+
762
+ results['GP_INVESTMENT'] = results['TOTAL_EQUITY'] * 0.2
763
+ results['LP_INVESTMENT'] = results['TOTAL_EQUITY'] - results['GP_INVESTMENT']
764
+ results['GP_PREFERRED_RETURN_WITH_PRINCIPAL'] = (1 + results['GP_PREF_RATE'] / 12) ** results['HOLD_PERIOD_MONTHS'] * results['GP_INVESTMENT']
765
+ results['LP_PREFERRED_RETURN_WITH_PRINCIPAL'] = (1 + results['LP_PREF_RATE'] / 12) ** results['HOLD_PERIOD_MONTHS'] * results['LP_INVESTMENT']
766
+ results['CASH_REMAINING_AFTER_PREFERRED'] = results['CASH_REMAINING_AFTER_LOAN_PAYBACK'] - results['LP_PREFERRED_RETURN_WITH_PRINCIPAL'] - results['GP_PREFERRED_RETURN_WITH_PRINCIPAL']
767
+ results['PROMOTE_ON_JOINT_VENTURE'] = results['PROMOTE_PERCENTAGE'] * results['CASH_REMAINING_AFTER_PREFERRED']
768
+ results['CASH_TO_LP'] = (results['CASH_REMAINING_AFTER_PREFERRED'] - results['PROMOTE_ON_JOINT_VENTURE']) * (results['LP_INVESTMENT'] / (results['LP_INVESTMENT'] + results['GP_INVESTMENT'])) if (results['LP_INVESTMENT'] + results['GP_INVESTMENT']) > 0 else 0
769
+ results['NET_TO_LP_INVESTOR'] = results['CASH_TO_LP'] + results['LP_PREFERRED_RETURN_WITH_PRINCIPAL']
770
+ results['LP_MULTIPLE'] = results['NET_TO_LP_INVESTOR'] / results['LP_INVESTMENT'] if results['LP_INVESTMENT'] > 0 else 0
771
+ # results['IRR_TO_LP'] = ((results['NET_TO_LP_INVESTOR'] / results['LP_INVESTMENT']) ** (12 / results['HOLD_PERIOD_MONTHS']) - 1) * 100 if results['LP_INVESTMENT'] > 0 and results['HOLD_PERIOD_MONTHS'] > 0 else 0
772
+ # IRR calculation with complex number handling
773
+ if results['LP_INVESTMENT'] > 0 and results['HOLD_PERIOD_MONTHS'] > 0:
774
+ irr_base = results['NET_TO_LP_INVESTOR'] / results['LP_INVESTMENT']
775
+ if irr_base > 0:
776
+ results['IRR_TO_LP'] = ((irr_base) ** (12 / results['HOLD_PERIOD_MONTHS']) - 1) * 100
777
+ else:
778
+ results['IRR_TO_LP'] = -100 # Total loss
779
+ else:
780
+ results['IRR_TO_LP'] = 0
781
+
782
+ # BLENDED RENT CALCULATIONS
783
+ results['BLENDED_RENT_PER_RSF'] = (results['FREE_MARKET_RENT_PSF'] * 0.75) + (results['AFFORDABLE_RENT_PSF'] * 0.25)
784
+ results['TOTAL_FREE_MARKET_RENT'] = results['FREE_MARKET_RENT_PSF'] * 425 / 12
785
+ results['TOTAL_BLENDED_RENT'] = results['BLENDED_RENT_PER_RSF'] * 750 / 12
786
+ results['FREE_MARKET_RENT_PER_SF'] = results['TOTAL_FREE_MARKET_RENT'] * 110 / 12
787
+ results['AFFORDABLE_RENT_PER_SF'] = results['AFFORDABLE_RENT_PSF'] * 110 / 12
788
+ results['BLENDED_RENT_PER_SF'] = results['TOTAL_BLENDED_RENT'] * 110 / 12
789
+ results['AVERAGE_RENT_PER_UNIT'] = self.safe_divide((results['GROSS_POTENTIAL_FREE_MARKET_RENT']+results['GROSS_POTENTIAL_AFFORDABLE_RENT']), results['UNITS'])
790
+ results['RENT_PER_UNIT_PER_MONTH'] = results['AVERAGE_RENT_PER_UNIT'] / 12
791
+
792
+ # EGI PERCENTAGES
793
+ if results['EFFECTIVE_GROSS_INCOME'] > 0:
794
+ results['PAYROLL_PERCENTAGE_OF_EGI'] = results['PAYROLL'] / results['EFFECTIVE_GROSS_INCOME']
795
+ results['REPAIRS_AND_MAINTENANCE_PERCENTAGE_OF_EGI'] = results['REPAIRS_AND_MAINTENANCE'] / results['EFFECTIVE_GROSS_INCOME']
796
+ results['UTILITIES_PERCENTAGE_OF_EGI'] = results['UTILITIES'] / results['EFFECTIVE_GROSS_INCOME']
797
+ results['ADMIN_AND_PROFESSIONAL_PERCENTAGE_OF_EGI'] = (results['ADMINISTRATIVE'] + results['PROFESSIONAL_FEES']) / results['EFFECTIVE_GROSS_INCOME']
798
+ results['INSURANCE_PERCENTAGE_OF_EGI'] = results['INSURANCE'] / results['EFFECTIVE_GROSS_INCOME']
799
+ results['PROFESSIONAL_FEES_PERCENTAGE_OF_EGI'] = results['PROFESSIONAL_FEES'] / results['EFFECTIVE_GROSS_INCOME']
800
+ results['TOTAL_OPERATING_EXPENSES_PERCENTAGE_OF_EGI'] = results['TOTAL_OPERATING_EXPENSES'] / results['EFFECTIVE_GROSS_INCOME']
801
+ else:
802
+ results['PAYROLL_PERCENTAGE_OF_EGI'] = 0
803
+ results['REPAIRS_AND_MAINTENANCE_PERCENTAGE_OF_EGI'] = 0
804
+ results['UTILITIES_PERCENTAGE_OF_EGI'] = 0
805
+ results['ADMIN_AND_PROFESSIONAL_PERCENTAGE_OF_EGI'] = 0
806
+ results['INSURANCE_PERCENTAGE_OF_EGI'] = 0
807
+ results['PROFESSIONAL_FEES_PERCENTAGE_OF_EGI'] = 0
808
+ results['TOTAL_OPERATING_EXPENSES_PERCENTAGE_OF_EGI'] = 0
809
+
810
+ self.formula_results = results
811
+ return results
812
+
813
def flatten_dict(self, d: Dict[str, Any], parent_key: str = '', sep: str = '.') -> Dict[str, Any]:
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to the keys of their ancestors
    with ``sep``. Values that are not dictionaries are copied through
    unchanged. A nested dictionary that is empty contributes no entries.

    Args:
        d: Dictionary to flatten.
        parent_key: Prefix prepended (followed by ``sep``) to every key of
            ``d``. When empty, top-level keys are used as they are.
        sep: Separator placed between the key segments.

    Returns:
        A new dictionary mapping the joined key paths to the leaf values.
    """
    flat: Dict[str, Any] = {}
    for key, value in d.items():
        # Only add the separator once there is a prefix to attach it to.
        path = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            # Recurse with the accumulated path as the new prefix.
            flat.update(self.flatten_dict(value, path, sep=sep))
        else:
            flat[path] = value
    return flat
823
+
824
def generate_excel(self, output_path: str = "Real_Estate_Financial_Model.xlsx"):
    """Write all calculated results to an Excel workbook.

    Reads ``self.formula_results``, prints a warning for every critical
    input that is zero or missing, then builds one worksheet per section
    (summary, acquisition, construction, soft costs, financing, operations,
    returns) and saves the workbook.

    Args:
        output_path: Destination path of the ``.xlsx`` file.

    Returns:
        ``output_path``, once the workbook has been saved.

    Raises:
        Exception: Any error raised while building or saving the workbook is
            printed together with its traceback and then re-raised.
    """
    try:
        # Validate critical values before Excel generation
        r = self.formula_results

        print(" Validating calculations...")
        critical_values = {
            'UNITS': r.get('UNITS', 0),
            'GROSS_SF': r.get('GROSS_SF', 0),
            'RENTABLE_SF': r.get('RENTABLE_SF', 0),
            'EFFECTIVE_GROSS_INCOME': r.get('EFFECTIVE_GROSS_INCOME', 0),
            'TOTAL_PROJECT_COST': r.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0),
        }

        # Falsy covers 0, 0.0 and None, so a key that is present but holds
        # None is reported as well instead of slipping past an `== 0` test.
        warnings = [
            f" WARNING: {key} is zero or missing"
            for key, value in critical_values.items()
            if not value
        ]

        if warnings:
            print("\n".join(warnings))
            print(" Continuing with available data...\n")

        wb = openpyxl.Workbook()

        # Remove default sheet
        if 'Sheet' in wb.sheetnames:
            wb.remove(wb['Sheet'])

        # Progress message paired with the method that builds that sheet,
        # in the order the sheets are created.
        sheet_builders = (
            (" Creating Executive Summary...", self.create_summary_sheet),
            (" Creating Acquisition sheet...", self.create_acquisition_sheet),
            (" Creating Construction sheet...", self.create_construction_sheet),
            (" Creating Soft Costs sheet...", self.create_soft_costs_sheet),
            (" Creating Financing sheet...", self.create_financing_sheet),
            (" Creating Operations sheet...", self.create_operations_sheet),
            (" Creating Returns sheet...", self.create_returns_sheet),
        )
        for message, build_sheet in sheet_builders:
            print(message)
            build_sheet(wb)

        # Save workbook
        wb.save(output_path)
        print(f"✓ Excel file generated: {output_path}")
        return output_path
    except Exception as e:
        # Report the failure for the console user, then let the caller decide.
        print(f"ERROR generating Excel: {e}")
        import traceback
        traceback.print_exc()
        raise
885
+
886
def create_summary_sheet(self, wb):
    """Build the "Executive Summary" worksheet as the first sheet of ``wb``.

    All figures come from ``self.formula_results``; the address comes from
    ``self.structured_data['property_info']['address']`` ("N/A" when absent).
    Five stacked sections are written into columns A-D: title, property
    information, project cost summary with a total row, capital stack, and
    investment returns.

    Per-GSF and per-unit figures derived here use a zero-safe division, so
    results with ``UNITS`` or ``GROSS_SF`` equal to zero still produce a
    sheet instead of raising ``ZeroDivisionError``.

    Args:
        wb: Workbook to which the sheet is added.
    """
    ws = wb.create_sheet("Executive Summary", 0)
    r = self.formula_results

    # Styles
    header_fill = PatternFill(start_color="1F4E78", end_color="1F4E78", fill_type="solid")
    header_font = Font(color="FFFFFF", bold=True, size=12)
    subheader_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
    subheader_font = Font(color="FFFFFF", bold=True, size=11)
    bold_font = Font(bold=True)
    columns = ('A', 'B', 'C', 'D')

    def ratio(numerator, denominator):
        """Return numerator / denominator, or 0 when the denominator is missing or not positive."""
        return numerator / denominator if denominator and denominator > 0 else 0

    def write_section_header(at_row, title):
        """Write a dark banner across columns A-D."""
        ws[f'A{at_row}'] = title
        ws[f'A{at_row}'].fill = header_fill
        ws[f'A{at_row}'].font = header_font
        ws.merge_cells(f'A{at_row}:D{at_row}')

    def write_column_headers(at_row, titles):
        """Write the four table column captions for a section."""
        for col, title in zip(columns, titles):
            ws[f'{col}{at_row}'] = title
            ws[f'{col}{at_row}'].fill = subheader_fill
            ws[f'{col}{at_row}'].font = subheader_font

    def write_label_value_rows(at_row, pairs):
        """Write bold-label/value pairs downwards and return the next free row."""
        for label, value in pairs:
            ws[f'A{at_row}'] = label
            ws[f'A{at_row}'].font = bold_font
            ws[f'B{at_row}'] = value
            at_row += 1
        return at_row

    gross_sf = r.get('GROSS_SF', 0)
    units = r.get('UNITS', 0)

    # Title
    ws['A1'] = "REAL ESTATE DEVELOPMENT FINANCIAL MODEL"
    ws['A1'].font = Font(bold=True, size=16)
    ws.merge_cells('A1:D1')

    # Property Information
    row = 3
    write_section_header(row, "PROPERTY INFORMATION")

    address = self.structured_data.get('property_info', {}).get('address', 'N/A')

    row += 1
    row = write_label_value_rows(row, [
        ("Address:", address),
        ("Units:", r.get('UNITS', 0)),
        ("Gross Square Feet:", f"{r.get('GROSS_SF', 0):,.0f}"),
        ("Rentable Square Feet:", f"{r.get('RENTABLE_SF', 0):,.0f}"),
        ("Building Efficiency:", f"{r.get('BUILDING_EFFICIENCY', 0):.2%}"),
    ])

    # Project Costs Summary
    row += 1
    write_section_header(row, "PROJECT COSTS SUMMARY")

    row += 1
    write_column_headers(row, ("Category", "Total Cost", "Per GSF", "Per Unit"))

    row += 1
    soft_cost_per_gsf = r.get('TOTAL_SOFT_COST_PER_GSF', 0)
    contingency = r.get('CONTINGENCY_COST', 0)
    development_fee = r.get('DEVELOPMENT_FEE', 0)
    financing_and_reserves = (
        r.get('FINANCING_COST', 0)
        + r.get('INTEREST_RESERVE', 0)
        + r.get('OPERATING_RESERVE', 0)
    )
    cost_summary = [
        ("Acquisition", r.get('TOTAL_ACQUISITION_COST', 0), r.get('TOTAL_ACQUISITION_COST_PER_GSF', 0), r.get('TOTAL_ACQUISITION_COST_PER_UNIT', 0)),
        ("Construction", r.get('TOTAL_CONSTRUCTION_GMP', 0), r.get('CONSTRUCTION_GMP_PER_GSF', 0), r.get('CONSTRUCTION_GMP_PER_UNIT', 0)),
        # Soft cost per unit is rebuilt from the per-GSF figure, as no
        # per-unit soft cost value is read from the results here.
        ("Soft Costs", r.get('TOTAL_SOFT_COST', 0), soft_cost_per_gsf, ratio(soft_cost_per_gsf * gross_sf, units)),
        ("Contingency", contingency, ratio(contingency, gross_sf), ratio(contingency, units)),
        ("Development Fee", development_fee, ratio(development_fee, gross_sf), ratio(development_fee, units)),
        # Previously shown as 0 / 0 even when the total was non-zero.
        ("Financing & Reserves", financing_and_reserves, ratio(financing_and_reserves, gross_sf), ratio(financing_and_reserves, units)),
    ]

    for label, total, per_gsf, per_unit in cost_summary:
        ws[f'A{row}'] = label
        ws[f'B{row}'] = total
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'C{row}'] = per_gsf
        ws[f'C{row}'].number_format = '$#,##0.00'
        ws[f'D{row}'] = per_unit
        ws[f'D{row}'].number_format = '$#,##0'
        row += 1

    # Total
    ws[f'A{row}'] = "TOTAL PROJECT COST"
    ws[f'A{row}'].font = bold_font
    total_cells = (
        ('B', 'TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', '$#,##0'),
        ('C', 'TOTAL_PROJECT_COST_PER_GSF', '$#,##0.00'),
        ('D', 'TOTAL_PROJECT_COST_PER_UNIT', '$#,##0'),
    )
    for col, key, number_format in total_cells:
        ws[f'{col}{row}'] = r.get(key, 0)
        ws[f'{col}{row}'].number_format = number_format
        ws[f'{col}{row}'].font = bold_font

    # Capital Stack
    row += 2
    write_section_header(row, "CAPITAL STACK")

    row += 1
    write_column_headers(row, ("Source", "Amount", "Percentage", "Per Unit"))

    capital_stack = (
        ("Total Debt", 'TOTAL_DEBT', 'DEBT_PERCENTAGE', 'DEBT_PER_UNIT'),
        ("Total Equity", 'TOTAL_EQUITY', 'EQUITY_PERCENTAGE', 'EQUITY_PER_UNIT'),
    )
    for label, amount_key, share_key, per_unit_key in capital_stack:
        row += 1
        ws[f'A{row}'] = label
        ws[f'B{row}'] = r.get(amount_key, 0)
        ws[f'B{row}'].number_format = '$#,##0'
        ws[f'C{row}'] = r.get(share_key, 0)
        ws[f'C{row}'].number_format = '0.00%'
        ws[f'D{row}'] = r.get(per_unit_key, 0)
        ws[f'D{row}'].number_format = '$#,##0'

    # Returns Summary
    row += 2
    write_section_header(row, "INVESTMENT RETURNS")

    # A fractional power of a negative base yields a complex number in
    # Python; only the real part is meaningful for display.
    lp_irr = r.get('IRR_TO_LP', 0)
    if isinstance(lp_irr, complex):
        lp_irr = lp_irr.real

    row += 1
    row = write_label_value_rows(row, [
        ("Stabilized NOI:", f"${r.get('NET_OPERATING_INCOME', 0):,.0f}"),
        ("Yield on Cost:", f"{r.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}"),
        ("Stabilized Yield on Cost:", f"{r.get('STABILIZED_YIELD_ON_COST_PERCENTAGE', 0):.2%}"),
        ("Cash-on-Cash Return:", f"{r.get('CASH_ON_CASH_RETURN', 0):.2f}%"),
        ("DSCR:", f"{r.get('DEBT_SERVICE_COVERAGE_RATIO', 0):.2f}x"),
        ("LP IRR:", f"{float(lp_irr):.2f}%"),
        ("LP Multiple:", f"{r.get('LP_MULTIPLE', 0):.2f}x"),
    ])

    # Adjust column widths
    ws.column_dimensions['A'].width = 25
    ws.column_dimensions['B'].width = 18
    ws.column_dimensions['C'].width = 15
    ws.column_dimensions['D'].width = 15
1039
+
1040
def create_acquisition_sheet(self, wb):
    """Build the "Acquisition" worksheet with the acquisition cost breakdown.

    Writes one row each for land value, closing costs and acquisition fee,
    followed by a bold total row. Each row shows the total cost and the cost
    per gross SF, per rentable SF and per unit, all taken from or derived
    from ``self.formula_results``.

    Ratios derived here use a zero-safe division, so ``GROSS_SF``,
    ``RENTABLE_SF`` or ``UNITS`` equal to zero yields 0 in the cell instead
    of raising ``ZeroDivisionError``.

    Args:
        wb: Workbook to which the sheet is added.
    """
    ws = wb.create_sheet("Acquisition")
    r = self.formula_results

    def ratio(numerator, denominator):
        """Return numerator / denominator, or 0 when the denominator is missing or not positive."""
        return numerator / denominator if denominator and denominator > 0 else 0

    gross_sf = r.get('GROSS_SF', 0)
    rentable_sf = r.get('RENTABLE_SF', 0)
    units = r.get('UNITS', 0)

    # Header
    ws['A1'] = "ACQUISITION COSTS"
    ws['A1'].font = Font(bold=True, size=14)
    ws.merge_cells('A1:E1')

    # Column headers
    row = 3
    headers = ["Item", "Total Cost", "Per GSF", "Per RSF", "Per Unit"]
    for col_idx, header in enumerate(headers, start=1):
        cell = ws.cell(row=row, column=col_idx, value=header)
        cell.fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
        cell.font = Font(color="FFFFFF", bold=True)

    # Number formats for columns 2-5, shared by the data rows and the total row.
    column_formats = ('$#,##0', '$#,##0.00', '$#,##0.00', '$#,##0')

    # Data
    row += 1
    closing_costs = r.get('CLOSING_COSTS', 0)
    acquisition_fee = r.get('ACQUISITION_FEE', 0)
    data = [
        ("Land Value", r.get('LAND_VALUE', 0), r.get('LAND_VALUE_PER_GSF', 0), r.get('LAND_VALUE_PER_RSF', 0), r.get('LAND_VALUE_PER_UNIT', 0)),
        ("Closing Costs", closing_costs, ratio(closing_costs, gross_sf), ratio(closing_costs, rentable_sf), ratio(closing_costs, units)),
        ("Acquisition Fee (2%)", acquisition_fee, ratio(acquisition_fee, gross_sf), ratio(acquisition_fee, rentable_sf), ratio(acquisition_fee, units)),
    ]

    for item, *figures in data:
        ws.cell(row=row, column=1, value=item)
        for col_idx, (figure, number_format) in enumerate(zip(figures, column_formats), start=2):
            ws.cell(row=row, column=col_idx, value=figure).number_format = number_format
        row += 1

    # Total
    ws.cell(row=row, column=1, value="TOTAL ACQUISITION COST").font = Font(bold=True)
    total_keys = (
        'TOTAL_ACQUISITION_COST',
        'TOTAL_ACQUISITION_COST_PER_GSF',
        'TOTAL_ACQUISITION_COST_PER_RSF',
        'TOTAL_ACQUISITION_COST_PER_UNIT',
    )
    for col_idx, (key, number_format) in enumerate(zip(total_keys, column_formats), start=2):
        cell = ws.cell(row=row, column=col_idx, value=r.get(key, 0))
        cell.number_format = number_format
        cell.font = Font(bold=True)

    # Adjust widths
    for col in range(1, 6):
        ws.column_dimensions[get_column_letter(col)].width = 20
1088
+
1089
def create_construction_sheet(self, wb):
    """Build the "Construction" worksheet.

    Writes four label/value lines from ``self.formula_results``: construction
    cost per gross SF, gross square feet, total construction GMP (bold) and
    the construction duration rendered as "<n> months".

    Args:
        wb: Workbook to which the sheet is added.
    """
    ws = wb.create_sheet("Construction")
    results = self.formula_results

    title = ws['A1']
    title.value = "CONSTRUCTION COSTS"
    title.font = Font(bold=True, size=14)

    # Row 3: unit cost
    ws['A3'] = "Construction Cost per GSF:"
    unit_cost = ws['B3']
    unit_cost.value = results.get('CONSTRUCTION_COST_PER_GSF', 0)
    unit_cost.number_format = '$#,##0.00'

    # Row 4: building area
    ws['A4'] = "Gross Square Feet:"
    area = ws['B4']
    area.value = results.get('GROSS_SF', 0)
    area.number_format = '#,##0'

    # Row 6: headline total, emphasised in both columns
    gmp_label = ws['A6']
    gmp_label.value = "Total Construction GMP:"
    gmp_label.font = Font(bold=True)
    gmp_value = ws['B6']
    gmp_value.value = results.get('TOTAL_CONSTRUCTION_GMP', 0)
    gmp_value.number_format = '$#,##0'
    gmp_value.font = Font(bold=True)

    # Row 8: duration is stored as text, not as a number
    ws['A8'] = "Construction Duration:"
    ws['B8'] = f"{results.get('CONSTRUCTION_MONTHS', 0)} months"

    for column, width in (('A', 30), ('B', 20)):
        ws.column_dimensions[column].width = width
1120
+
1121
def create_soft_costs_sheet(self, wb):
    """Build the "Soft Costs" worksheet.

    Lists every soft cost category with its total (read from
    ``self.formula_results``) and its cost per gross SF, followed by a bold
    total row. The per-GSF figure is 0 when ``GROSS_SF`` is missing or not
    positive.

    Args:
        wb: Workbook to which the sheet is added.
    """
    ws = wb.create_sheet("Soft Costs")
    results = self.formula_results

    ws['A1'] = "SOFT COSTS BUDGET"
    ws['A1'].font = Font(bold=True, size=14)

    # Table header on row 3
    current_row = 3
    for column, caption in enumerate(("Category", "Total Cost", "Per GSF"), start=1):
        header_cell = ws.cell(row=current_row, column=column, value=caption)
        header_cell.fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
        header_cell.font = Font(color="FFFFFF", bold=True)

    # Display label -> key in the results dictionary, in display order.
    categories = (
        ("Architecture & Interior Design", 'ARCHITECTURE_AND_INTERIOR_COST'),
        ("Structural Engineering", 'STRUCTURAL_ENGINEERING_COST'),
        ("MEP Engineering", 'MEP_ENGINEERING_COST'),
        ("Civil Engineering", 'CIVIL_ENGINEERING_COST'),
        ("Controlled Inspections", 'CONTROLLED_INSPECTIONS_COST'),
        ("Surveying", 'SURVEYING_COST'),
        ("Utilities Connection", 'UTILITIES_CONNECTION_COST'),
        ("Advertising & Marketing", 'ADVERTISING_AND_MARKETING_COST'),
        ("Accounting", 'ACCOUNTING_COST'),
        ("Monitoring", 'MONITORING_COST'),
        ("FF&E", 'FF_AND_E_COST'),
        ("Environmental Consultant", 'ENVIRONMENTAL_CONSULTANT_FEE'),
        ("Miscellaneous Consultants", 'MISCELLANEOUS_CONSULTANTS_FEE'),
        ("General Legal", 'GENERAL_LEGAL_COST'),
        ("RE Taxes During Construction", 'REAL_ESTATE_TAXES_DURING_CONSTRUCTION'),
        ("Miscellaneous Admin", 'MISCELLANEOUS_ADMIN_COST'),
        ("IBR Cost", 'IBR_COST'),
        ("Project Team", 'PROJECT_TEAM_COST'),
        ("PEM Fees", 'PEM_FEES'),
        ("Bank Fees", 'BANK_FEES'),
        ("HPD & IH Costs", 'HPD_AND_IH_COST'),
        ("Retail TI & LC", 'RETAIL_TI_AND_LC_COST'),
    )

    gross_sf = results.get('GROSS_SF', 0)
    for current_row, (caption, result_key) in enumerate(categories, start=current_row + 1):
        amount = results.get(result_key, 0)
        # Guard the division: no area means no meaningful per-GSF cost.
        if gross_sf > 0:
            amount_per_gsf = amount / gross_sf
        else:
            amount_per_gsf = 0
        ws.cell(row=current_row, column=1, value=caption)
        ws.cell(row=current_row, column=2, value=amount).number_format = '$#,##0'
        ws.cell(row=current_row, column=3, value=amount_per_gsf).number_format = '$#,##0.00'

    # Total row directly below the last category
    current_row += 1
    ws.cell(row=current_row, column=1, value="TOTAL SOFT COSTS").font = Font(bold=True)
    total_cell = ws.cell(row=current_row, column=2, value=results.get('TOTAL_SOFT_COST', 0))
    total_cell.number_format = '$#,##0'
    total_cell.font = Font(bold=True)
    total_per_gsf_cell = ws.cell(row=current_row, column=3, value=results.get('TOTAL_SOFT_COST_PER_GSF', 0))
    total_per_gsf_cell.number_format = '$#,##0.00'
    total_per_gsf_cell.font = Font(bold=True)

    for column_letter, width in (('A', 35), ('B', 18), ('C', 15)):
        ws.column_dimensions[column_letter].width = width
1180
+
1181
def create_financing_sheet(self, wb):
    """Build the "Financing" worksheet.

    Writes a vertical list of label/value pairs taken from
    ``self.formula_results``: the pre-LTC budget, LTC ratio and loan amount,
    the financing percentage and amount, the interest rate and construction
    interest, the debt and equity totals, and the debt and equity
    percentages. Groups are separated by a blank row.

    Args:
        wb: Workbook to which the sheet is added.
    """
    ws = wb.create_sheet("Financing")
    results = self.formula_results

    ws['A1'] = "FINANCING STRUCTURE"
    ws['A1'].font = Font(bold=True, size=14)

    currency = '$#,##0'
    percent = '0.00%'

    # One entry per written row:
    # (rows to advance before writing, label, results key, number format,
    #  keyword arguments for the emphasis font or None for the default font).
    # An advance of 2 leaves a blank separator row above the entry.
    layout = (
        (0, "Pre-LTC Budget:", 'PRE_LTC_BUDGET', currency, None),
        (1, "LTC Ratio:", 'LTC_RATIO', percent, None),
        (1, "Loan Amount:", 'LOAN_AMOUNT', currency, {'bold': True}),
        (2, "Financing Percentage:", 'FINANCING_PERCENTAGE', percent, None),
        (1, "Financing Amount:", 'FINANCING_AMOUNT', currency, None),
        (2, "Interest Rate (bps + spread):", 'INTEREST_RATE_DECIMAL', percent, None),
        (1, "Construction Interest:", 'CONSTRUCTION_INTEREST', currency, None),
        (2, "TOTAL DEBT", 'TOTAL_DEBT', currency, {'bold': True, 'size': 12}),
        (1, "TOTAL EQUITY", 'TOTAL_EQUITY', currency, {'bold': True, 'size': 12}),
        (2, "Debt Percentage:", 'DEBT_PERCENTAGE', percent, None),
        (1, "Equity Percentage:", 'EQUITY_PERCENTAGE', percent, None),
    )

    current_row = 3
    for advance, caption, result_key, number_format, emphasis in layout:
        current_row += advance
        label_cell = ws[f'A{current_row}']
        value_cell = ws[f'B{current_row}']

        label_cell.value = caption
        value_cell.value = results.get(result_key, 0)
        value_cell.number_format = number_format

        if emphasis is not None:
            # Emphasised rows use the same font on the label and the value.
            label_cell.font = Font(**emphasis)
            value_cell.font = Font(**emphasis)

    ws.column_dimensions['A'].width = 35
    ws.column_dimensions['B'].width = 20
1252
+
1253
def create_operations_sheet(self, wb):
    """Build the "Operations" worksheet.

    Writes a revenue section (rent, other income, vacancy loss shown as a
    negative number, effective gross income, parking and retail), an
    operating expense table with each expense as a share of effective gross
    income, the expense total, net operating income, and NOI per unit and
    per gross SF. All values are read from ``self.formula_results``; shares
    computed here are 0 when effective gross income is not positive.

    Args:
        wb: Workbook to which the sheet is added.
    """
    ws = wb.create_sheet("Operations")
    results = self.formula_results

    def paint_banner(cell):
        """Apply the blue section-banner styling to ``cell``."""
        cell.fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
        cell.font = Font(color="FFFFFF", bold=True)

    ws['A1'] = "OPERATIONS & REVENUE"
    ws['A1'].font = Font(bold=True, size=14)

    # Revenue Section
    line = 3
    ws[f'A{line}'] = "REVENUE"
    paint_banner(ws[f'A{line}'])
    ws.merge_cells(f'A{line}:B{line}')

    revenue_lines = (
        ("Gross Potential Free Market Rent", results.get('GROSS_POTENTIAL_FREE_MARKET_RENT', 0)),
        ("Gross Potential Affordable Rent", results.get('GROSS_POTENTIAL_AFFORDABLE_RENT', 0)),
        ("Other Income", results.get('OTHER_INCOME', 0)),
        ("Less: Vacancy Loss", -results.get('VACANCY_LOSS', 0)),
        ("Effective Gross Income", results.get('EFFECTIVE_GROSS_INCOME', 0)),
        ("Parking Income", results.get('PARKING_INCOME', 0)),
        ("Retail Revenue", results.get('RETAIL_REVENUE', 0)),
    )

    line += 1
    for caption, amount in revenue_lines:
        caption_cell = ws[f'A{line}']
        caption_cell.value = caption
        # Only the EGI subtotal is emphasised.
        if "Effective Gross" in caption:
            caption_cell.font = Font(bold=True)
        amount_cell = ws[f'B{line}']
        amount_cell.value = amount
        amount_cell.number_format = '$#,##0'
        line += 1

    # Expense Section
    line += 1
    ws[f'A{line}'] = "OPERATING EXPENSES"
    paint_banner(ws[f'A{line}'])
    ws.merge_cells(f'A{line}:C{line}')

    line += 1
    for column, caption in zip("ABC", ("Expense Category", "Annual Amount", "% of EGI")):
        ws[f'{column}{line}'] = caption
        ws[f'{column}{line}'].font = Font(bold=True)

    effective_gross_income = results.get('EFFECTIVE_GROSS_INCOME', 0)

    def share_of_egi(amount):
        """Return ``amount`` as a fraction of EGI, or 0 when EGI is not positive."""
        if effective_gross_income > 0:
            return amount / effective_gross_income
        return 0

    # NOTE(review): Insurance is listed here as a line item; confirm that the
    # TOTAL_EXPENSES figure computed elsewhere is meant to include it.
    expense_lines = (
        ("Payroll", results.get('PAYROLL', 0), results.get('PAYROLL_PERCENTAGE_OF_EGI', 0)),
        ("Repairs & Maintenance", results.get('REPAIRS_AND_MAINTENANCE', 0), results.get('REPAIRS_AND_MAINTENANCE_PERCENTAGE_OF_EGI', 0)),
        ("Utilities", results.get('UTILITIES', 0), results.get('UTILITIES_PERCENTAGE_OF_EGI', 0)),
        ("Insurance", results.get('INSURANCE', 0), results.get('INSURANCE_PERCENTAGE_OF_EGI', 0)),
        ("Management Fee", results.get('MANAGEMENT_FEE', 0), share_of_egi(results.get('MANAGEMENT_FEE', 0))),
        ("Real Estate Taxes", results.get('REAL_ESTATE_TAXES', 0), share_of_egi(results.get('REAL_ESTATE_TAXES', 0))),
    )

    line += 1
    for caption, amount, share in expense_lines:
        ws[f'A{line}'] = caption
        amount_cell = ws[f'B{line}']
        amount_cell.value = amount
        amount_cell.number_format = '$#,##0'
        share_cell = ws[f'C{line}']
        share_cell.value = share
        share_cell.number_format = '0.00%'
        line += 1

    # Expense total, emphasised across all three columns
    total_expenses = results.get('TOTAL_EXPENSES', 0)
    ws[f'A{line}'] = "TOTAL EXPENSES"
    ws[f'A{line}'].font = Font(bold=True)
    ws[f'B{line}'] = total_expenses
    ws[f'B{line}'].number_format = '$#,##0'
    ws[f'B{line}'].font = Font(bold=True)
    ws[f'C{line}'] = share_of_egi(total_expenses)
    ws[f'C{line}'].number_format = '0.00%'
    ws[f'C{line}'].font = Font(bold=True)

    line += 2
    ws[f'A{line}'] = "NET OPERATING INCOME"
    ws[f'A{line}'].font = Font(bold=True, size=12)
    ws[f'B{line}'] = results.get('NET_OPERATING_INCOME', 0)
    ws[f'B{line}'].number_format = '$#,##0'
    ws[f'B{line}'].font = Font(bold=True, size=12)

    line += 2
    ws[f'A{line}'] = "NOI per Unit:"
    ws[f'B{line}'] = results.get('NOI_PER_UNIT', 0)
    ws[f'B{line}'].number_format = '$#,##0'

    line += 1
    ws[f'A{line}'] = "NOI per GSF:"
    ws[f'B{line}'] = results.get('NOI_PER_GSF', 0)
    ws[f'B{line}'].number_format = '$#,##0.00'

    for column, width in (('A', 35), ('B', 20), ('C', 15)):
        ws.column_dimensions[column].width = width
1354
+
1355
def create_returns_sheet(self, wb):
    """Create the "Returns" sheet: stabilized returns, exit analysis, equity waterfall.

    Every figure is read from ``self.formula_results``; a missing key falls
    back to 0. Labels are written to column A and values to column B.

    Args:
        wb: Workbook to add the sheet to. Only ``create_sheet`` is called on
            it here (presumably an openpyxl ``Workbook`` -- confirm against
            the caller).
    """
    ws = wb.create_sheet("Returns")
    r = self.formula_results

    money = '$#,##0'
    percent = '0.00%'
    bold = {'bold': True}
    bold_large = {'bold': True, 'size': 12}

    def write_banner(row, title):
        """Write a white-on-blue section title merged across columns A:B."""
        ws[f'A{row}'] = title
        ws[f'A{row}'].fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
        ws[f'A{row}'].font = Font(color="FFFFFF", bold=True)
        ws.merge_cells(f'A{row}:B{row}')

    def write_rows(row, specs):
        """Write one label/value row per spec and return the next free row.

        Each spec is ``(label, value, number_format, label_font, value_font)``;
        the last three may be None (left at the sheet default). A spec of
        None leaves the row blank.
        """
        for spec in specs:
            if spec is not None:
                label, value, number_format, label_font, value_font = spec
                ws[f'A{row}'] = label
                ws[f'B{row}'] = value
                if number_format is not None:
                    ws[f'B{row}'].number_format = number_format
                if label_font is not None:
                    ws[f'A{row}'].font = Font(**label_font)
                if value_font is not None:
                    ws[f'B{row}'].font = Font(**value_font)
            row += 1
        return row

    ws['A1'] = "INVESTMENT RETURNS & EXIT ANALYSIS"
    ws['A1'].font = Font(bold=True, size=14)

    # Current Returns -- values are pre-formatted strings, so no number format.
    returns_data = [
        ("Net Operating Income", f"${r.get('NET_OPERATING_INCOME', 0):,.0f}"),
        ("Stabilized Yield on Cost", f"${r.get('STABILIZED_YIELD_ON_COST', 0):,.0f}"),
        ("Yield on Cost %", f"{r.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}"),
        ("Stabilized Yield on Cost %", f"{r.get('STABILIZED_YIELD_ON_COST_PERCENTAGE', 0):.2%}"),
        ("Annual Debt Service", f"${r.get('ANNUAL_DEBT_SERVICE', 0):,.0f}"),
        ("Cash-on-Cash Return", f"{r.get('CASH_ON_CASH_RETURN', 0):.2f}%"),
        ("Debt Service Coverage Ratio", f"{r.get('DEBT_SERVICE_COVERAGE_RATIO', 0):.2f}x"),
    ]
    stabilized_rows = [(label, text, None, bold, None) for label, text in returns_data]

    # Exit Analysis
    exit_rows = [
        ("Hold Period (months):", r.get('HOLD_PERIOD_MONTHS', 0), None, None, None),
        ("Exit Cap Rate:", r.get('EXIT_CAP_RATE_DECIMAL', 0), percent, None, None),
        ("Property Value on Sale:", r.get('PROPERTY_VALUE_ON_SALE', 0), money, None, None),
        # Deductions are shown as negative amounts.
        ("Less: Sale Costs (2%):", -r.get('SALE_COST', 0), money, None, None),
        ("Net Sale Proceeds:", r.get('NET_SALE_PROCEEDS', 0), money, bold, bold),
        ("Less: Loan Payoff:", -r.get('TOTAL_DEBT', 0), money, None, None),
        ("Cash After Loan Payback:", r.get('CASH_REMAINING_AFTER_LOAN_PAYBACK', 0), money, bold, bold),
    ]

    irr_value = r.get('IRR_TO_LP', 0)
    if isinstance(irr_value, complex):
        # Keep the real component so the workbook agrees with the console
        # and UI summaries, which report the real part as well.
        irr_value = irr_value.real
    elif irr_value is None:
        irr_value = 0

    # Equity Waterfall (None entries are blank spacer rows)
    waterfall_rows = [
        ("GP Investment (20%):", r.get('GP_INVESTMENT', 0), money, None, None),
        ("LP Investment (80%):", r.get('LP_INVESTMENT', 0), money, None, None),
        None,
        ("GP Preferred Return + Principal:", r.get('GP_PREFERRED_RETURN_WITH_PRINCIPAL', 0), money, None, None),
        ("LP Preferred Return + Principal:", r.get('LP_PREFERRED_RETURN_WITH_PRINCIPAL', 0), money, None, None),
        ("Cash After Preferred:", r.get('CASH_REMAINING_AFTER_PREFERRED', 0), money, None, None),
        None,
        ("GP Promote (20%):", r.get('PROMOTE_ON_JOINT_VENTURE', 0), money, None, None),
        ("Cash to LP:", r.get('CASH_TO_LP', 0), money, None, None),
        None,
        ("NET TO LP INVESTOR", r.get('NET_TO_LP_INVESTOR', 0), money, bold_large, bold_large),
        None,
        ("LP Multiple:", r.get('LP_MULTIPLE', 0), '0.00x', bold, bold),
        # IRR is divided by 100 so the percent number format displays it as-is.
        ("LP IRR:", irr_value / 100, percent, bold, bold),
    ]

    row = 3
    write_banner(row, "STABILIZED RETURNS")
    row = write_rows(row + 1, stabilized_rows)

    row += 2  # two blank rows before the next section
    write_banner(row, "EXIT ANALYSIS")
    row = write_rows(row + 1, exit_rows)

    row += 1  # one blank row before the next section
    write_banner(row, "EQUITY WATERFALL")
    write_rows(row + 1, waterfall_rows)

    ws.column_dimensions['A'].width = 35
    ws.column_dimensions['B'].width = 20
1503
+
1504
def run_full_pipeline(self, pdf_directory: str, output_excel: str = "Real_Estate_Financial_Model.xlsx") -> str:
    """Execute the complete pipeline and return the Excel output path.

    Runs, in order: ``extract_all_pdfs``, ``extract_structured_data``,
    ``post_process_extracted_data``, ``calculate_all_formulas`` and
    ``generate_excel``, printing progress and a key-metrics summary to
    stdout along the way.

    Args:
        pdf_directory: Directory handed to ``extract_all_pdfs``.
        output_excel: Path handed to ``generate_excel``.

    Returns:
        ``output_excel``, unchanged.
    """
    rule = "=" * 60
    print(rule)
    print("REAL ESTATE FINANCIAL MODEL PIPELINE")
    print(rule)

    # Step 1: Extract PDFs
    print("\n[Step 1/4] Extracting text from PDFs...")
    self.extract_all_pdfs(pdf_directory)
    print(f"✓ Extracted {len(self.extracted_data)} PDF files")

    # Step 2: Process with Gemini
    print("\n[Step 2/4] Extracting structured data with Gemini API...")
    structured_data = self.extract_structured_data()

    # Post-process to fill gaps before any formula runs
    print("\n[Step 2.5/4] Post-processing and filling estimates...")
    structured_data = self.post_process_extracted_data(structured_data)

    # Step 3: Calculate formulas
    print("\n[Step 3/4] Calculating all formulas...")
    self.calculate_all_formulas(structured_data)
    print(f"✓ Calculated {len(self.formula_results)} formula values")

    # Step 4: Generate Excel
    print("\n[Step 4/4] Generating Excel file...")
    self.generate_excel(output_excel)

    results = self.formula_results

    irr_val = results.get('IRR_TO_LP', 0)
    if isinstance(irr_val, complex):
        # A complex IRR cannot be formatted with ':.2f'; report the real part.
        irr_val = irr_val.real
    elif irr_val is None:
        irr_val = 0

    print("\n" + rule)
    print("PIPELINE COMPLETE!")
    print(rule)
    print("\nKey Metrics:")
    print(f" Total Project Cost: ${results.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0):,.0f}")
    print(f" Total Debt: ${results.get('TOTAL_DEBT', 0):,.0f}")
    print(f" Total Equity: ${results.get('TOTAL_EQUITY', 0):,.0f}")
    print(f" NOI: ${results.get('NET_OPERATING_INCOME', 0):,.0f}")
    print(f" Yield on Cost: {results.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}")
    print(f" LP IRR: {irr_val:.2f}%")

    print(f"\nExcel file: {output_excel}")

    return output_excel
1549
+
1550
+
1551
def process_pdfs(pdf_files, api_key):
    """Process uploaded PDFs and return ``(excel_path, status_message)``.

    Args:
        pdf_files: Uploaded files as delivered by the upload component.
            Each entry may be a path string or an object exposing the path
            as ``.name``; both are accepted.
        api_key: Gemini API key entered by the user.

    Returns:
        ``(path, summary)`` on success, or ``(None, message)`` when input is
        missing or processing fails. Exceptions are reported through the
        message rather than raised, so the UI always receives a result.
    """
    if not pdf_files:
        return None, "Please upload at least one PDF file"

    if not api_key or not api_key.strip():
        return None, "Please enter your Gemini API key"

    output_dir = None
    try:
        # The PDF copies are only needed while the pipeline runs, so they
        # live in a directory that is removed automatically afterwards.
        with tempfile.TemporaryDirectory() as pdf_dir:
            for pdf_file in pdf_files:
                shutil.copy(getattr(pdf_file, "name", pdf_file), pdf_dir)

            # Initialize pipeline with provided API key
            pipeline = RealEstateModelPipeline(api_key.strip())

            # The workbook must outlive this call (it is offered for
            # download), so it gets its own directory.
            output_dir = tempfile.mkdtemp()
            output_file = Path(output_dir) / "Real_Estate_Financial_Model.xlsx"

            pipeline.run_full_pipeline(pdf_dir, str(output_file))

        results = pipeline.formula_results

        irr = results.get('IRR_TO_LP', 0)
        if isinstance(irr, complex):
            # A complex IRR cannot be formatted with ':.2f'; report the real part.
            irr = irr.real
        elif irr is None:
            irr = 0

        summary = "\n".join([
            "",
            "✅ Processing Complete!",
            "",
            "Key Metrics:",
            f"• Total Project Cost: ${results.get('TOTAL_FINANCING_CONTINGENCY_AND_RESERVES', 0):,.0f}",
            f"• Total Debt: ${results.get('TOTAL_DEBT', 0):,.0f}",
            f"• Total Equity: ${results.get('TOTAL_EQUITY', 0):,.0f}",
            f"• NOI: ${results.get('NET_OPERATING_INCOME', 0):,.0f}",
            f"• Yield on Cost: {results.get('YIELD_ON_COST_PERCENTAGE', 0):.2%}",
            f"• LP IRR: {float(irr):.2f}%",
            "",
            "Download your Excel file below ⬇️",
            "",
        ])

        return str(output_file), summary

    except Exception as e:
        # UI boundary: surface the failure as text and drop any partial output.
        if output_dir is not None:
            shutil.rmtree(output_dir, ignore_errors=True)
        return None, f"❌ Error: {e}"


def build_demo():
    """Assemble the Gradio Blocks UI and wire the button to ``process_pdfs``."""
    with gr.Blocks(title="Real Estate Financial Model Generator", theme=gr.themes.Soft()) as demo:

        gr.Markdown("""
        # 🏢 Real Estate Financial Model Generator
        Upload your PDF documents and generate a comprehensive financial model in Excel format.
        """)

        with gr.Row():
            with gr.Column(scale=2):
                api_key_input = gr.Textbox(
                    label="Gemini API Key",
                    placeholder="Enter your Gemini API key (AIza...)",
                    type="password",
                    info="Get your API key from Google AI Studio"
                )

                pdf_input = gr.File(
                    label="Upload PDF Files",
                    file_count="multiple",
                    file_types=[".pdf"],
                    type="filepath"
                )

                process_btn = gr.Button("🚀 Generate Financial Model", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("""
                ### 📋 Required Documents
                - Offering Memorandum
                - Operating Expenses Summary
                - Sales Comps
                - Rent Comps
                - Market Report
                - Demographics Overview

                ### ⚡ Features
                - Automated data extraction
                - Formula calculations
                - Professional Excel output
                - Multiple analysis sheets
                """)

        with gr.Row():
            output_text = gr.Textbox(
                label="Processing Results",
                lines=12,
                interactive=False
            )

        with gr.Row():
            excel_output = gr.File(
                label="📊 Download Excel File"
            )

        process_btn.click(
            fn=process_pdfs,
            inputs=[pdf_input, api_key_input],
            outputs=[excel_output, output_text]
        )

        gr.Markdown("""
        ---
        ### 💡 Tips
        - Ensure PDF files are readable and not scanned images
        - Use descriptive filenames (e.g., "Offering_Memorandum.pdf")
        - Processing may take 30-60 seconds depending on file sizes
        """)

    return demo


if __name__ == "__main__":
    # Launch the app
    demo = build_demo()
    demo.launch(share=False)