mlbench123 committed on
Commit
b5cf60c
·
verified ·
1 Parent(s): e55d55f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -114
app.py CHANGED
@@ -11,6 +11,7 @@ class SemanticFormulaAnalyzer:
11
  self.formula_file_path = formula_file_path
12
  self.formulas = {}
13
  self.computed_values = {}
 
14
  self.load_formulas()
15
 
16
  def load_formulas(self):
@@ -19,10 +20,7 @@ class SemanticFormulaAnalyzer:
19
  with open(self.formula_file_path, 'r', encoding='utf-8') as f:
20
  content = f.read()
21
 
22
- # Parse semantic formulas: Variable_Name = formula
23
- # Pattern: capture variable name, formula, and description
24
  lines = content.split('\n')
25
-
26
  current_formula_name = None
27
  current_formula = None
28
  current_description = None
@@ -30,30 +28,23 @@ class SemanticFormulaAnalyzer:
30
  for line in lines:
31
  line = line.strip()
32
 
33
- # Skip empty lines and section headers
34
  if not line or line.startswith('#'):
 
 
35
  continue
36
 
37
- # Check if line contains a formula assignment
38
- if '=' in line and not line.startswith('#'):
39
- # Save previous formula if exists
40
  if current_formula_name and current_formula:
41
  self.formulas[current_formula_name] = {
42
  'formula': current_formula,
43
  'description': current_description or current_formula_name
44
  }
45
 
46
- # Parse new formula
47
  parts = line.split('=', 1)
48
  current_formula_name = parts[0].strip()
49
  current_formula = parts[1].strip()
50
  current_description = None
51
-
52
- # Check if line is a description comment
53
- elif line.startswith('# Description:'):
54
- current_description = line.replace('# Description:', '').strip()
55
 
56
- # Add last formula
57
  if current_formula_name and current_formula:
58
  self.formulas[current_formula_name] = {
59
  'formula': current_formula,
@@ -100,15 +91,14 @@ class SemanticFormulaAnalyzer:
100
 
101
  extracted_data = {}
102
 
103
- # Comprehensive extraction patterns with semantic names
104
  patterns = {
105
  # Basic Property Info
106
  'UNITS': [r'(?:Total\s+)?Units?\s*:?\s*(\d+)', r'(\d+)\s*units?'],
107
- 'GROSS_SF': [r'Building\s+(?:Size|SF)\s*:?\s*([\d,]+)', r'Building\s+SF\s*(\d+)', r'(\d+)\s*SF'],
108
  'BUILDING_SF': [r'Building\s+(?:Size|SF)\s*:?\s*([\d,]+)'],
109
  'RENTABLE_SF': [r'Rentable\s+SF\s*:?\s*([\d,]+)', r'RSF\s*:?\s*([\d,]+)'],
110
- 'LOT_ACRES': [r'Lot\s+Size\s*:?\s*([\d.]+)\s*(?:acres?|Acres?)'],
111
- 'LOT_SF': [r'Lot\s+(?:Size\s+)?SF\s*:?\s*([\d,]+)'],
112
 
113
  # Financial - Core
114
  'PRICE': [r'(?:Asking\s+)?Price\s*:?\s*\$\s*([\d,]+)', r'Purchase\s+Price\s*:?\s*\$\s*([\d,]+)'],
@@ -116,58 +106,88 @@ class SemanticFormulaAnalyzer:
116
  'NET_OPERATING_INCOME': [r'Net\s+Operating\s+Income\s*(?:\(NOI\))?\s*:?\s*\$?\s*([\d,]+)'],
117
  'EGI': [r'Effective\s+Gross\s+Income\s*:?\s*\$?\s*([\d,]+)'],
118
  'EFFECTIVE_GROSS_INCOME': [r'Effective\s+Gross\s+Income\s*:?\s*\$?\s*([\d,]+)'],
119
- 'GPR': [r'Gross\s+Potential\s+Rent\s*(?:\(Annual\))?\s*:?\s*\$?\s*([\d,]+)'],
120
- 'GROSS_POTENTIAL_RENT': [r'Gross\s+Potential\s+Rent\s*:?\s*\$?\s*([\d,]+)'],
121
- 'VACANCY': [r'Vacancy\s*(?:\([\d.]+%\))?\s*:?\s*-?\$?\s*([\d,]+)'],
122
- 'VACANCY_LOSS': [r'Vacancy\s*(?:\([\d.]+%\))?\s*:?\s*-?\$?\s*([\d,]+)'],
123
- 'VACANCY_RATE': [r'Vacancy\s*(?:\()?([\d.]+)%'],
124
 
125
  # Operating Expenses
126
- 'OPEX': [r'Operating\s+Expenses\s*:?\s*\$?\s*([\d,]+)', r'Total\s+Operating\s+Expenses\s*=?\s*\$?\s*([\d,]+)'],
127
  'TOTAL_OPERATING_EXPENSES': [r'Total\s+Operating\s+Expenses\s*=?\s*\$?\s*([\d,]+)'],
128
  'PROPERTY_TAXES': [r'Property\s+Taxes\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
129
- 'REAL_ESTATE_TAXES': [r'Property\s+Taxes\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
130
  'INSURANCE': [r'Insurance\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
131
  'UTILITIES': [r'Utilities\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
132
  'REPAIRS_AND_MAINTENANCE': [r'Repairs?\s*(?:&|and)?\s*Maintenance\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
133
  'PAYROLL': [r'Payroll\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
134
  'ADMINISTRATIVE': [r'Administrative\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
135
- 'MARKETING': [r'Marketing\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
136
- 'ADVERTISING_AND_MARKETING_COST': [r'Marketing\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
137
- 'REPLACEMENT_RESERVES': [r'Replacement\s+Reserves\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
138
- 'MANAGEMENT_FEE': [r'Management\s*(?:\([^)]+\))?\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
139
- 'MANAGEMENT_FEE_PERCENTAGE': [r'Management\s*.*?(\d+)%', r'Management\s*@\s*([\d.]+)%'],
140
  'PROFESSIONAL_FEES': [r'Professional\s+Fees\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
 
 
141
 
142
- # Rates and Percentages
143
  'CAP_RATE': [r'Cap\s+Rate\s*:?\s*([\d.]+)%?'],
144
  'INTEREST_RATE': [r'Interest\s+Rate\s*:?\s*([\d.]+)%?'],
145
- 'INTEREST_RATE_DECIMAL': [r'Interest\s+Rate\s*:?\s*([\d.]+)%?'],
146
  'LTC': [r'Loan[- ]to[- ]Cost\s*(?:\(LTC\))?\s*:?\s*([\d.]+)%?'],
147
  'LTC_RATIO': [r'Loan[- ]to[- ]Cost\s*(?:\(LTC\))?\s*:?\s*([\d.]+)%?'],
148
  'EXIT_CAP_RATE': [r'Exit\s+Cap\s+Rate\s*:?\s*([\d.]+)%?'],
149
- 'EXIT_CAP_RATE_DECIMAL': [r'Exit\s+Cap\s+Rate\s*:?\s*([\d.]+)%?'],
150
 
151
- # Demographics
152
- 'MEDIAN_INCOME': [r'Median\s+(?:HH\s+)?Income\s*:?\s*\$?\s*([\d,]+)'],
153
- 'POPULATION': [r'Population\s*:?\s*([\d,]+)'],
154
- 'HOUSEHOLDS': [r'Households\s*:?\s*([\d,]+)'],
155
- 'RENTER_OCCUPIED_PCT': [r'Renter[- ]Occupied\s*:?\s*([\d.]+)%?'],
 
 
 
 
 
 
 
 
 
156
 
157
  # Construction & Development
158
  'CONSTRUCTION_COST_PER_GSF': [r'Construction\s+Cost\s*:?\s*\$?\s*([\d,]+)\s*per\s+(?:GSF|SF)'],
159
  'TOTAL_CONSTRUCTION_GMP': [r'(?:Total\s+)?Construction\s+GMP\s*:?\s*\$?\s*([\d,]+)'],
160
- 'SOFT_COSTS': [r'(?:Total\s+)?Soft\s+Costs?\s*:?\s*\$?\s*([\d,]+)'],
161
  'TOTAL_SOFT_COST': [r'(?:Total\s+)?Soft\s+Costs?\s*:?\s*\$?\s*([\d,]+)'],
162
- 'CONTINGENCY': [r'Contingency\s*:?\s*\$?\s*([\d,]+)'],
163
- 'CONTINGENCY_COST': [r'Contingency\s*:?\s*\$?\s*([\d,]+)'],
164
- 'DEV_FEE': [r'Dev(?:elopment)?\s+Fee\s*:?\s*\$?\s*([\d,]+)'],
165
- 'DEVELOPMENT_FEE': [r'Dev(?:elopment)?\s+Fee\s*:?\s*\$?\s*([\d,]+)'],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  # Land & Acquisition
168
  'LAND_VALUE': [r'(?:Total\s+)?Land\s+Value\s*:?\s*\$?\s*([\d,]+)'],
169
  'CLOSING_COSTS': [r'Closing\s+Costs\s*:?\s*\$?\s*([\d,]+)'],
170
  'ACQUISITION_FEE': [r'Acq(?:uisition)?\s+Fee\s*:?\s*\$?\s*([\d,]+)'],
 
 
 
 
 
 
 
 
 
 
 
 
171
  }
172
 
173
  for key, pattern_list in patterns.items():
@@ -182,20 +202,15 @@ class SemanticFormulaAnalyzer:
182
  except (ValueError, IndexError):
183
  continue
184
 
185
- # Post-processing: Handle percentages and derived values
186
- if 'INTEREST_RATE' in extracted_data:
187
- if extracted_data['INTEREST_RATE'] > 1:
188
- extracted_data['INTEREST_RATE'] = extracted_data['INTEREST_RATE'] / 100
189
  extracted_data['INTEREST_RATE_DECIMAL'] = extracted_data['INTEREST_RATE']
190
 
191
- if 'LTC' in extracted_data:
192
- if extracted_data['LTC'] > 1:
193
- extracted_data['LTC'] = extracted_data['LTC'] / 100
194
  extracted_data['LTC_RATIO'] = extracted_data['LTC']
195
 
196
- if 'CAP_RATE' in extracted_data and extracted_data['CAP_RATE'] < 1:
197
- extracted_data['CAP_RATE'] = extracted_data['CAP_RATE'] * 100
198
-
199
  if 'EXIT_CAP_RATE' in extracted_data:
200
  if extracted_data['EXIT_CAP_RATE'] > 1:
201
  extracted_data['EXIT_CAP_RATE_DECIMAL'] = extracted_data['EXIT_CAP_RATE'] / 100
@@ -209,51 +224,84 @@ class SemanticFormulaAnalyzer:
209
  if 'BUILDING_SF' in extracted_data and 'GROSS_SF' not in extracted_data:
210
  extracted_data['GROSS_SF'] = extracted_data['BUILDING_SF']
211
 
212
- if 'GROSS_SF' in extracted_data and 'BUILDING_SF' not in extracted_data:
213
- extracted_data['BUILDING_SF'] = extracted_data['GROSS_SF']
214
-
215
- # Estimate RENTABLE_SF if not provided (assume 90% efficiency)
216
  if 'GROSS_SF' in extracted_data and 'RENTABLE_SF' not in extracted_data:
217
  extracted_data['RENTABLE_SF'] = extracted_data['GROSS_SF'] * 0.9
218
 
219
- # Map EGI synonyms
220
  if 'EGI' in extracted_data and 'EFFECTIVE_GROSS_INCOME' not in extracted_data:
221
  extracted_data['EFFECTIVE_GROSS_INCOME'] = extracted_data['EGI']
222
 
223
- if 'EFFECTIVE_GROSS_INCOME' in extracted_data and 'EGI' not in extracted_data:
224
- extracted_data['EGI'] = extracted_data['EFFECTIVE_GROSS_INCOME']
225
-
226
- # Map NOI synonyms
227
  if 'NOI' in extracted_data and 'NET_OPERATING_INCOME' not in extracted_data:
228
  extracted_data['NET_OPERATING_INCOME'] = extracted_data['NOI']
229
 
230
- if 'NET_OPERATING_INCOME' in extracted_data and 'NOI' not in extracted_data:
231
- extracted_data['NOI'] = extracted_data['NET_OPERATING_INCOME']
232
-
233
- # Map OPEX synonyms
234
  if 'OPEX' in extracted_data and 'TOTAL_OPERATING_EXPENSES' not in extracted_data:
235
  extracted_data['TOTAL_OPERATING_EXPENSES'] = extracted_data['OPEX']
236
 
237
- if 'TOTAL_OPERATING_EXPENSES' in extracted_data and 'OPEX' not in extracted_data:
238
- extracted_data['OPEX'] = extracted_data['TOTAL_OPERATING_EXPENSES']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
- # Derive management fee percentage if we have the dollar amount
241
- if 'MANAGEMENT_FEE' in extracted_data and 'EFFECTIVE_GROSS_INCOME' in extracted_data and 'MANAGEMENT_FEE_PERCENTAGE' not in extracted_data:
242
- extracted_data['MANAGEMENT_FEE_PERCENTAGE'] = extracted_data['MANAGEMENT_FEE'] / extracted_data['EFFECTIVE_GROSS_INCOME']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  return extracted_data
245
 
246
  def extract_variables_from_formula(self, formula: str) -> List[str]:
247
  """Extract variable names from formula"""
248
- # Match Python-style variable names (letters, numbers, underscores)
249
- # But exclude Python keywords and operators
250
  var_pattern = r'\b([A-Z][A-Z0-9_]*)\b'
251
  variables = re.findall(var_pattern, formula)
252
-
253
- # Remove Python built-in functions
254
  python_builtins = {'SUM', 'MIN', 'MAX', 'ABS', 'ROUND'}
255
  variables = [v for v in variables if v not in python_builtins]
256
-
257
  return list(set(variables))
258
 
259
  def check_formula_computable(self, formula: str, data: Dict[str, Any]) -> Tuple[bool, List[str]]:
@@ -270,10 +318,7 @@ class SemanticFormulaAnalyzer:
270
  def safe_eval_formula(self, formula: str, data: Dict[str, Any]) -> Any:
271
  """Safely evaluate a semantic formula"""
272
  try:
273
- # Combine extracted data with computed values
274
  all_data = {**data, **self.computed_values}
275
-
276
- # Replace variables with their values
277
  formula_eval = formula
278
  variables = self.extract_variables_from_formula(formula)
279
 
@@ -282,11 +327,8 @@ class SemanticFormulaAnalyzer:
282
  value = all_data[var]
283
  formula_eval = re.sub(r'\b' + var + r'\b', str(value), formula_eval)
284
 
285
- # Replace ** with ** (already correct for Python)
286
- # Handle any remaining math operations
287
  formula_eval = formula_eval.replace('^', '**')
288
 
289
- # Evaluate safely
290
  safe_dict = {
291
  'min': min,
292
  'max': max,
@@ -308,14 +350,11 @@ class SemanticFormulaAnalyzer:
308
  return "❌ No files uploaded", "", ""
309
 
310
  file_paths = [f.name for f in files]
311
-
312
- # Extract data
313
  extracted_data = self.extract_data_from_files(file_paths)
314
 
315
  if not extracted_data:
316
  return "❌ No data could be extracted from the files", "", ""
317
 
318
- # Reset computed values
319
  self.computed_values = {}
320
 
321
  # Multiple passes for dependency resolution
@@ -327,7 +366,6 @@ class SemanticFormulaAnalyzer:
327
  newly_computed = 0
328
 
329
  for formula_name, formula_info in self.formulas.items():
330
- # Skip if already computed
331
  if formula_name in computable_formulas:
332
  continue
333
 
@@ -348,7 +386,6 @@ class SemanticFormulaAnalyzer:
348
  'iteration': iteration + 1
349
  }
350
 
351
- # Store for cascading
352
  self.computed_values[formula_name] = result
353
  newly_computed += 1
354
 
@@ -371,43 +408,65 @@ class SemanticFormulaAnalyzer:
371
  if newly_computed == 0:
372
  break
373
 
374
- # Remove computed formulas from non-computable list
375
  for formula_name in computable_formulas.keys():
376
  non_computable_formulas.pop(formula_name, None)
377
 
 
 
 
 
 
 
 
 
378
  # Create summary
 
 
379
  summary = f"""
380
  ## πŸ“Š Analysis Summary
381
 
382
  **Total Formulas Loaded:** {len(self.formulas)}
383
- **βœ… Computable Formulas:** {len(computable_formulas)}
384
- **❌ Non-Computable Formulas:** {len(non_computable_formulas)}
385
  **πŸ“„ Files Processed:** {len(file_paths)}
386
  **πŸ”’ Data Points Extracted:** {len(extracted_data)}
 
387
  **πŸ”„ Computation Iterations:** {iteration + 1}
388
- **πŸ“ˆ Success Rate:** {(len(computable_formulas) / len(self.formulas) * 100):.1f}%
 
389
  """
390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  # Extracted data display
392
  data_display = "## πŸ“₯ Extracted Property Data\n\n"
393
- data_display += "| Variable | Value |\n|----------|-------|\n"
394
  for key, value in sorted(extracted_data.items()):
 
395
  if isinstance(value, float):
396
- data_display += f"| {key} | {value:,.4f} |\n"
397
  else:
398
- data_display += f"| {key} | {value} |\n"
399
 
400
  # Results display
401
  results_display = "## βœ… Computed Formulas\n\n"
402
 
403
- # Group by iteration
404
- by_iteration = {}
405
- for name, info in computable_formulas.items():
406
- iter_num = info['iteration']
407
- if iter_num not in by_iteration:
408
- by_iteration[iter_num] = []
409
- by_iteration[iter_num].append((name, info))
410
-
411
  for iter_num in sorted(by_iteration.keys()):
412
  results_display += f"### Iteration {iter_num} ({len(by_iteration[iter_num])} formulas)\n\n"
413
  for name, info in sorted(by_iteration[iter_num]):
@@ -415,22 +474,22 @@ class SemanticFormulaAnalyzer:
415
  results_display += f"*{info['description']}*\n"
416
  results_display += f"`{info['formula']}`\n\n"
417
 
 
418
  # if non_computable_formulas:
419
  # results_display += f"\n## ❌ Non-Computable Formulas ({len(non_computable_formulas)})\n\n"
420
- # # Show sample of non-computable
421
- # sample_size = min(15, len(non_computable_formulas))
422
- # results_display += f"*Showing {sample_size} of {len(non_computable_formulas)} non-computable formulas*\n\n"
423
 
424
- # for idx, (name, info) in enumerate(sorted(non_computable_formulas.items())):
425
- # if idx >= sample_size:
426
- # break
427
- # results_display += f"**{name}**: {info['description']}\n"
428
- # if info.get('missing_variables'):
429
- # missing = info['missing_variables'][:5]
430
- # results_display += f"Missing: {', '.join(missing)}\n"
431
- # results_display += "\n"
432
-
433
- # JSON output
 
 
434
  json_output = {
435
  'summary': {
436
  'total_formulas': len(self.formulas),
 
11
  self.formula_file_path = formula_file_path
12
  self.formulas = {}
13
  self.computed_values = {}
14
+ self.defaults = {}
15
  self.load_formulas()
16
 
17
  def load_formulas(self):
 
20
  with open(self.formula_file_path, 'r', encoding='utf-8') as f:
21
  content = f.read()
22
 
 
 
23
  lines = content.split('\n')
 
24
  current_formula_name = None
25
  current_formula = None
26
  current_description = None
 
28
  for line in lines:
29
  line = line.strip()
30
 
 
31
  if not line or line.startswith('#'):
32
+ if line.startswith('# Description:'):
33
+ current_description = line.replace('# Description:', '').strip()
34
  continue
35
 
36
+ if '=' in line:
 
 
37
  if current_formula_name and current_formula:
38
  self.formulas[current_formula_name] = {
39
  'formula': current_formula,
40
  'description': current_description or current_formula_name
41
  }
42
 
 
43
  parts = line.split('=', 1)
44
  current_formula_name = parts[0].strip()
45
  current_formula = parts[1].strip()
46
  current_description = None
 
 
 
 
47
 
 
48
  if current_formula_name and current_formula:
49
  self.formulas[current_formula_name] = {
50
  'formula': current_formula,
 
91
 
92
  extracted_data = {}
93
 
94
+ # Comprehensive extraction patterns
95
  patterns = {
96
  # Basic Property Info
97
  'UNITS': [r'(?:Total\s+)?Units?\s*:?\s*(\d+)', r'(\d+)\s*units?'],
98
+ 'GROSS_SF': [r'Building\s+(?:Size|SF)\s*:?\s*([\d,]+)', r'Gross\s+SF\s*:?\s*([\d,]+)', r'GSF\s*:?\s*([\d,]+)'],
99
  'BUILDING_SF': [r'Building\s+(?:Size|SF)\s*:?\s*([\d,]+)'],
100
  'RENTABLE_SF': [r'Rentable\s+SF\s*:?\s*([\d,]+)', r'RSF\s*:?\s*([\d,]+)'],
101
+ 'RETAIL_SF': [r'Retail\s+SF\s*:?\s*([\d,]+)', r'Retail\s+Space\s*:?\s*([\d,]+)\s*SF'],
 
102
 
103
  # Financial - Core
104
  'PRICE': [r'(?:Asking\s+)?Price\s*:?\s*\$\s*([\d,]+)', r'Purchase\s+Price\s*:?\s*\$\s*([\d,]+)'],
 
106
  'NET_OPERATING_INCOME': [r'Net\s+Operating\s+Income\s*(?:\(NOI\))?\s*:?\s*\$?\s*([\d,]+)'],
107
  'EGI': [r'Effective\s+Gross\s+Income\s*:?\s*\$?\s*([\d,]+)'],
108
  'EFFECTIVE_GROSS_INCOME': [r'Effective\s+Gross\s+Income\s*:?\s*\$?\s*([\d,]+)'],
109
+ 'VACANCY_RATE': [r'Vacancy\s*(?:Rate)?\s*(?:\()?([\d.]+)%'],
 
 
 
 
110
 
111
  # Operating Expenses
112
+ 'OPEX': [r'Operating\s+Expenses\s*:?\s*\$?\s*([\d,]+)'],
113
  'TOTAL_OPERATING_EXPENSES': [r'Total\s+Operating\s+Expenses\s*=?\s*\$?\s*([\d,]+)'],
114
  'PROPERTY_TAXES': [r'Property\s+Taxes\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
115
+ 'REAL_ESTATE_TAXES': [r'(?:Real\s+Estate\s+|Property\s+)Taxes\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
116
  'INSURANCE': [r'Insurance\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
117
  'UTILITIES': [r'Utilities\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
118
  'REPAIRS_AND_MAINTENANCE': [r'Repairs?\s*(?:&|and)?\s*Maintenance\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
119
  'PAYROLL': [r'Payroll\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
120
  'ADMINISTRATIVE': [r'Administrative\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
 
 
 
 
 
121
  'PROFESSIONAL_FEES': [r'Professional\s+Fees\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
122
+ 'MANAGEMENT_FEE': [r'Management\s*(?:\([^)]+\))?\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
123
+ 'MANAGEMENT_FEE_PERCENTAGE': [r'Management\s*.*?([\d.]+)%'],
124
 
125
+ # Rates
126
  'CAP_RATE': [r'Cap\s+Rate\s*:?\s*([\d.]+)%?'],
127
  'INTEREST_RATE': [r'Interest\s+Rate\s*:?\s*([\d.]+)%?'],
128
+ 'INTEREST_RATE_BASIS_POINTS': [r'Interest\s+Rate\s*:?\s*(\d+)\s*(?:bps|basis\s+points)'],
129
  'LTC': [r'Loan[- ]to[- ]Cost\s*(?:\(LTC\))?\s*:?\s*([\d.]+)%?'],
130
  'LTC_RATIO': [r'Loan[- ]to[- ]Cost\s*(?:\(LTC\))?\s*:?\s*([\d.]+)%?'],
131
  'EXIT_CAP_RATE': [r'Exit\s+Cap\s+Rate\s*:?\s*([\d.]+)%?'],
 
132
 
133
+ # Rent & Revenue
134
+ 'FREE_MARKET_RENT_PSF': [r'Free\s+Market\s+Rent\s*:?\s*\$?\s*([\d,]+\.?\d*)\s*(?:/\s*)?(?:PSF|per\s+SF)'],
135
+ 'AFFORDABLE_RENT_PSF': [r'Affordable\s+Rent\s*:?\s*\$?\s*([\d,]+\.?\d*)\s*(?:/\s*)?(?:PSF|per\s+SF)'],
136
+ 'RETAIL_RENT_PSF': [r'Retail\s+Rent\s*:?\s*\$?\s*([\d,]+\.?\d*)\s*(?:/\s*)?(?:PSF|per\s+SF)'],
137
+ 'OTHER_INCOME_PER_UNIT': [r'Other\s+Income\s*:?\s*\$?\s*([\d,]+\.?\d*)\s*(?:/\s*)?(?:unit|per\s+unit)'],
138
+ 'PARKING_INCOME': [r'Parking\s+Income\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
139
+
140
+ # Inflation & Time
141
+ 'REVENUE_INFLATION_RATE': [r'Revenue\s+Inflation\s*:?\s*([\d.]+)%?'],
142
+ 'EXPENSE_INFLATION_RATE': [r'Expense\s+Inflation\s*:?\s*([\d.]+)%?'],
143
+ 'LEASE_UP_MONTHS': [r'Lease[- ]?Up\s+Period\s*:?\s*(\d+)\s*months?'],
144
+ 'STABILIZATION_MONTHS': [r'Stabilization\s+Period\s*:?\s*(\d+)\s*months?'],
145
+ 'CONSTRUCTION_MONTHS': [r'Construction\s+(?:Period|Duration)\s*:?\s*(\d+)\s*months?'],
146
+ 'HOLD_PERIOD_MONTHS': [r'Hold\s+Period\s*:?\s*(\d+)\s*months?'],
147
 
148
  # Construction & Development
149
  'CONSTRUCTION_COST_PER_GSF': [r'Construction\s+Cost\s*:?\s*\$?\s*([\d,]+)\s*per\s+(?:GSF|SF)'],
150
  'TOTAL_CONSTRUCTION_GMP': [r'(?:Total\s+)?Construction\s+GMP\s*:?\s*\$?\s*([\d,]+)'],
 
151
  'TOTAL_SOFT_COST': [r'(?:Total\s+)?Soft\s+Costs?\s*:?\s*\$?\s*([\d,]+)'],
152
+
153
+ # Soft Costs Components
154
+ 'ARCHITECTURE_AND_INTERIOR_COST': [r'(?:Architecture|A&I)\s*(?:&|and)?\s*Interior\s*:?\s*\$?\s*([\d,]+)'],
155
+ 'STRUCTURAL_ENGINEERING_COST': [r'Structural\s+Engineering\s*:?\s*\$?\s*([\d,]+)'],
156
+ 'MEP_ENGINEERING_COST': [r'MEP\s+Engineering\s*:?\s*\$?\s*([\d,]+)'],
157
+ 'CIVIL_ENGINEERING_COST': [r'Civil\s+Engineering\s*:?\s*\$?\s*([\d,]+)'],
158
+ 'CONTROLLED_INSPECTIONS_COST': [r'(?:Controlled\s+)?Inspections?\s*:?\s*\$?\s*([\d,]+)'],
159
+ 'SURVEYING_COST': [r'Surveying\s*:?\s*\$?\s*([\d,]+)'],
160
+ 'UTILITIES_CONNECTION_COST': [r'Utilities?\s+Connection\s*:?\s*\$?\s*([\d,]+)'],
161
+ 'ADVERTISING_AND_MARKETING_COST': [r'(?:Advertising|Marketing)\s*:?\s*\$?\s*([\d,]+)'],
162
+ 'ACCOUNTING_COST': [r'Accounting\s*:?\s*\$?\s*([\d,]+)'],
163
+ 'MONITORING_COST': [r'Monitoring\s*:?\s*\$?\s*([\d,]+)'],
164
+ 'FF_AND_E_COST': [r'FF&E\s*:?\s*\$?\s*([\d,]+)'],
165
+ 'ENVIRONMENTAL_CONSULTANT_FEE': [r'Environmental\s+Consultant\s*:?\s*\$?\s*([\d,]+)'],
166
+ 'MISCELLANEOUS_CONSULTANTS_FEE': [r'Misc(?:ellaneous)?\s+Consultants\s*:?\s*\$?\s*([\d,]+)'],
167
+ 'GENERAL_LEGAL_COST': [r'(?:General\s+)?Legal\s*:?\s*\$?\s*([\d,]+)'],
168
+ 'REAL_ESTATE_TAXES_DURING_CONSTRUCTION': [r'(?:RE\s+)?Taxes\s+During\s+Construction\s*:?\s*\$?\s*([\d,]+)'],
169
+ 'MISCELLANEOUS_ADMIN_COST': [r'Misc(?:ellaneous)?\s+Admin\s*:?\s*\$?\s*([\d,]+)'],
170
+ 'IBR_COST': [r'IBR\s*:?\s*\$?\s*([\d,]+)'],
171
+ 'PROJECT_TEAM_COST': [r'Project\s+Team\s*:?\s*\$?\s*([\d,]+)'],
172
+ 'PEM_FEES': [r'PEM\s+Fees\s*:?\s*\$?\s*([\d,]+)'],
173
+ 'BANK_FEES': [r'Bank\s+Fees\s*:?\s*\$?\s*([\d,]+)'],
174
 
175
  # Land & Acquisition
176
  'LAND_VALUE': [r'(?:Total\s+)?Land\s+Value\s*:?\s*\$?\s*([\d,]+)'],
177
  'CLOSING_COSTS': [r'Closing\s+Costs\s*:?\s*\$?\s*([\d,]+)'],
178
  'ACQUISITION_FEE': [r'Acq(?:uisition)?\s+Fee\s*:?\s*\$?\s*([\d,]+)'],
179
+
180
+ # Capital Stack
181
+ 'FINANCING_COST': [r'Financing\s+Cost\s*:?\s*\$?\s*([\d,]+)'],
182
+ 'FINANCING_PERCENTAGE': [r'Financing\s+(?:Percentage|%)\s*:?\s*([\d.]+)%?'],
183
+ 'INTEREST_RESERVE': [r'Interest\s+Reserve\s*:?\s*\$?\s*([\d,]+)'],
184
+ 'LOAN_AMOUNT': [r'Loan\s+Amount\s*:?\s*\$?\s*([\d,]+)'],
185
+
186
+ # Exit Strategy
187
+ 'SALE_COST_PERCENTAGE': [r'Sale\s+Cost\s*:?\s*([\d.]+)%?'],
188
+ 'GP_PREF_RATE': [r'GP\s+Pref(?:erred)?\s+Rate\s*:?\s*([\d.]+)%?'],
189
+ 'LP_PREF_RATE': [r'LP\s+Pref(?:erred)?\s+Rate\s*:?\s*([\d.]+)%?'],
190
+ 'PROMOTE_PERCENTAGE': [r'Promote\s*:?\s*([\d.]+)%?'],
191
  }
192
 
193
  for key, pattern_list in patterns.items():
 
202
  except (ValueError, IndexError):
203
  continue
204
 
205
+ # Post-processing: percentages
206
+ if 'INTEREST_RATE' in extracted_data and extracted_data['INTEREST_RATE'] > 1:
207
+ extracted_data['INTEREST_RATE'] = extracted_data['INTEREST_RATE'] / 100
 
208
  extracted_data['INTEREST_RATE_DECIMAL'] = extracted_data['INTEREST_RATE']
209
 
210
+ if 'LTC' in extracted_data and extracted_data['LTC'] > 1:
211
+ extracted_data['LTC'] = extracted_data['LTC'] / 100
 
212
  extracted_data['LTC_RATIO'] = extracted_data['LTC']
213
 
 
 
 
214
  if 'EXIT_CAP_RATE' in extracted_data:
215
  if extracted_data['EXIT_CAP_RATE'] > 1:
216
  extracted_data['EXIT_CAP_RATE_DECIMAL'] = extracted_data['EXIT_CAP_RATE'] / 100
 
224
  if 'BUILDING_SF' in extracted_data and 'GROSS_SF' not in extracted_data:
225
  extracted_data['GROSS_SF'] = extracted_data['BUILDING_SF']
226
 
 
 
 
 
227
  if 'GROSS_SF' in extracted_data and 'RENTABLE_SF' not in extracted_data:
228
  extracted_data['RENTABLE_SF'] = extracted_data['GROSS_SF'] * 0.9
229
 
 
230
  if 'EGI' in extracted_data and 'EFFECTIVE_GROSS_INCOME' not in extracted_data:
231
  extracted_data['EFFECTIVE_GROSS_INCOME'] = extracted_data['EGI']
232
 
 
 
 
 
233
  if 'NOI' in extracted_data and 'NET_OPERATING_INCOME' not in extracted_data:
234
  extracted_data['NET_OPERATING_INCOME'] = extracted_data['NOI']
235
 
 
 
 
 
236
  if 'OPEX' in extracted_data and 'TOTAL_OPERATING_EXPENSES' not in extracted_data:
237
  extracted_data['TOTAL_OPERATING_EXPENSES'] = extracted_data['OPEX']
238
 
239
+ # DEFAULT VALUES & ASSUMPTIONS
240
+ self.defaults = {
241
+ 'MANAGEMENT_FEE_PERCENTAGE': 0.03,
242
+ 'VACANCY_RATE': 0.05,
243
+ 'REVENUE_INFLATION_RATE': 0.03,
244
+ 'EXPENSE_INFLATION_RATE': 0.025,
245
+ 'INTEREST_RATE_BASIS_POINTS': 500,
246
+ 'EXIT_CAP_RATE_DECIMAL': 0.05,
247
+ 'SALE_COST_PERCENTAGE': 0.02,
248
+ 'LTC_RATIO': 0.75,
249
+ 'FINANCING_PERCENTAGE': 0.01,
250
+ 'CONSTRUCTION_MONTHS': 24,
251
+ 'LEASE_UP_MONTHS': 12,
252
+ 'STABILIZATION_MONTHS': 6,
253
+ 'HOLD_PERIOD_MONTHS': 84,
254
+ 'GP_PREF_RATE': 0.08,
255
+ 'LP_PREF_RATE': 0.08,
256
+ 'PROMOTE_PERCENTAGE': 0.20,
257
+ }
258
 
259
+ # Apply defaults
260
+ for key, default_value in self.defaults.items():
261
+ if key not in extracted_data:
262
+ extracted_data[key] = default_value
263
+
264
+ # Calculate soft costs as % of construction if available
265
+ if 'TOTAL_CONSTRUCTION_GMP' in extracted_data:
266
+ gmp = extracted_data['TOTAL_CONSTRUCTION_GMP']
267
+ soft_defaults = {
268
+ 'ARCHITECTURE_AND_INTERIOR_COST': 0.025,
269
+ 'STRUCTURAL_ENGINEERING_COST': 0.01,
270
+ 'MEP_ENGINEERING_COST': 0.015,
271
+ 'CIVIL_ENGINEERING_COST': 0.005,
272
+ 'CONTROLLED_INSPECTIONS_COST': 0.003,
273
+ 'SURVEYING_COST': 0.002,
274
+ 'UTILITIES_CONNECTION_COST': 0.005,
275
+ 'ACCOUNTING_COST': 0.001,
276
+ 'MONITORING_COST': 0.001,
277
+ 'FF_AND_E_COST': 0.01,
278
+ 'ENVIRONMENTAL_CONSULTANT_FEE': 0.002,
279
+ 'MISCELLANEOUS_CONSULTANTS_FEE': 0.005,
280
+ 'GENERAL_LEGAL_COST': 0.003,
281
+ 'REAL_ESTATE_TAXES_DURING_CONSTRUCTION': 0.005,
282
+ 'MISCELLANEOUS_ADMIN_COST': 0.002,
283
+ 'IBR_COST': 0.003,
284
+ 'PROJECT_TEAM_COST': 0.005,
285
+ 'PEM_FEES': 0.01,
286
+ 'BANK_FEES': 0.005,
287
+ }
288
+
289
+ for key, pct in soft_defaults.items():
290
+ if key not in extracted_data:
291
+ extracted_data[key] = gmp * pct
292
+
293
+ # Calculate construction GMP if cost per GSF available
294
+ if 'CONSTRUCTION_COST_PER_GSF' in extracted_data and 'GROSS_SF' in extracted_data and 'TOTAL_CONSTRUCTION_GMP' not in extracted_data:
295
+ extracted_data['TOTAL_CONSTRUCTION_GMP'] = extracted_data['CONSTRUCTION_COST_PER_GSF'] * extracted_data['GROSS_SF']
296
 
297
  return extracted_data
298
 
299
  def extract_variables_from_formula(self, formula: str) -> List[str]:
300
  """Extract variable names from formula"""
 
 
301
  var_pattern = r'\b([A-Z][A-Z0-9_]*)\b'
302
  variables = re.findall(var_pattern, formula)
 
 
303
  python_builtins = {'SUM', 'MIN', 'MAX', 'ABS', 'ROUND'}
304
  variables = [v for v in variables if v not in python_builtins]
 
305
  return list(set(variables))
306
 
307
  def check_formula_computable(self, formula: str, data: Dict[str, Any]) -> Tuple[bool, List[str]]:
 
318
  def safe_eval_formula(self, formula: str, data: Dict[str, Any]) -> Any:
319
  """Safely evaluate a semantic formula"""
320
  try:
 
321
  all_data = {**data, **self.computed_values}
 
 
322
  formula_eval = formula
323
  variables = self.extract_variables_from_formula(formula)
324
 
 
327
  value = all_data[var]
328
  formula_eval = re.sub(r'\b' + var + r'\b', str(value), formula_eval)
329
 
 
 
330
  formula_eval = formula_eval.replace('^', '**')
331
 
 
332
  safe_dict = {
333
  'min': min,
334
  'max': max,
 
350
  return "❌ No files uploaded", "", ""
351
 
352
  file_paths = [f.name for f in files]
 
 
353
  extracted_data = self.extract_data_from_files(file_paths)
354
 
355
  if not extracted_data:
356
  return "❌ No data could be extracted from the files", "", ""
357
 
 
358
  self.computed_values = {}
359
 
360
  # Multiple passes for dependency resolution
 
366
  newly_computed = 0
367
 
368
  for formula_name, formula_info in self.formulas.items():
 
369
  if formula_name in computable_formulas:
370
  continue
371
 
 
386
  'iteration': iteration + 1
387
  }
388
 
 
389
  self.computed_values[formula_name] = result
390
  newly_computed += 1
391
 
 
408
  if newly_computed == 0:
409
  break
410
 
 
411
  for formula_name in computable_formulas.keys():
412
  non_computable_formulas.pop(formula_name, None)
413
 
414
+ # Group by iteration
415
+ by_iteration = {}
416
+ for name, info in computable_formulas.items():
417
+ iter_num = info['iteration']
418
+ if iter_num not in by_iteration:
419
+ by_iteration[iter_num] = []
420
+ by_iteration[iter_num].append((name, info))
421
+
422
  # Create summary
423
+ defaults_applied = sum(1 for k in extracted_data.keys() if k in self.defaults)
424
+
425
  summary = f"""
426
  ## πŸ“Š Analysis Summary
427
 
428
  **Total Formulas Loaded:** {len(self.formulas)}
429
+ **βœ… Computable Formulas:** {len(computable_formulas)} ({len(computable_formulas) / len(self.formulas) * 100:.1f}%)
430
+ **❌ Non-Computable Formulas:** {len(non_computable_formulas)} ({len(non_computable_formulas) / len(self.formulas) * 100:.1f}%)
431
  **πŸ“„ Files Processed:** {len(file_paths)}
432
  **πŸ”’ Data Points Extracted:** {len(extracted_data)}
433
+ **🎯 Defaults Applied:** {defaults_applied}
434
  **πŸ”„ Computation Iterations:** {iteration + 1}
435
+
436
+ ### πŸ“ˆ Progress by Iteration
437
  """
438
 
439
+ for iter_num in sorted(by_iteration.keys()):
440
+ summary += f"- Iteration {iter_num}: {len(by_iteration[iter_num])} formulas computed\n"
441
+
442
+ # Analyze missing variables
443
+ missing_var_count = {}
444
+ if non_computable_formulas:
445
+ for name, info in non_computable_formulas.items():
446
+ for var in info.get('missing_variables', []):
447
+ if var not in missing_var_count:
448
+ missing_var_count[var] = []
449
+ missing_var_count[var].append(name)
450
+
451
+ top_blockers = sorted(missing_var_count.items(), key=lambda x: len(x[1]), reverse=True)[:5]
452
+ if top_blockers:
453
+ summary += f"\n### 🚫 Top 5 Missing Variables\n"
454
+ for var, blocked in top_blockers:
455
+ summary += f"- **{var}**: Blocks {len(blocked)} formulas\n"
456
+
457
  # Extracted data display
458
  data_display = "## πŸ“₯ Extracted Property Data\n\n"
459
+ data_display += "| Variable | Value | Source |\n|----------|-------|--------|\n"
460
  for key, value in sorted(extracted_data.items()):
461
+ source = "πŸ“„ Document" if key not in self.defaults else "βš™οΈ Default"
462
  if isinstance(value, float):
463
+ data_display += f"| {key} | {value:,.4f} | {source} |\n"
464
  else:
465
+ data_display += f"| {key} | {value} | {source} |\n"
466
 
467
  # Results display
468
  results_display = "## βœ… Computed Formulas\n\n"
469
 
 
 
 
 
 
 
 
 
470
  for iter_num in sorted(by_iteration.keys()):
471
  results_display += f"### Iteration {iter_num} ({len(by_iteration[iter_num])} formulas)\n\n"
472
  for name, info in sorted(by_iteration[iter_num]):
 
474
  results_display += f"*{info['description']}*\n"
475
  results_display += f"`{info['formula']}`\n\n"
476
 
477
+ # Non-computable formulas
478
  # if non_computable_formulas:
479
  # results_display += f"\n## ❌ Non-Computable Formulas ({len(non_computable_formulas)})\n\n"
 
 
 
480
 
481
+ # if missing_var_count:
482
+ # results_display += "### 🚫 Top Missing Variables (Blocking Multiple Formulas)\n\n"
483
+ # sorted_missing = sorted(missing_var_count.items(), key=lambda x: len(x[1]), reverse=True)
484
+
485
+ # for idx, (var, blocked_formulas) in enumerate(sorted_missing[:15]):
486
+ # results_display += f"{idx+1}. **{var}** - Blocks {len(blocked_formulas)} formulas\n"
487
+ # sample = blocked_formulas[:3]
488
+ # results_display += f" - Affects: {', '.join(sample)}"
489
+ # if len(blocked_formulas) > 3:
490
+ # results_display += f" ... and {len(blocked_formulas) - 3} more"
491
+ # results_display += "\n"
492
+ # results_display
493
  json_output = {
494
  'summary': {
495
  'total_formulas': len(self.formulas),