mlbench123 committed on
Commit
ee575e3
·
verified ·
1 Parent(s): 92a0613

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -121
app.py CHANGED
@@ -11,6 +11,7 @@ class PropertyFormulaAnalyzer:
11
  """Initialize the analyzer with the formula file path"""
12
  self.formula_file_path = formula_file_path
13
  self.formulas = {}
 
14
  self.load_formulas()
15
 
16
  def load_formulas(self):
@@ -20,13 +21,11 @@ class PropertyFormulaAnalyzer:
20
  content = f.read()
21
 
22
  # Parse formulas using regex
23
- # Pattern: number. cell_ref (description) = formula
24
  pattern = r'(\d+)\.\s+([A-Z]+\d+)\s*\(([^)]+)\)\s*=\s*([^=\n]+?)(?=\s+\d+\.|$)'
25
  matches = re.findall(pattern, content, re.DOTALL)
26
 
27
  for match in matches:
28
  formula_num, cell_ref, description, formula = match
29
- # Clean up the formula
30
  formula = formula.strip()
31
  formula = re.sub(r'\s+', ' ', formula)
32
 
@@ -75,26 +74,19 @@ class PropertyFormulaAnalyzer:
75
  else:
76
  combined_text += self.extract_text_from_txt(file_path) + "\n"
77
 
78
- # Extract data using comprehensive patterns
79
  extracted_data = {}
80
 
81
  # Define extraction patterns
82
  patterns = {
83
- # Basic property info
84
  'UNITS': [r'(?:Total\s+)?Units?\s*:?\s*(\d+)', r'Units\s*(\d+)'],
85
  'BUILDING_SF': [r'Building\s+(?:Size|SF)\s*:?\s*([\d,]+)', r'Building\s+(?:Size|SF)\s*(\d+)'],
86
  'LOT_ACRES': [r'Lot\s+Size\s*:?\s*([\d.]+)\s*(?:acres?|Acres?)', r'Lot:\s*([\d.]+)\s*acres?'],
87
- 'LOT_SF': [r'Lot\s+(?:Size\s+)?SF\s*:?\s*([\d,]+)'],
88
-
89
- # Financial metrics
90
  'PRICE': [r'(?:Asking\s+)?Price\s*:?\s*\$\s*([\d,]+)', r'Price\s+per\s+Unit\s*\$\s*([\d,]+)'],
91
  'NOI': [r'Net\s+Operating\s+Income\s*(?:\(NOI\))?\s*:?\s*\$?\s*([\d,]+)', r'NOI\s*:?\s*\$?\s*([\d,]+)'],
92
  'EGI': [r'Effective\s+Gross\s+Income\s*:?\s*\$?\s*([\d,]+)', r'EGI\s*:?\s*\$?\s*([\d,]+)'],
93
  'GPR': [r'Gross\s+Potential\s+Rent\s*(?:\(Annual\))?\s*:?\s*\$?\s*([\d,]+)', r'GPR\s*:?\s*\$?\s*([\d,]+)'],
94
  'OPEX': [r'Operating\s+Expenses\s*:?\s*\$?\s*([\d,]+)', r'Total\s+Operating\s+Expenses\s*=?\s*\$?\s*([\d,]+)'],
95
  'VACANCY': [r'Vacancy\s*(?:\([\d.]+%\))?\s*:?\s*-?\$?\s*([\d,]+)'],
96
-
97
- # Operating expenses categories
98
  'PROPERTY_TAXES': [r'Property\s+Taxes\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
99
  'INSURANCE': [r'Insurance\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
100
  'UTILITIES': [r'Utilities\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
@@ -104,38 +96,21 @@ class PropertyFormulaAnalyzer:
104
  'MARKETING': [r'Marketing\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
105
  'REPLACEMENT_RESERVES': [r'Replacement\s+Reserves\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
106
  'MANAGEMENT_FEE': [r'Management\s*(?:\([^)]+\))?\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
107
-
108
- # Rates and percentages
109
  'CAP_RATE': [r'Cap\s+Rate\s*:?\s*([\d.]+)%?', r'Cap\s+Rate\s+([\d.]+)'],
110
  'INTEREST_RATE': [r'Interest\s+Rate\s*:?\s*([\d.]+)%?'],
111
  'LTC': [r'Loan[- ]to[- ]Cost\s*(?:\(LTC\))?\s*:?\s*([\d.]+)%?'],
112
  'EXIT_CAP_RATE': [r'Exit\s+Cap\s+Rate\s*:?\s*([\d.]+)%?'],
113
-
114
- # Demographics
115
- 'MEDIAN_INCOME': [r'Median\s+(?:HH\s+)?Income\s*:?\s*\$?\s*([\d,]+)', r'Median\s+(?:Household\s+)?Income:\s*\$?\s*([\d,]+)'],
116
  'POPULATION': [r'Population\s*:?\s*([\d,]+)'],
117
  'HOUSEHOLDS': [r'Households\s*:?\s*([\d,]+)'],
118
  'RENTER_OCCUPIED_PCT': [r'Renter[- ]Occupied\s*:?\s*([\d.]+)%?'],
119
-
120
- # Construction & Development
121
- 'CONSTRUCTION_GMP': [r'(?:Total\s+)?Construction\s+GMP\s*:?\s*\$?\s*([\d,]+)'],
122
- 'SOFT_COSTS': [r'(?:Total\s+)?Soft\s+Costs?\s*:?\s*\$?\s*([\d,]+)'],
123
- 'CONTINGENCY': [r'Contingency\s*:?\s*\$?\s*([\d,]+)'],
124
- 'DEV_FEE': [r'Dev(?:elopment)?\s+Fee\s*:?\s*\$?\s*([\d,]+)'],
125
-
126
- # Land & Acquisition
127
- 'LAND_VALUE': [r'(?:Total\s+)?Land\s+Value\s*:?\s*\$?\s*([\d,]+)'],
128
- 'CLOSING_COSTS': [r'Closing\s+Costs\s*:?\s*\$?\s*([\d,]+)'],
129
- 'ACQ_FEE': [r'Acq(?:uisition)?\s+Fee\s*:?\s*\$?\s*([\d,]+)'],
130
  }
131
 
132
- # Extract values using patterns
133
  for key, pattern_list in patterns.items():
134
  for pattern in pattern_list:
135
  matches = re.findall(pattern, combined_text, re.IGNORECASE)
136
  if matches:
137
  try:
138
- # Take the first match and clean it
139
  value_str = matches[0].replace(',', '').strip()
140
  value = float(value_str)
141
  extracted_data[key] = value
@@ -143,7 +118,7 @@ class PropertyFormulaAnalyzer:
143
  except (ValueError, IndexError):
144
  continue
145
 
146
- # Calculate derived values
147
  if 'PRICE' in extracted_data and 'UNITS' in extracted_data:
148
  extracted_data['PRICE_PER_UNIT'] = extracted_data['PRICE'] / extracted_data['UNITS']
149
 
@@ -151,63 +126,64 @@ class PropertyFormulaAnalyzer:
151
  extracted_data['CALCULATED_CAP_RATE'] = (extracted_data['NOI'] / extracted_data['PRICE']) * 100
152
 
153
  if 'LTC' in extracted_data and extracted_data['LTC'] > 1:
154
- extracted_data['LTC'] = extracted_data['LTC'] / 100 # Convert percentage
155
 
156
  if 'INTEREST_RATE' in extracted_data and extracted_data['INTEREST_RATE'] > 1:
157
  extracted_data['INTEREST_RATE'] = extracted_data['INTEREST_RATE'] / 100
158
 
159
- # Add common cell references based on extracted data
160
  if 'BUILDING_SF' in extracted_data:
161
  extracted_data['D2'] = extracted_data['BUILDING_SF']
162
- extracted_data['D$2'] = extracted_data['BUILDING_SF']
163
- extracted_data['$D$2'] = extracted_data['BUILDING_SF']
164
 
165
  if 'UNITS' in extracted_data:
166
  extracted_data['F2'] = extracted_data['UNITS']
167
- extracted_data['F$2'] = extracted_data['UNITS']
168
- extracted_data['$F$2'] = extracted_data['UNITS']
169
 
170
- # Assume RSF is 90% of GSF if not provided
171
- if 'BUILDING_SF' in extracted_data and 'E2' not in extracted_data:
172
  extracted_data['E2'] = extracted_data['BUILDING_SF'] * 0.9
173
- extracted_data['E$2'] = extracted_data['E2']
174
- extracted_data['$E$2'] = extracted_data['E2']
175
-
176
- # Map common variables
177
- if 'LAND_VALUE' in extracted_data:
178
- extracted_data['C4'] = extracted_data['LAND_VALUE']
179
- extracted_data['$C4'] = extracted_data['LAND_VALUE']
180
- extracted_data['$C$4'] = extracted_data['LAND_VALUE']
181
-
182
- if 'CLOSING_COSTS' in extracted_data:
183
- extracted_data['C5'] = extracted_data['CLOSING_COSTS']
184
- extracted_data['$C5'] = extracted_data['CLOSING_COSTS']
185
 
186
  if 'OPEX' in extracted_data:
187
  extracted_data['M15'] = extracted_data['OPEX']
188
- extracted_data['$M$15'] = extracted_data['OPEX']
189
 
190
  if 'EGI' in extracted_data:
191
  extracted_data['J38'] = extracted_data['EGI']
192
- extracted_data['$J$38'] = extracted_data['EGI']
193
 
194
  return extracted_data
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  def extract_variables_from_formula(self, formula: str) -> List[str]:
197
  """Extract all variable references from a formula"""
198
  # Match Excel-style cell references (e.g., C4, $D$2, E2)
199
  cell_pattern = r'\$?[A-Z]+\$?\d+'
200
  variables = re.findall(cell_pattern, formula)
201
 
202
- # Also match named variables
203
- named_pattern = r'[A-Z_][A-Z0-9_]*'
204
- named_vars = re.findall(named_pattern, formula)
205
-
206
- # Filter out Excel functions
207
  excel_functions = {'SUM', 'PV', 'MIN', 'MAX', 'AVERAGE', 'IF', 'AND', 'OR'}
208
- named_vars = [v for v in named_vars if v not in excel_functions]
209
 
210
- return list(set(variables + named_vars))
211
 
212
  def check_formula_computable(self, formula: str, data: Dict[str, Any]) -> Tuple[bool, List[str]]:
213
  """Check if a formula can be computed with available data"""
@@ -215,63 +191,62 @@ class PropertyFormulaAnalyzer:
215
  missing = []
216
 
217
  for var in variables:
218
- # Check all variants of the variable
219
- variants = [var, var.replace('$', ''), var.upper()]
220
- if not any(v in data for v in variants):
221
  missing.append(var)
222
 
223
  return len(missing) == 0, missing
224
 
225
- def evaluate_formula(self, formula: str, data: Dict[str, Any]) -> Any:
226
  """Safely evaluate a formula with the provided data"""
227
  try:
228
- # Create a safe evaluation environment
229
- safe_dict = {}
230
-
231
- # Add all data to the environment
232
- for key, value in data.items():
233
- safe_dict[key] = value
234
- safe_dict[key.replace('$', '')] = value
235
- safe_dict[key.upper()] = value
236
-
237
- # Replace Excel functions with Python equivalents
238
  formula_py = formula
239
 
240
- # Handle SUM function
241
- sum_pattern = r'SUM\(([^)]+)\)'
242
- while re.search(sum_pattern, formula_py):
243
- match = re.search(sum_pattern, formula_py)
244
  range_str = match.group(1)
245
- # For ranges like C4:C6, we'll need to handle them
246
  if ':' in range_str:
247
- # Extract the range
248
- parts = range_str.split(':')
249
- # For now, we'll just try to add the values if they exist
250
- formula_py = formula_py.replace(match.group(0), f"sum_range('{range_str}')")
251
  else:
252
- formula_py = formula_py.replace(match.group(0), f"sum([{range_str}])")
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
- # Handle PV function (present value) - simplified
255
- pv_pattern = r'PV\([^)]+\)'
256
- formula_py = re.sub(pv_pattern, '0', formula_py) # Simplified for now
257
 
258
  # Handle MIN function
259
  formula_py = re.sub(r'MIN\(([^)]+)\)', r'min([\1])', formula_py)
260
 
261
  # Replace cell references with their values
262
- for key in sorted(data.keys(), key=len, reverse=True):
263
- if key in formula_py:
264
- formula_py = formula_py.replace(key, str(data[key]))
 
 
265
 
266
  # Replace ^ with ** for exponentiation
267
  formula_py = formula_py.replace('^', '**')
268
 
 
 
 
269
  # Evaluate
270
- result = eval(formula_py, {"__builtins__": {}}, safe_dict)
271
  return result
272
 
273
  except Exception as e:
274
- raise Exception(f"Error evaluating formula: {str(e)}")
275
 
276
  def process_files(self, files) -> Tuple[str, str, str]:
277
  """Main processing function for Gradio interface"""
@@ -279,45 +254,79 @@ class PropertyFormulaAnalyzer:
279
  if not files:
280
  return "❌ No files uploaded", "", ""
281
 
282
- # Extract file paths
283
  file_paths = [f.name for f in files]
284
 
285
- # Extract data from all files
286
  extracted_data = self.extract_data_from_files(file_paths)
287
 
288
  if not extracted_data:
289
  return "❌ No data could be extracted from the files", "", ""
290
 
291
- # Process formulas
 
 
 
 
292
  computable_formulas = {}
293
  non_computable_formulas = {}
294
 
295
- for cell_ref, formula_info in self.formulas.items():
296
- formula = formula_info['formula']
297
- is_computable, missing_vars = self.check_formula_computable(formula, extracted_data)
298
 
299
- if is_computable:
300
- try:
301
- result = self.evaluate_formula(formula, extracted_data)
302
- computable_formulas[cell_ref] = {
303
- 'description': formula_info['description'],
304
- 'formula': formula,
305
- 'result': result,
306
- 'formatted_result': f"{result:,.2f}" if isinstance(result, (int, float)) else str(result)
307
- }
308
- except Exception as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  non_computable_formulas[cell_ref] = {
310
  'description': formula_info['description'],
311
  'formula': formula,
312
- 'error': str(e),
313
- 'missing_variables': []
314
  }
315
- else:
316
- non_computable_formulas[cell_ref] = {
317
- 'description': formula_info['description'],
318
- 'formula': formula,
319
- 'missing_variables': missing_vars
320
- }
 
 
 
 
321
 
322
  # Create summary
323
  summary = f"""
@@ -328,6 +337,7 @@ class PropertyFormulaAnalyzer:
328
  **❌ Non-Computable Formulas:** {len(non_computable_formulas)}
329
  **📄 Files Processed:** {len(file_paths)}
330
  **🔢 Data Points Extracted:** {len(extracted_data)}
 
331
  """
332
 
333
  # Create extracted data display
@@ -344,15 +354,20 @@ class PropertyFormulaAnalyzer:
344
  for cell_ref, info in sorted(computable_formulas.items()):
345
  results_display += f"### {cell_ref}: {info['description']}\n"
346
  results_display += f"**Formula:** `{info['formula']}`\n"
347
- results_display += f"**Result:** {info['formatted_result']}\n\n"
 
348
 
349
  # if non_computable_formulas:
350
  # results_display += "\n## ❌ Non-Computable Formulas\n\n"
351
- # for cell_ref, info in sorted(non_computable_formulas.items()):
 
 
 
 
352
  # results_display += f"### {cell_ref}: {info['description']}\n"
353
  # results_display += f"**Formula:** `{info['formula']}`\n"
354
  # if info.get('missing_variables'):
355
- # results_display += f"**Missing Variables:** {', '.join(info['missing_variables'])}\n"
356
  # if info.get('error'):
357
  # results_display += f"**Error:** {info['error']}\n"
358
  # results_display += "\n"
@@ -363,7 +378,8 @@ class PropertyFormulaAnalyzer:
363
  'total_formulas': len(self.formulas),
364
  'computable': len(computable_formulas),
365
  'non_computable': len(non_computable_formulas),
366
- 'files_processed': len(file_paths)
 
367
  },
368
  'extracted_data': extracted_data,
369
  'computable_formulas': computable_formulas,
@@ -387,7 +403,7 @@ with gr.Blocks(title="Property Formula Analyzer", theme=gr.themes.Soft()) as app
387
  # 🏒 Property Formula Analyzer
388
 
389
  Upload property documents (PDF or TXT) to automatically extract data and compute real estate formulas.
390
- The system will analyze your documents and calculate all computable formulas based on the extracted data.
391
  """)
392
 
393
  with gr.Row():
@@ -407,6 +423,8 @@ with gr.Blocks(title="Property Formula Analyzer", theme=gr.themes.Soft()) as app
407
  2. Click "Analyze & Compute Formulas"
408
  3. Review the extracted data and computed formulas
409
  4. Download the JSON results for further analysis
 
 
410
  """)
411
 
412
  with gr.Row():
@@ -425,7 +443,6 @@ with gr.Blocks(title="Property Formula Analyzer", theme=gr.themes.Soft()) as app
425
  lines=20
426
  )
427
 
428
- # Connect the button to the processing function
429
  analyze_btn.click(
430
  fn=analyzer.process_files,
431
  inputs=[file_input],
@@ -435,8 +452,8 @@ with gr.Blocks(title="Property Formula Analyzer", theme=gr.themes.Soft()) as app
435
  gr.Markdown("""
436
  ---
437
  ### 📝 Notes:
438
- - The system automatically extracts property metrics like units, price, NOI, operating expenses, etc.
439
- - Formulas are computed only when all required variables are available in the extracted data
440
  - Non-computable formulas are listed with their missing variables
441
  - All results can be downloaded as JSON for further processing
442
  """)
 
11
  """Initialize the analyzer with the formula file path"""
12
  self.formula_file_path = formula_file_path
13
  self.formulas = {}
14
+ self.computed_values = {} # Store computed values for cascading calculations
15
  self.load_formulas()
16
 
17
  def load_formulas(self):
 
21
  content = f.read()
22
 
23
  # Parse formulas using regex
 
24
  pattern = r'(\d+)\.\s+([A-Z]+\d+)\s*\(([^)]+)\)\s*=\s*([^=\n]+?)(?=\s+\d+\.|$)'
25
  matches = re.findall(pattern, content, re.DOTALL)
26
 
27
  for match in matches:
28
  formula_num, cell_ref, description, formula = match
 
29
  formula = formula.strip()
30
  formula = re.sub(r'\s+', ' ', formula)
31
 
 
74
  else:
75
  combined_text += self.extract_text_from_txt(file_path) + "\n"
76
 
 
77
  extracted_data = {}
78
 
79
  # Define extraction patterns
80
  patterns = {
 
81
  'UNITS': [r'(?:Total\s+)?Units?\s*:?\s*(\d+)', r'Units\s*(\d+)'],
82
  'BUILDING_SF': [r'Building\s+(?:Size|SF)\s*:?\s*([\d,]+)', r'Building\s+(?:Size|SF)\s*(\d+)'],
83
  'LOT_ACRES': [r'Lot\s+Size\s*:?\s*([\d.]+)\s*(?:acres?|Acres?)', r'Lot:\s*([\d.]+)\s*acres?'],
 
 
 
84
  'PRICE': [r'(?:Asking\s+)?Price\s*:?\s*\$\s*([\d,]+)', r'Price\s+per\s+Unit\s*\$\s*([\d,]+)'],
85
  'NOI': [r'Net\s+Operating\s+Income\s*(?:\(NOI\))?\s*:?\s*\$?\s*([\d,]+)', r'NOI\s*:?\s*\$?\s*([\d,]+)'],
86
  'EGI': [r'Effective\s+Gross\s+Income\s*:?\s*\$?\s*([\d,]+)', r'EGI\s*:?\s*\$?\s*([\d,]+)'],
87
  'GPR': [r'Gross\s+Potential\s+Rent\s*(?:\(Annual\))?\s*:?\s*\$?\s*([\d,]+)', r'GPR\s*:?\s*\$?\s*([\d,]+)'],
88
  'OPEX': [r'Operating\s+Expenses\s*:?\s*\$?\s*([\d,]+)', r'Total\s+Operating\s+Expenses\s*=?\s*\$?\s*([\d,]+)'],
89
  'VACANCY': [r'Vacancy\s*(?:\([\d.]+%\))?\s*:?\s*-?\$?\s*([\d,]+)'],
 
 
90
  'PROPERTY_TAXES': [r'Property\s+Taxes\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
91
  'INSURANCE': [r'Insurance\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
92
  'UTILITIES': [r'Utilities\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
 
96
  'MARKETING': [r'Marketing\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
97
  'REPLACEMENT_RESERVES': [r'Replacement\s+Reserves\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
98
  'MANAGEMENT_FEE': [r'Management\s*(?:\([^)]+\))?\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
 
 
99
  'CAP_RATE': [r'Cap\s+Rate\s*:?\s*([\d.]+)%?', r'Cap\s+Rate\s+([\d.]+)'],
100
  'INTEREST_RATE': [r'Interest\s+Rate\s*:?\s*([\d.]+)%?'],
101
  'LTC': [r'Loan[- ]to[- ]Cost\s*(?:\(LTC\))?\s*:?\s*([\d.]+)%?'],
102
  'EXIT_CAP_RATE': [r'Exit\s+Cap\s+Rate\s*:?\s*([\d.]+)%?'],
103
+ 'MEDIAN_INCOME': [r'Median\s+(?:HH\s+)?Income\s*:?\s*\$?\s*([\d,]+)'],
 
 
104
  'POPULATION': [r'Population\s*:?\s*([\d,]+)'],
105
  'HOUSEHOLDS': [r'Households\s*:?\s*([\d,]+)'],
106
  'RENTER_OCCUPIED_PCT': [r'Renter[- ]Occupied\s*:?\s*([\d.]+)%?'],
 
 
 
 
 
 
 
 
 
 
 
107
  }
108
 
 
109
  for key, pattern_list in patterns.items():
110
  for pattern in pattern_list:
111
  matches = re.findall(pattern, combined_text, re.IGNORECASE)
112
  if matches:
113
  try:
 
114
  value_str = matches[0].replace(',', '').strip()
115
  value = float(value_str)
116
  extracted_data[key] = value
 
118
  except (ValueError, IndexError):
119
  continue
120
 
121
+ # Derived values
122
  if 'PRICE' in extracted_data and 'UNITS' in extracted_data:
123
  extracted_data['PRICE_PER_UNIT'] = extracted_data['PRICE'] / extracted_data['UNITS']
124
 
 
126
  extracted_data['CALCULATED_CAP_RATE'] = (extracted_data['NOI'] / extracted_data['PRICE']) * 100
127
 
128
  if 'LTC' in extracted_data and extracted_data['LTC'] > 1:
129
+ extracted_data['LTC'] = extracted_data['LTC'] / 100
130
 
131
  if 'INTEREST_RATE' in extracted_data and extracted_data['INTEREST_RATE'] > 1:
132
  extracted_data['INTEREST_RATE'] = extracted_data['INTEREST_RATE'] / 100
133
 
134
+ # Map to cell references
135
  if 'BUILDING_SF' in extracted_data:
136
  extracted_data['D2'] = extracted_data['BUILDING_SF']
 
 
137
 
138
  if 'UNITS' in extracted_data:
139
  extracted_data['F2'] = extracted_data['UNITS']
 
 
140
 
141
+ if 'BUILDING_SF' in extracted_data:
 
142
  extracted_data['E2'] = extracted_data['BUILDING_SF'] * 0.9
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  if 'OPEX' in extracted_data:
145
  extracted_data['M15'] = extracted_data['OPEX']
 
146
 
147
  if 'EGI' in extracted_data:
148
  extracted_data['J38'] = extracted_data['EGI']
 
149
 
150
  return extracted_data
151
 
152
+ def normalize_cell_ref(self, cell_ref: str) -> str:
153
+ """Normalize cell reference by removing $ signs"""
154
+ return cell_ref.replace('$', '')
155
+
156
+ def get_value(self, var: str, data: Dict[str, Any]) -> Any:
157
+ """Get value for a variable, handling all variants"""
158
+ # Try exact match
159
+ if var in data:
160
+ return data[var]
161
+
162
+ # Try normalized (without $)
163
+ normalized = self.normalize_cell_ref(var)
164
+ if normalized in data:
165
+ return data[normalized]
166
+
167
+ # Try with computed values
168
+ if var in self.computed_values:
169
+ return self.computed_values[var]
170
+
171
+ if normalized in self.computed_values:
172
+ return self.computed_values[normalized]
173
+
174
+ return None
175
+
176
  def extract_variables_from_formula(self, formula: str) -> List[str]:
177
  """Extract all variable references from a formula"""
178
  # Match Excel-style cell references (e.g., C4, $D$2, E2)
179
  cell_pattern = r'\$?[A-Z]+\$?\d+'
180
  variables = re.findall(cell_pattern, formula)
181
 
182
+ # Remove Excel functions and operators
 
 
 
 
183
  excel_functions = {'SUM', 'PV', 'MIN', 'MAX', 'AVERAGE', 'IF', 'AND', 'OR'}
184
+ variables = [v for v in variables if v not in excel_functions]
185
 
186
+ return list(set(variables))
187
 
188
  def check_formula_computable(self, formula: str, data: Dict[str, Any]) -> Tuple[bool, List[str]]:
189
  """Check if a formula can be computed with available data"""
 
191
  missing = []
192
 
193
  for var in variables:
194
+ if self.get_value(var, data) is None:
 
 
195
  missing.append(var)
196
 
197
  return len(missing) == 0, missing
198
 
199
+ def safe_eval_formula(self, formula: str, data: Dict[str, Any]) -> Any:
200
  """Safely evaluate a formula with the provided data"""
201
  try:
 
 
 
 
 
 
 
 
 
 
202
  formula_py = formula
203
 
204
+ # Handle SUM function with ranges
205
+ def process_sum_range(match):
 
 
206
  range_str = match.group(1)
 
207
  if ':' in range_str:
208
+ # For now, return 0 for ranges we can't process
209
+ return '0'
 
 
210
  else:
211
+ # Individual cells
212
+ cells = [c.strip() for c in range_str.split(',')]
213
+ values = []
214
+ for cell in cells:
215
+ val = self.get_value(cell, data)
216
+ if val is not None:
217
+ values.append(str(val))
218
+ if values:
219
+ return f"({'+'.join(values)})"
220
+ return '0'
221
+
222
+ sum_pattern = r'SUM\(([^)]+)\)'
223
+ formula_py = re.sub(sum_pattern, process_sum_range, formula_py)
224
 
225
+ # Handle PV function - simplified to 0
226
+ formula_py = re.sub(r'PV\([^)]+\)', '0', formula_py)
 
227
 
228
  # Handle MIN function
229
  formula_py = re.sub(r'MIN\(([^)]+)\)', r'min([\1])', formula_py)
230
 
231
  # Replace cell references with their values
232
+ variables = self.extract_variables_from_formula(formula_py)
233
+ for var in sorted(variables, key=len, reverse=True):
234
+ value = self.get_value(var, data)
235
+ if value is not None:
236
+ formula_py = formula_py.replace(var, str(value))
237
 
238
  # Replace ^ with ** for exponentiation
239
  formula_py = formula_py.replace('^', '**')
240
 
241
+ # Clean up any remaining issues
242
+ formula_py = formula_py.replace('--', '+')
243
+
244
  # Evaluate
245
+ result = eval(formula_py, {"__builtins__": {"min": min, "max": max, "sum": sum}}, {})
246
  return result
247
 
248
  except Exception as e:
249
+ raise Exception(f"Error evaluating formula '{formula}': {str(e)}")
250
 
251
  def process_files(self, files) -> Tuple[str, str, str]:
252
  """Main processing function for Gradio interface"""
 
254
  if not files:
255
  return "❌ No files uploaded", "", ""
256
 
 
257
  file_paths = [f.name for f in files]
258
 
259
+ # Extract data
260
  extracted_data = self.extract_data_from_files(file_paths)
261
 
262
  if not extracted_data:
263
  return "❌ No data could be extracted from the files", "", ""
264
 
265
+ # Reset computed values
266
+ self.computed_values = {}
267
+
268
+ # Multiple passes to handle dependencies
269
+ max_iterations = 5
270
  computable_formulas = {}
271
  non_computable_formulas = {}
272
 
273
+ for iteration in range(max_iterations):
274
+ newly_computed = 0
 
275
 
276
+ for cell_ref, formula_info in self.formulas.items():
277
+ # Skip if already computed
278
+ if cell_ref in computable_formulas:
279
+ continue
280
+
281
+ formula = formula_info['formula']
282
+
283
+ # Combine extracted data with computed values for checking
284
+ all_data = {**extracted_data, **self.computed_values}
285
+
286
+ is_computable, missing_vars = self.check_formula_computable(formula, all_data)
287
+
288
+ if is_computable:
289
+ try:
290
+ result = self.safe_eval_formula(formula, all_data)
291
+
292
+ # Store result
293
+ computable_formulas[cell_ref] = {
294
+ 'description': formula_info['description'],
295
+ 'formula': formula,
296
+ 'result': result,
297
+ 'formatted_result': f"{result:,.2f}" if isinstance(result, (int, float)) else str(result),
298
+ 'iteration': iteration + 1
299
+ }
300
+
301
+ # Add to computed values for cascading
302
+ self.computed_values[cell_ref] = result
303
+ self.computed_values[self.normalize_cell_ref(cell_ref)] = result
304
+
305
+ newly_computed += 1
306
+
307
+ except Exception as e:
308
+ non_computable_formulas[cell_ref] = {
309
+ 'description': formula_info['description'],
310
+ 'formula': formula,
311
+ 'error': str(e),
312
+ 'missing_variables': []
313
+ }
314
+ else:
315
  non_computable_formulas[cell_ref] = {
316
  'description': formula_info['description'],
317
  'formula': formula,
318
+ 'missing_variables': missing_vars
 
319
  }
320
+
321
+ print(f"Iteration {iteration + 1}: Computed {newly_computed} new formulas")
322
+
323
+ # If no new formulas computed, stop
324
+ if newly_computed == 0:
325
+ break
326
+
327
+ # Remove successfully computed formulas from non-computable list
328
+ for cell_ref in computable_formulas.keys():
329
+ non_computable_formulas.pop(cell_ref, None)
330
 
331
  # Create summary
332
  summary = f"""
 
337
  **❌ Non-Computable Formulas:** {len(non_computable_formulas)}
338
  **📄 Files Processed:** {len(file_paths)}
339
  **🔢 Data Points Extracted:** {len(extracted_data)}
340
+ **🔄 Computation Iterations:** {iteration + 1}
341
  """
342
 
343
  # Create extracted data display
 
354
  for cell_ref, info in sorted(computable_formulas.items()):
355
  results_display += f"### {cell_ref}: {info['description']}\n"
356
  results_display += f"**Formula:** `{info['formula']}`\n"
357
+ results_display += f"**Result:** {info['formatted_result']}\n"
358
+ results_display += f"*Computed in iteration {info['iteration']}*\n\n"
359
 
360
  # if non_computable_formulas:
361
  # results_display += "\n## ❌ Non-Computable Formulas\n\n"
362
+ # # Show only first 20 to avoid overwhelming output
363
+ # for idx, (cell_ref, info) in enumerate(sorted(non_computable_formulas.items())):
364
+ # if idx >= 20:
365
+ # results_display += f"\n*... and {len(non_computable_formulas) - 20} more non-computable formulas*\n"
366
+ # break
367
  # results_display += f"### {cell_ref}: {info['description']}\n"
368
  # results_display += f"**Formula:** `{info['formula']}`\n"
369
  # if info.get('missing_variables'):
370
+ # results_display += f"**Missing Variables:** {', '.join(info['missing_variables'][:5])}\n"
371
  # if info.get('error'):
372
  # results_display += f"**Error:** {info['error']}\n"
373
  # results_display += "\n"
 
378
  'total_formulas': len(self.formulas),
379
  'computable': len(computable_formulas),
380
  'non_computable': len(non_computable_formulas),
381
+ 'files_processed': len(file_paths),
382
+ 'iterations': iteration + 1
383
  },
384
  'extracted_data': extracted_data,
385
  'computable_formulas': computable_formulas,
 
403
  # 🏒 Property Formula Analyzer
404
 
405
  Upload property documents (PDF or TXT) to automatically extract data and compute real estate formulas.
406
+ The system uses iterative computation to handle formula dependencies.
407
  """)
408
 
409
  with gr.Row():
 
423
  2. Click "Analyze & Compute Formulas"
424
  3. Review the extracted data and computed formulas
425
  4. Download the JSON results for further analysis
426
+
427
+ **Note:** The system performs multiple computation passes to handle formula dependencies.
428
  """)
429
 
430
  with gr.Row():
 
443
  lines=20
444
  )
445
 
 
446
  analyze_btn.click(
447
  fn=analyzer.process_files,
448
  inputs=[file_input],
 
452
  gr.Markdown("""
453
  ---
454
  ### 📝 Notes:
455
+ - The system automatically extracts property metrics from your documents
456
+ - Formulas are computed iteratively to handle dependencies between formulas
457
  - Non-computable formulas are listed with their missing variables
458
  - All results can be downloaded as JSON for further processing
459
  """)