Spaces:
Sleeping
Sleeping
import json
import re
import traceback
from collections import defaultdict
from typing import Dict, List, Tuple, Any

import gradio as gr
import PyPDF2
class SemanticFormulaAnalyzer:
    """Extract property metrics from documents and evaluate named formulas.

    Formulas are read from a text file of ``NAME = expression`` lines.
    Uploaded PDF/TXT documents are scanned with regular expressions to fill
    the variables those expressions reference; missing assumptions are filled
    from a table of defaults. Formulas are then evaluated in repeated passes
    so that one formula's result can feed another.

    Attributes:
        formula_file_path: Path of the formula definition file.
        formulas: ``{name: {'formula': str, 'description': str}}``.
        computed_values: Results of the formulas computed by the last run.
        defaults: Default assumptions table (set by extract_data_from_files).
        defaults_applied: Keys that were actually filled from ``defaults``
            during the last extraction (as opposed to found in a document).
    """

    # Upper bound on dependency-resolution passes in process_files.
    _MAX_PASSES = 10

    # Names accepted as functions inside formulas. Upper-case spellings are
    # provided because extract_variables_from_formula treats them as
    # functions rather than variables.
    _FORMULA_FUNCTIONS = {
        'min': min, 'max': max, 'sum': sum, 'abs': abs, 'round': round,
        'MIN': min, 'MAX': max, 'SUM': sum, 'ABS': abs, 'ROUND': round,
    }
    _RESERVED_NAMES = frozenset({'SUM', 'MIN', 'MAX', 'ABS', 'ROUND'})
    _VARIABLE_RE = re.compile(r'\b([A-Z][A-Z0-9_]*)\b')

    # Keys stored as decimal fractions; a matched value above 1 is read as a
    # percentage and divided by 100.
    _PERCENT_KEYS = (
        'INTEREST_RATE', 'LTC', 'VACANCY_RATE',
        'MANAGEMENT_FEE_PERCENTAGE', 'REVENUE_INFLATION_RATE',
        'EXPENSE_INFLATION_RATE', 'FINANCING_PERCENTAGE',
        'SALE_COST_PERCENTAGE', 'GP_PREF_RATE', 'LP_PREF_RATE',
        'PROMOTE_PERCENTAGE',
    )

    # Default assumptions used when a document does not state the value.
    _DEFAULTS = {
        'MANAGEMENT_FEE_PERCENTAGE': 0.03,
        'VACANCY_RATE': 0.05,
        'REVENUE_INFLATION_RATE': 0.03,
        'EXPENSE_INFLATION_RATE': 0.025,
        'INTEREST_RATE_BASIS_POINTS': 500,
        'EXIT_CAP_RATE_DECIMAL': 0.05,
        'SALE_COST_PERCENTAGE': 0.02,
        'LTC_RATIO': 0.75,
        'FINANCING_PERCENTAGE': 0.01,
        'CONSTRUCTION_MONTHS': 24,
        'LEASE_UP_MONTHS': 12,
        'STABILIZATION_MONTHS': 6,
        'HOLD_PERIOD_MONTHS': 84,
        'GP_PREF_RATE': 0.08,
        'LP_PREF_RATE': 0.08,
        'PROMOTE_PERCENTAGE': 0.20,
    }

    # Soft-cost line items estimated as a fraction of TOTAL_CONSTRUCTION_GMP
    # when the document does not list them.
    _SOFT_COST_SHARES = {
        'ARCHITECTURE_AND_INTERIOR_COST': 0.025,
        'STRUCTURAL_ENGINEERING_COST': 0.01,
        'MEP_ENGINEERING_COST': 0.015,
        'CIVIL_ENGINEERING_COST': 0.005,
        'CONTROLLED_INSPECTIONS_COST': 0.003,
        'SURVEYING_COST': 0.002,
        'UTILITIES_CONNECTION_COST': 0.005,
        'ACCOUNTING_COST': 0.001,
        'MONITORING_COST': 0.001,
        'FF_AND_E_COST': 0.01,
        'ENVIRONMENTAL_CONSULTANT_FEE': 0.002,
        'MISCELLANEOUS_CONSULTANTS_FEE': 0.005,
        'GENERAL_LEGAL_COST': 0.003,
        'REAL_ESTATE_TAXES_DURING_CONSTRUCTION': 0.005,
        'MISCELLANEOUS_ADMIN_COST': 0.002,
        'IBR_COST': 0.003,
        'PROJECT_TEAM_COST': 0.005,
        'PEM_FEES': 0.01,
        'BANK_FEES': 0.005,
    }

    # Candidate regexes per variable, tried in order; each has exactly one
    # capture group holding the numeric text.
    _PATTERNS = {
        # Basic Property Info
        'UNITS': [r'(?:Total\s+)?Units?\s*:?\s*(\d+)', r'(\d+)\s*units?'],
        'GROSS_SF': [r'Building\s+(?:Size|SF)\s*:?\s*([\d,]+)', r'Gross\s+SF\s*:?\s*([\d,]+)', r'GSF\s*:?\s*([\d,]+)'],
        'BUILDING_SF': [r'Building\s+(?:Size|SF)\s*:?\s*([\d,]+)'],
        'RENTABLE_SF': [r'Rentable\s+SF\s*:?\s*([\d,]+)', r'RSF\s*:?\s*([\d,]+)'],
        'RETAIL_SF': [r'Retail\s+SF\s*:?\s*([\d,]+)', r'Retail\s+Space\s*:?\s*([\d,]+)\s*SF'],
        # Financial - Core
        'PRICE': [r'(?:Asking\s+)?Price\s*:?\s*\$\s*([\d,]+)', r'Purchase\s+Price\s*:?\s*\$\s*([\d,]+)'],
        'NOI': [r'Net\s+Operating\s+Income\s*(?:\(NOI\))?\s*:?\s*\$?\s*([\d,]+)'],
        'NET_OPERATING_INCOME': [r'Net\s+Operating\s+Income\s*(?:\(NOI\))?\s*:?\s*\$?\s*([\d,]+)'],
        'EGI': [r'Effective\s+Gross\s+Income\s*:?\s*\$?\s*([\d,]+)'],
        'EFFECTIVE_GROSS_INCOME': [r'Effective\s+Gross\s+Income\s*:?\s*\$?\s*([\d,]+)'],
        'VACANCY_RATE': [r'Vacancy\s*(?:Rate)?\s*(?:\()?([\d.]+)%'],
        # Operating Expenses
        'OPEX': [r'Operating\s+Expenses\s*:?\s*\$?\s*([\d,]+)'],
        'TOTAL_OPERATING_EXPENSES': [r'Total\s+Operating\s+Expenses\s*=?\s*\$?\s*([\d,]+)'],
        'PROPERTY_TAXES': [r'Property\s+Taxes\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
        'REAL_ESTATE_TAXES': [r'(?:Real\s+Estate\s+|Property\s+)Taxes\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
        'INSURANCE': [r'Insurance\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
        'UTILITIES': [r'Utilities\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
        'REPAIRS_AND_MAINTENANCE': [r'Repairs?\s*(?:&|and)?\s*Maintenance\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
        'PAYROLL': [r'Payroll\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
        'ADMINISTRATIVE': [r'Administrative\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
        'PROFESSIONAL_FEES': [r'Professional\s+Fees\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
        'MANAGEMENT_FEE': [r'Management\s*(?:\([^)]+\))?\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
        'MANAGEMENT_FEE_PERCENTAGE': [r'Management\s*.*?([\d.]+)%'],
        # Rates
        'CAP_RATE': [r'Cap\s+Rate\s*:?\s*([\d.]+)%?'],
        'INTEREST_RATE': [r'Interest\s+Rate\s*:?\s*([\d.]+)%?'],
        'INTEREST_RATE_BASIS_POINTS': [r'Interest\s+Rate\s*:?\s*(\d+)\s*(?:bps|basis\s+points)'],
        'LTC': [r'Loan[- ]to[- ]Cost\s*(?:\(LTC\))?\s*:?\s*([\d.]+)%?'],
        'LTC_RATIO': [r'Loan[- ]to[- ]Cost\s*(?:\(LTC\))?\s*:?\s*([\d.]+)%?'],
        'EXIT_CAP_RATE': [r'Exit\s+Cap\s+Rate\s*:?\s*([\d.]+)%?'],
        # Rent & Revenue
        'FREE_MARKET_RENT_PSF': [r'Free\s+Market\s+Rent\s*:?\s*\$?\s*([\d,]+\.?\d*)\s*(?:/\s*)?(?:PSF|per\s+SF)'],
        'AFFORDABLE_RENT_PSF': [r'Affordable\s+Rent\s*:?\s*\$?\s*([\d,]+\.?\d*)\s*(?:/\s*)?(?:PSF|per\s+SF)'],
        'RETAIL_RENT_PSF': [r'Retail\s+Rent\s*:?\s*\$?\s*([\d,]+\.?\d*)\s*(?:/\s*)?(?:PSF|per\s+SF)'],
        'OTHER_INCOME_PER_UNIT': [r'Other\s+Income\s*:?\s*\$?\s*([\d,]+\.?\d*)\s*(?:/\s*)?(?:unit|per\s+unit)'],
        'PARKING_INCOME': [r'Parking\s+Income\s*:?\s*\$?\s*([\d,]+\.?\d*)'],
        # Inflation & Time
        'REVENUE_INFLATION_RATE': [r'Revenue\s+Inflation\s*:?\s*([\d.]+)%?'],
        'EXPENSE_INFLATION_RATE': [r'Expense\s+Inflation\s*:?\s*([\d.]+)%?'],
        'LEASE_UP_MONTHS': [r'Lease[- ]?Up\s+Period\s*:?\s*(\d+)\s*months?'],
        'STABILIZATION_MONTHS': [r'Stabilization\s+Period\s*:?\s*(\d+)\s*months?'],
        'CONSTRUCTION_MONTHS': [r'Construction\s+(?:Period|Duration)\s*:?\s*(\d+)\s*months?'],
        'HOLD_PERIOD_MONTHS': [r'Hold\s+Period\s*:?\s*(\d+)\s*months?'],
        # Construction & Development
        'CONSTRUCTION_COST_PER_GSF': [r'Construction\s+Cost\s*:?\s*\$?\s*([\d,]+)\s*per\s+(?:GSF|SF)'],
        'TOTAL_CONSTRUCTION_GMP': [r'(?:Total\s+)?Construction\s+GMP\s*:?\s*\$?\s*([\d,]+)'],
        'TOTAL_SOFT_COST': [r'(?:Total\s+)?Soft\s+Costs?\s*:?\s*\$?\s*([\d,]+)'],
        # Soft Costs Components
        'ARCHITECTURE_AND_INTERIOR_COST': [r'(?:Architecture|A&I)\s*(?:&|and)?\s*Interior\s*:?\s*\$?\s*([\d,]+)'],
        'STRUCTURAL_ENGINEERING_COST': [r'Structural\s+Engineering\s*:?\s*\$?\s*([\d,]+)'],
        'MEP_ENGINEERING_COST': [r'MEP\s+Engineering\s*:?\s*\$?\s*([\d,]+)'],
        'CIVIL_ENGINEERING_COST': [r'Civil\s+Engineering\s*:?\s*\$?\s*([\d,]+)'],
        'CONTROLLED_INSPECTIONS_COST': [r'(?:Controlled\s+)?Inspections?\s*:?\s*\$?\s*([\d,]+)'],
        'SURVEYING_COST': [r'Surveying\s*:?\s*\$?\s*([\d,]+)'],
        'UTILITIES_CONNECTION_COST': [r'Utilities?\s+Connection\s*:?\s*\$?\s*([\d,]+)'],
        'ADVERTISING_AND_MARKETING_COST': [r'(?:Advertising|Marketing)\s*:?\s*\$?\s*([\d,]+)'],
        'ACCOUNTING_COST': [r'Accounting\s*:?\s*\$?\s*([\d,]+)'],
        'MONITORING_COST': [r'Monitoring\s*:?\s*\$?\s*([\d,]+)'],
        'FF_AND_E_COST': [r'FF&E\s*:?\s*\$?\s*([\d,]+)'],
        'ENVIRONMENTAL_CONSULTANT_FEE': [r'Environmental\s+Consultant\s*:?\s*\$?\s*([\d,]+)'],
        'MISCELLANEOUS_CONSULTANTS_FEE': [r'Misc(?:ellaneous)?\s+Consultants\s*:?\s*\$?\s*([\d,]+)'],
        'GENERAL_LEGAL_COST': [r'(?:General\s+)?Legal\s*:?\s*\$?\s*([\d,]+)'],
        'REAL_ESTATE_TAXES_DURING_CONSTRUCTION': [r'(?:RE\s+)?Taxes\s+During\s+Construction\s*:?\s*\$?\s*([\d,]+)'],
        'MISCELLANEOUS_ADMIN_COST': [r'Misc(?:ellaneous)?\s+Admin\s*:?\s*\$?\s*([\d,]+)'],
        'IBR_COST': [r'IBR\s*:?\s*\$?\s*([\d,]+)'],
        'PROJECT_TEAM_COST': [r'Project\s+Team\s*:?\s*\$?\s*([\d,]+)'],
        'PEM_FEES': [r'PEM\s+Fees\s*:?\s*\$?\s*([\d,]+)'],
        'BANK_FEES': [r'Bank\s+Fees\s*:?\s*\$?\s*([\d,]+)'],
        # Land & Acquisition
        'LAND_VALUE': [r'(?:Total\s+)?Land\s+Value\s*:?\s*\$?\s*([\d,]+)'],
        'CLOSING_COSTS': [r'Closing\s+Costs\s*:?\s*\$?\s*([\d,]+)'],
        'ACQUISITION_FEE': [r'Acq(?:uisition)?\s+Fee\s*:?\s*\$?\s*([\d,]+)'],
        # Capital Stack
        'FINANCING_COST': [r'Financing\s+Cost\s*:?\s*\$?\s*([\d,]+)'],
        'FINANCING_PERCENTAGE': [r'Financing\s+(?:Percentage|%)\s*:?\s*([\d.]+)%?'],
        'INTEREST_RESERVE': [r'Interest\s+Reserve\s*:?\s*\$?\s*([\d,]+)'],
        'LOAN_AMOUNT': [r'Loan\s+Amount\s*:?\s*\$?\s*([\d,]+)'],
        # Exit Strategy
        'SALE_COST_PERCENTAGE': [r'Sale\s+Cost\s*:?\s*([\d.]+)%?'],
        'GP_PREF_RATE': [r'GP\s+Pref(?:erred)?\s+Rate\s*:?\s*([\d.]+)%?'],
        'LP_PREF_RATE': [r'LP\s+Pref(?:erred)?\s+Rate\s*:?\s*([\d.]+)%?'],
        'PROMOTE_PERCENTAGE': [r'Promote\s*:?\s*([\d.]+)%?'],
    }

    def __init__(self, formula_file_path: str = "formulas.txt"):
        """Initialize the analyzer and load formulas from *formula_file_path*."""
        self.formula_file_path = formula_file_path
        self.formulas = {}
        self.computed_values = {}
        self.defaults = {}
        self.defaults_applied = set()
        self.load_formulas()

    def load_formulas(self):
        """Load ``NAME = expression`` definitions from the formula file.

        Blank lines and ``#`` comments are skipped. A ``# Description:``
        comment is attached to the formula line that most recently preceded
        it; a description that appears before the first formula is dropped.
        Formulas without a description use their own name instead.

        I/O and decoding errors are printed (with traceback) rather than
        raised, leaving ``self.formulas`` with whatever was parsed so far.
        """
        pending_name = None
        pending_formula = None
        pending_description = None

        def flush():
            # Store the formula currently being assembled, if there is one.
            if pending_name and pending_formula:
                self.formulas[pending_name] = {
                    'formula': pending_formula,
                    'description': pending_description or pending_name,
                }

        try:
            with open(self.formula_file_path, 'r', encoding='utf-8') as f:
                for raw_line in f:
                    line = raw_line.strip()
                    if not line or line.startswith('#'):
                        if line.startswith('# Description:'):
                            pending_description = line.replace('# Description:', '').strip()
                        continue
                    if '=' not in line:
                        continue
                    flush()
                    # Split on the first '=' only so the expression itself
                    # may contain comparison operators.
                    name, _, expression = line.partition('=')
                    pending_name = name.strip()
                    pending_formula = expression.strip()
                    pending_description = None
            flush()
            print(f"β Loaded {len(self.formulas)} semantic formulas")
        except (OSError, UnicodeDecodeError) as e:
            print(f"β Error loading formulas: {str(e)}")
            traceback.print_exc()

    def extract_text_from_pdf(self, file_path: str) -> str:
        """Return the text of every page of a PDF, one page per line block.

        Returns an empty string (after printing the error) if the file
        cannot be read or parsed.
        """
        try:
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # extract_text() may yield None for pages without a text
                # layer; treat those as empty instead of failing the file.
                return "".join(
                    (page.extract_text() or "") + "\n"
                    for page in pdf_reader.pages
                )
        except Exception as e:  # best-effort: parser errors vary by PDF
            print(f"Error extracting PDF: {str(e)}")
            return ""

    def extract_text_from_txt(self, file_path: str) -> str:
        """Return the contents of a UTF-8 text file, ignoring bad bytes.

        Returns an empty string (after printing the error) on failure.
        """
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
                return file.read()
        except Exception as e:
            print(f"Error reading TXT: {str(e)}")
            return ""

    @staticmethod
    def _to_decimal(value: float) -> float:
        """Interpret values above 1 as percentages and return a fraction."""
        # Heuristic inherited from the original code: a stated "1%" is
        # indistinguishable from the fraction 1.0 and is left unchanged.
        return value / 100 if value > 1 else value

    @classmethod
    def _match_patterns(cls, text: str) -> Dict[str, Any]:
        """Return the first numeric match for every variable in _PATTERNS."""
        found = {}
        for key, pattern_list in cls._PATTERNS.items():
            for pattern in pattern_list:
                match = re.search(pattern, text, re.IGNORECASE)
                if match is None:
                    continue
                try:
                    found[key] = float(match.group(1).replace(',', '').strip())
                except ValueError:
                    # e.g. a lone "," or "." was captured; try next pattern.
                    continue
                break
        return found

    def extract_data_from_files(self, files: List[str]) -> Dict[str, Any]:
        """Extract semantic variables from the given document paths.

        Files ending in ``.pdf`` are parsed as PDF, everything else is read
        as text. The combined text is scanned with ``_PATTERNS``; percentage
        values are converted to fractions, synonyms are cross-filled,
        missing assumptions are taken from the defaults table and soft-cost
        items are estimated from the construction GMP.

        Side effects: sets ``self.defaults`` and ``self.defaults_applied``.

        Returns:
            Mapping of variable name to numeric value.
        """
        combined_text = ""
        for file_path in files:
            if file_path.lower().endswith('.pdf'):
                combined_text += self.extract_text_from_pdf(file_path) + "\n"
            else:
                combined_text += self.extract_text_from_txt(file_path) + "\n"

        extracted_data = self._match_patterns(combined_text)

        # Percentages: keep every rate in the same unit as its default.
        for key in self._PERCENT_KEYS:
            if key in extracted_data:
                extracted_data[key] = self._to_decimal(extracted_data[key])
        if 'INTEREST_RATE' in extracted_data:
            extracted_data['INTEREST_RATE_DECIMAL'] = extracted_data['INTEREST_RATE']
        if 'LTC' in extracted_data:
            extracted_data['LTC_RATIO'] = extracted_data['LTC']
        if 'EXIT_CAP_RATE' in extracted_data:
            extracted_data['EXIT_CAP_RATE_DECIMAL'] = self._to_decimal(extracted_data['EXIT_CAP_RATE'])

        # Synonyms: fill the alternative name when only one was found.
        if 'BUILDING_SF' in extracted_data and 'GROSS_SF' not in extracted_data:
            extracted_data['GROSS_SF'] = extracted_data['BUILDING_SF']
        if 'GROSS_SF' in extracted_data and 'RENTABLE_SF' not in extracted_data:
            # Assumes 90% of gross area is rentable when not stated.
            extracted_data['RENTABLE_SF'] = extracted_data['GROSS_SF'] * 0.9
        for source_key, alias in (
            ('EGI', 'EFFECTIVE_GROSS_INCOME'),
            ('NOI', 'NET_OPERATING_INCOME'),
            ('OPEX', 'TOTAL_OPERATING_EXPENSES'),
        ):
            if source_key in extracted_data and alias not in extracted_data:
                extracted_data[alias] = extracted_data[source_key]

        # Defaults: remember which keys were really defaulted so reports can
        # tell document values from assumptions.
        self.defaults = dict(self._DEFAULTS)
        self.defaults_applied = set()
        for key, default_value in self.defaults.items():
            if key not in extracted_data:
                extracted_data[key] = default_value
                self.defaults_applied.add(key)

        # Derive the GMP first so the soft-cost estimates below also apply
        # when only a per-GSF construction cost was stated.
        if ('CONSTRUCTION_COST_PER_GSF' in extracted_data
                and 'GROSS_SF' in extracted_data
                and 'TOTAL_CONSTRUCTION_GMP' not in extracted_data):
            extracted_data['TOTAL_CONSTRUCTION_GMP'] = (
                extracted_data['CONSTRUCTION_COST_PER_GSF'] * extracted_data['GROSS_SF']
            )

        if 'TOTAL_CONSTRUCTION_GMP' in extracted_data:
            gmp = extracted_data['TOTAL_CONSTRUCTION_GMP']
            for key, pct in self._SOFT_COST_SHARES.items():
                if key not in extracted_data:
                    extracted_data[key] = gmp * pct

        return extracted_data

    def extract_variables_from_formula(self, formula: str) -> List[str]:
        """Return the distinct UPPER_CASE variable names used in *formula*.

        Function names (SUM, MIN, MAX, ABS, ROUND) are excluded. Names are
        returned in order of first appearance.
        """
        names = self._VARIABLE_RE.findall(formula)
        return list(dict.fromkeys(
            name for name in names if name not in self._RESERVED_NAMES
        ))

    def check_formula_computable(self, formula: str, data: Dict[str, Any]) -> Tuple[bool, List[str]]:
        """Return ``(computable, missing)`` for *formula* given *data*.

        A variable counts as available when it is a key of *data* or of
        ``self.computed_values``.
        """
        missing = [
            var for var in self.extract_variables_from_formula(formula)
            if var not in data and var not in self.computed_values
        ]
        return not missing, missing

    def safe_eval_formula(self, formula: str, data: Dict[str, Any]) -> Any:
        """Evaluate *formula* with *data* and computed values as variables.

        ``^`` is treated as exponentiation. Only min/max/sum/abs/round (in
        lower or upper case) are callable from a formula.

        Raises:
            Exception: with an ``Evaluation error: ...`` message if the
                expression cannot be evaluated.
        """
        # Bind values as names rather than pasting their text into the
        # expression, so a negative operand keeps its sign under operators
        # that bind tighter than unary minus (e.g. X^2 with X = -5).
        namespace = {**data, **self.computed_values}
        expression = formula.replace('^', '**')
        try:
            # NOTE(security): eval() with a reduced builtins table is not a
            # sandbox; the formula file must come from a trusted source.
            return eval(expression, {"__builtins__": dict(self._FORMULA_FUNCTIONS)}, namespace)
        except Exception as e:
            raise Exception(f"Evaluation error: {str(e)}") from e

    def _resolve_formulas(self, extracted_data: Dict[str, Any]):
        """Evaluate formulas in passes until no new one can be computed.

        Returns ``(computable, non_computable, passes_run)``.
        """
        self.computed_values = {}
        computable_formulas = {}
        non_computable_formulas = {}
        passes_run = 0

        for iteration in range(1, self._MAX_PASSES + 1):
            passes_run = iteration
            newly_computed = 0
            for formula_name, formula_info in self.formulas.items():
                if formula_name in computable_formulas:
                    continue
                formula = formula_info['formula']
                all_data = {**extracted_data, **self.computed_values}
                is_computable, missing_vars = self.check_formula_computable(formula, all_data)
                if not is_computable:
                    non_computable_formulas[formula_name] = {
                        'description': formula_info['description'],
                        'formula': formula,
                        'missing_variables': missing_vars,
                    }
                    continue
                try:
                    result = self.safe_eval_formula(formula, all_data)
                except Exception as e:
                    non_computable_formulas[formula_name] = {
                        'description': formula_info['description'],
                        'formula': formula,
                        'error': str(e),
                        'missing_variables': [],
                    }
                    continue
                computable_formulas[formula_name] = {
                    'description': formula_info['description'],
                    'formula': formula,
                    'result': result,
                    'formatted_result': f"{result:,.2f}" if isinstance(result, (int, float)) else str(result),
                    'iteration': iteration,
                }
                self.computed_values[formula_name] = result
                newly_computed += 1
            print(f"π Iteration {iteration}: Computed {newly_computed} new formulas (Total: {len(computable_formulas)})")
            if newly_computed == 0:
                break

        # A formula may have been recorded as blocked in an early pass and
        # computed in a later one.
        for formula_name in computable_formulas:
            non_computable_formulas.pop(formula_name, None)
        return computable_formulas, non_computable_formulas, passes_run

    def process_files(self, files) -> Tuple[str, str, str]:
        """Run extraction and formula evaluation for uploaded *files*.

        Args:
            files: Uploaded files, either path strings or objects exposing
                the path as ``.name``.

        Returns:
            ``(summary_markdown, details_markdown, json_text)``. On failure
            the first element carries the error message and the other two
            are empty strings.
        """
        try:
            if not files:
                return "β No files uploaded", "", ""
            if isinstance(files, str):
                files = [files]
            file_paths = [getattr(f, "name", f) for f in files]

            extracted_data = self.extract_data_from_files(file_paths)
            if not extracted_data:
                return "β No data could be extracted from the files", "", ""

            computable_formulas, non_computable_formulas, passes_run = \
                self._resolve_formulas(extracted_data)

            # Group by iteration
            by_iteration = defaultdict(list)
            for name, info in computable_formulas.items():
                by_iteration[info['iteration']].append((name, info))

            total = len(self.formulas)

            def share(count):
                # Avoid dividing by zero when no formulas were loaded.
                return count / total * 100 if total else 0.0

            # Create summary
            summary = "\n".join([
                "",
                "## π Analysis Summary",
                f"**Total Formulas Loaded:** {total}",
                f"**β Computable Formulas:** {len(computable_formulas)} ({share(len(computable_formulas)):.1f}%)",
                f"**β Non-Computable Formulas:** {len(non_computable_formulas)} ({share(len(non_computable_formulas)):.1f}%)",
                f"**π Files Processed:** {len(file_paths)}",
                f"**π’ Data Points Extracted:** {len(extracted_data)}",
                f"**π― Defaults Applied:** {len(self.defaults_applied)}",
                f"**π Computation Iterations:** {passes_run}",
                "### π Progress by Iteration",
                "",
            ])
            for iter_num in sorted(by_iteration):
                summary += f"- Iteration {iter_num}: {len(by_iteration[iter_num])} formulas computed\n"

            # Analyze missing variables
            missing_var_count = defaultdict(list)
            for name, info in non_computable_formulas.items():
                for var in info.get('missing_variables', []):
                    missing_var_count[var].append(name)
            top_blockers = sorted(
                missing_var_count.items(), key=lambda item: len(item[1]), reverse=True
            )[:5]
            if top_blockers:
                summary += "\n### π« Top 5 Missing Variables\n"
                for var, blocked in top_blockers:
                    summary += f"- **{var}**: Blocks {len(blocked)} formulas\n"

            # Extracted data display
            data_display = "## π₯ Extracted Property Data\n\n"
            data_display += "| Variable | Value | Source |\n|----------|-------|--------|\n"
            for key, value in sorted(extracted_data.items()):
                source = "βοΈ Default" if key in self.defaults_applied else "π Document"
                if isinstance(value, float):
                    data_display += f"| {key} | {value:,.4f} | {source} |\n"
                else:
                    data_display += f"| {key} | {value} | {source} |\n"

            # Results display
            results_display = "## β Computed Formulas\n\n"
            for iter_num in sorted(by_iteration):
                entries = sorted(by_iteration[iter_num], key=lambda entry: entry[0])
                results_display += f"### Iteration {iter_num} ({len(entries)} formulas)\n\n"
                for name, info in entries:
                    results_display += f"**{name}** = {info['formatted_result']}\n"
                    results_display += f"*{info['description']}*\n"
                    results_display += f"`{info['formula']}`\n\n"

            json_output = {
                'summary': {
                    'total_formulas': total,
                    'computable': len(computable_formulas),
                    'non_computable': len(non_computable_formulas),
                    'files_processed': len(file_paths),
                    'iterations': passes_run,
                    'success_rate': round(share(len(computable_formulas)), 2),
                },
                'extracted_data': extracted_data,
                'computable_formulas': computable_formulas,
                # Only the first 20 blocked formulas are exported.
                'non_computable_formulas': dict(list(non_computable_formulas.items())[:20]),
            }
            # default=str keeps the report usable if a formula yields a value
            # JSON cannot represent natively (e.g. a complex number).
            json_str = json.dumps(json_output, indent=2, default=str)
            return summary, data_display + "\n\n" + results_display, json_str
        except Exception as e:
            error_msg = f"β Error processing files:\n{str(e)}\n\n{traceback.format_exc()}"
            return error_msg, "", ""
# Static Markdown shown in the page, kept apart from the layout code below.
_INTRO_MD = """
# π’ Property Formula Analyzer - Semantic Edition
Upload property documents to extract data and compute real estate formulas using **semantic variable names**.
### Features:
- π Extracts data from PDFs and text files
- π’ Matches property metrics to formula variables
- π Multi-pass computation for dependent formulas
- π Clear, human-readable formula names
"""

_INSTRUCTIONS_MD = """
### π Instructions:
1. Upload property documents (Offering Memorandum, Operating Expenses, etc.)
2. Click "Analyze & Compute Formulas"
3. Review extracted data and computed metrics
4. Download JSON results
**Example Variables**: `UNITS`, `PRICE`, `NOI`, `GROSS_SF`, `EFFECTIVE_GROSS_INCOME`
"""

_TIPS_MD = """
---
### π‘ Tips:
- The system uses semantic variable names (e.g., `Building_Efficiency` instead of `E1`)
- Formulas cascade: computed values enable more formulas in subsequent iterations
- Non-computable formulas show which variables are missing
"""

# One analyzer instance, shared by every request, loaded from formulas.txt.
analyzer = SemanticFormulaAnalyzer("formulas.txt")

# Page layout: intro, upload column, then summary / results / JSON rows.
# NOTE(review): the nesting of the instructions Markdown inside the upload
# column is assumed; the flattened source does not show it - confirm.
with gr.Blocks(title="Property Formula Analyzer", theme=gr.themes.Soft()) as app:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        with gr.Column():
            file_input = gr.File(
                label="π Upload Property Documents",
                file_count="multiple",
                file_types=[".pdf", ".txt"],
                type="filepath",
            )
            analyze_btn = gr.Button(
                "π Analyze & Compute Formulas",
                variant="primary",
                size="lg",
            )
            gr.Markdown(_INSTRUCTIONS_MD)

    with gr.Row():
        summary_output = gr.Markdown(label="Summary")

    with gr.Row():
        results_output = gr.Markdown(label="Results")

    with gr.Row():
        json_output = gr.Code(label="π₯ JSON Results", language="json", lines=20)

    # The three return values of process_files map onto the three outputs.
    analyze_btn.click(
        fn=analyzer.process_files,
        inputs=[file_input],
        outputs=[summary_output, results_output, json_output],
    )

    gr.Markdown(_TIPS_MD)

if __name__ == "__main__":
    app.launch()