Spaces:
Sleeping
Sleeping
| import re | |
| # ----------------------------- | |
| # INTEREST RATE EXTRACTION | |
| # ----------------------------- | |
def extract_interest_rate(text):
    """Extract the most authoritative interest rate (in percent) from *text*.

    A document may state several rates. Labels are tried in a fixed priority
    order and the first one followed by ``<number>%`` wins. Whole-number
    rates (``5%``) are accepted as well as decimal ones (``4.25%``).

    If no labelled rate is found, the text is checked for zero-interest
    markers; an explicit zero rate (``0``, ``0%``, ``0.00%``) counts, while
    a non-zero rate that merely starts with ``0`` (``0.75%``) does not.

    Args:
        text: Document text to scan. ``None`` or an empty string is allowed.

    Returns:
        The rate as a ``float``, ``0.0`` for a recognised zero-interest
        loan, or ``None`` when nothing matches.
    """
    if not text:
        return None

    # NOTE(review): the label literals below look mis-encoded (possibly
    # Hebrew decoded with the wrong charset) -- confirm against the
    # canonical file. They are kept byte-for-byte here.
    rate_labels = (
        # 1. Comparison rate (most stable & always present)
        r"砖讬注讜专\s+讛专讬讘讬转\s+诇爪专讻讬\s+讛砖讜讜讗讛",
        # 2. Forecast total interest rate
        r"讛专讬讘讬转\s+讛讻讜诇诇转\s+讛讞讝讜讬讛",
        # 3. Adjusted interest rate
        r"砖讬注讜专\s+讛专讬讘讬转\s+讛诪转讜讗诪转",
        # 4. Base interest rate
        r"砖讬注讜专\s+讛专讬讘讬转",
    )
    # Optional ':' or '-' separator, then an integer or decimal number
    # immediately followed by a percent sign.
    value_suffix = r"\s*[:\-]?\s*(\d+(?:\.\d+)?)\s*%"

    for label in rate_labels:
        found = re.search(label + value_suffix, text)
        if found:
            return float(found.group(1))

    # Special case: Bank of Israel 0% loans
    if "诪转讜讜讛 讘谞拽 讬砖专讗诇" in text:
        return 0.0
    # A plain substring test for "... 0" would also fire on "0.75%", so
    # require that the zero is not the start of a larger/non-zero number.
    if re.search(r"专讬讘讬转 0(?:[.,]0+)?(?![.,]?\d)", text):
        return 0.0

    return None
| # ----------------------------- | |
| # LOAN AMOUNT EXTRACTION | |
| # ----------------------------- | |
def extract_loan_amount(text):
    """Extract the loan amount from the execution-amount line of *text*.

    Only the explicit "amount at execution" labels are consulted; the value
    is never inferred from balances, totals, or monthly figures. Thousands
    separators (``,``) are stripped, and a fractional part of any length is
    kept rather than being cut to two digits.

    Args:
        text: Document text to scan. ``None`` or an empty string is allowed.

    Returns:
        The amount as a ``float``, or ``None`` when no label yields a
        parseable number.
    """
    if not text:
        return None

    # NOTE(review): the label literals below look mis-encoded (possibly
    # Hebrew decoded with the wrong charset) -- confirm against the
    # canonical file. They are kept byte-for-byte here.
    amount_labels = (
        # Canonical execution amount
        r"住讻讜诐\s+讞诇拽\s+讝讛\s+讘注转\s+讛讘讬爪讜注",
        # Variant formatting sometimes seen
        r"住讻讜诐\s+讛讛诇讜讜讗讛\s+讘注转\s+讛讘讬爪讜注",
    )
    # Optional ':' or '-' separator, digits with thousands commas, and an
    # optional fractional part of one or more digits.
    value_suffix = r"\s*[:\-]?\s*([\d,]+(?:\.\d+)?)"

    for label in amount_labels:
        found = re.search(label + value_suffix, text)
        if not found:
            continue
        digits = found.group(1).replace(",", "")
        try:
            return float(digits)
        except ValueError:
            # The capture held only separators (e.g. a lone ","); fall
            # through to the next label instead of failing.
            continue

    return None