""" ProofAssets: Real Estate Tokenizer (Heuristic Demo) """ from __future__ import annotations import re, json, os from typing import Dict, Any, List, Optional from dateparser.search import search_dates import gradio as gr def _clean(s: str) -> str: return re.sub(r"\s+", " ", s).strip() def extract_parties(text: str) -> Optional[List[str]]: m = re.search(r"\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b)", text, re.I | re.S) if m: p1, p2 = _clean(m.group(1)), _clean(m.group(2)) p1 = re.sub(r"^(?:the\s+)?(landlord|lessor|owner)\s*[:\-]*\s*", "", p1, flags=re.I) p2 = re.sub(r"^(?:the\s+)?(tenant|lessee|occupant)\s*[:\-]*\s*", "", p2, flags=re.I) return [p1, p2] parties = [] m1 = re.search(r"\bLandlord\s*[:\-]\s*(.+?)(?:[,.;\n])", text, flags=re.I) m2 = re.search(r"\bTenant\s*[:\-]\s*(.+?)(?:[,.;\n])", text, flags=re.I) if m1: parties.append(_clean(m1.group(1))) if m2: parties.append(_clean(m2.group(1))) return parties or None def extract_address(text: str) -> Optional[str]: for pat in [ r"(?:premises|property)\s+(?:located\s+)?at\s+(.+?)(?:[,.;\n])", r"\blocated at\s+(.+?)(?:[,.;\n])", r"\baddress\s*[:\-]\s*(.+?)(?:[,.;\n])", ]: m = re.search(pat, text, flags=re.I) if m: return _clean(m.group(1)) return None def extract_term(text: str) -> Optional[str]: m = re.search(r"(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(month|months|year|years)", text, flags=re.I) if m: n, unit = int(m.group(1)), m.group(2).lower() return f"{n} years" if unit.startswith("year") else f"{n} months" m = re.search(r"(\d{1,3})\s*-\s*month\s+term", text, flags=re.I) if m: return f"{int(m.group(1))} months" return None def extract_start_date(text: str) -> Optional[str]: window = re.search(r"(?:commencing|starting|effective)\s+on\s+(.{0,60})", text, flags=re.I) candidates = [] if window: parsed = search_dates(window.group(0)) if parsed: candidates.extend([d for _, d in parsed]) if not candidates: parsed = search_dates(text) if parsed: candidates.extend([d for _, d in parsed]) if candidates: dt = sorted(candidates)[0] return dt.strftime("%Y-%m-%d") return None def extract_rent(text: str) -> Optional[Dict[str, Any]]: near = re.search(r"(?:monthly|base)\s+rent[^.\n]{0,40}?(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})", text, flags=re.I) if not near: near = re.search(r"\b(?:rent\s+of|rent\s+is)\s*(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})", text, flags=re.I) if near: cur = near.group(1) or "" amt = float(near.group(2).replace(",", "")) currency = {"$": "USD", "€": "EUR", "£": "GBP"}.get(cur, None) return {"amount": amt, "currency": currency, "period": "monthly"} return None def detect_jurisdiction(text: str) -> Optional[str]: if re.search(r"\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b", text): return "US" if re.search(r"\bEuropean Union\b|\bPortugal\b|\bLisbon\b", text, flags=re.I): return "EU/PT" if re.search(r"\bUnited Kingdom\b|\bEngland\b|\bWales\b", text, flags=re.I): return "UK" return None CLAUSE_KEYWORDS = { "termination": [r"\bterminate(?:d|s|ion)?\b", r"\bearly termination\b"], "liability": [r"\bliabilit(?:y|ies)\b", r"\blimit(?:ation)? of liability\b"], "indemnity": [r"\bindemnif(?:y|ication)\b", r"\bindemnitor\b"], "governing_law": [r"\bgoverning law\b", r"\blaws? of\b"], "security_deposit": [r"\bsecurity deposit\b"], "late_fee": [r"\blate fee\b", r"\bpenalt(y|ies)\b"], "sublet_assignment": [r"\bsublet\b", r"\bassignment\b"], "maintenance_repairs": [r"\bmaintenance\b", r"\brepairs?\b"], "utilities": [r"\butilities\b", r"\bwater|gas|electricity\b"], "entry_access": [r"\bright of entry\b", r"\baccess\b"], } def extract_clauses(text: str) -> Dict[str, bool]: return {name: any(re.search(p, text, flags=re.I) for p in pats) for name, pats in CLAUSE_KEYWORDS.items()} def extract_metadata(contract_text: str) -> Dict[str, Any]: text = (contract_text or "").strip() if not text: return {"error": "No text provided."} return { "parties": extract_parties(text), "property_address": extract_address(text), "lease_term": extract_term(text), "start_date": extract_start_date(text), "rent": extract_rent(text), "jurisdiction": detect_jurisdiction(text), "clauses": extract_clauses(text), } def summarize(metadata: Dict[str, Any]) -> str: if "error" in metadata: return f"**Error:** {metadata['error']}" parts: List[str] = [] if metadata.get("parties"): parts.append(f"**Parties:** {', '.join(metadata['parties'])}") if metadata.get("property_address"): parts.append(f"**Property:** {metadata['property_address']}") if metadata.get("lease_term"): parts.append(f"**Term:** {metadata['lease_term']}") if metadata.get("start_date"): parts.append(f"**Start Date:** {metadata['start_date']}") if metadata.get("rent"): r = metadata["rent"]; cur = {"USD": "$", "EUR": "€", "GBP": "£"}.get(r.get("currency"), "") parts.append(f"**Rent:** {cur}{r.get('amount')} / {r.get('period','monthly')}") if metadata.get("jurisdiction"): parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}") if metadata.get("clauses"): on = [k.replace("_"," ") for k,v in metadata["clauses"].items() if v] parts.append(f"**Detected Clauses:** {', '.join(on) if on else 'None detected'}") return " \n".join(parts) if parts else "_No summary available._" EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207. The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month. A security deposit is required. Late fees may apply. The governing law shall be the laws of the State of New York. Either party may terminate this agreement for cause subject to 30 days' notice. Sublet or assignment requires prior written consent. Landlord is responsible for structural repairs; Tenant shall handle routine maintenance. Utilities include water and electricity. """ def run_pipeline(text: str): try: meta = extract_metadata(text) return summarize(meta), meta except Exception as e: return f"**Error:** {e}", {"error": str(e)} with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo: gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer") gr.Markdown("Paste a lease/contract. The app extracts key fields and detects common clauses. _Heuristic demo — no legal advice._") with gr.Row(): inp = gr.Textbox(label="Contract Text", lines=14, value=EXAMPLE_TEXT, show_copy_button=True) with gr.Row(): extract_btn = gr.Button("Extract Metadata", variant="primary") clear_btn = gr.Button("Clear") with gr.Row(): summary_out = gr.Markdown(label="Summary") with gr.Row(): json_out = gr.JSON(label="Structured JSON") extract_btn.click(run_pipeline, inputs=inp, outputs=[summary_out, json_out]) clear_btn.click(lambda: ("", {}), None, [summary_out, json_out]) # Expose standard names for the runner app = demo gradio_app = demo # As a fallback, explicitly launch if executed directly (works fine on Spaces too) if __name__ == "__main__": demo.queue() demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))