"""
ProofAssets: Real Estate Tokenizer (Heuristic Demo)
"""
from __future__ import annotations

import json
import logging
import os
import re
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
from dateparser.search import search_dates
def _clean(s: str) -> str: |
|
|
return re.sub(r"\s+", " ", s).strip() |
|
|
|
|
|
def extract_parties(text: str) -> Optional[List[str]]:
    """Return the contracting parties named in *text*, or ``None`` if none are found.

    Two strategies are tried in order:

    1. A "between A and B" phrase. A leading role word (landlord/lessor/owner
       for the first party, tenant/lessee/occupant for the second), an optional
       "the" before it and any ``:``/``-`` after it are stripped from each name.
    2. Explicit ``Landlord: ...`` / ``Tenant: ...`` labels.

    The second strategy is also used when the first one matched but stripping
    the role words left a name empty (e.g. "between the Landlord and the
    Tenant"), so the result never contains empty strings.
    """
    between = re.search(
        r"\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b)", text, re.I | re.S
    )
    named: List[str] = []
    if between:
        # The \b after the role word keeps names that merely start with a role
        # word intact ("Owners Association" must not become "s Association").
        first = re.sub(
            r"^(?:the\s+)?(?:landlord|lessor|owner)\b\s*[:\-]*\s*",
            "",
            _clean(between.group(1)),
            flags=re.I,
        )
        second = re.sub(
            r"^(?:the\s+)?(?:tenant|lessee|occupant)\b\s*[:\-]*\s*",
            "",
            _clean(between.group(2)),
            flags=re.I,
        )
        if first and second:
            return [first, second]
        named = [name for name in (first, second) if name]

    labelled: List[str] = []
    for label in ("Landlord", "Tenant"):
        # "|$" lets a label on the last line match without trailing punctuation.
        hit = re.search(
            rf"\b{label}\s*[:\-]\s*(.+?)(?:[,.;\n]|$)", text, flags=re.I
        )
        if hit:
            value = _clean(hit.group(1))
            if value:
                labelled.append(value)

    return labelled or named or None
def extract_address(text: str) -> Optional[str]:
    """Return the first address-like fragment found in *text*, or ``None``.

    Patterns are tried in order: "premises/property [located] at ...",
    "located at ..." and "address: ...". The capture stops at the first comma,
    period, semicolon, newline or end of text, so for "123 Main Street,
    Albany" only "123 Main Street" is returned.
    """
    patterns = (
        r"(?:premises|property)\s+(?:located\s+)?at\s+(.+?)(?:[,.;\n]|$)",
        r"\blocated at\s+(.+?)(?:[,.;\n]|$)",
        r"\baddress\s*[:\-]\s*(.+?)(?:[,.;\n]|$)",
    )
    for pattern in patterns:
        found = re.search(pattern, text, flags=re.I)
        if not found:
            continue
        address = _clean(found.group(1))
        # A whitespace-only capture is not an address; try the next pattern.
        if address:
            return address
    return None
def extract_term(text: str) -> Optional[str]:
    """Return the lease duration as ``"<n> months"`` or ``"<n> years"``, or ``None``.

    Recognised phrasings are "lease term is N <unit>", "for a term of N <unit>",
    "term: N <unit>" and the hyphenated "N-month term" / "N-year term".
    The unit in the result is always plural, whatever the count.
    """
    phrased = re.search(
        r"(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(months?|years?)",
        text,
        flags=re.I,
    )
    # Hyphenated form; years are accepted as well as months.
    hyphenated = None if phrased else re.search(
        r"(\d{1,3})\s*-\s*(month|year)\s+term", text, flags=re.I
    )
    found = phrased or hyphenated
    if not found:
        return None

    count = int(found.group(1))
    unit = "years" if found.group(2).lower().startswith("year") else "months"
    return f"{count} {unit}"
def extract_start_date(text: str) -> Optional[str]:
    """Return the lease start date as ``YYYY-MM-DD``, or ``None`` if no date is found.

    The text following "commencing on", "starting on" or "effective on" (up to
    60 characters on the same line) is searched first, and the first date
    found there is used. If that yields nothing, the whole text is searched
    and the earliest date is returned.
    """
    anchored = re.search(
        r"(?:commencing|starting|effective)\s+on\s+(.{0,60})", text, flags=re.I
    )
    if anchored and anchored.group(1).strip():
        hits = search_dates(anchored.group(1))
        if hits:
            # The date closest to the keyword is the one the phrase refers to;
            # later dates or numbers in the window are unrelated.
            return hits[0][1].strftime("%Y-%m-%d")

    hits = search_dates(text)
    if not hits:
        return None
    # Compare without tzinfo so a mix of naive and aware datetimes cannot
    # raise TypeError; only the calendar date is reported anyway.
    earliest = min(
        (when for _, when in hits), key=lambda when: when.replace(tzinfo=None)
    )
    return earliest.strftime("%Y-%m-%d")
def extract_rent(text: str) -> Optional[Dict[str, Any]]:
    """Return the rent found in *text* as a dict, or ``None`` if none is found.

    The result has the keys ``amount`` (float), ``currency`` ("USD", "EUR",
    "GBP" or ``None`` when no symbol was seen) and ``period`` (always
    "monthly").

    Patterns are tried in this order:

    1. "monthly rent" / "base rent" followed, on the same sentence and within
       40 characters, by a currency symbol and a number.
    2. The same lead-in followed by a bare number.
    3. "rent of" / "rent is" directly followed by an optionally marked number.

    Trying the currency-marked form first stops an unrelated number such as
    the "1" in "due on the 1st" from being taken as the amount.
    """
    amount = r"([0-9][\d,]*\.?\d{0,2})"
    lead_in = r"(?:monthly|base)\s+rent[^.\n]{0,40}?"
    patterns = (
        lead_in + r"(\$|€|£)\s*" + amount,
        lead_in + r"(\$|€|£)?\s*" + amount,
        r"\b(?:rent\s+of|rent\s+is)\s*(\$|€|£)?\s*" + amount,
    )

    found = None
    for pattern in patterns:
        found = re.search(pattern, text, flags=re.I)
        if found:
            break
    if not found:
        return None

    symbol = found.group(1) or ""
    # Commas are thousands separators (or trailing punctuation); drop them.
    value = float(found.group(2).replace(",", ""))
    currency = {"$": "USD", "€": "EUR", "£": "GBP"}.get(symbol)
    return {"amount": value, "currency": currency, "period": "monthly"}
def detect_jurisdiction(text: str) -> Optional[str]:
    """Return a coarse jurisdiction code ("US", "EU/PT" or "UK"), or ``None``.

    Rules are checked in the order listed and the first hit wins. The US rule
    is case-sensitive because it relies on capitalised forms such as "NY",
    "CA" and "State of Xxx"; the other rules ignore case.
    """
    rules = (
        ("US", r"\bUnited States\b|\bUSA\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b", 0),
        ("EU/PT", r"\bEuropean Union\b|\bPortugal\b|\bLisbon\b", re.I),
        (
            "UK",
            r"\bUnited Kingdom\b|\bEngland\b|\bScotland\b|\bWales\b|\bNorthern Ireland\b",
            re.I,
        ),
    )
    for code, pattern, flags in rules:
        if re.search(pattern, text, flags=flags):
            return code
    return None
# Clause name -> regex patterns; a clause counts as present when any of its
# patterns matches. Patterns are written for case-insensitive search.
CLAUSE_KEYWORDS: Dict[str, List[str]] = {
    # "terminat" + suffix so that "termination" itself is matched.
    "termination": [r"\bterminat(?:e|ed|es|ing|ion)\b", r"\bearly termination\b"],
    "liability": [r"\bliabilit(?:y|ies)\b", r"\blimit(?:ation)? of liability\b"],
    "indemnity": [
        r"\bindemnif(?:y|ies|ied|ication)\b",
        r"\bindemnit(?:y|ies)\b",
        r"\bindemnitor\b",
    ],
    "governing_law": [r"\bgoverning law\b", r"\blaws? of\b"],
    "security_deposit": [r"\bsecurity deposits?\b"],
    "late_fee": [r"\blate fees?\b", r"\bpenalt(?:y|ies)\b"],
    "sublet_assignment": [r"\bsublet(?:s|ting)?\b", r"\bassignment\b"],
    "maintenance_repairs": [r"\bmaintenance\b", r"\brepairs?\b"],
    # The group keeps \b applied to every alternative; without it "gas"
    # matched inside words such as "gasket".
    "utilities": [r"\butilities\b", r"\b(?:water|gas|electricity)\b"],
    "entry_access": [r"\bright of entry\b", r"\baccess\b"],
}
def extract_clauses(
    text: str, keywords: Optional[Dict[str, List[str]]] = None
) -> Dict[str, bool]:
    """Report, per clause name, whether any of its patterns occurs in *text*.

    Args:
        text: Contract text to scan; matching ignores case.
        keywords: Mapping of clause name to regex patterns. Defaults to the
            module-level ``CLAUSE_KEYWORDS`` when omitted.

    Returns:
        A dict with one boolean per clause name in the mapping.
    """
    table = CLAUSE_KEYWORDS if keywords is None else keywords
    return {
        name: any(re.search(pattern, text, flags=re.I) for pattern in patterns)
        for name, patterns in table.items()
    }
def extract_metadata(contract_text: Optional[str]) -> Dict[str, Any]:
    """Run every extractor over *contract_text* and collect the results.

    Returns ``{"error": "No text provided."}`` when the input is ``None``,
    empty or only whitespace. Otherwise returns a dict with the keys
    ``parties``, ``property_address``, ``lease_term``, ``start_date``,
    ``rent``, ``jurisdiction`` and ``clauses``, each holding the value
    produced by the matching extractor.
    """
    text = (contract_text or "").strip()
    if not text:
        return {"error": "No text provided."}

    return {
        "parties": extract_parties(text),
        "property_address": extract_address(text),
        "lease_term": extract_term(text),
        "start_date": extract_start_date(text),
        "rent": extract_rent(text),
        "jurisdiction": detect_jurisdiction(text),
        "clauses": extract_clauses(text),
    }
def summarize(metadata: Dict[str, Any]) -> str:
    """Render *metadata* as a Markdown summary, one field per line.

    A dict containing an ``error`` key yields only the error message. Fields
    that are missing or empty are skipped; if nothing remains, a placeholder
    string is returned.
    """
    if "error" in metadata:
        return f"**Error:** {metadata['error']}"

    parts: List[str] = []
    if metadata.get("parties"):
        parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
    if metadata.get("property_address"):
        parts.append(f"**Property:** {metadata['property_address']}")
    if metadata.get("lease_term"):
        parts.append(f"**Term:** {metadata['lease_term']}")
    if metadata.get("start_date"):
        parts.append(f"**Start Date:** {metadata['start_date']}")

    rent = metadata.get("rent")
    if rent:
        symbol = {"USD": "$", "EUR": "€", "GBP": "£"}.get(rent.get("currency"), "")
        amount = rent.get("amount")
        # Show money with thousands separators and two decimals
        # ("1,500.00") instead of the raw float repr ("1500.0").
        if isinstance(amount, (int, float)):
            shown = f"{amount:,.2f}"
        else:
            shown = str(amount)
        parts.append(f"**Rent:** {symbol}{shown} / {rent.get('period', 'monthly')}")

    if metadata.get("jurisdiction"):
        parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")

    clauses = metadata.get("clauses")
    if clauses:
        present = [name.replace("_", " ") for name, found in clauses.items() if found]
        parts.append(
            f"**Detected Clauses:** {', '.join(present) if present else 'None detected'}"
        )

    if not parts:
        return "_No summary available._"
    # Two trailing spaces before the newline make a Markdown hard line break,
    # so each field is rendered on its own line.
    return "  \n".join(parts)
# Sample lease used as the initial value of the contract text box.
EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
A security deposit is required. Late fees may apply. The governing law shall be the laws of the State of New York.
Either party may terminate this agreement for cause subject to 30 days' notice. Sublet or assignment requires prior written consent.
Landlord is responsible for structural repairs; Tenant shall handle routine maintenance. Utilities include water and electricity.
"""
def run_pipeline(text: str) -> Tuple[str, Dict[str, Any]]:
    """Extract metadata from *text* and return ``(markdown_summary, metadata)``.

    This is the UI callback, so it never raises: any exception from the
    extractors is logged with its traceback and returned as an error summary
    plus an ``{"error": ...}`` dict.
    """
    try:
        meta = extract_metadata(text)
        return summarize(meta), meta
    except Exception as e:
        # Boundary handler: keep the traceback in the logs, since the UI
        # only shows the message.
        logging.getLogger(__name__).exception("Contract extraction failed")
        return f"**Error:** {e}", {"error": str(e)}
# UI layout: contract input, action buttons, Markdown summary, structured JSON.
with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
    gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
    gr.Markdown("Paste a lease/contract. The app extracts key fields and detects common clauses. _Heuristic demo — no legal advice._")

    with gr.Row():
        inp = gr.Textbox(label="Contract Text", lines=14, value=EXAMPLE_TEXT, show_copy_button=True)

    with gr.Row():
        extract_btn = gr.Button("Extract Metadata", variant="primary")
        clear_btn = gr.Button("Clear")

    with gr.Row():
        summary_out = gr.Markdown(label="Summary")

    with gr.Row():
        json_out = gr.JSON(label="Structured JSON")

    # Extract fills both outputs from the text box.
    extract_btn.click(run_pipeline, inputs=inp, outputs=[summary_out, json_out])
    # Clear resets the two outputs only; the input text is left untouched.
    clear_btn.click(lambda: ("", {}), None, [summary_out, json_out])
# Additional module-level names bound to the same Blocks object.
# NOTE(review): presumably for hosts that look up `app` or `gradio_app` — confirm.
app = demo
gradio_app = demo
if __name__ == "__main__":
    # Enable request queuing, then serve on all interfaces. The port comes
    # from the PORT environment variable and falls back to 7860.
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))