File size: 7,615 Bytes
460c910 4d0ab1b 460c910 168847f 1327f78 460c910 1327f78 460c910 1327f78 4d0ab1b 460c910 1327f78 460c910 168847f 460c910 168847f 460c910 168847f 1327f78 4d0ab1b 1327f78 460c910 168847f 1327f78 4d0ab1b 460c910 168847f 1327f78 460c910 1327f78 bf35370 168847f 1327f78 bf35370 168847f 1327f78 460c910 1327f78 460c910 168847f 1327f78 460c910 1327f78 460c910 1327f78 460c910 168847f 1327f78 460c910 1327f78 432dfdf 1327f78 460c910 168847f 460c910 168847f 460c910 1327f78 4d0ab1b 460c910 168847f 460c910 168847f 1327f78 168847f 460c910 168847f 1327f78 460c910 1327f78 460c910 168847f 460c910 168847f 1327f78 168847f 1327f78 168847f 1327f78 460c910 1327f78 168847f 460c910 4d0ab1b 168847f 4d0ab1b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
"""
ProofAssets: Real Estate Tokenizer (Heuristic Demo)
"""
from __future__ import annotations
import re, json, os
from typing import Dict, Any, List, Optional
from dateparser.search import search_dates
import gradio as gr
def _clean(s: str) -> str:
return re.sub(r"\s+", " ", s).strip()
def extract_parties(text: str) -> Optional[List[str]]:
    """Return the contracting parties named in *text*, or ``None`` if none are found.

    Two heuristics are tried in order:

    1. A "between <A> and <B>" phrase, where <B> ends at the first comma,
       period, semicolon, newline or " for". A leading role word
       (landlord/lessor/owner for <A>, tenant/lessee/occupant for <B>) is
       stripped so only the name remains.
    2. Separate "Landlord: <A>" / "Tenant: <B>" labels. Either one may be
       missing, so the list can hold one or two entries.
    """
    m = re.search(r"\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b)", text, re.I | re.S)
    if m:
        p1, p2 = _clean(m.group(1)), _clean(m.group(2))
        name1 = re.sub(r"^(?:the\s+)?(landlord|lessor|owner)\s*[:\-]*\s*", "", p1, flags=re.I)
        name2 = re.sub(r"^(?:the\s+)?(tenant|lessee|occupant)\s*[:\-]*\s*", "", p2, flags=re.I)
        # A party referred to only by its role ("the Landlord") would be
        # stripped to an empty string; keep the original wording instead.
        return [name1 or p1, name2 or p2]

    parties = []
    # "|$" lets a label on the final, unterminated line of the text match too.
    for label in ("Landlord", "Tenant"):
        hit = re.search(r"\b" + label + r"\s*[:\-]\s*(.+?)(?:[,.;\n]|$)", text, flags=re.I)
        if hit:
            parties.append(_clean(hit.group(1)))
    return parties or None
def extract_address(text: str) -> Optional[str]:
    """Return the first address-like phrase in *text*, or ``None``.

    Looks, in order, for "premises/property [located] at ...",
    "located at ..." and "address: ...". The capture stops at the first
    comma, period, semicolon or newline, so only the leading part of a
    multi-part address (typically the street) is returned.
    """
    # "|$" lets an address on the final, unterminated line of the text match;
    # without it a trailing delimiter was mandatory.
    stop = r"(?:[,.;\n]|$)"
    patterns = [
        r"(?:premises|property)\s+(?:located\s+)?at\s+(.+?)" + stop,
        r"\blocated at\s+(.+?)" + stop,
        r"\baddress\s*[:\-]\s*(.+?)" + stop,
    ]
    for pat in patterns:
        m = re.search(pat, text, flags=re.I)
        if m:
            return _clean(m.group(1))
    return None
def extract_term(text: str) -> Optional[str]:
    """Return the lease length as ``"<n> months"`` or ``"<n> years"``, or ``None``.

    Recognised phrasings (case-insensitive):

    * "lease term is N months/years", "for a term of N ...", "term: N ..."
    * "N-month term" / "N-year term"

    The unit in the result is always plural, whatever the source wording.
    """
    m = re.search(
        r"(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(month|months|year|years)",
        text,
        flags=re.I,
    )
    if not m:
        # Hyphenated form; years are accepted as well as months.
        m = re.search(r"(\d{1,3})\s*-\s*(month|year)\s+term", text, flags=re.I)
    if not m:
        return None
    n, unit = int(m.group(1)), m.group(2).lower()
    return f"{n} years" if unit.startswith("year") else f"{n} months"
def extract_start_date(text: str) -> Optional[str]:
    """Return the lease start date as ``YYYY-MM-DD``, or ``None`` if no date is found.

    Dates are first searched in the phrase beginning with
    "commencing/starting/effective on" (plus up to 60 following characters).
    If that yields nothing, the whole text is searched. The earliest
    candidate found is returned.
    """
    window = re.search(r"(?:commencing|starting|effective)\s+on\s+(.{0,60})", text, flags=re.I)
    candidates = []
    if window:
        # search_dates yields (matched_text, datetime) pairs, or a falsy value.
        candidates.extend(d for _, d in (search_dates(window.group(0)) or []))
    if not candidates:
        candidates.extend(d for _, d in (search_dates(text) or []))
    if not candidates:
        return None
    # dateparser can return timezone-aware datetimes when the text names a
    # zone; comparing those with naive ones raises TypeError, so order the
    # candidates by their wall-clock value only.
    earliest = min(candidates, key=lambda d: d.replace(tzinfo=None))
    return earliest.strftime("%Y-%m-%d")
def extract_rent(text: str) -> Optional[Dict[str, Any]]:
    """Return the rent found in *text* as a dict, or ``None`` if none is found.

    The result has the keys ``amount`` (float), ``currency`` ("USD", "EUR",
    "GBP", or ``None`` when no $, € or £ symbol precedes the number) and
    ``period`` (always "monthly").

    The first number within 40 characters after "monthly rent" / "base rent"
    (same sentence and line) is used. Failing that, a number directly after
    "rent of" / "rent is" is used.
    """
    # Digits with optional thousands commas and up to two decimals. The
    # lookahead rejects ordinals such as "1st" so that "due on the 1st ...
    # is $1,500" yields 1500 rather than 1.
    amount = r"([0-9][\d,]*\.?\d{0,2})(?![\d,]*(?:st|nd|rd|th)\b)"
    symbol = r"(\$|€|£)?\s*"
    near = re.search(r"(?:monthly|base)\s+rent[^.\n]{0,40}?" + symbol + amount, text, flags=re.I)
    if not near:
        near = re.search(r"\b(?:rent\s+of|rent\s+is)\s*" + symbol + amount, text, flags=re.I)
    if not near:
        return None
    cur = near.group(1) or ""
    # The capture may end in a comma or a bare "."; float() accepts "1500.".
    amt = float(near.group(2).replace(",", ""))
    currency = {"$": "USD", "€": "EUR", "£": "GBP"}.get(cur, None)
    return {"amount": amt, "currency": currency, "period": "monthly"}
def detect_jurisdiction(text: str) -> Optional[str]:
    """Guess the governing jurisdiction from place names mentioned in *text*.

    Rules are checked in order and the first hit wins: "US", then "EU/PT",
    then "UK". Returns ``None`` when nothing matches. The US rule is
    case-sensitive (it relies on capitalised names and the upper-case
    abbreviations NY/CA); the other two ignore case.
    """
    rules = (
        (r"\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b", 0, "US"),
        (r"\bEuropean Union\b|\bPortugal\b|\bLisbon\b", re.I, "EU/PT"),
        (r"\bUnited Kingdom\b|\bEngland\b|\bWales\b", re.I, "UK"),
    )
    for pattern, flags, code in rules:
        if re.search(pattern, text, flags=flags) is not None:
            return code
    return None
# Clause name -> regex patterns; a clause counts as present when any of its
# patterns is found in the contract text (patterns are applied with re.I).
CLAUSE_KEYWORDS = {
    # Stem is "terminat" so that "termination" matches as well as "terminate(d/s)".
    "termination": [r"\bterminat(?:e[ds]?|ing|ion)\b", r"\bearly termination\b"],
    "liability": [r"\bliabilit(?:y|ies)\b", r"\blimit(?:ation)? of liability\b"],
    "indemnity": [
        r"\bindemnif(?:y|ies|ied|ication)\b",
        r"\bindemnit(?:y|ies)\b",
        r"\bindemnitor\b",
    ],
    "governing_law": [r"\bgoverning law\b", r"\blaws? of\b"],
    "security_deposit": [r"\bsecurity deposit\b"],
    "late_fee": [r"\blate fees?\b", r"\bpenalt(y|ies)\b"],
    "sublet_assignment": [r"\bsublet\b", r"\bassignment\b"],
    "maintenance_repairs": [r"\bmaintenance\b", r"\brepairs?\b"],
    # The alternation is grouped so the word boundaries apply to every
    # option; ungrouped, "gas" matched inside words such as "Vegas".
    "utilities": [r"\butilities\b", r"\b(?:water|gas|electricity)\b"],
    "entry_access": [r"\bright of entry\b", r"\baccess\b"],
}
def extract_clauses(text: str) -> Dict[str, bool]:
    """Report, for every clause in ``CLAUSE_KEYWORDS``, whether *text* mentions it.

    A clause is flagged ``True`` as soon as one of its patterns matches
    (case-insensitively); otherwise it is ``False``.
    """
    found: Dict[str, bool] = {}
    for clause, patterns in CLAUSE_KEYWORDS.items():
        found[clause] = False
        for pattern in patterns:
            if re.search(pattern, text, flags=re.I):
                found[clause] = True
                break
    return found
def extract_metadata(contract_text: str) -> Dict[str, Any]:
    """Run every extractor over *contract_text* and collect the results.

    Returns ``{"error": "No text provided."}`` when the input is empty,
    ``None`` or only whitespace. Otherwise returns a dict with one key per
    extracted field; a field is ``None`` when its extractor finds nothing.
    """
    text = (contract_text or "").strip()
    if text == "":
        return {"error": "No text provided."}

    # (output key, extractor) pairs, in output order.
    extractors = (
        ("parties", extract_parties),
        ("property_address", extract_address),
        ("lease_term", extract_term),
        ("start_date", extract_start_date),
        ("rent", extract_rent),
        ("jurisdiction", detect_jurisdiction),
        ("clauses", extract_clauses),
    )
    return {field: extractor(text) for field, extractor in extractors}
def summarize(metadata: Dict[str, Any]) -> str:
    """Render *metadata* (as built by ``extract_metadata``) as Markdown.

    Returns an error line when *metadata* has an ``"error"`` key. Otherwise
    returns one bold-labelled line per field that is present and truthy, or
    a placeholder when there is nothing to show.
    """
    if "error" in metadata:
        return f"**Error:** {metadata['error']}"

    parts: List[str] = []
    if metadata.get("parties"):
        parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
    if metadata.get("property_address"):
        parts.append(f"**Property:** {metadata['property_address']}")
    if metadata.get("lease_term"):
        parts.append(f"**Term:** {metadata['lease_term']}")
    if metadata.get("start_date"):
        parts.append(f"**Start Date:** {metadata['start_date']}")
    if metadata.get("rent"):
        r = metadata["rent"]
        # Map the ISO code back to its symbol; unknown or missing -> no symbol.
        cur = {"USD": "$", "EUR": "€", "GBP": "£"}.get(r.get("currency"), "")
        parts.append(f"**Rent:** {cur}{r.get('amount')} / {r.get('period', 'monthly')}")
    if metadata.get("jurisdiction"):
        parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")
    if metadata.get("clauses"):
        on = [k.replace("_", " ") for k, v in metadata["clauses"].items() if v]
        parts.append(f"**Detected Clauses:** {', '.join(on) if on else 'None detected'}")

    if not parts:
        return "_No summary available._"
    # A Markdown hard line break needs two trailing spaces before the
    # newline; with one space the fields run together on a single line.
    # Built with "* 2" so whitespace-collapsing tools cannot undo it.
    hard_break = " " * 2 + "\n"
    return hard_break.join(parts)
# Sample lease used as the initial value of the "Contract Text" input box.
EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
A security deposit is required. Late fees may apply. The governing law shall be the laws of the State of New York.
Either party may terminate this agreement for cause subject to 30 days' notice. Sublet or assignment requires prior written consent.
Landlord is responsible for structural repairs; Tenant shall handle routine maintenance. Utilities include water and electricity.
"""
def run_pipeline(text: str):
    """Extract metadata from *text* and return ``(markdown_summary, metadata)``.

    This is the UI callback, so any exception is caught and reported as an
    error summary plus an ``{"error": ...}`` dict instead of propagating.
    """
    try:
        metadata = extract_metadata(text)
        markdown = summarize(metadata)
    except Exception as exc:
        message = str(exc)
        return f"**Error:** {message}", {"error": message}
    return markdown, metadata
# Build the Gradio UI: a contract text box, two buttons, and two output panes
# (Markdown summary and structured JSON).
with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
    gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
    gr.Markdown("Paste a lease/contract. The app extracts key fields and detects common clauses. _Heuristic demo — no legal advice._")
    with gr.Row():
        # Input box, pre-filled with the sample lease.
        inp = gr.Textbox(label="Contract Text", lines=14, value=EXAMPLE_TEXT, show_copy_button=True)
    with gr.Row():
        extract_btn = gr.Button("Extract Metadata", variant="primary")
        clear_btn = gr.Button("Clear")
    with gr.Row():
        summary_out = gr.Markdown(label="Summary")
    with gr.Row():
        json_out = gr.JSON(label="Structured JSON")
    # run_pipeline returns (summary, metadata), matching the two outputs.
    extract_btn.click(run_pipeline, inputs=inp, outputs=[summary_out, json_out])
    # "Clear" resets only the two output panes; the input text is left as is.
    clear_btn.click(lambda: ("", {}), None, [summary_out, json_out])
# Expose standard names for the runner
app = demo
gradio_app = demo
# As a fallback, explicitly launch if executed directly (works fine on Spaces too)
if __name__ == "__main__":
    demo.queue()
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and defaults to 7860 when it is unset.
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
|