# Proooof's picture
# Update app.py
# 4d0ab1b verified
"""
ProofAssets: Real Estate Tokenizer (Heuristic Demo)
"""
from __future__ import annotations
import re, json, os
from typing import Dict, Any, List, Optional
from dateparser.search import search_dates
import gradio as gr
def _clean(s: str) -> str:
return re.sub(r"\s+", " ", s).strip()
def _strip_role_label(name: str, roles: str) -> str:
    """Remove a leading role label such as "the Landlord:" from *name*.

    *roles* is a regex alternation of role words. The trailing ``\\b`` keeps
    the label from being cut out of a longer word ("Owners Corp" must not
    become "s Corp").
    """
    stripped = re.sub(rf"^(?:the\s+)?(?:{roles})\b\s*[:\-]*\s*", "", name, flags=re.I)
    # When the party is referred to only by its role ("Landlord"), stripping
    # would leave an empty string; keep the label as the identifier instead.
    return stripped or name


def extract_parties(text: str) -> Optional[List[str]]:
    """Return the parties named in *text*, or ``None`` when none are found.

    Two heuristics are tried in order:

    1. A "between A and B" phrase. A ends at the first " and "; B ends at the
       first comma, period, semicolon, newline, " for", or the end of the text.
       Leading role labels (landlord/lessor/owner on A, tenant/lessee/occupant
       on B) are removed. The result is always a two-element list.
    2. Labelled fields "Landlord: ..." and "Tenant: ..." (colon or hyphen).
       Whichever of the two are present are returned, landlord first.
    """
    m = re.search(r"\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b|$)", text, re.I | re.S)
    if m:
        first = _strip_role_label(_clean(m.group(1)), "landlord|lessor|owner")
        second = _strip_role_label(_clean(m.group(2)), "tenant|lessee|occupant")
        return [first, second]

    parties = []
    for label in ("Landlord", "Tenant"):
        # "$" lets a value that runs to the end of the text match even when
        # no punctuation follows it.
        m = re.search(rf"\b{label}\s*[:\-]\s*(.+?)(?:[,.;\n]|$)", text, flags=re.I)
        if m:
            parties.append(_clean(m.group(1)))
    return parties or None
def extract_address(text: str) -> Optional[str]:
    """Return the first address-like phrase found in *text*, or ``None``.

    Patterns are tried from most to least specific: "premises/property
    [located] at ...", "located at ...", then "address: ...". The captured
    value stops at the first comma, period, semicolon, newline, or the end of
    the text, so only the leading segment of a multi-part address is returned.
    """
    patterns = (
        r"\b(?:premises|property)\s+(?:located\s+)?at\s+(.+?)(?:[,.;\n]|$)",
        r"\blocated at\s+(.+?)(?:[,.;\n]|$)",
        r"\baddress\s*[:\-]\s*(.+?)(?:[,.;\n]|$)",
    )
    for pattern in patterns:
        match = re.search(pattern, text, flags=re.I)
        if match:
            address = _clean(match.group(1))
            # A whitespace-only capture is not an address; try the next pattern.
            if address:
                return address
    return None
def extract_term(text: str) -> Optional[str]:
    """Return the lease duration as ``"<n> months"`` or ``"<n> years"``, or ``None``.

    Recognised phrasings (case-insensitive):

    * "lease term is N months|years", "for a term of N ...", "term: N ..."
    * "N-month term" / "N-year term"

    The unit in the result is always plural and lower-case, and leading zeros
    in the number are dropped.
    """
    match = re.search(
        r"(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(month|year)",
        text,
        flags=re.I,
    )
    if match is None:
        # Hyphenated form. The leading \b stops the pattern from picking up the
        # tail of a longer number (e.g. "200" out of "1200-month term").
        match = re.search(r"\b(\d{1,3})\s*-\s*(month|year)\s+term", text, flags=re.I)
    if match is None:
        return None
    count = int(match.group(1))
    unit = match.group(2).lower()
    return f"{count} {unit}s"
def extract_start_date(text: str) -> Optional[str]:
    """Return the lease start date found in *text* as ``YYYY-MM-DD``, or ``None``.

    Dates are first searched for in the phrase beginning at
    "commencing/starting/effective on" plus up to 60 following characters.
    If that yields nothing, the whole text is searched. When several dates
    are found, the earliest one is returned.
    """
    candidates = []
    window = re.search(r"(?:commencing|starting|effective)\s+on\s+(.{0,60})", text, flags=re.I)
    if window:
        # search_dates returns None (not an empty list) when nothing is found.
        candidates = [found for _, found in (search_dates(window.group(0)) or [])]
    if not candidates:
        candidates = [found for _, found in (search_dates(text) or [])]
    if not candidates:
        return None
    # Compare on the wall-clock value: ordering a mix of timezone-aware and
    # naive datetimes directly raises TypeError, which would abort the whole
    # extraction for a single awkward date.
    earliest = min(candidates, key=lambda found: found.replace(tzinfo=None))
    return earliest.strftime("%Y-%m-%d")
def extract_rent(text: str) -> Optional[Dict[str, Any]]:
    """Return the rent found in *text*, or ``None`` if no amount is recognised.

    The result is ``{"amount": float, "currency": "USD"|"EUR"|"GBP"|None,
    "period": str}``. Two phrasings are tried in order:

    1. "monthly rent ..." / "base rent ..." followed, within 40 characters on
       the same sentence, by an amount.
    2. "rent of <amount>" / "rent is <amount>".

    ``period`` is "monthly" unless the phrase is directly preceded by
    "annual"/"yearly" (reported as "annual") or "weekly". The currency is
    taken from a ``$``, ``€`` or ``£`` symbol immediately before the amount.
    """
    amount = r"(?P<cur>\$|€|£)?\s*(?P<amt>[0-9][\d,]*\.?\d{0,2})"
    qualifier = r"(?:\b(?P<period>annual|yearly|weekly|monthly)\s+)?"
    patterns = (
        qualifier + r"(?:monthly|base)\s+rent[^.\n]{0,40}?" + amount,
        qualifier + r"\brent\s+(?:of|is)\s*" + amount,
    )
    periods = {"annual": "annual", "yearly": "annual", "weekly": "weekly", "monthly": "monthly"}
    currencies = {"$": "USD", "€": "EUR", "£": "GBP"}

    for pattern in patterns:
        match = re.search(pattern, text, flags=re.I)
        if match is None:
            continue
        # Thousands separators (and any trailing comma the pattern swallowed)
        # are dropped before conversion.
        value = float(match.group("amt").replace(",", ""))
        stated = (match.group("period") or "monthly").lower()
        return {
            "amount": value,
            "currency": currencies.get(match.group("cur")),
            "period": periods[stated],
        }
    return None
def detect_jurisdiction(text: str) -> Optional[str]:
    """Guess the jurisdiction of *text* from keyword hits; ``None`` if nothing matches.

    Rules are checked in order and the first hit wins. The US rule is
    case-sensitive (it relies on capitalised names and the abbreviations
    "NY"/"CA"); the EU/PT and UK rules ignore case.
    """
    rules = (
        (r"\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b", 0, "US"),
        (r"\bEuropean Union\b|\bPortugal\b|\bLisbon\b", re.I, "EU/PT"),
        (r"\bUnited Kingdom\b|\bEngland\b|\bWales\b", re.I, "UK"),
    )
    for pattern, flags, label in rules:
        if re.search(pattern, text, flags=flags) is not None:
            return label
    return None
# Clause name -> regex patterns; a clause counts as present when any of its
# patterns is found. Patterns are written in lower case and are expected to be
# searched case-insensitively.
CLAUSE_KEYWORDS: Dict[str, List[str]] = {
    # Stem is "terminat" so that "termination" matches as well as "terminate(d/s)".
    "termination": [r"\bterminat(?:e|ed|es|ing|ion)\b", r"\bearly termination\b"],
    "liability": [r"\bliabilit(?:y|ies)\b", r"\blimit(?:ation)? of liability\b"],
    "indemnity": [r"\bindemnif(?:y|ies|ied|ying|ication)\b", r"\bindemnit(?:y|ies|or)\b"],
    "governing_law": [r"\bgoverning law\b", r"\blaws? of\b"],
    "security_deposit": [r"\bsecurity deposit\b"],
    "late_fee": [r"\blate fees?\b", r"\bpenalt(?:y|ies)\b"],
    "sublet_assignment": [r"\bsublet(?:ting)?\b", r"\bassignment\b"],
    "maintenance_repairs": [r"\bmaintenance\b", r"\brepairs?\b"],
    # The alternation is grouped so the word boundaries apply to every
    # alternative; ungrouped, "gas" would match inside words like "Vegas".
    "utilities": [r"\butilities\b", r"\b(?:water|gas|electricity)\b"],
    "entry_access": [r"\bright of entry\b", r"\baccess\b"],
}
def extract_clauses(text: str) -> Dict[str, bool]:
    """Report, for every clause in ``CLAUSE_KEYWORDS``, whether *text* mentions it.

    A clause is flagged ``True`` when at least one of its patterns matches
    anywhere in *text*, ignoring case.
    """
    detected: Dict[str, bool] = {}
    for clause, patterns in CLAUSE_KEYWORDS.items():
        detected[clause] = any(re.search(rx, text, flags=re.I) for rx in patterns)
    return detected
def extract_metadata(contract_text: str) -> Dict[str, Any]:
    """Run every extractor over *contract_text* and collect the results.

    Returns ``{"error": "No text provided."}`` when the input is ``None``,
    empty, or whitespace only. Otherwise returns a dict with the keys
    ``parties``, ``property_address``, ``lease_term``, ``start_date``,
    ``rent``, ``jurisdiction`` and ``clauses``, in that order.
    """
    text = contract_text.strip() if contract_text else ""
    if text == "":
        return {"error": "No text provided."}

    # (output key, extractor) pairs, applied in order to the trimmed text.
    extractors = (
        ("parties", extract_parties),
        ("property_address", extract_address),
        ("lease_term", extract_term),
        ("start_date", extract_start_date),
        ("rent", extract_rent),
        ("jurisdiction", detect_jurisdiction),
        ("clauses", extract_clauses),
    )
    return {field: extractor(text) for field, extractor in extractors}
def summarize(metadata: Dict[str, Any]) -> str:
    """Render *metadata* as a short Markdown summary, one field per line.

    * If *metadata* has an ``"error"`` key, only ``**Error:** <message>`` is
      returned.
    * Fields that are missing or falsy are omitted.
    * Numeric rent amounts are shown with thousands separators and two
      decimals, prefixed by the currency symbol when the currency is
      USD, EUR or GBP.
    * If nothing can be shown, ``_No summary available._`` is returned.
    """
    if "error" in metadata:
        return f"**Error:** {metadata['error']}"

    parts: List[str] = []
    if metadata.get("parties"):
        parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
    if metadata.get("property_address"):
        parts.append(f"**Property:** {metadata['property_address']}")
    if metadata.get("lease_term"):
        parts.append(f"**Term:** {metadata['lease_term']}")
    if metadata.get("start_date"):
        parts.append(f"**Start Date:** {metadata['start_date']}")

    rent = metadata.get("rent")
    if rent:
        symbol = {"USD": "$", "EUR": "€", "GBP": "£"}.get(rent.get("currency"), "")
        amount = rent.get("amount")
        if isinstance(amount, (int, float)):
            # Avoid raw float repr such as "1500.0".
            amount = f"{amount:,.2f}"
        parts.append(f"**Rent:** {symbol}{amount} / {rent.get('period', 'monthly')}")

    if metadata.get("jurisdiction"):
        parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")

    clauses = metadata.get("clauses")
    if clauses:
        present = [name.replace("_", " ") for name, found in clauses.items() if found]
        parts.append(f"**Detected Clauses:** {', '.join(present) if present else 'None detected'}")

    if not parts:
        return "_No summary available._"
    # Two trailing spaces before the newline make a Markdown hard line break;
    # with a single space the fields would be rendered as one run-on paragraph.
    return "  \n".join(parts)
# Sample lease used as the initial value of the "Contract Text" input box.
EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
A security deposit is required. Late fees may apply. The governing law shall be the laws of the State of New York.
Either party may terminate this agreement for cause subject to 30 days' notice. Sublet or assignment requires prior written consent.
Landlord is responsible for structural repairs; Tenant shall handle routine maintenance. Utilities include water and electricity.
"""
def run_pipeline(text: str):
    """Extract metadata from *text* and return ``(markdown_summary, metadata)``.

    This is the UI callback, so it never raises: if extraction or
    summarising fails, the exception message is returned as
    ``("**Error:** <message>", {"error": "<message>"})`` instead.
    """
    try:
        metadata = extract_metadata(text)
        summary = summarize(metadata)
    except Exception as exc:
        message = str(exc)
        return f"**Error:** {message}", {"error": message}
    return summary, metadata
# Build the UI: a header, the contract input, two buttons, and two output
# panes (Markdown summary and structured JSON), each in its own row.
with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
    gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
    gr.Markdown("Paste a lease/contract. The app extracts key fields and detects common clauses. _Heuristic demo — no legal advice._")
    with gr.Row():
        # Pre-filled with the sample lease.
        inp = gr.Textbox(label="Contract Text", lines=14, value=EXAMPLE_TEXT, show_copy_button=True)
    with gr.Row():
        extract_btn = gr.Button("Extract Metadata", variant="primary")
        clear_btn = gr.Button("Clear")
    with gr.Row():
        summary_out = gr.Markdown(label="Summary")
    with gr.Row():
        json_out = gr.JSON(label="Structured JSON")
    # run_pipeline returns (summary, metadata), matching the two outputs in order.
    extract_btn.click(run_pipeline, inputs=inp, outputs=[summary_out, json_out])
    # "Clear" resets the two output panes only; the input box is not among its outputs.
    clear_btn.click(lambda: ("", {}), None, [summary_out, json_out])
# Expose standard names for the runner: both are aliases of the same Blocks
# object built above.
app = demo
gradio_app = demo
# As a fallback, explicitly launch if executed directly (works fine on Spaces too)
if __name__ == "__main__":
    demo.queue()
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and defaults to 7860 when it is unset.
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))