Update app.py
Browse files
app.py
CHANGED
|
@@ -1,40 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import re
|
| 2 |
import json
|
| 3 |
-
from typing import Dict, Any
|
| 4 |
-
import
|
| 5 |
-
from dateparser.search import search_dates # 👈 ADD THIS
|
| 6 |
import gradio as gr
|
| 7 |
|
| 8 |
-
|
| 9 |
-
# ---------- Heuristic extractors (no ML) ----------
|
| 10 |
|
| 11 |
def _clean(s: str) -> str:
|
| 12 |
-
return re.sub(r
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
| 17 |
if m:
|
| 18 |
p1 = _clean(m.group(1))
|
| 19 |
p2 = _clean(m.group(2))
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
p2 = re.sub(r'^(?:the\s+)?(tenant|lessee|occupant)\s*[:\-]*\s*', '', p2, flags=re.I)
|
| 23 |
return [p1, p2]
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
if
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
patterns = [
|
| 35 |
-
r
|
| 36 |
-
r
|
| 37 |
-
r
|
| 38 |
]
|
| 39 |
for pat in patterns:
|
| 40 |
m = re.search(pat, text, flags=re.I)
|
|
@@ -42,24 +51,25 @@ def extract_address(text: str):
|
|
| 42 |
return _clean(m.group(1))
|
| 43 |
return None
|
| 44 |
|
| 45 |
-
def extract_term(text: str):
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
| 48 |
if m:
|
| 49 |
n = int(m.group(1))
|
| 50 |
unit = m.group(2).lower()
|
| 51 |
-
if unit.startswith(
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
# "12-month term"
|
| 55 |
-
m = re.search(r'(\d{1,3})\s*-\s*month\s+term', text, flags=re.I)
|
| 56 |
if m:
|
| 57 |
return f"{int(m.group(1))} months"
|
| 58 |
return None
|
| 59 |
|
| 60 |
-
|
| 61 |
-
#
|
| 62 |
-
window = re.search(r
|
| 63 |
candidates = []
|
| 64 |
if window:
|
| 65 |
parsed = search_dates(window.group(0))
|
|
@@ -71,95 +81,94 @@ ef extract_start_date(text: str):
|
|
| 71 |
candidates.extend([d for _, d in parsed])
|
| 72 |
if candidates:
|
| 73 |
dt = sorted(candidates)[0]
|
| 74 |
-
return dt.strftime(
|
| 75 |
return None
|
| 76 |
|
| 77 |
-
def extract_rent(text: str):
|
| 78 |
-
#
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
| 81 |
if not near:
|
| 82 |
-
near = re.search(r
|
| 83 |
if near:
|
| 84 |
-
cur = near.group(1) or
|
| 85 |
-
amt = float(near.group(2).replace(
|
| 86 |
-
currency = {
|
| 87 |
-
return {
|
| 88 |
return None
|
| 89 |
|
| 90 |
-
def detect_jurisdiction(text: str):
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
if re.search(r
|
| 94 |
-
|
|
|
|
|
|
|
| 95 |
return None
|
| 96 |
|
| 97 |
CLAUSE_KEYWORDS = {
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
}
|
| 109 |
|
| 110 |
def extract_clauses(text: str) -> Dict[str, bool]:
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
def summarize(metadata: Dict[str, Any]) -> str:
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
| 120 |
parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
|
| 121 |
-
if metadata.get(
|
| 122 |
parts.append(f"**Property:** {metadata['property_address']}")
|
| 123 |
-
if metadata.get(
|
| 124 |
parts.append(f"**Term:** {metadata['lease_term']}")
|
| 125 |
-
if metadata.get(
|
| 126 |
parts.append(f"**Start Date:** {metadata['start_date']}")
|
| 127 |
-
if metadata.get(
|
| 128 |
-
r = metadata[
|
| 129 |
-
cur = {
|
| 130 |
parts.append(f"**Rent:** {cur}{r.get('amount')} / {r.get('period','monthly')}")
|
| 131 |
-
if metadata.get(
|
| 132 |
parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
on = [k.replace('_',' ') for k, v in metadata['clauses'].items() if v]
|
| 136 |
parts.append(f"**Detected Clauses:** {', '.join(on) if on else 'None detected'}")
|
| 137 |
-
return " \n".join(parts) if parts else "_No summary available._"
|
| 138 |
|
| 139 |
-
|
| 140 |
-
text = contract_text.strip()
|
| 141 |
-
if not text:
|
| 142 |
-
return {"error": "No text provided."}
|
| 143 |
-
|
| 144 |
-
parties = extract_parties(text)
|
| 145 |
-
address = extract_address(text)
|
| 146 |
-
term = extract_term(text)
|
| 147 |
-
start_date = extract_start_date(text)
|
| 148 |
-
rent = extract_rent(text)
|
| 149 |
-
juris = detect_jurisdiction(text)
|
| 150 |
-
clauses = extract_clauses(text)
|
| 151 |
-
|
| 152 |
-
return {
|
| 153 |
-
"parties": parties,
|
| 154 |
-
"property_address": address,
|
| 155 |
-
"lease_term": term,
|
| 156 |
-
"start_date": start_date,
|
| 157 |
-
"rent": rent,
|
| 158 |
-
"jurisdiction": juris,
|
| 159 |
-
"clauses": clauses
|
| 160 |
-
}
|
| 161 |
|
| 162 |
-
#
|
| 163 |
|
| 164 |
EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
|
| 165 |
The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
|
|
@@ -168,27 +177,43 @@ Either party may terminate this agreement for cause subject to 30 days' notice.
|
|
| 168 |
Landlord is responsible for structural repairs; Tenant shall handle routine maintenance. Utilities include water and electricity.
|
| 169 |
"""
|
| 170 |
|
| 171 |
-
def run_pipeline(text):
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
|
| 179 |
gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
|
| 180 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
with gr.Row():
|
| 183 |
-
inp = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
with gr.Row():
|
| 185 |
-
|
|
|
|
|
|
|
| 186 |
with gr.Row():
|
| 187 |
summary_out = gr.Markdown(label="Summary")
|
| 188 |
with gr.Row():
|
| 189 |
json_out = gr.JSON(label="Structured JSON")
|
| 190 |
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
-
if __name__ == "__main__":
|
| 194 |
-
demo.launch()
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ProofAssets: Real Estate Tokenizer (Heuristic Demo)
|
| 3 |
+
- Extracts key fields from lease/contract text using regex + date parsing
|
| 4 |
+
- Outputs a human-friendly summary + structured JSON
|
| 5 |
+
- No legal advice; for demo purposes only
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
import re
|
| 10 |
import json
|
| 11 |
+
from typing import Dict, Any, List, Optional
|
| 12 |
+
from dateparser.search import search_dates # <-- correct import
|
|
|
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
+
# ----------------------------- Helpers -----------------------------
|
|
|
|
| 16 |
|
| 17 |
def _clean(s: str) -> str:
|
| 18 |
+
return re.sub(r"\s+", " ", s).strip()
|
| 19 |
|
| 20 |
+
# -------------------------- Extractors -----------------------------
|
| 21 |
+
|
| 22 |
+
def extract_parties(text: str) -> Optional[List[str]]:
    """Find the contracting parties named in *text*.

    Two heuristics are tried in order:

    1. A "between X and Y" phrase. A leading role word (landlord/lessor/owner
       for the first name, tenant/lessee/occupant for the second) is stripped.
    2. Labelled fields such as "Landlord: X" and "Tenant: Y".

    Returns:
        ``[first, second]`` for a "between" match, a list of one or two names
        (landlord first) from the labelled fallback, or ``None`` when neither
        heuristic matches.
    """
    # "made between X and Y": the second name ends at punctuation, a newline,
    # " for", or the end of the text (so a trailing phrase is not lost).
    m = re.search(
        r"\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b|$)",
        text,
        re.I | re.S,
    )
    if m:
        p1 = _clean(m.group(1))
        p2 = _clean(m.group(2))
        # Strip the role label only as a whole word, and keep the raw text
        # when the label is all that was captured (avoids empty names).
        p1 = re.sub(r"^(?:the\s+)?(?:landlord|lessor|owner)\b\s*[:\-]*\s*", "", p1, flags=re.I) or p1
        p2 = re.sub(r"^(?:the\s+)?(?:tenant|lessee|occupant)\b\s*[:\-]*\s*", "", p2, flags=re.I) or p2
        return [p1, p2]

    # Fallback: "Landlord: X" / "Tenant: Y"; a value may also run to the end
    # of the text when it is the last thing in the document.
    parties = []
    m1 = re.search(r"\bLandlord\s*[:\-]\s*(.+?)(?:[,.;\n]|$)", text, flags=re.I)
    m2 = re.search(r"\bTenant\s*[:\-]\s*(.+?)(?:[,.;\n]|$)", text, flags=re.I)
    if m1:
        parties.append(_clean(m1.group(1)))
    if m2:
        parties.append(_clean(m2.group(1)))
    return parties or None
|
| 41 |
+
|
| 42 |
+
def extract_address(text: str) -> Optional[str]:
|
| 43 |
patterns = [
|
| 44 |
+
r"(?:premises|property)\s+(?:located\s+)?at\s+(.+?)(?:[,.;\n])",
|
| 45 |
+
r"\blocated at\s+(.+?)(?:[,.;\n])",
|
| 46 |
+
r"\baddress\s*[:\-]\s*(.+?)(?:[,.;\n])",
|
| 47 |
]
|
| 48 |
for pat in patterns:
|
| 49 |
m = re.search(pat, text, flags=re.I)
|
|
|
|
| 51 |
return _clean(m.group(1))
|
| 52 |
return None
|
| 53 |
|
| 54 |
+
def extract_term(text: str) -> Optional[str]:
    """Extract the lease duration from *text* as ``"<n> month(s)"`` or ``"<n> year(s)"``.

    Recognised phrasings (case-insensitive):

    * "lease term is N months/years", "for a term of N ...", "term: N ..."
    * "N-month term" / "N-year term"

    Returns:
        The normalised duration with a correctly pluralised unit
        (``"1 year"``, ``"12 months"``), or ``None`` if no phrasing matches.
    """
    m = re.search(
        r"(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(months?|years?)",
        text,
        flags=re.I,
    )
    if not m:
        # Hyphenated form: "12-month term", "2-year term".
        m = re.search(r"(\d{1,3})\s*-\s*(month|year)\s+term", text, flags=re.I)
    if not m:
        return None

    n = int(m.group(1))
    unit = "year" if m.group(2).lower().startswith("year") else "month"
    # Singular for exactly one unit, plural otherwise.
    return f"{n} {unit}" if n == 1 else f"{n} {unit}s"
|
| 69 |
|
| 70 |
+
def extract_start_date(text: str) -> Optional[str]:
|
| 71 |
+
# Prefer a window near "commencing/starting/effective"
|
| 72 |
+
window = re.search(r"(?:commencing|starting|effective)\s+on\s+(.{0,60})", text, flags=re.I)
|
| 73 |
candidates = []
|
| 74 |
if window:
|
| 75 |
parsed = search_dates(window.group(0))
|
|
|
|
| 81 |
candidates.extend([d for _, d in parsed])
|
| 82 |
if candidates:
|
| 83 |
dt = sorted(candidates)[0]
|
| 84 |
+
return dt.strftime("%Y-%m-%d")
|
| 85 |
return None
|
| 86 |
|
| 87 |
+
def extract_rent(text: str) -> Optional[Dict[str, Any]]:
    """Locate a rent amount in *text*.

    The "monthly rent" / "base rent" phrasing is tried first, then
    "rent of" / "rent is". Returns a dict with ``amount`` (float),
    ``currency`` ("USD"/"EUR"/"GBP", or ``None`` when no symbol was seen)
    and ``period`` (always ``"monthly"``), or ``None`` if nothing matched.
    """
    rent_patterns = (
        r"(?:monthly|base)\s+rent[^.\n]{0,40}?(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})",
        r"\b(?:rent\s+of|rent\s+is)\s*(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})",
    )

    hit = None
    for pattern in rent_patterns:
        hit = re.search(pattern, text, flags=re.I)
        if hit:
            break
    if not hit:
        return None

    symbol = hit.group(1) or ""
    digits = hit.group(2)
    # Thousands separators are dropped before the numeric conversion.
    value = float(digits.replace(",", ""))
    code = {"$": "USD", "€": "EUR", "£": "GBP"}.get(symbol, None)
    return {"amount": value, "currency": code, "period": "monthly"}
|
| 102 |
|
| 103 |
+
def detect_jurisdiction(text: str) -> Optional[str]:
    """Guess a jurisdiction label from place names mentioned in *text*.

    Rules are checked in order and the first hit wins: "US", then "EU/PT",
    then "UK". Returns ``None`` when no rule matches.
    """
    # (pattern, regex flags, label). The US rule is case-sensitive; the
    # other two ignore case.
    rules = (
        (r"\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b", 0, "US"),
        (r"\bEuropean Union\b|\bPortugal\b|\bLisbon\b", re.I, "EU/PT"),
        (r"\bUnited Kingdom\b|\bEngland\b|\bWales\b", re.I, "UK"),
    )
    for pattern, flags, label in rules:
        if re.search(pattern, text, flags=flags):
            return label
    return None
|
| 111 |
|
| 112 |
# Clause label -> regexes searched case-insensitively in the contract text;
# a clause counts as present when any one of its patterns is found.
CLAUSE_KEYWORDS = {
    # Built on the "terminat" stem so "termination"/"terminating" match too.
    "termination": [r"\bterminat(?:e[ds]?|ion|ing)\b", r"\bearly termination\b"],
    "liability": [r"\bliabilit(?:y|ies)\b", r"\blimit(?:ation)? of liability\b"],
    # Covers the noun ("indemnity") as well as the verb forms.
    "indemnity": [
        r"\bindemnit(?:y|ies)\b",
        r"\bindemnif(?:y|ies|ied|ication)\b",
        r"\bindemnitor\b",
    ],
    "governing_law": [r"\bgoverning law\b", r"\blaws? of\b"],
    "security_deposit": [r"\bsecurity deposit\b"],
    "late_fee": [r"\blate fee\b", r"\bpenalt(?:y|ies)\b"],
    "sublet_assignment": [r"\bsublet\b", r"\bassignment\b"],
    "maintenance_repairs": [r"\bmaintenance\b", r"\brepairs?\b"],
    # Alternatives are grouped so the word boundaries apply to each of them
    # (otherwise "gas" matches inside words such as "Vegas").
    "utilities": [r"\butilities\b", r"\b(?:water|gas|electricity)\b"],
    "entry_access": [r"\bright of entry\b", r"\baccess\b"],
}
|
| 124 |
|
| 125 |
def extract_clauses(text: str) -> Dict[str, bool]:
    """Report, for every label in ``CLAUSE_KEYWORDS``, whether *text* mentions it.

    A label maps to ``True`` as soon as one of its patterns is found
    (case-insensitive search), otherwise to ``False``.
    """
    found: Dict[str, bool] = {}
    for clause, patterns in CLAUSE_KEYWORDS.items():
        found[clause] = False
        for pattern in patterns:
            if re.search(pattern, text, flags=re.I):
                found[clause] = True
                break
    return found
|
| 127 |
+
|
| 128 |
+
# --------------------------- Orchestration --------------------------
|
| 129 |
+
|
| 130 |
+
def extract_metadata(contract_text: str) -> Dict[str, Any]:
    """Run every extractor over *contract_text* and collect the results.

    Surrounding whitespace is stripped first. Blank input yields
    ``{"error": "No text provided."}``; otherwise the result has the keys
    ``parties``, ``property_address``, ``lease_term``, ``start_date``,
    ``rent``, ``jurisdiction`` and ``clauses``.
    """
    text = contract_text.strip()
    if text:
        return {
            "parties": extract_parties(text),
            "property_address": extract_address(text),
            "lease_term": extract_term(text),
            "start_date": extract_start_date(text),
            "rent": extract_rent(text),
            "jurisdiction": detect_jurisdiction(text),
            "clauses": extract_clauses(text),
        }
    return {"error": "No text provided."}
|
| 145 |
|
| 146 |
def summarize(metadata: Dict[str, Any]) -> str:
    """Render extracted lease metadata as a short Markdown summary.

    Args:
        metadata: Either ``{"error": message}`` or a mapping that may contain
            ``parties``, ``property_address``, ``lease_term``, ``start_date``,
            ``rent``, ``jurisdiction`` and ``clauses``. Missing or falsy
            entries are skipped.

    Returns:
        ``**Error:** ...`` for an error mapping; otherwise one
        ``**Label:** value`` line per available field, joined with Markdown
        hard line breaks; or a placeholder when nothing could be summarised.
    """
    if "error" in metadata:
        return f"**Error:** {metadata['error']}"

    parts: List[str] = []
    if metadata.get("parties"):
        parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
    if metadata.get("property_address"):
        parts.append(f"**Property:** {metadata['property_address']}")
    if metadata.get("lease_term"):
        parts.append(f"**Term:** {metadata['lease_term']}")
    if metadata.get("start_date"):
        parts.append(f"**Start Date:** {metadata['start_date']}")
    if metadata.get("rent"):
        r = metadata["rent"]
        cur = {"USD": "$", "EUR": "€", "GBP": "£"}.get(r.get("currency"), "")
        amount = r.get("amount")
        # Show money with thousands separators and two decimals ("1,500.00"
        # instead of "1500.0"); a non-numeric amount is printed unchanged.
        amount_text = f"{amount:,.2f}" if isinstance(amount, (int, float)) else f"{amount}"
        parts.append(f"**Rent:** {cur}{amount_text} / {r.get('period', 'monthly')}")
    if metadata.get("jurisdiction"):
        parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")
    if metadata.get("clauses"):
        on = [k.replace("_", " ") for k, v in metadata["clauses"].items() if v]
        parts.append(f"**Detected Clauses:** {', '.join(on) if on else 'None detected'}")

    # Two trailing spaces before the newline form a Markdown hard line break;
    # with fewer, the fields collapse into a single paragraph.
    return "  \n".join(parts) if parts else "_No summary available._"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
+
# ------------------------------ UI ---------------------------------
|
| 172 |
|
| 173 |
EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
|
| 174 |
The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
|
|
|
|
| 177 |
Landlord is responsible for structural repairs; Tenant shall handle routine maintenance. Utilities include water and electricity.
|
| 178 |
"""
|
| 179 |
|
| 180 |
+
def run_pipeline(text: str):
    """Extract metadata from *text* and build the two UI outputs.

    Returns a ``(summary_markdown, metadata_dict)`` pair. A missing/empty
    *text* is passed on as ``""``. Any exception raised along the way is
    reported as ``("**Error:** <message>", {"error": "<message>"})`` instead
    of propagating.
    """
    try:
        metadata = extract_metadata(text or "")
        rendered = summarize(metadata)
    except Exception as exc:
        return f"**Error:** {exc}", {"error": str(exc)}
    else:
        # The dict itself is returned (not a JSON string) so gr.JSON can render it.
        return rendered, metadata
|
| 189 |
|
| 190 |
# Page layout: intro text, the contract input, two buttons, then the Markdown
# summary and the structured JSON output.
with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
    gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
    gr.Markdown(
        "Paste a lease/contract. The app will extract key fields and detect common clauses. "
        "_Heuristic demo — no legal advice._"
    )

    with gr.Row():
        inp = gr.Textbox(
            label="Contract Text",
            lines=14,
            value=EXAMPLE_TEXT,
            show_copy_button=True,
            placeholder="Paste lease or contract text here..."
        )

    with gr.Row():
        extract_btn = gr.Button("Extract Metadata", variant="primary")
        clear_btn = gr.Button("Clear")

    with gr.Row():
        summary_out = gr.Markdown(label="Summary")
    with gr.Row():
        json_out = gr.JSON(label="Structured JSON")

    # "Extract Metadata" runs the pipeline on the textbox content and fills
    # both outputs; "Clear" resets the two outputs only.
    extract_btn.click(fn=run_pipeline, inputs=inp, outputs=[summary_out, json_out])
    clear_btn.click(lambda: ("", {}), None, [summary_out, json_out])

# Start the server only when this file is executed directly (e.g.
# `python app.py`); importing the module still exposes `demo` without
# launching anything.
# NOTE(review): an earlier note here said the hosting platform loads `demo`
# on its own. The guard keeps that scenario working; confirm how the
# deployment target actually starts the app.
if __name__ == "__main__":
    demo.launch()
|
| 219 |
|
|
|
|
|
|