Update app.py
Browse files
app.py
CHANGED
|
@@ -1,26 +1,18 @@
|
|
| 1 |
"""
|
| 2 |
ProofAssets: Real Estate Tokenizer (Heuristic Demo)
|
| 3 |
-
- Extracts key fields from lease/contract text using regex + date parsing
|
| 4 |
-
- Outputs a human-friendly summary + structured JSON
|
| 5 |
-
- No legal advice; for demo purposes only
|
| 6 |
"""
|
| 7 |
|
| 8 |
from __future__ import annotations
|
| 9 |
import re
|
| 10 |
import json
|
| 11 |
from typing import Dict, Any, List, Optional
|
| 12 |
-
from dateparser.search import search_dates
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
-
# ----------------------------- Helpers -----------------------------
|
| 16 |
-
|
| 17 |
def _clean(s: str) -> str:
|
| 18 |
return re.sub(r"\s+", " ", s).strip()
|
| 19 |
|
| 20 |
-
# -------------------------- Extractors -----------------------------
|
| 21 |
-
|
| 22 |
def extract_parties(text: str) -> Optional[List[str]]:
|
| 23 |
-
# "made between X and Y"
|
| 24 |
m = re.search(r"\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b)", text, re.I | re.S)
|
| 25 |
if m:
|
| 26 |
p1 = _clean(m.group(1))
|
|
@@ -28,69 +20,49 @@ def extract_parties(text: str) -> Optional[List[str]]:
|
|
| 28 |
p1 = re.sub(r"^(?:the\s+)?(landlord|lessor|owner)\s*[:\-]*\s*", "", p1, flags=re.I)
|
| 29 |
p2 = re.sub(r"^(?:the\s+)?(tenant|lessee|occupant)\s*[:\-]*\s*", "", p2, flags=re.I)
|
| 30 |
return [p1, p2]
|
| 31 |
-
|
| 32 |
-
# Fallback: "Landlord: X" / "Tenant: Y"
|
| 33 |
parties = []
|
| 34 |
m1 = re.search(r"\bLandlord\s*[:\-]\s*(.+?)(?:[,.;\n])", text, flags=re.I)
|
| 35 |
m2 = re.search(r"\bTenant\s*[:\-]\s*(.+?)(?:[,.;\n])", text, flags=re.I)
|
| 36 |
-
if m1:
|
| 37 |
-
|
| 38 |
-
if m2:
|
| 39 |
-
parties.append(_clean(m2.group(1)))
|
| 40 |
return parties or None
|
| 41 |
|
| 42 |
def extract_address(text: str) -> Optional[str]:
|
| 43 |
-
|
| 44 |
r"(?:premises|property)\s+(?:located\s+)?at\s+(.+?)(?:[,.;\n])",
|
| 45 |
r"\blocated at\s+(.+?)(?:[,.;\n])",
|
| 46 |
r"\baddress\s*[:\-]\s*(.+?)(?:[,.;\n])",
|
| 47 |
-
]
|
| 48 |
-
for pat in patterns:
|
| 49 |
m = re.search(pat, text, flags=re.I)
|
| 50 |
if m:
|
| 51 |
return _clean(m.group(1))
|
| 52 |
return None
|
| 53 |
|
| 54 |
def extract_term(text: str) -> Optional[str]:
|
| 55 |
-
m = re.search(
|
| 56 |
-
r"(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(month|months|year|years)",
|
| 57 |
-
text,
|
| 58 |
-
flags=re.I,
|
| 59 |
-
)
|
| 60 |
if m:
|
| 61 |
-
n = int(m.group(1))
|
| 62 |
-
unit = m.group(2).lower()
|
| 63 |
return f"{n} years" if unit.startswith("year") else f"{n} months"
|
| 64 |
-
|
| 65 |
m = re.search(r"(\d{1,3})\s*-\s*month\s+term", text, flags=re.I)
|
| 66 |
-
if m:
|
| 67 |
-
return f"{int(m.group(1))} months"
|
| 68 |
return None
|
| 69 |
|
| 70 |
def extract_start_date(text: str) -> Optional[str]:
|
| 71 |
-
# Prefer a window near "commencing/starting/effective"
|
| 72 |
window = re.search(r"(?:commencing|starting|effective)\s+on\s+(.{0,60})", text, flags=re.I)
|
| 73 |
candidates = []
|
| 74 |
if window:
|
| 75 |
parsed = search_dates(window.group(0))
|
| 76 |
-
if parsed:
|
| 77 |
-
candidates.extend([d for _, d in parsed])
|
| 78 |
if not candidates:
|
| 79 |
parsed = search_dates(text)
|
| 80 |
-
if parsed:
|
| 81 |
-
candidates.extend([d for _, d in parsed])
|
| 82 |
if candidates:
|
| 83 |
dt = sorted(candidates)[0]
|
| 84 |
return dt.strftime("%Y-%m-%d")
|
| 85 |
return None
|
| 86 |
|
| 87 |
def extract_rent(text: str) -> Optional[Dict[str, Any]]:
|
| 88 |
-
|
| 89 |
-
near = re.search(
|
| 90 |
-
r"(?:monthly|base)\s+rent[^.\n]{0,40}?(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})",
|
| 91 |
-
text,
|
| 92 |
-
flags=re.I,
|
| 93 |
-
)
|
| 94 |
if not near:
|
| 95 |
near = re.search(r"\b(?:rent\s+of|rent\s+is)\s*(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})", text, flags=re.I)
|
| 96 |
if near:
|
|
@@ -101,12 +73,9 @@ def extract_rent(text: str) -> Optional[Dict[str, Any]]:
|
|
| 101 |
return None
|
| 102 |
|
| 103 |
def detect_jurisdiction(text: str) -> Optional[str]:
|
| 104 |
-
if re.search(r"\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b", text):
|
| 105 |
-
|
| 106 |
-
if re.search(r"\
|
| 107 |
-
return "EU/PT"
|
| 108 |
-
if re.search(r"\bUnited Kingdom\b|\bEngland\b|\bWales\b", text, flags=re.I):
|
| 109 |
-
return "UK"
|
| 110 |
return None
|
| 111 |
|
| 112 |
CLAUSE_KEYWORDS = {
|
|
@@ -125,14 +94,11 @@ CLAUSE_KEYWORDS = {
|
|
| 125 |
def extract_clauses(text: str) -> Dict[str, bool]:
|
| 126 |
return {name: any(re.search(p, text, flags=re.I) for p in pats) for name, pats in CLAUSE_KEYWORDS.items()}
|
| 127 |
|
| 128 |
-
# --------------------------- Orchestration --------------------------
|
| 129 |
-
|
| 130 |
def extract_metadata(contract_text: str) -> Dict[str, Any]:
|
| 131 |
-
text = contract_text.strip()
|
| 132 |
if not text:
|
| 133 |
return {"error": "No text provided."}
|
| 134 |
-
|
| 135 |
-
meta = {
|
| 136 |
"parties": extract_parties(text),
|
| 137 |
"property_address": extract_address(text),
|
| 138 |
"lease_term": extract_term(text),
|
|
@@ -141,35 +107,24 @@ def extract_metadata(contract_text: str) -> Dict[str, Any]:
|
|
| 141 |
"jurisdiction": detect_jurisdiction(text),
|
| 142 |
"clauses": extract_clauses(text),
|
| 143 |
}
|
| 144 |
-
return meta
|
| 145 |
|
| 146 |
def summarize(metadata: Dict[str, Any]) -> str:
|
| 147 |
if "error" in metadata:
|
| 148 |
return f"**Error:** {metadata['error']}"
|
| 149 |
-
|
| 150 |
parts: List[str] = []
|
| 151 |
-
if metadata.get("parties"):
|
| 152 |
-
|
| 153 |
-
if metadata.get("
|
| 154 |
-
|
| 155 |
-
if metadata.get("lease_term"):
|
| 156 |
-
parts.append(f"**Term:** {metadata['lease_term']}")
|
| 157 |
-
if metadata.get("start_date"):
|
| 158 |
-
parts.append(f"**Start Date:** {metadata['start_date']}")
|
| 159 |
if metadata.get("rent"):
|
| 160 |
-
r = metadata["rent"]
|
| 161 |
-
cur = {"USD": "$", "EUR": "€", "GBP": "£"}.get(r.get("currency"), "")
|
| 162 |
parts.append(f"**Rent:** {cur}{r.get('amount')} / {r.get('period','monthly')}")
|
| 163 |
-
if metadata.get("jurisdiction"):
|
| 164 |
-
parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")
|
| 165 |
if metadata.get("clauses"):
|
| 166 |
-
on = [k.replace("_",
|
| 167 |
parts.append(f"**Detected Clauses:** {', '.join(on) if on else 'None detected'}")
|
| 168 |
-
|
| 169 |
return " \n".join(parts) if parts else "_No summary available._"
|
| 170 |
|
| 171 |
-
# ------------------------------ UI ---------------------------------
|
| 172 |
-
|
| 173 |
EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
|
| 174 |
The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
|
| 175 |
A security deposit is required. Late fees may apply. The governing law shall be the laws of the State of New York.
|
|
@@ -179,41 +134,25 @@ Landlord is responsible for structural repairs; Tenant shall handle routine main
|
|
| 179 |
|
| 180 |
def run_pipeline(text: str):
|
| 181 |
try:
|
| 182 |
-
meta = extract_metadata(text
|
| 183 |
-
|
| 184 |
-
# Return dict for gr.JSON (it renders nicer than a JSON string)
|
| 185 |
-
return summary_md, meta
|
| 186 |
except Exception as e:
|
| 187 |
-
|
| 188 |
-
return f"**Error:** {e}", err
|
| 189 |
|
| 190 |
with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
|
| 191 |
gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
|
| 192 |
-
gr.Markdown(
|
| 193 |
-
"Paste a lease/contract. The app will extract key fields and detect common clauses. "
|
| 194 |
-
"_Heuristic demo — no legal advice._"
|
| 195 |
-
)
|
| 196 |
-
|
| 197 |
with gr.Row():
|
| 198 |
-
inp = gr.Textbox(
|
| 199 |
-
label="Contract Text",
|
| 200 |
-
lines=14,
|
| 201 |
-
value=EXAMPLE_TEXT,
|
| 202 |
-
show_copy_button=True,
|
| 203 |
-
placeholder="Paste lease or contract text here..."
|
| 204 |
-
)
|
| 205 |
-
|
| 206 |
with gr.Row():
|
| 207 |
extract_btn = gr.Button("Extract Metadata", variant="primary")
|
| 208 |
clear_btn = gr.Button("Clear")
|
| 209 |
-
|
| 210 |
with gr.Row():
|
| 211 |
summary_out = gr.Markdown(label="Summary")
|
| 212 |
with gr.Row():
|
| 213 |
json_out = gr.JSON(label="Structured JSON")
|
| 214 |
-
|
| 215 |
-
extract_btn.click(fn=run_pipeline, inputs=inp, outputs=[summary_out, json_out])
|
| 216 |
clear_btn.click(lambda: ("", {}), None, [summary_out, json_out])
|
| 217 |
|
| 218 |
-
#
|
| 219 |
-
|
|
|
|
| 1 |
"""
|
| 2 |
ProofAssets: Real Estate Tokenizer (Heuristic Demo)
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
from __future__ import annotations
|
| 6 |
import re
|
| 7 |
import json
|
| 8 |
from typing import Dict, Any, List, Optional
|
| 9 |
+
from dateparser.search import search_dates
|
| 10 |
import gradio as gr
|
| 11 |
|
|
|
|
|
|
|
| 12 |
def _clean(s: str) -> str:
|
| 13 |
return re.sub(r"\s+", " ", s).strip()
|
| 14 |
|
|
|
|
|
|
|
| 15 |
def extract_parties(text: str) -> Optional[List[str]]:
|
|
|
|
| 16 |
m = re.search(r"\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b)", text, re.I | re.S)
|
| 17 |
if m:
|
| 18 |
p1 = _clean(m.group(1))
|
|
|
|
| 20 |
p1 = re.sub(r"^(?:the\s+)?(landlord|lessor|owner)\s*[:\-]*\s*", "", p1, flags=re.I)
|
| 21 |
p2 = re.sub(r"^(?:the\s+)?(tenant|lessee|occupant)\s*[:\-]*\s*", "", p2, flags=re.I)
|
| 22 |
return [p1, p2]
|
|
|
|
|
|
|
| 23 |
parties = []
|
| 24 |
m1 = re.search(r"\bLandlord\s*[:\-]\s*(.+?)(?:[,.;\n])", text, flags=re.I)
|
| 25 |
m2 = re.search(r"\bTenant\s*[:\-]\s*(.+?)(?:[,.;\n])", text, flags=re.I)
|
| 26 |
+
if m1: parties.append(_clean(m1.group(1)))
|
| 27 |
+
if m2: parties.append(_clean(m2.group(1)))
|
|
|
|
|
|
|
| 28 |
return parties or None
|
| 29 |
|
| 30 |
def extract_address(text: str) -> Optional[str]:
    """Return the first property address found in *text*, or ``None``.

    Three case-insensitive patterns are tried in order:
    "premises/property [located] at ...", "located at ...", and
    "address: ...". The capture is lazy and stops at the first comma,
    period, semicolon, newline, or the end of the text, so
    "located at 123 Main Street, Albany" yields "123 Main Street".
    """
    # ``|$`` lets an address that ends the text (no closing punctuation)
    # still match; without it such input fell through every pattern.
    stop = r"(?:[,.;\n]|$)"
    patterns = (
        r"(?:premises|property)\s+(?:located\s+)?at\s+(.+?)" + stop,
        r"\blocated at\s+(.+?)" + stop,
        r"\baddress\s*[:\-]\s*(.+?)" + stop,
    )
    for pat in patterns:
        m = re.search(pat, text, flags=re.I)
        if m:
            return _clean(m.group(1))
    return None
|
| 40 |
|
| 41 |
def extract_term(text: str) -> Optional[str]:
    """Return the lease duration as ``"N months"`` or ``"N years"``, or ``None``.

    Two case-insensitive phrasings are recognised, tried in this order:

    * "lease term is N months|years", "for a term of N ...", "term: N ..."
    * "N-month term" / "N-year term"

    The unit in the result is always the plural form, whatever the input used.
    """
    patterns = (
        # "month"/"year" also match the plural spellings, since nothing is
        # required to follow the unit.
        r"(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(month|year)",
        # Hyphenated form; covers years as well as months.
        r"(\d{1,3})\s*-\s*(month|year)\s+term",
    )
    for pat in patterns:
        m = re.search(pat, text, flags=re.I)
        if m:
            unit = "years" if m.group(2).lower() == "year" else "months"
            return f"{int(m.group(1))} {unit}"
    return None
|
| 49 |
|
| 50 |
def extract_start_date(text: str) -> Optional[str]:
    """Return the lease start date as ``YYYY-MM-DD``, or ``None`` if none is found.

    Dates are first searched in a window of up to 60 characters following
    "commencing on", "starting on" or "effective on". If that yields nothing,
    the whole text is searched. The earliest candidate is returned.
    """
    window = re.search(r"(?:commencing|starting|effective)\s+on\s+(.{0,60})", text, flags=re.I)
    candidates = []
    if window:
        # ``or []`` guards the case where search_dates reports no hits as None.
        candidates = [d for _, d in search_dates(window.group(0)) or []]
    if not candidates:
        candidates = [d for _, d in search_dates(text) or []]
    if not candidates:
        return None
    # Compare on tz-stripped values: ordering a mix of timezone-aware and
    # naive datetimes directly raises TypeError.
    earliest = min(candidates, key=lambda d: d.replace(tzinfo=None))
    return earliest.strftime("%Y-%m-%d")
|
| 63 |
|
| 64 |
def extract_rent(text: str) -> Optional[Dict[str, Any]]:
|
| 65 |
+
near = re.search(r"(?:monthly|base)\s+rent[^.\n]{0,40}?(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})", text, flags=re.I)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
if not near:
|
| 67 |
near = re.search(r"\b(?:rent\s+of|rent\s+is)\s*(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})", text, flags=re.I)
|
| 68 |
if near:
|
|
|
|
| 73 |
return None
|
| 74 |
|
| 75 |
def detect_jurisdiction(text: str) -> Optional[str]:
    """Return a coarse jurisdiction label ("US", "EU/PT", "UK") or ``None``.

    Rules are checked in order and the first match wins. The US rule is
    case-sensitive; the other two ignore case.
    """
    rules = (
        (r"\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b", 0, "US"),
        (r"\bEuropean Union\b|\bPortugal\b|\bLisbon\b", re.I, "EU/PT"),
        (r"\bUnited Kingdom\b|\bEngland\b|\bWales\b", re.I, "UK"),
    )
    for pattern, flags, label in rules:
        if re.search(pattern, text, flags=flags):
            return label
    return None
|
| 80 |
|
| 81 |
CLAUSE_KEYWORDS = {
|
|
|
|
| 94 |
def extract_clauses(text: str) -> Dict[str, bool]:
    """Report, for each entry of ``CLAUSE_KEYWORDS``, whether it occurs in *text*.

    An entry is ``True`` when at least one of its patterns matches
    case-insensitively anywhere in *text*.
    """
    detected: Dict[str, bool] = {}
    for name, pats in CLAUSE_KEYWORDS.items():
        detected[name] = any(re.search(p, text, flags=re.I) for p in pats)
    return detected
|
| 96 |
|
|
|
|
|
|
|
| 97 |
def extract_metadata(contract_text: str) -> Dict[str, Any]:
|
| 98 |
+
text = (contract_text or "").strip()
|
| 99 |
if not text:
|
| 100 |
return {"error": "No text provided."}
|
| 101 |
+
return {
|
|
|
|
| 102 |
"parties": extract_parties(text),
|
| 103 |
"property_address": extract_address(text),
|
| 104 |
"lease_term": extract_term(text),
|
|
|
|
| 107 |
"jurisdiction": detect_jurisdiction(text),
|
| 108 |
"clauses": extract_clauses(text),
|
| 109 |
}
|
|
|
|
| 110 |
|
| 111 |
def summarize(metadata: Dict[str, Any]) -> str:
    """Render *metadata* as a Markdown summary with one field per line.

    If *metadata* has an ``"error"`` key, only that error is rendered.
    Otherwise each non-empty field among ``parties``, ``property_address``,
    ``lease_term``, ``start_date``, ``rent``, ``jurisdiction`` and
    ``clauses`` contributes one line, in that order. With no non-empty
    field the result is a placeholder string.
    """
    if "error" in metadata:
        return f"**Error:** {metadata['error']}"

    parts: List[str] = []
    if metadata.get("parties"):
        parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
    if metadata.get("property_address"):
        parts.append(f"**Property:** {metadata['property_address']}")
    if metadata.get("lease_term"):
        parts.append(f"**Term:** {metadata['lease_term']}")
    if metadata.get("start_date"):
        parts.append(f"**Start Date:** {metadata['start_date']}")
    if metadata.get("rent"):
        r = metadata["rent"]
        # Unknown or missing currency codes render without a symbol.
        cur = {"USD": "$", "EUR": "€", "GBP": "£"}.get(r.get("currency"), "")
        parts.append(f"**Rent:** {cur}{r.get('amount')} / {r.get('period','monthly')}")
    if metadata.get("jurisdiction"):
        parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")
    if metadata.get("clauses"):
        on = [k.replace("_", " ") for k, v in metadata["clauses"].items() if v]
        parts.append(f"**Detected Clauses:** {', '.join(on) if on else 'None detected'}")

    if not parts:
        return "_No summary available._"
    # A Markdown hard line break needs two trailing spaces before the
    # newline; with a single space the fields collapse into one paragraph.
    hard_break = " " * 2 + "\n"
    return hard_break.join(parts)
|
| 127 |
|
|
|
|
|
|
|
| 128 |
EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
|
| 129 |
The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
|
| 130 |
A security deposit is required. Late fees may apply. The governing law shall be the laws of the State of New York.
|
|
|
|
| 134 |
|
| 135 |
def run_pipeline(text: str):
    """Extract metadata from *text* and return ``(summary_markdown, metadata)``.

    Any exception raised during extraction or summarising is reported in
    both return slots instead of propagating to the caller.
    """
    try:
        metadata = extract_metadata(text)
        summary_md = summarize(metadata)
        return summary_md, metadata
    except Exception as exc:
        message = str(exc)
        return f"**Error:** {message}", {"error": message}
|
|
|
|
| 141 |
|
| 142 |
# Build the Gradio UI: one input textbox, two buttons, and two output panes.
with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
    gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
    gr.Markdown("Paste a lease/contract. The app extracts key fields and detects common clauses. _Heuristic demo — no legal advice._")
    with gr.Row():
        # The textbox starts pre-filled with EXAMPLE_TEXT.
        inp = gr.Textbox(label="Contract Text", lines=14, value=EXAMPLE_TEXT, show_copy_button=True)
    with gr.Row():
        extract_btn = gr.Button("Extract Metadata", variant="primary")
        clear_btn = gr.Button("Clear")
    with gr.Row():
        summary_out = gr.Markdown(label="Summary")
    with gr.Row():
        json_out = gr.JSON(label="Structured JSON")
    # run_pipeline returns a (markdown, dict) pair, matching the two outputs in order.
    extract_btn.click(run_pipeline, inputs=inp, outputs=[summary_out, json_out])
    # Clear resets only the two output panes; the input textbox is not among its targets.
    clear_btn.click(lambda: ("", {}), None, [summary_out, json_out])

# Some runners look for `app`; provide an alias
app = demo
|