Update app.py
Browse files
app.py
CHANGED
|
@@ -1,30 +1,193 @@
|
|
| 1 |
-
import
|
| 2 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
def
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
}
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
}
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
if __name__ == "__main__":
|
| 30 |
demo.launch()
|
|
|
|
| 1 |
+
import re
|
| 2 |
import json
|
| 3 |
+
from typing import Dict, Any
|
| 4 |
+
import dateparser
|
| 5 |
+
import gradio as gr
|
| 6 |
+
|
| 7 |
+
# ---------- Heuristic extractors (no ML) ----------
|
| 8 |
+
|
| 9 |
+
def _clean(s: str) -> str:
|
| 10 |
+
return re.sub(r'\s+', ' ', s).strip()
|
| 11 |
+
|
| 12 |
+
def extract_parties(text: str):
    """Return the names of the contracting parties found in ``text``.

    Two phrasings are recognised, in this order:

    1. ``between X and Y`` -- returns ``[X, Y]``.
    2. ``Landlord: X`` / ``Tenant: Y`` labels -- returns whichever were found.

    A leading role word (landlord, lessor, owner, tenant, lessee, occupant),
    optionally preceded by "the", is removed from names found by the first
    phrasing. Returns ``None`` when neither phrasing matches.
    """
    # The trailing \b keeps names such as "Owners Corp" intact; without it the
    # role word would be cut out of the middle of a longer word.
    role_prefix = re.compile(
        r'^(?:the\s+)?(?:landlord|lessor|owner|tenant|lessee|occupant)\b\s*[:\-]*\s*',
        re.IGNORECASE,
    )

    def strip_role(name):
        # If the party is identified only by its role ("between Landlord and
        # Tenant"), keep the role instead of returning an empty string.
        return role_prefix.sub('', name) or name

    # Common phrasing: "This Lease Agreement is made between X and Y".
    # "$" lets the second party end at the end of the text as well as at
    # punctuation, a newline, or the word "for".
    m = re.search(
        r'\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b|$)',
        text,
        re.IGNORECASE | re.DOTALL,
    )
    if m:
        return [strip_role(_clean(m.group(1))), strip_role(_clean(m.group(2)))]

    # Fallback: look for "Landlord: X" / "Tenant: Y".
    parties = []
    for label in ('Landlord', 'Tenant'):
        m = re.search(rf'\b{label}\s*[:\-]\s*(.+?)(?:[,.;\n]|$)', text, flags=re.I)
        if m:
            parties.append(_clean(m.group(1)))
    return parties or None
|
| 29 |
+
|
| 30 |
+
def extract_address(text: str):
    """Return the property address mentioned in ``text``, or ``None``.

    Looks for "premises/property [located] at ...", "located at ..." and
    "address: ..." in that order and returns the first hit with whitespace
    normalised.

    The address starts as the text up to the first comma, period, semicolon
    or newline. It is then extended across ", <segment>" pieces on the same
    line as long as the segment does not start with a lowercase ASCII letter,
    so "123 Main St, City, ST 12345" is returned whole while
    "123 Main St, and the tenant ..." stops at the street.
    """
    # Case-sensitive on purpose: the lowercase test below is what separates
    # address segments from running prose.
    address = r'([^,.;\n]+(?:,[ \t]*(?![a-z])[^,.;\n\s][^,.;\n]*)*)'
    # The keywords stay case-insensitive through scoped (?i:...) groups.
    patterns = [
        r'(?i:(?:premises|property)\s+(?:located\s+)?at)\s+' + address,
        r'(?i:\blocated at)\s+' + address,
        r'(?i:\baddress)\s*[:\-]\s*' + address,
    ]
    for pat in patterns:
        m = re.search(pat, text)
        if m:
            return _clean(m.group(1))
    return None
|
| 42 |
+
|
| 43 |
+
def extract_term(text: str):
    """Return the lease duration as ``"<n> month(s)"`` / ``"<n> year(s)"``.

    Recognises "lease term is 12 months", "for a term of 2 years",
    "term: 6 months" and the hyphenated "12-month term" / "2-year term".
    Returns ``None`` when no duration is found.
    """
    def describe(count: int, unit: str) -> str:
        # Normalise the unit, then pluralise only when the count is not 1.
        base = 'year' if unit.lower().startswith('year') else 'month'
        return f"{count} {base}" if count == 1 else f"{count} {base}s"

    # "lease term is 12 months", "for a term of 2 years"
    m = re.search(
        r'(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(months?|years?)',
        text,
        flags=re.I,
    )
    if m:
        return describe(int(m.group(1)), m.group(2))

    # "12-month term", "2-year term"
    m = re.search(r'(\d{1,3})\s*-\s*(month|year)\s+term', text, flags=re.I)
    if m:
        return describe(int(m.group(1)), m.group(2))
    return None
|
| 57 |
+
|
| 58 |
+
def extract_start_date(text: str):
    """Return the lease start date in ``text`` as ``YYYY-MM-DD``, or ``None``.

    Dates are first searched for in the 60 characters following
    "commencing on" / "starting on" / "effective on". If none are found
    there, the whole text is searched. The earliest date found is returned.
    """
    if not text or not text.strip():
        return None

    # ``dateparser.search`` is a subpackage; a bare ``import dateparser`` does
    # not load it, so ``dateparser.search.search_dates`` can fail with
    # AttributeError. Import the function explicitly instead.
    from dateparser.search import search_dates

    def dates_in(fragment):
        # search_dates returns (matched_text, datetime) pairs, or None.
        found = search_dates(fragment)
        return [when for _, when in found] if found else []

    # Try to find a plausible date near "commencing/starting/effective".
    window = re.search(
        r'(?:commencing|starting|effective)\s+on\s+(.{0,60})', text, flags=re.I
    )
    candidates = dates_in(window.group(0)) if window else []
    if not candidates:
        candidates = dates_in(text)
    if not candidates:
        return None

    # Return the earliest date found (often the start date). Timezone info is
    # dropped for the comparison only, because naive and aware datetimes
    # cannot be ordered against each other.
    earliest = min(candidates, key=lambda when: when.replace(tzinfo=None))
    return earliest.strftime('%Y-%m-%d')
|
| 75 |
+
|
| 76 |
+
def extract_rent(text: str):
    """Return the rent found in ``text`` as a dict, or ``None``.

    The result has the keys ``amount`` (float), ``currency`` ("USD", "EUR",
    "GBP" or ``None`` when no symbol was seen) and ``period`` (always
    "monthly").

    Patterns are tried in this order:

    1. "monthly rent" / "base rent" followed within 40 characters by an
       amount that carries a currency symbol.
    2. The same anchor followed by any number.
    3. "rent of" / "rent is" directly followed by an amount.

    Trying the currency-marked amount first stops an unrelated number such as
    the "1" in "due on the 1st ... $1,500" from being taken as the rent.
    """
    currencies = {'$': 'USD', '€': 'EUR', '£': 'GBP'}
    # Digits with optional thousands separators and up to two decimals.
    amount = r'([0-9][\d,]*(?:\.\d{1,2})?)'
    anchor = r'(?:monthly|base)\s+rent[^.\n]{0,40}?'
    patterns = (
        anchor + r'([$€£])\s*' + amount,
        anchor + r'([$€£])?\s*' + amount,
        r'\b(?:rent\s+of|rent\s+is)\s*([$€£])?\s*' + amount,
    )
    for pat in patterns:
        m = re.search(pat, text, flags=re.I)
        if m:
            return {
                'amount': float(m.group(2).replace(',', '')),
                'currency': currencies.get(m.group(1)),
                'period': 'monthly',
            }
    return None
|
| 88 |
+
|
| 89 |
+
def detect_jurisdiction(text: str):
    """Guess the jurisdiction of ``text`` from a few place-name hints.

    Returns "US", "EU/PT" or "UK" for the first group of hints that matches
    (checked in that order), or ``None`` when nothing matches. Basic hints
    only -- this is not a legal determination.
    """
    hints = (
        # Case-sensitive so that the abbreviations NY/CA do not match
        # ordinary lowercase words.
        (r'\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b', 0, "US"),
        (r'\bEuropean Union\b|\bPortugal\b|\bLisbon\b', re.I, "EU/PT"),
        # The lookbehind keeps the Australian state "New South Wales" from
        # being reported as UK.
        (
            r'\bUnited Kingdom\b|\bEngland\b|\bScotland\b|\bNorthern Ireland\b'
            r'|(?<!New South )\bWales\b',
            re.I,
            "UK",
        ),
    )
    for pattern, flags, label in hints:
        if re.search(pattern, text, flags):
            return label
    return None
|
| 95 |
+
|
| 96 |
+
# Clause name -> regex patterns; a clause counts as present when any of its
# patterns matches. The patterns are written to be used case-insensitively.
CLAUSE_KEYWORDS = {
    # terminate / terminated / terminates / terminating / termination
    'termination': [r'\bterminat(?:e[ds]?|ing|ion)\b', r'\bearly termination\b'],
    'liability': [r'\bliabilit(?:y|ies)\b', r'\blimit(?:ation)? of liability\b'],
    'indemnity': [
        r'\bindemnif(?:y|ies|ied|ication)\b',
        r'\bindemnit(?:y|ies)\b',
        r'\bindemnitor\b',
    ],
    'governing_law': [r'\bgoverning law\b', r'\blaws? of\b'],
    'security_deposit': [r'\bsecurity deposits?\b'],
    'late_fee': [r'\blate fees?\b', r'\bpenalt(?:y|ies)\b'],
    'sublet_assignment': [r'\bsublet\b', r'\bassignment\b'],
    'maintenance_repairs': [r'\bmaintenance\b', r'\brepairs?\b'],
    # The alternatives are grouped so that \b applies to each word; ungrouped,
    # "gas" would match inside words such as "Vegas".
    'utilities': [r'\butilities\b', r'\b(?:water|gas|electricity)\b'],
    'entry_access': [r'\bright of entry\b', r'\baccess\b'],
}
|
| 108 |
|
| 109 |
+
def extract_clauses(text: str, keywords=None) -> Dict[str, bool]:
    """Report which clause types are mentioned in ``text``.

    Args:
        text: Contract text to scan.
        keywords: Optional mapping of clause name to a list of regex
            patterns. Defaults to the module-level ``CLAUSE_KEYWORDS``.

    Returns:
        A dict with one entry per clause name; the value is ``True`` when any
        of that clause's patterns matches ``text`` case-insensitively.
    """
    table = CLAUSE_KEYWORDS if keywords is None else keywords
    return {
        name: any(re.search(pat, text, flags=re.I) for pat in pats)
        for name, pats in table.items()
    }
|
| 115 |
+
|
| 116 |
+
def summarize(metadata: Dict[str, Any]) -> str:
    """Render extracted ``metadata`` as a short Markdown summary.

    One line is produced for each field that is present and truthy, in this
    order: parties, property address, lease term, start date, rent,
    jurisdiction, detected clauses. When no field is present the placeholder
    ``"_No summary available._"`` is returned.
    """
    parts = []
    if metadata.get('parties'):
        parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
    if metadata.get('property_address'):
        parts.append(f"**Property:** {metadata['property_address']}")
    if metadata.get('lease_term'):
        parts.append(f"**Term:** {metadata['lease_term']}")
    if metadata.get('start_date'):
        parts.append(f"**Start Date:** {metadata['start_date']}")

    rent = metadata.get('rent')
    if rent:
        symbol = {'USD': '$', 'EUR': '€', 'GBP': '£'}.get(rent.get('currency'), '')
        amount = rent.get('amount')
        if isinstance(amount, (int, float)):
            # Show money as "1,500.00" rather than the raw float "1500.0".
            amount = f"{amount:,.2f}"
        parts.append(f"**Rent:** {symbol}{amount} / {rent.get('period', 'monthly')}")

    if metadata.get('jurisdiction'):
        parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")

    # Clauses brief
    if metadata.get('clauses'):
        detected = [
            name.replace('_', ' ')
            for name, present in metadata['clauses'].items()
            if present
        ]
        parts.append(
            f"**Detected Clauses:** {', '.join(detected) if detected else 'None detected'}"
        )

    # A Markdown hard line break needs two trailing spaces before the
    # newline; with a single space the fields can render as one paragraph.
    return "  \n".join(parts) if parts else "_No summary available._"
|
| 137 |
+
|
| 138 |
+
def extract_metadata(contract_text: str) -> Dict[str, Any]:
    """Run every heuristic extractor over ``contract_text``.

    Returns a dict with the keys ``parties``, ``property_address``,
    ``lease_term``, ``start_date``, ``rent``, ``jurisdiction`` and
    ``clauses``. When the input is ``None``, empty or only whitespace,
    ``{"error": "No text provided."}`` is returned instead.
    """
    # ``or ""`` guards against a None input, which has no .strip().
    text = (contract_text or "").strip()
    if not text:
        return {"error": "No text provided."}

    return {
        "parties": extract_parties(text),
        "property_address": extract_address(text),
        "lease_term": extract_term(text),
        "start_date": extract_start_date(text),
        "rent": extract_rent(text),
        "jurisdiction": detect_jurisdiction(text),
        "clauses": extract_clauses(text),
    }
|
| 160 |
+
|
| 161 |
+
# ---------- Gradio UI ----------
|
| 162 |
+
|
| 163 |
+
# Sample lease used to pre-fill the input box; it exercises each extractor.
EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
A security deposit is required. Late fees may apply. The governing law shall be the laws of the State of New York.
Either party may terminate this agreement for cause subject to 30 days' notice. Sublet or assignment requires prior written consent.
Landlord is responsible for structural repairs; Tenant shall handle routine maintenance. Utilities include water and electricity.
"""
|
| 169 |
+
|
| 170 |
+
def run_pipeline(text):
    """Extract metadata from ``text`` and return it in two forms.

    Returns a ``(summary_markdown, json_string)`` tuple: the human-readable
    summary and the same metadata pretty-printed as JSON.
    """
    metadata = extract_metadata(text)
    # Human summary first, then the pretty-printed JSON.
    return summarize(metadata), json.dumps(metadata, indent=2, ensure_ascii=False)
|
| 176 |
+
|
| 177 |
+
# NOTE(review): the original indentation was lost; the layout below is
# reconstructed as four sibling rows inside the Blocks context -- confirm.
with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
    gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
    gr.Markdown("Paste a lease/contract. The app will extract key fields and detect common clauses. *(Heuristic demo — no legal advice.)*")

    # Input box, pre-filled with the sample lease.
    with gr.Row():
        inp = gr.Textbox(label="Contract Text", lines=14, value=EXAMPLE_TEXT, show_copy_button=True)
    with gr.Row():
        btn = gr.Button("Extract Metadata", variant="primary")
    # Outputs: the Markdown summary, then the structured JSON.
    with gr.Row():
        summary_out = gr.Markdown(label="Summary")
    with gr.Row():
        json_out = gr.JSON(label="Structured JSON")

    # Clicking the button runs the extractors and fills both outputs.
    btn.click(fn=run_pipeline, inputs=inp, outputs=[summary_out, json_out])

if __name__ == "__main__":
    demo.launch()
|