Proooof committed on
Commit
460c910
·
verified ·
1 Parent(s): bf35370

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -114
app.py CHANGED
@@ -1,40 +1,49 @@
 
 
 
 
 
 
 
 
1
  import re
2
  import json
3
- from typing import Dict, Any
4
- import dateparser
5
- from dateparser.search import search_dates # 👈 ADD THIS
6
  import gradio as gr
7
 
8
-
9
- # ---------- Heuristic extractors (no ML) ----------
10
 
11
  def _clean(s: str) -> str:
12
- return re.sub(r'\s+', ' ', s).strip()
13
 
14
- def extract_parties(text: str):
15
- # Common phrasing: "This Lease Agreement is made between X and Y"
16
- m = re.search(r'\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b)', text, re.IGNORECASE | re.DOTALL)
 
 
17
  if m:
18
  p1 = _clean(m.group(1))
19
  p2 = _clean(m.group(2))
20
- # Strip leading roles if present
21
- p1 = re.sub(r'^(?:the\s+)?(landlord|lessor|owner)\s*[:\-]*\s*', '', p1, flags=re.I)
22
- p2 = re.sub(r'^(?:the\s+)?(tenant|lessee|occupant)\s*[:\-]*\s*', '', p2, flags=re.I)
23
  return [p1, p2]
24
- # Fallback: look for "Landlord: X" / "Tenant: Y"
25
- p = []
26
- m1 = re.search(r'\bLandlord\s*[:\-]\s*(.+?)(?:[,.;\n])', text, flags=re.I)
27
- m2 = re.search(r'\bTenant\s*[:\-]\s*(.+?)(?:[,.;\n])', text, flags=re.I)
28
- if m1: p.append(_clean(m1.group(1)))
29
- if m2: p.append(_clean(m2.group(1)))
30
- return p or None
31
-
32
- def extract_address(text: str):
33
- # "premises located at 123 Main St, City, ST 12345"
 
 
34
  patterns = [
35
- r'(?:premises|property)\s+(?:located\s+)?at\s+(.+?)(?:[,.;\n])',
36
- r'\blocated at\s+(.+?)(?:[,.;\n])',
37
- r'\baddress\s*[:\-]\s*(.+?)(?:[,.;\n])'
38
  ]
39
  for pat in patterns:
40
  m = re.search(pat, text, flags=re.I)
@@ -42,24 +51,25 @@ def extract_address(text: str):
42
  return _clean(m.group(1))
43
  return None
44
 
45
- def extract_term(text: str):
46
- # "lease term is 12 months", "for a term of 2 years"
47
- m = re.search(r'(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(month|months|year|years)', text, flags=re.I)
 
 
 
48
  if m:
49
  n = int(m.group(1))
50
  unit = m.group(2).lower()
51
- if unit.startswith('year'):
52
- return f"{n} years"
53
- return f"{n} months"
54
- # "12-month term"
55
- m = re.search(r'(\d{1,3})\s*-\s*month\s+term', text, flags=re.I)
56
  if m:
57
  return f"{int(m.group(1))} months"
58
  return None
59
 
60
- ef extract_start_date(text: str):
61
- # Try to find the first plausible date near "commencing/starting/effective"
62
- window = re.search(r'(?:commencing|starting|effective)\s+on\s+(.{0,60})', text, flags=re.I)
63
  candidates = []
64
  if window:
65
  parsed = search_dates(window.group(0))
@@ -71,95 +81,94 @@ ef extract_start_date(text: str):
71
  candidates.extend([d for _, d in parsed])
72
  if candidates:
73
  dt = sorted(candidates)[0]
74
- return dt.strftime('%Y-%m-%d')
75
  return None
76
 
77
- def extract_rent(text: str):
78
- # Capture currency and amount, e.g., "$1,500", "€1200", "£900"
79
- # Prefer near "monthly rent"
80
- near = re.search(r'(?:monthly|base)\s+rent[^.\n]{0,40}?(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})', text, flags=re.I)
 
 
 
81
  if not near:
82
- near = re.search(r'\b(?:rent\s+of|rent\s+is)\s*(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})', text, flags=re.I)
83
  if near:
84
- cur = near.group(1) or ''
85
- amt = float(near.group(2).replace(',', ''))
86
- currency = {'$': 'USD', '': 'EUR', '£': 'GBP'}.get(cur, None)
87
- return {'amount': amt, 'currency': currency, 'period': 'monthly'}
88
  return None
89
 
90
- def detect_jurisdiction(text: str):
91
- # Basic hints only
92
- if re.search(r'\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b', text): return "US"
93
- if re.search(r'\bEuropean Union\b|\bPortugal\b|\bLisbon\b', text, flags=re.I): return "EU/PT"
94
- if re.search(r'\bUnited Kingdom\b|\bEngland\b|\bWales\b', text, flags=re.I): return "UK"
 
 
95
  return None
96
 
97
  CLAUSE_KEYWORDS = {
98
- 'termination': [r'\bterminate(?:d|s|ion)?\b', r'\bearly termination\b'],
99
- 'liability': [r'\bliabilit(?:y|ies)\b', r'\blimit(?:ation)? of liability\b'],
100
- 'indemnity': [r'\bindemnif(?:y|ication)\b', r'\bindemnitor\b'],
101
- 'governing_law': [r'\bgoverning law\b', r'\blaws? of\b'],
102
- 'security_deposit': [r'\bsecurity deposit\b'],
103
- 'late_fee': [r'\blate fee\b', r'\bpenalt(y|ies)\b'],
104
- 'sublet_assignment': [r'\bsublet\b', r'\bassignment\b'],
105
- 'maintenance_repairs': [r'\bmaintenance\b', r'\brepairs?\b'],
106
- 'utilities': [r'\butilities\b', r'\bwater|gas|electricity\b'],
107
- 'entry_access': [r'\bright of entry\b', r'\baccess\b'],
108
  }
109
 
110
  def extract_clauses(text: str) -> Dict[str, bool]:
111
- results = {}
112
- for name, pats in CLAUSE_KEYWORDS.items():
113
- found = any(re.search(pat, text, flags=re.I) for pat in pats)
114
- results[name] = bool(found)
115
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  def summarize(metadata: Dict[str, Any]) -> str:
118
- parts = []
119
- if metadata.get('parties'):
 
 
 
120
  parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
121
- if metadata.get('property_address'):
122
  parts.append(f"**Property:** {metadata['property_address']}")
123
- if metadata.get('lease_term'):
124
  parts.append(f"**Term:** {metadata['lease_term']}")
125
- if metadata.get('start_date'):
126
  parts.append(f"**Start Date:** {metadata['start_date']}")
127
- if metadata.get('rent'):
128
- r = metadata['rent']
129
- cur = {'USD': '$', 'EUR': '', 'GBP': '£'}.get(r.get('currency'), '')
130
  parts.append(f"**Rent:** {cur}{r.get('amount')} / {r.get('period','monthly')}")
131
- if metadata.get('jurisdiction'):
132
  parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")
133
- # Clauses brief
134
- if metadata.get('clauses'):
135
- on = [k.replace('_',' ') for k, v in metadata['clauses'].items() if v]
136
  parts.append(f"**Detected Clauses:** {', '.join(on) if on else 'None detected'}")
137
- return " \n".join(parts) if parts else "_No summary available._"
138
 
139
- def extract_metadata(contract_text: str) -> Dict[str, Any]:
140
- text = contract_text.strip()
141
- if not text:
142
- return {"error": "No text provided."}
143
-
144
- parties = extract_parties(text)
145
- address = extract_address(text)
146
- term = extract_term(text)
147
- start_date = extract_start_date(text)
148
- rent = extract_rent(text)
149
- juris = detect_jurisdiction(text)
150
- clauses = extract_clauses(text)
151
-
152
- return {
153
- "parties": parties,
154
- "property_address": address,
155
- "lease_term": term,
156
- "start_date": start_date,
157
- "rent": rent,
158
- "jurisdiction": juris,
159
- "clauses": clauses
160
- }
161
 
162
- # ---------- Gradio UI ----------
163
 
164
  EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
165
  The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
@@ -168,27 +177,43 @@ Either party may terminate this agreement for cause subject to 30 days' notice.
168
  Landlord is responsible for structural repairs; Tenant shall handle routine maintenance. Utilities include water and electricity.
169
  """
170
 
171
- def run_pipeline(text):
172
- meta = extract_metadata(text)
173
- # Pretty JSON + human summary
174
- summary_md = summarize(meta)
175
- json_str = json.dumps(meta, indent=2, ensure_ascii=False)
176
- return summary_md, json_str
 
 
 
177
 
178
  with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
179
  gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
180
- gr.Markdown("Paste a lease/contract. The app will extract key fields and detect common clauses. *(Heuristic demo — no legal advice.)*")
 
 
 
181
 
182
  with gr.Row():
183
- inp = gr.Textbox(label="Contract Text", lines=14, value=EXAMPLE_TEXT, show_copy_button=True)
 
 
 
 
 
 
 
184
  with gr.Row():
185
- btn = gr.Button("Extract Metadata", variant="primary")
 
 
186
  with gr.Row():
187
  summary_out = gr.Markdown(label="Summary")
188
  with gr.Row():
189
  json_out = gr.JSON(label="Structured JSON")
190
 
191
- btn.click(fn=run_pipeline, inputs=inp, outputs=[summary_out, json_out])
 
 
 
192
 
193
- if __name__ == "__main__":
194
- demo.launch()
 
1
+ """
2
+ ProofAssets: Real Estate Tokenizer (Heuristic Demo)
3
+ - Extracts key fields from lease/contract text using regex + date parsing
4
+ - Outputs a human-friendly summary + structured JSON
5
+ - No legal advice; for demo purposes only
6
+ """
7
+
8
+ from __future__ import annotations
9
  import re
10
  import json
11
+ from typing import Dict, Any, List, Optional
12
+ from dateparser.search import search_dates # <-- correct import
 
13
  import gradio as gr
14
 
15
+ # ----------------------------- Helpers -----------------------------
 
16
 
17
  def _clean(s: str) -> str:
18
+ return re.sub(r"\s+", " ", s).strip()
19
 
20
+ # -------------------------- Extractors -----------------------------
21
+
22
def extract_parties(text: str) -> Optional[List[str]]:
    """Return the contracting parties found in *text*, or ``None``.

    Two heuristics are tried in order:

    1. Prose phrasing such as "made between X and Y ...". A leading role
       label ("the Landlord", "Tenant:", ...) is stripped from each name.
    2. Labelled entries such as "Landlord: X" / "Tenant: Y".

    A name ends at the first ``,`` ``.`` ``;`` or newline, or at the end of
    the text. End-of-text has to count as a terminator: once the input has
    been stripped, a party on the final line has no trailing delimiter and
    would otherwise be missed.

    Returns:
        ``[first, second]`` for the prose form; for the labelled form, a list
        with whichever of landlord/tenant matched (landlord first); ``None``
        when nothing matched.
    """
    # "made between X and Y"
    m = re.search(
        r"\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b|$)",
        text,
        re.I | re.S,
    )
    if m:
        p1 = _clean(m.group(1))
        p2 = _clean(m.group(2))
        # Drop a leading role word so only the name itself remains.
        p1 = re.sub(r"^(?:the\s+)?(landlord|lessor|owner)\s*[:\-]*\s*", "", p1, flags=re.I)
        p2 = re.sub(r"^(?:the\s+)?(tenant|lessee|occupant)\s*[:\-]*\s*", "", p2, flags=re.I)
        return [p1, p2]

    # Fallback: "Landlord: X" / "Tenant: Y"
    parties = []
    m1 = re.search(r"\bLandlord\s*[:\-]\s*(.+?)(?:[,.;\n]|$)", text, flags=re.I)
    m2 = re.search(r"\bTenant\s*[:\-]\s*(.+?)(?:[,.;\n]|$)", text, flags=re.I)
    if m1:
        parties.append(_clean(m1.group(1)))
    if m2:
        parties.append(_clean(m2.group(1)))
    return parties or None
41
+
42
+ def extract_address(text: str) -> Optional[str]:
43
  patterns = [
44
+ r"(?:premises|property)\s+(?:located\s+)?at\s+(.+?)(?:[,.;\n])",
45
+ r"\blocated at\s+(.+?)(?:[,.;\n])",
46
+ r"\baddress\s*[:\-]\s*(.+?)(?:[,.;\n])",
47
  ]
48
  for pat in patterns:
49
  m = re.search(pat, text, flags=re.I)
 
51
  return _clean(m.group(1))
52
  return None
53
 
54
def extract_term(text: str) -> Optional[str]:
    """Return the lease duration found in *text* as ``"<n> months"``/``"<n> years"``.

    Recognized phrasings (case-insensitive):

    * "lease term is 12 months", "for a term of 2 years", "term: 6 months"
    * "12-month term" and "2-year term"

    The unit in the result is always plural, even for a count of 1
    (e.g. ``"1 months"``). Returns ``None`` when no phrasing matches.
    """
    m = re.search(
        r"(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(month|months|year|years)",
        text,
        flags=re.I,
    )
    if m is None:
        # Hyphenated adjective form: "12-month term" / "2-year term".
        m = re.search(r"(\d{1,3})\s*-\s*(month|year)\s+term", text, flags=re.I)
    if m is None:
        return None

    n = int(m.group(1))
    unit = "years" if m.group(2).lower().startswith("year") else "months"
    return f"{n} {unit}"
69
 
70
+ def extract_start_date(text: str) -> Optional[str]:
71
+ # Prefer a window near "commencing/starting/effective"
72
+ window = re.search(r"(?:commencing|starting|effective)\s+on\s+(.{0,60})", text, flags=re.I)
73
  candidates = []
74
  if window:
75
  parsed = search_dates(window.group(0))
 
81
  candidates.extend([d for _, d in parsed])
82
  if candidates:
83
  dt = sorted(candidates)[0]
84
+ return dt.strftime("%Y-%m-%d")
85
  return None
86
 
87
def extract_rent(text: str) -> Optional[Dict[str, Any]]:
    """Return the rent stated in *text*, or ``None`` if no amount is found.

    An amount shortly after "monthly rent" / "base rent" is preferred;
    otherwise "rent of N" / "rent is N" is used.

    Returns:
        ``{"amount": float, "currency": "USD" | "EUR" | "GBP" | None,
        "period": "monthly"}``. ``currency`` is ``None`` when the amount has
        no ``$``/``€``/``£`` symbol. ``period`` is always ``"monthly"``.
    """
    # Prefer near "monthly/base rent"
    near = re.search(
        r"(?:monthly|base)\s+rent[^.\n]{0,40}?(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})",
        text,
        flags=re.I,
    )
    if not near:
        near = re.search(
            r"\b(?:rent\s+of|rent\s+is)\s*(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})",
            text,
            flags=re.I,
        )
    if not near:
        return None

    symbol = near.group(1) or ""
    # Thousands separators (and any trailing comma the pattern swallowed) go.
    amount = float(near.group(2).replace(",", ""))
    # Keyed by the actual symbol: a missing symbol must yield None, not EUR.
    currency = {"$": "USD", "€": "EUR", "£": "GBP"}.get(symbol)
    return {"amount": amount, "currency": currency, "period": "monthly"}
102
 
103
def detect_jurisdiction(text: str) -> Optional[str]:
    """Guess a coarse jurisdiction label from keywords in *text*.

    Rules are checked in order and the first hit wins. The US rule is
    case-sensitive (it relies on capitalized names and the abbreviations
    "NY"/"CA"); the EU/PT and UK rules ignore case. Returns ``None`` when
    no rule matches.
    """
    # (pattern, regex flags, label) in priority order
    rules = (
        (r"\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b", 0, "US"),
        (r"\bEuropean Union\b|\bPortugal\b|\bLisbon\b", re.I, "EU/PT"),
        (r"\bUnited Kingdom\b|\bEngland\b|\bWales\b", re.I, "UK"),
    )
    for pattern, flags, label in rules:
        if re.search(pattern, text, flags=flags):
            return label
    return None
111
 
112
# Clause label -> regex patterns whose presence signals that clause.
# extract_clauses() applies every pattern case-insensitively.
CLAUSE_KEYWORDS = {
    "termination": [r"\bterminate(?:d|s|ion)?\b", r"\bearly termination\b"],
    "liability": [r"\bliabilit(?:y|ies)\b", r"\blimit(?:ation)? of liability\b"],
    "indemnity": [r"\bindemnif(?:y|ication)\b", r"\bindemnitor\b"],
    "governing_law": [r"\bgoverning law\b", r"\blaws? of\b"],
    "security_deposit": [r"\bsecurity deposit\b"],
    "late_fee": [r"\blate fee\b", r"\bpenalt(y|ies)\b"],
    "sublet_assignment": [r"\bsublet\b", r"\bassignment\b"],
    "maintenance_repairs": [r"\bmaintenance\b", r"\brepairs?\b"],
    # The alternation is grouped so the word boundaries apply to every
    # alternative; ungrouped, "gas" would match inside words like "Vegas".
    "utilities": [r"\butilities\b", r"\b(?:water|gas|electricity)\b"],
    "entry_access": [r"\bright of entry\b", r"\baccess\b"],
}


def extract_clauses(text: str) -> Dict[str, bool]:
    """Report, for each label in ``CLAUSE_KEYWORDS``, whether *text* mentions it.

    A clause counts as present when any of its patterns matches anywhere in
    *text* (case-insensitive). Every label appears in the result.
    """
    return {
        name: any(re.search(p, text, flags=re.I) for p in pats)
        for name, pats in CLAUSE_KEYWORDS.items()
    }
127
+
128
+ # --------------------------- Orchestration --------------------------
129
+
130
def extract_metadata(contract_text: str) -> Dict[str, Any]:
    """Run every extractor over *contract_text* and collect the results.

    Args:
        contract_text: Raw lease/contract text. ``None`` is treated like an
            empty string rather than raising.

    Returns:
        ``{"error": "No text provided."}`` when the input is empty or only
        whitespace; otherwise a dict with the keys ``parties``,
        ``property_address``, ``lease_term``, ``start_date``, ``rent``,
        ``jurisdiction`` and ``clauses``, each holding the matching
        extractor's result.
    """
    text = (contract_text or "").strip()
    if not text:
        return {"error": "No text provided."}

    return {
        "parties": extract_parties(text),
        "property_address": extract_address(text),
        "lease_term": extract_term(text),
        "start_date": extract_start_date(text),
        "rent": extract_rent(text),
        "jurisdiction": detect_jurisdiction(text),
        "clauses": extract_clauses(text),
    }
145
 
146
def summarize(metadata: Dict[str, Any]) -> str:
    """Render *metadata* as a short Markdown summary.

    Args:
        metadata: A dict that either carries an ``"error"`` key or any of
            ``parties``, ``property_address``, ``lease_term``,
            ``start_date``, ``rent``, ``jurisdiction`` and ``clauses``.
            Missing or falsy fields are skipped.

    Returns:
        ``"**Error:** ..."`` when an error is present; otherwise one bold
        labelled line per available field, or ``"_No summary available._"``
        when no field is set.
    """
    if "error" in metadata:
        return f"**Error:** {metadata['error']}"

    parts: List[str] = []
    if metadata.get("parties"):
        parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
    if metadata.get("property_address"):
        parts.append(f"**Property:** {metadata['property_address']}")
    if metadata.get("lease_term"):
        parts.append(f"**Term:** {metadata['lease_term']}")
    if metadata.get("start_date"):
        parts.append(f"**Start Date:** {metadata['start_date']}")
    if metadata.get("rent"):
        r = metadata["rent"]
        # Unknown or missing currency codes render without a symbol.
        cur = {"USD": "$", "EUR": "€", "GBP": "£"}.get(r.get("currency"), "")
        parts.append(f"**Rent:** {cur}{r.get('amount')} / {r.get('period','monthly')}")
    if metadata.get("jurisdiction"):
        parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")
    if metadata.get("clauses"):
        on = [k.replace("_", " ") for k, v in metadata["clauses"].items() if v]
        parts.append(f"**Detected Clauses:** {', '.join(on) if on else 'None detected'}")

    # Two trailing spaces before the newline form a Markdown hard line break,
    # so each field is rendered on its own line.
    return "  \n".join(parts) if parts else "_No summary available._"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
+ # ------------------------------ UI ---------------------------------
172
 
173
  EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
174
  The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
 
177
  Landlord is responsible for structural repairs; Tenant shall handle routine maintenance. Utilities include water and electricity.
178
  """
179
 
180
def run_pipeline(text: str):
    """Extract metadata from *text* and return ``(summary_markdown, metadata)``.

    A falsy *text* is passed on as an empty string. Any exception raised
    while extracting or summarizing is caught here and reported as an
    ``"**Error:** ..."`` summary paired with ``{"error": <message>}``
    instead of propagating.
    """
    try:
        extracted = extract_metadata(text if text else "")
        # The dict itself is returned (not a JSON string) so gr.JSON can render it.
        return summarize(extracted), extracted
    except Exception as exc:
        return f"**Error:** {exc}", {"error": str(exc)}
189
 
190
with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
    # Header and one-paragraph description.
    gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
    gr.Markdown(
        "Paste a lease/contract. The app will extract key fields and detect common clauses. _Heuristic demo — no legal advice._"
    )

    # Input: contract text, pre-filled with the bundled example.
    with gr.Row():
        inp = gr.Textbox(
            value=EXAMPLE_TEXT,
            label="Contract Text",
            placeholder="Paste lease or contract text here...",
            lines=14,
            show_copy_button=True,
        )

    # Actions.
    with gr.Row():
        extract_btn = gr.Button("Extract Metadata", variant="primary")
        clear_btn = gr.Button("Clear")

    # Outputs: Markdown summary above the structured JSON.
    with gr.Row():
        summary_out = gr.Markdown(label="Summary")
    with gr.Row():
        json_out = gr.JSON(label="Structured JSON")

    extract_btn.click(fn=run_pipeline, inputs=inp, outputs=[summary_out, json_out])
    # "Clear" resets the two output panes only; the input textbox is left as is.
    clear_btn.click(fn=lambda: ("", {}), inputs=None, outputs=[summary_out, json_out])
217
+
218
+ # NOTE: Do NOT call demo.launch() in Spaces. The platform loads `demo` automatically.
219