Proooof committed on
Commit
168847f
·
verified ·
1 Parent(s): 460c910

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -92
app.py CHANGED
@@ -1,26 +1,18 @@
1
  """
2
  ProofAssets: Real Estate Tokenizer (Heuristic Demo)
3
- - Extracts key fields from lease/contract text using regex + date parsing
4
- - Outputs a human-friendly summary + structured JSON
5
- - No legal advice; for demo purposes only
6
  """
7
 
8
  from __future__ import annotations
9
  import re
10
  import json
11
  from typing import Dict, Any, List, Optional
12
- from dateparser.search import search_dates # <-- correct import
13
  import gradio as gr
14
 
15
- # ----------------------------- Helpers -----------------------------
16
-
17
  def _clean(s: str) -> str:
18
  return re.sub(r"\s+", " ", s).strip()
19
 
20
- # -------------------------- Extractors -----------------------------
21
-
22
  def extract_parties(text: str) -> Optional[List[str]]:
23
- # "made between X and Y"
24
  m = re.search(r"\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b)", text, re.I | re.S)
25
  if m:
26
  p1 = _clean(m.group(1))
@@ -28,69 +20,49 @@ def extract_parties(text: str) -> Optional[List[str]]:
28
  p1 = re.sub(r"^(?:the\s+)?(landlord|lessor|owner)\s*[:\-]*\s*", "", p1, flags=re.I)
29
  p2 = re.sub(r"^(?:the\s+)?(tenant|lessee|occupant)\s*[:\-]*\s*", "", p2, flags=re.I)
30
  return [p1, p2]
31
-
32
- # Fallback: "Landlord: X" / "Tenant: Y"
33
  parties = []
34
  m1 = re.search(r"\bLandlord\s*[:\-]\s*(.+?)(?:[,.;\n])", text, flags=re.I)
35
  m2 = re.search(r"\bTenant\s*[:\-]\s*(.+?)(?:[,.;\n])", text, flags=re.I)
36
- if m1:
37
- parties.append(_clean(m1.group(1)))
38
- if m2:
39
- parties.append(_clean(m2.group(1)))
40
  return parties or None
41
 
42
  def extract_address(text: str) -> Optional[str]:
43
- patterns = [
44
  r"(?:premises|property)\s+(?:located\s+)?at\s+(.+?)(?:[,.;\n])",
45
  r"\blocated at\s+(.+?)(?:[,.;\n])",
46
  r"\baddress\s*[:\-]\s*(.+?)(?:[,.;\n])",
47
- ]
48
- for pat in patterns:
49
  m = re.search(pat, text, flags=re.I)
50
  if m:
51
  return _clean(m.group(1))
52
  return None
53
 
54
  def extract_term(text: str) -> Optional[str]:
55
- m = re.search(
56
- r"(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(month|months|year|years)",
57
- text,
58
- flags=re.I,
59
- )
60
  if m:
61
- n = int(m.group(1))
62
- unit = m.group(2).lower()
63
  return f"{n} years" if unit.startswith("year") else f"{n} months"
64
-
65
  m = re.search(r"(\d{1,3})\s*-\s*month\s+term", text, flags=re.I)
66
- if m:
67
- return f"{int(m.group(1))} months"
68
  return None
69
 
70
  def extract_start_date(text: str) -> Optional[str]:
71
- # Prefer a window near "commencing/starting/effective"
72
  window = re.search(r"(?:commencing|starting|effective)\s+on\s+(.{0,60})", text, flags=re.I)
73
  candidates = []
74
  if window:
75
  parsed = search_dates(window.group(0))
76
- if parsed:
77
- candidates.extend([d for _, d in parsed])
78
  if not candidates:
79
  parsed = search_dates(text)
80
- if parsed:
81
- candidates.extend([d for _, d in parsed])
82
  if candidates:
83
  dt = sorted(candidates)[0]
84
  return dt.strftime("%Y-%m-%d")
85
  return None
86
 
87
  def extract_rent(text: str) -> Optional[Dict[str, Any]]:
88
- # Prefer near "monthly/base rent"
89
- near = re.search(
90
- r"(?:monthly|base)\s+rent[^.\n]{0,40}?(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})",
91
- text,
92
- flags=re.I,
93
- )
94
  if not near:
95
  near = re.search(r"\b(?:rent\s+of|rent\s+is)\s*(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})", text, flags=re.I)
96
  if near:
@@ -101,12 +73,9 @@ def extract_rent(text: str) -> Optional[Dict[str, Any]]:
101
  return None
102
 
103
  def detect_jurisdiction(text: str) -> Optional[str]:
104
- if re.search(r"\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b", text):
105
- return "US"
106
- if re.search(r"\bEuropean Union\b|\bPortugal\b|\bLisbon\b", text, flags=re.I):
107
- return "EU/PT"
108
- if re.search(r"\bUnited Kingdom\b|\bEngland\b|\bWales\b", text, flags=re.I):
109
- return "UK"
110
  return None
111
 
112
  CLAUSE_KEYWORDS = {
@@ -125,14 +94,11 @@ CLAUSE_KEYWORDS = {
125
  def extract_clauses(text: str) -> Dict[str, bool]:
126
  return {name: any(re.search(p, text, flags=re.I) for p in pats) for name, pats in CLAUSE_KEYWORDS.items()}
127
 
128
- # --------------------------- Orchestration --------------------------
129
-
130
  def extract_metadata(contract_text: str) -> Dict[str, Any]:
131
- text = contract_text.strip()
132
  if not text:
133
  return {"error": "No text provided."}
134
-
135
- meta = {
136
  "parties": extract_parties(text),
137
  "property_address": extract_address(text),
138
  "lease_term": extract_term(text),
@@ -141,35 +107,24 @@ def extract_metadata(contract_text: str) -> Dict[str, Any]:
141
  "jurisdiction": detect_jurisdiction(text),
142
  "clauses": extract_clauses(text),
143
  }
144
- return meta
145
 
146
  def summarize(metadata: Dict[str, Any]) -> str:
147
  if "error" in metadata:
148
  return f"**Error:** {metadata['error']}"
149
-
150
  parts: List[str] = []
151
- if metadata.get("parties"):
152
- parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
153
- if metadata.get("property_address"):
154
- parts.append(f"**Property:** {metadata['property_address']}")
155
- if metadata.get("lease_term"):
156
- parts.append(f"**Term:** {metadata['lease_term']}")
157
- if metadata.get("start_date"):
158
- parts.append(f"**Start Date:** {metadata['start_date']}")
159
  if metadata.get("rent"):
160
- r = metadata["rent"]
161
- cur = {"USD": "$", "EUR": "€", "GBP": "£"}.get(r.get("currency"), "")
162
  parts.append(f"**Rent:** {cur}{r.get('amount')} / {r.get('period','monthly')}")
163
- if metadata.get("jurisdiction"):
164
- parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")
165
  if metadata.get("clauses"):
166
- on = [k.replace("_", " ") for k, v in metadata["clauses"].items() if v]
167
  parts.append(f"**Detected Clauses:** {', '.join(on) if on else 'None detected'}")
168
-
169
  return " \n".join(parts) if parts else "_No summary available._"
170
 
171
- # ------------------------------ UI ---------------------------------
172
-
173
  EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
174
  The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
175
  A security deposit is required. Late fees may apply. The governing law shall be the laws of the State of New York.
@@ -179,41 +134,25 @@ Landlord is responsible for structural repairs; Tenant shall handle routine main
179
 
180
  def run_pipeline(text: str):
181
  try:
182
- meta = extract_metadata(text or "")
183
- summary_md = summarize(meta)
184
- # Return dict for gr.JSON (it renders nicer than a JSON string)
185
- return summary_md, meta
186
  except Exception as e:
187
- err = {"error": str(e)}
188
- return f"**Error:** {e}", err
189
 
190
  with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
191
  gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
192
- gr.Markdown(
193
- "Paste a lease/contract. The app will extract key fields and detect common clauses. "
194
- "_Heuristic demo — no legal advice._"
195
- )
196
-
197
  with gr.Row():
198
- inp = gr.Textbox(
199
- label="Contract Text",
200
- lines=14,
201
- value=EXAMPLE_TEXT,
202
- show_copy_button=True,
203
- placeholder="Paste lease or contract text here..."
204
- )
205
-
206
  with gr.Row():
207
  extract_btn = gr.Button("Extract Metadata", variant="primary")
208
  clear_btn = gr.Button("Clear")
209
-
210
  with gr.Row():
211
  summary_out = gr.Markdown(label="Summary")
212
  with gr.Row():
213
  json_out = gr.JSON(label="Structured JSON")
214
-
215
- extract_btn.click(fn=run_pipeline, inputs=inp, outputs=[summary_out, json_out])
216
  clear_btn.click(lambda: ("", {}), None, [summary_out, json_out])
217
 
218
- # NOTE: Do NOT call demo.launch() in Spaces. The platform loads `demo` automatically.
219
-
 
1
  """
2
  ProofAssets: Real Estate Tokenizer (Heuristic Demo)
 
 
 
3
  """
4
 
5
  from __future__ import annotations
6
  import re
7
  import json
8
  from typing import Dict, Any, List, Optional
9
+ from dateparser.search import search_dates
10
  import gradio as gr
11
 
 
 
12
  def _clean(s: str) -> str:
13
  return re.sub(r"\s+", " ", s).strip()
14
 
 
 
15
  def extract_parties(text: str) -> Optional[List[str]]:
 
16
  m = re.search(r"\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b)", text, re.I | re.S)
17
  if m:
18
  p1 = _clean(m.group(1))
 
20
  p1 = re.sub(r"^(?:the\s+)?(landlord|lessor|owner)\s*[:\-]*\s*", "", p1, flags=re.I)
21
  p2 = re.sub(r"^(?:the\s+)?(tenant|lessee|occupant)\s*[:\-]*\s*", "", p2, flags=re.I)
22
  return [p1, p2]
 
 
23
  parties = []
24
  m1 = re.search(r"\bLandlord\s*[:\-]\s*(.+?)(?:[,.;\n])", text, flags=re.I)
25
  m2 = re.search(r"\bTenant\s*[:\-]\s*(.+?)(?:[,.;\n])", text, flags=re.I)
26
+ if m1: parties.append(_clean(m1.group(1)))
27
+ if m2: parties.append(_clean(m2.group(1)))
 
 
28
  return parties or None
29
 
30
  def extract_address(text: str) -> Optional[str]:
31
+ for pat in [
32
  r"(?:premises|property)\s+(?:located\s+)?at\s+(.+?)(?:[,.;\n])",
33
  r"\blocated at\s+(.+?)(?:[,.;\n])",
34
  r"\baddress\s*[:\-]\s*(.+?)(?:[,.;\n])",
35
+ ]:
 
36
  m = re.search(pat, text, flags=re.I)
37
  if m:
38
  return _clean(m.group(1))
39
  return None
40
 
41
  def extract_term(text: str) -> Optional[str]:
42
+ m = re.search(r"(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(month|months|year|years)", text, flags=re.I)
 
 
 
 
43
  if m:
44
+ n = int(m.group(1)); unit = m.group(2).lower()
 
45
  return f"{n} years" if unit.startswith("year") else f"{n} months"
 
46
  m = re.search(r"(\d{1,3})\s*-\s*month\s+term", text, flags=re.I)
47
+ if m: return f"{int(m.group(1))} months"
 
48
  return None
49
 
50
  def extract_start_date(text: str) -> Optional[str]:
 
51
  window = re.search(r"(?:commencing|starting|effective)\s+on\s+(.{0,60})", text, flags=re.I)
52
  candidates = []
53
  if window:
54
  parsed = search_dates(window.group(0))
55
+ if parsed: candidates.extend([d for _, d in parsed])
 
56
  if not candidates:
57
  parsed = search_dates(text)
58
+ if parsed: candidates.extend([d for _, d in parsed])
 
59
  if candidates:
60
  dt = sorted(candidates)[0]
61
  return dt.strftime("%Y-%m-%d")
62
  return None
63
 
64
  def extract_rent(text: str) -> Optional[Dict[str, Any]]:
65
+ near = re.search(r"(?:monthly|base)\s+rent[^.\n]{0,40}?(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})", text, flags=re.I)
 
 
 
 
 
66
  if not near:
67
  near = re.search(r"\b(?:rent\s+of|rent\s+is)\s*(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})", text, flags=re.I)
68
  if near:
 
73
  return None
74
 
75
  def detect_jurisdiction(text: str) -> Optional[str]:
76
+ if re.search(r"\bUnited States\b|\bState of [A-Z][a-z]+|\bNY\b|\bCA\b", text): return "US"
77
+ if re.search(r"\bEuropean Union\b|\bPortugal\b|\bLisbon\b", text, flags=re.I): return "EU/PT"
78
+ if re.search(r"\bUnited Kingdom\b|\bEngland\b|\bWales\b", text, flags=re.I): return "UK"
 
 
 
79
  return None
80
 
81
  CLAUSE_KEYWORDS = {
 
94
  def extract_clauses(text: str) -> Dict[str, bool]:
95
  return {name: any(re.search(p, text, flags=re.I) for p in pats) for name, pats in CLAUSE_KEYWORDS.items()}
96
 
 
 
97
  def extract_metadata(contract_text: str) -> Dict[str, Any]:
98
+ text = (contract_text or "").strip()
99
  if not text:
100
  return {"error": "No text provided."}
101
+ return {
 
102
  "parties": extract_parties(text),
103
  "property_address": extract_address(text),
104
  "lease_term": extract_term(text),
 
107
  "jurisdiction": detect_jurisdiction(text),
108
  "clauses": extract_clauses(text),
109
  }
 
110
 
111
  def summarize(metadata: Dict[str, Any]) -> str:
112
  if "error" in metadata:
113
  return f"**Error:** {metadata['error']}"
 
114
  parts: List[str] = []
115
+ if metadata.get("parties"): parts.append(f"**Parties:** {', '.join(metadata['parties'])}")
116
+ if metadata.get("property_address"): parts.append(f"**Property:** {metadata['property_address']}")
117
+ if metadata.get("lease_term"): parts.append(f"**Term:** {metadata['lease_term']}")
118
+ if metadata.get("start_date"): parts.append(f"**Start Date:** {metadata['start_date']}")
 
 
 
 
119
  if metadata.get("rent"):
120
+ r = metadata["rent"]; cur = {"USD": "$", "EUR": "€", "GBP": "£"}.get(r.get("currency"), "")
 
121
  parts.append(f"**Rent:** {cur}{r.get('amount')} / {r.get('period','monthly')}")
122
+ if metadata.get("jurisdiction"): parts.append(f"**Jurisdiction:** {metadata['jurisdiction']}")
 
123
  if metadata.get("clauses"):
124
+ on = [k.replace("_"," ") for k,v in metadata["clauses"].items() if v]
125
  parts.append(f"**Detected Clauses:** {', '.join(on) if on else 'None detected'}")
 
126
  return " \n".join(parts) if parts else "_No summary available._"
127
 
 
 
128
  EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
129
  The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
130
  A security deposit is required. Late fees may apply. The governing law shall be the laws of the State of New York.
 
134
 
135
  def run_pipeline(text: str):
136
  try:
137
+ meta = extract_metadata(text)
138
+ return summarize(meta), meta
 
 
139
  except Exception as e:
140
+ return f"**Error:** {e}", {"error": str(e)}
 
141
 
142
  with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
143
  gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
144
+ gr.Markdown("Paste a lease/contract. The app extracts key fields and detects common clauses. _Heuristic demo — no legal advice._")
 
 
 
 
145
  with gr.Row():
146
+ inp = gr.Textbox(label="Contract Text", lines=14, value=EXAMPLE_TEXT, show_copy_button=True)
 
 
 
 
 
 
 
147
  with gr.Row():
148
  extract_btn = gr.Button("Extract Metadata", variant="primary")
149
  clear_btn = gr.Button("Clear")
 
150
  with gr.Row():
151
  summary_out = gr.Markdown(label="Summary")
152
  with gr.Row():
153
  json_out = gr.JSON(label="Structured JSON")
154
+ extract_btn.click(run_pipeline, inputs=inp, outputs=[summary_out, json_out])
 
155
  clear_btn.click(lambda: ("", {}), None, [summary_out, json_out])
156
 
157
+ # Some runners look for `app`; provide an alias
158
+ app = demo