Proooof committed on
Commit
1327f78
·
verified ·
1 Parent(s): 9fe1402

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -24
app.py CHANGED
@@ -1,30 +1,193 @@
1
- import gradio as gr
2
  import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- def extract_metadata(contract_text):
5
- # Placeholder extraction (replace with model pipeline later)
6
- metadata = {
7
- "parties": ["Landlord", "Tenant"],
8
- "property_address": "123 Main Street",
9
- "lease_term": "12 months",
10
- "start_date": "2025-06-01",
11
- "rent_amount": 1500,
12
- "clauses": {
13
- "termination": True,
14
- "liability": True,
15
- "indemnity": False
16
- },
17
- "jurisdiction": "US"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  }
19
- return json.dumps(metadata, indent=2)
20
-
21
- demo = gr.Interface(
22
- fn=extract_metadata,
23
- inputs=gr.Textbox(lines=12, placeholder="Paste lease or contract here..."),
24
- outputs="json",
25
- title="🏠 ProofAssets: Real Estate Tokenizer",
26
- description="Extract metadata from real estate contracts and convert into structured format for tokenization."
27
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  if __name__ == "__main__":
30
  demo.launch()
 
1
+ import re
2
  import json
3
+ from typing import Dict, Any
4
+ import dateparser
5
+ import gradio as gr
6
+
7
+ # ---------- Heuristic extractors (no ML) ----------
8
+
9
def _clean(s: str) -> str:
    """Collapse every run of whitespace in *s* to one space and trim both ends."""
    return re.sub(r'\s+', ' ', s).strip()


def _strip_role(name: str, roles: str) -> str:
    """Remove a leading role word (optionally preceded by "the") from *name*.

    *roles* is a regex alternation such as ``"landlord|lessor|owner"``.
    The ``\\b`` after the role keeps names that merely start with a role word
    (e.g. "Landlordia Corp") intact.  If stripping would leave nothing, the
    original text is returned so the caller never gets an empty party name.
    """
    stripped = re.sub(
        rf'^(?:the\s+)?(?:{roles})\b\s*[:\-]*\s*', '', name, flags=re.I)
    return stripped or name


def extract_parties(text: str):
    """Return the contracting parties named in *text*.

    Two heuristics are tried in order:

    1. The phrasing "... between X and Y ...": X and Y are whitespace
       normalised and a leading role word (landlord/lessor/owner for X,
       tenant/lessee/occupant for Y) is removed.
    2. Labelled lines "Landlord: X" / "Tenant: Y" (":" or "-" separator).

    Returns:
        ``[X, Y]`` for the "between" form, a list with one or two names for
        the labelled form, or ``None`` when neither heuristic matches.
    """
    # A party name ends at punctuation, a newline, the word "for", or the end
    # of the text (so a name that finishes the document is still captured).
    m = re.search(
        r'\bbetween\s+(.+?)\s+and\s+(.+?)(?:[,.;\n]| for\b|$)',
        text,
        re.IGNORECASE | re.DOTALL,
    )
    if m:
        first = _strip_role(_clean(m.group(1)), 'landlord|lessor|owner')
        second = _strip_role(_clean(m.group(2)), 'tenant|lessee|occupant')
        return [first, second]

    # Fallback: look for "Landlord: X" / "Tenant: Y"
    labelled = []
    for role in ('Landlord', 'Tenant'):
        hit = re.search(
            rf'\b{role}\s*[:\-]\s*(.+?)(?:[,.;\n]|$)', text, flags=re.I)
        if hit:
            labelled.append(_clean(hit.group(1)))
    return labelled or None
29
+
30
def extract_address(text: str):
    """Return the first address-like fragment found in *text*, or ``None``.

    Patterns are tried in order: "premises/property [located] at ...",
    "located at ...", then "address: ...".  The captured fragment stops at the
    first comma, period, semicolon or newline, so for
    "123 Main Street, Albany, NY" only "123 Main Street" is returned.
    The end of the text also terminates a fragment, so an address that is the
    last thing in the document (no trailing punctuation) is still found.
    """
    patterns = (
        r'(?:premises|property)\s+(?:located\s+)?at\s+(.+?)(?:[,.;\n]|$)',
        r'\blocated at\s+(.+?)(?:[,.;\n]|$)',
        r'\baddress\s*[:\-]\s*(.+?)(?:[,.;\n]|$)',
    )
    for pat in patterns:
        m = re.search(pat, text, flags=re.I)
        if m:
            return _clean(m.group(1))
    return None
42
+
43
def extract_term(text: str):
    """Return the lease duration as ``"<n> month(s)"`` / ``"<n> year(s)"``.

    Recognised phrasings (case-insensitive):
      * "lease term is 12 months", "for a term of 2 years", "term: 6 months"
      * "12-month term", "2-year term"

    The unit is singular when the count is exactly 1 ("1 year", not
    "1 years").  Returns ``None`` when no phrasing matches.
    """
    patterns = (
        r'(?:lease\s+term\s+is|for\s+a\s+term\s+of|term\s*[:\-])\s*(\d{1,3})\s*(month|year)',
        r'(\d{1,3})\s*-\s*(month|year)\s+term',
    )
    for pat in patterns:
        m = re.search(pat, text, flags=re.I)
        if m:
            count = int(m.group(1))
            unit = m.group(2).lower()
            return f"{count} {unit}" if count == 1 else f"{count} {unit}s"
    return None
57
+
58
def extract_start_date(text: str):
    """Return the lease start date found in *text* as ``YYYY-MM-DD``, or ``None``.

    Dates are first searched in a short window (up to 60 characters) following
    "commencing on" / "starting on" / "effective on".  If that yields nothing,
    the whole text is searched.  When several dates are found, the earliest
    one is returned (often the start date).
    """
    if not text or not text.strip():
        return None

    # A bare `import dateparser` is not guaranteed to load the
    # `dateparser.search` submodule, so `dateparser.search.search_dates` can
    # raise AttributeError.  Import the function explicitly instead.
    from dateparser.search import search_dates

    window = re.search(
        r'(?:commencing|starting|effective)\s+on\s+(.{0,60})', text, flags=re.I)
    found = search_dates(window.group(0)) if window else None
    if not found:
        found = search_dates(text)
    if not found:
        return None

    # Compare on naive values so a mix of timezone-aware and naive results
    # cannot raise TypeError while ordering.
    earliest = min((dt for _, dt in found), key=lambda d: d.replace(tzinfo=None))
    return earliest.strftime('%Y-%m-%d')
75
+
76
def extract_rent(text: str):
    """Return the rent found in *text* as a dict, or ``None``.

    An amount near "monthly rent" / "base rent" (same sentence, within 40
    characters) is preferred; otherwise "rent of ..." / "rent is ..." is used.
    A leading ``$``, ``€`` or ``£`` is mapped to USD / EUR / GBP.

    Returns:
        ``{'amount': float, 'currency': 'USD'|'EUR'|'GBP'|None,
        'period': 'monthly'}``.  The period is always reported as monthly.
    """
    # Optional currency symbol, then digits with thousands separators and an
    # optional fractional part.
    amount_pat = r'(\$|€|£)?\s*([0-9][\d,]*\.?\d{0,2})'
    patterns = (
        r'(?:monthly|base)\s+rent[^.\n]{0,40}?' + amount_pat,
        r'\b(?:rent\s+of|rent\s+is)\s*' + amount_pat,
    )
    symbols = {'$': 'USD', '€': 'EUR', '£': 'GBP'}
    for pat in patterns:
        m = re.search(pat, text, flags=re.I)
        if m:
            # Commas (including one swallowed from trailing punctuation) are
            # dropped before conversion.
            amount = float(m.group(2).replace(',', ''))
            return {
                'amount': amount,
                'currency': symbols.get(m.group(1) or ''),
                'period': 'monthly',
            }
    return None
88
+
89
def detect_jurisdiction(text: str):
    """Guess a coarse jurisdiction code from keyword hints in *text*.

    Hints are checked in order and the first hit wins:
    ``"US"``, then ``"EU/PT"``, then ``"UK"``.  Returns ``None`` when no hint
    is present.  These are basic hints only, not a legal determination.
    """
    hints = (
        # Country name in any casing (legal text is often upper-cased).
        (r'\bUnited States\b', re.I, "US"),
        # Kept case-sensitive: "NY"/"CA" must not match ordinary lowercase
        # words, and "State of X" relies on the capitalised name.
        (r'\bState of [A-Z][a-z]+|\bNY\b|\bCA\b', 0, "US"),
        (r'\bEuropean Union\b|\bPortugal\b|\bLisbon\b', re.I, "EU/PT"),
        (r'\bUnited Kingdom\b|\bEngland\b|\bWales\b', re.I, "UK"),
    )
    for pattern, flags, code in hints:
        if re.search(pattern, text, flags=flags):
            return code
    return None
95
+
96
# Clause name -> regex patterns (matched case-insensitively); a clause counts
# as present when any one of its patterns is found.
CLAUSE_KEYWORDS = {
    # "termination" is terminat+ion, so the stem must be "terminat".
    'termination': [r'\bterminat(?:e|ed|es|ing|ion)\b', r'\bearly termination\b'],
    'liability': [r'\bliabilit(?:y|ies)\b', r'\blimit(?:ation)? of liability\b'],
    # Covers the noun "indemnity" itself as well as the verb forms.
    'indemnity': [r'\bindemnif(?:y|ies|ied|ication)\b', r'\bindemnit(?:y|ies|or)\b'],
    'governing_law': [r'\bgoverning law\b', r'\blaws? of\b'],
    'security_deposit': [r'\bsecurity deposit\b'],
    'late_fee': [r'\blate fees?\b', r'\bpenalt(?:y|ies)\b'],
    'sublet_assignment': [r'\bsublet\b', r'\bassignment\b'],
    'maintenance_repairs': [r'\bmaintenance\b', r'\brepairs?\b'],
    # Alternatives are grouped so the word boundaries apply to every one of
    # them ("gas" must not match inside "Vegas").
    'utilities': [r'\butilities\b', r'\b(?:water|gas|electricity)\b'],
    'entry_access': [r'\bright of entry\b', r'\baccess\b'],
}


def extract_clauses(text: str) -> Dict[str, bool]:
    """Report which clause types from ``CLAUSE_KEYWORDS`` appear in *text*.

    Returns a dict with one boolean per clause name, ``True`` when at least
    one of that clause's patterns matches (case-insensitive).
    """
    return {
        name: any(re.search(pat, text, flags=re.I) for pat in pats)
        for name, pats in CLAUSE_KEYWORDS.items()
    }
115
+
116
def summarize(metadata: Dict[str, Any]) -> str:
    """Render extracted *metadata* as a short Markdown summary.

    Only fields that are present and truthy are listed, one per line, in the
    order: parties, property, term, start date, rent, jurisdiction, detected
    clauses.  Returns ``"_No summary available._"`` when nothing can be listed.
    """
    lines = []

    simple_fields = (
        ('property_address', 'Property'),
        ('lease_term', 'Term'),
        ('start_date', 'Start Date'),
    )

    if metadata.get('parties'):
        lines.append(f"**Parties:** {', '.join(metadata['parties'])}")
    for key, label in simple_fields:
        if metadata.get(key):
            lines.append(f"**{label}:** {metadata[key]}")

    rent = metadata.get('rent')
    if rent:
        symbol = {'USD': '$', 'EUR': '€', 'GBP': '£'}.get(rent.get('currency'), '')
        amount = rent.get('amount')
        # Show money as "1,500.00" rather than the raw float "1500.0".
        if isinstance(amount, (int, float)) and not isinstance(amount, bool):
            amount = f"{amount:,.2f}"
        lines.append(f"**Rent:** {symbol}{amount} / {rent.get('period','monthly')}")

    if metadata.get('jurisdiction'):
        lines.append(f"**Jurisdiction:** {metadata['jurisdiction']}")

    # Clauses brief
    if metadata.get('clauses'):
        detected = [k.replace('_', ' ') for k, v in metadata['clauses'].items() if v]
        lines.append(
            f"**Detected Clauses:** {', '.join(detected) if detected else 'None detected'}")

    # Two trailing spaces before the newline form a Markdown hard line break;
    # a single space would let the fields run together in one paragraph.
    return "  \n".join(lines) if lines else "_No summary available._"
137
+
138
def extract_metadata(contract_text: str) -> Dict[str, Any]:
    """Run every heuristic extractor over *contract_text*.

    Returns:
        ``{"error": "No text provided."}`` when the input is ``None``, empty
        or whitespace only; otherwise a dict with the keys ``parties``,
        ``property_address``, ``lease_term``, ``start_date``, ``rent``,
        ``jurisdiction`` and ``clauses``.  Fields an extractor could not find
        are ``None``.
    """
    # Tolerate None as well as empty text instead of failing on `.strip()`.
    text = (contract_text or "").strip()
    if not text:
        return {"error": "No text provided."}

    return {
        "parties": extract_parties(text),
        "property_address": extract_address(text),
        "lease_term": extract_term(text),
        "start_date": extract_start_date(text),
        "rent": extract_rent(text),
        "jurisdiction": detect_jurisdiction(text),
        "clauses": extract_clauses(text),
    }
160
+
161
+ # ---------- Gradio UI ----------
162
+
163
# Sample lease used as the initial value of the contract text box.
EXAMPLE_TEXT = """This Lease Agreement is made between Landlord X and Tenant Y for the premises located at 123 Main Street, Albany, NY 12207.
The lease term is 12 months, commencing on June 1, 2025. The monthly rent is $1,500, due on the first of each month.
A security deposit is required. Late fees may apply. The governing law shall be the laws of the State of New York.
Either party may terminate this agreement for cause subject to 30 days' notice. Sublet or assignment requires prior written consent.
Landlord is responsible for structural repairs; Tenant shall handle routine maintenance. Utilities include water and electricity.
"""
169
+
170
def run_pipeline(text):
    """Extract metadata from *text* and return ``(summary_markdown, json_string)``.

    The first element is the human-readable Markdown summary; the second is
    the same metadata serialised as indented JSON (non-ASCII characters are
    kept as-is rather than escaped).
    """
    meta = extract_metadata(text)
    # Pretty JSON + human summary
    summary_md = summarize(meta)
    json_str = json.dumps(meta, indent=2, ensure_ascii=False)
    return summary_md, json_str
176
+
177
# Page layout: contract input, an extract button, then the Markdown summary
# and the structured JSON, each in its own row.
with gr.Blocks(title="ProofAssets: Real Estate Tokenizer") as demo:
    gr.Markdown("# 🏠 ProofAssets: Real Estate Tokenizer")
    gr.Markdown("Paste a lease/contract. The app will extract key fields and detect common clauses. *(Heuristic demo — no legal advice.)*")

    with gr.Row():
        inp = gr.Textbox(label="Contract Text", lines=14, value=EXAMPLE_TEXT, show_copy_button=True)
    with gr.Row():
        btn = gr.Button("Extract Metadata", variant="primary")
    with gr.Row():
        summary_out = gr.Markdown(label="Summary")
    with gr.Row():
        json_out = gr.JSON(label="Structured JSON")

    # The listener is registered inside the Blocks context so it is attached
    # to this app.
    btn.click(fn=run_pipeline, inputs=inp, outputs=[summary_out, json_out])

if __name__ == "__main__":
    demo.launch()