Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import os
|
|
| 6 |
import tempfile
|
| 7 |
from tqdm import tqdm
|
| 8 |
import re
|
| 9 |
-
from ai_mapping import extract_key_values_with_layoutlm, run_ai_mapping_with_layoutlm
|
| 10 |
from ocr_utils import extract_text_from_pdf_with_tesseract_or_layoutlm
|
| 11 |
from salesforce_utils import get_token, create_or_update_record
|
| 12 |
|
|
@@ -28,8 +28,8 @@ def save_temp_file(pdf_bytes):
|
|
| 28 |
def detect_risks(data):
|
| 29 |
"""Detect risks (e.g., missing dates, large amounts)."""
|
| 30 |
risks = []
|
| 31 |
-
if not data.get("Date"):
|
| 32 |
-
risks.append("No
|
| 33 |
if data.get("Amount") and float(data.get("Amount", "0").replace('$', '').replace(',', '')) > 1000000:
|
| 34 |
risks.append("Large amount detected - review for financial risk.")
|
| 35 |
return risks
|
|
@@ -58,6 +58,10 @@ def process_contract(pdf_bytes, object_type):
|
|
| 58 |
print(f"Extraction failed: {key_data.get('error', 'Unknown error')}")
|
| 59 |
return f"❌ Extraction failed: {key_data.get('error', 'Unknown error')}", {}, [], "0/1"
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
print("Detecting risks")
|
| 62 |
risks = detect_risks(key_data)
|
| 63 |
print(f"Detected risks: {risks}")
|
|
@@ -66,6 +70,8 @@ def process_contract(pdf_bytes, object_type):
|
|
| 66 |
# Mock CLM fields with Salesforce-ready structure
|
| 67 |
clm_fields = {"Name": f"Contract_{len(contract_data) + 1}", "Type__c": object_type, "Status__c": status}
|
| 68 |
clm_fields.update({k: v for k, v in key_data.items() if k not in ["status", "error", "key_values"]})
|
|
|
|
|
|
|
| 69 |
|
| 70 |
# Optional Salesforce sync
|
| 71 |
try:
|
|
@@ -81,6 +87,7 @@ def process_contract(pdf_bytes, object_type):
|
|
| 81 |
contract_id = f"Contract_{len(contract_data) + 1}"
|
| 82 |
contract_data[contract_id] = {
|
| 83 |
"data": key_data,
|
|
|
|
| 84 |
"risks": risks,
|
| 85 |
"clm_fields": clm_fields,
|
| 86 |
"status": status
|
|
@@ -108,26 +115,30 @@ with gr.Blocks(title="Contract Intelligence App") as demo:
|
|
| 108 |
process_button = gr.Button("Process Contracts")
|
| 109 |
status_output = gr.Textbox(label="Status", interactive=False)
|
| 110 |
extracted_data_output = gr.JSON(label="Extracted Data")
|
|
|
|
| 111 |
risks_output = gr.Textbox(label="Detected Risks", interactive=False)
|
| 112 |
|
| 113 |
def process_and_display(files, obj_type):
|
| 114 |
if not files:
|
| 115 |
-
return "❌ No files uploaded.", {}, "No risks detected", gr.update(value="0/0")
|
| 116 |
results = []
|
| 117 |
all_data = {}
|
|
|
|
| 118 |
all_risks = []
|
| 119 |
for i, file in enumerate(files):
|
| 120 |
status, data, risks, _ = process_contract(file, obj_type)
|
|
|
|
| 121 |
results.append(f"{status} - File: File_{i}")
|
| 122 |
all_data.update({f"File_{i}": data})
|
|
|
|
| 123 |
all_risks.extend(risks)
|
| 124 |
progress = f"{len(files)}/{len(files)}"
|
| 125 |
-
return "\n".join(results), all_data, "\n".join(all_risks) if all_risks else "No risks detected", gr.update(value=progress)
|
| 126 |
|
| 127 |
process_button.click(
|
| 128 |
fn=process_and_display,
|
| 129 |
inputs=[file_input, object_type],
|
| 130 |
-
outputs=[status_output, extracted_data_output, risks_output, upload_progress]
|
| 131 |
)
|
| 132 |
|
| 133 |
with gr.Tab("Contract Repository"):
|
|
|
|
| 6 |
import tempfile
|
| 7 |
from tqdm import tqdm
|
| 8 |
import re
|
| 9 |
+
from ai_mapping import extract_key_values_with_layoutlm, run_ai_mapping_with_layoutlm, extract_clauses
|
| 10 |
from ocr_utils import extract_text_from_pdf_with_tesseract_or_layoutlm
|
| 11 |
from salesforce_utils import get_token, create_or_update_record
|
| 12 |
|
|
|
|
| 28 |
def _parse_amount(raw):
    """Return *raw* as a float, or None when it cannot be read as a number.

    Accepts plain numbers as well as strings that may carry a ``$`` sign and
    thousands separators (e.g. ``"$1,250,000"``). Anything else yields None.
    """
    # bool is a subclass of int; a True/False "amount" is not a real figure.
    if isinstance(raw, bool):
        return None
    if isinstance(raw, (int, float)):
        return float(raw)
    if not isinstance(raw, str):
        return None
    cleaned = raw.replace('$', '').replace(',', '').strip()
    try:
        return float(cleaned)
    except ValueError:
        # Free-form text ("N/A", "USD 5,000", "") is not a usable amount.
        return None


def detect_risks(data):
    """Detect risks (e.g., missing dates, large amounts).

    Args:
        data: Mapping of extracted contract fields. The keys read here are
            "Agreement Start Date", "Agreement End Date" and "Amount".

    Returns:
        A list of human-readable risk messages; empty when none apply.
    """
    risks = []
    # Only flagged when BOTH dates are absent; a single date is accepted.
    if not data.get("Agreement Start Date") and not data.get("Agreement End Date"):
        risks.append("No agreement dates detected - potential obligation risk.")
    # An unparsable or non-string amount must not abort risk detection, so it
    # is treated as "no amount" instead of raising.
    amount = _parse_amount(data.get("Amount"))
    if amount is not None and amount > 1000000:
        risks.append("Large amount detected - review for financial risk.")
    return risks
|
|
|
|
| 58 |
print(f"Extraction failed: {key_data.get('error', 'Unknown error')}")
|
| 59 |
return f"❌ Extraction failed: {key_data.get('error', 'Unknown error')}", {}, [], "0/1"
|
| 60 |
|
| 61 |
+
print("Extracting clauses")
|
| 62 |
+
clauses = extract_clauses(page_data)
|
| 63 |
+
print(f"Extracted clauses: {clauses}")
|
| 64 |
+
|
| 65 |
print("Detecting risks")
|
| 66 |
risks = detect_risks(key_data)
|
| 67 |
print(f"Detected risks: {risks}")
|
|
|
|
| 70 |
# Mock CLM fields with Salesforce-ready structure
|
| 71 |
clm_fields = {"Name": f"Contract_{len(contract_data) + 1}", "Type__c": object_type, "Status__c": status}
|
| 72 |
clm_fields.update({k: v for k, v in key_data.items() if k not in ["status", "error", "key_values"]})
|
| 73 |
+
for clause_name, clause_text in clauses.items():
|
| 74 |
+
clm_fields[f"{clause_name}_Text__c"] = clause_text
|
| 75 |
|
| 76 |
# Optional Salesforce sync
|
| 77 |
try:
|
|
|
|
| 87 |
contract_id = f"Contract_{len(contract_data) + 1}"
|
| 88 |
contract_data[contract_id] = {
|
| 89 |
"data": key_data,
|
| 90 |
+
"clauses": clauses,
|
| 91 |
"risks": risks,
|
| 92 |
"clm_fields": clm_fields,
|
| 93 |
"status": status
|
|
|
|
| 115 |
process_button = gr.Button("Process Contracts")
|
| 116 |
status_output = gr.Textbox(label="Status", interactive=False)
|
| 117 |
extracted_data_output = gr.JSON(label="Extracted Data")
|
| 118 |
+
clauses_output = gr.JSON(label="Extracted Clauses")
|
| 119 |
risks_output = gr.Textbox(label="Detected Risks", interactive=False)
|
| 120 |
|
| 121 |
def process_and_display(files, obj_type):
    """Process every uploaded file and build the values for the UI outputs.

    Args:
        files: Uploaded files from the file input; may be empty or None.
        obj_type: Object type forwarded unchanged to ``process_contract``.

    Returns:
        A 5-tuple of (status lines joined by newlines, extracted data keyed
        by ``File_<i>``, clauses keyed by ``File_<i>``, risk text, progress
        update), in the order the click handler's outputs expect.
    """
    if not files:
        return "❌ No files uploaded.", {}, {}, "No risks detected", gr.update(value="0/0")
    results = []
    all_data = {}
    all_clauses = {}
    all_risks = []
    for i, file in enumerate(files):
        # Remember the store size so we can tell whether this call added a
        # contract. The visible extraction-failure path of process_contract
        # returns before storing one; reading the "latest" entry then would
        # raise KeyError on an empty store or reuse the previous file's clauses.
        stored_before = len(contract_data)
        status, data, risks, _ = process_contract(file, obj_type)
        if len(contract_data) > stored_before:
            latest = contract_data[f"Contract_{len(contract_data)}"]
            clauses = latest.get("clauses", {})
        else:
            clauses = {}
        results.append(f"{status} - File: File_{i}")
        all_data[f"File_{i}"] = data
        all_clauses[f"File_{i}"] = clauses
        all_risks.extend(risks)
    progress = f"{len(files)}/{len(files)}"
    risk_text = "\n".join(all_risks) if all_risks else "No risks detected"
    return "\n".join(results), all_data, all_clauses, risk_text, gr.update(value=progress)
|
| 137 |
|
| 138 |
process_button.click(
|
| 139 |
fn=process_and_display,
|
| 140 |
inputs=[file_input, object_type],
|
| 141 |
+
outputs=[status_output, extracted_data_output, clauses_output, risks_output, upload_progress]
|
| 142 |
)
|
| 143 |
|
| 144 |
with gr.Tab("Contract Repository"):
|