pavansuresh committed on
Commit
49c58d2
·
verified ·
1 Parent(s): 91704ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -6
app.py CHANGED
@@ -6,7 +6,7 @@ import os
6
  import tempfile
7
  from tqdm import tqdm
8
  import re
9
- from ai_mapping import extract_key_values_with_layoutlm, run_ai_mapping_with_layoutlm
10
  from ocr_utils import extract_text_from_pdf_with_tesseract_or_layoutlm
11
  from salesforce_utils import get_token, create_or_update_record
12
 
@@ -28,8 +28,8 @@ def save_temp_file(pdf_bytes):
28
  def detect_risks(data):
29
  """Detect risks (e.g., missing dates, large amounts)."""
30
  risks = []
31
- if not data.get("Date"):
32
- risks.append("No expiration date detected - potential obligation risk.")
33
  if data.get("Amount") and float(data.get("Amount", "0").replace('$', '').replace(',', '')) > 1000000:
34
  risks.append("Large amount detected - review for financial risk.")
35
  return risks
@@ -58,6 +58,10 @@ def process_contract(pdf_bytes, object_type):
58
  print(f"Extraction failed: {key_data.get('error', 'Unknown error')}")
59
  return f"❌ Extraction failed: {key_data.get('error', 'Unknown error')}", {}, [], "0/1"
60
 
 
 
 
 
61
  print("Detecting risks")
62
  risks = detect_risks(key_data)
63
  print(f"Detected risks: {risks}")
@@ -66,6 +70,8 @@ def process_contract(pdf_bytes, object_type):
66
  # Mock CLM fields with Salesforce-ready structure
67
  clm_fields = {"Name": f"Contract_{len(contract_data) + 1}", "Type__c": object_type, "Status__c": status}
68
  clm_fields.update({k: v for k, v in key_data.items() if k not in ["status", "error", "key_values"]})
 
 
69
 
70
  # Optional Salesforce sync
71
  try:
@@ -81,6 +87,7 @@ def process_contract(pdf_bytes, object_type):
81
  contract_id = f"Contract_{len(contract_data) + 1}"
82
  contract_data[contract_id] = {
83
  "data": key_data,
 
84
  "risks": risks,
85
  "clm_fields": clm_fields,
86
  "status": status
@@ -108,26 +115,30 @@ with gr.Blocks(title="Contract Intelligence App") as demo:
108
  process_button = gr.Button("Process Contracts")
109
  status_output = gr.Textbox(label="Status", interactive=False)
110
  extracted_data_output = gr.JSON(label="Extracted Data")
 
111
  risks_output = gr.Textbox(label="Detected Risks", interactive=False)
112
 
113
  def process_and_display(files, obj_type):
114
  if not files:
115
- return "❌ No files uploaded.", {}, "No risks detected", gr.update(value="0/0")
116
  results = []
117
  all_data = {}
 
118
  all_risks = []
119
  for i, file in enumerate(files):
120
  status, data, risks, _ = process_contract(file, obj_type)
 
121
  results.append(f"{status} - File: File_{i}")
122
  all_data.update({f"File_{i}": data})
 
123
  all_risks.extend(risks)
124
  progress = f"{len(files)}/{len(files)}"
125
- return "\n".join(results), all_data, "\n".join(all_risks) if all_risks else "No risks detected", gr.update(value=progress)
126
 
127
  process_button.click(
128
  fn=process_and_display,
129
  inputs=[file_input, object_type],
130
- outputs=[status_output, extracted_data_output, risks_output, upload_progress]
131
  )
132
 
133
  with gr.Tab("Contract Repository"):
 
6
  import tempfile
7
  from tqdm import tqdm
8
  import re
9
+ from ai_mapping import extract_key_values_with_layoutlm, run_ai_mapping_with_layoutlm, extract_clauses
10
  from ocr_utils import extract_text_from_pdf_with_tesseract_or_layoutlm
11
  from salesforce_utils import get_token, create_or_update_record
12
 
 
28
  def detect_risks(data):
29
  """Detect risks (e.g., missing dates, large amounts)."""
30
  risks = []
31
+ if not data.get("Agreement Start Date") and not data.get("Agreement End Date"):
32
+ risks.append("No agreement dates detected - potential obligation risk.")
33
  if data.get("Amount") and float(data.get("Amount", "0").replace('$', '').replace(',', '')) > 1000000:
34
  risks.append("Large amount detected - review for financial risk.")
35
  return risks
 
58
  print(f"Extraction failed: {key_data.get('error', 'Unknown error')}")
59
  return f"❌ Extraction failed: {key_data.get('error', 'Unknown error')}", {}, [], "0/1"
60
 
61
+ print("Extracting clauses")
62
+ clauses = extract_clauses(page_data)
63
+ print(f"Extracted clauses: {clauses}")
64
+
65
  print("Detecting risks")
66
  risks = detect_risks(key_data)
67
  print(f"Detected risks: {risks}")
 
70
  # Mock CLM fields with Salesforce-ready structure
71
  clm_fields = {"Name": f"Contract_{len(contract_data) + 1}", "Type__c": object_type, "Status__c": status}
72
  clm_fields.update({k: v for k, v in key_data.items() if k not in ["status", "error", "key_values"]})
73
+ for clause_name, clause_text in clauses.items():
74
+ clm_fields[f"{clause_name}_Text__c"] = clause_text
75
 
76
  # Optional Salesforce sync
77
  try:
 
87
  contract_id = f"Contract_{len(contract_data) + 1}"
88
  contract_data[contract_id] = {
89
  "data": key_data,
90
+ "clauses": clauses,
91
  "risks": risks,
92
  "clm_fields": clm_fields,
93
  "status": status
 
115
  process_button = gr.Button("Process Contracts")
116
  status_output = gr.Textbox(label="Status", interactive=False)
117
  extracted_data_output = gr.JSON(label="Extracted Data")
118
+ clauses_output = gr.JSON(label="Extracted Clauses")
119
  risks_output = gr.Textbox(label="Detected Risks", interactive=False)
120
 
121
  def process_and_display(files, obj_type):
122
  if not files:
123
+ return "❌ No files uploaded.", {}, {}, "No risks detected", gr.update(value="0/0")
124
  results = []
125
  all_data = {}
126
+ all_clauses = {}
127
  all_risks = []
128
  for i, file in enumerate(files):
129
  status, data, risks, _ = process_contract(file, obj_type)
130
+ clauses = contract_data[f"Contract_{len(contract_data)}"]["clauses"] # Get clauses from latest contract
131
  results.append(f"{status} - File: File_{i}")
132
  all_data.update({f"File_{i}": data})
133
+ all_clauses.update({f"File_{i}": clauses})
134
  all_risks.extend(risks)
135
  progress = f"{len(files)}/{len(files)}"
136
+ return "\n".join(results), all_data, all_clauses, "\n".join(all_risks) if all_risks else "No risks detected", gr.update(value=progress)
137
 
138
  process_button.click(
139
  fn=process_and_display,
140
  inputs=[file_input, object_type],
141
+ outputs=[status_output, extracted_data_output, clauses_output, risks_output, upload_progress]
142
  )
143
 
144
  with gr.Tab("Contract Repository"):