Spaces:

pavansuresh
/

SmartContractMigrator

Sleeping

App Files Files Community

pavansuresh committed on Jul 15, 2025

Commit

f214078

verified ·

1 Parent(s): 9101271

Update app.py

Browse files

Files changed (1) hide show

app.py +125 -159

app.py CHANGED Viewed

@@ -1,178 +1,144 @@
 import gradio as gr
-from salesforce_utils import get_salesforce_objects, get_salesforce_object_fields, get_token, create_record, attach_pdf
-from ai_mapping import run_ai_mapping
-from ocr_utils import extract_text_from_pdf
 import os
 import tempfile
-import json
-# Initialize global state for failed records
-failed_records = []
-def save_failed_record(pdf_name, object_name, error, mappings):
-    """Log failed records for reconciliation."""
-    global failed_records
-    failed_records.append({
-        "pdf_name": pdf_name,
-        "object_name": object_name,
-        "error": error,
-        "mappings": mappings
-    })
-def process_contract(uploaded_files, object_name, manual_mappings):
-    """Process uploaded PDFs and create Salesforce records."""
-    if not uploaded_files:
-        return "❌ No files uploaded.", None, failed_records
     try:
         token, instance_url = get_token()
     except Exception as e:
-        return f"❌ Salesforce authentication failed: {str(e)}", None, failed_records
-    try:
-        object_fields = get_salesforce_object_fields(token, instance_url, object_name)
-        object_field_names = [field['name'] for field in object_fields if field.get('createable')]
-    except Exception as e:
-        return f"❌ Failed to fetch object fields: {str(e)}", None, failed_records
-    results = []
-    for pdf_file in uploaded_files:
-        pdf_name = pdf_file.name
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
-            tmp.write(pdf_file.getvalue())
-            tmp_path = tmp.name
-        try:
-            # Step 1: OCR
-            text_data = extract_text_from_pdf(tmp_path)
-            if not text_data:
-                save_failed_record(pdf_name, object_name, "No text extracted from PDF", {})
-                results.append(f"⚠️ {pdf_name}: No text extracted")
-                continue
-            # Step 2: AI Mapping
-            ai_result = run_ai_mapping(text_data, tmp_path, object_field_names)
-            if ai_result['status'] == 'failed':
-                save_failed_record(pdf_name, object_name, ai_result['error'], ai_result['mappings'])
-                results.append(f"❌ {pdf_name}: {ai_result['error']}")
-                continue
-            # Apply manual mappings (if provided)
-            mappings = {k: v for k, v in ai_result['mappings'].items()}
-            for field, value in manual_mappings.items():
-                if value and field in object_field_names:
-                    mappings[field] = value
-            # Step 3: Create Salesforce record
-            record_response = create_record(object_name, mappings, token, instance_url)
-            if 'id' in record_response:
-                attach_pdf(record_response['id'], tmp_path, token, instance_url)
-                results.append(f"✅ {pdf_name}: Record created (ID: {record_response['id']})")
-            else:
-                save_failed_record(pdf_name, object_name, f"Failed to create record: {record_response}", mappings)
-                results.append(f"❌ {pdf_name}: Failed to create record: {record_response}")
-        except Exception as e:
-            save_failed_record(pdf_name, object_name, str(e), {})
-            results.append(f"❌ {pdf_name}: {str(e)}")
-        finally:
-            os.unlink(tmp_path)
-    return "\n".join(results), ai_result, failed_records
-def retry_failed_record(index, object_name, manual_mappings):
-    """Retry a failed record with manual corrections."""
-    global failed_records
-    if 0 <= index < len(failed_records):
-        failed_record = failed_records.pop(index)
-        pdf_name = failed_record['pdf_name']
-        with open(pdf_name, 'rb') as f:  # Adjust path if needed
-            result, ai_result, updated_records = process_contract([f], object_name, manual_mappings)
-        failed_records = updated_records
-        return result, updated_records
-    return "❌ Invalid record index.", failed_records
 # Gradio UI
-with gr.Blocks(title="Smart Contract Migrator (Gradio)") as demo:
-    # Epic 1: PDF Upload
     with gr.Row():
-        uploaded_files = gr.File(file_types=["pdf"], file_count="multiple", label="Upload Contract PDFs")
-    # Epic 2: Salesforce Object Selection
-    token, instance_url = get_token()
-    objects = get_salesforce_objects(token, instance_url)
-    object_names = [obj['name'] for obj in objects if obj.get('createable')]
-    object_name = gr.Dropdown(choices=object_names, label="Select Salesforce Object")
-    # Display object fields and create dynamic manual mappings
-    object_fields_state = gr.State(value=[])
-    def update_fields_and_mappings(selected_object):
-        if selected_object:
-            try:
-                token, instance_url = get_token()
-                object_fields = get_salesforce_object_fields(token, instance_url, selected_object)
-                field_names = [field['name'] for field in object_fields if field.get('createable')]
-                # Create a list of textboxes dynamically
-                mapping_inputs = [gr.Textbox(label=f"{field}", interactive=True) for field in field_names]
-                return field_names, mapping_inputs, gr.update(visible=True, value="\n".join(field_names))
-            except Exception as e:
-                return [], [], gr.update(visible=False, value=f"❌ Failed to fetch fields: {str(e)}")
-        return [], [], gr.update(visible=False)
-    object_fields_output = gr.Textbox(label="Available Fields", interactive=False)
-    manual_mapping_inputs = gr.State(value=[])  # Store the list of textbox components
-    object_name.change(
-        fn=update_fields_and_mappings,
-        inputs=object_name,
-        outputs=[object_fields_state, manual_mapping_inputs, object_fields_output]
-    )
-    # Process button
-    process_button = gr.Button("Extract, Map, and Upload")
-    status_output = gr.Textbox(label="Status", interactive=False)
-    ai_result_output = gr.JSON(label="AI Mapping Results", visible=False)
-    def process_and_display(files, obj_name, *mapping_values):
-        field_names = object_fields_state.value
-        manual_mappings_dict = {field: value for field, value in zip(field_names, mapping_values) if value}
-        status, ai_result, updated_records = process_contract(files, obj_name, manual_mappings_dict)
-        global failed_records
-        failed_records = updated_records
-        return status, ai_result if ai_result else {}, len(failed_records) > 0
     process_button.click(
         fn=process_and_display,
-        inputs=[uploaded_files, object_name] + [comp for comp in manual_mapping_inputs.value],
-        outputs=[status_output, ai_result_output, gr.State(visible=True)]
     )
-    # Epic 6: Reconciliation Dashboard
-    with gr.Tab("Reconciliation Dashboard"):
-        failed_records_output = gr.Textbox(label="Failed Records", interactive=False, value="No failed records.")
-        def update_reconciliation():
-            global failed_records
-            if failed_records:
-                return "\n".join([f"{i}: {rec['pdf_name']} - {rec['error']}" for i, rec in enumerate(failed_records)])
-            return "No failed records."
-        def retry_and_update(index, obj_name, *mapping_values):
-            manual_mappings_dict = {field: value for field, value in zip(object_fields_state.value, mapping_values) if value}
-            result, updated_records = retry_failed_record(int(index), obj_name, manual_mappings_dict)
-            global failed_records
-            failed_records = updated_records
-            return result, update_reconciliation()
-        retry_index = gr.Number(label="Select Failed Record Index", interactive=True)
-        retry_manual_inputs = gr.State(value=[gr.Textbox(label=f"{field} (Retry)", interactive=True) for field in object_fields_state.value])
-        retry_button = gr.Button("Retry")
-        retry_status = gr.Textbox(label="Retry Status", interactive=False)
-        retry_button.click(
-            fn=retry_and_update,
-            inputs=[retry_index, object_name] + [comp for comp in retry_manual_inputs.value],
-            outputs=[retry_status, failed_records_output]
         )
 demo.launch()

 import gradio as gr
+from transformers import LayoutLMv3Tokenizer, LayoutLMv3ForTokenClassification
+import torch
+from PIL import Image
 import os
 import tempfile
+from tqdm import tqdm
+import re
+from ai_mapping import extract_key_values_with_layoutlm, run_ai_mapping_with_layoutlm
+from ocr_utils import extract_text_from_pdf_with_tesseract_or_layoutlm
+from salesforce_utils import get_token, create_or_update_record
+# Initialize global state
+# Maps generated IDs ("Contract_<n>") to a dict holding the extracted data,
+# detected risks, CLM fields and status; filled in by process_contract().
+contract_data = {}  # In-memory contract repository
+# Counters that process_contract() resets on every call.
+processed_files = 0
+total_files = 0
+# Load pre-trained LayoutLMv3 model and tokenizer (placeholder for future fine-tuning)
+# NOTE(review): both objects are created at import time, yet nothing in the
+# code shown here references them afterwards - confirm they are still needed.
+tokenizer = LayoutLMv3Tokenizer.from_pretrained("microsoft/layoutlmv3-base")
+model = LayoutLMv3ForTokenClassification.from_pretrained("microsoft/layoutlmv3-base")
+def save_temp_file(pdf_bytes):
+    """Write PDF bytes to a fresh temporary ``.pdf`` file and return its path.
+
+    The file is created with ``delete=False``, so it stays on disk after this
+    function returns; the caller is responsible for removing it.
+    """
+    handle = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
+    with handle:
+        handle.write(pdf_bytes)
+    return handle.name
+def detect_risks(data):
+    """Return human-readable risk warnings for extracted contract data.
+
+    Args:
+        data: Mapping of extracted fields. Only the ``"Date"`` and
+            ``"Amount"`` keys are inspected.
+
+    Returns:
+        A list of warning strings; empty when no risk is detected.
+    """
+    risks = []
+    if not data.get("Date"):
+        risks.append("No expiration date detected - potential obligation risk.")
+    amount = data.get("Amount")
+    if amount:
+        try:
+            # str() lets already-numeric amounts through; '$' and ',' are
+            # stripped so values such as "$1,250,000.00" parse.
+            value = float(str(amount).replace('$', '').replace(',', '').strip())
+        except ValueError:
+            # An unparseable amount (e.g. "N/A") must not abort processing of
+            # the whole contract; it simply yields no amount-based warning.
+            value = None
+        if value is not None and value > 1000000:
+            risks.append("Large amount detected - review for financial risk.")
+    return risks
+def process_contract(pdf_bytes, object_type):
+    """Process one contract PDF: OCR, key-value extraction, risk detection, storage.
+
+    Args:
+        pdf_bytes: Raw bytes of the uploaded PDF.
+        object_type: Object type chosen in the UI. Stored as ``Type__c`` and
+            used to build the ``<object_type>__c`` name passed to
+            ``create_or_update_record``.
+
+    Returns:
+        A ``(status, key_data, risks, progress)`` tuple. When no text is
+        extracted or extraction reports failure, ``key_data`` is ``{}``,
+        ``risks`` is ``[]`` and ``progress`` is ``"0/1"``.
+    """
+    global processed_files, total_files
+    total_files = 1
+    processed_files = 0
+    print("Received file - Starting processing")
+    temp_path = save_temp_file(pdf_bytes)
+    print(f"Temporary file created at: {temp_path}")
+    try:
+        page_data = extract_text_from_pdf_with_tesseract_or_layoutlm(temp_path)
+        # Guard len() so an empty/None OCR result reaches the check below
+        # instead of raising TypeError.
+        print(f"OCR result pages: {len(page_data) if page_data else 0}")
+        if not page_data or all("No text detected" in page["text"] for page in page_data):
+            print("No text extracted from PDF.")
+            return "❌ No text extracted from PDF.", {}, [], "0/1"
+        print("Extracting key data")
+        key_data = extract_key_values_with_layoutlm(page_data, temp_path)
+        print(f"Key data extracted: {key_data}")
+        if key_data.get("status") == "failed":
+            error = key_data.get('error', 'Unknown error')
+            print(f"Extraction failed: {error}")
+            return f"❌ Extraction failed: {error}", {}, [], "0/1"
+        print("Detecting risks")
+        risks = detect_risks(key_data)
+        print(f"Detected risks: {risks}")
+        status = "✅ Processed" if not risks else "⚠️ Processed with risks"
+        # Computed once so the record name and the repository key always agree.
+        contract_id = f"Contract_{len(contract_data) + 1}"
+        # Mock CLM fields with Salesforce-ready structure
+        clm_fields = {"Name": contract_id, "Type__c": object_type, "Status__c": status}
+        clm_fields.update({k: v for k, v in key_data.items() if k not in ["status", "error", "key_values"]})
+        # Optional Salesforce sync: best effort, a failure here is only logged
+        # and never prevents the contract from being stored locally.
+        try:
+            token, instance_url = get_token()
+            sf_response = create_or_update_record(f"{object_type}__c", clm_fields, token, instance_url)
+            if "error" in sf_response:
+                print(f"Salesforce sync failed: {sf_response['error']}")
+            else:
+                print(f"Salesforce sync successful: {sf_response}")
+        except Exception as e:
+            print(f"Salesforce sync error: {str(e)}")
+        contract_data[contract_id] = {
+            "data": key_data,
+            "risks": risks,
+            "clm_fields": clm_fields,
+            "status": status
+        }
+        processed_files = 1
+        progress = "1/1"
+        print(f"Processing completed - ID: {contract_id}, Progress: {progress}")
+        return status, key_data, risks, progress
+    finally:
+        # Remove the temporary PDF on every exit path, including when OCR or
+        # extraction raises, so failed uploads do not pile up on disk.
+        os.unlink(temp_path)
+def search_contracts(query):
+    """Look up stored contracts whose string form contains ``query``.
+
+    Matching is case-insensitive and runs against ``str()`` of each stored
+    record. Returns the matching ``{contract_id: record}`` entries, or a
+    single-entry "No matches" dict when nothing matches.
+    """
+    matches = {}
+    for contract_id, record in contract_data.items():
+        if query.lower() in str(record).lower():
+            matches[contract_id] = record
+    if not matches:
+        return {"No matches": "No contracts found matching the query."}
+    return matches
 # Gradio UI
+# Builds the page: upload row, object-type selector, result widgets, and a
+# "Contract Repository" tab for searching previously processed contracts.
+with gr.Blocks(title="Contract Intelligence App") as demo:
     with gr.Row():
+        # NOTE(review): type="binary" presumably delivers each upload as raw
+        # bytes, which is what process_contract() writes to disk - confirm
+        # against the installed Gradio version.
+        file_input = gr.File(type="binary", file_types=["pdf"], file_count="multiple", label="Upload Contracts")
+        upload_progress = gr.Textbox(label="Progress", value="0/0", interactive=False)
+    object_type = gr.Dropdown(choices=["Contract", "Agreement", "Invoice"], label="Select Object Type")
+    process_button = gr.Button("Process Contracts")
+    status_output = gr.Textbox(label="Status", interactive=False)
+    extracted_data_output = gr.JSON(label="Extracted Data")
+    risks_output = gr.Textbox(label="Detected Risks", interactive=False)
+    def process_and_display(files, obj_type):
+        """Run process_contract() on every uploaded file and merge the results.
+
+        Returns a 4-tuple matching the click handler's outputs: status lines
+        joined by newlines, a dict of extracted data keyed ``File_<i>``, the
+        joined risk messages (or "No risks detected"), and a progress update.
+        """
+        if not files:
+            return "❌ No files uploaded.", {}, "No risks detected", gr.update(value="0/0")
+        results = []
+        all_data = {}
+        all_risks = []
+        for i, file in enumerate(files):
+            # The per-file progress string from process_contract() is discarded;
+            # overall progress is computed once after the loop.
+            status, data, risks, _ = process_contract(file, obj_type)
+            results.append(f"{status} - File: File_{i}")
+            all_data.update({f"File_{i}": data})
+            all_risks.extend(risks)
+        # Reported only after every file has been handled, so it is always "n/n".
+        progress = f"{len(files)}/{len(files)}"
+        return "\n".join(results), all_data, "\n".join(all_risks) if all_risks else "No risks detected", gr.update(value=progress)
     process_button.click(
         fn=process_and_display,
+        inputs=[file_input, object_type],
+        outputs=[status_output, extracted_data_output, risks_output, upload_progress]
     )
+    # Search tab over the in-memory contract_data repository.
+    with gr.Tab("Contract Repository"):
+        search_query = gr.Textbox(label="Search Contracts", placeholder="Enter keyword...")
+        search_results = gr.JSON(label="Search Results")
+        search_button = gr.Button("Search")
+        search_button.click(
+            fn=search_contracts,
+            inputs=search_query,
+            outputs=search_results
         )
 demo.launch()