Spaces:

pavansuresh
/

SmartContractMigrator

Sleeping

App Files Files Community

pavansuresh committed on Jul 5, 2025

Commit

04bf8c5

verified ·

1 Parent(s): c8b6167

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -22

app.py CHANGED Viewed

@@ -11,12 +11,11 @@ import subprocess
 import re
 # Initialize global state
-contract_data = {}  # In-memory repository
-failed_records = []
 processed_files = 0
 total_files = 0
-# Load pre-trained LayoutLMv3 model and tokenizer
 tokenizer = LayoutLMv3Tokenizer.from_pretrained("microsoft/layoutlmv3-base")
 model = LayoutLMv3ForTokenClassification.from_pretrained("microsoft/layoutlmv3-base")
@@ -50,24 +49,22 @@ def extract_text_from_pdf(pdf_bytes):
         text = ""
         for img in images:
             text += pytesseract.image_to_string(img) + "\n"
-        print(f"OCR completed - Extracted text length: {len(text)}")
         return text
     except Exception as e:
-        print(f"OCR failed: {str(e)}")
         return f"Error extracting text: {str(e)}"
     finally:
         if os.path.exists(temp_path):
             os.unlink(temp_path)
 def extract_key_data(text):
-    """Extract key data (dates, amounts, clauses) using simple regex as a mock AI."""
     dates = re.findall(r'\d{1,2}/\d{1,2}/\d{4}', text)
     amounts = re.findall(r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?', text)
     clauses = re.findall(r'(?:Section|Clause)\s+\d+\.\d+\s+(.+?)(?=\n|$)', text, re.DOTALL)
     return {"dates": dates, "amounts": amounts, "clauses": clauses}
 def detect_risks(data):
-    """Basic risk detection: flag missing dates or large amounts."""
     risks = []
     if not data["dates"]:
         risks.append("No expiration date detected - potential obligation risk.")
@@ -81,19 +78,19 @@ def process_contract(pdf_bytes, object_type):
     total_files = 1
     processed_files = 0
-    print(f"Received file - Starting processing")
     text = extract_text_from_pdf(pdf_bytes)
     if isinstance(text, str) and text.startswith("Error"):
         return text, {}, [], "0/1"
-    print(f"Extracting key data")
     key_data = extract_key_data(text)
-    print(f"Detecting risks")
     risks = detect_risks(key_data)
     status = "✅ Processed" if not risks else "⚠️ Processed with risks"
     # Mock CLM integration with predefined fields
-    clm_fields = {"Name": "Contract_001", "Type": object_type, "Status": status}
     clm_fields.update(key_data)
     contract_id = f"Contract_{len(contract_data) + 1}"
@@ -117,21 +114,29 @@ def search_contracts(query):
 # Gradio UI
 with gr.Blocks(title="Contract Intelligence App") as demo:
     with gr.Row():
-        file_input = gr.File(type="binary", file_types=["pdf"], file_count="single", label="Upload Contract PDF")
         upload_progress = gr.Textbox(label="Progress", value="0/0", interactive=False)
     object_type = gr.Dropdown(choices=["Contract", "Agreement", "Invoice"], label="Select Object Type")
-    process_button = gr.Button("Process Contract")
     status_output = gr.Textbox(label="Status", interactive=False)
     extracted_data_output = gr.JSON(label="Extracted Data")
     risks_output = gr.Textbox(label="Detected Risks", interactive=False)
-    def process_and_display(file, obj_type):
-        if file:
-            status, data, risks, progress = process_contract(file, obj_type)
-            return status, data, "\n".join(risks) if risks else "No risks detected", gr.update(value=progress)
-        return "❌ No file uploaded.", {}, "No risks detected", gr.update(value="0/0")
     process_button.click(
         fn=process_and_display,
@@ -144,11 +149,8 @@ with gr.Blocks(title="Contract Intelligence App") as demo:
         search_results = gr.JSON(label="Search Results")
         search_button = gr.Button("Search")
-        def search_and_display(query):
-            return search_contracts(query)
         search_button.click(
-            fn=search_and_display,
             inputs=search_query,
             outputs=search_results
         )

 import re
 # Initialize global state
+contract_data = {}  # In-memory contract repository
 processed_files = 0
 total_files = 0
+# Load pre-trained LayoutLMv3 model and tokenizer (placeholder for future fine-tuning)
 tokenizer = LayoutLMv3Tokenizer.from_pretrained("microsoft/layoutlmv3-base")
 model = LayoutLMv3ForTokenClassification.from_pretrained("microsoft/layoutlmv3-base")
         text = ""
         for img in images:
             text += pytesseract.image_to_string(img) + "\n"
         return text
     except Exception as e:
         return f"Error extracting text: {str(e)}"
     finally:
         if os.path.exists(temp_path):
             os.unlink(temp_path)
 def extract_key_data(text):
+    """Extract key data (dates, amounts, clauses) using regex as a mock AI."""
     dates = re.findall(r'\d{1,2}/\d{1,2}/\d{4}', text)
     amounts = re.findall(r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?', text)
     clauses = re.findall(r'(?:Section|Clause)\s+\d+\.\d+\s+(.+?)(?=\n|$)', text, re.DOTALL)
     return {"dates": dates, "amounts": amounts, "clauses": clauses}
 def detect_risks(data):
+    """Detect risks (e.g., missing dates, large amounts)."""
     risks = []
     if not data["dates"]:
         risks.append("No expiration date detected - potential obligation risk.")
     total_files = 1
     processed_files = 0
+    print("Received file - Starting processing")
     text = extract_text_from_pdf(pdf_bytes)
     if isinstance(text, str) and text.startswith("Error"):
         return text, {}, [], "0/1"
+    print("Extracting key data")
     key_data = extract_key_data(text)
+    print("Detecting risks")
     risks = detect_risks(key_data)
     status = "✅ Processed" if not risks else "⚠️ Processed with risks"
     # Mock CLM integration with predefined fields
+    clm_fields = {"Name": f"Contract_{len(contract_data) + 1}", "Type": object_type, "Status": status}
     clm_fields.update(key_data)
     contract_id = f"Contract_{len(contract_data) + 1}"
 # Gradio UI
 with gr.Blocks(title="Contract Intelligence App") as demo:
     with gr.Row():
+        file_input = gr.File(type="binary", file_types=["pdf"], file_count="multiple", label="Upload Contracts")
         upload_progress = gr.Textbox(label="Progress", value="0/0", interactive=False)
     object_type = gr.Dropdown(choices=["Contract", "Agreement", "Invoice"], label="Select Object Type")
+    process_button = gr.Button("Process Contracts")
     status_output = gr.Textbox(label="Status", interactive=False)
     extracted_data_output = gr.JSON(label="Extracted Data")
     risks_output = gr.Textbox(label="Detected Risks", interactive=False)
+    def process_and_display(files, obj_type):
+        if not files:
+            return "❌ No files uploaded.", {}, "No risks detected", gr.update(value="0/0")
+        results = []
+        all_data = {}
+        all_risks = []
+        for file in files:
+            status, data, risks, _ = process_contract(file, obj_type)
+            results.append(f"{status} - File: {os.path.basename(file.decode() if isinstance(file, bytes) else file)}")
+            all_data.update({f"File_{len(all_data)}": data})
+            all_risks.extend(risks)
+        progress = f"{len(files)}/{len(files)}"
+        return "\n".join(results), all_data, "\n".join(all_risks) if all_risks else "No risks detected", gr.update(value=progress)
     process_button.click(
         fn=process_and_display,
         search_results = gr.JSON(label="Search Results")
         search_button = gr.Button("Search")
         search_button.click(
+            fn=search_contracts,
             inputs=search_query,
             outputs=search_results
         )