Spaces:

pavansuresh
/

SmartContractMigrator

Sleeping

App Files Community

pavansuresh committed on Jul 9, 2025

Commit

83973ae

verified ·

1 Parent(s): 4c19d6c

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -22

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ from PIL import Image
 import os
 import tempfile
 from tqdm import tqdm
-import subprocess
 import re
 from ai_mapping import extract_key_values_with_layoutlm, run_ai_mapping_with_layoutlm
 from ocr_utils import extract_text_from_pdf_with_tesseract_or_layoutlm
@@ -20,22 +19,6 @@ total_files = 0
 tokenizer = LayoutLMv3Tokenizer.from_pretrained("microsoft/layoutlmv3-base")
 model = LayoutLMv3ForTokenClassification.from_pretrained("microsoft/layoutlmv3-base")
-def check_poppler():
-    """Check if poppler-utils is installed."""
-    try:
-        subprocess.run(['pdftoppm', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        return True
-    except FileNotFoundError:
-        return False
-def check_tesseract():
-    """Check if tesseract-ocr is installed."""
-    try:
-        subprocess.run(['tesseract', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        return True
-    except FileNotFoundError:
-        return False
 def save_temp_file(pdf_bytes):
     """Save PDF bytes to a temporary file and return the path."""
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
@@ -58,11 +41,6 @@ def process_contract(pdf_bytes, object_type):
     processed_files = 0
     print("Received file - Starting processing")
-    if not check_poppler() or not check_tesseract():
-        error_msg = "Error: Required dependencies missing. Install poppler-utils (e.g., 'sudo apt-get install poppler-utils') and tesseract-ocr (e.g., 'sudo apt-get install tesseract-ocr')."
-        print(error_msg)
-        return error_msg, {}, [], "0/1"
     temp_path = save_temp_file(pdf_bytes)
     print(f"Temporary file created at: {temp_path}")
     text = extract_text_from_pdf_with_tesseract_or_layoutlm(temp_path)

 import os
 import tempfile
 from tqdm import tqdm
 import re
 from ai_mapping import extract_key_values_with_layoutlm, run_ai_mapping_with_layoutlm
 from ocr_utils import extract_text_from_pdf_with_tesseract_or_layoutlm
 tokenizer = LayoutLMv3Tokenizer.from_pretrained("microsoft/layoutlmv3-base")
 model = LayoutLMv3ForTokenClassification.from_pretrained("microsoft/layoutlmv3-base")
 def save_temp_file(pdf_bytes):
     """Save PDF bytes to a temporary file and return the path."""
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
     processed_files = 0
     print("Received file - Starting processing")
     temp_path = save_temp_file(pdf_bytes)
     print(f"Temporary file created at: {temp_path}")
     text = extract_text_from_pdf_with_tesseract_or_layoutlm(temp_path)