pavansuresh committed on
Commit
38c0370
·
verified ·
1 Parent(s): af44981

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -0
app.py CHANGED
@@ -4,6 +4,7 @@ from ocr_utils import extract_text_from_pdf_with_tesseract_or_layoutlm
4
  import os
5
  import tempfile
6
  from tqdm import tqdm
 
7
 
8
  # Initialize global state for failed records and uploaded files
9
  failed_records = []
@@ -14,6 +15,14 @@ def is_pdf_file(file_bytes):
14
  valid_pdf_header = b'%PDF-'
15
  return file_bytes.startswith(valid_pdf_header) if file_bytes else False
16
 
 
 
 
 
 
 
 
 
17
  def save_failed_record(pdf_name, object_name, error, mappings):
18
  """Log failed records for reconciliation."""
19
  global failed_records
@@ -39,11 +48,16 @@ def process_contract(uploaded_files, object_name, manual_mappings):
39
  for i, file_bytes in enumerate(uploaded_files):
40
  print(f"File {i} header: {file_bytes[:5]}")
41
 
 
 
 
 
42
  # Mock Salesforce object fields (replace with dynamic logic later)
43
  mock_object_fields = ["Name", "Description", "Amount", "Date"] if object_name else []
44
  total_files = len(uploaded_files)
45
  processed_files = 0
46
  results = []
 
47
  with tqdm(total=total_files, desc="Processing PDFs") as pbar:
48
  for i, file_bytes in enumerate(uploaded_files):
49
  # Generate a filename based on index since name is not available with type="binary"
 
4
  import os
5
  import tempfile
6
  from tqdm import tqdm
7
+ import subprocess
8
 
9
  # Initialize global state for failed records and uploaded files
10
  failed_records = []
 
15
  valid_pdf_header = b'%PDF-'
16
  return file_bytes.startswith(valid_pdf_header) if file_bytes else False
17
 
18
+ def check_poppler():
19
+ """Check if poppler-utils is installed and in PATH."""
20
+ try:
21
+ subprocess.run(['pdftoppm', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
22
+ return True
23
+ except FileNotFoundError:
24
+ return False
25
+
26
  def save_failed_record(pdf_name, object_name, error, mappings):
27
  """Log failed records for reconciliation."""
28
  global failed_records
 
48
  for i, file_bytes in enumerate(uploaded_files):
49
  print(f"File {i} header: {file_bytes[:5]}")
50
 
51
+ # Check for poppler-utils
52
+ if not check_poppler():
53
+ return "❌ Error: poppler-utils is not installed or not in PATH. Please install it (e.g., 'sudo apt-get install poppler-utils' on Linux).", None, failed_records, "0/0"
54
+
55
  # Mock Salesforce object fields (replace with dynamic logic later)
56
  mock_object_fields = ["Name", "Description", "Amount", "Date"] if object_name else []
57
  total_files = len(uploaded_files)
58
  processed_files = 0
59
  results = []
60
+ ai_result = None # Initialize to avoid UnboundLocalError
61
  with tqdm(total=total_files, desc="Processing PDFs") as pbar:
62
  for i, file_bytes in enumerate(uploaded_files):
63
  # Generate a filename based on index since name is not available with type="binary"