pavansuresh committed on
Commit
0ae7721
·
verified ·
1 Parent(s): 6ceb267

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +4 -0
utils.py CHANGED
@@ -58,6 +58,8 @@ def extract_text_from_pdf(pdf_path):
58
  text_data = [pytesseract.image_to_string(img) for img in images]
59
  return {"pages": text_data}, None
60
  except Exception as e:
 
 
61
  return {}, str(e)
62
 
63
  # Key-Value Pair Extraction using LayoutLMv3
@@ -74,6 +76,8 @@ def extract_key_value_pairs(pdf_path):
74
  extracted_data.append({"keys": ["Contract Number", "Date"], "values": ["12345", "2025-01-01"]})
75
  return extracted_data, None
76
  except Exception as e:
 
 
77
  return [], str(e)
78
 
79
  # Map Extracted Data to Salesforce Fields
 
58
  text_data = [pytesseract.image_to_string(img) for img in images]
59
  return {"pages": text_data}, None
60
  except Exception as e:
61
+ if "poppler" in str(e).lower():
62
+ return {}, "Error: Unable to process PDF. Please ensure Poppler is installed and in PATH (e.g., 'apt-get install poppler-utils' on Ubuntu)."
63
  return {}, str(e)
64
 
65
  # Key-Value Pair Extraction using LayoutLMv3
 
76
  extracted_data.append({"keys": ["Contract Number", "Date"], "values": ["12345", "2025-01-01"]})
77
  return extracted_data, None
78
  except Exception as e:
79
+ if "poppler" in str(e).lower():
80
+ return [], "Error: Unable to process PDF. Please ensure Poppler is installed and in PATH (e.g., 'apt-get install poppler-utils' on Ubuntu)."
81
  return [], str(e)
82
 
83
  # Map Extracted Data to Salesforce Fields