Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -58,6 +58,8 @@ def extract_text_from_pdf(pdf_path):
|
|
| 58 |
text_data = [pytesseract.image_to_string(img) for img in images]
|
| 59 |
return {"pages": text_data}, None
|
| 60 |
except Exception as e:
|
|
|
|
|
|
|
| 61 |
return {}, str(e)
|
| 62 |
|
| 63 |
# Key-Value Pair Extraction using LayoutLMv3
|
|
@@ -74,6 +76,8 @@ def extract_key_value_pairs(pdf_path):
|
|
| 74 |
extracted_data.append({"keys": ["Contract Number", "Date"], "values": ["12345", "2025-01-01"]})
|
| 75 |
return extracted_data, None
|
| 76 |
except Exception as e:
|
|
|
|
|
|
|
| 77 |
return [], str(e)
|
| 78 |
|
| 79 |
# Map Extracted Data to Salesforce Fields
|
|
|
|
| 58 |
text_data = [pytesseract.image_to_string(img) for img in images]
|
| 59 |
return {"pages": text_data}, None
|
| 60 |
except Exception as e:
|
| 61 |
+
if "poppler" in str(e).lower():
|
| 62 |
+
return {}, "Error: Unable to process PDF. Please ensure Poppler is installed and in PATH (e.g., 'apt-get install poppler-utils' on Ubuntu)."
|
| 63 |
return {}, str(e)
|
| 64 |
|
| 65 |
# Key-Value Pair Extraction using LayoutLMv3
|
|
|
|
| 76 |
extracted_data.append({"keys": ["Contract Number", "Date"], "values": ["12345", "2025-01-01"]})
|
| 77 |
return extracted_data, None
|
| 78 |
except Exception as e:
|
| 79 |
+
if "poppler" in str(e).lower():
|
| 80 |
+
return [], "Error: Unable to process PDF. Please ensure Poppler is installed and in PATH (e.g., 'apt-get install poppler-utils' on Ubuntu)."
|
| 81 |
return [], str(e)
|
| 82 |
|
| 83 |
# Map Extracted Data to Salesforce Fields
|