Spaces:
Sleeping
Sleeping
Commit
·
93fb443
1
Parent(s):
d813a84
Update
Browse files- processor.py +13 -0
processor.py
CHANGED
|
@@ -3,6 +3,19 @@ import pytesseract
|
|
| 3 |
from pdf2image import convert_from_path
|
| 4 |
from PIL import Image
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
def convert_pdf_to_images(pdf_path):
    """Convert the PDF at ``pdf_path`` to images.

    Thin wrapper that hands ``pdf_path`` straight to pdf2image's
    ``convert_from_path`` and returns its result unchanged.
    """
    pages = convert_from_path(pdf_path)
    return pages
|
| 8 |
|
|
|
|
| 3 |
from pdf2image import convert_from_path
|
| 4 |
from PIL import Image
|
| 5 |
|
| 6 |
+
def load_images(uploaded_file):
    """Load an uploaded document as a list of images.

    Args:
        uploaded_file: Either a path-like object or a file-like upload
            exposing ``name`` and ``read()``.
            NOTE(review): presumably a Streamlit/Gradio upload object;
            confirm against the caller.

    Returns:
        For a ``.pdf`` upload, the result of pdf2image's conversion;
        otherwise a single-element list holding the opened PIL image.
    """
    # Fall back to the object itself so plain string paths also work.
    filename = str(getattr(uploaded_file, "name", uploaded_file))

    # Match the extension case-insensitively so "SCAN.PDF" is not handed to
    # Image.open, which cannot decode PDFs.
    if filename.lower().endswith(".pdf"):
        if hasattr(uploaded_file, "read"):
            # convert_from_path expects a filesystem path; an in-memory
            # upload has to be converted from its raw bytes instead.
            from pdf2image import convert_from_bytes

            return convert_from_bytes(uploaded_file.read())
        return convert_from_path(uploaded_file)

    return [Image.open(uploaded_file)]
|
| 11 |
+
|
| 12 |
+
from processor import (
|
| 13 |
+
load_images,
|
| 14 |
+
analyze_layout,
|
| 15 |
+
extract_text_from_blocks,
|
| 16 |
+
rule_based_kv_extraction
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
def convert_pdf_to_images(pdf_path):
    """Convert the PDF at ``pdf_path`` to images.

    Thin wrapper that hands ``pdf_path`` straight to pdf2image's
    ``convert_from_path`` and returns its result unchanged.
    """
    pages = convert_from_path(pdf_path)
    return pages
|
| 21 |
|