resolverkatla committed on
Commit
93fb443
·
1 Parent(s): d813a84
Files changed (1) hide show
  1. processor.py +13 -0
processor.py CHANGED
@@ -3,6 +3,19 @@ import pytesseract
3
  from pdf2image import convert_from_path
4
  from PIL import Image
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def convert_pdf_to_images(pdf_path):
7
  return convert_from_path(pdf_path)
8
 
 
3
  from pdf2image import convert_from_path
4
  from PIL import Image
5
 
6
def load_images(uploaded_file):
    """Load an uploaded document as a list of images.

    A PDF is rendered to images by pdf2image; any other file is opened
    with PIL and returned as a one-element list.

    Args:
        uploaded_file: Object exposing a ``name`` attribute. Presumably an
            in-memory upload (e.g. a Streamlit ``UploadedFile``) -- confirm
            against the caller. Path-like objects with a ``name`` attribute
            (such as ``pathlib.Path``) are still accepted.

    Returns:
        A list of images: the result of the pdf2image conversion for PDFs,
        otherwise ``[Image.open(uploaded_file)]``.
    """
    # Compare the extension case-insensitively so "SCAN.PDF" is not
    # mistakenly handed to Image.open.
    if not uploaded_file.name.lower().endswith(".pdf"):
        return [Image.open(uploaded_file)]

    if hasattr(uploaded_file, "read"):
        # convert_from_path expects a filesystem path; an in-memory upload
        # has none, so hand its raw bytes to convert_from_bytes instead.
        from pdf2image import convert_from_bytes

        return convert_from_bytes(uploaded_file.read())

    # No read() method: treat the argument as a real path, as before.
    return convert_from_path(uploaded_file)
11
+
12
+ from processor import (
13
+ load_images,
14
+ analyze_layout,
15
+ extract_text_from_blocks,
16
+ rule_based_kv_extraction
17
+ )
18
+
19
  def convert_pdf_to_images(pdf_path):
20
  return convert_from_path(pdf_path)
21