Spaces:
Sleeping
Sleeping
| import layoutparser as lp | |
| import pytesseract | |
| from pdf2image import convert_from_path | |
| from PIL import Image | |
def load_images(uploaded_file):
    """Load an uploaded document as a list of images.

    Args:
        uploaded_file: Object exposing a ``name`` attribute that holds the
            file name. It may be a file-like upload (has ``read()``) or a
            path-like object such as ``pathlib.Path``.

    Returns:
        For a PDF, whatever pdf2image returns for the document (one image
        per page); otherwise a single-element list holding the image opened
        with ``PIL.Image.open``.
    """
    # Compare case-insensitively so "REPORT.PDF" is not mistaken for an image.
    if uploaded_file.name.lower().endswith(".pdf"):
        if hasattr(uploaded_file, "read"):
            # convert_from_path expects a filesystem path, not a file
            # object; file-like uploads are rendered from their bytes.
            from pdf2image import convert_from_bytes

            return convert_from_bytes(uploaded_file.read())
        # Path-like input: keep the original path-based conversion.
        return convert_from_path(uploaded_file)
    return [Image.open(uploaded_file)]
| from processor import ( | |
| load_images, | |
| analyze_layout, | |
| extract_text_from_blocks, | |
| rule_based_kv_extraction | |
| ) | |
def convert_pdf_to_images(pdf_path):
    """Render the PDF located at ``pdf_path`` into images.

    Thin wrapper around ``pdf2image.convert_from_path`` called with its
    default settings; the result of that call is returned unchanged.
    """
    pages = convert_from_path(pdf_path)
    return pages
# Lazily-built layout model shared by every analyze_layout() call, so the
# weights are loaded once instead of once per image.
_LAYOUT_MODEL = None


def _get_layout_model():
    """Return the shared PubLayNet EfficientDet model, creating it on first use."""
    global _LAYOUT_MODEL
    if _LAYOUT_MODEL is None:
        _LAYOUT_MODEL = lp.EfficientDetLayoutModel(
            "lp://efficientdet/PubLayNet",
            # EfficientDet models take a dict here; the Detectron2-style
            # ["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.6] list does not apply.
            extra_config={"output_confidence_threshold": 0.6},
            # layoutparser's EfficientDet PubLayNet labels start at 1
            # (unlike the 0-based Detectron2 PubLayNet label map).
            label_map={1: "Text", 2: "Title", 3: "List", 4: "Table", 5: "Figure"},
        )
    return _LAYOUT_MODEL


def analyze_layout(image):
    """Detect layout regions in ``image`` with the PubLayNet EfficientDet model.

    Args:
        image: The page image handed to the model's ``detect`` method.

    Returns:
        The result of ``model.detect(image)``; detections below a 0.6
        confidence score are filtered out by the model configuration.
    """
    return _get_layout_model().detect(image)