Spaces:
Build error
Build error
| import pandas as pd | |
| import PIL | |
| from PIL import Image | |
| from PIL import ImageDraw | |
| import gradio as gr | |
| import torch | |
| import easyocr | |
| import fitz # PyMuPDF | |
# Function to extract images from PDF
def pdf_to_images(pdf_path):
    """Render every page of a PDF into a PIL image.

    Args:
        pdf_path: Path of the PDF file, handed straight to ``fitz.open``.

    Returns:
        A list with one RGB ``PIL.Image.Image`` per page, in page order.
    """
    doc = fitz.open(pdf_path)
    try:
        images = []
        for page in doc:
            # NOTE(review): relies on get_pixmap()'s defaults producing
            # 3-channel samples with no alpha, which is what "RGB" expects.
            pix = page.get_pixmap()
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
        return images
    finally:
        # Release the underlying file handle even if rendering fails.
        doc.close()
def draw_boxes(image, bounds, color='yellow', width=2):
    """Outline each detection on ``image`` and return that same image.

    Args:
        image: Image to annotate; it is drawn on in place.
        bounds: Iterable of detections. The first item of each detection
            must be a sequence of exactly four corner points.
        color: Line colour passed to ``ImageDraw`` as ``fill``.
        width: Line width in pixels.

    Returns:
        The ``image`` object that was passed in, now annotated.
    """
    pen = ImageDraw.Draw(image)
    for detection in bounds:
        first, second, third, fourth = detection[0]
        # Repeat the first corner at the end so the outline is closed.
        outline = [*first, *second, *third, *fourth, *first]
        pen.line(outline, fill=color, width=width)
    return image
def inference(pdf, lang):
    """Run EasyOCR over every page of an uploaded PDF.

    The Gradio interface in this file declares exactly two outputs (one file
    and one dataframe), so this returns a single ``(file, dataframe)`` pair
    instead of one pair per page: the annotated pages are bundled into one
    multi-page PDF and the detections from all pages share one table.

    Args:
        pdf: Uploaded file object; only its ``name`` attribute (a path) is
            used.
        lang: List of EasyOCR language codes used to build the reader.

    Returns:
        A tuple ``(result_path, table)`` where ``result_path`` is the path of
        a PDF containing every page with detection boxes drawn on it, and
        ``table`` is a DataFrame with ``text`` and ``confidence`` columns,
        one row per detection, in page order.

    Raises:
        ValueError: If the PDF yields no pages.
    """
    reader = easyocr.Reader(lang)
    pages = pdf_to_images(pdf.name)
    if not pages:
        raise ValueError('The uploaded PDF has no pages to process.')

    rows = []
    for i, img in enumerate(pages):
        # readtext() is given a file path, so each page is written out first.
        img_path = f'page_{i + 1}.jpg'
        img.save(img_path)
        bounds = reader.readtext(img_path)
        draw_boxes(img, bounds)
        # Each detection is (box, text, confidence); the box is only needed
        # for drawing, so the table keeps just the last two fields.
        rows.extend((text, confidence) for _, text, confidence in bounds)

    # Pillow writes a multi-page PDF when given save_all/append_images.
    result_path = 'result.pdf'
    pages[0].save(result_path, save_all=True, append_images=pages[1:])
    return result_path, pd.DataFrame(rows, columns=['text', 'confidence'])
# Static text shown on the demo page.
title = 'EasyOCR'
description = 'Realtime EasyOCR.'
article = "<p style='text-align: center'><a href='https://www.jaided.ai/easyocr/'>OCR for written scripts.</a> | <a href='https://github.com/JaidedAI/EasyOCR'>Github Repo</a></p>"
# One example row: [input PDF, selected languages].
examples = [['example.pdf',['en']]]
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
# Language codes offered in the checkbox group.
choices = [
    "en",
    "hi",
]

# Build the UI around inference() and start serving it.
gr.Interface(
    inference,
    [
        gr.inputs.File(type='file', label='Input PDF'),
        gr.inputs.CheckboxGroup(choices, type="value", default=['en'], label='language'),
    ],
    [
        # Unlike gr.inputs.File, the legacy gr.outputs.File component takes
        # only a label; passing `type` makes construction fail.
        gr.outputs.File(label='Output Images'),
        gr.outputs.Dataframe(headers=['text', 'confidence']),
    ],
    title=title,
    description=description,
    article=article,
    examples=examples,
    css=css,
    enable_queue=True
).launch(debug=True)