Spaces:
Sleeping
Sleeping
| import fitz # PyMuPDF | |
| from PIL import Image | |
| import easyocr | |
| from fpdf import FPDF | |
| import gradio as gr | |
# Shared OCR engine, constructed once at import time and used by
# extract_text_easyocr(). The list holds the language codes to recognise
# ('en' = English).
reader = easyocr.Reader(['en'])
# Step 1: Convert PDF Pages to Images
def pdf_to_images(pdf_path, output_dir=None):
    """Render every page of a PDF to a PNG file and return the file paths.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.
        output_dir: Directory that receives the PNG files. It is created if
            missing. ``None`` (the default) writes the files to the current
            working directory.

    Returns:
        List of image paths in page order. Files are named ``page_<n>.png``
        with ``n`` starting at 1. The caller owns the files and is
        responsible for deleting them.
    """
    import os

    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)

    images = []
    # The context manager closes the document even when rendering or saving
    # a page raises, so the file handle is never leaked.
    with fitz.open(pdf_path) as pdf_document:
        for page_num, page in enumerate(pdf_document, start=1):
            pix = page.get_pixmap()  # Render the page as an image
            image_path = f"page_{page_num}.png"
            if output_dir is not None:
                image_path = os.path.join(output_dir, image_path)
            pix.save(image_path)
            images.append(image_path)
    return images
# Step 2: Extract Text Using EasyOCR
def extract_text_easyocr(images):
    """Run OCR on each image and return one text string per image.

    Args:
        images: Iterable of image file paths.

    Returns:
        List of strings in the same order as ``images``; the text fragments
        recognised in one image are joined with newlines.
    """

    def ocr_single_image(path):
        # detail=0 makes readtext return only the recognised strings,
        # without bounding boxes.
        fragments = reader.readtext(path, detail=0)
        return "\n".join(fragments)

    return [ocr_single_image(path) for path in images]
# Step 3: Create Editable PDF
def create_editable_pdf(text_pages, output_pdf_path):
    """Write each text string onto its own page of a new PDF file.

    Args:
        text_pages: Iterable of strings, one per output page.
        output_pdf_path: Path the generated PDF is written to.

    Characters that cannot be encoded as Latin-1 are replaced with ``?``
    before they are written.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    for text in text_pages:
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        # "Arial" is one of FPDF's built-in core fonts, which are encoded as
        # Latin-1. OCR output may contain characters outside that range
        # (e.g. the euro sign), which would make FPDF raise an encoding
        # error, so substitute them instead of failing the whole document.
        printable_text = text.encode("latin-1", "replace").decode("latin-1")
        pdf.multi_cell(0, 10, printable_text)
    pdf.output(output_pdf_path)
# Main Function
def process_pdf(file):
    """Convert an uploaded PDF into a new PDF containing its OCR'd text.

    Args:
        file: The upload handed over by Gradio. Either a filesystem path
            (``str`` / ``os.PathLike``) or a file-like object exposing the
            path as ``.name``.

    Returns:
        Path of the generated PDF (``"Editable_Output.pdf"``).

    Raises:
        ValueError: If no file was provided.
    """
    import os
    from contextlib import suppress

    if file is None:
        raise ValueError("No PDF file was uploaded.")

    # Depending on the Gradio version/configuration the File component
    # passes either a plain path or a temp-file wrapper with a .name
    # attribute; accept both.
    if isinstance(file, (str, os.PathLike)):
        input_pdf_path = os.fspath(file)
    else:
        input_pdf_path = file.name
    output_pdf_path = "Editable_Output.pdf"

    # Convert PDF to images and extract text
    images = pdf_to_images(input_pdf_path)
    try:
        text_pages = extract_text_easyocr(images)
    finally:
        # The page images are only an intermediate product; remove them so
        # they do not pile up on disk. Best effort: a file that is already
        # gone or cannot be deleted must not fail the request.
        for image_path in images:
            with suppress(OSError):
                os.remove(image_path)

    # Create a new PDF with extracted text
    create_editable_pdf(text_pages, output_pdf_path)
    return output_pdf_path
# Gradio Interface
# One file upload in, one downloadable file out, wired to process_pdf.
pdf_upload = gr.File(label="Upload PDF")
pdf_download = gr.File(label="Download Editable PDF")

iface = gr.Interface(
    process_pdf,
    pdf_upload,
    pdf_download,
    title="OCR PDF to Editable Text",
    description="Upload a PDF to extract and replace curved text with editable text.",
)

# Start the web app; share=True requests a public share link.
iface.launch(share=True)