Spaces:
Sleeping
Sleeping
File size: 1,926 Bytes
5071de2 5eb110a 5071de2 5eb110a e949cd7 5eb110a e949cd7 5eb110a e949cd7 5eb110a e949cd7 5eb110a e949cd7 5eb110a 25ea517 e949cd7 5eb110a 5071de2 e949cd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import contextlib
import os
import tempfile

import easyocr
import fitz  # PyMuPDF
import gradio as gr
from fpdf import FPDF
from PIL import Image
# Build the EasyOCR engine once, at import time, so every OCR call in this
# module shares the same reader instead of constructing a new one.
reader = easyocr.Reader(["en"])  # list of language codes; "en" selects English
# Step 1: Convert PDF Pages to Images
def pdf_to_images(pdf_path, *, output_dir=None, dpi=None):
    """Render every page of a PDF to a PNG file and return the file paths.

    Args:
        pdf_path: Path of the PDF document to rasterize.
        output_dir: Directory that receives the PNG files. ``None`` (the
            default) writes them relative to the current working directory,
            exactly as before.
        dpi: Optional render resolution forwarded to ``Page.get_pixmap``.
            ``None`` (the default) keeps PyMuPDF's own default resolution.

    Returns:
        A list of image paths, one per page in page order, named
        ``page_<n>.png`` with ``n`` starting at 1.
    """
    # Only forward ``dpi`` when the caller asked for it, so the default call
    # is identical to a bare ``get_pixmap()``.
    render_options = {} if dpi is None else {"dpi": dpi}

    images = []
    # The context manager closes the document even when rendering or saving
    # a page raises, so the file handle is never leaked.
    with fitz.open(pdf_path) as pdf_document:
        for page_number, page in enumerate(pdf_document, start=1):
            pix = page.get_pixmap(**render_options)  # rasterize the page
            image_name = f"page_{page_number}.png"
            if output_dir is None:
                image_path = image_name
            else:
                image_path = os.path.join(output_dir, image_name)
            pix.save(image_path)
            images.append(image_path)
    return images
# Step 2: Extract Text Using EasyOCR
def extract_text_easyocr(images, ocr_reader=None):
    """Run OCR over each image and return one text string per image.

    Args:
        images: Iterable of image inputs accepted by ``readtext`` (this file
            passes paths of rendered page images).
        ocr_reader: Optional object exposing an EasyOCR-style
            ``readtext(image, detail=0)`` method. ``None`` (the default) uses
            the module-level ``reader``; pass another reader to OCR a
            different language set.

    Returns:
        A list with one entry per input image, in input order. Each entry is
        the recognised text fragments of that image joined with newlines.
    """
    active_reader = reader if ocr_reader is None else ocr_reader
    # detail=0 makes readtext return plain strings without bounding boxes.
    return [
        "\n".join(active_reader.readtext(image_path, detail=0))
        for image_path in images
    ]
# Step 3: Create Editable PDF
def create_editable_pdf(text_pages, output_pdf_path):
    """Write a text-only PDF containing one page per entry of ``text_pages``.

    Args:
        text_pages: Iterable of strings; each string becomes the body text of
            its own page (long text flows onto further pages through the
            automatic page break).
        output_pdf_path: Destination path of the generated PDF.

    Note:
        The built-in "Arial" core font can only encode Latin-1. Characters
        outside that range (common in OCR output: curly quotes, bullets,
        the euro sign, ...) are replaced with "?" instead of aborting the
        whole document with an encoding error.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    # The font is retained from page to page, so it is selected once here.
    pdf.set_font("Arial", size=12)
    for text in text_pages:
        pdf.add_page()
        # Round-trip through Latin-1 so the core font never sees a character
        # it cannot encode.
        printable_text = text.encode("latin-1", errors="replace").decode("latin-1")
        pdf.multi_cell(0, 10, printable_text)
    pdf.output(output_pdf_path)
# Main Function
def process_pdf(file):
    """Turn an uploaded PDF into a new PDF made of its OCR'd text.

    Args:
        file: The upload handed over by the Gradio ``File`` input. Both a
            plain filesystem path and an object exposing the path as
            ``.name`` are accepted.

    Returns:
        Path of the generated ``Editable_Output.pdf``. It lives in a freshly
        created temporary directory so that separate requests never
        overwrite each other's result.

    Raises:
        gr.Error: If no file was supplied.
    """
    if file is None:
        raise gr.Error("Please upload a PDF file first.")

    # Accept a path directly; otherwise fall back to the ``.name`` attribute
    # the original implementation relied on.
    if isinstance(file, (str, os.PathLike)):
        input_pdf_path = os.fspath(file)
    else:
        input_pdf_path = file.name

    # A unique directory per call keeps the download name stable while
    # avoiding collisions. It is intentionally not removed here because the
    # returned file must still exist after this function returns.
    output_dir = tempfile.mkdtemp(prefix="ocr_pdf_")
    output_pdf_path = os.path.join(output_dir, "Editable_Output.pdf")

    # Convert PDF to images and extract text
    images = pdf_to_images(input_pdf_path)
    try:
        text_pages = extract_text_easyocr(images)
    finally:
        # The page images are only an intermediate product; delete them on a
        # best-effort basis so they do not pile up across requests.
        for image_path in images:
            with contextlib.suppress(OSError):
                os.remove(image_path)

    # Create a new PDF with extracted text
    create_editable_pdf(text_pages, output_pdf_path)
    return output_pdf_path
# Gradio Interface
iface = gr.Interface(
    title="OCR PDF to Editable Text",
    description="Upload a PDF to extract and replace curved text with editable text.",
    # process_pdf receives the uploaded file and returns the path of the
    # generated PDF, which the output component offers for download.
    fn=process_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.File(label="Download Editable PDF"),
)

# Start the web server when this module is executed; share=True additionally
# asks Gradio for a public share link.
iface.launch(share=True)
|