Spaces:
Sleeping
Sleeping
File size: 1,319 Bytes
ea94616 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import os
import tempfile

import gradio as gr
import PyPDF2
def split_pdf(file, batch_size=100):
    """Split an uploaded PDF into consecutive chunks of at most ``batch_size`` pages.

    Args:
        file: The uploaded PDF, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object whose ``name``
            attribute holds the path.
        batch_size: Maximum number of pages per chunk. Defaults to 100.

    Returns:
        A list of paths to the chunk PDFs, in page order. The files are named
        ``chunk_1.pdf``, ``chunk_2.pdf``, ... and live in a temporary
        directory created for this call. The list is empty when the PDF has
        no pages.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Accept a plain path as well as an upload wrapper exposing ``.name``.
    # The isinstance check matters: ``pathlib.Path.name`` is only the basename.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = file
    else:
        pdf_path = file.name

    pdf_reader = PyPDF2.PdfReader(pdf_path)
    pages = pdf_reader.pages
    total_pages = len(pages)

    # A per-call directory keeps simultaneous or successive uploads from
    # overwriting each other's chunk_N.pdf files.
    output_dir = tempfile.mkdtemp(prefix="pdf_chunks_")

    chunk_files = []
    # Stepping by batch_size visits only offsets that actually hold pages, so
    # a page count that is an exact multiple of batch_size produces no empty
    # trailing chunk.
    chunk_starts = range(0, total_pages, batch_size)
    for chunk_number, start_page in enumerate(chunk_starts, start=1):
        end_page = min(start_page + batch_size, total_pages)

        writer = PyPDF2.PdfWriter()
        for page_index in range(start_page, end_page):
            writer.add_page(pages[page_index])

        batch_filename = os.path.join(output_dir, f"chunk_{chunk_number}.pdf")
        with open(batch_filename, "wb") as output_file:
            writer.write(output_file)
        chunk_files.append(batch_filename)

    return chunk_files
# Input and output widgets for the web UI.
pdf_upload = gr.File(label="Upload PDF")
chunk_downloads = gr.File(label="Chunked PDFs")

# Wire split_pdf to the widgets.
demo = gr.Interface(
    fn=split_pdf,
    inputs=pdf_upload,
    outputs=chunk_downloads,
    title="PDF Splitter",
    description="Upload a PDF file to split it into chunks of 100 pages each.",
)

# Start serving the app.
demo.launch()