ChrisSacrumCor's picture
Create app.py
ea94616 verified
import os
import tempfile

import gradio as gr
import PyPDF2
def split_pdf(file, batch_size=100):
    """Split an uploaded PDF into consecutive chunks of at most ``batch_size`` pages.

    Each chunk is written as ``chunk_<n>.pdf`` (numbered from 1) inside a
    freshly created temporary directory, so repeated or concurrent calls
    never overwrite each other's output.

    Args:
        file: The uploaded PDF. Either an object exposing the file path as
            ``.name`` or the path itself.
        batch_size: Maximum number of pages per chunk. Defaults to 100.

    Returns:
        A list of paths to the chunk files, in page order. The list is empty
        when the PDF has no pages.

    Raises:
        ValueError: If ``file`` is ``None`` or ``batch_size`` is less than 1.
    """
    if file is None:
        raise ValueError("No PDF file was uploaded.")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer.")

    # Accept both an upload wrapper (path stored in ``.name``) and a bare path.
    pdf_path = getattr(file, "name", file)
    pdf_reader = PyPDF2.PdfReader(pdf_path)
    total_pages = len(pdf_reader.pages)

    # A private directory per call keeps the fixed chunk names from colliding.
    # It is intentionally not removed here: the caller still has to read the
    # returned files.
    output_dir = tempfile.mkdtemp(prefix="pdf_chunks_")

    chunk_files = []
    # Stepping through the start offsets yields exactly ceil(total / size)
    # chunks, so a page count that is a multiple of ``batch_size`` (or zero)
    # no longer produces a trailing empty PDF.
    chunk_starts = range(0, total_pages, batch_size)
    for chunk_number, start_page in enumerate(chunk_starts, start=1):
        end_page = min(start_page + batch_size, total_pages)

        writer = PyPDF2.PdfWriter()
        for page_index in range(start_page, end_page):
            writer.add_page(pdf_reader.pages[page_index])

        batch_filename = os.path.join(output_dir, f"chunk_{chunk_number}.pdf")
        with open(batch_filename, "wb") as output_file:
            writer.write(output_file)
        chunk_files.append(batch_filename)

    return chunk_files
# Wire split_pdf into a web UI: one upload widget in, one download widget out
_upload_widget = gr.File(label="Upload PDF")
_download_widget = gr.File(label="Chunked PDFs")
demo = gr.Interface(
    fn=split_pdf,
    inputs=_upload_widget,
    outputs=_download_widget,
    title="PDF Splitter",
    description="Upload a PDF file to split it into chunks of 100 pages each.",
)

# Start serving the app
demo.launch()