import os
import tempfile

import PyPDF2
import gradio as gr

# Number of pages written to each output chunk unless the caller overrides it.
DEFAULT_BATCH_SIZE = 100


def split_pdf(file, batch_size=DEFAULT_BATCH_SIZE):
    """Split an uploaded PDF into consecutive chunks of ``batch_size`` pages.

    Args:
        file: The upload handed over by the Gradio ``File`` input. Either an
            object exposing the on-disk path as ``.name`` or a plain path
            string is accepted.
        batch_size: Maximum number of pages per chunk (default 100).

    Returns:
        A list of paths to the generated ``chunk_<n>.pdf`` files, in page
        order. The list is empty when the PDF has no pages.

    Raises:
        gr.Error: If no file was uploaded.
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if file is None:
        raise gr.Error("Please upload a PDF file.")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Accept both a file-like wrapper (path in ``.name``) and a bare path.
    source_path = getattr(file, "name", file)
    pdf_reader = PyPDF2.PdfReader(source_path)
    total_pages = len(pdf_reader.pages)

    # A fresh directory per call keeps simultaneous or successive requests
    # from overwriting each other's chunk_<n>.pdf files.
    # NOTE(review): the directory is not removed here because the returned
    # files must outlive this call; confirm whether cleanup is needed.
    output_dir = tempfile.mkdtemp(prefix="pdf_chunks_")

    chunk_files = []
    # Stepping by batch_size yields exactly ceil(total_pages / batch_size)
    # chunks, so no empty trailing chunk is produced when the page count is
    # an exact multiple of batch_size.
    batch_starts = range(0, total_pages, batch_size)
    for chunk_number, start_page in enumerate(batch_starts, start=1):
        end_page = min(start_page + batch_size, total_pages)

        writer = PyPDF2.PdfWriter()
        for page_index in range(start_page, end_page):
            writer.add_page(pdf_reader.pages[page_index])

        batch_filename = os.path.join(output_dir, f'chunk_{chunk_number}.pdf')
        with open(batch_filename, 'wb') as output_file:
            writer.write(output_file)
        chunk_files.append(batch_filename)

    return chunk_files


# Create the Gradio interface
demo = gr.Interface(
    fn=split_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.File(label="Chunked PDFs"),
    title="PDF Splitter",
    description="Upload a PDF file to split it into chunks of 100 pages each.",
)

# Launch the Gradio app only when run as a script, so importing this module
# (e.g. for reuse of split_pdf) does not start a server.
if __name__ == "__main__":
    demo.launch()