Spaces:
Sleeping
Sleeping
| import PyPDF2 | |
| import gradio as gr | |
def split_pdf(file, batch_size=100):
    """Split an uploaded PDF into consecutive chunks of at most ``batch_size`` pages.

    Each chunk is written to the current working directory as
    ``chunk_<n>.pdf`` (numbered from 1), overwriting any existing file with
    that name.

    Args:
        file: The uploaded PDF, given either as a filesystem path string or
            as an object whose ``name`` attribute holds that path.
        batch_size: Maximum number of pages per chunk. Defaults to 100.

    Returns:
        The list of chunk filenames, in page order. The list is empty when
        the PDF contains no pages.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Accept both a plain path string and a file wrapper exposing `.name`.
    pdf_path = file if isinstance(file, str) else file.name
    pdf_reader = PyPDF2.PdfReader(pdf_path)
    pages = pdf_reader.pages
    total_pages = len(pages)

    chunk_files = []
    # Stepping through start offsets (instead of computing
    # `total // batch_size + 1` batches) avoids emitting an empty trailing
    # chunk when the page count is an exact multiple of batch_size.
    for chunk_number, start_page in enumerate(
        range(0, total_pages, batch_size), start=1
    ):
        end_page = min(start_page + batch_size, total_pages)

        writer = PyPDF2.PdfWriter()
        for page_index in range(start_page, end_page):
            writer.add_page(pages[page_index])

        batch_filename = f'chunk_{chunk_number}.pdf'
        with open(batch_filename, 'wb') as output_file:
            writer.write(output_file)
        chunk_files.append(batch_filename)

    return chunk_files
# Build the web UI: a single PDF upload goes in, the chunk files come out.
demo = gr.Interface(
    split_pdf,
    gr.File(label="Upload PDF"),
    gr.File(label="Chunked PDFs"),
    title="PDF Splitter",
    description="Upload a PDF file to split it into chunks of 100 pages each.",
)

# Start serving the app.
demo.launch()