File size: 1,319 Bytes
ea94616
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import PyPDF2
import gradio as gr

def split_pdf(file, batch_size=100):
    """Split a PDF into consecutive chunks of at most ``batch_size`` pages.

    Each chunk is written to the current working directory as
    ``chunk_<n>.pdf`` (numbered from 1), overwriting any existing file with
    that name.

    Args:
        file: The PDF to split. Either an object that exposes the file path
            through a ``name`` attribute, or a value that can be handed to
            ``PyPDF2.PdfReader`` directly (such as a path string).
        batch_size: Maximum number of pages per chunk. Defaults to 100.

    Returns:
        A list of the chunk file names that were written, in page order.
        The list is empty when the PDF contains no pages.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Accept both an upload wrapper (path stored in ``.name``) and a plain
    # path, so the function does not break when handed a string.
    source = getattr(file, "name", file)
    pdf_reader = PyPDF2.PdfReader(source)
    total_pages = len(pdf_reader.pages)

    chunk_files = []
    # Stepping by batch_size visits each chunk start exactly once, so a page
    # count that is an exact multiple of batch_size does not produce a
    # trailing empty chunk (the old ``// batch_size + 1`` formula did).
    chunk_starts = range(0, total_pages, batch_size)
    for chunk_number, start_page in enumerate(chunk_starts, start=1):
        end_page = min(start_page + batch_size, total_pages)

        writer = PyPDF2.PdfWriter()
        for page_index in range(start_page, end_page):
            writer.add_page(pdf_reader.pages[page_index])

        # Save the batch to a separate PDF file
        batch_filename = f'chunk_{chunk_number}.pdf'
        with open(batch_filename, 'wb') as output_file:
            writer.write(output_file)

        chunk_files.append(batch_filename)

    return chunk_files

# Create the Gradio interface: one uploaded PDF in, the list of chunk files
# produced by split_pdf out.
demo = gr.Interface(
    fn=split_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.File(label="Chunked PDFs"),
    title="PDF Splitter",
    description="Upload a PDF file to split it into chunks of 100 pages each.",
)

# Launch the Gradio app only when this file is executed as a script, so that
# importing the module (e.g. to reuse ``demo`` or ``split_pdf``) does not
# start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()