File size: 5,635 Bytes
dff2b65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import gradio as gr
from PyPDF2 import PdfWriter, PdfReader
import zipfile
import tempfile
import fitz  # PyMuPDF

def merge_pdfs(pdf_files):
    """Merge the uploaded PDFs into one PDF whose pages are all A4.

    Every page of every input document is drawn onto a fresh A4 page of the
    output document (``keep_proportion=True``), in the order the files and
    their pages are given.

    Args:
        pdf_files: Uploaded files from the Gradio file input. Each item is
            either an object exposing its path as ``.name`` or a plain path
            string. May be ``None`` or empty.

    Returns:
        A ``(status_message, output_path)`` tuple. ``output_path`` is ``None``
        when nothing was uploaded; otherwise it is the path of ``merged.pdf``
        inside a newly created temporary directory.
    """
    if not pdf_files:
        return "❌ No PDF files uploaded.", None

    output_dir = tempfile.mkdtemp()
    output_file = os.path.join(output_dir, "merged.pdf")
    a4_rect = fitz.paper_rect("a4")

    # Context managers guarantee that both the output and every source
    # document are closed even when a source file turns out to be unreadable.
    with fitz.open() as doc_out:
        for file in pdf_files:
            # Accept upload objects (path in ``.name``) as well as plain paths.
            src_path = getattr(file, "name", file)
            with fitz.open(src_path) as src:
                for page in src:
                    page_out = doc_out.new_page(
                        width=a4_rect.width, height=a4_rect.height
                    )
                    page_out.show_pdf_page(
                        a4_rect, src, page.number, keep_proportion=True
                    )
        doc_out.save(output_file)

    return "✅ PDFs merged successfully.", output_file


def compress_pdf(file, dpi_threshold, dpi_target, quality):
    """Shrink a PDF by recompressing its images and subsetting its fonts.

    Args:
        file: Uploaded file from the Gradio file input, either an object
            exposing its path as ``.name`` or a plain path string. ``None``
            when nothing was uploaded.
        dpi_threshold: Passed to ``rewrite_images`` as ``dpi_threshold``.
        dpi_target: Passed to ``rewrite_images`` as ``dpi_target``.
        quality: Passed to ``rewrite_images`` as ``quality`` (the UI labels
            it "JPEG Quality (1-100)").

    Returns:
        A ``(status_message, output_path)`` tuple. ``output_path`` is ``None``
        when no file was uploaded; otherwise it is the path of
        ``compressed_output.pdf`` inside a newly created temporary directory.
    """
    # Same guard as the other callbacks: clicking the button without an
    # upload must produce a status message, not an AttributeError.
    if file is None:
        return "❌ No file uploaded.", None

    input_path = getattr(file, "name", file)

    # Create a unique temporary directory for the compressed output
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "compressed_output.pdf")

    # The context manager closes the document even if rewriting/saving fails.
    with fitz.open(input_path) as doc:
        # Recompress images above dpi_threshold to dpi_target at the given
        # quality; all image classes are enabled and colors are kept.
        doc.rewrite_images(
            dpi_threshold=dpi_threshold,
            dpi_target=dpi_target,
            quality=quality,
            lossy=True,
            lossless=True,
            bitonal=True,
            color=True,
            gray=True,
            set_to_gray=False,
        )

        # Subset fonts, then save with garbage collection (level 3), stream
        # compression and object streams.
        doc.subset_fonts()
        doc.save(
            output_path,
            garbage=3,
            deflate=True,
            use_objstms=True,
        )

    return "✅ PDF compressed successfully!", output_path


def split_pdf(file_path, start_page, end_page):
    """Split a page range of a PDF into single-page PDFs bundled in a ZIP.

    The requested range is clamped to the pages that actually exist, and
    ``end_page`` is raised to ``start_page`` if it lies before it. Each
    extracted page is written as ``<name>-page<N>.pdf`` (``N`` is 1-based)
    into a new temporary directory and added to ``<name>_split_pages.zip``
    in that same directory.

    Args:
        file_path: Path of the PDF to split.
        start_page: 0-based index of the first page to extract.
        end_page: 0-based index of the last page to extract (inclusive).

    Returns:
        A ``(zip_path, first_page, last_page)`` tuple where the page numbers
        are the 1-based bounds of the range that was actually extracted.

    Raises:
        ValueError: If the PDF contains no pages.
    """
    # splitext instead of slicing off 4 characters, so names without a
    # ".pdf" suffix are not truncated.
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    output_dir = tempfile.mkdtemp()
    zip_path = os.path.join(output_dir, f"{base_name}_split_pages.zip")

    # Keep the input open for the whole extraction and make sure it is
    # closed afterwards.
    with open(file_path, "rb") as pdf_stream:
        input_pdf = PdfReader(pdf_stream)
        total_pages = len(input_pdf.pages)
        if total_pages == 0:
            raise ValueError("The PDF has no pages to split.")

        # Clamp values within range; int() tolerates whole-number floats
        # coming from the number inputs.
        start_page = max(0, min(int(start_page), total_pages - 1))
        end_page = max(start_page, min(int(end_page), total_pages - 1))

        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            for i in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(input_pdf.pages[i])
                split_pdf_path = os.path.join(
                    output_dir, f"{base_name}-page{i + 1}.pdf"
                )
                with open(split_pdf_path, "wb") as f_out:
                    writer.write(f_out)
                zipf.write(
                    split_pdf_path, arcname=os.path.basename(split_pdf_path)
                )

    return zip_path, start_page + 1, end_page + 1

def process_pdf(file, start_page, end_page):
    """Gradio callback for the "Split PDF" tab.

    Args:
        file: Uploaded file from the Gradio file input, either an object
            exposing its path as ``.name`` or a plain path string. ``None``
            when nothing was uploaded.
        start_page: 0-based index of the first page to extract.
        end_page: 0-based index of the last page to extract (inclusive).

    Returns:
        A ``(status_message, zip_path)`` tuple; ``zip_path`` is ``None`` when
        no file was uploaded.
    """
    if file is None:
        return "❌ No file uploaded.", None

    # NOTE(review): a cleared number field is expected to arrive as None --
    # fall back to the first page / a single-page range instead of crashing.
    if start_page is None:
        start_page = 0
    if end_page is None:
        end_page = start_page

    file_path = getattr(file, "name", file)
    zip_file_path, actual_start, actual_end = split_pdf(file_path, start_page, end_page)
    # split_pdf reports the range it actually used as 1-based page numbers.
    status = f"✅ File '{file_path}' split from page {actual_start} to {actual_end}."
    return status, zip_file_path

# UI definition: one tab per operation, each wiring a button to its callback
# with a status textbox and a downloadable result file as outputs.
with gr.Blocks(title="PDF Utility") as demo:
    gr.Markdown("# 📄 PDF Utility App")

    with gr.Tabs():
        # --- Split: page range of one PDF -> ZIP of single-page PDFs ---
        with gr.TabItem("Split PDF"):
            gr.Markdown("Upload a PDF, select page range, and click **Split PDF** to download a ZIP of split pages.")
            with gr.Row():
                file_input = gr.File(label="Upload PDF", file_types=[".pdf"])

            with gr.Row():
                start_page = gr.Number(label="Start Page (0-based)", value=0, precision=0)
                end_page = gr.Number(label="End Page (0-based)", value=0, precision=0)

            split_button = gr.Button("🚀 Split PDF")

            status_text = gr.Textbox(label="Status", lines=2)
            download_link = gr.File(label="Download ZIP")

            split_button.click(
                fn=process_pdf,
                inputs=[file_input, start_page, end_page],
                outputs=[status_text, download_link]
            )

        # --- Compress: recompress images / subset fonts of one PDF ---
        with gr.TabItem("Compress PDF"):
            gr.Markdown("Upload a PDF and click **Compress PDF** to download the compressed version.")
            with gr.Row():
                file_input_compress = gr.File(label="Upload PDF", file_types=[".pdf"])

            with gr.Row():
                dpi_threshold = gr.Number(label="DPI Threshold", value=100, precision=0)
                dpi_target = gr.Number(label="Target DPI", value=72, precision=0)
                quality = gr.Number(label="JPEG Quality (1-100)", value=60, precision=0)

            compress_button = gr.Button("🚀 Compress PDF")

            status_text_compress = gr.Textbox(label="Status", lines=2)
            download_link_compress = gr.File(label="Download compressed PDF")

            compress_button.click(
                fn=compress_pdf,
                inputs=[file_input_compress, dpi_threshold, dpi_target, quality],
                outputs=[status_text_compress, download_link_compress]
            )

        # --- Merge: several PDFs -> one PDF ---
        with gr.TabItem("Merge PDFs"):
            gr.Markdown("Upload multiple PDFs and click **Merge PDFs** to download the merged version.")
            pdf_uploads = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")

            merge_button = gr.Button("📎 Merge PDF Files")

            merge_status = gr.Textbox(label="Status", lines=2)
            merged_file = gr.File(label="Download Merged PDF")

            merge_button.click(
                fn=merge_pdfs,
                inputs=[pdf_uploads],
                outputs=[merge_status, merged_file]
            )

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()