File size: 1,948 Bytes
5392a31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
import fitz
import tempfile
import zipfile
from pathlib import Path
import re


def sanitize_filename(text: str) -> str:
    """Reduce a free-form title to a token that is safe inside a filename.

    Every character that is not a word character, whitespace, or a hyphen
    is removed. Each remaining run of whitespace is then collapsed into a
    single underscore, and underscores at either end of the result are
    trimmed off.

    Args:
        text: The raw title.

    Returns:
        The cleaned title; an empty string if nothing survives.
    """
    disallowed = re.compile(r"[^\w\s-]")
    whitespace_run = re.compile(r"\s+")

    kept = disallowed.sub("", text)
    joined = whitespace_run.sub("_", kept)
    return joined.strip("_")


def split_pdf(pdf_file):
    """Split a bookmarked PDF into one PDF per level-1 bookmark and zip them.

    Each chapter runs from its bookmark's target page up to the page before
    the next chapter's target page; the last chapter runs to the end of the
    document. Chapters whose computed range is empty are skipped.

    Args:
        pdf_file: The uploaded PDF, given either as a filesystem path or as
            an object that exposes the path through a ``name`` attribute.
            ``None`` (nothing uploaded) is reported as an error.

    Returns:
        A ``(zip_path, message)`` tuple. ``zip_path`` is the path of the
        generated ``chapters.zip`` as a string, or ``None`` when no archive
        was produced. ``message`` is a status line meant for display.
    """
    if pdf_file is None:
        return None, "❌ Please upload a PDF first."

    # Accept both a plain path and a file wrapper that carries ``.name``.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # The context manager closes the document on every exit path, including
    # the early returns below and any exception raised while splitting.
    with fitz.open(pdf_path) as doc:
        toc = doc.get_toc()
        if not toc:
            return None, "❌ No bookmarks found in this PDF."

        chapters = [item for item in toc if item[0] == 1]
        if not chapters:
            return None, "❌ No level-1 chapters found."

        page_count = doc.page_count

        # Number chapters by bookmark order, then drop bookmarks that do not
        # point at a page inside this document. Such an entry would otherwise
        # yield a negative start page and also wipe out the chapter before it.
        targets = [
            (number, title, page)
            for number, (_, title, page) in enumerate(chapters, start=1)
            if 1 <= page <= page_count
        ]

        temp_dir = Path(tempfile.mkdtemp())
        zip_path = temp_dir / "chapters.zip"
        extracted = 0

        with zipfile.ZipFile(zip_path, "w") as zf:
            for pos, (number, title, page) in enumerate(targets):
                start_page = page - 1  # bookmark pages are 1-based
                if pos + 1 < len(targets):
                    # Stop on the page before the next chapter's first page.
                    end_page = targets[pos + 1][2] - 2
                else:
                    end_page = page_count - 1

                if start_page > end_page:
                    continue

                name = f"Chapter_{number:02d}_{sanitize_filename(title)}.pdf"
                output = temp_dir / name

                with fitz.open() as new_doc:
                    new_doc.insert_pdf(
                        doc, from_page=start_page, to_page=end_page
                    )
                    new_doc.save(output)

                zf.write(output, arcname=name)
                # The archive now holds a copy; drop the loose file.
                output.unlink()
                extracted += 1

    if extracted == 0:
        # Do not hand back an empty archive labelled as a success.
        zip_path.unlink()
        temp_dir.rmdir()
        return None, "❌ No chapters could be extracted."

    return str(zip_path), "✅ Chapters extracted successfully!"


# Gradio UI: one upload field, one download field, a status line, and a
# button that runs split_pdf on the uploaded file.
with gr.Blocks(title="📚 Smart PDF Chapter Splitter") as demo:
    gr.Markdown("## 📚 Smart PDF Chapter Splitter")
    gr.Markdown(
        "Upload a PDF with bookmarks and get clean chapter files — fast and deterministic."
    )

    pdf_input = gr.File(label="📖 Upload PDF", file_types=[".pdf"])
    output_zip = gr.File(label="📦 Download Chapters (ZIP)")
    status = gr.Markdown()

    split_btn = gr.Button("✂️ Split PDF")

    # split_pdf returns (zip_path, message); they map onto the download
    # field and the status line in that order.
    split_btn.click(
        fn=split_pdf,
        inputs=pdf_input,
        outputs=[output_zip, status],
    )

demo.launch()