# Spaces: Sleeping · File size: 1,948 Bytes · 5392a31
import gradio as gr
import fitz
import tempfile
import zipfile
from pathlib import Path
import re
def sanitize_filename(text: str) -> str:
    """Reduce *text* to word characters, hyphens and underscores.

    Characters that are neither word characters, whitespace nor hyphens are
    dropped. Each run of whitespace then becomes a single underscore, and
    underscores at either end of the result are trimmed.
    """
    without_symbols = re.compile(r"[^\w\s-]").sub("", text)
    underscored = re.compile(r"\s+").sub("_", without_symbols)
    return underscored.strip("_")
def split_pdf(pdf_file):
    """Split a bookmarked PDF into one PDF per level-1 bookmark and zip them.

    Each level-1 bookmark starts a chapter that runs up to the page before
    the next level-1 bookmark (the last chapter runs to the end of the
    document). A chapter whose computed range is empty -- for example when
    the next chapter starts on the same page -- is skipped.

    Args:
        pdf_file: The uploaded file, either an object exposing its path as
            ``.name`` or a plain path string. ``None`` (nothing uploaded)
            is reported through the status message instead of raising.

    Returns:
        A ``(zip_path, status)`` tuple. ``zip_path`` is the path of the
        generated ``chapters.zip`` as a string, or ``None`` when nothing was
        extracted; ``status`` is a Markdown message for the UI.
    """
    if pdf_file is None:
        return None, "❌ Please upload a PDF first."

    # Accept both file wrappers (path in ``.name``) and bare path strings.
    source_path = getattr(pdf_file, "name", pdf_file)
    doc = fitz.open(source_path)
    try:
        toc = doc.get_toc()
        if not toc:
            return None, "❌ No bookmarks found in this PDF."

        # Keep level-1 entries only. Entries with page < 1 cannot be mapped
        # to a page range and would also corrupt the end page computed for
        # the chapter before them.
        chapters = [item for item in toc if item[0] == 1 and item[2] >= 1]
        if not chapters:
            return None, "❌ No level-1 chapters found."

        temp_dir = Path(tempfile.mkdtemp())
        zip_path = temp_dir / "chapters.zip"
        written = 0
        with zipfile.ZipFile(zip_path, "w") as zf:
            for i, (_, title, page) in enumerate(chapters):
                # Bookmark pages are 1-based; page indices are 0-based.
                start_page = page - 1
                if i + 1 < len(chapters):
                    # Last page before the next chapter's first page.
                    end_page = chapters[i + 1][2] - 2
                else:
                    end_page = doc.page_count - 1
                if start_page > end_page:
                    continue

                safe_title = sanitize_filename(title)
                stem = f"Chapter_{i + 1:02d}"
                # Avoid a dangling underscore when the title sanitizes to "".
                name = f"{stem}_{safe_title}.pdf" if safe_title else f"{stem}.pdf"
                output = temp_dir / name

                new_doc = fitz.open()
                try:
                    new_doc.insert_pdf(doc, from_page=start_page, to_page=end_page)
                    new_doc.save(output)
                finally:
                    new_doc.close()

                zf.write(output, arcname=name)
                # The chapter now lives in the archive; drop the loose copy.
                output.unlink()
                written += 1

        if written == 0:
            # Do not hand the user an empty archive labelled as success.
            zip_path.unlink()
            temp_dir.rmdir()
            return None, "❌ No chapters could be extracted."

        return str(zip_path), "✅ Chapters extracted successfully!"
    finally:
        # Runs on every exit path, including the early returns above.
        doc.close()
# Gradio UI: a PDF upload, a trigger button, and two outputs (ZIP + status).
with gr.Blocks(title="📚 Smart PDF Chapter Splitter") as demo:
    gr.Markdown("## 📚 Smart PDF Chapter Splitter")
    gr.Markdown(
        "Upload a PDF with bookmarks and get clean chapter files — fast and deterministic."
    )

    pdf_input = gr.File(label="📄 Upload PDF", file_types=[".pdf"])
    output_zip = gr.File(label="📦 Download Chapters (ZIP)")
    status = gr.Markdown()

    split_btn = gr.Button("✂️ Split PDF")
    # split_pdf returns (zip_path, status_message), matching the two outputs.
    split_btn.click(
        fn=split_pdf,
        inputs=pdf_input,
        outputs=[output_zip, status],
    )

# Start the server only when executed as a script, so that importing this
# module (e.g. to reuse split_pdf) does not launch the app.
if __name__ == "__main__":
    demo.launch()