AhmedBou committed on
Commit
5392a31
·
verified ·
1 Parent(s): cc402f2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz
3
+ import tempfile
4
+ import zipfile
5
+ from pathlib import Path
6
+ import re
7
+
8
+
9
def sanitize_filename(
    text: str, *, max_length: int = 80, fallback: str = "untitled"
) -> str:
    """Turn arbitrary text into a string that is safe to use in a file name.

    Every character that is not a word character, whitespace, or a hyphen is
    removed; each run of whitespace becomes a single underscore; leading and
    trailing underscores are stripped.

    Args:
        text: The raw text (for example a bookmark title).
        max_length: Maximum number of characters kept in the result, so that
            very long titles cannot exceed file-system name limits.
        fallback: Value returned when nothing usable is left after cleaning
            (empty input, or input made only of removed characters).

    Returns:
        The cleaned text, or ``fallback`` if the cleaned text is empty.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    cleaned = re.sub(r"[^\w\s-]", "", text)
    cleaned = re.sub(r"\s+", "_", cleaned).strip("_")

    # Truncating can expose a separator at the cut point; strip it again so
    # the result never ends with a dangling underscore.
    cleaned = cleaned[:max_length].rstrip("_")

    return cleaned or fallback
12
+
13
+
14
def split_pdf(pdf_file):
    """Split a bookmarked PDF into one file per level-1 chapter and zip them.

    Each level-1 table-of-contents entry starts a chapter; the chapter runs
    up to the page before the next level-1 entry that has a valid page (or to
    the end of the document for the last one). The chapter PDFs and the ZIP
    are written to a fresh temporary directory.

    Args:
        pdf_file: The uploaded PDF, given either as a filesystem path or as
            an object exposing that path through a ``name`` attribute.
            ``None`` (nothing uploaded) is reported as an error message
            instead of raising.

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the archive
        path as a string, or ``None`` when no archive was produced.
    """
    if pdf_file is None:
        return None, "❌ Please upload a PDF first."

    # Accept both a plain path and a file wrapper carrying the path in `.name`.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    doc = fitz.open(pdf_path)
    try:
        toc = doc.get_toc()
        if not toc:
            return None, "❌ No bookmarks found in this PDF."

        chapters = [item for item in toc if item[0] == 1]
        if not chapters:
            return None, "❌ No level-1 chapters found."

        last_page = doc.page_count - 1
        temp_dir = Path(tempfile.mkdtemp())
        zip_path = temp_dir / "chapters.zip"
        extracted = 0

        with zipfile.ZipFile(zip_path, "w") as zf:
            for i, (_, title, page) in enumerate(chapters):
                # TOC pages are 1-based; page indices are 0-based.
                start_page = page - 1

                # The chapter ends right before the next chapter that has a
                # usable page number; bookmarks without one are ignored here
                # so they cannot corrupt their neighbour's range.
                next_start = next(
                    (later[2] for later in chapters[i + 1:] if later[2] >= 1),
                    None,
                )
                end_page = last_page if next_start is None else next_start - 2
                end_page = min(end_page, last_page)

                # Skip bookmarks that point outside the document and empty
                # ranges; a negative index must never reach insert_pdf.
                if start_page < 0 or start_page > end_page:
                    continue

                name = f"Chapter_{i+1:02d}_{sanitize_filename(title)}.pdf"
                output = temp_dir / name

                new_doc = fitz.open()
                try:
                    new_doc.insert_pdf(
                        doc, from_page=start_page, to_page=end_page
                    )
                    new_doc.save(output)
                finally:
                    new_doc.close()

                zf.write(output, arcname=name)
                extracted += 1

        if not extracted:
            return None, "❌ No chapters could be extracted from the bookmarks."

        return str(zip_path), "✅ Chapters extracted successfully!"
    finally:
        # Release the source document on every path, including early returns.
        doc.close()
53
+
54
+
55
# Gradio UI: one PDF upload, a button that runs `split_pdf`, and two outputs
# (the generated ZIP and a Markdown status line).
with gr.Blocks(title="📚 Smart PDF Chapter Splitter") as demo:
    gr.Markdown("## 📚 Smart PDF Chapter Splitter")
    gr.Markdown(
        "Upload a PDF with bookmarks and get clean chapter files — fast and deterministic."
    )

    pdf_input = gr.File(label="📖 Upload PDF", file_types=[".pdf"])
    output_zip = gr.File(label="📦 Download Chapters (ZIP)")
    status = gr.Markdown()

    split_btn = gr.Button("✂️ Split PDF")

    # `split_pdf` returns (zip_path, status_message), matching the two outputs.
    split_btn.click(
        fn=split_pdf,
        inputs=pdf_input,
        outputs=[output_zip, status],
    )

demo.launch()