daswer123 committed on
Commit
e1cf46f
·
verified ·
1 Parent(s): 531a5f7

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +29 -0
  2. funcs.py +39 -0
  3. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ import os
3
+ import gradio as gr
4
+ from funcs import pdf_to_epub
5
+
6
def greet(file):
    """Convert an uploaded PDF to EPUB for the Gradio interface.

    Args:
        file: The upload handed over by Gradio's ``"file"`` input. Some
            Gradio versions pass a filesystem path, others a temp-file
            wrapper exposing the path as ``.name``; both are accepted.

    Returns:
        A ``(status, path)`` tuple: ``("Завершенно", <epub path>)`` on
        success, or ``("Error", None)`` when nothing was uploaded or the
        conversion failed.
    """
    if file is None:
        print("Error: no file was provided")
        return "Error", None

    try:
        print(f"Received file: {file}")

        # pdf_to_epub needs a path string; unwrap temp-file objects.
        pdf_path = getattr(file, "name", file)

        # Write each conversion to its own timestamped file inside
        # "result" so requests do not overwrite a shared output file.
        os.makedirs("result", exist_ok=True)
        timestamp_formated = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        result_file = os.path.join("result", timestamp_formated + ".epub")

        pdf_to_epub(pdf_path, result_file)
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        print(f"Error: {e}")
        return "Error", None

    return "Завершенно", result_file
21
+
22
# Gradio UI: one file upload in; a status label and the converted file out.
demo = gr.Interface(
    fn=greet,
    inputs=["file"],
    outputs=["label", "file"],
    # Flagging stays disabled. Gradio documents this option as a string
    # ("auto" / "manual" / "never"); the boolean form is deprecated.
    allow_flagging="never",
)

demo.launch()
funcs.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import pypandoc
3
+ from pdf2docx import Converter
4
+
5
+
6
def pdf_to_epub(pdf_path, epub_path, ignore_header_footer=True):
    """Convert a PDF to EPUB via an intermediate DOCX file.

    The PDF is first converted to DOCX with pdf2docx's ``Converter``; the
    DOCX is then converted to EPUB with ``pypandoc.convert_file``. The
    intermediate DOCX is written next to the PDF and is left in place.

    Args:
        pdf_path: Path to the source PDF (``str`` or ``os.PathLike``).
        epub_path: Path the EPUB is written to.
        ignore_header_footer: Forwarded to ``Converter.convert`` as both the
            ``ignore_header`` and ``ignore_footer`` keyword settings.
            NOTE(review): confirm the installed pdf2docx honors these keys.

    Returns:
        ``epub_path``, so callers can use the result directly.
    """
    import os  # local import: the module's import block does not provide it

    pdf_path = os.fspath(pdf_path)

    # Swap only the final extension. A plain str.replace('.pdf', ...) would
    # also rewrite directory names containing ".pdf" and would leave the
    # path unchanged (pointing at the input itself) for e.g. "BOOK.PDF".
    root, _ext = os.path.splitext(pdf_path)
    docx_path = root + ".docx"

    convert_settings = {
        "ignore_footer": ignore_header_footer,
        "ignore_header": ignore_header_footer,
    }

    # Step 1: PDF -> DOCX. Always release the converter, even on failure.
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path, **convert_settings)
    finally:
        cv.close()

    # Step 2: DOCX -> EPUB.
    output = pypandoc.convert_file(docx_path, 'epub', outputfile=epub_path)
    print(output)

    return epub_path
22
+
23
+
24
def main():
    """Command-line entry point: convert the given PDF to an EPUB beside it.

    Reads one positional argument, ``pdf_path``, derives the EPUB path by
    swapping the file extension for ``.epub``, runs the conversion and
    prints the location of the result.
    """
    import os  # local import: the module's import block does not provide it

    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='Convert a PDF file to EPUB format.')
    parser.add_argument('pdf_path', type=str, help='Path to the PDF file to convert.')
    args = parser.parse_args()

    # Derive the EPUB path from the final extension only. With
    # str.replace('.pdf', '.epub'), a path lacking a lowercase ".pdf" stays
    # unchanged, so the output would overwrite the input file.
    root, _ext = os.path.splitext(args.pdf_path)
    epub_path = root + '.epub'

    # Perform conversion
    pdf_to_epub(args.pdf_path, epub_path)
    print(f"Conversion complete. EPUB file saved to: {epub_path}")


if __name__ == '__main__':
    main()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ pdf2docx
2
+ pypandoc
3
+ gradio