Threscomma committed on
Commit
e479c17
·
verified ·
1 Parent(s): 8b1ed47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -109
app.py CHANGED
@@ -3,124 +3,47 @@ import subprocess
3
  import tempfile
4
  import os
5
  from pdf2image import convert_from_path
6
- import logging
7
- import time
8
 
9
- # Configure logging
10
- logging.basicConfig(level=logging.INFO)
11
- logger = logging.getLogger(__name__)
12
-
13
- # ===== HEALTH CHECKS =====
14
- def system_check():
15
- """Verify all required binaries exist."""
16
- required = ["libreoffice", "pdftoppm"]
17
- missing = []
18
- for cmd in required:
19
- try:
20
- subprocess.run([cmd, "--version"],
21
- check=True,
22
- stdout=subprocess.PIPE,
23
- stderr=subprocess.PIPE)
24
- logger.info(f"βœ… {cmd} is installed")
25
- except Exception:
26
- missing.append(cmd)
27
-
28
- if missing:
29
- logger.critical(f"❌ Missing: {', '.join(missing)}")
30
- raise RuntimeError("System dependencies missing. Check .dockerfile")
31
-
32
- # Run checks on startup
33
- system_check()
34
-
35
- # ===== CORE CONVERSION =====
36
- def convert_docx_to_image(docx_path: str):
37
- """Convert DOCX to PNG with error recovery."""
38
- start_time = time.time()
39
-
40
  with tempfile.TemporaryDirectory() as tmpdir:
41
- try:
42
- # Step 1: DOCX → PDF
43
- pdf_path = os.path.join(tmpdir, "output.pdf")
44
- cmd = [
45
- "libreoffice",
46
- "--headless",
47
- "--convert-to", "pdf",
48
- "--outdir", tmpdir,
49
- docx_path
50
- ]
51
-
52
- logger.info("Starting DOCX to PDF conversion...")
53
- subprocess.run(cmd, check=True, timeout=60)
54
-
55
- if not os.path.exists(pdf_path):
56
- raise FileNotFoundError("PDF not generated")
57
-
58
- # Step 2: PDF → PNG
59
- logger.info("Converting PDF to PNG...")
60
- images = convert_from_path(
61
- pdf_path,
62
- dpi=300,
63
- fmt="png",
64
- poppler_path="/usr/bin", # Critical for Hugging Face
65
- thread_count=4
66
- )
67
-
68
- if not images:
69
- raise ValueError("No images generated")
70
-
71
- output_path = os.path.join(tmpdir, "output.png")
72
- images[0].save(output_path, "PNG")
73
-
74
- logger.info(f"Conversion successful in {time.time()-start_time:.2f}s")
75
- return output_path
76
-
77
- except subprocess.TimeoutExpired:
78
- logger.error("Conversion timed out after 60s")
79
- raise gr.Error("Timeout: Try a smaller file or upgrade hardware")
80
- except Exception as e:
81
- logger.error(f"Conversion failed: {str(e)}", exc_info=True)
82
- raise gr.Error(f"Error: {str(e)}")
83
 
84
- # ===== GRADIO UI =====
85
- with gr.Blocks(title="DOCX to Image Converter") as app:
86
- gr.Markdown("## πŸ“„β†’πŸ–ΌοΈ Professional DOCX to Image")
87
-
88
  with gr.Row():
89
  input_file = gr.File(
90
- label="Upload DOCX File",
91
  file_types=[".docx"],
92
  type="filepath"
93
  )
94
- output_image = gr.Image(
95
- label="Converted Image",
96
- type="filepath",
97
- interactive=False
98
- )
99
-
100
- status = gr.Textbox(label="Conversion Status", visible=False)
101
 
102
- convert_btn = gr.Button("Convert", variant="primary")
103
-
104
- # Detailed output for debugging
105
- with gr.Accordion("Advanced Info", open=False):
106
- gr.Markdown("""
107
- **Technical Details:**
108
- - Uses LibreOffice 7.5 for perfect rendering
109
- - 300 DPI high-quality output
110
- - Handles tables, fonts, and complex layouts
111
- """)
112
-
113
- def process(file_path: str):
114
- try:
115
- image_path = convert_docx_to_image(file_path)
116
- return image_path, "βœ… Conversion successful!"
117
- except Exception as e:
118
- return None, f"❌ Error: {str(e)}"
119
-
120
- convert_btn.click(
121
- fn=process,
122
  inputs=input_file,
123
- outputs=[output_image, status]
124
  )
125
 
126
- app.launch(server_name="0.0.0.0")
 
3
  import tempfile
4
  import os
5
  from pdf2image import convert_from_path
 
 
6
 
7
+ def convert_docx_to_image(docx_path: str) -> str:
8
+ """Convert DOCX to PNG with LibreOffice (pixel-perfect)"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  with tempfile.TemporaryDirectory() as tmpdir:
10
+ # Step 1: DOCX → PDF using LibreOffice's render engine
11
+ pdf_path = os.path.join(tmpdir, "output.pdf")
12
+ cmd = [
13
+ "libreoffice",
14
+ "--headless",
15
+ "--convert-to", "pdf",
16
+ "--outdir", tmpdir,
17
+ docx_path
18
+ ]
19
+ subprocess.run(cmd, check=True, timeout=30)
20
+
21
+ # Step 2: PDF → PNG (300 DPI for sharpness)
22
+ images = convert_from_path(
23
+ pdf_path,
24
+ dpi=300,
25
+ fmt="png",
26
+ poppler_path="/usr/bin" # Critical for Hugging Face
27
+ )
28
+ output_path = os.path.join(tmpdir, "output.png")
29
+ images[0].save(output_path, "PNG")
30
+ return output_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ # Gradio UI
33
+ with gr.Blocks() as app:
34
+ gr.Markdown("## πŸ–₯️ DOCX to Image (1:1 Conversion)")
 
35
  with gr.Row():
36
  input_file = gr.File(
37
+ label="Upload DOCX",
38
  file_types=[".docx"],
39
  type="filepath"
40
  )
41
+ output_image = gr.Image(label="Output", type="filepath")
 
 
 
 
 
 
42
 
43
+ gr.Button("Convert").click(
44
+ fn=convert_docx_to_image,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  inputs=input_file,
46
+ outputs=output_image
47
  )
48
 
49
+ app.launch()