Threscomma committed on
Commit
a1f6e97
·
verified ·
1 Parent(s): d8b1c62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -26
app.py CHANGED
@@ -4,67 +4,123 @@ import tempfile
4
  import os
5
  from pdf2image import convert_from_path
6
  import logging
 
7
 
8
- # ===== HEALTH CHECK =====
9
- def verify_libreoffice():
10
- try:
11
- subprocess.run(["libreoffice", "--version"], check=True, capture_output=True)
12
- return True
13
- except (subprocess.CalledProcessError, FileNotFoundError):
14
- return False
15
 
16
- if not verify_libreoffice():
17
- raise RuntimeError("❌ LibreOffice not installed. Check your .dockerfile!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- # ===== MAIN CONVERSION FUNCTION =====
20
- def convert_docx_to_image(docx_path: str) -> str:
 
 
 
21
  with tempfile.TemporaryDirectory() as tmpdir:
22
  try:
23
  # Step 1: DOCX → PDF
24
  pdf_path = os.path.join(tmpdir, "output.pdf")
25
  cmd = [
26
- "libreoffice",
27
  "--headless",
28
  "--convert-to", "pdf",
29
  "--outdir", tmpdir,
30
  docx_path
31
  ]
32
- subprocess.run(cmd, check=True, timeout=30)
 
 
33
 
34
  if not os.path.exists(pdf_path):
35
- raise FileNotFoundError("PDF was not generated")
36
 
37
  # Step 2: PDF → PNG
 
38
  images = convert_from_path(
39
  pdf_path,
40
  dpi=300,
41
- output_folder=tmpdir,
42
  fmt="png",
43
- single_file=True
 
44
  )
 
 
 
 
45
  output_path = os.path.join(tmpdir, "output.png")
46
  images[0].save(output_path, "PNG")
 
 
47
  return output_path
48
-
 
 
 
49
  except Exception as e:
50
- logging.error(f"Conversion failed: {str(e)}", exc_info=True)
51
- raise gr.Error(f"Conversion error: {str(e)}")
52
 
53
  # ===== GRADIO UI =====
54
- with gr.Blocks() as app:
55
- gr.Markdown("## 📄 DOCX to Image (Pixel-Perfect)")
 
56
  with gr.Row():
57
  input_file = gr.File(
58
- label="Upload DOCX",
59
  file_types=[".docx"],
60
  type="filepath"
61
  )
62
- output_image = gr.Image(label="Output Image", type="filepath")
 
 
 
 
 
 
 
 
63
 
64
- gr.Button("Convert").click(
65
- fn=convert_docx_to_image,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  inputs=input_file,
67
- outputs=output_image
68
  )
69
 
70
  app.launch(server_name="0.0.0.0")
 
4
  import os
5
  from pdf2image import convert_from_path
6
  import logging
7
+ import time
8
 
9
+ # Configure logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
 
 
 
 
12
 
13
+ # ===== HEALTH CHECKS =====
14
+ def system_check():
15
+ """Verify all required binaries exist."""
16
+ required = ["libreoffice", "pdftoppm"]
17
+ missing = []
18
+ for cmd in required:
19
+ try:
20
+ subprocess.run([cmd, "--version"],
21
+ check=True,
22
+ stdout=subprocess.PIPE,
23
+ stderr=subprocess.PIPE)
24
+ logger.info(f"✅ {cmd} is installed")
25
+ except Exception:
26
+ missing.append(cmd)
27
+
28
+ if missing:
29
+ logger.critical(f"❌ Missing: {', '.join(missing)}")
30
+ raise RuntimeError("System dependencies missing. Check .dockerfile")
31
+
32
+ # Run checks on startup
33
+ system_check()
34
 
35
+ # ===== CORE CONVERSION =====
36
+ def convert_docx_to_image(docx_path: str):
37
+ """Convert DOCX to PNG with error recovery."""
38
+ start_time = time.time()
39
+
40
  with tempfile.TemporaryDirectory() as tmpdir:
41
  try:
42
  # Step 1: DOCX → PDF
43
  pdf_path = os.path.join(tmpdir, "output.pdf")
44
  cmd = [
45
+ "libreoffice",
46
  "--headless",
47
  "--convert-to", "pdf",
48
  "--outdir", tmpdir,
49
  docx_path
50
  ]
51
+
52
+ logger.info("Starting DOCX to PDF conversion...")
53
+ subprocess.run(cmd, check=True, timeout=60)
54
 
55
  if not os.path.exists(pdf_path):
56
+ raise FileNotFoundError("PDF not generated")
57
 
58
  # Step 2: PDF → PNG
59
+ logger.info("Converting PDF to PNG...")
60
  images = convert_from_path(
61
  pdf_path,
62
  dpi=300,
 
63
  fmt="png",
64
+ poppler_path="/usr/bin", # Critical for Hugging Face
65
+ thread_count=4
66
  )
67
+
68
+ if not images:
69
+ raise ValueError("No images generated")
70
+
71
  output_path = os.path.join(tmpdir, "output.png")
72
  images[0].save(output_path, "PNG")
73
+
74
+ logger.info(f"Conversion successful in {time.time()-start_time:.2f}s")
75
  return output_path
76
+
77
+ except subprocess.TimeoutExpired:
78
+ logger.error("Conversion timed out after 60s")
79
+ raise gr.Error("Timeout: Try a smaller file or upgrade hardware")
80
  except Exception as e:
81
+ logger.error(f"Conversion failed: {str(e)}", exc_info=True)
82
+ raise gr.Error(f"Error: {str(e)}")
83
 
84
  # ===== GRADIO UI =====
85
+ with gr.Blocks(title="DOCX to Image Converter") as app:
86
+ gr.Markdown("## 📄→🖼️ Professional DOCX to Image")
87
+
88
  with gr.Row():
89
  input_file = gr.File(
90
+ label="Upload DOCX File",
91
  file_types=[".docx"],
92
  type="filepath"
93
  )
94
+ output_image = gr.Image(
95
+ label="Converted Image",
96
+ type="filepath",
97
+ interactive=False
98
+ )
99
+
100
+ status = gr.Textbox(label="Conversion Status", visible=False)
101
+
102
+ convert_btn = gr.Button("Convert", variant="primary")
103
 
104
+ # Detailed output for debugging
105
+ with gr.Accordion("Advanced Info", open=False):
106
+ gr.Markdown("""
107
+ **Technical Details:**
108
+ - Uses LibreOffice 7.5 for perfect rendering
109
+ - 300 DPI high-quality output
110
+ - Handles tables, fonts, and complex layouts
111
+ """)
112
+
113
+ def process(file_path: str):
114
+ try:
115
+ image_path = convert_docx_to_image(file_path)
116
+ return image_path, "✅ Conversion successful!"
117
+ except Exception as e:
118
+ return None, f"❌ Error: {str(e)}"
119
+
120
+ convert_btn.click(
121
+ fn=process,
122
  inputs=input_file,
123
+ outputs=[output_image, status]
124
  )
125
 
126
  app.launch(server_name="0.0.0.0")