nazib61 committed on
Commit
aec0998
·
verified ·
1 Parent(s): 09e7db1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -33
app.py CHANGED
@@ -4,8 +4,8 @@ import os
4
  import base64
5
  import uuid
6
 
7
- def process_conversion(file_input, base64_input):
8
- # Use /tmp for reliability on Hugging Face
9
  unique_id = uuid.uuid4().hex
10
  temp_docx = f"/tmp/input_{unique_id}.docx"
11
  output_dir = "/tmp/outputs"
@@ -14,55 +14,72 @@ def process_conversion(file_input, base64_input):
14
  os.makedirs(output_dir)
15
 
16
  try:
17
- # 1. Handle Input
18
- if file_input is not None:
19
- input_path = file_input.name
20
- elif base64_input and base64_input.strip():
21
- # Clean base64
22
- if "," in base64_input:
23
- base64_input = base64_input.split(",")[1]
24
- with open(temp_docx, "wb") as f:
25
- f.write(base64.b64decode(base64_input))
26
- input_path = temp_docx
27
- else:
28
- return None, "No input provided."
29
 
30
- # 2. Run Conversion
31
- # Adding timeout and capturing errors
 
 
 
 
 
 
 
 
32
  result = subprocess.run([
33
  'libreoffice', '--headless', '--convert-to', 'pdf',
34
- '--outdir', output_dir, input_path
35
  ], capture_output=True, text=True, timeout=30)
36
 
37
  if result.returncode != 0:
38
  return None, f"LibreOffice Error: {result.stderr}"
39
 
40
- # 3. Get Result
41
- filename = os.path.basename(input_path)
42
- pdf_filename = os.path.splitext(filename)[0] + ".pdf"
43
  pdf_path = os.path.join(output_dir, pdf_filename)
44
 
45
  if os.path.exists(pdf_path):
46
  with open(pdf_path, "rb") as f:
47
- pdf_base64 = base64.b64encode(f.read()).decode('utf-8')
 
 
 
 
 
 
 
48
  return gr.FileData(path=pdf_path, mime_type="application/pdf"), pdf_base64
49
 
50
- return None, "Conversion failed - file not found."
51
 
52
  except Exception as e:
53
- # This prevents 'event: error' and shows the real problem
54
  return None, f"System Error: {str(e)}"
55
 
 
56
  with gr.Blocks() as demo:
57
- gr.Markdown("# DOCX to PDF")
58
- with gr.Row():
59
- file_in = gr.File(label="Upload", file_types=[".docx"])
60
- text_in = gr.Textbox(label="Base64 Input")
61
- btn = gr.Button("Convert")
62
- with gr.Row():
63
- file_out = gr.File(label="PDF File")
64
- text_out = gr.Textbox(label="Base64 Output")
65
 
66
- btn.click(process_conversion, [file_in, text_in], [file_out, text_out])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- demo.launch()
 
 
4
  import base64
5
  import uuid
6
 
7
def process_conversion(base64_input):
    """Convert a Base64-encoded DOCX document to PDF with headless LibreOffice.

    Args:
        base64_input: Base64 text of a ``.docx`` file. A data-URI style
            header (everything up to the first comma) is stripped if present.

    Returns:
        A ``(file, text)`` pair. On success ``file`` is a ``gr.FileData``
        pointing at the generated PDF and ``text`` is the PDF content encoded
        as Base64. On any failure ``file`` is ``None`` and ``text`` is a
        human-readable error message.
    """
    # Reject missing/blank input before touching the filesystem.
    if not base64_input or not base64_input.strip():
        return None, "Error: No Base64 data provided."

    # Work under /tmp; the unique id keeps concurrent requests from colliding.
    unique_id = uuid.uuid4().hex
    temp_docx = f"/tmp/input_{unique_id}.docx"
    output_dir = "/tmp/outputs"

    try:
        # exist_ok makes repeated calls safe once the directory exists.
        os.makedirs(output_dir, exist_ok=True)

        # Clean base64 string (remove data header if present).
        if "," in base64_input:
            base64_input = base64_input.split(",")[1]

        # Decode first so malformed input never leaves an empty file behind.
        docx_bytes = base64.b64decode(base64_input)

        # LibreOffice converts files on disk, so materialise the upload.
        with open(temp_docx, "wb") as f:
            f.write(docx_bytes)

        result = subprocess.run([
            'libreoffice', '--headless', '--convert-to', 'pdf',
            '--outdir', output_dir, temp_docx
        ], capture_output=True, text=True, timeout=30)

        if result.returncode != 0:
            return None, f"LibreOffice Error: {result.stderr}"

        # LibreOffice names the output after the input file's stem.
        pdf_path = os.path.join(output_dir, f"input_{unique_id}.pdf")
        if not os.path.exists(pdf_path):
            return None, "Error: Conversion failed - PDF not generated."

        with open(pdf_path, "rb") as f:
            pdf_base64 = base64.b64encode(f.read()).decode('utf-8')

        # Return a file object for download and the Base64 string for API use.
        return gr.FileData(path=pdf_path, mime_type="application/pdf"), pdf_base64

    except Exception as e:
        # Boundary handler: report the problem as text instead of raising.
        return None, f"System Error: {str(e)}"

    finally:
        # Remove the temporary DOCX on every exit path, not only on success.
        # Best-effort: a cleanup failure must not mask the real result.
        try:
            os.remove(temp_docx)
        except OSError:
            pass
59
 
60
# Gradio interface: Base64 in on the left, PDF file + Base64 out on the right.
with gr.Blocks() as demo:
    # Page heading followed by a one-line usage hint.
    gr.Markdown("# 📄 DOCX to PDF (Base64 Only)")
    gr.Markdown("Paste the Base64 string of a `.docx` file to convert it to `.pdf`.")

    with gr.Row():
        # Input side: the Base64 text box and the button that starts conversion.
        with gr.Column():
            text_in = gr.Textbox(label="Paste Base64 DOCX", placeholder="Input base64 string here...", lines=8)
            submit_btn = gr.Button("Convert to PDF", variant="primary")

        # Output side: downloadable PDF plus its Base64 form.
        with gr.Column():
            file_out = gr.File(label="Download PDF File")
            text_out = gr.Textbox(
                label="Result Base64 PDF",
                lines=8,
                show_copy_button=True,
            )

    # Wire the button to the converter: one input, two outputs.
    submit_btn.click(process_conversion, [text_in], [file_out, text_out])

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()