ABDALLALSWAITI committed on
Commit
1ea8766
·
verified ·
1 Parent(s): 19596d2

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +250 -510
src/streamlit_app.py CHANGED
@@ -1,89 +1,54 @@
1
- import streamlit as st
2
- import subprocess
3
- import os
 
4
  import tempfile
5
  import shutil
6
- from pathlib import Path
 
7
  import base64
8
- import re
 
9
 
10
- st.set_page_config(
11
- page_title="HTML to PDF Converter",
12
- page_icon="📄",
13
- layout="wide"
14
  )
15
 
16
- def detect_aspect_ratio(html_content):
17
- """
18
- Detect aspect ratio from HTML content
19
- Returns: "16:9", "1:1", or "9:16"
20
- """
21
- # Check for viewport meta tag
22
- viewport_match = re.search(r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']', html_content, re.IGNORECASE)
23
- if viewport_match:
24
- viewport = viewport_match.group(1).lower()
25
- if 'width=device-width' in viewport or 'width=100%' in viewport:
26
- # Check for orientation hints
27
- if 'orientation=portrait' in viewport:
28
- return "9:16"
29
- elif 'orientation=landscape' in viewport:
30
- return "16:9"
31
-
32
- # Check for CSS aspect-ratio property
33
- aspect_match = re.search(r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE)
34
- if aspect_match:
35
- width = int(aspect_match.group(1))
36
- height = int(aspect_match.group(2))
37
- ratio = width / height
38
- if ratio > 1.5:
39
- return "16:9"
40
- elif ratio < 0.7:
41
- return "9:16"
42
- else:
43
- return "1:1"
44
-
45
- # Check for common presentation frameworks
46
- if any(keyword in html_content.lower() for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']):
47
- return "16:9"
48
-
49
- # Check body style for width/height hints
50
- body_match = re.search(r'<body[^>]*style=["\']([^"\']*)["\']', html_content, re.IGNORECASE)
51
- if body_match:
52
- style = body_match.group(1).lower()
53
- if 'width' in style and 'height' in style:
54
- width_match = re.search(r'width\s*:\s*(\d+)', style)
55
- height_match = re.search(r'height\s*:\s*(\d+)', style)
56
- if width_match and height_match:
57
- w = int(width_match.group(1))
58
- h = int(height_match.group(1))
59
- ratio = w / h
60
- if ratio > 1.5:
61
- return "16:9"
62
- elif ratio < 0.7:
63
- return "9:16"
64
-
65
- # Default to A4 portrait for documents
66
- return "9:16"
67
 
68
- def save_uploaded_images(images, temp_dir):
69
- """Save uploaded images and return mapping"""
70
  image_mapping = {}
71
  images_dir = os.path.join(temp_dir, "images")
72
  os.makedirs(images_dir, exist_ok=True)
73
 
74
  for image in images:
75
- # Save image
76
- image_path = os.path.join(images_dir, image.name)
77
- with open(image_path, 'wb') as f:
78
- f.write(image.getvalue())
79
-
80
- # Create mapping
81
- image_mapping[image.name] = f"images/{image.name}"
82
- print(f"Saved image: {image.name} -> {image_path}")
 
 
 
 
 
83
 
84
  return image_mapping
85
 
86
- def process_html_with_images(html_content, temp_dir, image_mapping):
87
  """Process HTML to handle image references with absolute file paths"""
88
  import re
89
 
@@ -93,25 +58,33 @@ def process_html_with_images(html_content, temp_dir, image_mapping):
93
  file_url = f"file://{absolute_path}"
94
 
95
  # Replace various image reference patterns
96
- # Pattern 1: src="filename" or src='filename'
 
 
 
 
 
 
 
 
97
  html_content = re.sub(
98
- f'src=["\'](?:\.\/)?{re.escape(original_name)}["\']',
99
  f'src="{file_url}"',
100
  html_content,
101
  flags=re.IGNORECASE
102
  )
103
 
104
- # Pattern 2: background-image: url(filename)
105
  html_content = re.sub(
106
- f'url\(["\']?(?:\.\/)?{re.escape(original_name)}["\']?\)',
107
  f'url("{file_url}")',
108
  html_content,
109
  flags=re.IGNORECASE
110
  )
111
 
112
- # Pattern 3: href for links
113
  html_content = re.sub(
114
- f'href=["\'](?:\.\/)?{re.escape(original_name)}["\']',
115
  f'href="{file_url}"',
116
  html_content,
117
  flags=re.IGNORECASE
@@ -119,134 +92,13 @@ def process_html_with_images(html_content, temp_dir, image_mapping):
119
 
120
  return html_content
121
 
122
- def render_html_preview(html_content):
123
- """Render HTML preview in an iframe"""
124
- # Encode HTML content
125
- b64 = base64.b64encode(html_content.encode()).decode()
126
- iframe_html = f'<iframe src="data:text/html;base64,{b64}" width="100%" height="600" style="border: 2px solid #ddd; border-radius: 5px;"></iframe>'
127
- return iframe_html
128
-
129
- def render_pdf_preview(pdf_bytes):
130
- """Render PDF preview using embedded PDF.js"""
131
- b64 = base64.b64encode(pdf_bytes).decode()
132
-
133
- pdf_viewer_html = f'''
134
- <!DOCTYPE html>
135
- <html>
136
- <head>
137
- <style>
138
- body {{
139
- margin: 0;
140
- padding: 0;
141
- overflow: hidden;
142
- background: #525659;
143
- }}
144
- #pdf-container {{
145
- width: 100%;
146
- height: 100vh;
147
- overflow: auto;
148
- display: flex;
149
- flex-direction: column;
150
- align-items: center;
151
- padding: 20px;
152
- box-sizing: border-box;
153
- }}
154
- canvas {{
155
- box-shadow: 0 2px 8px rgba(0,0,0,0.3);
156
- margin-bottom: 10px;
157
- background: white;
158
- }}
159
- #loading {{
160
- color: white;
161
- font-family: Arial, sans-serif;
162
- font-size: 18px;
163
- padding: 20px;
164
- }}
165
- .error {{
166
- color: #ff6b6b;
167
- font-family: Arial, sans-serif;
168
- padding: 20px;
169
- background: rgba(0,0,0,0.5);
170
- border-radius: 5px;
171
- margin: 20px;
172
- }}
173
- </style>
174
- </head>
175
- <body>
176
- <div id="pdf-container">
177
- <div id="loading">Loading PDF...</div>
178
- </div>
179
-
180
- <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
181
- <script>
182
- pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
183
-
184
- const pdfData = atob('{b64}');
185
- const pdfContainer = document.getElementById('pdf-container');
186
- const loading = document.getElementById('loading');
187
-
188
- const uint8Array = new Uint8Array(pdfData.length);
189
- for (let i = 0; i < pdfData.length; i++) {{
190
- uint8Array[i] = pdfData.charCodeAt(i);
191
- }}
192
-
193
- pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
194
- loading.style.display = 'none';
195
-
196
- const numPages = pdf.numPages;
197
- const promises = [];
198
-
199
- for (let pageNum = 1; pageNum <= numPages; pageNum++) {{
200
- promises.push(
201
- pdf.getPage(pageNum).then(function(page) {{
202
- const scale = 1.5;
203
- const viewport = page.getViewport({{scale: scale}});
204
-
205
- const canvas = document.createElement('canvas');
206
- const context = canvas.getContext('2d');
207
- canvas.height = viewport.height;
208
- canvas.width = viewport.width;
209
-
210
- pdfContainer.appendChild(canvas);
211
-
212
- return page.render({{
213
- canvasContext: context,
214
- viewport: viewport
215
- }}).promise;
216
- }})
217
- );
218
- }}
219
-
220
- return Promise.all(promises);
221
- }}).catch(function(error) {{
222
- loading.innerHTML = '<div class="error">Error loading PDF: ' + error.message + '</div>';
223
- console.error('Error loading PDF:', error);
224
- }});
225
- </script>
226
- </body>
227
- </html>
228
- '''
229
- return pdf_viewer_html
230
-
231
- def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
232
- """
233
- Convert HTML content to PDF using Puppeteer with better styling preservation
234
-
235
- Args:
236
- html_content: String containing HTML content
237
- aspect_ratio: One of "16:9", "1:1", or "9:16"
238
- temp_dir: Temporary directory for processing
239
-
240
- Returns:
241
- Tuple of (pdf_bytes, error_message)
242
- """
243
  try:
244
- # Inject CSS to preserve styles better
245
  style_injection = """
246
  <style>
247
- @page {
248
- margin: 0;
249
- }
250
  * {
251
  -webkit-print-color-adjust: exact !important;
252
  print-color-adjust: exact !important;
@@ -259,7 +111,6 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
259
  </style>
260
  """
261
 
262
- # Insert style injection before closing head tag or at the start of body
263
  if '</head>' in html_content:
264
  html_content = html_content.replace('</head>', style_injection + '</head>')
265
  elif '<body' in html_content:
@@ -267,358 +118,247 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
267
  else:
268
  html_content = style_injection + html_content
269
 
270
- # Save HTML content to temporary file
271
  html_file = os.path.join(temp_dir, "input.html")
272
  with open(html_file, 'w', encoding='utf-8') as f:
273
  f.write(html_content)
274
 
275
- # Get the path to puppeteer_pdf.js
276
  script_dir = os.path.dirname(os.path.abspath(__file__))
277
- puppeteer_script = os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js')
278
 
279
- # Run Node.js script to convert HTML to PDF
280
  result = subprocess.run(
281
  ['node', puppeteer_script, html_file, aspect_ratio],
282
  capture_output=True,
283
  text=True,
284
  timeout=60,
285
- cwd=os.path.dirname(script_dir)
286
  )
287
 
288
  if result.returncode != 0:
289
- return None, f"PDF conversion failed: {result.stderr}"
290
 
291
- # Get the generated PDF path
292
  pdf_file = html_file.replace('.html', '.pdf')
293
 
294
  if not os.path.exists(pdf_file):
295
- return None, "PDF file was not generated"
296
 
297
- # Read PDF file into memory
298
  with open(pdf_file, 'rb') as f:
299
  pdf_bytes = f.read()
300
 
301
- return pdf_bytes, None
302
 
303
- except subprocess.TimeoutExpired:
304
- return None, "Error: PDF conversion timed out (60 seconds)"
305
  except Exception as e:
306
- return None, f"Error: {str(e)}"
307
 
308
- # Page header
309
- st.title("📄 HTML to PDF Converter")
310
- st.markdown("""
311
- Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
312
- **NEW:** Upload images alongside your HTML files!
313
- """)
 
 
 
 
 
 
 
 
314
 
315
- # Create tabs
316
- tab1, tab2 = st.tabs(["📤 Upload HTML File", "📝 Paste HTML Code"])
 
 
317
 
318
- # Tab 1: Upload HTML File
319
- with tab1:
320
- uploaded_file = st.file_uploader(
321
- "Choose an HTML file",
322
- type=['html', 'htm'],
323
- key="file_uploader",
324
- help="Upload an HTML file (max 200MB)",
325
- accept_multiple_files=False
326
- )
327
 
328
- # Image uploader
329
- uploaded_images = st.file_uploader(
330
- "📷 Upload Images (optional)",
331
- type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
332
- key="image_uploader",
333
- help="Upload images referenced in your HTML",
334
- accept_multiple_files=True
335
- )
336
 
337
- if uploaded_images:
338
- st.success(f" {len(uploaded_images)} image(s) uploaded")
339
- with st.expander("View uploaded images"):
340
- cols = st.columns(min(len(uploaded_images), 4))
341
- for idx, img in enumerate(uploaded_images):
342
- with cols[idx % 4]:
343
- st.image(img, caption=img.name, use_container_width=True)
344
 
345
- if uploaded_file is not None:
346
- st.success(f"✅ File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
 
 
347
 
348
- # Read file content
349
- uploaded_file.seek(0)
350
  try:
351
- html_content = uploaded_file.getvalue().decode('utf-8')
352
  except UnicodeDecodeError:
353
- uploaded_file.seek(0)
354
- html_content = uploaded_file.getvalue().decode('latin-1')
355
 
356
- # Auto-detect aspect ratio
357
- detected_ratio = detect_aspect_ratio(html_content)
 
 
358
 
359
- col1, col2 = st.columns([1, 1])
 
360
 
361
- with col1:
362
- st.subheader("⚙️ Settings")
363
-
364
- auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_file")
365
-
366
- if auto_detect:
367
- aspect_ratio_file = detected_ratio
368
- st.info(f"🔍 Detected: **{detected_ratio}**")
369
- else:
370
- aspect_ratio_file = st.radio(
371
- "Aspect Ratio",
372
- options=["16:9", "1:1", "9:16"],
373
- index=["16:9", "1:1", "9:16"].index(detected_ratio),
374
- key="aspect_file",
375
- help="Select the page orientation and dimensions"
376
- )
377
-
378
- st.markdown(f"""
379
- **Selected: {aspect_ratio_file}**
380
- - 16:9 = Landscape (297mm × 210mm)
381
- - 1:1 = Square (210mm × 210mm)
382
- - 9:16 = Portrait (210mm × 297mm)
383
- """)
384
-
385
- convert_file_btn = st.button("🔄 Convert to PDF", key="convert_file", type="primary", use_container_width=True)
386
 
387
- with col2:
388
- st.subheader("👁️ HTML Preview")
389
- with st.expander("Show HTML Preview", expanded=False):
390
- st.components.v1.html(render_html_preview(html_content), height=600, scrolling=True)
 
 
 
 
 
 
 
 
391
 
392
- # Conversion section
393
- if convert_file_btn:
394
- temp_dir = None
395
- try:
396
- with st.spinner("Converting HTML to PDF..."):
397
- # Create temp directory
398
- temp_dir = tempfile.mkdtemp()
399
-
400
- # Process images if uploaded
401
- if uploaded_images:
402
- image_mapping = save_uploaded_images(uploaded_images, temp_dir)
403
- html_content = process_html_with_images(html_content, temp_dir, image_mapping)
404
- st.info(f"📷 Processed {len(uploaded_images)} image(s)")
405
- # Debug info
406
- with st.expander("🔍 Debug: Image Mapping"):
407
- for orig, new in image_mapping.items():
408
- st.text(f"{orig} -> {new}")
409
- full_path = os.path.join(temp_dir, new)
410
- st.text(f"Full path: {full_path}")
411
- st.text(f"Exists: {os.path.exists(full_path)}")
412
-
413
- # Convert to PDF
414
- pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file, temp_dir)
415
-
416
- # Cleanup
417
- if temp_dir:
418
- shutil.rmtree(temp_dir, ignore_errors=True)
419
-
420
- if error:
421
- st.error(f"❌ {error}")
422
- with st.expander("Show error details"):
423
- st.code(error)
424
- else:
425
- st.success("✅ PDF generated successfully!")
426
-
427
- col_a, col_b = st.columns([1, 1])
428
-
429
- with col_a:
430
- output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
431
- if not output_filename.endswith('.pdf'):
432
- output_filename += '.pdf'
433
-
434
- st.download_button(
435
- label="⬇️ Download PDF",
436
- data=pdf_bytes,
437
- file_name=output_filename,
438
- mime="application/pdf",
439
- use_container_width=True,
440
- key="download_file_pdf"
441
- )
442
-
443
- with col_b:
444
- st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
445
-
446
- # PDF Preview
447
- st.subheader("📄 PDF Preview")
448
- st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
449
- except Exception as e:
450
- if temp_dir:
451
- shutil.rmtree(temp_dir, ignore_errors=True)
452
- st.error(f"❌ Error: {str(e)}")
453
 
454
- # Tab 2: Paste HTML Code
455
- with tab2:
456
- col1, col2 = st.columns([1, 1])
 
 
 
 
 
 
457
 
458
- with col1:
459
- html_code = st.text_area(
460
- "HTML Content",
461
- value="""<!DOCTYPE html>
462
- <html>
463
- <head>
464
- <title>Sample Document</title>
465
- <style>
466
- body {
467
- font-family: Arial, sans-serif;
468
- margin: 40px;
469
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
470
- color: white;
471
- }
472
- h1 {
473
- font-size: 48px;
474
- margin-bottom: 20px;
475
- text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
476
- }
477
- p {
478
- font-size: 18px;
479
- line-height: 1.6;
480
- }
481
- .box {
482
- background: rgba(255,255,255,0.1);
483
- padding: 20px;
484
- border-radius: 10px;
485
- margin-top: 20px;
486
- }
487
- </style>
488
- </head>
489
- <body>
490
- <h1>Hello, PDF World! 🌍</h1>
491
- <p>This is a sample HTML document converted to PDF.</p>
492
- <div class="box">
493
- <p>✨ Styles, colors, and gradients are preserved!</p>
494
- </div>
495
- </body>
496
- </html>""",
497
- height=400,
498
- key="html_code"
499
- )
500
 
501
- # Image uploader for text tab
502
- uploaded_images_text = st.file_uploader(
503
- "📷 Upload Images (optional)",
504
- type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
505
- key="image_uploader_text",
506
- help="Upload images referenced in your HTML code",
507
- accept_multiple_files=True
508
- )
509
 
510
- if uploaded_images_text:
511
- st.success(f"✅ {len(uploaded_images_text)} image(s) uploaded")
512
- with st.expander("View uploaded images"):
513
- cols = st.columns(min(len(uploaded_images_text), 4))
514
- for idx, img in enumerate(uploaded_images_text):
515
- with cols[idx % 4]:
516
- st.image(img, caption=img.name, use_container_width=True)
517
 
518
- if html_code and html_code.strip():
519
- # Auto-detect aspect ratio
520
- detected_ratio_text = detect_aspect_ratio(html_code)
521
-
522
- auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_text")
523
-
524
- if auto_detect_text:
525
- aspect_ratio_text = detected_ratio_text
526
- st.info(f"🔍 Detected: **{detected_ratio_text}**")
527
- else:
528
- aspect_ratio_text = st.radio(
529
- "Aspect Ratio",
530
- options=["16:9", "1:1", "9:16"],
531
- index=["16:9", "1:1", "9:16"].index(detected_ratio_text),
532
- key="aspect_text",
533
- help="Select the page orientation and dimensions"
534
- )
535
-
536
- convert_text_btn = st.button("🔄 Convert to PDF", key="convert_text", type="primary", use_container_width=True)
537
  else:
538
- convert_text_btn = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
 
540
- with col2:
541
- if html_code and html_code.strip():
542
- st.subheader("👁️ HTML Preview")
543
- with st.expander("Show HTML Preview", expanded=False):
544
- st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)
 
 
 
 
545
 
546
- if convert_text_btn and html_code and html_code.strip():
547
- temp_dir = None
 
 
 
 
 
548
  try:
549
- with st.spinner("Converting HTML to PDF..."):
550
- # Create temp directory
551
- temp_dir = tempfile.mkdtemp()
552
-
553
- # Process images if uploaded
554
- processed_html = html_code
555
- if uploaded_images_text:
556
- image_mapping = save_uploaded_images(uploaded_images_text, temp_dir)
557
- processed_html = process_html_with_images(html_code, temp_dir, image_mapping)
558
- st.info(f"📷 Processed {len(uploaded_images_text)} image(s)")
559
- # Debug info
560
- with st.expander("🔍 Debug: Image Mapping"):
561
- for orig, new in image_mapping.items():
562
- st.text(f"{orig} -> {new}")
563
- full_path = os.path.join(temp_dir, new)
564
- st.text(f"Full path: {full_path}")
565
- st.text(f"Exists: {os.path.exists(full_path)}")
566
-
567
- # Convert to PDF
568
- pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
569
-
570
- # Cleanup
571
- if temp_dir:
572
- shutil.rmtree(temp_dir, ignore_errors=True)
573
-
574
- if error:
575
- st.error(f"❌ {error}")
576
- with st.expander("Show error details"):
577
- st.code(error)
578
- else:
579
- st.success("✅ PDF generated successfully!")
580
-
581
- col_a, col_b = st.columns([1, 1])
582
-
583
- with col_a:
584
- st.download_button(
585
- label="⬇️ Download PDF",
586
- data=pdf_bytes,
587
- file_name="converted.pdf",
588
- mime="application/pdf",
589
- use_container_width=True,
590
- key="download_text_pdf"
591
- )
592
-
593
- with col_b:
594
- st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
595
-
596
- # PDF Preview
597
- st.subheader("📄 PDF Preview")
598
- st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
599
- except Exception as e:
600
- if temp_dir:
601
- shutil.rmtree(temp_dir, ignore_errors=True)
602
- st.error(f"❌ Error: {str(e)}")
603
-
604
- # Footer with tips
605
- st.markdown("---")
606
- st.markdown("""
607
- ### 💡 Tips:
608
- - **Auto-detection** analyzes your HTML to suggest the best aspect ratio
609
- - **16:9** - Best for presentations and landscape documents (297mm × 210mm)
610
- - **1:1** - Square format (210mm × 210mm)
611
- - **9:16** - Portrait format, standard A4 (210mm × 297mm)
612
- - **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images
613
- - All CSS styles, colors, gradients, and fonts are preserved
614
- - Use inline CSS or `<style>` tags for best results
615
- - Reference images by filename in your HTML (e.g., `<img src="image.jpg">`)
616
- - External resources should use absolute URLs
617
- - **PDF Preview** renders directly in the browser using PDF.js
618
 
619
- ### 🖼️ Using Images:
620
- 1. Upload your HTML file
621
- 2. Upload all images referenced in the HTML
622
- 3. Make sure image filenames in HTML match uploaded files exactly
623
- 4. The converter will automatically embed images in the PDF
624
- """)
 
1
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException
2
+ from fastapi.responses import Response, JSONResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from typing import List, Optional
5
  import tempfile
6
  import shutil
7
+ import os
8
+ import subprocess
9
  import base64
10
+ from pathlib import Path
11
+ import mimetypes
12
 
13
+ app = FastAPI(
14
+ title="HTML to PDF API with Image Support",
15
+ description="Convert HTML to PDF using Puppeteer with image upload support",
16
+ version="2.0.0"
17
  )
18
 
19
+ # Enable CORS
20
+ app.add_middleware(
21
+ CORSMiddleware,
22
+ allow_origins=["*"],
23
+ allow_credentials=True,
24
+ allow_methods=["*"],
25
+ allow_headers=["*"],
26
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
def save_uploaded_images(images: "List[UploadFile]", temp_dir: str):
    """Save uploaded images under ``<temp_dir>/images`` and return a name mapping.

    Args:
        images: Uploaded files. Each item must expose ``filename`` and a
            binary file object ``file``. Items without a usable filename
            are skipped.
        temp_dir: Working directory of the current conversion.

    Returns:
        Dict mapping each client-supplied filename to the saved copy's path
        relative to ``temp_dir`` (``"images/<basename>"``).
    """
    image_mapping = {}
    images_dir = os.path.join(temp_dir, "images")
    os.makedirs(images_dir, exist_ok=True)

    for image in images:
        if not image.filename:
            continue

        # The filename comes from the client. Keep only its last path
        # component so names such as "../../x.png" or "sub/x.png" cannot
        # escape (or miss) the images directory.
        safe_name = os.path.basename(image.filename.replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            continue

        image_path = os.path.join(images_dir, safe_name)
        with open(image_path, 'wb') as f:
            f.write(image.file.read())

        # Reset file pointer for potential reuse by the caller
        image.file.seek(0)

        # Key stays the original name so references in the HTML still match
        image_mapping[image.filename] = f"images/{safe_name}"
        print(f"Saved image: {image.filename} -> {image_path}")

    return image_mapping
50
 
51
+ def process_html_with_images(html_content: str, temp_dir: str, image_mapping: dict):
52
  """Process HTML to handle image references with absolute file paths"""
53
  import re
54
 
 
58
  file_url = f"file://{absolute_path}"
59
 
60
  # Replace various image reference patterns
61
+ # Pattern 1: src="filename"
62
+ html_content = re.sub(
63
+ rf'src=["\'](?:\./)?{re.escape(original_name)}["\']',
64
+ f'src="{file_url}"',
65
+ html_content,
66
+ flags=re.IGNORECASE
67
+ )
68
+
69
+ # Pattern 2: src='filename'
70
  html_content = re.sub(
71
+ rf"src=['\"](?:\./)?{re.escape(original_name)}['\"]",
72
  f'src="{file_url}"',
73
  html_content,
74
  flags=re.IGNORECASE
75
  )
76
 
77
+ # Pattern 3: background-image: url(filename)
78
  html_content = re.sub(
79
+ rf'url\(["\']?(?:\./)?{re.escape(original_name)}["\']?\)',
80
  f'url("{file_url}")',
81
  html_content,
82
  flags=re.IGNORECASE
83
  )
84
 
85
+ # Pattern 4: href for links
86
  html_content = re.sub(
87
+ rf'href=["\'](?:\./)?{re.escape(original_name)}["\']',
88
  f'href="{file_url}"',
89
  html_content,
90
  flags=re.IGNORECASE
 
92
 
93
  return html_content
94
 
95
+ def convert_html_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str):
96
+ """Convert HTML content to PDF"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  try:
98
+ # Style injection for better PDF rendering
99
  style_injection = """
100
  <style>
101
+ @page { margin: 0; }
 
 
102
  * {
103
  -webkit-print-color-adjust: exact !important;
104
  print-color-adjust: exact !important;
 
111
  </style>
112
  """
113
 
 
114
  if '</head>' in html_content:
115
  html_content = html_content.replace('</head>', style_injection + '</head>')
116
  elif '<body' in html_content:
 
118
  else:
119
  html_content = style_injection + html_content
120
 
121
+ # Save HTML to temp file
122
  html_file = os.path.join(temp_dir, "input.html")
123
  with open(html_file, 'w', encoding='utf-8') as f:
124
  f.write(html_content)
125
 
126
+ # Get puppeteer script path
127
  script_dir = os.path.dirname(os.path.abspath(__file__))
128
+ puppeteer_script = os.path.join(script_dir, 'puppeteer_pdf.js')
129
 
130
+ # Run conversion
131
  result = subprocess.run(
132
  ['node', puppeteer_script, html_file, aspect_ratio],
133
  capture_output=True,
134
  text=True,
135
  timeout=60,
136
+ cwd=script_dir
137
  )
138
 
139
  if result.returncode != 0:
140
+ raise Exception(f"PDF conversion failed: {result.stderr}")
141
 
 
142
  pdf_file = html_file.replace('.html', '.pdf')
143
 
144
  if not os.path.exists(pdf_file):
145
+ raise Exception("PDF file was not generated")
146
 
 
147
  with open(pdf_file, 'rb') as f:
148
  pdf_bytes = f.read()
149
 
150
+ return pdf_bytes
151
 
 
 
152
  except Exception as e:
153
+ raise e
154
 
155
@app.get("/")
async def root():
    """Describe the service and list the routes it exposes."""
    endpoints = {
        "POST /convert": "Convert HTML to PDF (file upload with optional images)",
        "POST /convert-text": "Convert HTML text to PDF (with optional image files)",
        "POST /convert-with-images": "Convert HTML with multiple images",
        "GET /health": "Health check",
        "GET /docs": "API documentation (Swagger UI)",
    }
    description = {
        "message": "HTML to PDF Conversion API with Image Support",
        "version": "2.0.0",
    }
    description["endpoints"] = endpoints
    return description
169
 
170
@app.get("/health")
async def health_check():
    """Report that the service is up, together with its service name."""
    payload = dict(status="healthy", service="html-to-pdf-api")
    return payload
174
 
175
@app.post("/convert")
async def convert_file(
    file: UploadFile = File(...),
    images: Optional[List[UploadFile]] = File(None),
    aspect_ratio: str = Form(default="9:16")
):
    """
    Convert uploaded HTML file to PDF with optional images

    - **file**: HTML file to convert
    - **images**: Optional list of image files (jpg, png, gif, svg, webp)
    - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)
    """
    # Function-scope import so this handler does not rely on a new
    # module-level import.
    from urllib.parse import quote

    # A multipart part may carry no filename; answer 400 instead of
    # crashing on None.lower().
    upload_name = file.filename or ""
    if not upload_name.lower().endswith(('.html', '.htm')):
        raise HTTPException(status_code=400, detail="File must be HTML (.html or .htm)")

    if aspect_ratio not in ["16:9", "1:1", "9:16"]:
        raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")

    temp_dir = None
    try:
        # Scratch directory holding the HTML, the images and the produced PDF
        temp_dir = tempfile.mkdtemp()

        # Decode as UTF-8, falling back to latin-1 (which accepts any byte)
        content = await file.read()
        try:
            html_content = content.decode('utf-8')
        except UnicodeDecodeError:
            html_content = content.decode('latin-1')

        # Process images if provided
        if images:
            image_mapping = save_uploaded_images(images, temp_dir)
            html_content = process_html_with_images(html_content, temp_dir, image_mapping)

        # NOTE(review): convert_html_to_pdf runs a blocking subprocess (up to
        # 60 s) inside this async handler; consider offloading to a thread.
        pdf_bytes = convert_html_to_pdf(html_content, aspect_ratio, temp_dir)

        # Swap only the final suffix (any letter case) for ".pdf" and drop
        # any directory part of the client-supplied name.
        pdf_name = f"{Path(upload_name.replace(chr(92), '/')).stem}.pdf"

        # Header values must be latin-1 encodable: send a quoted ASCII
        # fallback plus an RFC 5987 UTF-8 form carrying the exact name.
        ascii_name = pdf_name.encode("ascii", "replace").decode("ascii").replace('"', "_")
        disposition = (
            f'attachment; filename="{ascii_name}"; '
            f"filename*=UTF-8''{quote(pdf_name)}"
        )

        return Response(
            content=pdf_bytes,
            media_type="application/pdf",
            headers={"Content-Disposition": disposition}
        )

    except Exception as e:
        # Keep the original error as __cause__ for server-side tracebacks
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e
    finally:
        # Single cleanup point for success, failure and cancellation
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
@app.post("/convert-text")
async def convert_text(
    html: str = Form(...),
    images: Optional[List[UploadFile]] = File(None),
    aspect_ratio: str = Form(default="9:16"),
    return_base64: bool = Form(default=False)
):
    """
    Convert HTML text to PDF with optional images

    - **html**: HTML content as string
    - **images**: Optional list of image files
    - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)
    - **return_base64**: If true, returns base64 encoded PDF in JSON
    """
    if aspect_ratio not in ["16:9", "1:1", "9:16"]:
        raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")

    temp_dir = None
    try:
        # Scratch directory holding the HTML, the images and the produced PDF
        temp_dir = tempfile.mkdtemp()

        # Process images if provided
        if images:
            image_mapping = save_uploaded_images(images, temp_dir)
            html = process_html_with_images(html, temp_dir, image_mapping)

        # Convert to PDF
        pdf_bytes = convert_html_to_pdf(html, aspect_ratio, temp_dir)

        if return_base64:
            # JSON envelope for clients that cannot handle a binary body
            return JSONResponse(content={
                "success": True,
                "pdf_base64": base64.b64encode(pdf_bytes).decode('utf-8'),
                "size_bytes": len(pdf_bytes)
            })

        # Default: return the PDF bytes directly as a download
        return Response(
            content=pdf_bytes,
            media_type="application/pdf",
            headers={
                "Content-Disposition": "attachment; filename=converted.pdf"
            }
        )

    except Exception as e:
        # Keep the original error as __cause__ for server-side tracebacks
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e
    finally:
        # Single cleanup point for success, failure and any other exit
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
291
+
292
@app.post("/convert-with-images")
async def convert_with_images(
    html_file: UploadFile = File(...),
    images: List[UploadFile] = File(...),
    aspect_ratio: str = Form(default="9:16")
):
    """
    Convert HTML with multiple images - dedicated endpoint

    - **html_file**: HTML file to convert
    - **images**: List of image files (required)
    - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)
    """
    # Function-scope import so this handler does not rely on a new
    # module-level import.
    from urllib.parse import quote

    # A multipart part may carry no filename; answer 400 instead of
    # crashing on None.lower().
    upload_name = html_file.filename or ""
    if not upload_name.lower().endswith(('.html', '.htm')):
        raise HTTPException(status_code=400, detail="HTML file must be .html or .htm")

    if aspect_ratio not in ["16:9", "1:1", "9:16"]:
        raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")

    # Validate image files by extension before doing any work
    allowed_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.bmp'}
    for img in images:
        # "or ''" keeps Path() from raising TypeError on a missing filename
        ext = Path(img.filename or "").suffix.lower()
        if ext not in allowed_extensions:
            raise HTTPException(
                status_code=400,
                # sorted(): a set has no stable order, so the message would vary
                detail=f"Invalid image format: {img.filename}. Allowed: {', '.join(sorted(allowed_extensions))}"
            )

    temp_dir = None
    try:
        # Scratch directory holding the HTML, the images and the produced PDF
        temp_dir = tempfile.mkdtemp()

        # Decode as UTF-8, falling back to latin-1 (which accepts any byte)
        content = await html_file.read()
        try:
            html_content = content.decode('utf-8')
        except UnicodeDecodeError:
            html_content = content.decode('latin-1')

        # Save and process images
        image_mapping = save_uploaded_images(images, temp_dir)
        html_content = process_html_with_images(html_content, temp_dir, image_mapping)

        # Convert to PDF
        pdf_bytes = convert_html_to_pdf(html_content, aspect_ratio, temp_dir)

        # Swap only the final suffix (any letter case) for ".pdf" and drop
        # any directory part of the client-supplied name.
        pdf_name = f"{Path(upload_name.replace(chr(92), '/')).stem}.pdf"

        # Header values must be latin-1 encodable: send a quoted ASCII
        # fallback plus an RFC 5987 UTF-8 form carrying the exact name.
        ascii_name = pdf_name.encode("ascii", "replace").decode("ascii").replace('"', "_")
        disposition = (
            f'attachment; filename="{ascii_name}"; '
            f"filename*=UTF-8''{quote(pdf_name)}"
        )

        return Response(
            content=pdf_bytes,
            media_type="application/pdf",
            headers={
                "Content-Disposition": disposition,
                "X-Image-Count": str(len(images))
            }
        )

    except Exception as e:
        # Keep the original error as __cause__ for server-side tracebacks
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e
    finally:
        # Single cleanup point for success, failure and cancellation
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
+ if __name__ == "__main__":
363
+ import uvicorn
364
+ uvicorn.run(app, host="0.0.0.0", port=7860)