ABDALLALSWAITI committed on
Commit
69a76c1
·
verified ·
1 Parent(s): 1ea8766

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +507 -247
src/streamlit_app.py CHANGED
@@ -1,54 +1,89 @@
1
- from fastapi import FastAPI, File, UploadFile, Form, HTTPException
2
- from fastapi.responses import Response, JSONResponse
3
- from fastapi.middleware.cors import CORSMiddleware
4
- from typing import List, Optional
5
  import tempfile
6
  import shutil
7
- import os
8
- import subprocess
9
- import base64
10
  from pathlib import Path
11
- import mimetypes
 
12
 
13
- app = FastAPI(
14
- title="HTML to PDF API with Image Support",
15
- description="Convert HTML to PDF using Puppeteer with image upload support",
16
- version="2.0.0"
17
  )
18
 
19
- # Enable CORS
20
- app.add_middleware(
21
- CORSMiddleware,
22
- allow_origins=["*"],
23
- allow_credentials=True,
24
- allow_methods=["*"],
25
- allow_headers=["*"],
26
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- def save_uploaded_images(images: List[UploadFile], temp_dir: str):
29
- """Save uploaded images to temp directory and return mapping"""
30
  image_mapping = {}
31
  images_dir = os.path.join(temp_dir, "images")
32
  os.makedirs(images_dir, exist_ok=True)
33
 
34
  for image in images:
35
- if image.filename:
36
- # Save image to temp directory
37
- image_path = os.path.join(images_dir, image.filename)
38
- with open(image_path, 'wb') as f:
39
- content = image.file.read()
40
- f.write(content)
41
-
42
- # Reset file pointer for potential reuse
43
- image.file.seek(0)
44
-
45
- # Create mapping with relative path
46
- image_mapping[image.filename] = f"images/{image.filename}"
47
- print(f"Saved image: {image.filename} -> {image_path}")
48
 
49
  return image_mapping
50
 
51
- def process_html_with_images(html_content: str, temp_dir: str, image_mapping: dict):
52
  """Process HTML to handle image references with absolute file paths"""
53
  import re
54
 
@@ -58,7 +93,7 @@ def process_html_with_images(html_content: str, temp_dir: str, image_mapping: di
58
  file_url = f"file://{absolute_path}"
59
 
60
  # Replace various image reference patterns
61
- # Pattern 1: src="filename"
62
  html_content = re.sub(
63
  rf'src=["\'](?:\./)?{re.escape(original_name)}["\']',
64
  f'src="{file_url}"',
@@ -66,15 +101,7 @@ def process_html_with_images(html_content: str, temp_dir: str, image_mapping: di
66
  flags=re.IGNORECASE
67
  )
68
 
69
- # Pattern 2: src='filename'
70
- html_content = re.sub(
71
- rf"src=['\"](?:\./)?{re.escape(original_name)}['\"]",
72
- f'src="{file_url}"',
73
- html_content,
74
- flags=re.IGNORECASE
75
- )
76
-
77
- # Pattern 3: background-image: url(filename)
78
  html_content = re.sub(
79
  rf'url\(["\']?(?:\./)?{re.escape(original_name)}["\']?\)',
80
  f'url("{file_url}")',
@@ -82,7 +109,7 @@ def process_html_with_images(html_content: str, temp_dir: str, image_mapping: di
82
  flags=re.IGNORECASE
83
  )
84
 
85
- # Pattern 4: href for links
86
  html_content = re.sub(
87
  rf'href=["\'](?:\./)?{re.escape(original_name)}["\']',
88
  f'href="{file_url}"',
@@ -92,13 +119,134 @@ def process_html_with_images(html_content: str, temp_dir: str, image_mapping: di
92
 
93
  return html_content
94
 
95
- def convert_html_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str):
96
- """Convert HTML content to PDF"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  try:
98
- # Style injection for better PDF rendering
99
  style_injection = """
100
  <style>
101
- @page { margin: 0; }
 
 
102
  * {
103
  -webkit-print-color-adjust: exact !important;
104
  print-color-adjust: exact !important;
@@ -111,6 +259,7 @@ def convert_html_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str):
111
  </style>
112
  """
113
 
 
114
  if '</head>' in html_content:
115
  html_content = html_content.replace('</head>', style_injection + '</head>')
116
  elif '<body' in html_content:
@@ -118,247 +267,358 @@ def convert_html_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str):
118
  else:
119
  html_content = style_injection + html_content
120
 
121
- # Save HTML to temp file
122
  html_file = os.path.join(temp_dir, "input.html")
123
  with open(html_file, 'w', encoding='utf-8') as f:
124
  f.write(html_content)
125
 
126
- # Get puppeteer script path
127
  script_dir = os.path.dirname(os.path.abspath(__file__))
128
- puppeteer_script = os.path.join(script_dir, 'puppeteer_pdf.js')
129
 
130
- # Run conversion
131
  result = subprocess.run(
132
  ['node', puppeteer_script, html_file, aspect_ratio],
133
  capture_output=True,
134
  text=True,
135
  timeout=60,
136
- cwd=script_dir
137
  )
138
 
139
  if result.returncode != 0:
140
- raise Exception(f"PDF conversion failed: {result.stderr}")
141
 
 
142
  pdf_file = html_file.replace('.html', '.pdf')
143
 
144
  if not os.path.exists(pdf_file):
145
- raise Exception("PDF file was not generated")
146
 
 
147
  with open(pdf_file, 'rb') as f:
148
  pdf_bytes = f.read()
149
 
150
- return pdf_bytes
151
 
 
 
152
  except Exception as e:
153
- raise e
154
 
155
- @app.get("/")
156
- async def root():
157
- """API root endpoint"""
158
- return {
159
- "message": "HTML to PDF Conversion API with Image Support",
160
- "version": "2.0.0",
161
- "endpoints": {
162
- "POST /convert": "Convert HTML to PDF (file upload with optional images)",
163
- "POST /convert-text": "Convert HTML text to PDF (with optional image files)",
164
- "POST /convert-with-images": "Convert HTML with multiple images",
165
- "GET /health": "Health check",
166
- "GET /docs": "API documentation (Swagger UI)"
167
- }
168
- }
169
 
170
- @app.get("/health")
171
- async def health_check():
172
- """Health check endpoint"""
173
- return {"status": "healthy", "service": "html-to-pdf-api"}
174
 
175
- @app.post("/convert")
176
- async def convert_file(
177
- file: UploadFile = File(...),
178
- images: Optional[List[UploadFile]] = File(None),
179
- aspect_ratio: str = Form(default="9:16")
180
- ):
181
- """
182
- Convert uploaded HTML file to PDF with optional images
 
183
 
184
- - **file**: HTML file to convert
185
- - **images**: Optional list of image files (jpg, png, gif, svg, webp)
186
- - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)
187
- """
188
- if not file.filename.lower().endswith(('.html', '.htm')):
189
- raise HTTPException(status_code=400, detail="File must be HTML (.html or .htm)")
 
 
190
 
191
- if aspect_ratio not in ["16:9", "1:1", "9:16"]:
192
- raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")
 
 
 
 
 
193
 
194
- temp_dir = None
195
- try:
196
- # Create temporary directory
197
- temp_dir = tempfile.mkdtemp()
198
 
199
- # Read HTML content
200
- content = await file.read()
201
  try:
202
- html_content = content.decode('utf-8')
203
  except UnicodeDecodeError:
204
- html_content = content.decode('latin-1')
205
-
206
- # Process images if provided
207
- if images:
208
- image_mapping = save_uploaded_images(images, temp_dir)
209
- html_content = process_html_with_images(html_content, temp_dir, image_mapping)
210
 
211
- # Convert to PDF
212
- pdf_bytes = convert_html_to_pdf(html_content, aspect_ratio, temp_dir)
213
 
214
- # Clean up
215
- shutil.rmtree(temp_dir, ignore_errors=True)
216
 
217
- # Return PDF file
218
- filename = file.filename.replace('.html', '.pdf').replace('.htm', '.pdf')
219
- if not filename.endswith('.pdf'):
220
- filename += '.pdf'
221
 
222
- return Response(
223
- content=pdf_bytes,
224
- media_type="application/pdf",
225
- headers={
226
- "Content-Disposition": f"attachment; filename={filename}"
227
- }
228
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
- except Exception as e:
231
- if temp_dir:
232
- shutil.rmtree(temp_dir, ignore_errors=True)
233
- raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
- @app.post("/convert-text")
236
- async def convert_text(
237
- html: str = Form(...),
238
- images: Optional[List[UploadFile]] = File(None),
239
- aspect_ratio: str = Form(default="9:16"),
240
- return_base64: bool = Form(default=False)
241
- ):
242
- """
243
- Convert HTML text to PDF with optional images
244
 
245
- - **html**: HTML content as string
246
- - **images**: Optional list of image files
247
- - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)
248
- - **return_base64**: If true, returns base64 encoded PDF in JSON
249
- """
250
- if aspect_ratio not in ["16:9", "1:1", "9:16"]:
251
- raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")
252
-
253
- temp_dir = None
254
- try:
255
- # Create temporary directory
256
- temp_dir = tempfile.mkdtemp()
257
-
258
- # Process images if provided
259
- if images:
260
- image_mapping = save_uploaded_images(images, temp_dir)
261
- html = process_html_with_images(html, temp_dir, image_mapping)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- # Convert to PDF
264
- pdf_bytes = convert_html_to_pdf(html, aspect_ratio, temp_dir)
 
 
 
 
 
 
265
 
266
- # Clean up
267
- shutil.rmtree(temp_dir, ignore_errors=True)
 
 
 
 
 
268
 
269
- if return_base64:
270
- # Return as JSON with base64 encoded PDF
271
- pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
272
- return JSONResponse(content={
273
- "success": True,
274
- "pdf_base64": pdf_base64,
275
- "size_bytes": len(pdf_bytes)
276
- })
 
 
 
 
 
 
 
 
 
 
 
277
  else:
278
- # Return PDF file directly
279
- return Response(
280
- content=pdf_bytes,
281
- media_type="application/pdf",
282
- headers={
283
- "Content-Disposition": "attachment; filename=converted.pdf"
284
- }
285
- )
286
-
287
- except Exception as e:
288
- if temp_dir:
289
- shutil.rmtree(temp_dir, ignore_errors=True)
290
- raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}")
291
-
292
- @app.post("/convert-with-images")
293
- async def convert_with_images(
294
- html_file: UploadFile = File(...),
295
- images: List[UploadFile] = File(...),
296
- aspect_ratio: str = Form(default="9:16")
297
- ):
298
- """
299
- Convert HTML with multiple images - dedicated endpoint
300
-
301
- - **html_file**: HTML file to convert
302
- - **images**: List of image files (required)
303
- - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)
304
- """
305
- if not html_file.filename.lower().endswith(('.html', '.htm')):
306
- raise HTTPException(status_code=400, detail="HTML file must be .html or .htm")
307
-
308
- if aspect_ratio not in ["16:9", "1:1", "9:16"]:
309
- raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")
310
 
311
- # Validate image files
312
- allowed_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.bmp'}
313
- for img in images:
314
- ext = Path(img.filename).suffix.lower()
315
- if ext not in allowed_extensions:
316
- raise HTTPException(
317
- status_code=400,
318
- detail=f"Invalid image format: {img.filename}. Allowed: {', '.join(allowed_extensions)}"
319
- )
320
 
321
- temp_dir = None
322
- try:
323
- # Create temporary directory
324
- temp_dir = tempfile.mkdtemp()
325
-
326
- # Read HTML content
327
- content = await html_file.read()
328
  try:
329
- html_content = content.decode('utf-8')
330
- except UnicodeDecodeError:
331
- html_content = content.decode('latin-1')
332
-
333
- # Save and process images
334
- image_mapping = save_uploaded_images(images, temp_dir)
335
- html_content = process_html_with_images(html_content, temp_dir, image_mapping)
336
-
337
- # Convert to PDF
338
- pdf_bytes = convert_html_to_pdf(html_content, aspect_ratio, temp_dir)
339
-
340
- # Clean up
341
- shutil.rmtree(temp_dir, ignore_errors=True)
342
-
343
- # Return PDF
344
- filename = html_file.filename.replace('.html', '.pdf').replace('.htm', '.pdf')
345
- if not filename.endswith('.pdf'):
346
- filename += '.pdf'
347
-
348
- return Response(
349
- content=pdf_bytes,
350
- media_type="application/pdf",
351
- headers={
352
- "Content-Disposition": f"attachment; filename={filename}",
353
- "X-Image-Count": str(len(images))
354
- }
355
- )
356
-
357
- except Exception as e:
358
- if temp_dir:
359
- shutil.rmtree(temp_dir, ignore_errors=True)
360
- raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
- if __name__ == "__main__":
363
- import uvicorn
364
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
1
+ import streamlit as st
2
+ import subprocess
3
+ import os
 
4
  import tempfile
5
  import shutil
 
 
 
6
  from pathlib import Path
7
+ import base64
8
+ import re
9
 
10
+ st.set_page_config(
11
+ page_title="HTML to PDF Converter",
12
+ page_icon="πŸ“„",
13
+ layout="wide"
14
  )
15
 
16
def detect_aspect_ratio(html_content):
    """Guess a page aspect ratio from hints found in an HTML document.

    The heuristics are tried in order; the first one that produces an
    answer wins:

    1. A viewport ``<meta>`` tag whose content has ``width=device-width``
       or ``width=100%`` together with an ``orientation=portrait`` /
       ``orientation=landscape`` hint.
    2. The first CSS ``aspect-ratio: W / H`` declaration with integer
       operands.
    3. Presentation keywords (``reveal.js``, ``impress.js``, ``slide``,
       ``presentation``) anywhere in the markup.
    4. Integer ``width`` and ``height`` values in the ``<body>`` inline
       style.

    Args:
        html_content: The HTML markup as a string.

    Returns:
        One of ``"16:9"``, ``"1:1"`` or ``"9:16"``. ``"9:16"`` (portrait)
        is the fallback when no heuristic matches.
    """
    # 1. Viewport meta tag with an explicit orientation hint.
    viewport_match = re.search(
        r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    if viewport_match:
        viewport = viewport_match.group(1).lower()
        if 'width=device-width' in viewport or 'width=100%' in viewport:
            if 'orientation=portrait' in viewport:
                return "9:16"
            if 'orientation=landscape' in viewport:
                return "16:9"

    # 2. CSS aspect-ratio property.
    aspect_match = re.search(
        r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)',
        html_content,
        re.IGNORECASE,
    )
    if aspect_match:
        width = int(aspect_match.group(1))
        height = int(aspect_match.group(2))
        # A zero height is a degenerate ratio; skip the hint instead of
        # dividing by zero and crashing the whole page.
        if height > 0:
            ratio = width / height
            if ratio > 1.5:
                return "16:9"
            if ratio < 0.7:
                return "9:16"
            return "1:1"

    # 3. Common presentation frameworks / vocabulary.
    lowered = html_content.lower()
    if any(keyword in lowered
           for keyword in ('reveal.js', 'impress.js', 'slide', 'presentation')):
        return "16:9"

    # 4. Width/height hints in the body's inline style.
    body_match = re.search(
        r'<body[^>]*style=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    if body_match:
        style = body_match.group(1).lower()
        width_match = re.search(r'width\s*:\s*(\d+)', style)
        height_match = re.search(r'height\s*:\s*(\d+)', style)
        if width_match and height_match:
            w = int(width_match.group(1))
            h = int(height_match.group(1))
            # Same zero guard as above. A near-square body deliberately
            # falls through to the default below, as it always has.
            if h > 0:
                ratio = w / h
                if ratio > 1.5:
                    return "16:9"
                if ratio < 0.7:
                    return "9:16"

    # Default to portrait for plain documents.
    return "9:16"
67
 
68
def save_uploaded_images(images, temp_dir):
    """Write uploaded images into ``<temp_dir>/images`` and map their names.

    Args:
        images: Iterable of upload objects exposing ``.name`` (the file
            name reported by the uploader) and ``.getvalue()`` (the raw
            bytes), e.g. the items returned by ``st.file_uploader``.
        temp_dir: Working directory; an ``images`` sub-directory is
            created inside it if missing.

    Returns:
        Dict mapping each image's original ``name`` to its path relative
        to ``temp_dir`` (``"images/<file name>"``). Uploads whose name has
        no usable file component are skipped.
    """
    image_mapping = {}
    images_dir = os.path.join(temp_dir, "images")
    os.makedirs(images_dir, exist_ok=True)

    for image in images:
        # The name comes from the uploader, so keep only its final path
        # component: a name such as "../x.png" must not escape images_dir.
        safe_name = os.path.basename(image.name.replace("\\", "/"))
        if not safe_name or safe_name in (".", ".."):
            continue

        image_path = os.path.join(images_dir, safe_name)
        with open(image_path, 'wb') as f:
            f.write(image.getvalue())

        # Key by the original name: that is the text searched for in the
        # HTML when image references are rewritten.
        image_mapping[image.name] = f"images/{safe_name}"
        print(f"Saved image: {image.name} -> {image_path}")

    return image_mapping
85
 
86
+ def process_html_with_images(html_content, temp_dir, image_mapping):
87
  """Process HTML to handle image references with absolute file paths"""
88
  import re
89
 
 
93
  file_url = f"file://{absolute_path}"
94
 
95
  # Replace various image reference patterns
96
+ # Pattern 1: src="filename" or src='filename'
97
  html_content = re.sub(
98
  rf'src=["\'](?:\./)?{re.escape(original_name)}["\']',
99
  f'src="{file_url}"',
 
101
  flags=re.IGNORECASE
102
  )
103
 
104
+ # Pattern 2: background-image: url(filename)
 
 
 
 
 
 
 
 
105
  html_content = re.sub(
106
  rf'url\(["\']?(?:\./)?{re.escape(original_name)}["\']?\)',
107
  f'url("{file_url}")',
 
109
  flags=re.IGNORECASE
110
  )
111
 
112
+ # Pattern 3: href for links
113
  html_content = re.sub(
114
  rf'href=["\'](?:\./)?{re.escape(original_name)}["\']',
115
  f'href="{file_url}"',
 
119
 
120
  return html_content
121
 
122
def render_html_preview(html_content):
    """Return an ``<iframe>`` element that displays ``html_content``.

    The markup is embedded as a base64 ``data:`` URI so that no temporary
    file or extra HTTP route is needed.

    Args:
        html_content: The HTML markup to preview, as a string.

    Returns:
        A string containing a single ``<iframe>`` tag.
    """
    # The payload is UTF-8 encoded, so say so in the URI; without a
    # charset the browser may guess a legacy encoding and garble any
    # non-ASCII text (accents, emoji) in documents lacking a <meta charset>.
    b64 = base64.b64encode(html_content.encode("utf-8")).decode("ascii")
    return (
        f'<iframe src="data:text/html;charset=utf-8;base64,{b64}" '
        'width="100%" height="600" '
        'style="border: 2px solid #ddd; border-radius: 5px;"></iframe>'
    )
128
+
129
def render_pdf_preview(pdf_bytes):
    """Return a standalone HTML document that renders a PDF with PDF.js.

    The PDF is embedded as base64 inside the page, decoded in the browser
    and drawn page by page onto ``<canvas>`` elements. PDF.js itself is
    loaded from the cdnjs CDN.

    Args:
        pdf_bytes: The raw PDF file contents.

    Returns:
        A string holding the complete HTML page.
    """
    b64 = base64.b64encode(pdf_bytes).decode("ascii")

    # A plain template with a placeholder, rather than an f-string, keeps
    # the CSS/JS braces literal. The placeholder cannot collide with the
    # payload because underscores are not in the base64 alphabet.
    template = '''
    <!DOCTYPE html>
    <html>
    <head>
    <style>
        body {
            margin: 0;
            padding: 0;
            overflow: hidden;
            background: #525659;
        }
        #pdf-container {
            width: 100%;
            height: 100vh;
            overflow: auto;
            display: flex;
            flex-direction: column;
            align-items: center;
            padding: 20px;
            box-sizing: border-box;
        }
        canvas {
            box-shadow: 0 2px 8px rgba(0,0,0,0.3);
            margin-bottom: 10px;
            background: white;
        }
        #loading {
            color: white;
            font-family: Arial, sans-serif;
            font-size: 18px;
            padding: 20px;
        }
        .error {
            color: #ff6b6b;
            font-family: Arial, sans-serif;
            padding: 20px;
            background: rgba(0,0,0,0.5);
            border-radius: 5px;
            margin: 20px;
        }
    </style>
    </head>
    <body>
    <div id="pdf-container">
        <div id="loading">Loading PDF...</div>
    </div>

    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
    <script>
        pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';

        const pdfData = atob('__PDF_BASE64__');
        const pdfContainer = document.getElementById('pdf-container');
        const loading = document.getElementById('loading');

        const uint8Array = new Uint8Array(pdfData.length);
        for (let i = 0; i < pdfData.length; i++) {
            uint8Array[i] = pdfData.charCodeAt(i);
        }

        pdfjsLib.getDocument({data: uint8Array}).promise.then(function(pdf) {
            loading.style.display = 'none';

            const numPages = pdf.numPages;
            const promises = [];

            for (let pageNum = 1; pageNum <= numPages; pageNum++) {
                // Create and attach the canvas now, in page order, instead
                // of inside the getPage callback: the callbacks may run in
                // any order, which would shuffle the pages.
                const canvas = document.createElement('canvas');
                pdfContainer.appendChild(canvas);

                promises.push(
                    pdf.getPage(pageNum).then(function(page) {
                        const scale = 1.5;
                        const viewport = page.getViewport({scale: scale});

                        const context = canvas.getContext('2d');
                        canvas.height = viewport.height;
                        canvas.width = viewport.width;

                        return page.render({
                            canvasContext: context,
                            viewport: viewport
                        }).promise;
                    })
                );
            }

            return Promise.all(promises);
        }).catch(function(error) {
            // Build the message with textContent so the error text is never
            // parsed as HTML, and un-hide the element in case the failure
            // happened after loading had already been hidden.
            const box = document.createElement('div');
            box.className = 'error';
            box.textContent = 'Error loading PDF: ' + error.message;
            loading.textContent = '';
            loading.appendChild(box);
            loading.style.display = 'block';
            console.error('Error loading PDF:', error);
        });
    </script>
    </body>
    </html>
    '''
    return template.replace('__PDF_BASE64__', b64)
230
+
231
+ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
232
+ """
233
+ Convert HTML content to PDF using Puppeteer with better styling preservation
234
+
235
+ Args:
236
+ html_content: String containing HTML content
237
+ aspect_ratio: One of "16:9", "1:1", or "9:16"
238
+ temp_dir: Temporary directory for processing
239
+
240
+ Returns:
241
+ Tuple of (pdf_bytes, error_message)
242
+ """
243
  try:
244
+ # Inject CSS to preserve styles better
245
  style_injection = """
246
  <style>
247
+ @page {
248
+ margin: 0;
249
+ }
250
  * {
251
  -webkit-print-color-adjust: exact !important;
252
  print-color-adjust: exact !important;
 
259
  </style>
260
  """
261
 
262
+ # Insert style injection before closing head tag or at the start of body
263
  if '</head>' in html_content:
264
  html_content = html_content.replace('</head>', style_injection + '</head>')
265
  elif '<body' in html_content:
 
267
  else:
268
  html_content = style_injection + html_content
269
 
270
+ # Save HTML content to temporary file
271
  html_file = os.path.join(temp_dir, "input.html")
272
  with open(html_file, 'w', encoding='utf-8') as f:
273
  f.write(html_content)
274
 
275
+ # Get the path to puppeteer_pdf.js
276
  script_dir = os.path.dirname(os.path.abspath(__file__))
277
+ puppeteer_script = os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js')
278
 
279
+ # Run Node.js script to convert HTML to PDF
280
  result = subprocess.run(
281
  ['node', puppeteer_script, html_file, aspect_ratio],
282
  capture_output=True,
283
  text=True,
284
  timeout=60,
285
+ cwd=os.path.dirname(script_dir)
286
  )
287
 
288
  if result.returncode != 0:
289
+ return None, f"PDF conversion failed: {result.stderr}"
290
 
291
+ # Get the generated PDF path
292
  pdf_file = html_file.replace('.html', '.pdf')
293
 
294
  if not os.path.exists(pdf_file):
295
+ return None, "PDF file was not generated"
296
 
297
+ # Read PDF file into memory
298
  with open(pdf_file, 'rb') as f:
299
  pdf_bytes = f.read()
300
 
301
+ return pdf_bytes, None
302
 
303
+ except subprocess.TimeoutExpired:
304
+ return None, "Error: PDF conversion timed out (60 seconds)"
305
  except Exception as e:
306
+ return None, f"Error: {str(e)}"
307
 
308
+ # Page header
309
+ st.title("πŸ“„ HTML to PDF Converter")
310
+ st.markdown("""
311
+ Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
312
+ ✨ **NEW:** Upload images alongside your HTML files!
313
+ """)
 
 
 
 
 
 
 
 
314
 
315
+ # Create tabs
316
+ tab1, tab2 = st.tabs(["πŸ“€ Upload HTML File", "πŸ“ Paste HTML Code"])
 
 
317
 
318
+ # Tab 1: Upload HTML File
319
+ with tab1:
320
+ uploaded_file = st.file_uploader(
321
+ "Choose an HTML file",
322
+ type=['html', 'htm'],
323
+ key="file_uploader",
324
+ help="Upload an HTML file (max 200MB)",
325
+ accept_multiple_files=False
326
+ )
327
 
328
+ # Image uploader
329
+ uploaded_images = st.file_uploader(
330
+ "πŸ“· Upload Images (optional)",
331
+ type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
332
+ key="image_uploader",
333
+ help="Upload images referenced in your HTML",
334
+ accept_multiple_files=True
335
+ )
336
 
337
+ if uploaded_images:
338
+ st.success(f"βœ… {len(uploaded_images)} image(s) uploaded")
339
+ with st.expander("View uploaded images"):
340
+ cols = st.columns(min(len(uploaded_images), 4))
341
+ for idx, img in enumerate(uploaded_images):
342
+ with cols[idx % 4]:
343
+ st.image(img, caption=img.name, use_container_width=True)
344
 
345
+ if uploaded_file is not None:
346
+ st.success(f"βœ… File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
 
 
347
 
348
+ # Read file content
349
+ uploaded_file.seek(0)
350
  try:
351
+ html_content = uploaded_file.getvalue().decode('utf-8')
352
  except UnicodeDecodeError:
353
+ uploaded_file.seek(0)
354
+ html_content = uploaded_file.getvalue().decode('latin-1')
 
 
 
 
355
 
356
+ # Auto-detect aspect ratio
357
+ detected_ratio = detect_aspect_ratio(html_content)
358
 
359
+ col1, col2 = st.columns([1, 1])
 
360
 
361
+ with col1:
362
+ st.subheader("βš™οΈ Settings")
 
 
363
 
364
+ auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_file")
365
+
366
+ if auto_detect:
367
+ aspect_ratio_file = detected_ratio
368
+ st.info(f"πŸ” Detected: **{detected_ratio}**")
369
+ else:
370
+ aspect_ratio_file = st.radio(
371
+ "Aspect Ratio",
372
+ options=["16:9", "1:1", "9:16"],
373
+ index=["16:9", "1:1", "9:16"].index(detected_ratio),
374
+ key="aspect_file",
375
+ help="Select the page orientation and dimensions"
376
+ )
377
+
378
+ st.markdown(f"""
379
+ **Selected: {aspect_ratio_file}**
380
+ - 16:9 = Landscape (297mm Γ— 210mm)
381
+ - 1:1 = Square (210mm Γ— 210mm)
382
+ - 9:16 = Portrait (210mm Γ— 297mm)
383
+ """)
384
+
385
+ convert_file_btn = st.button("πŸ”„ Convert to PDF", key="convert_file", type="primary", width="stretch")
386
 
387
+ with col2:
388
+ st.subheader("πŸ‘οΈ HTML Preview")
389
+ with st.expander("Show HTML Preview", expanded=False):
390
+ st.components.v1.html(render_html_preview(html_content), height=600, scrolling=True)
391
+
392
+ # Conversion section
393
+ if convert_file_btn:
394
+ temp_dir = None
395
+ try:
396
+ with st.spinner("Converting HTML to PDF..."):
397
+ # Create temp directory
398
+ temp_dir = tempfile.mkdtemp()
399
+
400
+ # Process images if uploaded
401
+ if uploaded_images:
402
+ image_mapping = save_uploaded_images(uploaded_images, temp_dir)
403
+ html_content = process_html_with_images(html_content, temp_dir, image_mapping)
404
+ st.info(f"πŸ“· Processed {len(uploaded_images)} image(s)")
405
+ # Debug info
406
+ with st.expander("πŸ” Debug: Image Mapping"):
407
+ for orig, new in image_mapping.items():
408
+ st.text(f"{orig} -> {new}")
409
+ full_path = os.path.join(temp_dir, new)
410
+ st.text(f"Full path: {full_path}")
411
+ st.text(f"Exists: {os.path.exists(full_path)}")
412
+
413
+ # Convert to PDF
414
+ pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file, temp_dir)
415
+
416
+ # Cleanup
417
+ if temp_dir:
418
+ shutil.rmtree(temp_dir, ignore_errors=True)
419
+
420
+ if error:
421
+ st.error(f"❌ {error}")
422
+ with st.expander("Show error details"):
423
+ st.code(error)
424
+ else:
425
+ st.success("βœ… PDF generated successfully!")
426
+
427
+ col_a, col_b = st.columns([1, 1])
428
+
429
+ with col_a:
430
+ output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
431
+ if not output_filename.endswith('.pdf'):
432
+ output_filename += '.pdf'
433
+
434
+ st.download_button(
435
+ label="⬇️ Download PDF",
436
+ data=pdf_bytes,
437
+ file_name=output_filename,
438
+ mime="application/pdf",
439
+ width="stretch",
440
+ key="download_file_pdf"
441
+ )
442
+
443
+ with col_b:
444
+ st.info(f"πŸ“¦ Size: {len(pdf_bytes):,} bytes")
445
+
446
+ # PDF Preview
447
+ st.subheader("πŸ“„ PDF Preview")
448
+ st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
449
+ except Exception as e:
450
+ if temp_dir:
451
+ shutil.rmtree(temp_dir, ignore_errors=True)
452
+ st.error(f"❌ Error: {str(e)}")
453
 
454
+ # Tab 2: Paste HTML Code
455
+ with tab2:
456
+ col1, col2 = st.columns([1, 1])
 
 
 
 
 
 
457
 
458
+ with col1:
459
+ html_code = st.text_area(
460
+ "HTML Content",
461
+ value="""<!DOCTYPE html>
462
+ <html>
463
+ <head>
464
+ <title>Sample Document</title>
465
+ <style>
466
+ body {
467
+ font-family: Arial, sans-serif;
468
+ margin: 40px;
469
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
470
+ color: white;
471
+ }
472
+ h1 {
473
+ font-size: 48px;
474
+ margin-bottom: 20px;
475
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
476
+ }
477
+ p {
478
+ font-size: 18px;
479
+ line-height: 1.6;
480
+ }
481
+ .box {
482
+ background: rgba(255,255,255,0.1);
483
+ padding: 20px;
484
+ border-radius: 10px;
485
+ margin-top: 20px;
486
+ }
487
+ </style>
488
+ </head>
489
+ <body>
490
+ <h1>Hello, PDF World! 🌍</h1>
491
+ <p>This is a sample HTML document converted to PDF.</p>
492
+ <div class="box">
493
+ <p>✨ Styles, colors, and gradients are preserved!</p>
494
+ </div>
495
+ </body>
496
+ </html>""",
497
+ height=400,
498
+ key="html_code"
499
+ )
500
 
501
+ # Image uploader for text tab
502
+ uploaded_images_text = st.file_uploader(
503
+ "πŸ“· Upload Images (optional)",
504
+ type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
505
+ key="image_uploader_text",
506
+ help="Upload images referenced in your HTML code",
507
+ accept_multiple_files=True
508
+ )
509
 
510
+ if uploaded_images_text:
511
+ st.success(f"βœ… {len(uploaded_images_text)} image(s) uploaded")
512
+ with st.expander("View uploaded images"):
513
+ cols = st.columns(min(len(uploaded_images_text), 4))
514
+ for idx, img in enumerate(uploaded_images_text):
515
+ with cols[idx % 4]:
516
+ st.image(img, caption=img.name, use_container_width=True)
517
 
518
+ if html_code and html_code.strip():
519
+ # Auto-detect aspect ratio
520
+ detected_ratio_text = detect_aspect_ratio(html_code)
521
+
522
+ auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_text")
523
+
524
+ if auto_detect_text:
525
+ aspect_ratio_text = detected_ratio_text
526
+ st.info(f"πŸ” Detected: **{detected_ratio_text}**")
527
+ else:
528
+ aspect_ratio_text = st.radio(
529
+ "Aspect Ratio",
530
+ options=["16:9", "1:1", "9:16"],
531
+ index=["16:9", "1:1", "9:16"].index(detected_ratio_text),
532
+ key="aspect_text",
533
+ help="Select the page orientation and dimensions"
534
+ )
535
+
536
+ convert_text_btn = st.button("πŸ”„ Convert to PDF", key="convert_text", type="primary", width="stretch")
537
  else:
538
+ convert_text_btn = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
 
540
+ with col2:
541
+ if html_code and html_code.strip():
542
+ st.subheader("πŸ‘οΈ HTML Preview")
543
+ with st.expander("Show HTML Preview", expanded=False):
544
+ st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)
 
 
 
 
545
 
546
+ if convert_text_btn and html_code and html_code.strip():
547
+ temp_dir = None
 
 
 
 
 
548
  try:
549
+ with st.spinner("Converting HTML to PDF..."):
550
+ # Create temp directory
551
+ temp_dir = tempfile.mkdtemp()
552
+
553
+ # Process images if uploaded
554
+ processed_html = html_code
555
+ if uploaded_images_text:
556
+ image_mapping = save_uploaded_images(uploaded_images_text, temp_dir)
557
+ processed_html = process_html_with_images(html_code, temp_dir, image_mapping)
558
+ st.info(f"πŸ“· Processed {len(uploaded_images_text)} image(s)")
559
+ # Debug info
560
+ with st.expander("πŸ” Debug: Image Mapping"):
561
+ for orig, new in image_mapping.items():
562
+ st.text(f"{orig} -> {new}")
563
+ full_path = os.path.join(temp_dir, new)
564
+ st.text(f"Full path: {full_path}")
565
+ st.text(f"Exists: {os.path.exists(full_path)}")
566
+
567
+ # Convert to PDF
568
+ pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
569
+
570
+ # Cleanup
571
+ if temp_dir:
572
+ shutil.rmtree(temp_dir, ignore_errors=True)
573
+
574
+ if error:
575
+ st.error(f"❌ {error}")
576
+ with st.expander("Show error details"):
577
+ st.code(error)
578
+ else:
579
+ st.success("βœ… PDF generated successfully!")
580
+
581
+ col_a, col_b = st.columns([1, 1])
582
+
583
+ with col_a:
584
+ st.download_button(
585
+ label="⬇️ Download PDF",
586
+ data=pdf_bytes,
587
+ file_name="converted.pdf",
588
+ mime="application/pdf",
589
+ width="stretch",
590
+ key="download_text_pdf"
591
+ )
592
+
593
+ with col_b:
594
+ st.info(f"πŸ“¦ Size: {len(pdf_bytes):,} bytes")
595
+
596
+ # PDF Preview
597
+ st.subheader("πŸ“„ PDF Preview")
598
+ st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
599
+ except Exception as e:
600
+ if temp_dir:
601
+ shutil.rmtree(temp_dir, ignore_errors=True)
602
+ st.error(f"❌ Error: {str(e)}")
603
+
604
+ # Footer with tips
605
+ st.markdown("---")
606
+ st.markdown("""
607
+ ### πŸ’‘ Tips:
608
+ - **Auto-detection** analyzes your HTML to suggest the best aspect ratio
609
+ - **16:9** - Best for presentations and landscape documents (297mm Γ— 210mm)
610
+ - **1:1** - Square format (210mm Γ— 210mm)
611
+ - **9:16** - Portrait format, standard A4 (210mm Γ— 297mm)
612
+ - **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images
613
+ - All CSS styles, colors, gradients, and fonts are preserved
614
+ - Use inline CSS or `<style>` tags for best results
615
+ - Reference images by filename in your HTML (e.g., `<img src="image.jpg">`)
616
+ - External resources should use absolute URLs
617
+ - **PDF Preview** renders directly in the browser using PDF.js
618
 
619
+ ### πŸ–ΌοΈ Using Images:
620
+ 1. Upload your HTML file
621
+ 2. Upload all images referenced in the HTML
622
+ 3. Make sure image filenames in HTML match uploaded files exactly
623
+ 4. The converter will automatically embed images in the PDF
624
+ """)