ABDALLALSWAITI committed on
Commit
9f4d1a1
·
verified ·
1 Parent(s): a75900b

Upload streamlit_app (1).py

Browse files
Files changed (1) hide show
  1. src/streamlit_app (1).py +695 -0
src/streamlit_app (1).py ADDED
@@ -0,0 +1,695 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Streamlit HTML to PDF Converter with Image Support and Proper Page Breaks
3
+ Save this file as: src/streamlit_app.py
4
+ """
5
+ import streamlit as st
6
+ import subprocess
7
+ import os
8
+ import tempfile
9
+ import shutil
10
+ from pathlib import Path
11
+ import base64
12
+ import re
13
+ import mimetypes
14
+
15
# Page-level Streamlit settings: tab title, tab icon and a wide layout.
# The icon literal was mis-encoded in this copy of the file; it is restored
# to the document emoji used in the app title.
st.set_page_config(
    page_title="HTML to PDF Converter",
    page_icon="📄",
    layout="wide",
)
20
+
21
def detect_aspect_ratio(html_content):
    """Guess the target aspect ratio ("16:9", "1:1" or "9:16") from HTML text.

    The checks run in this order and the first one that decides wins:

    1. A viewport ``<meta>`` tag whose ``content`` mentions
       ``orientation=portrait`` or ``orientation=landscape``.
    2. The first CSS ``aspect-ratio: W / H`` declaration with integer terms:
       wider than 1.5 is "16:9", narrower than 0.7 is "9:16", otherwise "1:1".
    3. Presentation keywords (reveal.js, impress.js, slide, presentation),
       which select "16:9".

    Returns "9:16" (portrait) when nothing matches.
    """
    viewport_match = re.search(
        r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    if viewport_match:
        viewport = viewport_match.group(1).lower()
        if 'orientation=portrait' in viewport:
            return "9:16"
        if 'orientation=landscape' in viewport:
            return "16:9"

    aspect_match = re.search(
        r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE
    )
    if aspect_match:
        width = int(aspect_match.group(1))
        height = int(aspect_match.group(2))
        # "aspect-ratio: N/0" is not a usable ratio; fall through to the
        # keyword heuristics instead of raising ZeroDivisionError.
        if height > 0:
            ratio = width / height
            if ratio > 1.5:
                return "16:9"
            if ratio < 0.7:
                return "9:16"
            return "1:1"

    # Lower-case once instead of once per keyword.
    lowered = html_content.lower()
    slide_keywords = ('reveal.js', 'impress.js', 'slide', 'presentation')
    if any(keyword in lowered for keyword in slide_keywords):
        return "16:9"

    return "9:16"
47
+
48
def image_to_base64(image_file):
    """Encode an uploaded image as a ``data:<mime>;base64,...`` URL.

    ``image_file`` must provide ``getvalue()`` (the raw bytes) and ``name``.
    The MIME type comes from ``mimetypes``; when that has no answer, a small
    extension table is consulted, with ``image/png`` as the last resort.

    Returns the data URL, or ``None`` after reporting the failure via
    ``st.error``.
    """
    try:
        raw = image_file.getvalue()
        guessed = mimetypes.guess_type(image_file.name)[0]
        if guessed:
            mime_type = guessed
        else:
            # mimetypes did not recognise the name: use our own table.
            by_extension = {
                '.jpg': 'image/jpeg',
                '.jpeg': 'image/jpeg',
                '.png': 'image/png',
                '.gif': 'image/gif',
                '.svg': 'image/svg+xml',
                '.webp': 'image/webp',
                '.bmp': 'image/bmp'
            }
            suffix = os.path.splitext(image_file.name)[1].lower()
            mime_type = by_extension.get(suffix, 'image/png')

        encoded = base64.b64encode(raw).decode('utf-8')
        return f"data:{mime_type};base64,{encoded}"
    except Exception as e:
        st.error(f"Error converting {image_file.name} to base64: {str(e)}")
        return None
72
+
73
def _replace_image_refs(html_content, image_data_urls):
    """Swap references to each filename for its data URL; return (html, counts)."""
    replacements = {}

    for filename, data_url in image_data_urls.items():
        escaped_name = re.escape(filename)

        # (report label, pattern, flags). Every pattern captures the text
        # before the reference as group 1 and the quote character (possibly
        # empty for url()) as group 2; the url() patterns also capture the
        # closing parenthesis as group 3.
        rules = (
            (
                "img src",
                rf'(<img[^>]*\s+src\s*=\s*)(["\'])(?:[^"\']*?/)?{escaped_name}\2',
                re.IGNORECASE | re.DOTALL,
            ),
            (
                "bg-image",
                rf'(background-image\s*:\s*url\s*\()(["\']?)(?:[^)"\']*/)?{escaped_name}\2(\))',
                re.IGNORECASE,
            ),
            (
                "url",
                rf'(url\s*\()(["\']?)(?:[^)"\']*/)?{escaped_name}\2(\))',
                re.IGNORECASE,
            ),
        )

        def _swap(match, data_url=data_url):
            # Reuse the author's own quote character. Forcing double quotes
            # would terminate an enclosing style="..." attribute early.
            # A callable replacement also keeps the data URL from being
            # interpreted as a regex replacement template.
            quote = match.group(2)
            tail = match.group(3) if match.re.groups >= 3 else ""
            return f"{match.group(1)}{quote}{data_url}{quote}{tail}"

        for label, pattern, flags in rules:
            # subn replaces and counts in a single pass.
            html_content, count = re.subn(pattern, _swap, html_content, flags=flags)
            if count:
                replacements[f"{filename} ({label})"] = count

    return html_content, replacements


def embed_images_as_base64(html_content, uploaded_images):
    """Embed uploaded images into the HTML as base64 data URLs.

    Each uploaded file is converted with ``image_to_base64``. References to
    its filename (optionally preceded by a path) are then replaced in
    ``<img src=...>`` attributes, ``background-image: url(...)`` declarations
    and any other CSS ``url(...)``. Progress and a replacement summary are
    written to the Streamlit page.

    Args:
        html_content: The HTML text to rewrite.
        uploaded_images: Uploaded file objects exposing ``name`` and
            ``getvalue()``; may be empty or ``None``.

    Returns:
        ``(html, replacements)`` where ``replacements`` maps
        ``"<filename> (<kind>)"`` to the number of references replaced. It is
        empty when nothing was uploaded, converted or matched.
    """
    if not uploaded_images:
        return html_content, {}

    image_data_urls = {}
    for img in uploaded_images:
        data_url = image_to_base64(img)
        if data_url:
            image_data_urls[img.name] = data_url
            st.write(f"✓ Converted {img.name} to base64 ({len(data_url)} chars)")

    if not image_data_urls:
        return html_content, {}

    html_content, replacements = _replace_image_refs(html_content, image_data_urls)

    if replacements:
        st.success("✅ Image Replacements:")
        for key, count in replacements.items():
            st.write(f"  • {key}: {count} replacement(s)")
    else:
        st.warning("⚠️ No image references found in HTML matching uploaded files!")
        st.write("Uploaded files:", [img.name for img in uploaded_images])

        # Show the first few image-related lines so the user can compare the
        # names used in the HTML with the uploaded filenames.
        with st.expander("🔍 Debug: Show HTML image references"):
            img_lines = [
                line for line in html_content.split('\n')
                if any(k in line.lower() for k in ['<img', 'src=', 'url(', 'background'])
            ]
            if img_lines:
                for line in img_lines[:10]:
                    st.code(line.strip(), language='html')
            else:
                st.write("No image-related lines found in HTML")

    return html_content, replacements
135
+
136
def inject_page_breaks(html_content: str, aspect_ratio: str):
    """Insert a ``<style id="auto-page-breaks">`` block into the HTML.

    The stylesheet sets the ``@page`` size from ``aspect_ratio`` ("16:9" is
    A4 landscape, "1:1" is 210mm x 210mm, anything else is A4 portrait). It
    forces a page break after page/slide containers and direct body children,
    and asks the renderer to keep headings, images, tables and similar
    elements unbroken and to print backgrounds.

    The block is inserted exactly once: before the first ``</head>`` if there
    is one, otherwise before the first ``<body`` tag, otherwise at the very
    start. Tag matching is case-insensitive.

    Returns the modified HTML string.
    """
    page_size = {
        "16:9": "A4 landscape",
        "1:1": "210mm 210mm",
    }.get(aspect_ratio, "A4 portrait")

    # Comprehensive page break CSS
    page_css = f"""
<style id="auto-page-breaks">
    /* Define page size */
    @page {{
        size: {page_size};
        margin: 0;
    }}

    /* Reset body */
    html, body {{
        margin: 0 !important;
        padding: 0 !important;
        width: 100% !important;
        height: 100% !important;
    }}

    /* Page containers - each should be one page */
    .page, .slide, section.page, article.page, div[class*="page"], div[class*="slide"] {{
        width: 100% !important;
        min-height: 100vh !important;
        height: 100vh !important;
        page-break-after: always !important;
        break-after: page !important;
        page-break-inside: avoid !important;
        break-inside: avoid !important;
        position: relative !important;
        box-sizing: border-box !important;
        overflow: hidden !important;
    }}

    /* Last page shouldn't force a break */
    .page:last-child, .slide:last-child,
    section.page:last-child, article.page:last-child {{
        page-break-after: auto !important;
        break-after: auto !important;
    }}

    /* If no explicit page class, treat direct body children as pages */
    body > section:not(.no-page-break),
    body > article:not(.no-page-break),
    body > div:not(.no-page-break) {{
        page-break-after: always !important;
        break-after: page !important;
        min-height: 100vh;
    }}

    body > section:last-child,
    body > article:last-child,
    body > div:last-child {{
        page-break-after: auto !important;
        break-after: auto !important;
    }}

    /* Utility classes for manual control */
    .page-break, .page-break-after {{
        page-break-after: always !important;
        break-after: page !important;
    }}

    .page-break-before {{
        page-break-before: always !important;
        break-before: page !important;
    }}

    .no-page-break, .keep-together {{
        page-break-inside: avoid !important;
        break-inside: avoid !important;
    }}

    /* Prevent awkward breaks in content */
    h1, h2, h3, h4, h5, h6 {{
        page-break-after: avoid !important;
        break-after: avoid !important;
        page-break-inside: avoid !important;
        break-inside: avoid !important;
    }}

    img, figure, table, pre, blockquote {{
        page-break-inside: avoid !important;
        break-inside: avoid !important;
    }}

    /* Preserve colors and backgrounds */
    * {{
        -webkit-print-color-adjust: exact !important;
        print-color-adjust: exact !important;
        color-adjust: exact !important;
    }}
</style>
"""

    # Inject CSS into HTML. Only the first matching tag is used so the style
    # block is never duplicated, and upper-case tags are recognised too.
    anchor = re.search(r'</head\s*>', html_content, re.IGNORECASE)
    if anchor is None:
        anchor = re.search(r'<body\b', html_content, re.IGNORECASE)
    if anchor is None:
        return page_css + html_content

    cut = anchor.start()
    return html_content[:cut] + page_css + html_content[cut:]
250
+
251
def render_html_preview(html_content):
    """Return an ``<iframe>`` tag that displays ``html_content``.

    The HTML is UTF-8 encoded, base64 encoded and placed in a ``data:`` URL.
    The URL declares ``charset=utf-8`` so the browser decodes the payload with
    the encoding used here, even when the document has no ``<meta charset>``.
    """
    b64 = base64.b64encode(html_content.encode('utf-8')).decode('ascii')
    iframe_html = (
        f'<iframe src="data:text/html;charset=utf-8;base64,{b64}" '
        'width="100%" height="600" '
        'style="border: 2px solid #ddd; border-radius: 5px;"></iframe>'
    )
    return iframe_html
256
+
257
def render_pdf_preview(pdf_bytes):
    """Return a standalone HTML page that renders ``pdf_bytes`` with PDF.js.

    The PDF is embedded as a base64 string, decoded in the browser and drawn
    page by page onto ``<canvas>`` elements. PDF.js 3.11.174 is loaded from
    the cdnjs CDN, so the viewer needs network access to that host.

    Each canvas is appended to the container before its page is requested,
    so the on-screen order always follows the page number rather than the
    order in which the asynchronous page loads finish. Errors are shown via
    ``textContent`` so the message is never parsed as HTML.
    """
    b64 = base64.b64encode(pdf_bytes).decode()

    pdf_viewer_html = f'''
<!DOCTYPE html>
<html>
<head>
    <style>
        body {{
            margin: 0;
            padding: 0;
            overflow: hidden;
            background: #525659;
        }}
        #pdf-container {{
            width: 100%;
            height: 100vh;
            overflow: auto;
            display: flex;
            flex-direction: column;
            align-items: center;
            padding: 20px;
            box-sizing: border-box;
        }}
        canvas {{
            box-shadow: 0 2px 8px rgba(0,0,0,0.3);
            margin-bottom: 10px;
            background: white;
        }}
        #loading {{
            color: white;
            font-family: Arial, sans-serif;
            font-size: 18px;
            padding: 20px;
        }}
    </style>
</head>
<body>
    <div id="pdf-container">
        <div id="loading">Loading PDF...</div>
    </div>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
    <script>
        pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
        const pdfData = atob('{b64}');
        const pdfContainer = document.getElementById('pdf-container');
        const loading = document.getElementById('loading');
        const uint8Array = new Uint8Array(pdfData.length);
        for (let i = 0; i < pdfData.length; i++) {{
            uint8Array[i] = pdfData.charCodeAt(i);
        }}
        pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
            loading.style.display = 'none';
            const numPages = pdf.numPages;
            const promises = [];
            for (let pageNum = 1; pageNum <= numPages; pageNum++) {{
                // Append synchronously so canvases stay in page order.
                const canvas = document.createElement('canvas');
                pdfContainer.appendChild(canvas);
                promises.push(
                    pdf.getPage(pageNum).then(function(page) {{
                        const scale = 1.5;
                        const viewport = page.getViewport({{scale: scale}});
                        const context = canvas.getContext('2d');
                        canvas.height = viewport.height;
                        canvas.width = viewport.width;
                        return page.render({{
                            canvasContext: context,
                            viewport: viewport
                        }}).promise;
                    }})
                );
            }}
            return Promise.all(promises);
        }}).catch(function(error) {{
            // Re-show the status line: it may already be hidden if a page
            // failed after the document itself loaded.
            loading.style.display = 'block';
            loading.style.color = '#ff6b6b';
            loading.textContent = 'Error: ' + error.message;
        }});
    </script>
</body>
</html>
'''
    return pdf_viewer_html
339
+
340
def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
    """Convert HTML to PDF by running ``puppeteer_pdf.js`` under Node.js.

    Steps: inject the page-break stylesheet, write the HTML to
    ``<temp_dir>/input.html``, locate ``puppeteer_pdf.js`` (parent of this
    file's directory, this file's directory, or the working directory), run
    ``node puppeteer_pdf.js <html_file> <aspect_ratio>`` with a 60 second
    timeout, then read ``<temp_dir>/input.pdf``. Progress is reported with
    ``st.write``.

    Args:
        html_content: HTML text to convert.
        aspect_ratio: "16:9", "1:1" or "9:16"; passed to the CSS injection
            and to the Node script.
        temp_dir: Existing directory for the intermediate files. The caller
            owns it and is responsible for removing it.

    Returns:
        ``(pdf_bytes, None)`` on success, or ``(None, error_message)`` on any
        failure. No exception propagates to the caller.
    """
    try:
        # Step 1: Inject page break CSS
        st.write("🔧 Injecting page break CSS...")
        html_content = inject_page_breaks(html_content, aspect_ratio)

        # Save HTML to temp file
        html_file = os.path.join(temp_dir, "input.html")
        with open(html_file, 'w', encoding='utf-8') as f:
            f.write(html_content)

        st.write(f"📝 Saved HTML: {os.path.getsize(html_file):,} bytes")

        # Find puppeteer script
        script_dir = os.path.dirname(os.path.abspath(__file__))
        possible_paths = [
            os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js'),
            os.path.join(script_dir, 'puppeteer_pdf.js'),
            os.path.join(script_dir, '..', 'puppeteer_pdf.js'),
            'puppeteer_pdf.js'
        ]
        puppeteer_script = next(
            (path for path in possible_paths if os.path.exists(path)), None
        )

        if not puppeteer_script:
            return None, "Error: puppeteer_pdf.js not found"

        # Fail with a readable message instead of a bare errno when Node.js
        # is not installed.
        if shutil.which('node') is None:
            return None, "Error: Node.js ('node') was not found on PATH"

        st.write(f"🔧 Using Puppeteer: {puppeteer_script}")

        # Run conversion
        result = subprocess.run(
            ['node', puppeteer_script, html_file, aspect_ratio],
            capture_output=True,
            text=True,
            timeout=60,
            cwd=os.path.dirname(os.path.abspath(puppeteer_script))
        )

        if result.returncode != 0:
            # Some failures only print to stdout; do not show an empty reason.
            details = result.stderr or result.stdout or f"exit code {result.returncode}"
            return None, f"PDF conversion failed: {details}"

        # Read PDF. The script is expected to write it next to the HTML file
        # with a .pdf extension. Only the extension is swapped, so a ".html"
        # elsewhere in the temp path is left alone.
        pdf_file = os.path.splitext(html_file)[0] + '.pdf'
        if not os.path.exists(pdf_file):
            return None, "PDF file was not generated"

        with open(pdf_file, 'rb') as f:
            pdf_bytes = f.read()

        st.write(f"✅ PDF generated: {len(pdf_bytes):,} bytes")
        return pdf_bytes, None

    except subprocess.TimeoutExpired:
        return None, "Error: PDF conversion timed out (60 seconds)"
    except Exception as e:
        # Top-level boundary for the UI: report the failure, don't crash.
        return None, f"Error: {str(e)}"
401
+
402
# Main UI
#
# The two tabs (file upload / pasted HTML) share the same gallery, aspect
# ratio selection and conversion flow, so that flow lives in the private
# helpers below and each tab only supplies its own inputs and labels.

# File extensions accepted by both image uploaders.
IMAGE_TYPES = ['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp']

# Choices offered when aspect ratio auto-detection is switched off.
ASPECT_RATIOS = ["16:9", "1:1", "9:16"]

# Starter document shown in the "Paste HTML Code" tab.
DEFAULT_HTML = """<!DOCTYPE html>
<html>
<head>
<style>
body {
    font-family: Arial;
    margin: 0;
    padding: 0;
}
.page {
    width: 100%;
    height: 100vh;
    display: flex;
    align-items: center;
    justify-content: center;
    box-sizing: border-box;
    padding: 40px;
}
.page:nth-child(1) {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
}
.page:nth-child(2) {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
    color: white;
}
.page:nth-child(3) {
    background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
    color: white;
}
h1 { font-size: 48px; text-shadow: 2px 2px 4px rgba(0,0,0,0.3); }
</style>
</head>
<body>
<div class="page">
    <h1>Page 1: Hello PDF! 🌍</h1>
</div>

<div class="page">
    <h1>Page 2: Separate Page! 📄</h1>
</div>

<div class="page">
    <h1>Page 3: Final Page! ✨</h1>
</div>
</body>
</html>"""

# Help text rendered below both tabs.
FOOTER_HELP = """
### 💡 How Page Breaks Work:

**Automatic Page Detection:**
- Elements with class `page`, `slide`, or `section.page` are treated as separate pages
- Each page automatically gets `page-break-after: always` CSS
- Last page won't have a trailing break

**HTML Structure for Multiple Pages:**
```html
<div class="page">Page 1 content</div>
<div class="page">Page 2 content</div>
<div class="page">Page 3 content</div>
```

**Manual Page Breaks:**
- Add class `page-break` to force a break after an element
- Add class `page-break-before` to force a break before an element
- Add class `no-page-break` to prevent breaks inside an element

**Image Embedding:**
- Images are converted to base64 and embedded directly in HTML
- Ensures images always appear in the PDF
- Filename in HTML must match uploaded file exactly

### 📝 Example HTML:
```html
<!DOCTYPE html>
<html>
<body>
    <div class="page">
        <h1>First Page</h1>
        <img src="logo.png" alt="Logo">
    </div>

    <div class="page">
        <h1>Second Page</h1>
        <p>Content here...</p>
    </div>
</body>
</html>
```
Then upload a file named: `logo.png`
"""


def _pdf_name_for(html_name):
    """Return the download name: the upload's name with its last extension swapped for .pdf."""
    # Path.stem drops only the final suffix, whatever its case. Chained
    # str.replace calls would also rewrite ".html" in the middle of a name.
    stem = Path(html_name).stem
    return f"{stem or 'converted'}.pdf"


def _decode_html(raw_bytes):
    """Decode uploaded HTML bytes as UTF-8 (dropping a BOM), falling back to Latin-1."""
    try:
        return raw_bytes.decode('utf-8-sig')
    except UnicodeDecodeError:
        # Latin-1 maps every byte, so this fallback cannot fail.
        return raw_bytes.decode('latin-1')


def _show_image_gallery(images, expander_label):
    """Confirm the upload count and show thumbnails in up to four columns."""
    st.success(f"✅ {len(images)} image(s) uploaded")
    with st.expander(expander_label):
        cols = st.columns(min(len(images), 4))
        for idx, img in enumerate(images):
            with cols[idx % 4]:
                st.image(img, caption=img.name, use_container_width=True)


def _choose_aspect_ratio(html, key_suffix):
    """Return the detected aspect ratio, or the user's manual choice when auto-detect is off."""
    detected = detect_aspect_ratio(html)
    if st.checkbox("Auto-detect aspect ratio", value=True, key=f"auto_{key_suffix}"):
        st.info(f"🔍 Detected: **{detected}**")
        return detected
    return st.radio(
        "Aspect Ratio",
        options=ASPECT_RATIOS,
        index=ASPECT_RATIOS.index(detected),
        key=f"aspect_{key_suffix}",
    )


def _convert_and_show(html, images, aspect_ratio, output_name, no_match_warning, no_match_tip=None):
    """Embed images, convert to PDF, then show the download button and preview."""
    temp_dir = None
    try:
        with st.spinner("Converting..."):
            temp_dir = tempfile.mkdtemp()

            # Embed images as base64
            processed_html = html
            if images:
                with st.expander("🖼️ Image Processing", expanded=True):
                    processed_html, replacements = embed_images_as_base64(html, images)

                    if not replacements:
                        st.warning(no_match_warning)
                        if no_match_tip:
                            st.write(no_match_tip)

            # Convert to PDF
            pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio, temp_dir)

        if error:
            st.error(f"❌ {error}")
            return

        st.success("✅ PDF generated with proper page breaks!")

        col_a, col_b = st.columns(2)
        with col_a:
            st.download_button(
                "⬇️ Download PDF",
                data=pdf_bytes,
                file_name=output_name,
                mime="application/pdf",
                use_container_width=True
            )
        with col_b:
            st.info(f"Size: {len(pdf_bytes):,} bytes")

        st.subheader("📄 PDF Preview")
        st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
    except Exception as e:
        # UI boundary: show the failure on the page instead of a traceback.
        st.error(f"❌ Error: {str(e)}")
    finally:
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir, ignore_errors=True)


st.title("📄 HTML to PDF Converter")
st.markdown("""
Convert HTML to PDF with **proper page breaks** and **embedded base64 images**!
✨ Each page in your HTML will be preserved as a separate PDF page.
""")

# Create tabs
tab1, tab2 = st.tabs(["📤 Upload HTML File", "📝 Paste HTML Code"])

# Tab 1: Upload HTML File
with tab1:
    uploaded_file = st.file_uploader(
        "Choose an HTML file",
        type=['html', 'htm'],
        key="file_uploader",
        help="Upload an HTML file"
    )

    uploaded_images = st.file_uploader(
        "📷 Upload Images",
        type=IMAGE_TYPES,
        key="image_uploader",
        help="Upload images - they will be embedded as base64 in the HTML",
        accept_multiple_files=True
    )

    if uploaded_images:
        _show_image_gallery(uploaded_images, "View uploaded images")

    if uploaded_file:
        st.success(f"✅ File: {uploaded_file.name}")

        html_content = _decode_html(uploaded_file.getvalue())

        col1, col2 = st.columns([1, 1])

        with col1:
            st.subheader("⚙️ Settings")
            aspect_ratio = _choose_aspect_ratio(html_content, "file")
            convert_btn = st.button(
                "🔄 Convert to PDF", key="conv_file", type="primary", use_container_width=True
            )

        with col2:
            st.subheader("👁️ Preview")
            with st.expander("Show HTML"):
                st.components.v1.html(render_html_preview(html_content), height=400, scrolling=True)

        if convert_btn:
            _convert_and_show(
                html_content,
                uploaded_images,
                aspect_ratio,
                _pdf_name_for(uploaded_file.name),
                "⚠️ Images uploaded but no matches found in HTML!",
                "**Tip:** Make sure image filenames in HTML match uploaded files exactly",
            )

# Tab 2: Paste HTML
with tab2:
    html_code = st.text_area(
        "HTML Content",
        value=DEFAULT_HTML,
        height=400,
        key="html_code"
    )

    uploaded_images_text = st.file_uploader(
        "📷 Upload Images",
        type=IMAGE_TYPES,
        key="image_text",
        help="Upload images to embed in your HTML",
        accept_multiple_files=True
    )

    if uploaded_images_text:
        _show_image_gallery(uploaded_images_text, "View images")

    if html_code.strip():
        aspect_ratio_text = _choose_aspect_ratio(html_code, "text")

        convert_text_btn = st.button(
            "🔄 Convert", key="conv_text", type="primary", use_container_width=True
        )

        if convert_text_btn:
            _convert_and_show(
                html_code,
                uploaded_images_text,
                aspect_ratio_text,
                "converted.pdf",
                "⚠️ Images uploaded but no matches found!",
            )

# Footer
st.markdown("---")
st.markdown(FOOTER_HELP)