ABDALLALSWAITI committed on
Commit
f9f8ffb
·
verified ·
1 Parent(s): dd2814d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -159
app.py CHANGED
@@ -1,5 +1,5 @@
1
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException
2
- from fastapi.responses import Response, JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  import subprocess
5
  import os
@@ -9,13 +9,15 @@ import base64
9
  import re
10
  import mimetypes
11
  from typing import List, Optional
 
 
 
 
12
 
13
  app = FastAPI(
14
- title="HTML to PDF Converter API",
15
- description="Convert HTML to PDF with image support",
16
- version="1.0.0",
17
- docs_url="/",
18
- redoc_url="/redoc"
19
  )
20
 
21
  app.add_middleware(
@@ -26,139 +28,137 @@ app.add_middleware(
26
  allow_headers=["*"],
27
  )
28
 
29
def detect_aspect_ratio(html_content: str) -> str:
    """Guess the page aspect ratio an HTML document was designed for.

    Heuristics are tried in order and the first hit wins:

    1. A viewport ``<meta>`` tag whose ``content`` mentions
       ``orientation=portrait`` or ``orientation=landscape``.
    2. The first CSS ``aspect-ratio: W / H`` declaration (integer operands
       only): wider than 1.5 is landscape, narrower than 0.7 is portrait,
       anything in between is square.
    3. Presentation keywords anywhere in the markup.

    Args:
        html_content: Raw HTML source.

    Returns:
        One of ``"16:9"``, ``"1:1"`` or ``"9:16"``; ``"9:16"`` when no
        heuristic matches.
    """
    viewport_match = re.search(
        r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    if viewport_match:
        viewport = viewport_match.group(1).lower()
        if 'orientation=portrait' in viewport:
            return "9:16"
        if 'orientation=landscape' in viewport:
            return "16:9"

    aspect_match = re.search(
        r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE
    )
    if aspect_match:
        width = int(aspect_match.group(1))
        height = int(aspect_match.group(2))
        # A zero denominator is a degenerate declaration; ignore it and fall
        # through to the keyword heuristic instead of dividing by zero.
        if height:
            ratio = width / height
            if ratio > 1.5:
                return "16:9"
            if ratio < 0.7:
                return "9:16"
            return "1:1"

    # Lowercase once rather than once per keyword.
    lowered = html_content.lower()
    keywords = ('reveal.js', 'impress.js', 'slide', 'presentation')
    if any(keyword in lowered for keyword in keywords):
        return "16:9"

    return "9:16"
54
 
55
def image_to_base64(image_bytes: bytes, filename: str) -> str:
    """Encode raw image bytes as a ``data:`` URL.

    The MIME type is guessed from ``filename`` with :mod:`mimetypes`. If
    that yields nothing, a small extension table is consulted, and
    ``image/png`` is the final fallback.

    Args:
        image_bytes: Raw file contents.
        filename: Name the image was uploaded under. A missing or empty
            name is tolerated and falls back to ``image/png``.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.
    """
    # Uploads may arrive without a filename; guess_type() raises TypeError
    # on None, so normalise to an empty string first.
    name = filename or ""

    mime_type, _ = mimetypes.guess_type(name)
    if not mime_type:
        fallback_types = {
            '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg',
            '.png': 'image/png', '.gif': 'image/gif',
            '.svg': 'image/svg+xml', '.webp': 'image/webp',
            '.bmp': 'image/bmp',
        }
        ext = os.path.splitext(name)[1].lower()
        mime_type = fallback_types.get(ext, 'image/png')

    b64_data = base64.b64encode(image_bytes).decode('ascii')
    return f"data:{mime_type};base64,{b64_data}"
 
 
 
69
 
70
def embed_images(html_content: str, images_dict: dict) -> str:
    """Replace references to uploaded images with inline ``data:`` URLs.

    Two kinds of reference are rewritten, matched case-insensitively and
    with any leading directory path ignored:

    * ``<img ... src="[path/]name">``
    * CSS ``url([path/]name)``, which also covers ``background-image``.

    The quoting style found in the document is preserved. Forcing double
    quotes inside ``url(...)`` would terminate an enclosing
    ``style="..."`` attribute and corrupt the markup.

    Args:
        html_content: HTML source to rewrite.
        images_dict: Mapping of image filename to its ``data:`` URL.

    Returns:
        The rewritten HTML, or ``html_content`` unchanged when
        ``images_dict`` is empty.
    """
    if not images_dict:
        return html_content

    for filename, data_url in images_dict.items():
        escaped_name = re.escape(filename)

        img_src = re.compile(
            rf'(<img[^>]*\s+src\s*=\s*)(["\'])(?:[^"\']*?/)?{escaped_name}\2',
            re.IGNORECASE | re.DOTALL,
        )
        css_url = re.compile(
            rf'(url\s*\()(["\']?)(?:[^)"\']*/)?{escaped_name}\2(\))',
            re.IGNORECASE,
        )

        # Function replacements insert data_url verbatim, so nothing inside
        # it can be misread as a backreference or escape sequence.
        html_content = img_src.sub(
            lambda m: f"{m.group(1)}{m.group(2)}{data_url}{m.group(2)}",
            html_content,
        )
        html_content = css_url.sub(
            lambda m: f"{m.group(1)}{m.group(2)}{data_url}{m.group(2)}{m.group(3)}",
            html_content,
        )

    return html_content
87
-
88
def inject_page_breaks(html_content: str, aspect_ratio: str) -> str:
    """Insert print CSS that fixes the page size and forces page breaks.

    The style block is inserted exactly once: before the first ``</head>``,
    else before the first ``<body`` tag, else at the very start of the
    document. Tag matching is case-insensitive.

    Args:
        html_content: HTML source to augment.
        aspect_ratio: ``"16:9"`` selects A4 landscape, ``"1:1"`` a 210 mm
            square, and any other value A4 portrait.

    Returns:
        The HTML with the ``<style id="auto-page-breaks">`` block added.
    """
    if aspect_ratio == "16:9":
        page_size = "A4 landscape"
    elif aspect_ratio == "1:1":
        page_size = "210mm 210mm"
    else:
        page_size = "A4 portrait"

    page_css = f"""
    <style id="auto-page-breaks">
        @page {{ size: {page_size}; margin: 0; }}
        html, body {{ margin: 0 !important; padding: 0 !important; width: 100% !important; height: 100% !important; }}
        .page, .slide, section.page, article.page, div[class*="page"], div[class*="slide"] {{
            width: 100% !important; min-height: 100vh !important; height: 100vh !important;
            page-break-after: always !important; break-after: page !important;
            page-break-inside: avoid !important; break-inside: avoid !important;
            position: relative !important; box-sizing: border-box !important; overflow: hidden !important;
        }}
        .page:last-child, .slide:last-child, section.page:last-child, article.page:last-child {{
            page-break-after: auto !important; break-after: auto !important;
        }}
        body > section:not(.no-page-break), body > article:not(.no-page-break), body > div:not(.no-page-break) {{
            page-break-after: always !important; break-after: page !important; min-height: 100vh;
        }}
        body > section:last-child, body > article:last-child, body > div:last-child {{
            page-break-after: auto !important;
        }}
        .page-break, .page-break-after {{ page-break-after: always !important; break-after: page !important; }}
        .page-break-before {{ page-break-before: always !important; break-before: page !important; }}
        .no-page-break, .keep-together {{ page-break-inside: avoid !important; break-inside: avoid !important; }}
        h1, h2, h3, h4, h5, h6 {{ page-break-after: avoid !important; break-after: avoid !important; }}
        img, figure, table, pre, blockquote {{ page-break-inside: avoid !important; break-inside: avoid !important; }}
        * {{ -webkit-print-color-adjust: exact !important; print-color-adjust: exact !important; }}
    </style>
    """

    # Only the first anchor is used. Replacing every "</head>" would
    # duplicate the block whenever that text also appears later in the
    # document, for example inside a script string.
    anchor = re.search(r'</head\s*>', html_content, re.IGNORECASE)
    if anchor is None:
        anchor = re.search(r'<body\b', html_content, re.IGNORECASE)
    insert_at = anchor.start() if anchor else 0

    return html_content[:insert_at] + page_css + html_content[insert_at:]
132
-
133
def convert_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str) -> bytes:
    """Render HTML to PDF by running the Puppeteer helper script under Node.

    The HTML gets the page-break CSS injected and is written to
    ``<temp_dir>/input.html``. ``node /app/puppeteer_pdf.js`` is then run
    on it, and the resulting PDF is read back.

    Args:
        html_content: HTML source to render.
        aspect_ratio: Ratio string passed through to the page CSS and to
            the helper script.
        temp_dir: Existing scratch directory. The caller owns it and is
            responsible for removing it.

    Returns:
        The bytes of the generated PDF.

    Raises:
        FileNotFoundError: The helper script is not installed.
        RuntimeError: The helper exited non-zero or produced no PDF.
        subprocess.TimeoutExpired: The helper ran longer than 60 seconds.
    """
    html_content = inject_page_breaks(html_content, aspect_ratio)

    html_file = os.path.join(temp_dir, "input.html")
    # Swap only the extension. A global str.replace('.html', '.pdf') would
    # also rewrite a ".html" occurring in the directory part of the path.
    # NOTE(review): assumes the helper writes the PDF next to its input
    # under the same base name; confirm against puppeteer_pdf.js.
    pdf_file = os.path.splitext(html_file)[0] + ".pdf"

    with open(html_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    puppeteer_script = '/app/puppeteer_pdf.js'
    if not os.path.exists(puppeteer_script):
        raise FileNotFoundError("puppeteer_pdf.js not found")

    result = subprocess.run(
        ['node', puppeteer_script, html_file, aspect_ratio],
        capture_output=True,
        text=True,
        timeout=60,
    )
    if result.returncode != 0:
        raise RuntimeError(f"PDF conversion failed: {result.stderr}")

    try:
        with open(pdf_file, 'rb') as f:
            return f.read()
    except FileNotFoundError:
        raise RuntimeError("PDF file was not generated") from None
158
-
159
@app.get("/health")
async def health():
    """Liveness probe returning the service name, status and version."""
    return dict(
        status="healthy",
        service="HTML to PDF API",
        version="1.0.0",
    )
162
 
163
  @app.post("/convert")
164
  async def convert(
@@ -167,12 +167,11 @@ async def convert(
167
  auto_detect: bool = Form(True),
168
  images: Optional[List[UploadFile]] = File(None)
169
  ):
170
- temp_dir = None
171
  try:
172
- html_content = (await html_file.read()).decode('utf-8')
173
 
174
  if auto_detect:
175
- aspect_ratio = detect_aspect_ratio(html_content)
176
  elif not aspect_ratio:
177
  aspect_ratio = "9:16"
178
 
@@ -180,25 +179,25 @@ async def convert(
180
  raise HTTPException(400, "Invalid aspect ratio")
181
 
182
  if images:
183
- images_dict = {}
184
  for img in images:
185
  img_bytes = await img.read()
186
- images_dict[img.filename] = image_to_base64(img_bytes, img.filename)
187
- html_content = embed_images(html_content, images_dict)
 
 
 
188
 
189
- temp_dir = tempfile.mkdtemp()
190
- pdf_bytes = convert_to_pdf(html_content, aspect_ratio, temp_dir)
191
 
192
  return Response(
193
- content=pdf_bytes,
194
  media_type="application/pdf",
195
  headers={"Content-Disposition": "attachment; filename=output.pdf"}
196
  )
 
197
  except Exception as e:
 
198
  raise HTTPException(500, str(e))
199
- finally:
200
- if temp_dir and os.path.exists(temp_dir):
201
- shutil.rmtree(temp_dir, ignore_errors=True)
202
 
203
  @app.post("/convert-html")
204
  async def convert_html(
@@ -206,7 +205,6 @@ async def convert_html(
206
  aspect_ratio: Optional[str] = Form(None),
207
  auto_detect: bool = Form(True)
208
  ):
209
- temp_dir = None
210
  try:
211
  if auto_detect:
212
  aspect_ratio = detect_aspect_ratio(html_content)
@@ -216,19 +214,17 @@ async def convert_html(
216
  if aspect_ratio not in ["16:9", "1:1", "9:16"]:
217
  raise HTTPException(400, "Invalid aspect ratio")
218
 
219
- temp_dir = tempfile.mkdtemp()
220
- pdf_bytes = convert_to_pdf(html_content, aspect_ratio, temp_dir)
221
 
222
  return Response(
223
- content=pdf_bytes,
224
  media_type="application/pdf",
225
  headers={"Content-Disposition": "attachment; filename=output.pdf"}
226
  )
 
227
  except Exception as e:
 
228
  raise HTTPException(500, str(e))
229
- finally:
230
- if temp_dir and os.path.exists(temp_dir):
231
- shutil.rmtree(temp_dir, ignore_errors=True)
232
 
233
  @app.post("/convert-base64")
234
  async def convert_base64(
@@ -236,7 +232,6 @@ async def convert_base64(
236
  aspect_ratio: Optional[str] = Form(None),
237
  auto_detect: bool = Form(True)
238
  ):
239
- temp_dir = None
240
  try:
241
  if auto_detect:
242
  aspect_ratio = detect_aspect_ratio(html_content)
@@ -246,18 +241,16 @@ async def convert_base64(
246
  if aspect_ratio not in ["16:9", "1:1", "9:16"]:
247
  raise HTTPException(400, "Invalid aspect ratio")
248
 
249
- temp_dir = tempfile.mkdtemp()
250
- pdf_bytes = convert_to_pdf(html_content, aspect_ratio, temp_dir)
251
- pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
252
 
253
  return JSONResponse({
254
  "success": True,
255
- "pdf_base64": pdf_base64,
256
  "aspect_ratio": aspect_ratio,
257
- "size_bytes": len(pdf_bytes)
258
  })
 
259
  except Exception as e:
260
- raise HTTPException(500, str(e))
261
- finally:
262
- if temp_dir and os.path.exists(temp_dir):
263
- shutil.rmtree(temp_dir, ignore_errors=True)
 
1
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException
2
+ from fastapi.responses import Response, JSONResponse, HTMLResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  import subprocess
5
  import os
 
9
  import re
10
  import mimetypes
11
  from typing import List, Optional
12
+ import logging
13
+
14
# Emit INFO-level records from the root logger so conversion progress and
# failures logged by this module are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Application object. docs_url and redoc_url are not overridden here, so
# "/" stays free for the landing page defined below.
app = FastAPI(
    title="HTML to PDF Converter",
    description="Convert HTML to PDF",
    version="1.0.0",
)
22
 
23
  app.add_middleware(
 
28
  allow_headers=["*"],
29
  )
30
 
31
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve a static landing page describing the API.

    Returns:
        An HTML document listing the endpoints, linking to the generated
        docs at ``/docs`` and ``/redoc``, and showing a curl example.
    """
    # The backslashes in the curl example are written as "\\". In a normal
    # (non-raw) string literal a backslash followed by a newline is a line
    # continuation, which would drop both the backslash and the line break
    # from the rendered <pre> block.
    return """
    <!DOCTYPE html>
    <html>
    <head>
        <title>HTML to PDF API</title>
        <style>
            body { font-family: Arial; max-width: 800px; margin: 50px auto; padding: 20px; }
            h1 { color: #333; }
            .endpoint { background: #f5f5f5; padding: 15px; margin: 10px 0; border-radius: 5px; }
            code { background: #e0e0e0; padding: 2px 5px; border-radius: 3px; }
        </style>
    </head>
    <body>
        <h1>🔄 HTML to PDF Converter API</h1>
        <p>Status: <strong style="color: green;">✓ Running</strong></p>

        <h2>Endpoints:</h2>

        <div class="endpoint">
            <h3>GET /health</h3>
            <p>Health check endpoint</p>
        </div>

        <div class="endpoint">
            <h3>POST /convert</h3>
            <p>Upload HTML file and convert to PDF</p>
            <p>Parameters:</p>
            <ul>
                <li><code>html_file</code> - HTML file (required)</li>
                <li><code>aspect_ratio</code> - 16:9, 1:1, or 9:16 (optional)</li>
                <li><code>auto_detect</code> - Auto-detect aspect ratio (default: true)</li>
                <li><code>images</code> - Image files to embed (optional)</li>
            </ul>
        </div>

        <div class="endpoint">
            <h3>POST /convert-html</h3>
            <p>Convert HTML string to PDF</p>
            <p>Parameters:</p>
            <ul>
                <li><code>html_content</code> - HTML as string (required)</li>
                <li><code>aspect_ratio</code> - 16:9, 1:1, or 9:16 (optional)</li>
            </ul>
        </div>

        <div class="endpoint">
            <h3>POST /convert-base64</h3>
            <p>Convert HTML to base64-encoded PDF</p>
            <p>Returns JSON with base64 PDF data</p>
        </div>

        <h2>Documentation:</h2>
        <p><a href="/docs">Interactive API Docs (Swagger)</a></p>
        <p><a href="/redoc">Alternative Docs (ReDoc)</a></p>

        <h2>Example Usage:</h2>
        <pre>curl -X POST https://YOUR-SPACE.hf.space/convert-html \\
  -F "html_content=&lt;html&gt;&lt;body&gt;&lt;h1&gt;Hello PDF!&lt;/h1&gt;&lt;/body&gt;&lt;/html&gt;" \\
  -o output.pdf</pre>
    </body>
    </html>
    """
95
+
96
@app.get("/health")
async def health():
    """Liveness probe: log the request and report service name and version."""
    logger.info("Health check requested")
    payload = dict(
        status="healthy",
        service="HTML to PDF Converter",
        version="1.0.0",
    )
    return payload
104
+
105
def detect_aspect_ratio(html: str) -> str:
    """Guess the page aspect ratio from hints in the HTML source.

    Matching is case-insensitive and purely substring based. Checks run in
    order and the first hit wins:

    1. ``orientation=portrait`` gives ``"9:16"``.
    2. ``orientation=landscape`` gives ``"16:9"``.
    3. The words ``slide`` or ``presentation`` give ``"16:9"``.

    Args:
        html: Raw HTML source.

    Returns:
        ``"16:9"`` or ``"9:16"``; ``"9:16"`` when nothing matches.
    """
    # Lowercase once; the document can be large and every check needs it.
    lowered = html.lower()

    if 'orientation=portrait' in lowered:
        return "9:16"
    if 'orientation=landscape' in lowered:
        return "16:9"
    if any(keyword in lowered for keyword in ('slide', 'presentation')):
        return "16:9"
    return "9:16"
113
 
114
def inject_page_css(html: str, ratio: str) -> str:
    """Insert a print stylesheet that fixes the PDF page size.

    The ``<style>`` block is inserted exactly once: before the first
    ``</head>``, else before the first ``<body`` tag, else at the very
    start of the document. Tag matching is case-insensitive.

    Args:
        html: HTML source to augment.
        ratio: ``"16:9"`` selects A4 landscape, ``"1:1"`` a 210 mm square,
            and any other value A4 portrait.

    Returns:
        The HTML with the style block added.
    """
    page_sizes = {"16:9": "A4 landscape", "1:1": "210mm 210mm"}
    size = page_sizes.get(ratio, "A4 portrait")

    css = f"""<style>
@page {{ size: {size}; margin: 0; }}
html, body {{ margin: 0; padding: 0; width: 100%; height: 100%; }}
.page {{ width: 100%; height: 100vh; page-break-after: always; box-sizing: border-box; }}
.page:last-child {{ page-break-after: auto; }}
* {{ -webkit-print-color-adjust: exact; print-color-adjust: exact; }}
</style>"""

    # Only the first anchor is used. Replacing every "</head>" would
    # duplicate the stylesheet whenever that text also appears later in the
    # document, for example inside a script string.
    anchor = re.search(r'</head\s*>', html, re.IGNORECASE)
    if anchor is None:
        anchor = re.search(r'<body\b', html, re.IGNORECASE)
    insert_at = anchor.start() if anchor else 0

    return html[:insert_at] + css + html[insert_at:]
129
 
130
def convert_to_pdf(html: str, ratio: str) -> bytes:
    """Render HTML to PDF by running the Puppeteer helper script under Node.

    A private scratch directory is created for the run and is always
    removed afterwards, on success and on failure.

    Args:
        html: HTML source to render.
        ratio: Ratio string passed through to the page CSS and to the
            helper script.

    Returns:
        The bytes of the generated PDF.

    Raises:
        RuntimeError: The helper exited non-zero or produced no PDF.
        subprocess.TimeoutExpired: The helper ran longer than 60 seconds.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        html_file = os.path.join(temp_dir, "input.html")
        # Swap only the extension. A global str.replace('.html', '.pdf')
        # would also rewrite a ".html" occurring in the directory part.
        # NOTE(review): assumes the helper writes the PDF next to its input
        # under the same base name; confirm against puppeteer_pdf.js.
        pdf_file = os.path.splitext(html_file)[0] + ".pdf"

        with open(html_file, 'w', encoding='utf-8') as f:
            f.write(inject_page_css(html, ratio))

        logger.info("Converting with aspect ratio: %s", ratio)

        result = subprocess.run(
            ['node', '/app/puppeteer_pdf.js', html_file, ratio],
            capture_output=True,
            text=True,
            timeout=60,
        )
        if result.returncode != 0:
            logger.error("Conversion failed: %s", result.stderr)
            raise RuntimeError(f"Conversion failed: {result.stderr}")

        try:
            with open(pdf_file, 'rb') as f:
                return f.read()
        except FileNotFoundError:
            raise RuntimeError("PDF not generated") from None
    finally:
        # Best-effort cleanup; a failure here must not mask the real result.
        shutil.rmtree(temp_dir, ignore_errors=True)
 
 
 
 
162
 
163
  @app.post("/convert")
164
  async def convert(
 
167
  auto_detect: bool = Form(True),
168
  images: Optional[List[UploadFile]] = File(None)
169
  ):
 
170
  try:
171
+ html = (await html_file.read()).decode('utf-8')
172
 
173
  if auto_detect:
174
+ aspect_ratio = detect_aspect_ratio(html)
175
  elif not aspect_ratio:
176
  aspect_ratio = "9:16"
177
 
 
179
  raise HTTPException(400, "Invalid aspect ratio")
180
 
181
  if images:
 
182
  for img in images:
183
  img_bytes = await img.read()
184
+ b64 = base64.b64encode(img_bytes).decode()
185
+ ext = os.path.splitext(img.filename)[1].lower()
186
+ mime = f"image/{'jpeg' if ext in ['.jpg','.jpeg'] else 'png'}"
187
+ data_url = f"data:{mime};base64,{b64}"
188
+ html = html.replace(img.filename, data_url)
189
 
190
+ pdf = convert_to_pdf(html, aspect_ratio)
 
191
 
192
  return Response(
193
+ content=pdf,
194
  media_type="application/pdf",
195
  headers={"Content-Disposition": "attachment; filename=output.pdf"}
196
  )
197
+
198
  except Exception as e:
199
+ logger.error(f"Error: {str(e)}")
200
  raise HTTPException(500, str(e))
 
 
 
201
 
202
  @app.post("/convert-html")
203
  async def convert_html(
 
205
  aspect_ratio: Optional[str] = Form(None),
206
  auto_detect: bool = Form(True)
207
  ):
 
208
  try:
209
  if auto_detect:
210
  aspect_ratio = detect_aspect_ratio(html_content)
 
214
  if aspect_ratio not in ["16:9", "1:1", "9:16"]:
215
  raise HTTPException(400, "Invalid aspect ratio")
216
 
217
+ pdf = convert_to_pdf(html_content, aspect_ratio)
 
218
 
219
  return Response(
220
+ content=pdf,
221
  media_type="application/pdf",
222
  headers={"Content-Disposition": "attachment; filename=output.pdf"}
223
  )
224
+
225
  except Exception as e:
226
+ logger.error(f"Error: {str(e)}")
227
  raise HTTPException(500, str(e))
 
 
 
228
 
229
  @app.post("/convert-base64")
230
  async def convert_base64(
 
232
  aspect_ratio: Optional[str] = Form(None),
233
  auto_detect: bool = Form(True)
234
  ):
 
235
  try:
236
  if auto_detect:
237
  aspect_ratio = detect_aspect_ratio(html_content)
 
241
  if aspect_ratio not in ["16:9", "1:1", "9:16"]:
242
  raise HTTPException(400, "Invalid aspect ratio")
243
 
244
+ pdf = convert_to_pdf(html_content, aspect_ratio)
245
+ pdf_b64 = base64.b64encode(pdf).decode()
 
246
 
247
  return JSONResponse({
248
  "success": True,
249
+ "pdf_base64": pdf_b64,
250
  "aspect_ratio": aspect_ratio,
251
+ "size_bytes": len(pdf)
252
  })
253
+
254
  except Exception as e:
255
+ logger.error(f"Error: {str(e)}")
256
+ raise HTTPException(500, str(e))