Spaces:
Sleeping
Sleeping
import asyncio
import base64
import mimetypes
import os
import re
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import List, Optional
from urllib.parse import quote

from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import Response, JSONResponse
# Application instance shared by every route in this module.
app = FastAPI(
    title="HTML to PDF API with Image Support",
    description="Convert HTML to PDF using Puppeteer with image upload support",
    version="2.0.0",
)

# Enable CORS for any origin, method and header.
# Credentials stay disabled: FastAPI's CORS documentation states that the
# wildcard "*" must not be used for origins/methods/headers when
# allow_credentials is True, and nothing in this API reads cookies or
# Authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
def save_uploaded_images(images: "List[UploadFile]", temp_dir: str) -> dict:
    """Save uploaded images under ``<temp_dir>/images`` and return a name mapping.

    Args:
        images: Uploaded files. Each item must expose ``filename`` and a binary
            file object in ``file``. Items without a usable filename are
            skipped.
        temp_dir: Working directory of the current conversion.

    Returns:
        Mapping from the stored file name to its path relative to
        ``temp_dir`` (``"images/<name>"``).
    """
    images_dir = os.path.join(temp_dir, "images")
    os.makedirs(images_dir, exist_ok=True)

    image_mapping = {}
    for image in images:
        if not image.filename:
            continue

        # The filename is client-controlled. Keep only its last component so
        # names such as "../../x" or "/etc/x" cannot escape images_dir.
        # Backslashes are normalised first to cover Windows-style paths.
        safe_name = os.path.basename(image.filename.replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            continue

        image_path = os.path.join(images_dir, safe_name)
        with open(image_path, "wb") as f:
            # Stream in chunks rather than loading the whole upload in memory.
            shutil.copyfileobj(image.file, f)
        # Reset file pointer for potential reuse
        image.file.seek(0)

        # Create mapping with relative path
        image_mapping[safe_name] = f"images/{safe_name}"
        print(f"Saved image: {image.filename} -> {image_path}")
    return image_mapping
def process_html_with_images(html_content: str, temp_dir: str, image_mapping: dict) -> str:
    """Rewrite references to uploaded images as absolute ``file://`` URLs.

    For every entry in ``image_mapping`` the following references to the
    image name (optionally prefixed with ``./``) are replaced:

    * ``src="name"`` / ``src='name'``
    * ``url(name)`` with or without quotes (e.g. ``background-image``)
    * ``href="name"`` / ``href='name'``

    Matching is case-insensitive.

    Args:
        html_content: HTML markup to process.
        temp_dir: Directory the relative paths in ``image_mapping`` refer to.
        image_mapping: Mapping of referenced file name -> path relative to
            ``temp_dir``.

    Returns:
        The HTML with matching references rewritten.
    """
    any_quote = "[\"']"

    for original_name, relative_path in image_mapping.items():
        absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
        # as_uri() percent-encodes characters such as spaces, '#' and '%' and
        # also produces a well-formed URL for Windows drive paths.
        file_url = Path(absolute_path).as_uri()

        # Optional "./" prefix followed by the literal file name.
        name = r"(?:\./)?" + re.escape(original_name)

        rules = (
            (f"src={any_quote}{name}{any_quote}", f'src="{file_url}"'),
            (rf"url\({any_quote}?{name}{any_quote}?\)", f'url("{file_url}")'),
            (f"href={any_quote}{name}{any_quote}", f'href="{file_url}"'),
        )
        for pattern, replacement in rules:
            # A callable replacement is inserted literally, so the URL is
            # never interpreted as a regex replacement template.
            html_content = re.sub(
                pattern,
                lambda _match, text=replacement: text,
                html_content,
                flags=re.IGNORECASE,
            )
    return html_content
def convert_html_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str) -> bytes:
    """Render HTML to PDF by running the ``puppeteer_pdf.js`` Node script.

    The HTML (with a print-colour ``<style>`` block injected) is written to
    ``<temp_dir>/input.html``. The script located next to this module is then
    invoked with that file and ``aspect_ratio``, and ``<temp_dir>/input.pdf``
    is read back.

    Args:
        html_content: HTML markup to render.
        aspect_ratio: Passed verbatim to the script as its second argument.
        temp_dir: Existing directory used for the intermediate files.

    Returns:
        The generated PDF as bytes.

    Raises:
        RuntimeError: If the script exits with a non-zero status or no PDF
            file is found afterwards.
        subprocess.TimeoutExpired: If the script runs longer than 60 seconds.
        OSError: If the files cannot be written/read or ``node`` cannot be
            started.
    """
    # Style injection for better PDF rendering
    style_injection = """
<style>
    @page { margin: 0; }
    * {
        -webkit-print-color-adjust: exact !important;
        print-color-adjust: exact !important;
        color-adjust: exact !important;
    }
    body {
        -webkit-print-color-adjust: exact !important;
        print-color-adjust: exact !important;
    }
</style>
"""
    # Insert the style block once: before the first </head>, else before the
    # first <body ...>, else at the very start. Tag matching ignores case.
    anchor = re.search(r"</head\s*>", html_content, flags=re.IGNORECASE) or re.search(
        r"<body\b", html_content, flags=re.IGNORECASE
    )
    insert_at = anchor.start() if anchor else 0
    html_content = html_content[:insert_at] + style_injection + html_content[insert_at:]

    # Save HTML to temp file
    html_file = os.path.join(temp_dir, "input.html")
    with open(html_file, "w", encoding="utf-8") as f:
        f.write(html_content)

    # The puppeteer script lives next to this module.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    puppeteer_script = os.path.join(script_dir, "puppeteer_pdf.js")

    # Run conversion
    result = subprocess.run(
        ["node", puppeteer_script, html_file, aspect_ratio],
        capture_output=True,
        text=True,
        timeout=60,
        cwd=script_dir,
    )
    if result.returncode != 0:
        raise RuntimeError(f"PDF conversion failed: {result.stderr}")

    # Swap only the file extension; a ".html" elsewhere in the path (e.g. in
    # temp_dir) must not be rewritten.
    pdf_file = os.path.splitext(html_file)[0] + ".pdf"
    if not os.path.exists(pdf_file):
        raise RuntimeError("PDF file was not generated")

    with open(pdf_file, "rb") as f:
        return f.read()
# Registered on the application; without a route decorator this handler is
# never reachable.
@app.get("/")
async def root():
    """API root endpoint: return the service name, version and endpoint list."""
    return {
        "message": "HTML to PDF Conversion API with Image Support",
        "version": "2.0.0",
        "endpoints": {
            "POST /convert": "Convert HTML to PDF (file upload with optional images)",
            "POST /convert-text": "Convert HTML text to PDF (with optional image files)",
            "POST /convert-with-images": "Convert HTML with multiple images",
            "GET /health": "Health check",
            "GET /docs": "API documentation (Swagger UI)",
        },
    }
# Path matches the "GET /health" entry advertised by the root endpoint.
@app.get("/health")
async def health_check():
    """Health check endpoint: return a static status payload."""
    return {"status": "healthy", "service": "html-to-pdf-api"}
# Path matches the "POST /convert" entry advertised by the root endpoint.
@app.post("/convert")
async def convert_file(
    file: UploadFile = File(...),
    images: Optional[List[UploadFile]] = File(None),
    aspect_ratio: str = Form(default="9:16"),
):
    """
    Convert uploaded HTML file to PDF with optional images

    - **file**: HTML file to convert
    - **images**: Optional list of image files (jpg, png, gif, svg, webp)
    - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)

    Responds with the PDF as an attachment, 400 for invalid input and 500
    when the conversion fails.
    """
    # filename may be missing entirely, so check it before calling .lower().
    if not file.filename or not file.filename.lower().endswith((".html", ".htm")):
        raise HTTPException(status_code=400, detail="File must be HTML (.html or .htm)")
    if aspect_ratio not in ("16:9", "1:1", "9:16"):
        raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")

    temp_dir = None
    try:
        # Create temporary directory
        temp_dir = tempfile.mkdtemp()

        # Read HTML content; latin-1 accepts any byte sequence, so it serves
        # as the fallback when the upload is not valid UTF-8.
        content = await file.read()
        try:
            html_content = content.decode("utf-8")
        except UnicodeDecodeError:
            html_content = content.decode("latin-1")

        # Process images if provided
        if images:
            image_mapping = save_uploaded_images(images, temp_dir)
            html_content = process_html_with_images(html_content, temp_dir, image_mapping)

        # The conversion blocks on a subprocess for up to 60 seconds; run it
        # in the default executor so the event loop keeps serving requests.
        pdf_bytes = await asyncio.get_running_loop().run_in_executor(
            None, convert_html_to_pdf, html_content, aspect_ratio, temp_dir
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e
    finally:
        # Single cleanup point for success, failure and cancellation.
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)

    # Derive "<stem>.pdf" regardless of the extension's letter case.
    pdf_name = f"{Path(file.filename).stem}.pdf"
    # Header values must be latin-1 encodable and the name must not break out
    # of the quoted string: send a conservative ASCII fallback plus the exact
    # name in RFC 6266 "filename*" form.
    fallback_name = re.sub(r"[^A-Za-z0-9._ -]", "_", pdf_name)
    disposition = (
        f'attachment; filename="{fallback_name}"; '
        f"filename*=UTF-8''{quote(pdf_name, safe='')}"
    )
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={"Content-Disposition": disposition},
    )
# Path matches the "POST /convert-text" entry advertised by the root endpoint.
@app.post("/convert-text")
async def convert_text(
    html: str = Form(...),
    images: Optional[List[UploadFile]] = File(None),
    aspect_ratio: str = Form(default="9:16"),
    return_base64: bool = Form(default=False),
):
    """
    Convert HTML text to PDF with optional images

    - **html**: HTML content as string
    - **images**: Optional list of image files
    - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)
    - **return_base64**: If true, returns base64 encoded PDF in JSON

    Responds with the PDF (attachment or base64 JSON), 400 for an invalid
    aspect ratio and 500 when the conversion fails.
    """
    if aspect_ratio not in ("16:9", "1:1", "9:16"):
        raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")

    temp_dir = None
    try:
        # Create temporary directory
        temp_dir = tempfile.mkdtemp()

        # Process images if provided
        if images:
            image_mapping = save_uploaded_images(images, temp_dir)
            html = process_html_with_images(html, temp_dir, image_mapping)

        # The conversion blocks on a subprocess for up to 60 seconds; run it
        # in the default executor so the event loop keeps serving requests.
        pdf_bytes = await asyncio.get_running_loop().run_in_executor(
            None, convert_html_to_pdf, html, aspect_ratio, temp_dir
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e
    finally:
        # Single cleanup point for success, failure and cancellation.
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)

    if return_base64:
        # Return as JSON with base64 encoded PDF
        return JSONResponse(
            content={
                "success": True,
                "pdf_base64": base64.b64encode(pdf_bytes).decode("utf-8"),
                "size_bytes": len(pdf_bytes),
            }
        )

    # Return PDF file directly
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={"Content-Disposition": "attachment; filename=converted.pdf"},
    )
# Path matches the "POST /convert-with-images" entry advertised by the root
# endpoint.
@app.post("/convert-with-images")
async def convert_with_images(
    html_file: UploadFile = File(...),
    images: List[UploadFile] = File(...),
    aspect_ratio: str = Form(default="9:16"),
):
    """
    Convert HTML with multiple images - dedicated endpoint

    - **html_file**: HTML file to convert
    - **images**: List of image files (required)
    - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)

    Responds with the PDF as an attachment (plus an `X-Image-Count` header),
    400 for invalid input and 500 when the conversion fails.
    """
    # filename may be missing entirely, so check it before calling .lower().
    if not html_file.filename or not html_file.filename.lower().endswith((".html", ".htm")):
        raise HTTPException(status_code=400, detail="HTML file must be .html or .htm")
    if aspect_ratio not in ("16:9", "1:1", "9:16"):
        raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")

    # Validate image files
    allowed_extensions = {".jpg", ".jpeg", ".png", ".gif", ".svg", ".webp", ".bmp"}
    for img in images:
        # A missing filename yields an empty suffix and is rejected below.
        ext = Path(img.filename or "").suffix.lower()
        if ext not in allowed_extensions:
            # Sorted so the message is stable across runs (set order is not).
            raise HTTPException(
                status_code=400,
                detail=f"Invalid image format: {img.filename}. Allowed: {', '.join(sorted(allowed_extensions))}",
            )

    temp_dir = None
    try:
        # Create temporary directory
        temp_dir = tempfile.mkdtemp()

        # Read HTML content; latin-1 accepts any byte sequence, so it serves
        # as the fallback when the upload is not valid UTF-8.
        content = await html_file.read()
        try:
            html_content = content.decode("utf-8")
        except UnicodeDecodeError:
            html_content = content.decode("latin-1")

        # Save and process images
        image_mapping = save_uploaded_images(images, temp_dir)
        html_content = process_html_with_images(html_content, temp_dir, image_mapping)

        # The conversion blocks on a subprocess for up to 60 seconds; run it
        # in the default executor so the event loop keeps serving requests.
        pdf_bytes = await asyncio.get_running_loop().run_in_executor(
            None, convert_html_to_pdf, html_content, aspect_ratio, temp_dir
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e
    finally:
        # Single cleanup point for success, failure and cancellation.
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)

    # Derive "<stem>.pdf" regardless of the extension's letter case.
    pdf_name = f"{Path(html_file.filename).stem}.pdf"
    # Header values must be latin-1 encodable and the name must not break out
    # of the quoted string: send a conservative ASCII fallback plus the exact
    # name in RFC 6266 "filename*" form.
    fallback_name = re.sub(r"[^A-Za-z0-9._ -]", "_", pdf_name)
    disposition = (
        f'attachment; filename="{fallback_name}"; '
        f"filename*=UTF-8''{quote(pdf_name, safe='')}"
    )
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={
            "Content-Disposition": disposition,
            "X-Image-Count": str(len(images)),
        },
    )
if __name__ == "__main__":
    # Script entry point: serve `app` with uvicorn on all interfaces,
    # port 7860. uvicorn is imported here, so it is only needed when the
    # module is executed directly.
    from uvicorn import run as run_server

    run_server(app, port=7860, host="0.0.0.0")