""" FastAPI Backend for HTML to PDF Conversion Runs alongside Streamlit on port 7860 """ from fastapi import FastAPI, UploadFile, File, Form, HTTPException from fastapi.responses import Response, JSONResponse from fastapi.middleware.cors import CORSMiddleware import subprocess import os import tempfile import shutil import re import mimetypes from typing import List, Optional from pathlib import Path app = FastAPI( title="HTML to PDF API", description="Convert HTML to PDF with image support and page breaks", version="1.0.0" ) # Add CORS middleware app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) def detect_aspect_ratio(html_content): """Detect aspect ratio from HTML content""" viewport_match = re.search(r']*viewport[^>]*content=["\']([^"\']*)["\']', html_content, re.IGNORECASE) if viewport_match: viewport = viewport_match.group(1).lower() if 'orientation=portrait' in viewport: return "9:16" elif 'orientation=landscape' in viewport: return "16:9" aspect_match = re.search(r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE) if aspect_match: width = int(aspect_match.group(1)) height = int(aspect_match.group(2)) ratio = width / height if ratio > 1.5: return "16:9" elif ratio < 0.7: return "9:16" else: return "1:1" if any(keyword in html_content.lower() for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']): return "16:9" return "9:16" def normalize_image_paths(html_content): """Replace complex image paths with just filenames""" replacements = {} # Pattern for img src - capture full path and extract filename def replace_img_src(match): prefix = match.group(1) quote = match.group(2) full_path = match.group(3) # Extract just the filename from the full path filename = os.path.basename(full_path) replacements[f"img: {full_path}"] = filename return f'{prefix}{quote}{filename}{quote}' html_content = re.sub( r'(]*\s+src\s*=\s*)(["\'])([^"\']+\.(jpg|jpeg|png|gif|svg|webp|bmp|JPG|JPEG|PNG|GIF|SVG|WEBP|BMP))(\2)', replace_img_src, html_content, flags=re.IGNORECASE ) # Pattern for background-image def replace_bg_image(match): prefix = match.group(1) quote = match.group(2) full_path = match.group(3) suffix = match.group(5) # Extract just the filename from the full path filename = os.path.basename(full_path) replacements[f"bg: {full_path}"] = filename return f'{prefix}{quote}{filename}{quote}{suffix}' html_content = re.sub( r'(background-image\s*:\s*url\s*\()(["\']?)([^)"\'/]+\.(jpg|jpeg|png|gif|svg|webp|bmp|JPG|JPEG|PNG|GIF|SVG|WEBP|BMP))(\2)(\))', replace_bg_image, html_content, flags=re.IGNORECASE ) # Pattern for CSS url() def replace_url(match): prefix = match.group(1) quote = match.group(2) full_path = match.group(3) suffix = match.group(5) # Extract just the filename from the full path filename = os.path.basename(full_path) replacements[f"url: {full_path}"] = filename return f'{prefix}{quote}{filename}{quote}{suffix}' html_content = re.sub( r'(url\s*\()(["\']?)([^)"\'/]+\.(jpg|jpeg|png|gif|svg|webp|bmp|JPG|JPEG|PNG|GIF|SVG|WEBP|BMP))(\2)(\))', replace_url, html_content, flags=re.IGNORECASE ) return html_content, replacements def inject_page_breaks(html_content: str, aspect_ratio: str): """Automatically inject page breaks and page sizing CSS""" if aspect_ratio == "16:9": page_size = "288mm 162mm" elif aspect_ratio == "1:1": page_size = "210mm 210mm" else: page_size = "162mm 288mm" page_css = f""" """ if '' in html_content: html_content = html_content.replace('', page_css + '') elif ' "photo.png") clean_filename = os.path.basename(img.filename) image_files[clean_filename] = img_bytes # Convert to PDF pdf_bytes, path_replacements = convert_html_to_pdf( html_content, aspect_ratio, temp_dir, image_files ) # Return PDF return Response( content=pdf_bytes, media_type="application/pdf", headers={ "Content-Disposition": f"attachment; filename=converted.pdf", "X-Aspect-Ratio": aspect_ratio, "X-Path-Replacements": str(len(path_replacements)), "X-PDF-Size": str(len(pdf_bytes)) } ) except HTTPException: raise except Exception as e: raise HTTPException(status_code=500, detail=str(e)) finally: if temp_dir and os.path.exists(temp_dir): shutil.rmtree(temp_dir, ignore_errors=True) @app.post("/convert-string") async def convert_string_to_pdf( html_content: str = Form(..., description="HTML content as string"), aspect_ratio: Optional[str] = Form(None, description="Aspect ratio: 16:9, 1:1, or 9:16"), auto_detect: bool = Form(True, description="Auto-detect aspect ratio from HTML") ): """ Convert HTML string to PDF (for HTML without external images) - **html_content**: HTML content as string (required) - **aspect_ratio**: Page aspect ratio (optional if auto_detect=true) - **auto_detect**: Auto-detect aspect ratio from HTML content """ temp_dir = None try: # Detect or use provided aspect ratio if auto_detect: detected_ratio = detect_aspect_ratio(html_content) aspect_ratio = detected_ratio elif not aspect_ratio: aspect_ratio = "9:16" # Validate aspect ratio if aspect_ratio not in ["16:9", "1:1", "9:16"]: raise HTTPException(status_code=400, detail="Invalid aspect ratio. Must be 16:9, 1:1, or 9:16") # Create temp directory and convert temp_dir = tempfile.mkdtemp() pdf_bytes, path_replacements = convert_html_to_pdf(html_content, aspect_ratio, temp_dir, None) return Response( content=pdf_bytes, media_type="application/pdf", headers={ "Content-Disposition": f"attachment; filename=converted.pdf", "X-Aspect-Ratio": aspect_ratio, "X-Path-Replacements": str(len(path_replacements)), "X-PDF-Size": str(len(pdf_bytes)) } ) except HTTPException: raise except Exception as e: raise HTTPException(status_code=500, detail=str(e)) finally: if temp_dir and os.path.exists(temp_dir): shutil.rmtree(temp_dir, ignore_errors=True) if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=7860)