from fastapi import FastAPI, File, UploadFile, Form, HTTPException from fastapi.responses import Response, JSONResponse from fastapi.middleware.cors import CORSMiddleware from typing import List, Optional import tempfile import shutil import os import subprocess import base64 from pathlib import Path import mimetypes app = FastAPI( title="HTML to PDF API with Image Support", description="Convert HTML to PDF using Puppeteer with image upload support", version="2.0.0" ) # Enable CORS app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) def save_uploaded_images(images: List[UploadFile], temp_dir: str): """Save uploaded images to temp directory and return mapping""" image_mapping = {} images_dir = os.path.join(temp_dir, "images") os.makedirs(images_dir, exist_ok=True) for image in images: if image.filename: # Save image to temp directory image_path = os.path.join(images_dir, image.filename) with open(image_path, 'wb') as f: content = image.file.read() f.write(content) # Reset file pointer for potential reuse image.file.seek(0) # Create mapping with relative path image_mapping[image.filename] = f"images/{image.filename}" print(f"Saved image: {image.filename} -> {image_path}") return image_mapping def process_html_with_images(html_content: str, temp_dir: str, image_mapping: dict): """Process HTML to handle image references with absolute file paths""" import re for original_name, relative_path in image_mapping.items(): # Get absolute path for the image absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path)) file_url = f"file://{absolute_path}" # Replace various image reference patterns # Pattern 1: src="filename" html_content = re.sub( f'src=["\'](?:\.\/)?{re.escape(original_name)}["\']', f'src="{file_url}"', html_content, flags=re.IGNORECASE ) # Pattern 2: src='filename' html_content = re.sub( f"src=['\"](?:\.\/)?{re.escape(original_name)}['\"]", f'src="{file_url}"', html_content, flags=re.IGNORECASE ) # Pattern 3: background-image: url(filename) html_content = re.sub( f'url\(["\']?(?:\.\/)?{re.escape(original_name)}["\']?\)', f'url("{file_url}")', html_content, flags=re.IGNORECASE ) # Pattern 4: href for links html_content = re.sub( f'href=["\'](?:\.\/)?{re.escape(original_name)}["\']', f'href="{file_url}"', html_content, flags=re.IGNORECASE ) return html_content def convert_html_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str): """Convert HTML content to PDF""" try: # Style injection for better PDF rendering style_injection = """ """ if '' in html_content: html_content = html_content.replace('', style_injection + '') elif '