htmlpdfs

Sleeping

App Files Files Community

ABDALLALSWAITI committed on Oct 17, 2025

Commit

e58da64

verified ·

1 Parent(s): 06295a6

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -425

app.py DELETED Viewed

@@ -1,425 +0,0 @@
-"""
-FastAPI Backend for HTML to PDF Conversion
-Runs alongside Streamlit on port 7860
-"""
-from fastapi import FastAPI, UploadFile, File, Form, HTTPException
-from fastapi.responses import Response, JSONResponse
-from fastapi.middleware.cors import CORSMiddleware
-import subprocess
-import os
-import tempfile
-import shutil
-import re
-import mimetypes
-from typing import List, Optional
-from pathlib import Path
-# Application object; title, description and version are handed to the
-# FastAPI constructor.
-app = FastAPI(
-    title="HTML to PDF API",
-    description="Convert HTML to PDF with image support and page breaks",
-    version="1.0.0"
-)
-# Add CORS middleware
-# NOTE(review): every origin, method and header is allowed and
-# allow_credentials is True at the same time. That looks broader than a
-# public conversion API needs -- confirm it is intentional.
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-def detect_aspect_ratio(html_content):
-    """Guess the page aspect ratio from HTML content.
-
-    Heuristics are tried in order and the first hit wins:
-
-    1. A viewport ``<meta>`` tag whose content mentions
-       ``orientation=portrait`` ("9:16") or ``orientation=landscape``
-       ("16:9").
-    2. The first CSS ``aspect-ratio: W / H`` declaration with integer
-       operands: above 1.5 is "16:9", below 0.7 is "9:16", otherwise "1:1".
-    3. Presentation keywords (reveal.js, impress.js, slide, presentation)
-       anywhere in the document give "16:9".
-
-    Args:
-        html_content: HTML source as text.
-
-    Returns:
-        One of "16:9", "1:1" or "9:16"; "9:16" when nothing matches.
-    """
-    viewport_match = re.search(r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']', html_content, re.IGNORECASE)
-    if viewport_match:
-        viewport = viewport_match.group(1).lower()
-        if 'orientation=portrait' in viewport:
-            return "9:16"
-        if 'orientation=landscape' in viewport:
-            return "16:9"
-    aspect_match = re.search(r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE)
-    if aspect_match:
-        width = int(aspect_match.group(1))
-        height = int(aspect_match.group(2))
-        # "aspect-ratio: N / 0" is degenerate; ignore it and fall through to
-        # the remaining heuristics instead of dividing by zero.
-        if height:
-            ratio = width / height
-            if ratio > 1.5:
-                return "16:9"
-            if ratio < 0.7:
-                return "9:16"
-            return "1:1"
-    lowered = html_content.lower()
-    if any(keyword in lowered for keyword in ('reveal.js', 'impress.js', 'slide', 'presentation')):
-        return "16:9"
-    return "9:16"
-def normalize_image_paths(html_content):
-    """Replace complex image paths with just filenames.
-
-    Three kinds of reference are rewritten, in this order: ``<img src=...>``
-    attributes, ``background-image: url(...)`` declarations, and any other
-    CSS ``url(...)``. Only references ending in a known image extension
-    (jpg, jpeg, png, gif, svg, webp, bmp) immediately before the closing
-    quote or parenthesis are touched.
-
-    Args:
-        html_content: HTML source as text.
-
-    Returns:
-        ``(new_html, replacements)`` where ``replacements`` maps a label
-        ("img src: ...", "bg-image: ..." or "url: ..." followed by the
-        original matched text) to the bare filename. Only references that
-        actually carried a directory prefix are recorded.
-    """
-    replacements = {}
-
-    def make_replacer(label, suffix_group=None):
-        # Builds a re.sub callback. In all three patterns group 1 is the
-        # prefix, group 2 the quote, group 3 the optional directory part and
-        # group 4 the filename; the url() patterns also capture ")" as
-        # group 7.
-        def replacer(match):
-            quote = match.group(2)
-            filename = match.group(4)
-            # Record only real rewrites: a reference that is already a bare
-            # filename comes back unchanged and must not be counted.
-            if match.group(3):
-                replacements[f"{label}: {match.group(0)}"] = filename
-            suffix = match.group(suffix_group) if suffix_group else ''
-            return f'{match.group(1)}{quote}{filename}{quote}{suffix}'
-        return replacer
-
-    # Pattern 1: img src with paths - extract filename only
-    pattern1 = r'(<img[^>]*\s+src\s*=\s*)(["\'])([^"\']*?/)?([^/"\'>]+\.(jpg|jpeg|png|gif|svg|webp|bmp))(\2)'
-    html_content = re.sub(pattern1, make_replacer("img src"), html_content, flags=re.IGNORECASE)
-    # Pattern 2: background-image with paths
-    pattern2 = r'(background-image\s*:\s*url\s*\()(["\']?)([^)"\']*/)?([^/")\']+\.(jpg|jpeg|png|gif|svg|webp|bmp))(\2)(\))'
-    html_content = re.sub(pattern2, make_replacer("bg-image", 7), html_content, flags=re.IGNORECASE)
-    # Pattern 3: CSS url() with paths. Anything pattern 2 already handled no
-    # longer has a directory part, so it passes through here unchanged and
-    # unrecorded; url()s that merely sit near a background-image declaration
-    # (e.g. the second layer of a multi-layer background) are still
-    # normalized.
-    pattern3 = r'(url\s*\()(["\']?)([^)"\']*/)?([^/")\']+\.(jpg|jpeg|png|gif|svg|webp|bmp))(\2)(\))'
-    html_content = re.sub(pattern3, make_replacer("url", 7), html_content, flags=re.IGNORECASE)
-    return html_content, replacements
-def inject_page_breaks(html_content: str, aspect_ratio: str):
-    """Inject a ``<style id="auto-page-breaks">`` block with page sizing and
-    page-break rules into the document.
-
-    The block is inserted exactly once: immediately before the first
-    ``</head>`` if there is one, otherwise immediately before the first
-    ``<body``, otherwise at the very start. Tag matching is
-    case-insensitive.
-
-    Args:
-        html_content: HTML source as text.
-        aspect_ratio: "16:9" selects "A4 landscape", "1:1" selects
-            "210mm 210mm", anything else selects "A4 portrait".
-
-    Returns:
-        The HTML with the style block added.
-    """
-    if aspect_ratio == "16:9":
-        page_size = "A4 landscape"
-    elif aspect_ratio == "1:1":
-        page_size = "210mm 210mm"
-    else:
-        page_size = "A4 portrait"
-    page_css = f"""
-    <style id="auto-page-breaks">
-        @page {{
-            size: {page_size};
-            margin: 0;
-        }}
-        html, body {{
-            margin: 0 !important;
-            padding: 0 !important;
-            width: 100% !important;
-            height: 100% !important;
-        }}
-        .page, .slide, section.page, article.page, div[class*="page"], div[class*="slide"] {{
-            width: 100% !important;
-            min-height: 100vh !important;
-            height: 100vh !important;
-            page-break-after: always !important;
-            break-after: page !important;
-            page-break-inside: avoid !important;
-            break-inside: avoid !important;
-            position: relative !important;
-            box-sizing: border-box !important;
-            overflow: hidden !important;
-        }}
-        .page:last-child, .slide:last-child,
-        section.page:last-child, article.page:last-child {{
-            page-break-after: auto !important;
-            break-after: auto !important;
-        }}
-        body > section:not(.no-page-break),
-        body > article:not(.no-page-break),
-        body > div:not(.no-page-break) {{
-            page-break-after: always !important;
-            break-after: page !important;
-            min-height: 100vh;
-        }}
-        body > section:last-child,
-        body > article:last-child,
-        body > div:last-child {{
-            page-break-after: auto !important;
-        }}
-        .page-break, .page-break-after {{
-            page-break-after: always !important;
-            break-after: page !important;
-        }}
-        .page-break-before {{
-            page-break-before: always !important;
-            break-before: page !important;
-        }}
-        .no-page-break, .keep-together {{
-            page-break-inside: avoid !important;
-            break-inside: avoid !important;
-        }}
-        h1, h2, h3, h4, h5, h6 {{
-            page-break-after: avoid !important;
-            break-after: avoid !important;
-            page-break-inside: avoid !important;
-            break-inside: avoid !important;
-        }}
-        img, figure, table, pre, blockquote {{
-            page-break-inside: avoid !important;
-            break-inside: avoid !important;
-        }}
-        * {{
-            -webkit-print-color-adjust: exact !important;
-            print-color-adjust: exact !important;
-            color-adjust: exact !important;
-        }}
-    </style>
-    """
-    # Find a single insertion point. A plain str.replace('</head>', ...)
-    # would inject the block at every occurrence (for example inside a
-    # script string) and would miss upper-case tags altogether.
-    anchor = re.search(r'</head\s*>', html_content, re.IGNORECASE)
-    if anchor is None:
-        anchor = re.search(r'<body', html_content, re.IGNORECASE)
-    insert_at = anchor.start() if anchor else 0
-    return html_content[:insert_at] + page_css + html_content[insert_at:]
-def convert_html_to_pdf(html_content, aspect_ratio, temp_dir, images=None):
-    """Convert HTML content to PDF using Puppeteer.
-
-    The HTML is normalized (image paths reduced to filenames, page-break
-    CSS injected), written to ``input.html`` inside ``temp_dir`` next to
-    the supplied images, and rendered by running
-    ``node puppeteer_pdf.js <html_file> <aspect_ratio>`` with ``temp_dir``
-    as the working directory. The result is read back from ``input.pdf``
-    in the same directory.
-
-    Args:
-        html_content: HTML source as text.
-        aspect_ratio: Ratio string forwarded to the CSS injection and to
-            the Node script.
-        temp_dir: Existing directory used as the workspace. This function
-            does not remove it.
-        images: Optional iterable of objects exposing ``filename`` and a
-            readable binary ``file`` attribute.
-
-    Returns:
-        ``(pdf_bytes, path_replacements)``.
-
-    Raises:
-        Exception: On timeout (60 seconds), a missing script, a non-zero
-            exit code, a missing output file, or any I/O error. The
-            original error is chained as ``__cause__``.
-    """
-    try:
-        # Normalize image paths in HTML
-        html_content, path_replacements = normalize_image_paths(html_content)
-        # Inject page breaks
-        html_content = inject_page_breaks(html_content, aspect_ratio)
-        # Save image files to the working directory
-        for img in images or ():
-            # Upload names are client-controlled: keep only the final path
-            # component so "../x" or an absolute path cannot escape
-            # temp_dir. This also matches the bare filenames the HTML now
-            # references.
-            safe_name = os.path.basename((img.filename or '').replace('\\', '/'))
-            if safe_name in ('', '.', '..'):
-                continue  # nothing usable to save the upload under
-            with open(os.path.join(temp_dir, safe_name), 'wb') as f:
-                f.write(img.file.read())
-        # Save the HTML after the images so an upload that happens to be
-        # called "input.html" cannot replace the document being rendered.
-        html_file = os.path.join(temp_dir, "input.html")
-        with open(html_file, 'w', encoding='utf-8') as f:
-            f.write(html_content)
-        # Find puppeteer script
-        possible_paths = [
-            'puppeteer_pdf.js',
-            '/app/puppeteer_pdf.js',
-            os.path.join(os.path.dirname(__file__), 'puppeteer_pdf.js'),
-        ]
-        puppeteer_script = next((path for path in possible_paths if os.path.exists(path)), None)
-        if not puppeteer_script:
-            raise Exception("puppeteer_pdf.js not found")
-        # The subprocess runs with cwd=temp_dir, so a path that was found
-        # relative to the server's working directory must be made absolute
-        # or node would look for it inside temp_dir.
-        puppeteer_script = os.path.abspath(puppeteer_script)
-        result = subprocess.run(
-            ['node', puppeteer_script, html_file, aspect_ratio],
-            capture_output=True,
-            text=True,
-            timeout=60,
-            cwd=temp_dir  # Run in temp directory so images are accessible
-        )
-        if result.returncode != 0:
-            raise Exception(f"PDF conversion failed: {result.stderr}")
-        # Swap only the extension; str.replace would also rewrite a ".html"
-        # that happened to appear in the directory part of the path.
-        pdf_file = os.path.splitext(html_file)[0] + '.pdf'
-        if not os.path.exists(pdf_file):
-            raise Exception("PDF file was not generated")
-        with open(pdf_file, 'rb') as f:
-            pdf_bytes = f.read()
-        return pdf_bytes, path_replacements
-    except subprocess.TimeoutExpired as exc:
-        raise Exception("PDF conversion timed out (60 seconds)") from exc
-    except Exception as exc:
-        raise Exception(f"Error: {str(exc)}") from exc
-@app.get("/")
-async def root():
-    """API root endpoint"""
-    # Static service description. The listing covers every route declared
-    # in this module plus the /docs entry.
-    return {
-        "message": "HTML to PDF Converter API",
-        "version": "1.0.0",
-        "endpoints": {
-            "POST /convert": "Convert HTML to PDF",
-            "POST /convert-base64": "Convert an HTML string to PDF",
-            "GET /health": "Health check",
-            "GET /docs": "API documentation"
-        }
-    }
-@app.get("/health")
-async def health():
-    """Health check endpoint"""
-    # Fixed payload; nothing is probed.
-    payload = {"status": "healthy"}
-    return payload
-@app.post("/convert")
-async def convert_to_pdf(
-    html_file: UploadFile = File(..., description="HTML file to convert"),
-    aspect_ratio: Optional[str] = Form(None, description="Aspect ratio: 16:9, 1:1, or 9:16"),
-    auto_detect: bool = Form(True, description="Auto-detect aspect ratio from HTML"),
-    images: Optional[List[UploadFile]] = File(None, description="Images referenced in HTML")
-):
-    """
-    Convert HTML to PDF with image files in same directory
-    - **html_file**: HTML file to convert (required)
-    - **aspect_ratio**: Page aspect ratio (optional if auto_detect=true)
-    - **auto_detect**: Auto-detect aspect ratio from HTML content
-    - **images**: Image files referenced in HTML (saved to temp directory)
-    """
-    # Implementation notes:
-    # * While auto_detect is true (the default) the detected ratio replaces
-    #   any aspect_ratio the client sent.
-    # * Responds 400 for an invalid explicit ratio and 500 (with the error
-    #   text as detail) for any conversion failure.
-    # * The temp directory is removed in all cases.
-    import asyncio
-    import io
-    from types import SimpleNamespace
-    temp_dir = None
-    try:
-        # Read HTML content; fall back to latin-1, which accepts any bytes
-        raw_html = await html_file.read()
-        try:
-            html_content = raw_html.decode('utf-8')
-        except UnicodeDecodeError:
-            html_content = raw_html.decode('latin-1')
-        # Detect or use provided aspect ratio
-        if auto_detect:
-            aspect_ratio = detect_aspect_ratio(html_content)
-        elif not aspect_ratio:
-            aspect_ratio = "9:16"
-        # Validate aspect ratio
-        if aspect_ratio not in ("16:9", "1:1", "9:16"):
-            raise HTTPException(status_code=400, detail="Invalid aspect ratio. Must be 16:9, 1:1, or 9:16")
-        # Read images into memory before conversion. The converter only
-        # needs `.filename` and a readable `.file`, so a plain namespace
-        # around a BytesIO is enough.
-        images_list = []
-        for img in images or ():
-            img_bytes = await img.read()
-            images_list.append(
-                SimpleNamespace(
-                    filename=img.filename,
-                    content=img_bytes,
-                    file=io.BytesIO(img_bytes),
-                )
-            )
-        # Create temp directory and convert
-        temp_dir = tempfile.mkdtemp()
-        # The conversion blocks on a subprocess for up to 60 seconds; run it
-        # in the default executor so the event loop keeps serving other
-        # requests meanwhile.
-        loop = asyncio.get_running_loop()
-        pdf_bytes, path_replacements = await loop.run_in_executor(
-            None,
-            convert_html_to_pdf,
-            html_content,
-            aspect_ratio,
-            temp_dir,
-            images_list if images_list else None,
-        )
-        # Return PDF
-        return Response(
-            content=pdf_bytes,
-            media_type="application/pdf",
-            headers={
-                "Content-Disposition": "attachment; filename=converted.pdf",
-                "X-Aspect-Ratio": aspect_ratio,
-                "X-Path-Replacements": str(len(path_replacements)),
-                "X-PDF-Size": str(len(pdf_bytes))
-            }
-        )
-    except HTTPException:
-        raise
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        if temp_dir and os.path.exists(temp_dir):
-            shutil.rmtree(temp_dir, ignore_errors=True)
-@app.post("/convert-base64")
-async def convert_to_pdf_base64(
-    html_content: str = Form(..., description="HTML content as string"),
-    aspect_ratio: Optional[str] = Form(None, description="Aspect ratio: 16:9, 1:1, or 9:16"),
-    auto_detect: bool = Form(True, description="Auto-detect aspect ratio from HTML")
-):
-    """
-    Convert HTML string to PDF (for HTML without external images)
-    - **html_content**: HTML content as string (required)
-    - **aspect_ratio**: Page aspect ratio (optional if auto_detect=true)
-    - **auto_detect**: Auto-detect aspect ratio from HTML content
-    """
-    # Implementation notes:
-    # * html_content is used as-is; despite the route name nothing in this
-    #   handler base64-decodes it.
-    # * While auto_detect is true (the default) the detected ratio replaces
-    #   any aspect_ratio the client sent.
-    # * Responds 400 for an invalid explicit ratio and 500 (with the error
-    #   text as detail) for any conversion failure.
-    import asyncio
-    temp_dir = None
-    try:
-        # Detect or use provided aspect ratio
-        if auto_detect:
-            aspect_ratio = detect_aspect_ratio(html_content)
-        elif not aspect_ratio:
-            aspect_ratio = "9:16"
-        # Validate aspect ratio
-        if aspect_ratio not in ("16:9", "1:1", "9:16"):
-            raise HTTPException(status_code=400, detail="Invalid aspect ratio. Must be 16:9, 1:1, or 9:16")
-        # Create temp directory and convert
-        temp_dir = tempfile.mkdtemp()
-        # The conversion blocks on a subprocess for up to 60 seconds; run it
-        # in the default executor so the event loop keeps serving other
-        # requests meanwhile.
-        loop = asyncio.get_running_loop()
-        pdf_bytes, path_replacements = await loop.run_in_executor(
-            None, convert_html_to_pdf, html_content, aspect_ratio, temp_dir
-        )
-        return Response(
-            content=pdf_bytes,
-            media_type="application/pdf",
-            headers={
-                "Content-Disposition": "attachment; filename=converted.pdf",
-                "X-Aspect-Ratio": aspect_ratio,
-                "X-Path-Replacements": str(len(path_replacements)),
-                "X-PDF-Size": str(len(pdf_bytes))
-            }
-        )
-    except HTTPException:
-        raise
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        if temp_dir and os.path.exists(temp_dir):
-            shutil.rmtree(temp_dir, ignore_errors=True)
-# Script entry point: when run directly, serve `app` with uvicorn bound to
-# 0.0.0.0 on port 7860. uvicorn is imported here, so merely importing this
-# module does not require it.
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)