Spaces:

innoai
/

PDFConverter-ENG

Sleeping

File size: 1,932 Bytes

80a3675

#!/usr/bin/env python3
"""HTML to PDF converter using WeasyPrint."""
import argparse, json, sys
from pathlib import Path

def _html_to_text(content):
    """Reduce HTML markup to plain text for the reportlab fallback.

    Comments and the bodies of ``<script>``/``<style>`` elements are removed
    before the remaining tags are stripped and entities are unescaped.
    """
    import html
    import re

    # A bare tag strip would leave JavaScript/CSS source (and the tail of any
    # comment containing '>') in the output, so remove those spans first.
    content = re.sub(r'(?s)<!--.*?-->', '', content)
    content = re.sub(r'(?is)<(script|style)\b[^>]*>.*?</\1\s*>', '', content)
    text = re.sub(r'<[^>]+>', '', content)
    return html.unescape(text)


def _write_text_pdf(input_path, output_path):
    """Draw the text content of an HTML file line by line onto A4 pages."""
    import textwrap

    from reportlab.lib.pagesizes import A4
    from reportlab.pdfgen import canvas

    margin = 72       # page margin, in the canvas's coordinate units
    line_height = 14  # vertical distance between consecutive lines
    max_chars = 100   # maximum characters drawn on one line

    with open(input_path, 'r', encoding='utf-8') as f:
        text = _html_to_text(f.read())

    pdf = canvas.Canvas(output_path, pagesize=A4)
    _, page_height = A4
    y = page_height - margin
    for raw_line in text.split('\n'):
        # Wrap instead of truncating so text beyond max_chars is kept.
        # wrap() returns an empty list for blank input, which skips empty lines.
        for line in textwrap.wrap(raw_line.strip(), width=max_chars):
            if y < margin:
                # Bottom margin reached: start a new page at the top.
                pdf.showPage()
                y = page_height - margin
            pdf.drawString(margin, y, line)
            y -= line_height
    pdf.save()


def convert(input_path, output_path):
    """Convert an HTML file to PDF and return the path of the written PDF.

    WeasyPrint renders the document when it can be imported. Otherwise a
    text-only fallback is used: markup is stripped and the remaining text is
    drawn onto A4 pages with reportlab, wrapped at 100 characters per line.

    Args:
        input_path: Path of the HTML file to convert.
        output_path: Destination PDF path. When falsy, ``input_path`` with
            its suffix replaced by ``.pdf`` is used.

    Returns:
        The path the PDF was written to (``output_path`` as given, or the
        derived default as a string).

    Raises:
        ImportError: If neither weasyprint nor reportlab can be imported.
    """
    if not output_path:
        output_path = str(Path(input_path).with_suffix('.pdf'))

    # Guard only the import: an ImportError raised while rendering should
    # surface to the caller rather than silently trigger the lossy fallback.
    try:
        from weasyprint import HTML
    except ImportError:
        HTML = None

    if HTML is not None:
        HTML(filename=input_path).write_pdf(output_path)
    else:
        _write_text_pdf(input_path, output_path)
    return output_path

def main():
    """Parse command-line arguments, run the conversion and report as JSON.

    Prints one JSON object to stdout with the keys ``success``, ``output``
    and ``message``. If the conversion raises, ``success`` is false,
    ``message`` carries the exception text and the process exits with
    status 1.
    """
    parser = argparse.ArgumentParser(description='Convert HTML to PDF')
    parser.add_argument('--input', required=True,
                        help='path of the HTML file to convert')
    # convert() derives the destination from the input path when no output is
    # supplied, so the flag does not need to be mandatory.
    parser.add_argument('--output', default=None,
                        help='destination PDF path (default: input path '
                             'with a .pdf suffix)')
    args = parser.parse_args()
    try:
        result = convert(args.input, args.output)
    except Exception as e:
        # Top-level boundary: report every failure in the same JSON shape.
        print(json.dumps({"success": False, "output": "", "message": str(e)}))
        sys.exit(1)
    else:
        print(json.dumps({"success": True, "output": result, "message": "HTML converted to PDF successfully"}))

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()