Spaces:
Sleeping
Sleeping
File size: 1,932 Bytes
#!/usr/bin/env python3
"""HTML to PDF converter using WeasyPrint."""
import argparse, json, sys
from pathlib import Path
def _html_to_text_lines(markup, max_chars=100):
    """Reduce HTML markup to a list of non-empty plain-text lines.

    Script and style elements are dropped together with their contents, the
    remaining tags are stripped, and character references are unescaped.
    Lines longer than ``max_chars`` are wrapped onto additional lines rather
    than truncated, so no text is lost.
    """
    import html
    import re
    import textwrap

    # The bodies of <script>/<style> are code, not readable document text.
    markup = re.sub(
        r'<(script|style)\b[^>]*>.*?</\1\s*>',
        '',
        markup,
        flags=re.DOTALL | re.IGNORECASE,
    )
    text = html.unescape(re.sub(r'<[^>]+>', '', markup))

    lines = []
    for raw_line in text.split('\n'):
        raw_line = raw_line.strip()
        if raw_line:
            lines.extend(textwrap.wrap(raw_line, width=max_chars))
    return lines


def convert(input_path, output_path):
    """Convert the HTML file at ``input_path`` to a PDF file.

    WeasyPrint is used when it can be imported. Otherwise the HTML is
    reduced to plain text and written line by line onto A4 pages with
    reportlab.

    Args:
        input_path: Path of the HTML file to convert.
        output_path: Destination PDF path. When falsy, the input path with
            its suffix replaced by ``.pdf`` is used.

    Returns:
        The path the PDF was written to.

    Raises:
        ImportError: If WeasyPrint is unavailable and reportlab is not
            installed either.
        OSError: If the input cannot be read or the output cannot be written.
    """
    if not output_path:
        output_path = str(Path(input_path).with_suffix('.pdf'))

    # Guard only the import: WeasyPrint raises OSError (not ImportError) when
    # its native libraries are missing, and rendering errors must not be
    # mistaken for "WeasyPrint is unavailable".
    try:
        from weasyprint import HTML
    except (ImportError, OSError):
        HTML = None

    if HTML is not None:
        HTML(filename=input_path).write_pdf(output_path)
        return output_path

    # Fallback: basic HTML to text, then to PDF via reportlab.
    from reportlab.lib.pagesizes import A4
    from reportlab.pdfgen import canvas

    with open(input_path, 'r', encoding='utf-8') as f:
        lines = _html_to_text_lines(f.read())

    margin = 72       # points; used for the left, top and bottom margins
    line_height = 14  # points between consecutive baselines
    _, page_height = A4

    pdf = canvas.Canvas(output_path, pagesize=A4)
    y = page_height - margin
    for line in lines:
        if y < margin:
            # Current page is full: start a new one at the top margin.
            pdf.showPage()
            y = page_height - margin
        pdf.drawString(margin, y, line)
        y -= line_height
    pdf.save()
    return output_path
def main():
    """Parse command-line arguments, run the conversion and report as JSON.

    Prints one JSON object to stdout with the keys ``success``, ``output``
    and ``message``. Exits with status 1 when the conversion raises.
    """
    parser = argparse.ArgumentParser(description='Convert HTML to PDF')
    parser.add_argument('--input', required=True)
    # convert() falls back to the input path with a .pdf suffix when no
    # output path is given, so the flag does not need to be mandatory.
    parser.add_argument('--output', default=None)
    args = parser.parse_args()

    try:
        result = convert(args.input, args.output)
    except Exception as e:
        # Top-level boundary: report any failure in the same JSON shape.
        # Some exceptions have an empty str(); fall back to the type name so
        # the message is never blank.
        message = str(e) or type(e).__name__
        print(json.dumps({"success": False, "output": "", "message": message}))
        sys.exit(1)
    else:
        print(json.dumps({"success": True, "output": result, "message": "HTML converted to PDF successfully"}))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|