"""Helpers that render an HTML string to a PDF file or a DOCX file."""
import logging

from bs4 import BeautifulSoup
from docx import Document
from docx.shared import Pt

_logger = logging.getLogger(__name__)

# WeasyPrint is an optional backend. The import is guarded with a broad
# ``except Exception`` on purpose: any failure to import it (not only
# ImportError) simply disables the backend instead of breaking this module.
try:
    from weasyprint import HTML
except Exception:
    _HAS_WEASY = False
else:
    _HAS_WEASY = True


def html_to_pdf(html_str: str, out_pdf_path: str) -> None:
    """Render *html_str* to a PDF file at *out_pdf_path*.

    WeasyPrint is tried first when it could be imported. If it is not
    available, or if rendering raises, the function falls back to
    xhtml2pdf (imported lazily so it is only required when actually used).

    Args:
        html_str: The HTML document to render.
        out_pdf_path: Destination path of the PDF; overwritten if present.

    Raises:
        ImportError: If the xhtml2pdf fallback is needed but not installed.
        OSError: If *out_pdf_path* cannot be opened for writing.
    """
    if _HAS_WEASY:
        try:
            HTML(string=html_str).write_pdf(out_pdf_path)
        except Exception:
            # Best-effort: keep the fallback, but do not hide the failure.
            _logger.warning(
                "WeasyPrint failed to render %s; falling back to xhtml2pdf",
                out_pdf_path,
                exc_info=True,
            )
        else:
            return

    from xhtml2pdf import pisa

    # Opening with "wb" truncates any partial output left by a failed
    # WeasyPrint attempt.
    with open(out_pdf_path, "wb") as f:
        status = pisa.CreatePDF(src=html_str, dest=f)

    # The original ignored the conversion status; report problems without
    # raising so existing callers keep their behaviour.
    if getattr(status, "err", 0):
        _logger.warning(
            "xhtml2pdf reported %s error(s) while rendering %s",
            status.err,
            out_pdf_path,
        )


def html_to_docx(html_str: str, out_docx_path: str) -> None:
    """Write the text of *html_str* to a DOCX file at *out_docx_path*.

    Every ``h1``/``h2``/``h3``/``p``/``li`` element with non-empty text is
    emitted in document order: ``h1`` -> heading level 0, ``h2`` -> level 1,
    ``h3`` -> level 2, and ``p``/``li`` -> plain paragraphs. Inline markup
    inside an element is flattened to its text. All runs of each emitted
    element are set to 11pt.

    NOTE(review): matched elements nested inside other matched elements
    (e.g. ``<li><p>text</p></li>`` or nested lists) are visited separately,
    so their text appears once per enclosing match. Confirm whether inputs
    can contain such nesting.

    Args:
        html_str: The HTML source to convert.
        out_docx_path: Destination path of the DOCX file.
    """
    heading_levels = {"h1": 0, "h2": 1, "h3": 2}
    font_size = Pt(11)

    doc = Document()
    soup = BeautifulSoup(html_str, "html.parser")

    for tag in soup.find_all(["h1", "h2", "h3", "p", "li"]):
        text = tag.get_text(strip=True)
        if not text:
            continue

        level = heading_levels.get(tag.name)
        if level is None:
            block = doc.add_paragraph(text)
        else:
            block = doc.add_heading(text, level=level)

        # Applied to headings as well as body paragraphs, as in the original.
        for run in block.runs:
            run.font.size = font_size

    doc.save(out_docx_path)