"""Convert HTML strings to PDF and DOCX files."""

from bs4 import BeautifulSoup
from docx import Document
from docx.shared import Pt
from weasyprint import HTML

# HTML heading tag -> level passed to ``Document.add_heading``.
_HEADING_LEVELS = {"h1": 0, "h2": 1, "h3": 2}

# Tags whose text is copied into the DOCX; every other tag is ignored.
_TEXT_TAGS = ["h1", "h2", "h3", "p", "li"]

# Font size, in points, applied to every run written to the DOCX.
_FONT_SIZE_PT = 11


def html_to_pdf(html_str: str, out_pdf_path: str) -> None:
    """Convert an HTML string to a PDF file.

    Args:
        html_str: HTML markup to render.
        out_pdf_path: Destination passed to WeasyPrint's ``write_pdf``.
    """
    HTML(string=html_str).write_pdf(out_pdf_path)


def html_to_docx(html_str: str, out_docx_path: str) -> None:
    """Convert an HTML string to a DOCX file containing its text.

    Only ``h1``/``h2``/``h3``/``p``/``li`` elements are considered, in the
    order ``find_all`` returns them. Headings become DOCX headings
    (``h1`` -> level 0, ``h2`` -> level 1, ``h3`` -> level 2); ``p`` and
    ``li`` become plain paragraphs. Elements with no text are skipped and
    inline formatting (bold, links, ...) is not carried over.

    Args:
        html_str: HTML markup to convert.
        out_docx_path: Destination passed to ``Document.save``.
    """
    doc = Document()
    soup = BeautifulSoup(html_str, "html.parser")

    for tag in soup.find_all(_TEXT_TAGS):
        # ``get_text(strip=True)`` strips every text fragment and joins them
        # with "", so "Hello <b>world</b>" would collapse to "Helloworld".
        # Take the raw text and normalize whitespace instead, which keeps
        # the word boundaries around inline elements.
        text = " ".join(tag.get_text().split())
        if not text:
            continue

        level = _HEADING_LEVELS.get(tag.name)
        if level is None:
            paragraph = doc.add_paragraph(text)
        else:
            paragraph = doc.add_heading(text, level=level)

        # Same explicit size on every run, headings included.
        for run in paragraph.runs:
            run.font.size = Pt(_FONT_SIZE_PT)

    doc.save(out_docx_path)