| from weasyprint import HTML | |
| from docx import Document | |
| from docx.shared import Pt | |
| from bs4 import BeautifulSoup | |
def html_to_pdf(html_str: str, out_pdf_path: str):
    """Render an HTML string to a PDF file.

    Args:
        html_str: HTML markup to render.
        out_pdf_path: Target handed to WeasyPrint's ``write_pdf``.
    """
    # Build the WeasyPrint document first, then render it to the target.
    source_document = HTML(string=html_str)
    source_document.write_pdf(out_pdf_path)
def html_to_docx(html_str: str, out_docx_path: str):
    """Convert an HTML string to a simple .docx file.

    Every ``h1``, ``h2``, ``h3``, ``p`` and ``li`` element found in the
    markup is emitted, in document order, as a heading or a plain
    paragraph. Elements whose text is empty after whitespace
    normalisation are skipped. All runs are set to 11 pt.

    Args:
        html_str: HTML markup to convert.
        out_docx_path: Target handed to ``Document.save``.
    """
    doc = Document()
    soup = BeautifulSoup(html_str, "html.parser")

    # HTML heading tag -> python-docx heading level (level 0 is the
    # document "Title" style in python-docx).
    heading_levels = {"h1": 0, "h2": 1, "h3": 2}

    for tag in soup.find_all(["h1", "h2", "h3", "p", "li"]):
        # get_text(strip=True) strips each text fragment and joins them
        # with an empty separator, which fuses words across inline tags
        # ("Hello <b>world</b>" -> "Helloworld"). Take the raw text and
        # collapse whitespace runs instead, as an HTML renderer would.
        txt = " ".join(tag.get_text().split())
        if not txt:
            continue

        level = heading_levels.get(tag.name)
        if level is None:
            p = doc.add_paragraph(txt)
        else:
            p = doc.add_heading(txt, level=level)

        # NOTE(review): this also applies to headings, overriding the
        # heading styles' font size -- kept as in the original.
        for run in p.runs:
            run.font.size = Pt(11)

    doc.save(out_docx_path)