Spaces:
Sleeping
Sleeping
File size: 1,098 Bytes
5823ed6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
from docx import Document
from docx.shared import Pt
from bs4 import BeautifulSoup
# Optional dependency probe: _HAS_WEASY records whether `HTML` from
# WeasyPrint could be imported, so callers can pick a PDF backend at runtime.
try:
    from weasyprint import HTML
except Exception:
    # NOTE(review): the broad catch is kept as-is; presumably the import can
    # fail with more than ImportError (e.g. missing native libraries) -- confirm.
    _HAS_WEASY = False
else:
    _HAS_WEASY = True
def html_to_pdf(html_str: str, out_pdf_path: str):
    """Render an HTML string to a PDF file.

    WeasyPrint is tried first when the module-level ``_HAS_WEASY`` flag is
    true. If it is unavailable or raises, xhtml2pdf (``pisa``) is used as a
    fallback.

    Args:
        html_str: HTML markup to render.
        out_pdf_path: Destination path for the generated PDF.

    Raises:
        RuntimeError: If the xhtml2pdf fallback reports conversion errors.
    """
    import logging

    log = logging.getLogger(__name__)

    if _HAS_WEASY:
        try:
            HTML(string=html_str).write_pdf(out_pdf_path)
        except Exception:
            # Best-effort: still fall through to xhtml2pdf, but record why
            # the primary renderer failed instead of discarding the error.
            log.warning(
                "WeasyPrint failed to render PDF; falling back to xhtml2pdf",
                exc_info=True,
            )
        else:
            return

    # Imported lazily so xhtml2pdf is only required when the fallback runs.
    from xhtml2pdf import pisa

    with open(out_pdf_path, "wb") as f:
        status = pisa.CreatePDF(src=html_str, dest=f)

    # pisa does not raise on conversion problems; it counts them in `err`.
    if status.err:
        raise RuntimeError(
            f"xhtml2pdf reported {status.err} error(s) while writing {out_pdf_path!r}"
        )
def html_to_docx(html_str: str, out_docx_path: str):
    """Write the text content of an HTML string to a .docx file.

    Only ``h1``, ``h2``, ``h3``, ``p`` and ``li`` elements are read, in
    document order. Headings become docx headings (``h1`` -> level 0,
    ``h2`` -> level 1, ``h3`` -> level 2); ``p`` and ``li`` become plain
    paragraphs. Elements with no text are skipped, and every run is set
    to 11 pt.

    Args:
        html_str: HTML markup to convert.
        out_docx_path: Destination path for the generated document.
    """
    heading_levels = {"h1": 0, "h2": 1, "h3": 2}

    doc = Document()
    soup = BeautifulSoup(html_str, "html.parser")

    # NOTE(review): nested matches (e.g. <li><p>..</p></li>) are each visited,
    # so their text is emitted once per matching element.
    for tag in soup.find_all(["h1", "h2", "h3", "p", "li"]):
        # get_text(strip=True) strips every text node and joins them with "",
        # which glues words across inline tags ("Hello <b>world</b>" ->
        # "Helloworld"). Take the full text and collapse whitespace instead.
        txt = " ".join(tag.get_text().split())
        if not txt:
            continue

        level = heading_levels.get(tag.name)
        if level is None:
            p = doc.add_paragraph(txt)
        else:
            p = doc.add_heading(txt, level=level)

        for run in p.runs:
            run.font.size = Pt(11)

    doc.save(out_docx_path)
|