Spaces:
Sleeping
Sleeping
File size: 4,313 Bytes
9242cc4 daa9aa5 a37291d daa9aa5 9242cc4 daa9aa5 9242cc4 daa9aa5 a37291d 31e0dac a37291d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | from fastapi import FastAPI, Request, UploadFile, File
from fastapi.responses import StreamingResponse, JSONResponse
from bs4 import BeautifulSoup
from docx import Document
from docx.shared import Inches
from docx.enum.section import WD_ORIENT
from PIL import Image
from io import BytesIO
import markdown2
import io
# FastAPI application instance; endpoints in this file register on it
# through decorators such as ``@app.post``.
app = FastAPI()
# === Page Layout Utility ===
def set_a4_page_layout(doc):
    """Give the first section of ``doc`` a portrait A4 page with 1-inch margins.

    Only ``doc.sections[0]`` is modified; any further sections keep their
    current layout.  Nothing is returned, the document is changed in place.
    """
    first_section = doc.sections[0]

    # A4 measured in inches: 8.27 wide by 11.69 tall.
    first_section.page_height, first_section.page_width = Inches(11.69), Inches(8.27)
    first_section.orientation = WD_ORIENT.PORTRAIT

    # Same one-inch margin on all four sides.
    for side in ("top", "bottom", "left", "right"):
        setattr(first_section, f"{side}_margin", Inches(1))
# === HTML Cleaning Utilities ===
def remove_empty_paragraphs_around(soup, tag_names):
    """Delete the nearest blank ``<p>`` before and after each matching tag.

    For every element of ``soup`` whose name is listed in ``tag_names`` the
    tree is scanned backwards and then forwards from that element.  Each scan
    removes the first blank paragraph it meets and then stops; it also stops,
    without removing anything, at the first element that is neither ``<p>``
    nor ``<br>``.  Paragraphs that contain text do not end the scan.

    A paragraph is blank only when it has no visible text *and* wraps no
    media element, so an image-only paragraph such as ``<p><img></p>`` is
    never deleted.  The element's own ancestors (backward scan) and
    descendants (forward scan) are skipped, so a non-paragraph container
    around the element, or its own children (table rows, inline markup in a
    heading), cannot end a scan before it reaches the neighbouring content.

    Args:
        soup: Parsed BeautifulSoup tree; modified in place.
        tag_names: Iterable of tag names whose surroundings are cleaned.
    """
    media_tags = [
        "img", "svg", "video", "audio", "iframe",
        "object", "embed", "picture", "canvas",
    ]

    def is_blank_paragraph(node):
        # No text alone is not enough: the paragraph may only hold an image.
        return (
            node.name == "p"
            and not node.text.strip()
            and node.find(media_tags) is None
        )

    def prune_first_blank(candidates, skipped_ids):
        for node in candidates:
            # Identity comparison: bs4 tags compare equal by markup, not by
            # position, so ``in`` on the nodes themselves would be wrong.
            if id(node) in skipped_ids:
                continue
            if is_blank_paragraph(node):
                node.decompose()
                return
            if node.name not in ("p", "br", None):
                return

    for tag_name in tag_names:
        for tag in soup.find_all(tag_name):
            # Collected before anything is removed so the ids stay valid.
            ancestor_ids = {id(parent) for parent in tag.parents}
            descendant_ids = {id(child) for child in tag.descendants}
            prune_first_blank(tag.find_all_previous(), ancestor_ids)
            prune_first_blank(tag.find_all_next(), descendant_ids)
def clean_extra_spacing_around_tables(soup):
    """Strip blank paragraphs from ``soup`` and spacer nodes after tables.

    Two passes are made over the tree, both modifying it in place:

    1. Every blank ``<p>`` anywhere in the document is removed.
    2. For each ``<table>``, the run of ``<br>`` elements and blank
       paragraphs that immediately follows it as siblings is removed.

    A paragraph is blank only when it has no visible text *and* wraps no
    media element; an image-only paragraph such as ``<p><img></p>`` has no
    text but is real content and is therefore kept.

    Args:
        soup: Parsed BeautifulSoup tree.
    """
    media_tags = [
        "img", "svg", "video", "audio", "iframe",
        "object", "embed", "picture", "canvas",
    ]

    def is_blank_paragraph(node):
        return (
            node.name == "p"
            and not node.text.strip()
            and node.find(media_tags) is None
        )

    for paragraph in soup.find_all("p"):
        if is_blank_paragraph(paragraph):
            paragraph.decompose()

    for table in soup.find_all("table"):
        sibling = table.find_next_sibling()
        while sibling is not None and (
            sibling.name == "br" or is_blank_paragraph(sibling)
        ):
            # Look up the successor first: a decomposed node loses its links.
            following = sibling.find_next_sibling()
            sibling.decompose()
            sibling = following
def add_table_borders_to_html(html_content: str) -> str:
    """Return ``html_content`` with bordered, evenly sized tables.

    Every ``<table>`` gets ``border="1"`` and a fixed inline style (any
    existing ``style`` attribute on the table is replaced).  For tables that
    have rows:

    * a ``<colgroup>`` with one equally wide ``<col>`` per cell of the first
      row is inserted as the table's first child;
    * the first row is placed in a new ``<thead>`` and all remaining rows in
      a new ``<tbody>``; section wrappers left empty by that move are
      removed, so the result has a single ``<thead>``/``<tbody>`` pair
      rather than empty duplicates;
    * the border/padding style is appended to each cell's existing style.

    Only rows and cells that belong to the table being processed are touched;
    rows of a nested table are handled when the loop reaches that table, so
    they are neither pulled into the outer table nor styled twice.

    Args:
        html_content: HTML markup to process.

    Returns:
        The modified markup serialised back to a string.
    """
    table_style = "border: 1px solid black; border-collapse: collapse; width: 100%;"
    cell_style = "border: 1px solid black; padding: 6px;"

    soup = BeautifulSoup(html_content, "html.parser")
    for table in soup.find_all("table"):
        table["border"] = "1"
        table["style"] = table_style

        # Rows whose nearest enclosing table is this one.
        rows = [
            row
            for row in table.find_all("tr")
            if row.find_parent("table") is table
        ]
        if not rows:
            continue

        col_count = len(rows[0].find_all(["td", "th"], recursive=False))
        colgroup = soup.new_tag("colgroup")
        for _ in range(col_count):
            col = soup.new_tag("col")
            col["style"] = "width: {}%;".format(round(100 / col_count))
            colgroup.append(col)
        table.insert(0, colgroup)

        # Appending a row to a new parent detaches it from its old one.
        thead = soup.new_tag("thead")
        thead.append(rows[0])
        tbody = soup.new_tag("tbody")
        for row in rows[1:]:
            tbody.append(row)

        # Drop the original section wrappers now that they hold no elements;
        # done before the new sections are attached so those are not matched.
        for section in table.find_all(["thead", "tbody", "tfoot"], recursive=False):
            if section.find(True) is None:
                section.decompose()

        table.append(thead)
        table.append(tbody)

        for row in rows:
            for cell in row.find_all(["th", "td"], recursive=False):
                existing_style = cell.get("style", "")
                cell["style"] = f"{existing_style} {cell_style}".strip()
    return str(soup)
# === API 1: Markdown to HTML ===
@app.post("/convert-md-to-html")
async def convert_md_to_html(request: Request):
    """Convert posted Markdown into a downloadable, cleaned-up HTML file.

    The JSON request body is read for two keys:

    * ``markdown`` - the Markdown source (required, must be a non-empty
      string);
    * ``client_name`` - used in the download filename; defaults to
      ``"Client"`` when missing, not a string, or empty after sanitising.

    The Markdown is rendered with the ``tables``, ``fenced-code-blocks``,
    ``cuddled-lists`` and ``footnotes`` extras, blank paragraphs around
    tables, images and headings are removed, and tables receive borders.

    Returns:
        ``{"error": "No markdown text provided"}`` when no usable Markdown was
        sent; otherwise a ``text/html`` streaming response served as an
        attachment named ``Proposal for <client name>.html``.
    """
    # Function-scope import: only the Content-Disposition header needs it.
    from urllib.parse import quote

    data = await request.json()
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, number, ...) carries no keys.
        data = {}

    md_text = data.get("markdown", "")
    if not md_text or not isinstance(md_text, str):
        return {"error": "No markdown text provided"}

    client_name = data.get("client_name", "Client")
    # ``null`` or a non-string value would otherwise fail on ``.strip()``.
    client_name = client_name.strip() if isinstance(client_name, str) else "Client"

    html = markdown2.markdown(md_text, extras=[
        "tables",
        "fenced-code-blocks",
        "cuddled-lists",
        "footnotes",
    ])

    soup = BeautifulSoup(html, "html.parser")
    remove_empty_paragraphs_around(soup, ["table", "img", "h1", "h2", "h3", "h4", "h5", "h6"])
    clean_extra_spacing_around_tables(soup)
    cleaned_html = add_table_borders_to_html(str(soup))

    # A freshly created BytesIO is already positioned at offset 0.
    html_io = BytesIO(cleaned_html.encode("utf-8"))

    safe_client_name = "".join(
        c for c in client_name if c.isalnum() or c in (" ", "_", "-")
    ).strip() or "Client"
    filename = f"Proposal for {safe_client_name}.html"

    # ``str.isalnum`` also accepts non-ASCII letters, which cannot be placed
    # in a plain header value.  The quoted ``filename`` therefore stays ASCII
    # and the full name, when it differs, is sent percent-encoded through the
    # RFC 6266 ``filename*`` parameter.
    ascii_name = "".join(c for c in safe_client_name if c.isascii()).strip() or "Client"
    ascii_filename = f"Proposal for {ascii_name}.html"
    content_disposition = f'attachment; filename="{ascii_filename}"'
    if ascii_filename != filename:
        content_disposition += f"; filename*=UTF-8''{quote(filename)}"

    headers = {
        'Content-Disposition': content_disposition
    }
    return StreamingResponse(
        html_io,
        media_type='text/html',
        headers=headers
    )