Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, Request, UploadFile, File | |
| from fastapi.responses import StreamingResponse, JSONResponse | |
| from bs4 import BeautifulSoup | |
| from docx import Document | |
| from docx.shared import Inches | |
| from docx.enum.section import WD_ORIENT | |
| from PIL import Image | |
| from io import BytesIO | |
| import markdown2 | |
| import io | |
| app = FastAPI() | |
| # === Page Layout Utility === | |
def set_a4_page_layout(doc):
    """Configure the first section of *doc* as a portrait A4 page.

    The page is set to 8.27 x 11.69 inches in portrait orientation with a
    1-inch margin on every edge. Only ``doc.sections[0]`` is modified; the
    function returns nothing.
    """
    first_section = doc.sections[0]
    first_section.page_height = Inches(11.69)
    first_section.page_width = Inches(8.27)
    first_section.orientation = WD_ORIENT.PORTRAIT
    # Same 1-inch margin on all four edges (top, bottom, left, right).
    for edge in ("top", "bottom", "left", "right"):
        setattr(first_section, f"{edge}_margin", Inches(1))
| # === HTML Cleaning Utilities === | |
def remove_empty_paragraphs_around(soup, tag_names):
    """Remove the nearest blank ``<p>`` on each side of every matching tag.

    For each tag in *soup* whose name is listed in *tag_names*, the document
    is scanned backwards and forwards from that tag. ``<br>`` elements are
    stepped over; the first blank paragraph found in each direction is
    removed, and any other element ends the scan in that direction. At most
    one paragraph is removed per side per tag. *soup* is modified in place.

    A paragraph is blank when it has no visible text and contains no element
    other than ``<br>``. A paragraph that holds only an image therefore is
    NOT blank, even though its text is empty.

    Args:
        soup: Parsed BeautifulSoup document (mutated in place).
        tag_names: Iterable of tag names to clean around, e.g. ``["table"]``.
    """

    def _is_blank_paragraph(node):
        # Empty text alone is not enough: <p><img ...></p> has no text either.
        if node.name != "p" or node.text.strip():
            return False
        return all(child.name == "br" for child in node.find_all(True))

    def _remove_nearest_blank(candidates, skipped_ids):
        for node in candidates:
            if id(node) in skipped_ids:
                continue
            if _is_blank_paragraph(node):
                node.decompose()
                return
            if node.name != "br":
                # Real content (including a non-blank paragraph) is a boundary.
                return

    for tag_name in tag_names:
        for tag in soup.find_all(tag_name):
            # find_all_previous() also yields the tag's ancestors and
            # find_all_next() yields its descendants first. Neither is
            # "around" the tag, so both are excluded. Identity is compared
            # via id() because Tag equality is structural, not by identity.
            ancestor_ids = {id(parent) for parent in tag.parents}
            descendant_ids = {id(child) for child in tag.find_all(True)}
            _remove_nearest_blank(tag.find_all_previous(), ancestor_ids)
            _remove_nearest_blank(tag.find_all_next(), descendant_ids)
def clean_extra_spacing_around_tables(soup):
    """Strip blank paragraphs everywhere and trailing spacers after tables.

    Two passes are made over *soup*, which is modified in place:

    1. Every blank ``<p>`` in the document is removed.
    2. For each ``<table>``, the run of ``<br>`` / blank ``<p>`` sibling
       elements immediately following it is removed, stopping at the first
       sibling that is anything else.

    A paragraph is blank when it has no visible text and contains no element
    other than ``<br>``. Paragraphs that hold only an image (or any other
    element) have empty text but are kept.

    Args:
        soup: Parsed BeautifulSoup document (mutated in place).
    """

    def _is_blank_paragraph(node):
        # Text emptiness alone would also match <p><img ...></p>.
        if node.name != "p" or node.text.strip():
            return False
        return all(child.name == "br" for child in node.find_all(True))

    for paragraph in soup.find_all("p"):
        if _is_blank_paragraph(paragraph):
            paragraph.decompose()

    for table in soup.find_all("table"):
        follower = table.find_next_sibling()
        while follower is not None and (
            follower.name == "br" or _is_blank_paragraph(follower)
        ):
            # Fetch the successor before decompose() detaches this node.
            upcoming = follower.find_next_sibling()
            follower.decompose()
            follower = upcoming
def add_table_borders_to_html(html_content: str) -> str:
    """Return *html_content* with every table given borders and even columns.

    For each ``<table>``:

    * ``border`` and ``style`` attributes are set (1px solid black,
      collapsed borders, full width).
    * A ``<colgroup>`` with equal percentage widths is inserted, sized from
      the cell count of the first row, unless the table already has one or
      the first row has no cells.
    * The first row is moved into a new ``<thead>`` and the remaining rows
      into a new ``<tbody>``. Section wrappers that were already present and
      are left empty by the move are removed, so the table does not end up
      with duplicate sections.
    * Every cell gets a border and padding appended to its inline style.

    Only rows and cells that belong directly to the table are touched; a
    nested table is processed separately on its own iteration.

    Args:
        html_content: HTML markup to process.

    Returns:
        The modified markup serialised back to a string.
    """
    table_style = "border: 1px solid black; border-collapse: collapse; width: 100%;"
    cell_style = "border: 1px solid black; padding: 6px;"

    soup = BeautifulSoup(html_content, "html.parser")

    for table in soup.find_all("table"):
        table["border"] = "1"
        table["style"] = table_style

        # find_all is recursive, so filter out rows owned by nested tables.
        rows = [
            row for row in table.find_all("tr")
            if row.find_parent("table") is table
        ]
        if not rows:
            continue

        col_count = len(rows[0].find_all(["td", "th"], recursive=False))
        if col_count and table.find("colgroup", recursive=False) is None:
            colgroup = soup.new_tag("colgroup")
            for _ in range(col_count):
                col = soup.new_tag("col")
                col["style"] = "width: {}%;".format(round(100 / col_count))
                colgroup.append(col)
            table.insert(0, colgroup)

        # Remember pre-existing section wrappers so the ones emptied by the
        # row move below can be dropped afterwards.
        old_sections = table.find_all(["thead", "tbody", "tfoot"], recursive=False)

        thead = soup.new_tag("thead")
        thead.append(rows[0])  # append() moves the row out of its old parent
        tbody = soup.new_tag("tbody")
        for row in rows[1:]:
            tbody.append(row)
        table.append(thead)
        table.append(tbody)

        for section in old_sections:
            if section.find(True) is None:
                section.decompose()

        for row in rows:
            for cell in row.find_all(["th", "td"], recursive=False):
                existing_style = cell.get("style", "").strip()
                # Terminate the last existing declaration so the appended
                # rules do not fuse with it into invalid CSS.
                if existing_style and not existing_style.endswith(";"):
                    existing_style += ";"
                cell["style"] = f"{existing_style} {cell_style}".strip()

    return str(soup)
| # === API 1: Markdown to HTML === | |
async def convert_md_to_html(request: Request):
    """Convert Markdown from a JSON request body into a downloadable HTML file.

    The body is expected to be a JSON object with:

    * ``markdown`` (str, required): the Markdown source.
    * ``client_name`` (str, optional): used in the download filename;
      defaults to ``"Client"`` when missing, not a string, or empty after
      sanitising.

    The Markdown is rendered with the tables, fenced-code-blocks,
    cuddled-lists and footnotes extras, cleaned of blank paragraphs, given
    table borders, and returned as a ``text/html`` attachment named
    ``Proposal for <client>.html``.

    Returns:
        A ``StreamingResponse`` with the HTML on success, or a dict with an
        ``"error"`` key when the body is not a JSON object or carries no
        Markdown text.

    NOTE(review): no route decorator is visible on this function in the
    reviewed source; confirm it is registered on ``app`` elsewhere.
    """
    # Local stdlib import so this block does not depend on the file header.
    from urllib.parse import quote

    try:
        data = await request.json()
    except ValueError:
        # Malformed JSON (JSONDecodeError is a ValueError subclass).
        data = None
    if not isinstance(data, dict):
        return {"error": "Request body must be a JSON object"}

    md_text = data.get("markdown", "")
    if not md_text or not isinstance(md_text, str):
        return {"error": "No markdown text provided"}

    raw_client_name = data.get("client_name", "Client")
    # JSON null or a non-string value would otherwise crash on .strip().
    client_name = raw_client_name.strip() if isinstance(raw_client_name, str) else "Client"

    html = markdown2.markdown(md_text, extras=[
        "tables",
        "fenced-code-blocks",
        "cuddled-lists",
        "footnotes",
    ])

    soup = BeautifulSoup(html, "html.parser")
    remove_empty_paragraphs_around(soup, ["table", "img", "h1", "h2", "h3", "h4", "h5", "h6"])
    clean_extra_spacing_around_tables(soup)
    cleaned_html = add_table_borders_to_html(str(soup))

    # A fresh BytesIO is already positioned at offset 0.
    html_io = BytesIO(cleaned_html.encode("utf-8"))

    safe_client_name = "".join(
        c for c in client_name if c.isalnum() or c in (" ", "_", "-")
    ).strip() or "Client"
    filename = f"Proposal for {safe_client_name}.html"

    if filename.isascii():
        disposition = f'attachment; filename="{filename}"'
    else:
        # str.isalnum() accepts non-ASCII letters, which cannot be encoded
        # into a plain header value. Send an ASCII fallback plus the
        # percent-encoded UTF-8 form via the filename* parameter.
        ascii_client_name = (
            safe_client_name.encode("ascii", "ignore").decode("ascii").strip() or "Client"
        )
        disposition = (
            f'attachment; filename="Proposal for {ascii_client_name}.html"; '
            f"filename*=UTF-8''{quote(filename)}"
        )

    return StreamingResponse(
        html_io,
        media_type='text/html',
        headers={'Content-Disposition': disposition},
    )