File size: 4,313 Bytes
9242cc4
daa9aa5
a37291d
daa9aa5
 
 
9242cc4
daa9aa5
9242cc4
 
daa9aa5
 
 
a37291d
 
 
 
 
 
 
 
 
 
 
 
 
31e0dac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a37291d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from fastapi import FastAPI, Request, UploadFile, File
from fastapi.responses import StreamingResponse, JSONResponse
from bs4 import BeautifulSoup
from docx import Document
from docx.shared import Inches
from docx.enum.section import WD_ORIENT
from PIL import Image
from io import BytesIO
import markdown2
import io

# FastAPI application instance; route handlers in this module are registered
# on it through its decorators (e.g. ``@app.post``).
app = FastAPI()

# === Page Layout Utility ===

def set_a4_page_layout(doc):
    """Set the first section of ``doc`` to portrait A4 with 1-inch margins.

    Only ``doc.sections[0]`` is touched; any further sections keep their
    existing layout. The document is modified in place.

    Args:
        doc: Document object exposing a ``sections`` sequence.
    """
    first_section = doc.sections[0]

    # A4 sheet size expressed in inches (height x width for portrait).
    first_section.page_height = Inches(11.69)
    first_section.page_width = Inches(8.27)
    first_section.orientation = WD_ORIENT.PORTRAIT

    # Same one-inch margin on every edge.
    for edge in ("top", "bottom", "left", "right"):
        setattr(first_section, f"{edge}_margin", Inches(1))

# === HTML Cleaning Utilities ===

def remove_empty_paragraphs_around(soup, tag_names):
    """Remove the blank ``<p>`` directly before and after each matching tag.

    For every tag in ``soup`` whose name is listed in ``tag_names``, the
    sibling elements on each side are scanned outward from the tag:

    * ``<br>`` siblings are skipped,
    * the first blank paragraph found is removed and the scan ends,
    * any other element (including a paragraph that has content) ends the
      scan without removing anything.

    At most one paragraph is removed per side. The tree is modified in place.

    A paragraph is considered blank only when it has no non-whitespace text
    and contains no elements other than ``<br>``. A paragraph that merely
    wraps an image (``<p><img .../></p>``) has no text but is *not* blank,
    so the image is preserved.

    Args:
        soup: Parsed BeautifulSoup tree to clean.
        tag_names: Iterable of tag names, e.g. ``["table", "h1"]``.

    Returns:
        None.
    """

    def is_blank_paragraph(node):
        # Text-only checks would treat image-only paragraphs as empty, so
        # also require that no element other than <br> lives inside.
        if node.name != "p" or node.get_text(strip=True):
            return False
        return all(child.name == "br" for child in node.find_all(True))

    def drop_nearest_blank(siblings):
        # ``siblings`` is ordered nearest-first.
        for sibling in siblings:
            if is_blank_paragraph(sibling):
                sibling.decompose()
                return
            if sibling.name != "br":
                # Real content reached: anything further away is not
                # "around" the tag any more.
                return

    for tag_name in tag_names:
        for tag in soup.find_all(tag_name):
            # Siblings only: the tag's own ancestors and descendants must
            # never be candidates for removal.
            drop_nearest_blank(tag.find_previous_siblings(True))
            drop_nearest_blank(tag.find_next_siblings(True))

def clean_extra_spacing_around_tables(soup):
    """Remove blank paragraphs and the spacer elements that trail tables.

    Two passes are made over ``soup``; both modify it in place:

    1. Every blank ``<p>`` anywhere in the document is removed.
    2. For each ``<table>``, the run of following sibling elements that are
       ``<br>`` or blank ``<p>`` is removed, stopping at the first sibling
       that is anything else.

    A paragraph is blank only when it has no non-whitespace text and holds
    no elements other than ``<br>``; a paragraph wrapping an image therefore
    survives even though it has no text.

    Args:
        soup: Parsed BeautifulSoup tree to clean.

    Returns:
        None.
    """

    def is_blank(paragraph):
        if paragraph.get_text(strip=True):
            return False
        # No text: still keep the paragraph if it carries an element such
        # as <img>; only <br> children count as "nothing".
        return all(child.name == "br" for child in paragraph.find_all(True))

    for paragraph in soup.find_all("p"):
        if is_blank(paragraph):
            paragraph.decompose()

    for table in soup.find_all("table"):
        follower = table.find_next_sibling()
        while follower is not None and (
            follower.name == "br"
            or (follower.name == "p" and is_blank(follower))
        ):
            # Look up the successor before decomposing, since a decomposed
            # element loses its sibling links.
            upcoming = follower.find_next_sibling()
            follower.decompose()
            follower = upcoming

def add_table_borders_to_html(html_content: str) -> str:
    """Return ``html_content`` with bordered, uniformly structured tables.

    For every ``<table>`` in the parsed HTML:

    * ``border="1"`` and a fixed border/collapse/full-width ``style`` are set
      on the table (replacing any existing table style).
    * A ``<colgroup>`` is inserted as the first child with one ``<col>`` per
      cell of the first row, each given an equal rounded percentage width.
    * The first row is placed in a new ``<thead>`` and all remaining rows in
      a new ``<tbody>``; section wrappers that were already present and are
      left empty by this move are removed so the table does not end up with
      duplicate ``<thead>``/``<tbody>`` elements.
    * Each cell gets border and padding rules appended to whatever inline
      style it already had.

    Only rows and cells that belong to the table itself are touched; a table
    nested inside a cell is processed separately as its own table.

    Args:
        html_content: HTML markup to transform.

    Returns:
        The transformed markup serialized back to a string.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    cell_style = "border: 1px solid black; padding: 6px;"

    for table in soup.find_all("table"):
        table['border'] = "1"
        table['style'] = "border: 1px solid black; border-collapse: collapse; width: 100%;"

        # Rows owned by this table only; rows of a nested table must stay
        # inside that nested table.
        rows = [
            row for row in table.find_all("tr")
            if row.find_parent("table") is table
        ]
        if not rows:
            continue

        col_count = len(rows[0].find_all(["td", "th"], recursive=False))
        colgroup = soup.new_tag("colgroup")
        for _ in range(col_count):
            col = soup.new_tag("col")
            col['style'] = "width: {}%;".format(round(100 / col_count))
            colgroup.append(col)

        # Remember the wrappers that exist before rows are re-homed.
        old_sections = table.find_all(["thead", "tbody", "tfoot"], recursive=False)

        thead = soup.new_tag("thead")
        thead.append(rows[0])
        tbody = soup.new_tag("tbody")
        for row in rows[1:]:
            tbody.append(row)

        # Appending moved the rows out of their old wrappers; drop the now
        # empty shells instead of leaving duplicate sections behind.
        for section in old_sections:
            if section.find("tr") is None:
                section.decompose()

        table.insert(0, colgroup)
        table.append(thead)
        table.append(tbody)

        for row in rows:
            for cell in row.find_all(["th", "td"], recursive=False):
                existing_style = cell.get('style', '').strip()
                # Without a terminating ';' the appended rules would merge
                # into the last existing declaration and invalidate both.
                if existing_style and not existing_style.endswith(";"):
                    existing_style += ";"
                cell['style'] = f"{existing_style} {cell_style}".strip()

    return str(soup)

# === API 1: Markdown to HTML ===

@app.post("/convert-md-to-html")
async def convert_md_to_html(request: Request):
    """Convert posted Markdown into a downloadable HTML file.

    The request body is read as JSON and is expected to be an object with:

    * ``markdown``: the Markdown source text (required).
    * ``client_name``: name used in the download filename (optional,
      defaults to ``"Client"``).

    The Markdown is rendered with markdown2 (tables, fenced code blocks,
    cuddled lists, footnotes), blank paragraphs around tables, images and
    headings are cleaned up, and table borders are added.

    Returns:
        ``{"error": "No markdown text provided"}`` when the body carries no
        usable ``markdown`` string; otherwise a ``StreamingResponse`` with
        media type ``text/html`` and a ``Content-Disposition: attachment``
        header naming the file ``Proposal for <client name>.html``.
    """
    from urllib.parse import quote

    data = await request.json()
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, ...) has no fields.
        data = {}

    md_text = data.get("markdown", "")
    if not isinstance(md_text, str) or not md_text:
        return {"error": "No markdown text provided"}

    # ``client_name`` may be missing, null or a non-string JSON value.
    raw_client_name = data.get("client_name", "Client")
    client_name = raw_client_name.strip() if isinstance(raw_client_name, str) else ""

    html = markdown2.markdown(md_text, extras=[
        "tables",
        "fenced-code-blocks",
        "cuddled-lists",
        "footnotes"
    ])

    soup = BeautifulSoup(html, "html.parser")
    remove_empty_paragraphs_around(soup, ["table", "img", "h1", "h2", "h3", "h4", "h5", "h6"])
    clean_extra_spacing_around_tables(soup)

    cleaned_html = add_table_borders_to_html(str(soup))

    # A freshly constructed BytesIO is already positioned at offset 0.
    html_io = BytesIO(cleaned_html.encode("utf-8"))

    # Keep only characters that are safe inside a quoted filename; fall back
    # to the default when nothing usable remains.
    safe_client_name = "".join(
        c for c in client_name if c.isalnum() or c in (" ", "_", "-")
    ).strip() or "Client"
    filename = f"Proposal for {safe_client_name}.html"

    if filename.isascii():
        content_disposition = f'attachment; filename="{filename}"'
    else:
        # ``isalnum`` admits non-ASCII letters, and header values that are
        # not Latin-1 cannot be encoded; use the RFC 6266 extended form.
        content_disposition = f"attachment; filename*=UTF-8''{quote(filename)}"

    headers = {
        'Content-Disposition': content_disposition
    }

    return StreamingResponse(
        html_io,
        media_type='text/html',
        headers=headers
    )