Shakauthossain-NH commited on
Commit
31e0dac
·
verified ·
1 Parent(s): 959275d

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +106 -0
main.py CHANGED
@@ -7,6 +7,112 @@ from io import BytesIO
7
 
8
  app = FastAPI()
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def insert_cover_image_to_docx(docx_bytes: bytes, image_bytes: bytes) -> BytesIO:
11
  # Validate input
12
  if not docx_bytes:
 
7
 
8
  app = FastAPI()
9
 
10
+ # === Markdown to HTML Utility Functions ===
11
+
12
def remove_empty_paragraphs_around(soup, tag_names):
    """Remove the blank ``<p>`` nearest to each matching tag, before and after it.

    For every tag in ``soup`` whose name is listed in ``tag_names`` the
    document is scanned backwards and forwards from the tag.  ``<br>``
    elements are skipped; the first blank paragraph met in each direction is
    removed; any other element (including a paragraph with content) ends the
    scan in that direction.

    A paragraph counts as blank only when it has no visible text AND holds no
    element other than ``<br>``.  This matters because an image-only paragraph
    (``<p><img></p>``) has empty text but must not be deleted.

    Args:
        soup: Parsed BeautifulSoup tree; it is modified in place.
        tag_names: Iterable of tag names (e.g. ``["table", "img", "h1"]``).

    Returns:
        None.
    """

    def _is_blank_paragraph(node):
        # Empty text alone is not enough: <p><img/></p> has no text either.
        if node.name != "p" or node.text.strip():
            return False
        return all(child.name == "br" for child in node.find_all(True))

    for tag_name in tag_names:
        for tag in soup.find_all(tag_name):
            # A tag may already have been destroyed when tag_names overlaps
            # with the paragraphs being removed (attribute exists in bs4 >= 4.9).
            if getattr(tag, "decomposed", False):
                continue

            # Backward scan. Document-order traversal also yields the tag's
            # own ancestors; those wrap the tag and must never be removed.
            ancestors = list(tag.parents)
            for prev in tag.find_all_previous():
                if any(prev is ancestor for ancestor in ancestors):
                    continue
                if _is_blank_paragraph(prev):
                    prev.decompose()
                    break
                if prev.name != "br":
                    break

            # Forward scan. Document-order traversal first yields the tag's
            # own descendants (rows, cells, inline markup); skip them so the
            # scan actually reaches what follows the tag.
            for next_ in tag.find_all_next():
                if any(parent is tag for parent in next_.parents):
                    continue
                if _is_blank_paragraph(next_):
                    next_.decompose()
                    break
                if next_.name != "br":
                    break
27
+
28
def clean_extra_spacing_around_tables(soup):
    """Strip blank paragraphs from the document and spacer nodes after tables.

    Two passes are made over ``soup`` (modified in place):

    1. Every blank ``<p>`` anywhere in the document is removed.
    2. For every ``<table>``, the run of ``<br>`` / blank ``<p>`` siblings
       that immediately follows it is removed.

    A paragraph counts as blank only when it has no visible text AND holds no
    element other than ``<br>``; a paragraph that wraps an image or another
    embedded element has empty text but is real content and is kept.

    Args:
        soup: Parsed BeautifulSoup tree.

    Returns:
        None.
    """

    def _is_blank_paragraph(node):
        if node.name != "p" or node.text.strip():
            return False
        return all(child.name == "br" for child in node.find_all(True))

    # Reverse document order visits nested paragraphs before their parents,
    # so a wrapper that only held blank paragraphs is itself blank by the
    # time it is examined, and no already-destroyed node is touched.
    for paragraph in reversed(soup.find_all("p")):
        if _is_blank_paragraph(paragraph):
            paragraph.decompose()

    for table in soup.find_all("table"):
        sibling = table.find_next_sibling()
        while sibling is not None and (
            sibling.name == "br" or _is_blank_paragraph(sibling)
        ):
            # Fetch the follower first: decompose() detaches the node.
            following = sibling.find_next_sibling()
            sibling.decompose()
            sibling = following
39
+
40
def add_table_borders_to_html(html_content: str) -> str:
    """Return ``html_content`` with every table given borders and equal columns.

    For each ``<table>``:

    * ``border`` and ``style`` attributes are set (any existing table style is
      replaced).
    * A ``<colgroup>`` with one equal-width ``<col>`` per cell of the first
      row is inserted at the start of the table.
    * The first row is placed in a new ``<thead>`` and the remaining rows in a
      new ``<tbody>``.  Section wrappers that were already present are
      removed once their rows have been moved out, so the result never
      contains duplicate, empty ``<thead>``/``<tbody>`` elements.
    * A border/padding style is appended to each cell's existing style.

    Only rows and cells that belong to the table itself are touched; rows of a
    nested table stay inside that nested table and are handled when it is
    processed in its own right.

    Args:
        html_content: HTML markup to process.

    Returns:
        The modified markup serialised back to a string.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    cell_style = "border: 1px solid black; padding: 6px;"

    for table in soup.find_all("table"):
        table["border"] = "1"
        table["style"] = "border: 1px solid black; border-collapse: collapse; width: 100%;"

        # Rows whose closest enclosing table is this one (excludes nested tables).
        rows = [
            row for row in table.find_all("tr")
            if row.find_parent("table") is table
        ]
        if not rows:
            continue

        # Remember pre-existing section wrappers; they are emptied below.
        old_sections = [
            section for section in table.find_all(["thead", "tbody", "tfoot"])
            if section.find_parent("table") is table
        ]

        col_count = len(rows[0].find_all(["td", "th"], recursive=False))
        colgroup = soup.new_tag("colgroup")
        for _ in range(col_count):
            col = soup.new_tag("col")
            col["style"] = "width: {}%;".format(round(100 / col_count))
            colgroup.append(col)
        table.insert(0, colgroup)

        # append() moves an element that is already in the tree, so the rows
        # leave their former parents here.
        thead = soup.new_tag("thead")
        thead.append(rows[0])
        tbody = soup.new_tag("tbody")
        for row in rows[1:]:
            tbody.append(row)
        table.append(thead)
        table.append(tbody)

        # Drop the old wrappers that the move above left without rows.
        for section in old_sections:
            if section.find("tr") is None:
                section.decompose()

        for row in rows:
            for cell in row.find_all(["th", "td"], recursive=False):
                existing_style = cell.get("style", "")
                cell["style"] = f"{existing_style} {cell_style}".strip()

    return str(soup)
74
+
75
+ # === API 1: Markdown to HTML ===
76
+
77
@app.post("/convert-md-to-html")
async def convert_md_to_html(request: Request):
    """Convert posted Markdown to a cleaned HTML file offered as a download.

    Expects a JSON object body with:

    * ``markdown`` - the Markdown text to convert (required).
    * ``client_name`` - used in the download filename; defaults to "Client".

    Returns:
        A ``StreamingResponse`` carrying the UTF-8 encoded HTML as an
        attachment named ``Proposal for <client name>.html``, or the dict
        ``{"error": "No markdown text provided"}`` when no usable Markdown
        text was supplied.
    """
    from urllib.parse import quote

    # A malformed or non-object body is treated like a body without markdown
    # instead of surfacing as an unhandled server error.
    try:
        data = await request.json()
    except ValueError:
        data = {}
    if not isinstance(data, dict):
        data = {}

    md_text = data.get("markdown", "")
    if not isinstance(md_text, str) or not md_text:
        return {"error": "No markdown text provided"}

    # "client_name": null or a non-string value must not crash on .strip().
    raw_client_name = data.get("client_name", "Client")
    client_name = "Client" if raw_client_name is None else str(raw_client_name).strip()

    html = markdown2.markdown(
        md_text,
        extras=[
            "tables",
            "fenced-code-blocks",
            "cuddled-lists",
            "footnotes",
        ],
    )

    soup = BeautifulSoup(html, "html.parser")
    remove_empty_paragraphs_around(soup, ["table", "img", "h1", "h2", "h3", "h4", "h5", "h6"])
    clean_extra_spacing_around_tables(soup)

    cleaned_html = add_table_borders_to_html(str(soup))

    # A freshly constructed BytesIO is already positioned at offset 0.
    html_io = BytesIO(cleaned_html.encode("utf-8"))

    # Keep only characters that are safe inside a quoted filename; fall back
    # to the default when nothing survives the filter.
    safe_client_name = "".join(
        c for c in client_name if c.isalnum() or c in (" ", "_", "-")
    ).strip() or "Client"
    filename = f"Proposal for {safe_client_name}.html"

    if filename.isascii():
        disposition = f'attachment; filename="{filename}"'
    else:
        # str.isalnum() accepts letters outside ASCII, which cannot be sent
        # raw in a header value. Provide an ASCII fallback plus the
        # percent-encoded UTF-8 form (RFC 6266 / RFC 5987 "filename*").
        ascii_fallback = filename.encode("ascii", "ignore").decode("ascii")
        encoded_name = quote(filename, safe="")
        disposition = (
            f'attachment; filename="{ascii_fallback}"; '
            f"filename*=UTF-8''{encoded_name}"
        )

    headers = {
        'Content-Disposition': disposition
    }

    return StreamingResponse(
        html_io,
        media_type='text/html',
        headers=headers
    )
115
+
116
  def insert_cover_image_to_docx(docx_bytes: bytes, image_bytes: bytes) -> BytesIO:
117
  # Validate input
118
  if not docx_bytes: