omarkashif committed on
Commit
f11cb14
·
verified ·
1 Parent(s): d33e648

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -16
app.py CHANGED
@@ -14,6 +14,8 @@ import markdown
14
  from bs4 import BeautifulSoup
15
  from datetime import datetime
16
  from huggingface_hub import hf_hub_download, HfApi
 
 
17
 
18
 
19
  # ----------------- CONFIG -----------------
@@ -154,24 +156,45 @@ def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000)
154
  break
155
  return "\n".join(context_parts), citations
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  def markdown_to_docx(md_text: str) -> str:
158
- html = markdown.markdown(md_text)
159
- soup = BeautifulSoup(html, "html.parser")
160
- doc = Document()
161
- for el in soup.descendants:
162
- if el.name == "h1":
163
- doc.add_heading(el.get_text(), level=1)
164
- elif el.name == "h2":
165
- doc.add_heading(el.get_text(), level=2)
166
- elif el.name == "h3":
167
- doc.add_heading(el.get_text(), level=3)
168
- elif el.name == "p":
169
- doc.add_paragraph(el.get_text())
170
- elif el.name == "li":
171
- doc.add_paragraph(f"• {el.get_text()}")
172
  tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
173
- doc.save(tmp_path)
174
- return tmp_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
  # ----------------- ANALYZER -----------------
177
  def analyze_template_draft(ref_text: str) -> str:
 
14
  from bs4 import BeautifulSoup
15
  from datetime import datetime
16
  from huggingface_hub import hf_hub_download, HfApi
17
+ import pypandoc
18
+
19
 
20
 
21
  # ----------------- CONFIG -----------------
 
156
  break
157
  return "\n".join(context_parts), citations
158
 
159
+ # def markdown_to_docx(md_text: str) -> str:
160
+ # html = markdown.markdown(md_text)
161
+ # soup = BeautifulSoup(html, "html.parser")
162
+ # doc = Document()
163
+ # for el in soup.descendants:
164
+ # if el.name == "h1":
165
+ # doc.add_heading(el.get_text(), level=1)
166
+ # elif el.name == "h2":
167
+ # doc.add_heading(el.get_text(), level=2)
168
+ # elif el.name == "h3":
169
+ # doc.add_heading(el.get_text(), level=3)
170
+ # elif el.name == "p":
171
+ # doc.add_paragraph(el.get_text())
172
+ # elif el.name == "li":
173
+ # doc.add_paragraph(f"• {el.get_text()}")
174
+ # tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
175
+ # doc.save(tmp_path)
176
+ # return tmp_path
177
+
178
  def markdown_to_docx(md_text: str) -> str:
179
+ """Convert Markdown text to DOCX using Pandoc (preserves full formatting)."""
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
181
+ try:
182
+ pypandoc.convert_text(
183
+ md_text,
184
+ "docx",
185
+ format="md",
186
+ outputfile=tmp_path,
187
+ extra_args=["--standalone"]
188
+ )
189
+ return tmp_path
190
+ except Exception as e:
191
+ # Fallback simple converter
192
+ from docx import Document
193
+ doc = Document()
194
+ doc.add_paragraph("(Conversion via Pandoc failed — fallback applied.)")
195
+ doc.add_paragraph(md_text)
196
+ doc.save(tmp_path)
197
+ return tmp_path
198
 
199
  # ----------------- ANALYZER -----------------
200
  def analyze_template_draft(ref_text: str) -> str: