Spaces:

omarkashif
/

draft-gen

Sleeping

App Files Files Community

omarkashif committed on Oct 5, 2025

Commit

f11cb14

verified ·

1 Parent(s): d33e648

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -16

app.py CHANGED Viewed

@@ -14,6 +14,8 @@ import markdown
 from bs4 import BeautifulSoup
 from datetime import datetime
 from huggingface_hub import hf_hub_download, HfApi
 # ----------------- CONFIG -----------------
@@ -154,24 +156,45 @@ def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000)
                 break
     return "\n".join(context_parts), citations
 def markdown_to_docx(md_text: str) -> str:
-    html = markdown.markdown(md_text)
-    soup = BeautifulSoup(html, "html.parser")
-    doc = Document()
-    for el in soup.descendants:
-        if el.name == "h1":
-            doc.add_heading(el.get_text(), level=1)
-        elif el.name == "h2":
-            doc.add_heading(el.get_text(), level=2)
-        elif el.name == "h3":
-            doc.add_heading(el.get_text(), level=3)
-        elif el.name == "p":
-            doc.add_paragraph(el.get_text())
-        elif el.name == "li":
-            doc.add_paragraph(f"• {el.get_text()}")
     tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
-    doc.save(tmp_path)
-    return tmp_path
 # ----------------- ANALYZER -----------------
 def analyze_template_draft(ref_text: str) -> str:

 from bs4 import BeautifulSoup
 from datetime import datetime
 from huggingface_hub import hf_hub_download, HfApi
+import pypandoc
 # ----------------- CONFIG -----------------
                 break
     return "\n".join(context_parts), citations
+# def markdown_to_docx(md_text: str) -> str:
+#     html = markdown.markdown(md_text)
+#     soup = BeautifulSoup(html, "html.parser")
+#     doc = Document()
+#     for el in soup.descendants:
+#         if el.name == "h1":
+#             doc.add_heading(el.get_text(), level=1)
+#         elif el.name == "h2":
+#             doc.add_heading(el.get_text(), level=2)
+#         elif el.name == "h3":
+#             doc.add_heading(el.get_text(), level=3)
+#         elif el.name == "p":
+#             doc.add_paragraph(el.get_text())
+#         elif el.name == "li":
+#             doc.add_paragraph(f"• {el.get_text()}")
+#     tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
+#     doc.save(tmp_path)
+#     return tmp_path
 def markdown_to_docx(md_text: str) -> str:
     """Convert Markdown text to a DOCX file and return the file's path.

     Pandoc (through ``pypandoc``) is tried first. If that call raises, the
     failure is logged with its traceback and a plain python-docx document is
     written instead: a notice paragraph followed by the raw Markdown text.

     Args:
         md_text: Markdown source to convert.

     Returns:
         Path to ``draft.docx`` inside the system temporary directory.
     """
     import logging

     # NOTE(review): the output path is fixed, so every call overwrites the
     # file written by the previous call.
     tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
     try:
         pypandoc.convert_text(
             md_text,
             "docx",
             format="md",
             outputfile=tmp_path,
             extra_args=["--standalone"],
         )
         return tmp_path
     except Exception:
         # Deliberate best-effort: still produce a document, but record why
         # Pandoc failed instead of discarding the error silently.
         logging.getLogger(__name__).exception(
             "Pandoc conversion failed; writing plain-text fallback DOCX"
         )
         # Fallback simple converter
         from docx import Document

         doc = Document()
         doc.add_paragraph("(Conversion via Pandoc failed — fallback applied.)")
         doc.add_paragraph(md_text)
         doc.save(tmp_path)
         return tmp_path
 # ----------------- ANALYZER -----------------
 def analyze_template_draft(ref_text: str) -> str: