Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,6 +14,8 @@ import markdown
|
|
| 14 |
from bs4 import BeautifulSoup
|
| 15 |
from datetime import datetime
|
| 16 |
from huggingface_hub import hf_hub_download, HfApi
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
# ----------------- CONFIG -----------------
|
|
@@ -154,24 +156,45 @@ def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000)
|
|
| 154 |
break
|
| 155 |
return "\n".join(context_parts), citations
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
def markdown_to_docx(md_text: str) -> str:
|
| 158 |
-
|
| 159 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 160 |
-
doc = Document()
|
| 161 |
-
for el in soup.descendants:
|
| 162 |
-
if el.name == "h1":
|
| 163 |
-
doc.add_heading(el.get_text(), level=1)
|
| 164 |
-
elif el.name == "h2":
|
| 165 |
-
doc.add_heading(el.get_text(), level=2)
|
| 166 |
-
elif el.name == "h3":
|
| 167 |
-
doc.add_heading(el.get_text(), level=3)
|
| 168 |
-
elif el.name == "p":
|
| 169 |
-
doc.add_paragraph(el.get_text())
|
| 170 |
-
elif el.name == "li":
|
| 171 |
-
doc.add_paragraph(f"• {el.get_text()}")
|
| 172 |
tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
# ----------------- ANALYZER -----------------
|
| 177 |
def analyze_template_draft(ref_text: str) -> str:
|
|
|
|
| 14 |
from bs4 import BeautifulSoup
|
| 15 |
from datetime import datetime
|
| 16 |
from huggingface_hub import hf_hub_download, HfApi
|
| 17 |
+
import pypandoc
|
| 18 |
+
|
| 19 |
|
| 20 |
|
| 21 |
# ----------------- CONFIG -----------------
|
|
|
|
| 156 |
break
|
| 157 |
return "\n".join(context_parts), citations
|
| 158 |
|
| 159 |
+
# def markdown_to_docx(md_text: str) -> str:
|
| 160 |
+
# html = markdown.markdown(md_text)
|
| 161 |
+
# soup = BeautifulSoup(html, "html.parser")
|
| 162 |
+
# doc = Document()
|
| 163 |
+
# for el in soup.descendants:
|
| 164 |
+
# if el.name == "h1":
|
| 165 |
+
# doc.add_heading(el.get_text(), level=1)
|
| 166 |
+
# elif el.name == "h2":
|
| 167 |
+
# doc.add_heading(el.get_text(), level=2)
|
| 168 |
+
# elif el.name == "h3":
|
| 169 |
+
# doc.add_heading(el.get_text(), level=3)
|
| 170 |
+
# elif el.name == "p":
|
| 171 |
+
# doc.add_paragraph(el.get_text())
|
| 172 |
+
# elif el.name == "li":
|
| 173 |
+
# doc.add_paragraph(f"• {el.get_text()}")
|
| 174 |
+
# tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
|
| 175 |
+
# doc.save(tmp_path)
|
| 176 |
+
# return tmp_path
|
| 177 |
+
|
| 178 |
def markdown_to_docx(md_text: str) -> str:
    """Convert Markdown text to a DOCX file using Pandoc and return its path.

    The conversion is delegated to Pandoc through ``pypandoc``. If that
    conversion raises for any reason, the failure is logged and a plain
    fallback document is written instead, so the caller always receives a
    path to a DOCX file.

    Args:
        md_text: Markdown source to convert.

    Returns:
        Path to ``draft.docx`` inside the system temporary directory.
    """
    import logging

    # NOTE(review): the file name is fixed and lives in the shared temp
    # directory, so overlapping calls overwrite each other's output --
    # confirm this is acceptable for how the app serves the file.
    tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")

    try:
        pypandoc.convert_text(
            md_text,
            "docx",
            format="md",
            outputfile=tmp_path,
            extra_args=["--standalone"],
        )
    except Exception:
        # Deliberate best-effort boundary: any Pandoc failure degrades to the
        # simple converter below, but the cause is recorded instead of being
        # silently discarded.
        logging.getLogger(__name__).warning(
            "Pandoc conversion failed; writing fallback DOCX to %s",
            tmp_path,
            exc_info=True,
        )

        # Fallback simple converter: raw Markdown text in a single paragraph.
        from docx import Document

        doc = Document()
        doc.add_paragraph("(Conversion via Pandoc failed — fallback applied.)")
        doc.add_paragraph(md_text)
        doc.save(tmp_path)

    return tmp_path
|
| 198 |
|
| 199 |
# ----------------- ANALYZER -----------------
|
| 200 |
def analyze_template_draft(ref_text: str) -> str:
|