Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,6 +11,10 @@ import tempfile
|
|
| 11 |
import json
|
| 12 |
import re
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# ----------------- CONFIG -----------------
|
| 15 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 16 |
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
|
|
@@ -69,7 +73,7 @@ def build_queries_with_llm(user_text: str, max_queries: int = 15) -> List[str]:
|
|
| 69 |
{"role": "system", "content": system_prompt},
|
| 70 |
{"role": "user", "content": user_prompt}
|
| 71 |
],
|
| 72 |
-
temperature=0.
|
| 73 |
max_tokens=2000
|
| 74 |
)
|
| 75 |
raw = resp.choices[0].message.content.strip()
|
|
@@ -99,21 +103,45 @@ def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000)
|
|
| 99 |
break
|
| 100 |
return "\n".join(context_parts), citations
|
| 101 |
|
| 102 |
-
def markdown_to_docx(md_text: str) -> str:
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
doc = Document()
|
| 108 |
-
for
|
| 109 |
-
if
|
| 110 |
-
doc.add_heading(
|
| 111 |
-
elif
|
| 112 |
-
doc.add_heading(
|
| 113 |
-
|
| 114 |
-
doc.
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
# ----------------- MAIN FUNCTION -----------------
|
| 119 |
def generate_legal_draft(case_text, uploaded_file=None, add_citations=True):
|
|
@@ -168,7 +196,7 @@ def generate_legal_draft(case_text, uploaded_file=None, add_citations=True):
|
|
| 168 |
{"role":"system","content":system_prompt},
|
| 169 |
{"role":"user","content":user_prompt}
|
| 170 |
],
|
| 171 |
-
temperature=0.
|
| 172 |
max_tokens=15000
|
| 173 |
)
|
| 174 |
draft_md = resp.choices[0].message.content.strip()
|
|
@@ -181,7 +209,8 @@ def generate_legal_draft(case_text, uploaded_file=None, add_citations=True):
|
|
| 181 |
draft_md += f"{i}. {c['source']} (score: {c['score']:.3f})\n"
|
| 182 |
|
| 183 |
docx_path = markdown_to_docx(draft_md)
|
| 184 |
-
return draft_md,
|
|
|
|
| 185 |
|
| 186 |
# ----------------- GRADIO INTERFACE -----------------
|
| 187 |
with gr.Blocks() as demo:
|
|
@@ -192,6 +221,8 @@ with gr.Blocks() as demo:
|
|
| 192 |
add_citations = gr.Checkbox(label="Append citations", value=True)
|
| 193 |
|
| 194 |
draft_output = gr.Markdown(label="Draft Output")
|
|
|
|
|
|
|
| 195 |
download_btn = gr.DownloadButton(label="⬇️ Download Word")
|
| 196 |
# draft_output1 = gr.Markdown("# HI")
|
| 197 |
|
|
@@ -200,7 +231,8 @@ with gr.Blocks() as demo:
|
|
| 200 |
generate_legal_draft,
|
| 201 |
inputs=[case_text, uploaded_file, add_citations],
|
| 202 |
outputs=[draft_output, download_btn],
|
| 203 |
-
|
|
|
|
| 204 |
|
| 205 |
# ----------------- RUN -----------------
|
| 206 |
if __name__ == "__main__":
|
|
|
|
| 11 |
import json
|
| 12 |
import re
|
| 13 |
|
| 14 |
+
import markdown
|
| 15 |
+
from bs4 import BeautifulSoup
|
| 16 |
+
|
| 17 |
+
|
| 18 |
# ----------------- CONFIG -----------------
|
| 19 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 20 |
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
|
|
|
|
| 73 |
{"role": "system", "content": system_prompt},
|
| 74 |
{"role": "user", "content": user_prompt}
|
| 75 |
],
|
| 76 |
+
temperature=0.1,
|
| 77 |
max_tokens=2000
|
| 78 |
)
|
| 79 |
raw = resp.choices[0].message.content.strip()
|
|
|
|
| 103 |
break
|
| 104 |
return "\n".join(context_parts), citations
|
| 105 |
|
| 106 |
+
# def markdown_to_docx(md_text: str) -> str:
|
| 107 |
+
# """
|
| 108 |
+
# Convert Markdown text into a temporary .docx file and return its path.
|
| 109 |
+
# """
|
| 110 |
+
# temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".docx")
|
| 111 |
+
# doc = Document()
|
| 112 |
+
# for line in md_text.split("\n"):
|
| 113 |
+
# if line.startswith("## "):
|
| 114 |
+
# doc.add_heading(line[3:], level=2)
|
| 115 |
+
# elif line.startswith("# "):
|
| 116 |
+
# doc.add_heading(line[2:], level=1)
|
| 117 |
+
# else:
|
| 118 |
+
# doc.add_paragraph(line)
|
| 119 |
+
# doc.save(temp_file.name)
|
| 120 |
+
# return temp_file.name
|
| 121 |
+
|
| 122 |
+
def _list_item_text(li) -> str:
    """Return the text of an ``<li>`` element, excluding any nested lists.

    Nested ``<ul>``/``<ol>`` children are skipped because their items are
    visited on their own by the traversal in ``markdown_to_docx``; including
    them here would write the nested text twice.
    """
    pieces = []
    for child in li.children:
        child_name = getattr(child, "name", None)
        if child_name in ("ul", "ol"):
            continue
        # Text nodes have no tag name; tags contribute their full text.
        pieces.append(str(child) if child_name is None else child.get_text())
    return "".join(pieces).strip()


def markdown_to_docx(md_text: str) -> BytesIO:
    """Convert Markdown text into an in-memory Word (.docx) document.

    The Markdown is rendered to HTML, the HTML is parsed, and headings
    (h1-h3), paragraphs and list items are copied into a new document as
    plain text. Inline formatting (bold, italics, links) is not preserved.

    Args:
        md_text: Markdown source to convert.

    Returns:
        A ``BytesIO`` holding the saved .docx, rewound to position 0 so the
        caller can read it from the start.
    """
    html = markdown.markdown(md_text)
    soup = BeautifulSoup(html, "html.parser")

    doc = Document()
    heading_levels = {"h1": 1, "h2": 2, "h3": 3}
    for el in soup.descendants:
        name = getattr(el, "name", None)
        if name in heading_levels:
            doc.add_heading(el.get_text(), level=heading_levels[name])
        elif name == "p":
            # A <p> nested in an <li> is written by the <li> branch; emitting
            # it here as well would duplicate the text in the document.
            if el.find_parent("li") is None:
                doc.add_paragraph(el.get_text())
        elif name == "li":
            text = _list_item_text(el)
            if not text:
                # Item holds only a nested list; its children are handled
                # when the traversal reaches them.
                continue
            if el.parent is not None and el.parent.name == "ol":
                # Keep the numbering of ordered lists instead of flattening
                # them to bullets.
                number = len(el.find_previous_siblings("li")) + 1
                doc.add_paragraph(f"{number}. {text}")
            else:
                doc.add_paragraph(f"• {text}")

    buf = BytesIO()
    doc.save(buf)
    buf.seek(0)
    return buf
|
| 144 |
+
|
| 145 |
|
| 146 |
# ----------------- MAIN FUNCTION -----------------
|
| 147 |
def generate_legal_draft(case_text, uploaded_file=None, add_citations=True):
|
|
|
|
| 196 |
{"role":"system","content":system_prompt},
|
| 197 |
{"role":"user","content":user_prompt}
|
| 198 |
],
|
| 199 |
+
temperature=0.1,
|
| 200 |
max_tokens=15000
|
| 201 |
)
|
| 202 |
draft_md = resp.choices[0].message.content.strip()
|
|
|
|
| 209 |
draft_md += f"{i}. {c['source']} (score: {c['score']:.3f})\n"
|
| 210 |
|
| 211 |
docx_path = markdown_to_docx(draft_md)
|
| 212 |
+
return gr.update(value=draft_md), markdown_to_docx(draft_md)
|
| 213 |
+
|
| 214 |
|
| 215 |
# ----------------- GRADIO INTERFACE -----------------
|
| 216 |
with gr.Blocks() as demo:
|
|
|
|
| 221 |
add_citations = gr.Checkbox(label="Append citations", value=True)
|
| 222 |
|
| 223 |
draft_output = gr.Markdown(label="Draft Output")
|
| 224 |
+
draft_md = draft_md.replace("\\n", "\n") # convert escaped newlines
|
| 225 |
+
|
| 226 |
download_btn = gr.DownloadButton(label="⬇️ Download Word")
|
| 227 |
# draft_output1 = gr.Markdown("# HI")
|
| 228 |
|
|
|
|
| 231 |
generate_legal_draft,
|
| 232 |
inputs=[case_text, uploaded_file, add_citations],
|
| 233 |
outputs=[draft_output, download_btn],
|
| 234 |
+
)
|
| 235 |
+
|
| 236 |
|
| 237 |
# ----------------- RUN -----------------
|
| 238 |
if __name__ == "__main__":
|