omarkashif committed on
Commit
3f3e7de
·
verified ·
1 Parent(s): 1e3ef90

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -18
app.py CHANGED
@@ -11,6 +11,10 @@ import tempfile
11
  import json
12
  import re
13
 
 
 
 
 
14
  # ----------------- CONFIG -----------------
15
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
@@ -69,7 +73,7 @@ def build_queries_with_llm(user_text: str, max_queries: int = 15) -> List[str]:
69
  {"role": "system", "content": system_prompt},
70
  {"role": "user", "content": user_prompt}
71
  ],
72
- temperature=0.2,
73
  max_tokens=2000
74
  )
75
  raw = resp.choices[0].message.content.strip()
@@ -99,21 +103,45 @@ def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000)
99
  break
100
  return "\n".join(context_parts), citations
101
 
102
- def markdown_to_docx(md_text: str) -> str:
103
- """
104
- Convert Markdown text into a temporary .docx file and return its path.
105
- """
106
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".docx")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  doc = Document()
108
- for line in md_text.split("\n"):
109
- if line.startswith("## "):
110
- doc.add_heading(line[3:], level=2)
111
- elif line.startswith("# "):
112
- doc.add_heading(line[2:], level=1)
113
- else:
114
- doc.add_paragraph(line)
115
- doc.save(temp_file.name)
116
- return temp_file.name
 
 
 
 
 
 
 
 
117
 
118
  # ----------------- MAIN FUNCTION -----------------
119
  def generate_legal_draft(case_text, uploaded_file=None, add_citations=True):
@@ -168,7 +196,7 @@ def generate_legal_draft(case_text, uploaded_file=None, add_citations=True):
168
  {"role":"system","content":system_prompt},
169
  {"role":"user","content":user_prompt}
170
  ],
171
- temperature=0.2,
172
  max_tokens=15000
173
  )
174
  draft_md = resp.choices[0].message.content.strip()
@@ -181,7 +209,8 @@ def generate_legal_draft(case_text, uploaded_file=None, add_citations=True):
181
  draft_md += f"{i}. {c['source']} (score: {c['score']:.3f})\n"
182
 
183
  docx_path = markdown_to_docx(draft_md)
184
- return draft_md, docx_path
 
185
 
186
  # ----------------- GRADIO INTERFACE -----------------
187
  with gr.Blocks() as demo:
@@ -192,6 +221,8 @@ with gr.Blocks() as demo:
192
  add_citations = gr.Checkbox(label="Append citations", value=True)
193
 
194
  draft_output = gr.Markdown(label="Draft Output")
 
 
195
  download_btn = gr.DownloadButton(label="⬇️ Download Word")
196
  # draft_output1 = gr.Markdown("# HI")
197
 
@@ -200,7 +231,8 @@ with gr.Blocks() as demo:
200
  generate_legal_draft,
201
  inputs=[case_text, uploaded_file, add_citations],
202
  outputs=[draft_output, download_btn],
203
- )
 
204
 
205
  # ----------------- RUN -----------------
206
  if __name__ == "__main__":
 
11
  import json
12
  import re
13
 
14
+ import markdown
15
+ from bs4 import BeautifulSoup
16
+
17
+
18
  # ----------------- CONFIG -----------------
19
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
20
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
 
73
  {"role": "system", "content": system_prompt},
74
  {"role": "user", "content": user_prompt}
75
  ],
76
+ temperature=0.1,
77
  max_tokens=2000
78
  )
79
  raw = resp.choices[0].message.content.strip()
 
103
  break
104
  return "\n".join(context_parts), citations
105
 
106
+ # def markdown_to_docx(md_text: str) -> str:
107
+ # """
108
+ # Convert Markdown text into a temporary .docx file and return its path.
109
+ # """
110
+ # temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".docx")
111
+ # doc = Document()
112
+ # for line in md_text.split("\n"):
113
+ # if line.startswith("## "):
114
+ # doc.add_heading(line[3:], level=2)
115
+ # elif line.startswith("# "):
116
+ # doc.add_heading(line[2:], level=1)
117
+ # else:
118
+ # doc.add_paragraph(line)
119
+ # doc.save(temp_file.name)
120
+ # return temp_file.name
121
+
122
+ def markdown_to_docx(md_text: str) -> BytesIO:
123
+ """Convert Markdown text into Word DOCX with proper formatting."""
124
+ html = markdown.markdown(md_text)
125
+ soup = BeautifulSoup(html, "html.parser")
126
+
127
  doc = Document()
128
+ for el in soup.descendants:
129
+ if el.name == "h1":
130
+ doc.add_heading(el.get_text(), level=1)
131
+ elif el.name == "h2":
132
+ doc.add_heading(el.get_text(), level=2)
133
+ elif el.name == "h3":
134
+ doc.add_heading(el.get_text(), level=3)
135
+ elif el.name == "p":
136
+ doc.add_paragraph(el.get_text())
137
+ elif el.name == "li":
138
+ doc.add_paragraph(f"• {el.get_text()}")
139
+
140
+ buf = BytesIO()
141
+ doc.save(buf)
142
+ buf.seek(0)
143
+ return buf
144
+
145
 
146
  # ----------------- MAIN FUNCTION -----------------
147
  def generate_legal_draft(case_text, uploaded_file=None, add_citations=True):
 
196
  {"role":"system","content":system_prompt},
197
  {"role":"user","content":user_prompt}
198
  ],
199
+ temperature=0.1,
200
  max_tokens=15000
201
  )
202
  draft_md = resp.choices[0].message.content.strip()
 
209
  draft_md += f"{i}. {c['source']} (score: {c['score']:.3f})\n"
210
 
211
  docx_path = markdown_to_docx(draft_md)
212
+ return gr.update(value=draft_md), markdown_to_docx(draft_md)
213
+
214
 
215
  # ----------------- GRADIO INTERFACE -----------------
216
  with gr.Blocks() as demo:
 
221
  add_citations = gr.Checkbox(label="Append citations", value=True)
222
 
223
  draft_output = gr.Markdown(label="Draft Output")
224
+ draft_md = draft_md.replace("\\n", "\n") # convert escaped newlines
225
+
226
  download_btn = gr.DownloadButton(label="⬇️ Download Word")
227
  # draft_output1 = gr.Markdown("# HI")
228
 
 
231
  generate_legal_draft,
232
  inputs=[case_text, uploaded_file, add_citations],
233
  outputs=[draft_output, download_btn],
234
+ )
235
+
236
 
237
  # ----------------- RUN -----------------
238
  if __name__ == "__main__":