omarkashif committed on
Commit
043adcb
·
verified ·
1 Parent(s): 3ded283

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -50
app.py CHANGED
@@ -61,15 +61,6 @@ def load_templates_json() -> List[Dict]:
61
  except Exception:
62
  return []
63
 
64
- # def parse_json_safe(raw_text: str, fallback: str) -> List[str]:
65
- # try:
66
- # return json.loads(raw_text)
67
- # except:
68
- # matches = re.findall(r'"([^"]+)"', raw_text)
69
- # if matches:
70
- # return matches
71
- # return [fallback[:512]]
72
-
73
  def save_template_to_hf(name: str, analysis: str) -> Tuple[bool, str]:
74
  """Save new template into HF dataset repo (templates.json)."""
75
  try:
@@ -120,7 +111,7 @@ def parse_json_safe(raw_text: str, fallback: str) -> List[str]:
120
  return matches
121
  return [fallback[:512]]
122
 
123
- def build_queries_with_llm(user_text: str, max_queries: int = 20) -> List[str]:
124
  system_prompt = (
125
  "You are a legal research assistant. "
126
  "A new petition needs to be drafted using the following client/case description. "
@@ -129,23 +120,18 @@ def build_queries_with_llm(user_text: str, max_queries: int = 20) -> List[str]:
129
  "and FBR tax ordinances. "
130
  "Return ONLY a JSON array of strings, no extra text."
131
  )
132
- user_prompt = f"Case description:\n{user_text}"
133
-
134
  try:
135
  resp = openai_client.chat.completions.create(
136
  model="gpt-4o-mini",
137
- messages=[
138
- {"role": "system", "content": system_prompt},
139
- {"role": "user", "content": user_prompt}
140
- ],
141
  temperature=0.1,
142
  max_tokens=2000
143
  )
144
  raw = resp.choices[0].message.content.strip()
145
- queries = parse_json_safe(raw, user_text)
146
  except Exception:
147
- queries = [user_text[:512]]
148
- return queries[:max_queries]
149
 
150
  def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000) -> Tuple[str, List[Dict]]:
151
  seen_texts, context_parts, citations = set(), [], []
@@ -169,10 +155,8 @@ def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000)
169
  return "\n".join(context_parts), citations
170
 
171
  def markdown_to_docx(md_text: str) -> str:
172
- """Convert Markdown text into a Word DOCX and return a file path."""
173
  html = markdown.markdown(md_text)
174
  soup = BeautifulSoup(html, "html.parser")
175
-
176
  doc = Document()
177
  for el in soup.descendants:
178
  if el.name == "h1":
@@ -185,17 +169,14 @@ def markdown_to_docx(md_text: str) -> str:
185
  doc.add_paragraph(el.get_text())
186
  elif el.name == "li":
187
  doc.add_paragraph(f"• {el.get_text()}")
188
-
189
  tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
190
  doc.save(tmp_path)
191
  return tmp_path
192
 
193
- # ----------------- DRAFT ANALYZER -----------------
194
  def analyze_template_draft(ref_text: str) -> str:
195
- """Analyze uploaded draft structure, tone, style, headings, and expected content."""
196
  if not ref_text:
197
  return "(no template provided)"
198
-
199
  system_prompt = """You are a legal draft analyzer.
200
  Your task is to carefully analyze the uploaded legal draft document and summarize its full structure and style.
201
  Extract the following information clearly and systematically:
@@ -207,32 +188,26 @@ Extract the following information clearly and systematically:
207
  6. Sentence/paragraph length and complexity.
208
  7. Any special legal phrases or terminology patterns.
209
  8. Any notes on length and overall flow.
210
-
211
  Return a structured MARKDOWN report that can be given as instructions to another model.
212
  Do not rewrite the draft, only analyze it."""
213
-
214
  try:
215
  resp = openai_client.chat.completions.create(
216
- model="gpt-4o-mini",
217
- messages=[
218
- {"role": "system", "content": system_prompt},
219
- {"role": "user", "content": ref_text[:40000]} # limit for context
220
- ],
221
- temperature=0.2,
222
- max_tokens=4000
223
  )
224
- analysis = resp.choices[0].message.content.strip()
225
  except Exception as e:
226
- analysis = f"(Analyzer error: {e})"
227
-
228
- return analysis
229
 
230
  # ----------------- MAIN -----------------
231
  def generate_legal_draft(case_text, uploaded_file, template_name, new_template_name, add_citations=True):
232
  yield gr.update(value="🔍 Searching in Knowledge Base..."), None
233
 
234
  queries = build_queries_with_llm(case_text)
235
- context_text, citations = pinecone_search(queries, top_k=15)
236
 
237
  # Handle template
238
  template_analysis = ""
@@ -269,10 +244,8 @@ def generate_legal_draft(case_text, uploaded_file, template_name, new_template_n
269
  user_prompt = f"""
270
  **User Input:**
271
  {case_text}
272
-
273
  **Knowledge Base Context:**
274
  {context_text or '(no matches)'}
275
-
276
  **Template Draft Analysis:**
277
  {template_analysis}
278
  """
@@ -280,10 +253,8 @@ def generate_legal_draft(case_text, uploaded_file, template_name, new_template_n
280
  try:
281
  resp = openai_client.chat.completions.create(
282
  model="gpt-5",
283
- messages=[
284
- {"role":"system","content":system_prompt},
285
- {"role":"user","content":user_prompt}
286
- ],
287
  max_completion_tokens=15000,
288
  verbosity="high"
289
  )
@@ -297,9 +268,7 @@ def generate_legal_draft(case_text, uploaded_file, template_name, new_template_n
297
  draft_md += f"{i}. {c['source']} (score: {c['score']:.3f})\n"
298
 
299
  docx_path = markdown_to_docx(draft_md)
300
- yield gr.update(value=draft_md), markdown_to_docx(draft_md)
301
-
302
-
303
 
304
  # ----------------- GRADIO UI -----------------
305
  with gr.Blocks() as demo:
@@ -320,9 +289,9 @@ with gr.Blocks() as demo:
320
 
321
  btn.click(
322
  generate_legal_draft,
323
- inputs=[case_text, template_name,uploaded_file, new_template_name, add_citations],
324
  outputs=[draft_output, download_btn]
325
  )
326
 
327
  if __name__ == "__main__":
328
- demo.launch()
 
61
  except Exception:
62
  return []
63
 
 
 
 
 
 
 
 
 
 
64
  def save_template_to_hf(name: str, analysis: str) -> Tuple[bool, str]:
65
  """Save new template into HF dataset repo (templates.json)."""
66
  try:
 
111
  return matches
112
  return [fallback[:512]]
113
 
114
+ def build_queries_with_llm(user_text: str, max_queries: int = 15) -> List[str]:
115
  system_prompt = (
116
  "You are a legal research assistant. "
117
  "A new petition needs to be drafted using the following client/case description. "
 
120
  "and FBR tax ordinances. "
121
  "Return ONLY a JSON array of strings, no extra text."
122
  )
 
 
123
  try:
124
  resp = openai_client.chat.completions.create(
125
  model="gpt-4o-mini",
126
+ messages=[{"role": "system", "content": system_prompt},
127
+ {"role": "user", "content": user_text}],
 
 
128
  temperature=0.1,
129
  max_tokens=2000
130
  )
131
  raw = resp.choices[0].message.content.strip()
132
+ return parse_json_safe(raw, user_text)[:max_queries]
133
  except Exception:
134
+ return [user_text[:512]]
 
135
 
136
  def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000) -> Tuple[str, List[Dict]]:
137
  seen_texts, context_parts, citations = set(), [], []
 
155
  return "\n".join(context_parts), citations
156
 
157
  def markdown_to_docx(md_text: str) -> str:
 
158
  html = markdown.markdown(md_text)
159
  soup = BeautifulSoup(html, "html.parser")
 
160
  doc = Document()
161
  for el in soup.descendants:
162
  if el.name == "h1":
 
169
  doc.add_paragraph(el.get_text())
170
  elif el.name == "li":
171
  doc.add_paragraph(f"• {el.get_text()}")
 
172
  tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
173
  doc.save(tmp_path)
174
  return tmp_path
175
 
176
+ # ----------------- ANALYZER -----------------
177
  def analyze_template_draft(ref_text: str) -> str:
 
178
  if not ref_text:
179
  return "(no template provided)"
 
180
  system_prompt = """You are a legal draft analyzer.
181
  Your task is to carefully analyze the uploaded legal draft document and summarize its full structure and style.
182
  Extract the following information clearly and systematically:
 
188
  6. Sentence/paragraph length and complexity.
189
  7. Any special legal phrases or terminology patterns.
190
  8. Any notes on length and overall flow.
 
191
  Return a structured MARKDOWN report that can be given as instructions to another model.
192
  Do not rewrite the draft, only analyze it."""
 
193
  try:
194
  resp = openai_client.chat.completions.create(
195
+ model="gpt-5",
196
+ messages=[{"role": "system", "content": system_prompt},
197
+ {"role": "user", "content": ref_text[:40000]}],
198
+ max_completion_tokens=4000,
199
+ verbosity="medium"
 
 
200
  )
201
+ return resp.choices[0].message.content.strip()
202
  except Exception as e:
203
+ return f"(Analyzer error: {e})"
 
 
204
 
205
  # ----------------- MAIN -----------------
206
  def generate_legal_draft(case_text, uploaded_file, template_name, new_template_name, add_citations=True):
207
  yield gr.update(value="🔍 Searching in Knowledge Base..."), None
208
 
209
  queries = build_queries_with_llm(case_text)
210
+ context_text, citations = pinecone_search(queries, top_k=10)
211
 
212
  # Handle template
213
  template_analysis = ""
 
244
  user_prompt = f"""
245
  **User Input:**
246
  {case_text}
 
247
  **Knowledge Base Context:**
248
  {context_text or '(no matches)'}
 
249
  **Template Draft Analysis:**
250
  {template_analysis}
251
  """
 
253
  try:
254
  resp = openai_client.chat.completions.create(
255
  model="gpt-5",
256
+ messages=[{"role": "system", "content": system_prompt},
257
+ {"role": "user", "content": user_prompt}],
 
 
258
  max_completion_tokens=15000,
259
  verbosity="high"
260
  )
 
268
  draft_md += f"{i}. {c['source']} (score: {c['score']:.3f})\n"
269
 
270
  docx_path = markdown_to_docx(draft_md)
271
+ yield gr.update(value=draft_md), docx_path
 
 
272
 
273
  # ----------------- GRADIO UI -----------------
274
  with gr.Blocks() as demo:
 
289
 
290
  btn.click(
291
  generate_legal_draft,
292
+ inputs=[case_text, uploaded_file, template_name, new_template_name, add_citations],
293
  outputs=[draft_output, download_btn]
294
  )
295
 
296
  if __name__ == "__main__":
297
+ demo.launch()