Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -61,15 +61,6 @@ def load_templates_json() -> List[Dict]:
|
|
| 61 |
except Exception:
|
| 62 |
return []
|
| 63 |
|
| 64 |
-
# def parse_json_safe(raw_text: str, fallback: str) -> List[str]:
|
| 65 |
-
# try:
|
| 66 |
-
# return json.loads(raw_text)
|
| 67 |
-
# except:
|
| 68 |
-
# matches = re.findall(r'"([^"]+)"', raw_text)
|
| 69 |
-
# if matches:
|
| 70 |
-
# return matches
|
| 71 |
-
# return [fallback[:512]]
|
| 72 |
-
|
| 73 |
def save_template_to_hf(name: str, analysis: str) -> Tuple[bool, str]:
|
| 74 |
"""Save new template into HF dataset repo (templates.json)."""
|
| 75 |
try:
|
|
@@ -120,7 +111,7 @@ def parse_json_safe(raw_text: str, fallback: str) -> List[str]:
|
|
| 120 |
return matches
|
| 121 |
return [fallback[:512]]
|
| 122 |
|
| 123 |
-
def build_queries_with_llm(user_text: str, max_queries: int = 20) -> List[str]:
|
| 124 |
system_prompt = (
|
| 125 |
"You are a legal research assistant. "
|
| 126 |
"A new petition needs to be drafted using the following client/case description. "
|
|
@@ -129,23 +120,18 @@ def build_queries_with_llm(user_text: str, max_queries: int = 20) -> List[str]:
|
|
| 129 |
"and FBR tax ordinances. "
|
| 130 |
"Return ONLY a JSON array of strings, no extra text."
|
| 131 |
)
|
| 132 |
-
user_prompt = f"Case description:\n{user_text}"
|
| 133 |
-
|
| 134 |
try:
|
| 135 |
resp = openai_client.chat.completions.create(
|
| 136 |
model="gpt-4o-mini",
|
| 137 |
-
messages=[
|
| 138 |
-
|
| 139 |
-
{"role": "user", "content": user_prompt}
|
| 140 |
-
],
|
| 141 |
temperature=0.1,
|
| 142 |
max_tokens=2000
|
| 143 |
)
|
| 144 |
raw = resp.choices[0].message.content.strip()
|
| 145 |
-
|
| 146 |
except Exception:
|
| 147 |
-
|
| 148 |
-
return queries[:max_queries]
|
| 149 |
|
| 150 |
def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000) -> Tuple[str, List[Dict]]:
|
| 151 |
seen_texts, context_parts, citations = set(), [], []
|
|
@@ -169,10 +155,8 @@ def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000)
|
|
| 169 |
return "\n".join(context_parts), citations
|
| 170 |
|
| 171 |
def markdown_to_docx(md_text: str) -> str:
|
| 172 |
-
"""Convert Markdown text into a Word DOCX and return a file path."""
|
| 173 |
html = markdown.markdown(md_text)
|
| 174 |
soup = BeautifulSoup(html, "html.parser")
|
| 175 |
-
|
| 176 |
doc = Document()
|
| 177 |
for el in soup.descendants:
|
| 178 |
if el.name == "h1":
|
|
@@ -185,17 +169,14 @@ def markdown_to_docx(md_text: str) -> str:
|
|
| 185 |
doc.add_paragraph(el.get_text())
|
| 186 |
elif el.name == "li":
|
| 187 |
doc.add_paragraph(f"• {el.get_text()}")
|
| 188 |
-
|
| 189 |
tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
|
| 190 |
doc.save(tmp_path)
|
| 191 |
return tmp_path
|
| 192 |
|
| 193 |
-
# -----------------
|
| 194 |
def analyze_template_draft(ref_text: str) -> str:
|
| 195 |
-
"""Analyze uploaded draft structure, tone, style, headings, and expected content."""
|
| 196 |
if not ref_text:
|
| 197 |
return "(no template provided)"
|
| 198 |
-
|
| 199 |
system_prompt = """You are a legal draft analyzer.
|
| 200 |
Your task is to carefully analyze the uploaded legal draft document and summarize its full structure and style.
|
| 201 |
Extract the following information clearly and systematically:
|
|
@@ -207,32 +188,26 @@ Extract the following information clearly and systematically:
|
|
| 207 |
6. Sentence/paragraph length and complexity.
|
| 208 |
7. Any special legal phrases or terminology patterns.
|
| 209 |
8. Any notes on length and overall flow.
|
| 210 |
-
|
| 211 |
Return a structured MARKDOWN report that can be given as instructions to another model.
|
| 212 |
Do not rewrite the draft, only analyze it."""
|
| 213 |
-
|
| 214 |
try:
|
| 215 |
resp = openai_client.chat.completions.create(
|
| 216 |
-
model="gpt-
|
| 217 |
-
messages=[
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
temperature=0.2,
|
| 222 |
-
max_tokens=4000
|
| 223 |
)
|
| 224 |
-
|
| 225 |
except Exception as e:
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
return analysis
|
| 229 |
|
| 230 |
# ----------------- MAIN -----------------
|
| 231 |
def generate_legal_draft(case_text, uploaded_file, template_name, new_template_name, add_citations=True):
|
| 232 |
yield gr.update(value="🔍 Searching in Knowledge Base..."), None
|
| 233 |
|
| 234 |
queries = build_queries_with_llm(case_text)
|
| 235 |
-
context_text, citations = pinecone_search(queries, top_k=
|
| 236 |
|
| 237 |
# Handle template
|
| 238 |
template_analysis = ""
|
|
@@ -269,10 +244,8 @@ def generate_legal_draft(case_text, uploaded_file, template_name, new_template_n
|
|
| 269 |
user_prompt = f"""
|
| 270 |
**User Input:**
|
| 271 |
{case_text}
|
| 272 |
-
|
| 273 |
**Knowledge Base Context:**
|
| 274 |
{context_text or '(no matches)'}
|
| 275 |
-
|
| 276 |
**Template Draft Analysis:**
|
| 277 |
{template_analysis}
|
| 278 |
"""
|
|
@@ -280,10 +253,8 @@ def generate_legal_draft(case_text, uploaded_file, template_name, new_template_n
|
|
| 280 |
try:
|
| 281 |
resp = openai_client.chat.completions.create(
|
| 282 |
model="gpt-5",
|
| 283 |
-
messages=[
|
| 284 |
-
|
| 285 |
-
{"role":"user","content":user_prompt}
|
| 286 |
-
],
|
| 287 |
max_completion_tokens=15000,
|
| 288 |
verbosity="high"
|
| 289 |
)
|
|
@@ -297,9 +268,7 @@ def generate_legal_draft(case_text, uploaded_file, template_name, new_template_n
|
|
| 297 |
draft_md += f"{i}. {c['source']} (score: {c['score']:.3f})\n"
|
| 298 |
|
| 299 |
docx_path = markdown_to_docx(draft_md)
|
| 300 |
-
yield gr.update(value=draft_md),
|
| 301 |
-
|
| 302 |
-
|
| 303 |
|
| 304 |
# ----------------- GRADIO UI -----------------
|
| 305 |
with gr.Blocks() as demo:
|
|
@@ -320,9 +289,9 @@ with gr.Blocks() as demo:
|
|
| 320 |
|
| 321 |
btn.click(
|
| 322 |
generate_legal_draft,
|
| 323 |
-
inputs=[case_text,
|
| 324 |
outputs=[draft_output, download_btn]
|
| 325 |
)
|
| 326 |
|
| 327 |
if __name__ == "__main__":
|
| 328 |
-
demo.launch()
|
|
|
|
| 61 |
except Exception:
|
| 62 |
return []
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
def save_template_to_hf(name: str, analysis: str) -> Tuple[bool, str]:
|
| 65 |
"""Save new template into HF dataset repo (templates.json)."""
|
| 66 |
try:
|
|
|
|
| 111 |
return matches
|
| 112 |
return [fallback[:512]]
|
| 113 |
|
| 114 |
+
def build_queries_with_llm(user_text: str, max_queries: int = 15) -> List[str]:
|
| 115 |
system_prompt = (
|
| 116 |
"You are a legal research assistant. "
|
| 117 |
"A new petition needs to be drafted using the following client/case description. "
|
|
|
|
| 120 |
"and FBR tax ordinances. "
|
| 121 |
"Return ONLY a JSON array of strings, no extra text."
|
| 122 |
)
|
|
|
|
|
|
|
| 123 |
try:
|
| 124 |
resp = openai_client.chat.completions.create(
|
| 125 |
model="gpt-4o-mini",
|
| 126 |
+
messages=[{"role": "system", "content": system_prompt},
|
| 127 |
+
{"role": "user", "content": user_text}],
|
|
|
|
|
|
|
| 128 |
temperature=0.1,
|
| 129 |
max_tokens=2000
|
| 130 |
)
|
| 131 |
raw = resp.choices[0].message.content.strip()
|
| 132 |
+
return parse_json_safe(raw, user_text)[:max_queries]
|
| 133 |
except Exception:
|
| 134 |
+
return [user_text[:512]]
|
|
|
|
| 135 |
|
| 136 |
def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000) -> Tuple[str, List[Dict]]:
|
| 137 |
seen_texts, context_parts, citations = set(), [], []
|
|
|
|
| 155 |
return "\n".join(context_parts), citations
|
| 156 |
|
| 157 |
def markdown_to_docx(md_text: str) -> str:
|
|
|
|
| 158 |
html = markdown.markdown(md_text)
|
| 159 |
soup = BeautifulSoup(html, "html.parser")
|
|
|
|
| 160 |
doc = Document()
|
| 161 |
for el in soup.descendants:
|
| 162 |
if el.name == "h1":
|
|
|
|
| 169 |
doc.add_paragraph(el.get_text())
|
| 170 |
elif el.name == "li":
|
| 171 |
doc.add_paragraph(f"• {el.get_text()}")
|
|
|
|
| 172 |
tmp_path = os.path.join(tempfile.gettempdir(), "draft.docx")
|
| 173 |
doc.save(tmp_path)
|
| 174 |
return tmp_path
|
| 175 |
|
| 176 |
+
# ----------------- ANALYZER -----------------
|
| 177 |
def analyze_template_draft(ref_text: str) -> str:
|
|
|
|
| 178 |
if not ref_text:
|
| 179 |
return "(no template provided)"
|
|
|
|
| 180 |
system_prompt = """You are a legal draft analyzer.
|
| 181 |
Your task is to carefully analyze the uploaded legal draft document and summarize its full structure and style.
|
| 182 |
Extract the following information clearly and systematically:
|
|
|
|
| 188 |
6. Sentence/paragraph length and complexity.
|
| 189 |
7. Any special legal phrases or terminology patterns.
|
| 190 |
8. Any notes on length and overall flow.
|
|
|
|
| 191 |
Return a structured MARKDOWN report that can be given as instructions to another model.
|
| 192 |
Do not rewrite the draft, only analyze it."""
|
|
|
|
| 193 |
try:
|
| 194 |
resp = openai_client.chat.completions.create(
|
| 195 |
+
model="gpt-5",
|
| 196 |
+
messages=[{"role": "system", "content": system_prompt},
|
| 197 |
+
{"role": "user", "content": ref_text[:40000]}],
|
| 198 |
+
max_completion_tokens=4000,
|
| 199 |
+
verbosity="medium"
|
|
|
|
|
|
|
| 200 |
)
|
| 201 |
+
return resp.choices[0].message.content.strip()
|
| 202 |
except Exception as e:
|
| 203 |
+
return f"(Analyzer error: {e})"
|
|
|
|
|
|
|
| 204 |
|
| 205 |
# ----------------- MAIN -----------------
|
| 206 |
def generate_legal_draft(case_text, uploaded_file, template_name, new_template_name, add_citations=True):
|
| 207 |
yield gr.update(value="🔍 Searching in Knowledge Base..."), None
|
| 208 |
|
| 209 |
queries = build_queries_with_llm(case_text)
|
| 210 |
+
context_text, citations = pinecone_search(queries, top_k=10)
|
| 211 |
|
| 212 |
# Handle template
|
| 213 |
template_analysis = ""
|
|
|
|
| 244 |
user_prompt = f"""
|
| 245 |
**User Input:**
|
| 246 |
{case_text}
|
|
|
|
| 247 |
**Knowledge Base Context:**
|
| 248 |
{context_text or '(no matches)'}
|
|
|
|
| 249 |
**Template Draft Analysis:**
|
| 250 |
{template_analysis}
|
| 251 |
"""
|
|
|
|
| 253 |
try:
|
| 254 |
resp = openai_client.chat.completions.create(
|
| 255 |
model="gpt-5",
|
| 256 |
+
messages=[{"role": "system", "content": system_prompt},
|
| 257 |
+
{"role": "user", "content": user_prompt}],
|
|
|
|
|
|
|
| 258 |
max_completion_tokens=15000,
|
| 259 |
verbosity="high"
|
| 260 |
)
|
|
|
|
| 268 |
draft_md += f"{i}. {c['source']} (score: {c['score']:.3f})\n"
|
| 269 |
|
| 270 |
docx_path = markdown_to_docx(draft_md)
|
| 271 |
+
yield gr.update(value=draft_md), docx_path
|
|
|
|
|
|
|
| 272 |
|
| 273 |
# ----------------- GRADIO UI -----------------
|
| 274 |
with gr.Blocks() as demo:
|
|
|
|
| 289 |
|
| 290 |
btn.click(
|
| 291 |
generate_legal_draft,
|
| 292 |
+
inputs=[case_text, uploaded_file, template_name, new_template_name, add_citations],
|
| 293 |
outputs=[draft_output, download_btn]
|
| 294 |
)
|
| 295 |
|
| 296 |
if __name__ == "__main__":
|
| 297 |
+
demo.launch()
|