omarkashif committed on
Commit
5b0f3aa
·
verified ·
1 Parent(s): 48adfe6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +198 -0
app.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import json
import os
import re
import tempfile
from io import BytesIO
from typing import Dict, List, Tuple

# Third-party
import gradio as gr
import markdown
import openai
import pdfplumber
from docx import Document
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
13
+
14
# ----------------- CONFIG -----------------
# Credentials come from the environment; os.getenv returns None when unset,
# so a missing key surfaces later as an auth error from the client rather
# than at import time.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_INDEX = "legal-ai"  # name of the Pinecone index holding the legal corpus

# Module-level clients: constructed once at import and shared by all requests.
openai_client = openai.OpenAI(api_key=OPENAI_API_KEY)
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX)

# ----------------- EMBEDDER -----------------
# Sentence embedder used to vectorize search queries (downloads the model on
# first use). NOTE(review): its output dimension must match the "legal-ai"
# index configuration — confirm against the index settings.
embedder = SentenceTransformer("all-mpnet-base-v2")
25
+
26
+ # ----------------- HELPERS -----------------
27
def load_reference_text(uploaded_file) -> str:
    """Extract plain text from an uploaded reference template.

    Accepts either a filesystem path (str / os.PathLike — what newer Gradio
    versions pass for ``gr.File``) or a file-like object exposing a ``name``
    attribute (older Gradio tempfile wrappers). Supports .docx, .pdf and
    .txt; any other extension yields an empty string.

    Args:
        uploaded_file: Path or file-like object for the uploaded template.

    Returns:
        The extracted text, or "" for unsupported extensions.
    """
    # Normalize: Gradio may hand us a path string or a file-like wrapper.
    if isinstance(uploaded_file, (str, os.PathLike)):
        name = source = os.fspath(uploaded_file)
    else:
        name, source = uploaded_file.name, uploaded_file
    ext = os.path.splitext(name)[1].lower()

    if ext == ".docx":
        doc = Document(source)
        return "\n".join(p.text for p in doc.paragraphs)
    if ext == ".pdf":
        # Collect page texts and join once instead of quadratic += growth.
        pages = []
        with pdfplumber.open(source) as pdf:
            for page in pdf.pages:
                t = page.extract_text()
                if t:
                    pages.append(t)
        return "\n".join(pages) + ("\n" if pages else "")
    if ext == ".txt":
        if isinstance(source, (str, os.PathLike)):
            with open(source, "rb") as f:
                raw = f.read()
        else:
            raw = source.read()
        # A text-mode handle returns str; a binary one returns bytes.
        if isinstance(raw, bytes):
            return raw.decode("utf-8", errors="ignore")
        return raw
    return ""
43
+
44
def parse_json_safe(raw_text: str, fallback: str) -> List[str]:
    """Best-effort parse of an LLM reply into a list of query strings.

    Tries strict JSON first; if the payload is not valid JSON or not a
    list of non-empty strings, falls back to scraping double-quoted
    substrings, and finally to the first 512 characters of *fallback*.

    Args:
        raw_text: Raw model output expected to be a JSON array of strings.
        fallback: Text to use as a single query when nothing parses.

    Returns:
        A non-empty list of query strings.
    """
    try:
        parsed = json.loads(raw_text)
        # The model was asked for a JSON array of strings; reject anything
        # else (a bare number, an object, ...) instead of passing it on —
        # the original returned it as-is and crashed on slicing downstream.
        if isinstance(parsed, list):
            items = [s for s in parsed if isinstance(s, str) and s.strip()]
            if items:
                return items
    except json.JSONDecodeError:  # narrow: only parse errors, never bare except
        pass
    matches = re.findall(r'"([^"]+)"', raw_text)
    if matches:
        return matches
    return [fallback[:512]]
52
+
53
def build_queries_with_llm(user_text: str, max_queries: int = 15) -> List[str]:
    """Ask the LLM to expand a case description into retrieval queries.

    Returns at most *max_queries* query strings; on any API failure the
    first 512 characters of the case description serve as a single query.
    """
    instructions = (
        "You are a legal research assistant. "
        "A new petition needs to be drafted using the following client/case description. "
        "Devise 5-6 or more concise queries that will be helpful to retrieve relevant information "
        "from a knowledge base containing the Constitution of Pakistan, Punjab case law, "
        "and FBR tax ordinances. "
        "Return ONLY a JSON array of strings, no extra text."
    )
    try:
        response = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": instructions},
                {"role": "user", "content": f"Case description:\n{user_text}"},
            ],
            temperature=0.2,
            max_tokens=2000,
        )
        reply = response.choices[0].message.content.strip()
        queries = parse_json_safe(reply, user_text)
    except Exception:
        # Any API failure degrades gracefully to searching on the raw text.
        queries = [user_text[:512]]
    return queries[:max_queries]
79
+
80
def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000) -> Tuple[str, List[Dict]]:
    """Run each query against the Pinecone index and collect unique context.

    Deduplicates hits by their first 200 characters and stops adding
    context once the accumulated text exceeds *max_chars*.

    Args:
        queries: Search queries to embed and run against the index.
        top_k: Matches requested per query.
        max_chars: Soft cap on total collected context length.

    Returns:
        Tuple of (newline-joined context bullets, citation dicts with
        ``score`` and ``source`` keys, parallel to the context entries).
    """
    seen_texts, context_parts, citations = set(), [], []
    total_chars = 0  # running budget, shared across ALL queries
    for q in queries:
        # BUGFIX: the original `break` only exited the inner matches loop,
        # so later queries kept growing the context well past max_chars.
        if total_chars > max_chars:
            break
        vec = embedder.encode(q).tolist()
        res = index.query(vector=vec, top_k=top_k, include_metadata=True)
        for m in res.get("matches", []):
            md = m.get("metadata", {})
            txt = md.get("text") or ""
            # Dedupe on a 200-char prefix: near-identical chunks retrieved
            # by overlapping queries are counted once.
            if not txt or txt[:200] in seen_texts:
                continue
            seen_texts.add(txt[:200])
            part = f"- {txt.strip()}"
            context_parts.append(part)
            total_chars += len(part)
            citations.append({
                "score": float(m.get("score") or 0.0),
                "source": md.get("chunk_id") or md.get("title") or "Unknown",
            })
            if total_chars > max_chars:
                break
    return "\n".join(context_parts), citations
100
+
101
def markdown_to_docx(md_text: str) -> BytesIO:
    """Convert simple Markdown into an in-memory .docx document.

    Handles ``#``/``##``/``###`` headings and ``- `` bullet items (the
    original dropped ### headings and bullets into plain paragraphs with
    their literal markers); every other line becomes a plain paragraph.

    Args:
        md_text: Markdown source, one block element per line.

    Returns:
        A BytesIO containing the .docx bytes, positioned at offset 0.
    """
    doc = Document()
    for line in md_text.split("\n"):
        if line.startswith("### "):
            doc.add_heading(line[4:], level=3)
        elif line.startswith("## "):
            doc.add_heading(line[3:], level=2)
        elif line.startswith("# "):
            doc.add_heading(line[2:], level=1)
        elif line.startswith("- "):
            # "List Bullet" is a built-in style of the default docx template.
            doc.add_paragraph(line[2:], style="List Bullet")
        else:
            doc.add_paragraph(line)
    buf = BytesIO()
    doc.save(buf)
    buf.seek(0)
    return buf
114
+
115
# ----------------- MAIN FUNCTION -----------------
def generate_legal_draft(case_text, uploaded_file=None, add_citations=True):
    """Generate a court-ready petition draft in Markdown plus a .docx file.

    Args:
        case_text: Free-text case/client description entered by the user.
        uploaded_file: Optional reference template (DOCX/PDF/TXT) whose
            structure and tone the draft should imitate.
        add_citations: When True, appends a References section listing the
            Pinecone sources used.

    Returns:
        Tuple of (draft markdown text, path to the generated .docx file),
        matching the two Gradio output components.
    """
    ref_text = load_reference_text(uploaded_file) if uploaded_file else ""

    # Expand the case description into retrieval queries, then pull
    # supporting context out of the vector index.
    queries = build_queries_with_llm(case_text)
    context_text, citations = pinecone_search(queries, top_k=10)

    system_prompt = """You are an expert legal drafter for Pakistani law. Your task is to create a professional, court-ready legal petition in MARKDOWN format using three inputs:
1. User Input: Case details including client info, petition type, court, facts, relevant laws, and sections.
2. Knowledge Base Context: Relevant laws, case precedents, and ordinances retrieved from the vector database (Constitution of Pakistan, Punjab case law, FBR ordinances).
3. Reference Template: A legal document uploaded by the user. Extract its **structure, headings, section order, style, tone, and formatting** — but do not copy its text.
Instructions
1. Structure & Headings
- Replicate the section hierarchy of the uploaded template.
- Main heading should represent the petition title or case name.
- Subheadings and any lower-level headings must match the style and order from the template.
- Sections should include: Parties, Facts, Legal Grounds, Arguments, Prayer, etc., as per the template.
2. Tone & Style
- Use formal, professional, and persuasive legal language.
- Facts are objective; legal arguments are assertive.
- Follow the tone and phrasing style of the uploaded template document.
3. Content Integration
- Incorporate relevant context from the vector database where appropriate.
- Cite legal provisions clearly in-text when relevant.
- Ensure content is logically coherent, comprehensive, and supports the petition’s objective.
- Do not hallucinate laws or precedents.
4. References
- Include a "References" section at the end if citations are present.
- Format as: `1. Source Name (score)`
5. Output Rules
- Produce output MARKDOWN.
- Do not add explanations, summaries, or extra text.
- Maintain clarity, professionalism, and adherence to legal drafting standards.
- Preserve structure, tone, style, and headings from the uploaded template as much as possible.
6. Fallback
- If context or relevant laws are missing, state: "No applicable precedent found" or "Relevant law cited above."
"""
    user_prompt = f"""
**User Input:**
{case_text}
**Knowledge Base Context:**
{context_text or '(no matches)'}
**Reference Template (format+headings+structure+tone+language):**
{(ref_text[:50000] + '...') if ref_text else '(none)'}
"""
    try:
        resp = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.2,
            max_tokens=15000
        )
        draft_md = resp.choices[0].message.content.strip()
    except Exception as e:
        draft_md = f"OpenAI error: {e}"

    if add_citations and citations:
        draft_md += "\n\n### References\n"
        for i, c in enumerate(citations, 1):
            draft_md += f"{i}. {c['source']} (score: {c['score']:.3f})\n"

    # BUGFIX: a gr.File output expects a filesystem path, not a
    # (BytesIO, filename) tuple — persist the docx to a temp file and
    # return its path so the download component works.
    docx_buf = markdown_to_docx(draft_md)
    out_path = os.path.join(tempfile.mkdtemp(), "legal_draft.docx")
    with open(out_path, "wb") as f:
        f.write(docx_buf.getvalue())
    return draft_md, out_path
180
+
181
# ----------------- GRADIO INTERFACE -----------------
# Three inputs map positionally onto generate_legal_draft's parameters; the
# two outputs receive its (markdown, docx) return tuple.
_inputs = [
    gr.Textbox(label="Case Details", lines=10, placeholder="Enter client and case info..."),
    gr.File(label="Reference Template (DOCX/PDF/TXT)", file_types=[".docx", ".pdf", ".txt"]),
    gr.Checkbox(label="Append citations", value=True),
]
_outputs = [
    gr.Textbox(label="Draft Output", lines=30),
    gr.File(label="Download Word"),
]
iface = gr.Interface(
    fn=generate_legal_draft,
    inputs=_inputs,
    outputs=_outputs,
    title="⚖️ AI Legal Draft Generator",
    description="Upload a DOCX/PDF/TXT reference template, enter case details, and generate a court-ready legal draft in Markdown and Word.",
)

if __name__ == "__main__":
    iface.launch()