Spaces:

omarkashif
/

draft-gen

Sleeping

App Files Files Community

omarkashif committed on Sep 4, 2025

Commit

5b0f3aa

verified ·

1 Parent(s): 48adfe6

Create app.py

Browse files

Files changed (1) hide show

app.py +198 -0

app.py ADDED Viewed

	@@ -0,0 +1,198 @@

import json
import logging
import os
import re
import tempfile
from io import BytesIO
from typing import Dict, List, Tuple

import gradio as gr
import markdown
import openai
import pdfplumber
from docx import Document
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
# ----------------- CONFIG -----------------
# Credentials are read from the environment; each value is None when the
# corresponding variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

# Name of the Pinecone index that is queried for knowledge-base context.
PINECONE_INDEX = "legal-ai"

# Module-level service clients used by the functions below.
openai_client = openai.OpenAI(api_key=OPENAI_API_KEY)
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX)

# ----------------- EMBEDDER -----------------
# Sentence-embedding model used to turn search queries into vectors.
embedder = SentenceTransformer("all-mpnet-base-v2")
# ----------------- HELPERS -----------------
def load_reference_text(uploaded_file) -> str:
    """Extract plain text from an uploaded DOCX, PDF, or TXT reference file.

    Args:
        uploaded_file: Either a filesystem path (``str`` or ``os.PathLike``)
            or a file-like object exposing a ``name`` attribute. Both shapes
            are accepted because upload widgets differ in what they pass to
            the callback.

    Returns:
        The extracted text, or ``""`` when nothing was uploaded or the file
        name does not end in ``.docx``, ``.pdf`` or ``.txt``.
    """
    if uploaded_file is None:
        return ""

    if isinstance(uploaded_file, (str, os.PathLike)):
        # A path: hand the path itself to the parsers.
        source = os.fspath(uploaded_file)
        filename = source
    else:
        # A file-like object: keep passing the object, use its name for
        # extension detection only.
        source = uploaded_file
        filename = getattr(uploaded_file, "name", "") or ""
    lowered = str(filename).lower()

    if lowered.endswith(".docx"):
        return "\n".join(p.text for p in Document(source).paragraphs)

    if lowered.endswith(".pdf"):
        with pdfplumber.open(source) as pdf:
            page_texts = (page.extract_text() for page in pdf.pages)
            # Pages without extractable text yield None/"" and are skipped.
            return "".join(f"{t}\n" for t in page_texts if t)

    if lowered.endswith(".txt"):
        if isinstance(source, str):
            with open(source, "rb") as fh:
                data = fh.read()
        else:
            data = source.read()
        # A text-mode file object already returns str; only bytes need decoding.
        if isinstance(data, bytes):
            return data.decode("utf-8", errors="ignore")
        return data

    return ""
def _collect_strings(node) -> List[str]:
    """Return every non-blank string found in a decoded JSON value, in order."""
    if isinstance(node, str):
        stripped = node.strip()
        return [stripped] if stripped else []
    if isinstance(node, list):
        return [s for item in node for s in _collect_strings(item)]
    if isinstance(node, dict):
        return [s for value in node.values() for s in _collect_strings(value)]
    return []


def parse_json_safe(raw_text: str, fallback: str) -> List[str]:
    """Parse an LLM reply that should be a JSON array of strings.

    The reply is tolerated in several shapes: a bare JSON array, the same
    wrapped in a Markdown code fence, a JSON object whose values hold the
    strings, or free text containing double-quoted phrases.

    Args:
        raw_text: The raw model reply.
        fallback: Text to use when nothing usable can be extracted.

    Returns:
        A non-empty list of strings. When no string can be recovered the
        list holds the first 512 characters of ``fallback``.
    """
    text = (raw_text or "").strip()

    # Models frequently wrap JSON in ```json ... ``` fences; unwrap them.
    fenced = re.match(r"^```[A-Za-z]*\s*(.*?)\s*```$", text, re.DOTALL)
    if fenced:
        text = fenced.group(1)

    try:
        parsed = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        # Not valid JSON: salvage any double-quoted phrases instead.
        quoted = [m.strip() for m in re.findall(r'"([^"]+)"', text) if m.strip()]
        if quoted:
            return quoted
    else:
        # Valid JSON may still be a dict, scalar or mixed list; keep only
        # the strings so the declared return type always holds.
        strings = _collect_strings(parsed)
        if strings:
            return strings

    return [(fallback or "")[:512]]
def build_queries_with_llm(user_text: str, max_queries: int = 15) -> List[str]:
    """Ask the chat model for knowledge-base search queries for a case.

    Args:
        user_text: Free-text client/case description.
        max_queries: Upper bound on the number of queries returned.

    Returns:
        Up to ``max_queries`` distinct, non-blank query strings. When the
        model call or parsing fails, or yields nothing usable, the list
        holds the first 512 characters of ``user_text`` instead. Blank
        input or a non-positive ``max_queries`` returns ``[]`` without
        calling the model.
    """
    if not user_text or not user_text.strip() or max_queries <= 0:
        return []

    system_prompt = (
        "You are a legal research assistant. "
        "A new petition needs to be drafted using the following client/case description. "
        "Devise 5-6 or more concise queries that will be helpful to retrieve relevant information "
        "from a knowledge base containing the Constitution of Pakistan, Punjab case law, "
        "and FBR tax ordinances. "
        "Return ONLY a JSON array of strings, no extra text."
    )
    user_prompt = f"Case description:\n{user_text}"
    fallback = [user_text[:512]]

    try:
        resp = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.2,
            max_tokens=2000,
        )
        # ``content`` can be None; treat that as an empty reply.
        raw = (resp.choices[0].message.content or "").strip()
        parsed = parse_json_safe(raw, user_text)
    except Exception:
        # Best-effort step: retrieval can still run on the raw case text,
        # but the failure is logged rather than silently discarded.
        logging.getLogger(__name__).exception(
            "Query generation failed; falling back to the raw case text"
        )
        return fallback[:max_queries]

    # Guard against a parser result that is not a list of strings: a bare
    # string would otherwise be sliced into characters by the caller.
    if isinstance(parsed, str):
        parsed = [parsed]
    elif not isinstance(parsed, list):
        parsed = []

    # dict.fromkeys removes duplicates while preserving order.
    unique = dict.fromkeys(
        q.strip() for q in parsed if isinstance(q, str) and q.strip()
    )
    queries = list(unique) or fallback
    return queries[:max_queries]
def pinecone_search(queries: List[str], top_k: int = 10, max_chars: int = 10000) -> Tuple[str, List[Dict]]:
    """Retrieve de-duplicated context passages for a list of queries.

    Each non-blank query is embedded and sent to the Pinecone index. Matches
    whose first 200 characters were already seen are skipped.

    Args:
        queries: Search queries; blank entries are ignored.
        top_k: Number of matches requested per query.
        max_chars: Soft cap on the combined length of the collected
            passages. Collection stops for all queries right after the
            passage that pushes the total past this value, so the result
            may exceed the cap by at most one passage.

    Returns:
        ``(context, citations)`` where ``context`` is the passages joined
        as ``"- <text>"`` lines and ``citations`` is one
        ``{"score", "source"}`` dict per passage, in the same order.
    """
    seen_prefixes = set()
    context_parts: List[str] = []
    citations: List[Dict] = []
    total_chars = 0  # running total, avoids re-summing on every match

    for query in queries:
        if not query or not query.strip():
            continue
        vector = embedder.encode(query).tolist()
        response = index.query(vector=vector, top_k=top_k, include_metadata=True)

        for match in response.get("matches", []) or []:
            metadata = match.get("metadata") or {}
            text = (metadata.get("text") or "").strip()
            key = text[:200]
            if not text or key in seen_prefixes:
                continue
            seen_prefixes.add(key)

            entry = f"- {text}"
            context_parts.append(entry)
            total_chars += len(entry)
            citations.append({
                "score": float(match.get("score") or 0.0),
                "source": metadata.get("chunk_id") or metadata.get("title") or "Unknown",
            })

            if total_chars > max_chars:
                # Budget spent: stop here for every remaining query too,
                # instead of only leaving the inner loop.
                return "\n".join(context_parts), citations

    return "\n".join(context_parts), citations
def _add_markdown_runs(paragraph, text: str) -> None:
    """Append *text* to *paragraph*, rendering ``**bold**`` spans as bold runs."""
    for piece in re.split(r"(\*\*.+?\*\*)", text):
        if not piece:
            continue
        if len(piece) > 4 and piece.startswith("**") and piece.endswith("**"):
            paragraph.add_run(piece[2:-2]).bold = True
        else:
            paragraph.add_run(piece)


def markdown_to_docx(md_text: str) -> BytesIO:
    """Convert a Markdown draft into an in-memory Word document.

    Only a line-oriented subset of Markdown is handled:

    * ``#`` to ``######`` headings become Word headings of level 1 to 6;
    * lines starting with ``-``, ``*`` or ``+`` followed by whitespace
      become bulleted paragraphs;
    * ``**bold**`` spans become bold runs;
    * every other line, including blank and numbered lines, becomes a plain
      paragraph with its text unchanged.

    Args:
        md_text: The Markdown source.

    Returns:
        A ``BytesIO`` positioned at offset 0 holding the ``.docx`` bytes.
    """
    doc = Document()
    for line in (md_text or "").splitlines():
        heading = re.match(r"^(#{1,6})\s+(.*)$", line)
        if heading:
            # Heading text is emitted without emphasis markers.
            title = heading.group(2).strip().replace("**", "")
            doc.add_heading(title, level=len(heading.group(1)))
            continue

        bullet = re.match(r"^\s*[-*+]\s+(.*)$", line)
        if bullet:
            _add_markdown_runs(doc.add_paragraph(style="List Bullet"), bullet.group(1))
            continue

        _add_markdown_runs(doc.add_paragraph(), line)

    buf = BytesIO()
    doc.save(buf)
    buf.seek(0)  # rewind so callers can read from the start
    return buf
# ----------------- MAIN FUNCTION -----------------
def _write_docx_to_temp(md_text: str) -> str:
    """Render *md_text* to Word and return the path of the saved file."""
    # A dedicated temp directory keeps the download name "legal_draft.docx".
    out_path = os.path.join(tempfile.mkdtemp(prefix="legal_draft_"), "legal_draft.docx")
    with open(out_path, "wb") as fh:
        fh.write(markdown_to_docx(md_text).getvalue())
    return out_path


def generate_legal_draft(case_text, uploaded_file=None, add_citations=True):
    """Generate a legal draft in Markdown plus a downloadable Word file.

    Steps: read the optional reference template, derive search queries from
    the case text, retrieve knowledge-base context, ask the chat model for
    the draft, optionally append a references list, and save a DOCX copy.

    Args:
        case_text: Client and case details entered by the user.
        uploaded_file: Optional reference template (DOCX/PDF/TXT).
        add_citations: When true, append a "References" section listing the
            retrieved sources and their similarity scores.

    Returns:
        ``(text, docx_path)``. ``text`` is the Markdown draft and
        ``docx_path`` the path of the generated ``.docx`` file, which is the
        form a file-download output component consumes. When the input is
        blank or the model call fails, ``text`` is a message describing the
        problem and ``docx_path`` is ``None``.
    """
    if not case_text or not case_text.strip():
        return "Please enter the case details before generating a draft.", None

    ref_text = load_reference_text(uploaded_file) if uploaded_file else ""
    queries = build_queries_with_llm(case_text)
    context_text, citations = pinecone_search(queries, top_k=10)

    # NOTE(review): the prompt asks the model for a "References" section with
    # scores although source names and scores are not part of the prompt, and
    # a references list is also appended below - confirm both are intended.
    system_prompt = """You are an expert legal drafter for Pakistani law. Your task is to create a professional, court-ready legal petition in MARKDOWN format using three inputs:
1. User Input: Case details including client info, petition type, court, facts, relevant laws, and sections.
2. Knowledge Base Context: Relevant laws, case precedents, and ordinances retrieved from the vector database (Constitution of Pakistan, Punjab case law, FBR ordinances).
3. Reference Template: A legal document uploaded by the user. Extract its **structure, headings, section order, style, tone, and formatting** — but do not copy its text.
Instructions
1. Structure & Headings
- Replicate the section hierarchy of the uploaded template.
- Main heading should represent the petition title or case name.
- Subheadings and any lower-level headings must match the style and order from the template.
- Sections should include: Parties, Facts, Legal Grounds, Arguments, Prayer, etc., as per the template.
2. Tone & Style
- Use formal, professional, and persuasive legal language.
- Facts are objective; legal arguments are assertive.
- Follow the tone and phrasing style of the uploaded template document.
3. Content Integration
- Incorporate relevant context from the vector database where appropriate.
- Cite legal provisions clearly in-text when relevant.
- Ensure content is logically coherent, comprehensive, and supports the petition’s objective.
- Do not hallucinate laws or precedents.
4. References
- Include a "References" section at the end if citations are present.
- Format as: `1. Source Name (score)`
5. Output Rules
- Produce output MARKDOWN.
- Do not add explanations, summaries, or extra text.
- Maintain clarity, professionalism, and adherence to legal drafting standards.
- Preserve structure, tone, style, and headings from the uploaded template as much as possible.
6. Fallback
- If context or relevant laws are missing, state: "No applicable precedent found" or "Relevant law cited above."
"""

    # Mark the template with an ellipsis only when it was actually truncated.
    max_ref_chars = 50000
    if not ref_text:
        ref_block = "(none)"
    elif len(ref_text) > max_ref_chars:
        ref_block = ref_text[:max_ref_chars] + "..."
    else:
        ref_block = ref_text

    user_prompt = f"""
**User Input:**
{case_text}
**Knowledge Base Context:**
{context_text or '(no matches)'}
**Reference Template (format+headings+structure+tone+language):**
{ref_block}
"""
    try:
        resp = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.2,
            max_tokens=15000,
        )
        # ``content`` can be None; treat that as an empty draft.
        draft_md = (resp.choices[0].message.content or "").strip()
    except Exception as e:
        # Report the failure as-is; do not dress an error message up as a
        # draft with citations and a Word file.
        logging.getLogger(__name__).exception("Draft generation failed")
        return f"OpenAI error: {e}", None

    if add_citations and citations:
        reference_lines = [
            f"{i}. {c['source']} (score: {c['score']:.3f})\n"
            for i, c in enumerate(citations, 1)
        ]
        draft_md += "\n\n### References\n" + "".join(reference_lines)

    return draft_md, _write_docx_to_temp(draft_md)
# ----------------- GRADIO INTERFACE -----------------
# Input widgets, listed in the positional order of generate_legal_draft's
# parameters: case text, reference template, citation toggle.
_case_details_box = gr.Textbox(
    label="Case Details",
    lines=10,
    placeholder="Enter client and case info...",
)
_template_upload = gr.File(
    label="Reference Template (DOCX/PDF/TXT)",
    file_types=[".docx", ".pdf", ".txt"],
)
_citations_toggle = gr.Checkbox(label="Append citations", value=True)

# Output widgets, matching the two values generate_legal_draft returns.
_draft_view = gr.Textbox(label="Draft Output", lines=30)
_docx_download = gr.File(label="Download Word")

iface = gr.Interface(
    title="⚖️ AI Legal Draft Generator",
    description="Upload a DOCX/PDF/TXT reference template, enter case details, and generate a court-ready legal draft in Markdown and Word.",
    fn=generate_legal_draft,
    inputs=[_case_details_box, _template_upload, _citations_toggle],
    outputs=[_draft_view, _docx_download],
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()