# lexpt / app.py
# (The three lines below were Hugging Face web-UI residue captured when the
# file was copied from the repo page — "sik247's picture", the commit message
# "Deploy LEXPT legal analysis app", and commit id "61fe0f7 verified". They
# are preserved here as comments so the module parses as valid Python.)
#!/usr/bin/env python3
"""
LEXPT Hugging Face Space - Professional Legal Analysis
Replicates the structure of professional HF Spaces with reasoning display
"""
import gradio as gr
import time
import torch
from typing import Generator
# Global model state, populated by load_model() at startup.
model = None          # loaded causal-LM instance (None until load_model succeeds)
tokenizer = None      # tokenizer matching BASE_MODEL_ID
model_loaded = False  # flipped to True once load_model() completes

# Model configuration - Using base model
BASE_MODEL_ID = "openai/gpt-oss-20b"  # 20B model

# Optional PEFT adapter repo id; a falsy value means "base model only".
# BUG FIX: the original file re-assigned BASE_MODEL_ID here a second time
# (same value) and never defined ADAPTER_ID, even though load_model() and
# generate_response() both reference it — causing a NameError at runtime.
ADAPTER_ID = None
# System prompt sent as the "system" message on every request (see
# generate_response). It pins the model to final-answer-only output,
# jurisdiction-aware legal analysis, and strict formatting rules.
# This is a runtime string consumed by the model — edit with care.
SYSTEM_PROMPT = """
ROLE
You are a U.S. legal analysis assistant focused on appellate and habeas issues. Your job is to produce precise, jurisdiction-aware answers drawn from the user's prompt and any text they embed (e.g., an opinion extract). You must not invent facts, quotations, or citations.
CORE DIRECTIVE — FINAL ANSWER ONLY
- Output ONLY the final answer to the user's prompt.
- Do NOT include prefaces, meta-commentary, chain-of-thought, or self-references.
- Do NOT restate the question, apologize, or add disclaimers.
- Do NOT add citations unless the prompt explicitly requests them.
SCOPE & SOURCES
- Default to the jurisdiction and stage implied by the prompt. If an opinion text is provided, treat it as the primary source of truth; do not add outside facts.
- If a request is impossible to answer from the provided materials, respond exactly: "Insufficient information."
- If the prompt asks for general doctrine (e.g., variance vs. constructive amendment, preservation standards, habeas default), state black-letter rules succinctly without citing unless requested.
FORMATTING & STYLE
- If the prompt says "list," return a numbered list starting at 1, with one item per line.
- If the prompt asks for a "checklist," use short bullet points; keep each bullet to one sentence.
- If the prompt asks for an "IRAC," use exactly these section headers in order, each on its own line: Issue; Rule; Application; Conclusion. No extra headings or text.
- If the prompt asks for an "argument for petitioner" or "argument for the state," produce 4–8 concise point-headings with brief supporting parentheticals or sub-bullets.
- If a word/line limit is specified, obey it strictly.
- Use party names and case captions exactly as given in the prompt.
CITATIONS (ONLY IF REQUESTED)
- When citations are explicitly requested, use Bluebook style:
• First mention: full citation with court, year, and pincites if provided/clear.
• Later mentions: short form with pincites.
• For federal rules, cite rule and subdivision (e.g., Fed. R. Evid. 801(d)(2)(E)).
- If the prompt requests a "citation string," include the best supporting authorities in descending order of weight and relevance.
SUBSTANTIVE GUIDANCE (WHEN ASKED)
- Variance vs. constructive amendment: define both; explain that a variance is a proof–pleading discrepancy assessed for prejudice; a constructive amendment alters the charge's elements and is structural on direct review.
- Preservation/waiver: identify the contemporaneous objection rule, motion grounds specificity, and the effect of not requesting a continuance when surprised.
- Habeas procedural default: outline cause-and-prejudice (and actual-innocence gateway) if asked.
- Standards of review: identify the applicable standard (e.g., abuse of discretion, de novo, harmless-error) when requested and tie it to the posture.
- Evidence questions: if asked, cover authentication, hearsay/non-hearsay routes (including 801(d)(2)(E)), Rule 403, and the permissibility of juror aids like transcripts.
CONSTRAINTS
- Do not invent case names, record cites, or quotations.
- Do not introduce new facts not in the prompt's record.
- Keep tense and terminology consistent with the prompt (e.g., "appellant," "petitioner," "respondent," "state").
- Be concise and information-dense; avoid filler.
DEFAULT OUTPUT BEHAVIOR
- If the prompt is ambiguous but answerable, choose the most reasonable interpretation and answer directly without commentary.
- If the prompt requests formatting (e.g., numbered list, IRAC, checklist), follow it exactly.
"""
def load_model():
    """Load the LEXPT base model (plus optional PEFT adapter) into globals.

    Populates module-level ``model``/``tokenizer`` and sets ``model_loaded``
    on success. Uses 4-bit NF4 quantization when a CUDA GPU is available;
    otherwise falls back to an unquantized CPU load (bitsandbytes requires
    CUDA).

    Returns:
        str: a human-readable status string (success or error description).
    """
    global model, tokenizer, model_loaded
    try:
        # Heavy deps imported lazily so a missing/broken install surfaces as
        # a status message instead of killing the whole app at import time.
        # NOTE: the original also re-imported torch here; it is already
        # imported at module scope, so that redundant import is removed.
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
        from peft import PeftModel

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
        if tokenizer.pad_token is None:
            # Some checkpoints ship without a pad token; reuse EOS so
            # generate() has a valid pad_token_id.
            tokenizer.pad_token = tokenizer.eos_token

        if torch.cuda.is_available():
            # GPU available - use 4-bit NF4 quantization to fit the 20B model.
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
            )
            model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL_ID,
                quantization_config=bnb_config,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                trust_remote_code=True,
            )
        else:
            # CPU fallback - no quantization.
            model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL_ID,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                low_cpu_mem_usage=True,
            )

        # Optional adapter with graceful fallback. Guarded lookup because the
        # original deployment never actually defined ADAPTER_ID at module
        # scope, which made this line raise NameError.
        adapter_id = globals().get("ADAPTER_ID")
        adapter_loaded = False
        if adapter_id:
            try:
                model = PeftModel.from_pretrained(model, adapter_id)
                adapter_loaded = True
                print(f"✅ Loaded adapter: {adapter_id}")
            except Exception as adapter_error:
                # Best-effort: fall back to the base model rather than fail.
                print(f"⚠️ Adapter loading failed: {adapter_error}")
                print("📝 Using base model without adapter")

        model.eval()
        model_loaded = True
        adapter_status = f" + {adapter_id}" if adapter_loaded else " (base model only)"
        return f"✅ LEXPT Model loaded successfully!{adapter_status}"
    except Exception as e:
        print(f"❌ Model loading error: {str(e)}")
        return f"❌ Error loading model: {str(e)}\n\n💡 Try upgrading to T4 Small GPU tier in Space settings"
def generate_reasoning_steps(query: str) -> Generator[str, None, None]:
    """Yield cosmetic "thinking" status messages with short pauses.

    Mirrors the reasoning-display effect of the reference HF Space demo.
    The query argument is accepted for interface symmetry but not inspected.
    """
    messages = (
        "🤔 Analyzing the legal query...",
        "📚 Reviewing relevant legal doctrines and precedents...",
        "⚖️ Applying legal principles to the specific facts...",
        "✍️ Structuring the response according to legal formatting requirements...",
        "🎯 Finalizing analysis with precise legal terminology...",
    )
    last = len(messages) - 1
    for position, message in enumerate(messages):
        yield message
        # Pause between steps (but not after the final one), as the original did.
        if position != last:
            time.sleep(0.5)
def generate_response(query: str, show_reasoning: bool = False):
    """Run the legal-analysis chat pipeline for one query.

    Args:
        query: The user's legal question/request.
        show_reasoning: When True, also produce the cosmetic reasoning steps.

    Returns:
        tuple[str, str]: (analysis markdown, reasoning text). On failure the
        first element carries the error message.
    """
    global model, tokenizer, model_loaded
    if not model_loaded:
        return "❌ Model not loaded. Please wait for initialization.", ""
    if not query.strip():
        return "Please enter a legal query.", ""

    reasoning_text = ""
    # Show reasoning steps if requested (each step sleeps briefly by design).
    if show_reasoning:
        reasoning_text = "\n".join(generate_reasoning_steps(query))

    try:
        # Build chat messages with the fixed legal system prompt.
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": query},
        ]
        # Tokenize via the model's chat template.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
        )
        # apply_chat_template may return a plain tensor or a BatchEncoding;
        # normalize to a dict so **inputs works below.
        if isinstance(inputs, dict):
            inputs = inputs.to(model.device)
        else:
            inputs = {"input_ids": inputs.to(model.device)}

        start_time = time.time()
        # Greedy decoding. BUG FIX: the original also passed temperature=0.2,
        # which is ignored when do_sample=False and only triggers a
        # transformers warning — so it is dropped here (output unchanged).
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=1200,
                do_sample=False,
                pad_token_id=tokenizer.pad_token_id,
            )
        latency = time.time() - start_time

        # Decode only the newly generated tail (strip the prompt tokens).
        prompt_len = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

        # Format final output as markdown for the Gradio panel.
        final_output = f"""## ⚖️ Legal Analysis
**Query:** {query}
**Response Time:** {latency:.2f}s
**Model:** {BASE_MODEL_ID}{f" + {ADAPTER_ID}" if ADAPTER_ID else " (base model)"}
---
{response}
---
*Generated by LEXPT - Legal Analysis AI*
"""
        return final_output, reasoning_text
    except Exception as e:
        return f"❌ Generation error: {str(e)}", reasoning_text
# Example legal queries shown in the gr.Examples panel; clicking one fills
# the query textbox. Runtime data — the exact wording is user-facing.
EXAMPLE_QUERIES = [
    "Draft 5 advocacy point-headings for petitioner that a knife→gun variance violated Sixth-Amendment notice",
    "Explain the difference between a 'variance' and a 'constructive amendment' of the charging instrument",
    "Analyze prejudice under the variance doctrine: Did the proof at trial (gun vs. knife) mislead the defense?",
    "Write a crisp one-page IRAC on Ridgeway: Issue (variance/notice), Rule, Application, Conclusion",
    "Create a checklist of record cites you would pull to brief this issue"
]
# Auto-load model on startup (for HF Spaces).
print("🚀 Auto-loading LEXPT model...")
model_status = "Loading..."
try:
    # BUG FIX: load_model() returns a single status STRING. The original code
    # iterated over it (`for status_update in load_model():`), which walks the
    # string character-by-character, printing one char per line and leaving
    # model_status holding only the final character.
    model_status = load_model()
    print(model_status)
except Exception as e:
    model_status = f"❌ Auto-load failed: {str(e)}"
    print(model_status)
# Create Gradio interface (replicating professional HF Space design).
# Declarative UI wiring only; all inference happens in generate_response.
with gr.Blocks(
    title="LEXPT - Legal Analysis AI",
    theme=gr.themes.Soft(),
    # Light custom styling for the header, status banner, and reasoning panel.
    css="""
    .main-header { text-align: center; margin-bottom: 2rem; }
    .reasoning-box { background: #f8f9fa; padding: 1rem; border-radius: 8px; margin: 1rem 0; }
    .status-box { background: #e8f5e8; padding: 0.5rem; border-radius: 4px; }
    """
) as demo:
    # Page header.
    gr.Markdown("""
    # ⚖️ LEXPT - Legal Analysis AI
    **Professional legal analysis powered by fine-tuned GPT-OSS-20B**
    Specialized for appellate and habeas corpus issues
    *Give it a couple of seconds to start. You can enable reasoning level to see the thinking process.*
    """, elem_classes="main-header")

    # Model-status banner; model_status was computed at import time above,
    # so it reflects the state at page build, not live status.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(f"""
            <div class="status-box">
            <strong>Model Status:</strong> {model_status}
            </div>
            """)

    # Main input area (left) and example queries (right).
    with gr.Row():
        with gr.Column(scale=3):
            query_input = gr.Textbox(
                label="Legal Query",
                placeholder="Enter your legal analysis request...",
                lines=4
            )
            with gr.Row():
                submit_btn = gr.Button("⚖️ Analyze", variant="primary")
                reasoning_checkbox = gr.Checkbox(
                    label="Show Reasoning Process",
                    value=False
                )
        with gr.Column(scale=1):
            gr.Markdown("### 📋 Example Queries")
            # Clicking an example fills query_input.
            gr.Examples(
                examples=EXAMPLE_QUERIES,
                inputs=query_input,
                label=""
            )

    # Output sections
    output_response = gr.Markdown(label="Analysis")

    # Reasoning section (collapsible like the HF Space demo)
    with gr.Accordion("🤔 Click to view Thinking Process", open=False) as reasoning_accordion:
        output_reasoning = gr.Markdown("Reasoning steps will appear here when enabled...")

    # Handle submission: generate_response returns (analysis, reasoning),
    # mapped to the two Markdown outputs in order.
    submit_btn.click(
        fn=generate_response,
        inputs=[query_input, reasoning_checkbox],
        outputs=[output_response, output_reasoning]
    )

    # Static footer with deployment details.
    gr.Markdown("""
    ---
    ### 🔧 Technical Details
    - **Base Model:** OpenAI GPT-OSS-20B (20 billion parameters)
    - **Fine-tuning:** PEFT adapter trained on legal analysis tasks
    - **Specialization:** Appellate law, habeas corpus, constitutional issues
    - **Optimization:** 4-bit quantization for efficient inference
    *This Space demonstrates professional legal AI deployment on Hugging Face infrastructure.*
    """)
if __name__ == "__main__":
    # Enable API for external webpage access
    demo.launch(
        share=True,  # NOTE(review): share links are unnecessary on HF Spaces — confirm intent
        server_name="0.0.0.0",  # bind all interfaces so the Space container is reachable
        server_port=7860,  # the port HF Spaces expects
        show_api=True  # This exposes the API for your webpage
    )