# lexpt / app.py
# (The three lines below were Hugging Face web-UI residue captured when the
# file was copied from the repo page — "sik247's picture", the commit message
# "Deploy LEXPT legal analysis app", and commit id "61fe0f7 verified". They
# are preserved here as comments so the module parses as valid Python.)
#!/usr/bin/env python3
"""
LEXPT Hugging Face Space - Professional Legal Analysis
Replicates the structure of professional HF Spaces with reasoning display
"""
import gradio as gr
import time
import torch
from typing import Generator
# Global model state, populated by load_model() at startup.
model = None          # loaded causal-LM instance (None until load_model succeeds)
tokenizer = None      # tokenizer matching BASE_MODEL_ID
model_loaded = False  # flipped to True once load_model() completes

# Model configuration - Using base model
BASE_MODEL_ID = "openai/gpt-oss-20b"  # 20B model

# Optional PEFT adapter repo id; a falsy value means "base model only".
# BUG FIX: the original file re-assigned BASE_MODEL_ID here a second time
# (same value) and never defined ADAPTER_ID, even though load_model() and
# generate_response() both reference it — causing a NameError at runtime.
ADAPTER_ID = None
# System prompt sent as the "system" message on every request (see
# generate_response). It pins the model to final-answer-only output,
# jurisdiction-aware legal analysis, and strict formatting rules.
# This is a runtime string consumed by the model — edit with care.
SYSTEM_PROMPT = """
ROLE
You are a U.S. legal analysis assistant focused on appellate and habeas issues. Your job is to produce precise, jurisdiction-aware answers drawn from the user's prompt and any text they embed (e.g., an opinion extract). You must not invent facts, quotations, or citations.
CORE DIRECTIVE — FINAL ANSWER ONLY
- Output ONLY the final answer to the user's prompt.
- Do NOT include prefaces, meta-commentary, chain-of-thought, or self-references.
- Do NOT restate the question, apologize, or add disclaimers.
- Do NOT add citations unless the prompt explicitly requests them.
SCOPE & SOURCES
- Default to the jurisdiction and stage implied by the prompt. If an opinion text is provided, treat it as the primary source of truth; do not add outside facts.
- If a request is impossible to answer from the provided materials, respond exactly: "Insufficient information."
- If the prompt asks for general doctrine (e.g., variance vs. constructive amendment, preservation standards, habeas default), state black-letter rules succinctly without citing unless requested.
FORMATTING & STYLE
- If the prompt says "list," return a numbered list starting at 1, with one item per line.
- If the prompt asks for a "checklist," use short bullet points; keep each bullet to one sentence.
- If the prompt asks for an "IRAC," use exactly these section headers in order, each on its own line: Issue; Rule; Application; Conclusion. No extra headings or text.
- If the prompt asks for an "argument for petitioner" or "argument for the state," produce 4–8 concise point-headings with brief supporting parentheticals or sub-bullets.
- If a word/line limit is specified, obey it strictly.
- Use party names and case captions exactly as given in the prompt.
CITATIONS (ONLY IF REQUESTED)
- When citations are explicitly requested, use Bluebook style:
• First mention: full citation with court, year, and pincites if provided/clear.
• Later mentions: short form with pincites.
• For federal rules, cite rule and subdivision (e.g., Fed. R. Evid. 801(d)(2)(E)).
- If the prompt requests a "citation string," include the best supporting authorities in descending order of weight and relevance.
SUBSTANTIVE GUIDANCE (WHEN ASKED)
- Variance vs. constructive amendment: define both; explain that a variance is a proof–pleading discrepancy assessed for prejudice; a constructive amendment alters the charge's elements and is structural on direct review.
- Preservation/waiver: identify the contemporaneous objection rule, motion grounds specificity, and the effect of not requesting a continuance when surprised.
- Habeas procedural default: outline cause-and-prejudice (and actual-innocence gateway) if asked.
- Standards of review: identify the applicable standard (e.g., abuse of discretion, de novo, harmless-error) when requested and tie it to the posture.
- Evidence questions: if asked, cover authentication, hearsay/non-hearsay routes (including 801(d)(2)(E)), Rule 403, and the permissibility of juror aids like transcripts.
CONSTRAINTS
- Do not invent case names, record cites, or quotations.
- Do not introduce new facts not in the prompt's record.
- Keep tense and terminology consistent with the prompt (e.g., "appellant," "petitioner," "respondent," "state").
- Be concise and information-dense; avoid filler.
DEFAULT OUTPUT BEHAVIOR
- If the prompt is ambiguous but answerable, choose the most reasonable interpretation and answer directly without commentary.
- If the prompt requests formatting (e.g., numbered list, IRAC, checklist), follow it exactly.
"""
def load_model():
    """Load the LEXPT base model (plus optional PEFT adapter) into globals.

    Populates module-level ``model``/``tokenizer`` and sets ``model_loaded``
    on success. Uses 4-bit NF4 quantization when a CUDA GPU is available;
    otherwise falls back to an unquantized CPU load (bitsandbytes requires
    CUDA).

    Returns:
        str: a human-readable status string (success or error description).
    """
    global model, tokenizer, model_loaded
    try:
        # Heavy deps imported lazily so a missing/broken install surfaces as
        # a status message instead of killing the whole app at import time.
        # NOTE: the original also re-imported torch here; it is already
        # imported at module scope, so that redundant import is removed.
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
        from peft import PeftModel

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
        if tokenizer.pad_token is None:
            # Some checkpoints ship without a pad token; reuse EOS so
            # generate() has a valid pad_token_id.
            tokenizer.pad_token = tokenizer.eos_token

        if torch.cuda.is_available():
            # GPU available - use 4-bit NF4 quantization to fit the 20B model.
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
            )
            model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL_ID,
                quantization_config=bnb_config,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                trust_remote_code=True,
            )
        else:
            # CPU fallback - no quantization.
            model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL_ID,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                low_cpu_mem_usage=True,
            )

        # Optional adapter with graceful fallback. Guarded lookup because the
        # original deployment never actually defined ADAPTER_ID at module
        # scope, which made this line raise NameError.
        adapter_id = globals().get("ADAPTER_ID")
        adapter_loaded = False
        if adapter_id:
            try:
                model = PeftModel.from_pretrained(model, adapter_id)
                adapter_loaded = True
                print(f"✅ Loaded adapter: {adapter_id}")
            except Exception as adapter_error:
                # Best-effort: fall back to the base model rather than fail.
                print(f"⚠️ Adapter loading failed: {adapter_error}")
                print("📝 Using base model without adapter")

        model.eval()
        model_loaded = True
        adapter_status = f" + {adapter_id}" if adapter_loaded else " (base model only)"
        return f"✅ LEXPT Model loaded successfully!{adapter_status}"
    except Exception as e:
        print(f"❌ Model loading error: {str(e)}")
        return f"❌ Error loading model: {str(e)}\n\n💡 Try upgrading to T4 Small GPU tier in Space settings"
def generate_reasoning_steps(query: str) -> Generator[str, None, None]:
    """Yield cosmetic "thinking" status messages with short pauses.

    Mirrors the reasoning-display effect of the reference HF Space demo.
    The query argument is accepted for interface symmetry but not inspected.
    """
    messages = (
        "🤔 Analyzing the legal query...",
        "📚 Reviewing relevant legal doctrines and precedents...",
        "⚖️ Applying legal principles to the specific facts...",
        "✍️ Structuring the response according to legal formatting requirements...",
        "🎯 Finalizing analysis with precise legal terminology...",
    )
    last = len(messages) - 1
    for position, message in enumerate(messages):
        yield message
        # Pause between steps (but not after the final one), as the original did.
        if position != last:
            time.sleep(0.5)
def generate_response(query: str, show_reasoning: bool = False):
    """Run the legal-analysis chat pipeline for one query.

    Args:
        query: The user's legal question/request.
        show_reasoning: When True, also produce the cosmetic reasoning steps.

    Returns:
        tuple[str, str]: (analysis markdown, reasoning text). On failure the
        first element carries the error message.
    """
    global model, tokenizer, model_loaded
    if not model_loaded:
        return "❌ Model not loaded. Please wait for initialization.", ""
    if not query.strip():
        return "Please enter a legal query.", ""

    reasoning_text = ""
    # Show reasoning steps if requested (each step sleeps briefly by design).
    if show_reasoning:
        reasoning_text = "\n".join(generate_reasoning_steps(query))

    try:
        # Build chat messages with the fixed legal system prompt.
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": query},
        ]
        # Tokenize via the model's chat template.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
        )
        # apply_chat_template may return a plain tensor or a BatchEncoding;
        # normalize to a dict so **inputs works below.
        if isinstance(inputs, dict):
            inputs = inputs.to(model.device)
        else:
            inputs = {"input_ids": inputs.to(model.device)}

        start_time = time.time()
        # Greedy decoding. BUG FIX: the original also passed temperature=0.2,
        # which is ignored when do_sample=False and only triggers a
        # transformers warning — so it is dropped here (output unchanged).
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=1200,
                do_sample=False,
                pad_token_id=tokenizer.pad_token_id,
            )
        latency = time.time() - start_time

        # Decode only the newly generated tail (strip the prompt tokens).
        prompt_len = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

        # Format final output as markdown for the Gradio panel.
        final_output = f"""## ⚖️ Legal Analysis
**Query:** {query}
**Response Time:** {latency:.2f}s
**Model:** {BASE_MODEL_ID}{f" + {ADAPTER_ID}" if ADAPTER_ID else " (base model)"}
---
{response}
---
*Generated by LEXPT - Legal Analysis AI*
"""
        return final_output, reasoning_text
    except Exception as e:
        return f"❌ Generation error: {str(e)}", reasoning_text
# Example legal queries shown in the gr.Examples panel; clicking one fills
# the query textbox. Runtime data — the exact wording is user-facing.
EXAMPLE_QUERIES = [
    "Draft 5 advocacy point-headings for petitioner that a knife→gun variance violated Sixth-Amendment notice",
    "Explain the difference between a 'variance' and a 'constructive amendment' of the charging instrument",
    "Analyze prejudice under the variance doctrine: Did the proof at trial (gun vs. knife) mislead the defense?",
    "Write a crisp one-page IRAC on Ridgeway: Issue (variance/notice), Rule, Application, Conclusion",
    "Create a checklist of record cites you would pull to brief this issue"
]
# Auto-load model on startup (for HF Spaces).
print("🚀 Auto-loading LEXPT model...")
model_status = "Loading..."
try:
    # BUG FIX: load_model() returns a single status STRING. The original code
    # iterated over it (`for status_update in load_model():`), which walks the
    # string character-by-character, printing one char per line and leaving
    # model_status holding only the final character.
    model_status = load_model()
    print(model_status)
except Exception as e:
    model_status = f"❌ Auto-load failed: {str(e)}"
    print(model_status)
# Create Gradio interface (replicating professional HF Space design).
# Declarative UI wiring only; all inference happens in generate_response.
with gr.Blocks(
    title="LEXPT - Legal Analysis AI",
    theme=gr.themes.Soft(),
    # Light custom styling for the header, status banner, and reasoning panel.
    css="""
    .main-header { text-align: center; margin-bottom: 2rem; }
    .reasoning-box { background: #f8f9fa; padding: 1rem; border-radius: 8px; margin: 1rem 0; }
    .status-box { background: #e8f5e8; padding: 0.5rem; border-radius: 4px; }
    """
) as demo:
    # Page header.
    gr.Markdown("""
    # ⚖️ LEXPT - Legal Analysis AI
    **Professional legal analysis powered by fine-tuned GPT-OSS-20B**
    Specialized for appellate and habeas corpus issues
    *Give it a couple of seconds to start. You can enable reasoning level to see the thinking process.*
    """, elem_classes="main-header")

    # Model-status banner; model_status was computed at import time above,
    # so it reflects the state at page build, not live status.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(f"""
            <div class="status-box">
            <strong>Model Status:</strong> {model_status}
            </div>
            """)

    # Main input area (left) and example queries (right).
    with gr.Row():
        with gr.Column(scale=3):
            query_input = gr.Textbox(
                label="Legal Query",
                placeholder="Enter your legal analysis request...",
                lines=4
            )
            with gr.Row():
                submit_btn = gr.Button("⚖️ Analyze", variant="primary")
                reasoning_checkbox = gr.Checkbox(
                    label="Show Reasoning Process",
                    value=False
                )
        with gr.Column(scale=1):
            gr.Markdown("### 📋 Example Queries")
            # Clicking an example fills query_input.
            gr.Examples(
                examples=EXAMPLE_QUERIES,
                inputs=query_input,
                label=""
            )

    # Output sections
    output_response = gr.Markdown(label="Analysis")

    # Reasoning section (collapsible like the HF Space demo)
    with gr.Accordion("🤔 Click to view Thinking Process", open=False) as reasoning_accordion:
        output_reasoning = gr.Markdown("Reasoning steps will appear here when enabled...")

    # Handle submission: generate_response returns (analysis, reasoning),
    # mapped to the two Markdown outputs in order.
    submit_btn.click(
        fn=generate_response,
        inputs=[query_input, reasoning_checkbox],
        outputs=[output_response, output_reasoning]
    )

    # Static footer with deployment details.
    gr.Markdown("""
    ---
    ### 🔧 Technical Details
    - **Base Model:** OpenAI GPT-OSS-20B (20 billion parameters)
    - **Fine-tuning:** PEFT adapter trained on legal analysis tasks
    - **Specialization:** Appellate law, habeas corpus, constitutional issues
    - **Optimization:** 4-bit quantization for efficient inference
    *This Space demonstrates professional legal AI deployment on Hugging Face infrastructure.*
    """)
if __name__ == "__main__":
    # Enable API for external webpage access
    demo.launch(
        share=True,  # NOTE(review): share links are unnecessary on HF Spaces — confirm intent
        server_name="0.0.0.0",  # bind all interfaces so the Space container is reachable
        server_port=7860,  # the port HF Spaces expects
        show_api=True  # This exposes the API for your webpage
    )