|
|
|
|
|
""" |
|
|
LEXPT Hugging Face Space - Professional Legal Analysis |
|
|
Replicates the structure of professional HF Spaces with reasoning display |
|
|
""" |
|
|
|
|
|
import gradio as gr |
|
|
import time |
|
|
import torch |
|
|
from typing import Generator |
|
|
|
|
|
|
|
|
# Lazily-populated globals shared between load_model() and generate_response().
model = None          # the loaded causal LM (optionally wrapped in a PEFT adapter)
tokenizer = None      # tokenizer paired with the model
model_loaded = False  # set True only after load_model() succeeds

# Hugging Face repo id of the base checkpoint.
# (Previously this line was duplicated; the duplicate has been removed.)
BASE_MODEL_ID = "openai/gpt-oss-20b"

# Repo id of the fine-tuned PEFT/LoRA adapter stacked on top of the base
# model.  Leave empty to run the base model only.  NOTE(review): the rest of
# this file references ADAPTER_ID (load_model, generate_response), but it was
# never defined anywhere, so every run raised NameError — it must be defined
# here.  TODO: fill in the published LEXPT adapter repo id.
ADAPTER_ID = ""
|
|
|
|
|
|
|
|
# System prompt injected as the first chat message in generate_response().
# It pins the model to final-answer-only output, strict formatting modes
# (numbered list / checklist / IRAC / point-headings), and forbids invented
# facts or citations.  This text is sent to the model verbatim — edit with care.
SYSTEM_PROMPT = """
ROLE
You are a U.S. legal analysis assistant focused on appellate and habeas issues. Your job is to produce precise, jurisdiction-aware answers drawn from the user's prompt and any text they embed (e.g., an opinion extract). You must not invent facts, quotations, or citations.

CORE DIRECTIVE — FINAL ANSWER ONLY
- Output ONLY the final answer to the user's prompt.
- Do NOT include prefaces, meta-commentary, chain-of-thought, or self-references.
- Do NOT restate the question, apologize, or add disclaimers.
- Do NOT add citations unless the prompt explicitly requests them.

SCOPE & SOURCES
- Default to the jurisdiction and stage implied by the prompt. If an opinion text is provided, treat it as the primary source of truth; do not add outside facts.
- If a request is impossible to answer from the provided materials, respond exactly: "Insufficient information."
- If the prompt asks for general doctrine (e.g., variance vs. constructive amendment, preservation standards, habeas default), state black-letter rules succinctly without citing unless requested.

FORMATTING & STYLE
- If the prompt says "list," return a numbered list starting at 1, with one item per line.
- If the prompt asks for a "checklist," use short bullet points; keep each bullet to one sentence.
- If the prompt asks for an "IRAC," use exactly these section headers in order, each on its own line: Issue; Rule; Application; Conclusion. No extra headings or text.
- If the prompt asks for an "argument for petitioner" or "argument for the state," produce 4–8 concise point-headings with brief supporting parentheticals or sub-bullets.
- If a word/line limit is specified, obey it strictly.
- Use party names and case captions exactly as given in the prompt.

CITATIONS (ONLY IF REQUESTED)
- When citations are explicitly requested, use Bluebook style:
  • First mention: full citation with court, year, and pincites if provided/clear.
  • Later mentions: short form with pincites.
  • For federal rules, cite rule and subdivision (e.g., Fed. R. Evid. 801(d)(2)(E)).
- If the prompt requests a "citation string," include the best supporting authorities in descending order of weight and relevance.

SUBSTANTIVE GUIDANCE (WHEN ASKED)
- Variance vs. constructive amendment: define both; explain that a variance is a proof–pleading discrepancy assessed for prejudice; a constructive amendment alters the charge's elements and is structural on direct review.
- Preservation/waiver: identify the contemporaneous objection rule, motion grounds specificity, and the effect of not requesting a continuance when surprised.
- Habeas procedural default: outline cause-and-prejudice (and actual-innocence gateway) if asked.
- Standards of review: identify the applicable standard (e.g., abuse of discretion, de novo, harmless-error) when requested and tie it to the posture.
- Evidence questions: if asked, cover authentication, hearsay/non-hearsay routes (including 801(d)(2)(E)), Rule 403, and the permissibility of juror aids like transcripts.

CONSTRAINTS
- Do not invent case names, record cites, or quotations.
- Do not introduce new facts not in the prompt's record.
- Keep tense and terminology consistent with the prompt (e.g., "appellant," "petitioner," "respondent," "state").
- Be concise and information-dense; avoid filler.

DEFAULT OUTPUT BEHAVIOR
- If the prompt is ambiguous but answerable, choose the most reasonable interpretation and answer directly without commentary.
- If the prompt requests formatting (e.g., numbered list, IRAC, checklist), follow it exactly.
"""
|
|
|
|
|
def load_model():
    """Load the LEXPT base model and, if configured, the PEFT adapter.

    Populates the module-level ``model`` / ``tokenizer`` globals and sets
    ``model_loaded`` on success.  On GPU the base model is loaded with 4-bit
    NF4 quantization; on CPU it falls back to a plain float32 load.

    Returns:
        A human-readable status string — "✅ ..." on success, "❌ ..." on
        failure.  Note this is a plain string, NOT a generator; callers must
        not iterate over it.
    """
    global model, tokenizer, model_loaded

    try:
        # Imported lazily so this module still imports (and the UI can show
        # a helpful status banner) when transformers/peft are unavailable.
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
        from peft import PeftModel

        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
        if tokenizer.pad_token is None:
            # Many causal-LM tokenizers ship without a pad token; reuse EOS
            # so generate() can pad.
            tokenizer.pad_token = tokenizer.eos_token

        if torch.cuda.is_available():
            # 4-bit NF4 quantization with double quantization keeps the 20B
            # model within a small GPU's memory budget.
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
            )
            model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL_ID,
                quantization_config=bnb_config,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                trust_remote_code=True,
            )
        else:
            # CPU fallback: full precision; low_cpu_mem_usage reduces peak
            # memory while the weights stream in.
            model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL_ID,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                low_cpu_mem_usage=True,
            )

        # Best-effort adapter stacking: a broken/missing adapter degrades to
        # the base model instead of failing the whole load.
        adapter_loaded = False
        if ADAPTER_ID:
            try:
                model = PeftModel.from_pretrained(model, ADAPTER_ID)
                adapter_loaded = True
                print(f"✅ Loaded adapter: {ADAPTER_ID}")
            except Exception as adapter_error:
                print(f"⚠️ Adapter loading failed: {adapter_error}")
                print("📝 Using base model without adapter")

        model.eval()

        model_loaded = True
        adapter_status = f" + {ADAPTER_ID}" if adapter_loaded else " (base model only)"
        return f"✅ LEXPT Model loaded successfully!{adapter_status}"

    except Exception as e:
        print(f"❌ Model loading error: {str(e)}")
        return f"❌ Error loading model: {str(e)}\n\n💡 Try upgrading to T4 Small GPU tier in Space settings"
|
|
|
|
|
def generate_reasoning_steps(query: str) -> Generator[str, None, None]:
    """Yield the scripted "thinking" status lines shown in the UI.

    The steps are fixed cosmetic text (the query argument is not inspected);
    a short pause separates consecutive steps to mimic live progress.
    """
    scripted_steps = (
        "🤔 Analyzing the legal query...",
        "📚 Reviewing relevant legal doctrines and precedents...",
        "⚖️ Applying legal principles to the specific facts...",
        "✍️ Structuring the response according to legal formatting requirements...",
        "🎯 Finalizing analysis with precise legal terminology...",
    )
    last_index = len(scripted_steps) - 1
    for index, message in enumerate(scripted_steps):
        yield message
        if index != last_index:
            # Pause between steps (but not after the final one), matching
            # the pacing of the reasoning display.
            time.sleep(0.5)
|
|
|
|
|
def generate_response(query: str, show_reasoning: bool = False):
    """Generate a formatted legal analysis for *query*.

    Args:
        query: The user's legal question or drafting request.
        show_reasoning: When True, also build the scripted reasoning-step
            transcript for the accordion panel.

    Returns:
        A ``(analysis_markdown, reasoning_text)`` tuple.  On any failure the
        first element carries an error message so the UI keeps working.
    """
    global model, tokenizer, model_loaded

    # Guard clauses: model not ready / empty input.
    if not model_loaded:
        return "❌ Model not loaded. Please wait for initialization.", ""

    if not query.strip():
        return "Please enter a legal query.", ""

    reasoning_text = ""
    if show_reasoning:
        # Consume the step generator (each step sleeps briefly by design).
        reasoning_text = "\n".join(generate_reasoning_steps(query))

    try:
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": query},
        ]

        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
        )

        # Depending on tokenizer/version, apply_chat_template may return a
        # dict of tensors or a bare input_ids tensor; normalize to a dict.
        if isinstance(inputs, dict):
            inputs = inputs.to(model.device)
        else:
            inputs = {"input_ids": inputs.to(model.device)}

        start_time = time.time()

        with torch.no_grad():
            # Greedy decoding.  The previous code also passed
            # temperature=0.2, which is ignored when do_sample=False and
            # only triggers a transformers warning, so it was removed.
            outputs = model.generate(
                **inputs,
                max_new_tokens=1200,
                do_sample=False,
                pad_token_id=tokenizer.pad_token_id,
            )

        latency = time.time() - start_time

        # Decode only the newly generated tail, skipping the prompt tokens.
        prompt_len = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

        final_output = f"""## ⚖️ Legal Analysis

**Query:** {query}

**Response Time:** {latency:.2f}s
**Model:** {BASE_MODEL_ID}{f" + {ADAPTER_ID}" if ADAPTER_ID else " (base model)"}

---

{response}

---
*Generated by LEXPT - Legal Analysis AI*
"""

        return final_output, reasoning_text

    except Exception as e:
        return f"❌ Generation error: {str(e)}", reasoning_text
|
|
|
|
|
|
|
|
# Preset prompts surfaced in the gr.Examples panel; selecting one fills the
# query textbox.  These strings are shown (and sent to the model) verbatim.
EXAMPLE_QUERIES = [
    "Draft 5 advocacy point-headings for petitioner that a knife→gun variance violated Sixth-Amendment notice",
    "Explain the difference between a 'variance' and a 'constructive amendment' of the charging instrument",
    "Analyze prejudice under the variance doctrine: Did the proof at trial (gun vs. knife) mislead the defense?",
    "Write a crisp one-page IRAC on Ridgeway: Issue (variance/notice), Rule, Application, Conclusion",
    "Create a checklist of record cites you would pull to brief this issue"
]
|
|
|
|
|
|
|
|
# --- Auto-load the model at import time so the Space is warm on startup. ---
print("🚀 Auto-loading LEXPT model...")
model_status = "Loading..."
try:
    # load_model() returns a single status string — it is NOT a generator.
    # The previous code did `for status_update in load_model():`, which
    # iterated over the string character by character, printing one
    # character per line and leaving model_status equal to the last char.
    model_status = load_model()
    print(model_status)
except Exception as e:
    model_status = f"❌ Auto-load failed: {str(e)}"
    print(model_status)
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: header, status banner, query input + examples, analysis output,
# collapsible reasoning panel, and footer.  Built at import time so HF Spaces
# can serve `demo` directly.
# ---------------------------------------------------------------------------
with gr.Blocks(
    title="LEXPT - Legal Analysis AI",
    theme=gr.themes.Soft(),
    css="""
    .main-header { text-align: center; margin-bottom: 2rem; }
    .reasoning-box { background: #f8f9fa; padding: 1rem; border-radius: 8px; margin: 1rem 0; }
    .status-box { background: #e8f5e8; padding: 0.5rem; border-radius: 4px; }
    """
) as demo:

    # Page header.
    gr.Markdown("""
    # ⚖️ LEXPT - Legal Analysis AI

    **Professional legal analysis powered by fine-tuned GPT-OSS-20B**
    Specialized for appellate and habeas corpus issues

    *Give it a couple of seconds to start. You can enable reasoning level to see the thinking process.*
    """, elem_classes="main-header")

    # Model-status banner; model_status was computed at import time, so this
    # reflects the load result at page-build time only (it does not refresh).
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(f"""
            <div class="status-box">
            <strong>Model Status:</strong> {model_status}
            </div>
            """)

    with gr.Row():
        # Left column: query input and controls.
        with gr.Column(scale=3):
            query_input = gr.Textbox(
                label="Legal Query",
                placeholder="Enter your legal analysis request...",
                lines=4
            )

            with gr.Row():
                submit_btn = gr.Button("⚖️ Analyze", variant="primary")
                reasoning_checkbox = gr.Checkbox(
                    label="Show Reasoning Process",
                    value=False
                )

        # Right column: clickable example prompts that fill the textbox.
        with gr.Column(scale=1):
            gr.Markdown("### 📋 Example Queries")
            gr.Examples(
                examples=EXAMPLE_QUERIES,
                inputs=query_input,
                label=""
            )

    # Main analysis output (markdown from generate_response).
    output_response = gr.Markdown(label="Analysis")

    # Collapsible panel for the scripted reasoning transcript.
    with gr.Accordion("🤔 Click to view Thinking Process", open=False) as reasoning_accordion:
        output_reasoning = gr.Markdown("Reasoning steps will appear here when enabled...")

    # Wire the Analyze button: generate_response returns a 2-tuple that
    # fills the analysis panel and the reasoning accordion respectively.
    submit_btn.click(
        fn=generate_response,
        inputs=[query_input, reasoning_checkbox],
        outputs=[output_response, output_reasoning]
    )

    # Footer with technical details.
    gr.Markdown("""
    ---
    ### 🔧 Technical Details

    - **Base Model:** OpenAI GPT-OSS-20B (20 billion parameters)
    - **Fine-tuning:** PEFT adapter trained on legal analysis tasks
    - **Specialization:** Appellate law, habeas corpus, constitutional issues
    - **Optimization:** 4-bit quantization for efficient inference

    *This Space demonstrates professional legal AI deployment on Hugging Face infrastructure.*
    """)
|
|
|
|
|
if __name__ == "__main__":
    # Launch the Gradio server when run as a script.
    # NOTE(review): share=True requests a public tunnel, which is ignored on
    # Hugging Face Spaces and only matters for local runs — confirm intent.
    demo.launch(
        share=True,
        server_name="0.0.0.0",  # listen on all interfaces (needed in containers)
        server_port=7860,       # the port HF Spaces expects
        show_api=True
    )
|
|
|