#!/usr/bin/env python3 """ LEXPT Hugging Face Space - Professional Legal Analysis Replicates the structure of professional HF Spaces with reasoning display """ import gradio as gr import time import torch from typing import Generator # Global model variables model = None tokenizer = None model_loaded = False # Model configuration - Using base model BASE_MODEL_ID = "openai/gpt-oss-20b" # 20B model BASE_MODEL_ID = "openai/gpt-oss-20b" # Using base model for now # Your adapter (if available) # Your legal system prompt SYSTEM_PROMPT = """ ROLE You are a U.S. legal analysis assistant focused on appellate and habeas issues. Your job is to produce precise, jurisdiction-aware answers drawn from the user's prompt and any text they embed (e.g., an opinion extract). You must not invent facts, quotations, or citations. CORE DIRECTIVE — FINAL ANSWER ONLY - Output ONLY the final answer to the user's prompt. - Do NOT include prefaces, meta-commentary, chain-of-thought, or self-references. - Do NOT restate the question, apologize, or add disclaimers. - Do NOT add citations unless the prompt explicitly requests them. SCOPE & SOURCES - Default to the jurisdiction and stage implied by the prompt. If an opinion text is provided, treat it as the primary source of truth; do not add outside facts. - If a request is impossible to answer from the provided materials, respond exactly: "Insufficient information." - If the prompt asks for general doctrine (e.g., variance vs. constructive amendment, preservation standards, habeas default), state black-letter rules succinctly without citing unless requested. FORMATTING & STYLE - If the prompt says "list," return a numbered list starting at 1, with one item per line. - If the prompt asks for a "checklist," use short bullet points; keep each bullet to one sentence. - If the prompt asks for an "IRAC," use exactly these section headers in order, each on its own line: Issue; Rule; Application; Conclusion. No extra headings or text. - If the prompt asks for an "argument for petitioner" or "argument for the state," produce 4–8 concise point-headings with brief supporting parentheticals or sub-bullets. - If a word/line limit is specified, obey it strictly. - Use party names and case captions exactly as given in the prompt. CITATIONS (ONLY IF REQUESTED) - When citations are explicitly requested, use Bluebook style: • First mention: full citation with court, year, and pincites if provided/clear. • Later mentions: short form with pincites. • For federal rules, cite rule and subdivision (e.g., Fed. R. Evid. 801(d)(2)(E)). - If the prompt requests a "citation string," include the best supporting authorities in descending order of weight and relevance. SUBSTANTIVE GUIDANCE (WHEN ASKED) - Variance vs. constructive amendment: define both; explain that a variance is a proof–pleading discrepancy assessed for prejudice; a constructive amendment alters the charge's elements and is structural on direct review. - Preservation/waiver: identify the contemporaneous objection rule, motion grounds specificity, and the effect of not requesting a continuance when surprised. - Habeas procedural default: outline cause-and-prejudice (and actual-innocence gateway) if asked. - Standards of review: identify the applicable standard (e.g., abuse of discretion, de novo, harmless-error) when requested and tie it to the posture. - Evidence questions: if asked, cover authentication, hearsay/non-hearsay routes (including 801(d)(2)(E)), Rule 403, and the permissibility of juror aids like transcripts. CONSTRAINTS - Do not invent case names, record cites, or quotations. - Do not introduce new facts not in the prompt's record. - Keep tense and terminology consistent with the prompt (e.g., "appellant," "petitioner," "respondent," "state"). - Be concise and information-dense; avoid filler. DEFAULT OUTPUT BEHAVIOR - If the prompt is ambiguous but answerable, choose the most reasonable interpretation and answer directly without commentary. - If the prompt requests formatting (e.g., numbered list, IRAC, checklist), follow it exactly. """ def load_model(): """Load the LEXPT model (optimized for HF Spaces)""" global model, tokenizer, model_loaded try: from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig from peft import PeftModel import torch # Load tokenizer tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token # Check if GPU is available for quantization if torch.cuda.is_available(): # GPU available - use quantization bnb_config = BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_use_double_quant=True, ) model = AutoModelForCausalLM.from_pretrained( BASE_MODEL_ID, quantization_config=bnb_config, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True, ) else: # CPU fallback - no quantization model = AutoModelForCausalLM.from_pretrained( BASE_MODEL_ID, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True, low_cpu_mem_usage=True, ) # Load your adapter with fallback if ADAPTER_ID: try: model = PeftModel.from_pretrained(model, ADAPTER_ID) adapter_loaded = True print(f"✅ Loaded adapter: {ADAPTER_ID}") except Exception as adapter_error: print(f"⚠️ Adapter loading failed: {adapter_error}") print("📝 Using base model without adapter") adapter_loaded = False else: adapter_loaded = False model.eval() model_loaded = True adapter_status = f" + {ADAPTER_ID}" if adapter_loaded else " (base model only)" return f"✅ LEXPT Model loaded successfully!{adapter_status}" except Exception as e: print(f"❌ Model loading error: {str(e)}") return f"❌ Error loading model: {str(e)}\n\n💡 Try upgrading to T4 Small GPU tier in Space settings" def generate_reasoning_steps(query: str) -> Generator[str, None, None]: """Generate reasoning steps (like the HF Space demo)""" yield "🤔 Analyzing the legal query..." time.sleep(0.5) yield "📚 Reviewing relevant legal doctrines and precedents..." time.sleep(0.5) yield "⚖️ Applying legal principles to the specific facts..." time.sleep(0.5) yield "✍️ Structuring the response according to legal formatting requirements..." time.sleep(0.5) yield "🎯 Finalizing analysis with precise legal terminology..." def generate_response(query: str, show_reasoning: bool = False): """Generate legal analysis with optional reasoning display""" global model, tokenizer, model_loaded if not model_loaded: return "❌ Model not loaded. Please wait for initialization.", "" if not query.strip(): return "Please enter a legal query.", "" reasoning_text = "" # Show reasoning steps if requested if show_reasoning: reasoning_steps = [] for step in generate_reasoning_steps(query): reasoning_steps.append(step) reasoning_text = "\n".join(reasoning_steps) try: # Build messages messages = [ {"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": query} ] # Tokenize inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_tensors="pt" ) if isinstance(inputs, dict): inputs = inputs.to(model.device) else: inputs = {"input_ids": inputs.to(model.device)} start_time = time.time() # Generate response with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=1200, temperature=0.2, do_sample=False, pad_token_id=tokenizer.pad_token_id, ) latency = time.time() - start_time # Decode response prompt_len = inputs["input_ids"].shape[-1] response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip() # Format final output final_output = f"""## ⚖️ Legal Analysis **Query:** {query} **Response Time:** {latency:.2f}s **Model:** {BASE_MODEL_ID}{f" + {ADAPTER_ID}" if ADAPTER_ID else " (base model)"} --- {response} --- *Generated by LEXPT - Legal Analysis AI* """ return final_output, reasoning_text except Exception as e: return f"❌ Generation error: {str(e)}", reasoning_text # Example legal queries EXAMPLE_QUERIES = [ "Draft 5 advocacy point-headings for petitioner that a knife→gun variance violated Sixth-Amendment notice", "Explain the difference between a 'variance' and a 'constructive amendment' of the charging instrument", "Analyze prejudice under the variance doctrine: Did the proof at trial (gun vs. knife) mislead the defense?", "Write a crisp one-page IRAC on Ridgeway: Issue (variance/notice), Rule, Application, Conclusion", "Create a checklist of record cites you would pull to brief this issue" ] # Auto-load model on startup (for HF Spaces) print("🚀 Auto-loading LEXPT model...") model_status = "Loading..." try: for status_update in load_model(): model_status = status_update print(status_update) except Exception as e: model_status = f"❌ Auto-load failed: {str(e)}" print(model_status) # Create Gradio interface (replicating professional HF Space design) with gr.Blocks( title="LEXPT - Legal Analysis AI", theme=gr.themes.Soft(), css=""" .main-header { text-align: center; margin-bottom: 2rem; } .reasoning-box { background: #f8f9fa; padding: 1rem; border-radius: 8px; margin: 1rem 0; } .status-box { background: #e8f5e8; padding: 0.5rem; border-radius: 4px; } """ ) as demo: gr.Markdown(""" # ⚖️ LEXPT - Legal Analysis AI **Professional legal analysis powered by fine-tuned GPT-OSS-20B** Specialized for appellate and habeas corpus issues *Give it a couple of seconds to start. You can enable reasoning level to see the thinking process.* """, elem_classes="main-header") with gr.Row(): with gr.Column(scale=1): gr.Markdown(f"""