import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# 1. Configuration.
# The tokenizer and base weights come from the unsloth repackaging of
# Llama-3.2-3B on Hugging Face; the fine-tuned PEFT adapters are expected
# in the current working directory (alongside this script).
base_model_name = "unsloth/Llama-3.2-3B-bnb-4bit"
adapter_path = "."
# 2. Load the tokenizer directly from the base-model repo.
# Fetching it from Hugging Face (rather than the local adapter dir) avoids
# the "backend tokenizer" error mentioned by the original author.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
# 3. Load the base model with memory-saving settings for CPU-only inference.
# NOTE(review): the repo name suggests bnb-4bit quantized weights, yet they
# are loaded here as float32 on CPU — confirm this is intended, since
# bitsandbytes 4-bit kernels generally require a GPU.
model = AutoModelForCausalLM.from_pretrained(
base_model_name,
torch_dtype=torch.float32,  # full precision; safest dtype for CPU inference
device_map="cpu",  # pin all weights to CPU (no GPU on this Space)
low_cpu_mem_usage=True  # stream weights in to reduce peak RAM during load
)
# 4. Wrap the base model with the fine-tuned LoRA/PEFT adapters.
model = PeftModel.from_pretrained(model, adapter_path)
# 5. The Reasoning Logic
def legal_summarizer(legal_text):
    """Summarize a legal clause with the adapter-tuned model.

    Args:
        legal_text: Raw legal text pasted by the user in the UI.

    Returns:
        The model's summary with the prompt scaffolding stripped, or a
        short instruction message when no input was provided.
    """
    # Guard clause: don't spend a multi-second CPU generate() call on
    # empty or whitespace-only input.
    if not legal_text or not legal_text.strip():
        return "Please paste a legal clause to analyze."

    prompt = f"Analyze the following legal text and provide a grounded summary.\n\nInput:\n{legal_text}\n\nResponse:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=False,  # force greedy decoding for reproducible output
            pad_token_id=tokenizer.eos_token_id,  # silence missing-pad-token warning
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # generate() echoes the prompt, so keep only the text after the final
    # "Response:" marker; fall back to the whole decode if it's missing.
    if "Response:" in decoded:
        return decoded.split("Response:")[-1].strip()
    return decoded.strip()
# 6. Build the Gradio front-end: one input box feeding the summarizer,
# one output box showing its result.
clause_input = gr.Textbox(lines=10, label="Paste Legal Clause")
summary_output = gr.Textbox(label="LexGuard AI Summary")

demo = gr.Interface(
    fn=legal_summarizer,
    inputs=clause_input,
    outputs=summary_output,
    title="⚖️ LexGuard AI: Legal Auditor",
)

# Launch only when run as a script; on Spaces the module-level `demo` is picked up.
if __name__ == "__main__":
    demo.launch()