mickey61305's picture
Update app.py
c09c6d8 verified
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# 1. Configuration
# The base checkpoint is the unsloth build on the Hugging Face Hub; the
# fine-tuned adapter files are read from the current working directory.
# NOTE(review): the repo name suggests a bitsandbytes 4-bit checkpoint --
# confirm it loads as intended on a CPU-only host.
base_model_name = "unsloth/Llama-3.2-3B-bnb-4bit"
adapter_path = "."

# 2. Tokenizer
# Taken from the base model repo rather than the adapter directory; per the
# original author's note this avoids a "backend tokenizer" loading error.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# 3 & 4. Base model + fine-tuned adapters
# The base weights are placed on the CPU in float32 with low_cpu_mem_usage
# enabled, and the result is wrapped with the adapters from adapter_path.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float32,
        device_map="cpu",
        low_cpu_mem_usage=True,
    ),
    adapter_path,
)
# 5. The Reasoning Logic
def legal_summarizer(legal_text):
    """Generate a grounded summary of a legal clause.

    The clause is wrapped in the instruction prompt, passed through the
    module-level ``tokenizer`` and ``model``, and only the text produced
    after the prompt is returned.

    Args:
        legal_text: The legal clause or passage to summarise.

    Returns:
        The generated summary with surrounding whitespace stripped, or an
        empty string when ``legal_text`` is ``None``, empty, or
        whitespace-only.
    """
    # Nothing to analyse: skip the (slow) generation pass entirely.
    if not legal_text or not legal_text.strip():
        return ""

    prompt = f"Analyze the following legal text and provide a grounded summary.\n\nInput:\n{legal_text}\n\nResponse:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256)

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Slicing the prompt off by length (instead of splitting the
    # decoded text on "Response:") keeps the answer intact even when the
    # clause or the generated text itself contains the word "Response:".
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
# 6. Setup the UI
# A ten-line text box feeds legal_summarizer; its return value is shown in a
# second text box.
demo = gr.Interface(
    legal_summarizer,
    gr.Textbox(lines=10, label="Paste Legal Clause"),
    gr.Textbox(label="LexGuard AI Summary"),
    title="⚖️ LexGuard AI: Legal Auditor",
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()