File size: 1,592 Bytes
afd8f94
 
8178916
 
afd8f94
54aaa6a
c09c6d8
54aaa6a
 
8178916
c09c6d8
 
 
8178916
54aaa6a
8178916
 
 
 
 
afd8f94
86a2323
c09c6d8
8178916
afd8f94
c09c6d8
afd8f94
 
8178916
86a2323
8178916
 
86a2323
8178916
54aaa6a
86a2323
 
 
afd8f94
c09c6d8
afd8f94
 
54aaa6a
afd8f94
54aaa6a
afd8f94
86a2323
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# 1. Configuration
# Hub id of the base checkpoint. Both the tokenizer and the base weights below
# are loaded from this id.
# NOTE(review): the "bnb-4bit" suffix suggests a bitsandbytes pre-quantized
# checkpoint, while the load below targets CPU with float32 — confirm the
# deployment environment can actually load it this way.
base_model_name = "unsloth/Llama-3.2-3B-bnb-4bit"
# Location of the fine-tuned adapter files. "." is a relative path, so it is
# resolved against the process's current working directory.
adapter_path = "." 

# 2. Load Tokenizer directly from the base model source
# This fixes the "backend tokenizer" error by getting the files from Hugging Face directly
# (i.e. the tokenizer comes from base_model_name, not from adapter_path).
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# 3. Load the base model on CPU
# device_map="cpu" places the model on the CPU, torch_dtype requests float32,
# and low_cpu_mem_usage=True asks transformers to limit peak RAM while loading.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True
)

# 4. Apply your fine-tuned adapters
# Rebinds `model` to the PEFT wrapper built from the base model and the adapter
# files at adapter_path; everything below uses this wrapped model.
model = PeftModel.from_pretrained(model, adapter_path)

# 5. The Reasoning Logic
def legal_summarizer(legal_text):
    """Generate a grounded summary of a legal clause with the fine-tuned model.

    Args:
        legal_text: The clause or passage to analyze, as plain text.

    Returns:
        The model's response with surrounding whitespace removed, or an empty
        string when ``legal_text`` is ``None``, empty, or only whitespace.
    """
    # Nothing to analyze: skip the generation pass instead of letting the
    # model invent a summary for a blank prompt.
    if not legal_text or not legal_text.strip():
        return ""

    prompt = f"Analyze the following legal text and provide a grounded summary.\n\nInput:\n{legal_text}\n\nResponse:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256)

    # For a causal LM, generate() returns the prompt tokens followed by the
    # newly generated ones. Decode only the continuation rather than splitting
    # the full text on "Response:": that marker can reappear inside the model's
    # own output, and splitting on its last occurrence would then discard most
    # of the real answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

# 6. Setup the UI
# Build the two text components first (input, then output), then wire them
# to the summarizer through a single-function Gradio interface.
_clause_input = gr.Textbox(lines=10, label="Paste Legal Clause")
_summary_output = gr.Textbox(label="LexGuard AI Summary")

demo = gr.Interface(
    fn=legal_summarizer,
    inputs=_clause_input,
    outputs=_summary_output,
    title="⚖️ LexGuard AI: Legal Auditor",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()