import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# 1. Configuration
# We point to the official unsloth version of the base model for the tokenizer
base_model_name = "unsloth/Llama-3.2-3B-bnb-4bit"
adapter_path = "."

# 2. Load the tokenizer directly from the base model source
# This fixes the "backend tokenizer" error by getting the files from Hugging Face directly
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# 3. Load the model with memory-saving settings for CPU
# Note: this checkpoint ships pre-quantized 4-bit weights, so bitsandbytes must be installed
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
)

# 4. Apply your fine-tuned adapters on top of the base model
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()  # inference only; disables dropout

# 5. The reasoning logic
def legal_summarizer(legal_text):
    # Mirror the prompt format used during fine-tuning
    prompt = (
        "Analyze the following legal text and provide a grounded summary.\n\n"
        f"Input:\n{legal_text}\n\nResponse:\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # generate() echoes the prompt, so keep only the text after the "Response:" marker
    if "Response:" in decoded:
        return decoded.split("Response:")[-1].strip()
    return decoded.strip()

# 6. Set up the UI
demo = gr.Interface(
    fn=legal_summarizer,
    inputs=gr.Textbox(lines=10, label="Paste Legal Clause"),
    outputs=gr.Textbox(label="LexGuard AI Summary"),
    title="⚖️ LexGuard AI: Legal Auditor",
)

if __name__ == "__main__":
    demo.launch()
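
# Optional sanity check (a sketch, not part of the app): you can exercise
# legal_summarizer directly to confirm the adapter loaded and generation works,
# without going through the UI. The clause below is made up for illustration.
# To try it, uncomment these lines and run them before demo.launch() above;
# generation on CPU may take a while.
#
# sample_clause = (
#     "The Tenant shall indemnify the Landlord against all claims arising from "
#     "the Tenant's use of the premises, except to the extent caused by the "
#     "Landlord's own negligence."
# )
# print(legal_summarizer(sample_clause))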