import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# 1. Configuration
# We point to the official unsloth version of the base model for the tokenizer
base_model_name = "unsloth/Llama-3.2-3B-bnb-4bit"
adapter_path = "."

# 2. Load the tokenizer directly from the base model source
# This fixes the "backend tokenizer" error by getting the files from Hugging Face directly
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# 3. Load the model with memory-saving settings for CPU
# Note: this checkpoint ships pre-quantized 4-bit weights, so bitsandbytes must be installed
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
)

# 4. Apply your fine-tuned adapters on top of the base model
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()  # inference only; disables dropout

# 5. The reasoning logic
def legal_summarizer(legal_text):
    # Mirror the prompt format used during fine-tuning
    prompt = (
        "Analyze the following legal text and provide a grounded summary.\n\n"
        f"Input:\n{legal_text}\n\nResponse:\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # generate() echoes the prompt, so keep only the text after the "Response:" marker
    if "Response:" in decoded:
        return decoded.split("Response:")[-1].strip()
    return decoded.strip()

# 6. Set up the UI
demo = gr.Interface(
    fn=legal_summarizer,
    inputs=gr.Textbox(lines=10, label="Paste Legal Clause"),
    outputs=gr.Textbox(label="LexGuard AI Summary"),
    title="⚖️ LexGuard AI: Legal Auditor",
)

if __name__ == "__main__":
    demo.launch()
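
# Optional sanity check (a sketch, not part of the app): you can exercise
# legal_summarizer directly to confirm the adapter loaded and generation works,
# without going through the UI. The clause below is made up for illustration.
# To try it, uncomment these lines and run them before demo.launch() above;
# generation on CPU may take a while.
#
# sample_clause = (
#     "The Tenant shall indemnify the Landlord against all claims arising from "
#     "the Tenant's use of the premises, except to the extent caused by the "
#     "Landlord's own negligence."
# )
# print(legal_summarizer(sample_clause))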