import gradio as gr
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load model and tokenizer
model_id = "Equall/SaulLM-54B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
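# On Hugging Face ZeroGPU Spaces, @spaces.GPU() requests a GPU slice for the duration of each call to the decorated function.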
@spaces.GPU()
def generate_response(message, history, system_prompt, max_tokens, temperature):
"""Generate legal analysis using Saul-54B"""
# Build conversation history
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
for human, assistant in history:
messages.append({"role": "user", "content": human})
messages.append({"role": "assistant", "content": assistant})
messages.append({"role": "user", "content": message})
# Format for model
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
# Generate
outputs = model.generate(
**inputs,
max_new_tokens=max_tokens,
temperature=temperature,
do_sample=temperature > 0,
pad_token_id=tokenizer.eos_token_id
)
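    # Decode only the newly generated tokens, slicing off the prompt that was fed in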
    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
    return response
# Default system prompt for legal queries
DEFAULT_SYSTEM = """You are SaulLM-54B, a specialized legal language model. You provide accurate legal analysis based on U.S. and European legal systems.
IMPORTANT DISCLAIMERS:
- This is for informational purposes only, not legal advice
- Information may not reflect recent legal developments
- Users should consult qualified legal professionals for actual legal advice
- Do not use this for decisions that could affect legal rights"""
# Build interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("# SaulLM-54B Legal Assistant")
gr.Markdown("*Specialized AI for legal reasoning and analysis. Private queries, powered by Zero GPU (25 min/day free).*")
with gr.Row():
with gr.Column(scale=3):
chatbot = gr.Chatbot(label="Legal Analysis", height=500)
msg = gr.Textbox(
label="Your Legal Question",
placeholder="Ask about statutes, case law, legal concepts, or compliance...",
lines=3
)
with gr.Row():
submit = gr.Button("Submit", variant="primary")
clear = gr.Button("Clear Chat")
with gr.Column(scale=1):
system_prompt = gr.Textbox(
label="System Prompt",
value=DEFAULT_SYSTEM,
lines=12,
max_lines=12
)
max_tokens = gr.Slider(
label="Max Response Tokens",
minimum=100,
maximum=2000,
value=1000,
step=100
)
temperature = gr.Slider(
label="Temperature",
minimum=0.0,
maximum=1.0,
value=0.7,
step=0.1
)
gr.Markdown("### Usage Tips")
gr.Markdown("""
- Be specific about jurisdiction
- Cite relevant statutes/cases if known
- Zero GPU resets after 60s idle
- 25 min/day free compute limit
""")
    def user_submit(message, history):
        return "", history + [[message, None]]

    def bot_respond(history, system_prompt, max_tokens, temperature):
        message = history[-1][0]
        history_context = history[:-1]
        response = generate_response(message, history_context, system_prompt, max_tokens, temperature)
        history[-1][1] = response
        return history
    msg.submit(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_respond, [chatbot, system_prompt, max_tokens, temperature], chatbot
    )
    submit.click(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_respond, [chatbot, system_prompt, max_tokens, temperature], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
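# Enabling the request queue lets long-running generation calls be processed in order instead of timing out.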
if __name__ == "__main__":
    demo.queue().launch()