# app.py - Gradio UI for interacting with facebook/opt-125m
import logging

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Optional toxicity scoring
try:
    from detoxify import Detoxify
    detox_available = True
except Exception:
    # Deliberately broad: an optional dependency may fail to import for
    # reasons other than ImportError, and the app must still start.
    detox_available = False

logger = logging.getLogger(__name__)

MODEL_NAME = "facebook/opt-125m"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


def load_models():
    """Load the tokenizer, the language model and the optional toxicity scorer.

    The language model is moved to ``DEVICE`` and switched to eval mode.

    Returns:
        A ``(tokenizer, model, detox)`` tuple. ``detox`` is ``None`` when
        detoxify could not be imported or its model could not be loaded.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    model.to(DEVICE)
    model.eval()

    detox = None
    if detox_available:
        try:
            detox = Detoxify('original')
        except Exception:
            # Toxicity scoring is optional; a failure to load it must not
            # prevent the text-generation UI from starting.
            logger.warning(
                "Could not load Detoxify; toxicity scoring disabled",
                exc_info=True,
            )
    return tokenizer, model, detox


tokenizer, model, detox = load_models()


@torch.inference_mode()
def generate(prompt, max_new_tokens=150, temperature=0.8, top_p=0.95,
             return_toxicity=False):
    """Sample a continuation of *prompt* from the language model.

    Args:
        prompt: Text to continue.
        max_new_tokens: Maximum number of tokens to generate (cast to int).
        temperature: Sampling temperature (cast to float).
        top_p: Nucleus-sampling probability mass (cast to float).
        return_toxicity: When true and the toxicity scorer is loaded, also
            score the continuation.

    Returns:
        A ``(continuation, toxicity_score)`` tuple. ``continuation`` is the
        generated text without the prompt, stripped of surrounding
        whitespace. ``toxicity_score`` is ``None`` when scoring was not
        requested, is unavailable, or failed.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    out = model.generate(
        **inputs,
        do_sample=True,
        max_new_tokens=int(max_new_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        pad_token_id=tokenizer.eos_token_id,
    )

    # The output sequence starts with the prompt tokens. Slice them off by
    # token count instead of matching the decoded text against the prompt,
    # because decoding does not always reproduce the prompt byte-for-byte.
    prompt_len = inputs["input_ids"].shape[-1]
    new_tokens = out[0][prompt_len:]
    continuation = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    toxicity_score = None
    if return_toxicity and detox is not None:
        try:
            toxicity_score = detox.predict(continuation)["toxicity"]
        except Exception:
            # Best-effort: keep returning the generated text, but leave a
            # trace of why the score is missing.
            logger.warning("Toxicity scoring failed", exc_info=True)
            toxicity_score = None
    return continuation, toxicity_score


# NOTE(review): the Row/Column nesting below is reconstructed from the
# statement order; confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# OPT-125M Interactive")
    with gr.Row():
        inp = gr.Textbox(
            label="Prompt",
            placeholder="Type something to the model...",
            lines=3,
        )
        with gr.Column():
            max_tokens = gr.Slider(10, 512, value=150, step=10, label="Max new tokens")
            temp = gr.Slider(0.1, 1.5, value=0.8, step=0.05, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.01, label="Top-p (nucleus)")
            tox_checkbox = gr.Checkbox(
                value=False,
                label="Return toxicity score (requires detoxify)",
            )
            run_btn = gr.Button("Generate")
    output_text = gr.Textbox(label="Model output", lines=8)
    tox_out = gr.Textbox(label="Toxicity score (None if unavailable)", lines=1)

    def on_click(prompt, max_new_tokens, temperature, top_p, tox):
        """Run generation for the UI and format the toxicity score as text."""
        continuation, tox_score = generate(
            prompt, max_new_tokens, temperature, top_p, tox
        )
        tox_text = str(tox_score) if tox_score is not None else "Not available"
        return continuation, tox_text

    run_btn.click(
        on_click,
        inputs=[inp, max_tokens, temp, top_p, tox_checkbox],
        outputs=[output_text, tox_out],
    )

if __name__ == "__main__":
    demo.launch()