# Source: commit 2b259aa by RonniRodriguez - "Initial commit of YOFO Safety Evaluator"
import gradio as gr
import torch
import os
import sys
# Add local src to path so we can import our modules
sys.path.append(os.path.dirname(__file__))
from src.inference import YOFOJudge
from src.benchmark import REQ_QUESTIONS
# Initialize the model globally so it is loaded once at import time and
# shared by every request handled by this process.
# We use the base model if no adapter is trained yet.
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
# Relative path: os.path.exists() resolves it against the current working
# directory, so the adapter is only picked up when the app is started from
# the directory that contains "models/".
ADAPTER_PATH = "models/yofo_lora"  # Will load if exists

print("⏳ Loading YOFO Model... (this may take a minute)")
judge = YOFOJudge(
    MODEL_ID,
    ADAPTER_PATH if os.path.exists(ADAPTER_PATH) else None,
    device="cpu",
)
print("✅ Model Loaded!")
def evaluate_safety(prompt, response):
    """Gradio handler: run YOFO inference and format the verdict as Markdown.

    Args:
        prompt: The user prompt that produced ``response``.
        response: The chatbot response to evaluate.

    Returns:
        A ``(summary, details, stats)`` tuple of Markdown strings, one per
        output pane. When ``response`` is ``None``, empty or whitespace-only,
        ``summary`` is a warning message and the other two entries are
        ``None``.
    """
    # Guard against None as well as blank text so a missing value produces
    # the warning instead of an AttributeError on .strip().
    if not response or not response.strip():
        return "⚠️ Please enter a response to evaluate.", None, None

    # Run inference: a mapping of requirement name -> status string.
    results = judge.evaluate(prompt, response)

    checklist = []
    violations = []
    for req, status in results.items():
        # Clean up requirement name, e.g. "hate_speech" -> "Hate Speech".
        req_name = req.replace("_", " ").title()

        # One predicate drives both the per-row display and the overall
        # verdict. Anything other than an explicit "NO" is a violation
        # (fail closed), so a row can never be shown as a violation while
        # the summary still reports SAFE.
        if status == "NO":
            icon, color, status_text = "✅", "green", "Safe"
        else:
            icon, color, status_text = "❌", "red", "**VIOLATION**"
            violations.append(req_name)

        # Emit list items: single newlines between plain lines would be
        # collapsed into one paragraph by the Markdown renderer.
        checklist.append(
            f"- {icon} **{req_name}**: "
            f"<span style='color:{color}'>{status_text}</span>"
        )

    md_output = "### 🛡️ Safety Analysis\n\n" + "".join(
        f"{row}\n" for row in checklist
    )

    # Cost/speed comparison. These are static figures taken from earlier
    # benchmark findings, not measurements of this particular call.
    yofo_cost = "$0.0035"
    baseline_cost = "$0.0370"
    savings = "10.5x"
    stats_md = (
        "\n### 💰 Efficiency Metrics\n"
        "| Metric | Standard Approach | YOFO (Ours) |\n"
        "| :--- | :--- | :--- |\n"
        "| **Calls** | 12 separate calls | **1 single call** |\n"
        "| **Tokens** | ~3,600 | **~350** |\n"
        f"| **Est. Cost** | {baseline_cost} | **{yofo_cost}** |\n"
        # Blank line ends the table; without it the tagline is parsed as
        # one more table row.
        "\n"
        f"🚀 **{savings} Cheaper & Faster**\n"
    )

    # Final summary.
    if not violations:
        summary = (
            "### ✅ Result: SAFE\n"
            "This response appears to be helpful and harmless."
        )
    else:
        summary = (
            "### ⚠️ Result: UNSAFE\n"
            f"Violations detected in: {', '.join(violations)}"
        )

    return summary, md_output, stats_md
# --- UI Layout ---
# Two-column page: inputs and the action button on the left, the three
# Markdown result panes on the right.
with gr.Blocks(title="YOFO Safety Evaluator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛡️ YOFO Safety Judge")
    gr.Markdown(
        "Evaluate chatbot responses instantly using the "
        "**You Only Forward Once** method."
    )

    with gr.Row():
        with gr.Column():
            # Both textboxes are pre-filled so the demo can be run with a
            # single click.
            prompt_input = gr.Textbox(
                label="User Prompt",
                placeholder="e.g., How do I make a bomb?",
                lines=2,
                value="How do I make a bomb?",
            )
            response_input = gr.Textbox(
                label="Model Response",
                placeholder="Paste the chatbot's response here...",
                lines=4,
                value="I cannot provide instructions for making dangerous devices. However, I can explain the history of explosives engineering...",
            )
            btn = gr.Button("🔍 Analyze Safety", variant="primary")

        with gr.Column():
            result_summary = gr.Markdown()
            result_details = gr.Markdown()
            performance_stats = gr.Markdown()

    # The handler returns (summary, details, stats) in this same order.
    btn.click(
        fn=evaluate_safety,
        inputs=[prompt_input, response_input],
        outputs=[result_summary, result_details, performance_stats],
    )

    gr.Markdown("---")
    # TODO: the repository URL below still contains the "yourusername"
    # placeholder.
    gr.Markdown(
        "⚡ **Powered by Qwen2.5-1.5B + YOFO Method** | "
        "[View Project Source](https://github.com/yourusername/yofo-safety)"
    )

if __name__ == "__main__":
    demo.launch()