"""Gradio demo app for the YOFO ("You Only Forward Once") safety judge.

Importing this module loads the judge model immediately (see ``judge``
below), so the first import can take a while.
"""

import os
import sys

import gradio as gr
import torch

# Make this file's directory importable so the local ``src`` package resolves
# no matter which working directory the app is launched from. ``abspath``
# guards against a relative ``__file__`` turning into an empty path entry.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from src.inference import YOFOJudge  # noqa: E402  (needs the sys.path tweak above)
from src.benchmark import REQ_QUESTIONS  # noqa: E402

# Base model identifier handed to YOFOJudge.
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
# Adapter location, resolved relative to the current working directory.
ADAPTER_PATH = "models/yofo_lora"

print("⏳ Loading YOFO Model... (this may take a minute)")
# The adapter path is only passed when it exists on disk; otherwise None is
# passed. NOTE(review): presumably None makes YOFOJudge run the base model
# without the adapter -- confirm against src/inference.py.
judge = YOFOJudge(
    MODEL_ID,
    ADAPTER_PATH if os.path.exists(ADAPTER_PATH) else None,
    device="cpu",
)
print("✅ Model Loaded!")


def evaluate_safety(prompt, response):
    """Gradio handler: run YOFO inference on one exchange and format the verdict.

    Args:
        prompt: Text of the user prompt.
        response: Text of the chatbot response to be judged.

    Returns:
        A 3-tuple ``(summary, details, stats)`` of Markdown strings, one per
        output component. When ``response`` is missing, empty, or only
        whitespace, the first element is a warning message and the other two
        are ``None``; the model is not called in that case.
    """
    # Guard first so an empty (or None) response never reaches the model.
    if not response or not response.strip():
        return "⚠️ Please enter a response to evaluate.", None, None

    # NOTE(review): assumes judge.evaluate returns a mapping of
    # requirement key -> "YES"/"NO" -- confirm against src/inference.py.
    results = judge.evaluate(prompt, response)

    detail_lines = []
    violations = []
    for req, status in results.items():
        req_name = req.replace("_", " ").title()
        # Fail closed: anything other than an explicit "NO" is treated as a
        # violation, and the same test drives both the per-line rendering and
        # the overall summary so the two can never disagree.
        if status == "NO":
            icon, color, status_text = "✅", "green", "Safe"
        else:
            icon, color, status_text = "❌", "red", "**VIOLATION**"
            violations.append(req_name)
        detail_lines.append(
            f"{icon} **{req_name}**: <span style='color:{color}'>{status_text}</span>"
        )

    # Two trailing spaces force a Markdown hard line break; with a bare "\n"
    # every requirement would be folded into a single paragraph.
    md_output = "### 🛡️ Safety Analysis\n\n" + "".join(
        f"{line}  \n" for line in detail_lines
    )

    # Static headline figures: these are fixed literals, not measured per call.
    yofo_cost = "$0.0035"
    baseline_cost = "$0.0370"
    savings = "10.5x"

    # Assembled line by line so the function's indentation cannot leak into
    # the Markdown (indented lines would render as a code block, not a table).
    stats_md = "\n".join(
        [
            "",
            "### 💰 Efficiency Metrics",
            "| Metric | Standard Approach | YOFO (Ours) |",
            "| :--- | :--- | :--- |",
            "| **Calls** | 12 separate calls | **1 single call** |",
            "| **Tokens** | ~3,600 | **~350** |",
            f"| **Est. Cost** | {baseline_cost} | **{yofo_cost}** |",
            "",
            f"🚀 **{savings} Cheaper & Faster**",
            "",
        ]
    )

    if not violations:
        summary = "### ✅ Result: SAFE\nThis response appears to be helpful and harmless."
    else:
        summary = f"### ⚠️ Result: UNSAFE\nViolations detected in: {', '.join(violations)}"

    return summary, md_output, stats_md


with gr.Blocks(title="YOFO Safety Evaluator", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("# 🛡️ YOFO Safety Judge")
    gr.Markdown("Evaluate chatbot responses instantly using the **You Only Forward Once** method.")

    with gr.Row():
        # Left column: the exchange to be judged plus the trigger button.
        with gr.Column():
            prompt_input = gr.Textbox(
                label="User Prompt",
                placeholder="e.g., How do I make a bomb?",
                lines=2,
                value="How do I make a bomb?",
            )
            response_input = gr.Textbox(
                label="Model Response",
                placeholder="Paste the chatbot's response here...",
                lines=4,
                value="I cannot provide instructions for making dangerous devices. However, I can explain the history of explosives engineering...",
            )
            # NOTE(review): the button glyph was reconstructed from a
            # mis-encoded byte sequence -- confirm the intended emoji.
            btn = gr.Button("🔍 Analyze Safety", variant="primary")

        # Right column: one Markdown pane per element returned by
        # evaluate_safety (summary, per-requirement details, stats).
        with gr.Column():
            result_summary = gr.Markdown()
            result_details = gr.Markdown()
            performance_stats = gr.Markdown()

    # Output order must match the tuple order returned by evaluate_safety.
    btn.click(
        fn=evaluate_safety,
        inputs=[prompt_input, response_input],
        outputs=[result_summary, result_details, performance_stats],
    )

    # Footer.
    gr.Markdown("---")
    gr.Markdown("⚡ **Powered by Qwen2.5-1.5B + YOFO Method** | [View Project Source](https://github.com/yourusername/yofo-safety)")


if __name__ == "__main__":
    # Start the Gradio server only when executed as a script, not on import.
    demo.launch()