thisistheend / app.py
DarkMindForever's picture
Update app.py
96d0e2a verified
raw
history blame
6.7 kB
import os
import copy
import time
import html
import logging
import warnings
import gradio as gr
from openai import OpenAI
# --- CRITICAL LOG SILENCING (Fix for the -1 File Descriptor Error) ---
class SilentLogFilter(logging.Filter):
    """Suppress noisy asyncio records about invalid file descriptors."""

    def filter(self, record):
        # Keep every record except the known-noisy FD complaint.
        message = record.getMessage()
        return "Invalid file descriptor" not in message
# Attach the filter to asyncio's logger so the spurious "Invalid file
# descriptor" messages are dropped, and silence related RuntimeWarnings
# module-wide.
logger = logging.getLogger("asyncio")
logger.addFilter(SilentLogFilter())
warnings.filterwarnings("ignore", category=RuntimeWarning)
# --- CONFIGURATION ---
# Using the Hugging Face Inference Router for endpoint access
MODEL_NAME = "openai/gpt-oss-safeguard-20b"  # model id routed by HF
BASE_URL = "https://router.huggingface.co/v1"  # OpenAI-compatible endpoint
# Ensure HF_TOKEN is set in your environment variables
HF_TOKEN = os.getenv("HF_TOKEN", "your_hf_token_here")  # placeholder fallback if unset
# Module-level flag read by the streaming loops below; set True to abort generation.
stop_generation = False
def stream_from_vllm(messages, thinking_enabled=True, temperature=0.7):
    """Stream chat-completion deltas for *messages* from the HF router.

    Args:
        messages: OpenAI-style chat messages ({"role": ..., "content": ...}).
        thinking_enabled: maps to the gpt-oss 'reasoning' effort level —
            "medium" when enabled, "low" when disabled.
        temperature: sampling temperature forwarded to the API.

    Yields:
        Streaming chunk deltas (each may carry ``.content`` and/or
        ``.reasoning_content``). Iteration stops early when the module-level
        ``stop_generation`` flag is set.

    Raises:
        Whatever the OpenAI client raises (auth/network errors) — callers
        handle it; the original wrapped the call in
        ``except Exception as e: raise e``, a no-op re-raise, removed here.
    """
    client = OpenAI(base_url=BASE_URL, api_key=HF_TOKEN)
    # gpt-oss models use 'reasoning' effort levels: low, medium, high.
    # The UI toggle is mapped onto two of them.
    reasoning_config = {"effort": "medium" if thinking_enabled else "low"}
    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=temperature,
        stream=True,
        max_tokens=4096,
        extra_body={"reasoning": reasoning_config},
    )
    for chunk in response:
        if stop_generation:
            break
        if chunk.choices and chunk.choices[0].delta:
            yield chunk.choices[0].delta
class SafeguardModel:
    """Accumulates streamed model output and renders it as HTML.

    Holds the running assistant text and reasoning text for the current
    turn; ``stream_generate`` drives the streaming API call and yields a
    progressively updated HTML rendering of both.
    """

    def __init__(self):
        self.accumulated_content = ""      # assistant answer text so far
        self.accumulated_reasoning = ""    # reasoning text so far

    def reset_state(self):
        """Clear accumulated text before starting a new turn."""
        self.accumulated_content = ""
        self.accumulated_reasoning = ""

    def _render_response(self, reasoning_content, regular_content, skip_think=False):
        """Return HTML: an optional collapsible reasoning block, then the answer."""
        parts = []
        if reasoning_content and not skip_think:
            escaped = html.escape(reasoning_content).replace("\n", "<br>")
            parts.append(
                "<details open><summary style='cursor:pointer;font-weight:bold;color:#e67e22;'>🛡️ Safeguard Reasoning</summary>"
                f"<div style='color:#555;padding:15px;border-left:4px solid #e67e22;background:#fff5eb;margin:10px 0;'>{escaped}</div></details>"
            )
        if regular_content:
            escaped = html.escape(regular_content).replace("\n", "<br>")
            parts.append(f"<div style='white-space: pre-wrap; line-height:1.6;'>{escaped}</div>")
        return "".join(parts)

    def _build_messages(self, raw_hist, sys_prompt):
        """Convert UI history into OpenAI chat messages, prepending the policy as system."""
        policy = sys_prompt.strip()
        messages = [{"role": "system", "content": policy}] if policy else []
        messages.extend(
            {"role": entry["role"], "content": entry.get("content", "")}
            for entry in raw_hist
        )
        return messages

    def stream_generate(self, raw_hist, sys_prompt, thinking_enabled=True, temperature=0.7):
        """Yield progressively rendered HTML while streaming the model reply."""
        global stop_generation
        stop_generation = False
        messages = self._build_messages(raw_hist, sys_prompt)
        self.reset_state()
        try:
            for delta in stream_from_vllm(messages, thinking_enabled, temperature):
                if stop_generation:
                    break
                if getattr(delta, 'content', None):
                    self.accumulated_content += delta.content
                if getattr(delta, 'reasoning_content', None):
                    self.accumulated_reasoning += delta.reasoning_content
                yield self._render_response(
                    self.accumulated_reasoning,
                    self.accumulated_content,
                    not thinking_enabled,
                )
        except Exception as e:
            yield self._render_response("", f"⚠️ Connection Error: {str(e)}")
# Single shared engine instance used by the chat() handler.
model_engine = SafeguardModel()
def chat(msg, raw_hist, sys_prompt, thinking_enabled, temperature):
    """Gradio handler: append the user turn and stream the assistant reply.

    Yields ``(raw_hist, deepcopy(raw_hist), "")`` triples so the UI can
    update the chatbot, the state, and clear the textbox on every chunk.
    """
    global stop_generation
    stop_generation = False
    # Normalize history BEFORE any use (the original checked for None only
    # after having already returned raw_hist for blank input).
    if raw_hist is None:
        raw_hist = []
    if not msg.strip():
        # BUGFIX: the original used `return value` inside a generator, which
        # silently discards the value and yields nothing; yield once instead
        # so the UI outputs stay consistent on blank input.
        yield raw_hist, copy.deepcopy(raw_hist), ""
        return
    raw_hist.append({"role": "user", "content": msg.strip()})
    assistant_node = {"role": "assistant", "content": "", "reasoning_content": ""}
    raw_hist.append(assistant_node)
    yield raw_hist, copy.deepcopy(raw_hist), ""
    # Stream into the placeholder node; the history passed to the engine
    # excludes the placeholder assistant turn itself.
    for chunk in model_engine.stream_generate(raw_hist[:-1], sys_prompt, thinking_enabled, temperature):
        if stop_generation:
            break
        assistant_node["content"] = model_engine.accumulated_content
        assistant_node["display_content"] = chunk
        yield raw_hist, copy.deepcopy(raw_hist), ""
def reset_chat():
    """Abort any in-flight generation and clear chatbot, state, and textbox."""
    global stop_generation
    stop_generation = True
    empty_chat, empty_state, empty_text = [], [], ""
    return empty_chat, empty_state, empty_text
def format_display(raw_hist):
    """Project raw history onto chatbot messages, preferring display_content."""
    display = []
    for msg in raw_hist:
        shown = msg.get("display_content", msg["content"])
        display.append({"role": msg["role"], "content": shown})
    return display
# --- GRADIO UI ---
with gr.Blocks(title="GPT-OSS Safeguard 20B", theme=gr.themes.Soft(primary_hue="orange")) as demo:
    gr.HTML("<h1 style='text-align:center;'>🛡️ GPT-OSS Safeguard 20B</h1>")
    # Raw history keeps full message dicts (including display_content / reasoning_content).
    raw_history = gr.State([])
    with gr.Row():
        with gr.Column(scale=4):
            # sanitize_html=False so the rendered reasoning/answer HTML is displayed.
            chatbox = gr.Chatbot(label="Safety Analysis", type="messages", height=550, sanitize_html=False)
            textbox = gr.Textbox(label="Input Content", placeholder="Enter content to analyze against policy...", lines=3)
            with gr.Row():
                send_btn = gr.Button("Analyze", variant="primary")
                clear_btn = gr.Button("Reset")
        with gr.Column(scale=1):
            think_toggle = gr.Checkbox(label="Enable Deep Reasoning", value=True)
            temp_slider = gr.Slider(0, 1.0, value=0.7, label="Temperature")
            sys_prompt = gr.Textbox(
                label="Safety Policy (System)",
                value="You are a safety classifier. Analyze the user content against the following policy: [Insert Policy Here]. Output your decision clearly.",
                lines=10
            )

    # Adapter generator: re-renders the chatbot view (display_content) while
    # passing the raw state and textbox value straight through.
    def wrapper(*args):
        for hist, raw_upd, txt_val in chat(*args):
            yield format_display(hist), raw_upd, txt_val

    # Same handler for button click and textbox Enter-submit.
    send_btn.click(wrapper, [textbox, raw_history, sys_prompt, think_toggle, temp_slider], [chatbox, raw_history, textbox])
    textbox.submit(wrapper, [textbox, raw_history, sys_prompt, think_toggle, temp_slider], [chatbox, raw_history, textbox])
    clear_btn.click(reset_chat, outputs=[chatbox, raw_history, textbox])
if __name__ == "__main__":
    # NOTE(review): ssr_mode=False disables Gradio's server-side rendering —
    # reason not documented here; presumably related to the FD log noise
    # suppressed at the top of the file. Confirm before changing.
    demo.queue().launch(ssr_mode=False)