# Hugging Face Spaces page header (status banner read "Runtime error") —
# scrape residue kept here as a comment so the file parses as Python.
# Standard library
import copy
import html
import logging
import os
import time
import warnings

# Third-party
import gradio as gr
from openai import OpenAI
# --- CRITICAL LOG SILENCING (Fix for the -1 File Descriptor Error) ---
class SilentLogFilter(logging.Filter):
    """Drop asyncio log records that complain about an invalid file descriptor."""

    def filter(self, record):
        # Keep every record except the noisy "Invalid file descriptor" ones.
        message = record.getMessage()
        return "Invalid file descriptor" not in message


# Attach the filter to asyncio's logger and mute RuntimeWarnings globally.
asyncio_logger = logging.getLogger("asyncio")
asyncio_logger.addFilter(SilentLogFilter())
warnings.filterwarnings("ignore", category=RuntimeWarning)
# --- CONFIGURATION ---
# All requests go through the Hugging Face Inference Router, which exposes an
# OpenAI-compatible endpoint for hosted models.
MODEL_NAME = "openai/gpt-oss-safeguard-20b"
BASE_URL = "https://router.huggingface.co/v1"

# HF_TOKEN must be set in the environment; the placeholder default will fail
# authentication if it is ever actually sent to the router.
HF_TOKEN = os.getenv("HF_TOKEN", "your_hf_token_here")

# Cooperative cancellation flag shared by the streaming generators below.
stop_generation = False
def stream_from_vllm(messages, thinking_enabled=True, temperature=0.7):
    """Stream chat-completion deltas for *messages* from the HF router.

    Yields the ``delta`` object of each streamed chunk.  Honors the global
    ``stop_generation`` flag so the UI can cancel mid-stream.

    Args:
        messages: OpenAI-style list of ``{"role", "content"}`` dicts.
        thinking_enabled: True maps to reasoning effort "medium", False to "low".
        temperature: sampling temperature forwarded to the endpoint.

    Raises:
        Propagates any exception from the OpenAI client unchanged (the caller
        renders it; the original ``except Exception as e: raise e`` wrapper was
        a no-op that only obscured the traceback origin, so it was removed).
    """
    global stop_generation  # read-only here; declared for symmetry with writers
    client = OpenAI(base_url=BASE_URL, api_key=HF_TOKEN)
    # gpt-oss models accept a 'reasoning' effort level: low / medium / high.
    # The UI "deep reasoning" toggle maps to medium (on) or low (off).
    reasoning_config = {"effort": "medium" if thinking_enabled else "low"}
    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=temperature,
        stream=True,
        max_tokens=4096,
        extra_body={"reasoning": reasoning_config},
    )
    for chunk in response:
        if stop_generation:
            break
        if chunk.choices and chunk.choices[0].delta:
            yield chunk.choices[0].delta
class SafeguardModel:
    """Accumulates streamed reasoning/content deltas and renders them as HTML."""

    def __init__(self):
        # Running buffers for the generation currently in flight.
        self.accumulated_content = ""
        self.accumulated_reasoning = ""

    def reset_state(self):
        """Clear both accumulation buffers before a new generation starts."""
        self.accumulated_content = ""
        self.accumulated_reasoning = ""

    def _render_response(self, reasoning_content, regular_content, skip_think=False):
        """Return an HTML fragment: optional reasoning panel followed by the answer."""
        def _to_html(text):
            # Escape for safety, then preserve line breaks in the rendered HTML.
            return html.escape(text).replace("\n", "<br>")

        parts = []
        if reasoning_content and not skip_think:
            parts.append(
                f"<details open><summary style='cursor:pointer;font-weight:bold;color:#e67e22;'>🛡️ Safeguard Reasoning</summary>"
                f"<div style='color:#555;padding:15px;border-left:4px solid #e67e22;background:#fff5eb;margin:10px 0;'>{_to_html(reasoning_content)}</div></details>"
            )
        if regular_content:
            parts.append(
                f"<div style='white-space: pre-wrap; line-height:1.6;'>{_to_html(regular_content)}</div>"
            )
        return "".join(parts)

    def _build_messages(self, raw_hist, sys_prompt):
        """Convert UI history nodes into OpenAI messages, prepending the policy."""
        prompt = sys_prompt.strip()
        msgs = [{"role": "system", "content": prompt}] if prompt else []
        msgs.extend(
            {"role": entry["role"], "content": entry.get("content", "")}
            for entry in raw_hist
        )
        return msgs

    def stream_generate(self, raw_hist, sys_prompt, thinking_enabled=True, temperature=0.7):
        """Yield progressively rendered HTML while streaming from the model."""
        global stop_generation
        stop_generation = False
        msgs = self._build_messages(raw_hist, sys_prompt)
        self.reset_state()
        try:
            for delta in stream_from_vllm(msgs, thinking_enabled, temperature):
                if stop_generation:
                    break
                content_piece = getattr(delta, "content", None)
                if content_piece:
                    self.accumulated_content += content_piece
                reasoning_piece = getattr(delta, "reasoning_content", None)
                if reasoning_piece:
                    self.accumulated_reasoning += reasoning_piece
                yield self._render_response(
                    self.accumulated_reasoning,
                    self.accumulated_content,
                    not thinking_enabled,
                )
        except Exception as e:
            # Surface connection problems in the chat panel instead of crashing.
            yield self._render_response("", f"⚠️ Connection Error: {str(e)}")


model_engine = SafeguardModel()
def chat(msg, raw_hist, sys_prompt, thinking_enabled, temperature):
    """Generator driving one chat turn for the Gradio UI.

    Appends the user message plus a placeholder assistant node to *raw_hist*,
    then yields ``(history, deep-copied history, "")`` tuples as tokens stream
    in; the trailing empty string clears the input textbox.

    Bug fixed: the original used ``return value`` inside this generator for the
    empty-message case, which silently discards the tuple (PEP 380 semantics) —
    the UI received no update at all.  We now yield the current state instead.
    """
    global stop_generation
    stop_generation = False
    if raw_hist is None:
        raw_hist = []
    text = msg.strip() if msg else ""  # also tolerate msg=None
    if not text:
        # Nothing to send: re-emit current state so the UI still refreshes.
        yield raw_hist, copy.deepcopy(raw_hist), ""
        return
    raw_hist.append({"role": "user", "content": text})
    # Placeholder node that the streaming loop mutates in place.
    assistant_node = {"role": "assistant", "content": "", "reasoning_content": ""}
    raw_hist.append(assistant_node)
    yield raw_hist, copy.deepcopy(raw_hist), ""
    # Stream against the history *excluding* the placeholder assistant node.
    for rendered in model_engine.stream_generate(raw_hist[:-1], sys_prompt, thinking_enabled, temperature):
        if stop_generation:
            break
        assistant_node["content"] = model_engine.accumulated_content
        # display_content carries the rendered HTML (reasoning panel + answer).
        assistant_node["display_content"] = rendered
        yield raw_hist, copy.deepcopy(raw_hist), ""
| def reset_chat(): | |
| global stop_generation | |
| stop_generation = True | |
| return [], [], "" | |
def format_display(raw_hist):
    """Project raw history nodes to Chatbot messages, preferring rendered HTML."""
    display = []
    for node in raw_hist:
        # Assistant nodes carry 'display_content' (HTML); fall back to raw text.
        shown = node.get("display_content", node["content"])
        display.append({"role": node["role"], "content": shown})
    return display
# --- GRADIO UI ---
# Layout: chat panel + input (left column), controls + safety policy (right).
with gr.Blocks(title="GPT-OSS Safeguard 20B", theme=gr.themes.Soft(primary_hue="orange")) as demo:
    gr.HTML("<h1 style='text-align:center;'>🛡️ GPT-OSS Safeguard 20B</h1>")
    # Server-side history; nodes may carry 'display_content' (rendered HTML).
    raw_history = gr.State([])
    with gr.Row():
        with gr.Column(scale=4):
            # sanitize_html=False so the reasoning <details> panel renders as HTML.
            chatbox = gr.Chatbot(label="Safety Analysis", type="messages", height=550, sanitize_html=False)
            textbox = gr.Textbox(label="Input Content", placeholder="Enter content to analyze against policy...", lines=3)
            with gr.Row():
                send_btn = gr.Button("Analyze", variant="primary")
                clear_btn = gr.Button("Reset")
        with gr.Column(scale=1):
            think_toggle = gr.Checkbox(label="Enable Deep Reasoning", value=True)
            temp_slider = gr.Slider(0, 1.0, value=0.7, label="Temperature")
            sys_prompt = gr.Textbox(
                label="Safety Policy (System)",
                value="You are a safety classifier. Analyze the user content against the following policy: [Insert Policy Here]. Output your decision clearly.",
                lines=10
            )

    def wrapper(*args):
        # Adapt chat()'s raw-history tuples to what the Chatbot component expects.
        for hist, raw_upd, txt_val in chat(*args):
            yield format_display(hist), raw_upd, txt_val

    # Same pipeline for button click and Enter-submit; Reset clears everything.
    send_btn.click(wrapper, [textbox, raw_history, sys_prompt, think_toggle, temp_slider], [chatbox, raw_history, textbox])
    textbox.submit(wrapper, [textbox, raw_history, sys_prompt, think_toggle, temp_slider], [chatbox, raw_history, textbox])
    clear_btn.click(reset_chat, outputs=[chatbox, raw_history, textbox])

if __name__ == "__main__":
    # queue() enables streaming generator handlers; ssr_mode=False sidesteps
    # server-side-rendering issues seen in some Space runtimes.
    demo.queue().launch(ssr_mode=False)