thisistheend / app.py
DarkMindForever's picture
Update app.py
96d0e2a verified
raw
history blame
6.7 kB
import os
import copy
import time
import html
import logging
import warnings
import gradio as gr
from openai import OpenAI
# --- CRITICAL LOG SILENCING (Fix for the -1 File Descriptor Error) ---
class SilentLogFilter(logging.Filter):
    """Suppress noisy asyncio records about invalid file descriptors."""

    def filter(self, record):
        # Keep every record except the known-noisy FD complaint.
        message = record.getMessage()
        return "Invalid file descriptor" not in message
# Attach the filter to asyncio's logger so the spurious "Invalid file
# descriptor" messages are dropped, and silence related RuntimeWarnings
# module-wide.
logger = logging.getLogger("asyncio")
logger.addFilter(SilentLogFilter())
warnings.filterwarnings("ignore", category=RuntimeWarning)
# --- CONFIGURATION ---
# Using the Hugging Face Inference Router for endpoint access
MODEL_NAME = "openai/gpt-oss-safeguard-20b"  # model id routed by HF
BASE_URL = "https://router.huggingface.co/v1"  # OpenAI-compatible endpoint
# Ensure HF_TOKEN is set in your environment variables
HF_TOKEN = os.getenv("HF_TOKEN", "your_hf_token_here")  # placeholder fallback if unset
# Module-level flag read by the streaming loops below; set True to abort generation.
stop_generation = False
def stream_from_vllm(messages, thinking_enabled=True, temperature=0.7):
    """Stream chat-completion deltas for *messages* from the HF router.

    Args:
        messages: OpenAI-style chat messages ({"role": ..., "content": ...}).
        thinking_enabled: maps to the gpt-oss 'reasoning' effort level —
            "medium" when enabled, "low" when disabled.
        temperature: sampling temperature forwarded to the API.

    Yields:
        Streaming chunk deltas (each may carry ``.content`` and/or
        ``.reasoning_content``). Iteration stops early when the module-level
        ``stop_generation`` flag is set.

    Raises:
        Whatever the OpenAI client raises (auth/network errors) — callers
        handle it; the original wrapped the call in
        ``except Exception as e: raise e``, a no-op re-raise, removed here.
    """
    client = OpenAI(base_url=BASE_URL, api_key=HF_TOKEN)
    # gpt-oss models use 'reasoning' effort levels: low, medium, high.
    # The UI toggle is mapped onto two of them.
    reasoning_config = {"effort": "medium" if thinking_enabled else "low"}
    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=temperature,
        stream=True,
        max_tokens=4096,
        extra_body={"reasoning": reasoning_config},
    )
    for chunk in response:
        if stop_generation:
            break
        if chunk.choices and chunk.choices[0].delta:
            yield chunk.choices[0].delta
class SafeguardModel:
    """Accumulates streamed model output and renders it as HTML.

    Holds the running assistant text and reasoning text for the current
    turn; ``stream_generate`` drives the streaming API call and yields a
    progressively updated HTML rendering of both.
    """

    def __init__(self):
        self.accumulated_content = ""      # assistant answer text so far
        self.accumulated_reasoning = ""    # reasoning text so far

    def reset_state(self):
        """Clear accumulated text before starting a new turn."""
        self.accumulated_content = ""
        self.accumulated_reasoning = ""

    def _render_response(self, reasoning_content, regular_content, skip_think=False):
        """Return HTML: an optional collapsible reasoning block, then the answer."""
        parts = []
        if reasoning_content and not skip_think:
            escaped = html.escape(reasoning_content).replace("\n", "<br>")
            parts.append(
                "<details open><summary style='cursor:pointer;font-weight:bold;color:#e67e22;'>🛡️ Safeguard Reasoning</summary>"
                f"<div style='color:#555;padding:15px;border-left:4px solid #e67e22;background:#fff5eb;margin:10px 0;'>{escaped}</div></details>"
            )
        if regular_content:
            escaped = html.escape(regular_content).replace("\n", "<br>")
            parts.append(f"<div style='white-space: pre-wrap; line-height:1.6;'>{escaped}</div>")
        return "".join(parts)

    def _build_messages(self, raw_hist, sys_prompt):
        """Convert UI history into OpenAI chat messages, prepending the policy as system."""
        policy = sys_prompt.strip()
        messages = [{"role": "system", "content": policy}] if policy else []
        messages.extend(
            {"role": entry["role"], "content": entry.get("content", "")}
            for entry in raw_hist
        )
        return messages

    def stream_generate(self, raw_hist, sys_prompt, thinking_enabled=True, temperature=0.7):
        """Yield progressively rendered HTML while streaming the model reply."""
        global stop_generation
        stop_generation = False
        messages = self._build_messages(raw_hist, sys_prompt)
        self.reset_state()
        try:
            for delta in stream_from_vllm(messages, thinking_enabled, temperature):
                if stop_generation:
                    break
                if getattr(delta, 'content', None):
                    self.accumulated_content += delta.content
                if getattr(delta, 'reasoning_content', None):
                    self.accumulated_reasoning += delta.reasoning_content
                yield self._render_response(
                    self.accumulated_reasoning,
                    self.accumulated_content,
                    not thinking_enabled,
                )
        except Exception as e:
            yield self._render_response("", f"⚠️ Connection Error: {str(e)}")
# Single shared engine instance used by the chat() handler.
model_engine = SafeguardModel()
def chat(msg, raw_hist, sys_prompt, thinking_enabled, temperature):
    """Gradio handler: append the user turn and stream the assistant reply.

    Yields ``(raw_hist, deepcopy(raw_hist), "")`` triples so the UI can
    update the chatbot, the state, and clear the textbox on every chunk.
    """
    global stop_generation
    stop_generation = False
    # Normalize history BEFORE any use (the original checked for None only
    # after having already returned raw_hist for blank input).
    if raw_hist is None:
        raw_hist = []
    if not msg.strip():
        # BUGFIX: the original used `return value` inside a generator, which
        # silently discards the value and yields nothing; yield once instead
        # so the UI outputs stay consistent on blank input.
        yield raw_hist, copy.deepcopy(raw_hist), ""
        return
    raw_hist.append({"role": "user", "content": msg.strip()})
    assistant_node = {"role": "assistant", "content": "", "reasoning_content": ""}
    raw_hist.append(assistant_node)
    yield raw_hist, copy.deepcopy(raw_hist), ""
    # Stream into the placeholder node; the history passed to the engine
    # excludes the placeholder assistant turn itself.
    for chunk in model_engine.stream_generate(raw_hist[:-1], sys_prompt, thinking_enabled, temperature):
        if stop_generation:
            break
        assistant_node["content"] = model_engine.accumulated_content
        assistant_node["display_content"] = chunk
        yield raw_hist, copy.deepcopy(raw_hist), ""
def reset_chat():
    """Abort any in-flight generation and clear chatbot, state, and textbox."""
    global stop_generation
    stop_generation = True
    empty_chat, empty_state, empty_text = [], [], ""
    return empty_chat, empty_state, empty_text
def format_display(raw_hist):
    """Project raw history onto chatbot messages, preferring display_content."""
    display = []
    for msg in raw_hist:
        shown = msg.get("display_content", msg["content"])
        display.append({"role": msg["role"], "content": shown})
    return display
# --- GRADIO UI ---
with gr.Blocks(title="GPT-OSS Safeguard 20B", theme=gr.themes.Soft(primary_hue="orange")) as demo:
    gr.HTML("<h1 style='text-align:center;'>🛡️ GPT-OSS Safeguard 20B</h1>")
    # Raw history keeps full message dicts (including display_content / reasoning_content).
    raw_history = gr.State([])
    with gr.Row():
        with gr.Column(scale=4):
            # sanitize_html=False so the rendered reasoning/answer HTML is displayed.
            chatbox = gr.Chatbot(label="Safety Analysis", type="messages", height=550, sanitize_html=False)
            textbox = gr.Textbox(label="Input Content", placeholder="Enter content to analyze against policy...", lines=3)
            with gr.Row():
                send_btn = gr.Button("Analyze", variant="primary")
                clear_btn = gr.Button("Reset")
        with gr.Column(scale=1):
            think_toggle = gr.Checkbox(label="Enable Deep Reasoning", value=True)
            temp_slider = gr.Slider(0, 1.0, value=0.7, label="Temperature")
            sys_prompt = gr.Textbox(
                label="Safety Policy (System)",
                value="You are a safety classifier. Analyze the user content against the following policy: [Insert Policy Here]. Output your decision clearly.",
                lines=10
            )

    # Adapter generator: re-renders the chatbot view (display_content) while
    # passing the raw state and textbox value straight through.
    def wrapper(*args):
        for hist, raw_upd, txt_val in chat(*args):
            yield format_display(hist), raw_upd, txt_val

    # Same handler for button click and textbox Enter-submit.
    send_btn.click(wrapper, [textbox, raw_history, sys_prompt, think_toggle, temp_slider], [chatbox, raw_history, textbox])
    textbox.submit(wrapper, [textbox, raw_history, sys_prompt, think_toggle, temp_slider], [chatbox, raw_history, textbox])
    clear_btn.click(reset_chat, outputs=[chatbox, raw_history, textbox])
if __name__ == "__main__":
    # NOTE(review): ssr_mode=False disables Gradio's server-side rendering —
    # reason not documented here; presumably related to the FD log noise
    # suppressed at the top of the file. Confirm before changing.
    demo.queue().launch(ssr_mode=False)