Files changed (1) hide show
  1. app.py +132 -84
app.py CHANGED
@@ -1,118 +1,166 @@
1
  import os
 
 
2
  import html
3
  import logging
4
  import warnings
5
  import gradio as gr
6
  from openai import OpenAI
7
- from duckduckgo_search import DDGS
8
- from datetime import datetime
9
 
10
- # --- CRITICAL LOG SILENCING ---
11
- # This filter catches the specific "Invalid file descriptor" message and hides it
12
  class SilentLogFilter(logging.Filter):
13
  def filter(self, record):
14
  return "Invalid file descriptor" not in record.getMessage()
15
 
16
  logger = logging.getLogger("asyncio")
17
  logger.addFilter(SilentLogFilter())
18
-
19
- # Mute standard warnings
20
  warnings.filterwarnings("ignore", category=RuntimeWarning)
21
- logging.getLogger("httpx").setLevel(logging.WARNING)
22
 
23
  # --- CONFIGURATION ---
24
- STOP_GENERATION = False
25
- MODEL_NAME = "google/gemma-3-12b-it"
26
- VLLM_BASE_URL = os.getenv("VLLM_URL", "http://localhost:8000/v1")
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- class HawkSearchUltra:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def __init__(self):
30
- self.client = OpenAI(base_url=VLLM_BASE_URL, api_key="hawk-token")
31
- self.date = datetime.now().strftime("%B %d, %Y")
32
 
33
- def hunt_web(self, query: str) -> str:
34
- try:
35
- with DDGS() as ddgs:
36
- results = [r['body'] for r in ddgs.text(query, max_results=3)]
37
- return "\n\n".join(results) if results else "No live data found."
38
- except Exception:
39
- return "Search temporarily unavailable."
40
-
41
- def _render_blocks(self, reasoning: str, content: str, thinking: bool):
42
- html_out = ""
43
- if thinking and reasoning:
44
- res_esc = html.escape(reasoning).replace("\n", "<br>")
45
- html_out += (
46
- f"<details open><summary style='cursor:pointer;font-weight:bold;color:#f39c12;'>🦅 Hawk Reasoning</summary>"
47
- f"<div style='color:#666;padding:15px;border-left:4px solid #f39c12;background:#fffdf0;margin:10px 0;'>{res_esc}</div></details>"
48
  )
49
- if content:
50
- html_out += f"<div style='white-space: pre-wrap;'>{html.escape(content)}</div>"
51
- return [{"type": "text", "text": html_out}]
 
52
 
53
- def generate_response(self, msg, history, sys_prompt, thinking, temp):
54
- global STOP_GENERATION
55
- STOP_GENERATION = False
56
-
57
- if history is None: history = []
58
- history.append({"role": "user", "content": [{"type": "text", "text": msg}]})
59
-
60
- assistant_entry = {"role": "assistant", "content": []}
61
- history.append(assistant_entry)
62
- yield history, history, ""
63
 
64
- context = self.hunt_web(msg)
65
- full_sys = f"{sys_prompt}\n\nDate: {self.date}\nContext: {context}"
66
-
67
- api_messages = [{"role": "system", "content": full_sys}]
68
- for h in history[:-1]:
69
- text = h["content"][0]["text"] if isinstance(h["content"], list) else h["content"]
70
- api_messages.append({"role": h["role"], "content": text})
71
 
72
- acc_content, acc_reasoning = "", ""
73
  try:
74
- stream = self.client.chat.completions.create(
75
- model=MODEL_NAME,
76
- messages=api_messages,
77
- stream=True,
78
- temperature=temp,
79
- extra_body={"reasoning_parser": "qwen3"} if thinking else {}
80
- )
81
- for chunk in stream:
82
- if STOP_GENERATION: break
83
- delta = chunk.choices[0].delta
84
- if hasattr(delta, 'content') and delta.content: acc_content += delta.content
85
- if hasattr(delta, 'reasoning_content') and delta.reasoning_content: acc_reasoning += delta.reasoning_content
86
- assistant_entry["content"] = self._render_blocks(acc_reasoning, acc_content, thinking)
87
- yield history, history, ""
88
  except Exception as e:
89
- assistant_entry["content"] = [{"type": "text", "text": f"⚠️ Hawk Error: {e}"}]
90
- yield history, history, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- hawk = HawkSearchUltra()
 
 
 
 
93
 
94
- with gr.Blocks(title="HawkSearch Ultra") as demo:
95
- gr.HTML("<h1 style='text-align:center;'>🦅 HawkSearch Ultra</h1>")
96
- state = gr.State([])
 
 
 
 
 
 
 
 
 
 
 
97
  with gr.Row():
98
  with gr.Column(scale=4):
99
- chatbot = gr.Chatbot(label="Hawk Intel", height=600, sanitize_html=False)
100
- txt = gr.Textbox(placeholder="Ask Hawk...", lines=2, label="Query")
101
  with gr.Row():
102
- btn = gr.Button("Hunt", variant="primary")
103
- clr = gr.Button("Clear")
 
104
  with gr.Column(scale=1):
105
- think = gr.Checkbox(label="Reasoning", value=True)
106
- tmp = gr.Slider(0, 1.2, value=0.7, label="Temp")
107
- sys_p = gr.Textbox(label="System", value="You are HawkSearch.", lines=4)
 
 
 
 
 
 
 
 
108
 
109
- btn.click(hawk.generate_response, [txt, state, sys_p, think, tmp], [chatbot, state, txt])
110
- txt.submit(hawk.generate_response, [txt, state, sys_p, think, tmp], [chatbot, state, txt])
111
- clr.click(lambda: ([], [], ""), None, [chatbot, state, txt])
112
 
113
  if __name__ == "__main__":
114
- # Removed show_api to fix the TypeError
115
- demo.queue().launch(
116
- theme=gr.themes.Soft(primary_hue="orange"),
117
- ssr_mode=False
118
- )
 
1
  import os
2
+ import copy
3
+ import time
4
  import html
5
  import logging
6
  import warnings
7
  import gradio as gr
8
  from openai import OpenAI
 
 
9
 
10
# --- CRITICAL LOG SILENCING (Fix for the -1 File Descriptor Error) ---
class SilentLogFilter(logging.Filter):
    """Logging filter that suppresses the noisy 'Invalid file descriptor' records."""

    def filter(self, record):
        """Return True to keep the record, False to drop it."""
        message = record.getMessage()
        return "Invalid file descriptor" not in message
14
 
15
# Attach the filter to asyncio's logger so "Invalid file descriptor"
# records are dropped before they reach any handler.
logger = logging.getLogger("asyncio")
logger.addFilter(SilentLogFilter())

# Silence RuntimeWarning noise globally (e.g. warnings raised during
# event-loop teardown — presumably the source of the descriptor errors).
warnings.filterwarnings("ignore", category=RuntimeWarning)
 
18
 
19
# --- CONFIGURATION ---
# Using the Hugging Face Inference Router for endpoint access
MODEL_NAME: str = "openai/gpt-oss-safeguard-20b"
BASE_URL: str = "https://router.huggingface.co/v1"
# Ensure HF_TOKEN is set in your environment variables
# (the placeholder default will not authenticate against the router).
HF_TOKEN: str = os.getenv("HF_TOKEN", "your_hf_token_here")

# Cooperative cancellation flag: set True by reset_chat() and polled by the
# streaming loops to abort generation mid-stream.
stop_generation: bool = False
27
+
28
def stream_from_vllm(messages, thinking_enabled=True, temperature=0.7):
    """Stream completion deltas for *messages* from the HF inference router.

    Args:
        messages: OpenAI-style chat messages (list of role/content dicts).
        thinking_enabled: maps to reasoning effort "medium" (True) or "low".
        temperature: sampling temperature forwarded to the endpoint.

    Yields:
        The ``choices[0].delta`` object of each streamed chunk. Stops early
        when the module-level ``stop_generation`` flag is flipped.
    """
    client = OpenAI(base_url=BASE_URL, api_key=HF_TOKEN)

    # gpt-oss models use 'reasoning' effort levels: low, medium, high.
    # We map the UI toggle to these levels.
    reasoning_config = {"effort": "medium" if thinking_enabled else "low"}

    # NOTE: the original wrapped this whole body in
    # `except Exception as e: raise e`, which only re-raised unchanged;
    # the wrapper was removed as dead weight. The `global` declaration was
    # also dropped — the flag is only read here, never assigned.
    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=temperature,
        stream=True,
        max_tokens=4096,
        extra_body={
            "reasoning": reasoning_config
        }
    )
    for chunk in response:
        if stop_generation:  # flipped by reset_chat()
            break
        if chunk.choices and chunk.choices[0].delta:
            yield chunk.choices[0].delta
54
+
55
class SafeguardModel:
    """Accumulates a streamed model response and renders it as HTML.

    Holds the partial answer/reasoning text for the response currently being
    streamed; ``chat()`` reads the accumulators between yields.
    """

    def __init__(self):
        # Delegate to reset_state() instead of duplicating its body (DRY).
        self.reset_state()

    def reset_state(self):
        """Clear the per-response accumulators before a new generation."""
        self.accumulated_content = ""      # visible answer text so far
        self.accumulated_reasoning = ""    # reasoning/thinking text so far

    def _render_response(self, reasoning_content, regular_content, skip_think=False):
        """Return HTML: an optional collapsible reasoning block plus the answer.

        Args:
            reasoning_content: raw reasoning text (HTML-escaped here).
            regular_content: raw answer text (HTML-escaped here).
            skip_think: when True, omit the reasoning block entirely.
        """
        html_parts = []
        if reasoning_content and not skip_think:
            res_esc = html.escape(reasoning_content).replace("\n", "<br>")
            html_parts.append(
                f"<details open><summary style='cursor:pointer;font-weight:bold;color:#e67e22;'>🛡️ Safeguard Reasoning</summary>"
                f"<div style='color:#555;padding:15px;border-left:4px solid #e67e22;background:#fff5eb;margin:10px 0;'>{res_esc}</div></details>"
            )
        if regular_content:
            content_esc = html.escape(regular_content).replace("\n", "<br>")
            html_parts.append(f"<div style='white-space: pre-wrap; line-height:1.6;'>{content_esc}</div>")
        return "".join(html_parts)

    def _build_messages(self, raw_hist, sys_prompt):
        """Convert UI history into OpenAI-style chat messages.

        Prepends the system prompt when non-blank and keeps only the
        role/content pair of each entry (auxiliary keys such as
        display_content/reasoning_content are dropped).
        """
        msgs = []
        if sys_prompt.strip():
            msgs.append({"role": "system", "content": sys_prompt.strip()})
        for h in raw_hist:
            msgs.append({"role": h["role"], "content": h.get("content", "")})
        return msgs

    def stream_generate(self, raw_hist, sys_prompt, thinking_enabled=True, temperature=0.7):
        """Yield the rendered HTML of the response after every streamed delta.

        On failure, yields a single rendered error message instead of raising,
        so the UI shows the problem rather than crashing the handler.
        """
        global stop_generation
        stop_generation = False
        msgs = self._build_messages(raw_hist, sys_prompt)
        self.reset_state()

        try:
            for delta in stream_from_vllm(msgs, thinking_enabled, temperature):
                if stop_generation:
                    break
                if hasattr(delta, 'content') and delta.content:
                    self.accumulated_content += delta.content
                if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
                    self.accumulated_reasoning += delta.reasoning_content
                # Hide the reasoning panel when deep reasoning is toggled off.
                yield self._render_response(self.accumulated_reasoning, self.accumulated_content, not thinking_enabled)
        except Exception as e:
            yield self._render_response("", f"⚠️ Connection Error: {str(e)}")
105
+
106
# Shared engine instance; chat() reads its accumulators while streaming.
model_engine = SafeguardModel()
107
+
108
def chat(msg, raw_hist, sys_prompt, thinking_enabled, temperature):
    """Gradio generator handler: append the user turn and stream the reply.

    Yields (raw_history, raw_history_copy, textbox_value) triples. Each raw
    history entry keeps the plain `content` (sent to the model) and, for
    assistant turns, a `display_content` key holding the rendered HTML.
    """
    global stop_generation
    stop_generation = False

    # Normalize state BEFORE any use (original checked None after returning).
    if raw_hist is None:
        raw_hist = []

    # BUG FIX: the original used `return <tuple>` inside a generator, whose
    # value Gradio discards — the UI would never be updated on empty input.
    if not msg.strip():
        yield raw_hist, copy.deepcopy(raw_hist), ""
        return

    raw_hist.append({"role": "user", "content": msg.strip()})
    assistant_node = {"role": "assistant", "content": "", "reasoning_content": ""}
    raw_hist.append(assistant_node)

    # First yield clears the textbox and shows the pending turn immediately.
    yield raw_hist, copy.deepcopy(raw_hist), ""

    # Stream everything up to (not including) the empty assistant placeholder.
    for chunk in model_engine.stream_generate(raw_hist[:-1], sys_prompt, thinking_enabled, temperature):
        if stop_generation:
            break
        assistant_node["content"] = model_engine.accumulated_content
        assistant_node["display_content"] = chunk  # rendered HTML for the UI
        yield raw_hist, copy.deepcopy(raw_hist), ""
125
 
126
def reset_chat():
    """Abort any in-flight generation and return cleared UI state.

    Returns empty values for (chatbox, raw_history, textbox).
    """
    global stop_generation
    # Flipping the flag makes the streaming loops bail on their next check.
    stop_generation = True
    cleared_chat, cleared_history, cleared_text = [], [], ""
    return cleared_chat, cleared_history, cleared_text
130
+
131
def format_display(raw_hist):
    """Project raw history into Chatbot messages, preferring rendered HTML.

    Each entry keeps its role; `display_content` (rendered HTML) wins over
    the plain `content` when present.
    """
    display = []
    for message in raw_hist:
        shown = message.get("display_content", message["content"])
        display.append({"role": message["role"], "content": shown})
    return display
133
+
134
# --- GRADIO UI ---
with gr.Blocks(title="GPT-OSS Safeguard 20B", theme=gr.themes.Soft(primary_hue="orange")) as demo:
    gr.HTML("<h1 style='text-align:center;'>🛡️ GPT-OSS Safeguard 20B</h1>")

    # Per-session raw conversation state (list of role/content dicts).
    raw_history = gr.State([])

    with gr.Row():
        with gr.Column(scale=4):
            # sanitize_html=False so the rendered reasoning <details> HTML
            # produced by _render_response is displayed as-is.
            chatbox = gr.Chatbot(label="Safety Analysis", type="messages", height=550, sanitize_html=False)
            textbox = gr.Textbox(label="Input Content", placeholder="Enter content to analyze against policy...", lines=3)
            with gr.Row():
                send_btn = gr.Button("Analyze", variant="primary")
                clear_btn = gr.Button("Reset")

        with gr.Column(scale=1):
            think_toggle = gr.Checkbox(label="Enable Deep Reasoning", value=True)
            temp_slider = gr.Slider(0, 1.0, value=0.7, label="Temperature")
            sys_prompt = gr.Textbox(
                label="Safety Policy (System)",
                value="You are a safety classifier. Analyze the user content against the following policy: [Insert Policy Here]. Output your decision clearly.",
                lines=10
            )

    def wrapper(*args):
        # Re-render the raw history for the Chatbot on every streamed update,
        # forwarding the state and textbox values unchanged.
        for hist, raw_upd, txt_val in chat(*args):
            yield format_display(hist), raw_upd, txt_val

    # Button click and textbox Enter share the same streaming handler.
    send_btn.click(wrapper, [textbox, raw_history, sys_prompt, think_toggle, temp_slider], [chatbox, raw_history, textbox])
    textbox.submit(wrapper, [textbox, raw_history, sys_prompt, think_toggle, temp_slider], [chatbox, raw_history, textbox])
    clear_btn.click(reset_chat, outputs=[chatbox, raw_history, textbox])
164
 
165
if __name__ == "__main__":
    # queue() is required for generator (streaming) event handlers.
    # NOTE(review): ssr_mode=False presumably works around a server-side
    # rendering issue in the hosting environment — confirm before removing.
    demo.queue().launch(ssr_mode=False)