import json, logging
from typing import List, Generator, Optional
from openai import OpenAI
def request_generation(
    api_key: str,
    api_base: str,
    message: str,
    system_prompt: str,
    model_name: str,
    chat_history: Optional[List[dict]] = None,
    temperature: float = 0.3,
    max_new_tokens: int = 1024,
    reasoning_effort: str = "off",
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[str] = None,
) -> Generator[str, None, None]:
    """
    Stream OpenAI Responses API events as plain-text chunks.

    Yields, in order:
      - the sentinel "analysis" once, then raw reasoning deltas (if any);
      - the sentinel "assistantfinal" once, then visible output deltas.
    If the stream finishes with no visible output, a fallback message
    describing the last requested tool call is yielded instead.

    Args:
        api_key: API key for the gateway.
        api_base: Base URL of the OpenAI-compatible endpoint.
        message: Latest user message.
        system_prompt: Sent via the Responses API `instructions` field.
        model_name: Model identifier.
        chat_history: Prior turns; system entries are filtered out because
            the system prompt already travels in `instructions`.
        temperature: Sampling temperature.
        max_new_tokens: Token cap (mapped to `max_output_tokens`).
        reasoning_effort: Effort hint for the `reasoning` block.
        tools: Optional tool definitions, forwarded verbatim.
        tool_choice: Optional tool-choice policy, forwarded verbatim.

    Yields:
        str: sentinels, text deltas, and in-band "Error: ..." messages.
        Failures are reported in the stream rather than raised, so callers
        can render them to the user.
    """
    client = OpenAI(api_key=api_key, base_url=api_base)

    input_messages: List[dict] = []
    if chat_history:
        # Drop system messages: the system prompt is supplied separately
        # via `instructions`; duplicating it in the input is undesirable.
        input_messages.extend(m for m in chat_history if m.get("role") != "system")
    input_messages.append({"role": "user", "content": message})

    request_args = {
        "model": model_name,
        "input": input_messages,
        "instructions": system_prompt,
        "temperature": temperature,
        "max_output_tokens": max_new_tokens,
        # NOTE(review): both `generate_summary` (older name) and `summary`
        # are sent — presumably for compatibility across gateway versions.
        # Confirm which one the target endpoint actually honors.
        "reasoning": {
            "effort": reasoning_effort,
            "generate_summary": "detailed",
            "summary": "detailed",
        },
        "stream": True,
    }
    if tools:
        request_args["tools"] = tools
    if tool_choice:
        request_args["tool_choice"] = tool_choice

    raw_reasoning, raw_visible = [], []  # accumulated for debugging/inspection
    try:
        stream = client.responses.create(**request_args)

        reasoning_started = False  # "analysis" sentinel emitted
        final_started = False      # "assistantfinal" sentinel emitted
        saw_visible_output = False
        last_tool_name = None
        last_tool_args = None
        buffer = ""                # coalesces tiny visible deltas into chunks

        for event in stream:
            et = getattr(event, "type", "")

            if et == "response.reasoning_text.delta":
                if not reasoning_started:
                    yield "analysis"
                    reasoning_started = True
                rdelta = getattr(event, "delta", "") or ""
                if rdelta:
                    raw_reasoning.append(rdelta)
                    yield rdelta
                continue

            if et == "response.output_text.delta":
                # Fix: emit "assistantfinal" before the FIRST visible delta
                # even when no reasoning deltas were seen. Previously the
                # sentinel was skipped for non-reasoning responses, breaking
                # the contract documented above.
                if not final_started:
                    yield "assistantfinal"
                    final_started = True
                delta = getattr(event, "delta", "") or ""
                if delta:
                    # Fix: only non-empty deltas count as visible output, so
                    # an all-empty stream still triggers the tool fallback.
                    saw_visible_output = True
                    raw_visible.append(delta)
                    buffer += delta
                    # Flush on newline or ~150 chars to limit chunk churn.
                    if "\n" in buffer or len(buffer) > 150:
                        yield buffer
                        buffer = ""
                continue

            if et.startswith("response.tool") or et.startswith("response.function_call"):
                # Remember the most recent tool request so it can be
                # described if the model never produces visible text.
                name = getattr(event, "name", None)
                args = getattr(event, "arguments", None)
                if args is None:
                    # Different gateway versions expose arguments under
                    # different attribute names; probe them in order.
                    args = getattr(event, "args", None) or getattr(event, "delta", None) or getattr(event, "data", None)
                if name:
                    last_tool_name = name
                if args is not None:
                    last_tool_args = args
                continue

            if et in ("response.completed", "response.error"):
                if buffer:
                    yield buffer
                    buffer = ""
                # Guarantee the "assistantfinal" sentinel is emitted exactly
                # once before any trailing content (fallback or error text).
                if not final_started:
                    yield "assistantfinal"
                    final_started = True
                if not saw_visible_output:
                    msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
                    if last_tool_name:
                        try:
                            args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
                        except Exception:
                            # Arguments may be a non-serializable SDK object.
                            args_text = str(last_tool_args)
                        msg += f"\n\n• Tool requested: **{last_tool_name}**\n• Arguments: `{args_text}`"
                    yield msg
                if et == "response.error":
                    err = getattr(event, "error", None)
                    emsg = getattr(err, "message", "") if err else "Unknown error"
                    yield f"Error: {emsg}"
                break

        # Flush any tail if the stream ended without a terminal event.
        if buffer:
            yield buffer
    except Exception as e:
        # Broad catch is deliberate at this streaming boundary: log the
        # traceback and surface the error in-band to the consumer.
        logging.exception("[Gateway] Streaming failed")
        yield f"Error: {e}"