import json
import logging
from typing import List, Generator, Optional
from openai import OpenAI


def request_generation(
    api_key: str,
    api_base: str,
    message: str,
    system_prompt: str,
    model_name: str,
    chat_history: Optional[List[dict]] = None,
    temperature: float = 0.3,
    max_new_tokens: int = 1024,
    reasoning_effort: str = "off",
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[str] = None,
) -> Generator[str, None, None]:
    """
    Streams Responses API events. Emits:
      - "analysis" sentinel once, then raw reasoning deltas
      - "assistantfinal" sentinel once, then visible output deltas
    If no visible deltas, emits a tool-call fallback message.
    """
    client = OpenAI(api_key=api_key, base_url=api_base)

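    # Rebuild the conversation for the "input" field; system messages are dropped
    # because the system prompt is passed separately via "instructions".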
    input_messages: List[dict] = []
    if chat_history:
        input_messages.extend(m for m in chat_history if m.get("role") != "system")
    input_messages.append({"role": "user", "content": message})

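    # Responses API request: stream=True yields typed events rather than a single response.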
    request_args = {
        "model": model_name,
        "input": input_messages,
        "instructions": system_prompt,
        "temperature": temperature,
        "max_output_tokens": max_new_tokens,
        "reasoning": {
            "effort": reasoning_effort,
            "generate_summary": "detailed",
            "summary": "detailed",
        },
        "stream": True,
    }
    if tools:
        request_args["tools"] = tools
    if tool_choice:
        request_args["tool_choice"] = tool_choice


    raw_reasoning, raw_visible = [], []

    try:
        stream = client.responses.create(**request_args)

        reasoning_started = False
        reasoning_closed = False
        saw_visible_output = False
        last_tool_name = None
        last_tool_args = None
        buffer = ""

        for event in stream:
            et = getattr(event, "type", "")

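            # Reasoning deltas: emit the "analysis" sentinel once, then stream the raw text.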
            if et == "response.reasoning_text.delta":
                if not reasoning_started:
                    yield "analysis"
                    reasoning_started = True
                rdelta = getattr(event, "delta", "") or ""
                if rdelta:
                    raw_reasoning.append(rdelta)
                    yield rdelta
                continue

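            # Visible answer deltas: close the reasoning phase (if any), then buffer and
            # flush on newlines or every ~150 characters to avoid tiny yields.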
            if et == "response.output_text.delta":
                if reasoning_started and not reasoning_closed:
                    yield "assistantfinal"
                    reasoning_closed = True

                saw_visible_output = True
                delta = getattr(event, "delta", "") or ""
                raw_visible.append(delta)
                buffer += delta

                if "\n" in buffer or len(buffer) > 150:
                    yield buffer
                    buffer = ""
                continue

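            # Tool / function-call events: remember the last requested tool and its
            # arguments so the fallback message below can describe them.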
            if et.startswith("response.tool") or et.startswith("response.function_call"):
                name = getattr(event, "name", None)
                args = getattr(event, "arguments", None)
                if args is None:
                    args = getattr(event, "args", None) or getattr(event, "delta", None) or getattr(event, "data", None)
                if name:
                    last_tool_name = name
                if args is not None:
                    last_tool_args = args
                continue

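            # Terminal events: flush any buffered text, close the reasoning phase, and
            # fall back to a descriptive message when the model produced no visible output.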
            if et in ("response.completed", "response.error"):
                if buffer:
                    yield buffer
                    buffer = ""

                if reasoning_started and not reasoning_closed:
                    yield "assistantfinal"
                    reasoning_closed = True

                if not saw_visible_output:
                    msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
                    if last_tool_name:
                        try:
                            args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
                        except Exception:
                            args_text = str(last_tool_args)
                        msg += f"\n\n• Tool requested: **{last_tool_name}**\n• Arguments: `{args_text}`"
                    yield msg

                if et == "response.error":
                    err = getattr(event, "error", None)
                    emsg = getattr(err, "message", "") if err else "Unknown error"
                    yield f"Error: {emsg}"
                break

        if buffer:
            yield buffer

    except Exception as e:
        logging.exception("[Gateway] Streaming failed")
        yield f"Error: {e}"