Update app.py
app.py CHANGED
@@ -19,7 +19,7 @@ GPT_OSS_HEADERS = {
     'x-selected-model': 'gpt-oss-120b',
 }
 
-# 3.
+# 3. Root route
 @app.route('/', methods=['GET'])
 def root():
     return jsonify({
@@ -29,11 +29,11 @@ def root():
         "features": {
             "stream": "supported",
             "non_stream": "supported",
-            "conditional_reasoning": "
+            "conditional_reasoning": "in streaming mode the reasoning process is shown via <think> tags"
         }
     })
 
-# 4.
+# 4. Core API endpoint
 @app.route('/v1/chat/completions', methods=['POST'])
 def chat_completions_proxy():
     try:
@@ -48,8 +48,6 @@ def chat_completions_proxy():
     except Exception as e:
         return jsonify({"error": f"Invalid request format: {e}"}), 400
 
-    # (1) Conditionally set the request headers
-    # Copy the headers so the global constant is not mutated
     request_headers = GPT_OSS_HEADERS.copy()
     if stream_requested:
         request_headers['x-show-reasoning'] = 'true'
@@ -63,7 +61,6 @@ def chat_completions_proxy():
         "params": {"input": {"text": user_prompt, "content": [{"type": "input_text", "text": user_prompt}]}}
     }
 
-    # (2) Internal generator that fetches and parses the backend data
     def _internal_proxy_stream():
         try:
             with requests.post(
@@ -81,27 +78,47 @@ def chat_completions_proxy():
             except json.JSONDecodeError:
                 continue
         except requests.exceptions.RequestException as e:
-            # Raise the error inside the generator so the caller can catch it
             raise IOError(f"Failed to communicate with the backend service: {e}")
 
-    # (3) Decide how to format the response based on the client's request
     if stream_requested:
-        # The client requested a streaming response
         def stream_formatter():
             try:
+                # --- The key change is here ---
+                # Variable that accumulates all reasoning steps
+                all_thoughts = []
+
                 for gpt_oss_data in _internal_proxy_stream():
                     event_type = gpt_oss_data.get('type')
 
-                    #
+                    # Step 1: capture and buffer every reasoning entry
                     if (event_type == 'thread.item_updated' and
                         gpt_oss_data.get('update', {}).get('type') == 'cot.entry_added'):
                         thought = gpt_oss_data['update']['entry']['content']
-
-
-
-
+                        all_thoughts.append(thought)
+
+                    # Step 2: when the first text delta arrives, format and send all buffered thoughts at once
+                    if (event_type == 'thread.item_updated' and
+                        gpt_oss_data.get('update', {}).get('type') == 'assistant_message.content_part.text_delta' and
+                        all_thoughts):  # ensures this fires only once
+
+                        # Format all reasoning steps
+                        formatted_thoughts = "<think>\n"
+                        for i, t in enumerate(all_thoughts, 1):
+                            formatted_thoughts += f" Step {i}: {t}\n"
+                        formatted_thoughts += "</think>\n\n"
+
+                        # Build a standard OpenAI stream chunk to carry the reasoning
+                        openai_chunk = {
+                            "id": f"chatcmpl-{str(uuid.uuid4())}", "object": "chat.completion.chunk",
+                            "created": int(time.time()), "model": "gpt-oss-120b",
+                            "choices": [{"index": 0, "delta": {"content": formatted_thoughts}, "finish_reason": None}]
+                        }
+                        yield f"data: {json.dumps(openai_chunk)}\n\n"
+
+                        # Clear the buffer so the thoughts are not sent twice
+                        all_thoughts = []
 
-                    #
+                    # Step 3: forward text deltas as usual
                     if (event_type == 'thread.item_updated' and
                         gpt_oss_data.get('update', {}).get('type') == 'assistant_message.content_part.text_delta'):
                         delta_content = gpt_oss_data['update'].get('delta', '')
@@ -120,35 +137,20 @@ def chat_completions_proxy():
         return Response(stream_formatter(), mimetype='text/event-stream')
 
     else:
-        #
+        # The non-streaming logic is unchanged
         try:
             full_response_content = ""
             for gpt_oss_data in _internal_proxy_stream():
                 event_type = gpt_oss_data.get('type')
-                # Only the final text deltas matter here
                 if (event_type == 'thread.item_updated' and
                     gpt_oss_data.get('update', {}).get('type') == 'assistant_message.content_part.text_delta'):
                     full_response_content += gpt_oss_data['update'].get('delta', '')
 
-            # Build a standard OpenAI non-streaming JSON response
             final_response = {
-                "id": f"chatcmpl-{str(uuid.uuid4())}",
-                "object": "chat.completion",
-                "created": int(time.time()),
+                "id": f"chatcmpl-{str(uuid.uuid4())}", "object": "chat.completion", "created": int(time.time()),
                 "model": "gpt-oss-120b",
-                "choices": [
-                    {
-                        "index": 0,
-                        "message": {
-                            "role": "assistant",
-                            "content": full_response_content.strip()
-                        },
-                        "finish_reason": "stop"
-                    }
-                ],
-                "usage": {  # provide a mock usage object
-                    "prompt_tokens": None, "completion_tokens": None, "total_tokens": None
-                }
+                "choices": [{"index": 0, "message": {"role": "assistant", "content": full_response_content.strip()}, "finish_reason": "stop"}],
+                "usage": {"prompt_tokens": None, "completion_tokens": None, "total_tokens": None}
             }
             return jsonify(final_response)
 
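After this change, the first assistant_message.content_part.text_delta event triggers one extra OpenAI-style chunk carrying every buffered cot.entry_added thought wrapped in <think> tags, and the normal text deltas follow. Below is a minimal client sketch of what this looks like on the wire; the proxy address, the exact request body, and the [DONE] sentinel are illustrative assumptions, not shown in this diff:

# Hypothetical client for the proxy above; the URL, request schema and
# [DONE] sentinel are assumptions, not confirmed by this diff.
import json
import requests

resp = requests.post(
    "http://localhost:5000/v1/chat/completions",  # assumed local address
    json={"messages": [{"role": "user", "content": "Why is the sky blue?"}],
          "stream": True},  # takes the stream_requested path
    stream=True,
)
for line in resp.iter_lines(decode_unicode=True):
    if not line or not line.startswith("data: "):
        continue
    payload = line[len("data: "):]
    if payload == "[DONE]":  # if the proxy emits the OpenAI sentinel
        break
    chunk = json.loads(payload)
    # The first content delta is the whole <think>...</think> block;
    # subsequent deltas are the answer text.
    print(chunk["choices"][0]["delta"].get("content", ""), end="", flush=True)

One design note: because the thoughts are buffered until the first answer token arrives, the client sees no output while the model is still reasoning; streaming each cot.entry_added as its own chunk would trade the single tidy <think> block for lower perceived latency.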
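The non-streaming path returns one aggregated chat.completion object with mocked usage counters, so an OpenAI-style client can parse it unchanged. A sketch under the same assumed address and request schema:

# Hypothetical non-streaming call; the URL and request body are assumptions.
import requests

resp = requests.post(
    "http://localhost:5000/v1/chat/completions",
    json={"messages": [{"role": "user", "content": "Hello"}], "stream": False},
)
data = resp.json()
print(data["choices"][0]["message"]["content"])
# data["usage"] carries the mocked counters from the proxy:
# {"prompt_tokens": None, "completion_tokens": None, "total_tokens": None}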