Update app.py
app.py CHANGED
@@ -19,7 +19,7 @@ GPT_OSS_HEADERS = {
     'x-selected-model': 'gpt-oss-120b',
 }
 
-# 3.
+# 3. Root route
 @app.route('/', methods=['GET'])
 def root():
     return jsonify({
@@ -29,11 +29,11 @@ def root():
         "features": {
             "stream": "supported",
             "non_stream": "supported",
-            "conditional_reasoning": "
+            "conditional_reasoning": "in streaming mode the reasoning process is shown via <think> tags"
         }
     })
 
-# 4.
+# 4. Core API endpoint
 @app.route('/v1/chat/completions', methods=['POST'])
 def chat_completions_proxy():
     try:
@@ -48,8 +48,6 @@ def chat_completions_proxy():
     except Exception as e:
         return jsonify({"error": f"Invalid request format: {e}"}), 400
 
-    # (1) Conditionally set the request headers
-    # Copy the headers so the global constant is not mutated
     request_headers = GPT_OSS_HEADERS.copy()
     if stream_requested:
         request_headers['x-show-reasoning'] = 'true'
@@ -63,7 +61,6 @@ def chat_completions_proxy():
         "params": {"input": {"text": user_prompt, "content": [{"type": "input_text", "text": user_prompt}]}}
     }
 
-    # (2) Internal generator that fetches and parses the backend data
     def _internal_proxy_stream():
         try:
             with requests.post(
@@ -81,27 +78,47 @@ def chat_completions_proxy():
             except json.JSONDecodeError:
                 continue
         except requests.exceptions.RequestException as e:
-            # Raise the error inside the generator so the caller can catch it
             raise IOError(f"Failed to communicate with the backend service: {e}")
 
-    # (3) Decide how to format the response based on the client's request
     if stream_requested:
-        # The client requested a streaming response
         def stream_formatter():
             try:
+                # --- The key change is here ---
+                # Variable that accumulates all reasoning steps
+                all_thoughts = []
+
                 for gpt_oss_data in _internal_proxy_stream():
                     event_type = gpt_oss_data.get('type')
 
-                    #
+                    # Step 1: capture and buffer every reasoning entry
                     if (event_type == 'thread.item_updated' and
                         gpt_oss_data.get('update', {}).get('type') == 'cot.entry_added'):
                         thought = gpt_oss_data['update']['entry']['content']
-
-
-
-
+                        all_thoughts.append(thought)
+
+                    # Step 2: when the first text delta arrives, format and send all buffered thoughts at once
+                    if (event_type == 'thread.item_updated' and
+                        gpt_oss_data.get('update', {}).get('type') == 'assistant_message.content_part.text_delta' and
+                        all_thoughts):  # ensures this fires only once
+
+                        # Format all reasoning steps
+                        formatted_thoughts = "<think>\n"
+                        for i, t in enumerate(all_thoughts, 1):
+                            formatted_thoughts += f" Step {i}: {t}\n"
+                        formatted_thoughts += "</think>\n\n"
+
+                        # Build a standard OpenAI stream chunk to carry the reasoning
+                        openai_chunk = {
+                            "id": f"chatcmpl-{str(uuid.uuid4())}", "object": "chat.completion.chunk",
+                            "created": int(time.time()), "model": "gpt-oss-120b",
+                            "choices": [{"index": 0, "delta": {"content": formatted_thoughts}, "finish_reason": None}]
+                        }
+                        yield f"data: {json.dumps(openai_chunk)}\n\n"
+
+                        # Clear the buffer so the thoughts are not sent twice
+                        all_thoughts = []
 
-                    #
+                    # Step 3: forward text deltas as usual
                     if (event_type == 'thread.item_updated' and
                         gpt_oss_data.get('update', {}).get('type') == 'assistant_message.content_part.text_delta'):
                         delta_content = gpt_oss_data['update'].get('delta', '')
@@ -120,35 +137,20 @@ def chat_completions_proxy():
         return Response(stream_formatter(), mimetype='text/event-stream')
 
     else:
-        #
+        # The non-streaming logic is unchanged
         try:
             full_response_content = ""
             for gpt_oss_data in _internal_proxy_stream():
                 event_type = gpt_oss_data.get('type')
-                # Only the final text deltas matter here
                 if (event_type == 'thread.item_updated' and
                     gpt_oss_data.get('update', {}).get('type') == 'assistant_message.content_part.text_delta'):
                     full_response_content += gpt_oss_data['update'].get('delta', '')
 
-            # Build a standard OpenAI non-streaming JSON response
             final_response = {
-                "id": f"chatcmpl-{str(uuid.uuid4())}",
-                "object": "chat.completion",
-                "created": int(time.time()),
+                "id": f"chatcmpl-{str(uuid.uuid4())}", "object": "chat.completion", "created": int(time.time()),
                 "model": "gpt-oss-120b",
-                "choices": [
-                    {
-                        "index": 0,
-                        "message": {
-                            "role": "assistant",
-                            "content": full_response_content.strip()
-                        },
-                        "finish_reason": "stop"
-                    }
-                ],
-                "usage": {  # provide a mock usage object
-                    "prompt_tokens": None, "completion_tokens": None, "total_tokens": None
-                }
+                "choices": [{"index": 0, "message": {"role": "assistant", "content": full_response_content.strip()}, "finish_reason": "stop"}],
+                "usage": {"prompt_tokens": None, "completion_tokens": None, "total_tokens": None}
             }
             return jsonify(final_response)
 
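After this change, the first assistant_message.content_part.text_delta event triggers one extra OpenAI-style chunk carrying every buffered cot.entry_added thought wrapped in <think> tags, and the normal text deltas follow. Below is a minimal client sketch of what this looks like on the wire; the proxy address, the exact request body, and the [DONE] sentinel are illustrative assumptions, not shown in this diff:

# Hypothetical client for the proxy above; the URL, request schema and
# [DONE] sentinel are assumptions, not confirmed by this diff.
import json
import requests

resp = requests.post(
    "http://localhost:5000/v1/chat/completions",  # assumed local address
    json={"messages": [{"role": "user", "content": "Why is the sky blue?"}],
          "stream": True},  # takes the stream_requested path
    stream=True,
)
for line in resp.iter_lines(decode_unicode=True):
    if not line or not line.startswith("data: "):
        continue
    payload = line[len("data: "):]
    if payload == "[DONE]":  # if the proxy emits the OpenAI sentinel
        break
    chunk = json.loads(payload)
    # The first content delta is the whole <think>...</think> block;
    # subsequent deltas are the answer text.
    print(chunk["choices"][0]["delta"].get("content", ""), end="", flush=True)

One design note: because the thoughts are buffered until the first answer token arrives, the client sees no output while the model is still reasoning; streaming each cot.entry_added as its own chunk would trade the single tidy <think> block for lower perceived latency.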
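The non-streaming path returns one aggregated chat.completion object with mocked usage counters, so an OpenAI-style client can parse it unchanged. A sketch under the same assumed address and request schema:

# Hypothetical non-streaming call; the URL and request body are assumptions.
import requests

resp = requests.post(
    "http://localhost:5000/v1/chat/completions",
    json={"messages": [{"role": "user", "content": "Hello"}], "stream": False},
)
data = resp.json()
print(data["choices"][0]["message"]["content"])
# data["usage"] carries the mocked counters from the proxy:
# {"prompt_tokens": None, "completion_tokens": None, "total_tokens": None}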