sehsapneb committed
Commit 39bacc7 · verified · 1 Parent(s): e572557

Update app.py

Files changed (1)
  1. app.py +36 -34
app.py CHANGED
@@ -19,7 +19,7 @@ GPT_OSS_HEADERS = {
    'x-selected-model': 'gpt-oss-120b',
}

- # 3. New: root route, used for health checks and the welcome message
+ # 3. Root route
@app.route('/', methods=['GET'])
def root():
    return jsonify({
@@ -29,11 +29,11 @@ def root():
        "features": {
            "stream": "supported",
            "non_stream": "supported",
-             "conditional_reasoning": "reasoning is shown only in streaming mode"
+             "conditional_reasoning": "reasoning is shown via <think> tags in streaming mode"
        }
    })

- # 4. Core: create the OpenAI-compatible API endpoint
+ # 4. Core API endpoint
@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions_proxy():
    try:
@@ -48,8 +48,6 @@ def chat_completions_proxy():
    except Exception as e:
        return jsonify({"error": f"Invalid request format: {e}"}), 400

-     # (1) Conditionally set the request headers
-     # Copy the headers so the global constant is not modified
    request_headers = GPT_OSS_HEADERS.copy()
    if stream_requested:
        request_headers['x-show-reasoning'] = 'true'
@@ -63,7 +61,6 @@
        "params": {"input": {"text": user_prompt, "content": [{"type": "input_text", "text": user_prompt}]}}
    }

-     # (2) Internal generator that fetches and parses the backend data
    def _internal_proxy_stream():
        try:
            with requests.post(
@@ -81,27 +78,47 @@
                        except json.JSONDecodeError:
                            continue
        except requests.exceptions.RequestException as e:
-             # Raise the error inside the generator so callers can catch it
            raise IOError(f"Failed to communicate with the backend service: {e}")

-     # (3) Decide how to format the response based on the client request
    if stream_requested:
-         # The client requested a streaming response
        def stream_formatter():
            try:
+                 # --- The key change is here ---
+                 # Buffer that collects all reasoning entries
+                 all_thoughts = []
+
                for gpt_oss_data in _internal_proxy_stream():
                    event_type = gpt_oss_data.get('type')

-                     # Format the reasoning entry
+                     # Step 1: capture and buffer every reasoning entry
                    if (event_type == 'thread.item_updated' and
                        gpt_oss_data.get('update', {}).get('type') == 'cot.entry_added'):
                        thought = gpt_oss_data['update']['entry']['content']
-                         # We could send a custom stream chunk here, or ignore it
-                         # Here we build a custom chunk that carries the reasoning
-                         reasoning_chunk = { "reasoning": thought }
-                         yield f"data: {json.dumps(reasoning_chunk)}\n\n"
+                         all_thoughts.append(thought)
+
+                     # Step 2: when the first text delta arrives, format and send all buffered reasoning at once
+                     if (event_type == 'thread.item_updated' and
+                         gpt_oss_data.get('update', {}).get('type') == 'assistant_message.content_part.text_delta' and
+                         all_thoughts):  # ensures this is only sent once
+
+                         # Format all reasoning entries
+                         formatted_thoughts = "<think>\n"
+                         for i, t in enumerate(all_thoughts, 1):
+                             formatted_thoughts += f" Step {i}: {t}\n"
+                         formatted_thoughts += "</think>\n\n"
+
+                         # Build a standard OpenAI stream chunk to carry the reasoning
+                         openai_chunk = {
+                             "id": f"chatcmpl-{str(uuid.uuid4())}", "object": "chat.completion.chunk",
+                             "created": int(time.time()), "model": "gpt-oss-120b",
+                             "choices": [{"index": 0, "delta": {"content": formatted_thoughts}, "finish_reason": None}]
+                         }
+                         yield f"data: {json.dumps(openai_chunk)}\n\n"
+
+                         # Clear the buffer so the reasoning is not sent again
+                         all_thoughts = []

-                     # Format text deltas
+                     # Step 3: forward text deltas as usual
                    if (event_type == 'thread.item_updated' and
                        gpt_oss_data.get('update', {}).get('type') == 'assistant_message.content_part.text_delta'):
                        delta_content = gpt_oss_data['update'].get('delta', '')
@@ -120,35 +137,20 @@
        return Response(stream_formatter(), mimetype='text/event-stream')

    else:
-         # The client requested a non-streaming response
+         # Non-streaming logic is unchanged
        try:
            full_response_content = ""
            for gpt_oss_data in _internal_proxy_stream():
                event_type = gpt_oss_data.get('type')
-                 # We only care about the final text deltas
                if (event_type == 'thread.item_updated' and
                    gpt_oss_data.get('update', {}).get('type') == 'assistant_message.content_part.text_delta'):
                    full_response_content += gpt_oss_data['update'].get('delta', '')

-             # Build a standard OpenAI non-streaming JSON response
            final_response = {
-                 "id": f"chatcmpl-{str(uuid.uuid4())}",
-                 "object": "chat.completion",
-                 "created": int(time.time()),
+                 "id": f"chatcmpl-{str(uuid.uuid4())}", "object": "chat.completion", "created": int(time.time()),
                "model": "gpt-oss-120b",
-                 "choices": [
-                     {
-                         "index": 0,
-                         "message": {
-                             "role": "assistant",
-                             "content": full_response_content.strip()
-                         },
-                         "finish_reason": "stop"
-                     }
-                 ],
-                 "usage": {  # Provide a mock usage object
-                     "prompt_tokens": None, "completion_tokens": None, "total_tokens": None
-                 }
+                 "choices": [{"index": 0, "message": {"role": "assistant", "content": full_response_content.strip()}, "finish_reason": "stop"}],
+                 "usage": {"prompt_tokens": None, "completion_tokens": None, "total_tokens": None}
            }
            return jsonify(final_response)

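For reference, below is a minimal sketch of how a client might consume the proxy's streaming output after this change. The /v1/chat/completions path, the chat.completion.chunk shape, and the leading <think>...</think> block come from the code above; the base URL, port, request-body fields, and the [DONE] sentinel are illustrative assumptions, since the request-parsing and stream-termination code is not part of this diff.

# Sketch of a streaming client for the proxy (assumptions noted above).
import json
import requests

BASE_URL = "http://localhost:7860"  # hypothetical address; adjust to your deployment

payload = {
    "model": "gpt-oss-120b",
    "messages": [{"role": "user", "content": "Why is the sky blue?"}],
    "stream": True,  # streaming is what makes the proxy request reasoning upstream
}

full_text = ""
with requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for raw_line in resp.iter_lines():
        if not raw_line:
            continue
        line = raw_line.decode("utf-8")
        if not line.startswith("data: "):
            continue
        data = line[len("data: "):].strip()
        if data == "[DONE]":  # assumed OpenAI-style sentinel; not shown in this diff
            break
        chunk = json.loads(data)
        delta = chunk["choices"][0].get("delta", {})
        full_text += delta.get("content", "")

# The reasoning arrives inline as a leading <think>...</think> block,
# followed by the regular answer text.
if full_text.startswith("<think>"):
    reasoning, _, answer = full_text.partition("</think>")
    print("Reasoning:\n" + reasoning[len("<think>"):].strip())
    print("\nAnswer:\n" + answer.strip())
else:
    print(full_text)

Because the reasoning is delivered as ordinary content inside standard chunks, any OpenAI-compatible client will display it; clients that recognize <think> tags can collapse or hide it.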