Deepsider2api

Sleeping

App Files Community

OraCatQAQ committed on Mar 26, 2025

Commit

f48a33c

1 Parent(s): e40cdb9

Add reasoning_content field to ChatMessage model and update response handling in generate_openai_response and stream_openai_response functions to include reasoning content. Enhance create_chat_completion to accumulate and return reasoning content in responses.

Browse files

Files changed (1) hide show

app.py +56 -18

app.py CHANGED Viewed

@@ -103,6 +103,7 @@ class ChatMessage(BaseModel):
     role: str
     content: str
     name: Optional[str] = None
 class ChatCompletionRequest(BaseModel):
     model: str
@@ -217,10 +218,10 @@ def format_messages_for_deepsider(messages: List[ChatMessage]) -> str:
     return prompt.strip()
-async def generate_openai_response(full_response: str, request_id: str, model: str) -> Dict:
     """生成符合OpenAI API响应格式的完整响应"""
     timestamp = int(time.time())
-    return {
         "id": f"chatcmpl-{request_id}",
         "object": "chat.completion",
         "created": timestamp,
@@ -236,19 +237,26 @@ async def generate_openai_response(full_response: str, request_id: str, model: s
             }
         ],
         "usage": {
-            "prompt_tokens": 0,  # 无法准确计算
-            "completion_tokens": 0,  # 无法准确计算
-            "total_tokens": 0  # 无法准确计算
         }
     }
 async def stream_openai_response(response, request_id: str, model: str, api_key, token_index):
     """流式返回OpenAI API格式的响应"""
     timestamp = int(time.time())
     full_response = ""
     try:
-        # 修改1：使用iter_content替代iter_lines
         buffer = bytearray()
         for chunk in response.iter_content(chunk_size=None):
             if chunk:
@@ -259,14 +267,14 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
                     for line in lines[:-1]:
                         if line.startswith('data: '):
-                            # 修改2：增加异常捕获和日志
                             try:
                                 data = json.loads(line[6:])
-                                # 修改3：增加调试日志
                                 logger.debug(f"Received data: {data}")
                                 if data.get('code') == 202 and data.get('data', {}).get('type') == "chat":
                                     content = data.get('data', {}).get('content', '')
                                     if content:
                                         full_response += content
                                         chunk = {
@@ -285,6 +293,26 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
                                             ]
                                         }
                                         yield f"data: {json.dumps(chunk)}\n\n"
                                 elif data.get('code') == 203:
                                     # 生成完成信号
@@ -316,12 +344,6 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
     except Exception as e:
         logger.error(f"流式响应处理出错: {str(e)}")
-        # 尝试使用下一个Token
-        tokens = api_key.split(',')
-        if len(tokens) > 1:
-            logger.info(f"尝试使用下一个Token重试请求")
-            # 目前我们不在这里实现自动重试，只记录错误
         # 返回错误信息
         error_chunk = {
             "id": f"chatcmpl-{request_id}",
@@ -366,7 +388,7 @@ async def list_models(api_key: str = Depends(verify_api_key)):
 @app.post("/v1/chat/completions")
 async def create_chat_completion(
     request: Request,
-    api_key: str = Depends(verify_api_key)  # 这里返回的是 DEEPSIDER_TOKEN
 ):
     """创建聊天完成API - 支持普通请求和流式请求"""
     # 解析请求体
@@ -390,8 +412,16 @@ async def create_chat_completion(
         "timezone": "Asia/Shanghai"
     }
-    # 获取请求头（使用 DEEPSIDER_TOKEN）
-    headers = get_headers(api_key)  # api_key 现在是 DEEPSIDER_TOKEN
     try:
         response = requests.post(
@@ -432,6 +462,8 @@ async def create_chat_completion(
         else:
             # 收集完整响应
             full_response = ""
             for line in response.iter_lines():
                 if not line:
                     continue
@@ -442,14 +474,20 @@ async def create_chat_completion(
                         if data.get('code') == 202 and data.get('data', {}).get('type') == "chat":
                             content = data.get('data', {}).get('content', '')
                             if content:
                                 full_response += content
                     except json.JSONDecodeError:
                         pass
             # 返回OpenAI格式的完整响应
-            return await generate_openai_response(full_response, request_id, chat_request.model)
     except requests.Timeout as e:
         logger.error(f"请求超时: {str(e)}")

     role: str
     content: str
     name: Optional[str] = None
+    reasoning_content: Optional[str] = None  # 添加思维链内容字段
 class ChatCompletionRequest(BaseModel):
     model: str
     return prompt.strip()
+async def generate_openai_response(full_response: str, request_id: str, model: str, reasoning_content: str = None) -> Dict:
     """生成符合OpenAI API响应格式的完整响应"""
     timestamp = int(time.time())
+    response_data = {
         "id": f"chatcmpl-{request_id}",
         "object": "chat.completion",
         "created": timestamp,
             }
         ],
         "usage": {
+            "prompt_tokens": 0,
+            "completion_tokens": 0,
+            "total_tokens": 0
         }
     }
+    # 如果有思维链内容，添加到响应中
+    if reasoning_content:
+        response_data["choices"][0]["message"]["reasoning_content"] = reasoning_content
+    return response_data
 async def stream_openai_response(response, request_id: str, model: str, api_key, token_index):
     """流式返回OpenAI API格式的响应"""
     timestamp = int(time.time())
     full_response = ""
+    full_reasoning = ""  # 添加思维链内容累积变量
     try:
+        # 使用iter_content替代iter_lines
         buffer = bytearray()
         for chunk in response.iter_content(chunk_size=None):
             if chunk:
                     for line in lines[:-1]:
                         if line.startswith('data: '):
                             try:
                                 data = json.loads(line[6:])
                                 logger.debug(f"Received data: {data}")
                                 if data.get('code') == 202 and data.get('data', {}).get('type') == "chat":
                                     content = data.get('data', {}).get('content', '')
+                                    reasoning_content = data.get('data', {}).get('reasoning_content', '')
                                     if content:
                                         full_response += content
                                         chunk = {
                                             ]
                                         }
                                         yield f"data: {json.dumps(chunk)}\n\n"
+                                    # 处理思维链内容
+                                    if reasoning_content:
+                                        full_reasoning += reasoning_content
+                                        reasoning_chunk = {
+                                            "id": f"chatcmpl-{request_id}",
+                                            "object": "chat.completion.chunk",
+                                            "created": timestamp,
+                                            "model": model,
+                                            "choices": [
+                                                {
+                                                    "index": 0,
+                                                    "delta": {
+                                                        "reasoning_content": reasoning_content
+                                                    },
+                                                    "finish_reason": None
+                                                }
+                                            ]
+                                        }
+                                        yield f"data: {json.dumps(reasoning_chunk)}\n\n"
                                 elif data.get('code') == 203:
                                     # 生成完成信号
     except Exception as e:
         logger.error(f"流式响应处理出错: {str(e)}")
         # 返回错误信息
         error_chunk = {
             "id": f"chatcmpl-{request_id}",
 @app.post("/v1/chat/completions")
 async def create_chat_completion(
     request: Request,
+    api_key: str = Depends(verify_api_key)
 ):
     """创建聊天完成API - 支持普通请求和流式请求"""
     # 解析请求体
         "timezone": "Asia/Shanghai"
     }
+    # 添加其他可选参数
+    if chat_request.temperature is not None:
+        payload["temperature"] = chat_request.temperature
+    if chat_request.top_p is not None:
+        payload["top_p"] = chat_request.top_p
+    if chat_request.max_tokens is not None:
+        payload["max_tokens"] = chat_request.max_tokens
+    # 获取请求头
+    headers = get_headers(api_key)
     try:
         response = requests.post(
         else:
             # 收集完整响应
             full_response = ""
+            full_reasoning = ""  # 思维链内容累积变量
             for line in response.iter_lines():
                 if not line:
                     continue
                         if data.get('code') == 202 and data.get('data', {}).get('type') == "chat":
                             content = data.get('data', {}).get('content', '')
+                            reasoning_content = data.get('data', {}).get('reasoning_content', '')
                             if content:
                                 full_response += content
+                            # 收集思维链内容
+                            if reasoning_content:
+                                full_reasoning += reasoning_content
                     except json.JSONDecodeError:
                         pass
             # 返回OpenAI格式的完整响应
+            return await generate_openai_response(full_response, request_id, chat_request.model, full_reasoning)
     except requests.Timeout as e:
         logger.error(f"请求超时: {str(e)}")