ds

Sleeping

App Files Files Community

yangtb24 committed on Jan 20, 2025

Commit

ae93e33

verified ·

1 Parent(s): 27ad875

Update app.py

Browse files

Files changed (1) hide show

app.py +130 -75

app.py CHANGED Viewed

@@ -431,51 +431,139 @@ def handsome_chat_completions():
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
-                        chunk_data_list = chunk.decode("utf-8").split("\n\n")
-                        for chunk_data in chunk_data_list:
-                            if not chunk_data:
-                                continue
-                            if chunk_data.startswith("data:"):
-                                chunk_data = chunk_data[5:].strip()
-                                if chunk_data == "[DONE]":
-                                    continue
-                                try:
-                                    response_json = json.loads(chunk_data)
-                                    if (
-                                        model_name == "deepseek-reasoner" and
-                                        "choices" in response_json and
-                                        len(response_json["choices"]) > 0
-                                    ):
-                                        delta = response_json["choices"][0].get("delta", {})
-                                        new_content = ""
-                                        if "reasoning_content" in delta and delta["reasoning_content"] is not None:
-                                            new_content += "> " + delta["reasoning_content"]
-                                        if "content" in delta and delta["content"] is not None:
-                                            new_content += delta["content"]
-                                        if new_content:
-                                            response_json["choices"][0]["delta"] = {"content": new_content}
-                                            yield f"data: {json.dumps(response_json)}\n\n".encode("utf-8")
-                                    else:
-                                        yield f"data: {chunk_data}\n\n".encode("utf-8")
-                                except (
-                                    KeyError,
-                                    ValueError,
-                                    IndexError
-                                ) as e:
-                                    logging.error(
-                                        f"解析流式响应单行 JSON 失败: {e}, "
-                                        f"行内容: {chunk_data}"
-                                    )
                 end_time = time.time()
                 first_token_time = (
@@ -484,41 +572,6 @@ def handsome_chat_completions():
                 )
                 total_time = end_time - start_time
-                prompt_tokens = 0
-                completion_tokens = 0
-                for line in full_response_content.splitlines():
-                    if line.startswith("data:"):
-                        line = line[5:].strip()
-                        if line == "[DONE]":
-                            continue
-                        try:
-                            response_json = json.loads(line)
-                            if (
-                                "usage" in response_json and
-                                "completion_tokens" in response_json["usage"]
-                            ):
-                                completion_tokens += response_json[
-                                    "usage"
-                                ]["completion_tokens"]
-                            if (
-                                "usage" in response_json and
-                                "prompt_tokens" in response_json["usage"]
-                            ):
-                                prompt_tokens = response_json[
-                                    "usage"
-                                ]["prompt_tokens"]
-                        except (
-                            KeyError,
-                            ValueError,
-                            IndexError
-                        ) as e:
-                            logging.error(
-                                f"解析流式响应单行 JSON 失败: {e}, "
-                                f"行内容: {line}"
-                            )
                 user_content = ""
                 messages = data.get("messages", [])
@@ -542,7 +595,7 @@ def handsome_chat_completions():
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
                 logging.info(
                     f"使用的key: {api_key}, "
                     f"提示token: {prompt_tokens}, "
@@ -562,6 +615,7 @@ def handsome_chat_completions():
                 content_type=response.headers['Content-Type']
             )
         else:
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
@@ -663,6 +717,7 @@ def handsome_chat_completions():
         logging.error(f"请求转发异常: {e}")
         return jsonify({"error": str(e)}), 500
 if __name__ == '__main__':
     logging.info(f"环境变量：{os.environ}")

             def generate():
                 first_chunk_time = None
                 full_response_content = ""
+                reasoning_content_accumulated = ""
+                content_accumulated = ""
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
+                        try:
+                            for line in chunk.decode("utf-8").splitlines():
+                                if line.startswith("data:"):
+                                    line = line[5:].strip()
+                                    if line == "[DONE]":
+                                        continue
+                                    try:
+                                        response_json = json.loads(line)
+                                        if (
+                                            "usage" in response_json and
+                                            "completion_tokens" in response_json["usage"]
+                                        ):
+                                            completion_tokens = response_json[
+                                                "usage"
+                                            ]["completion_tokens"]
+                                        # Special handling for deepseek-reasoner in streaming mode
+                                        if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
+                                            delta = response_json["choices"][0].get("delta", {})
+                                            if "reasoning_content" in delta and delta["reasoning_content"]:
+                                                reasoning_content = delta["reasoning_content"]
+                                                formatted_reasoning_chunk = {
+                                                    "id": response_json.get("id", ""),
+                                                    "object": "chat.completion.chunk",
+                                                    "created": response_json.get("created", int(time.time())),
+                                                    "model": model_name,
+                                                    "choices": [
+                                                        {
+                                                            "index": 0,
+                                                            "delta": {
+                                                                "content": f"```Thinking\n{reasoning_content}\n```",
+                                                            },
+                                                            "finish_reason": None
+                                                        }
+                                                    ],
+                                                    "usage": None,
+                                                }
+                                                yield f"data: {json.dumps(formatted_reasoning_chunk)}\n\n".encode('utf-8')
+                                            if "content" in delta and delta["content"]:
+                                                content = delta["content"]
+                                                formatted_content_chunk = {
+                                                    "id": response_json.get("id", ""),
+                                                    "object": "chat.completion.chunk",
+                                                    "created": response_json.get("created", int(time.time())),
+                                                    "model": model_name,
+                                                    "choices": [
+                                                        {
+                                                            "index": 0,
+                                                            "delta": {
+                                                                "content": content,
+                                                            },
+                                                            "finish_reason": None
+                                                        }
+                                                    ],
+                                                    "usage": None,
+                                                }
+                                                yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
+                                        elif "choices" in response_json and len(response_json["choices"]) > 0:
+                                            # Handle other models normally
+                                            delta = response_json["choices"][0].get("delta", {})
+                                            if "content" in delta and delta["content"]:
+                                                formatted_content_chunk = {
+                                                    "id": response_json.get("id", ""),
+                                                    "object": "chat.completion.chunk",
+                                                    "created": response_json.get("created", int(time.time())),
+                                                    "model": model_name,
+                                                    "choices": [
+                                                        {
+                                                            "index": 0,
+                                                            "delta": {
+                                                                "content": delta["content"],
+                                                            },
+                                                            "finish_reason": None
+                                                        }
+                                                    ],
+                                                    "usage": None,
+                                                }
+                                                yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
+                                        if (
+                                            "usage" in response_json and
+                                            "prompt_tokens" in response_json["usage"]
+                                        ):
+                                            prompt_tokens = response_json[
+                                                "usage"
+                                            ]["prompt_tokens"]
+                                    except (
+                                        KeyError,
+                                        ValueError,
+                                        IndexError
+                                    ) as e:
+                                        logging.error(
+                                            f"解析流式响应单行 JSON 失败: {e}, "
+                                            f"行内容: {line}"
+                                        )
+                        except Exception as e:
+                             logging.error(f"处理流式响应失败：{e}")
+                # Send the [DONE] message after all chunks have been processed
+                done_chunk = {
+                    "id": response_json.get("id", ""),
+                    "object": "chat.completion.chunk",
+                    "created": response_json.get("created", int(time.time())),
+                    "model": model_name,
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": {},
+                            "finish_reason": "stop"
+                        }
+                    ],
+                    "usage": {
+                        "completion_tokens": completion_tokens,
+                        "prompt_tokens": prompt_tokens,
+                        "total_tokens": prompt_tokens + completion_tokens
+                    },
+                }
+                yield f"data: {json.dumps(done_chunk)}\n\n".encode('utf-8')
                 end_time = time.time()
                 first_token_time = (
                 )
                 total_time = end_time - start_time
                 user_content = ""
                 messages = data.get("messages", [])
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
                 logging.info(
                     f"使用的key: {api_key}, "
                     f"提示token: {prompt_tokens}, "
                 content_type=response.headers['Content-Type']
             )
         else:
+            # Non-streaming path: raise on HTTP error status, then parse the whole body as JSON.
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
         logging.error(f"请求转发异常: {e}")
         return jsonify({"error": str(e)}), 500
 if __name__ == '__main__':
     logging.info(f"环境变量：{os.environ}")