Spaces:
Paused
Paused
Upload 2 files
Browse files- Dockerfile +2 -1
- app.py +75 -42
Dockerfile
CHANGED
|
@@ -23,4 +23,5 @@ ENV PYTHONUNBUFFERED=1
|
|
| 23 |
EXPOSE 3000
|
| 24 |
|
| 25 |
# 使用 gunicorn 作为生产级 WSGI 服务器
|
| 26 |
-
|
|
|
|
|
|
| 23 |
EXPOSE 3000
|
| 24 |
|
| 25 |
# 使用 gunicorn 作为生产级 WSGI 服务器
|
| 26 |
+
# Run gunicorn as the production-grade WSGI server
# (4 sync workers, 120s worker timeout, 5s keep-alive, bound on all interfaces)
CMD ["gunicorn", "--bind", "0.0.0.0:3000", "--workers", "4", "--timeout", "120", "--keep-alive", "5", "--worker-class", "sync", "app:app"]
|
app.py
CHANGED
|
@@ -98,8 +98,14 @@ class CustomHTTPAdapter(HTTPAdapter):
|
|
| 98 |
|
| 99 |
# 创建自定义的 Session
|
| 100 |
def create_custom_session():
|
|
|
|
| 101 |
session = requests.Session()
|
| 102 |
-
adapter = CustomHTTPAdapter(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
session.mount('https://', adapter)
|
| 104 |
session.mount('http://', adapter)
|
| 105 |
return session
|
|
@@ -510,37 +516,54 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0301"):
|
|
| 510 |
"""计算消息列表中的总令牌数量。"""
|
| 511 |
return sum(count_tokens(str(message), model) for message in messages)
|
| 512 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
def stream_notdiamond_response(response, model):
|
| 514 |
-
"""
|
| 515 |
buffer = ""
|
| 516 |
full_content = ""
|
|
|
|
| 517 |
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
# 创建完整的响应块
|
| 526 |
-
chunk_data = create_openai_chunk(new_content, model)
|
| 527 |
-
|
| 528 |
-
# 确保响应块包含完整的上下文
|
| 529 |
-
if 'choices' in chunk_data and chunk_data['choices']:
|
| 530 |
-
chunk_data['choices'][0]['delta']['content'] = new_content
|
| 531 |
-
chunk_data['choices'][0]['context'] = full_content # 添加完整上下文
|
| 532 |
-
|
| 533 |
-
yield chunk_data
|
| 534 |
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 544 |
|
| 545 |
def handle_non_stream_response(response, model, prompt_tokens):
|
| 546 |
"""改进的非流式响应处理,确保保持完整上下文。"""
|
|
@@ -588,23 +611,32 @@ def handle_non_stream_response(response, model, prompt_tokens):
|
|
| 588 |
logger.error(f"Error processing non-stream response: {e}")
|
| 589 |
raise
|
| 590 |
|
|
|
|
| 591 |
def generate_stream_response(response, model, prompt_tokens):
|
| 592 |
-
"""
|
| 593 |
total_completion_tokens = 0
|
|
|
|
| 594 |
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
|
| 609 |
def get_auth_credentials():
|
| 610 |
"""从API获取认证凭据"""
|
|
@@ -866,7 +898,8 @@ def make_request(payload, auth_manager, model_id):
|
|
| 866 |
url,
|
| 867 |
headers=headers,
|
| 868 |
json=payload,
|
| 869 |
-
stream=True
|
|
|
|
| 870 |
).result()
|
| 871 |
|
| 872 |
if response.status_code == 200 and response.headers.get('Content-Type') == 'text/event-stream':
|
|
|
|
| 98 |
|
| 99 |
# Build a requests Session backed by the project's custom HTTP adapter.
def create_custom_session():
    """Create a custom Session with connection pooling and retry support."""
    session = requests.Session()
    adapter = CustomHTTPAdapter(
        pool_connections=100,
        pool_maxsize=100,
        max_retries=3,
        pool_block=False,
    )
    # Route both schemes through the same pooled adapter.
    for scheme in ('https://', 'http://'):
        session.mount(scheme, adapter)
    return session
|
|
|
|
| 516 |
"""计算消息列表中的总令牌数量。"""
|
| 517 |
return sum(count_tokens(str(message), model) for message in messages)
|
| 518 |
|
| 519 |
+
# Streaming / request tuning constants (source note: "add at top of file").
STREAM_TIMEOUT = 30   # streaming-response inactivity timeout (seconds)
REQUEST_TIMEOUT = 10  # plain request timeout (seconds)
CHUNK_SIZE = 512      # smaller chunk size to speed up per-chunk processing
|
| 523 |
+
|
| 524 |
def stream_notdiamond_response(response, model):
    """Stream an upstream response as OpenAI-style chunk dicts.

    Improved streaming handler: adds an inactivity timeout, per-chunk error
    recovery, and always yields a final 'stop' chunk carrying the full
    accumulated content.

    Fix: the original also accumulated every decoded chunk into a local
    ``buffer`` string that was never read — unbounded memory growth with no
    observable effect — so it has been removed.

    Args:
        response: object exposing ``iter_content(chunk_size=...)`` yielding
            bytes (presumably a ``requests.Response`` — confirm at caller).
        model: model name forwarded to ``create_openai_chunk``.

    Yields:
        dict: OpenAI-style chunk with ``delta.content`` set to the new text
        and ``context`` set to everything decoded so far.
    """
    full_content = ""                # everything decoded so far (sent as 'context')
    last_chunk_time = time.time()

    try:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            current_time = time.time()
            # NOTE(review): iter_content blocks while waiting, so this check
            # only fires after a chunk (or end of stream) actually arrives.
            if current_time - last_chunk_time > STREAM_TIMEOUT:
                logger.warning("Stream timeout reached")
                break

            if chunk:
                try:
                    new_content = chunk.decode('utf-8')
                    full_content += new_content

                    # Build the OpenAI-style chunk for this piece of content.
                    chunk_data = create_openai_chunk(new_content, model)

                    # Ensure the chunk carries both the delta and the full context.
                    if 'choices' in chunk_data and chunk_data['choices']:
                        chunk_data['choices'][0]['delta']['content'] = new_content
                        chunk_data['choices'][0]['context'] = full_content

                    yield chunk_data
                    last_chunk_time = current_time

                except Exception as e:
                    # Best-effort: skip a malformed chunk (e.g. a UTF-8
                    # sequence split across chunk boundaries) and keep going.
                    logger.error(f"Error processing chunk: {e}")
                    continue
    except requests.exceptions.RequestException as e:
        logger.error(f"Stream error: {e}")
    except Exception as e:
        logger.error(f"Unexpected error in stream processing: {e}")
    finally:
        # Always emit a terminating 'stop' chunk with the complete context.
        final_chunk = create_openai_chunk('', model, 'stop')
        if 'choices' in final_chunk and final_chunk['choices']:
            final_chunk['choices'][0]['context'] = full_content
        yield final_chunk
|
| 567 |
|
| 568 |
def handle_non_stream_response(response, model, prompt_tokens):
|
| 569 |
"""改进的非流式响应处理,确保保持完整上下文。"""
|
|
|
|
| 611 |
logger.error(f"Error processing non-stream response: {e}")
|
| 612 |
raise
|
| 613 |
|
| 614 |
+
# Wrap the notdiamond chunk stream as an SSE (text/event-stream) generator.
def generate_stream_response(response, model, prompt_tokens):
    """Yield SSE ``data:`` lines for each chunk, with running token usage."""
    total_completion_tokens = 0
    start_time = time.time()

    try:
        for chunk in stream_notdiamond_response(response, model):
            # Abort generation if the overall stream has run too long.
            elapsed = time.time() - start_time
            if elapsed > STREAM_TIMEOUT:
                logger.warning("Response generation timeout")
                break

            delta_text = chunk['choices'][0]['delta'].get('content', '')
            total_completion_tokens += count_tokens(delta_text, model)

            # Attach cumulative usage accounting to every outgoing chunk.
            chunk['usage'] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": total_completion_tokens,
                "total_tokens": prompt_tokens + total_completion_tokens,
            }

            yield f"data: {json.dumps(chunk)}\n\n"
    except Exception as e:
        logger.error(f"Error generating stream response: {e}")
    finally:
        # Terminate the SSE stream regardless of success or failure.
        yield "data: [DONE]\n\n"
|
| 640 |
|
| 641 |
def get_auth_credentials():
|
| 642 |
"""从API获取认证凭据"""
|
|
|
|
| 898 |
url,
|
| 899 |
headers=headers,
|
| 900 |
json=payload,
|
| 901 |
+
stream=True,
|
| 902 |
+
timeout=REQUEST_TIMEOUT # 添加超时设置
|
| 903 |
).result()
|
| 904 |
|
| 905 |
if response.status_code == 200 and response.headers.get('Content-Type') == 'text/event-stream':
|