Spaces:
Paused
Paused
Upload 2 files
Browse files- Dockerfile +2 -1
- app.py +75 -42
Dockerfile
CHANGED
|
@@ -23,4 +23,5 @@ ENV PYTHONUNBUFFERED=1
|
|
| 23 |
EXPOSE 3000
|
| 24 |
|
| 25 |
# 使用 gunicorn 作为生产级 WSGI 服务器
|
| 26 |
-
|
|
|
|
|
|
| 23 |
EXPOSE 3000
|
| 24 |
|
| 25 |
# 使用 gunicorn 作为生产级 WSGI 服务器
|
| 26 |
+
# Run gunicorn as the production-grade WSGI server
# (4 sync workers, 120s worker timeout, 5s keep-alive, bound on all interfaces)
CMD ["gunicorn", "--bind", "0.0.0.0:3000", "--workers", "4", "--timeout", "120", "--keep-alive", "5", "--worker-class", "sync", "app:app"]
|
app.py
CHANGED
|
@@ -98,8 +98,14 @@ class CustomHTTPAdapter(HTTPAdapter):
|
|
| 98 |
|
| 99 |
# 创建自定义的 Session
|
| 100 |
def create_custom_session():
|
|
|
|
| 101 |
session = requests.Session()
|
| 102 |
-
adapter = CustomHTTPAdapter(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
session.mount('https://', adapter)
|
| 104 |
session.mount('http://', adapter)
|
| 105 |
return session
|
|
@@ -510,37 +516,54 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0301"):
|
|
| 510 |
"""计算消息列表中的总令牌数量。"""
|
| 511 |
return sum(count_tokens(str(message), model) for message in messages)
|
| 512 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
def stream_notdiamond_response(response, model):
|
| 514 |
-
"""
|
| 515 |
buffer = ""
|
| 516 |
full_content = ""
|
|
|
|
| 517 |
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
# 创建完整的响应块
|
| 526 |
-
chunk_data = create_openai_chunk(new_content, model)
|
| 527 |
-
|
| 528 |
-
# 确保响应块包含完整的上下文
|
| 529 |
-
if 'choices' in chunk_data and chunk_data['choices']:
|
| 530 |
-
chunk_data['choices'][0]['delta']['content'] = new_content
|
| 531 |
-
chunk_data['choices'][0]['context'] = full_content # 添加完整上下文
|
| 532 |
-
|
| 533 |
-
yield chunk_data
|
| 534 |
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 544 |
|
| 545 |
def handle_non_stream_response(response, model, prompt_tokens):
|
| 546 |
"""改进的非流式响应处理,确保保持完整上下文。"""
|
|
@@ -588,23 +611,32 @@ def handle_non_stream_response(response, model, prompt_tokens):
|
|
| 588 |
logger.error(f"Error processing non-stream response: {e}")
|
| 589 |
raise
|
| 590 |
|
|
|
|
| 591 |
def generate_stream_response(response, model, prompt_tokens):
|
| 592 |
-
"""
|
| 593 |
total_completion_tokens = 0
|
|
|
|
| 594 |
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
|
| 609 |
def get_auth_credentials():
|
| 610 |
"""从API获取认证凭据"""
|
|
@@ -866,7 +898,8 @@ def make_request(payload, auth_manager, model_id):
|
|
| 866 |
url,
|
| 867 |
headers=headers,
|
| 868 |
json=payload,
|
| 869 |
-
stream=True
|
|
|
|
| 870 |
).result()
|
| 871 |
|
| 872 |
if response.status_code == 200 and response.headers.get('Content-Type') == 'text/event-stream':
|
|
|
|
| 98 |
|
| 99 |
# Build a requests Session backed by the project's custom HTTP adapter.
def create_custom_session():
    """Create a custom Session with connection pooling and retry support."""
    session = requests.Session()
    adapter = CustomHTTPAdapter(
        pool_connections=100,
        pool_maxsize=100,
        max_retries=3,
        pool_block=False,
    )
    # Route both schemes through the same pooled adapter.
    for scheme in ('https://', 'http://'):
        session.mount(scheme, adapter)
    return session
|
|
|
|
| 516 |
"""计算消息列表中的总令牌数量。"""
|
| 517 |
return sum(count_tokens(str(message), model) for message in messages)
|
| 518 |
|
| 519 |
+
# Streaming / request tuning constants (source note: "add at top of file").
STREAM_TIMEOUT = 30   # streaming-response inactivity timeout (seconds)
REQUEST_TIMEOUT = 10  # plain request timeout (seconds)
CHUNK_SIZE = 512      # smaller chunk size to speed up per-chunk processing
|
| 523 |
+
|
| 524 |
def stream_notdiamond_response(response, model):
    """Stream an upstream response as OpenAI-style chunk dicts.

    Improved streaming handler: adds an inactivity timeout, per-chunk error
    recovery, and always yields a final 'stop' chunk carrying the full
    accumulated content.

    Fix: the original also accumulated every decoded chunk into a local
    ``buffer`` string that was never read — unbounded memory growth with no
    observable effect — so it has been removed.

    Args:
        response: object exposing ``iter_content(chunk_size=...)`` yielding
            bytes (presumably a ``requests.Response`` — confirm at caller).
        model: model name forwarded to ``create_openai_chunk``.

    Yields:
        dict: OpenAI-style chunk with ``delta.content`` set to the new text
        and ``context`` set to everything decoded so far.
    """
    full_content = ""                # everything decoded so far (sent as 'context')
    last_chunk_time = time.time()

    try:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            current_time = time.time()
            # NOTE(review): iter_content blocks while waiting, so this check
            # only fires after a chunk (or end of stream) actually arrives.
            if current_time - last_chunk_time > STREAM_TIMEOUT:
                logger.warning("Stream timeout reached")
                break

            if chunk:
                try:
                    new_content = chunk.decode('utf-8')
                    full_content += new_content

                    # Build the OpenAI-style chunk for this piece of content.
                    chunk_data = create_openai_chunk(new_content, model)

                    # Ensure the chunk carries both the delta and the full context.
                    if 'choices' in chunk_data and chunk_data['choices']:
                        chunk_data['choices'][0]['delta']['content'] = new_content
                        chunk_data['choices'][0]['context'] = full_content

                    yield chunk_data
                    last_chunk_time = current_time

                except Exception as e:
                    # Best-effort: skip a malformed chunk (e.g. a UTF-8
                    # sequence split across chunk boundaries) and keep going.
                    logger.error(f"Error processing chunk: {e}")
                    continue
    except requests.exceptions.RequestException as e:
        logger.error(f"Stream error: {e}")
    except Exception as e:
        logger.error(f"Unexpected error in stream processing: {e}")
    finally:
        # Always emit a terminating 'stop' chunk with the complete context.
        final_chunk = create_openai_chunk('', model, 'stop')
        if 'choices' in final_chunk and final_chunk['choices']:
            final_chunk['choices'][0]['context'] = full_content
        yield final_chunk
|
| 567 |
|
| 568 |
def handle_non_stream_response(response, model, prompt_tokens):
|
| 569 |
"""改进的非流式响应处理,确保保持完整上下文。"""
|
|
|
|
| 611 |
logger.error(f"Error processing non-stream response: {e}")
|
| 612 |
raise
|
| 613 |
|
| 614 |
+
# Wrap the notdiamond chunk stream as an SSE (text/event-stream) generator.
def generate_stream_response(response, model, prompt_tokens):
    """Yield SSE ``data:`` lines for each chunk, with running token usage."""
    total_completion_tokens = 0
    start_time = time.time()

    try:
        for chunk in stream_notdiamond_response(response, model):
            # Abort generation if the overall stream has run too long.
            elapsed = time.time() - start_time
            if elapsed > STREAM_TIMEOUT:
                logger.warning("Response generation timeout")
                break

            delta_text = chunk['choices'][0]['delta'].get('content', '')
            total_completion_tokens += count_tokens(delta_text, model)

            # Attach cumulative usage accounting to every outgoing chunk.
            chunk['usage'] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": total_completion_tokens,
                "total_tokens": prompt_tokens + total_completion_tokens,
            }

            yield f"data: {json.dumps(chunk)}\n\n"
    except Exception as e:
        logger.error(f"Error generating stream response: {e}")
    finally:
        # Terminate the SSE stream regardless of success or failure.
        yield "data: [DONE]\n\n"
|
| 640 |
|
| 641 |
def get_auth_credentials():
|
| 642 |
"""从API获取认证凭据"""
|
|
|
|
| 898 |
url,
|
| 899 |
headers=headers,
|
| 900 |
json=payload,
|
| 901 |
+
stream=True,
|
| 902 |
+
timeout=REQUEST_TIMEOUT # 添加超时设置
|
| 903 |
).result()
|
| 904 |
|
| 905 |
if response.status_code == 200 and response.headers.get('Content-Type') == 'text/event-stream':
|