dan92 committed on
Commit
4d18321
·
verified ·
1 Parent(s): 6b2d613

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -1
  2. app.py +75 -42
Dockerfile CHANGED
@@ -23,4 +23,5 @@ ENV PYTHONUNBUFFERED=1
23
  EXPOSE 3000
24
 
25
  # 使用 gunicorn 作为生产级 WSGI 服务器
26
- CMD ["gunicorn", "--bind", "0.0.0.0:3000", "--workers", "4", "app:app"]
 
 
23
  EXPOSE 3000
24
 
25
  # 使用 gunicorn 作为生产级 WSGI 服务器
26
+ # Dockerfile 中修改 gunicorn 命令
27
+ CMD ["gunicorn", "--bind", "0.0.0.0:3000", "--workers", "4", "--timeout", "120", "--keep-alive", "5", "--worker-class", "sync", "app:app"]
app.py CHANGED
@@ -98,8 +98,14 @@ class CustomHTTPAdapter(HTTPAdapter):
98
 
99
  # 创建自定义的 Session
100
  def create_custom_session():
 
101
  session = requests.Session()
102
- adapter = CustomHTTPAdapter()
 
 
 
 
 
103
  session.mount('https://', adapter)
104
  session.mount('http://', adapter)
105
  return session
@@ -510,37 +516,54 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0301"):
510
  """计算消息列表中的总令牌数量。"""
511
  return sum(count_tokens(str(message), model) for message in messages)
512
 
 
 
 
 
 
513
  def stream_notdiamond_response(response, model):
514
- """改进的流式响应处理,确保保持上下文完整性。"""
515
  buffer = ""
516
  full_content = ""
 
517
 
518
- for chunk in response.iter_content(chunk_size=1024):
519
- if chunk:
520
- try:
521
- new_content = chunk.decode('utf-8')
522
- buffer += new_content
523
- full_content += new_content
524
-
525
- # 创建完整的响应块
526
- chunk_data = create_openai_chunk(new_content, model)
527
-
528
- # 确保响应块包含完整的上下文
529
- if 'choices' in chunk_data and chunk_data['choices']:
530
- chunk_data['choices'][0]['delta']['content'] = new_content
531
- chunk_data['choices'][0]['context'] = full_content # 添加完整上下文
532
-
533
- yield chunk_data
534
 
535
- except Exception as e:
536
- logger.error(f"Error processing chunk: {e}")
537
- continue
538
-
539
- # 发送完成标记
540
- final_chunk = create_openai_chunk('', model, 'stop')
541
- if 'choices' in final_chunk and final_chunk['choices']:
542
- final_chunk['choices'][0]['context'] = full_content # 在最终块中包含完整上下文
543
- yield final_chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544
 
545
  def handle_non_stream_response(response, model, prompt_tokens):
546
  """改进的非流式响应处理,确保保持完整上下文。"""
@@ -588,23 +611,32 @@ def handle_non_stream_response(response, model, prompt_tokens):
588
  logger.error(f"Error processing non-stream response: {e}")
589
  raise
590
 
 
591
  def generate_stream_response(response, model, prompt_tokens):
592
- """生成流式 HTTP 响应。"""
593
  total_completion_tokens = 0
 
594
 
595
- for chunk in stream_notdiamond_response(response, model):
596
- content = chunk['choices'][0]['delta'].get('content', '')
597
- total_completion_tokens += count_tokens(content, model)
598
-
599
- chunk['usage'] = {
600
- "prompt_tokens": prompt_tokens,
601
- "completion_tokens": total_completion_tokens,
602
- "total_tokens": prompt_tokens + total_completion_tokens
603
- }
604
-
605
- yield f"data: {json.dumps(chunk)}\n\n"
606
-
607
- yield "data: [DONE]\n\n"
 
 
 
 
 
 
 
608
 
609
  def get_auth_credentials():
610
  """从API获取认证凭据"""
@@ -866,7 +898,8 @@ def make_request(payload, auth_manager, model_id):
866
  url,
867
  headers=headers,
868
  json=payload,
869
- stream=True
 
870
  ).result()
871
 
872
  if response.status_code == 200 and response.headers.get('Content-Type') == 'text/event-stream':
 
98
 
99
# Build the custom requests Session used by all outbound HTTP calls.
def create_custom_session():
    """Create a requests Session mounted with a pooled, retrying adapter.

    Returns:
        requests.Session: session whose http:// and https:// traffic goes
        through a CustomHTTPAdapter configured with a 100-connection pool
        and up to 3 retries.

    NOTE(review): the original comment says this "adds timeout settings",
    but the adapter below only configures pooling and retries — per-request
    timeouts must still be passed at each call site (confirm callers do).
    """
    session = requests.Session()
    adapter = CustomHTTPAdapter(
        pool_connections=100,   # number of distinct host pools to cache
        pool_maxsize=100,       # max connections kept per pool
        max_retries=3,          # retry transient connection failures
        pool_block=False        # don't block when the pool is exhausted
    )
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    return session
 
516
  """计算消息列表中的总令牌数量。"""
517
  return sum(count_tokens(str(message), model) for message in messages)
518
 
519
# Module-level tuning constants (the original note says these belong at the
# top of the file).
STREAM_TIMEOUT = 30   # streaming response timeout, seconds
REQUEST_TIMEOUT = 10  # plain (non-stream) request timeout, seconds
CHUNK_SIZE = 512      # smaller chunk size to speed up per-chunk processing
524
def stream_notdiamond_response(response, model):
    """Yield OpenAI-style chunk dicts from a streaming notdiamond response.

    Improvements over the previous version:
      * Uses an incremental UTF-8 decoder so multi-byte characters split
        across chunk boundaries no longer raise UnicodeDecodeError (which
        previously caused that content to be silently dropped).
      * Drops the write-only ``buffer`` accumulator, which grew without
        bound and was never read.

    Args:
        response: a ``requests`` response opened with ``stream=True``.
        model: model name copied into each generated chunk.

    Yields:
        dict: one chunk per decoded piece of content; a final chunk with
        finish reason 'stop' is always emitted (see ``finally``), and each
        chunk carries the accumulated text in ``choices[0]['context']``.
    """
    import codecs  # stdlib; local import so the module header stays untouched

    # Incremental decoder buffers trailing partial multi-byte sequences
    # between chunks instead of failing on them.
    decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
    full_content = ""
    last_chunk_time = time.time()

    try:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            current_time = time.time()
            # NOTE(review): iter_content blocks while waiting for data, so
            # this check only fires once a chunk finally arrives — it bounds
            # inter-chunk gaps, not a total hang. Confirm gunicorn/socket
            # timeouts cover the hang case.
            if current_time - last_chunk_time > STREAM_TIMEOUT:
                logger.warning("Stream timeout reached")
                break

            if chunk:
                try:
                    new_content = decoder.decode(chunk)
                    if not new_content:
                        # Entire chunk was a partial multi-byte sequence;
                        # wait for the next chunk to complete it.
                        continue
                    full_content += new_content

                    # Build the OpenAI-compatible response chunk.
                    chunk_data = create_openai_chunk(new_content, model)

                    # Attach the delta and the full running context.
                    if 'choices' in chunk_data and chunk_data['choices']:
                        chunk_data['choices'][0]['delta']['content'] = new_content
                        chunk_data['choices'][0]['context'] = full_content

                    yield chunk_data
                    last_chunk_time = current_time

                except Exception as e:
                    logger.error(f"Error processing chunk: {e}")
                    continue
    except requests.exceptions.RequestException as e:
        logger.error(f"Stream error: {e}")
    except Exception as e:
        logger.error(f"Unexpected error in stream processing: {e}")
    finally:
        # Always emit the terminating chunk so downstream consumers can
        # close out the stream cleanly, even after an error or timeout.
        final_chunk = create_openai_chunk('', model, 'stop')
        if 'choices' in final_chunk and final_chunk['choices']:
            final_chunk['choices'][0]['context'] = full_content
        yield final_chunk
567
 
568
  def handle_non_stream_response(response, model, prompt_tokens):
569
  """改进的非流式响应处理,确保保持完整上下文。"""
 
611
  logger.error(f"Error processing non-stream response: {e}")
612
  raise
613
 
614
# Wraps the chunk stream in the SSE wire format expected by OpenAI clients.
def generate_stream_response(response, model, prompt_tokens):
    """Yield SSE-framed JSON chunks with a running token-usage tally.

    Each yielded line is ``data: <json>\\n\\n``; a ``data: [DONE]`` marker is
    always emitted last, even when an error or timeout cuts the stream short.
    """
    completion_tokens = 0
    started = time.time()

    try:
        for chunk in stream_notdiamond_response(response, model):
            # Abort if total generation time exceeds the stream budget.
            if time.time() - started > STREAM_TIMEOUT:
                logger.warning("Response generation timeout")
                break

            delta_text = chunk['choices'][0]['delta'].get('content', '')
            completion_tokens += count_tokens(delta_text, model)

            chunk['usage'] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens
            }

            yield f"data: {json.dumps(chunk)}\n\n"
    except Exception as e:
        logger.error(f"Error generating stream response: {e}")
    finally:
        # Terminator must go out no matter how the loop ended.
        yield "data: [DONE]\n\n"
640
 
641
  def get_auth_credentials():
642
  """从API获取认证凭据"""
 
898
  url,
899
  headers=headers,
900
  json=payload,
901
+ stream=True,
902
+ timeout=REQUEST_TIMEOUT # 添加超时设置
903
  ).result()
904
 
905
  if response.status_code == 200 and response.headers.get('Content-Type') == 'text/event-stream':