File size: 3,463 Bytes
9a62f96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import codecs
import json
import time
import uuid

import tiktoken
from flask import jsonify

from constants import CHAT_COMPLETION_CHUNK, CONTENT_TYPE_EVENT_STREAM

def generate_system_fingerprint():
    """Generate and return a unique system fingerprint string.

    Returns:
        A string of the form ``fp_`` followed by 10 random hex digits.
    """
    hex_tail = uuid.uuid4().hex[:10]
    return "fp_" + hex_tail

def create_openai_chunk(content, model, finish_reason=None, usage=None):
    """Build one OpenAI-format ``chat.completion.chunk`` dict.

    Args:
        content: Text delta for this chunk; a falsy value (e.g. '')
            produces an empty delta object.
        model: Model name echoed back in the chunk.
        finish_reason: Optional stop reason (e.g. 'stop') for the choice.
        usage: Optional token-usage dict; attached only when not None.

    Returns:
        A dict matching the OpenAI streaming chunk schema.
    """
    delta = {"content": content} if content else {}
    choice = {
        "index": 0,
        "delta": delta,
        "logprobs": None,
        "finish_reason": finish_reason,
    }
    chunk = {
        "id": f"chatcmpl-{uuid.uuid4()}",
        "object": CHAT_COMPLETION_CHUNK,
        "created": int(time.time()),
        "model": model,
        "system_fingerprint": generate_system_fingerprint(),
        "choices": [choice],
    }
    if usage is not None:
        chunk["usage"] = usage
    return chunk

def count_tokens(text, model="gpt-3.5-turbo-0301"):
    """Count the tokens in *text* using the tokenizer for *model*.

    Falls back to the ``cl100k_base`` encoding when tiktoken does not
    recognize the model name.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))

def count_message_tokens(messages, model="gpt-3.5-turbo-0301"):
    """Return the total token count over all messages.

    Each message is stringified with str() before counting, matching
    count_tokens' text-only interface.
    """
    total = 0
    for message in messages:
        total += count_tokens(str(message), model)
    return total

def stream_notdiamond_response(response, model):
    """Stream the notdiamond API response as OpenAI-format chunks.

    Decodes the raw byte stream incrementally so that a multi-byte UTF-8
    character split across the 1024-byte chunk boundary no longer raises
    UnicodeDecodeError (the original decoded each chunk independently).

    Args:
        response: Upstream response object exposing ``iter_content()``
            (requests-style) — TODO confirm against the caller.
        model: Model name echoed in every emitted chunk.

    Yields:
        OpenAI ``chat.completion.chunk`` dicts; the final chunk has an
        empty delta and finish_reason='stop'.
    """
    # Incremental decoder buffers a trailing partial character between
    # chunks instead of failing on it; invalid bytes become U+FFFD.
    decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')

    for chunk in response.iter_content(1024):
        if chunk:
            text = decoder.decode(chunk)
            if text:
                yield create_openai_chunk(text, model)

    # Flush any bytes still buffered in the decoder.
    tail = decoder.decode(b'', final=True)
    if tail:
        yield create_openai_chunk(tail, model)

    yield create_openai_chunk('', model, 'stop')

def handle_non_stream_response(response, model, prompt_tokens):
    """Consume the streamed API response and build the final JSON reply.

    Args:
        response: Upstream response object forwarded to
            stream_notdiamond_response.
        model: Model name echoed in the reply.
        prompt_tokens: Precomputed prompt token count for the usage block.

    Returns:
        A Flask JSON response in OpenAI ``chat.completion`` format.
    """
    # Collect all delta text pieces, then join once.
    pieces = []
    for chunk in stream_notdiamond_response(response, model):
        text = chunk['choices'][0]['delta'].get('content')
        if text:
            pieces.append(text)
    full_content = "".join(pieces)

    completion_tokens = count_tokens(full_content, model)

    payload = {
        "id": f"chatcmpl-{uuid.uuid4()}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "system_fingerprint": generate_system_fingerprint(),
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": full_content
                },
                "finish_reason": "stop"
            }
        ],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens
        }
    }
    return jsonify(payload)

def generate_stream_response(response, model, prompt_tokens):
    """Yield SSE ``data:`` lines for a streaming HTTP response.

    Each upstream chunk gets a running usage block attached before being
    serialized; a terminal ``data: [DONE]`` line closes the stream.

    Args:
        response: Upstream response object forwarded to
            stream_notdiamond_response.
        model: Model name echoed in every chunk.
        prompt_tokens: Prompt token count included in every usage block.
    """
    completion_tokens = 0

    for chunk in stream_notdiamond_response(response, model):
        delta_text = chunk['choices'][0]['delta'].get('content', '')
        completion_tokens += count_tokens(delta_text, model)

        chunk['usage'] = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens
        }

        yield f"data: {json.dumps(chunk)}\n\n"

    yield "data: [DONE]\n\n"