File size: 8,086 Bytes
f9a1ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
from flask import Flask, request, Response, jsonify, stream_with_context
import requests
import json
import uuid
import time
from datetime import datetime

# Upstream chat endpoint; both response helpers POST the translated request here.
ORIGINAL_API_URL = "https://app.unlimitedai.chat/api/chat"

# Flask application that hosts the /v1/models and /v1/chat/completions routes.
app = Flask(__name__)


@app.route('/v1/models', methods=['GET'])
def list_models():
    """Return the OpenAI-style model listing served by this proxy.

    The catalogue is static and contains a single ``chat-model-reasoning``
    entry; edit the entry below to advertise a different set of models.
    """
    model_id = "chat-model-reasoning"
    reasoning_model = dict(
        id=model_id,
        object="model",
        created=1713235200,
        owned_by="organization-owner",
        permission=[],
        root=model_id,
        parent=None,
    )
    payload = {"object": "list", "data": [reasoning_model]}
    return jsonify(payload)


def _message_text(content):
    """Flatten an OpenAI message ``content`` value into plain text.

    Strings are returned unchanged and ``None`` becomes an empty string. A
    list of content parts is reduced to the concatenation of its textual
    pieces (bare strings and dicts carrying a string ``"text"`` field);
    other parts are skipped because the upstream request built here only has
    a text slot. Any other value is converted with ``str``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return "".join(pieces)
    return str(content)


def _invalid_request(message):
    """Build an HTTP 400 JSON response with an OpenAI-style error body."""
    failure = jsonify(
        {"error": {"message": message, "type": "invalid_request_error"}}
    )
    failure.status_code = 400
    return failure


@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """Translate an OpenAI chat-completions request and forward it upstream.

    The JSON body is converted into the upstream message format (each message
    gets a fresh id, a UTC ``createdAt`` timestamp and a single text part) and
    handed to ``stream_response`` or ``non_stream_response`` depending on the
    request's ``stream`` flag. The upstream model is always
    ``chat-model-reasoning``; the requested ``model`` is only echoed back in
    the reply by those helpers.

    Returns:
        The Flask response produced by the selected helper, or an HTTP 400
        JSON error when the body is not a JSON object, ``messages`` is not a
        list, or a message lacks a string ``role``.
    """
    # Local import: the module header only imports the ``datetime`` class.
    from datetime import timezone

    # silent=True yields None instead of raising on a missing or invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return _invalid_request("Request body must be a JSON object.")

    messages = data.get('messages', [])
    if not isinstance(messages, list):
        return _invalid_request("'messages' must be a list.")

    original_messages = []
    for msg in messages:
        if not isinstance(msg, dict) or not isinstance(msg.get("role"), str):
            return _invalid_request(
                "Each message must be an object with a string 'role'."
            )
        text = _message_text(msg.get("content"))
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # tzinfo is dropped again so the text keeps the "...Z" form.
        created_at = (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        )
        original_messages.append(
            {
                "id": str(uuid.uuid4()),
                "createdAt": created_at,
                "role": msg["role"],
                "content": text,
                "parts": [
                    {
                        "type": "text",
                        "text": text
                    }
                ]
            }
        )

    original_request = {
        "id": str(uuid.uuid4()),
        "messages": original_messages,
        "selectedChatModel": "chat-model-reasoning"
    }
    headers = {'Content-Type': 'application/json'}

    if data.get('stream', False):
        return stream_response(original_request, headers, data)
    return non_stream_response(original_request, headers, data)


def stream_response(original_request, headers, openai_request):
    """Proxy the upstream reply as an OpenAI-compatible SSE stream.

    Args:
        original_request: JSON-serialisable body posted to ORIGINAL_API_URL.
        headers: HTTP headers sent with the upstream request.
        openai_request: The client's request dict; only ``model`` is read,
            and it is echoed back in every chunk.

    Returns:
        A ``text/event-stream`` Flask response. Upstream lines are mapped by
        their two-character prefix:

        * ``f:`` -> a chunk announcing the assistant role
        * ``g:`` -> a ``reasoning_content`` delta
        * ``0:`` -> a ``content`` delta
        * ``e:`` / ``d:`` -> the finishing chunk

        The stream always ends with ``data: [DONE]``.
    """
    model_name = openai_request.get("model", "gpt-3.5-turbo")
    # One id for the whole completion so clients can correlate its chunks.
    completion_id = f"chatcmpl-{uuid.uuid4()}"

    def decode_text(payload):
        """Decode a text payload, preferring proper JSON string decoding."""
        try:
            decoded = json.loads(payload)
        except ValueError:
            decoded = None
        if isinstance(decoded, str):
            return decoded
        # Not a JSON string: fall back to the lenient quote/newline clean-up.
        return payload.strip('"').replace("\\n", "\n")

    def sse_chunk(delta, finish_reason=None):
        """Serialise one chat.completion.chunk as an SSE ``data:`` event."""
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": model_name,
            "choices": [
                {
                    "index": 0,
                    "delta": delta,
                    "finish_reason": finish_reason
                }
            ]
        }
        return f"data: {json.dumps(chunk)}\n\n"

    def generate():
        finished = False
        # NOTE(review): no timeout is set, so a stalled upstream blocks this
        # worker indefinitely - pick a value that suits the deployment.
        # The context manager releases the connection even when the client
        # disconnects mid-stream.
        with requests.post(
            ORIGINAL_API_URL,
            headers=headers,
            json=original_request,
            stream=True
        ) as upstream:
            if not upstream.ok:
                # Report the failure instead of an empty, unexplained stream.
                error_event = {
                    "error": {
                        "message": (
                            "Upstream API returned HTTP "
                            f"{upstream.status_code}"
                        ),
                        "type": "upstream_error"
                    }
                }
                yield f"data: {json.dumps(error_event)}\n\n"
                yield "data: [DONE]\n\n"
                return

            for line in upstream.iter_lines():
                if not line:
                    continue

                line_str = line.decode('utf-8')
                prefix, payload = line_str[:2], line_str[2:]

                if prefix == 'f:':
                    # Message start: open the stream with the role chunk.
                    yield sse_chunk({"role": "assistant"})

                elif prefix == 'g:':
                    # Reasoning text, forwarded under a separate delta key.
                    yield sse_chunk(
                        {"reasoning_content": decode_text(payload)}
                    )

                elif prefix == '0:':
                    # Reply text: the main streamed content.
                    yield sse_chunk({"content": decode_text(payload)})

                elif prefix in ('e:', 'd:'):
                    # Finish marker: forward its finishReason and stop.
                    try:
                        finish_data = json.loads(payload)
                    except ValueError:
                        finish_data = {}
                    finish_reason = "stop"
                    if isinstance(finish_data, dict):
                        finish_reason = finish_data.get(
                            "finishReason", "stop"
                        )
                    yield sse_chunk({}, finish_reason)
                    finished = True
                    break

        if not finished:
            # Upstream ended without a finish marker; still close the
            # completion so clients do not wait forever.
            yield sse_chunk({}, "stop")
        yield "data: [DONE]\n\n"

    return Response(
        stream_with_context(generate()),
        content_type='text/event-stream'
    )


def non_stream_response(original_request, headers, openai_request):
    """Collect the upstream reply and return one OpenAI-style completion.

    Args:
        original_request: JSON-serialisable body posted to ORIGINAL_API_URL.
        headers: HTTP headers sent with the upstream request.
        openai_request: The client's request dict; only ``model`` is read,
            and it is echoed back in the response.

    Returns:
        A JSON Flask response shaped like a ``chat.completion`` object. The
        assistant message carries the concatenated ``0:`` text as ``content``
        and, when any ``g:`` text was received, a ``reasoning_content`` field
        matching the key used by the streaming path. Token usage is reported
        as zeros because no usage figures are read from upstream. When
        upstream answers with an HTTP error status, a 502 JSON error is
        returned instead.
    """
    def decode_text(payload):
        """Decode a text payload, preferring proper JSON string decoding."""
        try:
            decoded = json.loads(payload)
        except ValueError:
            decoded = None
        if isinstance(decoded, str):
            return decoded
        # Not a JSON string: fall back to the lenient quote/newline clean-up.
        return payload.strip('"').replace("\\n", "\n")

    reasoning_parts = []
    reply_parts = []
    finish_reason = "stop"

    # NOTE(review): no timeout is set, so a stalled upstream blocks this
    # worker indefinitely - pick a value that suits the deployment.
    # The context manager guarantees the streamed connection is released.
    with requests.post(
        ORIGINAL_API_URL,
        headers=headers,
        json=original_request,
        stream=True
    ) as upstream:
        if not upstream.ok:
            # Surface the failure instead of a 200 with empty content.
            failure = jsonify(
                {
                    "error": {
                        "message": (
                            "Upstream API returned HTTP "
                            f"{upstream.status_code}"
                        ),
                        "type": "upstream_error"
                    }
                }
            )
            failure.status_code = 502
            return failure

        for line in upstream.iter_lines():
            if not line:
                continue

            line_str = line.decode('utf-8')
            prefix, payload = line_str[:2], line_str[2:]

            if prefix == 'g:':
                # Reasoning text.
                reasoning_parts.append(decode_text(payload))

            elif prefix == '0:':
                # Reply text.
                reply_parts.append(decode_text(payload))

            elif prefix in ('e:', 'd:'):
                # Finish marker; the last one seen decides finish_reason.
                try:
                    finish_data = json.loads(payload)
                except ValueError:
                    finish_data = {}
                if isinstance(finish_data, dict):
                    finish_reason = finish_data.get("finishReason", "stop")

    message = {
        "role": "assistant",
        "content": "".join(reply_parts)
    }
    reasoning_content = "".join(reasoning_parts)
    if reasoning_content:
        message["reasoning_content"] = reasoning_content

    # Build the OpenAI-compatible response.
    openai_response = {
        "id": f"chatcmpl-{uuid.uuid4()}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": openai_request.get("model", "gpt-3.5-turbo"),
        "choices": [
            {
                "index": 0,
                "message": message,
                "finish_reason": finish_reason
            }
        ],
        "usage": {
            "prompt_tokens": 0,  # placeholders: no usage data is collected
            "completion_tokens": 0,
            "total_tokens": 0
        }
    }

    return jsonify(openai_response)


import os

if __name__ == '__main__':
    # Listen on $PORT when it is set; 7860 is the default for Hugging Face.
    listen_port = int(os.getenv("PORT", 7860))
    app.run(host='0.0.0.0', port=listen_port)