Spaces:

bunnybun07
/

test

Paused

File size: 8,086 Bytes

f9a1ce9

from flask import Flask, request, Response, jsonify, stream_with_context
import requests
import json
import uuid
import time
from datetime import datetime

ORIGINAL_API_URL = "https://app.unlimitedai.chat/api/chat"

app = Flask(__name__)


@app.route('/v1/models', methods=['GET'])
def list_models():
    """Return the static model catalogue in OpenAI ``/v1/models`` list shape.

    The catalogue is hard-coded with a single entry; edit it here to
    advertise other model ids.

    Returns:
        A Flask JSON response of the form ``{"object": "list", "data": [...]}``.
    """
    model_id = "chat-model-reasoning"
    catalogue_entry = dict(
        id=model_id,
        object="model",
        created=1713235200,
        owned_by="organization-owner",
        permission=[],
        root=model_id,
        parent=None,
    )
    payload = {"object": "list", "data": [catalogue_entry]}
    return jsonify(payload)


def _content_to_text(content):
    """Collapse an OpenAI message ``content`` value into a plain string.

    OpenAI-style clients may send ``content`` as a string, as ``None``, or as
    a list of typed parts (``{"type": "text", "text": ...}``). Only the text
    parts are kept, because the upstream payload built below carries text only.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and part.get("type") == "text":
                pieces.append(str(part.get("text", "")))
        return "".join(pieces)
    return str(content)


@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """Translate an OpenAI-style chat request and forward it upstream.

    Reads the JSON body, converts every entry of ``messages`` into the
    upstream message shape (``id``, ``createdAt``, ``role``, ``content``,
    ``parts``) and dispatches to ``stream_response`` or
    ``non_stream_response`` depending on the ``stream`` flag.

    Returns:
        The Flask response produced by the selected handler, or a 400 JSON
        error (OpenAI-style ``{"error": {...}}``) when the body is not a JSON
        object or ``messages`` is malformed.
    """

    def bad_request(message):
        # Mirror the OpenAI error envelope so API clients can surface it.
        error_body = {"error": {"message": message, "type": "invalid_request_error"}}
        return jsonify(error_body), 400

    # silent=True: a malformed body yields None instead of raising, so the
    # caller receives a structured 400 rather than an unhandled exception.
    # force=True: accept JSON even when the Content-Type header is missing.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return bad_request("Request body must be a JSON object.")

    messages = data.get('messages', [])
    if not isinstance(messages, list):
        return bad_request("'messages' must be a list.")

    original_messages = []
    for position, msg in enumerate(messages):
        if not isinstance(msg, dict) or "role" not in msg:
            return bad_request(
                f"messages[{position}] must be an object with a 'role' field."
            )
        text = _content_to_text(msg.get("content"))
        original_messages.append({
            "id": str(uuid.uuid4()),
            "createdAt": datetime.utcnow().isoformat() + "Z",
            "role": msg["role"],
            "content": text,
            "parts": [
                {
                    "type": "text",
                    "text": text
                }
            ]
        })

    original_request = {
        "id": str(uuid.uuid4()),
        "messages": original_messages,
        "selectedChatModel": "chat-model-reasoning"
    }
    headers = {'Content-Type': 'application/json'}

    if data.get('stream', False):
        return stream_response(original_request, headers, data)
    return non_stream_response(original_request, headers, data)


def stream_response(original_request, headers, openai_request):
    """Forward a chat request upstream and relay the reply as OpenAI-style SSE.

    The upstream body is read line by line. Each line starts with a
    two-character type prefix:

    * ``f:``  start of a message  -> chunk with ``delta.role = "assistant"``
    * ``g:``  reasoning text      -> chunk with ``delta.reasoning_content``
    * ``0:``  reply text          -> chunk with ``delta.content``
    * ``e:`` / ``d:``  finish     -> chunk with ``finish_reason``, then stop

    NOTE(review): this prefix scheme looks like the Vercel AI SDK data stream
    protocol, where text payloads are JSON string literals — confirm against
    the upstream service.

    Args:
        original_request: JSON-serialisable payload to POST upstream.
        headers: HTTP headers for the upstream request.
        openai_request: The client's original request dict; only ``model`` is
            read, to echo it back in every chunk.

    Returns:
        A Flask ``Response`` with ``text/event-stream`` content. The stream
        always ends with ``data: [DONE]``; upstream failures are reported
        in-band as a ``data: {"error": ...}`` event because the HTTP status
        line has already been sent by the time the generator runs.
    """
    model_name = openai_request.get("model", "gpt-3.5-turbo")
    # All chunks of one completion share a single id and creation timestamp.
    completion_id = f"chatcmpl-{uuid.uuid4()}"
    created_at = int(time.time())

    def make_event(delta, finish_reason=None):
        """Serialise one ``chat.completion.chunk`` as an SSE ``data:`` event."""
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created_at,
            "model": model_name,
            "choices": [
                {
                    "index": 0,
                    "delta": delta,
                    "finish_reason": finish_reason
                }
            ]
        }
        return f"data: {json.dumps(chunk)}\n\n"

    def decode_text(payload):
        """Decode a text payload, preferring real JSON string decoding.

        JSON decoding handles escaped quotes, backslashes and ``\\uXXXX``
        sequences; the quote-stripping fallback is kept only for payloads
        that are not valid JSON.
        """
        try:
            value = json.loads(payload)
        except ValueError:
            return payload.strip('"').replace("\\n", "\n")
        return value if isinstance(value, str) else payload

    def generate():
        finished = False
        try:
            # The context manager closes the upstream connection even when
            # the client disconnects mid-stream (generator is closed early).
            # timeout=(connect, read): the read timeout applies between
            # received bytes, so long generations are not cut off.
            with requests.post(
                ORIGINAL_API_URL,
                headers=headers,
                json=original_request,
                stream=True,
                timeout=(10, 300)
            ) as response:
                response.raise_for_status()

                for line in response.iter_lines():
                    if not line:
                        continue

                    line_str = line.decode('utf-8', errors='replace')
                    prefix, payload = line_str[:2], line_str[2:]

                    if prefix == 'f:':
                        # Message start: announce the assistant role.
                        yield make_event({"role": "assistant"})

                    elif prefix == 'g:':
                        # Reasoning text, exposed as a separate delta field.
                        yield make_event(
                            {"reasoning_content": decode_text(payload)}
                        )

                    elif prefix == '0:':
                        # Reply text: the main streamed content.
                        yield make_event({"content": decode_text(payload)})

                    elif prefix in ('e:', 'd:'):
                        # Finish marker: relay the finish reason and stop.
                        try:
                            finish_data = json.loads(payload)
                        except ValueError:
                            finish_data = {}
                        finish_reason = "stop"
                        if isinstance(finish_data, dict):
                            finish_reason = finish_data.get(
                                "finishReason", "stop"
                            )
                        yield make_event({}, finish_reason)
                        finished = True
                        break
        except requests.RequestException:
            error_event = {
                "error": {
                    "message": "Upstream request failed.",
                    "type": "upstream_error"
                }
            }
            yield f"data: {json.dumps(error_event)}\n\n"
            finished = True

        if not finished:
            # Upstream ended without a finish marker; still close the
            # completion so clients do not wait forever.
            yield make_event({}, "stop")
        yield "data: [DONE]\n\n"

    return Response(
        stream_with_context(generate()),
        content_type='text/event-stream'
    )


def non_stream_response(original_request, headers, openai_request):
    """Forward a chat request upstream and return one OpenAI-style completion.

    The upstream body is consumed line by line. Each line starts with a
    two-character type prefix: ``g:`` reasoning text, ``0:`` reply text,
    ``e:`` / ``d:`` a JSON object carrying ``finishReason``. Other lines
    (including ``f:`` message-start lines) are ignored.

    NOTE(review): text payloads are treated as JSON string literals, which is
    what the quoted upstream lines appear to be — confirm against the
    upstream service.

    Args:
        original_request: JSON-serialisable payload to POST upstream.
        headers: HTTP headers for the upstream request.
        openai_request: The client's original request dict; only ``model`` is
            read, to echo it back in the response.

    Returns:
        A Flask JSON response in ``chat.completion`` shape. The message
        carries ``reasoning_content`` when upstream produced reasoning text,
        matching the field name used by the streaming path. On upstream
        failure, a ``(response, 502)`` tuple with an ``{"error": ...}`` body.
    """

    def decode_text(payload):
        """Decode a text payload, preferring real JSON string decoding.

        JSON decoding handles escaped quotes, backslashes and ``\\uXXXX``
        sequences; the quote-stripping fallback is kept only for payloads
        that are not valid JSON.
        """
        try:
            value = json.loads(payload)
        except ValueError:
            return payload.strip('"').replace("\\n", "\n")
        return value if isinstance(value, str) else payload

    reasoning_parts = []
    reply_parts = []
    finish_reason = "stop"

    try:
        # The context manager guarantees the upstream connection is released.
        # timeout=(connect, read): the read timeout applies between received
        # bytes, so long generations are not cut off.
        with requests.post(
            ORIGINAL_API_URL,
            headers=headers,
            json=original_request,
            stream=True,
            timeout=(10, 300)
        ) as response:
            response.raise_for_status()

            for line in response.iter_lines():
                if not line:
                    continue

                line_str = line.decode('utf-8', errors='replace')
                prefix, payload = line_str[:2], line_str[2:]

                if prefix == 'g:':
                    # Reasoning text.
                    reasoning_parts.append(decode_text(payload))

                elif prefix == '0:':
                    # Reply text.
                    reply_parts.append(decode_text(payload))

                elif prefix in ('e:', 'd:'):
                    # Finish marker; no break, so the last one seen wins.
                    try:
                        finish_data = json.loads(payload)
                    except ValueError:
                        finish_data = {}
                    if isinstance(finish_data, dict):
                        finish_reason = finish_data.get("finishReason", "stop")
    except requests.RequestException:
        error_body = {
            "error": {
                "message": "Upstream request failed.",
                "type": "upstream_error"
            }
        }
        return jsonify(error_body), 502

    message = {
        "role": "assistant",
        "content": "".join(reply_parts)
    }
    reasoning_text = "".join(reasoning_parts)
    if reasoning_text:
        message["reasoning_content"] = reasoning_text

    # Build the OpenAI-compatible response.
    openai_response = {
        "id": f"chatcmpl-{uuid.uuid4()}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": openai_request.get("model", "gpt-3.5-turbo"),
        "choices": [
            {
                "index": 0,
                "message": message,
                "finish_reason": finish_reason
            }
        ],
        "usage": {
            # Upstream does not report token counts here; placeholders only.
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0
        }
    }

    return jsonify(openai_response)


import os

if __name__ == '__main__':
    # Listen on the port given by the PORT environment variable; 7860 is the
    # default for Hugging Face.
    configured_port = os.environ.get("PORT", 7860)
    listen_port = int(configured_port)
    app.run(port=listen_port, host='0.0.0.0')