File size: 4,047 Bytes
ab9cd81
8b04581
ab9cd81
 
 
 
 
 
21dd7fa
ab9cd81
21dd7fa
 
 
ab9cd81
8b04581
 
 
 
21dd7fa
6ef10c5
8b04581
21dd7fa
8b04581
21dd7fa
 
8b04581
 
21dd7fa
 
8b04581
21dd7fa
 
 
 
 
 
 
 
 
8b04581
21dd7fa
8b04581
21dd7fa
 
 
 
8b04581
21dd7fa
ab9cd81
8b04581
ab9cd81
 
8b04581
ab9cd81
 
 
 
 
8b04581
ab9cd81
 
 
 
 
8b04581
 
 
 
 
 
ab9cd81
21dd7fa
 
 
8b04581
 
 
 
21dd7fa
 
 
 
 
8b04581
21dd7fa
 
8b04581
21dd7fa
 
ab9cd81
 
 
 
8b04581
ab9cd81
 
 
 
 
8b04581
ab9cd81
 
 
 
 
 
 
 
 
 
 
21dd7fa
ab9cd81
 
 
 
 
 
 
21dd7fa
ab9cd81
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import logging
import time
import uuid
from typing import List, Literal, Optional

import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, PlainTextResponse
from gradio_client import Client
from pydantic import BaseModel

# === Logger setup ===
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# === Model identifier (used globally; request "model" field is ignored) ===
AI_MODEL = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"

# === Connect to the Gradio Space by URL ===
# On failure gr_client is left as None; ask() checks for this and returns
# an error string instead of crashing at request time.
try:
    gr_client = Client(src="https://nymbo-serverless-textgen-hub.hf.space")
    logger.info("✅ Успешно подключено к Gradio Space")
except Exception as e:
    logger.error(f"❌ Ошибка подключения к Gradio Client: {e}")
    gr_client = None

# === Model invocation helper ===
def ask(user_prompt, system_prompt):
    """Send a single-turn chat to the Gradio Space and return its reply.

    Generation parameters (max_tokens, temperature, top_p, ...) are fixed
    here; callers only control the user and system prompts. Returns an
    error string (never raises) when the client is missing or the call fails.
    """
    if not gr_client:
        return "[Ошибка: Gradio Client не инициализирован]"

    call_kwargs = dict(
        history=[[user_prompt, None]],  # single turn: user message, no reply yet
        system_msg=system_prompt,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
        freq_penalty=0,
        seed=-1,
        custom_model=AI_MODEL,
        search_term="",
        selected_model=AI_MODEL,
        api_name="/bot",
    )
    try:
        return gr_client.predict(**call_kwargs)
    except Exception as exc:
        logger.error(f"❌ Ошибка при вызове модели: {exc}")
        return f"[Ошибка: {str(exc)}]"

# === FastAPI application ===
app = FastAPI()

# === Request models ===
class Message(BaseModel):
    """A single chat message in the OpenAI chat-completions format."""
    role: Literal["user", "assistant", "system"]
    content: str

class ChatRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible)."""
    model: str  # placeholder — ignored; the global AI_MODEL is always used
    messages: List[Message]
    temperature: Optional[float] = 0.7  # accepted but not forwarded (ask() hard-codes 0.7)
    top_p: Optional[float] = 0.95       # accepted but not forwarded (ask() hard-codes 0.95)
    max_tokens: Optional[int] = 512     # accepted but not forwarded (ask() hard-codes 512)

# === Root route "/" ===
@app.get("/", response_class=PlainTextResponse)
async def root():
    """Health-check endpoint: responds with a fixed plain-text banner."""
    banner = "Proxy free test"
    return banner

# === Main OpenAI-compatible route ===
@app.post("/v1/chat/completions")
async def chat_completion(request: Request):
    """OpenAI-compatible chat completions endpoint.

    Parses an OpenAI-style request body, forwards the latest user message
    (plus the first system message, if any) to the Gradio Space via ask(),
    and wraps the reply in a chat.completion envelope. The "model" field of
    the request is ignored; the global AI_MODEL is always used.

    Returns HTTP 400 with an error payload on malformed JSON or when no
    user message is present.
    """
    # Redact credential-bearing headers before logging — logging the raw
    # Authorization / api-key values would leak client secrets into the log.
    sensitive = {"authorization", "api-key", "x-api-key"}
    headers = {
        k: ("<redacted>" if k.lower() in sensitive else v)
        for k, v in request.headers.items()
    }
    body = await request.body()

    logger.info("📥 Запрос получен")
    logger.info(f"🔸 Заголовки: {headers}")
    # errors="replace" keeps logging from raising UnicodeDecodeError on
    # non-UTF-8 payloads.
    logger.info(f"🔸 Тело: {body.decode('utf-8', errors='replace')}")

    try:
        data = await request.json()
        chat_request = ChatRequest(**data)
    except Exception as e:
        logger.error(f"❌ Ошибка разбора JSON: {e}")
        # 400, not a 200 with an error dict — OpenAI clients treat any 2xx
        # response as a successful completion.
        return JSONResponse(status_code=400, content={"error": "Некорректный JSON"})

    # Prompt = the most recent user message; context = the first system
    # message, with a generic fallback. The request's "model" is ignored.
    user_msg = next((m.content for m in reversed(chat_request.messages) if m.role == "user"), None)
    system_msg = next((m.content for m in chat_request.messages if m.role == "system"), "You are a helpful AI assistant.")

    if not user_msg:
        return JSONResponse(status_code=400, content={"error": "User message not found."})

    assistant_reply = ask(user_msg, system_msg)

    # Minimal chat.completion envelope; token usage is not tracked, so the
    # usage block is zeroed.
    response = {
        "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": AI_MODEL,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": assistant_reply
                },
                "finish_reason": "stop"
            }
        ],
        "usage": {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0
        }
    }

    return response

# === Server entry point ===
if __name__ == "__main__":
    # NOTE(review): "local_openai_server:app" assumes this file is named
    # local_openai_server.py — confirm; otherwise uvicorn's reload worker
    # cannot import the app by that module path.
    uvicorn.run("local_openai_server:app", host="0.0.0.0", port=7860, reload=True)