from fastapi import FastAPI, Request
from fastapi.responses import PlainTextResponse
from pydantic import BaseModel
from typing import List, Optional, Literal
from gradio_client import Client
import uvicorn
import time
import uuid
import logging

# === Logger setup ===
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# === Model choice (used globally; the "model" field in requests is ignored) ===
AI_MODEL = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"

# === Connect to the Gradio Space by URL ===
try:
    gr_client = Client("https://nymbo-serverless-textgen-hub.hf.space")
    logger.info("✅ Connected to the Gradio Space")
except Exception as e:
    logger.error(f"❌ Failed to initialize the Gradio Client: {e}")
    gr_client = None

# === Model invocation ===
def ask(user_prompt, system_prompt, temperature=0.7, top_p=0.95, max_tokens=512):
    """Send a single-turn prompt to the Space's /bot endpoint and return the reply."""
    if not gr_client:
        return "[Error: Gradio Client is not initialized]"
    try:
        result = gr_client.predict(
            history=[[user_prompt, None]],
            system_msg=system_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            freq_penalty=0,
            seed=-1,
            custom_model=AI_MODEL,
            search_term="",
            selected_model=AI_MODEL,
            api_name="/bot",
        )
        return result
    except Exception as e:
        logger.error(f"❌ Model call failed: {e}")
        return f"[Error: {e}]"

# === FastAPI application ===
app = FastAPI()

# === Request models ===
class Message(BaseModel):
    role: Literal["user", "assistant", "system"]
    content: str

class ChatRequest(BaseModel):
    model: str  # Accepted for OpenAI compatibility; the global AI_MODEL is used instead
    messages: List[Message]
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.95
    max_tokens: Optional[int] = 512

# === Root route "/" ===
@app.get("/", response_class=PlainTextResponse)
async def root():
    return "Proxy free test"

# === Main OpenAI-compatible route ===
@app.post("/v1/chat/completions")
async def chat_completion(request: Request):
    headers = dict(request.headers)
    body = await request.body()
    logger.info("📥 Request received")
    logger.info(f"🔸 Headers: {headers}")
    logger.info(f"🔸 Body: {body.decode('utf-8')}")

    try:
        data = await request.json()
        chat_request = ChatRequest(**data)
    except Exception as e:
        logger.error(f"❌ JSON parsing error: {e}")
        return {"error": "Invalid JSON"}

    # Take the most recent user message and the first system message, if any.
    user_msg = next((m.content for m in reversed(chat_request.messages) if m.role == "user"), None)
    system_msg = next((m.content for m in chat_request.messages if m.role == "system"),
                      "You are a helpful AI assistant.")

    if not user_msg:
        return {"error": "User message not found."}

    assistant_reply = ask(
        user_msg,
        system_msg,
        temperature=chat_request.temperature,
        top_p=chat_request.top_p,
        max_tokens=chat_request.max_tokens,
    )

    # Wrap the reply in an OpenAI-style chat.completion payload.
    response = {
        "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": AI_MODEL,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": assistant_reply,
                },
                "finish_reason": "stop",
            }
        ],
        # Token accounting is not available from the Space, so usage is zeroed out.
        "usage": {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        },
    }
    return response

# === Run the server ===
if __name__ == "__main__":
    # The import string assumes this file is saved as local_openai_server.py.
    uvicorn.run("local_openai_server:app", host="0.0.0.0", port=7860, reload=True)
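
# === Usage sketch (illustrative; assumes the server is running locally on port 7860) ===
# With curl, the "model" field is accepted but ignored in favor of AI_MODEL:
#
#   curl -s http://localhost:7860/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "any", "messages": [{"role": "user", "content": "Hello"}]}'
#
# Or with the official openai Python package (assumption: openai>=1.0 is installed;
# the api_key is unused by this proxy but required by the client constructor):
#
#   from openai import OpenAI
#   client = OpenAI(base_url="http://localhost:7860/v1", api_key="unused")
#   completion = client.chat.completions.create(
#       model="any",
#       messages=[{"role": "user", "content": "Hello"}],
#   )
#   print(completion.choices[0].message.content)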