Update server.py
Browse files
server.py
CHANGED
|
@@ -1,27 +1,28 @@
|
|
| 1 |
import logging
|
| 2 |
import os
|
| 3 |
import sys
|
| 4 |
-
|
|
|
|
|
|
|
| 5 |
from contextlib import asynccontextmanager
|
| 6 |
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
-
from fastapi import FastAPI, HTTPException, Request
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
from fastapi.middleware.gzip import GZipMiddleware
|
| 11 |
from fastapi.responses import JSONResponse, StreamingResponse
|
| 12 |
from pydantic import BaseModel, Field
|
| 13 |
-
from openai import
|
|
|
|
| 14 |
import json
|
| 15 |
|
| 16 |
|
| 17 |
# Load environment variables
|
| 18 |
load_dotenv()
|
| 19 |
|
| 20 |
-
# Configure
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
format='%(levelname)s - %(message)s'
|
| 24 |
-
)
|
| 25 |
logger = logging.getLogger("rox_ai")
|
| 26 |
|
| 27 |
# Check for API key
|
|
@@ -30,6 +31,19 @@ NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
|
|
| 30 |
if not NVIDIA_API_KEY:
|
| 31 |
raise RuntimeError("NVIDIA_API_KEY not set")
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
# Model configurations
|
| 34 |
ROX_CORE_MODEL = "minimaxai/minimax-m2.5"
|
| 35 |
ROX_TURBO_MODEL = "meta/llama-3.1-8b-instruct" # Changed to a more reliable model
|
|
@@ -50,18 +64,33 @@ ROX_DYNO_IDENTITY = "You are Rox 6 Dyno, an AI model created by Rox AI. Your cre
|
|
| 50 |
ROX_CODER_7_IDENTITY = "You are Rox 7 Coder, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are the most advanced coding specialist."
|
| 51 |
ROX_VISION_IDENTITY = "You are Rox Vision Max, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You specialize in visual understanding and multimodal tasks."
|
| 52 |
|
| 53 |
-
# Initialize OpenAI client with timeout optimization
|
| 54 |
-
client = OpenAI(
|
| 55 |
-
base_url="https://integrate.api.nvidia.com/v1",
|
| 56 |
-
api_key=NVIDIA_API_KEY,
|
| 57 |
-
timeout=60.0,
|
| 58 |
-
max_retries=2
|
| 59 |
-
)
|
| 60 |
-
|
| 61 |
@asynccontextmanager
|
| 62 |
async def lifespan(app: FastAPI):
|
| 63 |
"""Lifespan context manager"""
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
# Initialize FastAPI app - optimized for speed
|
|
@@ -75,38 +104,64 @@ app = FastAPI(
|
|
| 75 |
)
|
| 76 |
|
| 77 |
# GZip compression for faster transfers
|
| 78 |
-
app.add_middleware(GZipMiddleware, minimum_size=
|
| 79 |
|
| 80 |
-
# CORS -
|
| 81 |
app.add_middleware(
|
| 82 |
CORSMiddleware,
|
| 83 |
-
allow_origins=
|
| 84 |
-
allow_credentials=
|
| 85 |
allow_methods=["*"],
|
| 86 |
allow_headers=["*"],
|
| 87 |
)
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
# Minimal exception handler
|
| 91 |
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Answer any otherwise-unhandled exception with a generic HTTP 500.

    The response body is a fixed JSON object, so no detail from ``exc`` is
    sent back to the caller.
    """
    payload = {"error": "Internal server error"}
    return JSONResponse(content=payload, status_code=500)
|
| 97 |
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
| 103 |
|
| 104 |
|
| 105 |
# Helper function for streaming responses
|
| 106 |
-
async def stream_response(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
"""Stream responses from OpenAI API"""
|
| 108 |
try:
|
| 109 |
-
stream =
|
| 110 |
model=model,
|
| 111 |
messages=messages,
|
| 112 |
temperature=temperature,
|
|
@@ -116,9 +171,11 @@ async def stream_response(model: str, messages: list, temperature: float, top_p:
|
|
| 116 |
extra_body=extra_body
|
| 117 |
)
|
| 118 |
|
| 119 |
-
for chunk in stream:
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
| 122 |
|
| 123 |
yield "data: [DONE]\n\n"
|
| 124 |
except Exception as e:
|
|
@@ -243,21 +300,21 @@ class HFResponseItem(BaseModel):
|
|
| 243 |
async def chat(req: ChatRequest):
|
| 244 |
"""Rox Core - Main conversational model with streaming support"""
|
| 245 |
messages = [{"role": "system", "content": ROX_CORE_IDENTITY}]
|
| 246 |
-
messages.extend([m.
|
| 247 |
|
| 248 |
if req.stream:
|
| 249 |
return StreamingResponse(
|
| 250 |
-
stream_response(ROX_CORE_MODEL, messages, req.temperature, req.top_p, req.max_tokens),
|
| 251 |
media_type="text/event-stream"
|
| 252 |
)
|
| 253 |
|
| 254 |
try:
|
| 255 |
-
completion =
|
| 256 |
model=ROX_CORE_MODEL,
|
| 257 |
messages=messages,
|
| 258 |
temperature=req.temperature,
|
| 259 |
top_p=req.top_p,
|
| 260 |
-
max_tokens=req.max_tokens,
|
| 261 |
stream=False
|
| 262 |
)
|
| 263 |
return {"content": completion.choices[0].message.content or ""}
|
|
@@ -269,21 +326,21 @@ async def chat(req: ChatRequest):
|
|
| 269 |
async def turbo(req: ChatRequest):
|
| 270 |
"""Rox 2.1 Turbo - Fast and efficient with streaming"""
|
| 271 |
messages = [{"role": "system", "content": ROX_TURBO_IDENTITY}]
|
| 272 |
-
messages.extend([m.
|
| 273 |
|
| 274 |
if req.stream:
|
| 275 |
return StreamingResponse(
|
| 276 |
-
stream_response(ROX_TURBO_MODEL, messages, req.temperature, req.top_p, req.max_tokens),
|
| 277 |
media_type="text/event-stream"
|
| 278 |
)
|
| 279 |
|
| 280 |
try:
|
| 281 |
-
completion =
|
| 282 |
model=ROX_TURBO_MODEL,
|
| 283 |
messages=messages,
|
| 284 |
temperature=req.temperature,
|
| 285 |
top_p=req.top_p,
|
| 286 |
-
max_tokens=req.max_tokens,
|
| 287 |
stream=False
|
| 288 |
)
|
| 289 |
return {"content": completion.choices[0].message.content or ""}
|
|
@@ -295,7 +352,7 @@ async def turbo(req: ChatRequest):
|
|
| 295 |
async def coder(req: ChatRequest):
|
| 296 |
"""Rox 3.5 Coder - Specialized coding with streaming"""
|
| 297 |
messages = [{"role": "system", "content": ROX_CODER_IDENTITY}]
|
| 298 |
-
messages.extend([m.
|
| 299 |
|
| 300 |
extra_body = {
|
| 301 |
"top_k": 20,
|
|
@@ -306,12 +363,12 @@ async def coder(req: ChatRequest):
|
|
| 306 |
|
| 307 |
if req.stream:
|
| 308 |
return StreamingResponse(
|
| 309 |
-
stream_response(ROX_CODER_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 16384), extra_body),
|
| 310 |
media_type="text/event-stream"
|
| 311 |
)
|
| 312 |
|
| 313 |
try:
|
| 314 |
-
completion =
|
| 315 |
model=ROX_CODER_MODEL,
|
| 316 |
messages=messages,
|
| 317 |
temperature=req.temperature,
|
|
@@ -329,18 +386,18 @@ async def coder(req: ChatRequest):
|
|
| 329 |
async def turbo45(req: ChatRequest):
|
| 330 |
"""Rox 4.5 Turbo - Advanced reasoning with streaming"""
|
| 331 |
messages = [{"role": "system", "content": ROX_TURBO_45_IDENTITY}]
|
| 332 |
-
messages.extend([m.
|
| 333 |
|
| 334 |
extra_body = {"chat_template_kwargs": {"thinking": True}}
|
| 335 |
|
| 336 |
if req.stream:
|
| 337 |
return StreamingResponse(
|
| 338 |
-
stream_response(ROX_TURBO_45_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 8192), extra_body),
|
| 339 |
media_type="text/event-stream"
|
| 340 |
)
|
| 341 |
|
| 342 |
try:
|
| 343 |
-
completion =
|
| 344 |
model=ROX_TURBO_45_MODEL,
|
| 345 |
messages=messages,
|
| 346 |
temperature=req.temperature,
|
|
@@ -358,18 +415,18 @@ async def turbo45(req: ChatRequest):
|
|
| 358 |
async def ultra(req: ChatRequest):
|
| 359 |
"""Rox 5 Ultra - Most advanced with streaming"""
|
| 360 |
messages = [{"role": "system", "content": ROX_ULTRA_IDENTITY}]
|
| 361 |
-
messages.extend([m.
|
| 362 |
|
| 363 |
extra_body = {"chat_template_kwargs": {"thinking": True}}
|
| 364 |
|
| 365 |
if req.stream:
|
| 366 |
return StreamingResponse(
|
| 367 |
-
stream_response(ROX_ULTRA_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 8192), extra_body),
|
| 368 |
media_type="text/event-stream"
|
| 369 |
)
|
| 370 |
|
| 371 |
try:
|
| 372 |
-
completion =
|
| 373 |
model=ROX_ULTRA_MODEL,
|
| 374 |
messages=messages,
|
| 375 |
temperature=req.temperature,
|
|
@@ -387,18 +444,18 @@ async def ultra(req: ChatRequest):
|
|
| 387 |
async def dyno(req: ChatRequest):
|
| 388 |
"""Rox 6 Dyno - Extended context with streaming"""
|
| 389 |
messages = [{"role": "system", "content": ROX_DYNO_IDENTITY}]
|
| 390 |
-
messages.extend([m.
|
| 391 |
|
| 392 |
extra_body = {"chat_template_kwargs": {"thinking": True}}
|
| 393 |
|
| 394 |
if req.stream:
|
| 395 |
return StreamingResponse(
|
| 396 |
-
stream_response(ROX_DYNO_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 16384), extra_body),
|
| 397 |
media_type="text/event-stream"
|
| 398 |
)
|
| 399 |
|
| 400 |
try:
|
| 401 |
-
completion =
|
| 402 |
model=ROX_DYNO_MODEL,
|
| 403 |
messages=messages,
|
| 404 |
temperature=req.temperature,
|
|
@@ -416,7 +473,7 @@ async def dyno(req: ChatRequest):
|
|
| 416 |
async def coder7(req: ChatRequest):
|
| 417 |
"""Rox 7 Coder - Most advanced coding with streaming"""
|
| 418 |
messages = [{"role": "system", "content": ROX_CODER_7_IDENTITY}]
|
| 419 |
-
messages.extend([m.
|
| 420 |
|
| 421 |
extra_body = {
|
| 422 |
"chat_template_kwargs": {
|
|
@@ -427,12 +484,12 @@ async def coder7(req: ChatRequest):
|
|
| 427 |
|
| 428 |
if req.stream:
|
| 429 |
return StreamingResponse(
|
| 430 |
-
stream_response(ROX_CODER_7_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 16384), extra_body),
|
| 431 |
media_type="text/event-stream"
|
| 432 |
)
|
| 433 |
|
| 434 |
try:
|
| 435 |
-
completion =
|
| 436 |
model=ROX_CODER_7_MODEL,
|
| 437 |
messages=messages,
|
| 438 |
temperature=req.temperature,
|
|
@@ -450,16 +507,16 @@ async def coder7(req: ChatRequest):
|
|
| 450 |
async def vision(req: ChatRequest):
|
| 451 |
"""Rox Vision Max - Visual understanding with streaming"""
|
| 452 |
messages = [{"role": "system", "content": ROX_VISION_IDENTITY}]
|
| 453 |
-
messages.extend([m.
|
| 454 |
|
| 455 |
if req.stream:
|
| 456 |
return StreamingResponse(
|
| 457 |
-
stream_response(ROX_VISION_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 8192)),
|
| 458 |
media_type="text/event-stream"
|
| 459 |
)
|
| 460 |
|
| 461 |
try:
|
| 462 |
-
completion =
|
| 463 |
model=ROX_VISION_MODEL,
|
| 464 |
messages=messages,
|
| 465 |
temperature=req.temperature,
|
|
@@ -482,7 +539,7 @@ async def hf_generate(req: HFRequest):
|
|
| 482 |
]
|
| 483 |
|
| 484 |
try:
|
| 485 |
-
completion =
|
| 486 |
model=ROX_CORE_MODEL,
|
| 487 |
messages=messages,
|
| 488 |
temperature=params.temperature or 0.7,
|
|
|
|
| 1 |
import logging
|
| 2 |
import os
|
| 3 |
import sys
|
| 4 |
+
import time
|
| 5 |
+
import uuid
|
| 6 |
+
from typing import List, Optional, AsyncGenerator, Iterable
|
| 7 |
from contextlib import asynccontextmanager
|
| 8 |
|
| 9 |
from dotenv import load_dotenv
|
| 10 |
+
from fastapi import FastAPI, HTTPException, Request, Response
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
from fastapi.middleware.gzip import GZipMiddleware
|
| 13 |
from fastapi.responses import JSONResponse, StreamingResponse
|
| 14 |
from pydantic import BaseModel, Field
|
| 15 |
+
from openai import AsyncOpenAI
|
| 16 |
+
import httpx
|
| 17 |
import json
|
| 18 |
|
| 19 |
|
| 20 |
# Load environment variables
|
| 21 |
load_dotenv()
|
| 22 |
|
| 23 |
+
# Configure logging (env-controlled)
|
| 24 |
+
LOG_LEVEL = os.getenv("LOG_LEVEL", "WARNING").upper()
|
| 25 |
+
logging.basicConfig(level=LOG_LEVEL, format="%(levelname)s - %(message)s")
|
|
|
|
|
|
|
| 26 |
logger = logging.getLogger("rox_ai")
|
| 27 |
|
| 28 |
# Check for API key
|
|
|
|
| 31 |
if not NVIDIA_API_KEY:
|
| 32 |
raise RuntimeError("NVIDIA_API_KEY not set")
|
| 33 |
|
| 34 |
+
API_BASE_URL = os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1")
|
| 35 |
+
|
| 36 |
+
def _parse_cors_origins(value: Optional[str]) -> List[str]:
    """Parse a comma-separated CORS origin setting into a list of origins.

    Args:
        value: Raw setting text such as ``"https://a.example, https://b.example"``.
            ``None`` is treated like an empty string.

    Returns:
        ``[]`` when the setting is empty or blank, ``["*"]`` when any entry is
        the wildcard, otherwise the non-empty entries with surrounding
        whitespace removed, in their original order.
    """
    entries = [part.strip() for part in (value or "").split(",")]
    origins = [entry for entry in entries if entry]

    # A wildcard anywhere in the list already allows every origin. Collapse it
    # to exactly ["*"] so that code comparing the result against ["*"] (for
    # example to decide whether credentials may be enabled) cannot be bypassed
    # by a setting like "*,https://a.example".
    if "*" in origins:
        return ["*"]
    return origins
|
| 43 |
+
|
| 44 |
+
CORS_ORIGINS = _parse_cors_origins(os.getenv("CORS_ORIGINS", "*"))
|
| 45 |
+
GZIP_MIN_SIZE = int(os.getenv("GZIP_MIN_SIZE", "500"))
|
| 46 |
+
|
| 47 |
# Model configurations
|
| 48 |
ROX_CORE_MODEL = "minimaxai/minimax-m2.5"
|
| 49 |
ROX_TURBO_MODEL = "meta/llama-3.1-8b-instruct" # Changed to a more reliable model
|
|
|
|
| 64 |
ROX_CODER_7_IDENTITY = "You are Rox 7 Coder, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are the most advanced coding specialist."
|
| 65 |
ROX_VISION_IDENTITY = "You are Rox Vision Max, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You specialize in visual understanding and multimodal tasks."
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the shared upstream clients on startup and close them on shutdown.

    Settings are read from the environment (defaults in parentheses):
    ``UPSTREAM_TIMEOUT_SECONDS`` (60), ``UPSTREAM_MAX_RETRIES`` (2),
    ``UPSTREAM_MAX_CONNECTIONS`` (200) and
    ``UPSTREAM_MAX_KEEPALIVE_CONNECTIONS`` (50).

    One ``httpx.AsyncClient`` is built and stored on ``app.state.http_client``;
    an ``AsyncOpenAI`` client that uses it is stored on ``app.state.client``.

    Raises:
        ValueError: If one of the numeric environment settings cannot be parsed.
    """
    timeout_s = float(os.getenv("UPSTREAM_TIMEOUT_SECONDS", "60"))
    max_retries = int(os.getenv("UPSTREAM_MAX_RETRIES", "2"))
    max_connections = int(os.getenv("UPSTREAM_MAX_CONNECTIONS", "200"))
    max_keepalive = int(os.getenv("UPSTREAM_MAX_KEEPALIVE_CONNECTIONS", "50"))

    # One pooled async HTTP client for all requests (keep-alive, limits, timeouts)
    http_client = httpx.AsyncClient(
        timeout=httpx.Timeout(timeout_s),
        limits=httpx.Limits(
            max_connections=max_connections,
            max_keepalive_connections=max_keepalive,
        ),
        headers={"User-Agent": "Rox-AI-API/2.0"},
    )

    # Everything after the HTTP client exists runs under try/finally, so the
    # connection pool is closed even when building the API client fails
    # during startup, not only on a normal shutdown.
    try:
        app.state.http_client = http_client
        app.state.client = AsyncOpenAI(
            base_url=API_BASE_URL,
            api_key=NVIDIA_API_KEY,
            timeout=timeout_s,
            max_retries=max_retries,
            http_client=http_client,
        )
        yield
    finally:
        await http_client.aclose()
|
| 94 |
|
| 95 |
|
| 96 |
# Initialize FastAPI app - optimized for speed
|
|
|
|
| 104 |
)
|
| 105 |
|
| 106 |
# GZip compression for faster transfers
|
| 107 |
+
app.add_middleware(GZipMiddleware, minimum_size=GZIP_MIN_SIZE)
|
| 108 |
|
| 109 |
+
# CORS - env controlled (default "*")
|
| 110 |
app.add_middleware(
|
| 111 |
CORSMiddleware,
|
| 112 |
+
allow_origins=CORS_ORIGINS,
|
| 113 |
+
allow_credentials=(CORS_ORIGINS != ["*"]),
|
| 114 |
allow_methods=["*"],
|
| 115 |
allow_headers=["*"],
|
| 116 |
)
|
| 117 |
|
| 118 |
+
@app.middleware("http")
async def add_request_context(request: Request, call_next):
    """Attach a request id and the processing time to every response.

    The id comes from the incoming ``x-request-id`` header when it is present
    and non-empty; otherwise a new UUID4 string is generated. The elapsed time
    covers the ``call_next`` call and is reported in milliseconds.
    """
    supplied_id = request.headers.get("x-request-id")
    if supplied_id:
        request_id = supplied_id
    else:
        request_id = str(uuid.uuid4())

    start = time.perf_counter()
    try:
        response: Response = await call_next(request)
    finally:
        # Runs on the error path as well, so a request whose handler raised
        # is still timed and logged before the exception propagates.
        elapsed_ms = (time.perf_counter() - start) * 1000.0
        # Keep logs lightweight; only emit at INFO+ if enabled
        if logger.isEnabledFor(logging.INFO):
            logger.info(
                "%s %s -> %.2fms id=%s",
                request.method,
                request.url.path,
                elapsed_ms,
                request_id,
            )

    response.headers["X-Request-Id"] = request_id
    response.headers["X-Process-Time-Ms"] = f"{elapsed_ms:.2f}"
    return response
|
| 133 |
+
|
| 134 |
|
| 135 |
# Minimal exception handler
|
| 136 |
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Log an otherwise-unhandled exception and answer with a generic HTTP 500.

    The log entry names the request method and path; the response body is a
    fixed JSON object, so no detail from ``exc`` is sent back to the caller.
    """
    method = request.method
    path = request.url.path
    logger.exception("Unhandled error on %s %s", method, path)

    payload = {"error": "Internal server error"}
    return JSONResponse(content=payload, status_code=500)
|
| 143 |
|
| 144 |
|
| 145 |
+
def _client(app_: FastAPI) -> AsyncOpenAI:
    """Return the API client stored on ``app_.state.client``.

    Raises:
        RuntimeError: If the attribute is missing or ``None``.
    """
    state = app_.state
    client = getattr(state, "client", None)
    if client is not None:
        return client
    raise RuntimeError("Client not initialized")
|
| 150 |
|
| 151 |
|
| 152 |
# Helper function for streaming responses
|
| 153 |
+
async def stream_response(
|
| 154 |
+
app_: FastAPI,
|
| 155 |
+
model: str,
|
| 156 |
+
messages: list,
|
| 157 |
+
temperature: float,
|
| 158 |
+
top_p: float,
|
| 159 |
+
max_tokens: int,
|
| 160 |
+
extra_body: dict | None = None,
|
| 161 |
+
) -> AsyncGenerator[str, None]:
|
| 162 |
"""Stream responses from OpenAI API"""
|
| 163 |
try:
|
| 164 |
+
stream = await _client(app_).chat.completions.create(
|
| 165 |
model=model,
|
| 166 |
messages=messages,
|
| 167 |
temperature=temperature,
|
|
|
|
| 171 |
extra_body=extra_body
|
| 172 |
)
|
| 173 |
|
| 174 |
+
async for chunk in stream:
|
| 175 |
+
delta = chunk.choices[0].delta
|
| 176 |
+
content = getattr(delta, "content", None)
|
| 177 |
+
if content:
|
| 178 |
+
yield f"data: {json.dumps({'content': content})}\n\n"
|
| 179 |
|
| 180 |
yield "data: [DONE]\n\n"
|
| 181 |
except Exception as e:
|
|
|
|
| 300 |
async def chat(req: ChatRequest):
|
| 301 |
"""Rox Core - Main conversational model with streaming support"""
|
| 302 |
messages = [{"role": "system", "content": ROX_CORE_IDENTITY}]
|
| 303 |
+
messages.extend([m.model_dump() for m in req.messages])
|
| 304 |
|
| 305 |
if req.stream:
|
| 306 |
return StreamingResponse(
|
| 307 |
+
stream_response(app, ROX_CORE_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 8192)),
|
| 308 |
media_type="text/event-stream"
|
| 309 |
)
|
| 310 |
|
| 311 |
try:
|
| 312 |
+
completion = await _client(app).chat.completions.create(
|
| 313 |
model=ROX_CORE_MODEL,
|
| 314 |
messages=messages,
|
| 315 |
temperature=req.temperature,
|
| 316 |
top_p=req.top_p,
|
| 317 |
+
max_tokens=min(req.max_tokens, 8192),
|
| 318 |
stream=False
|
| 319 |
)
|
| 320 |
return {"content": completion.choices[0].message.content or ""}
|
|
|
|
| 326 |
async def turbo(req: ChatRequest):
|
| 327 |
"""Rox 2.1 Turbo - Fast and efficient with streaming"""
|
| 328 |
messages = [{"role": "system", "content": ROX_TURBO_IDENTITY}]
|
| 329 |
+
messages.extend([m.model_dump() for m in req.messages])
|
| 330 |
|
| 331 |
if req.stream:
|
| 332 |
return StreamingResponse(
|
| 333 |
+
stream_response(app, ROX_TURBO_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 8192)),
|
| 334 |
media_type="text/event-stream"
|
| 335 |
)
|
| 336 |
|
| 337 |
try:
|
| 338 |
+
completion = await _client(app).chat.completions.create(
|
| 339 |
model=ROX_TURBO_MODEL,
|
| 340 |
messages=messages,
|
| 341 |
temperature=req.temperature,
|
| 342 |
top_p=req.top_p,
|
| 343 |
+
max_tokens=min(req.max_tokens, 8192),
|
| 344 |
stream=False
|
| 345 |
)
|
| 346 |
return {"content": completion.choices[0].message.content or ""}
|
|
|
|
| 352 |
async def coder(req: ChatRequest):
|
| 353 |
"""Rox 3.5 Coder - Specialized coding with streaming"""
|
| 354 |
messages = [{"role": "system", "content": ROX_CODER_IDENTITY}]
|
| 355 |
+
messages.extend([m.model_dump() for m in req.messages])
|
| 356 |
|
| 357 |
extra_body = {
|
| 358 |
"top_k": 20,
|
|
|
|
| 363 |
|
| 364 |
if req.stream:
|
| 365 |
return StreamingResponse(
|
| 366 |
+
stream_response(app, ROX_CODER_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 16384), extra_body),
|
| 367 |
media_type="text/event-stream"
|
| 368 |
)
|
| 369 |
|
| 370 |
try:
|
| 371 |
+
completion = await _client(app).chat.completions.create(
|
| 372 |
model=ROX_CODER_MODEL,
|
| 373 |
messages=messages,
|
| 374 |
temperature=req.temperature,
|
|
|
|
| 386 |
async def turbo45(req: ChatRequest):
|
| 387 |
"""Rox 4.5 Turbo - Advanced reasoning with streaming"""
|
| 388 |
messages = [{"role": "system", "content": ROX_TURBO_45_IDENTITY}]
|
| 389 |
+
messages.extend([m.model_dump() for m in req.messages])
|
| 390 |
|
| 391 |
extra_body = {"chat_template_kwargs": {"thinking": True}}
|
| 392 |
|
| 393 |
if req.stream:
|
| 394 |
return StreamingResponse(
|
| 395 |
+
stream_response(app, ROX_TURBO_45_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 8192), extra_body),
|
| 396 |
media_type="text/event-stream"
|
| 397 |
)
|
| 398 |
|
| 399 |
try:
|
| 400 |
+
completion = await _client(app).chat.completions.create(
|
| 401 |
model=ROX_TURBO_45_MODEL,
|
| 402 |
messages=messages,
|
| 403 |
temperature=req.temperature,
|
|
|
|
| 415 |
async def ultra(req: ChatRequest):
|
| 416 |
"""Rox 5 Ultra - Most advanced with streaming"""
|
| 417 |
messages = [{"role": "system", "content": ROX_ULTRA_IDENTITY}]
|
| 418 |
+
messages.extend([m.model_dump() for m in req.messages])
|
| 419 |
|
| 420 |
extra_body = {"chat_template_kwargs": {"thinking": True}}
|
| 421 |
|
| 422 |
if req.stream:
|
| 423 |
return StreamingResponse(
|
| 424 |
+
stream_response(app, ROX_ULTRA_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 8192), extra_body),
|
| 425 |
media_type="text/event-stream"
|
| 426 |
)
|
| 427 |
|
| 428 |
try:
|
| 429 |
+
completion = await _client(app).chat.completions.create(
|
| 430 |
model=ROX_ULTRA_MODEL,
|
| 431 |
messages=messages,
|
| 432 |
temperature=req.temperature,
|
|
|
|
| 444 |
async def dyno(req: ChatRequest):
|
| 445 |
"""Rox 6 Dyno - Extended context with streaming"""
|
| 446 |
messages = [{"role": "system", "content": ROX_DYNO_IDENTITY}]
|
| 447 |
+
messages.extend([m.model_dump() for m in req.messages])
|
| 448 |
|
| 449 |
extra_body = {"chat_template_kwargs": {"thinking": True}}
|
| 450 |
|
| 451 |
if req.stream:
|
| 452 |
return StreamingResponse(
|
| 453 |
+
stream_response(app, ROX_DYNO_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 16384), extra_body),
|
| 454 |
media_type="text/event-stream"
|
| 455 |
)
|
| 456 |
|
| 457 |
try:
|
| 458 |
+
completion = await _client(app).chat.completions.create(
|
| 459 |
model=ROX_DYNO_MODEL,
|
| 460 |
messages=messages,
|
| 461 |
temperature=req.temperature,
|
|
|
|
| 473 |
async def coder7(req: ChatRequest):
|
| 474 |
"""Rox 7 Coder - Most advanced coding with streaming"""
|
| 475 |
messages = [{"role": "system", "content": ROX_CODER_7_IDENTITY}]
|
| 476 |
+
messages.extend([m.model_dump() for m in req.messages])
|
| 477 |
|
| 478 |
extra_body = {
|
| 479 |
"chat_template_kwargs": {
|
|
|
|
| 484 |
|
| 485 |
if req.stream:
|
| 486 |
return StreamingResponse(
|
| 487 |
+
stream_response(app, ROX_CODER_7_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 16384), extra_body),
|
| 488 |
media_type="text/event-stream"
|
| 489 |
)
|
| 490 |
|
| 491 |
try:
|
| 492 |
+
completion = await _client(app).chat.completions.create(
|
| 493 |
model=ROX_CODER_7_MODEL,
|
| 494 |
messages=messages,
|
| 495 |
temperature=req.temperature,
|
|
|
|
| 507 |
async def vision(req: ChatRequest):
|
| 508 |
"""Rox Vision Max - Visual understanding with streaming"""
|
| 509 |
messages = [{"role": "system", "content": ROX_VISION_IDENTITY}]
|
| 510 |
+
messages.extend([m.model_dump() for m in req.messages])
|
| 511 |
|
| 512 |
if req.stream:
|
| 513 |
return StreamingResponse(
|
| 514 |
+
stream_response(app, ROX_VISION_MODEL, messages, req.temperature, req.top_p, min(req.max_tokens, 8192)),
|
| 515 |
media_type="text/event-stream"
|
| 516 |
)
|
| 517 |
|
| 518 |
try:
|
| 519 |
+
completion = await _client(app).chat.completions.create(
|
| 520 |
model=ROX_VISION_MODEL,
|
| 521 |
messages=messages,
|
| 522 |
temperature=req.temperature,
|
|
|
|
| 539 |
]
|
| 540 |
|
| 541 |
try:
|
| 542 |
+
completion = await _client(app).chat.completions.create(
|
| 543 |
model=ROX_CORE_MODEL,
|
| 544 |
messages=messages,
|
| 545 |
temperature=params.temperature or 0.7,
|