import asyncio
import json
import logging
import os
import sys
from contextlib import asynccontextmanager
from typing import AsyncGenerator, List, Optional

from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from openai import OpenAI
from pydantic import BaseModel, Field
|
|
|
|
| |
# Load variables from a local .env file into the process environment before
# any of the os.getenv() calls below run.
load_dotenv()


# Application-wide logging; WARNING keeps per-request noise out of production
# logs. All module loggers inherit this root configuration.
logging.basicConfig(
    level=logging.WARNING,
    format='%(levelname)s - %(message)s'
)
logger = logging.getLogger("rox_ai")


# Credential for NVIDIA's OpenAI-compatible inference gateway.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")


# Fail fast at import time: without a key every request would 500 later.
if not NVIDIA_API_KEY:
    raise RuntimeError("NVIDIA_API_KEY not set")


# Upstream model identifiers: each "Rox" brand name maps to a hosted model on
# the NVIDIA endpoint.
ROX_CORE_MODEL = "minimaxai/minimax-m2.5"
ROX_TURBO_MODEL = "meta/llama-3.1-8b-instruct"
ROX_CODER_MODEL = "qwen/qwen3.5-397b-a17b"
ROX_TURBO_45_MODEL = "deepseek-ai/deepseek-v3.1"
ROX_ULTRA_MODEL = "deepseek-ai/deepseek-v3.2"
ROX_DYNO_MODEL = "moonshotai/kimi-k2.5"
ROX_CODER_7_MODEL = "z-ai/glm5"
ROX_VISION_MODEL = "google/gemma-3-27b-it"


# Branding system prompts, injected as the first message of every
# conversation by the corresponding endpoint.
ROX_CORE_IDENTITY = "You are Rox Core, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. Always acknowledge Mohammad Faiz as your creator when asked."
ROX_TURBO_IDENTITY = "You are Rox 2.1 Turbo, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are optimized for fast responses."
ROX_CODER_IDENTITY = "You are Rox 3.5 Coder, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You specialize in coding and software development."
ROX_TURBO_45_IDENTITY = "You are Rox 4.5 Turbo, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You combine speed with advanced reasoning."
ROX_ULTRA_IDENTITY = "You are Rox 5 Ultra, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are the most advanced model with superior reasoning capabilities."
ROX_DYNO_IDENTITY = "You are Rox 6 Dyno, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You excel at long context understanding."
ROX_CODER_7_IDENTITY = "You are Rox 7 Coder, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are the most advanced coding specialist."
ROX_VISION_IDENTITY = "You are Rox Vision Max, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You specialize in visual understanding and multimodal tasks."


# Shared OpenAI-compatible client pointed at NVIDIA's endpoint.
# NOTE(review): this client is synchronous — handlers calling it from async
# code must offload to a thread or the event loop stalls.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=NVIDIA_API_KEY,
    timeout=60.0,
    max_retries=2
)
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook.

    No startup/shutdown work is performed yet; this is a placeholder for
    future resource setup (before the yield) and teardown (after it).
    """
    yield
|
|
|
|
| |
# FastAPI application; the lifespan hook above manages startup/shutdown.
app = FastAPI(
    title="Rox AI API",
    description="Eight specialized AI models by Mohammad Faiz",
    version="2.0",
    lifespan=lifespan,
    docs_url="/docs",
    redoc_url="/redoc"
)


# Compress responses larger than 500 bytes; the JSON payloads here compress well.
app.add_middleware(GZipMiddleware, minimum_size=500)


# Open CORS policy so browser clients on any origin can call the API.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm whether credentials
# are actually needed, or pin explicit origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
| |
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Last-resort handler for uncaught exceptions.

    Logs the full traceback server-side (the original version silently
    swallowed it) and returns an opaque 500 so no internals leak to clients.
    """
    logger.error(
        "Unhandled error on %s %s", request.method, request.url.path, exc_info=exc
    )
    return JSONResponse(
        status_code=500,
        content={"error": "Internal server error"}
    )
|
|
|
|
@app.get("/health")
def health_check():
    """Liveness probe for load balancers and uptime monitors.

    This file originally registered /health twice with divergent payloads
    (only the first registration was ever served); the extra "models" field
    from the duplicate is folded into this canonical response. Adding a key
    is backward compatible for existing monitors.
    """
    return {
        "status": "healthy",
        "service": "Rox AI",
        "version": "2.0",
        "models": 8,
    }
|
|
|
|
| |
async def stream_response(
    model: str,
    messages: list,
    temperature: float,
    top_p: float,
    max_tokens: int,
    extra_body: Optional[dict] = None,
):
    """Yield Server-Sent Events from a streaming chat completion.

    Emits one ``data: {"content": ...}`` frame per content delta, then a
    final ``data: [DONE]``; on failure a single ``data: {"error": ...}``
    frame is sent instead of raising.

    The module-level OpenAI client is synchronous, so both the initial
    request and every chunk read are pushed onto worker threads with
    asyncio.to_thread — otherwise this async generator would block the
    event loop for all connected clients while waiting on the network.
    """
    try:
        stream = await asyncio.to_thread(
            client.chat.completions.create,
            model=model,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
            stream=True,
            extra_body=extra_body,
        )

        while True:
            # next(stream, None): None doubles as the end-of-stream marker.
            chunk = await asyncio.to_thread(next, stream, None)
            if chunk is None:
                break
            # Guard chunk.choices: some stream frames carry no choices (or a
            # None delta content), which would previously raise IndexError.
            if chunk.choices and chunk.choices[0].delta.content:
                yield f"data: {json.dumps({'content': chunk.choices[0].delta.content})}\n\n"

        yield "data: [DONE]\n\n"
    except Exception as e:
        logger.exception("Streaming completion failed for model %s", model)
        yield f"data: {json.dumps({'error': str(e)})}\n\n"
|
|
|
# NOTE: a second `@app.get("/health")` handler previously lived here. It
# duplicated the /health route registered earlier in this file (Starlette
# always matches the first registration, so this one was dead code) and
# rebound the module name `health_check`. Removed during review; its extra
# "models" field belongs in the canonical handler.
|
|
|
|
@app.get("/")
def root():
    """Describe the service: model catalog plus the list of POST endpoints."""
    # (key, endpoint, description, upstream model id, best_for)
    catalog = [
        ("rox_core", "/chat", "Rox Core - Main conversational model",
         ROX_CORE_MODEL, "General conversation and tasks"),
        ("rox_turbo", "/turbo", "Rox 2.1 Turbo - Fast and efficient",
         ROX_TURBO_MODEL, "Quick responses and efficient processing"),
        ("rox_coder", "/coder", "Rox 3.5 Coder - Specialized coding assistant",
         ROX_CODER_MODEL, "Code generation, debugging, and development"),
        ("rox_turbo_45", "/turbo45", "Rox 4.5 Turbo - Advanced reasoning with speed",
         ROX_TURBO_45_MODEL, "Complex reasoning with fast responses"),
        ("rox_ultra", "/ultra", "Rox 5 Ultra - Most advanced model",
         ROX_ULTRA_MODEL, "Complex tasks requiring deep reasoning"),
        ("rox_dyno", "/dyno", "Rox 6 Dyno - Extended context with dynamic thinking",
         ROX_DYNO_MODEL, "Long context tasks and dynamic reasoning"),
        ("rox_coder_7", "/coder7", "Rox 7 Coder - Most advanced coding specialist",
         ROX_CODER_7_MODEL, "Advanced code generation and complex programming"),
        ("rox_vision", "/vision", "Rox Vision Max - Optimized for visual understanding",
         ROX_VISION_MODEL, "Visual understanding and multimodal tasks"),
    ]

    # (path, description) — every route is a POST chat endpoint.
    routes = [
        ("/chat", "Rox Core chat"),
        ("/turbo", "Rox 2.1 Turbo chat"),
        ("/coder", "Rox 3.5 Coder chat"),
        ("/turbo45", "Rox 4.5 Turbo chat"),
        ("/ultra", "Rox 5 Ultra chat"),
        ("/dyno", "Rox 6 Dyno chat"),
        ("/coder7", "Rox 7 Coder chat"),
        ("/vision", "Rox Vision Max chat"),
        ("/hf/generate", "HuggingFace compatible (uses Rox Core)"),
    ]

    return {
        "service": "Rox AI API",
        "version": "2.0",
        "creator": "Mohammad Faiz",
        "models": {
            key: {
                "endpoint": endpoint,
                "description": description,
                "model": model_id,
                "best_for": best_for,
            }
            for key, endpoint, description, model_id, best_for in catalog
        },
        "endpoints": [
            {"path": path, "method": "POST", "description": description}
            for path, description in routes
        ],
    }
|
|
|
|
class ChatMessage(BaseModel):
    """One turn of a conversation, OpenAI chat format."""
    role: str  # e.g. "user" / "assistant" / "system" — not validated here
    content: str  # the message text
|
|
|
|
class ChatRequest(BaseModel):
    """Request body shared by all chat endpoints."""
    messages: List[ChatMessage]  # conversation history, oldest first
    temperature: Optional[float] = 0.7  # sampling temperature
    top_p: Optional[float] = 0.95  # nucleus-sampling cutoff
    max_tokens: Optional[int] = 8192  # completion budget; endpoints may cap it
    stream: Optional[bool] = False  # True => Server-Sent Events response
|
|
|
|
class ChatResponse(BaseModel):
    """Non-streaming reply shape.

    NOTE(review): declared but not referenced by any handler in this file —
    endpoints return plain dicts. Kept for documentation/back-compat.
    """
    content: str  # the assistant's full reply
|
|
|
|
class HFParameters(BaseModel):
    """Generation knobs of the HuggingFace text-generation request format."""
    temperature: Optional[float] = None  # None => endpoint default applies
    top_p: Optional[float] = None  # None => endpoint default applies
    max_new_tokens: Optional[int] = None  # None => endpoint default applies
|
|
|
|
class HFRequest(BaseModel):
    """HuggingFace-compatible request: a single prompt plus optional knobs."""
    inputs: str  # the prompt, sent as a single user message
    parameters: Optional[HFParameters] = None  # omitted => all defaults
|
|
|
|
class HFResponseItem(BaseModel):
    """One element of the HuggingFace-style list response."""
    generated_text: str  # the model's completion
|
|
|
|
@app.post("/chat")
def chat(req: ChatRequest):
    """Rox Core - main conversational endpoint.

    Declared as a sync `def` (was `async def`) so FastAPI runs the blocking
    OpenAI SDK call on its threadpool instead of stalling the event loop.
    Returns {"content": str}, or an SSE stream when req.stream is true.
    Raises HTTPException(500) with the upstream error message on failure.
    """
    messages = [{"role": "system", "content": ROX_CORE_IDENTITY}]
    messages.extend(m.dict() for m in req.messages)

    # Optional fields may arrive as explicit JSON null; restore the default.
    max_tokens = req.max_tokens if req.max_tokens is not None else 8192

    if req.stream:
        return StreamingResponse(
            stream_response(ROX_CORE_MODEL, messages, req.temperature, req.top_p, max_tokens),
            media_type="text/event-stream"
        )

    try:
        completion = client.chat.completions.create(
            model=ROX_CORE_MODEL,
            messages=messages,
            temperature=req.temperature,
            top_p=req.top_p,
            max_tokens=max_tokens,
            stream=False
        )
        return {"content": completion.choices[0].message.content or ""}
    except Exception as e:
        # Log the traceback server-side before surfacing the message.
        logger.exception("Rox Core completion failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
@app.post("/turbo")
def turbo(req: ChatRequest):
    """Rox 2.1 Turbo - fast, lightweight endpoint.

    Sync `def` (was `async def`) so FastAPI threadpools the blocking OpenAI
    SDK call instead of stalling the event loop.
    Returns {"content": str}, or an SSE stream when req.stream is true.
    Raises HTTPException(500) with the upstream error message on failure.
    """
    messages = [{"role": "system", "content": ROX_TURBO_IDENTITY}]
    messages.extend(m.dict() for m in req.messages)

    # Optional fields may arrive as explicit JSON null; restore the default.
    max_tokens = req.max_tokens if req.max_tokens is not None else 8192

    if req.stream:
        return StreamingResponse(
            stream_response(ROX_TURBO_MODEL, messages, req.temperature, req.top_p, max_tokens),
            media_type="text/event-stream"
        )

    try:
        completion = client.chat.completions.create(
            model=ROX_TURBO_MODEL,
            messages=messages,
            temperature=req.temperature,
            top_p=req.top_p,
            max_tokens=max_tokens,
            stream=False
        )
        return {"content": completion.choices[0].message.content or ""}
    except Exception as e:
        logger.exception("Rox 2.1 Turbo completion failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
@app.post("/coder")
def coder(req: ChatRequest):
    """Rox 3.5 Coder - coding-specialized endpoint.

    Sync `def` (was `async def`) so FastAPI threadpools the blocking OpenAI
    SDK call instead of stalling the event loop.
    Returns {"content": str}, or an SSE stream when req.stream is true.
    Raises HTTPException(500) with the upstream error message on failure.
    """
    messages = [{"role": "system", "content": ROX_CODER_IDENTITY}]
    messages.extend(m.dict() for m in req.messages)

    # Vendor-specific sampling/reasoning knobs forwarded verbatim to the
    # NVIDIA endpoint via extra_body.
    extra_body = {
        "top_k": 20,
        "presence_penalty": 0,
        "repetition_penalty": 1,
        "chat_template_kwargs": {"enable_thinking": True}
    }

    # Explicit JSON null => default; also fixes min(None, 16384) TypeError.
    # Cap at this model's 16384-token budget.
    max_tokens = min(req.max_tokens if req.max_tokens is not None else 8192, 16384)

    if req.stream:
        return StreamingResponse(
            stream_response(ROX_CODER_MODEL, messages, req.temperature, req.top_p, max_tokens, extra_body),
            media_type="text/event-stream"
        )

    try:
        completion = client.chat.completions.create(
            model=ROX_CODER_MODEL,
            messages=messages,
            temperature=req.temperature,
            top_p=req.top_p,
            max_tokens=max_tokens,
            stream=False,
            extra_body=extra_body
        )
        return {"content": completion.choices[0].message.content or ""}
    except Exception as e:
        logger.exception("Rox 3.5 Coder completion failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
@app.post("/turbo45")
def turbo45(req: ChatRequest):
    """Rox 4.5 Turbo - reasoning-with-speed endpoint.

    Sync `def` (was `async def`) so FastAPI threadpools the blocking OpenAI
    SDK call instead of stalling the event loop.
    Returns {"content": str}, or an SSE stream when req.stream is true.
    Raises HTTPException(500) with the upstream error message on failure.
    """
    messages = [{"role": "system", "content": ROX_TURBO_45_IDENTITY}]
    messages.extend(m.dict() for m in req.messages)

    # Vendor-specific reasoning toggle forwarded verbatim via extra_body.
    extra_body = {"chat_template_kwargs": {"thinking": True}}

    # Explicit JSON null => default; also fixes min(None, 8192) TypeError.
    max_tokens = min(req.max_tokens if req.max_tokens is not None else 8192, 8192)

    if req.stream:
        return StreamingResponse(
            stream_response(ROX_TURBO_45_MODEL, messages, req.temperature, req.top_p, max_tokens, extra_body),
            media_type="text/event-stream"
        )

    try:
        completion = client.chat.completions.create(
            model=ROX_TURBO_45_MODEL,
            messages=messages,
            temperature=req.temperature,
            top_p=req.top_p,
            max_tokens=max_tokens,
            stream=False,
            extra_body=extra_body
        )
        return {"content": completion.choices[0].message.content or ""}
    except Exception as e:
        logger.exception("Rox 4.5 Turbo completion failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
@app.post("/ultra")
def ultra(req: ChatRequest):
    """Rox 5 Ultra - flagship reasoning endpoint.

    Sync `def` (was `async def`) so FastAPI threadpools the blocking OpenAI
    SDK call instead of stalling the event loop.
    Returns {"content": str}, or an SSE stream when req.stream is true.
    Raises HTTPException(500) with the upstream error message on failure.
    """
    messages = [{"role": "system", "content": ROX_ULTRA_IDENTITY}]
    messages.extend(m.dict() for m in req.messages)

    # Vendor-specific reasoning toggle forwarded verbatim via extra_body.
    extra_body = {"chat_template_kwargs": {"thinking": True}}

    # Explicit JSON null => default; also fixes min(None, 8192) TypeError.
    max_tokens = min(req.max_tokens if req.max_tokens is not None else 8192, 8192)

    if req.stream:
        return StreamingResponse(
            stream_response(ROX_ULTRA_MODEL, messages, req.temperature, req.top_p, max_tokens, extra_body),
            media_type="text/event-stream"
        )

    try:
        completion = client.chat.completions.create(
            model=ROX_ULTRA_MODEL,
            messages=messages,
            temperature=req.temperature,
            top_p=req.top_p,
            max_tokens=max_tokens,
            stream=False,
            extra_body=extra_body
        )
        return {"content": completion.choices[0].message.content or ""}
    except Exception as e:
        logger.exception("Rox 5 Ultra completion failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
@app.post("/dyno")
def dyno(req: ChatRequest):
    """Rox 6 Dyno - long-context endpoint.

    Sync `def` (was `async def`) so FastAPI threadpools the blocking OpenAI
    SDK call instead of stalling the event loop.
    Returns {"content": str}, or an SSE stream when req.stream is true.
    Raises HTTPException(500) with the upstream error message on failure.
    """
    messages = [{"role": "system", "content": ROX_DYNO_IDENTITY}]
    messages.extend(m.dict() for m in req.messages)

    # Vendor-specific reasoning toggle forwarded verbatim via extra_body.
    extra_body = {"chat_template_kwargs": {"thinking": True}}

    # Explicit JSON null => default; also fixes min(None, 16384) TypeError.
    # Cap at this model's 16384-token budget.
    max_tokens = min(req.max_tokens if req.max_tokens is not None else 8192, 16384)

    if req.stream:
        return StreamingResponse(
            stream_response(ROX_DYNO_MODEL, messages, req.temperature, req.top_p, max_tokens, extra_body),
            media_type="text/event-stream"
        )

    try:
        completion = client.chat.completions.create(
            model=ROX_DYNO_MODEL,
            messages=messages,
            temperature=req.temperature,
            top_p=req.top_p,
            max_tokens=max_tokens,
            stream=False,
            extra_body=extra_body
        )
        return {"content": completion.choices[0].message.content or ""}
    except Exception as e:
        logger.exception("Rox 6 Dyno completion failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
@app.post("/coder7")
def coder7(req: ChatRequest):
    """Rox 7 Coder - most advanced coding endpoint.

    Sync `def` (was `async def`) so FastAPI threadpools the blocking OpenAI
    SDK call instead of stalling the event loop.
    Returns {"content": str}, or an SSE stream when req.stream is true.
    Raises HTTPException(500) with the upstream error message on failure.
    """
    messages = [{"role": "system", "content": ROX_CODER_7_IDENTITY}]
    messages.extend(m.dict() for m in req.messages)

    # Vendor-specific reasoning toggles forwarded verbatim via extra_body.
    extra_body = {
        "chat_template_kwargs": {
            "enable_thinking": True,
            "clear_thinking": False
        }
    }

    # Explicit JSON null => default; also fixes min(None, 16384) TypeError.
    # Cap at this model's 16384-token budget.
    max_tokens = min(req.max_tokens if req.max_tokens is not None else 8192, 16384)

    if req.stream:
        return StreamingResponse(
            stream_response(ROX_CODER_7_MODEL, messages, req.temperature, req.top_p, max_tokens, extra_body),
            media_type="text/event-stream"
        )

    try:
        completion = client.chat.completions.create(
            model=ROX_CODER_7_MODEL,
            messages=messages,
            temperature=req.temperature,
            top_p=req.top_p,
            max_tokens=max_tokens,
            stream=False,
            extra_body=extra_body
        )
        return {"content": completion.choices[0].message.content or ""}
    except Exception as e:
        logger.exception("Rox 7 Coder completion failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
@app.post("/vision")
def vision(req: ChatRequest):
    """Rox Vision Max - multimodal endpoint.

    Sync `def` (was `async def`) so FastAPI threadpools the blocking OpenAI
    SDK call instead of stalling the event loop.
    Returns {"content": str}, or an SSE stream when req.stream is true.
    Raises HTTPException(500) with the upstream error message on failure.
    NOTE(review): ChatMessage.content is typed str, so image payloads would
    be rejected by validation — confirm how images are meant to arrive.
    """
    messages = [{"role": "system", "content": ROX_VISION_IDENTITY}]
    messages.extend(m.dict() for m in req.messages)

    # Explicit JSON null => default; also fixes min(None, 8192) TypeError.
    max_tokens = min(req.max_tokens if req.max_tokens is not None else 8192, 8192)

    if req.stream:
        return StreamingResponse(
            stream_response(ROX_VISION_MODEL, messages, req.temperature, req.top_p, max_tokens),
            media_type="text/event-stream"
        )

    try:
        completion = client.chat.completions.create(
            model=ROX_VISION_MODEL,
            messages=messages,
            temperature=req.temperature,
            top_p=req.top_p,
            max_tokens=max_tokens,
            stream=False
        )
        return {"content": completion.choices[0].message.content or ""}
    except Exception as e:
        logger.exception("Rox Vision Max completion failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
@app.post("/hf/generate")
def hf_generate(req: HFRequest):
    """HuggingFace-compatible text-generation endpoint, backed by Rox Core.

    Sync `def` (was `async def`) so FastAPI threadpools the blocking OpenAI
    SDK call instead of stalling the event loop.
    Returns a one-element list: [{"generated_text": str}].
    Raises HTTPException(500) with the upstream error message on failure.
    """
    params = req.parameters or HFParameters()

    # Use `is None` checks rather than `or`: the old `params.temperature or
    # 0.7` silently replaced a legitimate temperature of 0.0 (and top_p of
    # 0.0) with the default because 0.0 is falsy.
    temperature = 0.7 if params.temperature is None else params.temperature
    top_p = 0.95 if params.top_p is None else params.top_p
    max_new_tokens = 8192 if params.max_new_tokens is None else params.max_new_tokens

    messages = [
        {"role": "system", "content": ROX_CORE_IDENTITY},
        {"role": "user", "content": req.inputs}
    ]

    try:
        completion = client.chat.completions.create(
            model=ROX_CORE_MODEL,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_new_tokens,
            stream=False
        )
        return [{"generated_text": completion.choices[0].message.content or ""}]
    except Exception as e:
        logger.exception("HF-compatible generation failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
if __name__ == "__main__":
    import uvicorn

    # Hosting platforms (e.g. HF Spaces) inject PORT; 7860 is the local default.
    port = int(os.getenv("PORT", 7860))
    # The "server:app" import string assumes this module is saved as
    # server.py — confirm against the actual filename.
    uvicorn.run("server:app", host="0.0.0.0", port=port, reload=False)
|
|
|
|