Update server.py
Browse files
server.py
CHANGED
|
@@ -44,7 +44,7 @@ logger.info(f"✓ NVIDIA_API_KEY loaded (length: {len(NVIDIA_API_KEY)})")
|
|
| 44 |
|
| 45 |
# Model configurations
|
| 46 |
ROX_CORE_MODEL = "minimaxai/minimax-m2.5"
|
| 47 |
-
ROX_TURBO_MODEL = "
|
| 48 |
ROX_CODER_MODEL = "qwen/qwen3.5-397b-a17b"
|
| 49 |
ROX_TURBO_45_MODEL = "deepseek-ai/deepseek-v3.1"
|
| 50 |
ROX_ULTRA_MODEL = "deepseek-ai/deepseek-v3.2"
|
|
@@ -154,7 +154,7 @@ def root():
|
|
| 154 |
"rox_turbo": {
|
| 155 |
"endpoint": "/turbo",
|
| 156 |
"description": "Rox 2.1 Turbo - Fast and efficient",
|
| 157 |
-
"model": "
|
| 158 |
"best_for": "Quick responses and efficient processing"
|
| 159 |
},
|
| 160 |
"rox_coder": {
|
|
@@ -287,16 +287,18 @@ def turbo(req: ChatRequest):
|
|
| 287 |
completion = client.chat.completions.create(
|
| 288 |
model=ROX_TURBO_MODEL,
|
| 289 |
messages=messages,
|
| 290 |
-
temperature=req.temperature if req.temperature != 1.0 else 0.
|
| 291 |
-
top_p=req.top_p if req.top_p != 1.0 else 0.
|
| 292 |
max_tokens=req.max_tokens,
|
| 293 |
-
stream=False
|
| 294 |
)
|
| 295 |
except Exception as e:
|
| 296 |
logger.exception("Error while calling Rox 2.1 Turbo for /turbo")
|
|
|
|
|
|
|
| 297 |
raise HTTPException(
|
| 298 |
status_code=500,
|
| 299 |
-
detail="Internal server error while calling Rox 2.1 Turbo
|
| 300 |
) from e
|
| 301 |
|
| 302 |
try:
|
|
|
|
| 44 |
|
| 45 |
# Model configurations
|
| 46 |
ROX_CORE_MODEL = "minimaxai/minimax-m2.5"
|
| 47 |
+
ROX_TURBO_MODEL = "meta/llama-3.1-8b-instruct" # Changed to a more reliable model
|
| 48 |
ROX_CODER_MODEL = "qwen/qwen3.5-397b-a17b"
|
| 49 |
ROX_TURBO_45_MODEL = "deepseek-ai/deepseek-v3.1"
|
| 50 |
ROX_ULTRA_MODEL = "deepseek-ai/deepseek-v3.2"
|
|
|
|
| 154 |
"rox_turbo": {
|
| 155 |
"endpoint": "/turbo",
|
| 156 |
"description": "Rox 2.1 Turbo - Fast and efficient",
|
| 157 |
+
"model": "meta/llama-3.1-8b-instruct",
|
| 158 |
"best_for": "Quick responses and efficient processing"
|
| 159 |
},
|
| 160 |
"rox_coder": {
|
|
|
|
| 287 |
completion = client.chat.completions.create(
|
| 288 |
model=ROX_TURBO_MODEL,
|
| 289 |
messages=messages,
|
| 290 |
+
temperature=req.temperature if req.temperature != 1.0 else 0.7,
|
| 291 |
+
top_p=req.top_p if req.top_p != 1.0 else 0.9,
|
| 292 |
max_tokens=req.max_tokens,
|
| 293 |
+
stream=False
|
| 294 |
)
|
| 295 |
except Exception as e:
|
| 296 |
logger.exception("Error while calling Rox 2.1 Turbo for /turbo")
|
| 297 |
+
# Log the actual error for debugging
|
| 298 |
+
logger.error(f"Turbo model error details: {str(e)}")
|
| 299 |
raise HTTPException(
|
| 300 |
status_code=500,
|
| 301 |
+
detail=f"Internal server error while calling Rox 2.1 Turbo: {str(e)}",
|
| 302 |
) from e
|
| 303 |
|
| 304 |
try:
|