"""
Standalone model inference and client management for AnyCoder Backend API.
No Gradio dependencies - works with FastAPI/backend only.
"""
import os
from typing import Optional
from openai import OpenAI
def get_inference_client(model_id: str, provider: str = "auto"):
    """
    Return an OpenAI-compatible client for *model_id*.

    All explicitly supported models are served through the HuggingFace
    Router (billing attributed to the "huggingface" org via the
    ``X-HF-Bill-To`` header). Any other model falls back to the plain
    HuggingFace Inference API endpoint.

    Args:
        model_id: HuggingFace model identifier, e.g. "zai-org/GLM-4.6".
        provider: Kept for interface compatibility; currently unused —
            provider pinning happens via get_real_model_id() suffixes.

    Returns:
        An ``openai.OpenAI`` client configured with the appropriate
        base URL and the HF_TOKEN environment variable as API key.
    """
    # Exact model IDs routed through the HuggingFace Router.
    router_models = {
        "MiniMaxAI/MiniMax-M2",
        "MiniMaxAI/MiniMax-M2.1",
        "moonshotai/Kimi-K2-Thinking",
        "moonshotai/Kimi-K2-Instruct",
    }
    # Model-ID prefixes routed through the HuggingFace Router
    # (DeepSeek, GLM, Kimi K2 and Qwen3-Coder-Next families).
    router_prefixes = (
        "deepseek-ai/",
        "zai-org/GLM",
        "moonshotai/Kimi-K2",
        "Qwen/Qwen3-Coder-Next",
    )
    if model_id in router_models or model_id.startswith(router_prefixes):
        return OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=os.getenv("HF_TOKEN"),
            default_headers={"X-HF-Bill-To": "huggingface"},
        )
    # Unknown model - try the HuggingFace Inference API.
    return OpenAI(
        base_url="https://api-inference.huggingface.co/v1",
        api_key=os.getenv("HF_TOKEN"),
    )
def get_real_model_id(model_id: str) -> str:
    """Map a public model ID to the provider-suffixed ID used in API calls.

    Some models on the HuggingFace Router must be pinned to a specific
    inference provider by appending a ``:provider`` suffix to the model
    string. Unrecognized IDs are returned unchanged.

    Args:
        model_id: HuggingFace model identifier.

    Returns:
        The model ID with a provider suffix appended when required,
        otherwise the original ID.
    """
    # Exact-match provider pinning (model ID -> provider name).
    provider_by_model = {
        "zai-org/GLM-4.6": "cerebras",
        "MiniMaxAI/MiniMax-M2": "novita",
        "MiniMaxAI/MiniMax-M2.1": "novita",
        "moonshotai/Kimi-K2-Thinking": "together",
        "moonshotai/Kimi-K2-Instruct": "groq",
        "zai-org/GLM-4.5": "fireworks-ai",
        "zai-org/GLM-4.7": "cerebras",
        "zai-org/GLM-4.7-Flash": "novita",
        "zai-org/GLM-5": "novita",
        "moonshotai/Kimi-K2.5": "novita",
        "Qwen/Qwen3-Coder-Next": "novita",
    }
    provider = provider_by_model.get(model_id)
    # DeepSeek V3 and R1 model families need the Novita provider
    # (prefix match, so dated/variant releases are covered too).
    if provider is None and model_id.startswith(
        ("deepseek-ai/DeepSeek-V3", "deepseek-ai/DeepSeek-R1")
    ):
        provider = "novita"
    return f"{model_id}:{provider}" if provider else model_id
def is_native_sdk_model(model_id: str) -> bool:
    """Return True when *model_id* requires a vendor-native SDK.

    Every supported model currently speaks the OpenAI-compatible API,
    so this always returns False; kept as an extension hook for future
    native-SDK integrations.
    """
    # No native-SDK models are registered at the moment.
    return False
def is_mistral_model(model_id: str) -> bool:
    """Return True when *model_id* must go through the Mistral SDK.

    No Mistral-SDK models are configured in this backend, so this
    always returns False; kept as an extension hook so callers can
    branch on it uniformly.
    """
    # Mistral SDK routing is not enabled for any model.
    return False