"""
Standalone model inference and client management for AnyCoder Backend API.

No Gradio dependencies - works with FastAPI/backend only.
"""

import os

from openai import OpenAI


def get_inference_client(model_id: str, provider: str = "auto"):
    """
    Return an appropriate client based on model_id.

    Returns an OpenAI-compatible client for all models; raises at
    construction time if no HF_TOKEN credential is configured. The
    `provider` argument is currently unused and kept for API compatibility.
    """
    # Models billed through the Hugging Face router (X-HF-Bill-To header).
    router_models = (
        "MiniMaxAI/MiniMax-M2",
        "moonshotai/Kimi-K2-Thinking",
        "moonshotai/Kimi-K2-Instruct",
    )
    router_prefixes = (
        "deepseek-ai/",
        "zai-org/GLM-4",
        "moonshotai/Kimi-K2",
    )

    if model_id in router_models or model_id.startswith(router_prefixes):
        return OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=os.getenv("HF_TOKEN"),
            default_headers={"X-HF-Bill-To": "huggingface"},
        )

    # All other models fall back to the serverless Inference API.
    return OpenAI(
        base_url="https://api-inference.huggingface.co/v1",
        api_key=os.getenv("HF_TOKEN"),
    )
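

# Illustrative usage (a sketch, not executed on import): resolve a client
# and a provider-suffixed model ID, then issue a chat completion. The model
# name and prompt below are placeholder values, and HF_TOKEN must be set.
#
#     client = get_inference_client("moonshotai/Kimi-K2-Instruct")
#     response = client.chat.completions.create(
#         model=get_real_model_id("moonshotai/Kimi-K2-Instruct"),
#         messages=[{"role": "user", "content": "Hello!"}],
#     )
#     print(response.choices[0].message.content)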


def get_real_model_id(model_id: str) -> str:
    """Get the real model ID, with a provider suffix appended if needed."""
    # Exact-match overrides that pin a model to a specific provider.
    provider_overrides = {
        "zai-org/GLM-4.6": "zai-org/GLM-4.6:cerebras",
        "MiniMaxAI/MiniMax-M2": "MiniMaxAI/MiniMax-M2:novita",
        "moonshotai/Kimi-K2-Thinking": "moonshotai/Kimi-K2-Thinking:together",
        "moonshotai/Kimi-K2-Instruct": "moonshotai/Kimi-K2-Instruct:groq",
        "zai-org/GLM-4.5": "zai-org/GLM-4.5:fireworks-ai",
    }
    if model_id in provider_overrides:
        return provider_overrides[model_id]
    # The DeepSeek-V3 and DeepSeek-R1 families are routed through Novita.
    if model_id.startswith(("deepseek-ai/DeepSeek-V3", "deepseek-ai/DeepSeek-R1")):
        return f"{model_id}:novita"
    return model_id
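

# Examples (values mirror the mapping above; the calls are illustrative):
#
#     get_real_model_id("zai-org/GLM-4.6")          # -> "zai-org/GLM-4.6:cerebras"
#     get_real_model_id("deepseek-ai/DeepSeek-R1")  # -> "deepseek-ai/DeepSeek-R1:novita"
#     get_real_model_id("some-org/other-model")     # -> unchanged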


def is_native_sdk_model(model_id: str) -> bool:
    """Check if the model uses a native SDK (not OpenAI-compatible).

    Always False here: every supported model is served through an
    OpenAI-compatible endpoint.
    """
    return False


def is_mistral_model(model_id: str) -> bool:
    """Check if the model uses the Mistral SDK (always False in this backend)."""
    return False
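

if __name__ == "__main__":
    # Minimal smoke-test sketch (assumes HF_TOKEN is exported; the model
    # name is an arbitrary entry from the routing table above).
    demo_model = "moonshotai/Kimi-K2-Instruct"
    print(f"Resolved model id: {get_real_model_id(demo_model)}")
    client = get_inference_client(demo_model)
    print(f"Client base URL: {client.base_url}")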