[
{
"id": "91bacb0e-3545-443e-a4b1-42f535440790",
"name": "DeepSeek-V3.2-Speciale",
"provider": "DeepSeek",
"rating": 5.0,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "A high-compute 671B MoE model optimized for advanced reasoning, achieving Gold Medal performance in 2025 math and coding Olympiads.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/DeepSeek.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1715817600000,
"context_length": "128k",
"max_output": "8k",
"input_token": 0.33,
"output_token": 0.55
},
"total_usage": 0,
"usecase": "Text generation",
"tags": [
"Serverless"
]
},
{
"id": "c966144e-2c25-4658-b6ea-c9241282405f",
"name": "GLM-4.7",
"provider": "Z.ai",
"rating": 0.0,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "Z.ai's flagship model unifying reasoning, coding, and agentic capabilities, GLM-4.7 is designed for fast rising agentic applications.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/intfloat.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1760054400000,
"context_length": "128k",
"max_output": "128k",
"input_token": 0.495,
"output_token": 2.2
},
"total_usage": 0,
"usecase": null,
"tags": [
"Serverless",
"trending"
]
},
{
"id": "fb5319df-ae62-4aaf-a79f-fc485a9c4021",
"name": "Kimi-K2.5",
"provider": "MoonshotAI",
"rating": 0.0,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "Kimi-K2.5 is a cutting-edge, native multimodal agentic model featuring 1 trillion parameters. It is specifically optimized for visual reasoning, expert-level coding, and autonomous agent orchestration (Agent Swarm).",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/MoonshotAI.png",
"category_code": "vision_language_model",
"category_name": "Vision Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1773273600000,
"context_length": "256K",
"max_output": "16K",
"input_token": 0.495,
"output_token": 2.75
},
"total_usage": 0,
"usecase": null,
"tags": [
"Serverless"
]
},
{
"id": "5d3ad621-18c0-4eb2-b3b7-804965944d92",
"name": "Llama-3.3-70B-Instruct",
"provider": "Meta",
"rating": 3.95,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": true,
"introduction": "Experience leading performance and quality at a fraction of the cost.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Meta.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1715817600000,
"context_length": "128k",
"max_output": "128k",
"input_token": 0.209,
"output_token": 0.451
},
"total_usage": 0,
"usecase": "Text generation",
"tags": [
"Serverless"
]
},
{
"id": "d8b5ac95-c382-43be-b5bc-b2e779b029de",
"name": "Llama-3.3-Swallow-70B-Instruct-v0.4",
"provider": "tokyotech-llm",
"rating": 3.95,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "\nContinual pre-trained on the Meta Llama 3.3 with improvements on the Japanese language capabilities ",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/tokyotech-llm.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1715817600000,
"context_length": "128k",
"max_output": "128k",
"input_token": 0.374,
"output_token": 0.374
},
"total_usage": 0,
"usecase": "Text generation",
"tags": [
"Serverless"
]
},
{
"id": "fa2002bf-6fad-4820-9f1e-caf3572423c6",
"name": "Nemotron-3-Super-120B-A12B",
"provider": "Nvidia",
"rating": 0.0,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model (12B active) optimized for agentic reasoning. It features a 1M-token context window and world-leading inference speed for complex multi-agent AI systems. (221 characters)",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Nvidia.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1773273600000,
"context_length": "1M",
"max_output": "16K",
"input_token": 0.44,
"output_token": 0.88
},
"total_usage": 0,
"usecase": null,
"tags": [
"Serverless"
]
},
{
"id": "e6ea0e0e-8327-4f68-9779-712dd52b7958",
"name": "Qwen2.5-Coder-32B-Instruct",
"provider": "Qwen",
"rating": 3.95,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "The current state-of-the-art open-source codeLLM has coding abilities matching those of GPT-4o.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1715817600000,
"context_length": "128k",
"max_output": "8k",
"input_token": 0.088,
"output_token": 0.187
},
"total_usage": 0,
"usecase": "Code generation ",
"tags": [
"Serverless"
]
},
{
"id": "01121eac-3105-475a-897a-1efbb3e1a5ba",
"name": "Qwen2.5-VL-7B-Instruct",
"provider": "Qwen",
"rating": 3.95,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "Vision-language model designed to understand and process both visual and textual inputs with high accuracy",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png",
"category_code": "vision_language_model",
"category_name": "Vision Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1715817600000,
"context_length": "33k",
"max_output": "33k",
"input_token": 0.77,
"output_token": 0.77
},
"total_usage": 0,
"usecase": "Image & Text to Text",
"tags": [
"Serverless"
]
},
{
"id": "59701402-834d-468e-87ff-198fc5ecde19",
"name": "Qwen3-32B",
"provider": "Qwen",
"rating": 3.95,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "Qwen's hybrid model excelling in tool-calling capabilities as well as following instructions and copying specific data formats.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1715817600000,
"context_length": "128k",
"max_output": "33k",
"input_token": 0.165,
"output_token": 0.187
},
"total_usage": 0,
"usecase": "Text generation",
"tags": [
"Serverless"
]
},
{
"id": "4bcba847-8905-4a96-8d2c-84aeab8c1028",
"name": "Qwen3-VL-8B-Instruct",
"provider": "Qwen",
"rating": 0.0,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "A multimodal 8.8B dense Vision-Language model optimized for image understanding, multilingual OCR, and long-context video reasoning.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png",
"category_code": "vision_language_model",
"category_name": "Vision Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1773273600000,
"context_length": "256k",
"max_output": "32k",
"input_token": 0.198,
"output_token": 0.759
},
"total_usage": 0,
"usecase": null,
"tags": [
"Serverless"
]
},
{
"id": "8bdf228c-f765-4739-8ce8-1541187e0fb9",
"name": "SaoLa-Llama3.1-planner",
"provider": "FPT.AI",
"rating": 3.95,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "Excelling at Function Calling and JSON Structured Outputs",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1715817600000,
"context_length": "128k",
"max_output": "128k",
"input_token": 0.055,
"output_token": 0.088
},
"total_usage": 0,
"usecase": "Content safety classification ",
"tags": [
"Serverless"
]
},
{
"id": "247d9b6a-5319-4e05-9426-f82217170539",
"name": "SaoLa3.1-medium",
"provider": "FPT.AI",
"rating": 3.95,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "Fine-tuned on Qwen2.5-32B-Instruct with 32K SFT sample synthesis data from Llama 3.1 405B",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1715817600000,
"context_length": "128k",
"max_output": "128k",
"input_token": 0.165,
"output_token": 0.187
},
"total_usage": 0,
"usecase": "Text generation",
"tags": [
"Serverless"
]
},
{
"id": "3298e219-541e-47f3-910f-16f812ec0de0",
"name": "SaoLa4-medium",
"provider": "FPT.AI",
"rating": 0.0,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "Saola4-medium is a large language model (LLM) specialized in the Vietnamese language. The model is optimized for STEM, higher education, and Vietnamese law, while maintaining strong reasoning capabilities.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1773273600000,
"context_length": "32K",
"max_output": "32K",
"input_token": 0.165,
"output_token": 0.187
},
"total_usage": 0,
"usecase": null,
"tags": [
"Serverless"
]
},
{
"id": "b6cef2c7-4549-4935-8784-d6ca46b7f80d",
"name": "SaoLa4-small",
"provider": "FPT.AI",
"rating": 0.0,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "saola4-small is a high-efficiency, reasoning-optimized LLM for Vietnamese. It excels in Finance, Banking, STEM, and Law, with native Chain-of-Thought (CoT) for complex, step-by-step problem solving.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1773273600000,
"context_length": "32K",
"max_output": "32K",
"input_token": 0.132,
"output_token": 0.154
},
"total_usage": 0,
"usecase": null,
"tags": [
"Serverless"
]
},
{
"id": "6ba79e04-13d0-48e3-9957-3b862904d0b9",
"name": "gemma-3-27b-it",
"provider": "Google",
"rating": 3.95,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "Multimodal model designed to understand and process both visual and textual inputs with high accuracy",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Google.png",
"category_code": "vision_language_model",
"category_name": "Vision Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1715817600000,
"context_length": "128k",
"max_output": "8k",
"input_token": 0.11,
"output_token": 0.165
},
"total_usage": 0,
"usecase": "Image & Text to Text",
"tags": [
"Serverless"
]
},
{
"id": "64c42558-9604-4f71-a656-c3194c118440",
"name": "gpt-oss-120b",
"provider": "Open.AI",
"rating": 3.95,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "\tA 117B open-weight mixture-of-experts LLM released by OpenAI, optimized for reasoning, tool use, and structured outputs. It runs efficiently on 80GB GPUs (e.g., H100) using sparse activation and MXFP4 quantization.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Open.AI.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1754956800000,
"context_length": "128K",
"max_output": "128K",
"input_token": 0.143,
"output_token": 0.605
},
"total_usage": 0,
"usecase": "Text generation",
"tags": [
"Serverless"
]
},
{
"id": "74139e8d-9e2f-46c3-9b5c-2e8775170cd9",
"name": "gpt-oss-20b",
"provider": "Open.AI",
"rating": 3.95,
"is_marked_as_favorite": false,
"is_allowed_free_trial": false,
"is_support_finetune": null,
"introduction": "A 21B parameter open-weight LLM, smaller and more efficient than gpt-oss-120b, designed for deployment in environments with fewer resources (e.g., 16–24GB VRAM). It retains strong reasoning capabilities.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Open.AI.png",
"category_code": "large_language_model",
"category_name": "Large Language Model",
"planning_purpose": null,
"planning_scheduled_time": null,
"billing_info": {
"start_billing_time": 1754956800000,
"context_length": "128K",
"max_output": "128K",
"input_token": 0.0495,
"output_token": 0.198
},
"total_usage": 0,
"usecase": "Text generation",
"tags": [
"Serverless",
"trending"
]
}
]
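
A minimal Python sketch of how a client might consume this catalog, assuming the JSON array above is saved locally as models.json. The field names (category_code, tags, billing_info, input_token, name) are taken from the entries themselves; the "pick the cheapest serverless LLM" selection logic is purely illustrative and not part of the catalog.

# Sketch: load models.json and pick the cheapest serverless LLM by input-token price.
# Assumes the JSON array above is stored locally as "models.json".
import json

with open("models.json", "r", encoding="utf-8") as f:
    models = json.load(f)

# Keep only serverless large language models.
llms = [
    m for m in models
    if m["category_code"] == "large_language_model" and "Serverless" in m["tags"]
]

# Choose the entry with the lowest input-token price.
cheapest = min(llms, key=lambda m: m["billing_info"]["input_token"])
print(cheapest["name"], cheapest["billing_info"]["input_token"])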