[ { "id": "91bacb0e-3545-443e-a4b1-42f535440790", "name": "DeepSeek-V3.2-Speciale", "provider": "DeepSeek", "rating": 5.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "A high-compute 671B MoE model optimized for advanced reasoning, achieving Gold Medal performance in 2025 math and coding Olympiads.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/DeepSeek.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "8k", "input_token": 0.33, "output_token": 0.55 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "c966144e-2c25-4658-b6ea-c9241282405f", "name": "GLM-4.7", "provider": "Z.ai", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Z.ai's flagship model unifying reasoning, coding, and agentic capabilities, GLM-4.7 is designed for fast rising agentic applications.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/intfloat.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1760054400000, "context_length": "128k", "max_output": "128k", "input_token": 0.495, "output_token": 2.2 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless", "trending" ] }, { "id": "fb5319df-ae62-4aaf-a79f-fc485a9c4021", "name": "Kimi-K2.5", "provider": "MoonshotAI", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Kimi-K2.5 is a cutting-edge, native multimodal agentic model featuring 1 trillion parameters. It is specifically optimized for visual reasoning, expert-level coding, and autonomous agent orchestration (Agent Swarm).",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/MoonshotAI.png", "category_code": "vision_language_model", "category_name": "Vision Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1773273600000, "context_length": "256K", "max_output": "16K", "input_token": 0.495, "output_token": 2.75 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless" ] }, { "id": "5d3ad621-18c0-4eb2-b3b7-804965944d92", "name": "Llama-3.3-70B-Instruct", "provider": "Meta", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": true, "introduction": "Experience leading performance and quality at a fraction of the cost.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Meta.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "128k", "input_token": 0.209, "output_token": 0.451 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "d8b5ac95-c382-43be-b5bc-b2e779b029de", "name": "Llama-3.3-Swallow-70B-Instruct-v0.4", "provider": "tokyotech-llm", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "\nContinual pre-trained on the Meta Llama 3.3 with improvements on the Japanese language capabilities ", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/tokyotech-llm.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "128k", "input_token": 
0.374, "output_token": 0.374 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "fa2002bf-6fad-4820-9f1e-caf3572423c6", "name": "Nemotron-3-Super-120B-A12B", "provider": "Nvidia", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model (12B active) optimized for agentic reasoning. It features a 1M-token context window and world-leading inference speed for complex multi-agent AI systems.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Nvidia.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1773273600000, "context_length": "1M", "max_output": "16K", "input_token": 0.44, "output_token": 0.88 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless" ] }, { "id": "e6ea0e0e-8327-4f68-9779-712dd52b7958", "name": "Qwen2.5-Coder-32B-Instruct", "provider": "Qwen", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "The current state-of-the-art open-source codeLLM has coding abilities matching those of GPT-4o.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "8k", "input_token": 0.088, "output_token": 0.187 }, "total_usage": 0, "usecase": "Code generation ", "tags": [ "Serverless" ] }, { "id": "01121eac-3105-475a-897a-1efbb3e1a5ba", "name": "Qwen2.5-VL-7B-Instruct", "provider": "Qwen", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, 
"introduction": "Vision-language model designed to understand and process both visual and textual inputs with high accuracy", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png", "category_code": "vision_language_model", "category_name": "Vision Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "33k", "max_output": "33k", "input_token": 0.77, "output_token": 0.77 }, "total_usage": 0, "usecase": "Image & Text to Text", "tags": [ "Serverless" ] }, { "id": "59701402-834d-468e-87ff-198fc5ecde19", "name": "Qwen3-32B", "provider": "Qwen", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Qwen's hybrid model excelling in tool-calling capabilities as well as following instructions and copying specific data formats.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "33k", "input_token": 0.165, "output_token": 0.187 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "4bcba847-8905-4a96-8d2c-84aeab8c1028", "name": "Qwen3-VL-8B-Instruct", "provider": "Qwen", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "A multimodal 8.8B dense Vision-Language model optimized for image understanding, multilingual OCR, and long-context video reasoning.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png", "category_code": "vision_language_model", "category_name": "Vision Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1773273600000, 
"context_length": "256k", "max_output": "32k", "input_token": 0.198, "output_token": 0.759 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless" ] }, { "id": "8bdf228c-f765-4739-8ce8-1541187e0fb9", "name": "SaoLa-Llama3.1-planner", "provider": "FPT.AI", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Excelling at Function Calling and JSON Structured Outputs", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "128k", "input_token": 0.055, "output_token": 0.088 }, "total_usage": 0, "usecase": "Content safety classification ", "tags": [ "Serverless" ] }, { "id": "247d9b6a-5319-4e05-9426-f82217170539", "name": "SaoLa3.1-medium", "provider": "FPT.AI", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Fine-tuned on Qwen2.5-32B-Instruct with 32K SFT sample synthesis data from Llama 3.1 405B", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "128k", "input_token": 0.165, "output_token": 0.187 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "3298e219-541e-47f3-910f-16f812ec0de0", "name": "SaoLa4-medium", "provider": "FPT.AI", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Saola4-medium is a large language model (LLM) specialized in the Vietnamese language. The model is optimized for STEM, higher education, and Vietnamese law, while maintaining strong reasoning capabilities.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1773273600000, "context_length": "32K", "max_output": "32K", "input_token": 0.165, "output_token": 0.187 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless" ] }, { "id": "b6cef2c7-4549-4935-8784-d6ca46b7f80d", "name": "SaoLa4-small", "provider": "FPT.AI", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "saola4-small is a high-efficiency, reasoning-optimized LLM for Vietnamese. It excels in Finance, Banking, STEM, and Law, with native Chain-of-Thought (CoT) for complex, step-by-step problem solving.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1773273600000, "context_length": "32K", "max_output": "32K", "input_token": 0.132, "output_token": 0.154 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless" ] }, { "id": "6ba79e04-13d0-48e3-9957-3b862904d0b9", "name": "gemma-3-27b-it", "provider": "Google", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Multimodal model designed to understand and process both visual and textual inputs with high accuracy", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Google.png", "category_code": "vision_language_model", "category_name": "Vision Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, 
"context_length": "128k", "max_output": "8k", "input_token": 0.11, "output_token": 0.165 }, "total_usage": 0, "usecase": "Image & Text to Text", "tags": [ "Serverless" ] }, { "id": "64c42558-9604-4f71-a656-c3194c118440", "name": "gpt-oss-120b", "provider": "Open.AI", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "\tA 117B open-weight mixture-of-experts LLM released by OpenAI, optimized for reasoning, tool use, and structured outputs. It runs efficiently on 80GB GPUs (e.g., H100) using sparse activation and MXFP4 quantization.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Open.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1754956800000, "context_length": "128K", "max_output": "128K", "input_token": 0.143, "output_token": 0.605 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "74139e8d-9e2f-46c3-9b5c-2e8775170cd9", "name": "gpt-oss-20b", "provider": "Open.AI", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "A 21B parameter open-weight LLM, smaller and more efficient than gpt-oss-120b, designed for deployment in environments with fewer resources (e.g., 16–24GB VRAM). It retains strong reasoning capabilities.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Open.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1754956800000, "context_length": "128K", "max_output": "128K", "input_token": 0.0495, "output_token": 0.198 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless", "trending" ] } ]