[ { "id": "91bacb0e-3545-443e-a4b1-42f535440790", "name": "DeepSeek-V3.2-Speciale", "provider": "DeepSeek", "rating": 5.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "A high-compute 671B MoE model optimized for advanced reasoning, achieving Gold Medal performance in 2025 math and coding Olympiads.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/DeepSeek.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "8k", "input_token": 0.33, "output_token": 0.55 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "c966144e-2c25-4658-b6ea-c9241282405f", "name": "GLM-4.7", "provider": "Z.ai", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Z.ai's flagship model unifying reasoning, coding, and agentic capabilities, GLM-4.7 is designed for fast rising agentic applications.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/intfloat.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1760054400000, "context_length": "128k", "max_output": "128k", "input_token": 0.495, "output_token": 2.2 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless", "trending" ] }, { "id": "fb5319df-ae62-4aaf-a79f-fc485a9c4021", "name": "Kimi-K2.5", "provider": "MoonshotAI", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Kimi-K2.5 is a cutting-edge, native multimodal agentic model featuring 1 trillion parameters. It is specifically optimized for visual reasoning, expert-level coding, and autonomous agent orchestration (Agent Swarm).",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/MoonshotAI.png", "category_code": "vision_language_model", "category_name": "Vision Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1773273600000, "context_length": "256K", "max_output": "16K", "input_token": 0.495, "output_token": 2.75 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless" ] }, { "id": "5d3ad621-18c0-4eb2-b3b7-804965944d92", "name": "Llama-3.3-70B-Instruct", "provider": "Meta", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": true, "introduction": "Experience leading performance and quality at a fraction of the cost.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Meta.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "128k", "input_token": 0.209, "output_token": 0.451 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "d8b5ac95-c382-43be-b5bc-b2e779b029de", "name": "Llama-3.3-Swallow-70B-Instruct-v0.4", "provider": "tokyotech-llm", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "\nContinual pre-trained on the Meta Llama 3.3 with improvements on the Japanese language capabilities ", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/tokyotech-llm.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "128k", "input_token": 
0.374, "output_token": 0.374 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "fa2002bf-6fad-4820-9f1e-caf3572423c6", "name": "Nemotron-3-Super-120B-A12B", "provider": "Nvidia", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model (12B active) optimized for agentic reasoning. It features a 1M-token context window and world-leading inference speed for complex multi-agent AI systems.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Nvidia.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1773273600000, "context_length": "1M", "max_output": "16K", "input_token": 0.44, "output_token": 0.88 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless" ] }, { "id": "e6ea0e0e-8327-4f68-9779-712dd52b7958", "name": "Qwen2.5-Coder-32B-Instruct", "provider": "Qwen", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "The current state-of-the-art open-source codeLLM has coding abilities matching those of GPT-4o.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "8k", "input_token": 0.088, "output_token": 0.187 }, "total_usage": 0, "usecase": "Code generation ", "tags": [ "Serverless" ] }, { "id": "01121eac-3105-475a-897a-1efbb3e1a5ba", "name": "Qwen2.5-VL-7B-Instruct", "provider": "Qwen", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, 
"introduction": "Vision-language model designed to understand and process both visual and textual inputs with high accuracy", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png", "category_code": "vision_language_model", "category_name": "Vision Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "33k", "max_output": "33k", "input_token": 0.77, "output_token": 0.77 }, "total_usage": 0, "usecase": "Image & Text to Text", "tags": [ "Serverless" ] }, { "id": "59701402-834d-468e-87ff-198fc5ecde19", "name": "Qwen3-32B", "provider": "Qwen", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Qwen's hybrid model excelling in tool-calling capabilities as well as following instructions and copying specific data formats.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "33k", "input_token": 0.165, "output_token": 0.187 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "4bcba847-8905-4a96-8d2c-84aeab8c1028", "name": "Qwen3-VL-8B-Instruct", "provider": "Qwen", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "A multimodal 8.8B dense Vision-Language model optimized for image understanding, multilingual OCR, and long-context video reasoning.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Qwen.png", "category_code": "vision_language_model", "category_name": "Vision Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1773273600000, 
"context_length": "256k", "max_output": "32k", "input_token": 0.198, "output_token": 0.759 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless" ] }, { "id": "8bdf228c-f765-4739-8ce8-1541187e0fb9", "name": "SaoLa-Llama3.1-planner", "provider": "FPT.AI", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Excelling at Function Calling and JSON Structured Outputs", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "128k", "input_token": 0.055, "output_token": 0.088 }, "total_usage": 0, "usecase": "Content safety classification ", "tags": [ "Serverless" ] }, { "id": "247d9b6a-5319-4e05-9426-f82217170539", "name": "SaoLa3.1-medium", "provider": "FPT.AI", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Fine-tuned on Qwen2.5-32B-Instruct with 32K SFT sample synthesis data from Llama 3.1 405B", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, "context_length": "128k", "max_output": "128k", "input_token": 0.165, "output_token": 0.187 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "3298e219-541e-47f3-910f-16f812ec0de0", "name": "SaoLa4-medium", "provider": "FPT.AI", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Saola4-medium is a large language model (LLM) specialized in the Vietnamese language. The model is optimized for STEM, higher education, and Vietnamese law, while maintaining strong reasoning capabilities.",
"image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1773273600000, "context_length": "32K", "max_output": "32K", "input_token": 0.165, "output_token": 0.187 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless" ] }, { "id": "b6cef2c7-4549-4935-8784-d6ca46b7f80d", "name": "SaoLa4-small", "provider": "FPT.AI", "rating": 0.0, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "saola4-small is a high-efficiency, reasoning-optimized LLM for Vietnamese. It excels in Finance, Banking, STEM, and Law, with native Chain-of-Thought (CoT) for complex, step-by-step problem solving.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/FPT.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1773273600000, "context_length": "32K", "max_output": "32K", "input_token": 0.132, "output_token": 0.154 }, "total_usage": 0, "usecase": null, "tags": [ "Serverless" ] }, { "id": "6ba79e04-13d0-48e3-9957-3b862904d0b9", "name": "gemma-3-27b-it", "provider": "Google", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "Multimodal model designed to understand and process both visual and textual inputs with high accuracy", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Google.png", "category_code": "vision_language_model", "category_name": "Vision Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1715817600000, 
"context_length": "128k", "max_output": "8k", "input_token": 0.11, "output_token": 0.165 }, "total_usage": 0, "usecase": "Image & Text to Text", "tags": [ "Serverless" ] }, { "id": "64c42558-9604-4f71-a656-c3194c118440", "name": "gpt-oss-120b", "provider": "Open.AI", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "\tA 117B open-weight mixture-of-experts LLM released by OpenAI, optimized for reasoning, tool use, and structured outputs. It runs efficiently on 80GB GPUs (e.g., H100) using sparse activation and MXFP4 quantization.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Open.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1754956800000, "context_length": "128K", "max_output": "128K", "input_token": 0.143, "output_token": 0.605 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless" ] }, { "id": "74139e8d-9e2f-46c3-9b5c-2e8775170cd9", "name": "gpt-oss-20b", "provider": "Open.AI", "rating": 3.95, "is_marked_as_favorite": false, "is_allowed_free_trial": false, "is_support_finetune": null, "introduction": "A 21B parameter open-weight LLM, smaller and more efficient than gpt-oss-120b, designed for deployment in environments with fewer resources (e.g., 16–24GB VRAM). It retains strong reasoning capabilities.", "image_url": "https://s3-sgn10.fptcloud.com/modelaas/images/v2/Open.AI.png", "category_code": "large_language_model", "category_name": "Large Language Model", "planning_purpose": null, "planning_scheduled_time": null, "billing_info": { "start_billing_time": 1754956800000, "context_length": "128K", "max_output": "128K", "input_token": 0.0495, "output_token": 0.198 }, "total_usage": 0, "usecase": "Text generation", "tags": [ "Serverless", "trending" ] } ]