File size: 1,663 Bytes
a0098d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
System Routes
Hardware detection and system information
"""

from fastapi import APIRouter
from typing import Dict, Any

from backend.core.system_checker import system_checker, check_model_requirements

router = APIRouter()


@router.get("/info")
async def get_system_info() -> Dict[str, Any]:
    """Serve the system checker's dictionary snapshot on ``GET /info``.

    Returns:
        The value produced by ``system_checker.to_dict()``. The route is
        described as covering GPU, RAM, and capability details; the exact
        keys are defined by the system checker, not by this handler.
    """
    system_snapshot = system_checker.to_dict()
    return system_snapshot


@router.get("/capabilities")
async def get_capabilities() -> Dict[str, Any]:
    """Summarise the capabilities relevant to quantization on ``GET /capabilities``.

    Runs ``system_checker.check()`` and projects a subset of the result into
    a plain dictionary.

    Returns:
        A dict with the keys ``capability`` (the ``.value`` of the reported
        capability), ``recommended_batch_size``, ``max_model_size``,
        ``cuda_available``, ``mps_available`` and ``gpus`` (one
        ``{"name", "memory_gb"}`` entry per reported GPU).
    """
    report = system_checker.check()

    summary: Dict[str, Any] = {}
    summary["capability"] = report.capability.value
    summary["recommended_batch_size"] = report.recommended_batch_size
    summary["max_model_size"] = report.max_model_size
    summary["cuda_available"] = report.cuda_available
    summary["mps_available"] = report.mps_available

    # Only the name and total memory of each GPU are exposed by this route.
    gpu_entries = []
    for device in report.gpus:
        gpu_entries.append(
            {
                "name": device.name,
                "memory_gb": device.total_memory_gb,
            }
        )
    summary["gpus"] = gpu_entries

    return summary


@router.post("/check-model")
async def check_model_requirements_endpoint(
    model_params_billions: float,
    dtype: str = "fp16"
) -> Dict[str, Any]:
    """Report whether this system can handle a model of the given size.

    Thin HTTP wrapper around ``check_model_requirements``; both arguments
    are forwarded positionally and the helper's result is returned as-is.

    Args:
        model_params_billions: Model size, in billions of parameters.
        dtype: Data type name; the documented values are fp32, fp16, int8
            and int4. Defaults to "fp16". No validation happens in this
            handler -- any string is passed through to the helper.

    Returns:
        Whatever ``check_model_requirements`` returns for these arguments.
    """
    requirements_report = check_model_requirements(model_params_billions, dtype)
    return requirements_report


@router.get("/refresh")
async def refresh_system_info() -> Dict[str, Any]:
    """Re-run the system check with ``force_refresh=True`` on ``GET /refresh``.

    Returns:
        The instance attribute dictionary of the object returned by
        ``system_checker.check(force_refresh=True)``.
    """
    refreshed = system_checker.check(force_refresh=True)
    # NOTE(review): this exposes the raw attribute dict of the result, while
    # the /info route goes through system_checker.to_dict(); the two payload
    # shapes may differ -- confirm whether that is intentional.
    return vars(refreshed)