File size: 769 Bytes

b0e88cf

# EVOLVE-BLOCK-START

GPU_MEM_SIZE = 80 # GB

def compute_model_placement(gpu_num, models):
    """
    Compute a model placement that minimizes the maximum KVPR across all GPUs.

    Args:
        gpu_num: Number of GPUs
        models: List of models to place

    Returns:
        A placement of models to GPUs
    """

    # gready algorithm to place models to the GPUs with smallest gpu_id first

    placement = dict()
    for gpu_id in range(gpu_num):
        placement[gpu_id] = []

    for model in models:
        for gpu_id in range(gpu_num):
            if model.model_size <= GPU_MEM_SIZE - sum(model.model_size for model in placement[gpu_id]):
                placement[gpu_id].append(model)
                break
    return placement

# EVOLVE-BLOCK-END