File size: 769 Bytes
b0e88cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# EVOLVE-BLOCK-START

GPU_MEM_SIZE = 80  # GB


def compute_model_placement(gpu_num, models):
    """
    Assign models to GPUs with a first-fit pass bounded by GPU memory.

    The stated goal for this function is a placement that minimizes the
    maximum KVPR across all GPUs. This baseline does not evaluate KVPR at
    all: it only guarantees that the summed ``model_size`` on each GPU
    stays within ``GPU_MEM_SIZE``.

    Models are handled in the order given, and each one goes to the
    lowest-numbered GPU that still has room for it. A model that fits on
    no GPU is left out of the returned placement; a ``RuntimeWarning``
    reports how many models were left out so the omission is not silent.

    Args:
        gpu_num: Number of GPUs
        models: List of models to place; each must expose ``model_size``
            (assumed to be in the same unit as ``GPU_MEM_SIZE``, i.e. GB)

    Returns:
        A dict mapping every gpu_id in ``range(gpu_num)`` to the list of
        models placed on that GPU (possibly empty), in placement order
    """
    # Greedy first-fit: always probe GPUs starting from the smallest gpu_id.
    placement = {gpu_id: [] for gpu_id in range(gpu_num)}

    # Running total of memory already committed on each GPU, so the
    # placed models do not have to be re-summed on every capacity check.
    used = dict.fromkeys(placement, 0)

    unplaced = 0
    for model in models:
        size = model.model_size
        for gpu_id in range(gpu_num):
            if size <= GPU_MEM_SIZE - used[gpu_id]:
                placement[gpu_id].append(model)
                used[gpu_id] += size
                break
        else:
            # No GPU had enough free memory for this model.
            unplaced += 1

    if unplaced:
        import warnings

        warnings.warn(
            f"{unplaced} model(s) did not fit on any GPU and were left out "
            "of the placement",
            RuntimeWarning,
            stacklevel=2,
        )
    return placement

# EVOLVE-BLOCK-END