File size: 769 Bytes
b0e88cf | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | # EVOLVE-BLOCK-START
GPU_MEM_SIZE = 80  # GB; memory capacity assumed for every GPU


def compute_model_placement(gpu_num, models):
    """
    Place models onto GPUs with a first-fit greedy pass.

    The stated goal for this function is a placement that minimizes the
    maximum KVPR across all GPUs. This baseline does not compute KVPR: each
    model, taken in input order, goes to the lowest-numbered GPU that still
    has enough free memory for it.

    Args:
        gpu_num: Number of GPUs. GPU ids are 0 .. gpu_num - 1.
        models: Iterable of models to place. Each model must expose a numeric
            ``model_size`` attribute (presumably in GB, since it is compared
            against GPU_MEM_SIZE -- confirm against the caller).

    Returns:
        A dict mapping every gpu_id to the list of models placed on it, in
        placement order. GPUs that receive no model map to an empty list.

        NOTE: a model that does not fit on any GPU is silently left out of
        the result; callers that need every model placed must check for that
        themselves.
    """
    placement = {gpu_id: [] for gpu_id in range(gpu_num)}
    # Running total of memory already occupied on each GPU, so the fit check
    # does not have to re-sum every placed model for every candidate GPU.
    used_mem = [0] * gpu_num

    for model in models:
        size = model.model_size
        # First fit: scan GPUs in ascending id order and take the first one
        # whose remaining capacity can hold this model.
        for gpu_id in range(gpu_num):
            if size <= GPU_MEM_SIZE - used_mem[gpu_id]:
                placement[gpu_id].append(model)
                used_mem[gpu_id] += size
                break
    return placement
# EVOLVE-BLOCK-END
|