| # EVOLVE-BLOCK-START | |
| GPU_MEM_SIZE = 80 # GB | |
| def compute_model_placement(gpu_num, models): | |
| """ | |
| Compute a model placement that minimizes the maximum KVPR across all GPUs. | |
| Args: | |
| gpu_num: Number of GPUs | |
| models: List of models to place | |
| Returns: | |
| A placement of models to GPUs | |
| """ | |
| # gready algorithm to place models to the GPUs with smallest gpu_id first | |
| placement = dict() | |
| for gpu_id in range(gpu_num): | |
| placement[gpu_id] = [] | |
| for model in models: | |
| for gpu_id in range(gpu_num): | |
| if model.model_size <= GPU_MEM_SIZE - sum(model.model_size for model in placement[gpu_id]): | |
| placement[gpu_id].append(model) | |
| break | |
| return placement | |
| # EVOLVE-BLOCK-END | |