"""Print the mean of each tracked metric across all records in a results JSON file."""

import json

# Results file that is summarised when the script is run directly.
DEFAULT_RESULTS_PATH = (
    "/mnt/data/xiuying/Code/test/outputs/MiniCPM-V-4-int4/20250822_110321.json"
)

# Per-record fields that are averaged, in the order they are printed.
METRIC_NAMES = (
    "tokens_per_second",
    "peak_gpu_memory_mb",
    "num_generated_tokens",
    "inference_time",
    "cpu_usage",
)


def collect_metrics(records):
    """Group the tracked metric values of every record by metric name.

    Args:
        records: Mapping whose values are dicts containing every name in
            ``METRIC_NAMES``. The mapping's keys and any extra fields in a
            record are ignored.

    Returns:
        A dict mapping each name in ``METRIC_NAMES`` to the list of that
        metric's values, in the iteration order of ``records``.

    Raises:
        KeyError: If a record lacks one of the tracked metrics.
    """
    metric = {name: [] for name in METRIC_NAMES}
    for record in records.values():
        for name in METRIC_NAMES:
            metric[name].append(record[name])
    return metric


def average_metrics(metric):
    """Return the arithmetic mean of every value list in ``metric``.

    An empty list yields ``nan`` instead of raising ``ZeroDivisionError``,
    so a results file without records still produces a complete report.
    """
    return {
        name: sum(values) / len(values) if values else float("nan")
        for name, values in metric.items()
    }


def main(path=DEFAULT_RESULTS_PATH):
    """Load the results file at ``path`` and print one ``name mean`` line per metric."""
    # The context manager closes the file even if JSON parsing fails.
    with open(path, encoding="utf-8") as results_file:
        records = json.load(results_file)
    for name, mean in average_metrics(collect_metrics(records)).items():
        print(name, mean)


if __name__ == "__main__":
    main()