| | |
"""
NeuroScan AI full benchmark.

Measures CPU/GPU performance under high concurrency and generates a detailed report.
"""
| |
|
| | import os |
| | import sys |
| | import time |
| | import json |
| | import threading |
| | import subprocess |
| | from datetime import datetime |
| | from concurrent.futures import ThreadPoolExecutor, as_completed |
| | from pathlib import Path |
| | import psutil |
| | import numpy as np |
| |
|
# Put the parent of this script's directory on the import path so that the
# function-local ``app.services`` imports used below can be resolved.
sys.path.insert(0, str(Path(__file__).parent.parent))


# Shared state for the resource monitor: one list of samples per series.
# ``resource_monitor`` appends to these lists, ``reset_monitor`` replaces the
# dict, and ``get_monitor_stats`` summarises it.
monitor_data = {
    series: []
    for series in (
        "cpu_percent",
        "cpu_per_core",
        "memory_used_gb",
        "memory_percent",
        "gpu_memory_gb",
        "gpu_util",
        "timestamps",
    )
}

# Set to True to ask a running ``resource_monitor`` loop to finish.
stop_monitor = False
| |
|
| |
|
def get_gpu_stats():
    """Query ``nvidia-smi`` for the memory and utilization of GPU index 0.

    Returns:
        A ``(mem_used, mem_total, gpu_util)`` tuple of floats. The two memory
        figures are the values reported by ``nvidia-smi`` divided by 1024;
        ``gpu_util`` is the reported utilization figure unchanged.
        ``(0.0, 0.0, 0.0)`` is returned when ``nvidia-smi`` cannot be started,
        times out, exits with a non-zero status, or prints output that cannot
        be parsed.
    """
    unavailable = (0.0, 0.0, 0.0)
    command = [
        'nvidia-smi',
        '--query-gpu=memory.used,memory.total,utilization.gpu',
        '--format=csv,noheader,nounits',
        '-i', '0',
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True, timeout=5)
    except (OSError, subprocess.SubprocessError):
        # Binary missing / not executable, or the 5 s timeout expired.
        return unavailable

    if result.returncode != 0:
        return unavailable

    try:
        # ValueError covers both a non-numeric field (e.g. "[N/A]") and a
        # line with fewer than three comma-separated fields.
        mem_used, mem_total, gpu_util = (
            float(field.strip())
            for field in result.stdout.strip().split(',')[:3]
        )
    except ValueError:
        return unavailable

    return mem_used / 1024, mem_total / 1024, gpu_util
| |
|
| |
|
def resource_monitor(interval=0.3):
    """Sample CPU, memory and GPU usage into ``monitor_data`` until told to stop.

    Intended to be the target of a background thread. Each sample appends one
    entry to every series in the module-level ``monitor_data`` dict. The loop
    ends after the first sample taken once the module-level ``stop_monitor``
    flag is true, so at least one sample is always recorded.

    Args:
        interval: Seconds to sleep before each sample.
    """
    # psutil documents the first cpu_percent(interval=None) call as returning
    # a meaningless 0.0, because there is no previous call to compare with.
    # Prime both the aggregate and the per-core counters here and discard the
    # results so that such a reading never ends up in the recorded series.
    psutil.cpu_percent(interval=None)
    psutil.cpu_percent(interval=None, percpu=True)

    while True:
        # Sleep first so every CPU reading covers a full interval.
        time.sleep(interval)

        ts = time.time()
        cpu_total = psutil.cpu_percent(interval=None)
        cpu_per_core = psutil.cpu_percent(interval=None, percpu=True)
        mem = psutil.virtual_memory()
        gpu_mem, _gpu_total, gpu_util = get_gpu_stats()

        # Look the dict up once so a whole sample lands in the same dict even
        # if ``reset_monitor`` rebinds ``monitor_data`` in the meantime.
        series = monitor_data
        series["timestamps"].append(ts)
        series["cpu_percent"].append(cpu_total)
        series["cpu_per_core"].append(cpu_per_core)
        series["memory_used_gb"].append(mem.used / (1024**3))
        series["memory_percent"].append(mem.percent)
        series["gpu_memory_gb"].append(gpu_mem)
        series["gpu_util"].append(gpu_util)

        if stop_monitor:
            break
| |
|
| |
|
def reset_monitor():
    """Clear the stop flag and rebind ``monitor_data`` to empty series.

    The replacement dict has the same keys as the current one, each mapped to
    a new empty list.
    """
    global monitor_data, stop_monitor
    stop_monitor = False

    fresh = {}
    for series_name in monitor_data:
        fresh[series_name] = []
    monitor_data = fresh
| |
|
| |
|
def get_monitor_stats():
    """Summarise the scalar series currently held in ``monitor_data``.

    Returns:
        A dict keyed by series name. Each value is a dict with ``min``,
        ``max``, ``mean`` and ``std`` as Python floats. Series that have no
        samples are left out of the result.
    """
    reducers = {
        "min": np.min,
        "max": np.max,
        "mean": np.mean,
        "std": np.std,
    }
    summarised = (
        "cpu_percent",
        "memory_used_gb",
        "memory_percent",
        "gpu_memory_gb",
        "gpu_util",
    )

    summary = {}
    for name in summarised:
        if not monitor_data[name]:
            continue
        values = np.array(monitor_data[name])
        summary[name] = {
            label: float(reduce(values)) for label, reduce in reducers.items()
        }
    return summary
| |
|
| |
|
| | |
| | |
| | |
| |
|
def get_test_data():
    """Collect baseline/follow-up scan pairs from ``data/processed``.

    Looks in ``<parent of this script's directory>/data/processed`` for
    directories matching ``real_lung_*`` (in sorted order) and keeps those
    that contain both ``baseline.nii.gz`` and ``followup.nii.gz``.

    Returns:
        A list of dicts with the keys ``name`` (directory name), ``baseline``
        and ``followup`` (file paths as strings).
    """
    processed_dir = Path(__file__).parent.parent / "data" / "processed"

    candidates = (
        (case_dir, case_dir / "baseline.nii.gz", case_dir / "followup.nii.gz")
        for case_dir in sorted(processed_dir.glob("real_lung_*"))
    )

    return [
        {
            "name": case_dir.name,
            "baseline": str(baseline_file),
            "followup": str(followup_file),
        }
        for case_dir, baseline_file, followup_file in candidates
        if baseline_file.exists() and followup_file.exists()
    ]
| |
|
| |
|
def run_cpu_task(task_id, data_pair):
    """Run one CPU task: load a scan pair, register it, detect changes.

    The project services are imported and constructed before the timer
    starts, so their import/construction cost is not part of the timings.

    Args:
        task_id: Identifier copied into the result.
        data_pair: Dict with ``name``, ``baseline`` and ``followup`` keys, as
            produced by ``get_test_data``.

    Returns:
        A dict with ``task_id``, ``name``, ``shape`` (shape of the loaded
        baseline), the per-stage durations ``load_time``, ``reg_time`` and
        ``detect_time`` in seconds, their sum ``total_time``, and
        ``status`` set to ``"success"``.

    Raises:
        Whatever the project loader, registrator or detector raise; nothing
        is caught here.
    """
    from app.services.dicom import DicomLoader
    from app.services.registration import ImageRegistrator
    from app.services.analysis import ChangeDetector

    loader = DicomLoader()
    registrator = ImageRegistrator()
    detector = ChangeDetector()

    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # durations cannot be distorted by system clock adjustments.
    started = time.perf_counter()

    baseline, _ = loader.load_nifti(data_pair["baseline"])
    followup, _ = loader.load_nifti(data_pair["followup"])
    loaded = time.perf_counter()

    reg_result = registrator.register(followup, baseline, use_deformable=True)
    registered = time.perf_counter()

    # Only the duration matters for the benchmark; the detection result
    # itself is not used.
    detector.detect_changes(baseline, reg_result["warped_image"])
    finished = time.perf_counter()

    return {
        "task_id": task_id,
        "name": data_pair["name"],
        "shape": list(baseline.shape),
        "load_time": loaded - started,
        "reg_time": registered - loaded,
        "detect_time": finished - registered,
        "total_time": finished - started,
        "status": "success"
    }
| |
|
| |
|
def run_gpu_task(task_id, nifti_path, device_id=0):
    """Run one GPU task: load a volume and segment it.

    Args:
        task_id: Identifier copied into the result.
        nifti_path: Path of the NIfTI file to load.
        device_id: Value written to ``CUDA_VISIBLE_DEVICES``.

    Returns:
        A dict with ``task_id``, ``shape`` (shape of the loaded volume),
        ``load_time`` and ``seg_time`` in seconds, their sum ``total_time``,
        ``gpu_peak_gb`` (``torch.cuda.max_memory_allocated()`` divided by
        1024**3) and ``status`` set to ``"success"``.

    Raises:
        Whatever torch or the project loader/segmentor raise; nothing is
        caught here.
    """
    # Set before importing torch so the variable is in place as early as
    # possible. It is process-wide, so concurrent tasks all write the same
    # environment entry.
    # NOTE(review): if CUDA was already initialised in this process the new
    # value presumably has no effect - confirm.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(device_id)
    import torch

    from app.services.dicom import DicomLoader
    from app.services.segmentation import OrganSegmentor

    # NOTE(review): the peak-memory counter is reset and read without a device
    # argument, so tasks running concurrently in this process presumably share
    # it and each report the combined peak - confirm.
    torch.cuda.reset_peak_memory_stats()

    loader = DicomLoader()
    segmentor = OrganSegmentor()

    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # durations cannot be distorted by system clock adjustments.
    started = time.perf_counter()

    data, _ = loader.load_nifti(nifti_path)
    loaded = time.perf_counter()

    # Only the duration matters for the benchmark; the segmentation result
    # itself is not used.
    segmentor.segment(data)
    finished = time.perf_counter()

    peak_mem = torch.cuda.max_memory_allocated() / (1024**3)

    return {
        "task_id": task_id,
        "shape": list(data.shape),
        "load_time": loaded - started,
        "seg_time": finished - loaded,
        "total_time": finished - started,
        "gpu_peak_gb": peak_mem,
        "status": "success"
    }
| |
|
| |
|
| | |
| | |
| | |
| |
|
def benchmark_cpu_concurrent(data_pairs, concurrency_levels=(1, 2, 3, 4, 5)):
    """Benchmark ``run_cpu_task`` at several concurrency levels.

    For each level ``n`` the first ``n`` entries of ``data_pairs`` are run in
    a thread pool with ``n`` workers while ``resource_monitor`` samples system
    usage in a background thread.

    Args:
        data_pairs: Sequence of pair dicts as produced by ``get_test_data``.
        concurrency_levels: Iterable of worker counts to test. Levels larger
            than ``len(data_pairs)`` are skipped.

    Returns:
        A dict mapping each tested level to a dict with ``concurrency``,
        ``total_time`` (seconds), ``tasks`` (per-task result dicts in
        completion order; a failed task is recorded as
        ``{"task_id", "status": "error", "error"}``) and ``resource_stats``
        (the output of ``get_monitor_stats``).
    """
    global stop_monitor
    results = {}

    for n in concurrency_levels:
        if n > len(data_pairs):
            # Each concurrent task needs its own pair. Skip rather than stop,
            # so levels listed after an oversized one are still tested.
            continue

        print(f"\n 🔄 测试 {n} 并发...")
        reset_monitor()

        stop_monitor = False
        monitor_thread = threading.Thread(target=resource_monitor, args=(0.2,))
        monitor_thread.start()

        task_results = []
        start = time.perf_counter()
        try:
            with ThreadPoolExecutor(max_workers=n) as executor:
                # Map each future to its task id so that failures can still be
                # attributed to a task.
                futures = {
                    executor.submit(run_cpu_task, task_id, pair): task_id
                    for task_id, pair in enumerate(data_pairs[:n], start=1)
                }

                for future in as_completed(futures):
                    try:
                        task_results.append(future.result())
                    except Exception as e:
                        task_results.append({
                            "task_id": futures[future],
                            "status": "error",
                            "error": str(e)
                        })

            total_time = time.perf_counter() - start
        finally:
            # Always stop the monitor, even on an unexpected error or Ctrl-C;
            # otherwise its non-daemon thread would keep the process alive.
            stop_monitor = True
            monitor_thread.join()

        stats = get_monitor_stats()

        results[n] = {
            "concurrency": n,
            "total_time": total_time,
            "tasks": task_results,
            "resource_stats": stats
        }

        success = sum(1 for t in task_results if t.get("status") == "success")
        # A series is absent from ``stats`` when no sample was recorded.
        cpu_peak = stats.get('cpu_percent', {}).get('max', 0.0)
        mem_peak = stats.get('memory_used_gb', {}).get('max', 0.0)
        print(f" ✅ {success}/{n} 成功, 耗时 {total_time:.2f}s")
        print(f" 📊 CPU峰值: {cpu_peak:.1f}%, 内存峰值: {mem_peak:.1f}GB")

    return results
| |
|
| |
|
def benchmark_gpu_concurrent(data_pairs, concurrency_levels=(1, 2)):
    """Benchmark ``run_gpu_task`` at several concurrency levels.

    For each level ``n`` the baseline volumes of the first ``n`` entries of
    ``data_pairs`` are segmented in a thread pool with ``n`` workers, every
    task being given device id 0, while ``resource_monitor`` samples system
    usage in a background thread.

    Args:
        data_pairs: Sequence of pair dicts as produced by ``get_test_data``;
            only the ``baseline`` path of each is used.
        concurrency_levels: Iterable of worker counts to test. Levels larger
            than ``len(data_pairs)`` are skipped.

    Returns:
        A dict mapping each tested level to a dict with ``concurrency``,
        ``total_time`` (seconds), ``tasks`` (per-task result dicts in
        completion order; a failed task is recorded as
        ``{"task_id", "status": "error", "error"}``) and ``resource_stats``
        (the output of ``get_monitor_stats``).
    """
    global stop_monitor
    results = {}

    for n in concurrency_levels:
        if n > len(data_pairs):
            # Each concurrent task needs its own volume. Skip rather than
            # stop, so levels listed after an oversized one are still tested.
            continue

        print(f"\n 🧠 测试 {n} GPU并发...")
        reset_monitor()

        stop_monitor = False
        monitor_thread = threading.Thread(target=resource_monitor, args=(0.2,))
        monitor_thread.start()

        task_results = []
        start = time.perf_counter()
        try:
            # A single code path covers every level; n == 1 is simply a pool
            # with one worker and one task.
            with ThreadPoolExecutor(max_workers=n) as executor:
                futures = {
                    executor.submit(run_gpu_task, task_id, pair["baseline"], 0): task_id
                    for task_id, pair in enumerate(data_pairs[:n], start=1)
                }

                for future in as_completed(futures):
                    try:
                        task_results.append(future.result())
                    except Exception as e:
                        task_results.append({
                            "task_id": futures[future],
                            "status": "error",
                            "error": str(e)
                        })

            total_time = time.perf_counter() - start
        finally:
            # Always stop the monitor, even on an unexpected error or Ctrl-C;
            # otherwise its non-daemon thread would keep the process alive.
            stop_monitor = True
            monitor_thread.join()

        stats = get_monitor_stats()

        results[n] = {
            "concurrency": n,
            "total_time": total_time,
            "tasks": task_results,
            "resource_stats": stats
        }

        success = sum(1 for t in task_results if t.get("status") == "success")
        print(f" ✅ {success}/{n} 成功, 耗时 {total_time:.2f}s")
        if stats.get('gpu_memory_gb'):
            gpu_mem_peak = stats['gpu_memory_gb']['max']
            gpu_util_peak = stats.get('gpu_util', {}).get('max', 0.0)
            print(f" 📊 GPU显存峰值: {gpu_mem_peak:.1f}GB, GPU利用率峰值: {gpu_util_peak:.1f}%")

    return results
| |
|
| |
|
def get_system_info():
    """Collect a description of the host for the benchmark report.

    Every probe beyond the psutil basics is best-effort: a probe that fails
    leaves its default in place instead of raising.

    Returns:
        A dict with:
        ``timestamp``: ``datetime.now().isoformat()``;
        ``cpu``: ``model`` (first ``model name`` entry of ``/proc/cpuinfo``,
        else ``"Unknown"``), ``physical_cores``, ``logical_cores`` and
        ``freq_mhz`` (psutil's ``cpu_freq().max``, or 0 when unavailable);
        ``memory``: ``total_gb``;
        ``gpu``: one ``{"name", "memory_mb"}`` dict per parseable line
        printed by ``nvidia-smi`` (empty list when it cannot be run);
        ``software``: ``python`` plus ``pytorch``, ``cuda``, ``monai`` and
        ``simpleitk`` for whichever of those packages can be imported.
    """
    cpu_freq = psutil.cpu_freq()
    info = {
        "timestamp": datetime.now().isoformat(),
        "cpu": {
            "model": "Unknown",
            "physical_cores": psutil.cpu_count(logical=False),
            "logical_cores": psutil.cpu_count(logical=True),
            "freq_mhz": cpu_freq.max if cpu_freq else 0
        },
        "memory": {
            "total_gb": psutil.virtual_memory().total / (1024**3)
        },
        "gpu": []
    }

    # CPU model name.
    try:
        with open('/proc/cpuinfo', 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                if 'model name' in line:
                    # partition() keeps everything after the first colon, so
                    # a model name that itself contains ':' is not truncated.
                    _, sep, value = line.partition(':')
                    if sep:
                        info["cpu"]["model"] = value.strip()
                        break
    except OSError:
        # File missing or unreadable: keep "Unknown".
        pass

    # GPU names and memory sizes.
    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'],
            capture_output=True, text=True, timeout=5
        )
    except (OSError, subprocess.SubprocessError):
        # nvidia-smi missing, not executable, or timed out: no GPU entries.
        result = None

    if result is not None and result.returncode == 0:
        for line in result.stdout.strip().split('\n'):
            parts = line.split(',')
            try:
                gpu = {
                    "name": parts[0].strip(),
                    "memory_mb": int(parts[1].strip().replace(' MiB', ''))
                }
            except (IndexError, ValueError):
                # Skip a malformed line without discarding the others.
                continue
            info["gpu"].append(gpu)

    # Software versions.
    info["software"] = {
        "python": sys.version.split()[0],
    }

    # The packages below are optional. Importing a heavy package can fail in
    # more ways than ImportError (e.g. a missing shared library), so catch
    # Exception - but not KeyboardInterrupt/SystemExit as a bare except would.
    try:
        import torch
        info["software"]["pytorch"] = torch.__version__
        info["software"]["cuda"] = torch.version.cuda if torch.cuda.is_available() else "N/A"
    except Exception:
        pass

    try:
        import monai
        info["software"]["monai"] = monai.__version__
    except Exception:
        pass

    try:
        import SimpleITK as sitk
        # NOTE(review): this records only the major version number, unlike the
        # full version strings stored for the other packages - confirm whether
        # the full version was intended.
        info["software"]["simpleitk"] = sitk.Version_MajorVersion()
    except Exception:
        pass

    return info
| |
|
| |
|
def _stat_value(stats, series, field, default=0.0):
    """Return ``stats[series][field]``, or *default* if that series is absent."""
    return stats.get(series, {}).get(field, default)


def _percent_of(part, total):
    """Return *part* as a percentage of *total*, or 0.0 if *total* is not positive."""
    return part / total * 100 if total > 0 else 0.0


def _first_usable_task(results, level):
    """Return the first non-error task recorded for *level*, or None."""
    tasks = results.get(level, {}).get('tasks') or []
    return next((t for t in tasks if t.get('status') != 'error'), None)


def generate_markdown_report(sys_info, cpu_results, gpu_results, data_info):
    """Render the benchmark results as a Markdown document body.

    Args:
        sys_info: Dict as returned by ``get_system_info``. ``gpu`` may be an
            empty list, in which case the GPU rows show ``N/A``.
        cpu_results: Dict as returned by ``benchmark_cpu_concurrent``.
        gpu_results: Dict as returned by ``benchmark_gpu_concurrent``; may be
            empty, which leaves the GPU table without rows.
        data_info: Dict with ``count``, ``shape`` and ``size_mb``.

    Returns:
        The report as a single string. Missing resource statistics are
        rendered as 0, and a zero ``total_time`` yields 0 throughput and
        efficiency instead of raising. The single-task breakdown sections are
        only included when concurrency level 1 has a task that did not fail.
    """
    gpus = sys_info['gpu']
    gpu_name = gpus[0]['name'] if gpus else 'N/A'
    gpu_memory = f"{gpus[0]['memory_mb']/1024:.0f} GB" if gpus else 'N/A'
    software = sys_info['software']

    parts = [f"""
## 🔬 性能基准测试报告

> 测试时间: {sys_info['timestamp'][:19].replace('T', ' ')}

### 测试环境

| 组件 | 配置 |
|------|------|
| **CPU** | {sys_info['cpu']['model']} |
| **CPU核心** | {sys_info['cpu']['physical_cores']} 物理核 / {sys_info['cpu']['logical_cores']} 逻辑核 |
| **内存** | {sys_info['memory']['total_gb']:.0f} GB |
| **GPU** | {gpu_name} |
| **GPU显存** | {gpu_memory} |
| **Python** | {software.get('python', 'N/A')} |
| **PyTorch** | {software.get('pytorch', 'N/A')} |
| **CUDA** | {software.get('cuda', 'N/A')} |
| **MONAI** | {software.get('monai', 'N/A')} |

### 测试数据

| 属性 | 值 |
|------|------|
| **数据集** | Learn2Reg Lung CT |
| **样本数量** | {data_info['count']} 对 |
| **输入尺寸** | {data_info['shape']} |
| **数据类型** | float32 |
| **单卷大小** | ~{data_info['size_mb']:.1f} MB |

### CPU 并发测试结果 (配准 + 变化检测)

| 并发数 | 总耗时 | 吞吐量 | CPU峰值 | CPU均值 | 内存峰值 | 并行效率 |
|--------|--------|--------|---------|---------|----------|----------|
"""]

    single_time = cpu_results.get(1, {}).get('total_time', 1)
    for n, data in sorted(cpu_results.items()):
        stats = data['resource_stats']
        elapsed = data['total_time']
        # NOTE(review): this is (n * T1 / Tn) * 100, which reaches n * 100 %
        # for perfectly parallel tasks; the column is labelled as parallel
        # efficiency - confirm whether T1 / Tn was intended.
        efficiency = (single_time * n / elapsed) * 100 if elapsed > 0 else 0
        throughput = n / elapsed * 60 if elapsed > 0 else 0

        parts.append(
            f"| {n} | {elapsed:.2f}s | {throughput:.1f}/min | "
            f"{_stat_value(stats, 'cpu_percent', 'max'):.1f}% | "
            f"{_stat_value(stats, 'cpu_percent', 'mean'):.1f}% | "
            f"{_stat_value(stats, 'memory_used_gb', 'max'):.1f} GB | {efficiency:.0f}% |\n"
        )

    parts.append("""
### GPU 并发测试结果 (MONAI 器官分割)

| 并发数 | 总耗时 | GPU显存峰值 | GPU利用率峰值 | CPU峰值 | 内存峰值 |
|--------|--------|-------------|---------------|---------|----------|
""")

    for n, data in sorted(gpu_results.items()):
        stats = data['resource_stats']
        gpu_peak = _stat_value(stats, 'gpu_memory_gb', 'max', 0)
        gpu_util = _stat_value(stats, 'gpu_util', 'max', 0)

        parts.append(
            f"| {n} | {data['total_time']:.2f}s | {gpu_peak:.1f} GB | {gpu_util:.0f}% | "
            f"{_stat_value(stats, 'cpu_percent', 'max'):.1f}% | "
            f"{_stat_value(stats, 'memory_used_gb', 'max'):.1f} GB |\n"
        )

    # Per-stage breakdown of the single-task CPU run.
    task = _first_usable_task(cpu_results, 1)
    if task is not None:
        total = task.get('total_time', 1)
        load_time = task.get('load_time', 0)
        detect_time = task.get('detect_time', 0)
        # NOTE(review): the rigid stage is not timed separately. Its row is a
        # fixed estimate, and the deformable row is reg_time minus that 1 s.
        deformable_time = task.get('reg_time', 0) - 1
        parts.append(f"""
### 单任务耗时分解 (CPU 配准流程)

| 阶段 | 耗时 | 占比 |
|------|------|------|
| 数据加载 | {load_time:.2f}s | {_percent_of(load_time, total):.0f}% |
| 刚性配准 | ~1.0s | ~13% |
| 非刚性配准 | ~{deformable_time:.1f}s | ~{_percent_of(deformable_time, total):.0f}% |
| 变化检测 | {detect_time:.2f}s | {_percent_of(detect_time, total):.0f}% |
| **总计** | **{task.get('total_time', 0):.2f}s** | **100%** |
""")

    # Per-stage breakdown of the single-task GPU run.
    task = _first_usable_task(gpu_results, 1)
    if task is not None:
        total = task.get('total_time', 1)
        load_time = task.get('load_time', 0)
        seg_time = task.get('seg_time', 0)
        parts.append(f"""
### 单任务耗时分解 (GPU 分割流程)

| 阶段 | 耗时 | 占比 |
|------|------|------|
| 数据加载 | {load_time:.2f}s | {_percent_of(load_time, total):.0f}% |
| 模型推理 | {seg_time:.2f}s | {_percent_of(seg_time, total):.0f}% |
| **总计** | **{task.get('total_time', 0):.2f}s** | **100%** |
| **GPU显存峰值** | **{task.get('gpu_peak_gb', 0):.2f} GB** | - |
""")

    # This table is fixed text; it is not derived from the measurements.
    parts.append("""
### 资源需求总结

根据以上测试结果,推荐以下硬件配置:

| 部署场景 | CPU | 内存 | GPU | 预估并发能力 |
|----------|-----|------|-----|--------------|
| **最低配置** | 4核 | 8 GB | 无 | 1 任务 (仅配准) |
| **推荐配置** | 8核 | 16 GB | RTX 3060 12GB | 2-3 任务 |
| **专业配置** | 16核 | 32 GB | RTX 4090 24GB | 5+ 任务 |
| **服务器配置** | 32核+ | 64 GB+ | A100 40GB+ | 10+ 任务 |

""")

    return "".join(parts)
| |
|
| |
|
def main():
    """Run the whole benchmark and write ``BENCHMARK.md``.

    Steps: collect system information, locate the test data, run the CPU
    concurrency benchmark, run the GPU benchmark when torch reports CUDA as
    available, render the Markdown report into the parent of this script's
    directory, and print a summary.

    Returns:
        ``None`` when no test data is found; otherwise a dict with
        ``sys_info``, ``cpu_results``, ``gpu_results``, ``data_info`` and
        ``report``.
    """
    def banner(title, leading_newline=True):
        # Print a section title framed by two rules.
        rule = "=" * 70
        print("\n" + rule if leading_newline else rule)
        print(title)
        print(rule)

    def peak(stats, series):
        # A series is absent from the stats when no sample was recorded.
        return stats.get(series, {}).get('max', 0)

    banner("🔬 NeuroScan AI 完整基准测试", leading_newline=False)

    # System information.
    print("\n📊 收集系统信息...")
    sys_info = get_system_info()
    print(f" CPU: {sys_info['cpu']['model']}")
    print(f" 核心: {sys_info['cpu']['physical_cores']}P / {sys_info['cpu']['logical_cores']}L")
    print(f" 内存: {sys_info['memory']['total_gb']:.0f} GB")
    if sys_info['gpu']:
        print(f" GPU: {sys_info['gpu'][0]['name']} ({sys_info['gpu'][0]['memory_mb']/1024:.0f} GB)")

    # Test data.
    print("\n📁 加载测试数据...")
    data_pairs = get_test_data()
    print(f" 找到 {len(data_pairs)} 对测试数据")

    if not data_pairs:
        print("❌ 没有测试数据!请先运行: python scripts/download_datasets.py")
        return

    # Describe the data using the first baseline volume as a sample.
    from app.services.dicom import DicomLoader
    loader = DicomLoader()
    sample_data, _ = loader.load_nifti(data_pairs[0]["baseline"])
    data_info = {
        "count": len(data_pairs),
        # Join every dimension instead of indexing exactly three of them.
        "shape": " x ".join(str(dim) for dim in sample_data.shape),
        "size_mb": sample_data.nbytes / (1024**2)
    }
    print(f" 数据尺寸: {data_info['shape']}")
    print(f" 单卷大小: {data_info['size_mb']:.1f} MB")

    # CPU benchmark: levels 1..5, capped at the number of available pairs.
    banner("🔄 CPU 并发基准测试 (配准 + 变化检测)")

    cpu_levels = list(range(1, min(len(data_pairs), 5) + 1))
    cpu_results = benchmark_cpu_concurrent(data_pairs, cpu_levels)

    # GPU benchmark: optional, so a failure here must not lose the CPU results.
    banner("🧠 GPU 基准测试 (MONAI 器官分割)")

    gpu_results = {}
    try:
        import torch
        if torch.cuda.is_available():
            gpu_results = benchmark_gpu_concurrent(data_pairs, [1, 2])
        else:
            print(" ⚠️ GPU 不可用,跳过GPU测试")
    except Exception as e:
        print(f" ⚠️ GPU测试失败: {e}")

    # Report.
    banner("📝 生成测试报告")

    report = generate_markdown_report(sys_info, cpu_results, gpu_results, data_info)

    report_path = Path(__file__).parent.parent / "BENCHMARK.md"
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write("# NeuroScan AI 性能基准测试\n")
        f.write(report)

    print(f" ✅ 报告已保存: {report_path}")

    # Console summary.
    banner("📋 测试摘要")

    print("\n🔄 CPU 测试 (配准流程):")
    for n, data in sorted(cpu_results.items()):
        stats = data['resource_stats']
        print(f" {n}并发: CPU峰值 {peak(stats, 'cpu_percent'):.1f}%, "
              f"内存峰值 {peak(stats, 'memory_used_gb'):.1f}GB, "
              f"耗时 {data['total_time']:.1f}s")

    if gpu_results:
        print("\n🧠 GPU 测试 (分割流程):")
        for n, data in sorted(gpu_results.items()):
            stats = data['resource_stats']
            gpu_peak = peak(stats, 'gpu_memory_gb')
            print(f" {n}并发: GPU显存峰值 {gpu_peak:.1f}GB, "
                  f"CPU峰值 {peak(stats, 'cpu_percent'):.1f}%, "
                  f"耗时 {data['total_time']:.1f}s")

    print("\n✅ 基准测试完成!")
    print(f" 详细报告: {report_path}")

    return {
        "sys_info": sys_info,
        "cpu_results": cpu_results,
        "gpu_results": gpu_results,
        "data_info": data_info,
        "report": report
    }
| |
|
| |
|
# Run the benchmark only when this file is executed as a script. The value
# returned by ``main`` is bound to the module-level name ``results``.
if __name__ == "__main__":
    results = main()
| |
|
| |
|
| |
|