File size: 8,068 Bytes
1a6672d a5be23e 1a6672d a5be23e 1a6672d a5be23e 1a6672d a5be23e 1a6672d a5be23e 27c4e2c 1a6672d a5be23e 1a6672d a5be23e 5c0d4c4 28263c0 a5be23e 5c0d4c4 a5be23e 28263c0 5c0d4c4 a5be23e 1a6672d a5be23e 1a6672d a5be23e 1a6672d 27c4e2c a5be23e 1a6672d 0b5416e a5be23e 1a6672d a5be23e 0b5416e a5be23e 27c4e2c 0b5416e 27c4e2c 1a6672d a5be23e 1a6672d a5be23e 1a6672d 27c4e2c a5be23e 27c4e2c a5be23e 0b5416e 1a6672d 0b5416e 1a6672d 0b5416e 1a6672d a5be23e 0b5416e 1a6672d a5be23e 0b5416e 1a6672d 0b5416e 1a6672d a5be23e 984e3c2 a5be23e 1a6672d a5be23e 1a6672d a5be23e 1a6672d a5be23e 1a6672d a5be23e 0b5416e a5be23e 1a6672d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 | import subprocess
import tempfile
import os
import re
from typing import Dict, List, Tuple
class RocprofWrapper:
    """Wrapper for the AMD rocprof profiler and the hipcc compiler.

    Configuration is read from environment variables at construction time:

    * ``ROCM_AVAILABLE`` -- ``"true"`` (case-insensitive) enables the real
      toolchain; any other value selects the mock code paths.
    * ``HIPCC_PATH`` -- hipcc executable, defaults to ``"hipcc"``.
    * ``ROCPROF_PATH`` -- rocprof executable, defaults to ``"rocprof"``.
    """

    def __init__(self):
        self.rocm_available = os.getenv(
            "ROCM_AVAILABLE", "false").lower() == "true"
        self.hipcc_path = os.getenv("HIPCC_PATH", "hipcc")
        self.rocprof_path = os.getenv("ROCPROF_PATH", "rocprof")

    @staticmethod
    def _profiling_failure(message: str) -> Dict:
        """Build the failure dict shared by all profiling error paths."""
        return {"success": False, "error": message, "execution_time_ms": 0}

    @staticmethod
    def _first_match(pattern: str, text: str):
        """Return group 1 of the first multiline match, or None if absent."""
        found = re.search(pattern, text, re.MULTILINE)
        return found.group(1) if found else None

    def compile_hip_code(self, hip_code: str, output_file: str = None) -> Tuple[bool, str]:
        """Compile HIP source text with hipcc.

        Args:
            hip_code: HIP source code to compile.
            output_file: Path for the compiled binary. When ``None`` the
                binary is written next to the temporary source file with an
                ``.out`` extension.

        Returns:
            ``(ok, message)``. When ROCm is not available a mock success is
            returned without invoking any tool. Failures (non-zero exit,
            timeout, OS errors) are reported through the tuple, not raised.
        """
        if not self.rocm_available:
            return True, "Mock compilation successful (ROCm not available)"

        temp_file = None
        try:
            # UTF-8 keeps non-ASCII source (e.g. comments) from failing on
            # locales with a narrower default encoding.
            with tempfile.NamedTemporaryFile(
                    mode='w', suffix='.hip', delete=False,
                    encoding='utf-8') as f:
                # Record the name before writing so the cleanup in `finally`
                # also runs when the write itself fails.
                temp_file = f.name
                f.write(hip_code)

            if output_file is None:
                # Swap only the extension; str.replace would also rewrite a
                # '.hip' occurring elsewhere in the path.
                output_file = os.path.splitext(temp_file)[0] + '.out'

            # Target gfx942 explicitly to avoid the
            # "Cannot find libdevice for sm_52" error, so compilation works
            # even if CUDA device libraries are missing.
            cmd = [self.hipcc_path, '-o', output_file,
                   temp_file, '--offload-arch=gfx942']

            # Set environment variable just in case hipcc invokes nvcc internally
            env = os.environ.copy()
            env['NVCC_APPEND_FLAGS'] = ' --offload-arch=gfx942'

            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=60, env=env,
                check=False)
            if result.returncode == 0:
                return True, f"Compilation successful: {output_file}"
            return False, f"Compilation failed: {result.stderr}"
        except subprocess.TimeoutExpired:
            return False, "Compilation timed out"
        except (OSError, subprocess.SubprocessError) as e:
            return False, f"Compilation error: {str(e)}"
        finally:
            # Best-effort removal of the temporary source file.
            try:
                if temp_file and os.path.exists(temp_file):
                    os.unlink(temp_file)
            except OSError:
                pass

    def run_with_profiling(self, executable_path: str, args: List[str] = None) -> Dict:
        """Run an executable under ``rocprof --stats`` and parse the result.

        Args:
            executable_path: Program to profile.
            args: Extra command-line arguments for the program.

        Returns:
            The metrics dict from ``_parse_rocprof_output`` on success, or a
            dict with ``success`` False, an ``error`` message and
            ``execution_time_ms`` 0 on any failure. When ROCm is not
            available a failure is returned that points the caller at
            ``get_mock_profiling_data``.

        NOTE(review): the stats CSV is read from the captured stdout --
        confirm that the configured rocprof emits it there rather than only
        to a results file.
        """
        if not self.rocm_available:
            # Caller should use get_mock_profiling_data(kernel_name, iteration) directly.
            return self._profiling_failure(
                "ROCm not available; use get_mock_profiling_data(kernel_name, iteration) instead")

        try:
            # list() accepts any sequence (a tuple would break list `+`).
            cmd = [self.rocprof_path, '--stats', '--',
                   executable_path] + list(args or [])
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=120,
                check=False)

            if result.returncode != 0:
                detail = (result.stderr.strip() or result.stdout.strip()
                          or "rocprof exited with a non-zero status")
                return self._profiling_failure(f"Profiling failed: {detail}")

            return self._parse_rocprof_output(result.stdout, result.stderr)
        except subprocess.TimeoutExpired:
            return self._profiling_failure("Profiling timed out")
        except (OSError, subprocess.SubprocessError) as e:
            return self._profiling_failure(f"Profiling error: {str(e)}")

    def _parse_rocprof_output(self, stdout: str, _stderr: str) -> Dict:
        """Parse rocprof --stats CSV output (Name,Calls,TotalDurationNs,AverageNs,Percentage).

        The first row that is not a ``__amd_rocclr`` runtime helper kernel
        and has a positive ``AverageNs`` supplies ``execution_time_ms``; the
        other metrics are placeholders set to zero.

        Lines preceding the CSV header (tool banners, output of the profiled
        program) are ignored, and rows with a missing or non-numeric
        ``AverageNs`` are skipped instead of failing the whole parse.

        Args:
            stdout: Captured standard output of rocprof.
            _stderr: Captured standard error (currently unused).

        Returns:
            A metrics dict with ``success`` True, or a failure dict when no
            usable kernel row is found or the CSV cannot be read.
        """
        import csv
        import io

        lines = (stdout or "").splitlines()
        # Find the header so leading non-CSV lines are not taken as field names.
        header_idx = next(
            (i for i, line in enumerate(lines)
             if "Name" in line and "AverageNs" in line),
            None,
        )

        if header_idx is not None:
            try:
                reader = csv.DictReader(
                    io.StringIO("\n".join(lines[header_idx:])))
                for row in reader:
                    name = row.get("Name") or ""
                    # Skip ROCm runtime helper kernels
                    if "__amd_rocclr" in name:
                        continue
                    raw_avg = (row.get("AverageNs") or "").strip()
                    if not raw_avg:
                        continue
                    try:
                        avg_ns = float(raw_avg)
                    except ValueError:
                        # One malformed row must not hide later valid rows.
                        continue
                    if avg_ns > 0:
                        return {
                            "execution_time_ms": round(
                                avg_ns / 1_000_000, 6),
                            "memory_bandwidth_gbps": 0.0,
                            "gpu_utilization_percent": 0.0,
                            "sq_waves": 0,
                            "success": True,
                        }
            except csv.Error as e:
                return self._profiling_failure(
                    f"Failed to parse rocprof output: {str(e)}")

        return self._profiling_failure(
            "rocprof output contained no parseable kernel rows")

    def get_mock_profiling_data(self, kernel_name: str = "custom", iteration: int = 1) -> Dict:
        """Public accessor for deterministic demo profiling data used by testing layer."""
        return self._get_demo_profiling_data(kernel_name, iteration)

    def _get_demo_profiling_data(self, kernel_name: str = "custom", iteration: int = 1) -> Dict:
        """Return demo profiling data for ``kernel_name`` at ``iteration``.

        The values come from ``demo_artifacts.get_demo_data``; this method
        only adds ``success`` True.
        """
        from .demo_artifacts import get_demo_data

        # Copy so that the object owned by get_demo_data is not mutated.
        data = dict(get_demo_data(kernel_name, iteration))
        data['success'] = True
        return data

    def get_hardware_info(self) -> Dict:
        """Get AMD GPU hardware information.

        Returns:
            When ROCm is not available, the mock defaults with ``gpu_name``
            set to ``'AMD MI300X (Mock)'``. Otherwise the values parsed from
            ``rocminfo``; if ``rocminfo`` fails, times out or cannot be
            started, the mock defaults are returned.
        """
        if not self.rocm_available:
            # Start from the shared defaults so every return path exposes
            # the same keys (including the cache sizes).
            info = self._get_mock_hardware_info()
            info['gpu_name'] = 'AMD MI300X (Mock)'
            return info

        try:
            result = subprocess.run(
                ['rocminfo'], capture_output=True, text=True, timeout=10,
                check=False)
        except (OSError, subprocess.SubprocessError):
            return self._get_mock_hardware_info()

        if result.returncode == 0:
            return self._parse_rocminfo(result.stdout)
        return self._get_mock_hardware_info()

    def _parse_rocminfo(self, output: str) -> Dict:
        """Parse rocminfo output to extract hardware info.

        When the output is divided into ``Agent N`` sections, values are
        taken from the first section whose ``Device Type`` is ``GPU`` so a
        CPU agent listed first is not reported as the GPU. Without such a
        section the whole output is searched. ``gpu_name`` uses the
        ``Marketing Name`` field when it is non-empty, else ``Name``.
        Fields that cannot be found keep their mock default.
        """
        info = self._get_mock_hardware_info()  # safe MI300X defaults
        text = output or ""

        agent_sections = re.split(
            r'^[ \t]*Agent[ \t]+\d+[ \t]*$', text, flags=re.MULTILINE)[1:]
        scope = next(
            (section for section in agent_sections
             if re.search(r'^[ \t]*Device Type:[ \t]*GPU\b', section,
                          re.MULTILINE)),
            text,
        )

        # [ \t]* (not \s*) after the colon keeps an empty value from
        # capturing the following line.
        name = (
            self._first_match(
                r'^[ \t]*Marketing Name:[ \t]*(\S.*)$', scope)
            or self._first_match(r'^[ \t]*Name:[ \t]*(\S.*)$', scope)
        )
        if name:
            info['gpu_name'] = name.strip()

        compute_units = self._first_match(
            r'^[ \t]*Compute Unit:[ \t]*(\d+)', scope)
        if compute_units:
            info['compute_units'] = int(compute_units)

        wavefront = self._first_match(
            r'^[ \t]*Wavefront Size:[ \t]*(\d+)', scope)
        if wavefront:
            info['wavefront_size'] = int(wavefront)

        return info

    def _get_mock_hardware_info(self) -> Dict:
        """Mock hardware info for MI300X (a fresh dict on every call)."""
        # NOTE(review): AMD's public MI300X specification lists 304 compute
        # units; 120 is kept unchanged because callers may rely on it --
        # confirm the intended value.
        return {
            'gpu_name': 'AMD MI300X',
            'compute_units': 120,
            'memory_size_gb': 192,
            'memory_bandwidth_tb_s': 5.3,
            'wavefront_size': 64,
            'l2_cache_size_kb': 16384,
            'l1_cache_size_kb': 128,
        }
|