| import subprocess |
| import tempfile |
| import os |
| import re |
| from typing import Dict, List, Tuple |
|
|
|
|
class RocprofWrapper:
    """Wrapper for the AMD ``rocprof`` profiler and the ``hipcc`` compiler.

    Real tools are only invoked when the ``ROCM_AVAILABLE`` environment
    variable is set to ``true``; otherwise the methods return mock results so
    the surrounding application can run on machines without ROCm installed.
    """

    def __init__(self):
        """Read the ROCm availability flag and tool paths from the environment."""
        # Only the (case-insensitive) literal "true" enables the real tooling.
        self.rocm_available = os.getenv("ROCM_AVAILABLE", "false").lower() == "true"
        self.hipcc_path = os.getenv("HIPCC_PATH", "hipcc")
        self.rocprof_path = os.getenv("ROCPROF_PATH", "rocprof")

    def compile_hip_code(self, hip_code: str, output_file: str = None) -> Tuple[bool, str]:
        """Compile HIP source text with hipcc.

        Args:
            hip_code: HIP source code to compile.
            output_file: Path for the produced binary. When ``None`` the
                binary is written next to the temporary source file, with the
                ``.hip`` extension swapped for ``.out``.

        Returns:
            ``(ok, message)``. ``ok`` is ``True`` on a zero hipcc exit status,
            and also when ROCm is not available (mock success). ``message``
            carries the output path, hipcc's stderr, or the error description.
        """
        if not self.rocm_available:
            return True, "Mock compilation successful (ROCm not available)"

        temp_file = None
        try:
            with tempfile.NamedTemporaryFile(mode='w', suffix='.hip', delete=False) as f:
                # Record the name before writing so the ``finally`` clause can
                # still remove the file if the write itself fails.
                temp_file = f.name
                f.write(hip_code)

            if output_file is None:
                # splitext only touches the trailing extension; str.replace
                # would also rewrite a ".hip" occurring in a directory name.
                output_file = os.path.splitext(temp_file)[0] + '.out'

            cmd = [self.hipcc_path, '-o', output_file,
                   temp_file, '--offload-arch=gfx942']

            env = os.environ.copy()
            # NOTE(review): NVCC_APPEND_FLAGS is an nvcc variable yet carries
            # an AMD gfx target here; kept as-is to preserve behaviour --
            # confirm whether it is still needed.
            env['NVCC_APPEND_FLAGS'] = ' --offload-arch=gfx942'

            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=60, env=env, check=False)

            if result.returncode == 0:
                return True, f"Compilation successful: {output_file}"
            return False, f"Compilation failed: {result.stderr}"

        except subprocess.TimeoutExpired:
            return False, "Compilation timed out"
        except (OSError, subprocess.SubprocessError) as e:
            return False, f"Compilation error: {str(e)}"
        finally:
            # Best-effort removal of the temporary source; the compiled
            # output (if any) is intentionally left in place for the caller.
            if temp_file:
                try:
                    os.unlink(temp_file)
                except OSError:
                    pass

    def run_with_profiling(self, executable_path: str, args: List[str] = None) -> Dict:
        """Run an executable under ``rocprof --stats`` and parse the result.

        Args:
            executable_path: Program to profile.
            args: Extra command-line arguments for the program (optional).

        Returns:
            A dict that always contains ``success`` and ``execution_time_ms``.
            On failure (ROCm unavailable, non-zero exit, timeout, OS error or
            unparseable output) it also contains an ``error`` message.
        """
        if not self.rocm_available:
            return {"success": False, "error": "ROCm not available; use get_mock_profiling_data(kernel_name, iteration) instead", "execution_time_ms": 0}

        try:
            if args is None:
                args = []

            cmd = [self.rocprof_path, '--stats', '--', executable_path] + args
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=120, check=False)

            if result.returncode != 0:
                # Prefer stderr, then stdout, then a generic message.
                detail = (result.stderr.strip()
                          or result.stdout.strip()
                          or "rocprof exited with a non-zero status")
                return {
                    "success": False,
                    "error": f"Profiling failed: {detail}",
                    "execution_time_ms": 0,
                }

            return self._parse_rocprof_output(result.stdout, result.stderr)

        except subprocess.TimeoutExpired:
            return {"success": False, "error": "Profiling timed out", "execution_time_ms": 0}
        except (OSError, subprocess.SubprocessError) as e:
            return {"success": False, "error": f"Profiling error: {str(e)}", "execution_time_ms": 0}

    def _parse_rocprof_output(self, stdout: str, _stderr: str) -> Dict:
        """Parse rocprof --stats CSV output (Name,Calls,TotalDurationNs,AverageNs,Percentage).

        The first row that is not a ``__amd_rocclr`` runtime kernel and has a
        positive numeric ``AverageNs`` supplies ``execution_time_ms``. Rows
        with an empty or non-numeric ``AverageNs`` are skipped individually so
        one malformed line does not discard the valid rows that follow it.

        Args:
            stdout: Text expected to hold the CSV table.
            _stderr: Unused; accepted for signature symmetry with the caller.

        Returns:
            On success a dict with ``execution_time_ms`` (milliseconds,
            rounded to 6 places), zeroed placeholder metrics and
            ``success=True``; otherwise a failure dict with ``error``.
        """
        import csv
        import io

        try:
            for row in csv.DictReader(io.StringIO(stdout)):
                # DictReader fills missing trailing cells with None.
                name = row.get("Name") or ""
                if "__amd_rocclr" in name:
                    continue

                raw_avg = (row.get("AverageNs") or "").strip()
                if not raw_avg:
                    continue
                try:
                    avg_ns = float(raw_avg)
                except ValueError:
                    # Not a number (e.g. stray program output): skip this row.
                    continue

                if avg_ns > 0:
                    return {
                        "execution_time_ms": round(avg_ns / 1_000_000, 6),
                        # Placeholders: --stats output does not provide these.
                        "memory_bandwidth_gbps": 0.0,
                        "gpu_utilization_percent": 0.0,
                        "sq_waves": 0,
                        "success": True,
                    }
        except (csv.Error, TypeError, ValueError) as e:
            return {
                "success": False,
                "error": f"Failed to parse rocprof output: {str(e)}",
                "execution_time_ms": 0,
            }

        return {
            "success": False,
            "error": "rocprof output contained no parseable kernel rows",
            "execution_time_ms": 0,
        }

    def get_mock_profiling_data(self, kernel_name: str = "custom", iteration: int = 1) -> Dict:
        """Public accessor for deterministic demo profiling data used by testing layer."""
        return self._get_demo_profiling_data(kernel_name, iteration)

    def _get_demo_profiling_data(self, kernel_name: str = "custom", iteration: int = 1) -> Dict:
        """Return demo profiling data for ``kernel_name`` / ``iteration``.

        The values come from ``demo_artifacts.get_demo_data`` (imported lazily
        from the enclosing package); this method only adds ``success=True``.
        NOTE(review): the data is described as deterministic, MI300X-representative
        and tagged with ``data_source`` -- that is defined in demo_artifacts,
        not visible here; confirm there.
        """
        from .demo_artifacts import get_demo_data
        data = get_demo_data(kernel_name, iteration)
        data['success'] = True
        return data

    def get_hardware_info(self) -> Dict:
        """Get AMD GPU hardware information.

        Returns:
            A fixed mock description when ROCm is unavailable; otherwise the
            parsed ``rocminfo`` output, falling back to the built-in MI300X
            defaults if ``rocminfo`` fails, times out or cannot be started.
        """
        if not self.rocm_available:
            return {
                'gpu_name': 'AMD MI300X (Mock)',
                'compute_units': 120,
                'memory_size_gb': 192,
                'memory_bandwidth_tb_s': 5.3,
                'wavefront_size': 64
            }

        try:
            result = subprocess.run(
                ['rocminfo'], capture_output=True, text=True, timeout=10, check=False)
        except (OSError, subprocess.SubprocessError):
            # Covers a missing binary as well as TimeoutExpired.
            return self._get_mock_hardware_info()

        if result.returncode == 0:
            return self._parse_rocminfo(result.stdout)
        return self._get_mock_hardware_info()

    def _parse_rocminfo(self, output: str) -> Dict:
        """Parse rocminfo output to extract GPU hardware info.

        rocminfo prints one section per HSA agent ("Agent 1", "Agent 2", ...)
        and host CPUs are normally listed before GPUs, so the fields are read
        from the first section whose ``Device Type`` is ``GPU``. When the text
        has no agent headers or no GPU agent, the whole text is searched.

        Args:
            output: Text printed by ``rocminfo``.

        Returns:
            The default MI300X info dict with ``gpu_name``, ``compute_units``
            and ``wavefront_size`` overridden by whatever was found.
        """
        info = self._get_mock_hardware_info()

        # sections[0] is the preamble before the first "Agent N" header.
        sections = re.split(r'^[ \t]*Agent[ \t]+\d+[ \t]*$', output, flags=re.MULTILINE)
        gpu_section = next(
            (section for section in sections[1:]
             if re.search(r'^[ \t]*Device Type:[ \t]+GPU\b', section, re.MULTILINE)),
            None,
        )
        scope = output if gpu_section is None else gpu_section

        name_match = re.search(r'^\s*Name:\s+(.+)$', scope, re.MULTILINE)
        if name_match:
            info['gpu_name'] = name_match.group(1).strip()

        cu_match = re.search(r'^\s*Compute Unit:\s+(\d+)', scope, re.MULTILINE)
        if cu_match:
            info['compute_units'] = int(cu_match.group(1))

        # Only the leading digits are captured, so "64(0x40)" parses as 64.
        wf_match = re.search(r'^\s*Wavefront Size:\s+(\d+)', scope, re.MULTILINE)
        if wf_match:
            info['wavefront_size'] = int(wf_match.group(1))

        return info

    def _get_mock_hardware_info(self) -> Dict:
        """Return a fresh dict of default hardware values for an MI300X."""
        return {
            'gpu_name': 'AMD MI300X',
            'compute_units': 120,
            'memory_size_gb': 192,
            'memory_bandwidth_tb_s': 5.3,
            'wavefront_size': 64,
            'l2_cache_size_kb': 16384,
            'l1_cache_size_kb': 128
        }
|
|