import csv
import io
import os
import re
import subprocess
import tempfile
from typing import Dict, List, Optional, Tuple

# Header line that introduces each agent in rocminfo output ("Agent 1", ...).
_AGENT_HEADER_RE = re.compile(r'^\s*Agent\s+\d+\s*$', re.MULTILINE)
# Marks an agent section as describing a GPU rather than a CPU.
_GPU_DEVICE_RE = re.compile(r'^\s*Device Type:\s+GPU\b', re.MULTILINE)


class RocprofWrapper:
    """Wrapper for AMD rocprof profiler and hipcc compiler.

    Configuration is read from the environment at construction time:

    * ``ROCM_AVAILABLE`` -- the string "true" (case-insensitive) enables real
      tool invocations; any other value selects the mock code paths.
    * ``HIPCC_PATH`` -- compiler executable, defaults to ``hipcc``.
    * ``ROCPROF_PATH`` -- profiler executable, defaults to ``rocprof``.
    """

    def __init__(self):
        self.rocm_available = os.getenv(
            "ROCM_AVAILABLE", "false").lower() == "true"
        self.hipcc_path = os.getenv("HIPCC_PATH", "hipcc")
        self.rocprof_path = os.getenv("ROCPROF_PATH", "rocprof")

    @staticmethod
    def _derive_output_path(source_path: str) -> str:
        """Return *source_path* with its final extension replaced by ``.out``.

        Only the trailing extension is swapped, so a ``.hip`` substring
        elsewhere in the path (for example in a directory name) is untouched.
        """
        return os.path.splitext(source_path)[0] + '.out'

    def compile_hip_code(self, hip_code: str,
                         output_file: Optional[str] = None) -> Tuple[bool, str]:
        """Compile HIP code using hipcc.

        The source is written to a temporary ``.hip`` file which is removed
        again before returning, whether or not compilation succeeded.

        Args:
            hip_code: HIP source text to compile.
            output_file: Path for the compiled binary. When ``None`` it is
                derived from the temporary source file name.

        Returns:
            ``(success, message)``. When ROCm is not available no compiler is
            run and a mock success is returned. This method does not raise
            for compiler, timeout or OS errors; they are reported in
            ``message`` with ``success`` set to ``False``.
        """
        if not self.rocm_available:
            return True, "Mock compilation successful (ROCm not available)"

        temp_file = None
        try:
            with tempfile.NamedTemporaryFile(
                    mode='w', suffix='.hip', delete=False) as f:
                f.write(hip_code)
                temp_file = f.name

            if output_file is None:
                output_file = self._derive_output_path(temp_file)

            # Pass --offload-arch=gfx942 to avoid the
            # "Cannot find libdevice for sm_52" error, so that compilation
            # works even if CUDA device libraries are missing.
            cmd = [self.hipcc_path, '-o', output_file, temp_file,
                   '--offload-arch=gfx942']

            # Set environment variable just in case hipcc invokes nvcc
            # internally.
            env = os.environ.copy()
            env['NVCC_APPEND_FLAGS'] = ' --offload-arch=gfx942'

            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=60, env=env,
                check=False)

            if result.returncode == 0:
                return True, f"Compilation successful: {output_file}"
            return False, f"Compilation failed: {result.stderr}"
        except subprocess.TimeoutExpired:
            return False, "Compilation timed out"
        except (OSError, subprocess.SubprocessError) as e:
            return False, f"Compilation error: {str(e)}"
        finally:
            # Best-effort cleanup of the temporary source file.
            try:
                if temp_file and os.path.exists(temp_file):
                    os.unlink(temp_file)
            except OSError:
                pass

    def run_with_profiling(self, executable_path: str,
                           args: Optional[List[str]] = None) -> Dict:
        """Run executable with rocprof profiling.

        Args:
            executable_path: Program to run under the profiler.
            args: Extra command-line arguments for the program.

        Returns:
            On success, the metrics dict produced by
            ``_parse_rocprof_output``. On any failure (ROCm unavailable,
            non-zero exit status, timeout, OS error) a dict with
            ``success`` False, an ``error`` message and
            ``execution_time_ms`` 0.
        """
        if not self.rocm_available:
            # Caller should use get_mock_profiling_data(kernel_name,
            # iteration) directly.
            return {
                "success": False,
                "error": "ROCm not available; use get_mock_profiling_data(kernel_name, iteration) instead",
                "execution_time_ms": 0,
            }

        try:
            if args is None:
                args = []

            # Run with rocprof stats timing.
            # NOTE(review): the parser below reads the stats CSV from stdout;
            # confirm the rocprof version in use emits it there rather than
            # only to a results file.
            cmd = [self.rocprof_path, '--stats', '--', executable_path] + args
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=120, check=False)

            if result.returncode != 0:
                detail = (result.stderr.strip() or result.stdout.strip()
                          or "rocprof exited with a non-zero status")
                return {
                    "success": False,
                    "error": f"Profiling failed: {detail}",
                    "execution_time_ms": 0,
                }

            # Parse rocprof output
            return self._parse_rocprof_output(result.stdout, result.stderr)
        except subprocess.TimeoutExpired:
            return {"success": False, "error": "Profiling timed out",
                    "execution_time_ms": 0}
        except (OSError, subprocess.SubprocessError) as e:
            return {"success": False, "error": f"Profiling error: {str(e)}",
                    "execution_time_ms": 0}

    def _parse_rocprof_output(self, stdout: str, _stderr: str) -> Dict:
        """Parse rocprof --stats CSV output.

        Expected columns: Name,Calls,TotalDurationNs,AverageNs,Percentage.
        The first row that is not a ROCm runtime helper kernel and carries a
        positive numeric ``AverageNs`` supplies ``execution_time_ms``. Rows
        whose ``AverageNs`` is missing, empty or not a number are skipped so
        that a single malformed line does not discard valid rows after it.

        Args:
            stdout: Text to parse as CSV.
            _stderr: Unused.

        Returns:
            A metrics dict with ``success`` True, or a failure dict with
            ``success`` False, an ``error`` message and
            ``execution_time_ms`` 0.
        """
        try:
            metrics: Dict = {}
            reader = csv.DictReader(io.StringIO(stdout))
            for row in reader:
                # DictReader fills columns missing from a short row with None.
                name = row.get("Name") or ""
                # Skip ROCm runtime helper kernels
                if "__amd_rocclr" in name:
                    continue

                avg_ns_str = (row.get("AverageNs") or "").strip()
                if not avg_ns_str:
                    continue
                try:
                    avg_ns = float(avg_ns_str)
                except ValueError:
                    # Not a number: ignore this row and keep looking.
                    continue

                if avg_ns > 0:
                    metrics["execution_time_ms"] = round(
                        avg_ns / 1_000_000, 6)
                    metrics["memory_bandwidth_gbps"] = 0.0
                    metrics["gpu_utilization_percent"] = 0.0
                    metrics["sq_waves"] = 0
                    break

            if not metrics:
                return {
                    "success": False,
                    "error": "rocprof output contained no parseable kernel rows",
                    "execution_time_ms": 0,
                }

            metrics["success"] = True
            return metrics
        except (csv.Error, TypeError, ValueError) as e:
            return {
                "success": False,
                "error": f"Failed to parse rocprof output: {str(e)}",
                "execution_time_ms": 0,
            }

    def get_mock_profiling_data(self, kernel_name: str = "custom",
                                iteration: int = 1) -> Dict:
        """Public accessor for deterministic demo profiling data used by testing layer."""
        return self._get_demo_profiling_data(kernel_name, iteration)

    def _get_demo_profiling_data(self, kernel_name: str = "custom",
                                 iteration: int = 1) -> Dict:
        """Return deterministic per-kernel demo profiling data.

        Replaces random.uniform() with representative MI300X values keyed by
        kernel name and iteration number. Every entry is tagged with
        data_source so the caller and the UI can show an honest provenance
        badge instead of fabricated numbers.

        The data comes from ``demo_artifacts.get_demo_data``; this method
        only adds ``success`` True to it.
        """
        # Imported lazily, as in the original code.
        from .demo_artifacts import get_demo_data

        data = get_demo_data(kernel_name, iteration)
        data['success'] = True
        return data

    def get_hardware_info(self) -> Dict:
        """Get AMD GPU hardware information.

        Returns:
            A fixed mock description when ROCm is not available. Otherwise
            the parsed output of ``rocminfo``, or the MI300X defaults when
            ``rocminfo`` fails, times out or cannot be started.
        """
        if not self.rocm_available:
            return {
                'gpu_name': 'AMD MI300X (Mock)',
                'compute_units': 120,
                'memory_size_gb': 192,
                'memory_bandwidth_tb_s': 5.3,
                'wavefront_size': 64
            }

        try:
            # Try to get real GPU info using rocminfo
            result = subprocess.run(
                ['rocminfo'], capture_output=True, text=True, timeout=10,
                check=False)
        except (OSError, subprocess.SubprocessError):
            return self._get_mock_hardware_info()

        if result.returncode == 0:
            return self._parse_rocminfo(result.stdout)
        return self._get_mock_hardware_info()

    @staticmethod
    def _select_gpu_section(output: str) -> str:
        """Return the first rocminfo agent section describing a GPU.

        Falls back to the whole *output* when no section is marked
        ``Device Type: GPU``.
        """
        for section in _AGENT_HEADER_RE.split(output):
            if _GPU_DEVICE_RE.search(section):
                return section
        return output

    def _parse_rocminfo(self, output: str) -> Dict:
        """Parse rocminfo output to extract hardware info.

        Fields are read from the first GPU agent section, so that a CPU
        agent listed earlier does not supply the name or compute-unit
        count. Any field that cannot be found keeps its MI300X default.
        """
        info = self._get_mock_hardware_info()  # safe MI300X defaults
        section = self._select_gpu_section(output)

        name_match = re.search(r'^\s*Name:\s+(.+)$', section, re.MULTILINE)
        if name_match:
            info['gpu_name'] = name_match.group(1).strip()

        cu_match = re.search(
            r'^\s*Compute Unit:\s+(\d+)', section, re.MULTILINE)
        if cu_match:
            info['compute_units'] = int(cu_match.group(1))

        wf_match = re.search(
            r'^\s*Wavefront Size:\s+(\d+)', section, re.MULTILINE)
        if wf_match:
            info['wavefront_size'] = int(wf_match.group(1))

        return info

    def _get_mock_hardware_info(self) -> Dict:
        """Mock hardware info for MI300X"""
        return {
            'gpu_name': 'AMD MI300X',
            'compute_units': 120,
            'memory_size_gb': 192,
            'memory_bandwidth_tb_s': 5.3,
            'wavefront_size': 64,
            'l2_cache_size_kb': 16384,
            'l1_cache_size_kb': 128
        }