File size: 5,949 Bytes
1a6672d
 
 
 
 
 
 
a5be23e
1a6672d
 
a5be23e
1a6672d
 
 
 
 
 
 
 
 
 
 
 
 
 
a5be23e
1a6672d
 
 
 
a5be23e
1a6672d
2fe80fd
 
 
 
 
 
 
 
 
27c4e2c
1a6672d
 
 
a5be23e
1a6672d
 
 
 
 
2fe80fd
 
 
a5be23e
28263c0
 
a5be23e
28263c0
 
5c0d4c4
a5be23e
1a6672d
28263c0
 
a5be23e
 
1a6672d
 
28263c0
a5be23e
 
28263c0
 
1a6672d
a5be23e
 
1a6672d
 
 
a5be23e
1a6672d
 
 
a5be23e
 
 
1a6672d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import difflib
import os
import subprocess
import tempfile


class HipifyWrapper:
    """Wrapper for hipify-clang tool with Python fallback"""

    def __init__(self):
        pass

    def hipify_code(self, cuda_code: str) -> tuple[str, list[dict]]:
        """
        Try to run real hipify-clang if available.
        Falls back to Python-based pattern replacement.
        Returns (hip_code, list of changes made)
        """
        # Try real hipify first
        if self._hipify_available():
            result = self._run_real_hipify(cuda_code)
            if result:
                return result

        # Fallback: Python pattern replacement
        return self._python_hipify(cuda_code)

    def _hipify_available(self) -> bool:
        try:
            result = subprocess.run(
                ["hipify-clang", "--version"],
                capture_output=True, timeout=5, check=False
            )
            if result.returncode != 0:
                return False
            # Skip if no usable CUDA headers — hipify-clang will fail anyway
            cuda_header_paths = [
                "/usr/local/cuda/include/cuda_runtime.h",
                "/usr/lib/cuda/include/cuda_runtime.h",
                "/opt/cuda/include/cuda_runtime.h",
            ]
            return any(os.path.exists(p) for p in cuda_header_paths)
        except (OSError, subprocess.SubprocessError):
            return False

    def _run_real_hipify(self, cuda_code: str) -> tuple[str, list[dict]] | None:
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(suffix=".cu", mode="w", delete=False) as f:
                f.write(cuda_code)
                tmp_path = f.name

            # -nocudalib and -nocudainc are hipify-clang tool flags — must come BEFORE
            # the -- separator (flags after -- go to the internal Clang parser, not the tool).
            cmd = ["hipify-clang", "-nocudalib", "-nocudainc", tmp_path, "--"]

            # Debug log for build engineering
            print(f"DEBUG: Running hipify-clang command: {' '.join(cmd)}")

            # Set environment variable just in case hipify-clang invokes nvcc internally
            env = os.environ.copy()
            env['NVCC_APPEND_FLAGS'] = '-nocudalib'

            result = subprocess.run(
                cmd,
                capture_output=True, text=True, timeout=30,
                env=env,
                check=False,
            )

            if result.returncode != 0:
                print(
                    f"DEBUG: hipify-clang failed with return code {result.returncode}")
                print(f"DEBUG: stderr: {result.stderr}")

            if result.returncode == 0 and result.stdout:
                changes = self._detect_changes(
                    cuda_code, result.stdout, source="hipify-clang")
                return result.stdout, changes

            return None
        except (OSError, subprocess.SubprocessError):
            return None
        finally:
            try:
                if tmp_path and os.path.exists(tmp_path):
                    os.unlink(tmp_path)
            except OSError:
                pass

    def _python_hipify(self, cuda_code: str) -> tuple[str, list[dict]]:
        """Python-based hipify — handles the mechanical replacements."""
        hip_code = cuda_code
        changes = []

        for cuda_api, hip_api in HIPIFY_MAP.items():
            if cuda_api in hip_code and cuda_api != hip_api:
                count = hip_code.count(cuda_api)
                hip_code = hip_code.replace(cuda_api, hip_api)
                changes.append({
                    "old": cuda_api,
                    "new": hip_api,
                    "count": count,
                    "source": "hipify",
                    "confidence": "high"
                })

        # Fix kernel launch syntax: kernel<<<blocks, threads>>> → hipLaunchKernelGGL
        # Keep it as-is for now — LLM handles complex launch syntax
        # Simple <<<>>> launches are valid in HIP too

        return hip_code, changes

    def _detect_changes(self, original: str, converted: str, source: str) -> list[dict]:
        """Detect what changed between original and converted code."""
        changes = []
        orig_lines = original.splitlines()
        conv_lines = converted.splitlines()

        for i, (o, c) in enumerate(zip(orig_lines, conv_lines)):
            if o != c:
                changes.append({
                    "line": i + 1,
                    "old": o.strip(),
                    "new": c.strip(),
                    "source": source,
                    "confidence": "high"
                })

        return changes


# Legacy function for backward compatibility
def run_hipify(cuda_code: str) -> tuple[str, list[dict]]:
    """Convert *cuda_code* using a throwaway ``HipifyWrapper``.

    Retained for older callers; new code should call
    ``HipifyWrapper.hipify_code`` directly.
    """
    return HipifyWrapper().hipify_code(cuda_code)


# Common CUDA → HIP replacements hipify handles
HIPIFY_MAP = {
    # Runtime API names that differ only by the "cuda" -> "hip" prefix.
    **{
        f"cuda{suffix}": f"hip{suffix}"
        for suffix in (
            "Malloc",
            "Free",
            "Memcpy",
            "MemcpyHostToDevice",
            "MemcpyDeviceToHost",
            "MemcpyDeviceToDevice",
            "Success",
            "Error_t",
            "GetLastError",
            "DeviceSynchronize",
            "EventCreate",
            "EventRecord",
            "EventSynchronize",
            "EventElapsedTime",
            "EventDestroy",
            "Event_t",
            "Stream_t",
            "StreamCreate",
            "StreamDestroy",
        )
    },
    # Header names move under the hip/ directory.
    "cuda_runtime.h": "hip/hip_runtime.h",
    "cuda_runtime_api.h": "hip/hip_runtime_api.h",
    # Identical in HIP; listed so the map documents it explicitly.
    "__syncthreads": "__syncthreads",
}