sosonsong committed on
Commit
94047c1
·
verified ·
1 Parent(s): fb177fd

Upload compiler_env.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. compiler_env.py +168 -0
compiler_env.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gymnasium as gym
2
+ import numpy as np
3
+ import subprocess
4
+ import time
5
+ import os
6
+ from ir_feature_extractor import extract_features
7
+
8
+
9
class LoopUnrollEnv(gym.Env):
    """Gymnasium environment that picks an LLVM pass pipeline for a C file.

    Each episode selects one of ``source_files``, compiles it to LLVM bitcode
    with ``clang -O1`` and exposes the output of ``extract_features`` as the
    observation.  An action selects one of six ``opt`` pass pipelines; the
    resulting bitcode is linked into an executable, timed, and the reward is
    derived from the relative wall-clock change against a baseline measured
    once in the constructor.

    Args:
        source_files: C source paths to sample from.  Defaults to
            ``["test_loop.c"]`` when ``None`` or empty.
        repeat_runs: Number of timed executions per ``step``.
        arch: ``"arm64"`` cross-compiles for ``aarch64-linux-gnu`` and runs
            binaries under ``qemu-aarch64-static``; any other value builds and
            runs natively.
        clang_bin: ``clang`` executable to invoke (default ``"clang"``).
        opt_bin: ``opt`` executable to invoke (default ``"opt"``).
    """

    # Pipelines passed to ``opt --passes=...``, indexed by action.  ``inline``
    # is a CGSCC pass while the loop passes are function passes, so mixed
    # pipelines spell out the nesting explicitly; otherwise ``opt`` can reject
    # the pipeline and ``_apply_action`` would silently fall back to the
    # unmodified bitcode.
    _PASS_PIPELINES = {
        0: "",
        1: "loop-vectorize",
        2: "cgscc(inline),function(loop-vectorize)",
        3: "loop-unroll,loop-vectorize",
        4: "cgscc(inline),function(loop-unroll,loop-vectorize)",
        5: "loop-unroll",
    }

    def __init__(
        self,
        source_files=None,
        repeat_runs=5,
        arch: str = "x86",
        clang_bin: str | None = None,
        opt_bin: str | None = None,
    ):
        super().__init__()

        self.arch = arch
        self.clang_bin = clang_bin or "clang"
        self.opt_bin = opt_bin or "opt"

        self.source_files = source_files or ["test_loop.c"]
        self.repeat_runs = repeat_runs

        self.action_space = gym.spaces.Discrete(6)
        # NOTE(review): assumes extract_features yields 7 values in [0, 1];
        # confirm against ir_feature_extractor.
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(7,), dtype=np.float32
        )

        # Maps source path -> median baseline run time in seconds.
        self.fixed_baselines = {}
        self._precompute_baselines()

    @staticmethod
    def _derive_path(path, tail):
        """Return ``path`` with its final extension replaced by ``tail``.

        Only the last extension is touched, so directory names or stems that
        happen to contain ``.c`` / ``.bc`` are left intact.
        """
        root, _ext = os.path.splitext(path)
        return root + tail

    def _run_subprocess(self, cmd, **kwargs):
        """Run ``cmd`` with captured output and return the CompletedProcess."""
        return subprocess.run(cmd, capture_output=True, **kwargs)

    def _precompute_baselines(self):
        """Measure and cache the unoptimised run time of every source file.

        Files that fail to compile or link are skipped and get no entry in
        ``fixed_baselines``.
        """
        print("베이스라인 사전 측정 중...")
        for src in self.source_files:
            bc = self._compile_to_bc(src)
            if not bc:
                continue
            exe = self._bc_to_exe(bc)
            if not exe:
                continue
            t = self._measure_time_robust(exe, n=11)
            self.fixed_baselines[src] = t
            print(f" {os.path.basename(src)}: {t*1000:.1f}ms")
        print("베이스라인 측정 완료")

    def _measure_time_robust(self, exe, n=11):
        """Return the median wall-clock time in seconds of ``n`` runs of ``exe``.

        Runs with a non-zero exit status are ignored.  If no run succeeds the
        sentinel ``999.0`` is returned.
        """
        # The command does not change between iterations, so build it once.
        if self.arch == "arm64":
            run_cmd = ["qemu-aarch64-static", exe]
        else:
            run_cmd = [exe]

        times = []
        for _ in range(n):
            t0 = time.perf_counter()
            r = self._run_subprocess(run_cmd)
            t1 = time.perf_counter()
            if r.returncode == 0:
                times.append(t1 - t0)
        return float(np.median(times)) if times else 999.0

    def _measure_time(self, exe):
        """Time ``exe`` using ``self.repeat_runs`` repetitions."""
        return self._measure_time_robust(exe, n=self.repeat_runs)

    def _compile_to_bc(self, src):
        """Compile ``src`` to LLVM bitcode next to it.

        Returns:
            The bitcode path, or ``None`` if clang exits with an error.
        """
        bc = self._derive_path(src, ".bc")
        target_flags = ["-target", "aarch64-linux-gnu"] if self.arch == "arm64" else []
        cmd = [
            self.clang_bin,
            "-O1",
            "-emit-llvm",
            "-c",
            *target_flags,
            src,
            "-o",
            bc,
        ]
        r = self._run_subprocess(cmd)
        return bc if r.returncode == 0 else None

    def _apply_action(self, bc_file, action):
        """Run the pass pipeline for ``action`` over ``bc_file``.

        Returns:
            Path of the transformed bitcode.  ``bc_file`` itself is returned
            for the empty pipeline (action 0) or when ``opt`` fails.

        Raises:
            KeyError: if ``action`` is not one of the known pipeline indices.
        """
        action = int(action)
        out = self._derive_path(bc_file, f"_act{action}.bc")
        p = self._PASS_PIPELINES[action]

        if not p:
            return bc_file

        cmd = [self.opt_bin, f"--passes={p}", bc_file, "-o", out]
        r = self._run_subprocess(cmd)
        return out if r.returncode == 0 else bc_file

    def _measure_code_size(self, bc_file):
        """For ARM64: use object-file size as a performance proxy (instead of qemu).

        Returns:
            Size in bytes of the aarch64 object compiled from ``bc_file``, or
            the sentinel ``999999`` if compilation fails.
        """
        obj = self._derive_path(bc_file, ".o")
        cmd = [
            self.clang_bin,
            "-target", "aarch64-linux-gnu",
            "-O1", "-c",
            bc_file, "-o", obj,
        ]
        r = self._run_subprocess(cmd)
        if r.returncode != 0:
            return 999999
        return os.path.getsize(obj)

    def _bc_to_exe(self, bc_file):
        """Link ``bc_file`` into an executable.

        Returns:
            Absolute path of the executable, or ``None`` if clang fails.
        """
        exe = os.path.abspath(self._derive_path(bc_file, "_exe"))
        # arm64 binaries are linked statically so they can run under qemu.
        target_flags = ["-target", "aarch64-linux-gnu", "-static"] if self.arch == "arm64" else []
        cmd = [
            self.clang_bin,
            "-O1",
            *target_flags,
            bc_file,
            "-o",
            exe,
            "-lm",
        ]
        r = self._run_subprocess(cmd)
        return exe if r.returncode == 0 else None

    def reset(self, seed=None, options=None):
        """Start a new episode on a randomly chosen source file.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)

        # Sample with the environment's own generator so that ``seed``
        # actually controls which file is selected.
        idx = int(self.np_random.integers(len(self.source_files)))
        self.current_file = self.source_files[idx]

        # NOTE(review): bc_file is None when compilation fails; the calls
        # below and in step() do not guard against that.
        self.bc_file = self._compile_to_bc(self.current_file)
        # NOTE(review): the arm64 fallback of 1920 does not look like a time
        # in seconds (possibly a code-size figure); confirm the intended unit.
        self.base_time = self.fixed_baselines.get(
            self.current_file, 1920 if self.arch == "arm64" else 1.0
        )

        obs = np.array(extract_features(self.bc_file), dtype=np.float32)
        return obs, {}

    def step(self, action):
        """Apply ``action``, time the resulting binary and compute the reward.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``truncated`` is always ``False``.
        """
        opt_bc = self._apply_action(self.bc_file, int(action))
        exe = self._bc_to_exe(opt_bc)
        # A binary that fails to link is scored as twice the baseline time.
        new_time = self._measure_time(exe) if exe else self.base_time * 2
        # Relative speed-up; the epsilon guards against a zero baseline.
        improvement = (self.base_time - new_time) / (self.base_time + 1e-9)

        if improvement > 0.01:
            reward = improvement * 20.0 + 1.0
        elif improvement < -0.01:
            reward = -2.0
        else:
            # Changes within +/-1% are treated as noise.
            reward = -0.1

        done = improvement > 0.70 or improvement < -0.50

        info = {
            "speedup_pct": improvement * 100,
            "baseline_ms": self.base_time * 1000,
            "optimized_ms": new_time * 1000,
            "flags": int(action),
            "arch": self.arch,
            "clang_bin": self.clang_bin,
            "opt_bin": self.opt_bin,
        }

        # The observation is computed from the original bitcode, not opt_bc.
        obs = np.array(extract_features(self.bc_file), dtype=np.float32)
        return obs, reward, done, False, info