| | """ |
| | meta_agent.py โ Reptile Meta-Learning (eval_speedup ์ ๊ฑฐ ๋ฒ์ ) |
| | """ |
| | import os, copy, argparse |
| | import numpy as np |
| | import torch |
| | from stable_baselines3 import PPO |
| | from stable_baselines3.common.env_util import make_vec_env |
| | from compiler_env import LoopUnrollEnv |
| |
|
| | PROJECT_ROOT = os.path.expanduser("~/projects/machineai") |
| | MODELS_DIR = os.path.join(PROJECT_ROOT, "models") |
| | BENCH_DIR = os.path.join(PROJECT_ROOT, "benchmarks") |
| |
|
| | def get_params(model): |
| | return copy.deepcopy(model.policy.state_dict()) |
| |
|
| | def set_params(model, params): |
| | model.policy.load_state_dict(copy.deepcopy(params)) |
| |
|
| | def make_model(arch, source_files, base_params=None): |
| | env = make_vec_env(lambda: LoopUnrollEnv( |
| | arch=arch, source_files=source_files), n_envs=1) |
| | model = PPO("MlpPolicy", env, verbose=0, |
| | learning_rate=3e-4, n_steps=64, batch_size=32) |
| | if base_params: |
| | set_params(model, base_params) |
| | return model |
| |
|
| | def main(): |
| | ap = argparse.ArgumentParser() |
| | ap.add_argument("--meta-base", default="models/x86v2_base.zip") |
| | ap.add_argument("--arch", default="aarch64-linux-gnu") |
| | ap.add_argument("--outer-iters", type=int, default=3) |
| | ap.add_argument("--inner-steps", type=int, default=256) |
| | ap.add_argument("--adapt-steps", type=int, default=100) |
| | ap.add_argument("--out-path", default="models/meta_init.zip") |
| | args = ap.parse_args() |
| |
|
| | bench_files = [ |
| | os.path.join(BENCH_DIR, f) for f in [ |
| | "loop_heavy_arm64v2.c", "nested_arm64v2.c", "matmul_arm64.c" |
| | ] if os.path.exists(os.path.join(BENCH_DIR, f)) |
| | ] |
| | tasks = [[f] for f in bench_files] |
| | print(f"[Meta] ํ์คํฌ ์: {len(tasks)}") |
| |
|
| | |
| | init_env = make_vec_env(lambda: LoopUnrollEnv(arch="x86_64"), n_envs=1) |
| | base_model = PPO.load(args.meta_base, env=init_env) |
| | meta_params = get_params(base_model) |
| | print(f"[Meta] ๋ก๋ ์๋ฃ: {args.meta_base}") |
| |
|
| | |
| | print(f"\n=== Meta-Train ({args.outer_iters} outer iters x {args.inner_steps} inner steps) ===") |
| | for outer_i in range(args.outer_iters): |
| | adapted_list = [] |
| | for task_files in tasks: |
| | m = make_model(args.arch, task_files, base_params=meta_params) |
| | m.learn(total_timesteps=args.inner_steps, reset_num_timesteps=True) |
| | rew = np.mean([ep["r"] for ep in m.ep_info_buffer]) if m.ep_info_buffer else 0.0 |
| | adapted_list.append((get_params(m), rew)) |
| | print(f" ํ์คํฌ {os.path.basename(task_files[0])}: reward={rew:.1f}") |
| |
|
| | |
| | keys = meta_params.keys() |
| | avg = {k: torch.stack([p[k].float() for p,_ in adapted_list]).mean(0) for k in keys} |
| | meta_lr = 0.3 |
| | for k in keys: |
| | meta_params[k] = meta_params[k].float() + meta_lr * (avg[k] - meta_params[k].float()) |
| |
|
| | avg_rew = np.mean([r for _,r in adapted_list]) |
| | print(f"[Outer {outer_i+1}/{args.outer_iters}] ํ๊ท reward: {avg_rew:.1f}") |
| |
|
| | |
| | set_params(base_model, meta_params) |
| | base_model.save(args.out_path) |
| | print(f"\n[Meta] ์ ์ฅ: {args.out_path}") |
| |
|
| | |
| | print(f"\n=== Fast Adapt ({args.adapt_steps}์คํ
) ===") |
| | m = make_model(args.arch, bench_files, base_params=meta_params) |
| | m.learn(total_timesteps=args.adapt_steps, reset_num_timesteps=True) |
| | m.save("models/meta_adapted.zip") |
| | print("์ ์ ๋ชจ๋ธ ์ ์ฅ: models/meta_adapted.zip") |
| | print("\n์ด์ benchmark.py๋ก ์ฑ๋ฅ ํ์ธ ์ค...") |
| | os.system(f"python3 benchmark.py --arch {args.arch} --model models/meta_adapted.zip " |
| | f"--source-files {' '.join(bench_files)}") |
| |
|
| | if __name__ == "__main__": |
| | main() |
| |
|