import os
from dotenv import load_dotenv
from typing import Iterator, Tuple
from env_server import KernelOptimization_env, TASKS, app as openenv_app
from openai import OpenAI
from models import Action
import gradio as gr
import traceback
import uvicorn
from fastapi.responses import RedirectResponse
load_dotenv()
def task_baseline_code(task_id: str) -> str:
    """Return the unoptimized baseline kernel source for *task_id*."""
    task = TASKS[task_id]
    return task["baseline_code"]
def extract_code(text: str) -> str:
    """Extract the code payload from a markdown-fenced LLM response.

    Handles three shapes of input:
      * no fence at all -> the text is returned unchanged;
      * a single opening fence with no closer -> everything after the
        fence is returned (the old slice ``text[start+3:end]`` collapsed
        to an empty string because ``find`` and ``rfind`` hit the same
        fence);
      * a fenced block, optionally tagged with a language identifier
        (``cuda``, ``cpp``, ``c++``, ``c``) on its first line -> the tag
        line is dropped and the body returned.

    Args:
        text: Raw model output, possibly containing ``` fences.

    Returns:
        The inner code text, with any recognized language-tag line removed.
    """
    if "```" not in text:
        return text
    start = text.find("```")
    end = text.rfind("```")
    if end == start:
        # Only one fence present: take everything after it rather than
        # producing an empty slice.
        chunk = text[start + 3:]
    else:
        chunk = text[start + 3 : end]
    # partition (unlike split(...)[1]) cannot raise when there is no newline.
    first_line, _, rest = chunk.partition("\n")
    # Drop the fence's language tag only when the whole first line is a tag,
    # so real code that merely starts with "cuda..." is not truncated.
    if first_line.strip().lower() in {"cuda", "cpp", "c++", "c"}:
        return rest
    return chunk
def ui(task_id: str, kernel_code: str, max_steps: int, openai_api_key: str) -> Iterator[Tuple[str, str]]:
    """Run the LLM-driven kernel-optimization loop, streaming UI updates.

    Args:
        task_id: Key into TASKS selecting which kernel task to optimize.
        kernel_code: Optional user-edited starting kernel; when non-empty it
            overrides the task's baseline code as the optimization input.
        max_steps: Maximum number of optimization iterations. Gradio sliders
            may deliver this as a float, so it is coerced to int below.
        openai_api_key: Key entered in the UI; falls back to the
            OPENAI_API_KEY environment variable when blank.

    Yields:
        (log_text, best_code) tuples after each environment step, or a
        single error tuple when the key is missing or a step fails.
    """
    log = []
    env = KernelOptimization_env()
    api_key = openai_api_key or os.getenv("OPENAI_API_KEY")
    if not api_key:
        yield "ERROR: Missing OPENAI_API_KEY", ""
        return
    model = os.getenv("MODEL_NAME", "llama-3.2-3b")
    client = OpenAI(api_key=api_key, base_url=os.getenv("API_BASE_URL", "https://api.openai.com/v1"))
    obs = env.reset(task_id=task_id)["observation"]
    # Seed the environment's best code with the user's edited kernel, if any.
    if kernel_code and kernel_code.strip():
        custom_code = kernel_code.strip()
        env.state.best_code = custom_code
        obs["current_best_code"] = custom_code
    best_code = obs["current_best_code"]
    log.append(f"Task: {obs['task_name']}")
    # int(...) guards against gradio passing the slider value as a float,
    # which would make range() raise TypeError.
    for _ in range(int(max_steps)):
        try:
            prompt = f"Optimize CUDA code:\n{obs['current_best_code']}\nPending checks: {obs['pending_checks']}\nReturn code only."
            res = client.chat.completions.create(
                model=model,
                temperature=0.0,
                messages=[
                    {"role": "system", "content": "Return only optimized CUDA code."},
                    {"role": "user", "content": prompt},
                ],
            )
            raw = (res.choices[0].message.content or "").strip()
            # Fall back to the current best code when the model returns nothing usable.
            code = extract_code(raw).strip() or obs["current_best_code"]
            step = env.step(Action(optimized_code=code, strategy="ui_proposed"))
            obs = step.observation.model_dump()
            best_code = obs["current_best_code"]
            log.append(f"step={obs['step_count']} reward={step.reward.value:.3f} speedup={obs['current_best_speedup']:.3f}x")
            yield "\n".join(log), best_code
            if step.done:
                break
        except Exception as e:
            # Surface the failure in the log pane but keep the last good code visible.
            yield f"{chr(10).join(log)}\nERROR: {e}\n{traceback.format_exc()}", best_code
            return
# UI definition. Component creation order fixes the on-screen layout.
with gr.Blocks(title="CUDA Kernel Optimizer") as demo:
    gr.Markdown("CUDA Kernel Optimizer - OpenEnv-aligned workflow")
    task_dropdown = gr.Dropdown(
        choices=list(TASKS.keys()),
        value="vector_add_easy",
        label="Task",
    )
    kernel_editor = gr.Code(
        label="Kernel Code (editable, used as optimization input)",
        language="cpp",
        lines=16,
        value=task_baseline_code("vector_add_easy"),
    )
    step_slider = gr.Slider(minimum=1, maximum=12, value=6, step=1, label="Max Steps")
    api_key_box = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
    run_button = gr.Button("Run Optimization", variant="primary")
    log_output = gr.Textbox(label="Logs", lines=14)
    code_output = gr.Code(label="Best Code", language="cpp", lines=16)
    # Selecting a task reloads that task's baseline kernel into the editor.
    task_dropdown.change(task_baseline_code, inputs=[task_dropdown], outputs=[kernel_editor])
    # The click handler is a generator, so gradio streams (logs, code) updates.
    run_button.click(ui, inputs=[task_dropdown, kernel_editor, step_slider, api_key_box], outputs=[log_output, code_output])
# Mount the gradio UI onto the OpenEnv FastAPI app so both share one server.
app = gr.mount_gradio_app(openenv_app, demo, path="/ui")
@app.get("/")
def root():
    # Visiting the bare host redirects straight to the gradio UI at /ui.
    return RedirectResponse(url="/ui")
if __name__ == "__main__":
    # NOTE(review): the "app:app" import string assumes this file is named
    # app.py — confirm, since uvicorn re-imports the module by that name.
    uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT", "7860")))