""" inference.py - CDN Cache Optimizer Baseline Agent Uses OpenAI client to run an LLM agent against the environment. Emits structured [START], [STEP], [END] logs to stdout. Required env vars: API_BASE_URL - LLM API endpoint MODEL_NAME - model identifier HF_TOKEN - Hugging Face / API key """ import os import sys import json import time import requests from openai import OpenAI from env.cache import CDNCacheEnv, TASK_CONFIGS from env.models import Action, Observation # ───────────────────────────────────────────── # Config from environment (required by OpenEnv spec) # ───────────────────────────────────────────── API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1") MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini") HF_TOKEN = os.getenv("HF_TOKEN") HF_TOKEN = os.environ.get("HF_TOKEN", "") if not HF_TOKEN: print("[WARN] HF_TOKEN not set. Using API_BASE_URL without auth header override.", file=sys.stderr) client = OpenAI( base_url=API_BASE_URL, api_key=HF_TOKEN or "placeholder", ) TASKS = ["task_easy", "task_medium", "task_hard"] SEED = 42 # ───────────────────────────────────────────── # LLM Agent # ───────────────────────────────────────────── SYSTEM_PROMPT = """You are an intelligent CDN cache management agent. At each step you receive the current cache state and an incoming file request. Your job: decide which file to evict (if any) to make room for new content. Rules: - Only evict a file if the cache is nearly full and the incoming file is NOT already cached - Prefer evicting files with LOW request_frequency and NOT viral - Never evict a file that was just evicted (cache thrashing) - If cache has space, respond with null (no eviction needed) You MUST respond with ONLY valid JSON in this exact format: {"evict_file_id": "" or null} No explanation. No markdown. Only the JSON object.""" def build_user_prompt(obs: Observation) -> str: cached_summary = [] for f in obs.cached_files: cached_summary.append( f" - {f.file_id}: size={f.size_mb}MB freq={f.request_frequency:.1f} " f"viral={f.is_viral} last_accessed=step_{f.last_accessed}" ) cached_str = "\n".join(cached_summary) if cached_summary else " (empty)" space_needed = obs.incoming_file_size_mb space_free = obs.cache_capacity_mb - obs.cache_used_mb return f"""Step {obs.step} | Time of day: {obs.time_of_day:.2f} | Hit rate: {obs.recent_hit_rate:.2f} Cache: {obs.cache_used_mb:.1f}MB / {obs.cache_capacity_mb:.1f}MB used ({obs.cache_fill_ratio*100:.1f}% full) Free space: {space_free:.1f}MB Incoming request: file_id: {obs.incoming_file_id} size: {obs.incoming_file_size_mb}MB viral: {obs.incoming_file_is_viral} already_cached: {obs.cache_hit} space_needed_to_cache: {"none (fits)" if space_free >= space_needed else f"{space_needed - space_free:.1f}MB deficit"} Next 3 requests preview: {obs.queue_preview} Currently cached files ({len(obs.cached_files)} files): {cached_str} Decide: which file to evict? (null if no eviction needed)""" def llm_action(obs: Observation, step_num: int) -> Action: """Call LLM and parse action. 
Fall back to LRU on failure.""" prompt = build_user_prompt(obs) try: response = client.chat.completions.create( model=MODEL_NAME, messages=[ {"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": prompt}, ], max_tokens=50, temperature=0.0, ) raw = response.choices[0].message.content.strip() parsed = json.loads(raw) return Action(evict_file_id=parsed.get("evict_file_id")) except Exception as e: # Fallback: LRU if obs.cached_files: lru = min(obs.cached_files, key=lambda f: f.last_accessed) return Action(evict_file_id=lru.file_id) return Action(evict_file_id=None) # ───────────────────────────────────────────── # Run one task episode # ───────────────────────────────────────────── def run_task(task_id: str) -> dict: config = TASK_CONFIGS[task_id] env = CDNCacheEnv(task_id=task_id, seed=SEED) obs = env.reset() total_reward = 0.0 step_num = 0 # ── [START] ── print(f"[START] task={task_id}", flush=True) while True: action = llm_action(obs, step_num) result = env.step(action) total_reward += result.reward.total # ── [STEP] ── print(f"[STEP] step={step_num} reward={round(result.reward.total, 4)}", flush=True) obs = result.observation step_num += 1 if result.done: break final_state = env.state() final_hit_rate = final_state["hit_rate"] score = round(min(1.0, final_hit_rate / {"task_easy": 0.60, "task_medium": 0.55, "task_hard": 0.45}[task_id]), 4) # ── [END] ── print(f"[END] task={task_id} score={score} steps={step_num}", flush=True) return { "task_id": task_id, "total_reward": round(total_reward, 4), "final_hit_rate": round(final_hit_rate, 4), "score": score, } # ───────────────────────────────────────────── # Main # ───────────────────────────────────────────── if __name__ == "__main__": print(f"[INFO] Starting CDN Cache Optimizer inference", file=sys.stderr) print(f"[INFO] Model: {MODEL_NAME} | API: {API_BASE_URL}", file=sys.stderr) results = [] for task_id in TASKS: print(f"\n[INFO] Running {task_id}...", file=sys.stderr) r = run_task(task_id) results.append(r) print(f"[INFO] {task_id} done | score={r['score']} hit_rate={r['final_hit_rate']}", file=sys.stderr) print("\n[INFO] === FINAL RESULTS ===", file=sys.stderr) for r in results: print(f"[INFO] {r['task_id']}: score={r['score']} reward={r['total_reward']}", file=sys.stderr) overall = round(sum(r["score"] for r in results) / len(results), 4) print(f"[INFO] Overall score: {overall}", file=sys.stderr)
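
# Example invocation (a sketch; assumes the `env` package is importable from the
# working directory and that API_BASE_URL points at an OpenAI-compatible endpoint):
#
#   export API_BASE_URL="https://api.openai.com/v1"
#   export MODEL_NAME="gpt-4o-mini"
#   export HF_TOKEN="<your-api-key>"   # placeholder; supply your own key
#   python inference.py
#
# Structured [START]/[STEP]/[END] lines go to stdout; [INFO]/[WARN] diagnostics go to stderr.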