# inference.py
# ─────────────────────────────────────────────
# LLM agent runner — mandatory stdout format
# [START] [STEP] [END] as required by judges
# ─────────────────────────────────────────────

import os
import json
import time
from openai import OpenAI

from config import TASK_EASY, TASK_MEDIUM, TASK_HARD
from environment import DataCleaningEnv
from models import Action

# ── Client Setup ──────────────────────────────
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
# HF_TOKEN takes precedence; an empty/unset value falls through to
# OPENAI_API_KEY, and finally to a placeholder key.
API_KEY = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY", "dummy-key")

client = OpenAI(api_key=API_KEY, base_url=API_BASE_URL)

# ── System Prompt ─────────────────────────────
SYSTEM_PROMPT = """
You are a data cleaning agent. You will be given a messy CSV dataset and must clean it step by step using the available actions.

Available actions:
1. fill_missing — {"action_type": "fill_missing", "parameters": {"column": "col_name", "strategy": "mean|median|mode|drop"}}
2. drop_duplicates — {"action_type": "drop_duplicates", "parameters": {}}
3. fix_dtype — {"action_type": "fix_dtype", "parameters": {"column": "col_name", "target_type": "int|float|str"}}
4. rename_column — {"action_type": "rename_column", "parameters": {"old_name": "Old Col", "new_name": "old_col"}}
5. remove_outliers — {"action_type": "remove_outliers", "parameters": {"column": "col_name", "method": "iqr|zscore"}}
6. standardize_values — {"action_type": "standardize_values", "parameters": {"column": "col_name", "mapping": {"old": "new"}}}
7. submit — {"action_type": "submit", "parameters": {}}

Rules:
- Always respond with a single valid JSON action only
- No explanation, no markdown, just raw JSON
- Call submit when you think the dataset is clean
- Analyze the issues list carefully before acting
"""

# Maximum number of characters of an error message echoed in a [STEP] line.
_ERROR_MSG_LIMIT = 80


# ── Helpers ───────────────────────────────────
def _single_line(text: str, limit: int = _ERROR_MSG_LIMIT) -> str:
    """Truncate *text* to *limit* characters and flatten it onto one line.

    Each [STEP] record must occupy exactly one stdout line, so every
    message embedded in it has its line breaks replaced by spaces.
    """
    return text[:limit].replace("\r", " ").replace("\n", " ")


def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if any.

    Models often wrap their reply in a fenced block despite being told to
    emit raw JSON. Text that does not start with a fence is returned
    stripped but otherwise untouched.
    """
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line (which may carry a language tag) ...
    lines = stripped.splitlines()[1:]
    # ... and the closing fence line when present.
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines).strip()


def _build_user_message(obs) -> str:
    """Render the current observation as the user prompt for the next step."""
    return (
        "\n"
        "Current state:\n"
        f"- Task: {obs.task_id}\n"
        f"- Step: {obs.step}/{obs.max_steps}\n"
        f"- Issues: {obs.issues}\n"
        f"- Null counts: {obs.null_counts}\n"
        f"- Duplicate rows: {obs.duplicate_rows}\n"
        f"- Columns: {obs.columns}\n"
        f"- Dtypes: {obs.dtypes}\n"
        f"- Data (first 5 rows): {obs.dataframe[:5]}\n"
        "\n"
        "Respond with JSON action only.\n"
    )


# ── Agent Loop ────────────────────────────────
def run_agent(difficulty: str, max_steps: int = 20) -> dict:
    """Run the LLM agent on one task and print the [START]/[STEP]/[END] log.

    Args:
        difficulty: Task difficulty passed to ``DataCleaningEnv``.
        max_steps: Upper bound on the number of agent steps attempted.

    Returns:
        A dict with the keys ``difficulty``, ``steps_taken``, ``score``,
        ``success`` and ``rewards``.
    """
    env = DataCleaningEnv(difficulty=difficulty)
    obs = env.reset()
    history = []
    rewards = []
    task_name = f"data-cleaning-{difficulty}"

    # ── [START] ──
    print(f"[START] task={task_name} env=data-cleaning-env model={MODEL_NAME}")

    for step in range(1, max_steps + 1):
        if obs.done:
            break

        history.append({"role": "user", "content": _build_user_message(obs)})

        # Defaults reported when the step fails before producing a result.
        error_msg = "null"
        action_str = "null"
        reward_val = 0.0
        done_val = False

        try:
            # Call LLM
            response = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    *history,
                ],
                max_tokens=200,
                temperature=0.0,
            )
            raw = response.choices[0].message.content.strip()
            history.append({"role": "assistant", "content": raw})

            # Parse action (tolerating a Markdown fence around the JSON)
            action_dict = json.loads(_strip_code_fence(raw))
            action = Action(**action_dict)
            action_str = f"{action.action_type}({json.dumps(action.parameters)})"

            # Execute
            result = env.step(action)
            reward_val = result.reward.value
            done_val = result.done
            obs = result.observation

            reason = result.reward.reason
            if reason and "failed" in reason.lower():
                error_msg = _single_line(reason)

        except json.JSONDecodeError:
            error_msg = "json_parse_error"
            done_val = False
        except Exception as exc:
            # Per-step boundary: any failure is reported in the [STEP] line
            # and the episode continues. Fall back to the exception type so
            # the error field is never blank.
            error_msg = _single_line(str(exc)) or type(exc).__name__
            done_val = False

        rewards.append(reward_val)

        # ── [STEP] ──
        done_str = "true" if done_val else "false"
        print(
            f"[STEP] step={step} action={action_str} "
            f"reward={reward_val:.2f} done={done_str} error={error_msg}"
        )

        if done_val:
            break

        # Brief pause between consecutive model calls.
        time.sleep(0.3)

    # Final score
    score, _ = env.task.grade()
    success = score >= 0.6
    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
    success_str = "true" if success else "false"
    steps_taken = len(rewards)

    # ── [END] ──
    print(
        f"[END] success={success_str} steps={steps_taken} "
        f"score={score:.2f} rewards={rewards_str}"
    )

    return {
        "difficulty": difficulty,
        "steps_taken": steps_taken,
        "score": score,
        "success": success,
        "rewards": rewards,
    }


# ── Main ──────────────────────────────────────
def main():
    """Run the agent on every difficulty and return results keyed by difficulty."""
    results = {}
    for difficulty in [TASK_EASY, TASK_MEDIUM, TASK_HARD]:
        results[difficulty] = run_agent(difficulty)
        time.sleep(1)
    return results


if __name__ == "__main__":
    main()