Spaces:
Sleeping
Sleeping
| """ | |
| inference.py -- AuditRepairEnv++ Baseline Inference Agent | |
| ========================================================= | |
| OpenEnv Submission | Cost-Constrained Ledger Repair | |
| STDOUT format (strict -- must match exactly): | |
| [START] | |
| Task: easy | |
| [STEP] | |
| Action: FIX_ENTRY 1 | |
| Reward: 0.2 | |
| [END] | |
| Final Score: 0.85 | |
| Uses OpenAI Client for LLM calls. | |
| Reads env variables: API_BASE_URL, MODEL_NAME, HF_TOKEN | |
| Runs all tasks: easy, medium, hard | |
| """ | |
| import asyncio | |
| import json | |
| import os | |
| import textwrap | |
| import urllib.request | |
| import urllib.error | |
| from typing import List, Optional | |
| from openai import OpenAI | |
# ----------------------------------------------------------
# ENVIRONMENT CONFIGURATION
# ----------------------------------------------------------
# Credentials: HF_TOKEN is preferred; API_KEY is accepted as a fallback
# for the OpenAI-compatible router endpoint.
HF_TOKEN = os.getenv("HF_TOKEN")
API_KEY = HF_TOKEN or os.getenv("API_KEY", "")
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")

# Fail fast with a clear message when no credential is available.
# NOTE: the original second check on API_KEY was unreachable dead code --
# once HF_TOKEN is set, API_KEY is always truthy; otherwise this raise
# fires first -- so it has been removed.
if not HF_TOKEN:
    raise ValueError(
        "HF_TOKEN environment variable is required. "
        "Set it via: export HF_TOKEN='your_token_here'"
    )

# OpenEnv environment server (hosts /reset and /step for the tasks).
ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:7860")
BENCHMARK = "auditrepairenv"
TASKS = ["easy", "medium", "hard"]   # episodes run sequentially in this order
MAX_STEPS = 15                       # hard cap when the server omits max_steps
MAX_TOTAL_REWARD = 2.0               # normalizer for the fallback score in run_task
SUCCESS_SCORE_THRESHOLD = 0.5
TEMPERATURE = 0.2                    # low temperature: near-deterministic actions
MAX_TOKENS = 300
# ----------------------------------------------------------
# STDOUT LOGGING (strict OpenEnv format)
# ----------------------------------------------------------
def log_start(task: str) -> None:
    """Emit the strict ``[START]`` header that opens a task episode."""
    banner = f"\n[START]\nTask: {task}"
    print(banner, flush=True)
def log_step(action: str, reward: float) -> None:
    """Emit one strict ``[STEP]`` record; the action is flattened onto one
    line and truncated to 200 characters."""
    flat = action.replace("\n", " ").replace("\r", "")
    flat = flat.strip()[:200]
    print(f"\n[STEP]\nAction: {flat}\nReward: {reward}", flush=True)
def log_end(score: float) -> None:
    """Emit the strict ``[END]`` footer with the final episode score."""
    footer = f"\n[END]\nFinal Score: {score}"
    print(footer, flush=True)
# ----------------------------------------------------------
# ENVIRONMENT HTTP CLIENT (calls our OpenEnv server)
# ----------------------------------------------------------
def env_request(path: str, method: str = "GET", body: Optional[dict] = None) -> dict:
    """Send a JSON request to the OpenEnv server and decode the response.

    Args:
        path: URL path appended to ENV_BASE_URL (e.g. "/reset").
        method: HTTP verb to use.
        body: Optional JSON payload.  (Annotation fixed: was ``dict = None``.)

    Returns:
        The decoded JSON dict on success; on any transport or HTTP failure
        a ``{"error": "..."}`` dict is returned instead of raising, so
        callers can branch on the "error" key.
    """
    url = ENV_BASE_URL.rstrip("/") + path
    # NOTE(review): a b"{}" payload is sent even when body is None (and even
    # for GET) -- kept as-is since the server apparently tolerates it.
    data = json.dumps(body or {}).encode() if body is not None else b"{}"
    req = urllib.request.Request(
        url, data=data, method=method,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read().decode())
    except urllib.error.HTTPError as e:
        # Include a short excerpt of the server's error body for debugging.
        return {"error": f"HTTP {e.code}: {e.read().decode()[:100]}"}
    except Exception as ex:
        return {"error": str(ex)}
def env_reset(task_id: str) -> dict:
    """Start a fresh episode for *task_id* on the environment server."""
    payload = {"task_id": task_id}
    return env_request("/reset", "POST", payload)
def env_step(message: str) -> dict:
    """Send one agent action *message* to the environment server."""
    payload = {"message": message}
    return env_request("/step", "POST", payload)
# ----------------------------------------------------------
# AGENT PROMPT
# ----------------------------------------------------------
# System prompt sent on every LLM call.  It pins the action grammar that
# the response parser in get_model_message() expects (FIX_ENTRY /
# ADJUST_ENTRY / REVERT_ENTRY / NO_OP) -- keep the two in sync.
SYSTEM_PROMPT = textwrap.dedent("""
You are AuditRepairAgent -- an AI that repairs financial ledger inconsistencies.
You are given a ledger with entries that may have errors (value != expected_value).
Each entry has an id, value, expected_value, and dependencies list.
Available actions (respond with exactly ONE per step):
FIX_ENTRY <id> -- Sets value = expected_value. May trigger dependency changes.
ADJUST_ENTRY <id> <delta> -- Increment/decrement the entry's value by delta.
REVERT_ENTRY <id> -- Undo the last change to an entry.
NO_OP -- Do nothing.
Rules:
1. Each action costs budget. Minimize total actions.
2. Fixing an already-correct entry is overcorrection (penalty).
3. Dependencies: fixing one entry may change expected_value of other entries.
4. Goal: fix all errors within budget.
Respond with ONLY the action, nothing else:
FIX_ENTRY 3
""").strip()
def build_prompt(obs: dict, step_num: int, last_echoed: str,
                 last_reward: float, history: List[str]) -> str:
    """Render the per-step user prompt from the current observation.

    Args:
        obs: Observation dict from the env (ledger, errors, budget, ...).
        step_num: 1-based step counter within the episode.
        last_echoed: The env's echo of the previous action, if any.
        last_reward: Reward received on the previous step.
        history: Prior "Step N: action -> reward" lines; only the last
            three are shown to keep the prompt short.

    Returns:
        A plain-text prompt for the chat-completion call.
    """
    # Build the ledger listing with join() instead of quadratic += .
    ledger_lines = []
    for entry in obs.get("ledger", []):
        status = "OK" if entry["value"] == entry["expected_value"] else "ERR"
        deps = entry.get("dependencies", [])
        dep_str = f", deps={deps}" if deps else ""
        ledger_lines.append(
            f"  [{status}] id={entry['id']}: value={entry['value']}, "
            f"expected={entry['expected_value']}{dep_str}\n"
        )
    ledger_str = "".join(ledger_lines)
    error_lines = [
        f"  Entry {err['entry_id']}: value={err['current_value']}, "
        f"expected={err['expected_value']}, delta={err['delta']}\n"
        for err in obs.get("errors", [])
    ]
    errors_str = "".join(error_lines)
    history_block = "\n".join(history[-3:]) if history else "None"
    return textwrap.dedent(f"""
Task: {obs.get('task_description', '')}
Step {step_num} of {obs.get('max_steps', 10)}
Ledger:
{ledger_str}
Current Errors:
{errors_str if errors_str else '  None -- all entries correct!'}
Budget: {obs.get('remaining_budget', 0)} / {obs.get('initial_budget', 0)}
Last result: {last_echoed}
Last reward: {last_reward:+.2f}
History: {history_block}
Respond with the single best action (e.g. FIX_ENTRY 3):
""").strip()
def get_model_message(client: OpenAI, step_num: int, obs: dict,
                      last_echoed: str, last_reward: float,
                      history: List[str]) -> str:
    """Ask the LLM for the next action; fall back to a heuristic on error.

    Returns the first response line that starts with a known action verb;
    otherwise the first response line verbatim, or "NO_OP" for an empty
    response.  Any API failure degrades to ``_fallback_action``.
    """
    try:
        prompt = build_prompt(obs, step_num, last_echoed, last_reward, history)
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
        )
        response = (completion.choices[0].message.content or "").strip()
        # Prefer the first line that looks like a valid action.
        verbs = ("FIX_ENTRY", "ADJUST_ENTRY", "REVERT_ENTRY", "NO_OP")
        for raw in response.split("\n"):
            candidate = raw.strip()
            if candidate and candidate.upper().startswith(verbs):
                return candidate
        # No recognizable action: return the first line as-is (or NO_OP).
        if response:
            return response.split("\n")[0].strip()
        return "NO_OP"
    except Exception:
        # Silent fallback keeps the strict stdout contract clean.
        return _fallback_action(obs)
| def _fallback_action(obs: dict) -> str: | |
| """Deterministic fallback: fix the first error found.""" | |
| errors = obs.get("errors", []) | |
| if errors: | |
| return f"FIX_ENTRY {errors[0]['entry_id']}" | |
| return "NO_OP" | |
# ----------------------------------------------------------
# RUN ONE TASK
# ----------------------------------------------------------
def run_task(client: OpenAI, task_id: str) -> float:
    """Run one task episode end to end.

    Emits the strict [START]/[STEP]/[END] stdout records and returns the
    final score clamped to [0.0, 1.0].

    Args:
        client: Configured OpenAI-compatible chat client.
        task_id: One of the TASKS identifiers ("easy", "medium", "hard").
    """
    history: List[str] = []
    rewards: List[float] = []
    score = 0.0
    log_start(task=task_id)
    try:
        # Reset the environment; an error here means no episode at all.
        result = env_reset(task_id)
        if "error" in result:
            log_end(score=0.0)
            return 0.0
        obs = result
        last_echoed = obs.get("echoed_message", "")
        last_reward = 0.0
        max_steps = obs.get("max_steps", MAX_STEPS)
        for step in range(1, max_steps + 1):
            if obs.get("done", False):
                break
            # Get the agent's next action (plain-text message).
            message = get_model_message(
                client, step, obs, last_echoed, last_reward, history
            )
            # Step the environment.  (The unused `error` locals from the
            # original have been dropped.)
            step_result = env_step(message)
            if "error" in step_result and "observation" not in step_result:
                # Transport-level failure: count a zero-reward step and
                # keep the previous observation.
                reward = 0.0
                done = False
            else:
                reward = float(step_result.get("reward", 0) or 0)
                done = bool(step_result.get("done", False))
                obs = step_result.get("observation", obs)
            rewards.append(reward)
            last_echoed = obs.get("echoed_message", "")
            last_reward = reward
            log_step(action=message, reward=reward)
            history.append(f"Step {step}: {message!r} -> reward {reward:+.2f}")
            if done:
                # Prefer the server-reported final score when present.
                final_score = step_result.get("info", {}).get("final_score")
                if final_score is not None:
                    score = float(final_score)
                break
        # Fallback: normalize summed rewards when the server supplied no
        # final_score.  NOTE(review): a genuine server score of exactly 0.0
        # is indistinguishable from "not set" here and is also recomputed.
        if score == 0.0 and rewards:
            score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
        score = min(max(score, 0.0), 1.0)
    except Exception:
        # Deliberate best-effort: the strict stdout contract leaves no room
        # for error output, so swallow and report whatever score we have.
        pass
    finally:
        log_end(score=score)
    return score
# ----------------------------------------------------------
# MAIN
# ----------------------------------------------------------
async def main() -> None:
    # Run every benchmark task sequentially with one shared client.
    # NOTE(review): nothing here awaits -- the episodes run synchronously.
    # `async` is kept only to preserve the existing asyncio.run() entry
    # point; a plain `def main()` would behave identically.
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    for task_id in TASKS:
        run_task(client, task_id)

if __name__ == "__main__":
    asyncio.run(main())