causal-stream / inference_random.py
skyruh's picture
fix: enforce true determinism and ground abstract action schemas
7e33b86
import os
import random
from typing import Any, Dict, Optional

import requests
# Runtime configuration. The two URLs can be overridden through environment
# variables of the same name; the literals below are the fallbacks.

# NOTE(review): API_BASE_URL is not referenced in the visible part of this
# file -- confirm whether another consumer reads it before removing.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")

# Label reported as ``model=`` in the [START] log line.
MODEL_NAME = "RandomAgent"

# Base URL of the environment server that exposes /reset and /step.
ENV_URL = os.environ.get("ENV_URL", "http://localhost:7860")
def reset_env(task_id: int, timeout: float = 30.0):
    """Reset the environment server for ``task_id`` and return its reply.

    Sends ``POST {ENV_URL}/reset?task_id=<task_id>`` with an empty JSON body.

    Args:
        task_id: Task identifier, sent as the ``task_id`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` would block indefinitely on a hung server.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.post(
        f"{ENV_URL}/reset",
        params={"task_id": task_id},  # let requests build/escape the query
        json={},
        timeout=timeout,
    )
    # Fail loudly on an error status instead of trying to parse its body.
    response.raise_for_status()
    return response.json()
def step_env(task_id: int, action: Dict[str, Any], timeout: float = 30.0):
    """Submit one action to the environment server and return its reply.

    Sends ``POST {ENV_URL}/step?task_id=<task_id>`` with ``action`` as the
    JSON body.

    Args:
        task_id: Task identifier, sent as the ``task_id`` query parameter.
        action: JSON-serialisable action payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` would block indefinitely on a hung server.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.post(
        f"{ENV_URL}/step",
        params={"task_id": task_id},  # let requests build/escape the query
        json=action,
        timeout=timeout,
    )
    # Fail loudly on an error status instead of trying to parse its body.
    response.raise_for_status()
    return response.json()
def run_agent(task_id: int, seed: Optional[int] = None, max_steps: int = 10):
    """Play one episode with uniformly random actions and log the outcome.

    The environment is reset, then actions are drawn at random from a fixed
    candidate list and sent to the server one at a time. The episode stops
    when the server reports ``done``, when ``max_steps`` actions have been
    taken, or right after a ``submit_postmortem`` action is sent.

    Progress is written to stdout as one ``[START]`` line, one ``[STEP]``
    line per action, and one ``[END]`` line carrying the summed reward.
    ``success`` in the ``[END]`` line is true when that sum exceeds 0.5.

    Args:
        task_id: Task identifier forwarded to ``reset_env`` / ``step_env``.
        seed: When given, actions are drawn from a private
            ``random.Random(seed)`` so the action sequence is reproducible.
            When ``None`` the module-level ``random`` generator is used.
        max_steps: Upper bound on the number of actions taken.

    Raises:
        Whatever ``reset_env`` / ``step_env`` raise, and ``KeyError`` if a
        step response lacks ``reward`` or ``done``. The ``[END]`` line is
        still printed before the exception propagates.
    """
    task_name = f"task-{task_id}"
    # Keep the global generator when unseeded so existing callers that seed
    # ``random`` themselves see no change.
    rng = random if seed is None else random.Random(seed)

    actions = [
        {"type": "read_dashboard"},
        {"type": "sample_stream", "sample_size": 20},
        {"type": "inspect_lineage", "model_id": "aggregator"},
        {"type": "query_system_logs", "log_name": "system_events"},
        {"type": "query_provider_contract", "provider_id": "Stripe-Sim"},
        {"type": "submit_theory", "cause": "latency_spike", "evidence": []},
        {
            "type": "submit_postmortem",
            "timeline": [{"tick": 0, "description": "Random guess"}],
            "impact_duration_ticks": 100,
            "prevention_action": "update_schema",
        },
    ]

    rewards = []
    steps_taken = 0
    done = False

    print(f"[START] task={task_name} env=causal-stream-v3 model={MODEL_NAME}", flush=True)
    try:
        # The returned observation is irrelevant to a random policy; the
        # call is made only to start a fresh episode.
        reset_env(task_id)

        while not done and steps_taken < max_steps:
            action = rng.choice(actions)
            resp = step_env(task_id, action)
            reward = resp["reward"]
            done = resp["done"]
            rewards.append(reward)
            steps_taken += 1
            print(f"[STEP] step={steps_taken} action={action['type']} reward={reward:.2f} done={str(done).lower()} error=null", flush=True)
            # A postmortem is treated as the final action of an episode,
            # whether or not the server flagged the episode as done.
            if action["type"] == "submit_postmortem":
                break
    finally:
        # Always close the [START]/[END] pair, even if a request failed.
        score = sum(rewards)
        success = score > 0.5
        print(f"[END] success={str(success).lower()} steps={steps_taken} score={score:.2f} rewards={','.join([f'{r:.2f}' for r in rewards])}", flush=True)
if __name__ == "__main__":
    # Script entry point: run one episode for each task id 1 through 4,
    # in ascending order.
    for task_id in range(1, 5):
        run_agent(task_id)