File size: 2,540 Bytes
d78cfdc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | [START] task=task_easy env=PolicyEvolverEnv model=meta-llama/Llama-3.3-70B-Instruct INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=1 action=propose_clarification reward=0.12 done=false error=null INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=2 action=propose_clarification reward=0.86 done=false error=null INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=3 action=propose_clarification reward=0.89 done=false error=null INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=4 action=propose_clarification reward=0.00 done=false error=null INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=5 action=propose_clarification reward=0.86 done=true error=null [END] success=true steps=5 score=0.865 rewards=0.12,0.86,0.89,0.00,0.86 [START] task=task_medium env=PolicyEvolverEnv model=meta-llama/Llama-3.3-70B-Instruct INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=1 action=propose_clarification reward=0.12 done=false error=null INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=2 action=propose_clarification reward=0.00 done=false error=null INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=3 action=propose_new_rule reward=0.80 done=false error=null INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=4 action=propose_clarification reward=0.00 done=false error=null INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=5 action=propose_clarification reward=0.00 done=true error=null [END] success=false steps=5 score=0.000 rewards=0.12,0.00,0.80,0.00,0.00 [START] task=task_hard env=PolicyEvolverEnv model=meta-llama/Llama-3.3-70B-Instruct INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=1 action=propose_clarification reward=0.12 done=false error=null INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK" [STEP] step=2 action=evolve_policy reward=0.90 done=true error=null [END] success=true steps=2 score=0.900 rewards=0.12,0.90 |