Spaces:

luciferai-devil
/

devil-policyevolverenv

Sleeping

App Files Files Community

devil-policyevolverenv / evolution_logs.txt

Somuai12

feat: add reward evolution chart to Gradio dashboard

d78cfdc about 2 months ago

raw

history blame contribute delete

2.54 kB

	[START] task=task_easy env=PolicyEvolverEnv model=meta-llama/Llama-3.3-70B-Instruct
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=1 action=propose_clarification reward=0.12 done=false error=null
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=2 action=propose_clarification reward=0.86 done=false error=null
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=3 action=propose_clarification reward=0.89 done=false error=null
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=4 action=propose_clarification reward=0.00 done=false error=null
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=5 action=propose_clarification reward=0.86 done=true error=null
	[END] success=true steps=5 score=0.865 rewards=0.12,0.86,0.89,0.00,0.86
	[START] task=task_medium env=PolicyEvolverEnv model=meta-llama/Llama-3.3-70B-Instruct
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=1 action=propose_clarification reward=0.12 done=false error=null
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=2 action=propose_clarification reward=0.00 done=false error=null
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=3 action=propose_new_rule reward=0.80 done=false error=null
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=4 action=propose_clarification reward=0.00 done=false error=null
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=5 action=propose_clarification reward=0.00 done=true error=null
	[END] success=false steps=5 score=0.000 rewards=0.12,0.00,0.80,0.00,0.00
	[START] task=task_hard env=PolicyEvolverEnv model=meta-llama/Llama-3.3-70B-Instruct
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=1 action=propose_clarification reward=0.12 done=false error=null
	INFO:httpx:HTTP Request: POST https://router.huggingface.co/v1/chat/completions "HTTP/1.1 200 OK"
	[STEP] step=2 action=evolve_policy reward=0.90 done=true error=null
	[END] success=true steps=2 score=0.900 rewards=0.12,0.90