Spaces:
Sleeping
Sleeping
| {"text": "### State\nConfusion: 3.250681\nAction: explain\nReward: 0.277968\nNext Confusion: 2.895787"} | |
| {"text": "### State\nConfusion: 6.946829\nAction: correct_fact\nReward: 1.136782\nNext Confusion: 6.797103"} | |
| {"text": "### State\nConfusion: 3.01263\nAction: explain\nReward: 0.161669\nNext Confusion: 2.434628"} | |
| {"text": "### State\nConfusion: 4.200218\nAction: analogize\nReward: -0.536253\nNext Confusion: 4.50549"} | |
| {"text": "### State\nConfusion: 4.204886\nAction: correct_fact\nReward: 0.001798\nNext Confusion: 4.348224"} | |
| {"text": "### State\nConfusion: 4.431564\nAction: analogize\nReward: -0.00913\nNext Confusion: 5.02598"} | |
| {"text": "### State\nConfusion: 7.112898\nAction: analogize\nReward: -0.260503\nNext Confusion: 7.776484"} | |
| {"text": "### State\nConfusion: 6.707709\nAction: worked_example\nReward: 0.623157\nNext Confusion: 5.590979"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.200613\nNext Confusion: 9.88075"} | |
| {"text": "### State\nConfusion: 6.441003\nAction: analogize\nReward: 0.059471\nNext Confusion: 6.277812"} | |
| {"text": "### State\nConfusion: 4.039312\nAction: analogize\nReward: 0.588281\nNext Confusion: 3.436871"} | |
| {"text": "### State\nConfusion: 4.283644\nAction: analogize\nReward: 0.377662\nNext Confusion: 4.225992"} | |
| {"text": "### State\nConfusion: 5.741777\nAction: correct_fact\nReward: -0.086082\nNext Confusion: 6.010756"} | |
| {"text": "### State\nConfusion: 2.020052\nAction: correct_fact\nReward: 0.065653\nNext Confusion: 1.847908"} | |
| {"text": "### State\nConfusion: 4.519175\nAction: explain\nReward: -0.532623\nNext Confusion: 4.565185"} | |
| {"text": "### State\nConfusion: 8.321839\nAction: correct_fact\nReward: -0.03258\nNext Confusion: 8.116296"} | |
| {"text": "### State\nConfusion: 8.126682\nAction: worked_example\nReward: 1.752661\nNext Confusion: 6.691122"} | |
| {"text": "### State\nConfusion: 5.13175\nAction: worked_example\nReward: 1.046388\nNext Confusion: 3.915293"} | |
| {"text": "### State\nConfusion: 2.675358\nAction: analogize\nReward: -0.527201\nNext Confusion: 3.269576"} | |
| {"text": "### State\nConfusion: 4.452432\nAction: question\nReward: -0.12029\nNext Confusion: 4.301777"} | |
| {"text": "### State\nConfusion: 5.261161\nAction: explain\nReward: -0.348596\nNext Confusion: 5.673129"} | |
| {"text": "### State\nConfusion: 4.41783\nAction: correct_fact\nReward: 0.606589\nNext Confusion: 3.844203"} | |
| {"text": "### State\nConfusion: 7.167364\nAction: analogize\nReward: -0.036751\nNext Confusion: 7.91662"} | |
| {"text": "### State\nConfusion: 4.443446\nAction: analogize\nReward: -0.405494\nNext Confusion: 4.455853"} | |
| {"text": "### State\nConfusion: 2.394889\nAction: worked_example\nReward: 1.569233\nNext Confusion: 0.521398"} | |
| {"text": "### State\nConfusion: 5.666886\nAction: analogize\nReward: 0.132937\nNext Confusion: 5.507486"} | |
| {"text": "### State\nConfusion: 8.903051\nAction: worked_example\nReward: 1.168286\nNext Confusion: 8.015495"} | |
| {"text": "### State\nConfusion: 7.419142\nAction: analogize\nReward: 0.370696\nNext Confusion: 6.90989"} | |
| {"text": "### State\nConfusion: 4.930095\nAction: analogize\nReward: -0.354928\nNext Confusion: 5.273698"} | |
| {"text": "### State\nConfusion: 3.654876\nAction: correct_fact\nReward: 0.075058\nNext Confusion: 4.012523"} | |
| {"text": "### State\nConfusion: 3.298562\nAction: correct_fact\nReward: 0.274487\nNext Confusion: 3.207063"} | |
| {"text": "### State\nConfusion: 5.36716\nAction: worked_example\nReward: 1.439936\nNext Confusion: 3.664801"} | |
| {"text": "### State\nConfusion: 3.746032\nAction: question\nReward: 0.769005\nNext Confusion: 3.475201"} | |
| {"text": "### State\nConfusion: 5.479237\nAction: correct_fact\nReward: 0.557245\nNext Confusion: 5.685524"} | |
| {"text": "### State\nConfusion: 5.16923\nAction: analogize\nReward: 0.659608\nNext Confusion: 4.931562"} | |
| {"text": "### State\nConfusion: 5.465634\nAction: analogize\nReward: -0.455456\nNext Confusion: 6.204408"} | |
| {"text": "### State\nConfusion: 4.611916\nAction: analogize\nReward: 0.505761\nNext Confusion: 4.227184"} | |
| {"text": "### State\nConfusion: 6.265313\nAction: analogize\nReward: -0.2694\nNext Confusion: 6.18744"} | |
| {"text": "### State\nConfusion: 4.897626\nAction: analogize\nReward: 0.702536\nNext Confusion: 4.261006"} | |
| {"text": "### State\nConfusion: 4.620578\nAction: analogize\nReward: 0.574101\nNext Confusion: 4.518202"} | |
| {"text": "### State\nConfusion: 4.456707\nAction: question\nReward: -0.11188\nNext Confusion: 4.38086"} | |
| {"text": "### State\nConfusion: 3.882776\nAction: correct_fact\nReward: 0.216437\nNext Confusion: 3.257319"} | |
| {"text": "### State\nConfusion: 7.64693\nAction: question\nReward: 0.516669\nNext Confusion: 6.653407"} | |
| {"text": "### State\nConfusion: 3.631051\nAction: analogize\nReward: -0.060158\nNext Confusion: 3.668511"} | |
| {"text": "### State\nConfusion: 3.242117\nAction: analogize\nReward: -0.632007\nNext Confusion: 4.07684"} | |
| {"text": "### State\nConfusion: 5.477382\nAction: worked_example\nReward: 0.014376\nNext Confusion: 5.024957"} | |
| {"text": "### State\nConfusion: 4.500757\nAction: question\nReward: 0.791141\nNext Confusion: 4.249756"} | |
| {"text": "### State\nConfusion: 4.149765\nAction: analogize\nReward: -0.737377\nNext Confusion: 5.078204"} | |
| {"text": "### State\nConfusion: 4.0037\nAction: analogize\nReward: 0.026296\nNext Confusion: 4.308817"} | |
| {"text": "### State\nConfusion: 3.601284\nAction: correct_fact\nReward: 0.736689\nNext Confusion: 2.987959"} | |
| {"text": "### State\nConfusion: 4.442764\nAction: analogize\nReward: -1.300369\nNext Confusion: 5.400936"} | |
| {"text": "### State\nConfusion: 3.490773\nAction: explain\nReward: -0.677986\nNext Confusion: 3.864996"} | |
| {"text": "### State\nConfusion: 4.677259\nAction: question\nReward: 0.948914\nNext Confusion: 4.377257"} | |
| {"text": "### State\nConfusion: 5.060442\nAction: correct_fact\nReward: 0.766447\nNext Confusion: 4.600817"} | |
| {"text": "### State\nConfusion: 4.615941\nAction: analogize\nReward: 1.128073\nNext Confusion: 4.953195"} | |
| {"text": "### State\nConfusion: 2.83426\nAction: analogize\nReward: 0.942352\nNext Confusion: 2.650195"} | |
| {"text": "### State\nConfusion: 3.270736\nAction: analogize\nReward: 0.441857\nNext Confusion: 2.980848"} | |
| {"text": "### State\nConfusion: 5.609833\nAction: analogize\nReward: -0.276144\nNext Confusion: 6.021879"} | |
| {"text": "### State\nConfusion: 3.269245\nAction: analogize\nReward: -0.558671\nNext Confusion: 3.781189"} | |
| {"text": "### State\nConfusion: 5.657327\nAction: correct_fact\nReward: -0.370105\nNext Confusion: 6.336563"} | |
| {"text": "### State\nConfusion: 4.241743\nAction: question\nReward: 0.405085\nNext Confusion: 3.343711"} | |
| {"text": "### State\nConfusion: 4.50831\nAction: worked_example\nReward: 1.323088\nNext Confusion: 3.333388"} | |
| {"text": "### State\nConfusion: 8.612566\nAction: analogize\nReward: -0.590897\nNext Confusion: 9.277476"} | |
| {"text": "### State\nConfusion: 3.341706\nAction: question\nReward: 0.605924\nNext Confusion: 2.983989"} | |
| {"text": "### State\nConfusion: 6.981562\nAction: worked_example\nReward: 1.42079\nNext Confusion: 6.22347"} | |
| {"text": "### State\nConfusion: 6.482682\nAction: explain\nReward: 0.134391\nNext Confusion: 6.252345"} | |
| {"text": "### State\nConfusion: 3.801888\nAction: correct_fact\nReward: -0.720082\nNext Confusion: 3.976293"} | |
| {"text": "### State\nConfusion: 3.833232\nAction: analogize\nReward: -0.353737\nNext Confusion: 4.299214"} | |
| {"text": "### State\nConfusion: 5.998812\nAction: explain\nReward: 0.389272\nNext Confusion: 5.514136"} | |
| {"text": "### State\nConfusion: 4.684952\nAction: question\nReward: -0.067119\nNext Confusion: 4.627373"} | |
| {"text": "### State\nConfusion: 3.739171\nAction: explain\nReward: 0.081376\nNext Confusion: 4.377791"} | |
| {"text": "### State\nConfusion: 6.828212\nAction: explain\nReward: 1.582702\nNext Confusion: 5.688067"} | |
| {"text": "### State\nConfusion: 7.270997\nAction: analogize\nReward: -1.004266\nNext Confusion: 7.936233"} | |
| {"text": "### State\nConfusion: 6.698097\nAction: analogize\nReward: -0.106108\nNext Confusion: 7.112916"} | |
| {"text": "### State\nConfusion: 5.745265\nAction: analogize\nReward: -0.139695\nNext Confusion: 5.720766"} | |
| {"text": "### State\nConfusion: 3.632954\nAction: question\nReward: 1.16064\nNext Confusion: 3.356993"} | |
| {"text": "### State\nConfusion: 6.09853\nAction: analogize\nReward: -1.18527\nNext Confusion: 7.567443"} | |
| {"text": "### State\nConfusion: 3.86836\nAction: worked_example\nReward: 0.259128\nNext Confusion: 3.484797"} | |
| {"text": "### State\nConfusion: 6.078642\nAction: question\nReward: -1.233893\nNext Confusion: 7.035697"} | |
| {"text": "### State\nConfusion: 2.798933\nAction: explain\nReward: 0.579925\nNext Confusion: 2.698838"} | |
| {"text": "### State\nConfusion: 5.148643\nAction: analogize\nReward: 0.709493\nNext Confusion: 4.744413"} | |
| {"text": "### State\nConfusion: 7.496325\nAction: explain\nReward: 1.159911\nNext Confusion: 7.308018"} | |
| {"text": "### State\nConfusion: 3.956711\nAction: analogize\nReward: -0.442151\nNext Confusion: 4.599521"} | |
| {"text": "### State\nConfusion: 5.598244\nAction: analogize\nReward: -1.461143\nNext Confusion: 6.285129"} | |
| {"text": "### State\nConfusion: 2.592946\nAction: analogize\nReward: -0.963043\nNext Confusion: 3.637503"} | |
| {"text": "### State\nConfusion: 5.577053\nAction: question\nReward: -1.324903\nNext Confusion: 6.152676"} | |
| {"text": "### State\nConfusion: 5.230882\nAction: analogize\nReward: -0.518079\nNext Confusion: 5.693272"} | |
| {"text": "### State\nConfusion: 4.305274\nAction: question\nReward: 0.976141\nNext Confusion: 4.003798"} | |
| {"text": "### State\nConfusion: 8.230191\nAction: analogize\nReward: -0.636257\nNext Confusion: 9.311369"} | |
| {"text": "### State\nConfusion: 3.902071\nAction: analogize\nReward: 0.500202\nNext Confusion: 3.75269"} | |
| {"text": "### State\nConfusion: 2.887785\nAction: correct_fact\nReward: 1.377221\nNext Confusion: 2.390823"} | |
| {"text": "### State\nConfusion: 4.391108\nAction: analogize\nReward: -0.342566\nNext Confusion: 4.73378"} | |
| {"text": "### State\nConfusion: 6.183204\nAction: question\nReward: -0.198667\nNext Confusion: 6.669494"} | |
| {"text": "### State\nConfusion: 5.336835\nAction: worked_example\nReward: 0.735701\nNext Confusion: 3.867037"} | |
| {"text": "### State\nConfusion: 3.049285\nAction: analogize\nReward: 1.806344\nNext Confusion: 1.962377"} | |
| {"text": "### State\nConfusion: 5.835483\nAction: analogize\nReward: 0.322794\nNext Confusion: 5.636015"} | |
| {"text": "### State\nConfusion: 6.197648\nAction: analogize\nReward: -0.037625\nNext Confusion: 6.726842"} | |
| {"text": "### State\nConfusion: 3.712592\nAction: analogize\nReward: -1.302483\nNext Confusion: 4.627915"} | |
| {"text": "### State\nConfusion: 2.942607\nAction: analogize\nReward: 0.391221\nNext Confusion: 3.280312"} | |
| {"text": "### State\nConfusion: 7.031706\nAction: analogize\nReward: -0.006694\nNext Confusion: 7.219192"} | |
| {"text": "### State\nConfusion: 6.227323\nAction: analogize\nReward: -1.185675\nNext Confusion: 7.040853"} | |
| {"text": "### State\nConfusion: 4.443704\nAction: explain\nReward: -0.222932\nNext Confusion: 4.861446"} | |
| {"text": "### State\nConfusion: 5.72539\nAction: analogize\nReward: -0.945966\nNext Confusion: 6.40068"} | |
| {"text": "### State\nConfusion: 3.285097\nAction: analogize\nReward: 0.502635\nNext Confusion: 2.970505"} | |
| {"text": "### State\nConfusion: 4.861887\nAction: question\nReward: -0.581137\nNext Confusion: 5.077682"} | |
| {"text": "### State\nConfusion: 3.146511\nAction: worked_example\nReward: -0.022938\nNext Confusion: 2.971046"} | |
| {"text": "### State\nConfusion: 7.118391\nAction: question\nReward: 0.68446\nNext Confusion: 6.163698"} | |
| {"text": "### State\nConfusion: 3.458978\nAction: analogize\nReward: -0.578027\nNext Confusion: 3.857936"} | |
| {"text": "### State\nConfusion: 3.074595\nAction: analogize\nReward: -0.545842\nNext Confusion: 3.548289"} | |
| {"text": "### State\nConfusion: 3.44214\nAction: analogize\nReward: 0.225382\nNext Confusion: 3.721544"} | |
| {"text": "### State\nConfusion: 8.819264\nAction: analogize\nReward: 0.151339\nNext Confusion: 8.657555"} | |
| {"text": "### State\nConfusion: 4.521422\nAction: worked_example\nReward: -0.10238\nNext Confusion: 4.557052"} | |
| {"text": "### State\nConfusion: 6.23277\nAction: explain\nReward: 0.457413\nNext Confusion: 6.462687"} | |
| {"text": "### State\nConfusion: 6.84021\nAction: correct_fact\nReward: -0.057949\nNext Confusion: 6.665882"} | |
| {"text": "### State\nConfusion: 4.478228\nAction: analogize\nReward: -1.01402\nNext Confusion: 4.81966"} | |
| {"text": "### State\nConfusion: 3.90523\nAction: analogize\nReward: -0.397117\nNext Confusion: 4.581836"} | |
| {"text": "### State\nConfusion: 5.505198\nAction: explain\nReward: -0.791799\nNext Confusion: 5.179022"} | |
| {"text": "### State\nConfusion: 6.171377\nAction: analogize\nReward: 2.02061\nNext Confusion: 5.293525"} | |
| {"text": "### State\nConfusion: 7.07752\nAction: analogize\nReward: -0.140945\nNext Confusion: 7.439284"} | |
| {"text": "### State\nConfusion: 4.74815\nAction: question\nReward: -0.921987\nNext Confusion: 4.901676"} | |
| {"text": "### State\nConfusion: 4.509364\nAction: explain\nReward: 1.032465\nNext Confusion: 3.376094"} | |
| {"text": "### State\nConfusion: 2.027247\nAction: worked_example\nReward: 0.728003\nNext Confusion: 2.481546"} | |
| {"text": "### State\nConfusion: 3.262276\nAction: analogize\nReward: -0.181705\nNext Confusion: 4.062567"} | |
| {"text": "### State\nConfusion: 4.404922\nAction: analogize\nReward: 0.137978\nNext Confusion: 4.550036"} | |
| {"text": "### State\nConfusion: 8.323386\nAction: analogize\nReward: -0.82731\nNext Confusion: 8.967352"} | |
| {"text": "### State\nConfusion: 3.065048\nAction: worked_example\nReward: 1.422474\nNext Confusion: 1.396024"} | |
| {"text": "### State\nConfusion: 8.650606\nAction: correct_fact\nReward: 0.013713\nNext Confusion: 8.505588"} | |
| {"text": "### State\nConfusion: 4.651423\nAction: correct_fact\nReward: 0.975844\nNext Confusion: 3.993101"} | |
| {"text": "### State\nConfusion: 5.509835\nAction: analogize\nReward: 1.279046\nNext Confusion: 5.567154"} | |
| {"text": "### State\nConfusion: 6.096856\nAction: analogize\nReward: -0.38767\nNext Confusion: 6.7619"} | |
| {"text": "### State\nConfusion: 4.852611\nAction: analogize\nReward: -0.843568\nNext Confusion: 4.90363"} | |
| {"text": "### State\nConfusion: 3.32426\nAction: correct_fact\nReward: 0.634364\nNext Confusion: 3.443108"} | |
| {"text": "### State\nConfusion: 3.007626\nAction: explain\nReward: -0.714219\nNext Confusion: 3.19942"} | |
| {"text": "### State\nConfusion: 8.091647\nAction: analogize\nReward: 0.138296\nNext Confusion: 8.641704"} | |
| {"text": "### State\nConfusion: 6.707883\nAction: analogize\nReward: -1.551451\nNext Confusion: 7.149846"} | |
| {"text": "### State\nConfusion: 5.131027\nAction: analogize\nReward: -0.898392\nNext Confusion: 5.243616"} | |
| {"text": "### State\nConfusion: 7.653114\nAction: analogize\nReward: 0.004476\nNext Confusion: 7.78867"} | |
| {"text": "### State\nConfusion: 3.999675\nAction: worked_example\nReward: 0.461726\nNext Confusion: 3.61097"} | |
| {"text": "### State\nConfusion: 9.165361\nAction: analogize\nReward: -0.127371\nNext Confusion: 9.594726"} | |
| {"text": "### State\nConfusion: 5.877134\nAction: analogize\nReward: -0.827503\nNext Confusion: 6.792844"} | |
| {"text": "### State\nConfusion: 4.240065\nAction: analogize\nReward: 0.017438\nNext Confusion: 4.091051"} | |
| {"text": "### State\nConfusion: 6.373348\nAction: worked_example\nReward: 2.087573\nNext Confusion: 4.599887"} | |
| {"text": "### State\nConfusion: 3.398287\nAction: analogize\nReward: 0.804075\nNext Confusion: 2.719617"} | |
| {"text": "### State\nConfusion: 5.074518\nAction: analogize\nReward: 0.123028\nNext Confusion: 5.348822"} | |
| {"text": "### State\nConfusion: 2.402497\nAction: analogize\nReward: -0.223833\nNext Confusion: 2.35697"} | |
| {"text": "### State\nConfusion: 4.486272\nAction: question\nReward: 0.398914\nNext Confusion: 4.16057"} | |
| {"text": "### State\nConfusion: 5.279123\nAction: analogize\nReward: -0.645918\nNext Confusion: 5.878665"} | |
| {"text": "### State\nConfusion: 2.826214\nAction: explain\nReward: 0.117254\nNext Confusion: 2.519507"} | |
| {"text": "### State\nConfusion: 7.463021\nAction: correct_fact\nReward: -0.078153\nNext Confusion: 7.250635"} | |
| {"text": "### State\nConfusion: 4.378883\nAction: correct_fact\nReward: 0.349144\nNext Confusion: 4.469529"} | |
| {"text": "### State\nConfusion: 3.601191\nAction: analogize\nReward: -0.4524\nNext Confusion: 3.516358"} | |
| {"text": "### State\nConfusion: 3.17514\nAction: worked_example\nReward: 1.188492\nNext Confusion: 2.420519"} | |
| {"text": "### State\nConfusion: 4.386989\nAction: analogize\nReward: -0.239119\nNext Confusion: 4.577966"} | |
| {"text": "### State\nConfusion: 3.484874\nAction: analogize\nReward: 0.235369\nNext Confusion: 3.315143"} | |
| {"text": "### State\nConfusion: 5.282593\nAction: analogize\nReward: 0.254918\nNext Confusion: 5.733146"} | |
| {"text": "### State\nConfusion: 2.864476\nAction: explain\nReward: 0.342867\nNext Confusion: 3.148373"} | |
| {"text": "### State\nConfusion: 4.97228\nAction: analogize\nReward: -0.818184\nNext Confusion: 4.98446"} | |
| {"text": "### State\nConfusion: 5.917563\nAction: analogize\nReward: 0.363083\nNext Confusion: 6.30535"} | |
| {"text": "### State\nConfusion: 4.550479\nAction: analogize\nReward: 0.016655\nNext Confusion: 5.802547"} | |
| {"text": "### State\nConfusion: 3.973916\nAction: analogize\nReward: -0.288334\nNext Confusion: 4.395061"} | |
| {"text": "### State\nConfusion: 3.831629\nAction: analogize\nReward: 0.150414\nNext Confusion: 4.073593"} | |
| {"text": "### State\nConfusion: 6.013124\nAction: analogize\nReward: -0.745808\nNext Confusion: 6.446821"} | |
| {"text": "### State\nConfusion: 4.514874\nAction: analogize\nReward: 0.229664\nNext Confusion: 4.723423"} | |
| {"text": "### State\nConfusion: 4.115125\nAction: analogize\nReward: -1.187277\nNext Confusion: 4.327879"} | |
| {"text": "### State\nConfusion: 4.383352\nAction: analogize\nReward: -0.218114\nNext Confusion: 4.679171"} | |
| {"text": "### State\nConfusion: 3.9782\nAction: correct_fact\nReward: 0.984978\nNext Confusion: 3.826136"} | |
| {"text": "### State\nConfusion: 4.19293\nAction: correct_fact\nReward: -0.565662\nNext Confusion: 4.888168"} | |
| {"text": "### State\nConfusion: 3.7524\nAction: correct_fact\nReward: 0.912141\nNext Confusion: 2.79205"} | |
| {"text": "### State\nConfusion: 5.939085\nAction: analogize\nReward: 0.14276\nNext Confusion: 5.931013"} | |
| {"text": "### State\nConfusion: 3.940862\nAction: question\nReward: 1.299393\nNext Confusion: 3.066277"} | |
| {"text": "### State\nConfusion: 6.421869\nAction: analogize\nReward: 0.36015\nNext Confusion: 6.279908"} | |
| {"text": "### State\nConfusion: 1.453712\nAction: analogize\nReward: -0.959622\nNext Confusion: 2.468141"} | |
| {"text": "### State\nConfusion: 5.496111\nAction: analogize\nReward: -0.43066\nNext Confusion: 5.796453"} | |
| {"text": "### State\nConfusion: 3.835277\nAction: analogize\nReward: -0.210495\nNext Confusion: 4.315474"} | |
| {"text": "### State\nConfusion: 4.667592\nAction: analogize\nReward: 0.221866\nNext Confusion: 4.160811"} | |
| {"text": "### State\nConfusion: 7.403551\nAction: worked_example\nReward: 0.907719\nNext Confusion: 6.486184"} | |
| {"text": "### State\nConfusion: 4.500176\nAction: analogize\nReward: -0.275044\nNext Confusion: 4.99979"} | |
| {"text": "### State\nConfusion: 3.816011\nAction: analogize\nReward: -0.839258\nNext Confusion: 4.310652"} | |
| {"text": "### State\nConfusion: 8.19787\nAction: question\nReward: 0.946011\nNext Confusion: 7.058693"} | |
| {"text": "### State\nConfusion: 3.899465\nAction: analogize\nReward: 0.877398\nNext Confusion: 2.888416"} | |
| {"text": "### State\nConfusion: 5.417362\nAction: question\nReward: 1.217177\nNext Confusion: 4.386165"} | |
| {"text": "### State\nConfusion: 4.163053\nAction: analogize\nReward: -0.983947\nNext Confusion: 5.03493"} | |
| {"text": "### State\nConfusion: 5.122217\nAction: analogize\nReward: -1.302016\nNext Confusion: 5.724295"} | |
| {"text": "### State\nConfusion: 4.713499\nAction: question\nReward: -1.06088\nNext Confusion: 5.390096"} | |
| {"text": "### State\nConfusion: 5.252266\nAction: correct_fact\nReward: 0.112053\nNext Confusion: 5.166234"} | |
| {"text": "### State\nConfusion: 4.277287\nAction: analogize\nReward: 0.349988\nNext Confusion: 3.858081"} | |
| {"text": "### State\nConfusion: 4.312006\nAction: analogize\nReward: -0.280414\nNext Confusion: 4.743623"} | |
| {"text": "### State\nConfusion: 2.116424\nAction: worked_example\nReward: 2.219539\nNext Confusion: 0.30772"} | |
| {"text": "### State\nConfusion: 4.487134\nAction: analogize\nReward: -0.316827\nNext Confusion: 4.72908"} | |
| {"text": "### State\nConfusion: 3.734942\nAction: explain\nReward: 0.559271\nNext Confusion: 3.387467"} | |
| {"text": "### State\nConfusion: 3.817736\nAction: correct_fact\nReward: -0.371755\nNext Confusion: 4.35576"} | |
| {"text": "### State\nConfusion: 3.534894\nAction: question\nReward: 0.714752\nNext Confusion: 2.6903"} | |
| {"text": "### State\nConfusion: 3.297557\nAction: analogize\nReward: -0.02651\nNext Confusion: 3.73844"} | |
| {"text": "### State\nConfusion: 3.66799\nAction: question\nReward: 1.278086\nNext Confusion: 2.777691"} | |
| {"text": "### State\nConfusion: 8.029835\nAction: explain\nReward: 0.004985\nNext Confusion: 8.023991"} | |
| {"text": "### State\nConfusion: 4.69018\nAction: analogize\nReward: -0.212867\nNext Confusion: 5.239052"} | |
| {"text": "### State\nConfusion: 3.043147\nAction: analogize\nReward: 0.341268\nNext Confusion: 2.599216"} | |
| {"text": "### State\nConfusion: 5.367027\nAction: question\nReward: 0.914907\nNext Confusion: 4.669119"} | |
| {"text": "### State\nConfusion: 2.610881\nAction: worked_example\nReward: 1.154435\nNext Confusion: 0.578632"} | |
| {"text": "### State\nConfusion: 2.696339\nAction: question\nReward: -0.000458\nNext Confusion: 2.809412"} | |
| {"text": "### State\nConfusion: 4.532858\nAction: analogize\nReward: 1.700078\nNext Confusion: 3.571719"} | |
| {"text": "### State\nConfusion: 3.726544\nAction: question\nReward: -0.297797\nNext Confusion: 3.881641"} | |
| {"text": "### State\nConfusion: 5.898968\nAction: analogize\nReward: -0.010863\nNext Confusion: 6.54058"} | |
| {"text": "### State\nConfusion: 4.530686\nAction: correct_fact\nReward: -0.477902\nNext Confusion: 5.114145"} | |
| {"text": "### State\nConfusion: 4.004507\nAction: analogize\nReward: -0.184868\nNext Confusion: 4.069702"} | |
| {"text": "### State\nConfusion: 3.988666\nAction: correct_fact\nReward: -0.211808\nNext Confusion: 4.288907"} | |
| {"text": "### State\nConfusion: 3.219075\nAction: worked_example\nReward: 0.314439\nNext Confusion: 3.734656"} | |
| {"text": "### State\nConfusion: 3.293107\nAction: explain\nReward: -0.32457\nNext Confusion: 2.891417"} | |
| {"text": "### State\nConfusion: 4.433997\nAction: explain\nReward: 1.305128\nNext Confusion: 3.859591"} | |
| {"text": "### State\nConfusion: 3.627563\nAction: worked_example\nReward: 1.166838\nNext Confusion: 3.274162"} | |
| {"text": "### State\nConfusion: 2.615709\nAction: analogize\nReward: 0.055049\nNext Confusion: 2.620762"} | |
| {"text": "### State\nConfusion: 4.573371\nAction: analogize\nReward: 0.162817\nNext Confusion: 4.686983"} | |
| {"text": "### State\nConfusion: 2.914325\nAction: analogize\nReward: 0.573057\nNext Confusion: 2.620443"} | |
| {"text": "### State\nConfusion: 4.103402\nAction: correct_fact\nReward: -0.109456\nNext Confusion: 4.510875"} | |
| {"text": "### State\nConfusion: 4.716958\nAction: analogize\nReward: -1.069869\nNext Confusion: 5.22988"} | |
| {"text": "### State\nConfusion: 1.439015\nAction: worked_example\nReward: 1.653535\nNext Confusion: 0.0"} | |
| {"text": "### State\nConfusion: 4.512936\nAction: explain\nReward: -0.674713\nNext Confusion: 4.853434"} | |
| {"text": "### State\nConfusion: 4.616469\nAction: analogize\nReward: 0.228202\nNext Confusion: 4.998136"} | |
| {"text": "### State\nConfusion: 3.472289\nAction: analogize\nReward: -0.518788\nNext Confusion: 4.016916"} | |
| {"text": "### State\nConfusion: 5.290328\nAction: explain\nReward: 1.622873\nNext Confusion: 4.628211"} | |
| {"text": "### State\nConfusion: 4.509018\nAction: analogize\nReward: -0.246907\nNext Confusion: 4.696386"} | |
| {"text": "### State\nConfusion: 3.649479\nAction: analogize\nReward: -0.646281\nNext Confusion: 3.705608"} | |
| {"text": "### State\nConfusion: 4.436886\nAction: explain\nReward: 0.071447\nNext Confusion: 4.62008"} | |
| {"text": "### State\nConfusion: 6.165114\nAction: explain\nReward: 0.102394\nNext Confusion: 5.989851"} | |
| {"text": "### State\nConfusion: 3.751394\nAction: question\nReward: 0.110986\nNext Confusion: 3.572822"} | |
| {"text": "### State\nConfusion: 4.967533\nAction: worked_example\nReward: 1.888933\nNext Confusion: 3.294329"} | |
| {"text": "### State\nConfusion: 6.802119\nAction: analogize\nReward: -0.318646\nNext Confusion: 6.787519"} | |
| {"text": "### State\nConfusion: 2.757781\nAction: analogize\nReward: -0.041224\nNext Confusion: 2.945709"} | |
| {"text": "### State\nConfusion: 3.441426\nAction: question\nReward: -0.356823\nNext Confusion: 3.619329"} | |
| {"text": "### State\nConfusion: 3.824876\nAction: analogize\nReward: -0.282262\nNext Confusion: 4.405335"} | |
| {"text": "### State\nConfusion: 6.033777\nAction: analogize\nReward: 0.118754\nNext Confusion: 5.973301"} | |
| {"text": "### State\nConfusion: 3.73862\nAction: analogize\nReward: 0.371175\nNext Confusion: 3.56842"} | |
| {"text": "### State\nConfusion: 9.286809\nAction: worked_example\nReward: 2.644093\nNext Confusion: 7.658459"} | |
| {"text": "### State\nConfusion: 3.810243\nAction: worked_example\nReward: 1.870768\nNext Confusion: 1.740508"} | |
| {"text": "### State\nConfusion: 3.956185\nAction: question\nReward: 0.81304\nNext Confusion: 3.16671"} | |
| {"text": "### State\nConfusion: 3.00301\nAction: analogize\nReward: -0.345361\nNext Confusion: 2.953303"} | |
| {"text": "### State\nConfusion: 3.451303\nAction: analogize\nReward: -0.858153\nNext Confusion: 4.097912"} | |
| {"text": "### State\nConfusion: 3.48126\nAction: question\nReward: -0.04059\nNext Confusion: 3.13211"} | |
| {"text": "### State\nConfusion: 5.124149\nAction: question\nReward: 0.945285\nNext Confusion: 3.9529"} | |
| {"text": "### State\nConfusion: 3.216448\nAction: analogize\nReward: 0.281472\nNext Confusion: 3.077387"} | |
| {"text": "### State\nConfusion: 3.474833\nAction: analogize\nReward: -1.38124\nNext Confusion: 4.510687"} | |
| {"text": "### State\nConfusion: 3.63635\nAction: analogize\nReward: 0.229631\nNext Confusion: 4.149948"} | |
| {"text": "### State\nConfusion: 3.650369\nAction: analogize\nReward: -0.573367\nNext Confusion: 4.224239"} | |
| {"text": "### State\nConfusion: 3.732268\nAction: explain\nReward: -0.387552\nNext Confusion: 4.25324"} | |
| {"text": "### State\nConfusion: 3.145265\nAction: analogize\nReward: -0.078527\nNext Confusion: 3.347933"} | |
| {"text": "### State\nConfusion: 6.957926\nAction: analogize\nReward: 1.066442\nNext Confusion: 6.509846"} | |
| {"text": "### State\nConfusion: 3.81907\nAction: analogize\nReward: -0.537125\nNext Confusion: 3.932789"} | |
| {"text": "### State\nConfusion: 6.875212\nAction: explain\nReward: 0.923676\nNext Confusion: 6.357508"} | |
| {"text": "### State\nConfusion: 6.113523\nAction: analogize\nReward: 0.216784\nNext Confusion: 5.652343"} | |
| {"text": "### State\nConfusion: 4.34719\nAction: analogize\nReward: 0.100165\nNext Confusion: 4.293112"} | |
| {"text": "### State\nConfusion: 3.46102\nAction: analogize\nReward: -1.955337\nNext Confusion: 5.25365"} | |
| {"text": "### State\nConfusion: 2.541971\nAction: analogize\nReward: -0.67109\nNext Confusion: 3.242385"} | |
| {"text": "### State\nConfusion: 3.4017\nAction: analogize\nReward: -1.546172\nNext Confusion: 4.715109"} | |
| {"text": "### State\nConfusion: 3.903403\nAction: explain\nReward: 0.955664\nNext Confusion: 3.233826"} | |
| {"text": "### State\nConfusion: 4.410839\nAction: correct_fact\nReward: -0.778426\nNext Confusion: 4.867506"} | |
| {"text": "### State\nConfusion: 5.352035\nAction: analogize\nReward: 0.083221\nNext Confusion: 5.309198"} | |
| {"text": "### State\nConfusion: 3.845539\nAction: worked_example\nReward: 0.721115\nNext Confusion: 3.197238"} | |
| {"text": "### State\nConfusion: 4.363106\nAction: analogize\nReward: -0.783213\nNext Confusion: 4.303111"} | |
| {"text": "### State\nConfusion: 7.119\nAction: analogize\nReward: -0.595028\nNext Confusion: 8.178642"} | |
| {"text": "### State\nConfusion: 2.084962\nAction: correct_fact\nReward: 0.396716\nNext Confusion: 2.375542"} | |
| {"text": "### State\nConfusion: 8.238723\nAction: analogize\nReward: -0.264989\nNext Confusion: 8.752825"} | |
| {"text": "### State\nConfusion: 5.645957\nAction: analogize\nReward: -0.080122\nNext Confusion: 5.676078"} | |
| {"text": "### State\nConfusion: 6.062615\nAction: analogize\nReward: 1.713626\nNext Confusion: 5.300618"} | |
| {"text": "### State\nConfusion: 8.921528\nAction: worked_example\nReward: 1.502296\nNext Confusion: 7.384206"} | |
| {"text": "### State\nConfusion: 3.535991\nAction: explain\nReward: 0.402923\nNext Confusion: 2.849609"} | |
| {"text": "### State\nConfusion: 4.234336\nAction: analogize\nReward: 0.416406\nNext Confusion: 4.556367"} | |
| {"text": "### State\nConfusion: 2.388265\nAction: analogize\nReward: -0.320608\nNext Confusion: 2.556611"} | |
| {"text": "### State\nConfusion: 5.013006\nAction: analogize\nReward: -0.899276\nNext Confusion: 5.653823"} | |
| {"text": "### State\nConfusion: 3.866427\nAction: analogize\nReward: -0.164342\nNext Confusion: 4.012446"} | |
| {"text": "### State\nConfusion: 3.142777\nAction: correct_fact\nReward: 1.00956\nNext Confusion: 2.708875"} | |
| {"text": "### State\nConfusion: 5.851146\nAction: worked_example\nReward: 1.087909\nNext Confusion: 4.400152"} | |
| {"text": "### State\nConfusion: 9.969285\nAction: correct_fact\nReward: -0.29484\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.26319\nAction: analogize\nReward: -0.381748\nNext Confusion: 3.710803"} | |
| {"text": "### State\nConfusion: 3.560582\nAction: explain\nReward: -1.057823\nNext Confusion: 3.697929"} | |
| {"text": "### State\nConfusion: 5.7797\nAction: worked_example\nReward: 1.58499\nNext Confusion: 4.221551"} | |
| {"text": "### State\nConfusion: 3.365968\nAction: analogize\nReward: -0.12409\nNext Confusion: 3.294733"} | |
| {"text": "### State\nConfusion: 7.357676\nAction: analogize\nReward: -0.930477\nNext Confusion: 7.947465"} | |
| {"text": "### State\nConfusion: 5.213246\nAction: correct_fact\nReward: -0.457683\nNext Confusion: 5.714824"} | |
| {"text": "### State\nConfusion: 8.686376\nAction: worked_example\nReward: 0.914465\nNext Confusion: 7.735434"} | |
| {"text": "### State\nConfusion: 5.906049\nAction: question\nReward: 0.914918\nNext Confusion: 4.746426"} | |
| {"text": "### State\nConfusion: 3.43873\nAction: analogize\nReward: -0.791984\nNext Confusion: 4.093406"} | |
| {"text": "### State\nConfusion: 8.147875\nAction: analogize\nReward: -0.339623\nNext Confusion: 9.074441"} | |
| {"text": "### State\nConfusion: 4.987865\nAction: correct_fact\nReward: -0.135189\nNext Confusion: 5.126443"} | |
| {"text": "### State\nConfusion: 6.827771\nAction: analogize\nReward: -0.182827\nNext Confusion: 7.09621"} | |
| {"text": "### State\nConfusion: 4.841376\nAction: correct_fact\nReward: 0.440885\nNext Confusion: 4.646943"} | |
| {"text": "### State\nConfusion: 6.711827\nAction: explain\nReward: 1.491523\nNext Confusion: 5.848803"} | |
| {"text": "### State\nConfusion: 4.967377\nAction: analogize\nReward: 0.201888\nNext Confusion: 5.331802"} | |
| {"text": "### State\nConfusion: 7.154704\nAction: analogize\nReward: 0.341228\nNext Confusion: 7.220116"} | |
| {"text": "### State\nConfusion: 2.715033\nAction: analogize\nReward: 0.773613\nNext Confusion: 2.429066"} | |
| {"text": "### State\nConfusion: 6.547896\nAction: explain\nReward: 0.563126\nNext Confusion: 6.683662"} | |
| {"text": "### State\nConfusion: 6.269861\nAction: analogize\nReward: -0.157441\nNext Confusion: 6.715462"} | |
| {"text": "### State\nConfusion: 5.309012\nAction: analogize\nReward: -0.39383\nNext Confusion: 6.213959"} | |
| {"text": "### State\nConfusion: 4.32928\nAction: analogize\nReward: -1.387919\nNext Confusion: 5.461956"} | |
| {"text": "### State\nConfusion: 7.075807\nAction: question\nReward: 0.342144\nNext Confusion: 6.488871"} | |
| {"text": "### State\nConfusion: 2.908937\nAction: worked_example\nReward: 1.786775\nNext Confusion: 1.24221"} | |
| {"text": "### State\nConfusion: 3.163769\nAction: analogize\nReward: -0.649834\nNext Confusion: 3.567671"} | |
| {"text": "### State\nConfusion: 6.413857\nAction: analogize\nReward: 0.632799\nNext Confusion: 6.437032"} | |
| {"text": "### State\nConfusion: 4.157505\nAction: correct_fact\nReward: -0.609828\nNext Confusion: 5.336113"} | |
| {"text": "### State\nConfusion: 5.211759\nAction: analogize\nReward: -0.736369\nNext Confusion: 5.220968"} | |
| {"text": "### State\nConfusion: 4.079003\nAction: analogize\nReward: 0.088859\nNext Confusion: 3.57106"} | |
| {"text": "### State\nConfusion: 7.824298\nAction: worked_example\nReward: 1.347861\nNext Confusion: 7.120889"} | |
| {"text": "### State\nConfusion: 3.864172\nAction: correct_fact\nReward: -1.319164\nNext Confusion: 5.237199"} | |
| {"text": "### State\nConfusion: 3.599646\nAction: analogize\nReward: -0.234941\nNext Confusion: 3.997227"} | |
| {"text": "### State\nConfusion: 6.169248\nAction: analogize\nReward: -0.73131\nNext Confusion: 6.311524"} | |
| {"text": "### State\nConfusion: 4.205811\nAction: analogize\nReward: -0.028118\nNext Confusion: 4.480411"} | |
| {"text": "### State\nConfusion: 6.248756\nAction: worked_example\nReward: 2.320086\nNext Confusion: 4.345955"} | |
| {"text": "### State\nConfusion: 5.422293\nAction: correct_fact\nReward: -0.231537\nNext Confusion: 5.617543"} | |
| {"text": "### State\nConfusion: 2.242248\nAction: analogize\nReward: -2.077594\nNext Confusion: 4.016796"} | |
| {"text": "### State\nConfusion: 6.597207\nAction: worked_example\nReward: 0.058722\nNext Confusion: 6.496094"} | |
| {"text": "### State\nConfusion: 6.364598\nAction: analogize\nReward: -0.576298\nNext Confusion: 6.859088"} | |
| {"text": "### State\nConfusion: 4.490757\nAction: analogize\nReward: -0.434451\nNext Confusion: 4.642448"} | |
| {"text": "### State\nConfusion: 3.946703\nAction: analogize\nReward: -0.681441\nNext Confusion: 4.066546"} | |
| {"text": "### State\nConfusion: 4.001388\nAction: analogize\nReward: -0.120379\nNext Confusion: 3.938912"} | |
| {"text": "### State\nConfusion: 9.710864\nAction: analogize\nReward: 0.004072\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.747454\nAction: analogize\nReward: -0.778482\nNext Confusion: 4.130773"} | |
| {"text": "### State\nConfusion: 4.575968\nAction: explain\nReward: -0.196636\nNext Confusion: 4.720822"} | |
| {"text": "### State\nConfusion: 7.338616\nAction: analogize\nReward: 0.358031\nNext Confusion: 7.647601"} | |
| {"text": "### State\nConfusion: 3.574423\nAction: analogize\nReward: 0.154168\nNext Confusion: 3.77347"} | |
| {"text": "### State\nConfusion: 4.63278\nAction: explain\nReward: 0.621704\nNext Confusion: 4.001392"} | |
| {"text": "### State\nConfusion: 6.210172\nAction: analogize\nReward: 1.340827\nNext Confusion: 5.613212"} | |
| {"text": "### State\nConfusion: 4.389936\nAction: correct_fact\nReward: 0.538823\nNext Confusion: 4.880523"} | |
| {"text": "### State\nConfusion: 5.383227\nAction: analogize\nReward: -0.455558\nNext Confusion: 5.864184"} | |
| {"text": "### State\nConfusion: 4.676518\nAction: question\nReward: -0.024095\nNext Confusion: 4.595433"} | |
| {"text": "### State\nConfusion: 4.116737\nAction: analogize\nReward: -0.200846\nNext Confusion: 4.198854"} | |
| {"text": "### State\nConfusion: 3.158481\nAction: analogize\nReward: -0.501419\nNext Confusion: 3.801235"} | |
| {"text": "### State\nConfusion: 5.381923\nAction: explain\nReward: 0.301327\nNext Confusion: 4.506645"} | |
| {"text": "### State\nConfusion: 3.871241\nAction: analogize\nReward: 0.470844\nNext Confusion: 3.760368"} | |
| {"text": "### State\nConfusion: 5.753503\nAction: question\nReward: 0.836846\nNext Confusion: 5.024852"} | |
| {"text": "### State\nConfusion: 6.185399\nAction: correct_fact\nReward: -0.229113\nNext Confusion: 5.964536"} | |
| {"text": "### State\nConfusion: 5.237682\nAction: analogize\nReward: 0.639695\nNext Confusion: 5.570729"} | |
| {"text": "### State\nConfusion: 3.803306\nAction: analogize\nReward: -0.653361\nNext Confusion: 4.502231"} | |
| {"text": "### State\nConfusion: 4.290909\nAction: analogize\nReward: 0.050406\nNext Confusion: 4.59684"} | |
| {"text": "### State\nConfusion: 3.545454\nAction: correct_fact\nReward: -1.307137\nNext Confusion: 4.240777"} | |
| {"text": "### State\nConfusion: 3.892848\nAction: analogize\nReward: 0.277591\nNext Confusion: 4.556432"} | |
| {"text": "### State\nConfusion: 2.286423\nAction: analogize\nReward: -0.020258\nNext Confusion: 2.711716"} | |
| {"text": "### State\nConfusion: 6.568009\nAction: analogize\nReward: 0.309756\nNext Confusion: 6.701265"} | |
| {"text": "### State\nConfusion: 7.574443\nAction: question\nReward: 0.488642\nNext Confusion: 7.358108"} | |
| {"text": "### State\nConfusion: 2.923719\nAction: question\nReward: 0.680867\nNext Confusion: 1.858226"} | |
| {"text": "### State\nConfusion: 6.309944\nAction: explain\nReward: -0.374884\nNext Confusion: 6.108041"} | |
| {"text": "### State\nConfusion: 3.950947\nAction: explain\nReward: -0.246238\nNext Confusion: 3.967673"} | |
| {"text": "### State\nConfusion: 3.558406\nAction: explain\nReward: -0.86277\nNext Confusion: 4.408267"} | |
| {"text": "### State\nConfusion: 4.903235\nAction: analogize\nReward: -0.33585\nNext Confusion: 5.614946"} | |
| {"text": "### State\nConfusion: 3.336845\nAction: analogize\nReward: -0.129177\nNext Confusion: 3.785467"} | |
| {"text": "### State\nConfusion: 3.309768\nAction: analogize\nReward: 0.973455\nNext Confusion: 2.441803"} | |
| {"text": "### State\nConfusion: 5.035866\nAction: analogize\nReward: 0.479477\nNext Confusion: 5.282769"} | |
| {"text": "### State\nConfusion: 3.717341\nAction: analogize\nReward: -0.445694\nNext Confusion: 3.543303"} | |
| {"text": "### State\nConfusion: 4.434391\nAction: analogize\nReward: -0.524554\nNext Confusion: 4.974204"} | |
| {"text": "### State\nConfusion: 3.056816\nAction: analogize\nReward: -0.758043\nNext Confusion: 4.123322"} | |
| {"text": "### State\nConfusion: 4.187655\nAction: analogize\nReward: -0.539545\nNext Confusion: 4.730906"} | |
| {"text": "### State\nConfusion: 4.638495\nAction: correct_fact\nReward: -0.155797\nNext Confusion: 4.900523"} | |
| {"text": "### State\nConfusion: 6.000943\nAction: analogize\nReward: -1.059716\nNext Confusion: 7.495129"} | |
| {"text": "### State\nConfusion: 3.527035\nAction: question\nReward: -0.523099\nNext Confusion: 4.023082"} | |
| {"text": "### State\nConfusion: 2.94476\nAction: analogize\nReward: 0.002781\nNext Confusion: 3.050672"} | |
| {"text": "### State\nConfusion: 4.197364\nAction: explain\nReward: -0.036997\nNext Confusion: 3.47503"} | |
| {"text": "### State\nConfusion: 2.724514\nAction: analogize\nReward: -0.0952\nNext Confusion: 2.998136"} | |
| {"text": "### State\nConfusion: 4.317858\nAction: analogize\nReward: -0.248636\nNext Confusion: 4.875382"} | |
| {"text": "### State\nConfusion: 8.902446\nAction: analogize\nReward: 0.339665\nNext Confusion: 8.612985"} | |
| {"text": "### State\nConfusion: 7.162479\nAction: explain\nReward: 0.045718\nNext Confusion: 6.794286"} | |
| {"text": "### State\nConfusion: 3.628578\nAction: analogize\nReward: -0.46392\nNext Confusion: 3.195431"} | |
| {"text": "### State\nConfusion: 8.05332\nAction: analogize\nReward: 0.5926\nNext Confusion: 7.991128"} | |
| {"text": "### State\nConfusion: 3.5457\nAction: explain\nReward: 1.156754\nNext Confusion: 2.851767"} | |
| {"text": "### State\nConfusion: 6.851163\nAction: analogize\nReward: -0.991948\nNext Confusion: 7.793215"} | |
| {"text": "### State\nConfusion: 3.835441\nAction: explain\nReward: 0.089078\nNext Confusion: 3.786062"} | |
| {"text": "### State\nConfusion: 2.596621\nAction: analogize\nReward: 0.806401\nNext Confusion: 2.628039"} | |
| {"text": "### State\nConfusion: 8.799524\nAction: analogize\nReward: 0.116618\nNext Confusion: 9.191632"} | |
| {"text": "### State\nConfusion: 3.935173\nAction: explain\nReward: 0.042982\nNext Confusion: 4.48615"} | |
| {"text": "### State\nConfusion: 4.541541\nAction: analogize\nReward: -0.048526\nNext Confusion: 4.233993"} | |
| {"text": "### State\nConfusion: 3.478506\nAction: explain\nReward: -0.555564\nNext Confusion: 3.648344"} | |
| {"text": "### State\nConfusion: 2.207017\nAction: correct_fact\nReward: -1.103466\nNext Confusion: 2.982554"} | |
| {"text": "### State\nConfusion: 6.885302\nAction: analogize\nReward: -0.296764\nNext Confusion: 7.428911"} | |
| {"text": "### State\nConfusion: 4.36444\nAction: analogize\nReward: 0.561046\nNext Confusion: 3.995603"} | |
| {"text": "### State\nConfusion: 4.83831\nAction: correct_fact\nReward: 0.016987\nNext Confusion: 4.787175"} | |
| {"text": "### State\nConfusion: 3.486222\nAction: analogize\nReward: -0.006196\nNext Confusion: 3.984055"} | |
| {"text": "### State\nConfusion: 4.288857\nAction: analogize\nReward: -0.083703\nNext Confusion: 3.823161"} | |
| {"text": "### State\nConfusion: 3.205301\nAction: correct_fact\nReward: -0.636662\nNext Confusion: 4.112988"} | |
| {"text": "### State\nConfusion: 4.280341\nAction: analogize\nReward: -0.699241\nNext Confusion: 4.976665"} | |
| {"text": "### State\nConfusion: 3.260056\nAction: analogize\nReward: -0.594702\nNext Confusion: 4.102306"} | |
| {"text": "### State\nConfusion: 4.612415\nAction: analogize\nReward: 0.936412\nNext Confusion: 3.610846"} | |
| {"text": "### State\nConfusion: 3.823294\nAction: question\nReward: 0.860296\nNext Confusion: 3.350242"} | |
| {"text": "### State\nConfusion: 4.300542\nAction: analogize\nReward: -0.277862\nNext Confusion: 4.604201"} | |
| {"text": "### State\nConfusion: 3.008559\nAction: correct_fact\nReward: 0.791254\nNext Confusion: 3.051819"} | |
| {"text": "### State\nConfusion: 7.152747\nAction: analogize\nReward: -1.218924\nNext Confusion: 8.283377"} | |
| {"text": "### State\nConfusion: 3.386136\nAction: analogize\nReward: -1.36675\nNext Confusion: 4.221894"} | |
| {"text": "### State\nConfusion: 4.445341\nAction: analogize\nReward: -0.451169\nNext Confusion: 5.100821"} | |
| {"text": "### State\nConfusion: 3.284357\nAction: explain\nReward: 0.506617\nNext Confusion: 3.124258"} | |
| {"text": "### State\nConfusion: 5.902904\nAction: analogize\nReward: 0.375122\nNext Confusion: 5.377923"} | |
| {"text": "### State\nConfusion: 6.133891\nAction: analogize\nReward: -0.515857\nNext Confusion: 6.836678"} | |
| {"text": "### State\nConfusion: 4.543045\nAction: analogize\nReward: 0.09579\nNext Confusion: 4.757417"} | |
| {"text": "### State\nConfusion: 3.405838\nAction: analogize\nReward: -0.677895\nNext Confusion: 3.94108"} | |
| {"text": "### State\nConfusion: 3.854457\nAction: analogize\nReward: -0.714533\nNext Confusion: 4.356869"} | |
| {"text": "### State\nConfusion: 8.185769\nAction: worked_example\nReward: 1.678385\nNext Confusion: 6.671469"} | |
| {"text": "### State\nConfusion: 4.561003\nAction: analogize\nReward: -0.987293\nNext Confusion: 4.868517"} | |
| {"text": "### State\nConfusion: 3.15542\nAction: question\nReward: 1.250758\nNext Confusion: 2.164837"} | |
| {"text": "### State\nConfusion: 3.6711\nAction: analogize\nReward: 0.597279\nNext Confusion: 3.621499"} | |
| {"text": "### State\nConfusion: 4.706904\nAction: analogize\nReward: -0.253879\nNext Confusion: 4.531873"} | |
| {"text": "### State\nConfusion: 4.510057\nAction: analogize\nReward: 0.328044\nNext Confusion: 4.414616"} | |
| {"text": "### State\nConfusion: 4.219359\nAction: analogize\nReward: 0.042542\nNext Confusion: 4.427316"} | |
| {"text": "### State\nConfusion: 3.831463\nAction: explain\nReward: 1.360988\nNext Confusion: 2.83884"} | |
| {"text": "### State\nConfusion: 4.097205\nAction: question\nReward: 0.258788\nNext Confusion: 3.725177"} | |
| {"text": "### State\nConfusion: 9.491195\nAction: analogize\nReward: -0.344806\nNext Confusion: 9.492555"} | |
| {"text": "### State\nConfusion: 2.441149\nAction: analogize\nReward: -0.793021\nNext Confusion: 2.837643"} | |
| {"text": "### State\nConfusion: 3.408415\nAction: analogize\nReward: -0.309769\nNext Confusion: 3.985229"} | |
| {"text": "### State\nConfusion: 5.631257\nAction: analogize\nReward: -0.416589\nNext Confusion: 6.514521"} | |
| {"text": "### State\nConfusion: 7.245012\nAction: worked_example\nReward: 2.992593\nNext Confusion: 4.881175"} | |
| {"text": "### State\nConfusion: 6.536276\nAction: explain\nReward: 1.337626\nNext Confusion: 5.404844"} | |
| {"text": "### State\nConfusion: 3.131055\nAction: analogize\nReward: -0.642311\nNext Confusion: 3.90995"} | |
| {"text": "### State\nConfusion: 3.734571\nAction: analogize\nReward: -1.202896\nNext Confusion: 4.529602"} | |
| {"text": "### State\nConfusion: 3.614458\nAction: analogize\nReward: -0.375706\nNext Confusion: 3.960335"} | |
| {"text": "### State\nConfusion: 3.754846\nAction: analogize\nReward: 1.114828\nNext Confusion: 2.872017"} | |
| {"text": "### State\nConfusion: 4.282373\nAction: analogize\nReward: 0.047074\nNext Confusion: 3.929965"} | |
| {"text": "### State\nConfusion: 6.072084\nAction: worked_example\nReward: 2.003795\nNext Confusion: 5.110387"} | |
| {"text": "### State\nConfusion: 7.607367\nAction: worked_example\nReward: 1.966738\nNext Confusion: 5.615737"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.198476\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.649977\nAction: explain\nReward: 0.189024\nNext Confusion: 3.923286"} | |
| {"text": "### State\nConfusion: 4.086252\nAction: analogize\nReward: -1.095851\nNext Confusion: 4.539408"} | |
| {"text": "### State\nConfusion: 3.548788\nAction: analogize\nReward: 0.200725\nNext Confusion: 3.887756"} | |
| {"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.763297\nNext Confusion: 9.680236"} | |
| {"text": "### State\nConfusion: 3.073316\nAction: worked_example\nReward: -0.489686\nNext Confusion: 4.424902"} | |
| {"text": "### State\nConfusion: 6.668706\nAction: correct_fact\nReward: 1.328342\nNext Confusion: 5.888302"} | |
| {"text": "### State\nConfusion: 7.088053\nAction: analogize\nReward: 0.347794\nNext Confusion: 6.982926"} | |
| {"text": "### State\nConfusion: 7.178564\nAction: worked_example\nReward: 1.450848\nNext Confusion: 6.387661"} | |
| {"text": "### State\nConfusion: 1.744545\nAction: explain\nReward: 0.740145\nNext Confusion: 0.861106"} | |
| {"text": "### State\nConfusion: 8.584458\nAction: worked_example\nReward: 2.582664\nNext Confusion: 7.174828"} | |
| {"text": "### State\nConfusion: 3.625311\nAction: explain\nReward: 0.355308\nNext Confusion: 3.380443"} | |
| {"text": "### State\nConfusion: 4.523099\nAction: analogize\nReward: -0.790397\nNext Confusion: 5.10107"} | |
| {"text": "### State\nConfusion: 4.948718\nAction: analogize\nReward: -1.799253\nNext Confusion: 5.516174"} | |
| {"text": "### State\nConfusion: 6.113761\nAction: question\nReward: 0.485334\nNext Confusion: 5.434281"} | |
| {"text": "### State\nConfusion: 5.818233\nAction: question\nReward: 1.002597\nNext Confusion: 5.021882"} | |
| {"text": "### State\nConfusion: 1.768893\nAction: analogize\nReward: 0.961322\nNext Confusion: 1.464652"} | |
| {"text": "### State\nConfusion: 2.894877\nAction: question\nReward: 1.014309\nNext Confusion: 2.353802"} | |
| {"text": "### State\nConfusion: 5.089719\nAction: analogize\nReward: 0.77709\nNext Confusion: 4.554134"} | |
| {"text": "### State\nConfusion: 4.265329\nAction: analogize\nReward: 0.503933\nNext Confusion: 4.53531"} | |
| {"text": "### State\nConfusion: 2.975997\nAction: question\nReward: 0.782601\nNext Confusion: 2.096145"} | |
| {"text": "### State\nConfusion: 2.969771\nAction: analogize\nReward: -0.040926\nNext Confusion: 2.708039"} | |
| {"text": "### State\nConfusion: 5.588715\nAction: question\nReward: -0.768939\nNext Confusion: 5.495356"} | |
| {"text": "### State\nConfusion: 7.738388\nAction: correct_fact\nReward: 0.097154\nNext Confusion: 7.895498"} | |
| {"text": "### State\nConfusion: 9.789788\nAction: question\nReward: 0.507937\nNext Confusion: 9.316481"} | |
| {"text": "### State\nConfusion: 2.122493\nAction: analogize\nReward: 0.559706\nNext Confusion: 2.205507"} | |
| {"text": "### State\nConfusion: 5.07307\nAction: explain\nReward: 1.405116\nNext Confusion: 4.673156"} | |
| {"text": "### State\nConfusion: 5.187453\nAction: question\nReward: -0.206875\nNext Confusion: 5.470587"} | |
| {"text": "### State\nConfusion: 7.89118\nAction: worked_example\nReward: 3.005133\nNext Confusion: 5.957319"} | |
| {"text": "### State\nConfusion: 3.463503\nAction: analogize\nReward: 0.233139\nNext Confusion: 3.116866"} | |
| {"text": "### State\nConfusion: 4.490139\nAction: analogize\nReward: -1.247777\nNext Confusion: 4.560209"} | |
| {"text": "### State\nConfusion: 6.336253\nAction: analogize\nReward: 0.192\nNext Confusion: 5.439387"} | |
| {"text": "### State\nConfusion: 3.879744\nAction: explain\nReward: 1.933112\nNext Confusion: 2.894167"} | |
| {"text": "### State\nConfusion: 3.602151\nAction: analogize\nReward: 0.405749\nNext Confusion: 3.220006"} | |
| {"text": "### State\nConfusion: 3.939015\nAction: explain\nReward: -0.410771\nNext Confusion: 4.216709"} | |
| {"text": "### State\nConfusion: 4.013006\nAction: analogize\nReward: -0.508277\nNext Confusion: 4.368215"} | |
| {"text": "### State\nConfusion: 6.478064\nAction: analogize\nReward: -1.128476\nNext Confusion: 7.656253"} | |
| {"text": "### State\nConfusion: 7.29305\nAction: analogize\nReward: 1.065129\nNext Confusion: 6.880034"} | |
| {"text": "### State\nConfusion: 2.54377\nAction: analogize\nReward: -0.940805\nNext Confusion: 2.54594"} | |
| {"text": "### State\nConfusion: 4.214607\nAction: analogize\nReward: -1.635581\nNext Confusion: 5.250125"} | |
| {"text": "### State\nConfusion: 3.376368\nAction: question\nReward: 1.454091\nNext Confusion: 2.159586"} | |
| {"text": "### State\nConfusion: 4.664003\nAction: analogize\nReward: -0.481433\nNext Confusion: 4.975037"} | |
| {"text": "### State\nConfusion: 5.789483\nAction: analogize\nReward: 0.351938\nNext Confusion: 5.524223"} | |
| {"text": "### State\nConfusion: 4.134062\nAction: analogize\nReward: -0.746534\nNext Confusion: 4.309115"} | |
| {"text": "### State\nConfusion: 2.75702\nAction: worked_example\nReward: 1.686613\nNext Confusion: 1.597392"} | |
| {"text": "### State\nConfusion: 5.175445\nAction: worked_example\nReward: 2.26482\nNext Confusion: 3.099191"} | |
| {"text": "### State\nConfusion: 6.065045\nAction: analogize\nReward: -1.475176\nNext Confusion: 7.14422"} | |
| {"text": "### State\nConfusion: 4.352107\nAction: analogize\nReward: -0.176949\nNext Confusion: 4.417541"} | |
| {"text": "### State\nConfusion: 5.816594\nAction: analogize\nReward: -2.146036\nNext Confusion: 7.010947"} | |
| {"text": "### State\nConfusion: 3.675687\nAction: analogize\nReward: -0.013575\nNext Confusion: 3.6197"} | |
| {"text": "### State\nConfusion: 6.159766\nAction: explain\nReward: 0.721524\nNext Confusion: 5.935606"} | |
| {"text": "### State\nConfusion: 3.588376\nAction: question\nReward: 0.441773\nNext Confusion: 3.378304"} | |
| {"text": "### State\nConfusion: 6.344508\nAction: question\nReward: 1.770076\nNext Confusion: 5.09023"} | |
| {"text": "### State\nConfusion: 3.821794\nAction: analogize\nReward: -0.387546\nNext Confusion: 4.346112"} | |
| {"text": "### State\nConfusion: 7.260673\nAction: correct_fact\nReward: 0.008941\nNext Confusion: 7.553966"} | |
| {"text": "### State\nConfusion: 5.776783\nAction: analogize\nReward: -0.838409\nNext Confusion: 6.757849"} | |
| {"text": "### State\nConfusion: 3.874547\nAction: explain\nReward: 0.548746\nNext Confusion: 3.753034"} | |
| {"text": "### State\nConfusion: 7.218959\nAction: worked_example\nReward: 0.624518\nNext Confusion: 6.850143"} | |
| {"text": "### State\nConfusion: 7.365728\nAction: explain\nReward: 1.423284\nNext Confusion: 6.245595"} | |
| {"text": "### State\nConfusion: 4.087149\nAction: correct_fact\nReward: 0.54914\nNext Confusion: 4.116221"} | |
| {"text": "### State\nConfusion: 9.569596\nAction: worked_example\nReward: 1.091264\nNext Confusion: 8.481068"} | |
| {"text": "### State\nConfusion: 7.997488\nAction: question\nReward: 0.609481\nNext Confusion: 6.839767"} | |
| {"text": "### State\nConfusion: 6.471966\nAction: analogize\nReward: 0.734048\nNext Confusion: 6.210516"} | |
| {"text": "### State\nConfusion: 5.247116\nAction: analogize\nReward: 0.085486\nNext Confusion: 5.148559"} | |
| {"text": "### State\nConfusion: 5.63931\nAction: question\nReward: 0.358263\nNext Confusion: 5.389858"} | |
| {"text": "### State\nConfusion: 6.156111\nAction: analogize\nReward: 0.161887\nNext Confusion: 5.62754"} | |
| {"text": "### State\nConfusion: 3.755793\nAction: correct_fact\nReward: -0.840027\nNext Confusion: 4.740012"} | |
| {"text": "### State\nConfusion: 7.445958\nAction: worked_example\nReward: 3.084386\nNext Confusion: 5.851674"} | |
| {"text": "### State\nConfusion: 7.444563\nAction: explain\nReward: 0.010491\nNext Confusion: 6.992737"} | |
| {"text": "### State\nConfusion: 4.028035\nAction: analogize\nReward: -0.705654\nNext Confusion: 4.397549"} | |
| {"text": "### State\nConfusion: 3.586522\nAction: correct_fact\nReward: -0.890749\nNext Confusion: 3.880734"} | |
| {"text": "### State\nConfusion: 3.824157\nAction: analogize\nReward: 0.862425\nNext Confusion: 2.996262"} | |
| {"text": "### State\nConfusion: 3.883596\nAction: worked_example\nReward: 1.802463\nNext Confusion: 2.584208"} | |
| {"text": "### State\nConfusion: 5.404881\nAction: correct_fact\nReward: 0.081433\nNext Confusion: 5.07905"} | |
| {"text": "### State\nConfusion: 2.991435\nAction: analogize\nReward: -0.433469\nNext Confusion: 3.357399"} | |
| {"text": "### State\nConfusion: 3.545515\nAction: analogize\nReward: -0.007121\nNext Confusion: 3.71352"} | |
| {"text": "### State\nConfusion: 3.660067\nAction: analogize\nReward: -0.928261\nNext Confusion: 4.0236"} | |
| {"text": "### State\nConfusion: 7.0235\nAction: question\nReward: 0.20218\nNext Confusion: 6.875191"} | |
| {"text": "### State\nConfusion: 9.746426\nAction: analogize\nReward: 0.725931\nNext Confusion: 9.392016"} | |
| {"text": "### State\nConfusion: 2.125439\nAction: explain\nReward: -1.062487\nNext Confusion: 2.867128"} | |
| {"text": "### State\nConfusion: 3.227817\nAction: analogize\nReward: -0.014357\nNext Confusion: 3.537508"} | |
| {"text": "### State\nConfusion: 3.876828\nAction: analogize\nReward: 0.060063\nNext Confusion: 3.857296"} | |
| {"text": "### State\nConfusion: 7.089156\nAction: analogize\nReward: 0.444024\nNext Confusion: 7.298431"} | |
| {"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 2.962947\nNext Confusion: 8.246628"} | |
| {"text": "### State\nConfusion: 5.594191\nAction: explain\nReward: 0.679804\nNext Confusion: 5.233611"} | |
| {"text": "### State\nConfusion: 6.283862\nAction: analogize\nReward: 0.743037\nNext Confusion: 6.186083"} | |
| {"text": "### State\nConfusion: 5.072781\nAction: question\nReward: 1.101868\nNext Confusion: 3.897992"} | |
| {"text": "### State\nConfusion: 4.118371\nAction: correct_fact\nReward: 0.029128\nNext Confusion: 3.826488"} | |
| {"text": "### State\nConfusion: 7.527729\nAction: correct_fact\nReward: 0.845789\nNext Confusion: 7.247021"} | |
| {"text": "### State\nConfusion: 6.289606\nAction: analogize\nReward: 0.422815\nNext Confusion: 5.600898"} | |
| {"text": "### State\nConfusion: 4.523598\nAction: analogize\nReward: -0.188144\nNext Confusion: 4.558487"} | |
| {"text": "### State\nConfusion: 3.763717\nAction: question\nReward: 0.6122\nNext Confusion: 3.588942"} | |
| {"text": "### State\nConfusion: 8.323666\nAction: correct_fact\nReward: 0.409538\nNext Confusion: 8.021692"} | |
| {"text": "### State\nConfusion: 5.981031\nAction: worked_example\nReward: 0.917426\nNext Confusion: 5.423538"} | |
| {"text": "### State\nConfusion: 3.515466\nAction: analogize\nReward: -0.103899\nNext Confusion: 3.695133"} | |
| {"text": "### State\nConfusion: 6.429372\nAction: explain\nReward: -0.029875\nNext Confusion: 6.378915"} | |
| {"text": "### State\nConfusion: 5.750795\nAction: analogize\nReward: -0.089562\nNext Confusion: 5.622753"} | |
| {"text": "### State\nConfusion: 5.025332\nAction: analogize\nReward: -1.312083\nNext Confusion: 6.373254"} | |
| {"text": "### State\nConfusion: 7.359441\nAction: analogize\nReward: -0.720074\nNext Confusion: 7.755872"} | |
| {"text": "### State\nConfusion: 3.776062\nAction: question\nReward: -0.134128\nNext Confusion: 3.433186"} | |
| {"text": "### State\nConfusion: 1.355683\nAction: explain\nReward: -0.00118\nNext Confusion: 1.431554"} | |
| {"text": "### State\nConfusion: 3.912807\nAction: correct_fact\nReward: -1.548555\nNext Confusion: 4.628972"} | |
| {"text": "### State\nConfusion: 3.794357\nAction: correct_fact\nReward: -0.059373\nNext Confusion: 3.670245"} | |
| {"text": "### State\nConfusion: 5.059543\nAction: analogize\nReward: 0.032602\nNext Confusion: 5.353498"} | |
| {"text": "### State\nConfusion: 3.077356\nAction: analogize\nReward: 0.011284\nNext Confusion: 2.947043"} | |
| {"text": "### State\nConfusion: 5.534637\nAction: explain\nReward: 1.721247\nNext Confusion: 4.352992"} | |
| {"text": "### State\nConfusion: 4.366849\nAction: analogize\nReward: -0.917181\nNext Confusion: 5.294018"} | |
| {"text": "### State\nConfusion: 6.835693\nAction: explain\nReward: 0.456146\nNext Confusion: 6.470298"} | |
| {"text": "### State\nConfusion: 9.452318\nAction: analogize\nReward: 0.033633\nNext Confusion: 9.382432"} | |
| {"text": "### State\nConfusion: 3.400432\nAction: analogize\nReward: -0.369133\nNext Confusion: 3.65212"} | |
| {"text": "### State\nConfusion: 5.731019\nAction: analogize\nReward: -1.289566\nNext Confusion: 6.41672"} | |
| {"text": "### State\nConfusion: 3.618958\nAction: explain\nReward: -0.907926\nNext Confusion: 3.69676"} | |
| {"text": "### State\nConfusion: 3.088994\nAction: explain\nReward: 0.2848\nNext Confusion: 2.83016"} | |
| {"text": "### State\nConfusion: 7.169104\nAction: analogize\nReward: 1.486468\nNext Confusion: 6.278144"} | |
| {"text": "### State\nConfusion: 4.010123\nAction: question\nReward: 0.990315\nNext Confusion: 2.898942"} | |
| {"text": "### State\nConfusion: 3.325781\nAction: analogize\nReward: -0.847395\nNext Confusion: 3.638474"} | |
| {"text": "### State\nConfusion: 4.63962\nAction: question\nReward: 1.030137\nNext Confusion: 3.610102"} | |
| {"text": "### State\nConfusion: 3.698579\nAction: analogize\nReward: -0.018877\nNext Confusion: 4.387045"} | |
| {"text": "### State\nConfusion: 1.946232\nAction: analogize\nReward: -1.351303\nNext Confusion: 3.380728"} | |
| {"text": "### State\nConfusion: 4.236727\nAction: analogize\nReward: -0.73162\nNext Confusion: 5.393082"} | |
| {"text": "### State\nConfusion: 8.190957\nAction: analogize\nReward: 0.404816\nNext Confusion: 8.214826"} | |
| {"text": "### State\nConfusion: 6.995987\nAction: analogize\nReward: 0.571194\nNext Confusion: 6.929503"} | |
| {"text": "### State\nConfusion: 2.113818\nAction: explain\nReward: -0.30514\nNext Confusion: 2.129459"} | |
| {"text": "### State\nConfusion: 3.684304\nAction: analogize\nReward: -2.017778\nNext Confusion: 5.452133"} | |
| {"text": "### State\nConfusion: 4.239281\nAction: worked_example\nReward: 1.860685\nNext Confusion: 2.940046"} | |
| {"text": "### State\nConfusion: 3.676164\nAction: analogize\nReward: 0.091998\nNext Confusion: 4.183175"} | |
| {"text": "### State\nConfusion: 4.133906\nAction: analogize\nReward: -0.55272\nNext Confusion: 4.581522"} | |
| {"text": "### State\nConfusion: 4.054602\nAction: correct_fact\nReward: -0.177678\nNext Confusion: 4.056369"} | |
| {"text": "### State\nConfusion: 4.639642\nAction: analogize\nReward: -0.272352\nNext Confusion: 5.553159"} | |
| {"text": "### State\nConfusion: 5.635794\nAction: analogize\nReward: -0.705183\nNext Confusion: 6.451484"} | |
| {"text": "### State\nConfusion: 5.543668\nAction: correct_fact\nReward: 0.885091\nNext Confusion: 4.846454"} | |
| {"text": "### State\nConfusion: 3.267564\nAction: analogize\nReward: -0.098069\nNext Confusion: 3.530591"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.338373\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.627039\nAction: analogize\nReward: -0.495228\nNext Confusion: 3.729506"} | |
| {"text": "### State\nConfusion: 2.909357\nAction: analogize\nReward: 0.712175\nNext Confusion: 2.649315"} | |
| {"text": "### State\nConfusion: 5.226436\nAction: explain\nReward: -0.593975\nNext Confusion: 5.565649"} | |
| {"text": "### State\nConfusion: 3.411177\nAction: analogize\nReward: -0.915413\nNext Confusion: 4.738223"} | |
| {"text": "### State\nConfusion: 2.31554\nAction: correct_fact\nReward: 0.05716\nNext Confusion: 1.97111"} | |
| {"text": "### State\nConfusion: 3.476088\nAction: question\nReward: 2.026609\nNext Confusion: 2.038584"} | |
| {"text": "### State\nConfusion: 5.963129\nAction: explain\nReward: -0.663593\nNext Confusion: 5.924516"} | |
| {"text": "### State\nConfusion: 4.23041\nAction: analogize\nReward: -0.798136\nNext Confusion: 5.034618"} | |
| {"text": "### State\nConfusion: 4.460384\nAction: analogize\nReward: 0.281733\nNext Confusion: 4.369669"} | |
| {"text": "### State\nConfusion: 3.952235\nAction: analogize\nReward: -0.543262\nNext Confusion: 4.522062"} | |
| {"text": "### State\nConfusion: 4.810875\nAction: analogize\nReward: -0.724943\nNext Confusion: 4.581037"} | |
| {"text": "### State\nConfusion: 3.226505\nAction: worked_example\nReward: 0.912334\nNext Confusion: 2.419872"} | |
| {"text": "### State\nConfusion: 9.691536\nAction: worked_example\nReward: 1.77497\nNext Confusion: 8.428343"} | |
| {"text": "### State\nConfusion: 5.033921\nAction: question\nReward: 0.196992\nNext Confusion: 4.573141"} | |
| {"text": "### State\nConfusion: 3.517061\nAction: analogize\nReward: -0.263287\nNext Confusion: 3.85532"} | |
| {"text": "### State\nConfusion: 7.032363\nAction: worked_example\nReward: 1.873676\nNext Confusion: 5.367367"} | |
| {"text": "### State\nConfusion: 4.801627\nAction: analogize\nReward: -0.242848\nNext Confusion: 5.514838"} | |
| {"text": "### State\nConfusion: 6.834419\nAction: analogize\nReward: 0.4653\nNext Confusion: 7.076133"} | |
| {"text": "### State\nConfusion: 6.279381\nAction: question\nReward: 0.830391\nNext Confusion: 5.205801"} | |
| {"text": "### State\nConfusion: 8.198117\nAction: worked_example\nReward: 1.509305\nNext Confusion: 7.05779"} | |
| {"text": "### State\nConfusion: 3.336602\nAction: analogize\nReward: 0.37946\nNext Confusion: 3.545989"} | |
| {"text": "### State\nConfusion: 6.838361\nAction: analogize\nReward: -0.741956\nNext Confusion: 7.300534"} | |
| {"text": "### State\nConfusion: 5.791677\nAction: explain\nReward: 0.437775\nNext Confusion: 5.755174"} | |
| {"text": "### State\nConfusion: 9.722909\nAction: correct_fact\nReward: -1.217724\nNext Confusion: 9.802279"} | |
| {"text": "### State\nConfusion: 7.018476\nAction: question\nReward: 0.563995\nNext Confusion: 6.405234"} | |
| {"text": "### State\nConfusion: 4.592573\nAction: analogize\nReward: -2.146348\nNext Confusion: 5.838669"} | |
| {"text": "### State\nConfusion: 5.724254\nAction: analogize\nReward: 1.270602\nNext Confusion: 5.397553"} | |
| {"text": "### State\nConfusion: 4.446648\nAction: worked_example\nReward: -0.503034\nNext Confusion: 4.548247"} | |
| {"text": "### State\nConfusion: 5.499\nAction: explain\nReward: 0.310256\nNext Confusion: 5.026456"} | |
| {"text": "### State\nConfusion: 3.483297\nAction: question\nReward: 0.985335\nNext Confusion: 2.828101"} | |
| {"text": "### State\nConfusion: 2.728559\nAction: explain\nReward: 0.672884\nNext Confusion: 2.04629"} | |
| {"text": "### State\nConfusion: 6.694008\nAction: question\nReward: -0.553633\nNext Confusion: 7.286873"} | |
| {"text": "### State\nConfusion: 2.769368\nAction: question\nReward: 0.406163\nNext Confusion: 2.904883"} | |
| {"text": "### State\nConfusion: 5.757645\nAction: correct_fact\nReward: 0.664011\nNext Confusion: 5.369445"} | |
| {"text": "### State\nConfusion: 4.197348\nAction: analogize\nReward: 1.133919\nNext Confusion: 3.996284"} | |
| {"text": "### State\nConfusion: 5.534684\nAction: explain\nReward: -0.725343\nNext Confusion: 6.430503"} | |
| {"text": "### State\nConfusion: 2.75554\nAction: worked_example\nReward: 1.502689\nNext Confusion: 1.00548"} | |
| {"text": "### State\nConfusion: 4.337378\nAction: analogize\nReward: 0.228474\nNext Confusion: 4.612438"} | |
| {"text": "### State\nConfusion: 4.446347\nAction: analogize\nReward: -0.524076\nNext Confusion: 5.001291"} | |
| {"text": "### State\nConfusion: 5.104956\nAction: analogize\nReward: -0.163479\nNext Confusion: 5.520312"} | |
| {"text": "### State\nConfusion: 3.629805\nAction: explain\nReward: -0.095137\nNext Confusion: 4.171221"} | |
| {"text": "### State\nConfusion: 3.944147\nAction: analogize\nReward: -0.668701\nNext Confusion: 5.047819"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.550446\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 9.356881\nAction: correct_fact\nReward: -1.110132\nNext Confusion: 9.71459"} | |
| {"text": "### State\nConfusion: 4.205706\nAction: explain\nReward: -0.234715\nNext Confusion: 4.437668"} | |
| {"text": "### State\nConfusion: 5.271949\nAction: analogize\nReward: -0.57591\nNext Confusion: 5.56199"} | |
| {"text": "### State\nConfusion: 3.972523\nAction: analogize\nReward: 0.4965\nNext Confusion: 4.568244"} | |
| {"text": "### State\nConfusion: 6.749858\nAction: analogize\nReward: -0.236824\nNext Confusion: 7.190533"} | |
| {"text": "### State\nConfusion: 8.942906\nAction: correct_fact\nReward: 0.233019\nNext Confusion: 8.72936"} | |
| {"text": "### State\nConfusion: 3.798457\nAction: analogize\nReward: -0.124735\nNext Confusion: 4.220905"} | |
| {"text": "### State\nConfusion: 4.55122\nAction: analogize\nReward: -1.185639\nNext Confusion: 5.466571"} | |
| {"text": "### State\nConfusion: 3.276233\nAction: analogize\nReward: 0.248292\nNext Confusion: 3.85905"} | |
| {"text": "### State\nConfusion: 4.059147\nAction: correct_fact\nReward: 0.407354\nNext Confusion: 3.503689"} | |
| {"text": "### State\nConfusion: 4.406315\nAction: correct_fact\nReward: -0.603789\nNext Confusion: 4.506692"} | |
| {"text": "### State\nConfusion: 2.644092\nAction: analogize\nReward: -0.019094\nNext Confusion: 3.00082"} | |
| {"text": "### State\nConfusion: 5.603155\nAction: worked_example\nReward: 1.284057\nNext Confusion: 4.500092"} | |
| {"text": "### State\nConfusion: 3.218315\nAction: analogize\nReward: -0.348337\nNext Confusion: 3.556726"} | |
| {"text": "### State\nConfusion: 3.800715\nAction: analogize\nReward: -0.820115\nNext Confusion: 4.452045"} | |
| {"text": "### State\nConfusion: 1.973659\nAction: analogize\nReward: -0.355537\nNext Confusion: 2.784944"} | |
| {"text": "### State\nConfusion: 4.688923\nAction: question\nReward: 1.102507\nNext Confusion: 3.759745"} | |
| {"text": "### State\nConfusion: 3.492448\nAction: analogize\nReward: 0.101092\nNext Confusion: 3.807346"} | |
| {"text": "### State\nConfusion: 2.46282\nAction: analogize\nReward: -1.078748\nNext Confusion: 3.07609"} | |
| {"text": "### State\nConfusion: 6.213417\nAction: analogize\nReward: 0.576302\nNext Confusion: 5.873172"} | |
| {"text": "### State\nConfusion: 3.541289\nAction: explain\nReward: 0.015983\nNext Confusion: 3.708063"} | |
| {"text": "### State\nConfusion: 5.982512\nAction: analogize\nReward: -0.190504\nNext Confusion: 6.230028"} | |
| {"text": "### State\nConfusion: 5.589458\nAction: worked_example\nReward: 1.965483\nNext Confusion: 5.141684"} | |
| {"text": "### State\nConfusion: 3.292279\nAction: question\nReward: 0.632446\nNext Confusion: 2.64234"} | |
| {"text": "### State\nConfusion: 1.361117\nAction: analogize\nReward: -0.449704\nNext Confusion: 2.123296"} | |
| {"text": "### State\nConfusion: 5.606995\nAction: worked_example\nReward: 0.968661\nNext Confusion: 5.075988"} | |
| {"text": "### State\nConfusion: 3.824053\nAction: worked_example\nReward: 1.445538\nNext Confusion: 2.7174"} | |
| {"text": "### State\nConfusion: 3.619457\nAction: analogize\nReward: -0.932034\nNext Confusion: 4.530998"} | |
| {"text": "### State\nConfusion: 4.039429\nAction: question\nReward: 0.626477\nNext Confusion: 3.597227"} | |
| {"text": "### State\nConfusion: 5.48916\nAction: question\nReward: 0.787362\nNext Confusion: 5.328984"} | |
| {"text": "### State\nConfusion: 3.539406\nAction: analogize\nReward: -0.441082\nNext Confusion: 4.028983"} | |
| {"text": "### State\nConfusion: 3.917401\nAction: analogize\nReward: -0.314303\nNext Confusion: 4.375538"} | |
| {"text": "### State\nConfusion: 4.657824\nAction: analogize\nReward: 0.393414\nNext Confusion: 4.629042"} | |
| {"text": "### State\nConfusion: 9.817489\nAction: analogize\nReward: -0.384728\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.5677\nAction: explain\nReward: -0.380941\nNext Confusion: 3.946322"} | |
| {"text": "### State\nConfusion: 3.643777\nAction: analogize\nReward: -1.920887\nNext Confusion: 4.448967"} | |
| {"text": "### State\nConfusion: 4.602626\nAction: explain\nReward: 0.775825\nNext Confusion: 3.995621"} | |
| {"text": "### State\nConfusion: 3.493214\nAction: explain\nReward: -0.128712\nNext Confusion: 3.723056"} | |
| {"text": "### State\nConfusion: 4.024774\nAction: analogize\nReward: -2.039025\nNext Confusion: 4.794768"} | |
| {"text": "### State\nConfusion: 5.567354\nAction: question\nReward: 0.953785\nNext Confusion: 5.143233"} | |
| {"text": "### State\nConfusion: 4.857438\nAction: explain\nReward: 0.443781\nNext Confusion: 4.433139"} | |
| {"text": "### State\nConfusion: 6.066289\nAction: explain\nReward: -0.315992\nNext Confusion: 6.312766"} | |
| {"text": "### State\nConfusion: 7.202041\nAction: analogize\nReward: 0.352557\nNext Confusion: 7.09304"} | |
| {"text": "### State\nConfusion: 6.867551\nAction: explain\nReward: -0.373433\nNext Confusion: 7.375757"} | |
| {"text": "### State\nConfusion: 4.086685\nAction: question\nReward: 0.298342\nNext Confusion: 3.762268"} | |
| {"text": "### State\nConfusion: 8.616289\nAction: question\nReward: 1.143804\nNext Confusion: 7.619493"} | |
| {"text": "### State\nConfusion: 4.24828\nAction: analogize\nReward: -0.861769\nNext Confusion: 5.43898"} | |
| {"text": "### State\nConfusion: 2.615268\nAction: correct_fact\nReward: 1.0784\nNext Confusion: 1.460789"} | |
| {"text": "### State\nConfusion: 5.23111\nAction: correct_fact\nReward: -0.202571\nNext Confusion: 5.344155"} | |
| {"text": "### State\nConfusion: 7.022273\nAction: analogize\nReward: -1.226544\nNext Confusion: 7.900564"} | |
| {"text": "### State\nConfusion: 7.907241\nAction: question\nReward: 1.464275\nNext Confusion: 6.918559"} | |
| {"text": "### State\nConfusion: 7.12112\nAction: correct_fact\nReward: 1.403988\nNext Confusion: 6.659081"} | |
| {"text": "### State\nConfusion: 3.515963\nAction: analogize\nReward: -0.759772\nNext Confusion: 4.787727"} | |
| {"text": "### State\nConfusion: 5.604148\nAction: worked_example\nReward: 1.415625\nNext Confusion: 4.33325"} | |
| {"text": "### State\nConfusion: 5.108318\nAction: analogize\nReward: -1.22629\nNext Confusion: 5.783868"} | |
| {"text": "### State\nConfusion: 4.742907\nAction: analogize\nReward: -0.264648\nNext Confusion: 4.893974"} | |
| {"text": "### State\nConfusion: 2.955016\nAction: explain\nReward: 0.640322\nNext Confusion: 2.963681"} | |
| {"text": "### State\nConfusion: 3.553782\nAction: analogize\nReward: 0.463052\nNext Confusion: 3.616991"} | |
| {"text": "### State\nConfusion: 4.315825\nAction: correct_fact\nReward: 0.128917\nNext Confusion: 4.809463"} | |
| {"text": "### State\nConfusion: 6.324152\nAction: worked_example\nReward: 2.24065\nNext Confusion: 4.650812"} | |
| {"text": "### State\nConfusion: 5.497759\nAction: explain\nReward: 1.417489\nNext Confusion: 4.601767"} | |
| {"text": "### State\nConfusion: 3.482203\nAction: analogize\nReward: -0.362165\nNext Confusion: 3.796918"} | |
| {"text": "### State\nConfusion: 4.651682\nAction: explain\nReward: 0.920595\nNext Confusion: 4.110992"} | |
| {"text": "### State\nConfusion: 3.818045\nAction: worked_example\nReward: 0.974113\nNext Confusion: 2.51135"} | |
| {"text": "### State\nConfusion: 4.387833\nAction: question\nReward: 0.807106\nNext Confusion: 3.467108"} | |
| {"text": "### State\nConfusion: 7.444748\nAction: explain\nReward: 0.535044\nNext Confusion: 7.314575"} | |
| {"text": "### State\nConfusion: 8.638442\nAction: analogize\nReward: -0.385962\nNext Confusion: 9.068825"} | |
| {"text": "### State\nConfusion: 3.031337\nAction: analogize\nReward: 1.474946\nNext Confusion: 2.794966"} | |
| {"text": "### State\nConfusion: 7.27936\nAction: analogize\nReward: 0.607482\nNext Confusion: 6.99242"} | |
| {"text": "### State\nConfusion: 3.787557\nAction: analogize\nReward: -0.636365\nNext Confusion: 4.095346"} | |
| {"text": "### State\nConfusion: 7.239841\nAction: analogize\nReward: -0.911259\nNext Confusion: 8.592769"} | |
| {"text": "### State\nConfusion: 7.590043\nAction: worked_example\nReward: 0.271013\nNext Confusion: 7.004351"} | |
| {"text": "### State\nConfusion: 8.092934\nAction: worked_example\nReward: 1.788613\nNext Confusion: 6.377313"} | |
| {"text": "### State\nConfusion: 5.034043\nAction: explain\nReward: 0.301633\nNext Confusion: 5.184921"} | |
| {"text": "### State\nConfusion: 4.350404\nAction: explain\nReward: 0.861178\nNext Confusion: 3.60923"} | |
| {"text": "### State\nConfusion: 2.270528\nAction: explain\nReward: 0.728106\nNext Confusion: 1.638538"} | |
| {"text": "### State\nConfusion: 4.23141\nAction: analogize\nReward: -0.872446\nNext Confusion: 4.772834"} | |
| {"text": "### State\nConfusion: 4.371902\nAction: worked_example\nReward: 0.463624\nNext Confusion: 3.571019"} | |
| {"text": "### State\nConfusion: 6.633394\nAction: question\nReward: -0.168295\nNext Confusion: 6.514897"} | |
| {"text": "### State\nConfusion: 6.584599\nAction: question\nReward: 0.347541\nNext Confusion: 6.205327"} | |
| {"text": "### State\nConfusion: 10.0\nAction: question\nReward: 1.610989\nNext Confusion: 9.107007"} | |
| {"text": "### State\nConfusion: 5.890399\nAction: analogize\nReward: -0.177367\nNext Confusion: 5.894826"} | |
| {"text": "### State\nConfusion: 3.914789\nAction: question\nReward: 0.963476\nNext Confusion: 3.638644"} | |
| {"text": "### State\nConfusion: 4.606835\nAction: analogize\nReward: -1.59837\nNext Confusion: 5.999383"} | |
| {"text": "### State\nConfusion: 5.992104\nAction: analogize\nReward: -0.850966\nNext Confusion: 6.851071"} | |
| {"text": "### State\nConfusion: 4.461924\nAction: correct_fact\nReward: -0.385522\nNext Confusion: 5.865354"} | |
| {"text": "### State\nConfusion: 6.803428\nAction: analogize\nReward: 0.091275\nNext Confusion: 6.193048"} | |
| {"text": "### State\nConfusion: 4.592856\nAction: analogize\nReward: 0.028521\nNext Confusion: 4.672834"} | |
| {"text": "### State\nConfusion: 2.804246\nAction: analogize\nReward: -0.216599\nNext Confusion: 2.494085"} | |
| {"text": "### State\nConfusion: 3.900469\nAction: analogize\nReward: -0.715619\nNext Confusion: 3.978828"} | |
| {"text": "### State\nConfusion: 4.378933\nAction: analogize\nReward: -0.515609\nNext Confusion: 5.002777"} | |
| {"text": "### State\nConfusion: 7.587787\nAction: question\nReward: 1.416706\nNext Confusion: 6.875957"} | |
| {"text": "### State\nConfusion: 3.742195\nAction: analogize\nReward: 0.076616\nNext Confusion: 4.102513"} | |
| {"text": "### State\nConfusion: 3.479584\nAction: explain\nReward: 0.616633\nNext Confusion: 2.790334"} | |
| {"text": "### State\nConfusion: 4.141684\nAction: analogize\nReward: -0.697507\nNext Confusion: 4.529926"} | |
| {"text": "### State\nConfusion: 5.484036\nAction: analogize\nReward: -0.203655\nNext Confusion: 6.119545"} | |
| {"text": "### State\nConfusion: 5.041966\nAction: analogize\nReward: 0.478271\nNext Confusion: 4.934764"} | |
| {"text": "### State\nConfusion: 5.643482\nAction: analogize\nReward: 0.330683\nNext Confusion: 6.250234"} | |
| {"text": "### State\nConfusion: 3.039198\nAction: analogize\nReward: -1.140458\nNext Confusion: 4.141131"} | |
| {"text": "### State\nConfusion: 6.9517\nAction: analogize\nReward: 1.121652\nNext Confusion: 6.827886"} | |
| {"text": "### State\nConfusion: 4.896638\nAction: analogize\nReward: 0.44984\nNext Confusion: 4.884718"} | |
| {"text": "### State\nConfusion: 6.982846\nAction: analogize\nReward: -1.013628\nNext Confusion: 7.971094"} | |
| {"text": "### State\nConfusion: 6.060497\nAction: explain\nReward: 0.290709\nNext Confusion: 5.825891"} | |
| {"text": "### State\nConfusion: 3.599947\nAction: analogize\nReward: 0.693696\nNext Confusion: 3.53343"} | |
| {"text": "### State\nConfusion: 4.170319\nAction: correct_fact\nReward: 0.898607\nNext Confusion: 3.590391"} | |
| {"text": "### State\nConfusion: 7.032939\nAction: analogize\nReward: -0.729945\nNext Confusion: 8.16857"} | |
| {"text": "### State\nConfusion: 4.053638\nAction: worked_example\nReward: 1.543701\nNext Confusion: 2.689349"} | |
| {"text": "### State\nConfusion: 5.90626\nAction: analogize\nReward: -0.607466\nNext Confusion: 6.439805"} | |
| {"text": "### State\nConfusion: 4.583474\nAction: analogize\nReward: -0.97328\nNext Confusion: 5.106393"} | |
| {"text": "### State\nConfusion: 4.896346\nAction: analogize\nReward: -0.610819\nNext Confusion: 5.848339"} | |
| {"text": "### State\nConfusion: 4.550838\nAction: analogize\nReward: -0.475464\nNext Confusion: 4.483279"} | |
| {"text": "### State\nConfusion: 3.38141\nAction: explain\nReward: 0.504023\nNext Confusion: 3.468134"} | |
| {"text": "### State\nConfusion: 4.350189\nAction: correct_fact\nReward: 0.390332\nNext Confusion: 3.921563"} | |
| {"text": "### State\nConfusion: 6.046617\nAction: explain\nReward: 0.985491\nNext Confusion: 5.920085"} | |
| {"text": "### State\nConfusion: 6.886919\nAction: analogize\nReward: 0.792771\nNext Confusion: 6.498229"} | |
| {"text": "### State\nConfusion: 4.564333\nAction: analogize\nReward: 0.421946\nNext Confusion: 4.38198"} | |
| {"text": "### State\nConfusion: 3.378859\nAction: analogize\nReward: -0.767424\nNext Confusion: 3.908994"} | |
| {"text": "### State\nConfusion: 5.998241\nAction: analogize\nReward: -0.010588\nNext Confusion: 6.310215"} | |
| {"text": "### State\nConfusion: 3.777984\nAction: analogize\nReward: 0.490388\nNext Confusion: 3.513688"} | |
| {"text": "### State\nConfusion: 7.019506\nAction: analogize\nReward: -0.255347\nNext Confusion: 8.119068"} | |
| {"text": "### State\nConfusion: 2.699495\nAction: analogize\nReward: -1.213656\nNext Confusion: 3.973977"} | |
| {"text": "### State\nConfusion: 4.276931\nAction: worked_example\nReward: 2.073769\nNext Confusion: 3.504191"} | |
| {"text": "### State\nConfusion: 7.270848\nAction: question\nReward: 0.602047\nNext Confusion: 6.808021"} | |
| {"text": "### State\nConfusion: 6.406384\nAction: explain\nReward: 0.478975\nNext Confusion: 6.852021"} | |
| {"text": "### State\nConfusion: 6.955819\nAction: explain\nReward: -1.291024\nNext Confusion: 6.745135"} | |
| {"text": "### State\nConfusion: 5.362579\nAction: question\nReward: 0.291444\nNext Confusion: 5.313715"} | |
| {"text": "### State\nConfusion: 7.497689\nAction: analogize\nReward: 0.087838\nNext Confusion: 7.549163"} | |
| {"text": "### State\nConfusion: 3.399238\nAction: explain\nReward: 1.114128\nNext Confusion: 2.626598"} | |
| {"text": "### State\nConfusion: 5.609217\nAction: analogize\nReward: -0.140091\nNext Confusion: 5.979127"} | |
| {"text": "### State\nConfusion: 3.267017\nAction: question\nReward: -0.669656\nNext Confusion: 3.420053"} | |
| {"text": "### State\nConfusion: 4.157508\nAction: correct_fact\nReward: -0.824236\nNext Confusion: 4.648945"} | |
| {"text": "### State\nConfusion: 3.989922\nAction: correct_fact\nReward: -0.101446\nNext Confusion: 4.216505"} | |
| {"text": "### State\nConfusion: 2.22757\nAction: analogize\nReward: -1.115784\nNext Confusion: 3.369227"} | |
| {"text": "### State\nConfusion: 8.759944\nAction: worked_example\nReward: 1.584504\nNext Confusion: 7.617804"} | |
| {"text": "### State\nConfusion: 9.389155\nAction: analogize\nReward: 0.213027\nNext Confusion: 9.195565"} | |
| {"text": "### State\nConfusion: 8.246535\nAction: analogize\nReward: 0.202118\nNext Confusion: 8.268204"} | |
| {"text": "### State\nConfusion: 5.907152\nAction: analogize\nReward: 0.268129\nNext Confusion: 6.087424"} | |
| {"text": "### State\nConfusion: 6.813118\nAction: worked_example\nReward: 0.737036\nNext Confusion: 6.27529"} | |
| {"text": "### State\nConfusion: 6.398956\nAction: explain\nReward: 0.268035\nNext Confusion: 5.872039"} | |
| {"text": "### State\nConfusion: 3.590961\nAction: question\nReward: 1.10202\nNext Confusion: 2.945081"} | |
| {"text": "### State\nConfusion: 6.938659\nAction: worked_example\nReward: 2.218891\nNext Confusion: 5.106211"} | |
| {"text": "### State\nConfusion: 3.289629\nAction: analogize\nReward: -0.180204\nNext Confusion: 3.782614"} | |
| {"text": "### State\nConfusion: 7.283386\nAction: analogize\nReward: 1.03868\nNext Confusion: 7.272589"} | |
| {"text": "### State\nConfusion: 2.666499\nAction: worked_example\nReward: 1.600319\nNext Confusion: 1.500345"} | |
| {"text": "### State\nConfusion: 5.878577\nAction: analogize\nReward: 0.211902\nNext Confusion: 5.616477"} | |
| {"text": "### State\nConfusion: 2.740933\nAction: analogize\nReward: -1.123915\nNext Confusion: 3.60959"} | |
| {"text": "### State\nConfusion: 4.148976\nAction: analogize\nReward: 0.320144\nNext Confusion: 4.609655"} | |
| {"text": "### State\nConfusion: 4.86979\nAction: analogize\nReward: -1.519899\nNext Confusion: 5.834114"} | |
| {"text": "### State\nConfusion: 3.21267\nAction: analogize\nReward: 0.160049\nNext Confusion: 3.229467"} | |
| {"text": "### State\nConfusion: 7.290231\nAction: analogize\nReward: 0.607579\nNext Confusion: 7.191082"} | |
| {"text": "### State\nConfusion: 3.509348\nAction: worked_example\nReward: 2.846924\nNext Confusion: 1.174477"} | |
| {"text": "### State\nConfusion: 3.844179\nAction: question\nReward: 0.169155\nNext Confusion: 3.657271"} | |
| {"text": "### State\nConfusion: 3.89389\nAction: analogize\nReward: 0.439295\nNext Confusion: 4.078422"} | |
| {"text": "### State\nConfusion: 4.463733\nAction: analogize\nReward: -0.699769\nNext Confusion: 5.113838"} | |
| {"text": "### State\nConfusion: 3.780769\nAction: explain\nReward: 0.816708\nNext Confusion: 3.649158"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.198227\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 7.127927\nAction: worked_example\nReward: 0.287837\nNext Confusion: 6.968576"} | |
| {"text": "### State\nConfusion: 3.089982\nAction: analogize\nReward: -1.625967\nNext Confusion: 4.475578"} | |
| {"text": "### State\nConfusion: 5.169077\nAction: explain\nReward: 0.535541\nNext Confusion: 5.222714"} | |
| {"text": "### State\nConfusion: 4.358125\nAction: explain\nReward: 0.380645\nNext Confusion: 3.897497"} | |
| {"text": "### State\nConfusion: 3.767699\nAction: analogize\nReward: -1.200743\nNext Confusion: 4.472256"} | |
| {"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.535228\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 8.875999\nAction: worked_example\nReward: 0.76591\nNext Confusion: 7.645343"} | |
| {"text": "### State\nConfusion: 3.52916\nAction: analogize\nReward: 0.344736\nNext Confusion: 2.717469"} | |
| {"text": "### State\nConfusion: 2.586894\nAction: explain\nReward: -0.517557\nNext Confusion: 3.244847"} | |
| {"text": "### State\nConfusion: 1.909156\nAction: analogize\nReward: -1.324682\nNext Confusion: 2.857654"} | |
| {"text": "### State\nConfusion: 4.005799\nAction: analogize\nReward: -0.305104\nNext Confusion: 3.669192"} | |
| {"text": "### State\nConfusion: 4.355139\nAction: analogize\nReward: 0.109617\nNext Confusion: 4.115402"} | |
| {"text": "### State\nConfusion: 6.679668\nAction: analogize\nReward: 0.091741\nNext Confusion: 6.599718"} | |
| {"text": "### State\nConfusion: 4.419393\nAction: question\nReward: 1.469747\nNext Confusion: 3.633803"} | |
| {"text": "### State\nConfusion: 6.384556\nAction: worked_example\nReward: 1.95714\nNext Confusion: 5.177717"} | |
| {"text": "### State\nConfusion: 4.349121\nAction: worked_example\nReward: 1.24314\nNext Confusion: 3.26899"} | |
| {"text": "### State\nConfusion: 3.324754\nAction: analogize\nReward: -1.137162\nNext Confusion: 3.613057"} | |
| {"text": "### State\nConfusion: 4.626129\nAction: question\nReward: -0.010993\nNext Confusion: 4.778299"} | |
| {"text": "### State\nConfusion: 5.514156\nAction: question\nReward: 0.89604\nNext Confusion: 4.711784"} | |
| {"text": "### State\nConfusion: 4.125212\nAction: analogize\nReward: -0.73635\nNext Confusion: 4.89324"} | |
| {"text": "### State\nConfusion: 3.397703\nAction: analogize\nReward: -1.003393\nNext Confusion: 4.007573"} | |
| {"text": "### State\nConfusion: 5.827044\nAction: analogize\nReward: 0.119715\nNext Confusion: 5.987204"} | |
| {"text": "### State\nConfusion: 3.580581\nAction: analogize\nReward: -0.213034\nNext Confusion: 4.094987"} | |
| {"text": "### State\nConfusion: 4.512679\nAction: explain\nReward: 1.103546\nNext Confusion: 3.983216"} | |
| {"text": "### State\nConfusion: 3.377749\nAction: explain\nReward: 0.041333\nNext Confusion: 2.830174"} | |
| {"text": "### State\nConfusion: 7.021051\nAction: explain\nReward: -0.138561\nNext Confusion: 6.639877"} | |
| {"text": "### State\nConfusion: 3.677575\nAction: analogize\nReward: -0.28998\nNext Confusion: 4.96881"} | |
| {"text": "### State\nConfusion: 4.841562\nAction: correct_fact\nReward: -1.341953\nNext Confusion: 5.510637"} | |
| {"text": "### State\nConfusion: 5.860694\nAction: question\nReward: 1.317609\nNext Confusion: 4.7185"} | |
| {"text": "### State\nConfusion: 5.116674\nAction: worked_example\nReward: 0.900413\nNext Confusion: 3.554832"} | |
| {"text": "### State\nConfusion: 4.904849\nAction: explain\nReward: 0.922088\nNext Confusion: 4.84363"} | |
| {"text": "### State\nConfusion: 4.016333\nAction: explain\nReward: 0.102206\nNext Confusion: 3.400156"} | |
| {"text": "### State\nConfusion: 3.335467\nAction: analogize\nReward: -0.579045\nNext Confusion: 4.053781"} | |
| {"text": "### State\nConfusion: 3.975311\nAction: explain\nReward: 0.27217\nNext Confusion: 3.46649"} | |
| {"text": "### State\nConfusion: 4.029582\nAction: correct_fact\nReward: -0.498656\nNext Confusion: 4.146517"} | |
| {"text": "### State\nConfusion: 5.653948\nAction: analogize\nReward: 0.806549\nNext Confusion: 5.142787"} | |
| {"text": "### State\nConfusion: 8.509027\nAction: correct_fact\nReward: -0.040474\nNext Confusion: 8.127693"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.018308\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.399515\nAction: analogize\nReward: 0.051091\nNext Confusion: 3.382378"} | |
| {"text": "### State\nConfusion: 4.03549\nAction: analogize\nReward: -0.74119\nNext Confusion: 4.477384"} | |
| {"text": "### State\nConfusion: 3.929249\nAction: question\nReward: 0.308091\nNext Confusion: 3.854811"} | |
| {"text": "### State\nConfusion: 3.167563\nAction: analogize\nReward: -0.026105\nNext Confusion: 3.739793"} | |
| {"text": "### State\nConfusion: 5.641661\nAction: question\nReward: 0.902291\nNext Confusion: 5.095738"} | |
| {"text": "### State\nConfusion: 5.811388\nAction: analogize\nReward: -0.956837\nNext Confusion: 6.566842"} | |
| {"text": "### State\nConfusion: 4.939756\nAction: analogize\nReward: 1.215886\nNext Confusion: 4.202513"} | |
| {"text": "### State\nConfusion: 5.161591\nAction: question\nReward: 0.582941\nNext Confusion: 4.985189"} | |
| {"text": "### State\nConfusion: 8.777076\nAction: correct_fact\nReward: 0.470891\nNext Confusion: 8.279322"} | |
| {"text": "### State\nConfusion: 4.090277\nAction: worked_example\nReward: 1.559957\nNext Confusion: 2.979815"} | |
| {"text": "### State\nConfusion: 3.339561\nAction: worked_example\nReward: 1.660879\nNext Confusion: 2.123369"} | |
| {"text": "### State\nConfusion: 4.107422\nAction: analogize\nReward: -0.121353\nNext Confusion: 4.283752"} | |
| {"text": "### State\nConfusion: 6.483735\nAction: explain\nReward: -1.197914\nNext Confusion: 7.035842"} | |
| {"text": "### State\nConfusion: 5.573504\nAction: explain\nReward: 0.782266\nNext Confusion: 5.434095"} | |
| {"text": "### State\nConfusion: 3.374913\nAction: analogize\nReward: -0.884737\nNext Confusion: 3.438567"} | |
| {"text": "### State\nConfusion: 3.380767\nAction: explain\nReward: 1.294365\nNext Confusion: 2.789468"} | |
| {"text": "### State\nConfusion: 5.549697\nAction: question\nReward: 1.678421\nNext Confusion: 4.247832"} | |
| {"text": "### State\nConfusion: 3.579567\nAction: correct_fact\nReward: 0.672959\nNext Confusion: 3.633714"} | |
| {"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.020142\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.408587\nAction: correct_fact\nReward: 0.253955\nNext Confusion: 3.646089"} | |
| {"text": "### State\nConfusion: 2.241163\nAction: question\nReward: 0.458367\nNext Confusion: 1.820895"} | |
| {"text": "### State\nConfusion: 7.083729\nAction: analogize\nReward: -0.546349\nNext Confusion: 7.203736"} | |
| {"text": "### State\nConfusion: 3.053561\nAction: analogize\nReward: 0.344281\nNext Confusion: 2.925175"} | |
| {"text": "### State\nConfusion: 4.379266\nAction: question\nReward: 0.309251\nNext Confusion: 3.804448"} | |
| {"text": "### State\nConfusion: 3.469964\nAction: worked_example\nReward: -1.176672\nNext Confusion: 4.13511"} | |
| {"text": "### State\nConfusion: 3.022251\nAction: analogize\nReward: -0.90958\nNext Confusion: 3.919632"} | |
| {"text": "### State\nConfusion: 5.641866\nAction: analogize\nReward: -0.135453\nNext Confusion: 5.936108"} | |
| {"text": "### State\nConfusion: 4.732938\nAction: explain\nReward: 0.067745\nNext Confusion: 4.250308"} | |
| {"text": "### State\nConfusion: 4.456509\nAction: correct_fact\nReward: -0.713073\nNext Confusion: 4.695286"} | |
| {"text": "### State\nConfusion: 3.703105\nAction: analogize\nReward: 0.354142\nNext Confusion: 3.48034"} | |
| {"text": "### State\nConfusion: 3.761903\nAction: analogize\nReward: -0.013819\nNext Confusion: 3.421196"} | |
| {"text": "### State\nConfusion: 4.269754\nAction: analogize\nReward: 0.027496\nNext Confusion: 4.287079"} | |
| {"text": "### State\nConfusion: 5.663837\nAction: explain\nReward: -0.360503\nNext Confusion: 6.315889"} | |
| {"text": "### State\nConfusion: 5.372044\nAction: question\nReward: 1.080587\nNext Confusion: 4.743021"} | |
| {"text": "### State\nConfusion: 2.501908\nAction: analogize\nReward: -0.534156\nNext Confusion: 3.061587"} | |
| {"text": "### State\nConfusion: 6.000041\nAction: explain\nReward: 0.183818\nNext Confusion: 5.951022"} | |
| {"text": "### State\nConfusion: 4.515942\nAction: question\nReward: 0.491305\nNext Confusion: 4.483781"} | |
| {"text": "### State\nConfusion: 3.967531\nAction: question\nReward: 0.292389\nNext Confusion: 3.614035"} | |
| {"text": "### State\nConfusion: 6.566327\nAction: question\nReward: 0.468035\nNext Confusion: 6.588855"} | |
| {"text": "### State\nConfusion: 5.682308\nAction: worked_example\nReward: 2.358126\nNext Confusion: 3.606454"} | |
| {"text": "### State\nConfusion: 3.510834\nAction: analogize\nReward: -0.694941\nNext Confusion: 3.978493"} | |
| {"text": "### State\nConfusion: 4.583116\nAction: question\nReward: -0.700366\nNext Confusion: 5.10747"} | |
| {"text": "### State\nConfusion: 7.197812\nAction: analogize\nReward: -0.017298\nNext Confusion: 7.292065"} | |
| {"text": "### State\nConfusion: 6.225952\nAction: analogize\nReward: 0.520952\nNext Confusion: 5.862716"} | |
| {"text": "### State\nConfusion: 4.177542\nAction: explain\nReward: 0.807856\nNext Confusion: 3.491008"} | |
| {"text": "### State\nConfusion: 3.897059\nAction: analogize\nReward: 0.338542\nNext Confusion: 4.205773"} | |
| {"text": "### State\nConfusion: 5.013536\nAction: analogize\nReward: -0.733278\nNext Confusion: 4.34735"} | |
| {"text": "### State\nConfusion: 5.245119\nAction: question\nReward: 1.180365\nNext Confusion: 3.996855"} | |
| {"text": "### State\nConfusion: 3.496643\nAction: analogize\nReward: -0.370052\nNext Confusion: 3.667639"} | |
| {"text": "### State\nConfusion: 3.817\nAction: analogize\nReward: 0.105576\nNext Confusion: 3.961266"} | |
| {"text": "### State\nConfusion: 3.864491\nAction: analogize\nReward: 0.184483\nNext Confusion: 4.035629"} | |
| {"text": "### State\nConfusion: 4.685336\nAction: analogize\nReward: -0.242223\nNext Confusion: 4.233638"} | |
| {"text": "### State\nConfusion: 3.923928\nAction: analogize\nReward: -0.662827\nNext Confusion: 3.973851"} | |
| {"text": "### State\nConfusion: 2.349183\nAction: analogize\nReward: -0.344813\nNext Confusion: 2.246699"} | |
| {"text": "### State\nConfusion: 4.223802\nAction: analogize\nReward: 0.470074\nNext Confusion: 4.347506"} | |
| {"text": "### State\nConfusion: 5.313014\nAction: explain\nReward: -1.549022\nNext Confusion: 5.749806"} | |
| {"text": "### State\nConfusion: 4.683001\nAction: analogize\nReward: -0.010471\nNext Confusion: 4.584553"} | |
| {"text": "### State\nConfusion: 4.45898\nAction: analogize\nReward: 0.368356\nNext Confusion: 4.006063"} | |
| {"text": "### State\nConfusion: 5.791666\nAction: analogize\nReward: -0.187986\nNext Confusion: 6.063031"} | |
| {"text": "### State\nConfusion: 3.376903\nAction: analogize\nReward: -1.143536\nNext Confusion: 4.587175"} | |
| {"text": "### State\nConfusion: 9.335158\nAction: worked_example\nReward: 2.152653\nNext Confusion: 8.317876"} | |
| {"text": "### State\nConfusion: 3.621349\nAction: analogize\nReward: 0.582054\nNext Confusion: 3.243578"} | |
| {"text": "### State\nConfusion: 4.74215\nAction: question\nReward: 1.291047\nNext Confusion: 3.348962"} | |
| {"text": "### State\nConfusion: 5.683502\nAction: worked_example\nReward: 1.535629\nNext Confusion: 4.375916"} | |
| {"text": "### State\nConfusion: 4.821148\nAction: worked_example\nReward: 1.047725\nNext Confusion: 3.557803"} | |
| {"text": "### State\nConfusion: 3.149289\nAction: analogize\nReward: 0.743005\nNext Confusion: 2.77953"} | |
| {"text": "### State\nConfusion: 5.93888\nAction: analogize\nReward: 0.198077\nNext Confusion: 5.99675"} | |
| {"text": "### State\nConfusion: 3.840465\nAction: analogize\nReward: -1.367385\nNext Confusion: 4.494304"} | |
| {"text": "### State\nConfusion: 5.273225\nAction: worked_example\nReward: 2.120614\nNext Confusion: 3.968337"} | |
| {"text": "### State\nConfusion: 3.967179\nAction: analogize\nReward: 0.973108\nNext Confusion: 3.397983"} | |
| {"text": "### State\nConfusion: 4.808587\nAction: correct_fact\nReward: -0.525625\nNext Confusion: 5.590657"} | |
| {"text": "### State\nConfusion: 4.419318\nAction: analogize\nReward: -0.39736\nNext Confusion: 5.171327"} | |
| {"text": "### State\nConfusion: 3.713422\nAction: analogize\nReward: -0.434615\nNext Confusion: 4.516202"} | |
| {"text": "### State\nConfusion: 4.057846\nAction: analogize\nReward: -0.236593\nNext Confusion: 4.158133"} | |
| {"text": "### State\nConfusion: 5.812652\nAction: analogize\nReward: 0.194617\nNext Confusion: 6.174366"} | |
| {"text": "### State\nConfusion: 3.962215\nAction: correct_fact\nReward: -0.493041\nNext Confusion: 4.044711"} | |
| {"text": "### State\nConfusion: 3.95616\nAction: correct_fact\nReward: -0.764486\nNext Confusion: 4.490559"} | |
| {"text": "### State\nConfusion: 4.067618\nAction: explain\nReward: 0.753649\nNext Confusion: 3.377144"} | |
| {"text": "### State\nConfusion: 3.963561\nAction: correct_fact\nReward: -0.03958\nNext Confusion: 4.470316"} | |
| {"text": "### State\nConfusion: 4.72749\nAction: correct_fact\nReward: 0.187432\nNext Confusion: 4.308219"} | |
| {"text": "### State\nConfusion: 4.982907\nAction: analogize\nReward: -0.282559\nNext Confusion: 5.749441"} | |
| {"text": "### State\nConfusion: 4.76151\nAction: correct_fact\nReward: -1.59992\nNext Confusion: 5.914158"} | |
| {"text": "### State\nConfusion: 6.825388\nAction: analogize\nReward: -0.197075\nNext Confusion: 7.36307"} | |
| {"text": "### State\nConfusion: 6.623585\nAction: analogize\nReward: -1.496314\nNext Confusion: 7.01574"} | |
| {"text": "### State\nConfusion: 5.225947\nAction: explain\nReward: 0.299021\nNext Confusion: 5.249215"} | |
| {"text": "### State\nConfusion: 4.701275\nAction: question\nReward: 0.376803\nNext Confusion: 4.108999"} | |
| {"text": "### State\nConfusion: 6.925973\nAction: analogize\nReward: -0.90206\nNext Confusion: 7.930647"} | |
| {"text": "### State\nConfusion: 5.79284\nAction: question\nReward: 1.013098\nNext Confusion: 5.272104"} | |
| {"text": "### State\nConfusion: 6.839365\nAction: question\nReward: 0.121905\nNext Confusion: 7.054125"} | |
| {"text": "### State\nConfusion: 3.869141\nAction: analogize\nReward: -0.052189\nNext Confusion: 4.202905"} | |
| {"text": "### State\nConfusion: 3.541264\nAction: analogize\nReward: -0.124331\nNext Confusion: 3.888026"} | |
| {"text": "### State\nConfusion: 8.227653\nAction: analogize\nReward: 0.535528\nNext Confusion: 8.023672"} | |
| {"text": "### State\nConfusion: 3.389575\nAction: analogize\nReward: 0.083942\nNext Confusion: 4.252772"} | |
| {"text": "### State\nConfusion: 6.12198\nAction: analogize\nReward: -0.017456\nNext Confusion: 6.250541"} | |
| {"text": "### State\nConfusion: 3.908567\nAction: analogize\nReward: 0.852892\nNext Confusion: 3.425391"} | |
| {"text": "### State\nConfusion: 5.787926\nAction: analogize\nReward: -0.61093\nNext Confusion: 6.209976"} | |
| {"text": "### State\nConfusion: 3.852343\nAction: correct_fact\nReward: 0.01279\nNext Confusion: 3.730582"} | |
| {"text": "### State\nConfusion: 3.345473\nAction: analogize\nReward: -0.182761\nNext Confusion: 4.095672"} | |
| {"text": "### State\nConfusion: 6.41593\nAction: correct_fact\nReward: -1.011081\nNext Confusion: 6.550212"} | |
| {"text": "### State\nConfusion: 5.542386\nAction: analogize\nReward: 0.529705\nNext Confusion: 5.213415"} | |
| {"text": "### State\nConfusion: 3.542093\nAction: analogize\nReward: -0.150191\nNext Confusion: 3.909852"} | |
| {"text": "### State\nConfusion: 3.518513\nAction: correct_fact\nReward: -0.261489\nNext Confusion: 3.642212"} | |
| {"text": "### State\nConfusion: 8.754671\nAction: analogize\nReward: -0.524548\nNext Confusion: 9.529492"} | |
| {"text": "### State\nConfusion: 3.811084\nAction: analogize\nReward: -0.528901\nNext Confusion: 4.075631"} | |
| {"text": "### State\nConfusion: 2.964789\nAction: worked_example\nReward: 2.127217\nNext Confusion: 1.143061"} | |
| {"text": "### State\nConfusion: 5.281669\nAction: correct_fact\nReward: -0.382373\nNext Confusion: 5.910728"} | |
| {"text": "### State\nConfusion: 3.745115\nAction: analogize\nReward: -0.282543\nNext Confusion: 4.276707"} | |
| {"text": "### State\nConfusion: 9.596537\nAction: worked_example\nReward: 0.622909\nNext Confusion: 8.939276"} | |
| {"text": "### State\nConfusion: 4.492318\nAction: correct_fact\nReward: -0.717\nNext Confusion: 5.227583"} | |
| {"text": "### State\nConfusion: 3.361198\nAction: analogize\nReward: -0.411737\nNext Confusion: 3.623981"} | |
| {"text": "### State\nConfusion: 8.01341\nAction: explain\nReward: 0.07588\nNext Confusion: 7.766606"} | |
| {"text": "### State\nConfusion: 5.438063\nAction: explain\nReward: 2.170198\nNext Confusion: 3.728784"} | |
| {"text": "### State\nConfusion: 4.408485\nAction: analogize\nReward: -1.024377\nNext Confusion: 5.03083"} | |
| {"text": "### State\nConfusion: 3.786148\nAction: analogize\nReward: -1.506429\nNext Confusion: 4.877437"} | |
| {"text": "### State\nConfusion: 3.779745\nAction: worked_example\nReward: 1.154955\nNext Confusion: 3.201455"} | |
| {"text": "### State\nConfusion: 7.605229\nAction: analogize\nReward: -0.006583\nNext Confusion: 7.633517"} | |
| {"text": "### State\nConfusion: 4.4073\nAction: worked_example\nReward: 0.928354\nNext Confusion: 3.085973"} | |
| {"text": "### State\nConfusion: 3.486942\nAction: question\nReward: -0.344526\nNext Confusion: 3.059474"} | |
| {"text": "### State\nConfusion: 8.268665\nAction: analogize\nReward: -0.228095\nNext Confusion: 8.628764"} | |
| {"text": "### State\nConfusion: 4.041603\nAction: worked_example\nReward: 2.745715\nNext Confusion: 2.3753"} | |
| {"text": "### State\nConfusion: 1.978829\nAction: explain\nReward: 0.773021\nNext Confusion: 1.639348"} | |
| {"text": "### State\nConfusion: 5.565595\nAction: analogize\nReward: -0.93579\nNext Confusion: 6.736066"} | |
| {"text": "### State\nConfusion: 3.291308\nAction: explain\nReward: 0.089945\nNext Confusion: 3.398372"} | |
| {"text": "### State\nConfusion: 5.692618\nAction: question\nReward: 0.291638\nNext Confusion: 5.638468"} | |
| {"text": "### State\nConfusion: 6.097005\nAction: worked_example\nReward: 1.610145\nNext Confusion: 4.944908"} | |
| {"text": "### State\nConfusion: 1.66989\nAction: question\nReward: 1.231649\nNext Confusion: 0.598919"} | |
| {"text": "### State\nConfusion: 3.015389\nAction: question\nReward: -0.368871\nNext Confusion: 3.171424"} | |
| {"text": "### State\nConfusion: 1.648728\nAction: worked_example\nReward: 2.086258\nNext Confusion: 0.0"} | |
| {"text": "### State\nConfusion: 6.370827\nAction: correct_fact\nReward: -0.578435\nNext Confusion: 6.86617"} | |
| {"text": "### State\nConfusion: 3.956089\nAction: analogize\nReward: -0.351387\nNext Confusion: 4.47287"} | |
| {"text": "### State\nConfusion: 3.763646\nAction: analogize\nReward: -0.429977\nNext Confusion: 4.198875"} | |
| {"text": "### State\nConfusion: 3.052423\nAction: question\nReward: 1.034561\nNext Confusion: 1.931299"} | |
| {"text": "### State\nConfusion: 7.488086\nAction: analogize\nReward: -0.887759\nNext Confusion: 7.420434"} | |
| {"text": "### State\nConfusion: 3.754042\nAction: question\nReward: 0.725976\nNext Confusion: 2.907831"} | |
| {"text": "### State\nConfusion: 6.115601\nAction: analogize\nReward: -0.929617\nNext Confusion: 7.082544"} | |
| {"text": "### State\nConfusion: 3.945445\nAction: analogize\nReward: -0.264867\nNext Confusion: 4.259188"} | |
| {"text": "### State\nConfusion: 5.845489\nAction: analogize\nReward: -0.983428\nNext Confusion: 6.855581"} | |
| {"text": "### State\nConfusion: 3.867403\nAction: analogize\nReward: 0.147113\nNext Confusion: 4.162812"} | |
| {"text": "### State\nConfusion: 3.782122\nAction: analogize\nReward: -0.020962\nNext Confusion: 3.530049"} | |
| {"text": "### State\nConfusion: 8.655141\nAction: worked_example\nReward: 1.226595\nNext Confusion: 6.920914"} | |
| {"text": "### State\nConfusion: 2.859175\nAction: analogize\nReward: -0.561934\nNext Confusion: 3.274979"} | |
| {"text": "### State\nConfusion: 3.47642\nAction: analogize\nReward: -0.883036\nNext Confusion: 4.15313"} | |
| {"text": "### State\nConfusion: 7.00052\nAction: worked_example\nReward: 0.545365\nNext Confusion: 7.42833"} | |
| {"text": "### State\nConfusion: 3.649172\nAction: analogize\nReward: -0.320634\nNext Confusion: 3.90222"} | |
| {"text": "### State\nConfusion: 2.461097\nAction: question\nReward: 1.631821\nNext Confusion: 2.2893"} | |
| {"text": "### State\nConfusion: 6.028571\nAction: explain\nReward: 0.061264\nNext Confusion: 5.679068"} | |
| {"text": "### State\nConfusion: 3.460609\nAction: analogize\nReward: 0.141836\nNext Confusion: 3.841259"} | |
| {"text": "### State\nConfusion: 6.036307\nAction: analogize\nReward: -0.882709\nNext Confusion: 7.126093"} | |
| {"text": "### State\nConfusion: 2.88672\nAction: analogize\nReward: -0.273486\nNext Confusion: 3.479902"} | |
| {"text": "### State\nConfusion: 2.820439\nAction: analogize\nReward: -0.92288\nNext Confusion: 3.16166"} | |
| {"text": "### State\nConfusion: 4.613919\nAction: explain\nReward: -0.088816\nNext Confusion: 4.993456"} | |
| {"text": "### State\nConfusion: 3.547463\nAction: explain\nReward: 0.985073\nNext Confusion: 2.734695"} | |
| {"text": "### State\nConfusion: 4.721343\nAction: analogize\nReward: 0.242835\nNext Confusion: 5.540349"} | |
| {"text": "### State\nConfusion: 4.678206\nAction: analogize\nReward: 0.387364\nNext Confusion: 4.468099"} | |
| {"text": "### State\nConfusion: 4.515748\nAction: worked_example\nReward: 1.756244\nNext Confusion: 3.44937"} | |
| {"text": "### State\nConfusion: 6.943506\nAction: question\nReward: 1.439846\nNext Confusion: 5.421192"} | |
| {"text": "### State\nConfusion: 4.314752\nAction: correct_fact\nReward: 0.779412\nNext Confusion: 3.266661"} | |
| {"text": "### State\nConfusion: 5.301826\nAction: analogize\nReward: 0.437882\nNext Confusion: 4.87753"} | |
| {"text": "### State\nConfusion: 9.092433\nAction: analogize\nReward: -0.455093\nNext Confusion: 9.519798"} | |
| {"text": "### State\nConfusion: 5.318067\nAction: correct_fact\nReward: 0.820235\nNext Confusion: 5.713635"} | |
| {"text": "### State\nConfusion: 2.411903\nAction: analogize\nReward: -1.105515\nNext Confusion: 3.279925"} | |
| {"text": "### State\nConfusion: 3.247107\nAction: explain\nReward: 0.429104\nNext Confusion: 2.74135"} | |
| {"text": "### State\nConfusion: 6.919804\nAction: question\nReward: 0.885016\nNext Confusion: 6.092538"} | |
| {"text": "### State\nConfusion: 4.203641\nAction: question\nReward: -0.063637\nNext Confusion: 3.856217"} | |
| {"text": "### State\nConfusion: 3.565731\nAction: correct_fact\nReward: 0.730722\nNext Confusion: 2.730823"} | |
| {"text": "### State\nConfusion: 3.939648\nAction: analogize\nReward: -0.425959\nNext Confusion: 3.892347"} | |
| {"text": "### State\nConfusion: 4.518892\nAction: question\nReward: -1.051303\nNext Confusion: 4.840122"} | |
| {"text": "### State\nConfusion: 3.858027\nAction: explain\nReward: 0.299108\nNext Confusion: 3.981068"} | |
| {"text": "### State\nConfusion: 2.704036\nAction: explain\nReward: -0.342533\nNext Confusion: 3.274198"} | |
| {"text": "### State\nConfusion: 3.906797\nAction: explain\nReward: 0.579545\nNext Confusion: 3.39401"} | |
| {"text": "### State\nConfusion: 4.712357\nAction: analogize\nReward: 0.441648\nNext Confusion: 4.503565"} | |
| {"text": "### State\nConfusion: 3.351565\nAction: question\nReward: 0.226935\nNext Confusion: 3.119711"} | |
| {"text": "### State\nConfusion: 3.833758\nAction: explain\nReward: 0.490659\nNext Confusion: 3.624048"} | |
| {"text": "### State\nConfusion: 3.916031\nAction: analogize\nReward: -0.824355\nNext Confusion: 5.0153"} | |
| {"text": "### State\nConfusion: 4.437003\nAction: explain\nReward: 0.433959\nNext Confusion: 4.287864"} | |
| {"text": "### State\nConfusion: 5.354143\nAction: analogize\nReward: -0.384773\nNext Confusion: 6.078944"} | |
| {"text": "### State\nConfusion: 8.400786\nAction: worked_example\nReward: -0.597962\nNext Confusion: 8.667874"} | |
| {"text": "### State\nConfusion: 7.149515\nAction: correct_fact\nReward: 1.278329\nNext Confusion: 6.462344"} | |
| {"text": "### State\nConfusion: 5.102567\nAction: question\nReward: 0.804076\nNext Confusion: 5.377817"} | |
| {"text": "### State\nConfusion: 3.092537\nAction: analogize\nReward: -0.466619\nNext Confusion: 3.818028"} | |
| {"text": "### State\nConfusion: 5.887365\nAction: analogize\nReward: -1.052783\nNext Confusion: 7.368321"} | |
| {"text": "### State\nConfusion: 4.274332\nAction: analogize\nReward: 1.093872\nNext Confusion: 4.345952"} | |
| {"text": "### State\nConfusion: 9.127607\nAction: analogize\nReward: -0.598524\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 4.085041\nAction: analogize\nReward: -1.321249\nNext Confusion: 5.029472"} | |
| {"text": "### State\nConfusion: 6.516422\nAction: worked_example\nReward: 0.462508\nNext Confusion: 5.468136"} | |
| {"text": "### State\nConfusion: 7.023285\nAction: question\nReward: 1.22947\nNext Confusion: 5.970022"} | |
| {"text": "### State\nConfusion: 3.508864\nAction: analogize\nReward: -0.749677\nNext Confusion: 4.38532"} | |
| {"text": "### State\nConfusion: 4.205511\nAction: analogize\nReward: 0.205528\nNext Confusion: 3.6198"} | |
| {"text": "### State\nConfusion: 5.185164\nAction: analogize\nReward: -1.097998\nNext Confusion: 6.383344"} | |
| {"text": "### State\nConfusion: 6.544308\nAction: explain\nReward: 0.402704\nNext Confusion: 6.435021"} | |
| {"text": "### State\nConfusion: 5.587864\nAction: analogize\nReward: 0.020874\nNext Confusion: 5.902252"} | |
| {"text": "### State\nConfusion: 4.968722\nAction: analogize\nReward: -0.261211\nNext Confusion: 5.042747"} | |
| {"text": "### State\nConfusion: 3.320405\nAction: worked_example\nReward: 1.129541\nNext Confusion: 2.346201"} | |
| {"text": "### State\nConfusion: 4.647589\nAction: explain\nReward: 1.044288\nNext Confusion: 4.038287"} | |
| {"text": "### State\nConfusion: 6.729565\nAction: question\nReward: 0.806191\nNext Confusion: 6.08601"} | |
| {"text": "### State\nConfusion: 5.97209\nAction: analogize\nReward: -0.404765\nNext Confusion: 6.406681"} | |
| {"text": "### State\nConfusion: 3.632277\nAction: analogize\nReward: -0.610554\nNext Confusion: 3.787645"} | |
| {"text": "### State\nConfusion: 5.321507\nAction: analogize\nReward: 0.456858\nNext Confusion: 5.306321"} | |
| {"text": "### State\nConfusion: 3.609982\nAction: explain\nReward: 0.527262\nNext Confusion: 3.772228"} | |
| {"text": "### State\nConfusion: 7.68092\nAction: analogize\nReward: -0.650275\nNext Confusion: 8.177908"} | |
| {"text": "### State\nConfusion: 5.233692\nAction: worked_example\nReward: 1.109516\nNext Confusion: 4.001993"} | |
| {"text": "### State\nConfusion: 5.192301\nAction: analogize\nReward: 0.364094\nNext Confusion: 5.636886"} | |
| {"text": "### State\nConfusion: 5.681577\nAction: explain\nReward: 1.501193\nNext Confusion: 5.096723"} | |
| {"text": "### State\nConfusion: 3.494826\nAction: worked_example\nReward: 0.476455\nNext Confusion: 3.671003"} | |
| {"text": "### State\nConfusion: 4.405832\nAction: analogize\nReward: 0.198525\nNext Confusion: 4.154148"} | |
| {"text": "### State\nConfusion: 1.466139\nAction: worked_example\nReward: 1.668556\nNext Confusion: 0.0"} | |
| {"text": "### State\nConfusion: 4.717231\nAction: analogize\nReward: -0.452539\nNext Confusion: 5.626471"} | |
| {"text": "### State\nConfusion: 3.871939\nAction: analogize\nReward: -0.476882\nNext Confusion: 4.771052"} | |
| {"text": "### State\nConfusion: 3.444428\nAction: explain\nReward: 1.106501\nNext Confusion: 3.333343"} | |
| {"text": "### State\nConfusion: 4.196409\nAction: analogize\nReward: 0.381661\nNext Confusion: 4.17294"} | |
| {"text": "### State\nConfusion: 6.663786\nAction: question\nReward: -0.873553\nNext Confusion: 6.942137"} | |
| {"text": "### State\nConfusion: 5.515987\nAction: analogize\nReward: -1.334938\nNext Confusion: 6.764931"} | |
| {"text": "### State\nConfusion: 5.240704\nAction: analogize\nReward: 0.294774\nNext Confusion: 4.956213"} | |
| {"text": "### State\nConfusion: 3.792751\nAction: question\nReward: 0.624638\nNext Confusion: 3.817527"} | |
| {"text": "### State\nConfusion: 4.063591\nAction: explain\nReward: 0.083103\nNext Confusion: 4.243097"} | |
| {"text": "### State\nConfusion: 4.814852\nAction: analogize\nReward: 0.506158\nNext Confusion: 5.234246"} | |
| {"text": "### State\nConfusion: 3.722708\nAction: question\nReward: 0.660337\nNext Confusion: 3.508"} | |
| {"text": "### State\nConfusion: 5.437449\nAction: explain\nReward: 0.486583\nNext Confusion: 5.524333"} | |
| {"text": "### State\nConfusion: 5.493462\nAction: analogize\nReward: -0.408789\nNext Confusion: 6.130176"} | |
| {"text": "### State\nConfusion: 2.043504\nAction: question\nReward: -0.300682\nNext Confusion: 2.3756"} | |
| {"text": "### State\nConfusion: 2.417647\nAction: analogize\nReward: -0.002908\nNext Confusion: 3.053841"} | |
| {"text": "### State\nConfusion: 4.872236\nAction: analogize\nReward: -0.958127\nNext Confusion: 5.264666"} | |
| {"text": "### State\nConfusion: 4.20766\nAction: worked_example\nReward: 0.891922\nNext Confusion: 3.989266"} | |
| {"text": "### State\nConfusion: 5.122287\nAction: worked_example\nReward: 0.598247\nNext Confusion: 4.653445"} | |
| {"text": "### State\nConfusion: 7.352225\nAction: worked_example\nReward: -1.130684\nNext Confusion: 8.396069"} | |
| {"text": "### State\nConfusion: 3.337869\nAction: question\nReward: 1.275082\nNext Confusion: 2.30635"} | |
| {"text": "### State\nConfusion: 5.843043\nAction: analogize\nReward: -0.530916\nNext Confusion: 6.435978"} | |
| {"text": "### State\nConfusion: 3.31257\nAction: question\nReward: 0.431417\nNext Confusion: 2.247037"} | |
| {"text": "### State\nConfusion: 2.412863\nAction: worked_example\nReward: 2.749443\nNext Confusion: 0.069799"} | |
| {"text": "### State\nConfusion: 8.244789\nAction: analogize\nReward: -0.621143\nNext Confusion: 8.557005"} | |
| {"text": "### State\nConfusion: 2.765021\nAction: explain\nReward: 0.933131\nNext Confusion: 1.681604"} | |
| {"text": "### State\nConfusion: 5.545647\nAction: analogize\nReward: 0.764545\nNext Confusion: 4.779349"} | |
| {"text": "### State\nConfusion: 2.562586\nAction: correct_fact\nReward: 0.358028\nNext Confusion: 1.512542"} | |
| {"text": "### State\nConfusion: 3.947003\nAction: analogize\nReward: 0.585956\nNext Confusion: 3.144356"} | |
| {"text": "### State\nConfusion: 7.056403\nAction: question\nReward: -0.392791\nNext Confusion: 7.174994"} | |
| {"text": "### State\nConfusion: 4.349577\nAction: worked_example\nReward: 2.003419\nNext Confusion: 2.622539"} | |
| {"text": "### State\nConfusion: 4.15257\nAction: question\nReward: 1.022439\nNext Confusion: 3.610753"} | |
| {"text": "### State\nConfusion: 3.867925\nAction: analogize\nReward: -0.732856\nNext Confusion: 4.480718"} | |
| {"text": "### State\nConfusion: 6.072416\nAction: analogize\nReward: -0.400762\nNext Confusion: 7.712231"} | |
| {"text": "### State\nConfusion: 5.692263\nAction: analogize\nReward: -0.409731\nNext Confusion: 6.464062"} | |
| {"text": "### State\nConfusion: 1.97927\nAction: correct_fact\nReward: 0.316701\nNext Confusion: 2.032135"} | |
| {"text": "### State\nConfusion: 4.016954\nAction: analogize\nReward: 0.118688\nNext Confusion: 3.82214"} | |
| {"text": "### State\nConfusion: 3.23223\nAction: explain\nReward: 0.74181\nNext Confusion: 2.84285"} | |
| {"text": "### State\nConfusion: 3.751665\nAction: analogize\nReward: 1.273059\nNext Confusion: 2.951627"} | |
| {"text": "### State\nConfusion: 6.280319\nAction: worked_example\nReward: 1.779139\nNext Confusion: 4.778588"} | |
| {"text": "### State\nConfusion: 9.398644\nAction: analogize\nReward: -0.367722\nNext Confusion: 9.828088"} | |
| {"text": "### State\nConfusion: 4.856673\nAction: analogize\nReward: 0.697777\nNext Confusion: 4.792671"} | |
| {"text": "### State\nConfusion: 7.677342\nAction: analogize\nReward: -1.276954\nNext Confusion: 9.007716"} | |
| {"text": "### State\nConfusion: 5.617439\nAction: analogize\nReward: -0.855311\nNext Confusion: 6.225406"} | |
| {"text": "### State\nConfusion: 6.137228\nAction: analogize\nReward: -0.461357\nNext Confusion: 6.903793"} | |
| {"text": "### State\nConfusion: 2.707235\nAction: analogize\nReward: -1.614262\nNext Confusion: 3.073972"} | |
| {"text": "### State\nConfusion: 3.939227\nAction: analogize\nReward: -0.449349\nNext Confusion: 4.274253"} | |
| {"text": "### State\nConfusion: 5.494228\nAction: explain\nReward: 0.047741\nNext Confusion: 6.083399"} | |
| {"text": "### State\nConfusion: 2.533629\nAction: analogize\nReward: -0.39871\nNext Confusion: 2.856948"} | |
| {"text": "### State\nConfusion: 6.453248\nAction: explain\nReward: 1.17852\nNext Confusion: 5.8642"} | |
| {"text": "### State\nConfusion: 9.124298\nAction: correct_fact\nReward: -0.178922\nNext Confusion: 9.275643"} | |
| {"text": "### State\nConfusion: 4.845806\nAction: explain\nReward: -0.259024\nNext Confusion: 5.530763"} | |
| {"text": "### State\nConfusion: 7.197324\nAction: analogize\nReward: -0.734195\nNext Confusion: 7.665397"} | |
| {"text": "### State\nConfusion: 4.266757\nAction: analogize\nReward: -0.860657\nNext Confusion: 4.993144"} | |
| {"text": "### State\nConfusion: 3.85403\nAction: analogize\nReward: -0.180861\nNext Confusion: 3.252461"} | |
| {"text": "### State\nConfusion: 3.62735\nAction: analogize\nReward: 0.028956\nNext Confusion: 3.763162"} | |
| {"text": "### State\nConfusion: 8.335056\nAction: worked_example\nReward: 1.617562\nNext Confusion: 6.925548"} | |
| {"text": "### State\nConfusion: 3.790295\nAction: analogize\nReward: 0.182729\nNext Confusion: 3.939273"} | |
| {"text": "### State\nConfusion: 3.766018\nAction: analogize\nReward: 0.001838\nNext Confusion: 3.849688"} | |
| {"text": "### State\nConfusion: 4.107547\nAction: analogize\nReward: -0.113302\nNext Confusion: 4.152052"} | |
| {"text": "### State\nConfusion: 7.174244\nAction: analogize\nReward: -1.656013\nNext Confusion: 7.383377"} | |
| {"text": "### State\nConfusion: 3.229383\nAction: analogize\nReward: 0.619507\nNext Confusion: 3.097441"} | |
| {"text": "### State\nConfusion: 4.611253\nAction: analogize\nReward: -0.313888\nNext Confusion: 4.727696"} | |
| {"text": "### State\nConfusion: 8.702278\nAction: worked_example\nReward: 1.775019\nNext Confusion: 7.147692"} | |
| {"text": "### State\nConfusion: 4.831955\nAction: worked_example\nReward: 2.335352\nNext Confusion: 3.308958"} | |
| {"text": "### State\nConfusion: 3.681416\nAction: correct_fact\nReward: 0.502263\nNext Confusion: 3.247155"} | |
| {"text": "### State\nConfusion: 6.803329\nAction: analogize\nReward: -1.358429\nNext Confusion: 7.614142"} | |
| {"text": "### State\nConfusion: 6.14641\nAction: question\nReward: 1.105078\nNext Confusion: 5.013781"} | |
| {"text": "### State\nConfusion: 5.067341\nAction: analogize\nReward: -0.67485\nNext Confusion: 5.623613"} | |
| {"text": "### State\nConfusion: 5.740351\nAction: analogize\nReward: 0.43157\nNext Confusion: 5.821089"} | |
| {"text": "### State\nConfusion: 6.492049\nAction: analogize\nReward: -0.261783\nNext Confusion: 6.899039"} | |
| {"text": "### State\nConfusion: 3.546743\nAction: correct_fact\nReward: -0.021746\nNext Confusion: 4.128534"} | |
| {"text": "### State\nConfusion: 3.470161\nAction: analogize\nReward: 1.411296\nNext Confusion: 2.963803"} | |
| {"text": "### State\nConfusion: 7.305857\nAction: explain\nReward: -0.260928\nNext Confusion: 7.525834"} | |
| {"text": "### State\nConfusion: 4.079695\nAction: analogize\nReward: 0.425925\nNext Confusion: 4.2028"} | |
| {"text": "### State\nConfusion: 3.504302\nAction: explain\nReward: -0.24801\nNext Confusion: 3.646383"} | |
| {"text": "### State\nConfusion: 6.140616\nAction: analogize\nReward: -0.194836\nNext Confusion: 6.193009"} | |
| {"text": "### State\nConfusion: 3.614631\nAction: worked_example\nReward: 2.808057\nNext Confusion: 1.710415"} | |
| {"text": "### State\nConfusion: 4.385109\nAction: explain\nReward: 0.867373\nNext Confusion: 4.039882"} | |
| {"text": "### State\nConfusion: 5.97857\nAction: worked_example\nReward: 0.678469\nNext Confusion: 4.711379"} | |
| {"text": "### State\nConfusion: 5.842787\nAction: explain\nReward: 0.530482\nNext Confusion: 5.485232"} | |
| {"text": "### State\nConfusion: 3.791125\nAction: explain\nReward: -0.120156\nNext Confusion: 3.974842"} | |
| {"text": "### State\nConfusion: 5.301826\nAction: analogize\nReward: -0.177215\nNext Confusion: 5.508161"} | |
| {"text": "### State\nConfusion: 2.616865\nAction: explain\nReward: -0.990273\nNext Confusion: 2.763718"} | |
| {"text": "### State\nConfusion: 5.412301\nAction: analogize\nReward: 0.419836\nNext Confusion: 5.34807"} | |
| {"text": "### State\nConfusion: 6.595836\nAction: worked_example\nReward: 0.79788\nNext Confusion: 5.217594"} | |
| {"text": "### State\nConfusion: 6.025871\nAction: analogize\nReward: 0.350992\nNext Confusion: 6.043433"} | |
| {"text": "### State\nConfusion: 8.022219\nAction: analogize\nReward: -0.212897\nNext Confusion: 7.875199"} | |
| {"text": "### State\nConfusion: 7.210607\nAction: correct_fact\nReward: 0.058529\nNext Confusion: 7.347044"} | |
| {"text": "### State\nConfusion: 3.933006\nAction: analogize\nReward: -0.406958\nNext Confusion: 4.179138"} | |
| {"text": "### State\nConfusion: 2.965985\nAction: analogize\nReward: -0.300177\nNext Confusion: 3.029191"} | |
| {"text": "### State\nConfusion: 6.168064\nAction: analogize\nReward: -0.610067\nNext Confusion: 7.171532"} | |
| {"text": "### State\nConfusion: 3.584667\nAction: analogize\nReward: -0.183735\nNext Confusion: 4.292329"} | |
| {"text": "### State\nConfusion: 3.67187\nAction: analogize\nReward: -1.468257\nNext Confusion: 4.664136"} | |
| {"text": "### State\nConfusion: 9.246445\nAction: analogize\nReward: -0.771256\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 4.244543\nAction: analogize\nReward: -0.932989\nNext Confusion: 5.020196"} | |
| {"text": "### State\nConfusion: 3.598906\nAction: worked_example\nReward: 0.802242\nNext Confusion: 2.522359"} | |
| {"text": "### State\nConfusion: 4.469094\nAction: question\nReward: 0.390748\nNext Confusion: 4.036065"} | |
| {"text": "### State\nConfusion: 2.997087\nAction: analogize\nReward: 0.495354\nNext Confusion: 2.832977"} | |
| {"text": "### State\nConfusion: 3.71337\nAction: analogize\nReward: -1.484861\nNext Confusion: 5.056246"} | |
| {"text": "### State\nConfusion: 3.870778\nAction: explain\nReward: 0.392984\nNext Confusion: 3.912732"} | |
| {"text": "### State\nConfusion: 7.175167\nAction: correct_fact\nReward: -0.247504\nNext Confusion: 7.709682"} | |
| {"text": "### State\nConfusion: 3.68497\nAction: worked_example\nReward: 0.488498\nNext Confusion: 3.046532"} | |
| {"text": "### State\nConfusion: 5.920541\nAction: explain\nReward: 1.570687\nNext Confusion: 4.980819"} | |
| {"text": "### State\nConfusion: 4.128818\nAction: analogize\nReward: -0.559041\nNext Confusion: 4.367249"} | |
| {"text": "### State\nConfusion: 4.818598\nAction: analogize\nReward: 0.198094\nNext Confusion: 5.307376"} | |
| {"text": "### State\nConfusion: 4.356233\nAction: analogize\nReward: 0.28791\nNext Confusion: 4.128079"} | |
| {"text": "### State\nConfusion: 5.0037\nAction: analogize\nReward: -1.000578\nNext Confusion: 5.632165"} | |
| {"text": "### State\nConfusion: 3.368967\nAction: analogize\nReward: 1.246838\nNext Confusion: 2.920736"} | |
| {"text": "### State\nConfusion: 3.198133\nAction: worked_example\nReward: 1.680851\nNext Confusion: 1.77803"} | |
| {"text": "### State\nConfusion: 2.746825\nAction: explain\nReward: 0.707715\nNext Confusion: 2.306501"} | |
| {"text": "### State\nConfusion: 3.790369\nAction: analogize\nReward: -0.041662\nNext Confusion: 3.86117"} | |
| {"text": "### State\nConfusion: 2.891872\nAction: correct_fact\nReward: -1.316577\nNext Confusion: 3.124006"} | |
| {"text": "### State\nConfusion: 4.48038\nAction: analogize\nReward: -0.129744\nNext Confusion: 4.754625"} | |
| {"text": "### State\nConfusion: 3.77296\nAction: correct_fact\nReward: 0.427451\nNext Confusion: 3.604445"} | |
| {"text": "### State\nConfusion: 3.225091\nAction: analogize\nReward: -0.507043\nNext Confusion: 3.829337"} | |
| {"text": "### State\nConfusion: 4.265069\nAction: analogize\nReward: -0.038454\nNext Confusion: 4.813678"} | |
| {"text": "### State\nConfusion: 7.177357\nAction: analogize\nReward: -0.298202\nNext Confusion: 8.014968"} | |
| {"text": "### State\nConfusion: 3.607763\nAction: correct_fact\nReward: -0.138047\nNext Confusion: 3.655758"} | |
| {"text": "### State\nConfusion: 8.976868\nAction: worked_example\nReward: 0.023975\nNext Confusion: 8.534332"} | |
| {"text": "### State\nConfusion: 5.039401\nAction: analogize\nReward: -0.601833\nNext Confusion: 5.407775"} | |
| {"text": "### State\nConfusion: 3.497416\nAction: question\nReward: 0.398961\nNext Confusion: 3.075376"} | |
| {"text": "### State\nConfusion: 3.587401\nAction: analogize\nReward: 0.036509\nNext Confusion: 3.688627"} | |
| {"text": "### State\nConfusion: 4.673002\nAction: analogize\nReward: 0.131562\nNext Confusion: 4.674554"} | |
| {"text": "### State\nConfusion: 3.592728\nAction: analogize\nReward: 0.418171\nNext Confusion: 3.849377"} | |
| {"text": "### State\nConfusion: 7.642635\nAction: analogize\nReward: -0.424847\nNext Confusion: 7.853295"} | |
| {"text": "### State\nConfusion: 8.928662\nAction: analogize\nReward: 0.9627\nNext Confusion: 8.62451"} | |
| {"text": "### State\nConfusion: 4.960207\nAction: analogize\nReward: -5.042938\nNext Confusion: 6.014362"} | |
| {"text": "### State\nConfusion: 2.171726\nAction: analogize\nReward: -0.181946\nNext Confusion: 2.327439"} | |
| {"text": "### State\nConfusion: 6.586604\nAction: worked_example\nReward: 0.59854\nNext Confusion: 5.187266"} | |
| {"text": "### State\nConfusion: 3.143118\nAction: analogize\nReward: 0.17305\nNext Confusion: 2.9831"} | |
| {"text": "### State\nConfusion: 3.265929\nAction: analogize\nReward: 0.667022\nNext Confusion: 2.944679"} | |
| {"text": "### State\nConfusion: 7.445652\nAction: question\nReward: 0.353372\nNext Confusion: 6.919861"} | |
| {"text": "### State\nConfusion: 3.062145\nAction: correct_fact\nReward: -1.195106\nNext Confusion: 3.566605"} | |
| {"text": "### State\nConfusion: 4.143488\nAction: question\nReward: -0.733389\nNext Confusion: 4.834106"} | |
| {"text": "### State\nConfusion: 3.424406\nAction: explain\nReward: -0.602404\nNext Confusion: 3.36164"} | |
| {"text": "### State\nConfusion: 3.387601\nAction: explain\nReward: 0.157426\nNext Confusion: 3.688417"} | |
| {"text": "### State\nConfusion: 7.273157\nAction: explain\nReward: 1.178917\nNext Confusion: 6.322024"} | |
| {"text": "### State\nConfusion: 3.979093\nAction: analogize\nReward: -0.631023\nNext Confusion: 4.222602"} | |
| {"text": "### State\nConfusion: 3.696545\nAction: analogize\nReward: -0.230754\nNext Confusion: 4.253306"} | |
| {"text": "### State\nConfusion: 2.966753\nAction: analogize\nReward: -0.52981\nNext Confusion: 3.338369"} | |
| {"text": "### State\nConfusion: 5.275979\nAction: explain\nReward: 0.51978\nNext Confusion: 4.586684"} | |
| {"text": "### State\nConfusion: 4.143547\nAction: analogize\nReward: -0.556281\nNext Confusion: 4.511189"} | |
| {"text": "### State\nConfusion: 6.479927\nAction: explain\nReward: 0.668874\nNext Confusion: 6.162917"} | |
| {"text": "### State\nConfusion: 3.578943\nAction: question\nReward: 1.043996\nNext Confusion: 2.879315"} | |
| {"text": "### State\nConfusion: 4.859414\nAction: worked_example\nReward: 2.318581\nNext Confusion: 2.760855"} | |
| {"text": "### State\nConfusion: 3.379685\nAction: correct_fact\nReward: -0.36739\nNext Confusion: 3.626151"} | |
| {"text": "### State\nConfusion: 4.285751\nAction: explain\nReward: 0.490226\nNext Confusion: 4.115866"} | |
| {"text": "### State\nConfusion: 6.605411\nAction: analogize\nReward: 0.102846\nNext Confusion: 6.825231"} | |
| {"text": "### State\nConfusion: 3.968445\nAction: analogize\nReward: -1.432025\nNext Confusion: 4.917612"} | |
| {"text": "### State\nConfusion: 7.966238\nAction: correct_fact\nReward: 0.406304\nNext Confusion: 8.120245"} | |
| {"text": "### State\nConfusion: 6.721416\nAction: analogize\nReward: -1.520515\nNext Confusion: 7.218895"} | |
| {"text": "### State\nConfusion: 2.738938\nAction: correct_fact\nReward: 0.085715\nNext Confusion: 2.513217"} | |
| {"text": "### State\nConfusion: 5.238395\nAction: analogize\nReward: 0.63322\nNext Confusion: 5.405862"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.927505\nNext Confusion: 9.517879"} | |
| {"text": "### State\nConfusion: 5.823978\nAction: worked_example\nReward: 1.860738\nNext Confusion: 4.157206"} | |
| {"text": "### State\nConfusion: 5.481368\nAction: analogize\nReward: 0.071183\nNext Confusion: 5.627844"} | |
| {"text": "### State\nConfusion: 8.289637\nAction: analogize\nReward: 0.024586\nNext Confusion: 8.378593"} | |
| {"text": "### State\nConfusion: 5.744939\nAction: analogize\nReward: -1.389301\nNext Confusion: 6.877677"} | |
| {"text": "### State\nConfusion: 6.628249\nAction: analogize\nReward: 0.479303\nNext Confusion: 5.94657"} | |
| {"text": "### State\nConfusion: 6.70891\nAction: explain\nReward: 0.714393\nNext Confusion: 6.231401"} | |
| {"text": "### State\nConfusion: 2.966038\nAction: analogize\nReward: -0.399175\nNext Confusion: 2.929795"} | |
| {"text": "### State\nConfusion: 9.408701\nAction: analogize\nReward: -0.493768\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 9.415164\nAction: worked_example\nReward: 1.486642\nNext Confusion: 8.030042"} | |
| {"text": "### State\nConfusion: 4.254503\nAction: question\nReward: 0.917854\nNext Confusion: 3.972505"} | |
| {"text": "### State\nConfusion: 4.4989\nAction: explain\nReward: -0.321324\nNext Confusion: 4.517928"} | |
| {"text": "### State\nConfusion: 3.953625\nAction: explain\nReward: -0.23264\nNext Confusion: 4.198959"} | |
| {"text": "### State\nConfusion: 5.389431\nAction: analogize\nReward: -0.474144\nNext Confusion: 5.669853"} | |
| {"text": "### State\nConfusion: 4.18871\nAction: question\nReward: -0.195394\nNext Confusion: 4.506213"} | |
| {"text": "### State\nConfusion: 3.939333\nAction: worked_example\nReward: 1.129873\nNext Confusion: 2.415744"} | |
| {"text": "### State\nConfusion: 5.82522\nAction: analogize\nReward: -0.874563\nNext Confusion: 6.778791"} | |
| {"text": "### State\nConfusion: 3.446916\nAction: analogize\nReward: 0.005333\nNext Confusion: 3.194818"} | |
| {"text": "### State\nConfusion: 6.168521\nAction: analogize\nReward: 0.004277\nNext Confusion: 6.380842"} | |
| {"text": "### State\nConfusion: 4.693802\nAction: analogize\nReward: -0.562175\nNext Confusion: 4.965051"} | |
| {"text": "### State\nConfusion: 5.756139\nAction: correct_fact\nReward: 0.139779\nNext Confusion: 6.064686"} | |
| {"text": "### State\nConfusion: 3.910989\nAction: analogize\nReward: -2.469072\nNext Confusion: 5.263532"} | |
| {"text": "### State\nConfusion: 5.079583\nAction: question\nReward: 0.720805\nNext Confusion: 4.772066"} | |
| {"text": "### State\nConfusion: 8.160942\nAction: explain\nReward: -0.003734\nNext Confusion: 8.076292"} | |
| {"text": "### State\nConfusion: 4.063059\nAction: analogize\nReward: -1.248169\nNext Confusion: 5.102006"} | |
| {"text": "### State\nConfusion: 3.923972\nAction: correct_fact\nReward: -0.436689\nNext Confusion: 3.59706"} | |
| {"text": "### State\nConfusion: 4.257745\nAction: analogize\nReward: 0.975398\nNext Confusion: 4.396928"} | |
| {"text": "### State\nConfusion: 4.448721\nAction: correct_fact\nReward: -0.095114\nNext Confusion: 4.54971"} | |
| {"text": "### State\nConfusion: 8.187359\nAction: correct_fact\nReward: -0.402091\nNext Confusion: 8.562424"} | |
| {"text": "### State\nConfusion: 6.941269\nAction: analogize\nReward: -0.082177\nNext Confusion: 6.415106"} | |
| {"text": "### State\nConfusion: 3.733281\nAction: analogize\nReward: -0.557303\nNext Confusion: 4.495642"} | |
| {"text": "### State\nConfusion: 5.054769\nAction: question\nReward: 0.385418\nNext Confusion: 4.25285"} | |
| {"text": "### State\nConfusion: 6.988386\nAction: analogize\nReward: -0.997999\nNext Confusion: 7.872257"} | |
| {"text": "### State\nConfusion: 3.323612\nAction: analogize\nReward: -0.417391\nNext Confusion: 3.446508"} | |
| {"text": "### State\nConfusion: 7.580118\nAction: analogize\nReward: -0.193736\nNext Confusion: 8.119838"} | |
| {"text": "### State\nConfusion: 6.262873\nAction: analogize\nReward: 0.25262\nNext Confusion: 6.57022"} | |
| {"text": "### State\nConfusion: 3.24616\nAction: analogize\nReward: 0.176095\nNext Confusion: 3.061988"} | |
| {"text": "### State\nConfusion: 3.785727\nAction: analogize\nReward: -0.123162\nNext Confusion: 4.260601"} | |
| {"text": "### State\nConfusion: 4.510389\nAction: analogize\nReward: -0.087179\nNext Confusion: 4.489273"} | |
| {"text": "### State\nConfusion: 3.244055\nAction: analogize\nReward: -1.109454\nNext Confusion: 3.376668"} | |
| {"text": "### State\nConfusion: 3.47781\nAction: analogize\nReward: 1.156379\nNext Confusion: 3.43952"} | |
| {"text": "### State\nConfusion: 3.995885\nAction: analogize\nReward: -0.481654\nNext Confusion: 4.774538"} | |
| {"text": "### State\nConfusion: 8.914354\nAction: analogize\nReward: 0.049812\nNext Confusion: 8.771034"} | |
| {"text": "### State\nConfusion: 1.772292\nAction: explain\nReward: 0.297904\nNext Confusion: 1.719932"} | |
| {"text": "### State\nConfusion: 4.163656\nAction: worked_example\nReward: 1.919813\nNext Confusion: 3.011497"} | |
| {"text": "### State\nConfusion: 3.538392\nAction: analogize\nReward: -0.22508\nNext Confusion: 4.106795"} | |
| {"text": "### State\nConfusion: 4.116727\nAction: worked_example\nReward: 2.444091\nNext Confusion: 2.687809"} | |
| {"text": "### State\nConfusion: 6.723317\nAction: explain\nReward: 0.072298\nNext Confusion: 6.793771"} | |
| {"text": "### State\nConfusion: 3.330479\nAction: analogize\nReward: -1.032799\nNext Confusion: 4.53574"} | |
| {"text": "### State\nConfusion: 3.567197\nAction: analogize\nReward: -0.658102\nNext Confusion: 4.478553"} | |
| {"text": "### State\nConfusion: 4.345864\nAction: analogize\nReward: -0.269636\nNext Confusion: 4.495819"} | |
| {"text": "### State\nConfusion: 5.757518\nAction: analogize\nReward: -0.708589\nNext Confusion: 5.357489"} | |
| {"text": "### State\nConfusion: 8.696834\nAction: explain\nReward: 0.505359\nNext Confusion: 8.75677"} | |
| {"text": "### State\nConfusion: 3.847008\nAction: question\nReward: -1.407674\nNext Confusion: 4.419054"} | |
| {"text": "### State\nConfusion: 5.584003\nAction: analogize\nReward: -0.606178\nNext Confusion: 6.05324"} | |
| {"text": "### State\nConfusion: 4.09134\nAction: analogize\nReward: 0.24887\nNext Confusion: 3.440027"} | |
| {"text": "### State\nConfusion: 8.342102\nAction: question\nReward: 0.388513\nNext Confusion: 8.468631"} | |
| {"text": "### State\nConfusion: 4.246235\nAction: analogize\nReward: -0.367984\nNext Confusion: 4.55566"} | |
| {"text": "### State\nConfusion: 5.041572\nAction: question\nReward: -0.049042\nNext Confusion: 4.999399"} | |
| {"text": "### State\nConfusion: 3.843461\nAction: question\nReward: 0.018297\nNext Confusion: 3.998616"} | |
| {"text": "### State\nConfusion: 3.468152\nAction: analogize\nReward: -0.219468\nNext Confusion: 3.57777"} | |
| {"text": "### State\nConfusion: 4.381477\nAction: analogize\nReward: 0.03071\nNext Confusion: 4.940474"} | |
| {"text": "### State\nConfusion: 5.756888\nAction: analogize\nReward: 0.89899\nNext Confusion: 5.094104"} | |
| {"text": "### State\nConfusion: 4.333212\nAction: analogize\nReward: -0.6245\nNext Confusion: 5.334216"} | |
| {"text": "### State\nConfusion: 5.47342\nAction: question\nReward: -0.154752\nNext Confusion: 5.659844"} | |
| {"text": "### State\nConfusion: 8.083508\nAction: analogize\nReward: -0.103334\nNext Confusion: 8.280694"} | |
| {"text": "### State\nConfusion: 4.620574\nAction: explain\nReward: -0.077588\nNext Confusion: 4.464561"} | |
| {"text": "### State\nConfusion: 3.974661\nAction: worked_example\nReward: 1.125675\nNext Confusion: 3.31946"} | |
| {"text": "### State\nConfusion: 4.42193\nAction: analogize\nReward: -0.47474\nNext Confusion: 4.429034"} | |
| {"text": "### State\nConfusion: 5.231293\nAction: analogize\nReward: -0.845958\nNext Confusion: 6.019314"} | |
| {"text": "### State\nConfusion: 3.228954\nAction: analogize\nReward: -0.857821\nNext Confusion: 4.013046"} | |
| {"text": "### State\nConfusion: 3.676788\nAction: analogize\nReward: 1.48361\nNext Confusion: 2.914774"} | |
| {"text": "### State\nConfusion: 6.744595\nAction: analogize\nReward: -3.523492\nNext Confusion: 7.119243"} | |
| {"text": "### State\nConfusion: 6.573047\nAction: analogize\nReward: 0.065792\nNext Confusion: 6.158436"} | |
| {"text": "### State\nConfusion: 4.348007\nAction: analogize\nReward: -1.154604\nNext Confusion: 5.238071"} | |
| {"text": "### State\nConfusion: 5.936844\nAction: analogize\nReward: -1.194396\nNext Confusion: 7.531626"} | |
| {"text": "### State\nConfusion: 5.42589\nAction: analogize\nReward: -0.43157\nNext Confusion: 5.821207"} | |
| {"text": "### State\nConfusion: 6.476246\nAction: explain\nReward: -0.381727\nNext Confusion: 6.038479"} | |
| {"text": "### State\nConfusion: 3.408969\nAction: correct_fact\nReward: -0.29008\nNext Confusion: 3.610456"} | |
| {"text": "### State\nConfusion: 3.137696\nAction: analogize\nReward: -1.614321\nNext Confusion: 4.338411"} | |
| {"text": "### State\nConfusion: 9.981143\nAction: question\nReward: 0.336403\nNext Confusion: 9.127187"} | |
| {"text": "### State\nConfusion: 5.250209\nAction: worked_example\nReward: 1.215982\nNext Confusion: 4.074452"} | |
| {"text": "### State\nConfusion: 3.848411\nAction: correct_fact\nReward: -0.315407\nNext Confusion: 3.798653"} | |
| {"text": "### State\nConfusion: 5.364659\nAction: analogize\nReward: -0.832938\nNext Confusion: 6.078216"} | |
| {"text": "### State\nConfusion: 5.980426\nAction: analogize\nReward: -0.953331\nNext Confusion: 6.469077"} | |
| {"text": "### State\nConfusion: 5.083078\nAction: analogize\nReward: -0.609319\nNext Confusion: 5.623208"} | |
| {"text": "### State\nConfusion: 7.247568\nAction: analogize\nReward: -0.292227\nNext Confusion: 7.618113"} | |
| {"text": "### State\nConfusion: 3.334119\nAction: analogize\nReward: -1.237248\nNext Confusion: 4.051495"} | |
| {"text": "### State\nConfusion: 3.771148\nAction: analogize\nReward: -0.28148\nNext Confusion: 4.278025"} | |
| {"text": "### State\nConfusion: 7.054849\nAction: analogize\nReward: 0.276644\nNext Confusion: 7.069348"} | |
| {"text": "### State\nConfusion: 4.223377\nAction: question\nReward: 0.878895\nNext Confusion: 3.37235"} | |
| {"text": "### State\nConfusion: 6.538994\nAction: analogize\nReward: -0.657308\nNext Confusion: 7.263082"} | |
| {"text": "### State\nConfusion: 4.482307\nAction: analogize\nReward: -0.734999\nNext Confusion: 5.658994"} | |
| {"text": "### State\nConfusion: 7.982437\nAction: worked_example\nReward: 2.063794\nNext Confusion: 5.985748"} | |
| {"text": "### State\nConfusion: 6.038796\nAction: analogize\nReward: -0.428013\nNext Confusion: 6.309379"} | |
| {"text": "### State\nConfusion: 3.535395\nAction: explain\nReward: -0.369274\nNext Confusion: 3.703277"} | |
| {"text": "### State\nConfusion: 2.860002\nAction: explain\nReward: 0.130337\nNext Confusion: 2.802935"} | |
| {"text": "### State\nConfusion: 3.763097\nAction: correct_fact\nReward: -0.823417\nNext Confusion: 4.379292"} | |
| {"text": "### State\nConfusion: 4.180916\nAction: question\nReward: 0.741489\nNext Confusion: 3.423447"} | |
| {"text": "### State\nConfusion: 5.614097\nAction: question\nReward: 0.531301\nNext Confusion: 5.212755"} | |
| {"text": "### State\nConfusion: 8.834068\nAction: analogize\nReward: -0.189005\nNext Confusion: 9.355236"} | |
| {"text": "### State\nConfusion: 6.024891\nAction: correct_fact\nReward: 0.064373\nNext Confusion: 6.602543"} | |
| {"text": "### State\nConfusion: 5.876128\nAction: explain\nReward: 0.267207\nNext Confusion: 5.626973"} | |
| {"text": "### State\nConfusion: 6.593964\nAction: analogize\nReward: 0.138768\nNext Confusion: 6.006979"} | |
| {"text": "### State\nConfusion: 3.408307\nAction: analogize\nReward: -0.436008\nNext Confusion: 3.196113"} | |
| {"text": "### State\nConfusion: 7.686703\nAction: question\nReward: -0.568404\nNext Confusion: 7.67233"} | |
| {"text": "### State\nConfusion: 5.321778\nAction: question\nReward: 1.497982\nNext Confusion: 4.268611"} | |
| {"text": "### State\nConfusion: 4.75159\nAction: analogize\nReward: 1.433057\nNext Confusion: 4.059246"} | |
| {"text": "### State\nConfusion: 3.645793\nAction: question\nReward: 1.585023\nNext Confusion: 1.974554"} | |
| {"text": "### State\nConfusion: 6.01909\nAction: analogize\nReward: 0.32362\nNext Confusion: 6.217828"} | |
| {"text": "### State\nConfusion: 6.016679\nAction: analogize\nReward: 0.081677\nNext Confusion: 6.395025"} | |
| {"text": "### State\nConfusion: 6.743756\nAction: analogize\nReward: -1.098936\nNext Confusion: 7.562526"} | |
| {"text": "### State\nConfusion: 7.090468\nAction: analogize\nReward: 0.011776\nNext Confusion: 7.157492"} | |
| {"text": "### State\nConfusion: 4.080909\nAction: question\nReward: 0.274189\nNext Confusion: 3.741003"} | |
| {"text": "### State\nConfusion: 6.508274\nAction: explain\nReward: -0.447604\nNext Confusion: 6.71945"} | |
| {"text": "### State\nConfusion: 6.877663\nAction: analogize\nReward: -0.5954\nNext Confusion: 7.796532"} | |
| {"text": "### State\nConfusion: 5.203895\nAction: analogize\nReward: -0.700037\nNext Confusion: 5.556193"} | |
| {"text": "### State\nConfusion: 3.591128\nAction: correct_fact\nReward: -0.794202\nNext Confusion: 3.814581"} | |
| {"text": "### State\nConfusion: 4.232285\nAction: explain\nReward: 0.143006\nNext Confusion: 3.782716"} | |
| {"text": "### State\nConfusion: 4.805493\nAction: correct_fact\nReward: -0.240368\nNext Confusion: 5.179041"} | |
| {"text": "### State\nConfusion: 3.191029\nAction: correct_fact\nReward: -4.082677\nNext Confusion: 3.805619"} | |
| {"text": "### State\nConfusion: 3.123191\nAction: analogize\nReward: 0.157324\nNext Confusion: 2.772966"} | |
| {"text": "### State\nConfusion: 5.475059\nAction: analogize\nReward: -0.537958\nNext Confusion: 5.817322"} | |
| {"text": "### State\nConfusion: 8.821889\nAction: analogize\nReward: 0.389415\nNext Confusion: 8.971167"} | |
| {"text": "### State\nConfusion: 5.51871\nAction: question\nReward: -0.490781\nNext Confusion: 5.568502"} | |
| {"text": "### State\nConfusion: 4.363986\nAction: worked_example\nReward: 2.794462\nNext Confusion: 2.385778"} | |
| {"text": "### State\nConfusion: 3.660182\nAction: analogize\nReward: 0.197274\nNext Confusion: 4.073714"} | |
| {"text": "### State\nConfusion: 4.188292\nAction: question\nReward: 2.06589\nNext Confusion: 3.152765"} | |
| {"text": "### State\nConfusion: 4.624219\nAction: explain\nReward: 0.03086\nNext Confusion: 4.495928"} | |
| {"text": "### State\nConfusion: 3.352286\nAction: analogize\nReward: 0.358749\nNext Confusion: 3.544025"} | |
| {"text": "### State\nConfusion: 3.218407\nAction: analogize\nReward: -0.809351\nNext Confusion: 3.86212"} | |
| {"text": "### State\nConfusion: 4.368701\nAction: analogize\nReward: 0.097597\nNext Confusion: 4.579212"} | |
| {"text": "### State\nConfusion: 7.926002\nAction: question\nReward: 1.346095\nNext Confusion: 7.494742"} | |
| {"text": "### State\nConfusion: 10.0\nAction: explain\nReward: 2.632339\nNext Confusion: 8.704373"} | |
| {"text": "### State\nConfusion: 6.556739\nAction: worked_example\nReward: 0.730109\nNext Confusion: 5.347394"} | |
| {"text": "### State\nConfusion: 6.622032\nAction: worked_example\nReward: 1.884973\nNext Confusion: 3.975837"} | |
| {"text": "### State\nConfusion: 6.837491\nAction: explain\nReward: -0.347375\nNext Confusion: 7.207565"} | |
| {"text": "### State\nConfusion: 4.140696\nAction: analogize\nReward: -0.565153\nNext Confusion: 4.848446"} | |
| {"text": "### State\nConfusion: 6.545013\nAction: analogize\nReward: 0.076453\nNext Confusion: 6.966989"} | |
| {"text": "### State\nConfusion: 4.751852\nAction: analogize\nReward: -0.798605\nNext Confusion: 5.513865"} | |
| {"text": "### State\nConfusion: 5.967764\nAction: analogize\nReward: 0.171464\nNext Confusion: 6.42246"} | |
| {"text": "### State\nConfusion: 7.188017\nAction: analogize\nReward: -0.994482\nNext Confusion: 8.0922"} | |
| {"text": "### State\nConfusion: 4.973029\nAction: question\nReward: 0.046659\nNext Confusion: 4.721402"} | |
| {"text": "### State\nConfusion: 6.592906\nAction: correct_fact\nReward: 1.085134\nNext Confusion: 5.660255"} | |
| {"text": "### State\nConfusion: 3.720168\nAction: explain\nReward: 1.271093\nNext Confusion: 3.076503"} | |
| {"text": "### State\nConfusion: 5.986892\nAction: explain\nReward: 0.533297\nNext Confusion: 5.874181"} | |
| {"text": "### State\nConfusion: 3.338895\nAction: analogize\nReward: 0.165743\nNext Confusion: 3.483409"} | |
| {"text": "### State\nConfusion: 6.299437\nAction: question\nReward: 0.090953\nNext Confusion: 5.496086"} | |
| {"text": "### State\nConfusion: 3.233407\nAction: analogize\nReward: 0.193732\nNext Confusion: 3.252628"} | |
| {"text": "### State\nConfusion: 3.767879\nAction: analogize\nReward: -0.519034\nNext Confusion: 4.018177"} | |
| {"text": "### State\nConfusion: 3.284015\nAction: explain\nReward: 0.343419\nNext Confusion: 3.226797"} | |
| {"text": "### State\nConfusion: 4.966618\nAction: analogize\nReward: -1.346464\nNext Confusion: 5.568508"} | |
| {"text": "### State\nConfusion: 7.611811\nAction: analogize\nReward: -1.118271\nNext Confusion: 8.414276"} | |
| {"text": "### State\nConfusion: 3.678779\nAction: analogize\nReward: -1.83396\nNext Confusion: 4.816208"} | |
| {"text": "### State\nConfusion: 9.551186\nAction: worked_example\nReward: 1.757724\nNext Confusion: 7.951963"} | |
| {"text": "### State\nConfusion: 6.259004\nAction: analogize\nReward: 0.637289\nNext Confusion: 5.072351"} | |
| {"text": "### State\nConfusion: 4.111844\nAction: analogize\nReward: 0.086688\nNext Confusion: 4.294176"} | |
| {"text": "### State\nConfusion: 5.955097\nAction: analogize\nReward: -1.022934\nNext Confusion: 5.988747"} | |
| {"text": "### State\nConfusion: 3.42395\nAction: analogize\nReward: -0.086424\nNext Confusion: 3.586034"} | |
| {"text": "### State\nConfusion: 6.418479\nAction: worked_example\nReward: 3.015016\nNext Confusion: 4.592889"} | |
| {"text": "### State\nConfusion: 3.27804\nAction: correct_fact\nReward: 1.052366\nNext Confusion: 2.910743"} | |
| {"text": "### State\nConfusion: 6.8594\nAction: question\nReward: 0.394227\nNext Confusion: 6.030883"} | |
| {"text": "### State\nConfusion: 3.918734\nAction: analogize\nReward: 0.392353\nNext Confusion: 3.580582"} | |
| {"text": "### State\nConfusion: 4.108029\nAction: explain\nReward: 0.186519\nNext Confusion: 4.014627"} | |
| {"text": "### State\nConfusion: 6.534582\nAction: analogize\nReward: -1.341765\nNext Confusion: 7.152132"} | |
| {"text": "### State\nConfusion: 4.526416\nAction: analogize\nReward: 0.041798\nNext Confusion: 4.610605"} | |
| {"text": "### State\nConfusion: 3.207484\nAction: analogize\nReward: -0.274951\nNext Confusion: 3.235347"} | |
| {"text": "### State\nConfusion: 7.274545\nAction: correct_fact\nReward: 0.060256\nNext Confusion: 7.385714"} | |
| {"text": "### State\nConfusion: 6.405168\nAction: analogize\nReward: 1.062377\nNext Confusion: 6.203435"} | |
| {"text": "### State\nConfusion: 6.145315\nAction: analogize\nReward: 0.297888\nNext Confusion: 5.948101"} | |
| {"text": "### State\nConfusion: 4.362407\nAction: question\nReward: -0.376688\nNext Confusion: 4.091491"} | |
| {"text": "### State\nConfusion: 8.439035\nAction: analogize\nReward: -0.300894\nNext Confusion: 8.39113"} | |
| {"text": "### State\nConfusion: 3.67231\nAction: analogize\nReward: -1.550801\nNext Confusion: 4.500672"} | |
| {"text": "### State\nConfusion: 3.833536\nAction: correct_fact\nReward: 0.260055\nNext Confusion: 4.460357"} | |
| {"text": "### State\nConfusion: 4.286399\nAction: analogize\nReward: -0.380031\nNext Confusion: 4.813565"} | |
| {"text": "### State\nConfusion: 3.622745\nAction: analogize\nReward: 1.280211\nNext Confusion: 3.017132"} | |
| {"text": "### State\nConfusion: 3.164635\nAction: analogize\nReward: -1.033433\nNext Confusion: 4.108108"} | |
| {"text": "### State\nConfusion: 4.392075\nAction: correct_fact\nReward: -0.532647\nNext Confusion: 4.885617"} | |
| {"text": "### State\nConfusion: 3.536113\nAction: explain\nReward: 1.498938\nNext Confusion: 2.96965"} | |
| {"text": "### State\nConfusion: 8.397891\nAction: correct_fact\nReward: 1.004038\nNext Confusion: 7.644212"} | |
| {"text": "### State\nConfusion: 2.638306\nAction: analogize\nReward: -0.30316\nNext Confusion: 3.268882"} | |
| {"text": "### State\nConfusion: 2.290366\nAction: worked_example\nReward: 1.073395\nNext Confusion: 1.899378"} | |
| {"text": "### State\nConfusion: 4.104017\nAction: correct_fact\nReward: -0.132089\nNext Confusion: 4.67455"} | |
| {"text": "### State\nConfusion: 4.281032\nAction: explain\nReward: 0.437552\nNext Confusion: 3.920859"} | |
| {"text": "### State\nConfusion: 9.430725\nAction: analogize\nReward: -0.210339\nNext Confusion: 9.636165"} | |
| {"text": "### State\nConfusion: 3.160262\nAction: analogize\nReward: 0.90191\nNext Confusion: 2.309478"} | |
| {"text": "### State\nConfusion: 3.093624\nAction: analogize\nReward: -0.678115\nNext Confusion: 4.155883"} | |
| {"text": "### State\nConfusion: 5.114895\nAction: explain\nReward: 0.544845\nNext Confusion: 5.086789"} | |
| {"text": "### State\nConfusion: 7.723752\nAction: analogize\nReward: -0.464143\nNext Confusion: 8.243782"} | |
| {"text": "### State\nConfusion: 4.042224\nAction: correct_fact\nReward: -0.309778\nNext Confusion: 4.224926"} | |
| {"text": "### State\nConfusion: 1.298197\nAction: analogize\nReward: -0.120681\nNext Confusion: 2.041737"} | |
| {"text": "### State\nConfusion: 3.211188\nAction: question\nReward: 0.94096\nNext Confusion: 2.304281"} | |
| {"text": "### State\nConfusion: 2.177077\nAction: question\nReward: 0.754392\nNext Confusion: 1.588065"} | |
| {"text": "### State\nConfusion: 5.599477\nAction: explain\nReward: 0.427335\nNext Confusion: 5.764095"} | |
| {"text": "### State\nConfusion: 5.271157\nAction: explain\nReward: 1.201553\nNext Confusion: 4.389104"} | |
| {"text": "### State\nConfusion: 5.172612\nAction: explain\nReward: -0.578675\nNext Confusion: 5.641872"} | |
| {"text": "### State\nConfusion: 4.746197\nAction: explain\nReward: -0.415519\nNext Confusion: 4.961803"} | |
| {"text": "### State\nConfusion: 5.20597\nAction: analogize\nReward: -0.188651\nNext Confusion: 4.729918"} | |
| {"text": "### State\nConfusion: 7.899179\nAction: explain\nReward: 0.9233\nNext Confusion: 7.617782"} | |
| {"text": "### State\nConfusion: 4.606825\nAction: question\nReward: 1.332907\nNext Confusion: 3.817736"} | |
| {"text": "### State\nConfusion: 3.545838\nAction: question\nReward: 0.584057\nNext Confusion: 3.080739"} | |
| {"text": "### State\nConfusion: 5.416692\nAction: analogize\nReward: -1.181844\nNext Confusion: 6.291075"} | |
| {"text": "### State\nConfusion: 8.278512\nAction: explain\nReward: 0.65172\nNext Confusion: 8.075535"} | |
| {"text": "### State\nConfusion: 4.03923\nAction: worked_example\nReward: 1.425754\nNext Confusion: 2.703941"} | |
| {"text": "### State\nConfusion: 7.588636\nAction: explain\nReward: 0.627005\nNext Confusion: 6.970175"} | |
| {"text": "### State\nConfusion: 6.207129\nAction: analogize\nReward: -0.164946\nNext Confusion: 6.207433"} | |
| {"text": "### State\nConfusion: 4.607996\nAction: analogize\nReward: -0.501607\nNext Confusion: 4.840988"} | |
| {"text": "### State\nConfusion: 4.281465\nAction: question\nReward: 1.016601\nNext Confusion: 3.84681"} | |
| {"text": "### State\nConfusion: 4.746888\nAction: explain\nReward: 1.408749\nNext Confusion: 3.521709"} | |
| {"text": "### State\nConfusion: 2.90317\nAction: analogize\nReward: -0.593073\nNext Confusion: 3.09674"} | |
| {"text": "### State\nConfusion: 5.362331\nAction: analogize\nReward: -0.147509\nNext Confusion: 5.639575"} | |
| {"text": "### State\nConfusion: 5.548513\nAction: analogize\nReward: -0.657696\nNext Confusion: 6.434372"} | |
| {"text": "### State\nConfusion: 4.179947\nAction: explain\nReward: 1.051147\nNext Confusion: 3.829047"} | |
| {"text": "### State\nConfusion: 6.712979\nAction: question\nReward: 0.125504\nNext Confusion: 6.897026"} | |
| {"text": "### State\nConfusion: 5.161298\nAction: explain\nReward: -0.894121\nNext Confusion: 5.72294"} | |
| {"text": "### State\nConfusion: 4.199243\nAction: analogize\nReward: 1.274532\nNext Confusion: 3.317777"} | |
| {"text": "### State\nConfusion: 2.205262\nAction: worked_example\nReward: 0.85029\nNext Confusion: 1.233461"} | |
| {"text": "### State\nConfusion: 4.533222\nAction: question\nReward: -0.208696\nNext Confusion: 4.21478"} | |
| {"text": "### State\nConfusion: 4.401053\nAction: analogize\nReward: 0.256719\nNext Confusion: 4.846919"} | |
| {"text": "### State\nConfusion: 3.643785\nAction: analogize\nReward: -0.314908\nNext Confusion: 3.947566"} | |
| {"text": "### State\nConfusion: 3.676824\nAction: analogize\nReward: -0.267246\nNext Confusion: 3.87749"} | |
| {"text": "### State\nConfusion: 4.581174\nAction: analogize\nReward: -0.067764\nNext Confusion: 4.506774"} | |
| {"text": "### State\nConfusion: 5.592551\nAction: question\nReward: 0.994469\nNext Confusion: 5.1464"} | |
| {"text": "### State\nConfusion: 4.900575\nAction: analogize\nReward: -1.442941\nNext Confusion: 5.849224"} | |
| {"text": "### State\nConfusion: 4.030148\nAction: explain\nReward: -0.223471\nNext Confusion: 4.296577"} | |
| {"text": "### State\nConfusion: 5.54907\nAction: correct_fact\nReward: 1.283735\nNext Confusion: 4.96947"} | |
| {"text": "### State\nConfusion: 6.399455\nAction: explain\nReward: 0.074439\nNext Confusion: 6.507652"} | |
| {"text": "### State\nConfusion: 4.697805\nAction: analogize\nReward: -1.345821\nNext Confusion: 5.294332"} | |
| {"text": "### State\nConfusion: 7.291886\nAction: analogize\nReward: -0.49404\nNext Confusion: 7.596599"} | |
| {"text": "### State\nConfusion: 8.478653\nAction: worked_example\nReward: 1.67051\nNext Confusion: 7.127231"} | |
| {"text": "### State\nConfusion: 4.146376\nAction: question\nReward: -0.30378\nNext Confusion: 4.132405"} | |
| {"text": "### State\nConfusion: 9.333189\nAction: analogize\nReward: -1.131478\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 5.150481\nAction: explain\nReward: -0.021077\nNext Confusion: 5.147341"} | |
| {"text": "### State\nConfusion: 5.005999\nAction: analogize\nReward: 0.261416\nNext Confusion: 5.043668"} | |
| {"text": "### State\nConfusion: 5.417343\nAction: analogize\nReward: 0.55921\nNext Confusion: 5.474882"} | |
| {"text": "### State\nConfusion: 5.937985\nAction: explain\nReward: 0.105923\nNext Confusion: 5.750947"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.186882\nNext Confusion: 9.814193"} | |
| {"text": "### State\nConfusion: 2.234744\nAction: analogize\nReward: 0.880647\nNext Confusion: 1.544325"} | |
| {"text": "### State\nConfusion: 1.236512\nAction: question\nReward: 1.106881\nNext Confusion: 0.7996"} | |
| {"text": "### State\nConfusion: 4.044295\nAction: analogize\nReward: -1.293793\nNext Confusion: 4.683942"} | |
| {"text": "### State\nConfusion: 4.212736\nAction: explain\nReward: 0.423388\nNext Confusion: 3.542225"} | |
| {"text": "### State\nConfusion: 3.587829\nAction: analogize\nReward: -1.093237\nNext Confusion: 4.790663"} | |
| {"text": "### State\nConfusion: 5.96449\nAction: question\nReward: -0.061454\nNext Confusion: 5.93381"} | |
| {"text": "### State\nConfusion: 3.885393\nAction: analogize\nReward: -0.367448\nNext Confusion: 3.773087"} | |
| {"text": "### State\nConfusion: 6.738736\nAction: explain\nReward: 0.954335\nNext Confusion: 6.322676"} | |
| {"text": "### State\nConfusion: 3.025954\nAction: analogize\nReward: -0.77141\nNext Confusion: 3.877892"} | |
| {"text": "### State\nConfusion: 4.1147\nAction: analogize\nReward: 0.392259\nNext Confusion: 3.516709"} | |
| {"text": "### State\nConfusion: 6.666338\nAction: analogize\nReward: 0.324842\nNext Confusion: 6.57961"} | |
| {"text": "### State\nConfusion: 5.232429\nAction: analogize\nReward: 0.382367\nNext Confusion: 4.214555"} | |
| {"text": "### State\nConfusion: 7.714635\nAction: analogize\nReward: 0.360041\nNext Confusion: 8.002491"} | |
| {"text": "### State\nConfusion: 5.043363\nAction: analogize\nReward: -0.185386\nNext Confusion: 5.162632"} | |
| {"text": "### State\nConfusion: 8.755225\nAction: analogize\nReward: -0.111426\nNext Confusion: 9.15125"} | |
| {"text": "### State\nConfusion: 7.030897\nAction: analogize\nReward: -1.325821\nNext Confusion: 7.800182"} | |
| {"text": "### State\nConfusion: 7.853434\nAction: analogize\nReward: 0.080185\nNext Confusion: 8.146503"} | |
| {"text": "### State\nConfusion: 3.47865\nAction: analogize\nReward: -0.742602\nNext Confusion: 3.932566"} | |
| {"text": "### State\nConfusion: 3.318392\nAction: worked_example\nReward: 1.172948\nNext Confusion: 2.795324"} | |
| {"text": "### State\nConfusion: 3.788924\nAction: question\nReward: 0.853461\nNext Confusion: 3.229144"} | |
| {"text": "### State\nConfusion: 3.487349\nAction: question\nReward: 1.199487\nNext Confusion: 2.967386"} | |
| {"text": "### State\nConfusion: 3.806279\nAction: question\nReward: 2.302437\nNext Confusion: 2.469832"} | |
| {"text": "### State\nConfusion: 3.311562\nAction: analogize\nReward: -0.445051\nNext Confusion: 3.310418"} | |
| {"text": "### State\nConfusion: 5.622833\nAction: analogize\nReward: -1.147916\nNext Confusion: 7.239026"} | |
| {"text": "### State\nConfusion: 4.229888\nAction: analogize\nReward: -0.575123\nNext Confusion: 5.167108"} | |
| {"text": "### State\nConfusion: 4.295042\nAction: analogize\nReward: -0.594404\nNext Confusion: 4.929984"} | |
| {"text": "### State\nConfusion: 3.346937\nAction: analogize\nReward: -1.200558\nNext Confusion: 4.280535"} | |
| {"text": "### State\nConfusion: 6.188107\nAction: analogize\nReward: 0.015743\nNext Confusion: 6.055892"} | |
| {"text": "### State\nConfusion: 4.454732\nAction: question\nReward: 1.193721\nNext Confusion: 3.239344"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.732955\nNext Confusion: 9.617961"} | |
| {"text": "### State\nConfusion: 5.953883\nAction: analogize\nReward: 0.366403\nNext Confusion: 6.130521"} | |
| {"text": "### State\nConfusion: 5.771333\nAction: explain\nReward: -1.45427\nNext Confusion: 6.330766"} | |
| {"text": "### State\nConfusion: 4.92416\nAction: explain\nReward: -0.239389\nNext Confusion: 5.232991"} | |
| {"text": "### State\nConfusion: 2.797066\nAction: analogize\nReward: -1.545631\nNext Confusion: 4.092842"} | |
| {"text": "### State\nConfusion: 3.410682\nAction: explain\nReward: 1.680386\nNext Confusion: 2.625942"} | |
| {"text": "### State\nConfusion: 4.196591\nAction: analogize\nReward: 0.143843\nNext Confusion: 4.218358"} | |
| {"text": "### State\nConfusion: 4.527193\nAction: explain\nReward: 1.279641\nNext Confusion: 3.833264"} | |
| {"text": "### State\nConfusion: 7.109824\nAction: analogize\nReward: -1.056292\nNext Confusion: 7.897732"} | |
| {"text": "### State\nConfusion: 3.4512\nAction: question\nReward: 0.101518\nNext Confusion: 2.863511"} | |
| {"text": "### State\nConfusion: 9.769079\nAction: worked_example\nReward: -0.05826\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 7.047394\nAction: correct_fact\nReward: -0.477517\nNext Confusion: 6.644616"} | |
| {"text": "### State\nConfusion: 5.039032\nAction: analogize\nReward: 0.542779\nNext Confusion: 4.704964"} | |
| {"text": "### State\nConfusion: 2.455086\nAction: analogize\nReward: -0.42512\nNext Confusion: 2.582648"} | |
| {"text": "### State\nConfusion: 8.79387\nAction: explain\nReward: 1.493231\nNext Confusion: 8.595444"} | |
| {"text": "### State\nConfusion: 6.5493\nAction: question\nReward: 1.596032\nNext Confusion: 5.77124"} | |
| {"text": "### State\nConfusion: 3.847614\nAction: analogize\nReward: -0.041072\nNext Confusion: 3.546278"} | |
| {"text": "### State\nConfusion: 3.479255\nAction: worked_example\nReward: 0.772089\nNext Confusion: 2.406743"} | |
| {"text": "### State\nConfusion: 3.405935\nAction: worked_example\nReward: 0.631187\nNext Confusion: 2.943548"} | |
| {"text": "### State\nConfusion: 7.263623\nAction: analogize\nReward: -0.889767\nNext Confusion: 7.725207"} | |
| {"text": "### State\nConfusion: 3.591611\nAction: worked_example\nReward: 1.93213\nNext Confusion: 2.058504"} | |
| {"text": "### State\nConfusion: 5.776166\nAction: explain\nReward: 0.764305\nNext Confusion: 5.656176"} | |
| {"text": "### State\nConfusion: 3.816689\nAction: worked_example\nReward: 2.065074\nNext Confusion: 2.308856"} | |
| {"text": "### State\nConfusion: 6.615742\nAction: analogize\nReward: -1.522533\nNext Confusion: 7.848629"} | |
| {"text": "### State\nConfusion: 3.603578\nAction: analogize\nReward: 0.183606\nNext Confusion: 3.590095"} | |
| {"text": "### State\nConfusion: 6.595288\nAction: analogize\nReward: -0.773955\nNext Confusion: 7.387215"} | |
| {"text": "### State\nConfusion: 3.243061\nAction: analogize\nReward: -0.86511\nNext Confusion: 4.083089"} | |
| {"text": "### State\nConfusion: 6.382723\nAction: correct_fact\nReward: 0.514668\nNext Confusion: 6.59727"} | |
| {"text": "### State\nConfusion: 3.61621\nAction: correct_fact\nReward: 0.218221\nNext Confusion: 3.916531"} | |
| {"text": "### State\nConfusion: 4.585879\nAction: explain\nReward: 0.441228\nNext Confusion: 4.214521"} | |
| {"text": "### State\nConfusion: 5.550642\nAction: analogize\nReward: -0.23505\nNext Confusion: 5.782431"} | |
| {"text": "### State\nConfusion: 3.977782\nAction: analogize\nReward: 0.619823\nNext Confusion: 3.979027"} | |
| {"text": "### State\nConfusion: 3.403716\nAction: analogize\nReward: -0.803805\nNext Confusion: 3.704608"} | |
| {"text": "### State\nConfusion: 3.522781\nAction: analogize\nReward: -0.594497\nNext Confusion: 3.853947"} | |
| {"text": "### State\nConfusion: 3.428078\nAction: worked_example\nReward: 2.908679\nNext Confusion: 0.783914"} | |
| {"text": "### State\nConfusion: 2.765526\nAction: worked_example\nReward: 0.146944\nNext Confusion: 2.475324"} | |
| {"text": "### State\nConfusion: 5.662729\nAction: analogize\nReward: -0.053028\nNext Confusion: 6.267484"} | |
| {"text": "### State\nConfusion: 2.97392\nAction: correct_fact\nReward: 0.207476\nNext Confusion: 2.529974"} | |
| {"text": "### State\nConfusion: 4.33869\nAction: analogize\nReward: -0.048698\nNext Confusion: 4.062849"} | |
| {"text": "### State\nConfusion: 4.013338\nAction: explain\nReward: 0.400833\nNext Confusion: 3.595311"} | |
| {"text": "### State\nConfusion: 4.072468\nAction: analogize\nReward: -1.069305\nNext Confusion: 4.625163"} | |
| {"text": "### State\nConfusion: 3.534719\nAction: analogize\nReward: -0.496206\nNext Confusion: 3.767223"} | |
| {"text": "### State\nConfusion: 3.301023\nAction: analogize\nReward: -0.343358\nNext Confusion: 3.224041"} | |
| {"text": "### State\nConfusion: 5.737082\nAction: question\nReward: 0.787803\nNext Confusion: 5.253803"} | |
| {"text": "### State\nConfusion: 5.034364\nAction: correct_fact\nReward: -0.452177\nNext Confusion: 5.496845"} | |
| {"text": "### State\nConfusion: 8.053705\nAction: analogize\nReward: 0.306109\nNext Confusion: 8.010042"} | |
| {"text": "### State\nConfusion: 4.250006\nAction: analogize\nReward: -1.557089\nNext Confusion: 5.197942"} | |
| {"text": "### State\nConfusion: 2.612885\nAction: analogize\nReward: 0.520245\nNext Confusion: 2.377651"} | |
| {"text": "### State\nConfusion: 4.320713\nAction: question\nReward: 0.08004\nNext Confusion: 4.111726"} | |
| {"text": "### State\nConfusion: 6.200604\nAction: analogize\nReward: 0.148336\nNext Confusion: 6.110532"} | |
| {"text": "### State\nConfusion: 2.905517\nAction: explain\nReward: 0.205134\nNext Confusion: 2.580832"} | |
| {"text": "### State\nConfusion: 7.576805\nAction: analogize\nReward: -1.736052\nNext Confusion: 7.978537"} | |
| {"text": "### State\nConfusion: 4.337534\nAction: analogize\nReward: -0.830804\nNext Confusion: 5.049174"} | |
| {"text": "### State\nConfusion: 4.344432\nAction: analogize\nReward: 0.553108\nNext Confusion: 4.202552"} | |
| {"text": "### State\nConfusion: 4.759101\nAction: analogize\nReward: 1.546088\nNext Confusion: 3.756994"} | |
| {"text": "### State\nConfusion: 5.246162\nAction: question\nReward: 0.419569\nNext Confusion: 5.021464"} | |
| {"text": "### State\nConfusion: 4.600087\nAction: analogize\nReward: -0.103434\nNext Confusion: 4.98533"} | |
| {"text": "### State\nConfusion: 5.103688\nAction: explain\nReward: -1.820209\nNext Confusion: 5.43225"} | |
| {"text": "### State\nConfusion: 4.324837\nAction: analogize\nReward: 0.048282\nNext Confusion: 4.139078"} | |
| {"text": "### State\nConfusion: 2.427948\nAction: analogize\nReward: -0.687189\nNext Confusion: 2.827288"} | |
| {"text": "### State\nConfusion: 5.790867\nAction: explain\nReward: 0.480449\nNext Confusion: 5.232456"} | |
| {"text": "### State\nConfusion: 2.040263\nAction: analogize\nReward: -0.197799\nNext Confusion: 2.24734"} | |
| {"text": "### State\nConfusion: 2.582153\nAction: worked_example\nReward: 0.554199\nNext Confusion: 1.467742"} | |
| {"text": "### State\nConfusion: 4.024901\nAction: explain\nReward: 0.246961\nNext Confusion: 3.849331"} | |
| {"text": "### State\nConfusion: 3.155271\nAction: explain\nReward: 0.93429\nNext Confusion: 2.779514"} | |
| {"text": "### State\nConfusion: 4.333934\nAction: analogize\nReward: -1.382026\nNext Confusion: 5.333732"} | |
| {"text": "### State\nConfusion: 3.711759\nAction: analogize\nReward: -1.054925\nNext Confusion: 4.427508"} | |
| {"text": "### State\nConfusion: 4.197458\nAction: analogize\nReward: -0.672473\nNext Confusion: 4.707225"} | |
| {"text": "### State\nConfusion: 3.595974\nAction: analogize\nReward: -0.30356\nNext Confusion: 3.692486"} | |
| {"text": "### State\nConfusion: 9.424139\nAction: question\nReward: -0.051805\nNext Confusion: 9.827569"} | |
| {"text": "### State\nConfusion: 3.847582\nAction: explain\nReward: -0.545496\nNext Confusion: 4.474236"} | |
| {"text": "### State\nConfusion: 4.257097\nAction: correct_fact\nReward: -1.007093\nNext Confusion: 5.195939"} | |
| {"text": "### State\nConfusion: 3.375196\nAction: analogize\nReward: -0.613092\nNext Confusion: 3.231864"} | |
| {"text": "### State\nConfusion: 4.372277\nAction: analogize\nReward: -0.062679\nNext Confusion: 4.743267"} | |
| {"text": "### State\nConfusion: 5.252807\nAction: analogize\nReward: -1.110442\nNext Confusion: 6.075631"} | |
| {"text": "### State\nConfusion: 4.687475\nAction: analogize\nReward: 0.085918\nNext Confusion: 4.711984"} | |
| {"text": "### State\nConfusion: 4.754649\nAction: analogize\nReward: -0.419951\nNext Confusion: 5.299336"} | |
| {"text": "### State\nConfusion: 6.040562\nAction: analogize\nReward: -0.296113\nNext Confusion: 6.770728"} | |
| {"text": "### State\nConfusion: 5.219658\nAction: explain\nReward: -0.377415\nNext Confusion: 5.787689"} | |
| {"text": "### State\nConfusion: 4.082221\nAction: explain\nReward: 1.391503\nNext Confusion: 3.019216"} | |
| {"text": "### State\nConfusion: 5.875564\nAction: explain\nReward: 0.35298\nNext Confusion: 5.948457"} | |
| {"text": "### State\nConfusion: 3.879623\nAction: analogize\nReward: 0.372709\nNext Confusion: 3.920777"} | |
| {"text": "### State\nConfusion: 4.371979\nAction: analogize\nReward: 0.109884\nNext Confusion: 4.681374"} | |
| {"text": "### State\nConfusion: 4.799329\nAction: analogize\nReward: 0.134149\nNext Confusion: 4.814604"} | |
| {"text": "### State\nConfusion: 2.772295\nAction: analogize\nReward: -1.053458\nNext Confusion: 3.162443"} | |
| {"text": "### State\nConfusion: 4.656308\nAction: analogize\nReward: -0.716218\nNext Confusion: 5.735703"} | |
| {"text": "### State\nConfusion: 3.399582\nAction: analogize\nReward: 0.010502\nNext Confusion: 3.909108"} | |
| {"text": "### State\nConfusion: 4.250917\nAction: analogize\nReward: 0.365275\nNext Confusion: 4.656218"} | |
| {"text": "### State\nConfusion: 3.886501\nAction: analogize\nReward: 0.02291\nNext Confusion: 4.124634"} | |
| {"text": "### State\nConfusion: 7.677275\nAction: analogize\nReward: 0.266499\nNext Confusion: 7.756824"} | |
| {"text": "### State\nConfusion: 4.214218\nAction: analogize\nReward: 0.21845\nNext Confusion: 4.028525"} | |
| {"text": "### State\nConfusion: 3.2064\nAction: correct_fact\nReward: 0.527978\nNext Confusion: 2.366183"} | |
| {"text": "### State\nConfusion: 6.950843\nAction: analogize\nReward: 0.101171\nNext Confusion: 7.08694"} | |
| {"text": "### State\nConfusion: 3.835536\nAction: analogize\nReward: -0.621992\nNext Confusion: 4.231325"} | |
| {"text": "### State\nConfusion: 6.523788\nAction: analogize\nReward: -0.239707\nNext Confusion: 6.37458"} | |
| {"text": "### State\nConfusion: 3.839309\nAction: analogize\nReward: -0.047621\nNext Confusion: 4.649327"} | |
| {"text": "### State\nConfusion: 8.457514\nAction: analogize\nReward: -0.49514\nNext Confusion: 8.68329"} | |
| {"text": "### State\nConfusion: 6.079686\nAction: analogize\nReward: -0.505621\nNext Confusion: 6.997733"} | |
| {"text": "### State\nConfusion: 3.052662\nAction: worked_example\nReward: 2.304519\nNext Confusion: 1.063142"} | |
| {"text": "### State\nConfusion: 4.0362\nAction: worked_example\nReward: 1.032511\nNext Confusion: 2.903929"} | |
| {"text": "### State\nConfusion: 5.259984\nAction: question\nReward: 0.140425\nNext Confusion: 5.599321"} | |
| {"text": "### State\nConfusion: 5.692397\nAction: analogize\nReward: 0.152449\nNext Confusion: 5.766351"} | |
| {"text": "### State\nConfusion: 3.010824\nAction: analogize\nReward: -0.822476\nNext Confusion: 4.061491"} | |
| {"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.54809\nNext Confusion: 9.630048"} | |
| {"text": "### State\nConfusion: 6.365635\nAction: correct_fact\nReward: 0.303342\nNext Confusion: 6.359275"} | |
| {"text": "### State\nConfusion: 4.506102\nAction: worked_example\nReward: 3.257079\nNext Confusion: 2.463102"} | |
| {"text": "### State\nConfusion: 6.650249\nAction: question\nReward: 0.689143\nNext Confusion: 5.702714"} | |
| {"text": "### State\nConfusion: 3.872385\nAction: analogize\nReward: -1.4376\nNext Confusion: 5.064346"} | |
| {"text": "### State\nConfusion: 5.604603\nAction: question\nReward: 0.936614\nNext Confusion: 4.981571"} | |
| {"text": "### State\nConfusion: 6.364416\nAction: question\nReward: -0.058641\nNext Confusion: 5.740748"} | |
| {"text": "### State\nConfusion: 5.162976\nAction: analogize\nReward: -0.52851\nNext Confusion: 5.366485"} | |
| {"text": "### State\nConfusion: 4.989271\nAction: question\nReward: 0.420257\nNext Confusion: 3.911954"} | |
| {"text": "### State\nConfusion: 4.054232\nAction: analogize\nReward: -0.821515\nNext Confusion: 4.815986"} | |
| {"text": "### State\nConfusion: 3.795604\nAction: correct_fact\nReward: -0.037172\nNext Confusion: 4.267697"} | |
| {"text": "### State\nConfusion: 7.883514\nAction: analogize\nReward: -0.555406\nNext Confusion: 8.099345"} | |
| {"text": "### State\nConfusion: 4.195034\nAction: analogize\nReward: -0.593037\nNext Confusion: 4.585204"} | |
| {"text": "### State\nConfusion: 3.851079\nAction: question\nReward: -0.757824\nNext Confusion: 3.551079"} | |
| {"text": "### State\nConfusion: 2.634171\nAction: analogize\nReward: -1.035621\nNext Confusion: 3.713475"} | |
| {"text": "### State\nConfusion: 8.174025\nAction: analogize\nReward: -0.735074\nNext Confusion: 8.931459"} | |
| {"text": "### State\nConfusion: 5.127021\nAction: analogize\nReward: -0.197244\nNext Confusion: 5.958104"} | |
| {"text": "### State\nConfusion: 5.421781\nAction: question\nReward: 0.750681\nNext Confusion: 4.586025"} | |
| {"text": "### State\nConfusion: 5.553261\nAction: question\nReward: 0.301218\nNext Confusion: 4.546919"} | |
| {"text": "### State\nConfusion: 7.039034\nAction: analogize\nReward: 0.021269\nNext Confusion: 7.317868"} | |
| {"text": "### State\nConfusion: 4.994831\nAction: analogize\nReward: -0.938323\nNext Confusion: 5.784461"} | |
| {"text": "### State\nConfusion: 3.516691\nAction: worked_example\nReward: 1.905112\nNext Confusion: 2.429983"} | |
| {"text": "### State\nConfusion: 9.025807\nAction: analogize\nReward: -0.680691\nNext Confusion: 9.413644"} | |
| {"text": "### State\nConfusion: 5.016836\nAction: question\nReward: 1.476656\nNext Confusion: 4.174343"} | |
| {"text": "### State\nConfusion: 3.783532\nAction: explain\nReward: 0.61822\nNext Confusion: 3.470704"} | |
| {"text": "### State\nConfusion: 7.004241\nAction: analogize\nReward: 0.271521\nNext Confusion: 6.929998"} | |
| {"text": "### State\nConfusion: 3.788744\nAction: correct_fact\nReward: 0.356577\nNext Confusion: 3.955624"} | |
| {"text": "### State\nConfusion: 3.585034\nAction: analogize\nReward: 0.180996\nNext Confusion: 3.571377"} | |
| {"text": "### State\nConfusion: 5.884622\nAction: analogize\nReward: 0.365228\nNext Confusion: 5.668638"} | |
| {"text": "### State\nConfusion: 3.248542\nAction: analogize\nReward: 0.325974\nNext Confusion: 3.396679"} | |
| {"text": "### State\nConfusion: 4.457415\nAction: question\nReward: -0.504474\nNext Confusion: 5.242377"} | |
| {"text": "### State\nConfusion: 2.183769\nAction: analogize\nReward: -0.43012\nNext Confusion: 2.796507"} | |
| {"text": "### State\nConfusion: 3.337488\nAction: question\nReward: 0.915789\nNext Confusion: 3.028513"} | |
| {"text": "### State\nConfusion: 3.885993\nAction: analogize\nReward: 0.845579\nNext Confusion: 4.075472"} | |
| {"text": "### State\nConfusion: 5.473674\nAction: explain\nReward: 0.411274\nNext Confusion: 5.449886"} | |
| {"text": "### State\nConfusion: 6.636641\nAction: analogize\nReward: 0.105483\nNext Confusion: 6.738043"} | |
| {"text": "### State\nConfusion: 3.585574\nAction: question\nReward: 0.51839\nNext Confusion: 3.376465"} | |
| {"text": "### State\nConfusion: 4.07502\nAction: question\nReward: 1.004779\nNext Confusion: 3.735128"} | |
| {"text": "### State\nConfusion: 2.115222\nAction: analogize\nReward: -1.265686\nNext Confusion: 2.850984"} | |
| {"text": "### State\nConfusion: 3.176735\nAction: explain\nReward: 0.091548\nNext Confusion: 3.295829"} | |
| {"text": "### State\nConfusion: 5.73152\nAction: worked_example\nReward: 0.402069\nNext Confusion: 5.152857"} | |
| {"text": "### State\nConfusion: 2.005058\nAction: analogize\nReward: -1.365192\nNext Confusion: 2.684636"} | |
| {"text": "### State\nConfusion: 3.535708\nAction: explain\nReward: 0.106466\nNext Confusion: 3.094336"} | |
| {"text": "### State\nConfusion: 7.704071\nAction: explain\nReward: 0.721175\nNext Confusion: 6.908477"} | |
| {"text": "### State\nConfusion: 5.243719\nAction: explain\nReward: 1.340814\nNext Confusion: 4.052736"} | |
| {"text": "### State\nConfusion: 7.535902\nAction: analogize\nReward: 0.726927\nNext Confusion: 7.673487"} | |
| {"text": "### State\nConfusion: 4.64706\nAction: analogize\nReward: -0.708878\nNext Confusion: 5.376009"} | |
| {"text": "### State\nConfusion: 7.268917\nAction: correct_fact\nReward: -1.127313\nNext Confusion: 7.566716"} | |
| {"text": "### State\nConfusion: 5.639025\nAction: analogize\nReward: -0.472023\nNext Confusion: 6.138049"} | |
| {"text": "### State\nConfusion: 6.242031\nAction: analogize\nReward: 0.58307\nNext Confusion: 5.923123"} | |
| {"text": "### State\nConfusion: 3.121297\nAction: analogize\nReward: -0.48891\nNext Confusion: 3.906541"} | |
| {"text": "### State\nConfusion: 5.029388\nAction: correct_fact\nReward: -1.002845\nNext Confusion: 6.173575"} | |
| {"text": "### State\nConfusion: 5.053395\nAction: analogize\nReward: -0.313301\nNext Confusion: 5.932772"} | |
| {"text": "### State\nConfusion: 3.194567\nAction: analogize\nReward: 0.039439\nNext Confusion: 3.161913"} | |
| {"text": "### State\nConfusion: 4.130982\nAction: explain\nReward: 0.624081\nNext Confusion: 3.669775"} | |
| {"text": "### State\nConfusion: 7.574032\nAction: analogize\nReward: -0.309397\nNext Confusion: 8.215817"} | |
| {"text": "### State\nConfusion: 6.767454\nAction: analogize\nReward: -0.829196\nNext Confusion: 7.24061"} | |
| {"text": "### State\nConfusion: 2.361092\nAction: analogize\nReward: 0.40583\nNext Confusion: 2.588848"} | |
| {"text": "### State\nConfusion: 3.66922\nAction: analogize\nReward: -0.820735\nNext Confusion: 4.432349"} | |
| {"text": "### State\nConfusion: 6.70331\nAction: correct_fact\nReward: 0.493159\nNext Confusion: 6.04956"} | |
| {"text": "### State\nConfusion: 4.071138\nAction: explain\nReward: -0.054266\nNext Confusion: 3.786052"} | |
| {"text": "### State\nConfusion: 7.498981\nAction: worked_example\nReward: 0.760847\nNext Confusion: 7.090668"} | |
| {"text": "### State\nConfusion: 4.860355\nAction: analogize\nReward: 0.671602\nNext Confusion: 5.004103"} | |
| {"text": "### State\nConfusion: 5.043371\nAction: analogize\nReward: -1.172208\nNext Confusion: 6.048717"} | |
| {"text": "### State\nConfusion: 7.594638\nAction: question\nReward: 0.169686\nNext Confusion: 7.070739"} | |
| {"text": "### State\nConfusion: 4.307424\nAction: question\nReward: 0.69544\nNext Confusion: 3.544158"} | |
| {"text": "### State\nConfusion: 3.624445\nAction: analogize\nReward: -1.039277\nNext Confusion: 4.741978"} | |
| {"text": "### State\nConfusion: 5.519311\nAction: analogize\nReward: -0.172748\nNext Confusion: 6.113349"} | |
| {"text": "### State\nConfusion: 7.701839\nAction: worked_example\nReward: 0.471455\nNext Confusion: 7.059561"} | |
| {"text": "### State\nConfusion: 3.533838\nAction: analogize\nReward: -1.50476\nNext Confusion: 4.605111"} | |
| {"text": "### State\nConfusion: 4.857623\nAction: analogize\nReward: 0.089029\nNext Confusion: 5.381652"} | |
| {"text": "### State\nConfusion: 5.524953\nAction: explain\nReward: 1.281716\nNext Confusion: 4.764929"} | |
| {"text": "### State\nConfusion: 4.382893\nAction: analogize\nReward: 0.915903\nNext Confusion: 4.357092"} | |
| {"text": "### State\nConfusion: 3.510692\nAction: worked_example\nReward: 2.042163\nNext Confusion: 1.725706"} | |
| {"text": "### State\nConfusion: 3.533618\nAction: analogize\nReward: -0.318574\nNext Confusion: 3.606989"} | |
| {"text": "### State\nConfusion: 4.581437\nAction: analogize\nReward: -0.908441\nNext Confusion: 5.250843"} | |
| {"text": "### State\nConfusion: 3.491754\nAction: explain\nReward: 0.09608\nNext Confusion: 3.502002"} | |
| {"text": "### State\nConfusion: 2.910742\nAction: analogize\nReward: -0.185903\nNext Confusion: 2.924014"} | |
| {"text": "### State\nConfusion: 4.220527\nAction: analogize\nReward: 0.157805\nNext Confusion: 4.241376"} | |
| {"text": "### State\nConfusion: 4.236191\nAction: worked_example\nReward: 1.708519\nNext Confusion: 2.755738"} | |
| {"text": "### State\nConfusion: 6.356474\nAction: worked_example\nReward: 1.986318\nNext Confusion: 4.847041"} | |
| {"text": "### State\nConfusion: 6.596906\nAction: explain\nReward: 0.945082\nNext Confusion: 5.777353"} | |
| {"text": "### State\nConfusion: 9.065663\nAction: worked_example\nReward: 2.490255\nNext Confusion: 7.027289"} | |
| {"text": "### State\nConfusion: 5.798057\nAction: question\nReward: 1.632281\nNext Confusion: 4.945551"} | |
| {"text": "### State\nConfusion: 7.547707\nAction: analogize\nReward: -0.470323\nNext Confusion: 8.281344"} | |
| {"text": "### State\nConfusion: 6.886375\nAction: correct_fact\nReward: 0.841331\nNext Confusion: 6.200259"} | |
| {"text": "### State\nConfusion: 2.545521\nAction: analogize\nReward: -0.931296\nNext Confusion: 2.997872"} | |
| {"text": "### State\nConfusion: 3.436865\nAction: question\nReward: 0.605545\nNext Confusion: 3.034606"} | |
| {"text": "### State\nConfusion: 5.48512\nAction: question\nReward: 0.552247\nNext Confusion: 4.395566"} | |
| {"text": "### State\nConfusion: 1.99361\nAction: correct_fact\nReward: 0.398921\nNext Confusion: 2.023035"} | |
| {"text": "### State\nConfusion: 5.620854\nAction: analogize\nReward: -0.537399\nNext Confusion: 6.000232"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.398559\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 4.712558\nAction: explain\nReward: 0.35094\nNext Confusion: 4.100639"} | |
| {"text": "### State\nConfusion: 6.00095\nAction: question\nReward: -0.739487\nNext Confusion: 6.590231"} | |
| {"text": "### State\nConfusion: 3.43446\nAction: analogize\nReward: -0.346556\nNext Confusion: 3.86933"} | |
| {"text": "### State\nConfusion: 4.312879\nAction: correct_fact\nReward: -0.838297\nNext Confusion: 4.408473"} | |
| {"text": "### State\nConfusion: 5.588582\nAction: analogize\nReward: -0.240599\nNext Confusion: 5.828334"} | |
| {"text": "### State\nConfusion: 4.105963\nAction: analogize\nReward: 0.322535\nNext Confusion: 4.015285"} | |
| {"text": "### State\nConfusion: 3.421049\nAction: analogize\nReward: 0.553435\nNext Confusion: 3.008563"} | |
| {"text": "### State\nConfusion: 7.728908\nAction: question\nReward: 1.823694\nNext Confusion: 6.789086"} | |
| {"text": "### State\nConfusion: 3.844443\nAction: worked_example\nReward: 0.401461\nNext Confusion: 2.993685"} | |
| {"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: -0.052131\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 4.919034\nAction: analogize\nReward: -1.120059\nNext Confusion: 5.821943"} | |
| {"text": "### State\nConfusion: 6.176862\nAction: analogize\nReward: 0.605978\nNext Confusion: 6.245404"} | |
| {"text": "### State\nConfusion: 3.05765\nAction: analogize\nReward: 0.151237\nNext Confusion: 3.941056"} | |
| {"text": "### State\nConfusion: 4.497888\nAction: analogize\nReward: 0.20143\nNext Confusion: 4.707649"} | |
| {"text": "### State\nConfusion: 2.331526\nAction: explain\nReward: -0.390644\nNext Confusion: 1.887389"} | |
| {"text": "### State\nConfusion: 5.476439\nAction: correct_fact\nReward: -0.625659\nNext Confusion: 5.484992"} | |
| {"text": "### State\nConfusion: 4.749814\nAction: correct_fact\nReward: 0.255413\nNext Confusion: 4.287553"} | |
| {"text": "### State\nConfusion: 4.367284\nAction: correct_fact\nReward: 0.754914\nNext Confusion: 4.831099"} | |
| {"text": "### State\nConfusion: 3.990207\nAction: analogize\nReward: 0.489978\nNext Confusion: 3.777346"} | |
| {"text": "### State\nConfusion: 2.36078\nAction: correct_fact\nReward: 0.131446\nNext Confusion: 2.419431"} | |
| {"text": "### State\nConfusion: 5.181594\nAction: analogize\nReward: -1.023529\nNext Confusion: 6.165031"} | |
| {"text": "### State\nConfusion: 6.155968\nAction: worked_example\nReward: 1.867317\nNext Confusion: 4.540814"} | |
| {"text": "### State\nConfusion: 3.921279\nAction: worked_example\nReward: 1.38055\nNext Confusion: 3.467207"} | |
| {"text": "### State\nConfusion: 4.874241\nAction: analogize\nReward: 0.049841\nNext Confusion: 4.888297"} | |
| {"text": "### State\nConfusion: 3.457221\nAction: analogize\nReward: -0.881108\nNext Confusion: 3.81485"} | |
| {"text": "### State\nConfusion: 3.625238\nAction: analogize\nReward: -1.632032\nNext Confusion: 4.600665"} | |
| {"text": "### State\nConfusion: 3.610027\nAction: correct_fact\nReward: 0.990948\nNext Confusion: 3.138358"} | |
| {"text": "### State\nConfusion: 4.592354\nAction: explain\nReward: -1.103914\nNext Confusion: 4.699113"} | |
| {"text": "### State\nConfusion: 3.965907\nAction: analogize\nReward: 0.427269\nNext Confusion: 4.632581"} | |
| {"text": "### State\nConfusion: 6.224049\nAction: analogize\nReward: 0.403771\nNext Confusion: 6.236999"} | |
| {"text": "### State\nConfusion: 3.513805\nAction: explain\nReward: 0.413231\nNext Confusion: 3.090316"} | |
| {"text": "### State\nConfusion: 3.581797\nAction: analogize\nReward: -1.081045\nNext Confusion: 4.690057"} | |
| {"text": "### State\nConfusion: 4.502285\nAction: analogize\nReward: -0.115318\nNext Confusion: 4.403182"} | |
| {"text": "### State\nConfusion: 1.682893\nAction: analogize\nReward: 0.704574\nNext Confusion: 1.458029"} | |
| {"text": "### State\nConfusion: 3.767197\nAction: explain\nReward: -0.390816\nNext Confusion: 3.399625"} | |
| {"text": "### State\nConfusion: 9.782035\nAction: explain\nReward: 1.040522\nNext Confusion: 9.875731"} | |
| {"text": "### State\nConfusion: 3.654296\nAction: analogize\nReward: 0.143296\nNext Confusion: 3.830031"} | |
| {"text": "### State\nConfusion: 5.027151\nAction: analogize\nReward: -0.585108\nNext Confusion: 5.767681"} | |
| {"text": "### State\nConfusion: 6.183936\nAction: question\nReward: 1.22717\nNext Confusion: 5.29231"} | |
| {"text": "### State\nConfusion: 3.36798\nAction: analogize\nReward: -0.535297\nNext Confusion: 3.329399"} | |
| {"text": "### State\nConfusion: 2.521666\nAction: correct_fact\nReward: -0.191486\nNext Confusion: 2.38397"} | |
| {"text": "### State\nConfusion: 3.486265\nAction: explain\nReward: -1.035973\nNext Confusion: 3.673044"} | |
| {"text": "### State\nConfusion: 2.735054\nAction: analogize\nReward: -0.808363\nNext Confusion: 3.324509"} | |
| {"text": "### State\nConfusion: 3.34873\nAction: explain\nReward: -0.335458\nNext Confusion: 3.526425"} | |
| {"text": "### State\nConfusion: 5.857225\nAction: analogize\nReward: -0.201824\nNext Confusion: 6.406119"} | |
| {"text": "### State\nConfusion: 5.450879\nAction: worked_example\nReward: 0.994392\nNext Confusion: 4.314799"} | |
| {"text": "### State\nConfusion: 6.129226\nAction: explain\nReward: -0.347463\nNext Confusion: 6.183104"} | |
| {"text": "### State\nConfusion: 5.796948\nAction: worked_example\nReward: 1.181532\nNext Confusion: 4.332111"} | |
| {"text": "### State\nConfusion: 4.019942\nAction: analogize\nReward: -1.097066\nNext Confusion: 4.38926"} | |
| {"text": "### State\nConfusion: 6.346842\nAction: analogize\nReward: -0.05843\nNext Confusion: 6.536785"} | |
| {"text": "### State\nConfusion: 4.351801\nAction: analogize\nReward: 1.803047\nNext Confusion: 3.652701"} | |
| {"text": "### State\nConfusion: 3.17073\nAction: question\nReward: 0.985057\nNext Confusion: 2.444106"} | |
| {"text": "### State\nConfusion: 3.334849\nAction: analogize\nReward: -0.158321\nNext Confusion: 3.459907"} | |
| {"text": "### State\nConfusion: 2.858408\nAction: analogize\nReward: -1.283425\nNext Confusion: 4.023058"} | |
| {"text": "### State\nConfusion: 3.845057\nAction: worked_example\nReward: 2.053946\nNext Confusion: 2.178561"} | |
| {"text": "### State\nConfusion: 3.872064\nAction: analogize\nReward: -0.908379\nNext Confusion: 4.351231"} | |
| {"text": "### State\nConfusion: 6.930616\nAction: analogize\nReward: -0.606489\nNext Confusion: 7.437333"} | |
| {"text": "### State\nConfusion: 4.201954\nAction: analogize\nReward: -0.143287\nNext Confusion: 4.545249"} | |
| {"text": "### State\nConfusion: 3.460118\nAction: analogize\nReward: -0.210642\nNext Confusion: 3.958432"} | |
| {"text": "### State\nConfusion: 5.932957\nAction: analogize\nReward: -0.423899\nNext Confusion: 6.711923"} | |
| {"text": "### State\nConfusion: 6.451794\nAction: analogize\nReward: 0.163913\nNext Confusion: 6.371612"} | |
| {"text": "### State\nConfusion: 6.143884\nAction: analogize\nReward: -1.50412\nNext Confusion: 7.402514"} | |
| {"text": "### State\nConfusion: 3.787366\nAction: analogize\nReward: 0.144803\nNext Confusion: 3.96288"} | |
| {"text": "### State\nConfusion: 3.46052\nAction: analogize\nReward: -0.435846\nNext Confusion: 4.18104"} | |
| {"text": "### State\nConfusion: 5.670783\nAction: analogize\nReward: -1.127217\nNext Confusion: 6.834265"} | |
| {"text": "### State\nConfusion: 6.505923\nAction: analogize\nReward: 0.189847\nNext Confusion: 6.595182"} | |
| {"text": "### State\nConfusion: 3.762953\nAction: worked_example\nReward: -0.449213\nNext Confusion: 4.189637"} | |
| {"text": "### State\nConfusion: 7.300265\nAction: analogize\nReward: -0.242874\nNext Confusion: 7.255626"} | |
| {"text": "### State\nConfusion: 3.446495\nAction: explain\nReward: 0.351842\nNext Confusion: 3.319413"} | |
| {"text": "### State\nConfusion: 6.007587\nAction: analogize\nReward: -0.389668\nNext Confusion: 6.238716"} | |
| {"text": "### State\nConfusion: 4.569729\nAction: worked_example\nReward: 1.310543\nNext Confusion: 3.685556"} | |
| {"text": "### State\nConfusion: 4.289463\nAction: question\nReward: -3.159156\nNext Confusion: 3.588308"} | |
| {"text": "### State\nConfusion: 3.805428\nAction: analogize\nReward: -0.137826\nNext Confusion: 4.271468"} | |
| {"text": "### State\nConfusion: 5.339787\nAction: analogize\nReward: -0.725403\nNext Confusion: 5.959084"} | |
| {"text": "### State\nConfusion: 6.367439\nAction: worked_example\nReward: -0.410931\nNext Confusion: 5.970048"} | |
| {"text": "### State\nConfusion: 4.185916\nAction: analogize\nReward: -1.11682\nNext Confusion: 4.895042"} | |
| {"text": "### State\nConfusion: 4.987749\nAction: explain\nReward: -0.259711\nNext Confusion: 5.626599"} | |
| {"text": "### State\nConfusion: 4.17443\nAction: explain\nReward: -0.007389\nNext Confusion: 3.915439"} | |
| {"text": "### State\nConfusion: 7.400229\nAction: analogize\nReward: -0.440491\nNext Confusion: 7.743072"} | |
| {"text": "### State\nConfusion: 5.192929\nAction: correct_fact\nReward: 0.679553\nNext Confusion: 5.149954"} | |
| {"text": "### State\nConfusion: 4.627472\nAction: analogize\nReward: -0.525458\nNext Confusion: 5.036076"} | |
| {"text": "### State\nConfusion: 3.448749\nAction: correct_fact\nReward: -0.686939\nNext Confusion: 4.127151"} | |
| {"text": "### State\nConfusion: 4.950369\nAction: analogize\nReward: -0.497006\nNext Confusion: 5.154066"} | |
| {"text": "### State\nConfusion: 3.633871\nAction: analogize\nReward: -0.05189\nNext Confusion: 4.004499"} | |
| {"text": "### State\nConfusion: 7.003464\nAction: worked_example\nReward: 2.687881\nNext Confusion: 4.899817"} | |
| {"text": "### State\nConfusion: 3.792127\nAction: analogize\nReward: -0.308802\nNext Confusion: 4.434352"} | |
| {"text": "### State\nConfusion: 3.283564\nAction: analogize\nReward: 1.190446\nNext Confusion: 3.436299"} | |
| {"text": "### State\nConfusion: 7.512014\nAction: analogize\nReward: -0.078398\nNext Confusion: 8.393634"} | |
| {"text": "### State\nConfusion: 3.925258\nAction: correct_fact\nReward: 0.514447\nNext Confusion: 3.727096"} | |
| {"text": "### State\nConfusion: 6.571545\nAction: worked_example\nReward: 1.64758\nNext Confusion: 5.171705"} | |
| {"text": "### State\nConfusion: 3.203366\nAction: analogize\nReward: -0.582273\nNext Confusion: 3.776733"} | |
| {"text": "### State\nConfusion: 8.83236\nAction: question\nReward: -1.0229\nNext Confusion: 9.455678"} | |
| {"text": "### State\nConfusion: 6.196302\nAction: analogize\nReward: -0.494157\nNext Confusion: 6.269166"} | |
| {"text": "### State\nConfusion: 7.119833\nAction: analogize\nReward: -0.632348\nNext Confusion: 7.671856"} | |
| {"text": "### State\nConfusion: 3.446799\nAction: analogize\nReward: -0.235273\nNext Confusion: 3.195469"} | |
| {"text": "### State\nConfusion: 7.038426\nAction: worked_example\nReward: 1.144741\nNext Confusion: 5.865378"} | |
| {"text": "### State\nConfusion: 4.291528\nAction: explain\nReward: -1.507047\nNext Confusion: 5.162663"} | |
| {"text": "### State\nConfusion: 6.868341\nAction: question\nReward: 1.142\nNext Confusion: 6.448683"} | |
| {"text": "### State\nConfusion: 3.429575\nAction: question\nReward: 0.260133\nNext Confusion: 3.166194"} | |
| {"text": "### State\nConfusion: 4.569086\nAction: explain\nReward: 1.034115\nNext Confusion: 3.786755"} | |
| {"text": "### State\nConfusion: 4.636881\nAction: analogize\nReward: 1.196279\nNext Confusion: 4.518783"} | |
| {"text": "### State\nConfusion: 4.169616\nAction: explain\nReward: 0.036306\nNext Confusion: 3.886209"} | |
| {"text": "### State\nConfusion: 5.246602\nAction: analogize\nReward: -0.15856\nNext Confusion: 5.307223"} | |
| {"text": "### State\nConfusion: 3.581232\nAction: analogize\nReward: 0.436844\nNext Confusion: 3.550339"} | |
| {"text": "### State\nConfusion: 5.709446\nAction: analogize\nReward: -0.037479\nNext Confusion: 6.23242"} | |
| {"text": "### State\nConfusion: 6.971954\nAction: analogize\nReward: -0.883138\nNext Confusion: 7.784509"} | |
| {"text": "### State\nConfusion: 4.800322\nAction: explain\nReward: -0.156482\nNext Confusion: 5.04152"} | |
| {"text": "### State\nConfusion: 5.968896\nAction: question\nReward: 0.735787\nNext Confusion: 5.41699"} | |
| {"text": "### State\nConfusion: 7.609529\nAction: analogize\nReward: -0.907761\nNext Confusion: 8.138722"} | |
| {"text": "### State\nConfusion: 3.348556\nAction: analogize\nReward: -0.522565\nNext Confusion: 3.955441"} | |
| {"text": "### State\nConfusion: 6.54799\nAction: question\nReward: 0.839793\nNext Confusion: 5.740779"} | |
| {"text": "### State\nConfusion: 5.126441\nAction: explain\nReward: 2.21922\nNext Confusion: 3.600956"} | |
| {"text": "### State\nConfusion: 4.444181\nAction: analogize\nReward: -0.272342\nNext Confusion: 4.869495"} | |
| {"text": "### State\nConfusion: 4.507113\nAction: question\nReward: 0.040065\nNext Confusion: 4.782022"} | |
| {"text": "### State\nConfusion: 4.47993\nAction: question\nReward: 1.40697\nNext Confusion: 3.401652"} | |
| {"text": "### State\nConfusion: 6.068524\nAction: analogize\nReward: -0.164\nNext Confusion: 6.512142"} | |
| {"text": "### State\nConfusion: 8.649179\nAction: analogize\nReward: -0.362601\nNext Confusion: 8.878508"} | |
| {"text": "### State\nConfusion: 2.618763\nAction: analogize\nReward: -0.2353\nNext Confusion: 3.374444"} | |
| {"text": "### State\nConfusion: 3.655497\nAction: analogize\nReward: 0.47788\nNext Confusion: 3.574526"} | |
| {"text": "### State\nConfusion: 6.825738\nAction: question\nReward: 1.524791\nNext Confusion: 5.907894"} | |
| {"text": "### State\nConfusion: 3.849934\nAction: analogize\nReward: -0.454719\nNext Confusion: 4.542527"} | |
| {"text": "### State\nConfusion: 3.52636\nAction: analogize\nReward: -1.335167\nNext Confusion: 4.591458"} | |
| {"text": "### State\nConfusion: 5.653258\nAction: analogize\nReward: 0.425317\nNext Confusion: 6.027638"} | |
| {"text": "### State\nConfusion: 6.79264\nAction: question\nReward: -0.456067\nNext Confusion: 6.680654"} | |
| {"text": "### State\nConfusion: 5.825899\nAction: explain\nReward: 1.137091\nNext Confusion: 4.860346"} | |
| {"text": "### State\nConfusion: 4.024473\nAction: analogize\nReward: -1.130899\nNext Confusion: 5.080507"} | |
| {"text": "### State\nConfusion: 5.027373\nAction: question\nReward: -0.279873\nNext Confusion: 5.103498"} | |
| {"text": "### State\nConfusion: 4.107711\nAction: analogize\nReward: 0.062419\nNext Confusion: 3.716159"} | |
| {"text": "### State\nConfusion: 6.085206\nAction: analogize\nReward: -0.034186\nNext Confusion: 6.579785"} | |
| {"text": "### State\nConfusion: 9.329837\nAction: correct_fact\nReward: -0.34011\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 7.806332\nAction: analogize\nReward: -1.354402\nNext Confusion: 8.877954"} | |
| {"text": "### State\nConfusion: 9.883806\nAction: analogize\nReward: -0.513869\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 6.910408\nAction: question\nReward: -0.016033\nNext Confusion: 6.908116"} | |
| {"text": "### State\nConfusion: 4.543718\nAction: analogize\nReward: 0.746005\nNext Confusion: 4.320139"} | |
| {"text": "### State\nConfusion: 3.82457\nAction: worked_example\nReward: 0.980693\nNext Confusion: 2.952715"} | |
| {"text": "### State\nConfusion: 4.841876\nAction: analogize\nReward: 0.688047\nNext Confusion: 4.906706"} | |
| {"text": "### State\nConfusion: 4.36744\nAction: explain\nReward: 0.149362\nNext Confusion: 4.382951"} | |
| {"text": "### State\nConfusion: 3.4519\nAction: analogize\nReward: -0.289845\nNext Confusion: 3.852726"} | |
| {"text": "### State\nConfusion: 4.559366\nAction: analogize\nReward: 0.248624\nNext Confusion: 4.250278"} | |
| {"text": "### State\nConfusion: 4.871343\nAction: worked_example\nReward: 2.006758\nNext Confusion: 3.021118"} | |
| {"text": "### State\nConfusion: 2.90186\nAction: worked_example\nReward: 0.47045\nNext Confusion: 1.383204"} | |
| {"text": "### State\nConfusion: 3.085492\nAction: explain\nReward: 0.036318\nNext Confusion: 3.096477"} | |
| {"text": "### State\nConfusion: 7.723372\nAction: analogize\nReward: -0.830827\nNext Confusion: 8.442617"} | |
| {"text": "### State\nConfusion: 4.313051\nAction: analogize\nReward: 0.710948\nNext Confusion: 4.525727"} | |
| {"text": "### State\nConfusion: 3.148665\nAction: correct_fact\nReward: 0.685984\nNext Confusion: 2.654194"} | |
| {"text": "### State\nConfusion: 2.622224\nAction: explain\nReward: -0.410488\nNext Confusion: 2.958908"} | |
| {"text": "### State\nConfusion: 4.929009\nAction: explain\nReward: 0.961794\nNext Confusion: 4.239855"} | |
| {"text": "### State\nConfusion: 3.513432\nAction: question\nReward: 1.100215\nNext Confusion: 2.54015"} | |
| {"text": "### State\nConfusion: 6.457459\nAction: analogize\nReward: -0.458103\nNext Confusion: 6.367057"} | |
| {"text": "### State\nConfusion: 6.632654\nAction: analogize\nReward: 0.177102\nNext Confusion: 6.513361"} | |
| {"text": "### State\nConfusion: 6.090064\nAction: question\nReward: 0.381686\nNext Confusion: 5.960601"} | |
| {"text": "### State\nConfusion: 2.783939\nAction: analogize\nReward: 0.773949\nNext Confusion: 2.812135"} | |
| {"text": "### State\nConfusion: 3.035819\nAction: analogize\nReward: -0.798472\nNext Confusion: 4.235182"} | |
| {"text": "### State\nConfusion: 2.786038\nAction: worked_example\nReward: 0.139796\nNext Confusion: 2.822438"} | |
| {"text": "### State\nConfusion: 3.744302\nAction: correct_fact\nReward: 0.623173\nNext Confusion: 3.392101"} | |
| {"text": "### State\nConfusion: 3.43836\nAction: explain\nReward: 0.338272\nNext Confusion: 3.515679"} | |
| {"text": "### State\nConfusion: 4.052148\nAction: analogize\nReward: -0.345972\nNext Confusion: 4.051585"} | |
| {"text": "### State\nConfusion: 5.878376\nAction: analogize\nReward: 0.581745\nNext Confusion: 5.632409"} | |
| {"text": "### State\nConfusion: 7.096414\nAction: worked_example\nReward: 0.754376\nNext Confusion: 6.939009"} | |
| {"text": "### State\nConfusion: 3.708639\nAction: correct_fact\nReward: 0.057415\nNext Confusion: 4.125999"} | |
| {"text": "### State\nConfusion: 7.392665\nAction: worked_example\nReward: 2.514391\nNext Confusion: 5.432892"} | |
| {"text": "### State\nConfusion: 6.486508\nAction: analogize\nReward: -1.212701\nNext Confusion: 7.813913"} | |
| {"text": "### State\nConfusion: 6.889399\nAction: analogize\nReward: -1.428461\nNext Confusion: 8.115419"} | |
| {"text": "### State\nConfusion: 2.396941\nAction: analogize\nReward: 1.012256\nNext Confusion: 2.412092"} | |
| {"text": "### State\nConfusion: 4.669146\nAction: worked_example\nReward: 0.570246\nNext Confusion: 4.402059"} | |
| {"text": "### State\nConfusion: 4.528219\nAction: analogize\nReward: 0.682524\nNext Confusion: 4.172763"} | |
| {"text": "### State\nConfusion: 7.065587\nAction: worked_example\nReward: 0.844311\nNext Confusion: 5.942281"} | |
| {"text": "### State\nConfusion: 8.21628\nAction: explain\nReward: 1.303762\nNext Confusion: 7.426588"} | |
| {"text": "### State\nConfusion: 4.642588\nAction: correct_fact\nReward: 0.030873\nNext Confusion: 4.616276"} | |
| {"text": "### State\nConfusion: 6.436354\nAction: worked_example\nReward: 1.043848\nNext Confusion: 5.170756"} | |
| {"text": "### State\nConfusion: 3.18685\nAction: question\nReward: 0.569739\nNext Confusion: 2.5131"} | |
| {"text": "### State\nConfusion: 4.502162\nAction: analogize\nReward: 0.123031\nNext Confusion: 4.256639"} | |
| {"text": "### State\nConfusion: 3.236621\nAction: analogize\nReward: -1.226024\nNext Confusion: 4.357628"} | |
| {"text": "### State\nConfusion: 2.31076\nAction: analogize\nReward: 0.351427\nNext Confusion: 2.393765"} | |
| {"text": "### State\nConfusion: 5.464483\nAction: analogize\nReward: -0.107958\nNext Confusion: 5.520666"} | |
| {"text": "### State\nConfusion: 3.708102\nAction: explain\nReward: 0.049102\nNext Confusion: 2.566842"} | |
| {"text": "### State\nConfusion: 4.404992\nAction: analogize\nReward: -0.510053\nNext Confusion: 4.818144"} | |
| {"text": "### State\nConfusion: 3.450883\nAction: analogize\nReward: -1.250229\nNext Confusion: 4.223539"} | |
| {"text": "### State\nConfusion: 5.159201\nAction: question\nReward: -0.898699\nNext Confusion: 5.392422"} | |
| {"text": "### State\nConfusion: 5.612621\nAction: analogize\nReward: -1.899629\nNext Confusion: 6.393727"} | |
| {"text": "### State\nConfusion: 2.496055\nAction: analogize\nReward: 0.083258\nNext Confusion: 2.69881"} | |
| {"text": "### State\nConfusion: 4.094158\nAction: analogize\nReward: -0.281068\nNext Confusion: 4.18822"} | |
| {"text": "### State\nConfusion: 6.628239\nAction: question\nReward: 0.187419\nNext Confusion: 6.615895"} | |
| {"text": "### State\nConfusion: 5.738016\nAction: analogize\nReward: -1.199077\nNext Confusion: 6.711353"} | |
| {"text": "### State\nConfusion: 6.312516\nAction: analogize\nReward: 0.42972\nNext Confusion: 6.950694"} | |
| {"text": "### State\nConfusion: 3.805835\nAction: analogize\nReward: -0.01728\nNext Confusion: 3.534842"} | |
| {"text": "### State\nConfusion: 7.522794\nAction: explain\nReward: 0.367578\nNext Confusion: 7.324257"} | |
| {"text": "### State\nConfusion: 8.889452\nAction: analogize\nReward: -0.572201\nNext Confusion: 8.881808"} | |
| {"text": "### State\nConfusion: 4.964946\nAction: analogize\nReward: -0.624785\nNext Confusion: 5.750177"} | |
| {"text": "### State\nConfusion: 4.519431\nAction: analogize\nReward: -0.219609\nNext Confusion: 5.098336"} | |
| {"text": "### State\nConfusion: 6.143557\nAction: question\nReward: 0.970562\nNext Confusion: 5.958963"} | |
| {"text": "### State\nConfusion: 7.568764\nAction: analogize\nReward: -1.154265\nNext Confusion: 8.696467"} | |
| {"text": "### State\nConfusion: 4.139323\nAction: explain\nReward: -0.497746\nNext Confusion: 4.463565"} | |
| {"text": "### State\nConfusion: 8.346292\nAction: explain\nReward: 0.947031\nNext Confusion: 7.685566"} | |
| {"text": "### State\nConfusion: 2.578793\nAction: worked_example\nReward: 0.8092\nNext Confusion: 1.972243"} | |
| {"text": "### State\nConfusion: 3.574151\nAction: question\nReward: -0.001893\nNext Confusion: 2.94033"} | |
| {"text": "### State\nConfusion: 3.391303\nAction: analogize\nReward: 0.159351\nNext Confusion: 2.838915"} | |
| {"text": "### State\nConfusion: 4.434367\nAction: correct_fact\nReward: 0.725639\nNext Confusion: 3.582488"} | |
| {"text": "### State\nConfusion: 4.485438\nAction: analogize\nReward: -1.644401\nNext Confusion: 5.267482"} | |
| {"text": "### State\nConfusion: 3.795971\nAction: worked_example\nReward: 2.16479\nNext Confusion: 1.835349"} | |
| {"text": "### State\nConfusion: 4.48812\nAction: analogize\nReward: -0.252056\nNext Confusion: 4.740558"} | |
| {"text": "### State\nConfusion: 6.62453\nAction: analogize\nReward: -0.599549\nNext Confusion: 7.097754"} | |
| {"text": "### State\nConfusion: 3.436073\nAction: analogize\nReward: -0.128818\nNext Confusion: 3.550781"} | |
| {"text": "### State\nConfusion: 6.385056\nAction: explain\nReward: -0.614619\nNext Confusion: 6.825878"} | |
| {"text": "### State\nConfusion: 4.543844\nAction: analogize\nReward: -0.357182\nNext Confusion: 4.814017"} | |
| {"text": "### State\nConfusion: 5.016735\nAction: correct_fact\nReward: -1.125121\nNext Confusion: 5.913329"} | |
| {"text": "### State\nConfusion: 3.947467\nAction: analogize\nReward: -1.027909\nNext Confusion: 5.059095"} | |
| {"text": "### State\nConfusion: 5.171448\nAction: analogize\nReward: 0.577806\nNext Confusion: 5.198316"} | |
| {"text": "### State\nConfusion: 8.871197\nAction: question\nReward: -0.803998\nNext Confusion: 9.891808"} | |
| {"text": "### State\nConfusion: 4.985002\nAction: correct_fact\nReward: 0.845959\nNext Confusion: 4.625676"} | |
| {"text": "### State\nConfusion: 5.58028\nAction: analogize\nReward: 0.36508\nNext Confusion: 5.53048"} | |
| {"text": "### State\nConfusion: 3.853096\nAction: question\nReward: 1.561354\nNext Confusion: 3.282271"} | |
| {"text": "### State\nConfusion: 5.917102\nAction: analogize\nReward: 0.05721\nNext Confusion: 5.640356"} | |
| {"text": "### State\nConfusion: 3.994782\nAction: analogize\nReward: -0.043592\nNext Confusion: 4.123508"} | |
| {"text": "### State\nConfusion: 4.253486\nAction: analogize\nReward: -1.054318\nNext Confusion: 5.083886"} | |
| {"text": "### State\nConfusion: 3.477302\nAction: correct_fact\nReward: 0.558578\nNext Confusion: 3.341924"} | |
| {"text": "### State\nConfusion: 6.411841\nAction: question\nReward: 0.474881\nNext Confusion: 5.896121"} | |
| {"text": "### State\nConfusion: 5.725423\nAction: explain\nReward: -0.616484\nNext Confusion: 6.39949"} | |
| {"text": "### State\nConfusion: 6.111221\nAction: analogize\nReward: 0.543065\nNext Confusion: 5.609612"} | |
| {"text": "### State\nConfusion: 4.020193\nAction: analogize\nReward: -1.372713\nNext Confusion: 4.147141"} | |
| {"text": "### State\nConfusion: 3.316797\nAction: explain\nReward: 1.090464\nNext Confusion: 2.999543"} | |
| {"text": "### State\nConfusion: 2.233988\nAction: question\nReward: 0.011496\nNext Confusion: 2.114241"} | |
| {"text": "### State\nConfusion: 7.038006\nAction: question\nReward: -0.747112\nNext Confusion: 6.650014"} | |
| {"text": "### State\nConfusion: 5.194492\nAction: question\nReward: 0.315717\nNext Confusion: 4.832783"} | |
| {"text": "### State\nConfusion: 5.63014\nAction: analogize\nReward: 0.03927\nNext Confusion: 5.690156"} | |
| {"text": "### State\nConfusion: 3.946496\nAction: analogize\nReward: 0.064232\nNext Confusion: 5.154892"} | |
| {"text": "### State\nConfusion: 4.438356\nAction: analogize\nReward: 0.268679\nNext Confusion: 3.728327"} | |
| {"text": "### State\nConfusion: 5.453129\nAction: worked_example\nReward: 1.927628\nNext Confusion: 3.642222"} | |
| {"text": "### State\nConfusion: 9.513212\nAction: question\nReward: 0.707923\nNext Confusion: 8.978129"} | |
| {"text": "### State\nConfusion: 4.612859\nAction: question\nReward: 0.298371\nNext Confusion: 3.895462"} | |
| {"text": "### State\nConfusion: 4.710078\nAction: explain\nReward: -0.092907\nNext Confusion: 5.249335"} | |
| {"text": "### State\nConfusion: 5.980734\nAction: analogize\nReward: -0.194105\nNext Confusion: 6.089187"} | |
| {"text": "### State\nConfusion: 2.867381\nAction: analogize\nReward: -0.180572\nNext Confusion: 3.381789"} | |
| {"text": "### State\nConfusion: 3.278762\nAction: analogize\nReward: -0.748468\nNext Confusion: 4.113945"} | |
| {"text": "### State\nConfusion: 5.969358\nAction: correct_fact\nReward: 1.535565\nNext Confusion: 4.871317"} | |
| {"text": "### State\nConfusion: 7.275512\nAction: question\nReward: 0.812077\nNext Confusion: 6.31911"} | |
| {"text": "### State\nConfusion: 2.584528\nAction: analogize\nReward: 1.050292\nNext Confusion: 2.113127"} | |
| {"text": "### State\nConfusion: 3.503871\nAction: explain\nReward: 0.04849\nNext Confusion: 3.540186"} | |
| {"text": "### State\nConfusion: 4.614442\nAction: analogize\nReward: 0.522376\nNext Confusion: 5.167736"} | |
| {"text": "### State\nConfusion: 7.105223\nAction: explain\nReward: 0.918648\nNext Confusion: 6.37787"} | |
| {"text": "### State\nConfusion: 3.646493\nAction: question\nReward: 1.323374\nNext Confusion: 2.361944"} | |
| {"text": "### State\nConfusion: 4.541859\nAction: analogize\nReward: -0.008162\nNext Confusion: 4.688088"} | |
| {"text": "### State\nConfusion: 3.714432\nAction: correct_fact\nReward: -1.29251\nNext Confusion: 5.221759"} | |
| {"text": "### State\nConfusion: 4.098722\nAction: worked_example\nReward: 1.309035\nNext Confusion: 1.963477"} | |
| {"text": "### State\nConfusion: 2.639115\nAction: analogize\nReward: 1.241288\nNext Confusion: 1.959279"} | |
| {"text": "### State\nConfusion: 4.209787\nAction: explain\nReward: -0.674517\nNext Confusion: 4.646072"} | |
| {"text": "### State\nConfusion: 4.237337\nAction: analogize\nReward: 0.299678\nNext Confusion: 3.768544"} | |
| {"text": "### State\nConfusion: 4.216485\nAction: analogize\nReward: -0.08135\nNext Confusion: 4.33587"} | |
| {"text": "### State\nConfusion: 4.031052\nAction: explain\nReward: 0.003192\nNext Confusion: 3.64745"} | |
| {"text": "### State\nConfusion: 4.382634\nAction: worked_example\nReward: 1.73771\nNext Confusion: 2.649923"} | |
| {"text": "### State\nConfusion: 5.132607\nAction: explain\nReward: 0.382928\nNext Confusion: 4.589305"} | |
| {"text": "### State\nConfusion: 3.012049\nAction: correct_fact\nReward: 0.826988\nNext Confusion: 2.747293"} | |
| {"text": "### State\nConfusion: 3.640825\nAction: question\nReward: 0.142369\nNext Confusion: 3.34056"} | |
| {"text": "### State\nConfusion: 7.855445\nAction: analogize\nReward: -0.694125\nNext Confusion: 7.793014"} | |
| {"text": "### State\nConfusion: 3.575469\nAction: analogize\nReward: 0.731786\nNext Confusion: 2.930617"} | |
| {"text": "### State\nConfusion: 5.864926\nAction: correct_fact\nReward: -0.739393\nNext Confusion: 6.226672"} | |
| {"text": "### State\nConfusion: 4.742312\nAction: worked_example\nReward: 2.256657\nNext Confusion: 3.236478"} | |
| {"text": "### State\nConfusion: 3.681698\nAction: worked_example\nReward: 2.565199\nNext Confusion: 1.781532"} | |
| {"text": "### State\nConfusion: 5.45446\nAction: worked_example\nReward: 1.317665\nNext Confusion: 4.860599"} | |
| {"text": "### State\nConfusion: 3.974952\nAction: explain\nReward: 0.611576\nNext Confusion: 3.247132"} | |
| {"text": "### State\nConfusion: 4.964568\nAction: analogize\nReward: -1.248745\nNext Confusion: 5.991206"} | |
| {"text": "### State\nConfusion: 6.005152\nAction: question\nReward: -0.355016\nNext Confusion: 6.667488"} | |
| {"text": "### State\nConfusion: 3.601624\nAction: explain\nReward: 1.362752\nNext Confusion: 2.763752"} | |
| {"text": "### State\nConfusion: 3.35013\nAction: explain\nReward: 1.195844\nNext Confusion: 3.74787"} | |
| {"text": "### State\nConfusion: 2.871308\nAction: question\nReward: 1.241142\nNext Confusion: 1.719885"} | |
| {"text": "### State\nConfusion: 3.469416\nAction: question\nReward: 0.918632\nNext Confusion: 3.167326"} | |
| {"text": "### State\nConfusion: 3.741222\nAction: explain\nReward: 0.985201\nNext Confusion: 3.32284"} | |
| {"text": "### State\nConfusion: 6.040923\nAction: analogize\nReward: -0.280699\nNext Confusion: 6.575557"} | |
| {"text": "### State\nConfusion: 7.684596\nAction: explain\nReward: 0.497976\nNext Confusion: 7.635492"} | |
| {"text": "### State\nConfusion: 3.280561\nAction: question\nReward: 1.638365\nNext Confusion: 1.803264"} | |
| {"text": "### State\nConfusion: 7.663548\nAction: analogize\nReward: -0.226292\nNext Confusion: 7.610589"} | |
| {"text": "### State\nConfusion: 3.984347\nAction: explain\nReward: 0.399343\nNext Confusion: 3.598188"} | |
| {"text": "### State\nConfusion: 4.128422\nAction: analogize\nReward: -0.020421\nNext Confusion: 4.24922"} | |
| {"text": "### State\nConfusion: 4.86738\nAction: explain\nReward: 0.412249\nNext Confusion: 4.323055"} | |
| {"text": "### State\nConfusion: 3.86008\nAction: analogize\nReward: -0.201859\nNext Confusion: 4.178868"} | |
| {"text": "### State\nConfusion: 2.770174\nAction: analogize\nReward: -0.44956\nNext Confusion: 2.613835"} | |
| {"text": "### State\nConfusion: 4.151423\nAction: worked_example\nReward: 1.723241\nNext Confusion: 2.956266"} | |
| {"text": "### State\nConfusion: 6.310332\nAction: analogize\nReward: 0.928881\nNext Confusion: 5.853417"} | |
| {"text": "### State\nConfusion: 3.257129\nAction: analogize\nReward: -0.580045\nNext Confusion: 3.712077"} | |
| {"text": "### State\nConfusion: 3.381224\nAction: analogize\nReward: 0.35832\nNext Confusion: 2.915107"} | |
| {"text": "### State\nConfusion: 5.293595\nAction: analogize\nReward: 0.024167\nNext Confusion: 5.410278"} | |
| {"text": "### State\nConfusion: 3.664918\nAction: analogize\nReward: 0.629177\nNext Confusion: 3.607038"} | |
| {"text": "### State\nConfusion: 3.814348\nAction: analogize\nReward: -0.991218\nNext Confusion: 4.655007"} | |
| {"text": "### State\nConfusion: 5.846762\nAction: worked_example\nReward: 1.942244\nNext Confusion: 4.556337"} | |
| {"text": "### State\nConfusion: 5.780618\nAction: worked_example\nReward: 0.564407\nNext Confusion: 5.13104"} | |
| {"text": "### State\nConfusion: 3.274057\nAction: correct_fact\nReward: -0.765777\nNext Confusion: 3.590828"} | |
| {"text": "### State\nConfusion: 3.975992\nAction: analogize\nReward: -1.591306\nNext Confusion: 4.650961"} | |
| {"text": "### State\nConfusion: 2.629499\nAction: analogize\nReward: 0.559346\nNext Confusion: 2.924919"} | |
| {"text": "### State\nConfusion: 4.576322\nAction: question\nReward: 1.115169\nNext Confusion: 3.710506"} | |
| {"text": "### State\nConfusion: 4.406433\nAction: question\nReward: 0.384513\nNext Confusion: 4.542675"} | |
| {"text": "### State\nConfusion: 3.695796\nAction: explain\nReward: -0.09499\nNext Confusion: 3.903161"} | |
| {"text": "### State\nConfusion: 3.598451\nAction: analogize\nReward: -0.256206\nNext Confusion: 4.140747"} | |
| {"text": "### State\nConfusion: 3.289128\nAction: analogize\nReward: -0.30123\nNext Confusion: 3.63686"} | |
| {"text": "### State\nConfusion: 6.212872\nAction: analogize\nReward: 1.21673\nNext Confusion: 5.301171"} | |
| {"text": "### State\nConfusion: 2.665315\nAction: analogize\nReward: 0.487963\nNext Confusion: 2.517148"} | |
| {"text": "### State\nConfusion: 7.634206\nAction: analogize\nReward: 0.364259\nNext Confusion: 7.525106"} | |
| {"text": "### State\nConfusion: 4.694621\nAction: analogize\nReward: -0.110508\nNext Confusion: 4.591361"} | |
| {"text": "### State\nConfusion: 3.257582\nAction: analogize\nReward: 0.048877\nNext Confusion: 3.63751"} | |
| {"text": "### State\nConfusion: 7.117423\nAction: analogize\nReward: 0.360643\nNext Confusion: 6.834822"} | |
| {"text": "### State\nConfusion: 2.396412\nAction: analogize\nReward: -0.941327\nNext Confusion: 3.360732"} | |
| {"text": "### State\nConfusion: 3.254998\nAction: explain\nReward: 0.913498\nNext Confusion: 2.997523"} | |
| {"text": "### State\nConfusion: 4.529406\nAction: question\nReward: 0.384746\nNext Confusion: 4.120297"} | |
| {"text": "### State\nConfusion: 3.558558\nAction: analogize\nReward: -0.363472\nNext Confusion: 3.638471"} | |
| {"text": "### State\nConfusion: 3.043111\nAction: question\nReward: 0.349095\nNext Confusion: 2.999848"} | |
| {"text": "### State\nConfusion: 7.750188\nAction: analogize\nReward: -0.581808\nNext Confusion: 8.439862"} | |
| {"text": "### State\nConfusion: 3.544147\nAction: analogize\nReward: -0.685245\nNext Confusion: 4.018972"} | |
| {"text": "### State\nConfusion: 3.828862\nAction: question\nReward: 1.050056\nNext Confusion: 3.542843"} | |
| {"text": "### State\nConfusion: 3.389573\nAction: explain\nReward: 0.664424\nNext Confusion: 3.723847"} | |
| {"text": "### State\nConfusion: 4.171442\nAction: analogize\nReward: -1.336033\nNext Confusion: 5.128333"} | |
| {"text": "### State\nConfusion: 3.576818\nAction: worked_example\nReward: 0.172307\nNext Confusion: 3.437141"} | |
| {"text": "### State\nConfusion: 4.544904\nAction: correct_fact\nReward: -0.733044\nNext Confusion: 4.568539"} | |
| {"text": "### State\nConfusion: 7.43306\nAction: worked_example\nReward: 2.463857\nNext Confusion: 5.143341"} | |
| {"text": "### State\nConfusion: 3.439448\nAction: analogize\nReward: -0.571421\nNext Confusion: 4.087216"} | |
| {"text": "### State\nConfusion: 3.704676\nAction: analogize\nReward: -1.422864\nNext Confusion: 4.662764"} | |
| {"text": "### State\nConfusion: 3.942663\nAction: correct_fact\nReward: 0.486764\nNext Confusion: 3.753754"} | |
| {"text": "### State\nConfusion: 6.66982\nAction: worked_example\nReward: 1.728911\nNext Confusion: 4.802022"} | |
| {"text": "### State\nConfusion: 5.246306\nAction: worked_example\nReward: -0.294401\nNext Confusion: 5.037916"} | |
| {"text": "### State\nConfusion: 9.410426\nAction: analogize\nReward: -1.320736\nNext Confusion: 9.686011"} | |
| {"text": "### State\nConfusion: 6.195813\nAction: analogize\nReward: 0.606987\nNext Confusion: 6.49038"} | |
| {"text": "### State\nConfusion: 5.922952\nAction: explain\nReward: 0.232012\nNext Confusion: 5.318613"} | |
| {"text": "### State\nConfusion: 5.650737\nAction: question\nReward: 0.866942\nNext Confusion: 4.455626"} | |
| {"text": "### State\nConfusion: 3.919665\nAction: analogize\nReward: -0.997712\nNext Confusion: 4.632429"} | |
| {"text": "### State\nConfusion: 3.941246\nAction: question\nReward: 0.649643\nNext Confusion: 3.760049"} | |
| {"text": "### State\nConfusion: 6.998311\nAction: analogize\nReward: -0.427791\nNext Confusion: 7.825326"} | |
| {"text": "### State\nConfusion: 3.439216\nAction: analogize\nReward: -0.835926\nNext Confusion: 4.451193"} | |
| {"text": "### State\nConfusion: 6.045847\nAction: analogize\nReward: 0.214939\nNext Confusion: 6.031241"} | |
| {"text": "### State\nConfusion: 6.210586\nAction: question\nReward: -3.694161\nNext Confusion: 6.180187"} | |
| {"text": "### State\nConfusion: 4.026471\nAction: analogize\nReward: -0.220299\nNext Confusion: 4.869419"} | |
| {"text": "### State\nConfusion: 7.577714\nAction: analogize\nReward: -1.174037\nNext Confusion: 8.802045"} | |
| {"text": "### State\nConfusion: 3.543173\nAction: analogize\nReward: -0.365294\nNext Confusion: 3.866641"} | |
| {"text": "### State\nConfusion: 5.09092\nAction: analogize\nReward: 1.13697\nNext Confusion: 4.349956"} | |
| {"text": "### State\nConfusion: 8.079839\nAction: analogize\nReward: 0.226414\nNext Confusion: 8.204414"} | |
| {"text": "### State\nConfusion: 7.353573\nAction: analogize\nReward: -1.642375\nNext Confusion: 8.902788"} | |
| {"text": "### State\nConfusion: 5.057345\nAction: correct_fact\nReward: 0.380797\nNext Confusion: 4.734569"} | |
| {"text": "### State\nConfusion: 9.910346\nAction: analogize\nReward: -1.254739\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.257287\nAction: explain\nReward: 1.209915\nNext Confusion: 2.730555"} | |
| {"text": "### State\nConfusion: 2.891581\nAction: correct_fact\nReward: 1.087497\nNext Confusion: 2.386734"} | |
| {"text": "### State\nConfusion: 3.396667\nAction: analogize\nReward: -0.498005\nNext Confusion: 3.937934"} | |
| {"text": "### State\nConfusion: 6.116508\nAction: analogize\nReward: 0.1768\nNext Confusion: 6.346785"} | |
| {"text": "### State\nConfusion: 9.778449\nAction: analogize\nReward: -0.537666\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.730837\nAction: analogize\nReward: 0.814024\nNext Confusion: 3.501215"} | |
| {"text": "### State\nConfusion: 6.362095\nAction: question\nReward: -0.753607\nNext Confusion: 6.451304"} | |
| {"text": "### State\nConfusion: 3.924527\nAction: question\nReward: 1.359091\nNext Confusion: 2.809764"} | |
| {"text": "### State\nConfusion: 4.227033\nAction: analogize\nReward: -0.552635\nNext Confusion: 4.370997"} | |
| {"text": "### State\nConfusion: 4.017463\nAction: explain\nReward: 0.588962\nNext Confusion: 3.727648"} | |
| {"text": "### State\nConfusion: 3.403827\nAction: analogize\nReward: -0.26206\nNext Confusion: 4.009012"} | |
| {"text": "### State\nConfusion: 5.242961\nAction: analogize\nReward: 0.309312\nNext Confusion: 5.459402"} | |
| {"text": "### State\nConfusion: 5.55746\nAction: analogize\nReward: -0.133931\nNext Confusion: 5.654018"} | |
| {"text": "### State\nConfusion: 7.313769\nAction: analogize\nReward: 1.068778\nNext Confusion: 7.331645"} | |
| {"text": "### State\nConfusion: 4.527921\nAction: analogize\nReward: 0.174773\nNext Confusion: 4.651058"} | |
| {"text": "### State\nConfusion: 4.213427\nAction: explain\nReward: 0.440255\nNext Confusion: 3.94091"} | |
| {"text": "### State\nConfusion: 5.50433\nAction: analogize\nReward: -0.638466\nNext Confusion: 6.355938"} | |
| {"text": "### State\nConfusion: 4.101085\nAction: analogize\nReward: -1.054182\nNext Confusion: 5.146448"} | |
| {"text": "### State\nConfusion: 4.349844\nAction: analogize\nReward: -0.170631\nNext Confusion: 4.653336"} | |
| {"text": "### State\nConfusion: 4.015178\nAction: correct_fact\nReward: 0.047997\nNext Confusion: 4.260065"} | |
| {"text": "### State\nConfusion: 2.358372\nAction: analogize\nReward: -0.598425\nNext Confusion: 2.979395"} | |
| {"text": "### State\nConfusion: 4.52034\nAction: explain\nReward: 0.450091\nNext Confusion: 4.371052"} | |
| {"text": "### State\nConfusion: 6.761803\nAction: analogize\nReward: 1.344641\nNext Confusion: 5.632773"} | |
| {"text": "### State\nConfusion: 5.87241\nAction: question\nReward: 0.377697\nNext Confusion: 5.843579"} | |
| {"text": "### State\nConfusion: 3.923287\nAction: analogize\nReward: -0.657101\nNext Confusion: 4.637989"} | |
| {"text": "### State\nConfusion: 9.313203\nAction: question\nReward: 0.856034\nNext Confusion: 8.638969"} | |
| {"text": "### State\nConfusion: 3.786844\nAction: analogize\nReward: 0.06864\nNext Confusion: 4.0975"} | |
| {"text": "### State\nConfusion: 5.629326\nAction: explain\nReward: 0.078317\nNext Confusion: 6.028976"} | |
| {"text": "### State\nConfusion: 10.0\nAction: question\nReward: -0.123322\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 5.23106\nAction: analogize\nReward: -0.280209\nNext Confusion: 5.898155"} | |
| {"text": "### State\nConfusion: 4.382555\nAction: analogize\nReward: -1.659106\nNext Confusion: 5.728748"} | |
| {"text": "### State\nConfusion: 6.25581\nAction: question\nReward: 0.21789\nNext Confusion: 5.880257"} | |
| {"text": "### State\nConfusion: 2.868613\nAction: analogize\nReward: 0.909742\nNext Confusion: 2.372117"} | |
| {"text": "### State\nConfusion: 3.684495\nAction: correct_fact\nReward: -0.745467\nNext Confusion: 4.462787"} | |
| {"text": "### State\nConfusion: 3.525275\nAction: analogize\nReward: -0.111069\nNext Confusion: 4.110827"} | |
| {"text": "### State\nConfusion: 3.225854\nAction: analogize\nReward: -0.629036\nNext Confusion: 3.792701"} | |
| {"text": "### State\nConfusion: 4.270598\nAction: analogize\nReward: 0.480308\nNext Confusion: 3.756942"} | |
| {"text": "### State\nConfusion: 4.836082\nAction: analogize\nReward: -0.761899\nNext Confusion: 5.27111"} | |
| {"text": "### State\nConfusion: 8.947462\nAction: analogize\nReward: -1.034251\nNext Confusion: 8.811069"} | |
| {"text": "### State\nConfusion: 6.235946\nAction: explain\nReward: 0.433649\nNext Confusion: 5.740307"} | |
| {"text": "### State\nConfusion: 7.087507\nAction: question\nReward: 1.290575\nNext Confusion: 5.952477"} | |
| {"text": "### State\nConfusion: 3.511142\nAction: correct_fact\nReward: 0.069971\nNext Confusion: 3.650214"} | |
| {"text": "### State\nConfusion: 3.852046\nAction: question\nReward: 0.918159\nNext Confusion: 3.057334"} | |
| {"text": "### State\nConfusion: 6.66341\nAction: analogize\nReward: 0.553079\nNext Confusion: 6.19827"} | |
| {"text": "### State\nConfusion: 4.063512\nAction: analogize\nReward: -0.20142\nNext Confusion: 4.057294"} | |
| {"text": "### State\nConfusion: 9.915004\nAction: analogize\nReward: 0.468495\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 4.206673\nAction: question\nReward: 1.256381\nNext Confusion: 3.352159"} | |
| {"text": "### State\nConfusion: 8.36885\nAction: analogize\nReward: -1.800544\nNext Confusion: 9.007835"} | |
| {"text": "### State\nConfusion: 5.879703\nAction: explain\nReward: 0.015996\nNext Confusion: 5.779414"} | |
| {"text": "### State\nConfusion: 3.387135\nAction: worked_example\nReward: -0.285351\nNext Confusion: 3.143748"} | |
| {"text": "### State\nConfusion: 3.42407\nAction: explain\nReward: -1.038417\nNext Confusion: 4.218859"} | |
| {"text": "### State\nConfusion: 5.131143\nAction: explain\nReward: 0.547275\nNext Confusion: 4.681826"} | |
| {"text": "### State\nConfusion: 6.287015\nAction: question\nReward: 2.149246\nNext Confusion: 5.357315"} | |
| {"text": "### State\nConfusion: 3.635542\nAction: correct_fact\nReward: 0.099958\nNext Confusion: 3.996667"} | |
| {"text": "### State\nConfusion: 4.814803\nAction: analogize\nReward: -0.395849\nNext Confusion: 4.792355"} | |
| {"text": "### State\nConfusion: 8.044463\nAction: explain\nReward: -0.297554\nNext Confusion: 7.542534"} | |
| {"text": "### State\nConfusion: 8.401709\nAction: analogize\nReward: -0.967848\nNext Confusion: 9.894008"} | |
| {"text": "### State\nConfusion: 6.648936\nAction: explain\nReward: 1.280506\nNext Confusion: 5.837753"} | |
| {"text": "### State\nConfusion: 2.353691\nAction: analogize\nReward: 1.116544\nNext Confusion: 1.840919"} | |
| {"text": "### State\nConfusion: 2.624949\nAction: analogize\nReward: 0.572625\nNext Confusion: 2.075723"} | |
| {"text": "### State\nConfusion: 2.680676\nAction: worked_example\nReward: 0.813016\nNext Confusion: 1.673576"} | |
| {"text": "### State\nConfusion: 6.013613\nAction: explain\nReward: 0.225777\nNext Confusion: 5.772486"} | |
| {"text": "### State\nConfusion: 6.153138\nAction: analogize\nReward: 0.394142\nNext Confusion: 6.72213"} | |
| {"text": "### State\nConfusion: 3.991692\nAction: analogize\nReward: -0.533825\nNext Confusion: 3.808707"} | |
| {"text": "### State\nConfusion: 4.422607\nAction: analogize\nReward: 0.446483\nNext Confusion: 4.097887"} | |
| {"text": "### State\nConfusion: 3.941558\nAction: analogize\nReward: -0.217683\nNext Confusion: 4.405933"} | |
| {"text": "### State\nConfusion: 4.22295\nAction: analogize\nReward: 0.082211\nNext Confusion: 4.529451"} | |
| {"text": "### State\nConfusion: 5.897951\nAction: analogize\nReward: -0.363261\nNext Confusion: 6.494216"} | |
| {"text": "### State\nConfusion: 3.695955\nAction: analogize\nReward: 0.097225\nNext Confusion: 3.346347"} | |
| {"text": "### State\nConfusion: 3.633662\nAction: worked_example\nReward: 0.364471\nNext Confusion: 3.045426"} | |
| {"text": "### State\nConfusion: 8.276167\nAction: analogize\nReward: -0.287066\nNext Confusion: 8.230134"} | |
| {"text": "### State\nConfusion: 7.056122\nAction: analogize\nReward: 0.076491\nNext Confusion: 7.140624"} | |
| {"text": "### State\nConfusion: 6.252013\nAction: analogize\nReward: -0.740944\nNext Confusion: 7.121952"} | |
| {"text": "### State\nConfusion: 9.184165\nAction: analogize\nReward: -0.583071\nNext Confusion: 9.831767"} | |
| {"text": "### State\nConfusion: 7.184398\nAction: explain\nReward: -0.123245\nNext Confusion: 7.505643"} | |
| {"text": "### State\nConfusion: 3.933074\nAction: analogize\nReward: -0.656493\nNext Confusion: 4.151541"} | |
| {"text": "### State\nConfusion: 4.39857\nAction: correct_fact\nReward: 0.819409\nNext Confusion: 3.612621"} | |
| {"text": "### State\nConfusion: 7.368159\nAction: question\nReward: -0.305729\nNext Confusion: 6.645311"} | |
| {"text": "### State\nConfusion: 6.044369\nAction: explain\nReward: -1.484875\nNext Confusion: 6.056402"} | |
| {"text": "### State\nConfusion: 3.630255\nAction: explain\nReward: 0.14536\nNext Confusion: 3.800318"} | |
| {"text": "### State\nConfusion: 3.970609\nAction: explain\nReward: 0.544449\nNext Confusion: 3.713812"} | |
| {"text": "### State\nConfusion: 6.995214\nAction: analogize\nReward: -0.404806\nNext Confusion: 7.366908"} | |
| {"text": "### State\nConfusion: 2.539684\nAction: question\nReward: 0.684036\nNext Confusion: 1.758763"} | |
| {"text": "### State\nConfusion: 4.291467\nAction: analogize\nReward: 0.211242\nNext Confusion: 4.725763"} | |
| {"text": "### State\nConfusion: 3.88243\nAction: analogize\nReward: 1.228902\nNext Confusion: 3.252029"} | |
| {"text": "### State\nConfusion: 3.367319\nAction: question\nReward: 0.896723\nNext Confusion: 2.189595"} | |
| {"text": "### State\nConfusion: 4.465155\nAction: explain\nReward: 0.145001\nNext Confusion: 4.276695"} | |
| {"text": "### State\nConfusion: 5.148044\nAction: analogize\nReward: -0.85651\nNext Confusion: 5.814228"} | |
| {"text": "### State\nConfusion: 6.414485\nAction: analogize\nReward: -0.167799\nNext Confusion: 6.776225"} | |
| {"text": "### State\nConfusion: 5.660143\nAction: explain\nReward: 0.666448\nNext Confusion: 5.634968"} | |
| {"text": "### State\nConfusion: 3.073078\nAction: worked_example\nReward: 1.191585\nNext Confusion: 1.84006"} | |
| {"text": "### State\nConfusion: 4.392269\nAction: explain\nReward: -0.13347\nNext Confusion: 4.073843"} | |
| {"text": "### State\nConfusion: 5.864765\nAction: analogize\nReward: -1.350922\nNext Confusion: 6.857552"} | |
| {"text": "### State\nConfusion: 4.392626\nAction: analogize\nReward: -1.048435\nNext Confusion: 4.934402"} | |
| {"text": "### State\nConfusion: 7.997655\nAction: correct_fact\nReward: 0.441853\nNext Confusion: 7.844183"} | |
| {"text": "### State\nConfusion: 4.253817\nAction: analogize\nReward: -0.180777\nNext Confusion: 4.751152"} | |
| {"text": "### State\nConfusion: 4.194475\nAction: analogize\nReward: -0.13063\nNext Confusion: 4.203718"} | |
| {"text": "### State\nConfusion: 3.775372\nAction: analogize\nReward: -0.102879\nNext Confusion: 4.218565"} | |
| {"text": "### State\nConfusion: 3.37428\nAction: explain\nReward: -0.147437\nNext Confusion: 3.824593"} | |
| {"text": "### State\nConfusion: 5.427704\nAction: explain\nReward: 0.223987\nNext Confusion: 4.968712"} | |
| {"text": "### State\nConfusion: 3.636294\nAction: analogize\nReward: -1.018032\nNext Confusion: 4.057275"} | |
| {"text": "### State\nConfusion: 6.214283\nAction: worked_example\nReward: 2.571471\nNext Confusion: 5.29688"} | |
| {"text": "### State\nConfusion: 3.93336\nAction: analogize\nReward: -0.744836\nNext Confusion: 4.682724"} | |
| {"text": "### State\nConfusion: 4.802226\nAction: analogize\nReward: -0.819463\nNext Confusion: 6.311615"} | |
| {"text": "### State\nConfusion: 3.885915\nAction: analogize\nReward: 0.055154\nNext Confusion: 3.762716"} | |
| {"text": "### State\nConfusion: 4.20142\nAction: analogize\nReward: -0.829035\nNext Confusion: 4.799394"} | |
| {"text": "### State\nConfusion: 3.003221\nAction: analogize\nReward: -0.314853\nNext Confusion: 3.496412"} | |
| {"text": "### State\nConfusion: 4.048622\nAction: analogize\nReward: -1.332944\nNext Confusion: 5.046173"} | |
| {"text": "### State\nConfusion: 5.844535\nAction: analogize\nReward: 0.587102\nNext Confusion: 5.329001"} | |
| {"text": "### State\nConfusion: 2.72501\nAction: analogize\nReward: -0.226736\nNext Confusion: 2.524352"} | |
| {"text": "### State\nConfusion: 7.002024\nAction: correct_fact\nReward: 0.509639\nNext Confusion: 6.743065"} | |
| {"text": "### State\nConfusion: 3.325475\nAction: question\nReward: 0.06805\nNext Confusion: 3.499943"} | |
| {"text": "### State\nConfusion: 3.802268\nAction: analogize\nReward: -0.0121\nNext Confusion: 4.107089"} | |
| {"text": "### State\nConfusion: 2.142409\nAction: correct_fact\nReward: 0.629906\nNext Confusion: 1.889285"} | |
| {"text": "### State\nConfusion: 4.549087\nAction: analogize\nReward: -0.264564\nNext Confusion: 5.161147"} | |
| {"text": "### State\nConfusion: 3.565095\nAction: question\nReward: -0.083092\nNext Confusion: 3.521617"} | |
| {"text": "### State\nConfusion: 6.562612\nAction: analogize\nReward: -0.482755\nNext Confusion: 7.10455"} | |
| {"text": "### State\nConfusion: 4.241969\nAction: explain\nReward: -0.754929\nNext Confusion: 5.114543"} | |
| {"text": "### State\nConfusion: 2.703632\nAction: analogize\nReward: -0.503238\nNext Confusion: 3.434828"} | |
| {"text": "### State\nConfusion: 4.048455\nAction: worked_example\nReward: 1.670669\nNext Confusion: 3.173782"} | |
| {"text": "### State\nConfusion: 3.324484\nAction: correct_fact\nReward: -0.601735\nNext Confusion: 4.07383"} | |
| {"text": "### State\nConfusion: 5.943546\nAction: analogize\nReward: -0.626702\nNext Confusion: 6.768705"} | |
| {"text": "### State\nConfusion: 7.54099\nAction: worked_example\nReward: -0.038655\nNext Confusion: 7.901897"} | |
| {"text": "### State\nConfusion: 5.921177\nAction: analogize\nReward: -0.518356\nNext Confusion: 6.339199"} | |
| {"text": "### State\nConfusion: 3.627352\nAction: question\nReward: 1.265732\nNext Confusion: 2.68943"} | |
| {"text": "### State\nConfusion: 4.268603\nAction: analogize\nReward: 0.224228\nNext Confusion: 4.325311"} | |
| {"text": "### State\nConfusion: 7.438092\nAction: analogize\nReward: 0.769506\nNext Confusion: 6.760568"} | |
| {"text": "### State\nConfusion: 5.561396\nAction: analogize\nReward: 0.192368\nNext Confusion: 6.595321"} | |
| {"text": "### State\nConfusion: 3.728523\nAction: analogize\nReward: 0.288576\nNext Confusion: 4.402052"} | |
| {"text": "### State\nConfusion: 2.824914\nAction: explain\nReward: -0.298342\nNext Confusion: 3.015885"} | |
| {"text": "### State\nConfusion: 3.5681\nAction: analogize\nReward: -1.062345\nNext Confusion: 3.871775"} | |
| {"text": "### State\nConfusion: 5.129294\nAction: correct_fact\nReward: 0.217271\nNext Confusion: 4.869125"} | |
| {"text": "### State\nConfusion: 3.513123\nAction: analogize\nReward: 0.348811\nNext Confusion: 3.595584"} | |
| {"text": "### State\nConfusion: 3.052817\nAction: worked_example\nReward: 2.832272\nNext Confusion: 0.91998"} | |
| {"text": "### State\nConfusion: 7.955312\nAction: analogize\nReward: -0.117205\nNext Confusion: 8.157463"} | |
| {"text": "### State\nConfusion: 3.807566\nAction: explain\nReward: 0.103445\nNext Confusion: 3.867911"} | |
| {"text": "### State\nConfusion: 3.244115\nAction: worked_example\nReward: 1.05523\nNext Confusion: 1.530957"} | |
| {"text": "### State\nConfusion: 2.521808\nAction: analogize\nReward: -0.867435\nNext Confusion: 2.434005"} | |
| {"text": "### State\nConfusion: 9.124791\nAction: analogize\nReward: -0.841239\nNext Confusion: 9.679058"} | |
| {"text": "### State\nConfusion: 5.325014\nAction: analogize\nReward: -1.782482\nNext Confusion: 6.970171"} | |
| {"text": "### State\nConfusion: 4.339715\nAction: explain\nReward: 0.008463\nNext Confusion: 3.846507"} | |
| {"text": "### State\nConfusion: 6.615342\nAction: question\nReward: 1.733143\nNext Confusion: 5.407709"} | |
| {"text": "### State\nConfusion: 4.24576\nAction: explain\nReward: -0.29446\nNext Confusion: 4.733876"} | |
| {"text": "### State\nConfusion: 4.335828\nAction: analogize\nReward: -0.146346\nNext Confusion: 4.271027"} | |
| {"text": "### State\nConfusion: 4.58664\nAction: analogize\nReward: -0.103805\nNext Confusion: 4.874441"} | |
| {"text": "### State\nConfusion: 4.006128\nAction: analogize\nReward: -0.725112\nNext Confusion: 4.798853"} | |
| {"text": "### State\nConfusion: 5.253432\nAction: worked_example\nReward: 2.352804\nNext Confusion: 3.767527"} | |
| {"text": "### State\nConfusion: 3.005701\nAction: explain\nReward: 0.139484\nNext Confusion: 2.093722"} | |
| {"text": "### State\nConfusion: 3.874975\nAction: explain\nReward: -0.787808\nNext Confusion: 3.802166"} | |
| {"text": "### State\nConfusion: 3.41289\nAction: explain\nReward: 1.219302\nNext Confusion: 2.298687"} | |
| {"text": "### State\nConfusion: 3.578395\nAction: question\nReward: 0.811461\nNext Confusion: 2.339393"} | |
| {"text": "### State\nConfusion: 3.207485\nAction: analogize\nReward: 0.315275\nNext Confusion: 2.610947"} | |
| {"text": "### State\nConfusion: 2.575161\nAction: worked_example\nReward: 2.987322\nNext Confusion: 0.201491"} | |
| {"text": "### State\nConfusion: 4.277697\nAction: analogize\nReward: 0.129558\nNext Confusion: 4.751849"} | |
| {"text": "### State\nConfusion: 8.911267\nAction: correct_fact\nReward: -0.124284\nNext Confusion: 8.876928"} | |
| {"text": "### State\nConfusion: 3.571608\nAction: explain\nReward: -0.07214\nNext Confusion: 3.462044"} | |
| {"text": "### State\nConfusion: 6.763837\nAction: analogize\nReward: -0.857645\nNext Confusion: 7.317293"} | |
| {"text": "### State\nConfusion: 9.618838\nAction: analogize\nReward: -0.535427\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.973634\nAction: explain\nReward: -0.52296\nNext Confusion: 4.014759"} | |
| {"text": "### State\nConfusion: 4.366366\nAction: analogize\nReward: 0.616717\nNext Confusion: 3.759719"} | |
| {"text": "### State\nConfusion: 3.682107\nAction: correct_fact\nReward: -0.336892\nNext Confusion: 3.918612"} | |
| {"text": "### State\nConfusion: 5.293279\nAction: worked_example\nReward: 0.835641\nNext Confusion: 4.078385"} | |
| {"text": "### State\nConfusion: 4.117586\nAction: analogize\nReward: -1.002237\nNext Confusion: 4.322363"} | |
| {"text": "### State\nConfusion: 4.685913\nAction: question\nReward: 0.069347\nNext Confusion: 4.376846"} | |
| {"text": "### State\nConfusion: 7.71473\nAction: worked_example\nReward: 2.620265\nNext Confusion: 6.301953"} | |
| {"text": "### State\nConfusion: 4.964281\nAction: correct_fact\nReward: 0.475344\nNext Confusion: 5.341691"} | |
| {"text": "### State\nConfusion: 3.69864\nAction: explain\nReward: 0.306914\nNext Confusion: 3.666771"} | |
| {"text": "### State\nConfusion: 4.640393\nAction: correct_fact\nReward: 0.83143\nNext Confusion: 4.648771"} | |
| {"text": "### State\nConfusion: 3.663903\nAction: explain\nReward: 0.146509\nNext Confusion: 3.056985"} | |
| {"text": "### State\nConfusion: 4.372581\nAction: analogize\nReward: 1.235951\nNext Confusion: 3.717001"} | |
| {"text": "### State\nConfusion: 4.812475\nAction: question\nReward: 0.287407\nNext Confusion: 4.611772"} | |
| {"text": "### State\nConfusion: 4.098743\nAction: correct_fact\nReward: 1.399562\nNext Confusion: 3.505099"} | |
| {"text": "### State\nConfusion: 3.51753\nAction: correct_fact\nReward: 0.518148\nNext Confusion: 3.218788"} | |
| {"text": "### State\nConfusion: 7.673138\nAction: analogize\nReward: -0.506786\nNext Confusion: 8.241877"} | |
| {"text": "### State\nConfusion: 5.163452\nAction: correct_fact\nReward: 0.662886\nNext Confusion: 4.834873"} | |
| {"text": "### State\nConfusion: 3.634682\nAction: explain\nReward: 1.605538\nNext Confusion: 2.658165"} | |
| {"text": "### State\nConfusion: 4.804548\nAction: analogize\nReward: -0.526519\nNext Confusion: 5.3719"} | |
| {"text": "### State\nConfusion: 5.730246\nAction: analogize\nReward: -0.369718\nNext Confusion: 6.35551"} | |
| {"text": "### State\nConfusion: 3.63205\nAction: analogize\nReward: -0.231718\nNext Confusion: 4.320949"} | |
| {"text": "### State\nConfusion: 6.188833\nAction: explain\nReward: -0.086604\nNext Confusion: 6.12198"} | |
| {"text": "### State\nConfusion: 3.921316\nAction: question\nReward: 0.490084\nNext Confusion: 3.494523"} | |
| {"text": "### State\nConfusion: 3.867314\nAction: question\nReward: 0.795442\nNext Confusion: 2.899061"} | |
| {"text": "### State\nConfusion: 6.886967\nAction: explain\nReward: -0.57278\nNext Confusion: 6.923401"} | |
| {"text": "### State\nConfusion: 3.680679\nAction: analogize\nReward: -0.210942\nNext Confusion: 3.925346"} | |
| {"text": "### State\nConfusion: 8.736197\nAction: explain\nReward: -0.914355\nNext Confusion: 9.615757"} | |
| {"text": "### State\nConfusion: 3.546871\nAction: question\nReward: 1.117404\nNext Confusion: 2.736447"} | |
| {"text": "### State\nConfusion: 6.864706\nAction: analogize\nReward: 0.214425\nNext Confusion: 6.507564"} | |
| {"text": "### State\nConfusion: 5.739324\nAction: explain\nReward: 0.335455\nNext Confusion: 5.200863"} | |
| {"text": "### State\nConfusion: 6.305572\nAction: analogize\nReward: 0.075181\nNext Confusion: 6.520935"} | |
| {"text": "### State\nConfusion: 4.454816\nAction: analogize\nReward: -0.289589\nNext Confusion: 4.773785"} | |
| {"text": "### State\nConfusion: 3.731757\nAction: explain\nReward: 0.873925\nNext Confusion: 2.932281"} | |
| {"text": "### State\nConfusion: 5.496557\nAction: correct_fact\nReward: 0.868323\nNext Confusion: 5.627773"} | |
| {"text": "### State\nConfusion: 4.492536\nAction: analogize\nReward: -0.193267\nNext Confusion: 4.587128"} | |
| {"text": "### State\nConfusion: 3.143882\nAction: explain\nReward: 0.736479\nNext Confusion: 2.814084"} | |
| {"text": "### State\nConfusion: 6.209274\nAction: question\nReward: -0.687091\nNext Confusion: 6.21104"} | |
| {"text": "### State\nConfusion: 4.661629\nAction: explain\nReward: 0.805475\nNext Confusion: 4.795467"} | |
| {"text": "### State\nConfusion: 3.818923\nAction: analogize\nReward: -0.621434\nNext Confusion: 4.386176"} | |
| {"text": "### State\nConfusion: 3.080188\nAction: correct_fact\nReward: 0.555658\nNext Confusion: 2.880363"} | |
| {"text": "### State\nConfusion: 3.506171\nAction: analogize\nReward: 0.546359\nNext Confusion: 3.254782"} | |
| {"text": "### State\nConfusion: 6.788947\nAction: question\nReward: 1.500851\nNext Confusion: 5.293974"} | |
| {"text": "### State\nConfusion: 4.457938\nAction: analogize\nReward: -0.031805\nNext Confusion: 4.962243"} | |
| {"text": "### State\nConfusion: 6.438289\nAction: explain\nReward: 0.217682\nNext Confusion: 6.198248"} | |
| {"text": "### State\nConfusion: 4.113327\nAction: worked_example\nReward: 2.453192\nNext Confusion: 2.296527"} | |
| {"text": "### State\nConfusion: 4.691368\nAction: analogize\nReward: -0.428181\nNext Confusion: 5.063083"} | |
| {"text": "### State\nConfusion: 6.270288\nAction: analogize\nReward: 0.043957\nNext Confusion: 7.006448"} | |
| {"text": "### State\nConfusion: 3.941789\nAction: analogize\nReward: -1.714923\nNext Confusion: 3.669449"} | |
| {"text": "### State\nConfusion: 6.964881\nAction: analogize\nReward: -0.724558\nNext Confusion: 7.170291"} | |
| {"text": "### State\nConfusion: 6.03055\nAction: explain\nReward: -0.30925\nNext Confusion: 5.772726"} | |
| {"text": "### State\nConfusion: 4.264237\nAction: question\nReward: 1.085219\nNext Confusion: 3.315322"} | |
| {"text": "### State\nConfusion: 3.291669\nAction: analogize\nReward: -0.695105\nNext Confusion: 3.578062"} | |
| {"text": "### State\nConfusion: 2.852946\nAction: explain\nReward: 0.896694\nNext Confusion: 2.766197"} | |
| {"text": "### State\nConfusion: 7.655255\nAction: analogize\nReward: -0.602577\nNext Confusion: 8.688943"} | |
| {"text": "### State\nConfusion: 7.109231\nAction: analogize\nReward: -1.82364\nNext Confusion: 7.303028"} | |
| {"text": "### State\nConfusion: 6.423125\nAction: analogize\nReward: -0.296018\nNext Confusion: 7.129556"} | |
| {"text": "### State\nConfusion: 3.713895\nAction: correct_fact\nReward: 1.061697\nNext Confusion: 3.08923"} | |
| {"text": "### State\nConfusion: 5.848164\nAction: explain\nReward: 0.196832\nNext Confusion: 5.547879"} | |
| {"text": "### State\nConfusion: 3.044751\nAction: analogize\nReward: 0.050225\nNext Confusion: 2.399746"} | |
| {"text": "### State\nConfusion: 3.267077\nAction: correct_fact\nReward: 0.065062\nNext Confusion: 3.377568"} | |
| {"text": "### State\nConfusion: 3.677239\nAction: analogize\nReward: 0.623853\nNext Confusion: 3.404431"} | |
| {"text": "### State\nConfusion: 3.430977\nAction: analogize\nReward: -0.637506\nNext Confusion: 4.219885"} | |
| {"text": "### State\nConfusion: 3.54131\nAction: analogize\nReward: -0.296704\nNext Confusion: 3.700912"} | |
| {"text": "### State\nConfusion: 3.03738\nAction: analogize\nReward: -0.444641\nNext Confusion: 3.498177"} | |
| {"text": "### State\nConfusion: 4.582879\nAction: analogize\nReward: -0.954809\nNext Confusion: 5.478829"} | |
| {"text": "### State\nConfusion: 5.151244\nAction: worked_example\nReward: 2.380252\nNext Confusion: 3.726861"} | |
| {"text": "### State\nConfusion: 5.093281\nAction: correct_fact\nReward: 0.95357\nNext Confusion: 4.533796"} | |
| {"text": "### State\nConfusion: 7.686016\nAction: analogize\nReward: -0.938085\nNext Confusion: 8.366612"} | |
| {"text": "### State\nConfusion: 4.178343\nAction: worked_example\nReward: 0.374517\nNext Confusion: 3.552812"} | |
| {"text": "### State\nConfusion: 6.83574\nAction: question\nReward: 0.747584\nNext Confusion: 6.205879"} | |
| {"text": "### State\nConfusion: 4.541863\nAction: analogize\nReward: -0.557367\nNext Confusion: 4.824588"} | |
| {"text": "### State\nConfusion: 3.081633\nAction: explain\nReward: 0.136932\nNext Confusion: 2.914456"} | |
| {"text": "### State\nConfusion: 2.926489\nAction: worked_example\nReward: 1.157325\nNext Confusion: 1.859197"} | |
| {"text": "### State\nConfusion: 4.609409\nAction: analogize\nReward: 0.394905\nNext Confusion: 4.347848"} | |
| {"text": "### State\nConfusion: 5.320403\nAction: correct_fact\nReward: 0.135497\nNext Confusion: 5.200538"} | |
| {"text": "### State\nConfusion: 4.829214\nAction: explain\nReward: -0.83525\nNext Confusion: 5.605744"} | |
| {"text": "### State\nConfusion: 6.698942\nAction: analogize\nReward: 0.293412\nNext Confusion: 6.224935"} | |
| {"text": "### State\nConfusion: 3.408383\nAction: analogize\nReward: -1.13421\nNext Confusion: 4.121609"} | |
| {"text": "### State\nConfusion: 2.550534\nAction: explain\nReward: 1.595885\nNext Confusion: 2.337572"} | |
| {"text": "### State\nConfusion: 4.26689\nAction: analogize\nReward: -0.923485\nNext Confusion: 4.911927"} | |
| {"text": "### State\nConfusion: 4.54758\nAction: analogize\nReward: 0.028784\nNext Confusion: 4.688157"} | |
| {"text": "### State\nConfusion: 3.963094\nAction: analogize\nReward: 0.601362\nNext Confusion: 3.715459"} | |
| {"text": "### State\nConfusion: 4.264097\nAction: analogize\nReward: 0.165267\nNext Confusion: 4.050135"} | |
| {"text": "### State\nConfusion: 4.775359\nAction: analogize\nReward: 0.805797\nNext Confusion: 4.071631"} | |
| {"text": "### State\nConfusion: 4.309736\nAction: analogize\nReward: -1.096488\nNext Confusion: 4.92112"} | |
| {"text": "### State\nConfusion: 3.836866\nAction: analogize\nReward: 0.243327\nNext Confusion: 3.894223"} | |
| {"text": "### State\nConfusion: 3.551428\nAction: analogize\nReward: -0.240741\nNext Confusion: 3.635041"} | |
| {"text": "### State\nConfusion: 6.219377\nAction: analogize\nReward: -0.440877\nNext Confusion: 6.443679"} | |
| {"text": "### State\nConfusion: 8.134678\nAction: analogize\nReward: -0.952597\nNext Confusion: 9.351403"} | |
| {"text": "### State\nConfusion: 7.455897\nAction: worked_example\nReward: 3.000091\nNext Confusion: 5.743115"} | |
| {"text": "### State\nConfusion: 3.591007\nAction: analogize\nReward: 0.415582\nNext Confusion: 3.694337"} | |
| {"text": "### State\nConfusion: 5.798405\nAction: correct_fact\nReward: 0.444779\nNext Confusion: 5.077715"} | |
| {"text": "### State\nConfusion: 3.156499\nAction: correct_fact\nReward: 0.71058\nNext Confusion: 3.226074"} | |
| {"text": "### State\nConfusion: 2.320361\nAction: analogize\nReward: -0.406777\nNext Confusion: 2.661581"} | |
| {"text": "### State\nConfusion: 3.254967\nAction: explain\nReward: 0.564712\nNext Confusion: 2.827411"} | |
| {"text": "### State\nConfusion: 3.106249\nAction: analogize\nReward: -0.246119\nNext Confusion: 3.193833"} | |
| {"text": "### State\nConfusion: 6.7713\nAction: analogize\nReward: -0.996812\nNext Confusion: 7.599147"} | |
| {"text": "### State\nConfusion: 3.82842\nAction: analogize\nReward: -0.442356\nNext Confusion: 3.9427"} | |
| {"text": "### State\nConfusion: 3.163116\nAction: analogize\nReward: -0.57468\nNext Confusion: 3.300192"} | |
| {"text": "### State\nConfusion: 3.769666\nAction: analogize\nReward: -1.033696\nNext Confusion: 3.965607"} | |
| {"text": "### State\nConfusion: 4.149986\nAction: explain\nReward: 2.104363\nNext Confusion: 3.151176"} | |
| {"text": "### State\nConfusion: 3.644197\nAction: correct_fact\nReward: 0.883312\nNext Confusion: 2.699045"} | |
| {"text": "### State\nConfusion: 3.353661\nAction: worked_example\nReward: -1.517132\nNext Confusion: 3.120078"} | |
| {"text": "### State\nConfusion: 3.097823\nAction: worked_example\nReward: 0.987777\nNext Confusion: 2.541394"} | |
| {"text": "### State\nConfusion: 1.986863\nAction: analogize\nReward: -0.166395\nNext Confusion: 2.263733"} | |
| {"text": "### State\nConfusion: 3.803623\nAction: explain\nReward: -0.599474\nNext Confusion: 4.320712"} | |
| {"text": "### State\nConfusion: 2.989635\nAction: analogize\nReward: -0.391854\nNext Confusion: 3.153213"} | |
| {"text": "### State\nConfusion: 3.825881\nAction: analogize\nReward: -0.304546\nNext Confusion: 4.124488"} | |
| {"text": "### State\nConfusion: 3.925646\nAction: analogize\nReward: 1.280908\nNext Confusion: 3.250851"} | |
| {"text": "### State\nConfusion: 3.520161\nAction: correct_fact\nReward: 1.316759\nNext Confusion: 3.189941"} | |
| {"text": "### State\nConfusion: 5.792756\nAction: worked_example\nReward: 1.211052\nNext Confusion: 4.469619"} | |
| {"text": "### State\nConfusion: 5.700114\nAction: worked_example\nReward: 1.746722\nNext Confusion: 4.20747"} | |
| {"text": "### State\nConfusion: 3.441217\nAction: analogize\nReward: -1.146556\nNext Confusion: 4.742431"} | |
| {"text": "### State\nConfusion: 6.980942\nAction: worked_example\nReward: 2.354988\nNext Confusion: 5.094528"} | |
| {"text": "### State\nConfusion: 4.636788\nAction: worked_example\nReward: 1.395544\nNext Confusion: 2.763631"} | |
| {"text": "### State\nConfusion: 4.57248\nAction: worked_example\nReward: -0.893071\nNext Confusion: 5.374165"} | |
| {"text": "### State\nConfusion: 4.616354\nAction: correct_fact\nReward: -0.458687\nNext Confusion: 4.978016"} | |
| {"text": "### State\nConfusion: 9.871258\nAction: worked_example\nReward: 0.951345\nNext Confusion: 8.626527"} | |
| {"text": "### State\nConfusion: 5.400334\nAction: analogize\nReward: -3.107368\nNext Confusion: 4.699464"} | |
| {"text": "### State\nConfusion: 3.301483\nAction: analogize\nReward: -0.056496\nNext Confusion: 3.928677"} | |
| {"text": "### State\nConfusion: 6.662239\nAction: explain\nReward: -0.308954\nNext Confusion: 7.263927"} | |
| {"text": "### State\nConfusion: 4.49765\nAction: analogize\nReward: -0.627163\nNext Confusion: 4.901896"} | |
| {"text": "### State\nConfusion: 7.890362\nAction: correct_fact\nReward: -1.032236\nNext Confusion: 8.792729"} | |
| {"text": "### State\nConfusion: 3.356429\nAction: analogize\nReward: 0.527811\nNext Confusion: 3.568363"} | |
| {"text": "### State\nConfusion: 3.184431\nAction: analogize\nReward: 0.474491\nNext Confusion: 2.551207"} | |
| {"text": "### State\nConfusion: 4.585125\nAction: analogize\nReward: 0.313417\nNext Confusion: 4.652011"} | |
| {"text": "### State\nConfusion: 7.036767\nAction: analogize\nReward: -0.249813\nNext Confusion: 8.359595"} | |
| {"text": "### State\nConfusion: 4.977924\nAction: analogize\nReward: -0.39375\nNext Confusion: 5.75664"} | |
| {"text": "### State\nConfusion: 4.899474\nAction: analogize\nReward: 0.324894\nNext Confusion: 4.782554"} | |
| {"text": "### State\nConfusion: 5.46263\nAction: analogize\nReward: 0.041644\nNext Confusion: 5.704385"} | |
| {"text": "### State\nConfusion: 3.728646\nAction: explain\nReward: 0.553889\nNext Confusion: 3.630325"} | |
| {"text": "### State\nConfusion: 3.874559\nAction: analogize\nReward: 0.23354\nNext Confusion: 3.770426"} | |
| {"text": "### State\nConfusion: 4.150585\nAction: analogize\nReward: -0.386776\nNext Confusion: 4.294056"} | |
| {"text": "### State\nConfusion: 3.750043\nAction: analogize\nReward: -0.098659\nNext Confusion: 3.729299"} | |
| {"text": "### State\nConfusion: 4.046793\nAction: analogize\nReward: 1.120879\nNext Confusion: 3.69392"} | |
| {"text": "### State\nConfusion: 4.282802\nAction: analogize\nReward: 0.229984\nNext Confusion: 4.534772"} | |
| {"text": "### State\nConfusion: 9.815567\nAction: explain\nReward: 0.113322\nNext Confusion: 9.897961"} | |
| {"text": "### State\nConfusion: 8.447412\nAction: question\nReward: 0.905137\nNext Confusion: 7.587751"} | |
| {"text": "### State\nConfusion: 8.702408\nAction: correct_fact\nReward: 0.327523\nNext Confusion: 8.957442"} | |
| {"text": "### State\nConfusion: 4.547598\nAction: question\nReward: 0.19703\nNext Confusion: 4.136933"} | |
| {"text": "### State\nConfusion: 3.093116\nAction: analogize\nReward: -0.690177\nNext Confusion: 3.494307"} | |
| {"text": "### State\nConfusion: 6.10634\nAction: analogize\nReward: -0.06686\nNext Confusion: 6.080076"} | |
| {"text": "### State\nConfusion: 5.061502\nAction: explain\nReward: -0.08977\nNext Confusion: 5.423969"} | |
| {"text": "### State\nConfusion: 6.275469\nAction: analogize\nReward: -0.406191\nNext Confusion: 6.53923"} | |
| {"text": "### State\nConfusion: 2.794288\nAction: explain\nReward: -0.477082\nNext Confusion: 3.057377"} | |
| {"text": "### State\nConfusion: 7.193281\nAction: analogize\nReward: -0.987684\nNext Confusion: 8.28434"} | |
| {"text": "### State\nConfusion: 4.47008\nAction: analogize\nReward: 2.03497\nNext Confusion: 4.062569"} | |
| {"text": "### State\nConfusion: 7.197754\nAction: explain\nReward: 0.563753\nNext Confusion: 6.897256"} | |
| {"text": "### State\nConfusion: 6.664058\nAction: analogize\nReward: -1.519211\nNext Confusion: 7.664808"} | |
| {"text": "### State\nConfusion: 9.278358\nAction: analogize\nReward: -0.130705\nNext Confusion: 9.787616"} | |
| {"text": "### State\nConfusion: 3.831932\nAction: analogize\nReward: 0.197891\nNext Confusion: 4.277423"} | |
| {"text": "### State\nConfusion: 5.387299\nAction: correct_fact\nReward: 0.229777\nNext Confusion: 5.204511"} | |
| {"text": "### State\nConfusion: 3.000513\nAction: question\nReward: 0.870176\nNext Confusion: 2.886877"} | |
| {"text": "### State\nConfusion: 4.005711\nAction: explain\nReward: 0.577621\nNext Confusion: 3.404496"} | |
| {"text": "### State\nConfusion: 3.297642\nAction: correct_fact\nReward: 0.209132\nNext Confusion: 3.396185"} | |
| {"text": "### State\nConfusion: 3.705981\nAction: question\nReward: -0.267627\nNext Confusion: 3.913401"} | |
| {"text": "### State\nConfusion: 2.617965\nAction: analogize\nReward: 0.747505\nNext Confusion: 1.880171"} | |
| {"text": "### State\nConfusion: 7.499238\nAction: analogize\nReward: 0.395212\nNext Confusion: 7.566947"} | |
| {"text": "### State\nConfusion: 7.075433\nAction: explain\nReward: 0.394626\nNext Confusion: 6.730183"} | |
| {"text": "### State\nConfusion: 5.561482\nAction: explain\nReward: 0.444965\nNext Confusion: 5.071474"} | |
| {"text": "### State\nConfusion: 4.43107\nAction: question\nReward: 1.475692\nNext Confusion: 3.331222"} | |
| {"text": "### State\nConfusion: 4.206248\nAction: worked_example\nReward: 1.131951\nNext Confusion: 2.576638"} | |
| {"text": "### State\nConfusion: 4.046033\nAction: analogize\nReward: 0.078906\nNext Confusion: 3.6898"} | |
| {"text": "### State\nConfusion: 3.234105\nAction: explain\nReward: 1.041857\nNext Confusion: 2.462756"} | |
| {"text": "### State\nConfusion: 3.184991\nAction: analogize\nReward: -0.740992\nNext Confusion: 3.408039"} | |
| {"text": "### State\nConfusion: 7.069261\nAction: worked_example\nReward: 1.829486\nNext Confusion: 5.310039"} | |
| {"text": "### State\nConfusion: 4.233379\nAction: correct_fact\nReward: -0.757781\nNext Confusion: 4.137143"} | |
| {"text": "### State\nConfusion: 5.110789\nAction: analogize\nReward: 0.68815\nNext Confusion: 4.324455"} | |
| {"text": "### State\nConfusion: 6.596481\nAction: explain\nReward: -0.336485\nNext Confusion: 6.980236"} | |
| {"text": "### State\nConfusion: 4.242512\nAction: analogize\nReward: -0.919348\nNext Confusion: 4.882627"} | |
| {"text": "### State\nConfusion: 4.002249\nAction: analogize\nReward: -0.165824\nNext Confusion: 4.386279"} | |
| {"text": "### State\nConfusion: 3.579485\nAction: worked_example\nReward: 1.023444\nNext Confusion: 2.80122"} | |
| {"text": "### State\nConfusion: 3.234276\nAction: analogize\nReward: -0.679574\nNext Confusion: 4.246226"} | |
| {"text": "### State\nConfusion: 4.081505\nAction: analogize\nReward: -0.179928\nNext Confusion: 4.543588"} | |
| {"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 0.626965\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.283777\nAction: worked_example\nReward: 1.920165\nNext Confusion: 1.740775"} | |
| {"text": "### State\nConfusion: 4.382779\nAction: analogize\nReward: 0.254878\nNext Confusion: 4.928416"} | |
| {"text": "### State\nConfusion: 6.469818\nAction: analogize\nReward: 0.10987\nNext Confusion: 6.650122"} | |
| {"text": "### State\nConfusion: 3.401842\nAction: analogize\nReward: -0.199118\nNext Confusion: 3.205163"} | |
| {"text": "### State\nConfusion: 4.53089\nAction: analogize\nReward: -1.084081\nNext Confusion: 4.936621"} | |
| {"text": "### State\nConfusion: 7.181551\nAction: explain\nReward: 0.857128\nNext Confusion: 7.342774"} | |
| {"text": "### State\nConfusion: 4.468257\nAction: analogize\nReward: -0.448214\nNext Confusion: 5.072941"} | |
| {"text": "### State\nConfusion: 3.547867\nAction: analogize\nReward: -0.621887\nNext Confusion: 4.220783"} | |
| {"text": "### State\nConfusion: 8.407737\nAction: explain\nReward: 0.888179\nNext Confusion: 7.801633"} | |
| {"text": "### State\nConfusion: 3.330818\nAction: analogize\nReward: 0.427229\nNext Confusion: 3.057645"} | |
| {"text": "### State\nConfusion: 9.220737\nAction: question\nReward: 1.039978\nNext Confusion: 8.286354"} | |
| {"text": "### State\nConfusion: 6.95472\nAction: correct_fact\nReward: 0.03733\nNext Confusion: 7.105115"} | |
| {"text": "### State\nConfusion: 3.038147\nAction: explain\nReward: -0.052942\nNext Confusion: 3.534167"} | |
| {"text": "### State\nConfusion: 4.203768\nAction: analogize\nReward: 0.44414\nNext Confusion: 3.748747"} | |
| {"text": "### State\nConfusion: 4.394054\nAction: analogize\nReward: 0.863723\nNext Confusion: 4.08186"} | |
| {"text": "### State\nConfusion: 7.25743\nAction: correct_fact\nReward: 0.615971\nNext Confusion: 6.862161"} | |
| {"text": "### State\nConfusion: 3.332897\nAction: worked_example\nReward: 1.561058\nNext Confusion: 2.125503"} | |
| {"text": "### State\nConfusion: 2.435858\nAction: analogize\nReward: 0.474768\nNext Confusion: 2.184254"} | |
| {"text": "### State\nConfusion: 3.347793\nAction: question\nReward: 0.604531\nNext Confusion: 3.077612"} | |
| {"text": "### State\nConfusion: 4.475967\nAction: analogize\nReward: -0.318223\nNext Confusion: 4.587817"} | |
| {"text": "### State\nConfusion: 2.567078\nAction: analogize\nReward: 0.606041\nNext Confusion: 2.992178"} | |
| {"text": "### State\nConfusion: 3.475035\nAction: correct_fact\nReward: 0.14881\nNext Confusion: 3.128379"} | |
| {"text": "### State\nConfusion: 7.965955\nAction: analogize\nReward: -0.464932\nNext Confusion: 8.695142"} | |
| {"text": "### State\nConfusion: 6.437036\nAction: worked_example\nReward: 1.469581\nNext Confusion: 5.73296"} | |
| {"text": "### State\nConfusion: 5.183217\nAction: question\nReward: 0.797294\nNext Confusion: 4.91528"} | |
| {"text": "### State\nConfusion: 4.545303\nAction: analogize\nReward: -0.524589\nNext Confusion: 4.333676"} | |
| {"text": "### State\nConfusion: 3.575009\nAction: analogize\nReward: -0.159195\nNext Confusion: 3.466167"} | |
| {"text": "### State\nConfusion: 3.788009\nAction: correct_fact\nReward: 0.762772\nNext Confusion: 3.053306"} | |
| {"text": "### State\nConfusion: 9.45097\nAction: analogize\nReward: 0.388164\nNext Confusion: 9.557405"} | |
| {"text": "### State\nConfusion: 2.62543\nAction: analogize\nReward: 0.251797\nNext Confusion: 2.618268"} | |
| {"text": "### State\nConfusion: 2.320139\nAction: analogize\nReward: 0.979596\nNext Confusion: 1.804903"} | |
| {"text": "### State\nConfusion: 6.174851\nAction: analogize\nReward: -0.47453\nNext Confusion: 6.922316"} | |
| {"text": "### State\nConfusion: 4.333913\nAction: analogize\nReward: -0.327869\nNext Confusion: 4.996931"} | |
| {"text": "### State\nConfusion: 5.262707\nAction: correct_fact\nReward: -0.606454\nNext Confusion: 5.759839"} | |
| {"text": "### State\nConfusion: 5.174058\nAction: correct_fact\nReward: -0.344999\nNext Confusion: 5.809796"} | |
| {"text": "### State\nConfusion: 4.217094\nAction: correct_fact\nReward: -1.139956\nNext Confusion: 4.768854"} | |
| {"text": "### State\nConfusion: 2.933767\nAction: explain\nReward: 0.647692\nNext Confusion: 2.456029"} | |
| {"text": "### State\nConfusion: 3.497738\nAction: correct_fact\nReward: 0.961823\nNext Confusion: 2.937699"} | |
| {"text": "### State\nConfusion: 4.59177\nAction: analogize\nReward: 0.722301\nNext Confusion: 4.028663"} | |
| {"text": "### State\nConfusion: 4.772787\nAction: explain\nReward: -0.079586\nNext Confusion: 5.282044"} | |
| {"text": "### State\nConfusion: 2.663238\nAction: correct_fact\nReward: 0.744137\nNext Confusion: 2.511148"} | |
| {"text": "### State\nConfusion: 3.442076\nAction: worked_example\nReward: 1.852468\nNext Confusion: 1.87048"} | |
| {"text": "### State\nConfusion: 3.418484\nAction: correct_fact\nReward: -1.434631\nNext Confusion: 4.678504"} | |
| {"text": "### State\nConfusion: 7.50074\nAction: analogize\nReward: -1.03624\nNext Confusion: 7.782828"} | |
| {"text": "### State\nConfusion: 3.662425\nAction: analogize\nReward: 0.288273\nNext Confusion: 3.881542"} | |
| {"text": "### State\nConfusion: 5.825604\nAction: explain\nReward: 0.167269\nNext Confusion: 5.945172"} | |
| {"text": "### State\nConfusion: 8.298186\nAction: explain\nReward: -0.642418\nNext Confusion: 9.145937"} | |
| {"text": "### State\nConfusion: 3.544472\nAction: analogize\nReward: 1.496793\nNext Confusion: 3.059354"} | |
| {"text": "### State\nConfusion: 5.426896\nAction: worked_example\nReward: 2.035843\nNext Confusion: 3.913632"} | |
| {"text": "### State\nConfusion: 7.057615\nAction: worked_example\nReward: 1.300917\nNext Confusion: 6.035897"} | |
| {"text": "### State\nConfusion: 5.308103\nAction: worked_example\nReward: 0.597929\nNext Confusion: 4.281115"} | |
| {"text": "### State\nConfusion: 4.660218\nAction: correct_fact\nReward: -1.198085\nNext Confusion: 5.178064"} | |
| {"text": "### State\nConfusion: 4.182568\nAction: analogize\nReward: -0.669219\nNext Confusion: 4.998801"} | |
| {"text": "### State\nConfusion: 3.374448\nAction: question\nReward: 0.148296\nNext Confusion: 3.367422"} | |
| {"text": "### State\nConfusion: 2.244774\nAction: analogize\nReward: 0.139914\nNext Confusion: 2.010998"} | |
| {"text": "### State\nConfusion: 7.57388\nAction: worked_example\nReward: 2.927079\nNext Confusion: 5.321944"} | |
| {"text": "### State\nConfusion: 5.053628\nAction: analogize\nReward: -0.848654\nNext Confusion: 5.590656"} | |
| {"text": "### State\nConfusion: 4.58681\nAction: question\nReward: 0.398616\nNext Confusion: 3.826893"} | |
| {"text": "### State\nConfusion: 4.385522\nAction: analogize\nReward: 0.196747\nNext Confusion: 4.364781"} | |
| {"text": "### State\nConfusion: 5.258719\nAction: analogize\nReward: -0.478197\nNext Confusion: 5.745747"} | |
| {"text": "### State\nConfusion: 4.160741\nAction: analogize\nReward: -0.373814\nNext Confusion: 4.572683"} | |
| {"text": "### State\nConfusion: 4.594178\nAction: explain\nReward: -0.379261\nNext Confusion: 4.967944"} | |
| {"text": "### State\nConfusion: 3.734883\nAction: explain\nReward: -0.135488\nNext Confusion: 3.877711"} | |
| {"text": "### State\nConfusion: 8.422437\nAction: correct_fact\nReward: 1.419393\nNext Confusion: 8.090842"} | |
| {"text": "### State\nConfusion: 4.626043\nAction: question\nReward: 0.781404\nNext Confusion: 4.271732"} | |
| {"text": "### State\nConfusion: 4.303492\nAction: analogize\nReward: -0.486873\nNext Confusion: 5.130393"} | |
| {"text": "### State\nConfusion: 6.131956\nAction: analogize\nReward: -0.443139\nNext Confusion: 5.680384"} | |
| {"text": "### State\nConfusion: 3.401462\nAction: analogize\nReward: -0.628829\nNext Confusion: 3.549128"} | |
| {"text": "### State\nConfusion: 4.063388\nAction: question\nReward: 0.669799\nNext Confusion: 3.684196"} | |
| {"text": "### State\nConfusion: 4.185434\nAction: analogize\nReward: 0.082712\nNext Confusion: 4.268066"} | |
| {"text": "### State\nConfusion: 5.392101\nAction: analogize\nReward: -0.201228\nNext Confusion: 5.383713"} | |
| {"text": "### State\nConfusion: 3.389313\nAction: analogize\nReward: -1.13185\nNext Confusion: 4.473208"} | |
| {"text": "### State\nConfusion: 3.382116\nAction: explain\nReward: -0.083349\nNext Confusion: 3.077605"} | |
| {"text": "### State\nConfusion: 6.708632\nAction: analogize\nReward: 0.235717\nNext Confusion: 6.575146"} | |
| {"text": "### State\nConfusion: 4.120906\nAction: question\nReward: -0.201961\nNext Confusion: 4.228239"} | |
| {"text": "### State\nConfusion: 5.375295\nAction: analogize\nReward: -0.566432\nNext Confusion: 6.098354"} | |
| {"text": "### State\nConfusion: 5.3075\nAction: explain\nReward: 0.840567\nNext Confusion: 4.593798"} | |
| {"text": "### State\nConfusion: 3.732203\nAction: analogize\nReward: 0.019524\nNext Confusion: 3.975943"} | |
| {"text": "### State\nConfusion: 6.234458\nAction: question\nReward: -0.545665\nNext Confusion: 5.8335"} | |
| {"text": "### State\nConfusion: 2.99104\nAction: analogize\nReward: -0.644436\nNext Confusion: 3.476136"} | |
| {"text": "### State\nConfusion: 2.404935\nAction: explain\nReward: 0.535707\nNext Confusion: 2.225598"} | |
| {"text": "### State\nConfusion: 3.773187\nAction: analogize\nReward: -0.455111\nNext Confusion: 4.499895"} | |
| {"text": "### State\nConfusion: 3.483044\nAction: explain\nReward: 0.347968\nNext Confusion: 2.844657"} | |
| {"text": "### State\nConfusion: 4.503647\nAction: analogize\nReward: -0.185608\nNext Confusion: 4.902484"} | |
| {"text": "### State\nConfusion: 5.3724\nAction: explain\nReward: 0.451463\nNext Confusion: 5.077805"} | |
| {"text": "### State\nConfusion: 3.309671\nAction: analogize\nReward: -0.143453\nNext Confusion: 3.113236"} | |
| {"text": "### State\nConfusion: 4.513547\nAction: question\nReward: 0.606718\nNext Confusion: 4.085341"} | |
| {"text": "### State\nConfusion: 3.603875\nAction: analogize\nReward: -1.52553\nNext Confusion: 4.786602"} | |
| {"text": "### State\nConfusion: 7.368618\nAction: analogize\nReward: -0.765855\nNext Confusion: 7.418029"} | |
| {"text": "### State\nConfusion: 7.257043\nAction: worked_example\nReward: 0.905552\nNext Confusion: 6.61073"} | |
| {"text": "### State\nConfusion: 5.561694\nAction: analogize\nReward: -0.508069\nNext Confusion: 6.053774"} | |
| {"text": "### State\nConfusion: 2.828592\nAction: analogize\nReward: -0.277313\nNext Confusion: 2.854013"} | |
| {"text": "### State\nConfusion: 3.715696\nAction: analogize\nReward: 0.745882\nNext Confusion: 4.146977"} | |
| {"text": "### State\nConfusion: 3.437749\nAction: analogize\nReward: 0.142635\nNext Confusion: 3.554578"} | |
| {"text": "### State\nConfusion: 2.441164\nAction: analogize\nReward: 0.021128\nNext Confusion: 2.135772"} | |
| {"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.631064\nNext Confusion: 9.471717"} | |
| {"text": "### State\nConfusion: 3.799863\nAction: analogize\nReward: -0.517685\nNext Confusion: 4.380528"} | |
| {"text": "### State\nConfusion: 4.649355\nAction: analogize\nReward: -0.19733\nNext Confusion: 5.630637"} | |
| {"text": "### State\nConfusion: 4.754737\nAction: analogize\nReward: 0.957142\nNext Confusion: 4.203445"} | |
| {"text": "### State\nConfusion: 3.006349\nAction: analogize\nReward: 0.189458\nNext Confusion: 3.092292"} | |
| {"text": "### State\nConfusion: 3.176708\nAction: analogize\nReward: 0.504138\nNext Confusion: 3.474577"} | |
| {"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.068862\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.380745\nAction: analogize\nReward: 0.278275\nNext Confusion: 3.226712"} | |
| {"text": "### State\nConfusion: 4.818123\nAction: correct_fact\nReward: -0.642482\nNext Confusion: 5.129877"} | |
| {"text": "### State\nConfusion: 1.902845\nAction: question\nReward: -0.081912\nNext Confusion: 2.182037"} | |
| {"text": "### State\nConfusion: 6.238435\nAction: question\nReward: -0.37372\nNext Confusion: 5.979242"} | |
| {"text": "### State\nConfusion: 3.918744\nAction: explain\nReward: -0.178161\nNext Confusion: 3.932915"} | |
| {"text": "### State\nConfusion: 6.612762\nAction: analogize\nReward: 0.452281\nNext Confusion: 6.863951"} | |
| {"text": "### State\nConfusion: 3.425594\nAction: analogize\nReward: -0.412629\nNext Confusion: 3.878789"} | |
| {"text": "### State\nConfusion: 4.775546\nAction: analogize\nReward: -0.009128\nNext Confusion: 4.696767"} | |
| {"text": "### State\nConfusion: 7.393962\nAction: analogize\nReward: -0.722784\nNext Confusion: 7.283319"} | |
| {"text": "### State\nConfusion: 5.283406\nAction: analogize\nReward: 0.286061\nNext Confusion: 5.141407"} | |
| {"text": "### State\nConfusion: 6.754779\nAction: worked_example\nReward: 2.055535\nNext Confusion: 5.141479"} | |
| {"text": "### State\nConfusion: 4.003936\nAction: question\nReward: -0.513979\nNext Confusion: 4.440584"} | |
| {"text": "### State\nConfusion: 10.0\nAction: explain\nReward: 0.83318\nNext Confusion: 9.88262"} | |
| {"text": "### State\nConfusion: 6.166142\nAction: worked_example\nReward: 0.421776\nNext Confusion: 5.762331"} | |
| {"text": "### State\nConfusion: 3.861957\nAction: question\nReward: 0.724535\nNext Confusion: 3.452488"} | |
| {"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 1.315561\nNext Confusion: 7.715245"} | |
| {"text": "### State\nConfusion: 8.259786\nAction: explain\nReward: 0.558777\nNext Confusion: 7.904824"} | |
| {"text": "### State\nConfusion: 3.152868\nAction: correct_fact\nReward: 0.234806\nNext Confusion: 3.087118"} | |
| {"text": "### State\nConfusion: 6.291952\nAction: analogize\nReward: -1.134346\nNext Confusion: 6.971364"} | |
| {"text": "### State\nConfusion: 3.391283\nAction: explain\nReward: -0.301607\nNext Confusion: 3.773489"} | |
| {"text": "### State\nConfusion: 7.058365\nAction: question\nReward: 0.096737\nNext Confusion: 6.488666"} | |
| {"text": "### State\nConfusion: 5.330192\nAction: question\nReward: 0.434085\nNext Confusion: 4.503624"} | |
| {"text": "### State\nConfusion: 7.3815\nAction: explain\nReward: -0.624081\nNext Confusion: 7.829628"} | |
| {"text": "### State\nConfusion: 3.936956\nAction: explain\nReward: 0.156011\nNext Confusion: 4.77193"} | |
| {"text": "### State\nConfusion: 8.507776\nAction: analogize\nReward: -0.267206\nNext Confusion: 8.839627"} | |
| {"text": "### State\nConfusion: 3.966226\nAction: analogize\nReward: 0.07544\nNext Confusion: 3.734597"} | |
| {"text": "### State\nConfusion: 3.389673\nAction: analogize\nReward: -0.385492\nNext Confusion: 3.757603"} | |
| {"text": "### State\nConfusion: 4.152238\nAction: explain\nReward: 0.194201\nNext Confusion: 3.124795"} | |
| {"text": "### State\nConfusion: 3.671381\nAction: explain\nReward: -0.851671\nNext Confusion: 4.85722"} | |
| {"text": "### State\nConfusion: 3.87501\nAction: analogize\nReward: 0.963411\nNext Confusion: 4.011062"} | |
| {"text": "### State\nConfusion: 3.591396\nAction: correct_fact\nReward: -0.060506\nNext Confusion: 3.749811"} | |
| {"text": "### State\nConfusion: 5.843234\nAction: explain\nReward: 1.538094\nNext Confusion: 4.893917"} | |
| {"text": "### State\nConfusion: 6.427896\nAction: worked_example\nReward: 0.989949\nNext Confusion: 4.926106"} | |
| {"text": "### State\nConfusion: 6.490404\nAction: analogize\nReward: -0.035181\nNext Confusion: 6.649741"} | |
| {"text": "### State\nConfusion: 4.298608\nAction: analogize\nReward: 0.51613\nNext Confusion: 4.339033"} | |
| {"text": "### State\nConfusion: 2.794963\nAction: explain\nReward: -0.328953\nNext Confusion: 3.284198"} | |
| {"text": "### State\nConfusion: 0.692194\nAction: worked_example\nReward: 0.527875\nNext Confusion: 0.0"} | |
| {"text": "### State\nConfusion: 3.597074\nAction: explain\nReward: -0.452491\nNext Confusion: 4.077954"} | |
| {"text": "### State\nConfusion: 3.767463\nAction: analogize\nReward: -1.452971\nNext Confusion: 4.617874"} | |
| {"text": "### State\nConfusion: 4.362273\nAction: worked_example\nReward: 0.933517\nNext Confusion: 3.596099"} | |
| {"text": "### State\nConfusion: 4.670532\nAction: analogize\nReward: -1.184355\nNext Confusion: 4.818303"} | |
| {"text": "### State\nConfusion: 3.000032\nAction: analogize\nReward: -0.435179\nNext Confusion: 4.011406"} | |
| {"text": "### State\nConfusion: 3.608845\nAction: explain\nReward: 0.150313\nNext Confusion: 3.592252"} | |
| {"text": "### State\nConfusion: 3.324292\nAction: question\nReward: -0.160978\nNext Confusion: 3.667143"} | |
| {"text": "### State\nConfusion: 3.206405\nAction: analogize\nReward: 0.718679\nNext Confusion: 2.95461"} | |
| {"text": "### State\nConfusion: 6.566045\nAction: analogize\nReward: -0.764221\nNext Confusion: 7.17747"} | |
| {"text": "### State\nConfusion: 4.402418\nAction: analogize\nReward: -1.400358\nNext Confusion: 5.781072"} | |
| {"text": "### State\nConfusion: 3.839939\nAction: analogize\nReward: 1.133303\nNext Confusion: 3.523669"} | |
| {"text": "### State\nConfusion: 4.051802\nAction: analogize\nReward: 0.732211\nNext Confusion: 3.626338"} | |
| {"text": "### State\nConfusion: 3.779393\nAction: correct_fact\nReward: -0.882573\nNext Confusion: 4.401586"} | |
| {"text": "### State\nConfusion: 4.234075\nAction: analogize\nReward: 0.006339\nNext Confusion: 4.259672"} | |
| {"text": "### State\nConfusion: 5.705486\nAction: analogize\nReward: -0.177564\nNext Confusion: 5.695643"} | |
| {"text": "### State\nConfusion: 3.404266\nAction: question\nReward: 0.306554\nNext Confusion: 3.231444"} | |
| {"text": "### State\nConfusion: 3.485604\nAction: analogize\nReward: 0.322089\nNext Confusion: 3.532859"} | |
| {"text": "### State\nConfusion: 5.956239\nAction: analogize\nReward: 0.175887\nNext Confusion: 5.894433"} | |
| {"text": "### State\nConfusion: 3.64884\nAction: correct_fact\nReward: 0.186027\nNext Confusion: 4.024583"} | |
| {"text": "### State\nConfusion: 4.357332\nAction: analogize\nReward: 0.094647\nNext Confusion: 4.234993"} | |
| {"text": "### State\nConfusion: 3.956645\nAction: analogize\nReward: 0.143816\nNext Confusion: 2.74001"} | |
| {"text": "### State\nConfusion: 4.275213\nAction: analogize\nReward: -0.112997\nNext Confusion: 4.803157"} | |
| {"text": "### State\nConfusion: 7.308207\nAction: question\nReward: -0.002123\nNext Confusion: 6.719409"} | |
| {"text": "### State\nConfusion: 6.641357\nAction: analogize\nReward: -0.387628\nNext Confusion: 7.074882"} | |
| {"text": "### State\nConfusion: 7.486015\nAction: analogize\nReward: -0.275908\nNext Confusion: 7.823219"} | |
| {"text": "### State\nConfusion: 7.737795\nAction: worked_example\nReward: 0.754587\nNext Confusion: 7.086144"} | |
| {"text": "### State\nConfusion: 6.91396\nAction: worked_example\nReward: -0.301934\nNext Confusion: 7.024792"} | |
| {"text": "### State\nConfusion: 3.362437\nAction: analogize\nReward: -0.823035\nNext Confusion: 3.973793"} | |
| {"text": "### State\nConfusion: 4.437325\nAction: analogize\nReward: -0.477802\nNext Confusion: 5.210613"} | |
| {"text": "### State\nConfusion: 3.576501\nAction: analogize\nReward: -0.928897\nNext Confusion: 3.691247"} | |
| {"text": "### State\nConfusion: 3.308704\nAction: analogize\nReward: 0.099583\nNext Confusion: 3.11851"} | |
| {"text": "### State\nConfusion: 4.28096\nAction: analogize\nReward: -0.935757\nNext Confusion: 4.505311"} | |
| {"text": "### State\nConfusion: 7.126233\nAction: analogize\nReward: -2.046618\nNext Confusion: 8.965673"} | |
| {"text": "### State\nConfusion: 5.618792\nAction: explain\nReward: -0.070551\nNext Confusion: 6.015597"} | |
| {"text": "### State\nConfusion: 7.323637\nAction: analogize\nReward: 0.798525\nNext Confusion: 7.287678"} | |
| {"text": "### State\nConfusion: 3.949625\nAction: analogize\nReward: -0.422096\nNext Confusion: 3.592318"} | |
| {"text": "### State\nConfusion: 5.734334\nAction: analogize\nReward: -0.061426\nNext Confusion: 5.932269"} | |
| {"text": "### State\nConfusion: 4.095278\nAction: explain\nReward: 0.150416\nNext Confusion: 3.920164"} | |
| {"text": "### State\nConfusion: 4.267157\nAction: correct_fact\nReward: -0.55467\nNext Confusion: 4.589067"} | |
| {"text": "### State\nConfusion: 3.476372\nAction: analogize\nReward: 0.434812\nNext Confusion: 3.368467"} | |
| {"text": "### State\nConfusion: 3.845721\nAction: analogize\nReward: -0.44389\nNext Confusion: 3.958671"} | |
| {"text": "### State\nConfusion: 5.89789\nAction: explain\nReward: 0.289961\nNext Confusion: 5.80951"} | |
| {"text": "### State\nConfusion: 3.30418\nAction: question\nReward: 0.561687\nNext Confusion: 2.938793"} | |
| {"text": "### State\nConfusion: 5.601084\nAction: analogize\nReward: 0.090898\nNext Confusion: 6.04158"} | |
| {"text": "### State\nConfusion: 3.804507\nAction: explain\nReward: -0.214608\nNext Confusion: 4.102091"} | |
| {"text": "### State\nConfusion: 5.699248\nAction: analogize\nReward: -0.86557\nNext Confusion: 7.082009"} | |
| {"text": "### State\nConfusion: 3.841494\nAction: question\nReward: 1.661304\nNext Confusion: 2.886172"} | |
| {"text": "### State\nConfusion: 6.02223\nAction: analogize\nReward: -0.655605\nNext Confusion: 6.230086"} | |
| {"text": "### State\nConfusion: 5.195364\nAction: explain\nReward: -0.001099\nNext Confusion: 4.825065"} | |
| {"text": "### State\nConfusion: 4.841122\nAction: question\nReward: 1.924098\nNext Confusion: 4.41556"} | |
| {"text": "### State\nConfusion: 3.552158\nAction: explain\nReward: 1.212741\nNext Confusion: 1.933405"} | |
| {"text": "### State\nConfusion: 7.492652\nAction: analogize\nReward: -0.788627\nNext Confusion: 8.387055"} | |
| {"text": "### State\nConfusion: 2.427369\nAction: analogize\nReward: -0.966672\nNext Confusion: 2.26519"} | |
| {"text": "### State\nConfusion: 5.075143\nAction: analogize\nReward: -0.249792\nNext Confusion: 5.690755"} | |
| {"text": "### State\nConfusion: 6.168964\nAction: analogize\nReward: -0.402024\nNext Confusion: 6.470056"} | |
| {"text": "### State\nConfusion: 9.379551\nAction: correct_fact\nReward: 0.264547\nNext Confusion: 9.748802"} | |
| {"text": "### State\nConfusion: 5.41286\nAction: analogize\nReward: 0.3176\nNext Confusion: 5.959353"} | |
| {"text": "### State\nConfusion: 3.271821\nAction: analogize\nReward: -0.490908\nNext Confusion: 2.909299"} | |
| {"text": "### State\nConfusion: 3.993392\nAction: explain\nReward: 0.02944\nNext Confusion: 4.182259"} | |
| {"text": "### State\nConfusion: 3.636055\nAction: analogize\nReward: 0.567075\nNext Confusion: 3.196599"} | |
| {"text": "### State\nConfusion: 4.865194\nAction: question\nReward: 0.345734\nNext Confusion: 4.333092"} | |
| {"text": "### State\nConfusion: 3.210343\nAction: worked_example\nReward: 1.467505\nNext Confusion: 1.968105"} | |
| {"text": "### State\nConfusion: 3.497403\nAction: analogize\nReward: -0.218089\nNext Confusion: 3.823413"} | |
| {"text": "### State\nConfusion: 7.678603\nAction: question\nReward: 1.366405\nNext Confusion: 6.973949"} | |
| {"text": "### State\nConfusion: 5.769523\nAction: question\nReward: 1.414712\nNext Confusion: 4.295852"} | |
| {"text": "### State\nConfusion: 6.377737\nAction: analogize\nReward: 1.021054\nNext Confusion: 5.484272"} | |
| {"text": "### State\nConfusion: 4.100323\nAction: analogize\nReward: 0.385391\nNext Confusion: 3.907769"} | |
| {"text": "### State\nConfusion: 3.378142\nAction: analogize\nReward: -0.65456\nNext Confusion: 3.887522"} | |
| {"text": "### State\nConfusion: 2.248417\nAction: analogize\nReward: 1.451595\nNext Confusion: 1.826454"} | |
| {"text": "### State\nConfusion: 5.806881\nAction: analogize\nReward: 0.423723\nNext Confusion: 5.425294"} | |
| {"text": "### State\nConfusion: 9.48094\nAction: question\nReward: -0.237491\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 7.971427\nAction: analogize\nReward: -0.560404\nNext Confusion: 8.549126"} | |
| {"text": "### State\nConfusion: 4.571994\nAction: analogize\nReward: -0.816992\nNext Confusion: 4.862229"} | |
| {"text": "### State\nConfusion: 7.443913\nAction: analogize\nReward: -0.611569\nNext Confusion: 7.605993"} | |
| {"text": "### State\nConfusion: 7.047747\nAction: worked_example\nReward: 2.416605\nNext Confusion: 4.614026"} | |
| {"text": "### State\nConfusion: 4.453329\nAction: analogize\nReward: -0.823522\nNext Confusion: 4.277873"} | |
| {"text": "### State\nConfusion: 4.519389\nAction: analogize\nReward: 0.845583\nNext Confusion: 3.553505"} | |
| {"text": "### State\nConfusion: 3.012619\nAction: worked_example\nReward: 1.78637\nNext Confusion: 1.214761"} | |
| {"text": "### State\nConfusion: 6.717383\nAction: analogize\nReward: 0.672834\nNext Confusion: 6.077421"} | |
| {"text": "### State\nConfusion: 8.516796\nAction: analogize\nReward: 0.896699\nNext Confusion: 8.070201"} | |
| {"text": "### State\nConfusion: 6.385722\nAction: worked_example\nReward: 1.639703\nNext Confusion: 5.6651"} | |
| {"text": "### State\nConfusion: 3.420088\nAction: worked_example\nReward: 1.214258\nNext Confusion: 1.911085"} | |
| {"text": "### State\nConfusion: 3.304491\nAction: correct_fact\nReward: -0.659982\nNext Confusion: 3.89256"} | |
| {"text": "### State\nConfusion: 4.34604\nAction: analogize\nReward: 0.367453\nNext Confusion: 4.545939"} | |
| {"text": "### State\nConfusion: 5.70637\nAction: explain\nReward: 0.986834\nNext Confusion: 5.318554"} | |
| {"text": "### State\nConfusion: 4.569731\nAction: explain\nReward: -0.251817\nNext Confusion: 5.206977"} | |
| {"text": "### State\nConfusion: 6.623116\nAction: question\nReward: 0.813307\nNext Confusion: 6.747557"} | |
| {"text": "### State\nConfusion: 2.478569\nAction: question\nReward: 0.624874\nNext Confusion: 2.662309"} | |
| {"text": "### State\nConfusion: 2.731783\nAction: explain\nReward: -0.700712\nNext Confusion: 3.661917"} | |
| {"text": "### State\nConfusion: 2.529542\nAction: analogize\nReward: -0.754609\nNext Confusion: 3.141305"} | |
| {"text": "### State\nConfusion: 4.801964\nAction: explain\nReward: 0.7866\nNext Confusion: 3.92003"} | |
| {"text": "### State\nConfusion: 5.041254\nAction: worked_example\nReward: 2.127081\nNext Confusion: 3.543361"} | |
| {"text": "### State\nConfusion: 2.69548\nAction: analogize\nReward: -1.212333\nNext Confusion: 3.334755"} | |
| {"text": "### State\nConfusion: 6.504969\nAction: correct_fact\nReward: -0.521073\nNext Confusion: 6.356454"} | |
| {"text": "### State\nConfusion: 6.60377\nAction: question\nReward: 1.630758\nNext Confusion: 5.570652"} | |
| {"text": "### State\nConfusion: 6.362903\nAction: question\nReward: 0.464984\nNext Confusion: 6.482695"} | |
| {"text": "### State\nConfusion: 3.440521\nAction: analogize\nReward: 0.046752\nNext Confusion: 3.519933"} | |
| {"text": "### State\nConfusion: 5.921055\nAction: correct_fact\nReward: -0.238961\nNext Confusion: 6.003142"} | |
| {"text": "### State\nConfusion: 3.715529\nAction: analogize\nReward: 0.378563\nNext Confusion: 3.28992"} | |
| {"text": "### State\nConfusion: 5.372323\nAction: analogize\nReward: -0.712887\nNext Confusion: 5.683921"} | |
| {"text": "### State\nConfusion: 5.731329\nAction: analogize\nReward: -0.035359\nNext Confusion: 6.064223"} | |
| {"text": "### State\nConfusion: 6.944568\nAction: analogize\nReward: 0.637097\nNext Confusion: 6.785502"} | |
| {"text": "### State\nConfusion: 5.343425\nAction: question\nReward: 1.413053\nNext Confusion: 4.877586"} | |
| {"text": "### State\nConfusion: 4.044001\nAction: correct_fact\nReward: 0.791123\nNext Confusion: 4.111125"} | |
| {"text": "### State\nConfusion: 2.885599\nAction: question\nReward: 0.741619\nNext Confusion: 2.525219"} | |
| {"text": "### State\nConfusion: 4.033328\nAction: analogize\nReward: -0.324879\nNext Confusion: 3.825331"} | |
| {"text": "### State\nConfusion: 3.583706\nAction: analogize\nReward: 0.5554\nNext Confusion: 2.749167"} | |
| {"text": "### State\nConfusion: 6.577643\nAction: analogize\nReward: 0.07324\nNext Confusion: 6.620709"} | |
| {"text": "### State\nConfusion: 3.076465\nAction: explain\nReward: 0.322549\nNext Confusion: 2.944134"} | |
| {"text": "### State\nConfusion: 4.1453\nAction: analogize\nReward: -0.292769\nNext Confusion: 4.125049"} | |
| {"text": "### State\nConfusion: 3.86788\nAction: worked_example\nReward: 2.352502\nNext Confusion: 2.928135"} | |
| {"text": "### State\nConfusion: 3.783789\nAction: analogize\nReward: 0.076052\nNext Confusion: 4.21214"} | |
| {"text": "### State\nConfusion: 2.774392\nAction: analogize\nReward: 0.770279\nNext Confusion: 2.368991"} | |
| {"text": "### State\nConfusion: 5.348946\nAction: explain\nReward: 0.926692\nNext Confusion: 4.922467"} | |
| {"text": "### State\nConfusion: 6.317099\nAction: worked_example\nReward: 1.679236\nNext Confusion: 5.322797"} | |
| {"text": "### State\nConfusion: 3.215445\nAction: worked_example\nReward: 1.053004\nNext Confusion: 1.814326"} | |
| {"text": "### State\nConfusion: 3.391156\nAction: analogize\nReward: -0.276477\nNext Confusion: 3.740653"} | |
| {"text": "### State\nConfusion: 2.778777\nAction: analogize\nReward: 0.684112\nNext Confusion: 2.150054"} | |
| {"text": "### State\nConfusion: 5.19486\nAction: analogize\nReward: 1.399383\nNext Confusion: 4.791388"} | |
| {"text": "### State\nConfusion: 6.92237\nAction: worked_example\nReward: 2.276795\nNext Confusion: 5.880061"} | |
| {"text": "### State\nConfusion: 2.999296\nAction: correct_fact\nReward: -0.39091\nNext Confusion: 3.133839"} | |
| {"text": "### State\nConfusion: 3.52445\nAction: worked_example\nReward: 1.497804\nNext Confusion: 2.473041"} | |
| {"text": "### State\nConfusion: 3.491305\nAction: analogize\nReward: -1.428821\nNext Confusion: 4.530365"} | |
| {"text": "### State\nConfusion: 7.938798\nAction: question\nReward: 1.634904\nNext Confusion: 7.270522"} | |
| {"text": "### State\nConfusion: 3.63357\nAction: explain\nReward: -1.215149\nNext Confusion: 4.452176"} | |
| {"text": "### State\nConfusion: 7.658671\nAction: correct_fact\nReward: -0.057792\nNext Confusion: 7.543473"} | |
| {"text": "### State\nConfusion: 4.069772\nAction: analogize\nReward: 0.751406\nNext Confusion: 3.895314"} | |
| {"text": "### State\nConfusion: 5.882581\nAction: correct_fact\nReward: -0.371607\nNext Confusion: 5.911959"} | |
| {"text": "### State\nConfusion: 4.669287\nAction: explain\nReward: -0.120479\nNext Confusion: 4.880317"} | |
| {"text": "### State\nConfusion: 4.291442\nAction: correct_fact\nReward: -0.798368\nNext Confusion: 4.442684"} | |
| {"text": "### State\nConfusion: 5.854513\nAction: analogize\nReward: -0.261615\nNext Confusion: 6.171615"} | |
| {"text": "### State\nConfusion: 6.836235\nAction: explain\nReward: -0.649459\nNext Confusion: 6.768502"} | |
| {"text": "### State\nConfusion: 3.439055\nAction: analogize\nReward: -0.147351\nNext Confusion: 3.926904"} | |
| {"text": "### State\nConfusion: 3.48054\nAction: correct_fact\nReward: 0.060255\nNext Confusion: 3.50379"} | |
| {"text": "### State\nConfusion: 4.569834\nAction: analogize\nReward: -1.409163\nNext Confusion: 5.57295"} | |
| {"text": "### State\nConfusion: 6.003526\nAction: question\nReward: 0.689142\nNext Confusion: 5.342512"} | |
| {"text": "### State\nConfusion: 4.746644\nAction: analogize\nReward: -1.093781\nNext Confusion: 5.532921"} | |
| {"text": "### State\nConfusion: 7.746864\nAction: analogize\nReward: -0.978909\nNext Confusion: 8.778952"} | |
| {"text": "### State\nConfusion: 8.820141\nAction: explain\nReward: 0.943031\nNext Confusion: 7.812218"} | |
| {"text": "### State\nConfusion: 8.521159\nAction: analogize\nReward: 0.563675\nNext Confusion: 9.113123"} | |
| {"text": "### State\nConfusion: 2.340023\nAction: explain\nReward: 0.847821\nNext Confusion: 1.814186"} | |
| {"text": "### State\nConfusion: 4.345832\nAction: question\nReward: 1.241657\nNext Confusion: 3.922095"} | |
| {"text": "### State\nConfusion: 4.367841\nAction: analogize\nReward: -3.81745\nNext Confusion: 4.413113"} | |
| {"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.006\nNext Confusion: 10.0"} | |
| {"text": "### State\nConfusion: 3.855926\nAction: worked_example\nReward: 1.359279\nNext Confusion: 2.140503"} | |
| {"text": "### State\nConfusion: 5.947092\nAction: analogize\nReward: -1.062392\nNext Confusion: 6.499333"} | |
| {"text": "### State\nConfusion: 4.267658\nAction: explain\nReward: -0.076467\nNext Confusion: 4.075721"} | |
| {"text": "### State\nConfusion: 5.994388\nAction: worked_example\nReward: 1.125528\nNext Confusion: 4.426499"} | |
| {"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 0.999445\nNext Confusion: 8.992741"} | |
| {"text": "### State\nConfusion: 5.015469\nAction: question\nReward: -0.550988\nNext Confusion: 5.527272"} | |
| {"text": "### State\nConfusion: 4.494957\nAction: analogize\nReward: -1.693285\nNext Confusion: 5.166589"} | |
| {"text": "### State\nConfusion: 3.628634\nAction: analogize\nReward: -1.178641\nNext Confusion: 3.971706"} | |
| {"text": "### State\nConfusion: 3.521578\nAction: explain\nReward: 0.53535\nNext Confusion: 4.013176"} | |
| {"text": "### State\nConfusion: 3.901601\nAction: analogize\nReward: -0.547104\nNext Confusion: 4.631837"} | |
| {"text": "### State\nConfusion: 4.647506\nAction: explain\nReward: 0.631395\nNext Confusion: 4.224959"} | |
| {"text": "### State\nConfusion: 5.711406\nAction: explain\nReward: 0.49222\nNext Confusion: 6.024854"} | |
| {"text": "### State\nConfusion: 4.07159\nAction: analogize\nReward: -0.424504\nNext Confusion: 4.798265"} | |
| {"text": "### State\nConfusion: 4.140967\nAction: correct_fact\nReward: -1.231822\nNext Confusion: 5.130908"} | |
| {"text": "### State\nConfusion: 6.352411\nAction: analogize\nReward: -1.18697\nNext Confusion: 7.181125"} | |
| {"text": "### State\nConfusion: 7.185508\nAction: worked_example\nReward: 3.034571\nNext Confusion: 4.742269"} | |
| {"text": "### State\nConfusion: 6.044193\nAction: correct_fact\nReward: -0.131452\nNext Confusion: 6.259499"} | |
| {"text": "### State\nConfusion: 3.850863\nAction: correct_fact\nReward: -1.165361\nNext Confusion: 4.437674"} | |
| {"text": "### State\nConfusion: 5.709232\nAction: analogize\nReward: -0.335393\nNext Confusion: 6.03693"} | |
| {"text": "### State\nConfusion: 2.790189\nAction: question\nReward: -0.015608\nNext Confusion: 2.420717"} | |
| {"text": "### State\nConfusion: 2.813883\nAction: analogize\nReward: -0.325674\nNext Confusion: 3.097643"} | |
| {"text": "### State\nConfusion: 4.600974\nAction: analogize\nReward: -0.689222\nNext Confusion: 5.273267"} | |
| {"text": "### State\nConfusion: 6.823961\nAction: question\nReward: 0.231637\nNext Confusion: 6.530535"} | |
| {"text": "### State\nConfusion: 4.187769\nAction: analogize\nReward: 0.41784\nNext Confusion: 4.147315"} | |
| {"text": "### State\nConfusion: 3.270071\nAction: analogize\nReward: -0.701233\nNext Confusion: 3.924204"} | |
| {"text": "### State\nConfusion: 3.892113\nAction: worked_example\nReward: 1.771034\nNext Confusion: 3.077213"} | |
| {"text": "### State\nConfusion: 3.897737\nAction: analogize\nReward: -0.880082\nNext Confusion: 4.28867"} | |
| {"text": "### State\nConfusion: 4.182186\nAction: question\nReward: 1.025072\nNext Confusion: 3.581476"} | |
| {"text": "### State\nConfusion: 3.280212\nAction: analogize\nReward: -0.230556\nNext Confusion: 3.718891"} | |
| {"text": "### State\nConfusion: 5.115473\nAction: analogize\nReward: -0.052009\nNext Confusion: 5.389236"} | |
| {"text": "### State\nConfusion: 3.25951\nAction: explain\nReward: 0.456638\nNext Confusion: 2.630789"} | |
| {"text": "### State\nConfusion: 4.461349\nAction: explain\nReward: -0.238552\nNext Confusion: 4.220826"} | |
| {"text": "### State\nConfusion: 3.37934\nAction: explain\nReward: 1.300042\nNext Confusion: 2.233323"} | |
| {"text": "### State\nConfusion: 7.708539\nAction: explain\nReward: 0.779534\nNext Confusion: 7.262962"} | |
| {"text": "### State\nConfusion: 4.487832\nAction: correct_fact\nReward: -0.640726\nNext Confusion: 4.850804"} | |
| {"text": "### State\nConfusion: 7.527032\nAction: explain\nReward: 0.188903\nNext Confusion: 7.260336"} | |
| {"text": "### State\nConfusion: 5.608341\nAction: correct_fact\nReward: 0.029112\nNext Confusion: 5.848946"} | |
| {"text": "### State\nConfusion: 2.400969\nAction: analogize\nReward: -1.50911\nNext Confusion: 4.221312"} | |
| {"text": "### State\nConfusion: 4.617443\nAction: explain\nReward: 0.140889\nNext Confusion: 4.101977"} | |
| {"text": "### State\nConfusion: 1.713455\nAction: analogize\nReward: -1.625323\nNext Confusion: 3.040552"} | |
| {"text": "### State\nConfusion: 3.207202\nAction: worked_example\nReward: 0.662639\nNext Confusion: 2.216147"} | |
| {"text": "### State\nConfusion: 6.116821\nAction: explain\nReward: 0.739173\nNext Confusion: 5.414842"} | |
| {"text": "### State\nConfusion: 7.716421\nAction: question\nReward: -0.244427\nNext Confusion: 7.80996"} | |
| {"text": "### State\nConfusion: 4.312419\nAction: analogize\nReward: 0.045445\nNext Confusion: 4.849009"} | |
| {"text": "### State\nConfusion: 2.497267\nAction: analogize\nReward: -0.53257\nNext Confusion: 3.211382"} | |
| {"text": "### State\nConfusion: 4.186003\nAction: explain\nReward: -0.100915\nNext Confusion: 4.1359"} | |
| {"text": "### State\nConfusion: 6.124384\nAction: correct_fact\nReward: -0.222745\nNext Confusion: 6.004959"} | |
| {"text": "### State\nConfusion: 4.838198\nAction: analogize\nReward: -0.537104\nNext Confusion: 5.21931"} | |
| {"text": "### State\nConfusion: 4.336282\nAction: analogize\nReward: 0.362864\nNext Confusion: 4.121377"} | |
| {"text": "### State\nConfusion: 6.199643\nAction: analogize\nReward: 0.089828\nNext Confusion: 6.90752"} | |
| {"text": "### State\nConfusion: 6.246179\nAction: worked_example\nReward: 0.016199\nNext Confusion: 6.216495"} | |
| {"text": "### State\nConfusion: 3.753611\nAction: worked_example\nReward: 2.03355\nNext Confusion: 1.908327"} | |
| {"text": "### State\nConfusion: 3.90575\nAction: analogize\nReward: -0.864023\nNext Confusion: 4.875975"} | |
| {"text": "### State\nConfusion: 3.125639\nAction: analogize\nReward: -0.59186\nNext Confusion: 3.940023"} | |
| {"text": "### State\nConfusion: 4.659585\nAction: analogize\nReward: -0.154829\nNext Confusion: 4.735131"} | |
| {"text": "### State\nConfusion: 2.798429\nAction: analogize\nReward: -0.435412\nNext Confusion: 3.529701"} | |
| {"text": "### State\nConfusion: 3.181256\nAction: explain\nReward: 0.548676\nNext Confusion: 3.000934"} | |
| {"text": "### State\nConfusion: 4.824714\nAction: worked_example\nReward: 0.915687\nNext Confusion: 3.833705"} | |
| {"text": "### State\nConfusion: 3.840567\nAction: explain\nReward: 0.957436\nNext Confusion: 3.357632"} | |
| {"text": "### State\nConfusion: 7.498173\nAction: analogize\nReward: 0.597861\nNext Confusion: 7.415368"} | |
| {"text": "### State\nConfusion: 4.988446\nAction: explain\nReward: 0.252937\nNext Confusion: 4.888795"} | |
| {"text": "### State\nConfusion: 4.628919\nAction: analogize\nReward: -0.793281\nNext Confusion: 5.547164"} | |
| {"text": "### State\nConfusion: 2.532769\nAction: analogize\nReward: 0.608471\nNext Confusion: 2.095517"} | |
| {"text": "### State\nConfusion: 6.552197\nAction: analogize\nReward: -0.143695\nNext Confusion: 6.75866"} | |
| {"text": "### State\nConfusion: 6.521643\nAction: analogize\nReward: 0.625005\nNext Confusion: 6.203167"} | |
| {"text": "### State\nConfusion: 4.492259\nAction: worked_example\nReward: 2.173731\nNext Confusion: 2.631922"} | |
| {"text": "### State\nConfusion: 7.241621\nAction: question\nReward: 1.406091\nNext Confusion: 5.86407"} | |
| {"text": "### State\nConfusion: 3.693046\nAction: explain\nReward: 1.068534\nNext Confusion: 2.440671"} | |
| {"text": "### State\nConfusion: 3.962626\nAction: explain\nReward: -0.730675\nNext Confusion: 4.393443"} | |
| {"text": "### State\nConfusion: 6.47488\nAction: analogize\nReward: -0.811074\nNext Confusion: 7.624598"} | |
| {"text": "### State\nConfusion: 7.081945\nAction: analogize\nReward: 0.137783\nNext Confusion: 6.88515"} | |
| {"text": "### State\nConfusion: 6.003791\nAction: analogize\nReward: 0.25566\nNext Confusion: 6.172901"} | |
| {"text": "### State\nConfusion: 4.536524\nAction: analogize\nReward: -0.882808\nNext Confusion: 5.352852"} | |
| {"text": "### State\nConfusion: 7.325707\nAction: analogize\nReward: 0.390309\nNext Confusion: 6.489121"} | |
| {"text": "### State\nConfusion: 3.344648\nAction: analogize\nReward: -3.003033\nNext Confusion: 2.995784"} | |
| {"text": "### State\nConfusion: 4.936317\nAction: analogize\nReward: 0.065177\nNext Confusion: 4.409051"} | |
| {"text": "### State\nConfusion: 5.412323\nAction: worked_example\nReward: 0.69626\nNext Confusion: 4.851012"} | |
| {"text": "### State\nConfusion: 3.733434\nAction: correct_fact\nReward: 1.215873\nNext Confusion: 3.299668"} | |
| {"text": "### State\nConfusion: 3.594582\nAction: analogize\nReward: -0.245522\nNext Confusion: 4.261171"} | |
| {"text": "### State\nConfusion: 5.702906\nAction: analogize\nReward: -1.517803\nNext Confusion: 7.966025"} | |
| {"text": "### State\nConfusion: 4.458236\nAction: analogize\nReward: -0.448019\nNext Confusion: 4.331777"} | |
| {"text": "### State\nConfusion: 3.877192\nAction: correct_fact\nReward: -0.606948\nNext Confusion: 4.709797"} | |
| {"text": "### State\nConfusion: 4.004076\nAction: explain\nReward: 1.357555\nNext Confusion: 3.258263"} | |
| {"text": "### State\nConfusion: 7.576537\nAction: analogize\nReward: -1.53424\nNext Confusion: 8.860916"} | |
| {"text": "### State\nConfusion: 3.554901\nAction: question\nReward: 0.474365\nNext Confusion: 3.006228"} | |
| {"text": "### State\nConfusion: 3.251683\nAction: correct_fact\nReward: 0.518483\nNext Confusion: 2.64814"} | |
| {"text": "### State\nConfusion: 3.569207\nAction: analogize\nReward: -0.410212\nNext Confusion: 3.989583"} | |
| {"text": "### State\nConfusion: 3.670016\nAction: explain\nReward: 1.762957\nNext Confusion: 2.516897"} | |
| {"text": "### State\nConfusion: 2.353776\nAction: worked_example\nReward: 2.261622\nNext Confusion: 0.0"} | |
| {"text": "### State\nConfusion: 5.180178\nAction: worked_example\nReward: 1.90911\nNext Confusion: 3.493268"} | |
| {"text": "### State\nConfusion: 4.770649\nAction: analogize\nReward: -0.145658\nNext Confusion: 4.804286"} | |
| {"text": "### State\nConfusion: 3.80372\nAction: analogize\nReward: -1.000576\nNext Confusion: 4.46522"} | |
| {"text": "### State\nConfusion: 6.532478\nAction: analogize\nReward: -0.946917\nNext Confusion: 7.290173"} | |
| {"text": "### State\nConfusion: 6.0253\nAction: analogize\nReward: -0.456855\nNext Confusion: 6.661849"} | |
| {"text": "### State\nConfusion: 5.025003\nAction: correct_fact\nReward: 0.758891\nNext Confusion: 4.576565"} | |
| {"text": "### State\nConfusion: 2.845722\nAction: analogize\nReward: 1.140977\nNext Confusion: 2.995232"} | |
| {"text": "### State\nConfusion: 7.297413\nAction: analogize\nReward: 1.366689\nNext Confusion: 7.151979"} | |
| {"text": "### State\nConfusion: 9.557812\nAction: analogize\nReward: -0.288579\nNext Confusion: 9.169003"} | |
| {"text": "### State\nConfusion: 6.37981\nAction: explain\nReward: 1.050409\nNext Confusion: 6.148158"} | |
| {"text": "### State\nConfusion: 3.714115\nAction: worked_example\nReward: 2.349328\nNext Confusion: 1.744698"} | |
| {"text": "### State\nConfusion: 3.174654\nAction: analogize\nReward: 0.685784\nNext Confusion: 3.14699"} | |
| {"text": "### State\nConfusion: 6.137669\nAction: question\nReward: 0.671082\nNext Confusion: 5.555594"} | |
| {"text": "### State\nConfusion: 3.0194\nAction: question\nReward: 0.66125\nNext Confusion: 2.640104"} | |
| {"text": "### State\nConfusion: 5.61132\nAction: worked_example\nReward: 1.569835\nNext Confusion: 3.932574"} | |
| {"text": "### State\nConfusion: 5.073577\nAction: analogize\nReward: -0.484999\nNext Confusion: 5.577819"} | |
| {"text": "### State\nConfusion: 6.558275\nAction: analogize\nReward: 0.953879\nNext Confusion: 6.599469"} | |
| {"text": "### State\nConfusion: 3.460505\nAction: analogize\nReward: -1.369673\nNext Confusion: 4.018704"} | |
| {"text": "### State\nConfusion: 4.546068\nAction: analogize\nReward: 0.296617\nNext Confusion: 3.999932"} | |
| {"text": "### State\nConfusion: 2.789076\nAction: correct_fact\nReward: -0.139055\nNext Confusion: 3.162709"} | |
| {"text": "### State\nConfusion: 4.722394\nAction: analogize\nReward: -0.272211\nNext Confusion: 5.144951"} | |
| {"text": "### State\nConfusion: 6.894866\nAction: analogize\nReward: 0.365742\nNext Confusion: 6.84301"} | |
| {"text": "### State\nConfusion: 6.848865\nAction: analogize\nReward: -0.199233\nNext Confusion: 6.911935"} | |
| {"text": "### State\nConfusion: 7.232148\nAction: analogize\nReward: 0.043035\nNext Confusion: 7.288547"} | |
| {"text": "### State\nConfusion: 3.149628\nAction: explain\nReward: -0.427429\nNext Confusion: 3.316867"} | |
| {"text": "### State\nConfusion: 3.926137\nAction: question\nReward: 1.42821\nNext Confusion: 2.881466"} | |
| {"text": "### State\nConfusion: 3.196238\nAction: analogize\nReward: 1.17027\nNext Confusion: 2.384046"} | |
| {"text": "### State\nConfusion: 5.307725\nAction: correct_fact\nReward: -1.086515\nNext Confusion: 5.717581"} | |
| {"text": "### State\nConfusion: 5.070612\nAction: question\nReward: -0.603608\nNext Confusion: 5.819668"} | |
| {"text": "### State\nConfusion: 7.103436\nAction: analogize\nReward: -1.143868\nNext Confusion: 8.541348"} | |
| {"text": "### State\nConfusion: 6.538433\nAction: analogize\nReward: 0.299716\nNext Confusion: 7.164129"} | |
| {"text": "### State\nConfusion: 5.592933\nAction: analogize\nReward: -0.571434\nNext Confusion: 6.062175"} | |
| {"text": "### State\nConfusion: 3.944029\nAction: analogize\nReward: -0.0834\nNext Confusion: 4.574897"} | |
| {"text": "### State\nConfusion: 2.918218\nAction: analogize\nReward: 0.128027\nNext Confusion: 3.573923"} | |
| {"text": "### State\nConfusion: 4.998973\nAction: analogize\nReward: 0.099739\nNext Confusion: 4.395178"} | |
| {"text": "### State\nConfusion: 4.694908\nAction: analogize\nReward: 0.148056\nNext Confusion: 5.204955"} | |
| {"text": "### State\nConfusion: 3.781684\nAction: analogize\nReward: -0.263645\nNext Confusion: 4.12463"} | |
| {"text": "### State\nConfusion: 6.591872\nAction: analogize\nReward: -1.118598\nNext Confusion: 7.24768"} | |
| {"text": "### State\nConfusion: 3.995603\nAction: explain\nReward: 1.786482\nNext Confusion: 2.911538"} | |
| {"text": "### State\nConfusion: 5.361206\nAction: question\nReward: 0.458134\nNext Confusion: 5.455734"} | |
| {"text": "### State\nConfusion: 3.610285\nAction: analogize\nReward: 0.819867\nNext Confusion: 2.220746"} | |
| {"text": "### State\nConfusion: 6.07102\nAction: explain\nReward: 0.175273\nNext Confusion: 6.668109"} | |
| {"text": "### State\nConfusion: 3.806757\nAction: question\nReward: 0.064293\nNext Confusion: 3.739635"} | |
| {"text": "### State\nConfusion: 4.38536\nAction: analogize\nReward: -1.787108\nNext Confusion: 5.131299"} | |
| {"text": "### State\nConfusion: 7.341646\nAction: correct_fact\nReward: -0.362596\nNext Confusion: 8.125962"} | |
| {"text": "### State\nConfusion: 4.368033\nAction: question\nReward: -0.037405\nNext Confusion: 4.460615"} | |
| {"text": "### State\nConfusion: 4.966889\nAction: analogize\nReward: 0.38992\nNext Confusion: 4.741419"} | |
| {"text": "### State\nConfusion: 1.737497\nAction: explain\nReward: -0.588786\nNext Confusion: 1.845481"} | |
| {"text": "### State\nConfusion: 4.904597\nAction: analogize\nReward: -0.504161\nNext Confusion: 5.030465"} | |
| {"text": "### State\nConfusion: 3.854805\nAction: correct_fact\nReward: 0.143636\nNext Confusion: 4.084965"} | |
| {"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.161128\nNext Confusion: 9.897921"} | |
| {"text": "### State\nConfusion: 6.948946\nAction: explain\nReward: 0.019254\nNext Confusion: 7.208404"} | |
| {"text": "### State\nConfusion: 5.288423\nAction: worked_example\nReward: 0.99203\nNext Confusion: 3.951557"} | |
| {"text": "### State\nConfusion: 6.829811\nAction: analogize\nReward: -0.455124\nNext Confusion: 7.361673"} | |
| {"text": "### State\nConfusion: 3.869974\nAction: analogize\nReward: 0.530551\nNext Confusion: 3.98222"} | |
| {"text": "### State\nConfusion: 7.334596\nAction: analogize\nReward: -0.876755\nNext Confusion: 8.262695"} | |
| {"text": "### State\nConfusion: 7.459252\nAction: analogize\nReward: -0.095127\nNext Confusion: 7.275376"} | |
| {"text": "### State\nConfusion: 2.977803\nAction: correct_fact\nReward: -0.305983\nNext Confusion: 3.23653"} | |
| {"text": "### State\nConfusion: 4.767777\nAction: analogize\nReward: -0.346948\nNext Confusion: 4.939303"} | |
| {"text": "### State\nConfusion: 9.385743\nAction: correct_fact\nReward: -1.353628\nNext Confusion: 9.932337"} | |
| {"text": "### State\nConfusion: 4.616424\nAction: worked_example\nReward: 1.173654\nNext Confusion: 2.993332"} | |
| {"text": "### State\nConfusion: 3.982618\nAction: question\nReward: 1.465169\nNext Confusion: 3.196667"} | |
| {"text": "### State\nConfusion: 3.731763\nAction: worked_example\nReward: 0.96159\nNext Confusion: 2.827726"} | |
| {"text": "### State\nConfusion: 3.987744\nAction: analogize\nReward: -0.768711\nNext Confusion: 4.888868"} | |
| {"text": "### State\nConfusion: 4.67894\nAction: question\nReward: 0.242026\nNext Confusion: 3.964318"} | |
| {"text": "### State\nConfusion: 2.752789\nAction: analogize\nReward: -1.218773\nNext Confusion: 2.988962"} | |
| {"text": "### State\nConfusion: 6.775572\nAction: analogize\nReward: 0.72213\nNext Confusion: 6.405007"} | |
| {"text": "### State\nConfusion: 2.37896\nAction: worked_example\nReward: 1.470493\nNext Confusion: 1.213226"} | |
| {"text": "### State\nConfusion: 4.411928\nAction: analogize\nReward: 2.35412\nNext Confusion: 3.227744"} | |
| {"text": "### State\nConfusion: 4.708626\nAction: analogize\nReward: -0.456251\nNext Confusion: 4.466039"} | |