diff --git "a/src/environment/training_samples.jsonl" "b/src/environment/training_samples.jsonl" new file mode 100644--- /dev/null +++ "b/src/environment/training_samples.jsonl" @@ -0,0 +1,2712 @@ +{"text": "### State\nConfusion: 3.250681\nAction: explain\nReward: 0.277968\nNext Confusion: 2.895787"} +{"text": "### State\nConfusion: 6.946829\nAction: correct_fact\nReward: 1.136782\nNext Confusion: 6.797103"} +{"text": "### State\nConfusion: 3.01263\nAction: explain\nReward: 0.161669\nNext Confusion: 2.434628"} +{"text": "### State\nConfusion: 4.200218\nAction: analogize\nReward: -0.536253\nNext Confusion: 4.50549"} +{"text": "### State\nConfusion: 4.204886\nAction: correct_fact\nReward: 0.001798\nNext Confusion: 4.348224"} +{"text": "### State\nConfusion: 4.431564\nAction: analogize\nReward: -0.00913\nNext Confusion: 5.02598"} +{"text": "### State\nConfusion: 7.112898\nAction: analogize\nReward: -0.260503\nNext Confusion: 7.776484"} +{"text": "### State\nConfusion: 6.707709\nAction: worked_example\nReward: 0.623157\nNext Confusion: 5.590979"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.200613\nNext Confusion: 9.88075"} +{"text": "### State\nConfusion: 6.441003\nAction: analogize\nReward: 0.059471\nNext Confusion: 6.277812"} +{"text": "### State\nConfusion: 4.039312\nAction: analogize\nReward: 0.588281\nNext Confusion: 3.436871"} +{"text": "### State\nConfusion: 4.283644\nAction: analogize\nReward: 0.377662\nNext Confusion: 4.225992"} +{"text": "### State\nConfusion: 5.741777\nAction: correct_fact\nReward: -0.086082\nNext Confusion: 6.010756"} +{"text": "### State\nConfusion: 2.020052\nAction: correct_fact\nReward: 0.065653\nNext Confusion: 1.847908"} +{"text": "### State\nConfusion: 4.519175\nAction: explain\nReward: -0.532623\nNext Confusion: 4.565185"} +{"text": "### State\nConfusion: 8.321839\nAction: correct_fact\nReward: -0.03258\nNext Confusion: 8.116296"} +{"text": "### State\nConfusion: 8.126682\nAction: worked_example\nReward: 1.752661\nNext Confusion: 6.691122"} +{"text": "### State\nConfusion: 5.13175\nAction: worked_example\nReward: 1.046388\nNext Confusion: 3.915293"} +{"text": "### State\nConfusion: 2.675358\nAction: analogize\nReward: -0.527201\nNext Confusion: 3.269576"} +{"text": "### State\nConfusion: 4.452432\nAction: question\nReward: -0.12029\nNext Confusion: 4.301777"} +{"text": "### State\nConfusion: 5.261161\nAction: explain\nReward: -0.348596\nNext Confusion: 5.673129"} +{"text": "### State\nConfusion: 4.41783\nAction: correct_fact\nReward: 0.606589\nNext Confusion: 3.844203"} +{"text": "### State\nConfusion: 7.167364\nAction: analogize\nReward: -0.036751\nNext Confusion: 7.91662"} +{"text": "### State\nConfusion: 4.443446\nAction: analogize\nReward: -0.405494\nNext Confusion: 4.455853"} +{"text": "### State\nConfusion: 2.394889\nAction: worked_example\nReward: 1.569233\nNext Confusion: 0.521398"} +{"text": "### State\nConfusion: 5.666886\nAction: analogize\nReward: 0.132937\nNext Confusion: 5.507486"} +{"text": "### State\nConfusion: 8.903051\nAction: worked_example\nReward: 1.168286\nNext Confusion: 8.015495"} +{"text": "### State\nConfusion: 7.419142\nAction: analogize\nReward: 0.370696\nNext Confusion: 6.90989"} +{"text": "### State\nConfusion: 4.930095\nAction: analogize\nReward: -0.354928\nNext Confusion: 5.273698"} +{"text": "### State\nConfusion: 3.654876\nAction: correct_fact\nReward: 0.075058\nNext Confusion: 4.012523"} +{"text": "### State\nConfusion: 3.298562\nAction: correct_fact\nReward: 0.274487\nNext Confusion: 3.207063"} +{"text": "### State\nConfusion: 5.36716\nAction: worked_example\nReward: 1.439936\nNext Confusion: 3.664801"} +{"text": "### State\nConfusion: 3.746032\nAction: question\nReward: 0.769005\nNext Confusion: 3.475201"} +{"text": "### State\nConfusion: 5.479237\nAction: correct_fact\nReward: 0.557245\nNext Confusion: 5.685524"} +{"text": "### State\nConfusion: 5.16923\nAction: analogize\nReward: 0.659608\nNext Confusion: 4.931562"} +{"text": "### State\nConfusion: 5.465634\nAction: analogize\nReward: -0.455456\nNext Confusion: 6.204408"} +{"text": "### State\nConfusion: 4.611916\nAction: analogize\nReward: 0.505761\nNext Confusion: 4.227184"} +{"text": "### State\nConfusion: 6.265313\nAction: analogize\nReward: -0.2694\nNext Confusion: 6.18744"} +{"text": "### State\nConfusion: 4.897626\nAction: analogize\nReward: 0.702536\nNext Confusion: 4.261006"} +{"text": "### State\nConfusion: 4.620578\nAction: analogize\nReward: 0.574101\nNext Confusion: 4.518202"} +{"text": "### State\nConfusion: 4.456707\nAction: question\nReward: -0.11188\nNext Confusion: 4.38086"} +{"text": "### State\nConfusion: 3.882776\nAction: correct_fact\nReward: 0.216437\nNext Confusion: 3.257319"} +{"text": "### State\nConfusion: 7.64693\nAction: question\nReward: 0.516669\nNext Confusion: 6.653407"} +{"text": "### State\nConfusion: 3.631051\nAction: analogize\nReward: -0.060158\nNext Confusion: 3.668511"} +{"text": "### State\nConfusion: 3.242117\nAction: analogize\nReward: -0.632007\nNext Confusion: 4.07684"} +{"text": "### State\nConfusion: 5.477382\nAction: worked_example\nReward: 0.014376\nNext Confusion: 5.024957"} +{"text": "### State\nConfusion: 4.500757\nAction: question\nReward: 0.791141\nNext Confusion: 4.249756"} +{"text": "### State\nConfusion: 4.149765\nAction: analogize\nReward: -0.737377\nNext Confusion: 5.078204"} +{"text": "### State\nConfusion: 4.0037\nAction: analogize\nReward: 0.026296\nNext Confusion: 4.308817"} +{"text": "### State\nConfusion: 3.601284\nAction: correct_fact\nReward: 0.736689\nNext Confusion: 2.987959"} +{"text": "### State\nConfusion: 4.442764\nAction: analogize\nReward: -1.300369\nNext Confusion: 5.400936"} +{"text": "### State\nConfusion: 3.490773\nAction: explain\nReward: -0.677986\nNext Confusion: 3.864996"} +{"text": "### State\nConfusion: 4.677259\nAction: question\nReward: 0.948914\nNext Confusion: 4.377257"} +{"text": "### State\nConfusion: 5.060442\nAction: correct_fact\nReward: 0.766447\nNext Confusion: 4.600817"} +{"text": "### State\nConfusion: 4.615941\nAction: analogize\nReward: 1.128073\nNext Confusion: 4.953195"} +{"text": "### State\nConfusion: 2.83426\nAction: analogize\nReward: 0.942352\nNext Confusion: 2.650195"} +{"text": "### State\nConfusion: 3.270736\nAction: analogize\nReward: 0.441857\nNext Confusion: 2.980848"} +{"text": "### State\nConfusion: 5.609833\nAction: analogize\nReward: -0.276144\nNext Confusion: 6.021879"} +{"text": "### State\nConfusion: 3.269245\nAction: analogize\nReward: -0.558671\nNext Confusion: 3.781189"} +{"text": "### State\nConfusion: 5.657327\nAction: correct_fact\nReward: -0.370105\nNext Confusion: 6.336563"} +{"text": "### State\nConfusion: 4.241743\nAction: question\nReward: 0.405085\nNext Confusion: 3.343711"} +{"text": "### State\nConfusion: 4.50831\nAction: worked_example\nReward: 1.323088\nNext Confusion: 3.333388"} +{"text": "### State\nConfusion: 8.612566\nAction: analogize\nReward: -0.590897\nNext Confusion: 9.277476"} +{"text": "### State\nConfusion: 3.341706\nAction: question\nReward: 0.605924\nNext Confusion: 2.983989"} +{"text": "### State\nConfusion: 6.981562\nAction: worked_example\nReward: 1.42079\nNext Confusion: 6.22347"} +{"text": "### State\nConfusion: 6.482682\nAction: explain\nReward: 0.134391\nNext Confusion: 6.252345"} +{"text": "### State\nConfusion: 3.801888\nAction: correct_fact\nReward: -0.720082\nNext Confusion: 3.976293"} +{"text": "### State\nConfusion: 3.833232\nAction: analogize\nReward: -0.353737\nNext Confusion: 4.299214"} +{"text": "### State\nConfusion: 5.998812\nAction: explain\nReward: 0.389272\nNext Confusion: 5.514136"} +{"text": "### State\nConfusion: 4.684952\nAction: question\nReward: -0.067119\nNext Confusion: 4.627373"} +{"text": "### State\nConfusion: 3.739171\nAction: explain\nReward: 0.081376\nNext Confusion: 4.377791"} +{"text": "### State\nConfusion: 6.828212\nAction: explain\nReward: 1.582702\nNext Confusion: 5.688067"} +{"text": "### State\nConfusion: 7.270997\nAction: analogize\nReward: -1.004266\nNext Confusion: 7.936233"} +{"text": "### State\nConfusion: 6.698097\nAction: analogize\nReward: -0.106108\nNext Confusion: 7.112916"} +{"text": "### State\nConfusion: 5.745265\nAction: analogize\nReward: -0.139695\nNext Confusion: 5.720766"} +{"text": "### State\nConfusion: 3.632954\nAction: question\nReward: 1.16064\nNext Confusion: 3.356993"} +{"text": "### State\nConfusion: 6.09853\nAction: analogize\nReward: -1.18527\nNext Confusion: 7.567443"} +{"text": "### State\nConfusion: 3.86836\nAction: worked_example\nReward: 0.259128\nNext Confusion: 3.484797"} +{"text": "### State\nConfusion: 6.078642\nAction: question\nReward: -1.233893\nNext Confusion: 7.035697"} +{"text": "### State\nConfusion: 2.798933\nAction: explain\nReward: 0.579925\nNext Confusion: 2.698838"} +{"text": "### State\nConfusion: 5.148643\nAction: analogize\nReward: 0.709493\nNext Confusion: 4.744413"} +{"text": "### State\nConfusion: 7.496325\nAction: explain\nReward: 1.159911\nNext Confusion: 7.308018"} +{"text": "### State\nConfusion: 3.956711\nAction: analogize\nReward: -0.442151\nNext Confusion: 4.599521"} +{"text": "### State\nConfusion: 5.598244\nAction: analogize\nReward: -1.461143\nNext Confusion: 6.285129"} +{"text": "### State\nConfusion: 2.592946\nAction: analogize\nReward: -0.963043\nNext Confusion: 3.637503"} +{"text": "### State\nConfusion: 5.577053\nAction: question\nReward: -1.324903\nNext Confusion: 6.152676"} +{"text": "### State\nConfusion: 5.230882\nAction: analogize\nReward: -0.518079\nNext Confusion: 5.693272"} +{"text": "### State\nConfusion: 4.305274\nAction: question\nReward: 0.976141\nNext Confusion: 4.003798"} +{"text": "### State\nConfusion: 8.230191\nAction: analogize\nReward: -0.636257\nNext Confusion: 9.311369"} +{"text": "### State\nConfusion: 3.902071\nAction: analogize\nReward: 0.500202\nNext Confusion: 3.75269"} +{"text": "### State\nConfusion: 2.887785\nAction: correct_fact\nReward: 1.377221\nNext Confusion: 2.390823"} +{"text": "### State\nConfusion: 4.391108\nAction: analogize\nReward: -0.342566\nNext Confusion: 4.73378"} +{"text": "### State\nConfusion: 6.183204\nAction: question\nReward: -0.198667\nNext Confusion: 6.669494"} +{"text": "### State\nConfusion: 5.336835\nAction: worked_example\nReward: 0.735701\nNext Confusion: 3.867037"} +{"text": "### State\nConfusion: 3.049285\nAction: analogize\nReward: 1.806344\nNext Confusion: 1.962377"} +{"text": "### State\nConfusion: 5.835483\nAction: analogize\nReward: 0.322794\nNext Confusion: 5.636015"} +{"text": "### State\nConfusion: 6.197648\nAction: analogize\nReward: -0.037625\nNext Confusion: 6.726842"} +{"text": "### State\nConfusion: 3.712592\nAction: analogize\nReward: -1.302483\nNext Confusion: 4.627915"} +{"text": "### State\nConfusion: 2.942607\nAction: analogize\nReward: 0.391221\nNext Confusion: 3.280312"} +{"text": "### State\nConfusion: 7.031706\nAction: analogize\nReward: -0.006694\nNext Confusion: 7.219192"} +{"text": "### State\nConfusion: 6.227323\nAction: analogize\nReward: -1.185675\nNext Confusion: 7.040853"} +{"text": "### State\nConfusion: 4.443704\nAction: explain\nReward: -0.222932\nNext Confusion: 4.861446"} +{"text": "### State\nConfusion: 5.72539\nAction: analogize\nReward: -0.945966\nNext Confusion: 6.40068"} +{"text": "### State\nConfusion: 3.285097\nAction: analogize\nReward: 0.502635\nNext Confusion: 2.970505"} +{"text": "### State\nConfusion: 4.861887\nAction: question\nReward: -0.581137\nNext Confusion: 5.077682"} +{"text": "### State\nConfusion: 3.146511\nAction: worked_example\nReward: -0.022938\nNext Confusion: 2.971046"} +{"text": "### State\nConfusion: 7.118391\nAction: question\nReward: 0.68446\nNext Confusion: 6.163698"} +{"text": "### State\nConfusion: 3.458978\nAction: analogize\nReward: -0.578027\nNext Confusion: 3.857936"} +{"text": "### State\nConfusion: 3.074595\nAction: analogize\nReward: -0.545842\nNext Confusion: 3.548289"} +{"text": "### State\nConfusion: 3.44214\nAction: analogize\nReward: 0.225382\nNext Confusion: 3.721544"} +{"text": "### State\nConfusion: 8.819264\nAction: analogize\nReward: 0.151339\nNext Confusion: 8.657555"} +{"text": "### State\nConfusion: 4.521422\nAction: worked_example\nReward: -0.10238\nNext Confusion: 4.557052"} +{"text": "### State\nConfusion: 6.23277\nAction: explain\nReward: 0.457413\nNext Confusion: 6.462687"} +{"text": "### State\nConfusion: 6.84021\nAction: correct_fact\nReward: -0.057949\nNext Confusion: 6.665882"} +{"text": "### State\nConfusion: 4.478228\nAction: analogize\nReward: -1.01402\nNext Confusion: 4.81966"} +{"text": "### State\nConfusion: 3.90523\nAction: analogize\nReward: -0.397117\nNext Confusion: 4.581836"} +{"text": "### State\nConfusion: 5.505198\nAction: explain\nReward: -0.791799\nNext Confusion: 5.179022"} +{"text": "### State\nConfusion: 6.171377\nAction: analogize\nReward: 2.02061\nNext Confusion: 5.293525"} +{"text": "### State\nConfusion: 7.07752\nAction: analogize\nReward: -0.140945\nNext Confusion: 7.439284"} +{"text": "### State\nConfusion: 4.74815\nAction: question\nReward: -0.921987\nNext Confusion: 4.901676"} +{"text": "### State\nConfusion: 4.509364\nAction: explain\nReward: 1.032465\nNext Confusion: 3.376094"} +{"text": "### State\nConfusion: 2.027247\nAction: worked_example\nReward: 0.728003\nNext Confusion: 2.481546"} +{"text": "### State\nConfusion: 3.262276\nAction: analogize\nReward: -0.181705\nNext Confusion: 4.062567"} +{"text": "### State\nConfusion: 4.404922\nAction: analogize\nReward: 0.137978\nNext Confusion: 4.550036"} +{"text": "### State\nConfusion: 8.323386\nAction: analogize\nReward: -0.82731\nNext Confusion: 8.967352"} +{"text": "### State\nConfusion: 3.065048\nAction: worked_example\nReward: 1.422474\nNext Confusion: 1.396024"} +{"text": "### State\nConfusion: 8.650606\nAction: correct_fact\nReward: 0.013713\nNext Confusion: 8.505588"} +{"text": "### State\nConfusion: 4.651423\nAction: correct_fact\nReward: 0.975844\nNext Confusion: 3.993101"} +{"text": "### State\nConfusion: 5.509835\nAction: analogize\nReward: 1.279046\nNext Confusion: 5.567154"} +{"text": "### State\nConfusion: 6.096856\nAction: analogize\nReward: -0.38767\nNext Confusion: 6.7619"} +{"text": "### State\nConfusion: 4.852611\nAction: analogize\nReward: -0.843568\nNext Confusion: 4.90363"} +{"text": "### State\nConfusion: 3.32426\nAction: correct_fact\nReward: 0.634364\nNext Confusion: 3.443108"} +{"text": "### State\nConfusion: 3.007626\nAction: explain\nReward: -0.714219\nNext Confusion: 3.19942"} +{"text": "### State\nConfusion: 8.091647\nAction: analogize\nReward: 0.138296\nNext Confusion: 8.641704"} +{"text": "### State\nConfusion: 6.707883\nAction: analogize\nReward: -1.551451\nNext Confusion: 7.149846"} +{"text": "### State\nConfusion: 5.131027\nAction: analogize\nReward: -0.898392\nNext Confusion: 5.243616"} +{"text": "### State\nConfusion: 7.653114\nAction: analogize\nReward: 0.004476\nNext Confusion: 7.78867"} +{"text": "### State\nConfusion: 3.999675\nAction: worked_example\nReward: 0.461726\nNext Confusion: 3.61097"} +{"text": "### State\nConfusion: 9.165361\nAction: analogize\nReward: -0.127371\nNext Confusion: 9.594726"} +{"text": "### State\nConfusion: 5.877134\nAction: analogize\nReward: -0.827503\nNext Confusion: 6.792844"} +{"text": "### State\nConfusion: 4.240065\nAction: analogize\nReward: 0.017438\nNext Confusion: 4.091051"} +{"text": "### State\nConfusion: 6.373348\nAction: worked_example\nReward: 2.087573\nNext Confusion: 4.599887"} +{"text": "### State\nConfusion: 3.398287\nAction: analogize\nReward: 0.804075\nNext Confusion: 2.719617"} +{"text": "### State\nConfusion: 5.074518\nAction: analogize\nReward: 0.123028\nNext Confusion: 5.348822"} +{"text": "### State\nConfusion: 2.402497\nAction: analogize\nReward: -0.223833\nNext Confusion: 2.35697"} +{"text": "### State\nConfusion: 4.486272\nAction: question\nReward: 0.398914\nNext Confusion: 4.16057"} +{"text": "### State\nConfusion: 5.279123\nAction: analogize\nReward: -0.645918\nNext Confusion: 5.878665"} +{"text": "### State\nConfusion: 2.826214\nAction: explain\nReward: 0.117254\nNext Confusion: 2.519507"} +{"text": "### State\nConfusion: 7.463021\nAction: correct_fact\nReward: -0.078153\nNext Confusion: 7.250635"} +{"text": "### State\nConfusion: 4.378883\nAction: correct_fact\nReward: 0.349144\nNext Confusion: 4.469529"} +{"text": "### State\nConfusion: 3.601191\nAction: analogize\nReward: -0.4524\nNext Confusion: 3.516358"} +{"text": "### State\nConfusion: 3.17514\nAction: worked_example\nReward: 1.188492\nNext Confusion: 2.420519"} +{"text": "### State\nConfusion: 4.386989\nAction: analogize\nReward: -0.239119\nNext Confusion: 4.577966"} +{"text": "### State\nConfusion: 3.484874\nAction: analogize\nReward: 0.235369\nNext Confusion: 3.315143"} +{"text": "### State\nConfusion: 5.282593\nAction: analogize\nReward: 0.254918\nNext Confusion: 5.733146"} +{"text": "### State\nConfusion: 2.864476\nAction: explain\nReward: 0.342867\nNext Confusion: 3.148373"} +{"text": "### State\nConfusion: 4.97228\nAction: analogize\nReward: -0.818184\nNext Confusion: 4.98446"} +{"text": "### State\nConfusion: 5.917563\nAction: analogize\nReward: 0.363083\nNext Confusion: 6.30535"} +{"text": "### State\nConfusion: 4.550479\nAction: analogize\nReward: 0.016655\nNext Confusion: 5.802547"} +{"text": "### State\nConfusion: 3.973916\nAction: analogize\nReward: -0.288334\nNext Confusion: 4.395061"} +{"text": "### State\nConfusion: 3.831629\nAction: analogize\nReward: 0.150414\nNext Confusion: 4.073593"} +{"text": "### State\nConfusion: 6.013124\nAction: analogize\nReward: -0.745808\nNext Confusion: 6.446821"} +{"text": "### State\nConfusion: 4.514874\nAction: analogize\nReward: 0.229664\nNext Confusion: 4.723423"} +{"text": "### State\nConfusion: 4.115125\nAction: analogize\nReward: -1.187277\nNext Confusion: 4.327879"} +{"text": "### State\nConfusion: 4.383352\nAction: analogize\nReward: -0.218114\nNext Confusion: 4.679171"} +{"text": "### State\nConfusion: 3.9782\nAction: correct_fact\nReward: 0.984978\nNext Confusion: 3.826136"} +{"text": "### State\nConfusion: 4.19293\nAction: correct_fact\nReward: -0.565662\nNext Confusion: 4.888168"} +{"text": "### State\nConfusion: 3.7524\nAction: correct_fact\nReward: 0.912141\nNext Confusion: 2.79205"} +{"text": "### State\nConfusion: 5.939085\nAction: analogize\nReward: 0.14276\nNext Confusion: 5.931013"} +{"text": "### State\nConfusion: 3.940862\nAction: question\nReward: 1.299393\nNext Confusion: 3.066277"} +{"text": "### State\nConfusion: 6.421869\nAction: analogize\nReward: 0.36015\nNext Confusion: 6.279908"} +{"text": "### State\nConfusion: 1.453712\nAction: analogize\nReward: -0.959622\nNext Confusion: 2.468141"} +{"text": "### State\nConfusion: 5.496111\nAction: analogize\nReward: -0.43066\nNext Confusion: 5.796453"} +{"text": "### State\nConfusion: 3.835277\nAction: analogize\nReward: -0.210495\nNext Confusion: 4.315474"} +{"text": "### State\nConfusion: 4.667592\nAction: analogize\nReward: 0.221866\nNext Confusion: 4.160811"} +{"text": "### State\nConfusion: 7.403551\nAction: worked_example\nReward: 0.907719\nNext Confusion: 6.486184"} +{"text": "### State\nConfusion: 4.500176\nAction: analogize\nReward: -0.275044\nNext Confusion: 4.99979"} +{"text": "### State\nConfusion: 3.816011\nAction: analogize\nReward: -0.839258\nNext Confusion: 4.310652"} +{"text": "### State\nConfusion: 8.19787\nAction: question\nReward: 0.946011\nNext Confusion: 7.058693"} +{"text": "### State\nConfusion: 3.899465\nAction: analogize\nReward: 0.877398\nNext Confusion: 2.888416"} +{"text": "### State\nConfusion: 5.417362\nAction: question\nReward: 1.217177\nNext Confusion: 4.386165"} +{"text": "### State\nConfusion: 4.163053\nAction: analogize\nReward: -0.983947\nNext Confusion: 5.03493"} +{"text": "### State\nConfusion: 5.122217\nAction: analogize\nReward: -1.302016\nNext Confusion: 5.724295"} +{"text": "### State\nConfusion: 4.713499\nAction: question\nReward: -1.06088\nNext Confusion: 5.390096"} +{"text": "### State\nConfusion: 5.252266\nAction: correct_fact\nReward: 0.112053\nNext Confusion: 5.166234"} +{"text": "### State\nConfusion: 4.277287\nAction: analogize\nReward: 0.349988\nNext Confusion: 3.858081"} +{"text": "### State\nConfusion: 4.312006\nAction: analogize\nReward: -0.280414\nNext Confusion: 4.743623"} +{"text": "### State\nConfusion: 2.116424\nAction: worked_example\nReward: 2.219539\nNext Confusion: 0.30772"} +{"text": "### State\nConfusion: 4.487134\nAction: analogize\nReward: -0.316827\nNext Confusion: 4.72908"} +{"text": "### State\nConfusion: 3.734942\nAction: explain\nReward: 0.559271\nNext Confusion: 3.387467"} +{"text": "### State\nConfusion: 3.817736\nAction: correct_fact\nReward: -0.371755\nNext Confusion: 4.35576"} +{"text": "### State\nConfusion: 3.534894\nAction: question\nReward: 0.714752\nNext Confusion: 2.6903"} +{"text": "### State\nConfusion: 3.297557\nAction: analogize\nReward: -0.02651\nNext Confusion: 3.73844"} +{"text": "### State\nConfusion: 3.66799\nAction: question\nReward: 1.278086\nNext Confusion: 2.777691"} +{"text": "### State\nConfusion: 8.029835\nAction: explain\nReward: 0.004985\nNext Confusion: 8.023991"} +{"text": "### State\nConfusion: 4.69018\nAction: analogize\nReward: -0.212867\nNext Confusion: 5.239052"} +{"text": "### State\nConfusion: 3.043147\nAction: analogize\nReward: 0.341268\nNext Confusion: 2.599216"} +{"text": "### State\nConfusion: 5.367027\nAction: question\nReward: 0.914907\nNext Confusion: 4.669119"} +{"text": "### State\nConfusion: 2.610881\nAction: worked_example\nReward: 1.154435\nNext Confusion: 0.578632"} +{"text": "### State\nConfusion: 2.696339\nAction: question\nReward: -0.000458\nNext Confusion: 2.809412"} +{"text": "### State\nConfusion: 4.532858\nAction: analogize\nReward: 1.700078\nNext Confusion: 3.571719"} +{"text": "### State\nConfusion: 3.726544\nAction: question\nReward: -0.297797\nNext Confusion: 3.881641"} +{"text": "### State\nConfusion: 5.898968\nAction: analogize\nReward: -0.010863\nNext Confusion: 6.54058"} +{"text": "### State\nConfusion: 4.530686\nAction: correct_fact\nReward: -0.477902\nNext Confusion: 5.114145"} +{"text": "### State\nConfusion: 4.004507\nAction: analogize\nReward: -0.184868\nNext Confusion: 4.069702"} +{"text": "### State\nConfusion: 3.988666\nAction: correct_fact\nReward: -0.211808\nNext Confusion: 4.288907"} +{"text": "### State\nConfusion: 3.219075\nAction: worked_example\nReward: 0.314439\nNext Confusion: 3.734656"} +{"text": "### State\nConfusion: 3.293107\nAction: explain\nReward: -0.32457\nNext Confusion: 2.891417"} +{"text": "### State\nConfusion: 4.433997\nAction: explain\nReward: 1.305128\nNext Confusion: 3.859591"} +{"text": "### State\nConfusion: 3.627563\nAction: worked_example\nReward: 1.166838\nNext Confusion: 3.274162"} +{"text": "### State\nConfusion: 2.615709\nAction: analogize\nReward: 0.055049\nNext Confusion: 2.620762"} +{"text": "### State\nConfusion: 4.573371\nAction: analogize\nReward: 0.162817\nNext Confusion: 4.686983"} +{"text": "### State\nConfusion: 2.914325\nAction: analogize\nReward: 0.573057\nNext Confusion: 2.620443"} +{"text": "### State\nConfusion: 4.103402\nAction: correct_fact\nReward: -0.109456\nNext Confusion: 4.510875"} +{"text": "### State\nConfusion: 4.716958\nAction: analogize\nReward: -1.069869\nNext Confusion: 5.22988"} +{"text": "### State\nConfusion: 1.439015\nAction: worked_example\nReward: 1.653535\nNext Confusion: 0.0"} +{"text": "### State\nConfusion: 4.512936\nAction: explain\nReward: -0.674713\nNext Confusion: 4.853434"} +{"text": "### State\nConfusion: 4.616469\nAction: analogize\nReward: 0.228202\nNext Confusion: 4.998136"} +{"text": "### State\nConfusion: 3.472289\nAction: analogize\nReward: -0.518788\nNext Confusion: 4.016916"} +{"text": "### State\nConfusion: 5.290328\nAction: explain\nReward: 1.622873\nNext Confusion: 4.628211"} +{"text": "### State\nConfusion: 4.509018\nAction: analogize\nReward: -0.246907\nNext Confusion: 4.696386"} +{"text": "### State\nConfusion: 3.649479\nAction: analogize\nReward: -0.646281\nNext Confusion: 3.705608"} +{"text": "### State\nConfusion: 4.436886\nAction: explain\nReward: 0.071447\nNext Confusion: 4.62008"} +{"text": "### State\nConfusion: 6.165114\nAction: explain\nReward: 0.102394\nNext Confusion: 5.989851"} +{"text": "### State\nConfusion: 3.751394\nAction: question\nReward: 0.110986\nNext Confusion: 3.572822"} +{"text": "### State\nConfusion: 4.967533\nAction: worked_example\nReward: 1.888933\nNext Confusion: 3.294329"} +{"text": "### State\nConfusion: 6.802119\nAction: analogize\nReward: -0.318646\nNext Confusion: 6.787519"} +{"text": "### State\nConfusion: 2.757781\nAction: analogize\nReward: -0.041224\nNext Confusion: 2.945709"} +{"text": "### State\nConfusion: 3.441426\nAction: question\nReward: -0.356823\nNext Confusion: 3.619329"} +{"text": "### State\nConfusion: 3.824876\nAction: analogize\nReward: -0.282262\nNext Confusion: 4.405335"} +{"text": "### State\nConfusion: 6.033777\nAction: analogize\nReward: 0.118754\nNext Confusion: 5.973301"} +{"text": "### State\nConfusion: 3.73862\nAction: analogize\nReward: 0.371175\nNext Confusion: 3.56842"} +{"text": "### State\nConfusion: 9.286809\nAction: worked_example\nReward: 2.644093\nNext Confusion: 7.658459"} +{"text": "### State\nConfusion: 3.810243\nAction: worked_example\nReward: 1.870768\nNext Confusion: 1.740508"} +{"text": "### State\nConfusion: 3.956185\nAction: question\nReward: 0.81304\nNext Confusion: 3.16671"} +{"text": "### State\nConfusion: 3.00301\nAction: analogize\nReward: -0.345361\nNext Confusion: 2.953303"} +{"text": "### State\nConfusion: 3.451303\nAction: analogize\nReward: -0.858153\nNext Confusion: 4.097912"} +{"text": "### State\nConfusion: 3.48126\nAction: question\nReward: -0.04059\nNext Confusion: 3.13211"} +{"text": "### State\nConfusion: 5.124149\nAction: question\nReward: 0.945285\nNext Confusion: 3.9529"} +{"text": "### State\nConfusion: 3.216448\nAction: analogize\nReward: 0.281472\nNext Confusion: 3.077387"} +{"text": "### State\nConfusion: 3.474833\nAction: analogize\nReward: -1.38124\nNext Confusion: 4.510687"} +{"text": "### State\nConfusion: 3.63635\nAction: analogize\nReward: 0.229631\nNext Confusion: 4.149948"} +{"text": "### State\nConfusion: 3.650369\nAction: analogize\nReward: -0.573367\nNext Confusion: 4.224239"} +{"text": "### State\nConfusion: 3.732268\nAction: explain\nReward: -0.387552\nNext Confusion: 4.25324"} +{"text": "### State\nConfusion: 3.145265\nAction: analogize\nReward: -0.078527\nNext Confusion: 3.347933"} +{"text": "### State\nConfusion: 6.957926\nAction: analogize\nReward: 1.066442\nNext Confusion: 6.509846"} +{"text": "### State\nConfusion: 3.81907\nAction: analogize\nReward: -0.537125\nNext Confusion: 3.932789"} +{"text": "### State\nConfusion: 6.875212\nAction: explain\nReward: 0.923676\nNext Confusion: 6.357508"} +{"text": "### State\nConfusion: 6.113523\nAction: analogize\nReward: 0.216784\nNext Confusion: 5.652343"} +{"text": "### State\nConfusion: 4.34719\nAction: analogize\nReward: 0.100165\nNext Confusion: 4.293112"} +{"text": "### State\nConfusion: 3.46102\nAction: analogize\nReward: -1.955337\nNext Confusion: 5.25365"} +{"text": "### State\nConfusion: 2.541971\nAction: analogize\nReward: -0.67109\nNext Confusion: 3.242385"} +{"text": "### State\nConfusion: 3.4017\nAction: analogize\nReward: -1.546172\nNext Confusion: 4.715109"} +{"text": "### State\nConfusion: 3.903403\nAction: explain\nReward: 0.955664\nNext Confusion: 3.233826"} +{"text": "### State\nConfusion: 4.410839\nAction: correct_fact\nReward: -0.778426\nNext Confusion: 4.867506"} +{"text": "### State\nConfusion: 5.352035\nAction: analogize\nReward: 0.083221\nNext Confusion: 5.309198"} +{"text": "### State\nConfusion: 3.845539\nAction: worked_example\nReward: 0.721115\nNext Confusion: 3.197238"} +{"text": "### State\nConfusion: 4.363106\nAction: analogize\nReward: -0.783213\nNext Confusion: 4.303111"} +{"text": "### State\nConfusion: 7.119\nAction: analogize\nReward: -0.595028\nNext Confusion: 8.178642"} +{"text": "### State\nConfusion: 2.084962\nAction: correct_fact\nReward: 0.396716\nNext Confusion: 2.375542"} +{"text": "### State\nConfusion: 8.238723\nAction: analogize\nReward: -0.264989\nNext Confusion: 8.752825"} +{"text": "### State\nConfusion: 5.645957\nAction: analogize\nReward: -0.080122\nNext Confusion: 5.676078"} +{"text": "### State\nConfusion: 6.062615\nAction: analogize\nReward: 1.713626\nNext Confusion: 5.300618"} +{"text": "### State\nConfusion: 8.921528\nAction: worked_example\nReward: 1.502296\nNext Confusion: 7.384206"} +{"text": "### State\nConfusion: 3.535991\nAction: explain\nReward: 0.402923\nNext Confusion: 2.849609"} +{"text": "### State\nConfusion: 4.234336\nAction: analogize\nReward: 0.416406\nNext Confusion: 4.556367"} +{"text": "### State\nConfusion: 2.388265\nAction: analogize\nReward: -0.320608\nNext Confusion: 2.556611"} +{"text": "### State\nConfusion: 5.013006\nAction: analogize\nReward: -0.899276\nNext Confusion: 5.653823"} +{"text": "### State\nConfusion: 3.866427\nAction: analogize\nReward: -0.164342\nNext Confusion: 4.012446"} +{"text": "### State\nConfusion: 3.142777\nAction: correct_fact\nReward: 1.00956\nNext Confusion: 2.708875"} +{"text": "### State\nConfusion: 5.851146\nAction: worked_example\nReward: 1.087909\nNext Confusion: 4.400152"} +{"text": "### State\nConfusion: 9.969285\nAction: correct_fact\nReward: -0.29484\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.26319\nAction: analogize\nReward: -0.381748\nNext Confusion: 3.710803"} +{"text": "### State\nConfusion: 3.560582\nAction: explain\nReward: -1.057823\nNext Confusion: 3.697929"} +{"text": "### State\nConfusion: 5.7797\nAction: worked_example\nReward: 1.58499\nNext Confusion: 4.221551"} +{"text": "### State\nConfusion: 3.365968\nAction: analogize\nReward: -0.12409\nNext Confusion: 3.294733"} +{"text": "### State\nConfusion: 7.357676\nAction: analogize\nReward: -0.930477\nNext Confusion: 7.947465"} +{"text": "### State\nConfusion: 5.213246\nAction: correct_fact\nReward: -0.457683\nNext Confusion: 5.714824"} +{"text": "### State\nConfusion: 8.686376\nAction: worked_example\nReward: 0.914465\nNext Confusion: 7.735434"} +{"text": "### State\nConfusion: 5.906049\nAction: question\nReward: 0.914918\nNext Confusion: 4.746426"} +{"text": "### State\nConfusion: 3.43873\nAction: analogize\nReward: -0.791984\nNext Confusion: 4.093406"} +{"text": "### State\nConfusion: 8.147875\nAction: analogize\nReward: -0.339623\nNext Confusion: 9.074441"} +{"text": "### State\nConfusion: 4.987865\nAction: correct_fact\nReward: -0.135189\nNext Confusion: 5.126443"} +{"text": "### State\nConfusion: 6.827771\nAction: analogize\nReward: -0.182827\nNext Confusion: 7.09621"} +{"text": "### State\nConfusion: 4.841376\nAction: correct_fact\nReward: 0.440885\nNext Confusion: 4.646943"} +{"text": "### State\nConfusion: 6.711827\nAction: explain\nReward: 1.491523\nNext Confusion: 5.848803"} +{"text": "### State\nConfusion: 4.967377\nAction: analogize\nReward: 0.201888\nNext Confusion: 5.331802"} +{"text": "### State\nConfusion: 7.154704\nAction: analogize\nReward: 0.341228\nNext Confusion: 7.220116"} +{"text": "### State\nConfusion: 2.715033\nAction: analogize\nReward: 0.773613\nNext Confusion: 2.429066"} +{"text": "### State\nConfusion: 6.547896\nAction: explain\nReward: 0.563126\nNext Confusion: 6.683662"} +{"text": "### State\nConfusion: 6.269861\nAction: analogize\nReward: -0.157441\nNext Confusion: 6.715462"} +{"text": "### State\nConfusion: 5.309012\nAction: analogize\nReward: -0.39383\nNext Confusion: 6.213959"} +{"text": "### State\nConfusion: 4.32928\nAction: analogize\nReward: -1.387919\nNext Confusion: 5.461956"} +{"text": "### State\nConfusion: 7.075807\nAction: question\nReward: 0.342144\nNext Confusion: 6.488871"} +{"text": "### State\nConfusion: 2.908937\nAction: worked_example\nReward: 1.786775\nNext Confusion: 1.24221"} +{"text": "### State\nConfusion: 3.163769\nAction: analogize\nReward: -0.649834\nNext Confusion: 3.567671"} +{"text": "### State\nConfusion: 6.413857\nAction: analogize\nReward: 0.632799\nNext Confusion: 6.437032"} +{"text": "### State\nConfusion: 4.157505\nAction: correct_fact\nReward: -0.609828\nNext Confusion: 5.336113"} +{"text": "### State\nConfusion: 5.211759\nAction: analogize\nReward: -0.736369\nNext Confusion: 5.220968"} +{"text": "### State\nConfusion: 4.079003\nAction: analogize\nReward: 0.088859\nNext Confusion: 3.57106"} +{"text": "### State\nConfusion: 7.824298\nAction: worked_example\nReward: 1.347861\nNext Confusion: 7.120889"} +{"text": "### State\nConfusion: 3.864172\nAction: correct_fact\nReward: -1.319164\nNext Confusion: 5.237199"} +{"text": "### State\nConfusion: 3.599646\nAction: analogize\nReward: -0.234941\nNext Confusion: 3.997227"} +{"text": "### State\nConfusion: 6.169248\nAction: analogize\nReward: -0.73131\nNext Confusion: 6.311524"} +{"text": "### State\nConfusion: 4.205811\nAction: analogize\nReward: -0.028118\nNext Confusion: 4.480411"} +{"text": "### State\nConfusion: 6.248756\nAction: worked_example\nReward: 2.320086\nNext Confusion: 4.345955"} +{"text": "### State\nConfusion: 5.422293\nAction: correct_fact\nReward: -0.231537\nNext Confusion: 5.617543"} +{"text": "### State\nConfusion: 2.242248\nAction: analogize\nReward: -2.077594\nNext Confusion: 4.016796"} +{"text": "### State\nConfusion: 6.597207\nAction: worked_example\nReward: 0.058722\nNext Confusion: 6.496094"} +{"text": "### State\nConfusion: 6.364598\nAction: analogize\nReward: -0.576298\nNext Confusion: 6.859088"} +{"text": "### State\nConfusion: 4.490757\nAction: analogize\nReward: -0.434451\nNext Confusion: 4.642448"} +{"text": "### State\nConfusion: 3.946703\nAction: analogize\nReward: -0.681441\nNext Confusion: 4.066546"} +{"text": "### State\nConfusion: 4.001388\nAction: analogize\nReward: -0.120379\nNext Confusion: 3.938912"} +{"text": "### State\nConfusion: 9.710864\nAction: analogize\nReward: 0.004072\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.747454\nAction: analogize\nReward: -0.778482\nNext Confusion: 4.130773"} +{"text": "### State\nConfusion: 4.575968\nAction: explain\nReward: -0.196636\nNext Confusion: 4.720822"} +{"text": "### State\nConfusion: 7.338616\nAction: analogize\nReward: 0.358031\nNext Confusion: 7.647601"} +{"text": "### State\nConfusion: 3.574423\nAction: analogize\nReward: 0.154168\nNext Confusion: 3.77347"} +{"text": "### State\nConfusion: 4.63278\nAction: explain\nReward: 0.621704\nNext Confusion: 4.001392"} +{"text": "### State\nConfusion: 6.210172\nAction: analogize\nReward: 1.340827\nNext Confusion: 5.613212"} +{"text": "### State\nConfusion: 4.389936\nAction: correct_fact\nReward: 0.538823\nNext Confusion: 4.880523"} +{"text": "### State\nConfusion: 5.383227\nAction: analogize\nReward: -0.455558\nNext Confusion: 5.864184"} +{"text": "### State\nConfusion: 4.676518\nAction: question\nReward: -0.024095\nNext Confusion: 4.595433"} +{"text": "### State\nConfusion: 4.116737\nAction: analogize\nReward: -0.200846\nNext Confusion: 4.198854"} +{"text": "### State\nConfusion: 3.158481\nAction: analogize\nReward: -0.501419\nNext Confusion: 3.801235"} +{"text": "### State\nConfusion: 5.381923\nAction: explain\nReward: 0.301327\nNext Confusion: 4.506645"} +{"text": "### State\nConfusion: 3.871241\nAction: analogize\nReward: 0.470844\nNext Confusion: 3.760368"} +{"text": "### State\nConfusion: 5.753503\nAction: question\nReward: 0.836846\nNext Confusion: 5.024852"} +{"text": "### State\nConfusion: 6.185399\nAction: correct_fact\nReward: -0.229113\nNext Confusion: 5.964536"} +{"text": "### State\nConfusion: 5.237682\nAction: analogize\nReward: 0.639695\nNext Confusion: 5.570729"} +{"text": "### State\nConfusion: 3.803306\nAction: analogize\nReward: -0.653361\nNext Confusion: 4.502231"} +{"text": "### State\nConfusion: 4.290909\nAction: analogize\nReward: 0.050406\nNext Confusion: 4.59684"} +{"text": "### State\nConfusion: 3.545454\nAction: correct_fact\nReward: -1.307137\nNext Confusion: 4.240777"} +{"text": "### State\nConfusion: 3.892848\nAction: analogize\nReward: 0.277591\nNext Confusion: 4.556432"} +{"text": "### State\nConfusion: 2.286423\nAction: analogize\nReward: -0.020258\nNext Confusion: 2.711716"} +{"text": "### State\nConfusion: 6.568009\nAction: analogize\nReward: 0.309756\nNext Confusion: 6.701265"} +{"text": "### State\nConfusion: 7.574443\nAction: question\nReward: 0.488642\nNext Confusion: 7.358108"} +{"text": "### State\nConfusion: 2.923719\nAction: question\nReward: 0.680867\nNext Confusion: 1.858226"} +{"text": "### State\nConfusion: 6.309944\nAction: explain\nReward: -0.374884\nNext Confusion: 6.108041"} +{"text": "### State\nConfusion: 3.950947\nAction: explain\nReward: -0.246238\nNext Confusion: 3.967673"} +{"text": "### State\nConfusion: 3.558406\nAction: explain\nReward: -0.86277\nNext Confusion: 4.408267"} +{"text": "### State\nConfusion: 4.903235\nAction: analogize\nReward: -0.33585\nNext Confusion: 5.614946"} +{"text": "### State\nConfusion: 3.336845\nAction: analogize\nReward: -0.129177\nNext Confusion: 3.785467"} +{"text": "### State\nConfusion: 3.309768\nAction: analogize\nReward: 0.973455\nNext Confusion: 2.441803"} +{"text": "### State\nConfusion: 5.035866\nAction: analogize\nReward: 0.479477\nNext Confusion: 5.282769"} +{"text": "### State\nConfusion: 3.717341\nAction: analogize\nReward: -0.445694\nNext Confusion: 3.543303"} +{"text": "### State\nConfusion: 4.434391\nAction: analogize\nReward: -0.524554\nNext Confusion: 4.974204"} +{"text": "### State\nConfusion: 3.056816\nAction: analogize\nReward: -0.758043\nNext Confusion: 4.123322"} +{"text": "### State\nConfusion: 4.187655\nAction: analogize\nReward: -0.539545\nNext Confusion: 4.730906"} +{"text": "### State\nConfusion: 4.638495\nAction: correct_fact\nReward: -0.155797\nNext Confusion: 4.900523"} +{"text": "### State\nConfusion: 6.000943\nAction: analogize\nReward: -1.059716\nNext Confusion: 7.495129"} +{"text": "### State\nConfusion: 3.527035\nAction: question\nReward: -0.523099\nNext Confusion: 4.023082"} +{"text": "### State\nConfusion: 2.94476\nAction: analogize\nReward: 0.002781\nNext Confusion: 3.050672"} +{"text": "### State\nConfusion: 4.197364\nAction: explain\nReward: -0.036997\nNext Confusion: 3.47503"} +{"text": "### State\nConfusion: 2.724514\nAction: analogize\nReward: -0.0952\nNext Confusion: 2.998136"} +{"text": "### State\nConfusion: 4.317858\nAction: analogize\nReward: -0.248636\nNext Confusion: 4.875382"} +{"text": "### State\nConfusion: 8.902446\nAction: analogize\nReward: 0.339665\nNext Confusion: 8.612985"} +{"text": "### State\nConfusion: 7.162479\nAction: explain\nReward: 0.045718\nNext Confusion: 6.794286"} +{"text": "### State\nConfusion: 3.628578\nAction: analogize\nReward: -0.46392\nNext Confusion: 3.195431"} +{"text": "### State\nConfusion: 8.05332\nAction: analogize\nReward: 0.5926\nNext Confusion: 7.991128"} +{"text": "### State\nConfusion: 3.5457\nAction: explain\nReward: 1.156754\nNext Confusion: 2.851767"} +{"text": "### State\nConfusion: 6.851163\nAction: analogize\nReward: -0.991948\nNext Confusion: 7.793215"} +{"text": "### State\nConfusion: 3.835441\nAction: explain\nReward: 0.089078\nNext Confusion: 3.786062"} +{"text": "### State\nConfusion: 2.596621\nAction: analogize\nReward: 0.806401\nNext Confusion: 2.628039"} +{"text": "### State\nConfusion: 8.799524\nAction: analogize\nReward: 0.116618\nNext Confusion: 9.191632"} +{"text": "### State\nConfusion: 3.935173\nAction: explain\nReward: 0.042982\nNext Confusion: 4.48615"} +{"text": "### State\nConfusion: 4.541541\nAction: analogize\nReward: -0.048526\nNext Confusion: 4.233993"} +{"text": "### State\nConfusion: 3.478506\nAction: explain\nReward: -0.555564\nNext Confusion: 3.648344"} +{"text": "### State\nConfusion: 2.207017\nAction: correct_fact\nReward: -1.103466\nNext Confusion: 2.982554"} +{"text": "### State\nConfusion: 6.885302\nAction: analogize\nReward: -0.296764\nNext Confusion: 7.428911"} +{"text": "### State\nConfusion: 4.36444\nAction: analogize\nReward: 0.561046\nNext Confusion: 3.995603"} +{"text": "### State\nConfusion: 4.83831\nAction: correct_fact\nReward: 0.016987\nNext Confusion: 4.787175"} +{"text": "### State\nConfusion: 3.486222\nAction: analogize\nReward: -0.006196\nNext Confusion: 3.984055"} +{"text": "### State\nConfusion: 4.288857\nAction: analogize\nReward: -0.083703\nNext Confusion: 3.823161"} +{"text": "### State\nConfusion: 3.205301\nAction: correct_fact\nReward: -0.636662\nNext Confusion: 4.112988"} +{"text": "### State\nConfusion: 4.280341\nAction: analogize\nReward: -0.699241\nNext Confusion: 4.976665"} +{"text": "### State\nConfusion: 3.260056\nAction: analogize\nReward: -0.594702\nNext Confusion: 4.102306"} +{"text": "### State\nConfusion: 4.612415\nAction: analogize\nReward: 0.936412\nNext Confusion: 3.610846"} +{"text": "### State\nConfusion: 3.823294\nAction: question\nReward: 0.860296\nNext Confusion: 3.350242"} +{"text": "### State\nConfusion: 4.300542\nAction: analogize\nReward: -0.277862\nNext Confusion: 4.604201"} +{"text": "### State\nConfusion: 3.008559\nAction: correct_fact\nReward: 0.791254\nNext Confusion: 3.051819"} +{"text": "### State\nConfusion: 7.152747\nAction: analogize\nReward: -1.218924\nNext Confusion: 8.283377"} +{"text": "### State\nConfusion: 3.386136\nAction: analogize\nReward: -1.36675\nNext Confusion: 4.221894"} +{"text": "### State\nConfusion: 4.445341\nAction: analogize\nReward: -0.451169\nNext Confusion: 5.100821"} +{"text": "### State\nConfusion: 3.284357\nAction: explain\nReward: 0.506617\nNext Confusion: 3.124258"} +{"text": "### State\nConfusion: 5.902904\nAction: analogize\nReward: 0.375122\nNext Confusion: 5.377923"} +{"text": "### State\nConfusion: 6.133891\nAction: analogize\nReward: -0.515857\nNext Confusion: 6.836678"} +{"text": "### State\nConfusion: 4.543045\nAction: analogize\nReward: 0.09579\nNext Confusion: 4.757417"} +{"text": "### State\nConfusion: 3.405838\nAction: analogize\nReward: -0.677895\nNext Confusion: 3.94108"} +{"text": "### State\nConfusion: 3.854457\nAction: analogize\nReward: -0.714533\nNext Confusion: 4.356869"} +{"text": "### State\nConfusion: 8.185769\nAction: worked_example\nReward: 1.678385\nNext Confusion: 6.671469"} +{"text": "### State\nConfusion: 4.561003\nAction: analogize\nReward: -0.987293\nNext Confusion: 4.868517"} +{"text": "### State\nConfusion: 3.15542\nAction: question\nReward: 1.250758\nNext Confusion: 2.164837"} +{"text": "### State\nConfusion: 3.6711\nAction: analogize\nReward: 0.597279\nNext Confusion: 3.621499"} +{"text": "### State\nConfusion: 4.706904\nAction: analogize\nReward: -0.253879\nNext Confusion: 4.531873"} +{"text": "### State\nConfusion: 4.510057\nAction: analogize\nReward: 0.328044\nNext Confusion: 4.414616"} +{"text": "### State\nConfusion: 4.219359\nAction: analogize\nReward: 0.042542\nNext Confusion: 4.427316"} +{"text": "### State\nConfusion: 3.831463\nAction: explain\nReward: 1.360988\nNext Confusion: 2.83884"} +{"text": "### State\nConfusion: 4.097205\nAction: question\nReward: 0.258788\nNext Confusion: 3.725177"} +{"text": "### State\nConfusion: 9.491195\nAction: analogize\nReward: -0.344806\nNext Confusion: 9.492555"} +{"text": "### State\nConfusion: 2.441149\nAction: analogize\nReward: -0.793021\nNext Confusion: 2.837643"} +{"text": "### State\nConfusion: 3.408415\nAction: analogize\nReward: -0.309769\nNext Confusion: 3.985229"} +{"text": "### State\nConfusion: 5.631257\nAction: analogize\nReward: -0.416589\nNext Confusion: 6.514521"} +{"text": "### State\nConfusion: 7.245012\nAction: worked_example\nReward: 2.992593\nNext Confusion: 4.881175"} +{"text": "### State\nConfusion: 6.536276\nAction: explain\nReward: 1.337626\nNext Confusion: 5.404844"} +{"text": "### State\nConfusion: 3.131055\nAction: analogize\nReward: -0.642311\nNext Confusion: 3.90995"} +{"text": "### State\nConfusion: 3.734571\nAction: analogize\nReward: -1.202896\nNext Confusion: 4.529602"} +{"text": "### State\nConfusion: 3.614458\nAction: analogize\nReward: -0.375706\nNext Confusion: 3.960335"} +{"text": "### State\nConfusion: 3.754846\nAction: analogize\nReward: 1.114828\nNext Confusion: 2.872017"} +{"text": "### State\nConfusion: 4.282373\nAction: analogize\nReward: 0.047074\nNext Confusion: 3.929965"} +{"text": "### State\nConfusion: 6.072084\nAction: worked_example\nReward: 2.003795\nNext Confusion: 5.110387"} +{"text": "### State\nConfusion: 7.607367\nAction: worked_example\nReward: 1.966738\nNext Confusion: 5.615737"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.198476\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.649977\nAction: explain\nReward: 0.189024\nNext Confusion: 3.923286"} +{"text": "### State\nConfusion: 4.086252\nAction: analogize\nReward: -1.095851\nNext Confusion: 4.539408"} +{"text": "### State\nConfusion: 3.548788\nAction: analogize\nReward: 0.200725\nNext Confusion: 3.887756"} +{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.763297\nNext Confusion: 9.680236"} +{"text": "### State\nConfusion: 3.073316\nAction: worked_example\nReward: -0.489686\nNext Confusion: 4.424902"} +{"text": "### State\nConfusion: 6.668706\nAction: correct_fact\nReward: 1.328342\nNext Confusion: 5.888302"} +{"text": "### State\nConfusion: 7.088053\nAction: analogize\nReward: 0.347794\nNext Confusion: 6.982926"} +{"text": "### State\nConfusion: 7.178564\nAction: worked_example\nReward: 1.450848\nNext Confusion: 6.387661"} +{"text": "### State\nConfusion: 1.744545\nAction: explain\nReward: 0.740145\nNext Confusion: 0.861106"} +{"text": "### State\nConfusion: 8.584458\nAction: worked_example\nReward: 2.582664\nNext Confusion: 7.174828"} +{"text": "### State\nConfusion: 3.625311\nAction: explain\nReward: 0.355308\nNext Confusion: 3.380443"} +{"text": "### State\nConfusion: 4.523099\nAction: analogize\nReward: -0.790397\nNext Confusion: 5.10107"} +{"text": "### State\nConfusion: 4.948718\nAction: analogize\nReward: -1.799253\nNext Confusion: 5.516174"} +{"text": "### State\nConfusion: 6.113761\nAction: question\nReward: 0.485334\nNext Confusion: 5.434281"} +{"text": "### State\nConfusion: 5.818233\nAction: question\nReward: 1.002597\nNext Confusion: 5.021882"} +{"text": "### State\nConfusion: 1.768893\nAction: analogize\nReward: 0.961322\nNext Confusion: 1.464652"} +{"text": "### State\nConfusion: 2.894877\nAction: question\nReward: 1.014309\nNext Confusion: 2.353802"} +{"text": "### State\nConfusion: 5.089719\nAction: analogize\nReward: 0.77709\nNext Confusion: 4.554134"} +{"text": "### State\nConfusion: 4.265329\nAction: analogize\nReward: 0.503933\nNext Confusion: 4.53531"} +{"text": "### State\nConfusion: 2.975997\nAction: question\nReward: 0.782601\nNext Confusion: 2.096145"} +{"text": "### State\nConfusion: 2.969771\nAction: analogize\nReward: -0.040926\nNext Confusion: 2.708039"} +{"text": "### State\nConfusion: 5.588715\nAction: question\nReward: -0.768939\nNext Confusion: 5.495356"} +{"text": "### State\nConfusion: 7.738388\nAction: correct_fact\nReward: 0.097154\nNext Confusion: 7.895498"} +{"text": "### State\nConfusion: 9.789788\nAction: question\nReward: 0.507937\nNext Confusion: 9.316481"} +{"text": "### State\nConfusion: 2.122493\nAction: analogize\nReward: 0.559706\nNext Confusion: 2.205507"} +{"text": "### State\nConfusion: 5.07307\nAction: explain\nReward: 1.405116\nNext Confusion: 4.673156"} +{"text": "### State\nConfusion: 5.187453\nAction: question\nReward: -0.206875\nNext Confusion: 5.470587"} +{"text": "### State\nConfusion: 7.89118\nAction: worked_example\nReward: 3.005133\nNext Confusion: 5.957319"} +{"text": "### State\nConfusion: 3.463503\nAction: analogize\nReward: 0.233139\nNext Confusion: 3.116866"} +{"text": "### State\nConfusion: 4.490139\nAction: analogize\nReward: -1.247777\nNext Confusion: 4.560209"} +{"text": "### State\nConfusion: 6.336253\nAction: analogize\nReward: 0.192\nNext Confusion: 5.439387"} +{"text": "### State\nConfusion: 3.879744\nAction: explain\nReward: 1.933112\nNext Confusion: 2.894167"} +{"text": "### State\nConfusion: 3.602151\nAction: analogize\nReward: 0.405749\nNext Confusion: 3.220006"} +{"text": "### State\nConfusion: 3.939015\nAction: explain\nReward: -0.410771\nNext Confusion: 4.216709"} +{"text": "### State\nConfusion: 4.013006\nAction: analogize\nReward: -0.508277\nNext Confusion: 4.368215"} +{"text": "### State\nConfusion: 6.478064\nAction: analogize\nReward: -1.128476\nNext Confusion: 7.656253"} +{"text": "### State\nConfusion: 7.29305\nAction: analogize\nReward: 1.065129\nNext Confusion: 6.880034"} +{"text": "### State\nConfusion: 2.54377\nAction: analogize\nReward: -0.940805\nNext Confusion: 2.54594"} +{"text": "### State\nConfusion: 4.214607\nAction: analogize\nReward: -1.635581\nNext Confusion: 5.250125"} +{"text": "### State\nConfusion: 3.376368\nAction: question\nReward: 1.454091\nNext Confusion: 2.159586"} +{"text": "### State\nConfusion: 4.664003\nAction: analogize\nReward: -0.481433\nNext Confusion: 4.975037"} +{"text": "### State\nConfusion: 5.789483\nAction: analogize\nReward: 0.351938\nNext Confusion: 5.524223"} +{"text": "### State\nConfusion: 4.134062\nAction: analogize\nReward: -0.746534\nNext Confusion: 4.309115"} +{"text": "### State\nConfusion: 2.75702\nAction: worked_example\nReward: 1.686613\nNext Confusion: 1.597392"} +{"text": "### State\nConfusion: 5.175445\nAction: worked_example\nReward: 2.26482\nNext Confusion: 3.099191"} +{"text": "### State\nConfusion: 6.065045\nAction: analogize\nReward: -1.475176\nNext Confusion: 7.14422"} +{"text": "### State\nConfusion: 4.352107\nAction: analogize\nReward: -0.176949\nNext Confusion: 4.417541"} +{"text": "### State\nConfusion: 5.816594\nAction: analogize\nReward: -2.146036\nNext Confusion: 7.010947"} +{"text": "### State\nConfusion: 3.675687\nAction: analogize\nReward: -0.013575\nNext Confusion: 3.6197"} +{"text": "### State\nConfusion: 6.159766\nAction: explain\nReward: 0.721524\nNext Confusion: 5.935606"} +{"text": "### State\nConfusion: 3.588376\nAction: question\nReward: 0.441773\nNext Confusion: 3.378304"} +{"text": "### State\nConfusion: 6.344508\nAction: question\nReward: 1.770076\nNext Confusion: 5.09023"} +{"text": "### State\nConfusion: 3.821794\nAction: analogize\nReward: -0.387546\nNext Confusion: 4.346112"} +{"text": "### State\nConfusion: 7.260673\nAction: correct_fact\nReward: 0.008941\nNext Confusion: 7.553966"} +{"text": "### State\nConfusion: 5.776783\nAction: analogize\nReward: -0.838409\nNext Confusion: 6.757849"} +{"text": "### State\nConfusion: 3.874547\nAction: explain\nReward: 0.548746\nNext Confusion: 3.753034"} +{"text": "### State\nConfusion: 7.218959\nAction: worked_example\nReward: 0.624518\nNext Confusion: 6.850143"} +{"text": "### State\nConfusion: 7.365728\nAction: explain\nReward: 1.423284\nNext Confusion: 6.245595"} +{"text": "### State\nConfusion: 4.087149\nAction: correct_fact\nReward: 0.54914\nNext Confusion: 4.116221"} +{"text": "### State\nConfusion: 9.569596\nAction: worked_example\nReward: 1.091264\nNext Confusion: 8.481068"} +{"text": "### State\nConfusion: 7.997488\nAction: question\nReward: 0.609481\nNext Confusion: 6.839767"} +{"text": "### State\nConfusion: 6.471966\nAction: analogize\nReward: 0.734048\nNext Confusion: 6.210516"} +{"text": "### State\nConfusion: 5.247116\nAction: analogize\nReward: 0.085486\nNext Confusion: 5.148559"} +{"text": "### State\nConfusion: 5.63931\nAction: question\nReward: 0.358263\nNext Confusion: 5.389858"} +{"text": "### State\nConfusion: 6.156111\nAction: analogize\nReward: 0.161887\nNext Confusion: 5.62754"} +{"text": "### State\nConfusion: 3.755793\nAction: correct_fact\nReward: -0.840027\nNext Confusion: 4.740012"} +{"text": "### State\nConfusion: 7.445958\nAction: worked_example\nReward: 3.084386\nNext Confusion: 5.851674"} +{"text": "### State\nConfusion: 7.444563\nAction: explain\nReward: 0.010491\nNext Confusion: 6.992737"} +{"text": "### State\nConfusion: 4.028035\nAction: analogize\nReward: -0.705654\nNext Confusion: 4.397549"} +{"text": "### State\nConfusion: 3.586522\nAction: correct_fact\nReward: -0.890749\nNext Confusion: 3.880734"} +{"text": "### State\nConfusion: 3.824157\nAction: analogize\nReward: 0.862425\nNext Confusion: 2.996262"} +{"text": "### State\nConfusion: 3.883596\nAction: worked_example\nReward: 1.802463\nNext Confusion: 2.584208"} +{"text": "### State\nConfusion: 5.404881\nAction: correct_fact\nReward: 0.081433\nNext Confusion: 5.07905"} +{"text": "### State\nConfusion: 2.991435\nAction: analogize\nReward: -0.433469\nNext Confusion: 3.357399"} +{"text": "### State\nConfusion: 3.545515\nAction: analogize\nReward: -0.007121\nNext Confusion: 3.71352"} +{"text": "### State\nConfusion: 3.660067\nAction: analogize\nReward: -0.928261\nNext Confusion: 4.0236"} +{"text": "### State\nConfusion: 7.0235\nAction: question\nReward: 0.20218\nNext Confusion: 6.875191"} +{"text": "### State\nConfusion: 9.746426\nAction: analogize\nReward: 0.725931\nNext Confusion: 9.392016"} +{"text": "### State\nConfusion: 2.125439\nAction: explain\nReward: -1.062487\nNext Confusion: 2.867128"} +{"text": "### State\nConfusion: 3.227817\nAction: analogize\nReward: -0.014357\nNext Confusion: 3.537508"} +{"text": "### State\nConfusion: 3.876828\nAction: analogize\nReward: 0.060063\nNext Confusion: 3.857296"} +{"text": "### State\nConfusion: 7.089156\nAction: analogize\nReward: 0.444024\nNext Confusion: 7.298431"} +{"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 2.962947\nNext Confusion: 8.246628"} +{"text": "### State\nConfusion: 5.594191\nAction: explain\nReward: 0.679804\nNext Confusion: 5.233611"} +{"text": "### State\nConfusion: 6.283862\nAction: analogize\nReward: 0.743037\nNext Confusion: 6.186083"} +{"text": "### State\nConfusion: 5.072781\nAction: question\nReward: 1.101868\nNext Confusion: 3.897992"} +{"text": "### State\nConfusion: 4.118371\nAction: correct_fact\nReward: 0.029128\nNext Confusion: 3.826488"} +{"text": "### State\nConfusion: 7.527729\nAction: correct_fact\nReward: 0.845789\nNext Confusion: 7.247021"} +{"text": "### State\nConfusion: 6.289606\nAction: analogize\nReward: 0.422815\nNext Confusion: 5.600898"} +{"text": "### State\nConfusion: 4.523598\nAction: analogize\nReward: -0.188144\nNext Confusion: 4.558487"} +{"text": "### State\nConfusion: 3.763717\nAction: question\nReward: 0.6122\nNext Confusion: 3.588942"} +{"text": "### State\nConfusion: 8.323666\nAction: correct_fact\nReward: 0.409538\nNext Confusion: 8.021692"} +{"text": "### State\nConfusion: 5.981031\nAction: worked_example\nReward: 0.917426\nNext Confusion: 5.423538"} +{"text": "### State\nConfusion: 3.515466\nAction: analogize\nReward: -0.103899\nNext Confusion: 3.695133"} +{"text": "### State\nConfusion: 6.429372\nAction: explain\nReward: -0.029875\nNext Confusion: 6.378915"} +{"text": "### State\nConfusion: 5.750795\nAction: analogize\nReward: -0.089562\nNext Confusion: 5.622753"} +{"text": "### State\nConfusion: 5.025332\nAction: analogize\nReward: -1.312083\nNext Confusion: 6.373254"} +{"text": "### State\nConfusion: 7.359441\nAction: analogize\nReward: -0.720074\nNext Confusion: 7.755872"} +{"text": "### State\nConfusion: 3.776062\nAction: question\nReward: -0.134128\nNext Confusion: 3.433186"} +{"text": "### State\nConfusion: 1.355683\nAction: explain\nReward: -0.00118\nNext Confusion: 1.431554"} +{"text": "### State\nConfusion: 3.912807\nAction: correct_fact\nReward: -1.548555\nNext Confusion: 4.628972"} +{"text": "### State\nConfusion: 3.794357\nAction: correct_fact\nReward: -0.059373\nNext Confusion: 3.670245"} +{"text": "### State\nConfusion: 5.059543\nAction: analogize\nReward: 0.032602\nNext Confusion: 5.353498"} +{"text": "### State\nConfusion: 3.077356\nAction: analogize\nReward: 0.011284\nNext Confusion: 2.947043"} +{"text": "### State\nConfusion: 5.534637\nAction: explain\nReward: 1.721247\nNext Confusion: 4.352992"} +{"text": "### State\nConfusion: 4.366849\nAction: analogize\nReward: -0.917181\nNext Confusion: 5.294018"} +{"text": "### State\nConfusion: 6.835693\nAction: explain\nReward: 0.456146\nNext Confusion: 6.470298"} +{"text": "### State\nConfusion: 9.452318\nAction: analogize\nReward: 0.033633\nNext Confusion: 9.382432"} +{"text": "### State\nConfusion: 3.400432\nAction: analogize\nReward: -0.369133\nNext Confusion: 3.65212"} +{"text": "### State\nConfusion: 5.731019\nAction: analogize\nReward: -1.289566\nNext Confusion: 6.41672"} +{"text": "### State\nConfusion: 3.618958\nAction: explain\nReward: -0.907926\nNext Confusion: 3.69676"} +{"text": "### State\nConfusion: 3.088994\nAction: explain\nReward: 0.2848\nNext Confusion: 2.83016"} +{"text": "### State\nConfusion: 7.169104\nAction: analogize\nReward: 1.486468\nNext Confusion: 6.278144"} +{"text": "### State\nConfusion: 4.010123\nAction: question\nReward: 0.990315\nNext Confusion: 2.898942"} +{"text": "### State\nConfusion: 3.325781\nAction: analogize\nReward: -0.847395\nNext Confusion: 3.638474"} +{"text": "### State\nConfusion: 4.63962\nAction: question\nReward: 1.030137\nNext Confusion: 3.610102"} +{"text": "### State\nConfusion: 3.698579\nAction: analogize\nReward: -0.018877\nNext Confusion: 4.387045"} +{"text": "### State\nConfusion: 1.946232\nAction: analogize\nReward: -1.351303\nNext Confusion: 3.380728"} +{"text": "### State\nConfusion: 4.236727\nAction: analogize\nReward: -0.73162\nNext Confusion: 5.393082"} +{"text": "### State\nConfusion: 8.190957\nAction: analogize\nReward: 0.404816\nNext Confusion: 8.214826"} +{"text": "### State\nConfusion: 6.995987\nAction: analogize\nReward: 0.571194\nNext Confusion: 6.929503"} +{"text": "### State\nConfusion: 2.113818\nAction: explain\nReward: -0.30514\nNext Confusion: 2.129459"} +{"text": "### State\nConfusion: 3.684304\nAction: analogize\nReward: -2.017778\nNext Confusion: 5.452133"} +{"text": "### State\nConfusion: 4.239281\nAction: worked_example\nReward: 1.860685\nNext Confusion: 2.940046"} +{"text": "### State\nConfusion: 3.676164\nAction: analogize\nReward: 0.091998\nNext Confusion: 4.183175"} +{"text": "### State\nConfusion: 4.133906\nAction: analogize\nReward: -0.55272\nNext Confusion: 4.581522"} +{"text": "### State\nConfusion: 4.054602\nAction: correct_fact\nReward: -0.177678\nNext Confusion: 4.056369"} +{"text": "### State\nConfusion: 4.639642\nAction: analogize\nReward: -0.272352\nNext Confusion: 5.553159"} +{"text": "### State\nConfusion: 5.635794\nAction: analogize\nReward: -0.705183\nNext Confusion: 6.451484"} +{"text": "### State\nConfusion: 5.543668\nAction: correct_fact\nReward: 0.885091\nNext Confusion: 4.846454"} +{"text": "### State\nConfusion: 3.267564\nAction: analogize\nReward: -0.098069\nNext Confusion: 3.530591"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.338373\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.627039\nAction: analogize\nReward: -0.495228\nNext Confusion: 3.729506"} +{"text": "### State\nConfusion: 2.909357\nAction: analogize\nReward: 0.712175\nNext Confusion: 2.649315"} +{"text": "### State\nConfusion: 5.226436\nAction: explain\nReward: -0.593975\nNext Confusion: 5.565649"} +{"text": "### State\nConfusion: 3.411177\nAction: analogize\nReward: -0.915413\nNext Confusion: 4.738223"} +{"text": "### State\nConfusion: 2.31554\nAction: correct_fact\nReward: 0.05716\nNext Confusion: 1.97111"} +{"text": "### State\nConfusion: 3.476088\nAction: question\nReward: 2.026609\nNext Confusion: 2.038584"} +{"text": "### State\nConfusion: 5.963129\nAction: explain\nReward: -0.663593\nNext Confusion: 5.924516"} +{"text": "### State\nConfusion: 4.23041\nAction: analogize\nReward: -0.798136\nNext Confusion: 5.034618"} +{"text": "### State\nConfusion: 4.460384\nAction: analogize\nReward: 0.281733\nNext Confusion: 4.369669"} +{"text": "### State\nConfusion: 3.952235\nAction: analogize\nReward: -0.543262\nNext Confusion: 4.522062"} +{"text": "### State\nConfusion: 4.810875\nAction: analogize\nReward: -0.724943\nNext Confusion: 4.581037"} +{"text": "### State\nConfusion: 3.226505\nAction: worked_example\nReward: 0.912334\nNext Confusion: 2.419872"} +{"text": "### State\nConfusion: 9.691536\nAction: worked_example\nReward: 1.77497\nNext Confusion: 8.428343"} +{"text": "### State\nConfusion: 5.033921\nAction: question\nReward: 0.196992\nNext Confusion: 4.573141"} +{"text": "### State\nConfusion: 3.517061\nAction: analogize\nReward: -0.263287\nNext Confusion: 3.85532"} +{"text": "### State\nConfusion: 7.032363\nAction: worked_example\nReward: 1.873676\nNext Confusion: 5.367367"} +{"text": "### State\nConfusion: 4.801627\nAction: analogize\nReward: -0.242848\nNext Confusion: 5.514838"} +{"text": "### State\nConfusion: 6.834419\nAction: analogize\nReward: 0.4653\nNext Confusion: 7.076133"} +{"text": "### State\nConfusion: 6.279381\nAction: question\nReward: 0.830391\nNext Confusion: 5.205801"} +{"text": "### State\nConfusion: 8.198117\nAction: worked_example\nReward: 1.509305\nNext Confusion: 7.05779"} +{"text": "### State\nConfusion: 3.336602\nAction: analogize\nReward: 0.37946\nNext Confusion: 3.545989"} +{"text": "### State\nConfusion: 6.838361\nAction: analogize\nReward: -0.741956\nNext Confusion: 7.300534"} +{"text": "### State\nConfusion: 5.791677\nAction: explain\nReward: 0.437775\nNext Confusion: 5.755174"} +{"text": "### State\nConfusion: 9.722909\nAction: correct_fact\nReward: -1.217724\nNext Confusion: 9.802279"} +{"text": "### State\nConfusion: 7.018476\nAction: question\nReward: 0.563995\nNext Confusion: 6.405234"} +{"text": "### State\nConfusion: 4.592573\nAction: analogize\nReward: -2.146348\nNext Confusion: 5.838669"} +{"text": "### State\nConfusion: 5.724254\nAction: analogize\nReward: 1.270602\nNext Confusion: 5.397553"} +{"text": "### State\nConfusion: 4.446648\nAction: worked_example\nReward: -0.503034\nNext Confusion: 4.548247"} +{"text": "### State\nConfusion: 5.499\nAction: explain\nReward: 0.310256\nNext Confusion: 5.026456"} +{"text": "### State\nConfusion: 3.483297\nAction: question\nReward: 0.985335\nNext Confusion: 2.828101"} +{"text": "### State\nConfusion: 2.728559\nAction: explain\nReward: 0.672884\nNext Confusion: 2.04629"} +{"text": "### State\nConfusion: 6.694008\nAction: question\nReward: -0.553633\nNext Confusion: 7.286873"} +{"text": "### State\nConfusion: 2.769368\nAction: question\nReward: 0.406163\nNext Confusion: 2.904883"} +{"text": "### State\nConfusion: 5.757645\nAction: correct_fact\nReward: 0.664011\nNext Confusion: 5.369445"} +{"text": "### State\nConfusion: 4.197348\nAction: analogize\nReward: 1.133919\nNext Confusion: 3.996284"} +{"text": "### State\nConfusion: 5.534684\nAction: explain\nReward: -0.725343\nNext Confusion: 6.430503"} +{"text": "### State\nConfusion: 2.75554\nAction: worked_example\nReward: 1.502689\nNext Confusion: 1.00548"} +{"text": "### State\nConfusion: 4.337378\nAction: analogize\nReward: 0.228474\nNext Confusion: 4.612438"} +{"text": "### State\nConfusion: 4.446347\nAction: analogize\nReward: -0.524076\nNext Confusion: 5.001291"} +{"text": "### State\nConfusion: 5.104956\nAction: analogize\nReward: -0.163479\nNext Confusion: 5.520312"} +{"text": "### State\nConfusion: 3.629805\nAction: explain\nReward: -0.095137\nNext Confusion: 4.171221"} +{"text": "### State\nConfusion: 3.944147\nAction: analogize\nReward: -0.668701\nNext Confusion: 5.047819"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.550446\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 9.356881\nAction: correct_fact\nReward: -1.110132\nNext Confusion: 9.71459"} +{"text": "### State\nConfusion: 4.205706\nAction: explain\nReward: -0.234715\nNext Confusion: 4.437668"} +{"text": "### State\nConfusion: 5.271949\nAction: analogize\nReward: -0.57591\nNext Confusion: 5.56199"} +{"text": "### State\nConfusion: 3.972523\nAction: analogize\nReward: 0.4965\nNext Confusion: 4.568244"} +{"text": "### State\nConfusion: 6.749858\nAction: analogize\nReward: -0.236824\nNext Confusion: 7.190533"} +{"text": "### State\nConfusion: 8.942906\nAction: correct_fact\nReward: 0.233019\nNext Confusion: 8.72936"} +{"text": "### State\nConfusion: 3.798457\nAction: analogize\nReward: -0.124735\nNext Confusion: 4.220905"} +{"text": "### State\nConfusion: 4.55122\nAction: analogize\nReward: -1.185639\nNext Confusion: 5.466571"} +{"text": "### State\nConfusion: 3.276233\nAction: analogize\nReward: 0.248292\nNext Confusion: 3.85905"} +{"text": "### State\nConfusion: 4.059147\nAction: correct_fact\nReward: 0.407354\nNext Confusion: 3.503689"} +{"text": "### State\nConfusion: 4.406315\nAction: correct_fact\nReward: -0.603789\nNext Confusion: 4.506692"} +{"text": "### State\nConfusion: 2.644092\nAction: analogize\nReward: -0.019094\nNext Confusion: 3.00082"} +{"text": "### State\nConfusion: 5.603155\nAction: worked_example\nReward: 1.284057\nNext Confusion: 4.500092"} +{"text": "### State\nConfusion: 3.218315\nAction: analogize\nReward: -0.348337\nNext Confusion: 3.556726"} +{"text": "### State\nConfusion: 3.800715\nAction: analogize\nReward: -0.820115\nNext Confusion: 4.452045"} +{"text": "### State\nConfusion: 1.973659\nAction: analogize\nReward: -0.355537\nNext Confusion: 2.784944"} +{"text": "### State\nConfusion: 4.688923\nAction: question\nReward: 1.102507\nNext Confusion: 3.759745"} +{"text": "### State\nConfusion: 3.492448\nAction: analogize\nReward: 0.101092\nNext Confusion: 3.807346"} +{"text": "### State\nConfusion: 2.46282\nAction: analogize\nReward: -1.078748\nNext Confusion: 3.07609"} +{"text": "### State\nConfusion: 6.213417\nAction: analogize\nReward: 0.576302\nNext Confusion: 5.873172"} +{"text": "### State\nConfusion: 3.541289\nAction: explain\nReward: 0.015983\nNext Confusion: 3.708063"} +{"text": "### State\nConfusion: 5.982512\nAction: analogize\nReward: -0.190504\nNext Confusion: 6.230028"} +{"text": "### State\nConfusion: 5.589458\nAction: worked_example\nReward: 1.965483\nNext Confusion: 5.141684"} +{"text": "### State\nConfusion: 3.292279\nAction: question\nReward: 0.632446\nNext Confusion: 2.64234"} +{"text": "### State\nConfusion: 1.361117\nAction: analogize\nReward: -0.449704\nNext Confusion: 2.123296"} +{"text": "### State\nConfusion: 5.606995\nAction: worked_example\nReward: 0.968661\nNext Confusion: 5.075988"} +{"text": "### State\nConfusion: 3.824053\nAction: worked_example\nReward: 1.445538\nNext Confusion: 2.7174"} +{"text": "### State\nConfusion: 3.619457\nAction: analogize\nReward: -0.932034\nNext Confusion: 4.530998"} +{"text": "### State\nConfusion: 4.039429\nAction: question\nReward: 0.626477\nNext Confusion: 3.597227"} +{"text": "### State\nConfusion: 5.48916\nAction: question\nReward: 0.787362\nNext Confusion: 5.328984"} +{"text": "### State\nConfusion: 3.539406\nAction: analogize\nReward: -0.441082\nNext Confusion: 4.028983"} +{"text": "### State\nConfusion: 3.917401\nAction: analogize\nReward: -0.314303\nNext Confusion: 4.375538"} +{"text": "### State\nConfusion: 4.657824\nAction: analogize\nReward: 0.393414\nNext Confusion: 4.629042"} +{"text": "### State\nConfusion: 9.817489\nAction: analogize\nReward: -0.384728\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.5677\nAction: explain\nReward: -0.380941\nNext Confusion: 3.946322"} +{"text": "### State\nConfusion: 3.643777\nAction: analogize\nReward: -1.920887\nNext Confusion: 4.448967"} +{"text": "### State\nConfusion: 4.602626\nAction: explain\nReward: 0.775825\nNext Confusion: 3.995621"} +{"text": "### State\nConfusion: 3.493214\nAction: explain\nReward: -0.128712\nNext Confusion: 3.723056"} +{"text": "### State\nConfusion: 4.024774\nAction: analogize\nReward: -2.039025\nNext Confusion: 4.794768"} +{"text": "### State\nConfusion: 5.567354\nAction: question\nReward: 0.953785\nNext Confusion: 5.143233"} +{"text": "### State\nConfusion: 4.857438\nAction: explain\nReward: 0.443781\nNext Confusion: 4.433139"} +{"text": "### State\nConfusion: 6.066289\nAction: explain\nReward: -0.315992\nNext Confusion: 6.312766"} +{"text": "### State\nConfusion: 7.202041\nAction: analogize\nReward: 0.352557\nNext Confusion: 7.09304"} +{"text": "### State\nConfusion: 6.867551\nAction: explain\nReward: -0.373433\nNext Confusion: 7.375757"} +{"text": "### State\nConfusion: 4.086685\nAction: question\nReward: 0.298342\nNext Confusion: 3.762268"} +{"text": "### State\nConfusion: 8.616289\nAction: question\nReward: 1.143804\nNext Confusion: 7.619493"} +{"text": "### State\nConfusion: 4.24828\nAction: analogize\nReward: -0.861769\nNext Confusion: 5.43898"} +{"text": "### State\nConfusion: 2.615268\nAction: correct_fact\nReward: 1.0784\nNext Confusion: 1.460789"} +{"text": "### State\nConfusion: 5.23111\nAction: correct_fact\nReward: -0.202571\nNext Confusion: 5.344155"} +{"text": "### State\nConfusion: 7.022273\nAction: analogize\nReward: -1.226544\nNext Confusion: 7.900564"} +{"text": "### State\nConfusion: 7.907241\nAction: question\nReward: 1.464275\nNext Confusion: 6.918559"} +{"text": "### State\nConfusion: 7.12112\nAction: correct_fact\nReward: 1.403988\nNext Confusion: 6.659081"} +{"text": "### State\nConfusion: 3.515963\nAction: analogize\nReward: -0.759772\nNext Confusion: 4.787727"} +{"text": "### State\nConfusion: 5.604148\nAction: worked_example\nReward: 1.415625\nNext Confusion: 4.33325"} +{"text": "### State\nConfusion: 5.108318\nAction: analogize\nReward: -1.22629\nNext Confusion: 5.783868"} +{"text": "### State\nConfusion: 4.742907\nAction: analogize\nReward: -0.264648\nNext Confusion: 4.893974"} +{"text": "### State\nConfusion: 2.955016\nAction: explain\nReward: 0.640322\nNext Confusion: 2.963681"} +{"text": "### State\nConfusion: 3.553782\nAction: analogize\nReward: 0.463052\nNext Confusion: 3.616991"} +{"text": "### State\nConfusion: 4.315825\nAction: correct_fact\nReward: 0.128917\nNext Confusion: 4.809463"} +{"text": "### State\nConfusion: 6.324152\nAction: worked_example\nReward: 2.24065\nNext Confusion: 4.650812"} +{"text": "### State\nConfusion: 5.497759\nAction: explain\nReward: 1.417489\nNext Confusion: 4.601767"} +{"text": "### State\nConfusion: 3.482203\nAction: analogize\nReward: -0.362165\nNext Confusion: 3.796918"} +{"text": "### State\nConfusion: 4.651682\nAction: explain\nReward: 0.920595\nNext Confusion: 4.110992"} +{"text": "### State\nConfusion: 3.818045\nAction: worked_example\nReward: 0.974113\nNext Confusion: 2.51135"} +{"text": "### State\nConfusion: 4.387833\nAction: question\nReward: 0.807106\nNext Confusion: 3.467108"} +{"text": "### State\nConfusion: 7.444748\nAction: explain\nReward: 0.535044\nNext Confusion: 7.314575"} +{"text": "### State\nConfusion: 8.638442\nAction: analogize\nReward: -0.385962\nNext Confusion: 9.068825"} +{"text": "### State\nConfusion: 3.031337\nAction: analogize\nReward: 1.474946\nNext Confusion: 2.794966"} +{"text": "### State\nConfusion: 7.27936\nAction: analogize\nReward: 0.607482\nNext Confusion: 6.99242"} +{"text": "### State\nConfusion: 3.787557\nAction: analogize\nReward: -0.636365\nNext Confusion: 4.095346"} +{"text": "### State\nConfusion: 7.239841\nAction: analogize\nReward: -0.911259\nNext Confusion: 8.592769"} +{"text": "### State\nConfusion: 7.590043\nAction: worked_example\nReward: 0.271013\nNext Confusion: 7.004351"} +{"text": "### State\nConfusion: 8.092934\nAction: worked_example\nReward: 1.788613\nNext Confusion: 6.377313"} +{"text": "### State\nConfusion: 5.034043\nAction: explain\nReward: 0.301633\nNext Confusion: 5.184921"} +{"text": "### State\nConfusion: 4.350404\nAction: explain\nReward: 0.861178\nNext Confusion: 3.60923"} +{"text": "### State\nConfusion: 2.270528\nAction: explain\nReward: 0.728106\nNext Confusion: 1.638538"} +{"text": "### State\nConfusion: 4.23141\nAction: analogize\nReward: -0.872446\nNext Confusion: 4.772834"} +{"text": "### State\nConfusion: 4.371902\nAction: worked_example\nReward: 0.463624\nNext Confusion: 3.571019"} +{"text": "### State\nConfusion: 6.633394\nAction: question\nReward: -0.168295\nNext Confusion: 6.514897"} +{"text": "### State\nConfusion: 6.584599\nAction: question\nReward: 0.347541\nNext Confusion: 6.205327"} +{"text": "### State\nConfusion: 10.0\nAction: question\nReward: 1.610989\nNext Confusion: 9.107007"} +{"text": "### State\nConfusion: 5.890399\nAction: analogize\nReward: -0.177367\nNext Confusion: 5.894826"} +{"text": "### State\nConfusion: 3.914789\nAction: question\nReward: 0.963476\nNext Confusion: 3.638644"} +{"text": "### State\nConfusion: 4.606835\nAction: analogize\nReward: -1.59837\nNext Confusion: 5.999383"} +{"text": "### State\nConfusion: 5.992104\nAction: analogize\nReward: -0.850966\nNext Confusion: 6.851071"} +{"text": "### State\nConfusion: 4.461924\nAction: correct_fact\nReward: -0.385522\nNext Confusion: 5.865354"} +{"text": "### State\nConfusion: 6.803428\nAction: analogize\nReward: 0.091275\nNext Confusion: 6.193048"} +{"text": "### State\nConfusion: 4.592856\nAction: analogize\nReward: 0.028521\nNext Confusion: 4.672834"} +{"text": "### State\nConfusion: 2.804246\nAction: analogize\nReward: -0.216599\nNext Confusion: 2.494085"} +{"text": "### State\nConfusion: 3.900469\nAction: analogize\nReward: -0.715619\nNext Confusion: 3.978828"} +{"text": "### State\nConfusion: 4.378933\nAction: analogize\nReward: -0.515609\nNext Confusion: 5.002777"} +{"text": "### State\nConfusion: 7.587787\nAction: question\nReward: 1.416706\nNext Confusion: 6.875957"} +{"text": "### State\nConfusion: 3.742195\nAction: analogize\nReward: 0.076616\nNext Confusion: 4.102513"} +{"text": "### State\nConfusion: 3.479584\nAction: explain\nReward: 0.616633\nNext Confusion: 2.790334"} +{"text": "### State\nConfusion: 4.141684\nAction: analogize\nReward: -0.697507\nNext Confusion: 4.529926"} +{"text": "### State\nConfusion: 5.484036\nAction: analogize\nReward: -0.203655\nNext Confusion: 6.119545"} +{"text": "### State\nConfusion: 5.041966\nAction: analogize\nReward: 0.478271\nNext Confusion: 4.934764"} +{"text": "### State\nConfusion: 5.643482\nAction: analogize\nReward: 0.330683\nNext Confusion: 6.250234"} +{"text": "### State\nConfusion: 3.039198\nAction: analogize\nReward: -1.140458\nNext Confusion: 4.141131"} +{"text": "### State\nConfusion: 6.9517\nAction: analogize\nReward: 1.121652\nNext Confusion: 6.827886"} +{"text": "### State\nConfusion: 4.896638\nAction: analogize\nReward: 0.44984\nNext Confusion: 4.884718"} +{"text": "### State\nConfusion: 6.982846\nAction: analogize\nReward: -1.013628\nNext Confusion: 7.971094"} +{"text": "### State\nConfusion: 6.060497\nAction: explain\nReward: 0.290709\nNext Confusion: 5.825891"} +{"text": "### State\nConfusion: 3.599947\nAction: analogize\nReward: 0.693696\nNext Confusion: 3.53343"} +{"text": "### State\nConfusion: 4.170319\nAction: correct_fact\nReward: 0.898607\nNext Confusion: 3.590391"} +{"text": "### State\nConfusion: 7.032939\nAction: analogize\nReward: -0.729945\nNext Confusion: 8.16857"} +{"text": "### State\nConfusion: 4.053638\nAction: worked_example\nReward: 1.543701\nNext Confusion: 2.689349"} +{"text": "### State\nConfusion: 5.90626\nAction: analogize\nReward: -0.607466\nNext Confusion: 6.439805"} +{"text": "### State\nConfusion: 4.583474\nAction: analogize\nReward: -0.97328\nNext Confusion: 5.106393"} +{"text": "### State\nConfusion: 4.896346\nAction: analogize\nReward: -0.610819\nNext Confusion: 5.848339"} +{"text": "### State\nConfusion: 4.550838\nAction: analogize\nReward: -0.475464\nNext Confusion: 4.483279"} +{"text": "### State\nConfusion: 3.38141\nAction: explain\nReward: 0.504023\nNext Confusion: 3.468134"} +{"text": "### State\nConfusion: 4.350189\nAction: correct_fact\nReward: 0.390332\nNext Confusion: 3.921563"} +{"text": "### State\nConfusion: 6.046617\nAction: explain\nReward: 0.985491\nNext Confusion: 5.920085"} +{"text": "### State\nConfusion: 6.886919\nAction: analogize\nReward: 0.792771\nNext Confusion: 6.498229"} +{"text": "### State\nConfusion: 4.564333\nAction: analogize\nReward: 0.421946\nNext Confusion: 4.38198"} +{"text": "### State\nConfusion: 3.378859\nAction: analogize\nReward: -0.767424\nNext Confusion: 3.908994"} +{"text": "### State\nConfusion: 5.998241\nAction: analogize\nReward: -0.010588\nNext Confusion: 6.310215"} +{"text": "### State\nConfusion: 3.777984\nAction: analogize\nReward: 0.490388\nNext Confusion: 3.513688"} +{"text": "### State\nConfusion: 7.019506\nAction: analogize\nReward: -0.255347\nNext Confusion: 8.119068"} +{"text": "### State\nConfusion: 2.699495\nAction: analogize\nReward: -1.213656\nNext Confusion: 3.973977"} +{"text": "### State\nConfusion: 4.276931\nAction: worked_example\nReward: 2.073769\nNext Confusion: 3.504191"} +{"text": "### State\nConfusion: 7.270848\nAction: question\nReward: 0.602047\nNext Confusion: 6.808021"} +{"text": "### State\nConfusion: 6.406384\nAction: explain\nReward: 0.478975\nNext Confusion: 6.852021"} +{"text": "### State\nConfusion: 6.955819\nAction: explain\nReward: -1.291024\nNext Confusion: 6.745135"} +{"text": "### State\nConfusion: 5.362579\nAction: question\nReward: 0.291444\nNext Confusion: 5.313715"} +{"text": "### State\nConfusion: 7.497689\nAction: analogize\nReward: 0.087838\nNext Confusion: 7.549163"} +{"text": "### State\nConfusion: 3.399238\nAction: explain\nReward: 1.114128\nNext Confusion: 2.626598"} +{"text": "### State\nConfusion: 5.609217\nAction: analogize\nReward: -0.140091\nNext Confusion: 5.979127"} +{"text": "### State\nConfusion: 3.267017\nAction: question\nReward: -0.669656\nNext Confusion: 3.420053"} +{"text": "### State\nConfusion: 4.157508\nAction: correct_fact\nReward: -0.824236\nNext Confusion: 4.648945"} +{"text": "### State\nConfusion: 3.989922\nAction: correct_fact\nReward: -0.101446\nNext Confusion: 4.216505"} +{"text": "### State\nConfusion: 2.22757\nAction: analogize\nReward: -1.115784\nNext Confusion: 3.369227"} +{"text": "### State\nConfusion: 8.759944\nAction: worked_example\nReward: 1.584504\nNext Confusion: 7.617804"} +{"text": "### State\nConfusion: 9.389155\nAction: analogize\nReward: 0.213027\nNext Confusion: 9.195565"} +{"text": "### State\nConfusion: 8.246535\nAction: analogize\nReward: 0.202118\nNext Confusion: 8.268204"} +{"text": "### State\nConfusion: 5.907152\nAction: analogize\nReward: 0.268129\nNext Confusion: 6.087424"} +{"text": "### State\nConfusion: 6.813118\nAction: worked_example\nReward: 0.737036\nNext Confusion: 6.27529"} +{"text": "### State\nConfusion: 6.398956\nAction: explain\nReward: 0.268035\nNext Confusion: 5.872039"} +{"text": "### State\nConfusion: 3.590961\nAction: question\nReward: 1.10202\nNext Confusion: 2.945081"} +{"text": "### State\nConfusion: 6.938659\nAction: worked_example\nReward: 2.218891\nNext Confusion: 5.106211"} +{"text": "### State\nConfusion: 3.289629\nAction: analogize\nReward: -0.180204\nNext Confusion: 3.782614"} +{"text": "### State\nConfusion: 7.283386\nAction: analogize\nReward: 1.03868\nNext Confusion: 7.272589"} +{"text": "### State\nConfusion: 2.666499\nAction: worked_example\nReward: 1.600319\nNext Confusion: 1.500345"} +{"text": "### State\nConfusion: 5.878577\nAction: analogize\nReward: 0.211902\nNext Confusion: 5.616477"} +{"text": "### State\nConfusion: 2.740933\nAction: analogize\nReward: -1.123915\nNext Confusion: 3.60959"} +{"text": "### State\nConfusion: 4.148976\nAction: analogize\nReward: 0.320144\nNext Confusion: 4.609655"} +{"text": "### State\nConfusion: 4.86979\nAction: analogize\nReward: -1.519899\nNext Confusion: 5.834114"} +{"text": "### State\nConfusion: 3.21267\nAction: analogize\nReward: 0.160049\nNext Confusion: 3.229467"} +{"text": "### State\nConfusion: 7.290231\nAction: analogize\nReward: 0.607579\nNext Confusion: 7.191082"} +{"text": "### State\nConfusion: 3.509348\nAction: worked_example\nReward: 2.846924\nNext Confusion: 1.174477"} +{"text": "### State\nConfusion: 3.844179\nAction: question\nReward: 0.169155\nNext Confusion: 3.657271"} +{"text": "### State\nConfusion: 3.89389\nAction: analogize\nReward: 0.439295\nNext Confusion: 4.078422"} +{"text": "### State\nConfusion: 4.463733\nAction: analogize\nReward: -0.699769\nNext Confusion: 5.113838"} +{"text": "### State\nConfusion: 3.780769\nAction: explain\nReward: 0.816708\nNext Confusion: 3.649158"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.198227\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 7.127927\nAction: worked_example\nReward: 0.287837\nNext Confusion: 6.968576"} +{"text": "### State\nConfusion: 3.089982\nAction: analogize\nReward: -1.625967\nNext Confusion: 4.475578"} +{"text": "### State\nConfusion: 5.169077\nAction: explain\nReward: 0.535541\nNext Confusion: 5.222714"} +{"text": "### State\nConfusion: 4.358125\nAction: explain\nReward: 0.380645\nNext Confusion: 3.897497"} +{"text": "### State\nConfusion: 3.767699\nAction: analogize\nReward: -1.200743\nNext Confusion: 4.472256"} +{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.535228\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 8.875999\nAction: worked_example\nReward: 0.76591\nNext Confusion: 7.645343"} +{"text": "### State\nConfusion: 3.52916\nAction: analogize\nReward: 0.344736\nNext Confusion: 2.717469"} +{"text": "### State\nConfusion: 2.586894\nAction: explain\nReward: -0.517557\nNext Confusion: 3.244847"} +{"text": "### State\nConfusion: 1.909156\nAction: analogize\nReward: -1.324682\nNext Confusion: 2.857654"} +{"text": "### State\nConfusion: 4.005799\nAction: analogize\nReward: -0.305104\nNext Confusion: 3.669192"} +{"text": "### State\nConfusion: 4.355139\nAction: analogize\nReward: 0.109617\nNext Confusion: 4.115402"} +{"text": "### State\nConfusion: 6.679668\nAction: analogize\nReward: 0.091741\nNext Confusion: 6.599718"} +{"text": "### State\nConfusion: 4.419393\nAction: question\nReward: 1.469747\nNext Confusion: 3.633803"} +{"text": "### State\nConfusion: 6.384556\nAction: worked_example\nReward: 1.95714\nNext Confusion: 5.177717"} +{"text": "### State\nConfusion: 4.349121\nAction: worked_example\nReward: 1.24314\nNext Confusion: 3.26899"} +{"text": "### State\nConfusion: 3.324754\nAction: analogize\nReward: -1.137162\nNext Confusion: 3.613057"} +{"text": "### State\nConfusion: 4.626129\nAction: question\nReward: -0.010993\nNext Confusion: 4.778299"} +{"text": "### State\nConfusion: 5.514156\nAction: question\nReward: 0.89604\nNext Confusion: 4.711784"} +{"text": "### State\nConfusion: 4.125212\nAction: analogize\nReward: -0.73635\nNext Confusion: 4.89324"} +{"text": "### State\nConfusion: 3.397703\nAction: analogize\nReward: -1.003393\nNext Confusion: 4.007573"} +{"text": "### State\nConfusion: 5.827044\nAction: analogize\nReward: 0.119715\nNext Confusion: 5.987204"} +{"text": "### State\nConfusion: 3.580581\nAction: analogize\nReward: -0.213034\nNext Confusion: 4.094987"} +{"text": "### State\nConfusion: 4.512679\nAction: explain\nReward: 1.103546\nNext Confusion: 3.983216"} +{"text": "### State\nConfusion: 3.377749\nAction: explain\nReward: 0.041333\nNext Confusion: 2.830174"} +{"text": "### State\nConfusion: 7.021051\nAction: explain\nReward: -0.138561\nNext Confusion: 6.639877"} +{"text": "### State\nConfusion: 3.677575\nAction: analogize\nReward: -0.28998\nNext Confusion: 4.96881"} +{"text": "### State\nConfusion: 4.841562\nAction: correct_fact\nReward: -1.341953\nNext Confusion: 5.510637"} +{"text": "### State\nConfusion: 5.860694\nAction: question\nReward: 1.317609\nNext Confusion: 4.7185"} +{"text": "### State\nConfusion: 5.116674\nAction: worked_example\nReward: 0.900413\nNext Confusion: 3.554832"} +{"text": "### State\nConfusion: 4.904849\nAction: explain\nReward: 0.922088\nNext Confusion: 4.84363"} +{"text": "### State\nConfusion: 4.016333\nAction: explain\nReward: 0.102206\nNext Confusion: 3.400156"} +{"text": "### State\nConfusion: 3.335467\nAction: analogize\nReward: -0.579045\nNext Confusion: 4.053781"} +{"text": "### State\nConfusion: 3.975311\nAction: explain\nReward: 0.27217\nNext Confusion: 3.46649"} +{"text": "### State\nConfusion: 4.029582\nAction: correct_fact\nReward: -0.498656\nNext Confusion: 4.146517"} +{"text": "### State\nConfusion: 5.653948\nAction: analogize\nReward: 0.806549\nNext Confusion: 5.142787"} +{"text": "### State\nConfusion: 8.509027\nAction: correct_fact\nReward: -0.040474\nNext Confusion: 8.127693"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.018308\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.399515\nAction: analogize\nReward: 0.051091\nNext Confusion: 3.382378"} +{"text": "### State\nConfusion: 4.03549\nAction: analogize\nReward: -0.74119\nNext Confusion: 4.477384"} +{"text": "### State\nConfusion: 3.929249\nAction: question\nReward: 0.308091\nNext Confusion: 3.854811"} +{"text": "### State\nConfusion: 3.167563\nAction: analogize\nReward: -0.026105\nNext Confusion: 3.739793"} +{"text": "### State\nConfusion: 5.641661\nAction: question\nReward: 0.902291\nNext Confusion: 5.095738"} +{"text": "### State\nConfusion: 5.811388\nAction: analogize\nReward: -0.956837\nNext Confusion: 6.566842"} +{"text": "### State\nConfusion: 4.939756\nAction: analogize\nReward: 1.215886\nNext Confusion: 4.202513"} +{"text": "### State\nConfusion: 5.161591\nAction: question\nReward: 0.582941\nNext Confusion: 4.985189"} +{"text": "### State\nConfusion: 8.777076\nAction: correct_fact\nReward: 0.470891\nNext Confusion: 8.279322"} +{"text": "### State\nConfusion: 4.090277\nAction: worked_example\nReward: 1.559957\nNext Confusion: 2.979815"} +{"text": "### State\nConfusion: 3.339561\nAction: worked_example\nReward: 1.660879\nNext Confusion: 2.123369"} +{"text": "### State\nConfusion: 4.107422\nAction: analogize\nReward: -0.121353\nNext Confusion: 4.283752"} +{"text": "### State\nConfusion: 6.483735\nAction: explain\nReward: -1.197914\nNext Confusion: 7.035842"} +{"text": "### State\nConfusion: 5.573504\nAction: explain\nReward: 0.782266\nNext Confusion: 5.434095"} +{"text": "### State\nConfusion: 3.374913\nAction: analogize\nReward: -0.884737\nNext Confusion: 3.438567"} +{"text": "### State\nConfusion: 3.380767\nAction: explain\nReward: 1.294365\nNext Confusion: 2.789468"} +{"text": "### State\nConfusion: 5.549697\nAction: question\nReward: 1.678421\nNext Confusion: 4.247832"} +{"text": "### State\nConfusion: 3.579567\nAction: correct_fact\nReward: 0.672959\nNext Confusion: 3.633714"} +{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.020142\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.408587\nAction: correct_fact\nReward: 0.253955\nNext Confusion: 3.646089"} +{"text": "### State\nConfusion: 2.241163\nAction: question\nReward: 0.458367\nNext Confusion: 1.820895"} +{"text": "### State\nConfusion: 7.083729\nAction: analogize\nReward: -0.546349\nNext Confusion: 7.203736"} +{"text": "### State\nConfusion: 3.053561\nAction: analogize\nReward: 0.344281\nNext Confusion: 2.925175"} +{"text": "### State\nConfusion: 4.379266\nAction: question\nReward: 0.309251\nNext Confusion: 3.804448"} +{"text": "### State\nConfusion: 3.469964\nAction: worked_example\nReward: -1.176672\nNext Confusion: 4.13511"} +{"text": "### State\nConfusion: 3.022251\nAction: analogize\nReward: -0.90958\nNext Confusion: 3.919632"} +{"text": "### State\nConfusion: 5.641866\nAction: analogize\nReward: -0.135453\nNext Confusion: 5.936108"} +{"text": "### State\nConfusion: 4.732938\nAction: explain\nReward: 0.067745\nNext Confusion: 4.250308"} +{"text": "### State\nConfusion: 4.456509\nAction: correct_fact\nReward: -0.713073\nNext Confusion: 4.695286"} +{"text": "### State\nConfusion: 3.703105\nAction: analogize\nReward: 0.354142\nNext Confusion: 3.48034"} +{"text": "### State\nConfusion: 3.761903\nAction: analogize\nReward: -0.013819\nNext Confusion: 3.421196"} +{"text": "### State\nConfusion: 4.269754\nAction: analogize\nReward: 0.027496\nNext Confusion: 4.287079"} +{"text": "### State\nConfusion: 5.663837\nAction: explain\nReward: -0.360503\nNext Confusion: 6.315889"} +{"text": "### State\nConfusion: 5.372044\nAction: question\nReward: 1.080587\nNext Confusion: 4.743021"} +{"text": "### State\nConfusion: 2.501908\nAction: analogize\nReward: -0.534156\nNext Confusion: 3.061587"} +{"text": "### State\nConfusion: 6.000041\nAction: explain\nReward: 0.183818\nNext Confusion: 5.951022"} +{"text": "### State\nConfusion: 4.515942\nAction: question\nReward: 0.491305\nNext Confusion: 4.483781"} +{"text": "### State\nConfusion: 3.967531\nAction: question\nReward: 0.292389\nNext Confusion: 3.614035"} +{"text": "### State\nConfusion: 6.566327\nAction: question\nReward: 0.468035\nNext Confusion: 6.588855"} +{"text": "### State\nConfusion: 5.682308\nAction: worked_example\nReward: 2.358126\nNext Confusion: 3.606454"} +{"text": "### State\nConfusion: 3.510834\nAction: analogize\nReward: -0.694941\nNext Confusion: 3.978493"} +{"text": "### State\nConfusion: 4.583116\nAction: question\nReward: -0.700366\nNext Confusion: 5.10747"} +{"text": "### State\nConfusion: 7.197812\nAction: analogize\nReward: -0.017298\nNext Confusion: 7.292065"} +{"text": "### State\nConfusion: 6.225952\nAction: analogize\nReward: 0.520952\nNext Confusion: 5.862716"} +{"text": "### State\nConfusion: 4.177542\nAction: explain\nReward: 0.807856\nNext Confusion: 3.491008"} +{"text": "### State\nConfusion: 3.897059\nAction: analogize\nReward: 0.338542\nNext Confusion: 4.205773"} +{"text": "### State\nConfusion: 5.013536\nAction: analogize\nReward: -0.733278\nNext Confusion: 4.34735"} +{"text": "### State\nConfusion: 5.245119\nAction: question\nReward: 1.180365\nNext Confusion: 3.996855"} +{"text": "### State\nConfusion: 3.496643\nAction: analogize\nReward: -0.370052\nNext Confusion: 3.667639"} +{"text": "### State\nConfusion: 3.817\nAction: analogize\nReward: 0.105576\nNext Confusion: 3.961266"} +{"text": "### State\nConfusion: 3.864491\nAction: analogize\nReward: 0.184483\nNext Confusion: 4.035629"} +{"text": "### State\nConfusion: 4.685336\nAction: analogize\nReward: -0.242223\nNext Confusion: 4.233638"} +{"text": "### State\nConfusion: 3.923928\nAction: analogize\nReward: -0.662827\nNext Confusion: 3.973851"} +{"text": "### State\nConfusion: 2.349183\nAction: analogize\nReward: -0.344813\nNext Confusion: 2.246699"} +{"text": "### State\nConfusion: 4.223802\nAction: analogize\nReward: 0.470074\nNext Confusion: 4.347506"} +{"text": "### State\nConfusion: 5.313014\nAction: explain\nReward: -1.549022\nNext Confusion: 5.749806"} +{"text": "### State\nConfusion: 4.683001\nAction: analogize\nReward: -0.010471\nNext Confusion: 4.584553"} +{"text": "### State\nConfusion: 4.45898\nAction: analogize\nReward: 0.368356\nNext Confusion: 4.006063"} +{"text": "### State\nConfusion: 5.791666\nAction: analogize\nReward: -0.187986\nNext Confusion: 6.063031"} +{"text": "### State\nConfusion: 3.376903\nAction: analogize\nReward: -1.143536\nNext Confusion: 4.587175"} +{"text": "### State\nConfusion: 9.335158\nAction: worked_example\nReward: 2.152653\nNext Confusion: 8.317876"} +{"text": "### State\nConfusion: 3.621349\nAction: analogize\nReward: 0.582054\nNext Confusion: 3.243578"} +{"text": "### State\nConfusion: 4.74215\nAction: question\nReward: 1.291047\nNext Confusion: 3.348962"} +{"text": "### State\nConfusion: 5.683502\nAction: worked_example\nReward: 1.535629\nNext Confusion: 4.375916"} +{"text": "### State\nConfusion: 4.821148\nAction: worked_example\nReward: 1.047725\nNext Confusion: 3.557803"} +{"text": "### State\nConfusion: 3.149289\nAction: analogize\nReward: 0.743005\nNext Confusion: 2.77953"} +{"text": "### State\nConfusion: 5.93888\nAction: analogize\nReward: 0.198077\nNext Confusion: 5.99675"} +{"text": "### State\nConfusion: 3.840465\nAction: analogize\nReward: -1.367385\nNext Confusion: 4.494304"} +{"text": "### State\nConfusion: 5.273225\nAction: worked_example\nReward: 2.120614\nNext Confusion: 3.968337"} +{"text": "### State\nConfusion: 3.967179\nAction: analogize\nReward: 0.973108\nNext Confusion: 3.397983"} +{"text": "### State\nConfusion: 4.808587\nAction: correct_fact\nReward: -0.525625\nNext Confusion: 5.590657"} +{"text": "### State\nConfusion: 4.419318\nAction: analogize\nReward: -0.39736\nNext Confusion: 5.171327"} +{"text": "### State\nConfusion: 3.713422\nAction: analogize\nReward: -0.434615\nNext Confusion: 4.516202"} +{"text": "### State\nConfusion: 4.057846\nAction: analogize\nReward: -0.236593\nNext Confusion: 4.158133"} +{"text": "### State\nConfusion: 5.812652\nAction: analogize\nReward: 0.194617\nNext Confusion: 6.174366"} +{"text": "### State\nConfusion: 3.962215\nAction: correct_fact\nReward: -0.493041\nNext Confusion: 4.044711"} +{"text": "### State\nConfusion: 3.95616\nAction: correct_fact\nReward: -0.764486\nNext Confusion: 4.490559"} +{"text": "### State\nConfusion: 4.067618\nAction: explain\nReward: 0.753649\nNext Confusion: 3.377144"} +{"text": "### State\nConfusion: 3.963561\nAction: correct_fact\nReward: -0.03958\nNext Confusion: 4.470316"} +{"text": "### State\nConfusion: 4.72749\nAction: correct_fact\nReward: 0.187432\nNext Confusion: 4.308219"} +{"text": "### State\nConfusion: 4.982907\nAction: analogize\nReward: -0.282559\nNext Confusion: 5.749441"} +{"text": "### State\nConfusion: 4.76151\nAction: correct_fact\nReward: -1.59992\nNext Confusion: 5.914158"} +{"text": "### State\nConfusion: 6.825388\nAction: analogize\nReward: -0.197075\nNext Confusion: 7.36307"} +{"text": "### State\nConfusion: 6.623585\nAction: analogize\nReward: -1.496314\nNext Confusion: 7.01574"} +{"text": "### State\nConfusion: 5.225947\nAction: explain\nReward: 0.299021\nNext Confusion: 5.249215"} +{"text": "### State\nConfusion: 4.701275\nAction: question\nReward: 0.376803\nNext Confusion: 4.108999"} +{"text": "### State\nConfusion: 6.925973\nAction: analogize\nReward: -0.90206\nNext Confusion: 7.930647"} +{"text": "### State\nConfusion: 5.79284\nAction: question\nReward: 1.013098\nNext Confusion: 5.272104"} +{"text": "### State\nConfusion: 6.839365\nAction: question\nReward: 0.121905\nNext Confusion: 7.054125"} +{"text": "### State\nConfusion: 3.869141\nAction: analogize\nReward: -0.052189\nNext Confusion: 4.202905"} +{"text": "### State\nConfusion: 3.541264\nAction: analogize\nReward: -0.124331\nNext Confusion: 3.888026"} +{"text": "### State\nConfusion: 8.227653\nAction: analogize\nReward: 0.535528\nNext Confusion: 8.023672"} +{"text": "### State\nConfusion: 3.389575\nAction: analogize\nReward: 0.083942\nNext Confusion: 4.252772"} +{"text": "### State\nConfusion: 6.12198\nAction: analogize\nReward: -0.017456\nNext Confusion: 6.250541"} +{"text": "### State\nConfusion: 3.908567\nAction: analogize\nReward: 0.852892\nNext Confusion: 3.425391"} +{"text": "### State\nConfusion: 5.787926\nAction: analogize\nReward: -0.61093\nNext Confusion: 6.209976"} +{"text": "### State\nConfusion: 3.852343\nAction: correct_fact\nReward: 0.01279\nNext Confusion: 3.730582"} +{"text": "### State\nConfusion: 3.345473\nAction: analogize\nReward: -0.182761\nNext Confusion: 4.095672"} +{"text": "### State\nConfusion: 6.41593\nAction: correct_fact\nReward: -1.011081\nNext Confusion: 6.550212"} +{"text": "### State\nConfusion: 5.542386\nAction: analogize\nReward: 0.529705\nNext Confusion: 5.213415"} +{"text": "### State\nConfusion: 3.542093\nAction: analogize\nReward: -0.150191\nNext Confusion: 3.909852"} +{"text": "### State\nConfusion: 3.518513\nAction: correct_fact\nReward: -0.261489\nNext Confusion: 3.642212"} +{"text": "### State\nConfusion: 8.754671\nAction: analogize\nReward: -0.524548\nNext Confusion: 9.529492"} +{"text": "### State\nConfusion: 3.811084\nAction: analogize\nReward: -0.528901\nNext Confusion: 4.075631"} +{"text": "### State\nConfusion: 2.964789\nAction: worked_example\nReward: 2.127217\nNext Confusion: 1.143061"} +{"text": "### State\nConfusion: 5.281669\nAction: correct_fact\nReward: -0.382373\nNext Confusion: 5.910728"} +{"text": "### State\nConfusion: 3.745115\nAction: analogize\nReward: -0.282543\nNext Confusion: 4.276707"} +{"text": "### State\nConfusion: 9.596537\nAction: worked_example\nReward: 0.622909\nNext Confusion: 8.939276"} +{"text": "### State\nConfusion: 4.492318\nAction: correct_fact\nReward: -0.717\nNext Confusion: 5.227583"} +{"text": "### State\nConfusion: 3.361198\nAction: analogize\nReward: -0.411737\nNext Confusion: 3.623981"} +{"text": "### State\nConfusion: 8.01341\nAction: explain\nReward: 0.07588\nNext Confusion: 7.766606"} +{"text": "### State\nConfusion: 5.438063\nAction: explain\nReward: 2.170198\nNext Confusion: 3.728784"} +{"text": "### State\nConfusion: 4.408485\nAction: analogize\nReward: -1.024377\nNext Confusion: 5.03083"} +{"text": "### State\nConfusion: 3.786148\nAction: analogize\nReward: -1.506429\nNext Confusion: 4.877437"} +{"text": "### State\nConfusion: 3.779745\nAction: worked_example\nReward: 1.154955\nNext Confusion: 3.201455"} +{"text": "### State\nConfusion: 7.605229\nAction: analogize\nReward: -0.006583\nNext Confusion: 7.633517"} +{"text": "### State\nConfusion: 4.4073\nAction: worked_example\nReward: 0.928354\nNext Confusion: 3.085973"} +{"text": "### State\nConfusion: 3.486942\nAction: question\nReward: -0.344526\nNext Confusion: 3.059474"} +{"text": "### State\nConfusion: 8.268665\nAction: analogize\nReward: -0.228095\nNext Confusion: 8.628764"} +{"text": "### State\nConfusion: 4.041603\nAction: worked_example\nReward: 2.745715\nNext Confusion: 2.3753"} +{"text": "### State\nConfusion: 1.978829\nAction: explain\nReward: 0.773021\nNext Confusion: 1.639348"} +{"text": "### State\nConfusion: 5.565595\nAction: analogize\nReward: -0.93579\nNext Confusion: 6.736066"} +{"text": "### State\nConfusion: 3.291308\nAction: explain\nReward: 0.089945\nNext Confusion: 3.398372"} +{"text": "### State\nConfusion: 5.692618\nAction: question\nReward: 0.291638\nNext Confusion: 5.638468"} +{"text": "### State\nConfusion: 6.097005\nAction: worked_example\nReward: 1.610145\nNext Confusion: 4.944908"} +{"text": "### State\nConfusion: 1.66989\nAction: question\nReward: 1.231649\nNext Confusion: 0.598919"} +{"text": "### State\nConfusion: 3.015389\nAction: question\nReward: -0.368871\nNext Confusion: 3.171424"} +{"text": "### State\nConfusion: 1.648728\nAction: worked_example\nReward: 2.086258\nNext Confusion: 0.0"} +{"text": "### State\nConfusion: 6.370827\nAction: correct_fact\nReward: -0.578435\nNext Confusion: 6.86617"} +{"text": "### State\nConfusion: 3.956089\nAction: analogize\nReward: -0.351387\nNext Confusion: 4.47287"} +{"text": "### State\nConfusion: 3.763646\nAction: analogize\nReward: -0.429977\nNext Confusion: 4.198875"} +{"text": "### State\nConfusion: 3.052423\nAction: question\nReward: 1.034561\nNext Confusion: 1.931299"} +{"text": "### State\nConfusion: 7.488086\nAction: analogize\nReward: -0.887759\nNext Confusion: 7.420434"} +{"text": "### State\nConfusion: 3.754042\nAction: question\nReward: 0.725976\nNext Confusion: 2.907831"} +{"text": "### State\nConfusion: 6.115601\nAction: analogize\nReward: -0.929617\nNext Confusion: 7.082544"} +{"text": "### State\nConfusion: 3.945445\nAction: analogize\nReward: -0.264867\nNext Confusion: 4.259188"} +{"text": "### State\nConfusion: 5.845489\nAction: analogize\nReward: -0.983428\nNext Confusion: 6.855581"} +{"text": "### State\nConfusion: 3.867403\nAction: analogize\nReward: 0.147113\nNext Confusion: 4.162812"} +{"text": "### State\nConfusion: 3.782122\nAction: analogize\nReward: -0.020962\nNext Confusion: 3.530049"} +{"text": "### State\nConfusion: 8.655141\nAction: worked_example\nReward: 1.226595\nNext Confusion: 6.920914"} +{"text": "### State\nConfusion: 2.859175\nAction: analogize\nReward: -0.561934\nNext Confusion: 3.274979"} +{"text": "### State\nConfusion: 3.47642\nAction: analogize\nReward: -0.883036\nNext Confusion: 4.15313"} +{"text": "### State\nConfusion: 7.00052\nAction: worked_example\nReward: 0.545365\nNext Confusion: 7.42833"} +{"text": "### State\nConfusion: 3.649172\nAction: analogize\nReward: -0.320634\nNext Confusion: 3.90222"} +{"text": "### State\nConfusion: 2.461097\nAction: question\nReward: 1.631821\nNext Confusion: 2.2893"} +{"text": "### State\nConfusion: 6.028571\nAction: explain\nReward: 0.061264\nNext Confusion: 5.679068"} +{"text": "### State\nConfusion: 3.460609\nAction: analogize\nReward: 0.141836\nNext Confusion: 3.841259"} +{"text": "### State\nConfusion: 6.036307\nAction: analogize\nReward: -0.882709\nNext Confusion: 7.126093"} +{"text": "### State\nConfusion: 2.88672\nAction: analogize\nReward: -0.273486\nNext Confusion: 3.479902"} +{"text": "### State\nConfusion: 2.820439\nAction: analogize\nReward: -0.92288\nNext Confusion: 3.16166"} +{"text": "### State\nConfusion: 4.613919\nAction: explain\nReward: -0.088816\nNext Confusion: 4.993456"} +{"text": "### State\nConfusion: 3.547463\nAction: explain\nReward: 0.985073\nNext Confusion: 2.734695"} +{"text": "### State\nConfusion: 4.721343\nAction: analogize\nReward: 0.242835\nNext Confusion: 5.540349"} +{"text": "### State\nConfusion: 4.678206\nAction: analogize\nReward: 0.387364\nNext Confusion: 4.468099"} +{"text": "### State\nConfusion: 4.515748\nAction: worked_example\nReward: 1.756244\nNext Confusion: 3.44937"} +{"text": "### State\nConfusion: 6.943506\nAction: question\nReward: 1.439846\nNext Confusion: 5.421192"} +{"text": "### State\nConfusion: 4.314752\nAction: correct_fact\nReward: 0.779412\nNext Confusion: 3.266661"} +{"text": "### State\nConfusion: 5.301826\nAction: analogize\nReward: 0.437882\nNext Confusion: 4.87753"} +{"text": "### State\nConfusion: 9.092433\nAction: analogize\nReward: -0.455093\nNext Confusion: 9.519798"} +{"text": "### State\nConfusion: 5.318067\nAction: correct_fact\nReward: 0.820235\nNext Confusion: 5.713635"} +{"text": "### State\nConfusion: 2.411903\nAction: analogize\nReward: -1.105515\nNext Confusion: 3.279925"} +{"text": "### State\nConfusion: 3.247107\nAction: explain\nReward: 0.429104\nNext Confusion: 2.74135"} +{"text": "### State\nConfusion: 6.919804\nAction: question\nReward: 0.885016\nNext Confusion: 6.092538"} +{"text": "### State\nConfusion: 4.203641\nAction: question\nReward: -0.063637\nNext Confusion: 3.856217"} +{"text": "### State\nConfusion: 3.565731\nAction: correct_fact\nReward: 0.730722\nNext Confusion: 2.730823"} +{"text": "### State\nConfusion: 3.939648\nAction: analogize\nReward: -0.425959\nNext Confusion: 3.892347"} +{"text": "### State\nConfusion: 4.518892\nAction: question\nReward: -1.051303\nNext Confusion: 4.840122"} +{"text": "### State\nConfusion: 3.858027\nAction: explain\nReward: 0.299108\nNext Confusion: 3.981068"} +{"text": "### State\nConfusion: 2.704036\nAction: explain\nReward: -0.342533\nNext Confusion: 3.274198"} +{"text": "### State\nConfusion: 3.906797\nAction: explain\nReward: 0.579545\nNext Confusion: 3.39401"} +{"text": "### State\nConfusion: 4.712357\nAction: analogize\nReward: 0.441648\nNext Confusion: 4.503565"} +{"text": "### State\nConfusion: 3.351565\nAction: question\nReward: 0.226935\nNext Confusion: 3.119711"} +{"text": "### State\nConfusion: 3.833758\nAction: explain\nReward: 0.490659\nNext Confusion: 3.624048"} +{"text": "### State\nConfusion: 3.916031\nAction: analogize\nReward: -0.824355\nNext Confusion: 5.0153"} +{"text": "### State\nConfusion: 4.437003\nAction: explain\nReward: 0.433959\nNext Confusion: 4.287864"} +{"text": "### State\nConfusion: 5.354143\nAction: analogize\nReward: -0.384773\nNext Confusion: 6.078944"} +{"text": "### State\nConfusion: 8.400786\nAction: worked_example\nReward: -0.597962\nNext Confusion: 8.667874"} +{"text": "### State\nConfusion: 7.149515\nAction: correct_fact\nReward: 1.278329\nNext Confusion: 6.462344"} +{"text": "### State\nConfusion: 5.102567\nAction: question\nReward: 0.804076\nNext Confusion: 5.377817"} +{"text": "### State\nConfusion: 3.092537\nAction: analogize\nReward: -0.466619\nNext Confusion: 3.818028"} +{"text": "### State\nConfusion: 5.887365\nAction: analogize\nReward: -1.052783\nNext Confusion: 7.368321"} +{"text": "### State\nConfusion: 4.274332\nAction: analogize\nReward: 1.093872\nNext Confusion: 4.345952"} +{"text": "### State\nConfusion: 9.127607\nAction: analogize\nReward: -0.598524\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 4.085041\nAction: analogize\nReward: -1.321249\nNext Confusion: 5.029472"} +{"text": "### State\nConfusion: 6.516422\nAction: worked_example\nReward: 0.462508\nNext Confusion: 5.468136"} +{"text": "### State\nConfusion: 7.023285\nAction: question\nReward: 1.22947\nNext Confusion: 5.970022"} +{"text": "### State\nConfusion: 3.508864\nAction: analogize\nReward: -0.749677\nNext Confusion: 4.38532"} +{"text": "### State\nConfusion: 4.205511\nAction: analogize\nReward: 0.205528\nNext Confusion: 3.6198"} +{"text": "### State\nConfusion: 5.185164\nAction: analogize\nReward: -1.097998\nNext Confusion: 6.383344"} +{"text": "### State\nConfusion: 6.544308\nAction: explain\nReward: 0.402704\nNext Confusion: 6.435021"} +{"text": "### State\nConfusion: 5.587864\nAction: analogize\nReward: 0.020874\nNext Confusion: 5.902252"} +{"text": "### State\nConfusion: 4.968722\nAction: analogize\nReward: -0.261211\nNext Confusion: 5.042747"} +{"text": "### State\nConfusion: 3.320405\nAction: worked_example\nReward: 1.129541\nNext Confusion: 2.346201"} +{"text": "### State\nConfusion: 4.647589\nAction: explain\nReward: 1.044288\nNext Confusion: 4.038287"} +{"text": "### State\nConfusion: 6.729565\nAction: question\nReward: 0.806191\nNext Confusion: 6.08601"} +{"text": "### State\nConfusion: 5.97209\nAction: analogize\nReward: -0.404765\nNext Confusion: 6.406681"} +{"text": "### State\nConfusion: 3.632277\nAction: analogize\nReward: -0.610554\nNext Confusion: 3.787645"} +{"text": "### State\nConfusion: 5.321507\nAction: analogize\nReward: 0.456858\nNext Confusion: 5.306321"} +{"text": "### State\nConfusion: 3.609982\nAction: explain\nReward: 0.527262\nNext Confusion: 3.772228"} +{"text": "### State\nConfusion: 7.68092\nAction: analogize\nReward: -0.650275\nNext Confusion: 8.177908"} +{"text": "### State\nConfusion: 5.233692\nAction: worked_example\nReward: 1.109516\nNext Confusion: 4.001993"} +{"text": "### State\nConfusion: 5.192301\nAction: analogize\nReward: 0.364094\nNext Confusion: 5.636886"} +{"text": "### State\nConfusion: 5.681577\nAction: explain\nReward: 1.501193\nNext Confusion: 5.096723"} +{"text": "### State\nConfusion: 3.494826\nAction: worked_example\nReward: 0.476455\nNext Confusion: 3.671003"} +{"text": "### State\nConfusion: 4.405832\nAction: analogize\nReward: 0.198525\nNext Confusion: 4.154148"} +{"text": "### State\nConfusion: 1.466139\nAction: worked_example\nReward: 1.668556\nNext Confusion: 0.0"} +{"text": "### State\nConfusion: 4.717231\nAction: analogize\nReward: -0.452539\nNext Confusion: 5.626471"} +{"text": "### State\nConfusion: 3.871939\nAction: analogize\nReward: -0.476882\nNext Confusion: 4.771052"} +{"text": "### State\nConfusion: 3.444428\nAction: explain\nReward: 1.106501\nNext Confusion: 3.333343"} +{"text": "### State\nConfusion: 4.196409\nAction: analogize\nReward: 0.381661\nNext Confusion: 4.17294"} +{"text": "### State\nConfusion: 6.663786\nAction: question\nReward: -0.873553\nNext Confusion: 6.942137"} +{"text": "### State\nConfusion: 5.515987\nAction: analogize\nReward: -1.334938\nNext Confusion: 6.764931"} +{"text": "### State\nConfusion: 5.240704\nAction: analogize\nReward: 0.294774\nNext Confusion: 4.956213"} +{"text": "### State\nConfusion: 3.792751\nAction: question\nReward: 0.624638\nNext Confusion: 3.817527"} +{"text": "### State\nConfusion: 4.063591\nAction: explain\nReward: 0.083103\nNext Confusion: 4.243097"} +{"text": "### State\nConfusion: 4.814852\nAction: analogize\nReward: 0.506158\nNext Confusion: 5.234246"} +{"text": "### State\nConfusion: 3.722708\nAction: question\nReward: 0.660337\nNext Confusion: 3.508"} +{"text": "### State\nConfusion: 5.437449\nAction: explain\nReward: 0.486583\nNext Confusion: 5.524333"} +{"text": "### State\nConfusion: 5.493462\nAction: analogize\nReward: -0.408789\nNext Confusion: 6.130176"} +{"text": "### State\nConfusion: 2.043504\nAction: question\nReward: -0.300682\nNext Confusion: 2.3756"} +{"text": "### State\nConfusion: 2.417647\nAction: analogize\nReward: -0.002908\nNext Confusion: 3.053841"} +{"text": "### State\nConfusion: 4.872236\nAction: analogize\nReward: -0.958127\nNext Confusion: 5.264666"} +{"text": "### State\nConfusion: 4.20766\nAction: worked_example\nReward: 0.891922\nNext Confusion: 3.989266"} +{"text": "### State\nConfusion: 5.122287\nAction: worked_example\nReward: 0.598247\nNext Confusion: 4.653445"} +{"text": "### State\nConfusion: 7.352225\nAction: worked_example\nReward: -1.130684\nNext Confusion: 8.396069"} +{"text": "### State\nConfusion: 3.337869\nAction: question\nReward: 1.275082\nNext Confusion: 2.30635"} +{"text": "### State\nConfusion: 5.843043\nAction: analogize\nReward: -0.530916\nNext Confusion: 6.435978"} +{"text": "### State\nConfusion: 3.31257\nAction: question\nReward: 0.431417\nNext Confusion: 2.247037"} +{"text": "### State\nConfusion: 2.412863\nAction: worked_example\nReward: 2.749443\nNext Confusion: 0.069799"} +{"text": "### State\nConfusion: 8.244789\nAction: analogize\nReward: -0.621143\nNext Confusion: 8.557005"} +{"text": "### State\nConfusion: 2.765021\nAction: explain\nReward: 0.933131\nNext Confusion: 1.681604"} +{"text": "### State\nConfusion: 5.545647\nAction: analogize\nReward: 0.764545\nNext Confusion: 4.779349"} +{"text": "### State\nConfusion: 2.562586\nAction: correct_fact\nReward: 0.358028\nNext Confusion: 1.512542"} +{"text": "### State\nConfusion: 3.947003\nAction: analogize\nReward: 0.585956\nNext Confusion: 3.144356"} +{"text": "### State\nConfusion: 7.056403\nAction: question\nReward: -0.392791\nNext Confusion: 7.174994"} +{"text": "### State\nConfusion: 4.349577\nAction: worked_example\nReward: 2.003419\nNext Confusion: 2.622539"} +{"text": "### State\nConfusion: 4.15257\nAction: question\nReward: 1.022439\nNext Confusion: 3.610753"} +{"text": "### State\nConfusion: 3.867925\nAction: analogize\nReward: -0.732856\nNext Confusion: 4.480718"} +{"text": "### State\nConfusion: 6.072416\nAction: analogize\nReward: -0.400762\nNext Confusion: 7.712231"} +{"text": "### State\nConfusion: 5.692263\nAction: analogize\nReward: -0.409731\nNext Confusion: 6.464062"} +{"text": "### State\nConfusion: 1.97927\nAction: correct_fact\nReward: 0.316701\nNext Confusion: 2.032135"} +{"text": "### State\nConfusion: 4.016954\nAction: analogize\nReward: 0.118688\nNext Confusion: 3.82214"} +{"text": "### State\nConfusion: 3.23223\nAction: explain\nReward: 0.74181\nNext Confusion: 2.84285"} +{"text": "### State\nConfusion: 3.751665\nAction: analogize\nReward: 1.273059\nNext Confusion: 2.951627"} +{"text": "### State\nConfusion: 6.280319\nAction: worked_example\nReward: 1.779139\nNext Confusion: 4.778588"} +{"text": "### State\nConfusion: 9.398644\nAction: analogize\nReward: -0.367722\nNext Confusion: 9.828088"} +{"text": "### State\nConfusion: 4.856673\nAction: analogize\nReward: 0.697777\nNext Confusion: 4.792671"} +{"text": "### State\nConfusion: 7.677342\nAction: analogize\nReward: -1.276954\nNext Confusion: 9.007716"} +{"text": "### State\nConfusion: 5.617439\nAction: analogize\nReward: -0.855311\nNext Confusion: 6.225406"} +{"text": "### State\nConfusion: 6.137228\nAction: analogize\nReward: -0.461357\nNext Confusion: 6.903793"} +{"text": "### State\nConfusion: 2.707235\nAction: analogize\nReward: -1.614262\nNext Confusion: 3.073972"} +{"text": "### State\nConfusion: 3.939227\nAction: analogize\nReward: -0.449349\nNext Confusion: 4.274253"} +{"text": "### State\nConfusion: 5.494228\nAction: explain\nReward: 0.047741\nNext Confusion: 6.083399"} +{"text": "### State\nConfusion: 2.533629\nAction: analogize\nReward: -0.39871\nNext Confusion: 2.856948"} +{"text": "### State\nConfusion: 6.453248\nAction: explain\nReward: 1.17852\nNext Confusion: 5.8642"} +{"text": "### State\nConfusion: 9.124298\nAction: correct_fact\nReward: -0.178922\nNext Confusion: 9.275643"} +{"text": "### State\nConfusion: 4.845806\nAction: explain\nReward: -0.259024\nNext Confusion: 5.530763"} +{"text": "### State\nConfusion: 7.197324\nAction: analogize\nReward: -0.734195\nNext Confusion: 7.665397"} +{"text": "### State\nConfusion: 4.266757\nAction: analogize\nReward: -0.860657\nNext Confusion: 4.993144"} +{"text": "### State\nConfusion: 3.85403\nAction: analogize\nReward: -0.180861\nNext Confusion: 3.252461"} +{"text": "### State\nConfusion: 3.62735\nAction: analogize\nReward: 0.028956\nNext Confusion: 3.763162"} +{"text": "### State\nConfusion: 8.335056\nAction: worked_example\nReward: 1.617562\nNext Confusion: 6.925548"} +{"text": "### State\nConfusion: 3.790295\nAction: analogize\nReward: 0.182729\nNext Confusion: 3.939273"} +{"text": "### State\nConfusion: 3.766018\nAction: analogize\nReward: 0.001838\nNext Confusion: 3.849688"} +{"text": "### State\nConfusion: 4.107547\nAction: analogize\nReward: -0.113302\nNext Confusion: 4.152052"} +{"text": "### State\nConfusion: 7.174244\nAction: analogize\nReward: -1.656013\nNext Confusion: 7.383377"} +{"text": "### State\nConfusion: 3.229383\nAction: analogize\nReward: 0.619507\nNext Confusion: 3.097441"} +{"text": "### State\nConfusion: 4.611253\nAction: analogize\nReward: -0.313888\nNext Confusion: 4.727696"} +{"text": "### State\nConfusion: 8.702278\nAction: worked_example\nReward: 1.775019\nNext Confusion: 7.147692"} +{"text": "### State\nConfusion: 4.831955\nAction: worked_example\nReward: 2.335352\nNext Confusion: 3.308958"} +{"text": "### State\nConfusion: 3.681416\nAction: correct_fact\nReward: 0.502263\nNext Confusion: 3.247155"} +{"text": "### State\nConfusion: 6.803329\nAction: analogize\nReward: -1.358429\nNext Confusion: 7.614142"} +{"text": "### State\nConfusion: 6.14641\nAction: question\nReward: 1.105078\nNext Confusion: 5.013781"} +{"text": "### State\nConfusion: 5.067341\nAction: analogize\nReward: -0.67485\nNext Confusion: 5.623613"} +{"text": "### State\nConfusion: 5.740351\nAction: analogize\nReward: 0.43157\nNext Confusion: 5.821089"} +{"text": "### State\nConfusion: 6.492049\nAction: analogize\nReward: -0.261783\nNext Confusion: 6.899039"} +{"text": "### State\nConfusion: 3.546743\nAction: correct_fact\nReward: -0.021746\nNext Confusion: 4.128534"} +{"text": "### State\nConfusion: 3.470161\nAction: analogize\nReward: 1.411296\nNext Confusion: 2.963803"} +{"text": "### State\nConfusion: 7.305857\nAction: explain\nReward: -0.260928\nNext Confusion: 7.525834"} +{"text": "### State\nConfusion: 4.079695\nAction: analogize\nReward: 0.425925\nNext Confusion: 4.2028"} +{"text": "### State\nConfusion: 3.504302\nAction: explain\nReward: -0.24801\nNext Confusion: 3.646383"} +{"text": "### State\nConfusion: 6.140616\nAction: analogize\nReward: -0.194836\nNext Confusion: 6.193009"} +{"text": "### State\nConfusion: 3.614631\nAction: worked_example\nReward: 2.808057\nNext Confusion: 1.710415"} +{"text": "### State\nConfusion: 4.385109\nAction: explain\nReward: 0.867373\nNext Confusion: 4.039882"} +{"text": "### State\nConfusion: 5.97857\nAction: worked_example\nReward: 0.678469\nNext Confusion: 4.711379"} +{"text": "### State\nConfusion: 5.842787\nAction: explain\nReward: 0.530482\nNext Confusion: 5.485232"} +{"text": "### State\nConfusion: 3.791125\nAction: explain\nReward: -0.120156\nNext Confusion: 3.974842"} +{"text": "### State\nConfusion: 5.301826\nAction: analogize\nReward: -0.177215\nNext Confusion: 5.508161"} +{"text": "### State\nConfusion: 2.616865\nAction: explain\nReward: -0.990273\nNext Confusion: 2.763718"} +{"text": "### State\nConfusion: 5.412301\nAction: analogize\nReward: 0.419836\nNext Confusion: 5.34807"} +{"text": "### State\nConfusion: 6.595836\nAction: worked_example\nReward: 0.79788\nNext Confusion: 5.217594"} +{"text": "### State\nConfusion: 6.025871\nAction: analogize\nReward: 0.350992\nNext Confusion: 6.043433"} +{"text": "### State\nConfusion: 8.022219\nAction: analogize\nReward: -0.212897\nNext Confusion: 7.875199"} +{"text": "### State\nConfusion: 7.210607\nAction: correct_fact\nReward: 0.058529\nNext Confusion: 7.347044"} +{"text": "### State\nConfusion: 3.933006\nAction: analogize\nReward: -0.406958\nNext Confusion: 4.179138"} +{"text": "### State\nConfusion: 2.965985\nAction: analogize\nReward: -0.300177\nNext Confusion: 3.029191"} +{"text": "### State\nConfusion: 6.168064\nAction: analogize\nReward: -0.610067\nNext Confusion: 7.171532"} +{"text": "### State\nConfusion: 3.584667\nAction: analogize\nReward: -0.183735\nNext Confusion: 4.292329"} +{"text": "### State\nConfusion: 3.67187\nAction: analogize\nReward: -1.468257\nNext Confusion: 4.664136"} +{"text": "### State\nConfusion: 9.246445\nAction: analogize\nReward: -0.771256\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 4.244543\nAction: analogize\nReward: -0.932989\nNext Confusion: 5.020196"} +{"text": "### State\nConfusion: 3.598906\nAction: worked_example\nReward: 0.802242\nNext Confusion: 2.522359"} +{"text": "### State\nConfusion: 4.469094\nAction: question\nReward: 0.390748\nNext Confusion: 4.036065"} +{"text": "### State\nConfusion: 2.997087\nAction: analogize\nReward: 0.495354\nNext Confusion: 2.832977"} +{"text": "### State\nConfusion: 3.71337\nAction: analogize\nReward: -1.484861\nNext Confusion: 5.056246"} +{"text": "### State\nConfusion: 3.870778\nAction: explain\nReward: 0.392984\nNext Confusion: 3.912732"} +{"text": "### State\nConfusion: 7.175167\nAction: correct_fact\nReward: -0.247504\nNext Confusion: 7.709682"} +{"text": "### State\nConfusion: 3.68497\nAction: worked_example\nReward: 0.488498\nNext Confusion: 3.046532"} +{"text": "### State\nConfusion: 5.920541\nAction: explain\nReward: 1.570687\nNext Confusion: 4.980819"} +{"text": "### State\nConfusion: 4.128818\nAction: analogize\nReward: -0.559041\nNext Confusion: 4.367249"} +{"text": "### State\nConfusion: 4.818598\nAction: analogize\nReward: 0.198094\nNext Confusion: 5.307376"} +{"text": "### State\nConfusion: 4.356233\nAction: analogize\nReward: 0.28791\nNext Confusion: 4.128079"} +{"text": "### State\nConfusion: 5.0037\nAction: analogize\nReward: -1.000578\nNext Confusion: 5.632165"} +{"text": "### State\nConfusion: 3.368967\nAction: analogize\nReward: 1.246838\nNext Confusion: 2.920736"} +{"text": "### State\nConfusion: 3.198133\nAction: worked_example\nReward: 1.680851\nNext Confusion: 1.77803"} +{"text": "### State\nConfusion: 2.746825\nAction: explain\nReward: 0.707715\nNext Confusion: 2.306501"} +{"text": "### State\nConfusion: 3.790369\nAction: analogize\nReward: -0.041662\nNext Confusion: 3.86117"} +{"text": "### State\nConfusion: 2.891872\nAction: correct_fact\nReward: -1.316577\nNext Confusion: 3.124006"} +{"text": "### State\nConfusion: 4.48038\nAction: analogize\nReward: -0.129744\nNext Confusion: 4.754625"} +{"text": "### State\nConfusion: 3.77296\nAction: correct_fact\nReward: 0.427451\nNext Confusion: 3.604445"} +{"text": "### State\nConfusion: 3.225091\nAction: analogize\nReward: -0.507043\nNext Confusion: 3.829337"} +{"text": "### State\nConfusion: 4.265069\nAction: analogize\nReward: -0.038454\nNext Confusion: 4.813678"} +{"text": "### State\nConfusion: 7.177357\nAction: analogize\nReward: -0.298202\nNext Confusion: 8.014968"} +{"text": "### State\nConfusion: 3.607763\nAction: correct_fact\nReward: -0.138047\nNext Confusion: 3.655758"} +{"text": "### State\nConfusion: 8.976868\nAction: worked_example\nReward: 0.023975\nNext Confusion: 8.534332"} +{"text": "### State\nConfusion: 5.039401\nAction: analogize\nReward: -0.601833\nNext Confusion: 5.407775"} +{"text": "### State\nConfusion: 3.497416\nAction: question\nReward: 0.398961\nNext Confusion: 3.075376"} +{"text": "### State\nConfusion: 3.587401\nAction: analogize\nReward: 0.036509\nNext Confusion: 3.688627"} +{"text": "### State\nConfusion: 4.673002\nAction: analogize\nReward: 0.131562\nNext Confusion: 4.674554"} +{"text": "### State\nConfusion: 3.592728\nAction: analogize\nReward: 0.418171\nNext Confusion: 3.849377"} +{"text": "### State\nConfusion: 7.642635\nAction: analogize\nReward: -0.424847\nNext Confusion: 7.853295"} +{"text": "### State\nConfusion: 8.928662\nAction: analogize\nReward: 0.9627\nNext Confusion: 8.62451"} +{"text": "### State\nConfusion: 4.960207\nAction: analogize\nReward: -5.042938\nNext Confusion: 6.014362"} +{"text": "### State\nConfusion: 2.171726\nAction: analogize\nReward: -0.181946\nNext Confusion: 2.327439"} +{"text": "### State\nConfusion: 6.586604\nAction: worked_example\nReward: 0.59854\nNext Confusion: 5.187266"} +{"text": "### State\nConfusion: 3.143118\nAction: analogize\nReward: 0.17305\nNext Confusion: 2.9831"} +{"text": "### State\nConfusion: 3.265929\nAction: analogize\nReward: 0.667022\nNext Confusion: 2.944679"} +{"text": "### State\nConfusion: 7.445652\nAction: question\nReward: 0.353372\nNext Confusion: 6.919861"} +{"text": "### State\nConfusion: 3.062145\nAction: correct_fact\nReward: -1.195106\nNext Confusion: 3.566605"} +{"text": "### State\nConfusion: 4.143488\nAction: question\nReward: -0.733389\nNext Confusion: 4.834106"} +{"text": "### State\nConfusion: 3.424406\nAction: explain\nReward: -0.602404\nNext Confusion: 3.36164"} +{"text": "### State\nConfusion: 3.387601\nAction: explain\nReward: 0.157426\nNext Confusion: 3.688417"} +{"text": "### State\nConfusion: 7.273157\nAction: explain\nReward: 1.178917\nNext Confusion: 6.322024"} +{"text": "### State\nConfusion: 3.979093\nAction: analogize\nReward: -0.631023\nNext Confusion: 4.222602"} +{"text": "### State\nConfusion: 3.696545\nAction: analogize\nReward: -0.230754\nNext Confusion: 4.253306"} +{"text": "### State\nConfusion: 2.966753\nAction: analogize\nReward: -0.52981\nNext Confusion: 3.338369"} +{"text": "### State\nConfusion: 5.275979\nAction: explain\nReward: 0.51978\nNext Confusion: 4.586684"} +{"text": "### State\nConfusion: 4.143547\nAction: analogize\nReward: -0.556281\nNext Confusion: 4.511189"} +{"text": "### State\nConfusion: 6.479927\nAction: explain\nReward: 0.668874\nNext Confusion: 6.162917"} +{"text": "### State\nConfusion: 3.578943\nAction: question\nReward: 1.043996\nNext Confusion: 2.879315"} +{"text": "### State\nConfusion: 4.859414\nAction: worked_example\nReward: 2.318581\nNext Confusion: 2.760855"} +{"text": "### State\nConfusion: 3.379685\nAction: correct_fact\nReward: -0.36739\nNext Confusion: 3.626151"} +{"text": "### State\nConfusion: 4.285751\nAction: explain\nReward: 0.490226\nNext Confusion: 4.115866"} +{"text": "### State\nConfusion: 6.605411\nAction: analogize\nReward: 0.102846\nNext Confusion: 6.825231"} +{"text": "### State\nConfusion: 3.968445\nAction: analogize\nReward: -1.432025\nNext Confusion: 4.917612"} +{"text": "### State\nConfusion: 7.966238\nAction: correct_fact\nReward: 0.406304\nNext Confusion: 8.120245"} +{"text": "### State\nConfusion: 6.721416\nAction: analogize\nReward: -1.520515\nNext Confusion: 7.218895"} +{"text": "### State\nConfusion: 2.738938\nAction: correct_fact\nReward: 0.085715\nNext Confusion: 2.513217"} +{"text": "### State\nConfusion: 5.238395\nAction: analogize\nReward: 0.63322\nNext Confusion: 5.405862"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.927505\nNext Confusion: 9.517879"} +{"text": "### State\nConfusion: 5.823978\nAction: worked_example\nReward: 1.860738\nNext Confusion: 4.157206"} +{"text": "### State\nConfusion: 5.481368\nAction: analogize\nReward: 0.071183\nNext Confusion: 5.627844"} +{"text": "### State\nConfusion: 8.289637\nAction: analogize\nReward: 0.024586\nNext Confusion: 8.378593"} +{"text": "### State\nConfusion: 5.744939\nAction: analogize\nReward: -1.389301\nNext Confusion: 6.877677"} +{"text": "### State\nConfusion: 6.628249\nAction: analogize\nReward: 0.479303\nNext Confusion: 5.94657"} +{"text": "### State\nConfusion: 6.70891\nAction: explain\nReward: 0.714393\nNext Confusion: 6.231401"} +{"text": "### State\nConfusion: 2.966038\nAction: analogize\nReward: -0.399175\nNext Confusion: 2.929795"} +{"text": "### State\nConfusion: 9.408701\nAction: analogize\nReward: -0.493768\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 9.415164\nAction: worked_example\nReward: 1.486642\nNext Confusion: 8.030042"} +{"text": "### State\nConfusion: 4.254503\nAction: question\nReward: 0.917854\nNext Confusion: 3.972505"} +{"text": "### State\nConfusion: 4.4989\nAction: explain\nReward: -0.321324\nNext Confusion: 4.517928"} +{"text": "### State\nConfusion: 3.953625\nAction: explain\nReward: -0.23264\nNext Confusion: 4.198959"} +{"text": "### State\nConfusion: 5.389431\nAction: analogize\nReward: -0.474144\nNext Confusion: 5.669853"} +{"text": "### State\nConfusion: 4.18871\nAction: question\nReward: -0.195394\nNext Confusion: 4.506213"} +{"text": "### State\nConfusion: 3.939333\nAction: worked_example\nReward: 1.129873\nNext Confusion: 2.415744"} +{"text": "### State\nConfusion: 5.82522\nAction: analogize\nReward: -0.874563\nNext Confusion: 6.778791"} +{"text": "### State\nConfusion: 3.446916\nAction: analogize\nReward: 0.005333\nNext Confusion: 3.194818"} +{"text": "### State\nConfusion: 6.168521\nAction: analogize\nReward: 0.004277\nNext Confusion: 6.380842"} +{"text": "### State\nConfusion: 4.693802\nAction: analogize\nReward: -0.562175\nNext Confusion: 4.965051"} +{"text": "### State\nConfusion: 5.756139\nAction: correct_fact\nReward: 0.139779\nNext Confusion: 6.064686"} +{"text": "### State\nConfusion: 3.910989\nAction: analogize\nReward: -2.469072\nNext Confusion: 5.263532"} +{"text": "### State\nConfusion: 5.079583\nAction: question\nReward: 0.720805\nNext Confusion: 4.772066"} +{"text": "### State\nConfusion: 8.160942\nAction: explain\nReward: -0.003734\nNext Confusion: 8.076292"} +{"text": "### State\nConfusion: 4.063059\nAction: analogize\nReward: -1.248169\nNext Confusion: 5.102006"} +{"text": "### State\nConfusion: 3.923972\nAction: correct_fact\nReward: -0.436689\nNext Confusion: 3.59706"} +{"text": "### State\nConfusion: 4.257745\nAction: analogize\nReward: 0.975398\nNext Confusion: 4.396928"} +{"text": "### State\nConfusion: 4.448721\nAction: correct_fact\nReward: -0.095114\nNext Confusion: 4.54971"} +{"text": "### State\nConfusion: 8.187359\nAction: correct_fact\nReward: -0.402091\nNext Confusion: 8.562424"} +{"text": "### State\nConfusion: 6.941269\nAction: analogize\nReward: -0.082177\nNext Confusion: 6.415106"} +{"text": "### State\nConfusion: 3.733281\nAction: analogize\nReward: -0.557303\nNext Confusion: 4.495642"} +{"text": "### State\nConfusion: 5.054769\nAction: question\nReward: 0.385418\nNext Confusion: 4.25285"} +{"text": "### State\nConfusion: 6.988386\nAction: analogize\nReward: -0.997999\nNext Confusion: 7.872257"} +{"text": "### State\nConfusion: 3.323612\nAction: analogize\nReward: -0.417391\nNext Confusion: 3.446508"} +{"text": "### State\nConfusion: 7.580118\nAction: analogize\nReward: -0.193736\nNext Confusion: 8.119838"} +{"text": "### State\nConfusion: 6.262873\nAction: analogize\nReward: 0.25262\nNext Confusion: 6.57022"} +{"text": "### State\nConfusion: 3.24616\nAction: analogize\nReward: 0.176095\nNext Confusion: 3.061988"} +{"text": "### State\nConfusion: 3.785727\nAction: analogize\nReward: -0.123162\nNext Confusion: 4.260601"} +{"text": "### State\nConfusion: 4.510389\nAction: analogize\nReward: -0.087179\nNext Confusion: 4.489273"} +{"text": "### State\nConfusion: 3.244055\nAction: analogize\nReward: -1.109454\nNext Confusion: 3.376668"} +{"text": "### State\nConfusion: 3.47781\nAction: analogize\nReward: 1.156379\nNext Confusion: 3.43952"} +{"text": "### State\nConfusion: 3.995885\nAction: analogize\nReward: -0.481654\nNext Confusion: 4.774538"} +{"text": "### State\nConfusion: 8.914354\nAction: analogize\nReward: 0.049812\nNext Confusion: 8.771034"} +{"text": "### State\nConfusion: 1.772292\nAction: explain\nReward: 0.297904\nNext Confusion: 1.719932"} +{"text": "### State\nConfusion: 4.163656\nAction: worked_example\nReward: 1.919813\nNext Confusion: 3.011497"} +{"text": "### State\nConfusion: 3.538392\nAction: analogize\nReward: -0.22508\nNext Confusion: 4.106795"} +{"text": "### State\nConfusion: 4.116727\nAction: worked_example\nReward: 2.444091\nNext Confusion: 2.687809"} +{"text": "### State\nConfusion: 6.723317\nAction: explain\nReward: 0.072298\nNext Confusion: 6.793771"} +{"text": "### State\nConfusion: 3.330479\nAction: analogize\nReward: -1.032799\nNext Confusion: 4.53574"} +{"text": "### State\nConfusion: 3.567197\nAction: analogize\nReward: -0.658102\nNext Confusion: 4.478553"} +{"text": "### State\nConfusion: 4.345864\nAction: analogize\nReward: -0.269636\nNext Confusion: 4.495819"} +{"text": "### State\nConfusion: 5.757518\nAction: analogize\nReward: -0.708589\nNext Confusion: 5.357489"} +{"text": "### State\nConfusion: 8.696834\nAction: explain\nReward: 0.505359\nNext Confusion: 8.75677"} +{"text": "### State\nConfusion: 3.847008\nAction: question\nReward: -1.407674\nNext Confusion: 4.419054"} +{"text": "### State\nConfusion: 5.584003\nAction: analogize\nReward: -0.606178\nNext Confusion: 6.05324"} +{"text": "### State\nConfusion: 4.09134\nAction: analogize\nReward: 0.24887\nNext Confusion: 3.440027"} +{"text": "### State\nConfusion: 8.342102\nAction: question\nReward: 0.388513\nNext Confusion: 8.468631"} +{"text": "### State\nConfusion: 4.246235\nAction: analogize\nReward: -0.367984\nNext Confusion: 4.55566"} +{"text": "### State\nConfusion: 5.041572\nAction: question\nReward: -0.049042\nNext Confusion: 4.999399"} +{"text": "### State\nConfusion: 3.843461\nAction: question\nReward: 0.018297\nNext Confusion: 3.998616"} +{"text": "### State\nConfusion: 3.468152\nAction: analogize\nReward: -0.219468\nNext Confusion: 3.57777"} +{"text": "### State\nConfusion: 4.381477\nAction: analogize\nReward: 0.03071\nNext Confusion: 4.940474"} +{"text": "### State\nConfusion: 5.756888\nAction: analogize\nReward: 0.89899\nNext Confusion: 5.094104"} +{"text": "### State\nConfusion: 4.333212\nAction: analogize\nReward: -0.6245\nNext Confusion: 5.334216"} +{"text": "### State\nConfusion: 5.47342\nAction: question\nReward: -0.154752\nNext Confusion: 5.659844"} +{"text": "### State\nConfusion: 8.083508\nAction: analogize\nReward: -0.103334\nNext Confusion: 8.280694"} +{"text": "### State\nConfusion: 4.620574\nAction: explain\nReward: -0.077588\nNext Confusion: 4.464561"} +{"text": "### State\nConfusion: 3.974661\nAction: worked_example\nReward: 1.125675\nNext Confusion: 3.31946"} +{"text": "### State\nConfusion: 4.42193\nAction: analogize\nReward: -0.47474\nNext Confusion: 4.429034"} +{"text": "### State\nConfusion: 5.231293\nAction: analogize\nReward: -0.845958\nNext Confusion: 6.019314"} +{"text": "### State\nConfusion: 3.228954\nAction: analogize\nReward: -0.857821\nNext Confusion: 4.013046"} +{"text": "### State\nConfusion: 3.676788\nAction: analogize\nReward: 1.48361\nNext Confusion: 2.914774"} +{"text": "### State\nConfusion: 6.744595\nAction: analogize\nReward: -3.523492\nNext Confusion: 7.119243"} +{"text": "### State\nConfusion: 6.573047\nAction: analogize\nReward: 0.065792\nNext Confusion: 6.158436"} +{"text": "### State\nConfusion: 4.348007\nAction: analogize\nReward: -1.154604\nNext Confusion: 5.238071"} +{"text": "### State\nConfusion: 5.936844\nAction: analogize\nReward: -1.194396\nNext Confusion: 7.531626"} +{"text": "### State\nConfusion: 5.42589\nAction: analogize\nReward: -0.43157\nNext Confusion: 5.821207"} +{"text": "### State\nConfusion: 6.476246\nAction: explain\nReward: -0.381727\nNext Confusion: 6.038479"} +{"text": "### State\nConfusion: 3.408969\nAction: correct_fact\nReward: -0.29008\nNext Confusion: 3.610456"} +{"text": "### State\nConfusion: 3.137696\nAction: analogize\nReward: -1.614321\nNext Confusion: 4.338411"} +{"text": "### State\nConfusion: 9.981143\nAction: question\nReward: 0.336403\nNext Confusion: 9.127187"} +{"text": "### State\nConfusion: 5.250209\nAction: worked_example\nReward: 1.215982\nNext Confusion: 4.074452"} +{"text": "### State\nConfusion: 3.848411\nAction: correct_fact\nReward: -0.315407\nNext Confusion: 3.798653"} +{"text": "### State\nConfusion: 5.364659\nAction: analogize\nReward: -0.832938\nNext Confusion: 6.078216"} +{"text": "### State\nConfusion: 5.980426\nAction: analogize\nReward: -0.953331\nNext Confusion: 6.469077"} +{"text": "### State\nConfusion: 5.083078\nAction: analogize\nReward: -0.609319\nNext Confusion: 5.623208"} +{"text": "### State\nConfusion: 7.247568\nAction: analogize\nReward: -0.292227\nNext Confusion: 7.618113"} +{"text": "### State\nConfusion: 3.334119\nAction: analogize\nReward: -1.237248\nNext Confusion: 4.051495"} +{"text": "### State\nConfusion: 3.771148\nAction: analogize\nReward: -0.28148\nNext Confusion: 4.278025"} +{"text": "### State\nConfusion: 7.054849\nAction: analogize\nReward: 0.276644\nNext Confusion: 7.069348"} +{"text": "### State\nConfusion: 4.223377\nAction: question\nReward: 0.878895\nNext Confusion: 3.37235"} +{"text": "### State\nConfusion: 6.538994\nAction: analogize\nReward: -0.657308\nNext Confusion: 7.263082"} +{"text": "### State\nConfusion: 4.482307\nAction: analogize\nReward: -0.734999\nNext Confusion: 5.658994"} +{"text": "### State\nConfusion: 7.982437\nAction: worked_example\nReward: 2.063794\nNext Confusion: 5.985748"} +{"text": "### State\nConfusion: 6.038796\nAction: analogize\nReward: -0.428013\nNext Confusion: 6.309379"} +{"text": "### State\nConfusion: 3.535395\nAction: explain\nReward: -0.369274\nNext Confusion: 3.703277"} +{"text": "### State\nConfusion: 2.860002\nAction: explain\nReward: 0.130337\nNext Confusion: 2.802935"} +{"text": "### State\nConfusion: 3.763097\nAction: correct_fact\nReward: -0.823417\nNext Confusion: 4.379292"} +{"text": "### State\nConfusion: 4.180916\nAction: question\nReward: 0.741489\nNext Confusion: 3.423447"} +{"text": "### State\nConfusion: 5.614097\nAction: question\nReward: 0.531301\nNext Confusion: 5.212755"} +{"text": "### State\nConfusion: 8.834068\nAction: analogize\nReward: -0.189005\nNext Confusion: 9.355236"} +{"text": "### State\nConfusion: 6.024891\nAction: correct_fact\nReward: 0.064373\nNext Confusion: 6.602543"} +{"text": "### State\nConfusion: 5.876128\nAction: explain\nReward: 0.267207\nNext Confusion: 5.626973"} +{"text": "### State\nConfusion: 6.593964\nAction: analogize\nReward: 0.138768\nNext Confusion: 6.006979"} +{"text": "### State\nConfusion: 3.408307\nAction: analogize\nReward: -0.436008\nNext Confusion: 3.196113"} +{"text": "### State\nConfusion: 7.686703\nAction: question\nReward: -0.568404\nNext Confusion: 7.67233"} +{"text": "### State\nConfusion: 5.321778\nAction: question\nReward: 1.497982\nNext Confusion: 4.268611"} +{"text": "### State\nConfusion: 4.75159\nAction: analogize\nReward: 1.433057\nNext Confusion: 4.059246"} +{"text": "### State\nConfusion: 3.645793\nAction: question\nReward: 1.585023\nNext Confusion: 1.974554"} +{"text": "### State\nConfusion: 6.01909\nAction: analogize\nReward: 0.32362\nNext Confusion: 6.217828"} +{"text": "### State\nConfusion: 6.016679\nAction: analogize\nReward: 0.081677\nNext Confusion: 6.395025"} +{"text": "### State\nConfusion: 6.743756\nAction: analogize\nReward: -1.098936\nNext Confusion: 7.562526"} +{"text": "### State\nConfusion: 7.090468\nAction: analogize\nReward: 0.011776\nNext Confusion: 7.157492"} +{"text": "### State\nConfusion: 4.080909\nAction: question\nReward: 0.274189\nNext Confusion: 3.741003"} +{"text": "### State\nConfusion: 6.508274\nAction: explain\nReward: -0.447604\nNext Confusion: 6.71945"} +{"text": "### State\nConfusion: 6.877663\nAction: analogize\nReward: -0.5954\nNext Confusion: 7.796532"} +{"text": "### State\nConfusion: 5.203895\nAction: analogize\nReward: -0.700037\nNext Confusion: 5.556193"} +{"text": "### State\nConfusion: 3.591128\nAction: correct_fact\nReward: -0.794202\nNext Confusion: 3.814581"} +{"text": "### State\nConfusion: 4.232285\nAction: explain\nReward: 0.143006\nNext Confusion: 3.782716"} +{"text": "### State\nConfusion: 4.805493\nAction: correct_fact\nReward: -0.240368\nNext Confusion: 5.179041"} +{"text": "### State\nConfusion: 3.191029\nAction: correct_fact\nReward: -4.082677\nNext Confusion: 3.805619"} +{"text": "### State\nConfusion: 3.123191\nAction: analogize\nReward: 0.157324\nNext Confusion: 2.772966"} +{"text": "### State\nConfusion: 5.475059\nAction: analogize\nReward: -0.537958\nNext Confusion: 5.817322"} +{"text": "### State\nConfusion: 8.821889\nAction: analogize\nReward: 0.389415\nNext Confusion: 8.971167"} +{"text": "### State\nConfusion: 5.51871\nAction: question\nReward: -0.490781\nNext Confusion: 5.568502"} +{"text": "### State\nConfusion: 4.363986\nAction: worked_example\nReward: 2.794462\nNext Confusion: 2.385778"} +{"text": "### State\nConfusion: 3.660182\nAction: analogize\nReward: 0.197274\nNext Confusion: 4.073714"} +{"text": "### State\nConfusion: 4.188292\nAction: question\nReward: 2.06589\nNext Confusion: 3.152765"} +{"text": "### State\nConfusion: 4.624219\nAction: explain\nReward: 0.03086\nNext Confusion: 4.495928"} +{"text": "### State\nConfusion: 3.352286\nAction: analogize\nReward: 0.358749\nNext Confusion: 3.544025"} +{"text": "### State\nConfusion: 3.218407\nAction: analogize\nReward: -0.809351\nNext Confusion: 3.86212"} +{"text": "### State\nConfusion: 4.368701\nAction: analogize\nReward: 0.097597\nNext Confusion: 4.579212"} +{"text": "### State\nConfusion: 7.926002\nAction: question\nReward: 1.346095\nNext Confusion: 7.494742"} +{"text": "### State\nConfusion: 10.0\nAction: explain\nReward: 2.632339\nNext Confusion: 8.704373"} +{"text": "### State\nConfusion: 6.556739\nAction: worked_example\nReward: 0.730109\nNext Confusion: 5.347394"} +{"text": "### State\nConfusion: 6.622032\nAction: worked_example\nReward: 1.884973\nNext Confusion: 3.975837"} +{"text": "### State\nConfusion: 6.837491\nAction: explain\nReward: -0.347375\nNext Confusion: 7.207565"} +{"text": "### State\nConfusion: 4.140696\nAction: analogize\nReward: -0.565153\nNext Confusion: 4.848446"} +{"text": "### State\nConfusion: 6.545013\nAction: analogize\nReward: 0.076453\nNext Confusion: 6.966989"} +{"text": "### State\nConfusion: 4.751852\nAction: analogize\nReward: -0.798605\nNext Confusion: 5.513865"} +{"text": "### State\nConfusion: 5.967764\nAction: analogize\nReward: 0.171464\nNext Confusion: 6.42246"} +{"text": "### State\nConfusion: 7.188017\nAction: analogize\nReward: -0.994482\nNext Confusion: 8.0922"} +{"text": "### State\nConfusion: 4.973029\nAction: question\nReward: 0.046659\nNext Confusion: 4.721402"} +{"text": "### State\nConfusion: 6.592906\nAction: correct_fact\nReward: 1.085134\nNext Confusion: 5.660255"} +{"text": "### State\nConfusion: 3.720168\nAction: explain\nReward: 1.271093\nNext Confusion: 3.076503"} +{"text": "### State\nConfusion: 5.986892\nAction: explain\nReward: 0.533297\nNext Confusion: 5.874181"} +{"text": "### State\nConfusion: 3.338895\nAction: analogize\nReward: 0.165743\nNext Confusion: 3.483409"} +{"text": "### State\nConfusion: 6.299437\nAction: question\nReward: 0.090953\nNext Confusion: 5.496086"} +{"text": "### State\nConfusion: 3.233407\nAction: analogize\nReward: 0.193732\nNext Confusion: 3.252628"} +{"text": "### State\nConfusion: 3.767879\nAction: analogize\nReward: -0.519034\nNext Confusion: 4.018177"} +{"text": "### State\nConfusion: 3.284015\nAction: explain\nReward: 0.343419\nNext Confusion: 3.226797"} +{"text": "### State\nConfusion: 4.966618\nAction: analogize\nReward: -1.346464\nNext Confusion: 5.568508"} +{"text": "### State\nConfusion: 7.611811\nAction: analogize\nReward: -1.118271\nNext Confusion: 8.414276"} +{"text": "### State\nConfusion: 3.678779\nAction: analogize\nReward: -1.83396\nNext Confusion: 4.816208"} +{"text": "### State\nConfusion: 9.551186\nAction: worked_example\nReward: 1.757724\nNext Confusion: 7.951963"} +{"text": "### State\nConfusion: 6.259004\nAction: analogize\nReward: 0.637289\nNext Confusion: 5.072351"} +{"text": "### State\nConfusion: 4.111844\nAction: analogize\nReward: 0.086688\nNext Confusion: 4.294176"} +{"text": "### State\nConfusion: 5.955097\nAction: analogize\nReward: -1.022934\nNext Confusion: 5.988747"} +{"text": "### State\nConfusion: 3.42395\nAction: analogize\nReward: -0.086424\nNext Confusion: 3.586034"} +{"text": "### State\nConfusion: 6.418479\nAction: worked_example\nReward: 3.015016\nNext Confusion: 4.592889"} +{"text": "### State\nConfusion: 3.27804\nAction: correct_fact\nReward: 1.052366\nNext Confusion: 2.910743"} +{"text": "### State\nConfusion: 6.8594\nAction: question\nReward: 0.394227\nNext Confusion: 6.030883"} +{"text": "### State\nConfusion: 3.918734\nAction: analogize\nReward: 0.392353\nNext Confusion: 3.580582"} +{"text": "### State\nConfusion: 4.108029\nAction: explain\nReward: 0.186519\nNext Confusion: 4.014627"} +{"text": "### State\nConfusion: 6.534582\nAction: analogize\nReward: -1.341765\nNext Confusion: 7.152132"} +{"text": "### State\nConfusion: 4.526416\nAction: analogize\nReward: 0.041798\nNext Confusion: 4.610605"} +{"text": "### State\nConfusion: 3.207484\nAction: analogize\nReward: -0.274951\nNext Confusion: 3.235347"} +{"text": "### State\nConfusion: 7.274545\nAction: correct_fact\nReward: 0.060256\nNext Confusion: 7.385714"} +{"text": "### State\nConfusion: 6.405168\nAction: analogize\nReward: 1.062377\nNext Confusion: 6.203435"} +{"text": "### State\nConfusion: 6.145315\nAction: analogize\nReward: 0.297888\nNext Confusion: 5.948101"} +{"text": "### State\nConfusion: 4.362407\nAction: question\nReward: -0.376688\nNext Confusion: 4.091491"} +{"text": "### State\nConfusion: 8.439035\nAction: analogize\nReward: -0.300894\nNext Confusion: 8.39113"} +{"text": "### State\nConfusion: 3.67231\nAction: analogize\nReward: -1.550801\nNext Confusion: 4.500672"} +{"text": "### State\nConfusion: 3.833536\nAction: correct_fact\nReward: 0.260055\nNext Confusion: 4.460357"} +{"text": "### State\nConfusion: 4.286399\nAction: analogize\nReward: -0.380031\nNext Confusion: 4.813565"} +{"text": "### State\nConfusion: 3.622745\nAction: analogize\nReward: 1.280211\nNext Confusion: 3.017132"} +{"text": "### State\nConfusion: 3.164635\nAction: analogize\nReward: -1.033433\nNext Confusion: 4.108108"} +{"text": "### State\nConfusion: 4.392075\nAction: correct_fact\nReward: -0.532647\nNext Confusion: 4.885617"} +{"text": "### State\nConfusion: 3.536113\nAction: explain\nReward: 1.498938\nNext Confusion: 2.96965"} +{"text": "### State\nConfusion: 8.397891\nAction: correct_fact\nReward: 1.004038\nNext Confusion: 7.644212"} +{"text": "### State\nConfusion: 2.638306\nAction: analogize\nReward: -0.30316\nNext Confusion: 3.268882"} +{"text": "### State\nConfusion: 2.290366\nAction: worked_example\nReward: 1.073395\nNext Confusion: 1.899378"} +{"text": "### State\nConfusion: 4.104017\nAction: correct_fact\nReward: -0.132089\nNext Confusion: 4.67455"} +{"text": "### State\nConfusion: 4.281032\nAction: explain\nReward: 0.437552\nNext Confusion: 3.920859"} +{"text": "### State\nConfusion: 9.430725\nAction: analogize\nReward: -0.210339\nNext Confusion: 9.636165"} +{"text": "### State\nConfusion: 3.160262\nAction: analogize\nReward: 0.90191\nNext Confusion: 2.309478"} +{"text": "### State\nConfusion: 3.093624\nAction: analogize\nReward: -0.678115\nNext Confusion: 4.155883"} +{"text": "### State\nConfusion: 5.114895\nAction: explain\nReward: 0.544845\nNext Confusion: 5.086789"} +{"text": "### State\nConfusion: 7.723752\nAction: analogize\nReward: -0.464143\nNext Confusion: 8.243782"} +{"text": "### State\nConfusion: 4.042224\nAction: correct_fact\nReward: -0.309778\nNext Confusion: 4.224926"} +{"text": "### State\nConfusion: 1.298197\nAction: analogize\nReward: -0.120681\nNext Confusion: 2.041737"} +{"text": "### State\nConfusion: 3.211188\nAction: question\nReward: 0.94096\nNext Confusion: 2.304281"} +{"text": "### State\nConfusion: 2.177077\nAction: question\nReward: 0.754392\nNext Confusion: 1.588065"} +{"text": "### State\nConfusion: 5.599477\nAction: explain\nReward: 0.427335\nNext Confusion: 5.764095"} +{"text": "### State\nConfusion: 5.271157\nAction: explain\nReward: 1.201553\nNext Confusion: 4.389104"} +{"text": "### State\nConfusion: 5.172612\nAction: explain\nReward: -0.578675\nNext Confusion: 5.641872"} +{"text": "### State\nConfusion: 4.746197\nAction: explain\nReward: -0.415519\nNext Confusion: 4.961803"} +{"text": "### State\nConfusion: 5.20597\nAction: analogize\nReward: -0.188651\nNext Confusion: 4.729918"} +{"text": "### State\nConfusion: 7.899179\nAction: explain\nReward: 0.9233\nNext Confusion: 7.617782"} +{"text": "### State\nConfusion: 4.606825\nAction: question\nReward: 1.332907\nNext Confusion: 3.817736"} +{"text": "### State\nConfusion: 3.545838\nAction: question\nReward: 0.584057\nNext Confusion: 3.080739"} +{"text": "### State\nConfusion: 5.416692\nAction: analogize\nReward: -1.181844\nNext Confusion: 6.291075"} +{"text": "### State\nConfusion: 8.278512\nAction: explain\nReward: 0.65172\nNext Confusion: 8.075535"} +{"text": "### State\nConfusion: 4.03923\nAction: worked_example\nReward: 1.425754\nNext Confusion: 2.703941"} +{"text": "### State\nConfusion: 7.588636\nAction: explain\nReward: 0.627005\nNext Confusion: 6.970175"} +{"text": "### State\nConfusion: 6.207129\nAction: analogize\nReward: -0.164946\nNext Confusion: 6.207433"} +{"text": "### State\nConfusion: 4.607996\nAction: analogize\nReward: -0.501607\nNext Confusion: 4.840988"} +{"text": "### State\nConfusion: 4.281465\nAction: question\nReward: 1.016601\nNext Confusion: 3.84681"} +{"text": "### State\nConfusion: 4.746888\nAction: explain\nReward: 1.408749\nNext Confusion: 3.521709"} +{"text": "### State\nConfusion: 2.90317\nAction: analogize\nReward: -0.593073\nNext Confusion: 3.09674"} +{"text": "### State\nConfusion: 5.362331\nAction: analogize\nReward: -0.147509\nNext Confusion: 5.639575"} +{"text": "### State\nConfusion: 5.548513\nAction: analogize\nReward: -0.657696\nNext Confusion: 6.434372"} +{"text": "### State\nConfusion: 4.179947\nAction: explain\nReward: 1.051147\nNext Confusion: 3.829047"} +{"text": "### State\nConfusion: 6.712979\nAction: question\nReward: 0.125504\nNext Confusion: 6.897026"} +{"text": "### State\nConfusion: 5.161298\nAction: explain\nReward: -0.894121\nNext Confusion: 5.72294"} +{"text": "### State\nConfusion: 4.199243\nAction: analogize\nReward: 1.274532\nNext Confusion: 3.317777"} +{"text": "### State\nConfusion: 2.205262\nAction: worked_example\nReward: 0.85029\nNext Confusion: 1.233461"} +{"text": "### State\nConfusion: 4.533222\nAction: question\nReward: -0.208696\nNext Confusion: 4.21478"} +{"text": "### State\nConfusion: 4.401053\nAction: analogize\nReward: 0.256719\nNext Confusion: 4.846919"} +{"text": "### State\nConfusion: 3.643785\nAction: analogize\nReward: -0.314908\nNext Confusion: 3.947566"} +{"text": "### State\nConfusion: 3.676824\nAction: analogize\nReward: -0.267246\nNext Confusion: 3.87749"} +{"text": "### State\nConfusion: 4.581174\nAction: analogize\nReward: -0.067764\nNext Confusion: 4.506774"} +{"text": "### State\nConfusion: 5.592551\nAction: question\nReward: 0.994469\nNext Confusion: 5.1464"} +{"text": "### State\nConfusion: 4.900575\nAction: analogize\nReward: -1.442941\nNext Confusion: 5.849224"} +{"text": "### State\nConfusion: 4.030148\nAction: explain\nReward: -0.223471\nNext Confusion: 4.296577"} +{"text": "### State\nConfusion: 5.54907\nAction: correct_fact\nReward: 1.283735\nNext Confusion: 4.96947"} +{"text": "### State\nConfusion: 6.399455\nAction: explain\nReward: 0.074439\nNext Confusion: 6.507652"} +{"text": "### State\nConfusion: 4.697805\nAction: analogize\nReward: -1.345821\nNext Confusion: 5.294332"} +{"text": "### State\nConfusion: 7.291886\nAction: analogize\nReward: -0.49404\nNext Confusion: 7.596599"} +{"text": "### State\nConfusion: 8.478653\nAction: worked_example\nReward: 1.67051\nNext Confusion: 7.127231"} +{"text": "### State\nConfusion: 4.146376\nAction: question\nReward: -0.30378\nNext Confusion: 4.132405"} +{"text": "### State\nConfusion: 9.333189\nAction: analogize\nReward: -1.131478\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 5.150481\nAction: explain\nReward: -0.021077\nNext Confusion: 5.147341"} +{"text": "### State\nConfusion: 5.005999\nAction: analogize\nReward: 0.261416\nNext Confusion: 5.043668"} +{"text": "### State\nConfusion: 5.417343\nAction: analogize\nReward: 0.55921\nNext Confusion: 5.474882"} +{"text": "### State\nConfusion: 5.937985\nAction: explain\nReward: 0.105923\nNext Confusion: 5.750947"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.186882\nNext Confusion: 9.814193"} +{"text": "### State\nConfusion: 2.234744\nAction: analogize\nReward: 0.880647\nNext Confusion: 1.544325"} +{"text": "### State\nConfusion: 1.236512\nAction: question\nReward: 1.106881\nNext Confusion: 0.7996"} +{"text": "### State\nConfusion: 4.044295\nAction: analogize\nReward: -1.293793\nNext Confusion: 4.683942"} +{"text": "### State\nConfusion: 4.212736\nAction: explain\nReward: 0.423388\nNext Confusion: 3.542225"} +{"text": "### State\nConfusion: 3.587829\nAction: analogize\nReward: -1.093237\nNext Confusion: 4.790663"} +{"text": "### State\nConfusion: 5.96449\nAction: question\nReward: -0.061454\nNext Confusion: 5.93381"} +{"text": "### State\nConfusion: 3.885393\nAction: analogize\nReward: -0.367448\nNext Confusion: 3.773087"} +{"text": "### State\nConfusion: 6.738736\nAction: explain\nReward: 0.954335\nNext Confusion: 6.322676"} +{"text": "### State\nConfusion: 3.025954\nAction: analogize\nReward: -0.77141\nNext Confusion: 3.877892"} +{"text": "### State\nConfusion: 4.1147\nAction: analogize\nReward: 0.392259\nNext Confusion: 3.516709"} +{"text": "### State\nConfusion: 6.666338\nAction: analogize\nReward: 0.324842\nNext Confusion: 6.57961"} +{"text": "### State\nConfusion: 5.232429\nAction: analogize\nReward: 0.382367\nNext Confusion: 4.214555"} +{"text": "### State\nConfusion: 7.714635\nAction: analogize\nReward: 0.360041\nNext Confusion: 8.002491"} +{"text": "### State\nConfusion: 5.043363\nAction: analogize\nReward: -0.185386\nNext Confusion: 5.162632"} +{"text": "### State\nConfusion: 8.755225\nAction: analogize\nReward: -0.111426\nNext Confusion: 9.15125"} +{"text": "### State\nConfusion: 7.030897\nAction: analogize\nReward: -1.325821\nNext Confusion: 7.800182"} +{"text": "### State\nConfusion: 7.853434\nAction: analogize\nReward: 0.080185\nNext Confusion: 8.146503"} +{"text": "### State\nConfusion: 3.47865\nAction: analogize\nReward: -0.742602\nNext Confusion: 3.932566"} +{"text": "### State\nConfusion: 3.318392\nAction: worked_example\nReward: 1.172948\nNext Confusion: 2.795324"} +{"text": "### State\nConfusion: 3.788924\nAction: question\nReward: 0.853461\nNext Confusion: 3.229144"} +{"text": "### State\nConfusion: 3.487349\nAction: question\nReward: 1.199487\nNext Confusion: 2.967386"} +{"text": "### State\nConfusion: 3.806279\nAction: question\nReward: 2.302437\nNext Confusion: 2.469832"} +{"text": "### State\nConfusion: 3.311562\nAction: analogize\nReward: -0.445051\nNext Confusion: 3.310418"} +{"text": "### State\nConfusion: 5.622833\nAction: analogize\nReward: -1.147916\nNext Confusion: 7.239026"} +{"text": "### State\nConfusion: 4.229888\nAction: analogize\nReward: -0.575123\nNext Confusion: 5.167108"} +{"text": "### State\nConfusion: 4.295042\nAction: analogize\nReward: -0.594404\nNext Confusion: 4.929984"} +{"text": "### State\nConfusion: 3.346937\nAction: analogize\nReward: -1.200558\nNext Confusion: 4.280535"} +{"text": "### State\nConfusion: 6.188107\nAction: analogize\nReward: 0.015743\nNext Confusion: 6.055892"} +{"text": "### State\nConfusion: 4.454732\nAction: question\nReward: 1.193721\nNext Confusion: 3.239344"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.732955\nNext Confusion: 9.617961"} +{"text": "### State\nConfusion: 5.953883\nAction: analogize\nReward: 0.366403\nNext Confusion: 6.130521"} +{"text": "### State\nConfusion: 5.771333\nAction: explain\nReward: -1.45427\nNext Confusion: 6.330766"} +{"text": "### State\nConfusion: 4.92416\nAction: explain\nReward: -0.239389\nNext Confusion: 5.232991"} +{"text": "### State\nConfusion: 2.797066\nAction: analogize\nReward: -1.545631\nNext Confusion: 4.092842"} +{"text": "### State\nConfusion: 3.410682\nAction: explain\nReward: 1.680386\nNext Confusion: 2.625942"} +{"text": "### State\nConfusion: 4.196591\nAction: analogize\nReward: 0.143843\nNext Confusion: 4.218358"} +{"text": "### State\nConfusion: 4.527193\nAction: explain\nReward: 1.279641\nNext Confusion: 3.833264"} +{"text": "### State\nConfusion: 7.109824\nAction: analogize\nReward: -1.056292\nNext Confusion: 7.897732"} +{"text": "### State\nConfusion: 3.4512\nAction: question\nReward: 0.101518\nNext Confusion: 2.863511"} +{"text": "### State\nConfusion: 9.769079\nAction: worked_example\nReward: -0.05826\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 7.047394\nAction: correct_fact\nReward: -0.477517\nNext Confusion: 6.644616"} +{"text": "### State\nConfusion: 5.039032\nAction: analogize\nReward: 0.542779\nNext Confusion: 4.704964"} +{"text": "### State\nConfusion: 2.455086\nAction: analogize\nReward: -0.42512\nNext Confusion: 2.582648"} +{"text": "### State\nConfusion: 8.79387\nAction: explain\nReward: 1.493231\nNext Confusion: 8.595444"} +{"text": "### State\nConfusion: 6.5493\nAction: question\nReward: 1.596032\nNext Confusion: 5.77124"} +{"text": "### State\nConfusion: 3.847614\nAction: analogize\nReward: -0.041072\nNext Confusion: 3.546278"} +{"text": "### State\nConfusion: 3.479255\nAction: worked_example\nReward: 0.772089\nNext Confusion: 2.406743"} +{"text": "### State\nConfusion: 3.405935\nAction: worked_example\nReward: 0.631187\nNext Confusion: 2.943548"} +{"text": "### State\nConfusion: 7.263623\nAction: analogize\nReward: -0.889767\nNext Confusion: 7.725207"} +{"text": "### State\nConfusion: 3.591611\nAction: worked_example\nReward: 1.93213\nNext Confusion: 2.058504"} +{"text": "### State\nConfusion: 5.776166\nAction: explain\nReward: 0.764305\nNext Confusion: 5.656176"} +{"text": "### State\nConfusion: 3.816689\nAction: worked_example\nReward: 2.065074\nNext Confusion: 2.308856"} +{"text": "### State\nConfusion: 6.615742\nAction: analogize\nReward: -1.522533\nNext Confusion: 7.848629"} +{"text": "### State\nConfusion: 3.603578\nAction: analogize\nReward: 0.183606\nNext Confusion: 3.590095"} +{"text": "### State\nConfusion: 6.595288\nAction: analogize\nReward: -0.773955\nNext Confusion: 7.387215"} +{"text": "### State\nConfusion: 3.243061\nAction: analogize\nReward: -0.86511\nNext Confusion: 4.083089"} +{"text": "### State\nConfusion: 6.382723\nAction: correct_fact\nReward: 0.514668\nNext Confusion: 6.59727"} +{"text": "### State\nConfusion: 3.61621\nAction: correct_fact\nReward: 0.218221\nNext Confusion: 3.916531"} +{"text": "### State\nConfusion: 4.585879\nAction: explain\nReward: 0.441228\nNext Confusion: 4.214521"} +{"text": "### State\nConfusion: 5.550642\nAction: analogize\nReward: -0.23505\nNext Confusion: 5.782431"} +{"text": "### State\nConfusion: 3.977782\nAction: analogize\nReward: 0.619823\nNext Confusion: 3.979027"} +{"text": "### State\nConfusion: 3.403716\nAction: analogize\nReward: -0.803805\nNext Confusion: 3.704608"} +{"text": "### State\nConfusion: 3.522781\nAction: analogize\nReward: -0.594497\nNext Confusion: 3.853947"} +{"text": "### State\nConfusion: 3.428078\nAction: worked_example\nReward: 2.908679\nNext Confusion: 0.783914"} +{"text": "### State\nConfusion: 2.765526\nAction: worked_example\nReward: 0.146944\nNext Confusion: 2.475324"} +{"text": "### State\nConfusion: 5.662729\nAction: analogize\nReward: -0.053028\nNext Confusion: 6.267484"} +{"text": "### State\nConfusion: 2.97392\nAction: correct_fact\nReward: 0.207476\nNext Confusion: 2.529974"} +{"text": "### State\nConfusion: 4.33869\nAction: analogize\nReward: -0.048698\nNext Confusion: 4.062849"} +{"text": "### State\nConfusion: 4.013338\nAction: explain\nReward: 0.400833\nNext Confusion: 3.595311"} +{"text": "### State\nConfusion: 4.072468\nAction: analogize\nReward: -1.069305\nNext Confusion: 4.625163"} +{"text": "### State\nConfusion: 3.534719\nAction: analogize\nReward: -0.496206\nNext Confusion: 3.767223"} +{"text": "### State\nConfusion: 3.301023\nAction: analogize\nReward: -0.343358\nNext Confusion: 3.224041"} +{"text": "### State\nConfusion: 5.737082\nAction: question\nReward: 0.787803\nNext Confusion: 5.253803"} +{"text": "### State\nConfusion: 5.034364\nAction: correct_fact\nReward: -0.452177\nNext Confusion: 5.496845"} +{"text": "### State\nConfusion: 8.053705\nAction: analogize\nReward: 0.306109\nNext Confusion: 8.010042"} +{"text": "### State\nConfusion: 4.250006\nAction: analogize\nReward: -1.557089\nNext Confusion: 5.197942"} +{"text": "### State\nConfusion: 2.612885\nAction: analogize\nReward: 0.520245\nNext Confusion: 2.377651"} +{"text": "### State\nConfusion: 4.320713\nAction: question\nReward: 0.08004\nNext Confusion: 4.111726"} +{"text": "### State\nConfusion: 6.200604\nAction: analogize\nReward: 0.148336\nNext Confusion: 6.110532"} +{"text": "### State\nConfusion: 2.905517\nAction: explain\nReward: 0.205134\nNext Confusion: 2.580832"} +{"text": "### State\nConfusion: 7.576805\nAction: analogize\nReward: -1.736052\nNext Confusion: 7.978537"} +{"text": "### State\nConfusion: 4.337534\nAction: analogize\nReward: -0.830804\nNext Confusion: 5.049174"} +{"text": "### State\nConfusion: 4.344432\nAction: analogize\nReward: 0.553108\nNext Confusion: 4.202552"} +{"text": "### State\nConfusion: 4.759101\nAction: analogize\nReward: 1.546088\nNext Confusion: 3.756994"} +{"text": "### State\nConfusion: 5.246162\nAction: question\nReward: 0.419569\nNext Confusion: 5.021464"} +{"text": "### State\nConfusion: 4.600087\nAction: analogize\nReward: -0.103434\nNext Confusion: 4.98533"} +{"text": "### State\nConfusion: 5.103688\nAction: explain\nReward: -1.820209\nNext Confusion: 5.43225"} +{"text": "### State\nConfusion: 4.324837\nAction: analogize\nReward: 0.048282\nNext Confusion: 4.139078"} +{"text": "### State\nConfusion: 2.427948\nAction: analogize\nReward: -0.687189\nNext Confusion: 2.827288"} +{"text": "### State\nConfusion: 5.790867\nAction: explain\nReward: 0.480449\nNext Confusion: 5.232456"} +{"text": "### State\nConfusion: 2.040263\nAction: analogize\nReward: -0.197799\nNext Confusion: 2.24734"} +{"text": "### State\nConfusion: 2.582153\nAction: worked_example\nReward: 0.554199\nNext Confusion: 1.467742"} +{"text": "### State\nConfusion: 4.024901\nAction: explain\nReward: 0.246961\nNext Confusion: 3.849331"} +{"text": "### State\nConfusion: 3.155271\nAction: explain\nReward: 0.93429\nNext Confusion: 2.779514"} +{"text": "### State\nConfusion: 4.333934\nAction: analogize\nReward: -1.382026\nNext Confusion: 5.333732"} +{"text": "### State\nConfusion: 3.711759\nAction: analogize\nReward: -1.054925\nNext Confusion: 4.427508"} +{"text": "### State\nConfusion: 4.197458\nAction: analogize\nReward: -0.672473\nNext Confusion: 4.707225"} +{"text": "### State\nConfusion: 3.595974\nAction: analogize\nReward: -0.30356\nNext Confusion: 3.692486"} +{"text": "### State\nConfusion: 9.424139\nAction: question\nReward: -0.051805\nNext Confusion: 9.827569"} +{"text": "### State\nConfusion: 3.847582\nAction: explain\nReward: -0.545496\nNext Confusion: 4.474236"} +{"text": "### State\nConfusion: 4.257097\nAction: correct_fact\nReward: -1.007093\nNext Confusion: 5.195939"} +{"text": "### State\nConfusion: 3.375196\nAction: analogize\nReward: -0.613092\nNext Confusion: 3.231864"} +{"text": "### State\nConfusion: 4.372277\nAction: analogize\nReward: -0.062679\nNext Confusion: 4.743267"} +{"text": "### State\nConfusion: 5.252807\nAction: analogize\nReward: -1.110442\nNext Confusion: 6.075631"} +{"text": "### State\nConfusion: 4.687475\nAction: analogize\nReward: 0.085918\nNext Confusion: 4.711984"} +{"text": "### State\nConfusion: 4.754649\nAction: analogize\nReward: -0.419951\nNext Confusion: 5.299336"} +{"text": "### State\nConfusion: 6.040562\nAction: analogize\nReward: -0.296113\nNext Confusion: 6.770728"} +{"text": "### State\nConfusion: 5.219658\nAction: explain\nReward: -0.377415\nNext Confusion: 5.787689"} +{"text": "### State\nConfusion: 4.082221\nAction: explain\nReward: 1.391503\nNext Confusion: 3.019216"} +{"text": "### State\nConfusion: 5.875564\nAction: explain\nReward: 0.35298\nNext Confusion: 5.948457"} +{"text": "### State\nConfusion: 3.879623\nAction: analogize\nReward: 0.372709\nNext Confusion: 3.920777"} +{"text": "### State\nConfusion: 4.371979\nAction: analogize\nReward: 0.109884\nNext Confusion: 4.681374"} +{"text": "### State\nConfusion: 4.799329\nAction: analogize\nReward: 0.134149\nNext Confusion: 4.814604"} +{"text": "### State\nConfusion: 2.772295\nAction: analogize\nReward: -1.053458\nNext Confusion: 3.162443"} +{"text": "### State\nConfusion: 4.656308\nAction: analogize\nReward: -0.716218\nNext Confusion: 5.735703"} +{"text": "### State\nConfusion: 3.399582\nAction: analogize\nReward: 0.010502\nNext Confusion: 3.909108"} +{"text": "### State\nConfusion: 4.250917\nAction: analogize\nReward: 0.365275\nNext Confusion: 4.656218"} +{"text": "### State\nConfusion: 3.886501\nAction: analogize\nReward: 0.02291\nNext Confusion: 4.124634"} +{"text": "### State\nConfusion: 7.677275\nAction: analogize\nReward: 0.266499\nNext Confusion: 7.756824"} +{"text": "### State\nConfusion: 4.214218\nAction: analogize\nReward: 0.21845\nNext Confusion: 4.028525"} +{"text": "### State\nConfusion: 3.2064\nAction: correct_fact\nReward: 0.527978\nNext Confusion: 2.366183"} +{"text": "### State\nConfusion: 6.950843\nAction: analogize\nReward: 0.101171\nNext Confusion: 7.08694"} +{"text": "### State\nConfusion: 3.835536\nAction: analogize\nReward: -0.621992\nNext Confusion: 4.231325"} +{"text": "### State\nConfusion: 6.523788\nAction: analogize\nReward: -0.239707\nNext Confusion: 6.37458"} +{"text": "### State\nConfusion: 3.839309\nAction: analogize\nReward: -0.047621\nNext Confusion: 4.649327"} +{"text": "### State\nConfusion: 8.457514\nAction: analogize\nReward: -0.49514\nNext Confusion: 8.68329"} +{"text": "### State\nConfusion: 6.079686\nAction: analogize\nReward: -0.505621\nNext Confusion: 6.997733"} +{"text": "### State\nConfusion: 3.052662\nAction: worked_example\nReward: 2.304519\nNext Confusion: 1.063142"} +{"text": "### State\nConfusion: 4.0362\nAction: worked_example\nReward: 1.032511\nNext Confusion: 2.903929"} +{"text": "### State\nConfusion: 5.259984\nAction: question\nReward: 0.140425\nNext Confusion: 5.599321"} +{"text": "### State\nConfusion: 5.692397\nAction: analogize\nReward: 0.152449\nNext Confusion: 5.766351"} +{"text": "### State\nConfusion: 3.010824\nAction: analogize\nReward: -0.822476\nNext Confusion: 4.061491"} +{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.54809\nNext Confusion: 9.630048"} +{"text": "### State\nConfusion: 6.365635\nAction: correct_fact\nReward: 0.303342\nNext Confusion: 6.359275"} +{"text": "### State\nConfusion: 4.506102\nAction: worked_example\nReward: 3.257079\nNext Confusion: 2.463102"} +{"text": "### State\nConfusion: 6.650249\nAction: question\nReward: 0.689143\nNext Confusion: 5.702714"} +{"text": "### State\nConfusion: 3.872385\nAction: analogize\nReward: -1.4376\nNext Confusion: 5.064346"} +{"text": "### State\nConfusion: 5.604603\nAction: question\nReward: 0.936614\nNext Confusion: 4.981571"} +{"text": "### State\nConfusion: 6.364416\nAction: question\nReward: -0.058641\nNext Confusion: 5.740748"} +{"text": "### State\nConfusion: 5.162976\nAction: analogize\nReward: -0.52851\nNext Confusion: 5.366485"} +{"text": "### State\nConfusion: 4.989271\nAction: question\nReward: 0.420257\nNext Confusion: 3.911954"} +{"text": "### State\nConfusion: 4.054232\nAction: analogize\nReward: -0.821515\nNext Confusion: 4.815986"} +{"text": "### State\nConfusion: 3.795604\nAction: correct_fact\nReward: -0.037172\nNext Confusion: 4.267697"} +{"text": "### State\nConfusion: 7.883514\nAction: analogize\nReward: -0.555406\nNext Confusion: 8.099345"} +{"text": "### State\nConfusion: 4.195034\nAction: analogize\nReward: -0.593037\nNext Confusion: 4.585204"} +{"text": "### State\nConfusion: 3.851079\nAction: question\nReward: -0.757824\nNext Confusion: 3.551079"} +{"text": "### State\nConfusion: 2.634171\nAction: analogize\nReward: -1.035621\nNext Confusion: 3.713475"} +{"text": "### State\nConfusion: 8.174025\nAction: analogize\nReward: -0.735074\nNext Confusion: 8.931459"} +{"text": "### State\nConfusion: 5.127021\nAction: analogize\nReward: -0.197244\nNext Confusion: 5.958104"} +{"text": "### State\nConfusion: 5.421781\nAction: question\nReward: 0.750681\nNext Confusion: 4.586025"} +{"text": "### State\nConfusion: 5.553261\nAction: question\nReward: 0.301218\nNext Confusion: 4.546919"} +{"text": "### State\nConfusion: 7.039034\nAction: analogize\nReward: 0.021269\nNext Confusion: 7.317868"} +{"text": "### State\nConfusion: 4.994831\nAction: analogize\nReward: -0.938323\nNext Confusion: 5.784461"} +{"text": "### State\nConfusion: 3.516691\nAction: worked_example\nReward: 1.905112\nNext Confusion: 2.429983"} +{"text": "### State\nConfusion: 9.025807\nAction: analogize\nReward: -0.680691\nNext Confusion: 9.413644"} +{"text": "### State\nConfusion: 5.016836\nAction: question\nReward: 1.476656\nNext Confusion: 4.174343"} +{"text": "### State\nConfusion: 3.783532\nAction: explain\nReward: 0.61822\nNext Confusion: 3.470704"} +{"text": "### State\nConfusion: 7.004241\nAction: analogize\nReward: 0.271521\nNext Confusion: 6.929998"} +{"text": "### State\nConfusion: 3.788744\nAction: correct_fact\nReward: 0.356577\nNext Confusion: 3.955624"} +{"text": "### State\nConfusion: 3.585034\nAction: analogize\nReward: 0.180996\nNext Confusion: 3.571377"} +{"text": "### State\nConfusion: 5.884622\nAction: analogize\nReward: 0.365228\nNext Confusion: 5.668638"} +{"text": "### State\nConfusion: 3.248542\nAction: analogize\nReward: 0.325974\nNext Confusion: 3.396679"} +{"text": "### State\nConfusion: 4.457415\nAction: question\nReward: -0.504474\nNext Confusion: 5.242377"} +{"text": "### State\nConfusion: 2.183769\nAction: analogize\nReward: -0.43012\nNext Confusion: 2.796507"} +{"text": "### State\nConfusion: 3.337488\nAction: question\nReward: 0.915789\nNext Confusion: 3.028513"} +{"text": "### State\nConfusion: 3.885993\nAction: analogize\nReward: 0.845579\nNext Confusion: 4.075472"} +{"text": "### State\nConfusion: 5.473674\nAction: explain\nReward: 0.411274\nNext Confusion: 5.449886"} +{"text": "### State\nConfusion: 6.636641\nAction: analogize\nReward: 0.105483\nNext Confusion: 6.738043"} +{"text": "### State\nConfusion: 3.585574\nAction: question\nReward: 0.51839\nNext Confusion: 3.376465"} +{"text": "### State\nConfusion: 4.07502\nAction: question\nReward: 1.004779\nNext Confusion: 3.735128"} +{"text": "### State\nConfusion: 2.115222\nAction: analogize\nReward: -1.265686\nNext Confusion: 2.850984"} +{"text": "### State\nConfusion: 3.176735\nAction: explain\nReward: 0.091548\nNext Confusion: 3.295829"} +{"text": "### State\nConfusion: 5.73152\nAction: worked_example\nReward: 0.402069\nNext Confusion: 5.152857"} +{"text": "### State\nConfusion: 2.005058\nAction: analogize\nReward: -1.365192\nNext Confusion: 2.684636"} +{"text": "### State\nConfusion: 3.535708\nAction: explain\nReward: 0.106466\nNext Confusion: 3.094336"} +{"text": "### State\nConfusion: 7.704071\nAction: explain\nReward: 0.721175\nNext Confusion: 6.908477"} +{"text": "### State\nConfusion: 5.243719\nAction: explain\nReward: 1.340814\nNext Confusion: 4.052736"} +{"text": "### State\nConfusion: 7.535902\nAction: analogize\nReward: 0.726927\nNext Confusion: 7.673487"} +{"text": "### State\nConfusion: 4.64706\nAction: analogize\nReward: -0.708878\nNext Confusion: 5.376009"} +{"text": "### State\nConfusion: 7.268917\nAction: correct_fact\nReward: -1.127313\nNext Confusion: 7.566716"} +{"text": "### State\nConfusion: 5.639025\nAction: analogize\nReward: -0.472023\nNext Confusion: 6.138049"} +{"text": "### State\nConfusion: 6.242031\nAction: analogize\nReward: 0.58307\nNext Confusion: 5.923123"} +{"text": "### State\nConfusion: 3.121297\nAction: analogize\nReward: -0.48891\nNext Confusion: 3.906541"} +{"text": "### State\nConfusion: 5.029388\nAction: correct_fact\nReward: -1.002845\nNext Confusion: 6.173575"} +{"text": "### State\nConfusion: 5.053395\nAction: analogize\nReward: -0.313301\nNext Confusion: 5.932772"} +{"text": "### State\nConfusion: 3.194567\nAction: analogize\nReward: 0.039439\nNext Confusion: 3.161913"} +{"text": "### State\nConfusion: 4.130982\nAction: explain\nReward: 0.624081\nNext Confusion: 3.669775"} +{"text": "### State\nConfusion: 7.574032\nAction: analogize\nReward: -0.309397\nNext Confusion: 8.215817"} +{"text": "### State\nConfusion: 6.767454\nAction: analogize\nReward: -0.829196\nNext Confusion: 7.24061"} +{"text": "### State\nConfusion: 2.361092\nAction: analogize\nReward: 0.40583\nNext Confusion: 2.588848"} +{"text": "### State\nConfusion: 3.66922\nAction: analogize\nReward: -0.820735\nNext Confusion: 4.432349"} +{"text": "### State\nConfusion: 6.70331\nAction: correct_fact\nReward: 0.493159\nNext Confusion: 6.04956"} +{"text": "### State\nConfusion: 4.071138\nAction: explain\nReward: -0.054266\nNext Confusion: 3.786052"} +{"text": "### State\nConfusion: 7.498981\nAction: worked_example\nReward: 0.760847\nNext Confusion: 7.090668"} +{"text": "### State\nConfusion: 4.860355\nAction: analogize\nReward: 0.671602\nNext Confusion: 5.004103"} +{"text": "### State\nConfusion: 5.043371\nAction: analogize\nReward: -1.172208\nNext Confusion: 6.048717"} +{"text": "### State\nConfusion: 7.594638\nAction: question\nReward: 0.169686\nNext Confusion: 7.070739"} +{"text": "### State\nConfusion: 4.307424\nAction: question\nReward: 0.69544\nNext Confusion: 3.544158"} +{"text": "### State\nConfusion: 3.624445\nAction: analogize\nReward: -1.039277\nNext Confusion: 4.741978"} +{"text": "### State\nConfusion: 5.519311\nAction: analogize\nReward: -0.172748\nNext Confusion: 6.113349"} +{"text": "### State\nConfusion: 7.701839\nAction: worked_example\nReward: 0.471455\nNext Confusion: 7.059561"} +{"text": "### State\nConfusion: 3.533838\nAction: analogize\nReward: -1.50476\nNext Confusion: 4.605111"} +{"text": "### State\nConfusion: 4.857623\nAction: analogize\nReward: 0.089029\nNext Confusion: 5.381652"} +{"text": "### State\nConfusion: 5.524953\nAction: explain\nReward: 1.281716\nNext Confusion: 4.764929"} +{"text": "### State\nConfusion: 4.382893\nAction: analogize\nReward: 0.915903\nNext Confusion: 4.357092"} +{"text": "### State\nConfusion: 3.510692\nAction: worked_example\nReward: 2.042163\nNext Confusion: 1.725706"} +{"text": "### State\nConfusion: 3.533618\nAction: analogize\nReward: -0.318574\nNext Confusion: 3.606989"} +{"text": "### State\nConfusion: 4.581437\nAction: analogize\nReward: -0.908441\nNext Confusion: 5.250843"} +{"text": "### State\nConfusion: 3.491754\nAction: explain\nReward: 0.09608\nNext Confusion: 3.502002"} +{"text": "### State\nConfusion: 2.910742\nAction: analogize\nReward: -0.185903\nNext Confusion: 2.924014"} +{"text": "### State\nConfusion: 4.220527\nAction: analogize\nReward: 0.157805\nNext Confusion: 4.241376"} +{"text": "### State\nConfusion: 4.236191\nAction: worked_example\nReward: 1.708519\nNext Confusion: 2.755738"} +{"text": "### State\nConfusion: 6.356474\nAction: worked_example\nReward: 1.986318\nNext Confusion: 4.847041"} +{"text": "### State\nConfusion: 6.596906\nAction: explain\nReward: 0.945082\nNext Confusion: 5.777353"} +{"text": "### State\nConfusion: 9.065663\nAction: worked_example\nReward: 2.490255\nNext Confusion: 7.027289"} +{"text": "### State\nConfusion: 5.798057\nAction: question\nReward: 1.632281\nNext Confusion: 4.945551"} +{"text": "### State\nConfusion: 7.547707\nAction: analogize\nReward: -0.470323\nNext Confusion: 8.281344"} +{"text": "### State\nConfusion: 6.886375\nAction: correct_fact\nReward: 0.841331\nNext Confusion: 6.200259"} +{"text": "### State\nConfusion: 2.545521\nAction: analogize\nReward: -0.931296\nNext Confusion: 2.997872"} +{"text": "### State\nConfusion: 3.436865\nAction: question\nReward: 0.605545\nNext Confusion: 3.034606"} +{"text": "### State\nConfusion: 5.48512\nAction: question\nReward: 0.552247\nNext Confusion: 4.395566"} +{"text": "### State\nConfusion: 1.99361\nAction: correct_fact\nReward: 0.398921\nNext Confusion: 2.023035"} +{"text": "### State\nConfusion: 5.620854\nAction: analogize\nReward: -0.537399\nNext Confusion: 6.000232"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.398559\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 4.712558\nAction: explain\nReward: 0.35094\nNext Confusion: 4.100639"} +{"text": "### State\nConfusion: 6.00095\nAction: question\nReward: -0.739487\nNext Confusion: 6.590231"} +{"text": "### State\nConfusion: 3.43446\nAction: analogize\nReward: -0.346556\nNext Confusion: 3.86933"} +{"text": "### State\nConfusion: 4.312879\nAction: correct_fact\nReward: -0.838297\nNext Confusion: 4.408473"} +{"text": "### State\nConfusion: 5.588582\nAction: analogize\nReward: -0.240599\nNext Confusion: 5.828334"} +{"text": "### State\nConfusion: 4.105963\nAction: analogize\nReward: 0.322535\nNext Confusion: 4.015285"} +{"text": "### State\nConfusion: 3.421049\nAction: analogize\nReward: 0.553435\nNext Confusion: 3.008563"} +{"text": "### State\nConfusion: 7.728908\nAction: question\nReward: 1.823694\nNext Confusion: 6.789086"} +{"text": "### State\nConfusion: 3.844443\nAction: worked_example\nReward: 0.401461\nNext Confusion: 2.993685"} +{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: -0.052131\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 4.919034\nAction: analogize\nReward: -1.120059\nNext Confusion: 5.821943"} +{"text": "### State\nConfusion: 6.176862\nAction: analogize\nReward: 0.605978\nNext Confusion: 6.245404"} +{"text": "### State\nConfusion: 3.05765\nAction: analogize\nReward: 0.151237\nNext Confusion: 3.941056"} +{"text": "### State\nConfusion: 4.497888\nAction: analogize\nReward: 0.20143\nNext Confusion: 4.707649"} +{"text": "### State\nConfusion: 2.331526\nAction: explain\nReward: -0.390644\nNext Confusion: 1.887389"} +{"text": "### State\nConfusion: 5.476439\nAction: correct_fact\nReward: -0.625659\nNext Confusion: 5.484992"} +{"text": "### State\nConfusion: 4.749814\nAction: correct_fact\nReward: 0.255413\nNext Confusion: 4.287553"} +{"text": "### State\nConfusion: 4.367284\nAction: correct_fact\nReward: 0.754914\nNext Confusion: 4.831099"} +{"text": "### State\nConfusion: 3.990207\nAction: analogize\nReward: 0.489978\nNext Confusion: 3.777346"} +{"text": "### State\nConfusion: 2.36078\nAction: correct_fact\nReward: 0.131446\nNext Confusion: 2.419431"} +{"text": "### State\nConfusion: 5.181594\nAction: analogize\nReward: -1.023529\nNext Confusion: 6.165031"} +{"text": "### State\nConfusion: 6.155968\nAction: worked_example\nReward: 1.867317\nNext Confusion: 4.540814"} +{"text": "### State\nConfusion: 3.921279\nAction: worked_example\nReward: 1.38055\nNext Confusion: 3.467207"} +{"text": "### State\nConfusion: 4.874241\nAction: analogize\nReward: 0.049841\nNext Confusion: 4.888297"} +{"text": "### State\nConfusion: 3.457221\nAction: analogize\nReward: -0.881108\nNext Confusion: 3.81485"} +{"text": "### State\nConfusion: 3.625238\nAction: analogize\nReward: -1.632032\nNext Confusion: 4.600665"} +{"text": "### State\nConfusion: 3.610027\nAction: correct_fact\nReward: 0.990948\nNext Confusion: 3.138358"} +{"text": "### State\nConfusion: 4.592354\nAction: explain\nReward: -1.103914\nNext Confusion: 4.699113"} +{"text": "### State\nConfusion: 3.965907\nAction: analogize\nReward: 0.427269\nNext Confusion: 4.632581"} +{"text": "### State\nConfusion: 6.224049\nAction: analogize\nReward: 0.403771\nNext Confusion: 6.236999"} +{"text": "### State\nConfusion: 3.513805\nAction: explain\nReward: 0.413231\nNext Confusion: 3.090316"} +{"text": "### State\nConfusion: 3.581797\nAction: analogize\nReward: -1.081045\nNext Confusion: 4.690057"} +{"text": "### State\nConfusion: 4.502285\nAction: analogize\nReward: -0.115318\nNext Confusion: 4.403182"} +{"text": "### State\nConfusion: 1.682893\nAction: analogize\nReward: 0.704574\nNext Confusion: 1.458029"} +{"text": "### State\nConfusion: 3.767197\nAction: explain\nReward: -0.390816\nNext Confusion: 3.399625"} +{"text": "### State\nConfusion: 9.782035\nAction: explain\nReward: 1.040522\nNext Confusion: 9.875731"} +{"text": "### State\nConfusion: 3.654296\nAction: analogize\nReward: 0.143296\nNext Confusion: 3.830031"} +{"text": "### State\nConfusion: 5.027151\nAction: analogize\nReward: -0.585108\nNext Confusion: 5.767681"} +{"text": "### State\nConfusion: 6.183936\nAction: question\nReward: 1.22717\nNext Confusion: 5.29231"} +{"text": "### State\nConfusion: 3.36798\nAction: analogize\nReward: -0.535297\nNext Confusion: 3.329399"} +{"text": "### State\nConfusion: 2.521666\nAction: correct_fact\nReward: -0.191486\nNext Confusion: 2.38397"} +{"text": "### State\nConfusion: 3.486265\nAction: explain\nReward: -1.035973\nNext Confusion: 3.673044"} +{"text": "### State\nConfusion: 2.735054\nAction: analogize\nReward: -0.808363\nNext Confusion: 3.324509"} +{"text": "### State\nConfusion: 3.34873\nAction: explain\nReward: -0.335458\nNext Confusion: 3.526425"} +{"text": "### State\nConfusion: 5.857225\nAction: analogize\nReward: -0.201824\nNext Confusion: 6.406119"} +{"text": "### State\nConfusion: 5.450879\nAction: worked_example\nReward: 0.994392\nNext Confusion: 4.314799"} +{"text": "### State\nConfusion: 6.129226\nAction: explain\nReward: -0.347463\nNext Confusion: 6.183104"} +{"text": "### State\nConfusion: 5.796948\nAction: worked_example\nReward: 1.181532\nNext Confusion: 4.332111"} +{"text": "### State\nConfusion: 4.019942\nAction: analogize\nReward: -1.097066\nNext Confusion: 4.38926"} +{"text": "### State\nConfusion: 6.346842\nAction: analogize\nReward: -0.05843\nNext Confusion: 6.536785"} +{"text": "### State\nConfusion: 4.351801\nAction: analogize\nReward: 1.803047\nNext Confusion: 3.652701"} +{"text": "### State\nConfusion: 3.17073\nAction: question\nReward: 0.985057\nNext Confusion: 2.444106"} +{"text": "### State\nConfusion: 3.334849\nAction: analogize\nReward: -0.158321\nNext Confusion: 3.459907"} +{"text": "### State\nConfusion: 2.858408\nAction: analogize\nReward: -1.283425\nNext Confusion: 4.023058"} +{"text": "### State\nConfusion: 3.845057\nAction: worked_example\nReward: 2.053946\nNext Confusion: 2.178561"} +{"text": "### State\nConfusion: 3.872064\nAction: analogize\nReward: -0.908379\nNext Confusion: 4.351231"} +{"text": "### State\nConfusion: 6.930616\nAction: analogize\nReward: -0.606489\nNext Confusion: 7.437333"} +{"text": "### State\nConfusion: 4.201954\nAction: analogize\nReward: -0.143287\nNext Confusion: 4.545249"} +{"text": "### State\nConfusion: 3.460118\nAction: analogize\nReward: -0.210642\nNext Confusion: 3.958432"} +{"text": "### State\nConfusion: 5.932957\nAction: analogize\nReward: -0.423899\nNext Confusion: 6.711923"} +{"text": "### State\nConfusion: 6.451794\nAction: analogize\nReward: 0.163913\nNext Confusion: 6.371612"} +{"text": "### State\nConfusion: 6.143884\nAction: analogize\nReward: -1.50412\nNext Confusion: 7.402514"} +{"text": "### State\nConfusion: 3.787366\nAction: analogize\nReward: 0.144803\nNext Confusion: 3.96288"} +{"text": "### State\nConfusion: 3.46052\nAction: analogize\nReward: -0.435846\nNext Confusion: 4.18104"} +{"text": "### State\nConfusion: 5.670783\nAction: analogize\nReward: -1.127217\nNext Confusion: 6.834265"} +{"text": "### State\nConfusion: 6.505923\nAction: analogize\nReward: 0.189847\nNext Confusion: 6.595182"} +{"text": "### State\nConfusion: 3.762953\nAction: worked_example\nReward: -0.449213\nNext Confusion: 4.189637"} +{"text": "### State\nConfusion: 7.300265\nAction: analogize\nReward: -0.242874\nNext Confusion: 7.255626"} +{"text": "### State\nConfusion: 3.446495\nAction: explain\nReward: 0.351842\nNext Confusion: 3.319413"} +{"text": "### State\nConfusion: 6.007587\nAction: analogize\nReward: -0.389668\nNext Confusion: 6.238716"} +{"text": "### State\nConfusion: 4.569729\nAction: worked_example\nReward: 1.310543\nNext Confusion: 3.685556"} +{"text": "### State\nConfusion: 4.289463\nAction: question\nReward: -3.159156\nNext Confusion: 3.588308"} +{"text": "### State\nConfusion: 3.805428\nAction: analogize\nReward: -0.137826\nNext Confusion: 4.271468"} +{"text": "### State\nConfusion: 5.339787\nAction: analogize\nReward: -0.725403\nNext Confusion: 5.959084"} +{"text": "### State\nConfusion: 6.367439\nAction: worked_example\nReward: -0.410931\nNext Confusion: 5.970048"} +{"text": "### State\nConfusion: 4.185916\nAction: analogize\nReward: -1.11682\nNext Confusion: 4.895042"} +{"text": "### State\nConfusion: 4.987749\nAction: explain\nReward: -0.259711\nNext Confusion: 5.626599"} +{"text": "### State\nConfusion: 4.17443\nAction: explain\nReward: -0.007389\nNext Confusion: 3.915439"} +{"text": "### State\nConfusion: 7.400229\nAction: analogize\nReward: -0.440491\nNext Confusion: 7.743072"} +{"text": "### State\nConfusion: 5.192929\nAction: correct_fact\nReward: 0.679553\nNext Confusion: 5.149954"} +{"text": "### State\nConfusion: 4.627472\nAction: analogize\nReward: -0.525458\nNext Confusion: 5.036076"} +{"text": "### State\nConfusion: 3.448749\nAction: correct_fact\nReward: -0.686939\nNext Confusion: 4.127151"} +{"text": "### State\nConfusion: 4.950369\nAction: analogize\nReward: -0.497006\nNext Confusion: 5.154066"} +{"text": "### State\nConfusion: 3.633871\nAction: analogize\nReward: -0.05189\nNext Confusion: 4.004499"} +{"text": "### State\nConfusion: 7.003464\nAction: worked_example\nReward: 2.687881\nNext Confusion: 4.899817"} +{"text": "### State\nConfusion: 3.792127\nAction: analogize\nReward: -0.308802\nNext Confusion: 4.434352"} +{"text": "### State\nConfusion: 3.283564\nAction: analogize\nReward: 1.190446\nNext Confusion: 3.436299"} +{"text": "### State\nConfusion: 7.512014\nAction: analogize\nReward: -0.078398\nNext Confusion: 8.393634"} +{"text": "### State\nConfusion: 3.925258\nAction: correct_fact\nReward: 0.514447\nNext Confusion: 3.727096"} +{"text": "### State\nConfusion: 6.571545\nAction: worked_example\nReward: 1.64758\nNext Confusion: 5.171705"} +{"text": "### State\nConfusion: 3.203366\nAction: analogize\nReward: -0.582273\nNext Confusion: 3.776733"} +{"text": "### State\nConfusion: 8.83236\nAction: question\nReward: -1.0229\nNext Confusion: 9.455678"} +{"text": "### State\nConfusion: 6.196302\nAction: analogize\nReward: -0.494157\nNext Confusion: 6.269166"} +{"text": "### State\nConfusion: 7.119833\nAction: analogize\nReward: -0.632348\nNext Confusion: 7.671856"} +{"text": "### State\nConfusion: 3.446799\nAction: analogize\nReward: -0.235273\nNext Confusion: 3.195469"} +{"text": "### State\nConfusion: 7.038426\nAction: worked_example\nReward: 1.144741\nNext Confusion: 5.865378"} +{"text": "### State\nConfusion: 4.291528\nAction: explain\nReward: -1.507047\nNext Confusion: 5.162663"} +{"text": "### State\nConfusion: 6.868341\nAction: question\nReward: 1.142\nNext Confusion: 6.448683"} +{"text": "### State\nConfusion: 3.429575\nAction: question\nReward: 0.260133\nNext Confusion: 3.166194"} +{"text": "### State\nConfusion: 4.569086\nAction: explain\nReward: 1.034115\nNext Confusion: 3.786755"} +{"text": "### State\nConfusion: 4.636881\nAction: analogize\nReward: 1.196279\nNext Confusion: 4.518783"} +{"text": "### State\nConfusion: 4.169616\nAction: explain\nReward: 0.036306\nNext Confusion: 3.886209"} +{"text": "### State\nConfusion: 5.246602\nAction: analogize\nReward: -0.15856\nNext Confusion: 5.307223"} +{"text": "### State\nConfusion: 3.581232\nAction: analogize\nReward: 0.436844\nNext Confusion: 3.550339"} +{"text": "### State\nConfusion: 5.709446\nAction: analogize\nReward: -0.037479\nNext Confusion: 6.23242"} +{"text": "### State\nConfusion: 6.971954\nAction: analogize\nReward: -0.883138\nNext Confusion: 7.784509"} +{"text": "### State\nConfusion: 4.800322\nAction: explain\nReward: -0.156482\nNext Confusion: 5.04152"} +{"text": "### State\nConfusion: 5.968896\nAction: question\nReward: 0.735787\nNext Confusion: 5.41699"} +{"text": "### State\nConfusion: 7.609529\nAction: analogize\nReward: -0.907761\nNext Confusion: 8.138722"} +{"text": "### State\nConfusion: 3.348556\nAction: analogize\nReward: -0.522565\nNext Confusion: 3.955441"} +{"text": "### State\nConfusion: 6.54799\nAction: question\nReward: 0.839793\nNext Confusion: 5.740779"} +{"text": "### State\nConfusion: 5.126441\nAction: explain\nReward: 2.21922\nNext Confusion: 3.600956"} +{"text": "### State\nConfusion: 4.444181\nAction: analogize\nReward: -0.272342\nNext Confusion: 4.869495"} +{"text": "### State\nConfusion: 4.507113\nAction: question\nReward: 0.040065\nNext Confusion: 4.782022"} +{"text": "### State\nConfusion: 4.47993\nAction: question\nReward: 1.40697\nNext Confusion: 3.401652"} +{"text": "### State\nConfusion: 6.068524\nAction: analogize\nReward: -0.164\nNext Confusion: 6.512142"} +{"text": "### State\nConfusion: 8.649179\nAction: analogize\nReward: -0.362601\nNext Confusion: 8.878508"} +{"text": "### State\nConfusion: 2.618763\nAction: analogize\nReward: -0.2353\nNext Confusion: 3.374444"} +{"text": "### State\nConfusion: 3.655497\nAction: analogize\nReward: 0.47788\nNext Confusion: 3.574526"} +{"text": "### State\nConfusion: 6.825738\nAction: question\nReward: 1.524791\nNext Confusion: 5.907894"} +{"text": "### State\nConfusion: 3.849934\nAction: analogize\nReward: -0.454719\nNext Confusion: 4.542527"} +{"text": "### State\nConfusion: 3.52636\nAction: analogize\nReward: -1.335167\nNext Confusion: 4.591458"} +{"text": "### State\nConfusion: 5.653258\nAction: analogize\nReward: 0.425317\nNext Confusion: 6.027638"} +{"text": "### State\nConfusion: 6.79264\nAction: question\nReward: -0.456067\nNext Confusion: 6.680654"} +{"text": "### State\nConfusion: 5.825899\nAction: explain\nReward: 1.137091\nNext Confusion: 4.860346"} +{"text": "### State\nConfusion: 4.024473\nAction: analogize\nReward: -1.130899\nNext Confusion: 5.080507"} +{"text": "### State\nConfusion: 5.027373\nAction: question\nReward: -0.279873\nNext Confusion: 5.103498"} +{"text": "### State\nConfusion: 4.107711\nAction: analogize\nReward: 0.062419\nNext Confusion: 3.716159"} +{"text": "### State\nConfusion: 6.085206\nAction: analogize\nReward: -0.034186\nNext Confusion: 6.579785"} +{"text": "### State\nConfusion: 9.329837\nAction: correct_fact\nReward: -0.34011\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 7.806332\nAction: analogize\nReward: -1.354402\nNext Confusion: 8.877954"} +{"text": "### State\nConfusion: 9.883806\nAction: analogize\nReward: -0.513869\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 6.910408\nAction: question\nReward: -0.016033\nNext Confusion: 6.908116"} +{"text": "### State\nConfusion: 4.543718\nAction: analogize\nReward: 0.746005\nNext Confusion: 4.320139"} +{"text": "### State\nConfusion: 3.82457\nAction: worked_example\nReward: 0.980693\nNext Confusion: 2.952715"} +{"text": "### State\nConfusion: 4.841876\nAction: analogize\nReward: 0.688047\nNext Confusion: 4.906706"} +{"text": "### State\nConfusion: 4.36744\nAction: explain\nReward: 0.149362\nNext Confusion: 4.382951"} +{"text": "### State\nConfusion: 3.4519\nAction: analogize\nReward: -0.289845\nNext Confusion: 3.852726"} +{"text": "### State\nConfusion: 4.559366\nAction: analogize\nReward: 0.248624\nNext Confusion: 4.250278"} +{"text": "### State\nConfusion: 4.871343\nAction: worked_example\nReward: 2.006758\nNext Confusion: 3.021118"} +{"text": "### State\nConfusion: 2.90186\nAction: worked_example\nReward: 0.47045\nNext Confusion: 1.383204"} +{"text": "### State\nConfusion: 3.085492\nAction: explain\nReward: 0.036318\nNext Confusion: 3.096477"} +{"text": "### State\nConfusion: 7.723372\nAction: analogize\nReward: -0.830827\nNext Confusion: 8.442617"} +{"text": "### State\nConfusion: 4.313051\nAction: analogize\nReward: 0.710948\nNext Confusion: 4.525727"} +{"text": "### State\nConfusion: 3.148665\nAction: correct_fact\nReward: 0.685984\nNext Confusion: 2.654194"} +{"text": "### State\nConfusion: 2.622224\nAction: explain\nReward: -0.410488\nNext Confusion: 2.958908"} +{"text": "### State\nConfusion: 4.929009\nAction: explain\nReward: 0.961794\nNext Confusion: 4.239855"} +{"text": "### State\nConfusion: 3.513432\nAction: question\nReward: 1.100215\nNext Confusion: 2.54015"} +{"text": "### State\nConfusion: 6.457459\nAction: analogize\nReward: -0.458103\nNext Confusion: 6.367057"} +{"text": "### State\nConfusion: 6.632654\nAction: analogize\nReward: 0.177102\nNext Confusion: 6.513361"} +{"text": "### State\nConfusion: 6.090064\nAction: question\nReward: 0.381686\nNext Confusion: 5.960601"} +{"text": "### State\nConfusion: 2.783939\nAction: analogize\nReward: 0.773949\nNext Confusion: 2.812135"} +{"text": "### State\nConfusion: 3.035819\nAction: analogize\nReward: -0.798472\nNext Confusion: 4.235182"} +{"text": "### State\nConfusion: 2.786038\nAction: worked_example\nReward: 0.139796\nNext Confusion: 2.822438"} +{"text": "### State\nConfusion: 3.744302\nAction: correct_fact\nReward: 0.623173\nNext Confusion: 3.392101"} +{"text": "### State\nConfusion: 3.43836\nAction: explain\nReward: 0.338272\nNext Confusion: 3.515679"} +{"text": "### State\nConfusion: 4.052148\nAction: analogize\nReward: -0.345972\nNext Confusion: 4.051585"} +{"text": "### State\nConfusion: 5.878376\nAction: analogize\nReward: 0.581745\nNext Confusion: 5.632409"} +{"text": "### State\nConfusion: 7.096414\nAction: worked_example\nReward: 0.754376\nNext Confusion: 6.939009"} +{"text": "### State\nConfusion: 3.708639\nAction: correct_fact\nReward: 0.057415\nNext Confusion: 4.125999"} +{"text": "### State\nConfusion: 7.392665\nAction: worked_example\nReward: 2.514391\nNext Confusion: 5.432892"} +{"text": "### State\nConfusion: 6.486508\nAction: analogize\nReward: -1.212701\nNext Confusion: 7.813913"} +{"text": "### State\nConfusion: 6.889399\nAction: analogize\nReward: -1.428461\nNext Confusion: 8.115419"} +{"text": "### State\nConfusion: 2.396941\nAction: analogize\nReward: 1.012256\nNext Confusion: 2.412092"} +{"text": "### State\nConfusion: 4.669146\nAction: worked_example\nReward: 0.570246\nNext Confusion: 4.402059"} +{"text": "### State\nConfusion: 4.528219\nAction: analogize\nReward: 0.682524\nNext Confusion: 4.172763"} +{"text": "### State\nConfusion: 7.065587\nAction: worked_example\nReward: 0.844311\nNext Confusion: 5.942281"} +{"text": "### State\nConfusion: 8.21628\nAction: explain\nReward: 1.303762\nNext Confusion: 7.426588"} +{"text": "### State\nConfusion: 4.642588\nAction: correct_fact\nReward: 0.030873\nNext Confusion: 4.616276"} +{"text": "### State\nConfusion: 6.436354\nAction: worked_example\nReward: 1.043848\nNext Confusion: 5.170756"} +{"text": "### State\nConfusion: 3.18685\nAction: question\nReward: 0.569739\nNext Confusion: 2.5131"} +{"text": "### State\nConfusion: 4.502162\nAction: analogize\nReward: 0.123031\nNext Confusion: 4.256639"} +{"text": "### State\nConfusion: 3.236621\nAction: analogize\nReward: -1.226024\nNext Confusion: 4.357628"} +{"text": "### State\nConfusion: 2.31076\nAction: analogize\nReward: 0.351427\nNext Confusion: 2.393765"} +{"text": "### State\nConfusion: 5.464483\nAction: analogize\nReward: -0.107958\nNext Confusion: 5.520666"} +{"text": "### State\nConfusion: 3.708102\nAction: explain\nReward: 0.049102\nNext Confusion: 2.566842"} +{"text": "### State\nConfusion: 4.404992\nAction: analogize\nReward: -0.510053\nNext Confusion: 4.818144"} +{"text": "### State\nConfusion: 3.450883\nAction: analogize\nReward: -1.250229\nNext Confusion: 4.223539"} +{"text": "### State\nConfusion: 5.159201\nAction: question\nReward: -0.898699\nNext Confusion: 5.392422"} +{"text": "### State\nConfusion: 5.612621\nAction: analogize\nReward: -1.899629\nNext Confusion: 6.393727"} +{"text": "### State\nConfusion: 2.496055\nAction: analogize\nReward: 0.083258\nNext Confusion: 2.69881"} +{"text": "### State\nConfusion: 4.094158\nAction: analogize\nReward: -0.281068\nNext Confusion: 4.18822"} +{"text": "### State\nConfusion: 6.628239\nAction: question\nReward: 0.187419\nNext Confusion: 6.615895"} +{"text": "### State\nConfusion: 5.738016\nAction: analogize\nReward: -1.199077\nNext Confusion: 6.711353"} +{"text": "### State\nConfusion: 6.312516\nAction: analogize\nReward: 0.42972\nNext Confusion: 6.950694"} +{"text": "### State\nConfusion: 3.805835\nAction: analogize\nReward: -0.01728\nNext Confusion: 3.534842"} +{"text": "### State\nConfusion: 7.522794\nAction: explain\nReward: 0.367578\nNext Confusion: 7.324257"} +{"text": "### State\nConfusion: 8.889452\nAction: analogize\nReward: -0.572201\nNext Confusion: 8.881808"} +{"text": "### State\nConfusion: 4.964946\nAction: analogize\nReward: -0.624785\nNext Confusion: 5.750177"} +{"text": "### State\nConfusion: 4.519431\nAction: analogize\nReward: -0.219609\nNext Confusion: 5.098336"} +{"text": "### State\nConfusion: 6.143557\nAction: question\nReward: 0.970562\nNext Confusion: 5.958963"} +{"text": "### State\nConfusion: 7.568764\nAction: analogize\nReward: -1.154265\nNext Confusion: 8.696467"} +{"text": "### State\nConfusion: 4.139323\nAction: explain\nReward: -0.497746\nNext Confusion: 4.463565"} +{"text": "### State\nConfusion: 8.346292\nAction: explain\nReward: 0.947031\nNext Confusion: 7.685566"} +{"text": "### State\nConfusion: 2.578793\nAction: worked_example\nReward: 0.8092\nNext Confusion: 1.972243"} +{"text": "### State\nConfusion: 3.574151\nAction: question\nReward: -0.001893\nNext Confusion: 2.94033"} +{"text": "### State\nConfusion: 3.391303\nAction: analogize\nReward: 0.159351\nNext Confusion: 2.838915"} +{"text": "### State\nConfusion: 4.434367\nAction: correct_fact\nReward: 0.725639\nNext Confusion: 3.582488"} +{"text": "### State\nConfusion: 4.485438\nAction: analogize\nReward: -1.644401\nNext Confusion: 5.267482"} +{"text": "### State\nConfusion: 3.795971\nAction: worked_example\nReward: 2.16479\nNext Confusion: 1.835349"} +{"text": "### State\nConfusion: 4.48812\nAction: analogize\nReward: -0.252056\nNext Confusion: 4.740558"} +{"text": "### State\nConfusion: 6.62453\nAction: analogize\nReward: -0.599549\nNext Confusion: 7.097754"} +{"text": "### State\nConfusion: 3.436073\nAction: analogize\nReward: -0.128818\nNext Confusion: 3.550781"} +{"text": "### State\nConfusion: 6.385056\nAction: explain\nReward: -0.614619\nNext Confusion: 6.825878"} +{"text": "### State\nConfusion: 4.543844\nAction: analogize\nReward: -0.357182\nNext Confusion: 4.814017"} +{"text": "### State\nConfusion: 5.016735\nAction: correct_fact\nReward: -1.125121\nNext Confusion: 5.913329"} +{"text": "### State\nConfusion: 3.947467\nAction: analogize\nReward: -1.027909\nNext Confusion: 5.059095"} +{"text": "### State\nConfusion: 5.171448\nAction: analogize\nReward: 0.577806\nNext Confusion: 5.198316"} +{"text": "### State\nConfusion: 8.871197\nAction: question\nReward: -0.803998\nNext Confusion: 9.891808"} +{"text": "### State\nConfusion: 4.985002\nAction: correct_fact\nReward: 0.845959\nNext Confusion: 4.625676"} +{"text": "### State\nConfusion: 5.58028\nAction: analogize\nReward: 0.36508\nNext Confusion: 5.53048"} +{"text": "### State\nConfusion: 3.853096\nAction: question\nReward: 1.561354\nNext Confusion: 3.282271"} +{"text": "### State\nConfusion: 5.917102\nAction: analogize\nReward: 0.05721\nNext Confusion: 5.640356"} +{"text": "### State\nConfusion: 3.994782\nAction: analogize\nReward: -0.043592\nNext Confusion: 4.123508"} +{"text": "### State\nConfusion: 4.253486\nAction: analogize\nReward: -1.054318\nNext Confusion: 5.083886"} +{"text": "### State\nConfusion: 3.477302\nAction: correct_fact\nReward: 0.558578\nNext Confusion: 3.341924"} +{"text": "### State\nConfusion: 6.411841\nAction: question\nReward: 0.474881\nNext Confusion: 5.896121"} +{"text": "### State\nConfusion: 5.725423\nAction: explain\nReward: -0.616484\nNext Confusion: 6.39949"} +{"text": "### State\nConfusion: 6.111221\nAction: analogize\nReward: 0.543065\nNext Confusion: 5.609612"} +{"text": "### State\nConfusion: 4.020193\nAction: analogize\nReward: -1.372713\nNext Confusion: 4.147141"} +{"text": "### State\nConfusion: 3.316797\nAction: explain\nReward: 1.090464\nNext Confusion: 2.999543"} +{"text": "### State\nConfusion: 2.233988\nAction: question\nReward: 0.011496\nNext Confusion: 2.114241"} +{"text": "### State\nConfusion: 7.038006\nAction: question\nReward: -0.747112\nNext Confusion: 6.650014"} +{"text": "### State\nConfusion: 5.194492\nAction: question\nReward: 0.315717\nNext Confusion: 4.832783"} +{"text": "### State\nConfusion: 5.63014\nAction: analogize\nReward: 0.03927\nNext Confusion: 5.690156"} +{"text": "### State\nConfusion: 3.946496\nAction: analogize\nReward: 0.064232\nNext Confusion: 5.154892"} +{"text": "### State\nConfusion: 4.438356\nAction: analogize\nReward: 0.268679\nNext Confusion: 3.728327"} +{"text": "### State\nConfusion: 5.453129\nAction: worked_example\nReward: 1.927628\nNext Confusion: 3.642222"} +{"text": "### State\nConfusion: 9.513212\nAction: question\nReward: 0.707923\nNext Confusion: 8.978129"} +{"text": "### State\nConfusion: 4.612859\nAction: question\nReward: 0.298371\nNext Confusion: 3.895462"} +{"text": "### State\nConfusion: 4.710078\nAction: explain\nReward: -0.092907\nNext Confusion: 5.249335"} +{"text": "### State\nConfusion: 5.980734\nAction: analogize\nReward: -0.194105\nNext Confusion: 6.089187"} +{"text": "### State\nConfusion: 2.867381\nAction: analogize\nReward: -0.180572\nNext Confusion: 3.381789"} +{"text": "### State\nConfusion: 3.278762\nAction: analogize\nReward: -0.748468\nNext Confusion: 4.113945"} +{"text": "### State\nConfusion: 5.969358\nAction: correct_fact\nReward: 1.535565\nNext Confusion: 4.871317"} +{"text": "### State\nConfusion: 7.275512\nAction: question\nReward: 0.812077\nNext Confusion: 6.31911"} +{"text": "### State\nConfusion: 2.584528\nAction: analogize\nReward: 1.050292\nNext Confusion: 2.113127"} +{"text": "### State\nConfusion: 3.503871\nAction: explain\nReward: 0.04849\nNext Confusion: 3.540186"} +{"text": "### State\nConfusion: 4.614442\nAction: analogize\nReward: 0.522376\nNext Confusion: 5.167736"} +{"text": "### State\nConfusion: 7.105223\nAction: explain\nReward: 0.918648\nNext Confusion: 6.37787"} +{"text": "### State\nConfusion: 3.646493\nAction: question\nReward: 1.323374\nNext Confusion: 2.361944"} +{"text": "### State\nConfusion: 4.541859\nAction: analogize\nReward: -0.008162\nNext Confusion: 4.688088"} +{"text": "### State\nConfusion: 3.714432\nAction: correct_fact\nReward: -1.29251\nNext Confusion: 5.221759"} +{"text": "### State\nConfusion: 4.098722\nAction: worked_example\nReward: 1.309035\nNext Confusion: 1.963477"} +{"text": "### State\nConfusion: 2.639115\nAction: analogize\nReward: 1.241288\nNext Confusion: 1.959279"} +{"text": "### State\nConfusion: 4.209787\nAction: explain\nReward: -0.674517\nNext Confusion: 4.646072"} +{"text": "### State\nConfusion: 4.237337\nAction: analogize\nReward: 0.299678\nNext Confusion: 3.768544"} +{"text": "### State\nConfusion: 4.216485\nAction: analogize\nReward: -0.08135\nNext Confusion: 4.33587"} +{"text": "### State\nConfusion: 4.031052\nAction: explain\nReward: 0.003192\nNext Confusion: 3.64745"} +{"text": "### State\nConfusion: 4.382634\nAction: worked_example\nReward: 1.73771\nNext Confusion: 2.649923"} +{"text": "### State\nConfusion: 5.132607\nAction: explain\nReward: 0.382928\nNext Confusion: 4.589305"} +{"text": "### State\nConfusion: 3.012049\nAction: correct_fact\nReward: 0.826988\nNext Confusion: 2.747293"} +{"text": "### State\nConfusion: 3.640825\nAction: question\nReward: 0.142369\nNext Confusion: 3.34056"} +{"text": "### State\nConfusion: 7.855445\nAction: analogize\nReward: -0.694125\nNext Confusion: 7.793014"} +{"text": "### State\nConfusion: 3.575469\nAction: analogize\nReward: 0.731786\nNext Confusion: 2.930617"} +{"text": "### State\nConfusion: 5.864926\nAction: correct_fact\nReward: -0.739393\nNext Confusion: 6.226672"} +{"text": "### State\nConfusion: 4.742312\nAction: worked_example\nReward: 2.256657\nNext Confusion: 3.236478"} +{"text": "### State\nConfusion: 3.681698\nAction: worked_example\nReward: 2.565199\nNext Confusion: 1.781532"} +{"text": "### State\nConfusion: 5.45446\nAction: worked_example\nReward: 1.317665\nNext Confusion: 4.860599"} +{"text": "### State\nConfusion: 3.974952\nAction: explain\nReward: 0.611576\nNext Confusion: 3.247132"} +{"text": "### State\nConfusion: 4.964568\nAction: analogize\nReward: -1.248745\nNext Confusion: 5.991206"} +{"text": "### State\nConfusion: 6.005152\nAction: question\nReward: -0.355016\nNext Confusion: 6.667488"} +{"text": "### State\nConfusion: 3.601624\nAction: explain\nReward: 1.362752\nNext Confusion: 2.763752"} +{"text": "### State\nConfusion: 3.35013\nAction: explain\nReward: 1.195844\nNext Confusion: 3.74787"} +{"text": "### State\nConfusion: 2.871308\nAction: question\nReward: 1.241142\nNext Confusion: 1.719885"} +{"text": "### State\nConfusion: 3.469416\nAction: question\nReward: 0.918632\nNext Confusion: 3.167326"} +{"text": "### State\nConfusion: 3.741222\nAction: explain\nReward: 0.985201\nNext Confusion: 3.32284"} +{"text": "### State\nConfusion: 6.040923\nAction: analogize\nReward: -0.280699\nNext Confusion: 6.575557"} +{"text": "### State\nConfusion: 7.684596\nAction: explain\nReward: 0.497976\nNext Confusion: 7.635492"} +{"text": "### State\nConfusion: 3.280561\nAction: question\nReward: 1.638365\nNext Confusion: 1.803264"} +{"text": "### State\nConfusion: 7.663548\nAction: analogize\nReward: -0.226292\nNext Confusion: 7.610589"} +{"text": "### State\nConfusion: 3.984347\nAction: explain\nReward: 0.399343\nNext Confusion: 3.598188"} +{"text": "### State\nConfusion: 4.128422\nAction: analogize\nReward: -0.020421\nNext Confusion: 4.24922"} +{"text": "### State\nConfusion: 4.86738\nAction: explain\nReward: 0.412249\nNext Confusion: 4.323055"} +{"text": "### State\nConfusion: 3.86008\nAction: analogize\nReward: -0.201859\nNext Confusion: 4.178868"} +{"text": "### State\nConfusion: 2.770174\nAction: analogize\nReward: -0.44956\nNext Confusion: 2.613835"} +{"text": "### State\nConfusion: 4.151423\nAction: worked_example\nReward: 1.723241\nNext Confusion: 2.956266"} +{"text": "### State\nConfusion: 6.310332\nAction: analogize\nReward: 0.928881\nNext Confusion: 5.853417"} +{"text": "### State\nConfusion: 3.257129\nAction: analogize\nReward: -0.580045\nNext Confusion: 3.712077"} +{"text": "### State\nConfusion: 3.381224\nAction: analogize\nReward: 0.35832\nNext Confusion: 2.915107"} +{"text": "### State\nConfusion: 5.293595\nAction: analogize\nReward: 0.024167\nNext Confusion: 5.410278"} +{"text": "### State\nConfusion: 3.664918\nAction: analogize\nReward: 0.629177\nNext Confusion: 3.607038"} +{"text": "### State\nConfusion: 3.814348\nAction: analogize\nReward: -0.991218\nNext Confusion: 4.655007"} +{"text": "### State\nConfusion: 5.846762\nAction: worked_example\nReward: 1.942244\nNext Confusion: 4.556337"} +{"text": "### State\nConfusion: 5.780618\nAction: worked_example\nReward: 0.564407\nNext Confusion: 5.13104"} +{"text": "### State\nConfusion: 3.274057\nAction: correct_fact\nReward: -0.765777\nNext Confusion: 3.590828"} +{"text": "### State\nConfusion: 3.975992\nAction: analogize\nReward: -1.591306\nNext Confusion: 4.650961"} +{"text": "### State\nConfusion: 2.629499\nAction: analogize\nReward: 0.559346\nNext Confusion: 2.924919"} +{"text": "### State\nConfusion: 4.576322\nAction: question\nReward: 1.115169\nNext Confusion: 3.710506"} +{"text": "### State\nConfusion: 4.406433\nAction: question\nReward: 0.384513\nNext Confusion: 4.542675"} +{"text": "### State\nConfusion: 3.695796\nAction: explain\nReward: -0.09499\nNext Confusion: 3.903161"} +{"text": "### State\nConfusion: 3.598451\nAction: analogize\nReward: -0.256206\nNext Confusion: 4.140747"} +{"text": "### State\nConfusion: 3.289128\nAction: analogize\nReward: -0.30123\nNext Confusion: 3.63686"} +{"text": "### State\nConfusion: 6.212872\nAction: analogize\nReward: 1.21673\nNext Confusion: 5.301171"} +{"text": "### State\nConfusion: 2.665315\nAction: analogize\nReward: 0.487963\nNext Confusion: 2.517148"} +{"text": "### State\nConfusion: 7.634206\nAction: analogize\nReward: 0.364259\nNext Confusion: 7.525106"} +{"text": "### State\nConfusion: 4.694621\nAction: analogize\nReward: -0.110508\nNext Confusion: 4.591361"} +{"text": "### State\nConfusion: 3.257582\nAction: analogize\nReward: 0.048877\nNext Confusion: 3.63751"} +{"text": "### State\nConfusion: 7.117423\nAction: analogize\nReward: 0.360643\nNext Confusion: 6.834822"} +{"text": "### State\nConfusion: 2.396412\nAction: analogize\nReward: -0.941327\nNext Confusion: 3.360732"} +{"text": "### State\nConfusion: 3.254998\nAction: explain\nReward: 0.913498\nNext Confusion: 2.997523"} +{"text": "### State\nConfusion: 4.529406\nAction: question\nReward: 0.384746\nNext Confusion: 4.120297"} +{"text": "### State\nConfusion: 3.558558\nAction: analogize\nReward: -0.363472\nNext Confusion: 3.638471"} +{"text": "### State\nConfusion: 3.043111\nAction: question\nReward: 0.349095\nNext Confusion: 2.999848"} +{"text": "### State\nConfusion: 7.750188\nAction: analogize\nReward: -0.581808\nNext Confusion: 8.439862"} +{"text": "### State\nConfusion: 3.544147\nAction: analogize\nReward: -0.685245\nNext Confusion: 4.018972"} +{"text": "### State\nConfusion: 3.828862\nAction: question\nReward: 1.050056\nNext Confusion: 3.542843"} +{"text": "### State\nConfusion: 3.389573\nAction: explain\nReward: 0.664424\nNext Confusion: 3.723847"} +{"text": "### State\nConfusion: 4.171442\nAction: analogize\nReward: -1.336033\nNext Confusion: 5.128333"} +{"text": "### State\nConfusion: 3.576818\nAction: worked_example\nReward: 0.172307\nNext Confusion: 3.437141"} +{"text": "### State\nConfusion: 4.544904\nAction: correct_fact\nReward: -0.733044\nNext Confusion: 4.568539"} +{"text": "### State\nConfusion: 7.43306\nAction: worked_example\nReward: 2.463857\nNext Confusion: 5.143341"} +{"text": "### State\nConfusion: 3.439448\nAction: analogize\nReward: -0.571421\nNext Confusion: 4.087216"} +{"text": "### State\nConfusion: 3.704676\nAction: analogize\nReward: -1.422864\nNext Confusion: 4.662764"} +{"text": "### State\nConfusion: 3.942663\nAction: correct_fact\nReward: 0.486764\nNext Confusion: 3.753754"} +{"text": "### State\nConfusion: 6.66982\nAction: worked_example\nReward: 1.728911\nNext Confusion: 4.802022"} +{"text": "### State\nConfusion: 5.246306\nAction: worked_example\nReward: -0.294401\nNext Confusion: 5.037916"} +{"text": "### State\nConfusion: 9.410426\nAction: analogize\nReward: -1.320736\nNext Confusion: 9.686011"} +{"text": "### State\nConfusion: 6.195813\nAction: analogize\nReward: 0.606987\nNext Confusion: 6.49038"} +{"text": "### State\nConfusion: 5.922952\nAction: explain\nReward: 0.232012\nNext Confusion: 5.318613"} +{"text": "### State\nConfusion: 5.650737\nAction: question\nReward: 0.866942\nNext Confusion: 4.455626"} +{"text": "### State\nConfusion: 3.919665\nAction: analogize\nReward: -0.997712\nNext Confusion: 4.632429"} +{"text": "### State\nConfusion: 3.941246\nAction: question\nReward: 0.649643\nNext Confusion: 3.760049"} +{"text": "### State\nConfusion: 6.998311\nAction: analogize\nReward: -0.427791\nNext Confusion: 7.825326"} +{"text": "### State\nConfusion: 3.439216\nAction: analogize\nReward: -0.835926\nNext Confusion: 4.451193"} +{"text": "### State\nConfusion: 6.045847\nAction: analogize\nReward: 0.214939\nNext Confusion: 6.031241"} +{"text": "### State\nConfusion: 6.210586\nAction: question\nReward: -3.694161\nNext Confusion: 6.180187"} +{"text": "### State\nConfusion: 4.026471\nAction: analogize\nReward: -0.220299\nNext Confusion: 4.869419"} +{"text": "### State\nConfusion: 7.577714\nAction: analogize\nReward: -1.174037\nNext Confusion: 8.802045"} +{"text": "### State\nConfusion: 3.543173\nAction: analogize\nReward: -0.365294\nNext Confusion: 3.866641"} +{"text": "### State\nConfusion: 5.09092\nAction: analogize\nReward: 1.13697\nNext Confusion: 4.349956"} +{"text": "### State\nConfusion: 8.079839\nAction: analogize\nReward: 0.226414\nNext Confusion: 8.204414"} +{"text": "### State\nConfusion: 7.353573\nAction: analogize\nReward: -1.642375\nNext Confusion: 8.902788"} +{"text": "### State\nConfusion: 5.057345\nAction: correct_fact\nReward: 0.380797\nNext Confusion: 4.734569"} +{"text": "### State\nConfusion: 9.910346\nAction: analogize\nReward: -1.254739\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.257287\nAction: explain\nReward: 1.209915\nNext Confusion: 2.730555"} +{"text": "### State\nConfusion: 2.891581\nAction: correct_fact\nReward: 1.087497\nNext Confusion: 2.386734"} +{"text": "### State\nConfusion: 3.396667\nAction: analogize\nReward: -0.498005\nNext Confusion: 3.937934"} +{"text": "### State\nConfusion: 6.116508\nAction: analogize\nReward: 0.1768\nNext Confusion: 6.346785"} +{"text": "### State\nConfusion: 9.778449\nAction: analogize\nReward: -0.537666\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.730837\nAction: analogize\nReward: 0.814024\nNext Confusion: 3.501215"} +{"text": "### State\nConfusion: 6.362095\nAction: question\nReward: -0.753607\nNext Confusion: 6.451304"} +{"text": "### State\nConfusion: 3.924527\nAction: question\nReward: 1.359091\nNext Confusion: 2.809764"} +{"text": "### State\nConfusion: 4.227033\nAction: analogize\nReward: -0.552635\nNext Confusion: 4.370997"} +{"text": "### State\nConfusion: 4.017463\nAction: explain\nReward: 0.588962\nNext Confusion: 3.727648"} +{"text": "### State\nConfusion: 3.403827\nAction: analogize\nReward: -0.26206\nNext Confusion: 4.009012"} +{"text": "### State\nConfusion: 5.242961\nAction: analogize\nReward: 0.309312\nNext Confusion: 5.459402"} +{"text": "### State\nConfusion: 5.55746\nAction: analogize\nReward: -0.133931\nNext Confusion: 5.654018"} +{"text": "### State\nConfusion: 7.313769\nAction: analogize\nReward: 1.068778\nNext Confusion: 7.331645"} +{"text": "### State\nConfusion: 4.527921\nAction: analogize\nReward: 0.174773\nNext Confusion: 4.651058"} +{"text": "### State\nConfusion: 4.213427\nAction: explain\nReward: 0.440255\nNext Confusion: 3.94091"} +{"text": "### State\nConfusion: 5.50433\nAction: analogize\nReward: -0.638466\nNext Confusion: 6.355938"} +{"text": "### State\nConfusion: 4.101085\nAction: analogize\nReward: -1.054182\nNext Confusion: 5.146448"} +{"text": "### State\nConfusion: 4.349844\nAction: analogize\nReward: -0.170631\nNext Confusion: 4.653336"} +{"text": "### State\nConfusion: 4.015178\nAction: correct_fact\nReward: 0.047997\nNext Confusion: 4.260065"} +{"text": "### State\nConfusion: 2.358372\nAction: analogize\nReward: -0.598425\nNext Confusion: 2.979395"} +{"text": "### State\nConfusion: 4.52034\nAction: explain\nReward: 0.450091\nNext Confusion: 4.371052"} +{"text": "### State\nConfusion: 6.761803\nAction: analogize\nReward: 1.344641\nNext Confusion: 5.632773"} +{"text": "### State\nConfusion: 5.87241\nAction: question\nReward: 0.377697\nNext Confusion: 5.843579"} +{"text": "### State\nConfusion: 3.923287\nAction: analogize\nReward: -0.657101\nNext Confusion: 4.637989"} +{"text": "### State\nConfusion: 9.313203\nAction: question\nReward: 0.856034\nNext Confusion: 8.638969"} +{"text": "### State\nConfusion: 3.786844\nAction: analogize\nReward: 0.06864\nNext Confusion: 4.0975"} +{"text": "### State\nConfusion: 5.629326\nAction: explain\nReward: 0.078317\nNext Confusion: 6.028976"} +{"text": "### State\nConfusion: 10.0\nAction: question\nReward: -0.123322\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 5.23106\nAction: analogize\nReward: -0.280209\nNext Confusion: 5.898155"} +{"text": "### State\nConfusion: 4.382555\nAction: analogize\nReward: -1.659106\nNext Confusion: 5.728748"} +{"text": "### State\nConfusion: 6.25581\nAction: question\nReward: 0.21789\nNext Confusion: 5.880257"} +{"text": "### State\nConfusion: 2.868613\nAction: analogize\nReward: 0.909742\nNext Confusion: 2.372117"} +{"text": "### State\nConfusion: 3.684495\nAction: correct_fact\nReward: -0.745467\nNext Confusion: 4.462787"} +{"text": "### State\nConfusion: 3.525275\nAction: analogize\nReward: -0.111069\nNext Confusion: 4.110827"} +{"text": "### State\nConfusion: 3.225854\nAction: analogize\nReward: -0.629036\nNext Confusion: 3.792701"} +{"text": "### State\nConfusion: 4.270598\nAction: analogize\nReward: 0.480308\nNext Confusion: 3.756942"} +{"text": "### State\nConfusion: 4.836082\nAction: analogize\nReward: -0.761899\nNext Confusion: 5.27111"} +{"text": "### State\nConfusion: 8.947462\nAction: analogize\nReward: -1.034251\nNext Confusion: 8.811069"} +{"text": "### State\nConfusion: 6.235946\nAction: explain\nReward: 0.433649\nNext Confusion: 5.740307"} +{"text": "### State\nConfusion: 7.087507\nAction: question\nReward: 1.290575\nNext Confusion: 5.952477"} +{"text": "### State\nConfusion: 3.511142\nAction: correct_fact\nReward: 0.069971\nNext Confusion: 3.650214"} +{"text": "### State\nConfusion: 3.852046\nAction: question\nReward: 0.918159\nNext Confusion: 3.057334"} +{"text": "### State\nConfusion: 6.66341\nAction: analogize\nReward: 0.553079\nNext Confusion: 6.19827"} +{"text": "### State\nConfusion: 4.063512\nAction: analogize\nReward: -0.20142\nNext Confusion: 4.057294"} +{"text": "### State\nConfusion: 9.915004\nAction: analogize\nReward: 0.468495\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 4.206673\nAction: question\nReward: 1.256381\nNext Confusion: 3.352159"} +{"text": "### State\nConfusion: 8.36885\nAction: analogize\nReward: -1.800544\nNext Confusion: 9.007835"} +{"text": "### State\nConfusion: 5.879703\nAction: explain\nReward: 0.015996\nNext Confusion: 5.779414"} +{"text": "### State\nConfusion: 3.387135\nAction: worked_example\nReward: -0.285351\nNext Confusion: 3.143748"} +{"text": "### State\nConfusion: 3.42407\nAction: explain\nReward: -1.038417\nNext Confusion: 4.218859"} +{"text": "### State\nConfusion: 5.131143\nAction: explain\nReward: 0.547275\nNext Confusion: 4.681826"} +{"text": "### State\nConfusion: 6.287015\nAction: question\nReward: 2.149246\nNext Confusion: 5.357315"} +{"text": "### State\nConfusion: 3.635542\nAction: correct_fact\nReward: 0.099958\nNext Confusion: 3.996667"} +{"text": "### State\nConfusion: 4.814803\nAction: analogize\nReward: -0.395849\nNext Confusion: 4.792355"} +{"text": "### State\nConfusion: 8.044463\nAction: explain\nReward: -0.297554\nNext Confusion: 7.542534"} +{"text": "### State\nConfusion: 8.401709\nAction: analogize\nReward: -0.967848\nNext Confusion: 9.894008"} +{"text": "### State\nConfusion: 6.648936\nAction: explain\nReward: 1.280506\nNext Confusion: 5.837753"} +{"text": "### State\nConfusion: 2.353691\nAction: analogize\nReward: 1.116544\nNext Confusion: 1.840919"} +{"text": "### State\nConfusion: 2.624949\nAction: analogize\nReward: 0.572625\nNext Confusion: 2.075723"} +{"text": "### State\nConfusion: 2.680676\nAction: worked_example\nReward: 0.813016\nNext Confusion: 1.673576"} +{"text": "### State\nConfusion: 6.013613\nAction: explain\nReward: 0.225777\nNext Confusion: 5.772486"} +{"text": "### State\nConfusion: 6.153138\nAction: analogize\nReward: 0.394142\nNext Confusion: 6.72213"} +{"text": "### State\nConfusion: 3.991692\nAction: analogize\nReward: -0.533825\nNext Confusion: 3.808707"} +{"text": "### State\nConfusion: 4.422607\nAction: analogize\nReward: 0.446483\nNext Confusion: 4.097887"} +{"text": "### State\nConfusion: 3.941558\nAction: analogize\nReward: -0.217683\nNext Confusion: 4.405933"} +{"text": "### State\nConfusion: 4.22295\nAction: analogize\nReward: 0.082211\nNext Confusion: 4.529451"} +{"text": "### State\nConfusion: 5.897951\nAction: analogize\nReward: -0.363261\nNext Confusion: 6.494216"} +{"text": "### State\nConfusion: 3.695955\nAction: analogize\nReward: 0.097225\nNext Confusion: 3.346347"} +{"text": "### State\nConfusion: 3.633662\nAction: worked_example\nReward: 0.364471\nNext Confusion: 3.045426"} +{"text": "### State\nConfusion: 8.276167\nAction: analogize\nReward: -0.287066\nNext Confusion: 8.230134"} +{"text": "### State\nConfusion: 7.056122\nAction: analogize\nReward: 0.076491\nNext Confusion: 7.140624"} +{"text": "### State\nConfusion: 6.252013\nAction: analogize\nReward: -0.740944\nNext Confusion: 7.121952"} +{"text": "### State\nConfusion: 9.184165\nAction: analogize\nReward: -0.583071\nNext Confusion: 9.831767"} +{"text": "### State\nConfusion: 7.184398\nAction: explain\nReward: -0.123245\nNext Confusion: 7.505643"} +{"text": "### State\nConfusion: 3.933074\nAction: analogize\nReward: -0.656493\nNext Confusion: 4.151541"} +{"text": "### State\nConfusion: 4.39857\nAction: correct_fact\nReward: 0.819409\nNext Confusion: 3.612621"} +{"text": "### State\nConfusion: 7.368159\nAction: question\nReward: -0.305729\nNext Confusion: 6.645311"} +{"text": "### State\nConfusion: 6.044369\nAction: explain\nReward: -1.484875\nNext Confusion: 6.056402"} +{"text": "### State\nConfusion: 3.630255\nAction: explain\nReward: 0.14536\nNext Confusion: 3.800318"} +{"text": "### State\nConfusion: 3.970609\nAction: explain\nReward: 0.544449\nNext Confusion: 3.713812"} +{"text": "### State\nConfusion: 6.995214\nAction: analogize\nReward: -0.404806\nNext Confusion: 7.366908"} +{"text": "### State\nConfusion: 2.539684\nAction: question\nReward: 0.684036\nNext Confusion: 1.758763"} +{"text": "### State\nConfusion: 4.291467\nAction: analogize\nReward: 0.211242\nNext Confusion: 4.725763"} +{"text": "### State\nConfusion: 3.88243\nAction: analogize\nReward: 1.228902\nNext Confusion: 3.252029"} +{"text": "### State\nConfusion: 3.367319\nAction: question\nReward: 0.896723\nNext Confusion: 2.189595"} +{"text": "### State\nConfusion: 4.465155\nAction: explain\nReward: 0.145001\nNext Confusion: 4.276695"} +{"text": "### State\nConfusion: 5.148044\nAction: analogize\nReward: -0.85651\nNext Confusion: 5.814228"} +{"text": "### State\nConfusion: 6.414485\nAction: analogize\nReward: -0.167799\nNext Confusion: 6.776225"} +{"text": "### State\nConfusion: 5.660143\nAction: explain\nReward: 0.666448\nNext Confusion: 5.634968"} +{"text": "### State\nConfusion: 3.073078\nAction: worked_example\nReward: 1.191585\nNext Confusion: 1.84006"} +{"text": "### State\nConfusion: 4.392269\nAction: explain\nReward: -0.13347\nNext Confusion: 4.073843"} +{"text": "### State\nConfusion: 5.864765\nAction: analogize\nReward: -1.350922\nNext Confusion: 6.857552"} +{"text": "### State\nConfusion: 4.392626\nAction: analogize\nReward: -1.048435\nNext Confusion: 4.934402"} +{"text": "### State\nConfusion: 7.997655\nAction: correct_fact\nReward: 0.441853\nNext Confusion: 7.844183"} +{"text": "### State\nConfusion: 4.253817\nAction: analogize\nReward: -0.180777\nNext Confusion: 4.751152"} +{"text": "### State\nConfusion: 4.194475\nAction: analogize\nReward: -0.13063\nNext Confusion: 4.203718"} +{"text": "### State\nConfusion: 3.775372\nAction: analogize\nReward: -0.102879\nNext Confusion: 4.218565"} +{"text": "### State\nConfusion: 3.37428\nAction: explain\nReward: -0.147437\nNext Confusion: 3.824593"} +{"text": "### State\nConfusion: 5.427704\nAction: explain\nReward: 0.223987\nNext Confusion: 4.968712"} +{"text": "### State\nConfusion: 3.636294\nAction: analogize\nReward: -1.018032\nNext Confusion: 4.057275"} +{"text": "### State\nConfusion: 6.214283\nAction: worked_example\nReward: 2.571471\nNext Confusion: 5.29688"} +{"text": "### State\nConfusion: 3.93336\nAction: analogize\nReward: -0.744836\nNext Confusion: 4.682724"} +{"text": "### State\nConfusion: 4.802226\nAction: analogize\nReward: -0.819463\nNext Confusion: 6.311615"} +{"text": "### State\nConfusion: 3.885915\nAction: analogize\nReward: 0.055154\nNext Confusion: 3.762716"} +{"text": "### State\nConfusion: 4.20142\nAction: analogize\nReward: -0.829035\nNext Confusion: 4.799394"} +{"text": "### State\nConfusion: 3.003221\nAction: analogize\nReward: -0.314853\nNext Confusion: 3.496412"} +{"text": "### State\nConfusion: 4.048622\nAction: analogize\nReward: -1.332944\nNext Confusion: 5.046173"} +{"text": "### State\nConfusion: 5.844535\nAction: analogize\nReward: 0.587102\nNext Confusion: 5.329001"} +{"text": "### State\nConfusion: 2.72501\nAction: analogize\nReward: -0.226736\nNext Confusion: 2.524352"} +{"text": "### State\nConfusion: 7.002024\nAction: correct_fact\nReward: 0.509639\nNext Confusion: 6.743065"} +{"text": "### State\nConfusion: 3.325475\nAction: question\nReward: 0.06805\nNext Confusion: 3.499943"} +{"text": "### State\nConfusion: 3.802268\nAction: analogize\nReward: -0.0121\nNext Confusion: 4.107089"} +{"text": "### State\nConfusion: 2.142409\nAction: correct_fact\nReward: 0.629906\nNext Confusion: 1.889285"} +{"text": "### State\nConfusion: 4.549087\nAction: analogize\nReward: -0.264564\nNext Confusion: 5.161147"} +{"text": "### State\nConfusion: 3.565095\nAction: question\nReward: -0.083092\nNext Confusion: 3.521617"} +{"text": "### State\nConfusion: 6.562612\nAction: analogize\nReward: -0.482755\nNext Confusion: 7.10455"} +{"text": "### State\nConfusion: 4.241969\nAction: explain\nReward: -0.754929\nNext Confusion: 5.114543"} +{"text": "### State\nConfusion: 2.703632\nAction: analogize\nReward: -0.503238\nNext Confusion: 3.434828"} +{"text": "### State\nConfusion: 4.048455\nAction: worked_example\nReward: 1.670669\nNext Confusion: 3.173782"} +{"text": "### State\nConfusion: 3.324484\nAction: correct_fact\nReward: -0.601735\nNext Confusion: 4.07383"} +{"text": "### State\nConfusion: 5.943546\nAction: analogize\nReward: -0.626702\nNext Confusion: 6.768705"} +{"text": "### State\nConfusion: 7.54099\nAction: worked_example\nReward: -0.038655\nNext Confusion: 7.901897"} +{"text": "### State\nConfusion: 5.921177\nAction: analogize\nReward: -0.518356\nNext Confusion: 6.339199"} +{"text": "### State\nConfusion: 3.627352\nAction: question\nReward: 1.265732\nNext Confusion: 2.68943"} +{"text": "### State\nConfusion: 4.268603\nAction: analogize\nReward: 0.224228\nNext Confusion: 4.325311"} +{"text": "### State\nConfusion: 7.438092\nAction: analogize\nReward: 0.769506\nNext Confusion: 6.760568"} +{"text": "### State\nConfusion: 5.561396\nAction: analogize\nReward: 0.192368\nNext Confusion: 6.595321"} +{"text": "### State\nConfusion: 3.728523\nAction: analogize\nReward: 0.288576\nNext Confusion: 4.402052"} +{"text": "### State\nConfusion: 2.824914\nAction: explain\nReward: -0.298342\nNext Confusion: 3.015885"} +{"text": "### State\nConfusion: 3.5681\nAction: analogize\nReward: -1.062345\nNext Confusion: 3.871775"} +{"text": "### State\nConfusion: 5.129294\nAction: correct_fact\nReward: 0.217271\nNext Confusion: 4.869125"} +{"text": "### State\nConfusion: 3.513123\nAction: analogize\nReward: 0.348811\nNext Confusion: 3.595584"} +{"text": "### State\nConfusion: 3.052817\nAction: worked_example\nReward: 2.832272\nNext Confusion: 0.91998"} +{"text": "### State\nConfusion: 7.955312\nAction: analogize\nReward: -0.117205\nNext Confusion: 8.157463"} +{"text": "### State\nConfusion: 3.807566\nAction: explain\nReward: 0.103445\nNext Confusion: 3.867911"} +{"text": "### State\nConfusion: 3.244115\nAction: worked_example\nReward: 1.05523\nNext Confusion: 1.530957"} +{"text": "### State\nConfusion: 2.521808\nAction: analogize\nReward: -0.867435\nNext Confusion: 2.434005"} +{"text": "### State\nConfusion: 9.124791\nAction: analogize\nReward: -0.841239\nNext Confusion: 9.679058"} +{"text": "### State\nConfusion: 5.325014\nAction: analogize\nReward: -1.782482\nNext Confusion: 6.970171"} +{"text": "### State\nConfusion: 4.339715\nAction: explain\nReward: 0.008463\nNext Confusion: 3.846507"} +{"text": "### State\nConfusion: 6.615342\nAction: question\nReward: 1.733143\nNext Confusion: 5.407709"} +{"text": "### State\nConfusion: 4.24576\nAction: explain\nReward: -0.29446\nNext Confusion: 4.733876"} +{"text": "### State\nConfusion: 4.335828\nAction: analogize\nReward: -0.146346\nNext Confusion: 4.271027"} +{"text": "### State\nConfusion: 4.58664\nAction: analogize\nReward: -0.103805\nNext Confusion: 4.874441"} +{"text": "### State\nConfusion: 4.006128\nAction: analogize\nReward: -0.725112\nNext Confusion: 4.798853"} +{"text": "### State\nConfusion: 5.253432\nAction: worked_example\nReward: 2.352804\nNext Confusion: 3.767527"} +{"text": "### State\nConfusion: 3.005701\nAction: explain\nReward: 0.139484\nNext Confusion: 2.093722"} +{"text": "### State\nConfusion: 3.874975\nAction: explain\nReward: -0.787808\nNext Confusion: 3.802166"} +{"text": "### State\nConfusion: 3.41289\nAction: explain\nReward: 1.219302\nNext Confusion: 2.298687"} +{"text": "### State\nConfusion: 3.578395\nAction: question\nReward: 0.811461\nNext Confusion: 2.339393"} +{"text": "### State\nConfusion: 3.207485\nAction: analogize\nReward: 0.315275\nNext Confusion: 2.610947"} +{"text": "### State\nConfusion: 2.575161\nAction: worked_example\nReward: 2.987322\nNext Confusion: 0.201491"} +{"text": "### State\nConfusion: 4.277697\nAction: analogize\nReward: 0.129558\nNext Confusion: 4.751849"} +{"text": "### State\nConfusion: 8.911267\nAction: correct_fact\nReward: -0.124284\nNext Confusion: 8.876928"} +{"text": "### State\nConfusion: 3.571608\nAction: explain\nReward: -0.07214\nNext Confusion: 3.462044"} +{"text": "### State\nConfusion: 6.763837\nAction: analogize\nReward: -0.857645\nNext Confusion: 7.317293"} +{"text": "### State\nConfusion: 9.618838\nAction: analogize\nReward: -0.535427\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.973634\nAction: explain\nReward: -0.52296\nNext Confusion: 4.014759"} +{"text": "### State\nConfusion: 4.366366\nAction: analogize\nReward: 0.616717\nNext Confusion: 3.759719"} +{"text": "### State\nConfusion: 3.682107\nAction: correct_fact\nReward: -0.336892\nNext Confusion: 3.918612"} +{"text": "### State\nConfusion: 5.293279\nAction: worked_example\nReward: 0.835641\nNext Confusion: 4.078385"} +{"text": "### State\nConfusion: 4.117586\nAction: analogize\nReward: -1.002237\nNext Confusion: 4.322363"} +{"text": "### State\nConfusion: 4.685913\nAction: question\nReward: 0.069347\nNext Confusion: 4.376846"} +{"text": "### State\nConfusion: 7.71473\nAction: worked_example\nReward: 2.620265\nNext Confusion: 6.301953"} +{"text": "### State\nConfusion: 4.964281\nAction: correct_fact\nReward: 0.475344\nNext Confusion: 5.341691"} +{"text": "### State\nConfusion: 3.69864\nAction: explain\nReward: 0.306914\nNext Confusion: 3.666771"} +{"text": "### State\nConfusion: 4.640393\nAction: correct_fact\nReward: 0.83143\nNext Confusion: 4.648771"} +{"text": "### State\nConfusion: 3.663903\nAction: explain\nReward: 0.146509\nNext Confusion: 3.056985"} +{"text": "### State\nConfusion: 4.372581\nAction: analogize\nReward: 1.235951\nNext Confusion: 3.717001"} +{"text": "### State\nConfusion: 4.812475\nAction: question\nReward: 0.287407\nNext Confusion: 4.611772"} +{"text": "### State\nConfusion: 4.098743\nAction: correct_fact\nReward: 1.399562\nNext Confusion: 3.505099"} +{"text": "### State\nConfusion: 3.51753\nAction: correct_fact\nReward: 0.518148\nNext Confusion: 3.218788"} +{"text": "### State\nConfusion: 7.673138\nAction: analogize\nReward: -0.506786\nNext Confusion: 8.241877"} +{"text": "### State\nConfusion: 5.163452\nAction: correct_fact\nReward: 0.662886\nNext Confusion: 4.834873"} +{"text": "### State\nConfusion: 3.634682\nAction: explain\nReward: 1.605538\nNext Confusion: 2.658165"} +{"text": "### State\nConfusion: 4.804548\nAction: analogize\nReward: -0.526519\nNext Confusion: 5.3719"} +{"text": "### State\nConfusion: 5.730246\nAction: analogize\nReward: -0.369718\nNext Confusion: 6.35551"} +{"text": "### State\nConfusion: 3.63205\nAction: analogize\nReward: -0.231718\nNext Confusion: 4.320949"} +{"text": "### State\nConfusion: 6.188833\nAction: explain\nReward: -0.086604\nNext Confusion: 6.12198"} +{"text": "### State\nConfusion: 3.921316\nAction: question\nReward: 0.490084\nNext Confusion: 3.494523"} +{"text": "### State\nConfusion: 3.867314\nAction: question\nReward: 0.795442\nNext Confusion: 2.899061"} +{"text": "### State\nConfusion: 6.886967\nAction: explain\nReward: -0.57278\nNext Confusion: 6.923401"} +{"text": "### State\nConfusion: 3.680679\nAction: analogize\nReward: -0.210942\nNext Confusion: 3.925346"} +{"text": "### State\nConfusion: 8.736197\nAction: explain\nReward: -0.914355\nNext Confusion: 9.615757"} +{"text": "### State\nConfusion: 3.546871\nAction: question\nReward: 1.117404\nNext Confusion: 2.736447"} +{"text": "### State\nConfusion: 6.864706\nAction: analogize\nReward: 0.214425\nNext Confusion: 6.507564"} +{"text": "### State\nConfusion: 5.739324\nAction: explain\nReward: 0.335455\nNext Confusion: 5.200863"} +{"text": "### State\nConfusion: 6.305572\nAction: analogize\nReward: 0.075181\nNext Confusion: 6.520935"} +{"text": "### State\nConfusion: 4.454816\nAction: analogize\nReward: -0.289589\nNext Confusion: 4.773785"} +{"text": "### State\nConfusion: 3.731757\nAction: explain\nReward: 0.873925\nNext Confusion: 2.932281"} +{"text": "### State\nConfusion: 5.496557\nAction: correct_fact\nReward: 0.868323\nNext Confusion: 5.627773"} +{"text": "### State\nConfusion: 4.492536\nAction: analogize\nReward: -0.193267\nNext Confusion: 4.587128"} +{"text": "### State\nConfusion: 3.143882\nAction: explain\nReward: 0.736479\nNext Confusion: 2.814084"} +{"text": "### State\nConfusion: 6.209274\nAction: question\nReward: -0.687091\nNext Confusion: 6.21104"} +{"text": "### State\nConfusion: 4.661629\nAction: explain\nReward: 0.805475\nNext Confusion: 4.795467"} +{"text": "### State\nConfusion: 3.818923\nAction: analogize\nReward: -0.621434\nNext Confusion: 4.386176"} +{"text": "### State\nConfusion: 3.080188\nAction: correct_fact\nReward: 0.555658\nNext Confusion: 2.880363"} +{"text": "### State\nConfusion: 3.506171\nAction: analogize\nReward: 0.546359\nNext Confusion: 3.254782"} +{"text": "### State\nConfusion: 6.788947\nAction: question\nReward: 1.500851\nNext Confusion: 5.293974"} +{"text": "### State\nConfusion: 4.457938\nAction: analogize\nReward: -0.031805\nNext Confusion: 4.962243"} +{"text": "### State\nConfusion: 6.438289\nAction: explain\nReward: 0.217682\nNext Confusion: 6.198248"} +{"text": "### State\nConfusion: 4.113327\nAction: worked_example\nReward: 2.453192\nNext Confusion: 2.296527"} +{"text": "### State\nConfusion: 4.691368\nAction: analogize\nReward: -0.428181\nNext Confusion: 5.063083"} +{"text": "### State\nConfusion: 6.270288\nAction: analogize\nReward: 0.043957\nNext Confusion: 7.006448"} +{"text": "### State\nConfusion: 3.941789\nAction: analogize\nReward: -1.714923\nNext Confusion: 3.669449"} +{"text": "### State\nConfusion: 6.964881\nAction: analogize\nReward: -0.724558\nNext Confusion: 7.170291"} +{"text": "### State\nConfusion: 6.03055\nAction: explain\nReward: -0.30925\nNext Confusion: 5.772726"} +{"text": "### State\nConfusion: 4.264237\nAction: question\nReward: 1.085219\nNext Confusion: 3.315322"} +{"text": "### State\nConfusion: 3.291669\nAction: analogize\nReward: -0.695105\nNext Confusion: 3.578062"} +{"text": "### State\nConfusion: 2.852946\nAction: explain\nReward: 0.896694\nNext Confusion: 2.766197"} +{"text": "### State\nConfusion: 7.655255\nAction: analogize\nReward: -0.602577\nNext Confusion: 8.688943"} +{"text": "### State\nConfusion: 7.109231\nAction: analogize\nReward: -1.82364\nNext Confusion: 7.303028"} +{"text": "### State\nConfusion: 6.423125\nAction: analogize\nReward: -0.296018\nNext Confusion: 7.129556"} +{"text": "### State\nConfusion: 3.713895\nAction: correct_fact\nReward: 1.061697\nNext Confusion: 3.08923"} +{"text": "### State\nConfusion: 5.848164\nAction: explain\nReward: 0.196832\nNext Confusion: 5.547879"} +{"text": "### State\nConfusion: 3.044751\nAction: analogize\nReward: 0.050225\nNext Confusion: 2.399746"} +{"text": "### State\nConfusion: 3.267077\nAction: correct_fact\nReward: 0.065062\nNext Confusion: 3.377568"} +{"text": "### State\nConfusion: 3.677239\nAction: analogize\nReward: 0.623853\nNext Confusion: 3.404431"} +{"text": "### State\nConfusion: 3.430977\nAction: analogize\nReward: -0.637506\nNext Confusion: 4.219885"} +{"text": "### State\nConfusion: 3.54131\nAction: analogize\nReward: -0.296704\nNext Confusion: 3.700912"} +{"text": "### State\nConfusion: 3.03738\nAction: analogize\nReward: -0.444641\nNext Confusion: 3.498177"} +{"text": "### State\nConfusion: 4.582879\nAction: analogize\nReward: -0.954809\nNext Confusion: 5.478829"} +{"text": "### State\nConfusion: 5.151244\nAction: worked_example\nReward: 2.380252\nNext Confusion: 3.726861"} +{"text": "### State\nConfusion: 5.093281\nAction: correct_fact\nReward: 0.95357\nNext Confusion: 4.533796"} +{"text": "### State\nConfusion: 7.686016\nAction: analogize\nReward: -0.938085\nNext Confusion: 8.366612"} +{"text": "### State\nConfusion: 4.178343\nAction: worked_example\nReward: 0.374517\nNext Confusion: 3.552812"} +{"text": "### State\nConfusion: 6.83574\nAction: question\nReward: 0.747584\nNext Confusion: 6.205879"} +{"text": "### State\nConfusion: 4.541863\nAction: analogize\nReward: -0.557367\nNext Confusion: 4.824588"} +{"text": "### State\nConfusion: 3.081633\nAction: explain\nReward: 0.136932\nNext Confusion: 2.914456"} +{"text": "### State\nConfusion: 2.926489\nAction: worked_example\nReward: 1.157325\nNext Confusion: 1.859197"} +{"text": "### State\nConfusion: 4.609409\nAction: analogize\nReward: 0.394905\nNext Confusion: 4.347848"} +{"text": "### State\nConfusion: 5.320403\nAction: correct_fact\nReward: 0.135497\nNext Confusion: 5.200538"} +{"text": "### State\nConfusion: 4.829214\nAction: explain\nReward: -0.83525\nNext Confusion: 5.605744"} +{"text": "### State\nConfusion: 6.698942\nAction: analogize\nReward: 0.293412\nNext Confusion: 6.224935"} +{"text": "### State\nConfusion: 3.408383\nAction: analogize\nReward: -1.13421\nNext Confusion: 4.121609"} +{"text": "### State\nConfusion: 2.550534\nAction: explain\nReward: 1.595885\nNext Confusion: 2.337572"} +{"text": "### State\nConfusion: 4.26689\nAction: analogize\nReward: -0.923485\nNext Confusion: 4.911927"} +{"text": "### State\nConfusion: 4.54758\nAction: analogize\nReward: 0.028784\nNext Confusion: 4.688157"} +{"text": "### State\nConfusion: 3.963094\nAction: analogize\nReward: 0.601362\nNext Confusion: 3.715459"} +{"text": "### State\nConfusion: 4.264097\nAction: analogize\nReward: 0.165267\nNext Confusion: 4.050135"} +{"text": "### State\nConfusion: 4.775359\nAction: analogize\nReward: 0.805797\nNext Confusion: 4.071631"} +{"text": "### State\nConfusion: 4.309736\nAction: analogize\nReward: -1.096488\nNext Confusion: 4.92112"} +{"text": "### State\nConfusion: 3.836866\nAction: analogize\nReward: 0.243327\nNext Confusion: 3.894223"} +{"text": "### State\nConfusion: 3.551428\nAction: analogize\nReward: -0.240741\nNext Confusion: 3.635041"} +{"text": "### State\nConfusion: 6.219377\nAction: analogize\nReward: -0.440877\nNext Confusion: 6.443679"} +{"text": "### State\nConfusion: 8.134678\nAction: analogize\nReward: -0.952597\nNext Confusion: 9.351403"} +{"text": "### State\nConfusion: 7.455897\nAction: worked_example\nReward: 3.000091\nNext Confusion: 5.743115"} +{"text": "### State\nConfusion: 3.591007\nAction: analogize\nReward: 0.415582\nNext Confusion: 3.694337"} +{"text": "### State\nConfusion: 5.798405\nAction: correct_fact\nReward: 0.444779\nNext Confusion: 5.077715"} +{"text": "### State\nConfusion: 3.156499\nAction: correct_fact\nReward: 0.71058\nNext Confusion: 3.226074"} +{"text": "### State\nConfusion: 2.320361\nAction: analogize\nReward: -0.406777\nNext Confusion: 2.661581"} +{"text": "### State\nConfusion: 3.254967\nAction: explain\nReward: 0.564712\nNext Confusion: 2.827411"} +{"text": "### State\nConfusion: 3.106249\nAction: analogize\nReward: -0.246119\nNext Confusion: 3.193833"} +{"text": "### State\nConfusion: 6.7713\nAction: analogize\nReward: -0.996812\nNext Confusion: 7.599147"} +{"text": "### State\nConfusion: 3.82842\nAction: analogize\nReward: -0.442356\nNext Confusion: 3.9427"} +{"text": "### State\nConfusion: 3.163116\nAction: analogize\nReward: -0.57468\nNext Confusion: 3.300192"} +{"text": "### State\nConfusion: 3.769666\nAction: analogize\nReward: -1.033696\nNext Confusion: 3.965607"} +{"text": "### State\nConfusion: 4.149986\nAction: explain\nReward: 2.104363\nNext Confusion: 3.151176"} +{"text": "### State\nConfusion: 3.644197\nAction: correct_fact\nReward: 0.883312\nNext Confusion: 2.699045"} +{"text": "### State\nConfusion: 3.353661\nAction: worked_example\nReward: -1.517132\nNext Confusion: 3.120078"} +{"text": "### State\nConfusion: 3.097823\nAction: worked_example\nReward: 0.987777\nNext Confusion: 2.541394"} +{"text": "### State\nConfusion: 1.986863\nAction: analogize\nReward: -0.166395\nNext Confusion: 2.263733"} +{"text": "### State\nConfusion: 3.803623\nAction: explain\nReward: -0.599474\nNext Confusion: 4.320712"} +{"text": "### State\nConfusion: 2.989635\nAction: analogize\nReward: -0.391854\nNext Confusion: 3.153213"} +{"text": "### State\nConfusion: 3.825881\nAction: analogize\nReward: -0.304546\nNext Confusion: 4.124488"} +{"text": "### State\nConfusion: 3.925646\nAction: analogize\nReward: 1.280908\nNext Confusion: 3.250851"} +{"text": "### State\nConfusion: 3.520161\nAction: correct_fact\nReward: 1.316759\nNext Confusion: 3.189941"} +{"text": "### State\nConfusion: 5.792756\nAction: worked_example\nReward: 1.211052\nNext Confusion: 4.469619"} +{"text": "### State\nConfusion: 5.700114\nAction: worked_example\nReward: 1.746722\nNext Confusion: 4.20747"} +{"text": "### State\nConfusion: 3.441217\nAction: analogize\nReward: -1.146556\nNext Confusion: 4.742431"} +{"text": "### State\nConfusion: 6.980942\nAction: worked_example\nReward: 2.354988\nNext Confusion: 5.094528"} +{"text": "### State\nConfusion: 4.636788\nAction: worked_example\nReward: 1.395544\nNext Confusion: 2.763631"} +{"text": "### State\nConfusion: 4.57248\nAction: worked_example\nReward: -0.893071\nNext Confusion: 5.374165"} +{"text": "### State\nConfusion: 4.616354\nAction: correct_fact\nReward: -0.458687\nNext Confusion: 4.978016"} +{"text": "### State\nConfusion: 9.871258\nAction: worked_example\nReward: 0.951345\nNext Confusion: 8.626527"} +{"text": "### State\nConfusion: 5.400334\nAction: analogize\nReward: -3.107368\nNext Confusion: 4.699464"} +{"text": "### State\nConfusion: 3.301483\nAction: analogize\nReward: -0.056496\nNext Confusion: 3.928677"} +{"text": "### State\nConfusion: 6.662239\nAction: explain\nReward: -0.308954\nNext Confusion: 7.263927"} +{"text": "### State\nConfusion: 4.49765\nAction: analogize\nReward: -0.627163\nNext Confusion: 4.901896"} +{"text": "### State\nConfusion: 7.890362\nAction: correct_fact\nReward: -1.032236\nNext Confusion: 8.792729"} +{"text": "### State\nConfusion: 3.356429\nAction: analogize\nReward: 0.527811\nNext Confusion: 3.568363"} +{"text": "### State\nConfusion: 3.184431\nAction: analogize\nReward: 0.474491\nNext Confusion: 2.551207"} +{"text": "### State\nConfusion: 4.585125\nAction: analogize\nReward: 0.313417\nNext Confusion: 4.652011"} +{"text": "### State\nConfusion: 7.036767\nAction: analogize\nReward: -0.249813\nNext Confusion: 8.359595"} +{"text": "### State\nConfusion: 4.977924\nAction: analogize\nReward: -0.39375\nNext Confusion: 5.75664"} +{"text": "### State\nConfusion: 4.899474\nAction: analogize\nReward: 0.324894\nNext Confusion: 4.782554"} +{"text": "### State\nConfusion: 5.46263\nAction: analogize\nReward: 0.041644\nNext Confusion: 5.704385"} +{"text": "### State\nConfusion: 3.728646\nAction: explain\nReward: 0.553889\nNext Confusion: 3.630325"} +{"text": "### State\nConfusion: 3.874559\nAction: analogize\nReward: 0.23354\nNext Confusion: 3.770426"} +{"text": "### State\nConfusion: 4.150585\nAction: analogize\nReward: -0.386776\nNext Confusion: 4.294056"} +{"text": "### State\nConfusion: 3.750043\nAction: analogize\nReward: -0.098659\nNext Confusion: 3.729299"} +{"text": "### State\nConfusion: 4.046793\nAction: analogize\nReward: 1.120879\nNext Confusion: 3.69392"} +{"text": "### State\nConfusion: 4.282802\nAction: analogize\nReward: 0.229984\nNext Confusion: 4.534772"} +{"text": "### State\nConfusion: 9.815567\nAction: explain\nReward: 0.113322\nNext Confusion: 9.897961"} +{"text": "### State\nConfusion: 8.447412\nAction: question\nReward: 0.905137\nNext Confusion: 7.587751"} +{"text": "### State\nConfusion: 8.702408\nAction: correct_fact\nReward: 0.327523\nNext Confusion: 8.957442"} +{"text": "### State\nConfusion: 4.547598\nAction: question\nReward: 0.19703\nNext Confusion: 4.136933"} +{"text": "### State\nConfusion: 3.093116\nAction: analogize\nReward: -0.690177\nNext Confusion: 3.494307"} +{"text": "### State\nConfusion: 6.10634\nAction: analogize\nReward: -0.06686\nNext Confusion: 6.080076"} +{"text": "### State\nConfusion: 5.061502\nAction: explain\nReward: -0.08977\nNext Confusion: 5.423969"} +{"text": "### State\nConfusion: 6.275469\nAction: analogize\nReward: -0.406191\nNext Confusion: 6.53923"} +{"text": "### State\nConfusion: 2.794288\nAction: explain\nReward: -0.477082\nNext Confusion: 3.057377"} +{"text": "### State\nConfusion: 7.193281\nAction: analogize\nReward: -0.987684\nNext Confusion: 8.28434"} +{"text": "### State\nConfusion: 4.47008\nAction: analogize\nReward: 2.03497\nNext Confusion: 4.062569"} +{"text": "### State\nConfusion: 7.197754\nAction: explain\nReward: 0.563753\nNext Confusion: 6.897256"} +{"text": "### State\nConfusion: 6.664058\nAction: analogize\nReward: -1.519211\nNext Confusion: 7.664808"} +{"text": "### State\nConfusion: 9.278358\nAction: analogize\nReward: -0.130705\nNext Confusion: 9.787616"} +{"text": "### State\nConfusion: 3.831932\nAction: analogize\nReward: 0.197891\nNext Confusion: 4.277423"} +{"text": "### State\nConfusion: 5.387299\nAction: correct_fact\nReward: 0.229777\nNext Confusion: 5.204511"} +{"text": "### State\nConfusion: 3.000513\nAction: question\nReward: 0.870176\nNext Confusion: 2.886877"} +{"text": "### State\nConfusion: 4.005711\nAction: explain\nReward: 0.577621\nNext Confusion: 3.404496"} +{"text": "### State\nConfusion: 3.297642\nAction: correct_fact\nReward: 0.209132\nNext Confusion: 3.396185"} +{"text": "### State\nConfusion: 3.705981\nAction: question\nReward: -0.267627\nNext Confusion: 3.913401"} +{"text": "### State\nConfusion: 2.617965\nAction: analogize\nReward: 0.747505\nNext Confusion: 1.880171"} +{"text": "### State\nConfusion: 7.499238\nAction: analogize\nReward: 0.395212\nNext Confusion: 7.566947"} +{"text": "### State\nConfusion: 7.075433\nAction: explain\nReward: 0.394626\nNext Confusion: 6.730183"} +{"text": "### State\nConfusion: 5.561482\nAction: explain\nReward: 0.444965\nNext Confusion: 5.071474"} +{"text": "### State\nConfusion: 4.43107\nAction: question\nReward: 1.475692\nNext Confusion: 3.331222"} +{"text": "### State\nConfusion: 4.206248\nAction: worked_example\nReward: 1.131951\nNext Confusion: 2.576638"} +{"text": "### State\nConfusion: 4.046033\nAction: analogize\nReward: 0.078906\nNext Confusion: 3.6898"} +{"text": "### State\nConfusion: 3.234105\nAction: explain\nReward: 1.041857\nNext Confusion: 2.462756"} +{"text": "### State\nConfusion: 3.184991\nAction: analogize\nReward: -0.740992\nNext Confusion: 3.408039"} +{"text": "### State\nConfusion: 7.069261\nAction: worked_example\nReward: 1.829486\nNext Confusion: 5.310039"} +{"text": "### State\nConfusion: 4.233379\nAction: correct_fact\nReward: -0.757781\nNext Confusion: 4.137143"} +{"text": "### State\nConfusion: 5.110789\nAction: analogize\nReward: 0.68815\nNext Confusion: 4.324455"} +{"text": "### State\nConfusion: 6.596481\nAction: explain\nReward: -0.336485\nNext Confusion: 6.980236"} +{"text": "### State\nConfusion: 4.242512\nAction: analogize\nReward: -0.919348\nNext Confusion: 4.882627"} +{"text": "### State\nConfusion: 4.002249\nAction: analogize\nReward: -0.165824\nNext Confusion: 4.386279"} +{"text": "### State\nConfusion: 3.579485\nAction: worked_example\nReward: 1.023444\nNext Confusion: 2.80122"} +{"text": "### State\nConfusion: 3.234276\nAction: analogize\nReward: -0.679574\nNext Confusion: 4.246226"} +{"text": "### State\nConfusion: 4.081505\nAction: analogize\nReward: -0.179928\nNext Confusion: 4.543588"} +{"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 0.626965\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.283777\nAction: worked_example\nReward: 1.920165\nNext Confusion: 1.740775"} +{"text": "### State\nConfusion: 4.382779\nAction: analogize\nReward: 0.254878\nNext Confusion: 4.928416"} +{"text": "### State\nConfusion: 6.469818\nAction: analogize\nReward: 0.10987\nNext Confusion: 6.650122"} +{"text": "### State\nConfusion: 3.401842\nAction: analogize\nReward: -0.199118\nNext Confusion: 3.205163"} +{"text": "### State\nConfusion: 4.53089\nAction: analogize\nReward: -1.084081\nNext Confusion: 4.936621"} +{"text": "### State\nConfusion: 7.181551\nAction: explain\nReward: 0.857128\nNext Confusion: 7.342774"} +{"text": "### State\nConfusion: 4.468257\nAction: analogize\nReward: -0.448214\nNext Confusion: 5.072941"} +{"text": "### State\nConfusion: 3.547867\nAction: analogize\nReward: -0.621887\nNext Confusion: 4.220783"} +{"text": "### State\nConfusion: 8.407737\nAction: explain\nReward: 0.888179\nNext Confusion: 7.801633"} +{"text": "### State\nConfusion: 3.330818\nAction: analogize\nReward: 0.427229\nNext Confusion: 3.057645"} +{"text": "### State\nConfusion: 9.220737\nAction: question\nReward: 1.039978\nNext Confusion: 8.286354"} +{"text": "### State\nConfusion: 6.95472\nAction: correct_fact\nReward: 0.03733\nNext Confusion: 7.105115"} +{"text": "### State\nConfusion: 3.038147\nAction: explain\nReward: -0.052942\nNext Confusion: 3.534167"} +{"text": "### State\nConfusion: 4.203768\nAction: analogize\nReward: 0.44414\nNext Confusion: 3.748747"} +{"text": "### State\nConfusion: 4.394054\nAction: analogize\nReward: 0.863723\nNext Confusion: 4.08186"} +{"text": "### State\nConfusion: 7.25743\nAction: correct_fact\nReward: 0.615971\nNext Confusion: 6.862161"} +{"text": "### State\nConfusion: 3.332897\nAction: worked_example\nReward: 1.561058\nNext Confusion: 2.125503"} +{"text": "### State\nConfusion: 2.435858\nAction: analogize\nReward: 0.474768\nNext Confusion: 2.184254"} +{"text": "### State\nConfusion: 3.347793\nAction: question\nReward: 0.604531\nNext Confusion: 3.077612"} +{"text": "### State\nConfusion: 4.475967\nAction: analogize\nReward: -0.318223\nNext Confusion: 4.587817"} +{"text": "### State\nConfusion: 2.567078\nAction: analogize\nReward: 0.606041\nNext Confusion: 2.992178"} +{"text": "### State\nConfusion: 3.475035\nAction: correct_fact\nReward: 0.14881\nNext Confusion: 3.128379"} +{"text": "### State\nConfusion: 7.965955\nAction: analogize\nReward: -0.464932\nNext Confusion: 8.695142"} +{"text": "### State\nConfusion: 6.437036\nAction: worked_example\nReward: 1.469581\nNext Confusion: 5.73296"} +{"text": "### State\nConfusion: 5.183217\nAction: question\nReward: 0.797294\nNext Confusion: 4.91528"} +{"text": "### State\nConfusion: 4.545303\nAction: analogize\nReward: -0.524589\nNext Confusion: 4.333676"} +{"text": "### State\nConfusion: 3.575009\nAction: analogize\nReward: -0.159195\nNext Confusion: 3.466167"} +{"text": "### State\nConfusion: 3.788009\nAction: correct_fact\nReward: 0.762772\nNext Confusion: 3.053306"} +{"text": "### State\nConfusion: 9.45097\nAction: analogize\nReward: 0.388164\nNext Confusion: 9.557405"} +{"text": "### State\nConfusion: 2.62543\nAction: analogize\nReward: 0.251797\nNext Confusion: 2.618268"} +{"text": "### State\nConfusion: 2.320139\nAction: analogize\nReward: 0.979596\nNext Confusion: 1.804903"} +{"text": "### State\nConfusion: 6.174851\nAction: analogize\nReward: -0.47453\nNext Confusion: 6.922316"} +{"text": "### State\nConfusion: 4.333913\nAction: analogize\nReward: -0.327869\nNext Confusion: 4.996931"} +{"text": "### State\nConfusion: 5.262707\nAction: correct_fact\nReward: -0.606454\nNext Confusion: 5.759839"} +{"text": "### State\nConfusion: 5.174058\nAction: correct_fact\nReward: -0.344999\nNext Confusion: 5.809796"} +{"text": "### State\nConfusion: 4.217094\nAction: correct_fact\nReward: -1.139956\nNext Confusion: 4.768854"} +{"text": "### State\nConfusion: 2.933767\nAction: explain\nReward: 0.647692\nNext Confusion: 2.456029"} +{"text": "### State\nConfusion: 3.497738\nAction: correct_fact\nReward: 0.961823\nNext Confusion: 2.937699"} +{"text": "### State\nConfusion: 4.59177\nAction: analogize\nReward: 0.722301\nNext Confusion: 4.028663"} +{"text": "### State\nConfusion: 4.772787\nAction: explain\nReward: -0.079586\nNext Confusion: 5.282044"} +{"text": "### State\nConfusion: 2.663238\nAction: correct_fact\nReward: 0.744137\nNext Confusion: 2.511148"} +{"text": "### State\nConfusion: 3.442076\nAction: worked_example\nReward: 1.852468\nNext Confusion: 1.87048"} +{"text": "### State\nConfusion: 3.418484\nAction: correct_fact\nReward: -1.434631\nNext Confusion: 4.678504"} +{"text": "### State\nConfusion: 7.50074\nAction: analogize\nReward: -1.03624\nNext Confusion: 7.782828"} +{"text": "### State\nConfusion: 3.662425\nAction: analogize\nReward: 0.288273\nNext Confusion: 3.881542"} +{"text": "### State\nConfusion: 5.825604\nAction: explain\nReward: 0.167269\nNext Confusion: 5.945172"} +{"text": "### State\nConfusion: 8.298186\nAction: explain\nReward: -0.642418\nNext Confusion: 9.145937"} +{"text": "### State\nConfusion: 3.544472\nAction: analogize\nReward: 1.496793\nNext Confusion: 3.059354"} +{"text": "### State\nConfusion: 5.426896\nAction: worked_example\nReward: 2.035843\nNext Confusion: 3.913632"} +{"text": "### State\nConfusion: 7.057615\nAction: worked_example\nReward: 1.300917\nNext Confusion: 6.035897"} +{"text": "### State\nConfusion: 5.308103\nAction: worked_example\nReward: 0.597929\nNext Confusion: 4.281115"} +{"text": "### State\nConfusion: 4.660218\nAction: correct_fact\nReward: -1.198085\nNext Confusion: 5.178064"} +{"text": "### State\nConfusion: 4.182568\nAction: analogize\nReward: -0.669219\nNext Confusion: 4.998801"} +{"text": "### State\nConfusion: 3.374448\nAction: question\nReward: 0.148296\nNext Confusion: 3.367422"} +{"text": "### State\nConfusion: 2.244774\nAction: analogize\nReward: 0.139914\nNext Confusion: 2.010998"} +{"text": "### State\nConfusion: 7.57388\nAction: worked_example\nReward: 2.927079\nNext Confusion: 5.321944"} +{"text": "### State\nConfusion: 5.053628\nAction: analogize\nReward: -0.848654\nNext Confusion: 5.590656"} +{"text": "### State\nConfusion: 4.58681\nAction: question\nReward: 0.398616\nNext Confusion: 3.826893"} +{"text": "### State\nConfusion: 4.385522\nAction: analogize\nReward: 0.196747\nNext Confusion: 4.364781"} +{"text": "### State\nConfusion: 5.258719\nAction: analogize\nReward: -0.478197\nNext Confusion: 5.745747"} +{"text": "### State\nConfusion: 4.160741\nAction: analogize\nReward: -0.373814\nNext Confusion: 4.572683"} +{"text": "### State\nConfusion: 4.594178\nAction: explain\nReward: -0.379261\nNext Confusion: 4.967944"} +{"text": "### State\nConfusion: 3.734883\nAction: explain\nReward: -0.135488\nNext Confusion: 3.877711"} +{"text": "### State\nConfusion: 8.422437\nAction: correct_fact\nReward: 1.419393\nNext Confusion: 8.090842"} +{"text": "### State\nConfusion: 4.626043\nAction: question\nReward: 0.781404\nNext Confusion: 4.271732"} +{"text": "### State\nConfusion: 4.303492\nAction: analogize\nReward: -0.486873\nNext Confusion: 5.130393"} +{"text": "### State\nConfusion: 6.131956\nAction: analogize\nReward: -0.443139\nNext Confusion: 5.680384"} +{"text": "### State\nConfusion: 3.401462\nAction: analogize\nReward: -0.628829\nNext Confusion: 3.549128"} +{"text": "### State\nConfusion: 4.063388\nAction: question\nReward: 0.669799\nNext Confusion: 3.684196"} +{"text": "### State\nConfusion: 4.185434\nAction: analogize\nReward: 0.082712\nNext Confusion: 4.268066"} +{"text": "### State\nConfusion: 5.392101\nAction: analogize\nReward: -0.201228\nNext Confusion: 5.383713"} +{"text": "### State\nConfusion: 3.389313\nAction: analogize\nReward: -1.13185\nNext Confusion: 4.473208"} +{"text": "### State\nConfusion: 3.382116\nAction: explain\nReward: -0.083349\nNext Confusion: 3.077605"} +{"text": "### State\nConfusion: 6.708632\nAction: analogize\nReward: 0.235717\nNext Confusion: 6.575146"} +{"text": "### State\nConfusion: 4.120906\nAction: question\nReward: -0.201961\nNext Confusion: 4.228239"} +{"text": "### State\nConfusion: 5.375295\nAction: analogize\nReward: -0.566432\nNext Confusion: 6.098354"} +{"text": "### State\nConfusion: 5.3075\nAction: explain\nReward: 0.840567\nNext Confusion: 4.593798"} +{"text": "### State\nConfusion: 3.732203\nAction: analogize\nReward: 0.019524\nNext Confusion: 3.975943"} +{"text": "### State\nConfusion: 6.234458\nAction: question\nReward: -0.545665\nNext Confusion: 5.8335"} +{"text": "### State\nConfusion: 2.99104\nAction: analogize\nReward: -0.644436\nNext Confusion: 3.476136"} +{"text": "### State\nConfusion: 2.404935\nAction: explain\nReward: 0.535707\nNext Confusion: 2.225598"} +{"text": "### State\nConfusion: 3.773187\nAction: analogize\nReward: -0.455111\nNext Confusion: 4.499895"} +{"text": "### State\nConfusion: 3.483044\nAction: explain\nReward: 0.347968\nNext Confusion: 2.844657"} +{"text": "### State\nConfusion: 4.503647\nAction: analogize\nReward: -0.185608\nNext Confusion: 4.902484"} +{"text": "### State\nConfusion: 5.3724\nAction: explain\nReward: 0.451463\nNext Confusion: 5.077805"} +{"text": "### State\nConfusion: 3.309671\nAction: analogize\nReward: -0.143453\nNext Confusion: 3.113236"} +{"text": "### State\nConfusion: 4.513547\nAction: question\nReward: 0.606718\nNext Confusion: 4.085341"} +{"text": "### State\nConfusion: 3.603875\nAction: analogize\nReward: -1.52553\nNext Confusion: 4.786602"} +{"text": "### State\nConfusion: 7.368618\nAction: analogize\nReward: -0.765855\nNext Confusion: 7.418029"} +{"text": "### State\nConfusion: 7.257043\nAction: worked_example\nReward: 0.905552\nNext Confusion: 6.61073"} +{"text": "### State\nConfusion: 5.561694\nAction: analogize\nReward: -0.508069\nNext Confusion: 6.053774"} +{"text": "### State\nConfusion: 2.828592\nAction: analogize\nReward: -0.277313\nNext Confusion: 2.854013"} +{"text": "### State\nConfusion: 3.715696\nAction: analogize\nReward: 0.745882\nNext Confusion: 4.146977"} +{"text": "### State\nConfusion: 3.437749\nAction: analogize\nReward: 0.142635\nNext Confusion: 3.554578"} +{"text": "### State\nConfusion: 2.441164\nAction: analogize\nReward: 0.021128\nNext Confusion: 2.135772"} +{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.631064\nNext Confusion: 9.471717"} +{"text": "### State\nConfusion: 3.799863\nAction: analogize\nReward: -0.517685\nNext Confusion: 4.380528"} +{"text": "### State\nConfusion: 4.649355\nAction: analogize\nReward: -0.19733\nNext Confusion: 5.630637"} +{"text": "### State\nConfusion: 4.754737\nAction: analogize\nReward: 0.957142\nNext Confusion: 4.203445"} +{"text": "### State\nConfusion: 3.006349\nAction: analogize\nReward: 0.189458\nNext Confusion: 3.092292"} +{"text": "### State\nConfusion: 3.176708\nAction: analogize\nReward: 0.504138\nNext Confusion: 3.474577"} +{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.068862\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.380745\nAction: analogize\nReward: 0.278275\nNext Confusion: 3.226712"} +{"text": "### State\nConfusion: 4.818123\nAction: correct_fact\nReward: -0.642482\nNext Confusion: 5.129877"} +{"text": "### State\nConfusion: 1.902845\nAction: question\nReward: -0.081912\nNext Confusion: 2.182037"} +{"text": "### State\nConfusion: 6.238435\nAction: question\nReward: -0.37372\nNext Confusion: 5.979242"} +{"text": "### State\nConfusion: 3.918744\nAction: explain\nReward: -0.178161\nNext Confusion: 3.932915"} +{"text": "### State\nConfusion: 6.612762\nAction: analogize\nReward: 0.452281\nNext Confusion: 6.863951"} +{"text": "### State\nConfusion: 3.425594\nAction: analogize\nReward: -0.412629\nNext Confusion: 3.878789"} +{"text": "### State\nConfusion: 4.775546\nAction: analogize\nReward: -0.009128\nNext Confusion: 4.696767"} +{"text": "### State\nConfusion: 7.393962\nAction: analogize\nReward: -0.722784\nNext Confusion: 7.283319"} +{"text": "### State\nConfusion: 5.283406\nAction: analogize\nReward: 0.286061\nNext Confusion: 5.141407"} +{"text": "### State\nConfusion: 6.754779\nAction: worked_example\nReward: 2.055535\nNext Confusion: 5.141479"} +{"text": "### State\nConfusion: 4.003936\nAction: question\nReward: -0.513979\nNext Confusion: 4.440584"} +{"text": "### State\nConfusion: 10.0\nAction: explain\nReward: 0.83318\nNext Confusion: 9.88262"} +{"text": "### State\nConfusion: 6.166142\nAction: worked_example\nReward: 0.421776\nNext Confusion: 5.762331"} +{"text": "### State\nConfusion: 3.861957\nAction: question\nReward: 0.724535\nNext Confusion: 3.452488"} +{"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 1.315561\nNext Confusion: 7.715245"} +{"text": "### State\nConfusion: 8.259786\nAction: explain\nReward: 0.558777\nNext Confusion: 7.904824"} +{"text": "### State\nConfusion: 3.152868\nAction: correct_fact\nReward: 0.234806\nNext Confusion: 3.087118"} +{"text": "### State\nConfusion: 6.291952\nAction: analogize\nReward: -1.134346\nNext Confusion: 6.971364"} +{"text": "### State\nConfusion: 3.391283\nAction: explain\nReward: -0.301607\nNext Confusion: 3.773489"} +{"text": "### State\nConfusion: 7.058365\nAction: question\nReward: 0.096737\nNext Confusion: 6.488666"} +{"text": "### State\nConfusion: 5.330192\nAction: question\nReward: 0.434085\nNext Confusion: 4.503624"} +{"text": "### State\nConfusion: 7.3815\nAction: explain\nReward: -0.624081\nNext Confusion: 7.829628"} +{"text": "### State\nConfusion: 3.936956\nAction: explain\nReward: 0.156011\nNext Confusion: 4.77193"} +{"text": "### State\nConfusion: 8.507776\nAction: analogize\nReward: -0.267206\nNext Confusion: 8.839627"} +{"text": "### State\nConfusion: 3.966226\nAction: analogize\nReward: 0.07544\nNext Confusion: 3.734597"} +{"text": "### State\nConfusion: 3.389673\nAction: analogize\nReward: -0.385492\nNext Confusion: 3.757603"} +{"text": "### State\nConfusion: 4.152238\nAction: explain\nReward: 0.194201\nNext Confusion: 3.124795"} +{"text": "### State\nConfusion: 3.671381\nAction: explain\nReward: -0.851671\nNext Confusion: 4.85722"} +{"text": "### State\nConfusion: 3.87501\nAction: analogize\nReward: 0.963411\nNext Confusion: 4.011062"} +{"text": "### State\nConfusion: 3.591396\nAction: correct_fact\nReward: -0.060506\nNext Confusion: 3.749811"} +{"text": "### State\nConfusion: 5.843234\nAction: explain\nReward: 1.538094\nNext Confusion: 4.893917"} +{"text": "### State\nConfusion: 6.427896\nAction: worked_example\nReward: 0.989949\nNext Confusion: 4.926106"} +{"text": "### State\nConfusion: 6.490404\nAction: analogize\nReward: -0.035181\nNext Confusion: 6.649741"} +{"text": "### State\nConfusion: 4.298608\nAction: analogize\nReward: 0.51613\nNext Confusion: 4.339033"} +{"text": "### State\nConfusion: 2.794963\nAction: explain\nReward: -0.328953\nNext Confusion: 3.284198"} +{"text": "### State\nConfusion: 0.692194\nAction: worked_example\nReward: 0.527875\nNext Confusion: 0.0"} +{"text": "### State\nConfusion: 3.597074\nAction: explain\nReward: -0.452491\nNext Confusion: 4.077954"} +{"text": "### State\nConfusion: 3.767463\nAction: analogize\nReward: -1.452971\nNext Confusion: 4.617874"} +{"text": "### State\nConfusion: 4.362273\nAction: worked_example\nReward: 0.933517\nNext Confusion: 3.596099"} +{"text": "### State\nConfusion: 4.670532\nAction: analogize\nReward: -1.184355\nNext Confusion: 4.818303"} +{"text": "### State\nConfusion: 3.000032\nAction: analogize\nReward: -0.435179\nNext Confusion: 4.011406"} +{"text": "### State\nConfusion: 3.608845\nAction: explain\nReward: 0.150313\nNext Confusion: 3.592252"} +{"text": "### State\nConfusion: 3.324292\nAction: question\nReward: -0.160978\nNext Confusion: 3.667143"} +{"text": "### State\nConfusion: 3.206405\nAction: analogize\nReward: 0.718679\nNext Confusion: 2.95461"} +{"text": "### State\nConfusion: 6.566045\nAction: analogize\nReward: -0.764221\nNext Confusion: 7.17747"} +{"text": "### State\nConfusion: 4.402418\nAction: analogize\nReward: -1.400358\nNext Confusion: 5.781072"} +{"text": "### State\nConfusion: 3.839939\nAction: analogize\nReward: 1.133303\nNext Confusion: 3.523669"} +{"text": "### State\nConfusion: 4.051802\nAction: analogize\nReward: 0.732211\nNext Confusion: 3.626338"} +{"text": "### State\nConfusion: 3.779393\nAction: correct_fact\nReward: -0.882573\nNext Confusion: 4.401586"} +{"text": "### State\nConfusion: 4.234075\nAction: analogize\nReward: 0.006339\nNext Confusion: 4.259672"} +{"text": "### State\nConfusion: 5.705486\nAction: analogize\nReward: -0.177564\nNext Confusion: 5.695643"} +{"text": "### State\nConfusion: 3.404266\nAction: question\nReward: 0.306554\nNext Confusion: 3.231444"} +{"text": "### State\nConfusion: 3.485604\nAction: analogize\nReward: 0.322089\nNext Confusion: 3.532859"} +{"text": "### State\nConfusion: 5.956239\nAction: analogize\nReward: 0.175887\nNext Confusion: 5.894433"} +{"text": "### State\nConfusion: 3.64884\nAction: correct_fact\nReward: 0.186027\nNext Confusion: 4.024583"} +{"text": "### State\nConfusion: 4.357332\nAction: analogize\nReward: 0.094647\nNext Confusion: 4.234993"} +{"text": "### State\nConfusion: 3.956645\nAction: analogize\nReward: 0.143816\nNext Confusion: 2.74001"} +{"text": "### State\nConfusion: 4.275213\nAction: analogize\nReward: -0.112997\nNext Confusion: 4.803157"} +{"text": "### State\nConfusion: 7.308207\nAction: question\nReward: -0.002123\nNext Confusion: 6.719409"} +{"text": "### State\nConfusion: 6.641357\nAction: analogize\nReward: -0.387628\nNext Confusion: 7.074882"} +{"text": "### State\nConfusion: 7.486015\nAction: analogize\nReward: -0.275908\nNext Confusion: 7.823219"} +{"text": "### State\nConfusion: 7.737795\nAction: worked_example\nReward: 0.754587\nNext Confusion: 7.086144"} +{"text": "### State\nConfusion: 6.91396\nAction: worked_example\nReward: -0.301934\nNext Confusion: 7.024792"} +{"text": "### State\nConfusion: 3.362437\nAction: analogize\nReward: -0.823035\nNext Confusion: 3.973793"} +{"text": "### State\nConfusion: 4.437325\nAction: analogize\nReward: -0.477802\nNext Confusion: 5.210613"} +{"text": "### State\nConfusion: 3.576501\nAction: analogize\nReward: -0.928897\nNext Confusion: 3.691247"} +{"text": "### State\nConfusion: 3.308704\nAction: analogize\nReward: 0.099583\nNext Confusion: 3.11851"} +{"text": "### State\nConfusion: 4.28096\nAction: analogize\nReward: -0.935757\nNext Confusion: 4.505311"} +{"text": "### State\nConfusion: 7.126233\nAction: analogize\nReward: -2.046618\nNext Confusion: 8.965673"} +{"text": "### State\nConfusion: 5.618792\nAction: explain\nReward: -0.070551\nNext Confusion: 6.015597"} +{"text": "### State\nConfusion: 7.323637\nAction: analogize\nReward: 0.798525\nNext Confusion: 7.287678"} +{"text": "### State\nConfusion: 3.949625\nAction: analogize\nReward: -0.422096\nNext Confusion: 3.592318"} +{"text": "### State\nConfusion: 5.734334\nAction: analogize\nReward: -0.061426\nNext Confusion: 5.932269"} +{"text": "### State\nConfusion: 4.095278\nAction: explain\nReward: 0.150416\nNext Confusion: 3.920164"} +{"text": "### State\nConfusion: 4.267157\nAction: correct_fact\nReward: -0.55467\nNext Confusion: 4.589067"} +{"text": "### State\nConfusion: 3.476372\nAction: analogize\nReward: 0.434812\nNext Confusion: 3.368467"} +{"text": "### State\nConfusion: 3.845721\nAction: analogize\nReward: -0.44389\nNext Confusion: 3.958671"} +{"text": "### State\nConfusion: 5.89789\nAction: explain\nReward: 0.289961\nNext Confusion: 5.80951"} +{"text": "### State\nConfusion: 3.30418\nAction: question\nReward: 0.561687\nNext Confusion: 2.938793"} +{"text": "### State\nConfusion: 5.601084\nAction: analogize\nReward: 0.090898\nNext Confusion: 6.04158"} +{"text": "### State\nConfusion: 3.804507\nAction: explain\nReward: -0.214608\nNext Confusion: 4.102091"} +{"text": "### State\nConfusion: 5.699248\nAction: analogize\nReward: -0.86557\nNext Confusion: 7.082009"} +{"text": "### State\nConfusion: 3.841494\nAction: question\nReward: 1.661304\nNext Confusion: 2.886172"} +{"text": "### State\nConfusion: 6.02223\nAction: analogize\nReward: -0.655605\nNext Confusion: 6.230086"} +{"text": "### State\nConfusion: 5.195364\nAction: explain\nReward: -0.001099\nNext Confusion: 4.825065"} +{"text": "### State\nConfusion: 4.841122\nAction: question\nReward: 1.924098\nNext Confusion: 4.41556"} +{"text": "### State\nConfusion: 3.552158\nAction: explain\nReward: 1.212741\nNext Confusion: 1.933405"} +{"text": "### State\nConfusion: 7.492652\nAction: analogize\nReward: -0.788627\nNext Confusion: 8.387055"} +{"text": "### State\nConfusion: 2.427369\nAction: analogize\nReward: -0.966672\nNext Confusion: 2.26519"} +{"text": "### State\nConfusion: 5.075143\nAction: analogize\nReward: -0.249792\nNext Confusion: 5.690755"} +{"text": "### State\nConfusion: 6.168964\nAction: analogize\nReward: -0.402024\nNext Confusion: 6.470056"} +{"text": "### State\nConfusion: 9.379551\nAction: correct_fact\nReward: 0.264547\nNext Confusion: 9.748802"} +{"text": "### State\nConfusion: 5.41286\nAction: analogize\nReward: 0.3176\nNext Confusion: 5.959353"} +{"text": "### State\nConfusion: 3.271821\nAction: analogize\nReward: -0.490908\nNext Confusion: 2.909299"} +{"text": "### State\nConfusion: 3.993392\nAction: explain\nReward: 0.02944\nNext Confusion: 4.182259"} +{"text": "### State\nConfusion: 3.636055\nAction: analogize\nReward: 0.567075\nNext Confusion: 3.196599"} +{"text": "### State\nConfusion: 4.865194\nAction: question\nReward: 0.345734\nNext Confusion: 4.333092"} +{"text": "### State\nConfusion: 3.210343\nAction: worked_example\nReward: 1.467505\nNext Confusion: 1.968105"} +{"text": "### State\nConfusion: 3.497403\nAction: analogize\nReward: -0.218089\nNext Confusion: 3.823413"} +{"text": "### State\nConfusion: 7.678603\nAction: question\nReward: 1.366405\nNext Confusion: 6.973949"} +{"text": "### State\nConfusion: 5.769523\nAction: question\nReward: 1.414712\nNext Confusion: 4.295852"} +{"text": "### State\nConfusion: 6.377737\nAction: analogize\nReward: 1.021054\nNext Confusion: 5.484272"} +{"text": "### State\nConfusion: 4.100323\nAction: analogize\nReward: 0.385391\nNext Confusion: 3.907769"} +{"text": "### State\nConfusion: 3.378142\nAction: analogize\nReward: -0.65456\nNext Confusion: 3.887522"} +{"text": "### State\nConfusion: 2.248417\nAction: analogize\nReward: 1.451595\nNext Confusion: 1.826454"} +{"text": "### State\nConfusion: 5.806881\nAction: analogize\nReward: 0.423723\nNext Confusion: 5.425294"} +{"text": "### State\nConfusion: 9.48094\nAction: question\nReward: -0.237491\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 7.971427\nAction: analogize\nReward: -0.560404\nNext Confusion: 8.549126"} +{"text": "### State\nConfusion: 4.571994\nAction: analogize\nReward: -0.816992\nNext Confusion: 4.862229"} +{"text": "### State\nConfusion: 7.443913\nAction: analogize\nReward: -0.611569\nNext Confusion: 7.605993"} +{"text": "### State\nConfusion: 7.047747\nAction: worked_example\nReward: 2.416605\nNext Confusion: 4.614026"} +{"text": "### State\nConfusion: 4.453329\nAction: analogize\nReward: -0.823522\nNext Confusion: 4.277873"} +{"text": "### State\nConfusion: 4.519389\nAction: analogize\nReward: 0.845583\nNext Confusion: 3.553505"} +{"text": "### State\nConfusion: 3.012619\nAction: worked_example\nReward: 1.78637\nNext Confusion: 1.214761"} +{"text": "### State\nConfusion: 6.717383\nAction: analogize\nReward: 0.672834\nNext Confusion: 6.077421"} +{"text": "### State\nConfusion: 8.516796\nAction: analogize\nReward: 0.896699\nNext Confusion: 8.070201"} +{"text": "### State\nConfusion: 6.385722\nAction: worked_example\nReward: 1.639703\nNext Confusion: 5.6651"} +{"text": "### State\nConfusion: 3.420088\nAction: worked_example\nReward: 1.214258\nNext Confusion: 1.911085"} +{"text": "### State\nConfusion: 3.304491\nAction: correct_fact\nReward: -0.659982\nNext Confusion: 3.89256"} +{"text": "### State\nConfusion: 4.34604\nAction: analogize\nReward: 0.367453\nNext Confusion: 4.545939"} +{"text": "### State\nConfusion: 5.70637\nAction: explain\nReward: 0.986834\nNext Confusion: 5.318554"} +{"text": "### State\nConfusion: 4.569731\nAction: explain\nReward: -0.251817\nNext Confusion: 5.206977"} +{"text": "### State\nConfusion: 6.623116\nAction: question\nReward: 0.813307\nNext Confusion: 6.747557"} +{"text": "### State\nConfusion: 2.478569\nAction: question\nReward: 0.624874\nNext Confusion: 2.662309"} +{"text": "### State\nConfusion: 2.731783\nAction: explain\nReward: -0.700712\nNext Confusion: 3.661917"} +{"text": "### State\nConfusion: 2.529542\nAction: analogize\nReward: -0.754609\nNext Confusion: 3.141305"} +{"text": "### State\nConfusion: 4.801964\nAction: explain\nReward: 0.7866\nNext Confusion: 3.92003"} +{"text": "### State\nConfusion: 5.041254\nAction: worked_example\nReward: 2.127081\nNext Confusion: 3.543361"} +{"text": "### State\nConfusion: 2.69548\nAction: analogize\nReward: -1.212333\nNext Confusion: 3.334755"} +{"text": "### State\nConfusion: 6.504969\nAction: correct_fact\nReward: -0.521073\nNext Confusion: 6.356454"} +{"text": "### State\nConfusion: 6.60377\nAction: question\nReward: 1.630758\nNext Confusion: 5.570652"} +{"text": "### State\nConfusion: 6.362903\nAction: question\nReward: 0.464984\nNext Confusion: 6.482695"} +{"text": "### State\nConfusion: 3.440521\nAction: analogize\nReward: 0.046752\nNext Confusion: 3.519933"} +{"text": "### State\nConfusion: 5.921055\nAction: correct_fact\nReward: -0.238961\nNext Confusion: 6.003142"} +{"text": "### State\nConfusion: 3.715529\nAction: analogize\nReward: 0.378563\nNext Confusion: 3.28992"} +{"text": "### State\nConfusion: 5.372323\nAction: analogize\nReward: -0.712887\nNext Confusion: 5.683921"} +{"text": "### State\nConfusion: 5.731329\nAction: analogize\nReward: -0.035359\nNext Confusion: 6.064223"} +{"text": "### State\nConfusion: 6.944568\nAction: analogize\nReward: 0.637097\nNext Confusion: 6.785502"} +{"text": "### State\nConfusion: 5.343425\nAction: question\nReward: 1.413053\nNext Confusion: 4.877586"} +{"text": "### State\nConfusion: 4.044001\nAction: correct_fact\nReward: 0.791123\nNext Confusion: 4.111125"} +{"text": "### State\nConfusion: 2.885599\nAction: question\nReward: 0.741619\nNext Confusion: 2.525219"} +{"text": "### State\nConfusion: 4.033328\nAction: analogize\nReward: -0.324879\nNext Confusion: 3.825331"} +{"text": "### State\nConfusion: 3.583706\nAction: analogize\nReward: 0.5554\nNext Confusion: 2.749167"} +{"text": "### State\nConfusion: 6.577643\nAction: analogize\nReward: 0.07324\nNext Confusion: 6.620709"} +{"text": "### State\nConfusion: 3.076465\nAction: explain\nReward: 0.322549\nNext Confusion: 2.944134"} +{"text": "### State\nConfusion: 4.1453\nAction: analogize\nReward: -0.292769\nNext Confusion: 4.125049"} +{"text": "### State\nConfusion: 3.86788\nAction: worked_example\nReward: 2.352502\nNext Confusion: 2.928135"} +{"text": "### State\nConfusion: 3.783789\nAction: analogize\nReward: 0.076052\nNext Confusion: 4.21214"} +{"text": "### State\nConfusion: 2.774392\nAction: analogize\nReward: 0.770279\nNext Confusion: 2.368991"} +{"text": "### State\nConfusion: 5.348946\nAction: explain\nReward: 0.926692\nNext Confusion: 4.922467"} +{"text": "### State\nConfusion: 6.317099\nAction: worked_example\nReward: 1.679236\nNext Confusion: 5.322797"} +{"text": "### State\nConfusion: 3.215445\nAction: worked_example\nReward: 1.053004\nNext Confusion: 1.814326"} +{"text": "### State\nConfusion: 3.391156\nAction: analogize\nReward: -0.276477\nNext Confusion: 3.740653"} +{"text": "### State\nConfusion: 2.778777\nAction: analogize\nReward: 0.684112\nNext Confusion: 2.150054"} +{"text": "### State\nConfusion: 5.19486\nAction: analogize\nReward: 1.399383\nNext Confusion: 4.791388"} +{"text": "### State\nConfusion: 6.92237\nAction: worked_example\nReward: 2.276795\nNext Confusion: 5.880061"} +{"text": "### State\nConfusion: 2.999296\nAction: correct_fact\nReward: -0.39091\nNext Confusion: 3.133839"} +{"text": "### State\nConfusion: 3.52445\nAction: worked_example\nReward: 1.497804\nNext Confusion: 2.473041"} +{"text": "### State\nConfusion: 3.491305\nAction: analogize\nReward: -1.428821\nNext Confusion: 4.530365"} +{"text": "### State\nConfusion: 7.938798\nAction: question\nReward: 1.634904\nNext Confusion: 7.270522"} +{"text": "### State\nConfusion: 3.63357\nAction: explain\nReward: -1.215149\nNext Confusion: 4.452176"} +{"text": "### State\nConfusion: 7.658671\nAction: correct_fact\nReward: -0.057792\nNext Confusion: 7.543473"} +{"text": "### State\nConfusion: 4.069772\nAction: analogize\nReward: 0.751406\nNext Confusion: 3.895314"} +{"text": "### State\nConfusion: 5.882581\nAction: correct_fact\nReward: -0.371607\nNext Confusion: 5.911959"} +{"text": "### State\nConfusion: 4.669287\nAction: explain\nReward: -0.120479\nNext Confusion: 4.880317"} +{"text": "### State\nConfusion: 4.291442\nAction: correct_fact\nReward: -0.798368\nNext Confusion: 4.442684"} +{"text": "### State\nConfusion: 5.854513\nAction: analogize\nReward: -0.261615\nNext Confusion: 6.171615"} +{"text": "### State\nConfusion: 6.836235\nAction: explain\nReward: -0.649459\nNext Confusion: 6.768502"} +{"text": "### State\nConfusion: 3.439055\nAction: analogize\nReward: -0.147351\nNext Confusion: 3.926904"} +{"text": "### State\nConfusion: 3.48054\nAction: correct_fact\nReward: 0.060255\nNext Confusion: 3.50379"} +{"text": "### State\nConfusion: 4.569834\nAction: analogize\nReward: -1.409163\nNext Confusion: 5.57295"} +{"text": "### State\nConfusion: 6.003526\nAction: question\nReward: 0.689142\nNext Confusion: 5.342512"} +{"text": "### State\nConfusion: 4.746644\nAction: analogize\nReward: -1.093781\nNext Confusion: 5.532921"} +{"text": "### State\nConfusion: 7.746864\nAction: analogize\nReward: -0.978909\nNext Confusion: 8.778952"} +{"text": "### State\nConfusion: 8.820141\nAction: explain\nReward: 0.943031\nNext Confusion: 7.812218"} +{"text": "### State\nConfusion: 8.521159\nAction: analogize\nReward: 0.563675\nNext Confusion: 9.113123"} +{"text": "### State\nConfusion: 2.340023\nAction: explain\nReward: 0.847821\nNext Confusion: 1.814186"} +{"text": "### State\nConfusion: 4.345832\nAction: question\nReward: 1.241657\nNext Confusion: 3.922095"} +{"text": "### State\nConfusion: 4.367841\nAction: analogize\nReward: -3.81745\nNext Confusion: 4.413113"} +{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.006\nNext Confusion: 10.0"} +{"text": "### State\nConfusion: 3.855926\nAction: worked_example\nReward: 1.359279\nNext Confusion: 2.140503"} +{"text": "### State\nConfusion: 5.947092\nAction: analogize\nReward: -1.062392\nNext Confusion: 6.499333"} +{"text": "### State\nConfusion: 4.267658\nAction: explain\nReward: -0.076467\nNext Confusion: 4.075721"} +{"text": "### State\nConfusion: 5.994388\nAction: worked_example\nReward: 1.125528\nNext Confusion: 4.426499"} +{"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 0.999445\nNext Confusion: 8.992741"} +{"text": "### State\nConfusion: 5.015469\nAction: question\nReward: -0.550988\nNext Confusion: 5.527272"} +{"text": "### State\nConfusion: 4.494957\nAction: analogize\nReward: -1.693285\nNext Confusion: 5.166589"} +{"text": "### State\nConfusion: 3.628634\nAction: analogize\nReward: -1.178641\nNext Confusion: 3.971706"} +{"text": "### State\nConfusion: 3.521578\nAction: explain\nReward: 0.53535\nNext Confusion: 4.013176"} +{"text": "### State\nConfusion: 3.901601\nAction: analogize\nReward: -0.547104\nNext Confusion: 4.631837"} +{"text": "### State\nConfusion: 4.647506\nAction: explain\nReward: 0.631395\nNext Confusion: 4.224959"} +{"text": "### State\nConfusion: 5.711406\nAction: explain\nReward: 0.49222\nNext Confusion: 6.024854"} +{"text": "### State\nConfusion: 4.07159\nAction: analogize\nReward: -0.424504\nNext Confusion: 4.798265"} +{"text": "### State\nConfusion: 4.140967\nAction: correct_fact\nReward: -1.231822\nNext Confusion: 5.130908"} +{"text": "### State\nConfusion: 6.352411\nAction: analogize\nReward: -1.18697\nNext Confusion: 7.181125"} +{"text": "### State\nConfusion: 7.185508\nAction: worked_example\nReward: 3.034571\nNext Confusion: 4.742269"} +{"text": "### State\nConfusion: 6.044193\nAction: correct_fact\nReward: -0.131452\nNext Confusion: 6.259499"} +{"text": "### State\nConfusion: 3.850863\nAction: correct_fact\nReward: -1.165361\nNext Confusion: 4.437674"} +{"text": "### State\nConfusion: 5.709232\nAction: analogize\nReward: -0.335393\nNext Confusion: 6.03693"} +{"text": "### State\nConfusion: 2.790189\nAction: question\nReward: -0.015608\nNext Confusion: 2.420717"} +{"text": "### State\nConfusion: 2.813883\nAction: analogize\nReward: -0.325674\nNext Confusion: 3.097643"} +{"text": "### State\nConfusion: 4.600974\nAction: analogize\nReward: -0.689222\nNext Confusion: 5.273267"} +{"text": "### State\nConfusion: 6.823961\nAction: question\nReward: 0.231637\nNext Confusion: 6.530535"} +{"text": "### State\nConfusion: 4.187769\nAction: analogize\nReward: 0.41784\nNext Confusion: 4.147315"} +{"text": "### State\nConfusion: 3.270071\nAction: analogize\nReward: -0.701233\nNext Confusion: 3.924204"} +{"text": "### State\nConfusion: 3.892113\nAction: worked_example\nReward: 1.771034\nNext Confusion: 3.077213"} +{"text": "### State\nConfusion: 3.897737\nAction: analogize\nReward: -0.880082\nNext Confusion: 4.28867"} +{"text": "### State\nConfusion: 4.182186\nAction: question\nReward: 1.025072\nNext Confusion: 3.581476"} +{"text": "### State\nConfusion: 3.280212\nAction: analogize\nReward: -0.230556\nNext Confusion: 3.718891"} +{"text": "### State\nConfusion: 5.115473\nAction: analogize\nReward: -0.052009\nNext Confusion: 5.389236"} +{"text": "### State\nConfusion: 3.25951\nAction: explain\nReward: 0.456638\nNext Confusion: 2.630789"} +{"text": "### State\nConfusion: 4.461349\nAction: explain\nReward: -0.238552\nNext Confusion: 4.220826"} +{"text": "### State\nConfusion: 3.37934\nAction: explain\nReward: 1.300042\nNext Confusion: 2.233323"} +{"text": "### State\nConfusion: 7.708539\nAction: explain\nReward: 0.779534\nNext Confusion: 7.262962"} +{"text": "### State\nConfusion: 4.487832\nAction: correct_fact\nReward: -0.640726\nNext Confusion: 4.850804"} +{"text": "### State\nConfusion: 7.527032\nAction: explain\nReward: 0.188903\nNext Confusion: 7.260336"} +{"text": "### State\nConfusion: 5.608341\nAction: correct_fact\nReward: 0.029112\nNext Confusion: 5.848946"} +{"text": "### State\nConfusion: 2.400969\nAction: analogize\nReward: -1.50911\nNext Confusion: 4.221312"} +{"text": "### State\nConfusion: 4.617443\nAction: explain\nReward: 0.140889\nNext Confusion: 4.101977"} +{"text": "### State\nConfusion: 1.713455\nAction: analogize\nReward: -1.625323\nNext Confusion: 3.040552"} +{"text": "### State\nConfusion: 3.207202\nAction: worked_example\nReward: 0.662639\nNext Confusion: 2.216147"} +{"text": "### State\nConfusion: 6.116821\nAction: explain\nReward: 0.739173\nNext Confusion: 5.414842"} +{"text": "### State\nConfusion: 7.716421\nAction: question\nReward: -0.244427\nNext Confusion: 7.80996"} +{"text": "### State\nConfusion: 4.312419\nAction: analogize\nReward: 0.045445\nNext Confusion: 4.849009"} +{"text": "### State\nConfusion: 2.497267\nAction: analogize\nReward: -0.53257\nNext Confusion: 3.211382"} +{"text": "### State\nConfusion: 4.186003\nAction: explain\nReward: -0.100915\nNext Confusion: 4.1359"} +{"text": "### State\nConfusion: 6.124384\nAction: correct_fact\nReward: -0.222745\nNext Confusion: 6.004959"} +{"text": "### State\nConfusion: 4.838198\nAction: analogize\nReward: -0.537104\nNext Confusion: 5.21931"} +{"text": "### State\nConfusion: 4.336282\nAction: analogize\nReward: 0.362864\nNext Confusion: 4.121377"} +{"text": "### State\nConfusion: 6.199643\nAction: analogize\nReward: 0.089828\nNext Confusion: 6.90752"} +{"text": "### State\nConfusion: 6.246179\nAction: worked_example\nReward: 0.016199\nNext Confusion: 6.216495"} +{"text": "### State\nConfusion: 3.753611\nAction: worked_example\nReward: 2.03355\nNext Confusion: 1.908327"} +{"text": "### State\nConfusion: 3.90575\nAction: analogize\nReward: -0.864023\nNext Confusion: 4.875975"} +{"text": "### State\nConfusion: 3.125639\nAction: analogize\nReward: -0.59186\nNext Confusion: 3.940023"} +{"text": "### State\nConfusion: 4.659585\nAction: analogize\nReward: -0.154829\nNext Confusion: 4.735131"} +{"text": "### State\nConfusion: 2.798429\nAction: analogize\nReward: -0.435412\nNext Confusion: 3.529701"} +{"text": "### State\nConfusion: 3.181256\nAction: explain\nReward: 0.548676\nNext Confusion: 3.000934"} +{"text": "### State\nConfusion: 4.824714\nAction: worked_example\nReward: 0.915687\nNext Confusion: 3.833705"} +{"text": "### State\nConfusion: 3.840567\nAction: explain\nReward: 0.957436\nNext Confusion: 3.357632"} +{"text": "### State\nConfusion: 7.498173\nAction: analogize\nReward: 0.597861\nNext Confusion: 7.415368"} +{"text": "### State\nConfusion: 4.988446\nAction: explain\nReward: 0.252937\nNext Confusion: 4.888795"} +{"text": "### State\nConfusion: 4.628919\nAction: analogize\nReward: -0.793281\nNext Confusion: 5.547164"} +{"text": "### State\nConfusion: 2.532769\nAction: analogize\nReward: 0.608471\nNext Confusion: 2.095517"} +{"text": "### State\nConfusion: 6.552197\nAction: analogize\nReward: -0.143695\nNext Confusion: 6.75866"} +{"text": "### State\nConfusion: 6.521643\nAction: analogize\nReward: 0.625005\nNext Confusion: 6.203167"} +{"text": "### State\nConfusion: 4.492259\nAction: worked_example\nReward: 2.173731\nNext Confusion: 2.631922"} +{"text": "### State\nConfusion: 7.241621\nAction: question\nReward: 1.406091\nNext Confusion: 5.86407"} +{"text": "### State\nConfusion: 3.693046\nAction: explain\nReward: 1.068534\nNext Confusion: 2.440671"} +{"text": "### State\nConfusion: 3.962626\nAction: explain\nReward: -0.730675\nNext Confusion: 4.393443"} +{"text": "### State\nConfusion: 6.47488\nAction: analogize\nReward: -0.811074\nNext Confusion: 7.624598"} +{"text": "### State\nConfusion: 7.081945\nAction: analogize\nReward: 0.137783\nNext Confusion: 6.88515"} +{"text": "### State\nConfusion: 6.003791\nAction: analogize\nReward: 0.25566\nNext Confusion: 6.172901"} +{"text": "### State\nConfusion: 4.536524\nAction: analogize\nReward: -0.882808\nNext Confusion: 5.352852"} +{"text": "### State\nConfusion: 7.325707\nAction: analogize\nReward: 0.390309\nNext Confusion: 6.489121"} +{"text": "### State\nConfusion: 3.344648\nAction: analogize\nReward: -3.003033\nNext Confusion: 2.995784"} +{"text": "### State\nConfusion: 4.936317\nAction: analogize\nReward: 0.065177\nNext Confusion: 4.409051"} +{"text": "### State\nConfusion: 5.412323\nAction: worked_example\nReward: 0.69626\nNext Confusion: 4.851012"} +{"text": "### State\nConfusion: 3.733434\nAction: correct_fact\nReward: 1.215873\nNext Confusion: 3.299668"} +{"text": "### State\nConfusion: 3.594582\nAction: analogize\nReward: -0.245522\nNext Confusion: 4.261171"} +{"text": "### State\nConfusion: 5.702906\nAction: analogize\nReward: -1.517803\nNext Confusion: 7.966025"} +{"text": "### State\nConfusion: 4.458236\nAction: analogize\nReward: -0.448019\nNext Confusion: 4.331777"} +{"text": "### State\nConfusion: 3.877192\nAction: correct_fact\nReward: -0.606948\nNext Confusion: 4.709797"} +{"text": "### State\nConfusion: 4.004076\nAction: explain\nReward: 1.357555\nNext Confusion: 3.258263"} +{"text": "### State\nConfusion: 7.576537\nAction: analogize\nReward: -1.53424\nNext Confusion: 8.860916"} +{"text": "### State\nConfusion: 3.554901\nAction: question\nReward: 0.474365\nNext Confusion: 3.006228"} +{"text": "### State\nConfusion: 3.251683\nAction: correct_fact\nReward: 0.518483\nNext Confusion: 2.64814"} +{"text": "### State\nConfusion: 3.569207\nAction: analogize\nReward: -0.410212\nNext Confusion: 3.989583"} +{"text": "### State\nConfusion: 3.670016\nAction: explain\nReward: 1.762957\nNext Confusion: 2.516897"} +{"text": "### State\nConfusion: 2.353776\nAction: worked_example\nReward: 2.261622\nNext Confusion: 0.0"} +{"text": "### State\nConfusion: 5.180178\nAction: worked_example\nReward: 1.90911\nNext Confusion: 3.493268"} +{"text": "### State\nConfusion: 4.770649\nAction: analogize\nReward: -0.145658\nNext Confusion: 4.804286"} +{"text": "### State\nConfusion: 3.80372\nAction: analogize\nReward: -1.000576\nNext Confusion: 4.46522"} +{"text": "### State\nConfusion: 6.532478\nAction: analogize\nReward: -0.946917\nNext Confusion: 7.290173"} +{"text": "### State\nConfusion: 6.0253\nAction: analogize\nReward: -0.456855\nNext Confusion: 6.661849"} +{"text": "### State\nConfusion: 5.025003\nAction: correct_fact\nReward: 0.758891\nNext Confusion: 4.576565"} +{"text": "### State\nConfusion: 2.845722\nAction: analogize\nReward: 1.140977\nNext Confusion: 2.995232"} +{"text": "### State\nConfusion: 7.297413\nAction: analogize\nReward: 1.366689\nNext Confusion: 7.151979"} +{"text": "### State\nConfusion: 9.557812\nAction: analogize\nReward: -0.288579\nNext Confusion: 9.169003"} +{"text": "### State\nConfusion: 6.37981\nAction: explain\nReward: 1.050409\nNext Confusion: 6.148158"} +{"text": "### State\nConfusion: 3.714115\nAction: worked_example\nReward: 2.349328\nNext Confusion: 1.744698"} +{"text": "### State\nConfusion: 3.174654\nAction: analogize\nReward: 0.685784\nNext Confusion: 3.14699"} +{"text": "### State\nConfusion: 6.137669\nAction: question\nReward: 0.671082\nNext Confusion: 5.555594"} +{"text": "### State\nConfusion: 3.0194\nAction: question\nReward: 0.66125\nNext Confusion: 2.640104"} +{"text": "### State\nConfusion: 5.61132\nAction: worked_example\nReward: 1.569835\nNext Confusion: 3.932574"} +{"text": "### State\nConfusion: 5.073577\nAction: analogize\nReward: -0.484999\nNext Confusion: 5.577819"} +{"text": "### State\nConfusion: 6.558275\nAction: analogize\nReward: 0.953879\nNext Confusion: 6.599469"} +{"text": "### State\nConfusion: 3.460505\nAction: analogize\nReward: -1.369673\nNext Confusion: 4.018704"} +{"text": "### State\nConfusion: 4.546068\nAction: analogize\nReward: 0.296617\nNext Confusion: 3.999932"} +{"text": "### State\nConfusion: 2.789076\nAction: correct_fact\nReward: -0.139055\nNext Confusion: 3.162709"} +{"text": "### State\nConfusion: 4.722394\nAction: analogize\nReward: -0.272211\nNext Confusion: 5.144951"} +{"text": "### State\nConfusion: 6.894866\nAction: analogize\nReward: 0.365742\nNext Confusion: 6.84301"} +{"text": "### State\nConfusion: 6.848865\nAction: analogize\nReward: -0.199233\nNext Confusion: 6.911935"} +{"text": "### State\nConfusion: 7.232148\nAction: analogize\nReward: 0.043035\nNext Confusion: 7.288547"} +{"text": "### State\nConfusion: 3.149628\nAction: explain\nReward: -0.427429\nNext Confusion: 3.316867"} +{"text": "### State\nConfusion: 3.926137\nAction: question\nReward: 1.42821\nNext Confusion: 2.881466"} +{"text": "### State\nConfusion: 3.196238\nAction: analogize\nReward: 1.17027\nNext Confusion: 2.384046"} +{"text": "### State\nConfusion: 5.307725\nAction: correct_fact\nReward: -1.086515\nNext Confusion: 5.717581"} +{"text": "### State\nConfusion: 5.070612\nAction: question\nReward: -0.603608\nNext Confusion: 5.819668"} +{"text": "### State\nConfusion: 7.103436\nAction: analogize\nReward: -1.143868\nNext Confusion: 8.541348"} +{"text": "### State\nConfusion: 6.538433\nAction: analogize\nReward: 0.299716\nNext Confusion: 7.164129"} +{"text": "### State\nConfusion: 5.592933\nAction: analogize\nReward: -0.571434\nNext Confusion: 6.062175"} +{"text": "### State\nConfusion: 3.944029\nAction: analogize\nReward: -0.0834\nNext Confusion: 4.574897"} +{"text": "### State\nConfusion: 2.918218\nAction: analogize\nReward: 0.128027\nNext Confusion: 3.573923"} +{"text": "### State\nConfusion: 4.998973\nAction: analogize\nReward: 0.099739\nNext Confusion: 4.395178"} +{"text": "### State\nConfusion: 4.694908\nAction: analogize\nReward: 0.148056\nNext Confusion: 5.204955"} +{"text": "### State\nConfusion: 3.781684\nAction: analogize\nReward: -0.263645\nNext Confusion: 4.12463"} +{"text": "### State\nConfusion: 6.591872\nAction: analogize\nReward: -1.118598\nNext Confusion: 7.24768"} +{"text": "### State\nConfusion: 3.995603\nAction: explain\nReward: 1.786482\nNext Confusion: 2.911538"} +{"text": "### State\nConfusion: 5.361206\nAction: question\nReward: 0.458134\nNext Confusion: 5.455734"} +{"text": "### State\nConfusion: 3.610285\nAction: analogize\nReward: 0.819867\nNext Confusion: 2.220746"} +{"text": "### State\nConfusion: 6.07102\nAction: explain\nReward: 0.175273\nNext Confusion: 6.668109"} +{"text": "### State\nConfusion: 3.806757\nAction: question\nReward: 0.064293\nNext Confusion: 3.739635"} +{"text": "### State\nConfusion: 4.38536\nAction: analogize\nReward: -1.787108\nNext Confusion: 5.131299"} +{"text": "### State\nConfusion: 7.341646\nAction: correct_fact\nReward: -0.362596\nNext Confusion: 8.125962"} +{"text": "### State\nConfusion: 4.368033\nAction: question\nReward: -0.037405\nNext Confusion: 4.460615"} +{"text": "### State\nConfusion: 4.966889\nAction: analogize\nReward: 0.38992\nNext Confusion: 4.741419"} +{"text": "### State\nConfusion: 1.737497\nAction: explain\nReward: -0.588786\nNext Confusion: 1.845481"} +{"text": "### State\nConfusion: 4.904597\nAction: analogize\nReward: -0.504161\nNext Confusion: 5.030465"} +{"text": "### State\nConfusion: 3.854805\nAction: correct_fact\nReward: 0.143636\nNext Confusion: 4.084965"} +{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.161128\nNext Confusion: 9.897921"} +{"text": "### State\nConfusion: 6.948946\nAction: explain\nReward: 0.019254\nNext Confusion: 7.208404"} +{"text": "### State\nConfusion: 5.288423\nAction: worked_example\nReward: 0.99203\nNext Confusion: 3.951557"} +{"text": "### State\nConfusion: 6.829811\nAction: analogize\nReward: -0.455124\nNext Confusion: 7.361673"} +{"text": "### State\nConfusion: 3.869974\nAction: analogize\nReward: 0.530551\nNext Confusion: 3.98222"} +{"text": "### State\nConfusion: 7.334596\nAction: analogize\nReward: -0.876755\nNext Confusion: 8.262695"} +{"text": "### State\nConfusion: 7.459252\nAction: analogize\nReward: -0.095127\nNext Confusion: 7.275376"} +{"text": "### State\nConfusion: 2.977803\nAction: correct_fact\nReward: -0.305983\nNext Confusion: 3.23653"} +{"text": "### State\nConfusion: 4.767777\nAction: analogize\nReward: -0.346948\nNext Confusion: 4.939303"} +{"text": "### State\nConfusion: 9.385743\nAction: correct_fact\nReward: -1.353628\nNext Confusion: 9.932337"} +{"text": "### State\nConfusion: 4.616424\nAction: worked_example\nReward: 1.173654\nNext Confusion: 2.993332"} +{"text": "### State\nConfusion: 3.982618\nAction: question\nReward: 1.465169\nNext Confusion: 3.196667"} +{"text": "### State\nConfusion: 3.731763\nAction: worked_example\nReward: 0.96159\nNext Confusion: 2.827726"} +{"text": "### State\nConfusion: 3.987744\nAction: analogize\nReward: -0.768711\nNext Confusion: 4.888868"} +{"text": "### State\nConfusion: 4.67894\nAction: question\nReward: 0.242026\nNext Confusion: 3.964318"} +{"text": "### State\nConfusion: 2.752789\nAction: analogize\nReward: -1.218773\nNext Confusion: 2.988962"} +{"text": "### State\nConfusion: 6.775572\nAction: analogize\nReward: 0.72213\nNext Confusion: 6.405007"} +{"text": "### State\nConfusion: 2.37896\nAction: worked_example\nReward: 1.470493\nNext Confusion: 1.213226"} +{"text": "### State\nConfusion: 4.411928\nAction: analogize\nReward: 2.35412\nNext Confusion: 3.227744"} +{"text": "### State\nConfusion: 4.708626\nAction: analogize\nReward: -0.456251\nNext Confusion: 4.466039"}