EduForge-Tutor / src /environment /training_samples.jsonl
hari15prasad
Data Prep: Successfully converted training data to JSONL format for AutoTrain
eec0e7c
{"text": "### State\nConfusion: 3.250681\nAction: explain\nReward: 0.277968\nNext Confusion: 2.895787"}
{"text": "### State\nConfusion: 6.946829\nAction: correct_fact\nReward: 1.136782\nNext Confusion: 6.797103"}
{"text": "### State\nConfusion: 3.01263\nAction: explain\nReward: 0.161669\nNext Confusion: 2.434628"}
{"text": "### State\nConfusion: 4.200218\nAction: analogize\nReward: -0.536253\nNext Confusion: 4.50549"}
{"text": "### State\nConfusion: 4.204886\nAction: correct_fact\nReward: 0.001798\nNext Confusion: 4.348224"}
{"text": "### State\nConfusion: 4.431564\nAction: analogize\nReward: -0.00913\nNext Confusion: 5.02598"}
{"text": "### State\nConfusion: 7.112898\nAction: analogize\nReward: -0.260503\nNext Confusion: 7.776484"}
{"text": "### State\nConfusion: 6.707709\nAction: worked_example\nReward: 0.623157\nNext Confusion: 5.590979"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.200613\nNext Confusion: 9.88075"}
{"text": "### State\nConfusion: 6.441003\nAction: analogize\nReward: 0.059471\nNext Confusion: 6.277812"}
{"text": "### State\nConfusion: 4.039312\nAction: analogize\nReward: 0.588281\nNext Confusion: 3.436871"}
{"text": "### State\nConfusion: 4.283644\nAction: analogize\nReward: 0.377662\nNext Confusion: 4.225992"}
{"text": "### State\nConfusion: 5.741777\nAction: correct_fact\nReward: -0.086082\nNext Confusion: 6.010756"}
{"text": "### State\nConfusion: 2.020052\nAction: correct_fact\nReward: 0.065653\nNext Confusion: 1.847908"}
{"text": "### State\nConfusion: 4.519175\nAction: explain\nReward: -0.532623\nNext Confusion: 4.565185"}
{"text": "### State\nConfusion: 8.321839\nAction: correct_fact\nReward: -0.03258\nNext Confusion: 8.116296"}
{"text": "### State\nConfusion: 8.126682\nAction: worked_example\nReward: 1.752661\nNext Confusion: 6.691122"}
{"text": "### State\nConfusion: 5.13175\nAction: worked_example\nReward: 1.046388\nNext Confusion: 3.915293"}
{"text": "### State\nConfusion: 2.675358\nAction: analogize\nReward: -0.527201\nNext Confusion: 3.269576"}
{"text": "### State\nConfusion: 4.452432\nAction: question\nReward: -0.12029\nNext Confusion: 4.301777"}
{"text": "### State\nConfusion: 5.261161\nAction: explain\nReward: -0.348596\nNext Confusion: 5.673129"}
{"text": "### State\nConfusion: 4.41783\nAction: correct_fact\nReward: 0.606589\nNext Confusion: 3.844203"}
{"text": "### State\nConfusion: 7.167364\nAction: analogize\nReward: -0.036751\nNext Confusion: 7.91662"}
{"text": "### State\nConfusion: 4.443446\nAction: analogize\nReward: -0.405494\nNext Confusion: 4.455853"}
{"text": "### State\nConfusion: 2.394889\nAction: worked_example\nReward: 1.569233\nNext Confusion: 0.521398"}
{"text": "### State\nConfusion: 5.666886\nAction: analogize\nReward: 0.132937\nNext Confusion: 5.507486"}
{"text": "### State\nConfusion: 8.903051\nAction: worked_example\nReward: 1.168286\nNext Confusion: 8.015495"}
{"text": "### State\nConfusion: 7.419142\nAction: analogize\nReward: 0.370696\nNext Confusion: 6.90989"}
{"text": "### State\nConfusion: 4.930095\nAction: analogize\nReward: -0.354928\nNext Confusion: 5.273698"}
{"text": "### State\nConfusion: 3.654876\nAction: correct_fact\nReward: 0.075058\nNext Confusion: 4.012523"}
{"text": "### State\nConfusion: 3.298562\nAction: correct_fact\nReward: 0.274487\nNext Confusion: 3.207063"}
{"text": "### State\nConfusion: 5.36716\nAction: worked_example\nReward: 1.439936\nNext Confusion: 3.664801"}
{"text": "### State\nConfusion: 3.746032\nAction: question\nReward: 0.769005\nNext Confusion: 3.475201"}
{"text": "### State\nConfusion: 5.479237\nAction: correct_fact\nReward: 0.557245\nNext Confusion: 5.685524"}
{"text": "### State\nConfusion: 5.16923\nAction: analogize\nReward: 0.659608\nNext Confusion: 4.931562"}
{"text": "### State\nConfusion: 5.465634\nAction: analogize\nReward: -0.455456\nNext Confusion: 6.204408"}
{"text": "### State\nConfusion: 4.611916\nAction: analogize\nReward: 0.505761\nNext Confusion: 4.227184"}
{"text": "### State\nConfusion: 6.265313\nAction: analogize\nReward: -0.2694\nNext Confusion: 6.18744"}
{"text": "### State\nConfusion: 4.897626\nAction: analogize\nReward: 0.702536\nNext Confusion: 4.261006"}
{"text": "### State\nConfusion: 4.620578\nAction: analogize\nReward: 0.574101\nNext Confusion: 4.518202"}
{"text": "### State\nConfusion: 4.456707\nAction: question\nReward: -0.11188\nNext Confusion: 4.38086"}
{"text": "### State\nConfusion: 3.882776\nAction: correct_fact\nReward: 0.216437\nNext Confusion: 3.257319"}
{"text": "### State\nConfusion: 7.64693\nAction: question\nReward: 0.516669\nNext Confusion: 6.653407"}
{"text": "### State\nConfusion: 3.631051\nAction: analogize\nReward: -0.060158\nNext Confusion: 3.668511"}
{"text": "### State\nConfusion: 3.242117\nAction: analogize\nReward: -0.632007\nNext Confusion: 4.07684"}
{"text": "### State\nConfusion: 5.477382\nAction: worked_example\nReward: 0.014376\nNext Confusion: 5.024957"}
{"text": "### State\nConfusion: 4.500757\nAction: question\nReward: 0.791141\nNext Confusion: 4.249756"}
{"text": "### State\nConfusion: 4.149765\nAction: analogize\nReward: -0.737377\nNext Confusion: 5.078204"}
{"text": "### State\nConfusion: 4.0037\nAction: analogize\nReward: 0.026296\nNext Confusion: 4.308817"}
{"text": "### State\nConfusion: 3.601284\nAction: correct_fact\nReward: 0.736689\nNext Confusion: 2.987959"}
{"text": "### State\nConfusion: 4.442764\nAction: analogize\nReward: -1.300369\nNext Confusion: 5.400936"}
{"text": "### State\nConfusion: 3.490773\nAction: explain\nReward: -0.677986\nNext Confusion: 3.864996"}
{"text": "### State\nConfusion: 4.677259\nAction: question\nReward: 0.948914\nNext Confusion: 4.377257"}
{"text": "### State\nConfusion: 5.060442\nAction: correct_fact\nReward: 0.766447\nNext Confusion: 4.600817"}
{"text": "### State\nConfusion: 4.615941\nAction: analogize\nReward: 1.128073\nNext Confusion: 4.953195"}
{"text": "### State\nConfusion: 2.83426\nAction: analogize\nReward: 0.942352\nNext Confusion: 2.650195"}
{"text": "### State\nConfusion: 3.270736\nAction: analogize\nReward: 0.441857\nNext Confusion: 2.980848"}
{"text": "### State\nConfusion: 5.609833\nAction: analogize\nReward: -0.276144\nNext Confusion: 6.021879"}
{"text": "### State\nConfusion: 3.269245\nAction: analogize\nReward: -0.558671\nNext Confusion: 3.781189"}
{"text": "### State\nConfusion: 5.657327\nAction: correct_fact\nReward: -0.370105\nNext Confusion: 6.336563"}
{"text": "### State\nConfusion: 4.241743\nAction: question\nReward: 0.405085\nNext Confusion: 3.343711"}
{"text": "### State\nConfusion: 4.50831\nAction: worked_example\nReward: 1.323088\nNext Confusion: 3.333388"}
{"text": "### State\nConfusion: 8.612566\nAction: analogize\nReward: -0.590897\nNext Confusion: 9.277476"}
{"text": "### State\nConfusion: 3.341706\nAction: question\nReward: 0.605924\nNext Confusion: 2.983989"}
{"text": "### State\nConfusion: 6.981562\nAction: worked_example\nReward: 1.42079\nNext Confusion: 6.22347"}
{"text": "### State\nConfusion: 6.482682\nAction: explain\nReward: 0.134391\nNext Confusion: 6.252345"}
{"text": "### State\nConfusion: 3.801888\nAction: correct_fact\nReward: -0.720082\nNext Confusion: 3.976293"}
{"text": "### State\nConfusion: 3.833232\nAction: analogize\nReward: -0.353737\nNext Confusion: 4.299214"}
{"text": "### State\nConfusion: 5.998812\nAction: explain\nReward: 0.389272\nNext Confusion: 5.514136"}
{"text": "### State\nConfusion: 4.684952\nAction: question\nReward: -0.067119\nNext Confusion: 4.627373"}
{"text": "### State\nConfusion: 3.739171\nAction: explain\nReward: 0.081376\nNext Confusion: 4.377791"}
{"text": "### State\nConfusion: 6.828212\nAction: explain\nReward: 1.582702\nNext Confusion: 5.688067"}
{"text": "### State\nConfusion: 7.270997\nAction: analogize\nReward: -1.004266\nNext Confusion: 7.936233"}
{"text": "### State\nConfusion: 6.698097\nAction: analogize\nReward: -0.106108\nNext Confusion: 7.112916"}
{"text": "### State\nConfusion: 5.745265\nAction: analogize\nReward: -0.139695\nNext Confusion: 5.720766"}
{"text": "### State\nConfusion: 3.632954\nAction: question\nReward: 1.16064\nNext Confusion: 3.356993"}
{"text": "### State\nConfusion: 6.09853\nAction: analogize\nReward: -1.18527\nNext Confusion: 7.567443"}
{"text": "### State\nConfusion: 3.86836\nAction: worked_example\nReward: 0.259128\nNext Confusion: 3.484797"}
{"text": "### State\nConfusion: 6.078642\nAction: question\nReward: -1.233893\nNext Confusion: 7.035697"}
{"text": "### State\nConfusion: 2.798933\nAction: explain\nReward: 0.579925\nNext Confusion: 2.698838"}
{"text": "### State\nConfusion: 5.148643\nAction: analogize\nReward: 0.709493\nNext Confusion: 4.744413"}
{"text": "### State\nConfusion: 7.496325\nAction: explain\nReward: 1.159911\nNext Confusion: 7.308018"}
{"text": "### State\nConfusion: 3.956711\nAction: analogize\nReward: -0.442151\nNext Confusion: 4.599521"}
{"text": "### State\nConfusion: 5.598244\nAction: analogize\nReward: -1.461143\nNext Confusion: 6.285129"}
{"text": "### State\nConfusion: 2.592946\nAction: analogize\nReward: -0.963043\nNext Confusion: 3.637503"}
{"text": "### State\nConfusion: 5.577053\nAction: question\nReward: -1.324903\nNext Confusion: 6.152676"}
{"text": "### State\nConfusion: 5.230882\nAction: analogize\nReward: -0.518079\nNext Confusion: 5.693272"}
{"text": "### State\nConfusion: 4.305274\nAction: question\nReward: 0.976141\nNext Confusion: 4.003798"}
{"text": "### State\nConfusion: 8.230191\nAction: analogize\nReward: -0.636257\nNext Confusion: 9.311369"}
{"text": "### State\nConfusion: 3.902071\nAction: analogize\nReward: 0.500202\nNext Confusion: 3.75269"}
{"text": "### State\nConfusion: 2.887785\nAction: correct_fact\nReward: 1.377221\nNext Confusion: 2.390823"}
{"text": "### State\nConfusion: 4.391108\nAction: analogize\nReward: -0.342566\nNext Confusion: 4.73378"}
{"text": "### State\nConfusion: 6.183204\nAction: question\nReward: -0.198667\nNext Confusion: 6.669494"}
{"text": "### State\nConfusion: 5.336835\nAction: worked_example\nReward: 0.735701\nNext Confusion: 3.867037"}
{"text": "### State\nConfusion: 3.049285\nAction: analogize\nReward: 1.806344\nNext Confusion: 1.962377"}
{"text": "### State\nConfusion: 5.835483\nAction: analogize\nReward: 0.322794\nNext Confusion: 5.636015"}
{"text": "### State\nConfusion: 6.197648\nAction: analogize\nReward: -0.037625\nNext Confusion: 6.726842"}
{"text": "### State\nConfusion: 3.712592\nAction: analogize\nReward: -1.302483\nNext Confusion: 4.627915"}
{"text": "### State\nConfusion: 2.942607\nAction: analogize\nReward: 0.391221\nNext Confusion: 3.280312"}
{"text": "### State\nConfusion: 7.031706\nAction: analogize\nReward: -0.006694\nNext Confusion: 7.219192"}
{"text": "### State\nConfusion: 6.227323\nAction: analogize\nReward: -1.185675\nNext Confusion: 7.040853"}
{"text": "### State\nConfusion: 4.443704\nAction: explain\nReward: -0.222932\nNext Confusion: 4.861446"}
{"text": "### State\nConfusion: 5.72539\nAction: analogize\nReward: -0.945966\nNext Confusion: 6.40068"}
{"text": "### State\nConfusion: 3.285097\nAction: analogize\nReward: 0.502635\nNext Confusion: 2.970505"}
{"text": "### State\nConfusion: 4.861887\nAction: question\nReward: -0.581137\nNext Confusion: 5.077682"}
{"text": "### State\nConfusion: 3.146511\nAction: worked_example\nReward: -0.022938\nNext Confusion: 2.971046"}
{"text": "### State\nConfusion: 7.118391\nAction: question\nReward: 0.68446\nNext Confusion: 6.163698"}
{"text": "### State\nConfusion: 3.458978\nAction: analogize\nReward: -0.578027\nNext Confusion: 3.857936"}
{"text": "### State\nConfusion: 3.074595\nAction: analogize\nReward: -0.545842\nNext Confusion: 3.548289"}
{"text": "### State\nConfusion: 3.44214\nAction: analogize\nReward: 0.225382\nNext Confusion: 3.721544"}
{"text": "### State\nConfusion: 8.819264\nAction: analogize\nReward: 0.151339\nNext Confusion: 8.657555"}
{"text": "### State\nConfusion: 4.521422\nAction: worked_example\nReward: -0.10238\nNext Confusion: 4.557052"}
{"text": "### State\nConfusion: 6.23277\nAction: explain\nReward: 0.457413\nNext Confusion: 6.462687"}
{"text": "### State\nConfusion: 6.84021\nAction: correct_fact\nReward: -0.057949\nNext Confusion: 6.665882"}
{"text": "### State\nConfusion: 4.478228\nAction: analogize\nReward: -1.01402\nNext Confusion: 4.81966"}
{"text": "### State\nConfusion: 3.90523\nAction: analogize\nReward: -0.397117\nNext Confusion: 4.581836"}
{"text": "### State\nConfusion: 5.505198\nAction: explain\nReward: -0.791799\nNext Confusion: 5.179022"}
{"text": "### State\nConfusion: 6.171377\nAction: analogize\nReward: 2.02061\nNext Confusion: 5.293525"}
{"text": "### State\nConfusion: 7.07752\nAction: analogize\nReward: -0.140945\nNext Confusion: 7.439284"}
{"text": "### State\nConfusion: 4.74815\nAction: question\nReward: -0.921987\nNext Confusion: 4.901676"}
{"text": "### State\nConfusion: 4.509364\nAction: explain\nReward: 1.032465\nNext Confusion: 3.376094"}
{"text": "### State\nConfusion: 2.027247\nAction: worked_example\nReward: 0.728003\nNext Confusion: 2.481546"}
{"text": "### State\nConfusion: 3.262276\nAction: analogize\nReward: -0.181705\nNext Confusion: 4.062567"}
{"text": "### State\nConfusion: 4.404922\nAction: analogize\nReward: 0.137978\nNext Confusion: 4.550036"}
{"text": "### State\nConfusion: 8.323386\nAction: analogize\nReward: -0.82731\nNext Confusion: 8.967352"}
{"text": "### State\nConfusion: 3.065048\nAction: worked_example\nReward: 1.422474\nNext Confusion: 1.396024"}
{"text": "### State\nConfusion: 8.650606\nAction: correct_fact\nReward: 0.013713\nNext Confusion: 8.505588"}
{"text": "### State\nConfusion: 4.651423\nAction: correct_fact\nReward: 0.975844\nNext Confusion: 3.993101"}
{"text": "### State\nConfusion: 5.509835\nAction: analogize\nReward: 1.279046\nNext Confusion: 5.567154"}
{"text": "### State\nConfusion: 6.096856\nAction: analogize\nReward: -0.38767\nNext Confusion: 6.7619"}
{"text": "### State\nConfusion: 4.852611\nAction: analogize\nReward: -0.843568\nNext Confusion: 4.90363"}
{"text": "### State\nConfusion: 3.32426\nAction: correct_fact\nReward: 0.634364\nNext Confusion: 3.443108"}
{"text": "### State\nConfusion: 3.007626\nAction: explain\nReward: -0.714219\nNext Confusion: 3.19942"}
{"text": "### State\nConfusion: 8.091647\nAction: analogize\nReward: 0.138296\nNext Confusion: 8.641704"}
{"text": "### State\nConfusion: 6.707883\nAction: analogize\nReward: -1.551451\nNext Confusion: 7.149846"}
{"text": "### State\nConfusion: 5.131027\nAction: analogize\nReward: -0.898392\nNext Confusion: 5.243616"}
{"text": "### State\nConfusion: 7.653114\nAction: analogize\nReward: 0.004476\nNext Confusion: 7.78867"}
{"text": "### State\nConfusion: 3.999675\nAction: worked_example\nReward: 0.461726\nNext Confusion: 3.61097"}
{"text": "### State\nConfusion: 9.165361\nAction: analogize\nReward: -0.127371\nNext Confusion: 9.594726"}
{"text": "### State\nConfusion: 5.877134\nAction: analogize\nReward: -0.827503\nNext Confusion: 6.792844"}
{"text": "### State\nConfusion: 4.240065\nAction: analogize\nReward: 0.017438\nNext Confusion: 4.091051"}
{"text": "### State\nConfusion: 6.373348\nAction: worked_example\nReward: 2.087573\nNext Confusion: 4.599887"}
{"text": "### State\nConfusion: 3.398287\nAction: analogize\nReward: 0.804075\nNext Confusion: 2.719617"}
{"text": "### State\nConfusion: 5.074518\nAction: analogize\nReward: 0.123028\nNext Confusion: 5.348822"}
{"text": "### State\nConfusion: 2.402497\nAction: analogize\nReward: -0.223833\nNext Confusion: 2.35697"}
{"text": "### State\nConfusion: 4.486272\nAction: question\nReward: 0.398914\nNext Confusion: 4.16057"}
{"text": "### State\nConfusion: 5.279123\nAction: analogize\nReward: -0.645918\nNext Confusion: 5.878665"}
{"text": "### State\nConfusion: 2.826214\nAction: explain\nReward: 0.117254\nNext Confusion: 2.519507"}
{"text": "### State\nConfusion: 7.463021\nAction: correct_fact\nReward: -0.078153\nNext Confusion: 7.250635"}
{"text": "### State\nConfusion: 4.378883\nAction: correct_fact\nReward: 0.349144\nNext Confusion: 4.469529"}
{"text": "### State\nConfusion: 3.601191\nAction: analogize\nReward: -0.4524\nNext Confusion: 3.516358"}
{"text": "### State\nConfusion: 3.17514\nAction: worked_example\nReward: 1.188492\nNext Confusion: 2.420519"}
{"text": "### State\nConfusion: 4.386989\nAction: analogize\nReward: -0.239119\nNext Confusion: 4.577966"}
{"text": "### State\nConfusion: 3.484874\nAction: analogize\nReward: 0.235369\nNext Confusion: 3.315143"}
{"text": "### State\nConfusion: 5.282593\nAction: analogize\nReward: 0.254918\nNext Confusion: 5.733146"}
{"text": "### State\nConfusion: 2.864476\nAction: explain\nReward: 0.342867\nNext Confusion: 3.148373"}
{"text": "### State\nConfusion: 4.97228\nAction: analogize\nReward: -0.818184\nNext Confusion: 4.98446"}
{"text": "### State\nConfusion: 5.917563\nAction: analogize\nReward: 0.363083\nNext Confusion: 6.30535"}
{"text": "### State\nConfusion: 4.550479\nAction: analogize\nReward: 0.016655\nNext Confusion: 5.802547"}
{"text": "### State\nConfusion: 3.973916\nAction: analogize\nReward: -0.288334\nNext Confusion: 4.395061"}
{"text": "### State\nConfusion: 3.831629\nAction: analogize\nReward: 0.150414\nNext Confusion: 4.073593"}
{"text": "### State\nConfusion: 6.013124\nAction: analogize\nReward: -0.745808\nNext Confusion: 6.446821"}
{"text": "### State\nConfusion: 4.514874\nAction: analogize\nReward: 0.229664\nNext Confusion: 4.723423"}
{"text": "### State\nConfusion: 4.115125\nAction: analogize\nReward: -1.187277\nNext Confusion: 4.327879"}
{"text": "### State\nConfusion: 4.383352\nAction: analogize\nReward: -0.218114\nNext Confusion: 4.679171"}
{"text": "### State\nConfusion: 3.9782\nAction: correct_fact\nReward: 0.984978\nNext Confusion: 3.826136"}
{"text": "### State\nConfusion: 4.19293\nAction: correct_fact\nReward: -0.565662\nNext Confusion: 4.888168"}
{"text": "### State\nConfusion: 3.7524\nAction: correct_fact\nReward: 0.912141\nNext Confusion: 2.79205"}
{"text": "### State\nConfusion: 5.939085\nAction: analogize\nReward: 0.14276\nNext Confusion: 5.931013"}
{"text": "### State\nConfusion: 3.940862\nAction: question\nReward: 1.299393\nNext Confusion: 3.066277"}
{"text": "### State\nConfusion: 6.421869\nAction: analogize\nReward: 0.36015\nNext Confusion: 6.279908"}
{"text": "### State\nConfusion: 1.453712\nAction: analogize\nReward: -0.959622\nNext Confusion: 2.468141"}
{"text": "### State\nConfusion: 5.496111\nAction: analogize\nReward: -0.43066\nNext Confusion: 5.796453"}
{"text": "### State\nConfusion: 3.835277\nAction: analogize\nReward: -0.210495\nNext Confusion: 4.315474"}
{"text": "### State\nConfusion: 4.667592\nAction: analogize\nReward: 0.221866\nNext Confusion: 4.160811"}
{"text": "### State\nConfusion: 7.403551\nAction: worked_example\nReward: 0.907719\nNext Confusion: 6.486184"}
{"text": "### State\nConfusion: 4.500176\nAction: analogize\nReward: -0.275044\nNext Confusion: 4.99979"}
{"text": "### State\nConfusion: 3.816011\nAction: analogize\nReward: -0.839258\nNext Confusion: 4.310652"}
{"text": "### State\nConfusion: 8.19787\nAction: question\nReward: 0.946011\nNext Confusion: 7.058693"}
{"text": "### State\nConfusion: 3.899465\nAction: analogize\nReward: 0.877398\nNext Confusion: 2.888416"}
{"text": "### State\nConfusion: 5.417362\nAction: question\nReward: 1.217177\nNext Confusion: 4.386165"}
{"text": "### State\nConfusion: 4.163053\nAction: analogize\nReward: -0.983947\nNext Confusion: 5.03493"}
{"text": "### State\nConfusion: 5.122217\nAction: analogize\nReward: -1.302016\nNext Confusion: 5.724295"}
{"text": "### State\nConfusion: 4.713499\nAction: question\nReward: -1.06088\nNext Confusion: 5.390096"}
{"text": "### State\nConfusion: 5.252266\nAction: correct_fact\nReward: 0.112053\nNext Confusion: 5.166234"}
{"text": "### State\nConfusion: 4.277287\nAction: analogize\nReward: 0.349988\nNext Confusion: 3.858081"}
{"text": "### State\nConfusion: 4.312006\nAction: analogize\nReward: -0.280414\nNext Confusion: 4.743623"}
{"text": "### State\nConfusion: 2.116424\nAction: worked_example\nReward: 2.219539\nNext Confusion: 0.30772"}
{"text": "### State\nConfusion: 4.487134\nAction: analogize\nReward: -0.316827\nNext Confusion: 4.72908"}
{"text": "### State\nConfusion: 3.734942\nAction: explain\nReward: 0.559271\nNext Confusion: 3.387467"}
{"text": "### State\nConfusion: 3.817736\nAction: correct_fact\nReward: -0.371755\nNext Confusion: 4.35576"}
{"text": "### State\nConfusion: 3.534894\nAction: question\nReward: 0.714752\nNext Confusion: 2.6903"}
{"text": "### State\nConfusion: 3.297557\nAction: analogize\nReward: -0.02651\nNext Confusion: 3.73844"}
{"text": "### State\nConfusion: 3.66799\nAction: question\nReward: 1.278086\nNext Confusion: 2.777691"}
{"text": "### State\nConfusion: 8.029835\nAction: explain\nReward: 0.004985\nNext Confusion: 8.023991"}
{"text": "### State\nConfusion: 4.69018\nAction: analogize\nReward: -0.212867\nNext Confusion: 5.239052"}
{"text": "### State\nConfusion: 3.043147\nAction: analogize\nReward: 0.341268\nNext Confusion: 2.599216"}
{"text": "### State\nConfusion: 5.367027\nAction: question\nReward: 0.914907\nNext Confusion: 4.669119"}
{"text": "### State\nConfusion: 2.610881\nAction: worked_example\nReward: 1.154435\nNext Confusion: 0.578632"}
{"text": "### State\nConfusion: 2.696339\nAction: question\nReward: -0.000458\nNext Confusion: 2.809412"}
{"text": "### State\nConfusion: 4.532858\nAction: analogize\nReward: 1.700078\nNext Confusion: 3.571719"}
{"text": "### State\nConfusion: 3.726544\nAction: question\nReward: -0.297797\nNext Confusion: 3.881641"}
{"text": "### State\nConfusion: 5.898968\nAction: analogize\nReward: -0.010863\nNext Confusion: 6.54058"}
{"text": "### State\nConfusion: 4.530686\nAction: correct_fact\nReward: -0.477902\nNext Confusion: 5.114145"}
{"text": "### State\nConfusion: 4.004507\nAction: analogize\nReward: -0.184868\nNext Confusion: 4.069702"}
{"text": "### State\nConfusion: 3.988666\nAction: correct_fact\nReward: -0.211808\nNext Confusion: 4.288907"}
{"text": "### State\nConfusion: 3.219075\nAction: worked_example\nReward: 0.314439\nNext Confusion: 3.734656"}
{"text": "### State\nConfusion: 3.293107\nAction: explain\nReward: -0.32457\nNext Confusion: 2.891417"}
{"text": "### State\nConfusion: 4.433997\nAction: explain\nReward: 1.305128\nNext Confusion: 3.859591"}
{"text": "### State\nConfusion: 3.627563\nAction: worked_example\nReward: 1.166838\nNext Confusion: 3.274162"}
{"text": "### State\nConfusion: 2.615709\nAction: analogize\nReward: 0.055049\nNext Confusion: 2.620762"}
{"text": "### State\nConfusion: 4.573371\nAction: analogize\nReward: 0.162817\nNext Confusion: 4.686983"}
{"text": "### State\nConfusion: 2.914325\nAction: analogize\nReward: 0.573057\nNext Confusion: 2.620443"}
{"text": "### State\nConfusion: 4.103402\nAction: correct_fact\nReward: -0.109456\nNext Confusion: 4.510875"}
{"text": "### State\nConfusion: 4.716958\nAction: analogize\nReward: -1.069869\nNext Confusion: 5.22988"}
{"text": "### State\nConfusion: 1.439015\nAction: worked_example\nReward: 1.653535\nNext Confusion: 0.0"}
{"text": "### State\nConfusion: 4.512936\nAction: explain\nReward: -0.674713\nNext Confusion: 4.853434"}
{"text": "### State\nConfusion: 4.616469\nAction: analogize\nReward: 0.228202\nNext Confusion: 4.998136"}
{"text": "### State\nConfusion: 3.472289\nAction: analogize\nReward: -0.518788\nNext Confusion: 4.016916"}
{"text": "### State\nConfusion: 5.290328\nAction: explain\nReward: 1.622873\nNext Confusion: 4.628211"}
{"text": "### State\nConfusion: 4.509018\nAction: analogize\nReward: -0.246907\nNext Confusion: 4.696386"}
{"text": "### State\nConfusion: 3.649479\nAction: analogize\nReward: -0.646281\nNext Confusion: 3.705608"}
{"text": "### State\nConfusion: 4.436886\nAction: explain\nReward: 0.071447\nNext Confusion: 4.62008"}
{"text": "### State\nConfusion: 6.165114\nAction: explain\nReward: 0.102394\nNext Confusion: 5.989851"}
{"text": "### State\nConfusion: 3.751394\nAction: question\nReward: 0.110986\nNext Confusion: 3.572822"}
{"text": "### State\nConfusion: 4.967533\nAction: worked_example\nReward: 1.888933\nNext Confusion: 3.294329"}
{"text": "### State\nConfusion: 6.802119\nAction: analogize\nReward: -0.318646\nNext Confusion: 6.787519"}
{"text": "### State\nConfusion: 2.757781\nAction: analogize\nReward: -0.041224\nNext Confusion: 2.945709"}
{"text": "### State\nConfusion: 3.441426\nAction: question\nReward: -0.356823\nNext Confusion: 3.619329"}
{"text": "### State\nConfusion: 3.824876\nAction: analogize\nReward: -0.282262\nNext Confusion: 4.405335"}
{"text": "### State\nConfusion: 6.033777\nAction: analogize\nReward: 0.118754\nNext Confusion: 5.973301"}
{"text": "### State\nConfusion: 3.73862\nAction: analogize\nReward: 0.371175\nNext Confusion: 3.56842"}
{"text": "### State\nConfusion: 9.286809\nAction: worked_example\nReward: 2.644093\nNext Confusion: 7.658459"}
{"text": "### State\nConfusion: 3.810243\nAction: worked_example\nReward: 1.870768\nNext Confusion: 1.740508"}
{"text": "### State\nConfusion: 3.956185\nAction: question\nReward: 0.81304\nNext Confusion: 3.16671"}
{"text": "### State\nConfusion: 3.00301\nAction: analogize\nReward: -0.345361\nNext Confusion: 2.953303"}
{"text": "### State\nConfusion: 3.451303\nAction: analogize\nReward: -0.858153\nNext Confusion: 4.097912"}
{"text": "### State\nConfusion: 3.48126\nAction: question\nReward: -0.04059\nNext Confusion: 3.13211"}
{"text": "### State\nConfusion: 5.124149\nAction: question\nReward: 0.945285\nNext Confusion: 3.9529"}
{"text": "### State\nConfusion: 3.216448\nAction: analogize\nReward: 0.281472\nNext Confusion: 3.077387"}
{"text": "### State\nConfusion: 3.474833\nAction: analogize\nReward: -1.38124\nNext Confusion: 4.510687"}
{"text": "### State\nConfusion: 3.63635\nAction: analogize\nReward: 0.229631\nNext Confusion: 4.149948"}
{"text": "### State\nConfusion: 3.650369\nAction: analogize\nReward: -0.573367\nNext Confusion: 4.224239"}
{"text": "### State\nConfusion: 3.732268\nAction: explain\nReward: -0.387552\nNext Confusion: 4.25324"}
{"text": "### State\nConfusion: 3.145265\nAction: analogize\nReward: -0.078527\nNext Confusion: 3.347933"}
{"text": "### State\nConfusion: 6.957926\nAction: analogize\nReward: 1.066442\nNext Confusion: 6.509846"}
{"text": "### State\nConfusion: 3.81907\nAction: analogize\nReward: -0.537125\nNext Confusion: 3.932789"}
{"text": "### State\nConfusion: 6.875212\nAction: explain\nReward: 0.923676\nNext Confusion: 6.357508"}
{"text": "### State\nConfusion: 6.113523\nAction: analogize\nReward: 0.216784\nNext Confusion: 5.652343"}
{"text": "### State\nConfusion: 4.34719\nAction: analogize\nReward: 0.100165\nNext Confusion: 4.293112"}
{"text": "### State\nConfusion: 3.46102\nAction: analogize\nReward: -1.955337\nNext Confusion: 5.25365"}
{"text": "### State\nConfusion: 2.541971\nAction: analogize\nReward: -0.67109\nNext Confusion: 3.242385"}
{"text": "### State\nConfusion: 3.4017\nAction: analogize\nReward: -1.546172\nNext Confusion: 4.715109"}
{"text": "### State\nConfusion: 3.903403\nAction: explain\nReward: 0.955664\nNext Confusion: 3.233826"}
{"text": "### State\nConfusion: 4.410839\nAction: correct_fact\nReward: -0.778426\nNext Confusion: 4.867506"}
{"text": "### State\nConfusion: 5.352035\nAction: analogize\nReward: 0.083221\nNext Confusion: 5.309198"}
{"text": "### State\nConfusion: 3.845539\nAction: worked_example\nReward: 0.721115\nNext Confusion: 3.197238"}
{"text": "### State\nConfusion: 4.363106\nAction: analogize\nReward: -0.783213\nNext Confusion: 4.303111"}
{"text": "### State\nConfusion: 7.119\nAction: analogize\nReward: -0.595028\nNext Confusion: 8.178642"}
{"text": "### State\nConfusion: 2.084962\nAction: correct_fact\nReward: 0.396716\nNext Confusion: 2.375542"}
{"text": "### State\nConfusion: 8.238723\nAction: analogize\nReward: -0.264989\nNext Confusion: 8.752825"}
{"text": "### State\nConfusion: 5.645957\nAction: analogize\nReward: -0.080122\nNext Confusion: 5.676078"}
{"text": "### State\nConfusion: 6.062615\nAction: analogize\nReward: 1.713626\nNext Confusion: 5.300618"}
{"text": "### State\nConfusion: 8.921528\nAction: worked_example\nReward: 1.502296\nNext Confusion: 7.384206"}
{"text": "### State\nConfusion: 3.535991\nAction: explain\nReward: 0.402923\nNext Confusion: 2.849609"}
{"text": "### State\nConfusion: 4.234336\nAction: analogize\nReward: 0.416406\nNext Confusion: 4.556367"}
{"text": "### State\nConfusion: 2.388265\nAction: analogize\nReward: -0.320608\nNext Confusion: 2.556611"}
{"text": "### State\nConfusion: 5.013006\nAction: analogize\nReward: -0.899276\nNext Confusion: 5.653823"}
{"text": "### State\nConfusion: 3.866427\nAction: analogize\nReward: -0.164342\nNext Confusion: 4.012446"}
{"text": "### State\nConfusion: 3.142777\nAction: correct_fact\nReward: 1.00956\nNext Confusion: 2.708875"}
{"text": "### State\nConfusion: 5.851146\nAction: worked_example\nReward: 1.087909\nNext Confusion: 4.400152"}
{"text": "### State\nConfusion: 9.969285\nAction: correct_fact\nReward: -0.29484\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.26319\nAction: analogize\nReward: -0.381748\nNext Confusion: 3.710803"}
{"text": "### State\nConfusion: 3.560582\nAction: explain\nReward: -1.057823\nNext Confusion: 3.697929"}
{"text": "### State\nConfusion: 5.7797\nAction: worked_example\nReward: 1.58499\nNext Confusion: 4.221551"}
{"text": "### State\nConfusion: 3.365968\nAction: analogize\nReward: -0.12409\nNext Confusion: 3.294733"}
{"text": "### State\nConfusion: 7.357676\nAction: analogize\nReward: -0.930477\nNext Confusion: 7.947465"}
{"text": "### State\nConfusion: 5.213246\nAction: correct_fact\nReward: -0.457683\nNext Confusion: 5.714824"}
{"text": "### State\nConfusion: 8.686376\nAction: worked_example\nReward: 0.914465\nNext Confusion: 7.735434"}
{"text": "### State\nConfusion: 5.906049\nAction: question\nReward: 0.914918\nNext Confusion: 4.746426"}
{"text": "### State\nConfusion: 3.43873\nAction: analogize\nReward: -0.791984\nNext Confusion: 4.093406"}
{"text": "### State\nConfusion: 8.147875\nAction: analogize\nReward: -0.339623\nNext Confusion: 9.074441"}
{"text": "### State\nConfusion: 4.987865\nAction: correct_fact\nReward: -0.135189\nNext Confusion: 5.126443"}
{"text": "### State\nConfusion: 6.827771\nAction: analogize\nReward: -0.182827\nNext Confusion: 7.09621"}
{"text": "### State\nConfusion: 4.841376\nAction: correct_fact\nReward: 0.440885\nNext Confusion: 4.646943"}
{"text": "### State\nConfusion: 6.711827\nAction: explain\nReward: 1.491523\nNext Confusion: 5.848803"}
{"text": "### State\nConfusion: 4.967377\nAction: analogize\nReward: 0.201888\nNext Confusion: 5.331802"}
{"text": "### State\nConfusion: 7.154704\nAction: analogize\nReward: 0.341228\nNext Confusion: 7.220116"}
{"text": "### State\nConfusion: 2.715033\nAction: analogize\nReward: 0.773613\nNext Confusion: 2.429066"}
{"text": "### State\nConfusion: 6.547896\nAction: explain\nReward: 0.563126\nNext Confusion: 6.683662"}
{"text": "### State\nConfusion: 6.269861\nAction: analogize\nReward: -0.157441\nNext Confusion: 6.715462"}
{"text": "### State\nConfusion: 5.309012\nAction: analogize\nReward: -0.39383\nNext Confusion: 6.213959"}
{"text": "### State\nConfusion: 4.32928\nAction: analogize\nReward: -1.387919\nNext Confusion: 5.461956"}
{"text": "### State\nConfusion: 7.075807\nAction: question\nReward: 0.342144\nNext Confusion: 6.488871"}
{"text": "### State\nConfusion: 2.908937\nAction: worked_example\nReward: 1.786775\nNext Confusion: 1.24221"}
{"text": "### State\nConfusion: 3.163769\nAction: analogize\nReward: -0.649834\nNext Confusion: 3.567671"}
{"text": "### State\nConfusion: 6.413857\nAction: analogize\nReward: 0.632799\nNext Confusion: 6.437032"}
{"text": "### State\nConfusion: 4.157505\nAction: correct_fact\nReward: -0.609828\nNext Confusion: 5.336113"}
{"text": "### State\nConfusion: 5.211759\nAction: analogize\nReward: -0.736369\nNext Confusion: 5.220968"}
{"text": "### State\nConfusion: 4.079003\nAction: analogize\nReward: 0.088859\nNext Confusion: 3.57106"}
{"text": "### State\nConfusion: 7.824298\nAction: worked_example\nReward: 1.347861\nNext Confusion: 7.120889"}
{"text": "### State\nConfusion: 3.864172\nAction: correct_fact\nReward: -1.319164\nNext Confusion: 5.237199"}
{"text": "### State\nConfusion: 3.599646\nAction: analogize\nReward: -0.234941\nNext Confusion: 3.997227"}
{"text": "### State\nConfusion: 6.169248\nAction: analogize\nReward: -0.73131\nNext Confusion: 6.311524"}
{"text": "### State\nConfusion: 4.205811\nAction: analogize\nReward: -0.028118\nNext Confusion: 4.480411"}
{"text": "### State\nConfusion: 6.248756\nAction: worked_example\nReward: 2.320086\nNext Confusion: 4.345955"}
{"text": "### State\nConfusion: 5.422293\nAction: correct_fact\nReward: -0.231537\nNext Confusion: 5.617543"}
{"text": "### State\nConfusion: 2.242248\nAction: analogize\nReward: -2.077594\nNext Confusion: 4.016796"}
{"text": "### State\nConfusion: 6.597207\nAction: worked_example\nReward: 0.058722\nNext Confusion: 6.496094"}
{"text": "### State\nConfusion: 6.364598\nAction: analogize\nReward: -0.576298\nNext Confusion: 6.859088"}
{"text": "### State\nConfusion: 4.490757\nAction: analogize\nReward: -0.434451\nNext Confusion: 4.642448"}
{"text": "### State\nConfusion: 3.946703\nAction: analogize\nReward: -0.681441\nNext Confusion: 4.066546"}
{"text": "### State\nConfusion: 4.001388\nAction: analogize\nReward: -0.120379\nNext Confusion: 3.938912"}
{"text": "### State\nConfusion: 9.710864\nAction: analogize\nReward: 0.004072\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.747454\nAction: analogize\nReward: -0.778482\nNext Confusion: 4.130773"}
{"text": "### State\nConfusion: 4.575968\nAction: explain\nReward: -0.196636\nNext Confusion: 4.720822"}
{"text": "### State\nConfusion: 7.338616\nAction: analogize\nReward: 0.358031\nNext Confusion: 7.647601"}
{"text": "### State\nConfusion: 3.574423\nAction: analogize\nReward: 0.154168\nNext Confusion: 3.77347"}
{"text": "### State\nConfusion: 4.63278\nAction: explain\nReward: 0.621704\nNext Confusion: 4.001392"}
{"text": "### State\nConfusion: 6.210172\nAction: analogize\nReward: 1.340827\nNext Confusion: 5.613212"}
{"text": "### State\nConfusion: 4.389936\nAction: correct_fact\nReward: 0.538823\nNext Confusion: 4.880523"}
{"text": "### State\nConfusion: 5.383227\nAction: analogize\nReward: -0.455558\nNext Confusion: 5.864184"}
{"text": "### State\nConfusion: 4.676518\nAction: question\nReward: -0.024095\nNext Confusion: 4.595433"}
{"text": "### State\nConfusion: 4.116737\nAction: analogize\nReward: -0.200846\nNext Confusion: 4.198854"}
{"text": "### State\nConfusion: 3.158481\nAction: analogize\nReward: -0.501419\nNext Confusion: 3.801235"}
{"text": "### State\nConfusion: 5.381923\nAction: explain\nReward: 0.301327\nNext Confusion: 4.506645"}
{"text": "### State\nConfusion: 3.871241\nAction: analogize\nReward: 0.470844\nNext Confusion: 3.760368"}
{"text": "### State\nConfusion: 5.753503\nAction: question\nReward: 0.836846\nNext Confusion: 5.024852"}
{"text": "### State\nConfusion: 6.185399\nAction: correct_fact\nReward: -0.229113\nNext Confusion: 5.964536"}
{"text": "### State\nConfusion: 5.237682\nAction: analogize\nReward: 0.639695\nNext Confusion: 5.570729"}
{"text": "### State\nConfusion: 3.803306\nAction: analogize\nReward: -0.653361\nNext Confusion: 4.502231"}
{"text": "### State\nConfusion: 4.290909\nAction: analogize\nReward: 0.050406\nNext Confusion: 4.59684"}
{"text": "### State\nConfusion: 3.545454\nAction: correct_fact\nReward: -1.307137\nNext Confusion: 4.240777"}
{"text": "### State\nConfusion: 3.892848\nAction: analogize\nReward: 0.277591\nNext Confusion: 4.556432"}
{"text": "### State\nConfusion: 2.286423\nAction: analogize\nReward: -0.020258\nNext Confusion: 2.711716"}
{"text": "### State\nConfusion: 6.568009\nAction: analogize\nReward: 0.309756\nNext Confusion: 6.701265"}
{"text": "### State\nConfusion: 7.574443\nAction: question\nReward: 0.488642\nNext Confusion: 7.358108"}
{"text": "### State\nConfusion: 2.923719\nAction: question\nReward: 0.680867\nNext Confusion: 1.858226"}
{"text": "### State\nConfusion: 6.309944\nAction: explain\nReward: -0.374884\nNext Confusion: 6.108041"}
{"text": "### State\nConfusion: 3.950947\nAction: explain\nReward: -0.246238\nNext Confusion: 3.967673"}
{"text": "### State\nConfusion: 3.558406\nAction: explain\nReward: -0.86277\nNext Confusion: 4.408267"}
{"text": "### State\nConfusion: 4.903235\nAction: analogize\nReward: -0.33585\nNext Confusion: 5.614946"}
{"text": "### State\nConfusion: 3.336845\nAction: analogize\nReward: -0.129177\nNext Confusion: 3.785467"}
{"text": "### State\nConfusion: 3.309768\nAction: analogize\nReward: 0.973455\nNext Confusion: 2.441803"}
{"text": "### State\nConfusion: 5.035866\nAction: analogize\nReward: 0.479477\nNext Confusion: 5.282769"}
{"text": "### State\nConfusion: 3.717341\nAction: analogize\nReward: -0.445694\nNext Confusion: 3.543303"}
{"text": "### State\nConfusion: 4.434391\nAction: analogize\nReward: -0.524554\nNext Confusion: 4.974204"}
{"text": "### State\nConfusion: 3.056816\nAction: analogize\nReward: -0.758043\nNext Confusion: 4.123322"}
{"text": "### State\nConfusion: 4.187655\nAction: analogize\nReward: -0.539545\nNext Confusion: 4.730906"}
{"text": "### State\nConfusion: 4.638495\nAction: correct_fact\nReward: -0.155797\nNext Confusion: 4.900523"}
{"text": "### State\nConfusion: 6.000943\nAction: analogize\nReward: -1.059716\nNext Confusion: 7.495129"}
{"text": "### State\nConfusion: 3.527035\nAction: question\nReward: -0.523099\nNext Confusion: 4.023082"}
{"text": "### State\nConfusion: 2.94476\nAction: analogize\nReward: 0.002781\nNext Confusion: 3.050672"}
{"text": "### State\nConfusion: 4.197364\nAction: explain\nReward: -0.036997\nNext Confusion: 3.47503"}
{"text": "### State\nConfusion: 2.724514\nAction: analogize\nReward: -0.0952\nNext Confusion: 2.998136"}
{"text": "### State\nConfusion: 4.317858\nAction: analogize\nReward: -0.248636\nNext Confusion: 4.875382"}
{"text": "### State\nConfusion: 8.902446\nAction: analogize\nReward: 0.339665\nNext Confusion: 8.612985"}
{"text": "### State\nConfusion: 7.162479\nAction: explain\nReward: 0.045718\nNext Confusion: 6.794286"}
{"text": "### State\nConfusion: 3.628578\nAction: analogize\nReward: -0.46392\nNext Confusion: 3.195431"}
{"text": "### State\nConfusion: 8.05332\nAction: analogize\nReward: 0.5926\nNext Confusion: 7.991128"}
{"text": "### State\nConfusion: 3.5457\nAction: explain\nReward: 1.156754\nNext Confusion: 2.851767"}
{"text": "### State\nConfusion: 6.851163\nAction: analogize\nReward: -0.991948\nNext Confusion: 7.793215"}
{"text": "### State\nConfusion: 3.835441\nAction: explain\nReward: 0.089078\nNext Confusion: 3.786062"}
{"text": "### State\nConfusion: 2.596621\nAction: analogize\nReward: 0.806401\nNext Confusion: 2.628039"}
{"text": "### State\nConfusion: 8.799524\nAction: analogize\nReward: 0.116618\nNext Confusion: 9.191632"}
{"text": "### State\nConfusion: 3.935173\nAction: explain\nReward: 0.042982\nNext Confusion: 4.48615"}
{"text": "### State\nConfusion: 4.541541\nAction: analogize\nReward: -0.048526\nNext Confusion: 4.233993"}
{"text": "### State\nConfusion: 3.478506\nAction: explain\nReward: -0.555564\nNext Confusion: 3.648344"}
{"text": "### State\nConfusion: 2.207017\nAction: correct_fact\nReward: -1.103466\nNext Confusion: 2.982554"}
{"text": "### State\nConfusion: 6.885302\nAction: analogize\nReward: -0.296764\nNext Confusion: 7.428911"}
{"text": "### State\nConfusion: 4.36444\nAction: analogize\nReward: 0.561046\nNext Confusion: 3.995603"}
{"text": "### State\nConfusion: 4.83831\nAction: correct_fact\nReward: 0.016987\nNext Confusion: 4.787175"}
{"text": "### State\nConfusion: 3.486222\nAction: analogize\nReward: -0.006196\nNext Confusion: 3.984055"}
{"text": "### State\nConfusion: 4.288857\nAction: analogize\nReward: -0.083703\nNext Confusion: 3.823161"}
{"text": "### State\nConfusion: 3.205301\nAction: correct_fact\nReward: -0.636662\nNext Confusion: 4.112988"}
{"text": "### State\nConfusion: 4.280341\nAction: analogize\nReward: -0.699241\nNext Confusion: 4.976665"}
{"text": "### State\nConfusion: 3.260056\nAction: analogize\nReward: -0.594702\nNext Confusion: 4.102306"}
{"text": "### State\nConfusion: 4.612415\nAction: analogize\nReward: 0.936412\nNext Confusion: 3.610846"}
{"text": "### State\nConfusion: 3.823294\nAction: question\nReward: 0.860296\nNext Confusion: 3.350242"}
{"text": "### State\nConfusion: 4.300542\nAction: analogize\nReward: -0.277862\nNext Confusion: 4.604201"}
{"text": "### State\nConfusion: 3.008559\nAction: correct_fact\nReward: 0.791254\nNext Confusion: 3.051819"}
{"text": "### State\nConfusion: 7.152747\nAction: analogize\nReward: -1.218924\nNext Confusion: 8.283377"}
{"text": "### State\nConfusion: 3.386136\nAction: analogize\nReward: -1.36675\nNext Confusion: 4.221894"}
{"text": "### State\nConfusion: 4.445341\nAction: analogize\nReward: -0.451169\nNext Confusion: 5.100821"}
{"text": "### State\nConfusion: 3.284357\nAction: explain\nReward: 0.506617\nNext Confusion: 3.124258"}
{"text": "### State\nConfusion: 5.902904\nAction: analogize\nReward: 0.375122\nNext Confusion: 5.377923"}
{"text": "### State\nConfusion: 6.133891\nAction: analogize\nReward: -0.515857\nNext Confusion: 6.836678"}
{"text": "### State\nConfusion: 4.543045\nAction: analogize\nReward: 0.09579\nNext Confusion: 4.757417"}
{"text": "### State\nConfusion: 3.405838\nAction: analogize\nReward: -0.677895\nNext Confusion: 3.94108"}
{"text": "### State\nConfusion: 3.854457\nAction: analogize\nReward: -0.714533\nNext Confusion: 4.356869"}
{"text": "### State\nConfusion: 8.185769\nAction: worked_example\nReward: 1.678385\nNext Confusion: 6.671469"}
{"text": "### State\nConfusion: 4.561003\nAction: analogize\nReward: -0.987293\nNext Confusion: 4.868517"}
{"text": "### State\nConfusion: 3.15542\nAction: question\nReward: 1.250758\nNext Confusion: 2.164837"}
{"text": "### State\nConfusion: 3.6711\nAction: analogize\nReward: 0.597279\nNext Confusion: 3.621499"}
{"text": "### State\nConfusion: 4.706904\nAction: analogize\nReward: -0.253879\nNext Confusion: 4.531873"}
{"text": "### State\nConfusion: 4.510057\nAction: analogize\nReward: 0.328044\nNext Confusion: 4.414616"}
{"text": "### State\nConfusion: 4.219359\nAction: analogize\nReward: 0.042542\nNext Confusion: 4.427316"}
{"text": "### State\nConfusion: 3.831463\nAction: explain\nReward: 1.360988\nNext Confusion: 2.83884"}
{"text": "### State\nConfusion: 4.097205\nAction: question\nReward: 0.258788\nNext Confusion: 3.725177"}
{"text": "### State\nConfusion: 9.491195\nAction: analogize\nReward: -0.344806\nNext Confusion: 9.492555"}
{"text": "### State\nConfusion: 2.441149\nAction: analogize\nReward: -0.793021\nNext Confusion: 2.837643"}
{"text": "### State\nConfusion: 3.408415\nAction: analogize\nReward: -0.309769\nNext Confusion: 3.985229"}
{"text": "### State\nConfusion: 5.631257\nAction: analogize\nReward: -0.416589\nNext Confusion: 6.514521"}
{"text": "### State\nConfusion: 7.245012\nAction: worked_example\nReward: 2.992593\nNext Confusion: 4.881175"}
{"text": "### State\nConfusion: 6.536276\nAction: explain\nReward: 1.337626\nNext Confusion: 5.404844"}
{"text": "### State\nConfusion: 3.131055\nAction: analogize\nReward: -0.642311\nNext Confusion: 3.90995"}
{"text": "### State\nConfusion: 3.734571\nAction: analogize\nReward: -1.202896\nNext Confusion: 4.529602"}
{"text": "### State\nConfusion: 3.614458\nAction: analogize\nReward: -0.375706\nNext Confusion: 3.960335"}
{"text": "### State\nConfusion: 3.754846\nAction: analogize\nReward: 1.114828\nNext Confusion: 2.872017"}
{"text": "### State\nConfusion: 4.282373\nAction: analogize\nReward: 0.047074\nNext Confusion: 3.929965"}
{"text": "### State\nConfusion: 6.072084\nAction: worked_example\nReward: 2.003795\nNext Confusion: 5.110387"}
{"text": "### State\nConfusion: 7.607367\nAction: worked_example\nReward: 1.966738\nNext Confusion: 5.615737"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.198476\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.649977\nAction: explain\nReward: 0.189024\nNext Confusion: 3.923286"}
{"text": "### State\nConfusion: 4.086252\nAction: analogize\nReward: -1.095851\nNext Confusion: 4.539408"}
{"text": "### State\nConfusion: 3.548788\nAction: analogize\nReward: 0.200725\nNext Confusion: 3.887756"}
{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.763297\nNext Confusion: 9.680236"}
{"text": "### State\nConfusion: 3.073316\nAction: worked_example\nReward: -0.489686\nNext Confusion: 4.424902"}
{"text": "### State\nConfusion: 6.668706\nAction: correct_fact\nReward: 1.328342\nNext Confusion: 5.888302"}
{"text": "### State\nConfusion: 7.088053\nAction: analogize\nReward: 0.347794\nNext Confusion: 6.982926"}
{"text": "### State\nConfusion: 7.178564\nAction: worked_example\nReward: 1.450848\nNext Confusion: 6.387661"}
{"text": "### State\nConfusion: 1.744545\nAction: explain\nReward: 0.740145\nNext Confusion: 0.861106"}
{"text": "### State\nConfusion: 8.584458\nAction: worked_example\nReward: 2.582664\nNext Confusion: 7.174828"}
{"text": "### State\nConfusion: 3.625311\nAction: explain\nReward: 0.355308\nNext Confusion: 3.380443"}
{"text": "### State\nConfusion: 4.523099\nAction: analogize\nReward: -0.790397\nNext Confusion: 5.10107"}
{"text": "### State\nConfusion: 4.948718\nAction: analogize\nReward: -1.799253\nNext Confusion: 5.516174"}
{"text": "### State\nConfusion: 6.113761\nAction: question\nReward: 0.485334\nNext Confusion: 5.434281"}
{"text": "### State\nConfusion: 5.818233\nAction: question\nReward: 1.002597\nNext Confusion: 5.021882"}
{"text": "### State\nConfusion: 1.768893\nAction: analogize\nReward: 0.961322\nNext Confusion: 1.464652"}
{"text": "### State\nConfusion: 2.894877\nAction: question\nReward: 1.014309\nNext Confusion: 2.353802"}
{"text": "### State\nConfusion: 5.089719\nAction: analogize\nReward: 0.77709\nNext Confusion: 4.554134"}
{"text": "### State\nConfusion: 4.265329\nAction: analogize\nReward: 0.503933\nNext Confusion: 4.53531"}
{"text": "### State\nConfusion: 2.975997\nAction: question\nReward: 0.782601\nNext Confusion: 2.096145"}
{"text": "### State\nConfusion: 2.969771\nAction: analogize\nReward: -0.040926\nNext Confusion: 2.708039"}
{"text": "### State\nConfusion: 5.588715\nAction: question\nReward: -0.768939\nNext Confusion: 5.495356"}
{"text": "### State\nConfusion: 7.738388\nAction: correct_fact\nReward: 0.097154\nNext Confusion: 7.895498"}
{"text": "### State\nConfusion: 9.789788\nAction: question\nReward: 0.507937\nNext Confusion: 9.316481"}
{"text": "### State\nConfusion: 2.122493\nAction: analogize\nReward: 0.559706\nNext Confusion: 2.205507"}
{"text": "### State\nConfusion: 5.07307\nAction: explain\nReward: 1.405116\nNext Confusion: 4.673156"}
{"text": "### State\nConfusion: 5.187453\nAction: question\nReward: -0.206875\nNext Confusion: 5.470587"}
{"text": "### State\nConfusion: 7.89118\nAction: worked_example\nReward: 3.005133\nNext Confusion: 5.957319"}
{"text": "### State\nConfusion: 3.463503\nAction: analogize\nReward: 0.233139\nNext Confusion: 3.116866"}
{"text": "### State\nConfusion: 4.490139\nAction: analogize\nReward: -1.247777\nNext Confusion: 4.560209"}
{"text": "### State\nConfusion: 6.336253\nAction: analogize\nReward: 0.192\nNext Confusion: 5.439387"}
{"text": "### State\nConfusion: 3.879744\nAction: explain\nReward: 1.933112\nNext Confusion: 2.894167"}
{"text": "### State\nConfusion: 3.602151\nAction: analogize\nReward: 0.405749\nNext Confusion: 3.220006"}
{"text": "### State\nConfusion: 3.939015\nAction: explain\nReward: -0.410771\nNext Confusion: 4.216709"}
{"text": "### State\nConfusion: 4.013006\nAction: analogize\nReward: -0.508277\nNext Confusion: 4.368215"}
{"text": "### State\nConfusion: 6.478064\nAction: analogize\nReward: -1.128476\nNext Confusion: 7.656253"}
{"text": "### State\nConfusion: 7.29305\nAction: analogize\nReward: 1.065129\nNext Confusion: 6.880034"}
{"text": "### State\nConfusion: 2.54377\nAction: analogize\nReward: -0.940805\nNext Confusion: 2.54594"}
{"text": "### State\nConfusion: 4.214607\nAction: analogize\nReward: -1.635581\nNext Confusion: 5.250125"}
{"text": "### State\nConfusion: 3.376368\nAction: question\nReward: 1.454091\nNext Confusion: 2.159586"}
{"text": "### State\nConfusion: 4.664003\nAction: analogize\nReward: -0.481433\nNext Confusion: 4.975037"}
{"text": "### State\nConfusion: 5.789483\nAction: analogize\nReward: 0.351938\nNext Confusion: 5.524223"}
{"text": "### State\nConfusion: 4.134062\nAction: analogize\nReward: -0.746534\nNext Confusion: 4.309115"}
{"text": "### State\nConfusion: 2.75702\nAction: worked_example\nReward: 1.686613\nNext Confusion: 1.597392"}
{"text": "### State\nConfusion: 5.175445\nAction: worked_example\nReward: 2.26482\nNext Confusion: 3.099191"}
{"text": "### State\nConfusion: 6.065045\nAction: analogize\nReward: -1.475176\nNext Confusion: 7.14422"}
{"text": "### State\nConfusion: 4.352107\nAction: analogize\nReward: -0.176949\nNext Confusion: 4.417541"}
{"text": "### State\nConfusion: 5.816594\nAction: analogize\nReward: -2.146036\nNext Confusion: 7.010947"}
{"text": "### State\nConfusion: 3.675687\nAction: analogize\nReward: -0.013575\nNext Confusion: 3.6197"}
{"text": "### State\nConfusion: 6.159766\nAction: explain\nReward: 0.721524\nNext Confusion: 5.935606"}
{"text": "### State\nConfusion: 3.588376\nAction: question\nReward: 0.441773\nNext Confusion: 3.378304"}
{"text": "### State\nConfusion: 6.344508\nAction: question\nReward: 1.770076\nNext Confusion: 5.09023"}
{"text": "### State\nConfusion: 3.821794\nAction: analogize\nReward: -0.387546\nNext Confusion: 4.346112"}
{"text": "### State\nConfusion: 7.260673\nAction: correct_fact\nReward: 0.008941\nNext Confusion: 7.553966"}
{"text": "### State\nConfusion: 5.776783\nAction: analogize\nReward: -0.838409\nNext Confusion: 6.757849"}
{"text": "### State\nConfusion: 3.874547\nAction: explain\nReward: 0.548746\nNext Confusion: 3.753034"}
{"text": "### State\nConfusion: 7.218959\nAction: worked_example\nReward: 0.624518\nNext Confusion: 6.850143"}
{"text": "### State\nConfusion: 7.365728\nAction: explain\nReward: 1.423284\nNext Confusion: 6.245595"}
{"text": "### State\nConfusion: 4.087149\nAction: correct_fact\nReward: 0.54914\nNext Confusion: 4.116221"}
{"text": "### State\nConfusion: 9.569596\nAction: worked_example\nReward: 1.091264\nNext Confusion: 8.481068"}
{"text": "### State\nConfusion: 7.997488\nAction: question\nReward: 0.609481\nNext Confusion: 6.839767"}
{"text": "### State\nConfusion: 6.471966\nAction: analogize\nReward: 0.734048\nNext Confusion: 6.210516"}
{"text": "### State\nConfusion: 5.247116\nAction: analogize\nReward: 0.085486\nNext Confusion: 5.148559"}
{"text": "### State\nConfusion: 5.63931\nAction: question\nReward: 0.358263\nNext Confusion: 5.389858"}
{"text": "### State\nConfusion: 6.156111\nAction: analogize\nReward: 0.161887\nNext Confusion: 5.62754"}
{"text": "### State\nConfusion: 3.755793\nAction: correct_fact\nReward: -0.840027\nNext Confusion: 4.740012"}
{"text": "### State\nConfusion: 7.445958\nAction: worked_example\nReward: 3.084386\nNext Confusion: 5.851674"}
{"text": "### State\nConfusion: 7.444563\nAction: explain\nReward: 0.010491\nNext Confusion: 6.992737"}
{"text": "### State\nConfusion: 4.028035\nAction: analogize\nReward: -0.705654\nNext Confusion: 4.397549"}
{"text": "### State\nConfusion: 3.586522\nAction: correct_fact\nReward: -0.890749\nNext Confusion: 3.880734"}
{"text": "### State\nConfusion: 3.824157\nAction: analogize\nReward: 0.862425\nNext Confusion: 2.996262"}
{"text": "### State\nConfusion: 3.883596\nAction: worked_example\nReward: 1.802463\nNext Confusion: 2.584208"}
{"text": "### State\nConfusion: 5.404881\nAction: correct_fact\nReward: 0.081433\nNext Confusion: 5.07905"}
{"text": "### State\nConfusion: 2.991435\nAction: analogize\nReward: -0.433469\nNext Confusion: 3.357399"}
{"text": "### State\nConfusion: 3.545515\nAction: analogize\nReward: -0.007121\nNext Confusion: 3.71352"}
{"text": "### State\nConfusion: 3.660067\nAction: analogize\nReward: -0.928261\nNext Confusion: 4.0236"}
{"text": "### State\nConfusion: 7.0235\nAction: question\nReward: 0.20218\nNext Confusion: 6.875191"}
{"text": "### State\nConfusion: 9.746426\nAction: analogize\nReward: 0.725931\nNext Confusion: 9.392016"}
{"text": "### State\nConfusion: 2.125439\nAction: explain\nReward: -1.062487\nNext Confusion: 2.867128"}
{"text": "### State\nConfusion: 3.227817\nAction: analogize\nReward: -0.014357\nNext Confusion: 3.537508"}
{"text": "### State\nConfusion: 3.876828\nAction: analogize\nReward: 0.060063\nNext Confusion: 3.857296"}
{"text": "### State\nConfusion: 7.089156\nAction: analogize\nReward: 0.444024\nNext Confusion: 7.298431"}
{"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 2.962947\nNext Confusion: 8.246628"}
{"text": "### State\nConfusion: 5.594191\nAction: explain\nReward: 0.679804\nNext Confusion: 5.233611"}
{"text": "### State\nConfusion: 6.283862\nAction: analogize\nReward: 0.743037\nNext Confusion: 6.186083"}
{"text": "### State\nConfusion: 5.072781\nAction: question\nReward: 1.101868\nNext Confusion: 3.897992"}
{"text": "### State\nConfusion: 4.118371\nAction: correct_fact\nReward: 0.029128\nNext Confusion: 3.826488"}
{"text": "### State\nConfusion: 7.527729\nAction: correct_fact\nReward: 0.845789\nNext Confusion: 7.247021"}
{"text": "### State\nConfusion: 6.289606\nAction: analogize\nReward: 0.422815\nNext Confusion: 5.600898"}
{"text": "### State\nConfusion: 4.523598\nAction: analogize\nReward: -0.188144\nNext Confusion: 4.558487"}
{"text": "### State\nConfusion: 3.763717\nAction: question\nReward: 0.6122\nNext Confusion: 3.588942"}
{"text": "### State\nConfusion: 8.323666\nAction: correct_fact\nReward: 0.409538\nNext Confusion: 8.021692"}
{"text": "### State\nConfusion: 5.981031\nAction: worked_example\nReward: 0.917426\nNext Confusion: 5.423538"}
{"text": "### State\nConfusion: 3.515466\nAction: analogize\nReward: -0.103899\nNext Confusion: 3.695133"}
{"text": "### State\nConfusion: 6.429372\nAction: explain\nReward: -0.029875\nNext Confusion: 6.378915"}
{"text": "### State\nConfusion: 5.750795\nAction: analogize\nReward: -0.089562\nNext Confusion: 5.622753"}
{"text": "### State\nConfusion: 5.025332\nAction: analogize\nReward: -1.312083\nNext Confusion: 6.373254"}
{"text": "### State\nConfusion: 7.359441\nAction: analogize\nReward: -0.720074\nNext Confusion: 7.755872"}
{"text": "### State\nConfusion: 3.776062\nAction: question\nReward: -0.134128\nNext Confusion: 3.433186"}
{"text": "### State\nConfusion: 1.355683\nAction: explain\nReward: -0.00118\nNext Confusion: 1.431554"}
{"text": "### State\nConfusion: 3.912807\nAction: correct_fact\nReward: -1.548555\nNext Confusion: 4.628972"}
{"text": "### State\nConfusion: 3.794357\nAction: correct_fact\nReward: -0.059373\nNext Confusion: 3.670245"}
{"text": "### State\nConfusion: 5.059543\nAction: analogize\nReward: 0.032602\nNext Confusion: 5.353498"}
{"text": "### State\nConfusion: 3.077356\nAction: analogize\nReward: 0.011284\nNext Confusion: 2.947043"}
{"text": "### State\nConfusion: 5.534637\nAction: explain\nReward: 1.721247\nNext Confusion: 4.352992"}
{"text": "### State\nConfusion: 4.366849\nAction: analogize\nReward: -0.917181\nNext Confusion: 5.294018"}
{"text": "### State\nConfusion: 6.835693\nAction: explain\nReward: 0.456146\nNext Confusion: 6.470298"}
{"text": "### State\nConfusion: 9.452318\nAction: analogize\nReward: 0.033633\nNext Confusion: 9.382432"}
{"text": "### State\nConfusion: 3.400432\nAction: analogize\nReward: -0.369133\nNext Confusion: 3.65212"}
{"text": "### State\nConfusion: 5.731019\nAction: analogize\nReward: -1.289566\nNext Confusion: 6.41672"}
{"text": "### State\nConfusion: 3.618958\nAction: explain\nReward: -0.907926\nNext Confusion: 3.69676"}
{"text": "### State\nConfusion: 3.088994\nAction: explain\nReward: 0.2848\nNext Confusion: 2.83016"}
{"text": "### State\nConfusion: 7.169104\nAction: analogize\nReward: 1.486468\nNext Confusion: 6.278144"}
{"text": "### State\nConfusion: 4.010123\nAction: question\nReward: 0.990315\nNext Confusion: 2.898942"}
{"text": "### State\nConfusion: 3.325781\nAction: analogize\nReward: -0.847395\nNext Confusion: 3.638474"}
{"text": "### State\nConfusion: 4.63962\nAction: question\nReward: 1.030137\nNext Confusion: 3.610102"}
{"text": "### State\nConfusion: 3.698579\nAction: analogize\nReward: -0.018877\nNext Confusion: 4.387045"}
{"text": "### State\nConfusion: 1.946232\nAction: analogize\nReward: -1.351303\nNext Confusion: 3.380728"}
{"text": "### State\nConfusion: 4.236727\nAction: analogize\nReward: -0.73162\nNext Confusion: 5.393082"}
{"text": "### State\nConfusion: 8.190957\nAction: analogize\nReward: 0.404816\nNext Confusion: 8.214826"}
{"text": "### State\nConfusion: 6.995987\nAction: analogize\nReward: 0.571194\nNext Confusion: 6.929503"}
{"text": "### State\nConfusion: 2.113818\nAction: explain\nReward: -0.30514\nNext Confusion: 2.129459"}
{"text": "### State\nConfusion: 3.684304\nAction: analogize\nReward: -2.017778\nNext Confusion: 5.452133"}
{"text": "### State\nConfusion: 4.239281\nAction: worked_example\nReward: 1.860685\nNext Confusion: 2.940046"}
{"text": "### State\nConfusion: 3.676164\nAction: analogize\nReward: 0.091998\nNext Confusion: 4.183175"}
{"text": "### State\nConfusion: 4.133906\nAction: analogize\nReward: -0.55272\nNext Confusion: 4.581522"}
{"text": "### State\nConfusion: 4.054602\nAction: correct_fact\nReward: -0.177678\nNext Confusion: 4.056369"}
{"text": "### State\nConfusion: 4.639642\nAction: analogize\nReward: -0.272352\nNext Confusion: 5.553159"}
{"text": "### State\nConfusion: 5.635794\nAction: analogize\nReward: -0.705183\nNext Confusion: 6.451484"}
{"text": "### State\nConfusion: 5.543668\nAction: correct_fact\nReward: 0.885091\nNext Confusion: 4.846454"}
{"text": "### State\nConfusion: 3.267564\nAction: analogize\nReward: -0.098069\nNext Confusion: 3.530591"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.338373\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.627039\nAction: analogize\nReward: -0.495228\nNext Confusion: 3.729506"}
{"text": "### State\nConfusion: 2.909357\nAction: analogize\nReward: 0.712175\nNext Confusion: 2.649315"}
{"text": "### State\nConfusion: 5.226436\nAction: explain\nReward: -0.593975\nNext Confusion: 5.565649"}
{"text": "### State\nConfusion: 3.411177\nAction: analogize\nReward: -0.915413\nNext Confusion: 4.738223"}
{"text": "### State\nConfusion: 2.31554\nAction: correct_fact\nReward: 0.05716\nNext Confusion: 1.97111"}
{"text": "### State\nConfusion: 3.476088\nAction: question\nReward: 2.026609\nNext Confusion: 2.038584"}
{"text": "### State\nConfusion: 5.963129\nAction: explain\nReward: -0.663593\nNext Confusion: 5.924516"}
{"text": "### State\nConfusion: 4.23041\nAction: analogize\nReward: -0.798136\nNext Confusion: 5.034618"}
{"text": "### State\nConfusion: 4.460384\nAction: analogize\nReward: 0.281733\nNext Confusion: 4.369669"}
{"text": "### State\nConfusion: 3.952235\nAction: analogize\nReward: -0.543262\nNext Confusion: 4.522062"}
{"text": "### State\nConfusion: 4.810875\nAction: analogize\nReward: -0.724943\nNext Confusion: 4.581037"}
{"text": "### State\nConfusion: 3.226505\nAction: worked_example\nReward: 0.912334\nNext Confusion: 2.419872"}
{"text": "### State\nConfusion: 9.691536\nAction: worked_example\nReward: 1.77497\nNext Confusion: 8.428343"}
{"text": "### State\nConfusion: 5.033921\nAction: question\nReward: 0.196992\nNext Confusion: 4.573141"}
{"text": "### State\nConfusion: 3.517061\nAction: analogize\nReward: -0.263287\nNext Confusion: 3.85532"}
{"text": "### State\nConfusion: 7.032363\nAction: worked_example\nReward: 1.873676\nNext Confusion: 5.367367"}
{"text": "### State\nConfusion: 4.801627\nAction: analogize\nReward: -0.242848\nNext Confusion: 5.514838"}
{"text": "### State\nConfusion: 6.834419\nAction: analogize\nReward: 0.4653\nNext Confusion: 7.076133"}
{"text": "### State\nConfusion: 6.279381\nAction: question\nReward: 0.830391\nNext Confusion: 5.205801"}
{"text": "### State\nConfusion: 8.198117\nAction: worked_example\nReward: 1.509305\nNext Confusion: 7.05779"}
{"text": "### State\nConfusion: 3.336602\nAction: analogize\nReward: 0.37946\nNext Confusion: 3.545989"}
{"text": "### State\nConfusion: 6.838361\nAction: analogize\nReward: -0.741956\nNext Confusion: 7.300534"}
{"text": "### State\nConfusion: 5.791677\nAction: explain\nReward: 0.437775\nNext Confusion: 5.755174"}
{"text": "### State\nConfusion: 9.722909\nAction: correct_fact\nReward: -1.217724\nNext Confusion: 9.802279"}
{"text": "### State\nConfusion: 7.018476\nAction: question\nReward: 0.563995\nNext Confusion: 6.405234"}
{"text": "### State\nConfusion: 4.592573\nAction: analogize\nReward: -2.146348\nNext Confusion: 5.838669"}
{"text": "### State\nConfusion: 5.724254\nAction: analogize\nReward: 1.270602\nNext Confusion: 5.397553"}
{"text": "### State\nConfusion: 4.446648\nAction: worked_example\nReward: -0.503034\nNext Confusion: 4.548247"}
{"text": "### State\nConfusion: 5.499\nAction: explain\nReward: 0.310256\nNext Confusion: 5.026456"}
{"text": "### State\nConfusion: 3.483297\nAction: question\nReward: 0.985335\nNext Confusion: 2.828101"}
{"text": "### State\nConfusion: 2.728559\nAction: explain\nReward: 0.672884\nNext Confusion: 2.04629"}
{"text": "### State\nConfusion: 6.694008\nAction: question\nReward: -0.553633\nNext Confusion: 7.286873"}
{"text": "### State\nConfusion: 2.769368\nAction: question\nReward: 0.406163\nNext Confusion: 2.904883"}
{"text": "### State\nConfusion: 5.757645\nAction: correct_fact\nReward: 0.664011\nNext Confusion: 5.369445"}
{"text": "### State\nConfusion: 4.197348\nAction: analogize\nReward: 1.133919\nNext Confusion: 3.996284"}
{"text": "### State\nConfusion: 5.534684\nAction: explain\nReward: -0.725343\nNext Confusion: 6.430503"}
{"text": "### State\nConfusion: 2.75554\nAction: worked_example\nReward: 1.502689\nNext Confusion: 1.00548"}
{"text": "### State\nConfusion: 4.337378\nAction: analogize\nReward: 0.228474\nNext Confusion: 4.612438"}
{"text": "### State\nConfusion: 4.446347\nAction: analogize\nReward: -0.524076\nNext Confusion: 5.001291"}
{"text": "### State\nConfusion: 5.104956\nAction: analogize\nReward: -0.163479\nNext Confusion: 5.520312"}
{"text": "### State\nConfusion: 3.629805\nAction: explain\nReward: -0.095137\nNext Confusion: 4.171221"}
{"text": "### State\nConfusion: 3.944147\nAction: analogize\nReward: -0.668701\nNext Confusion: 5.047819"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.550446\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 9.356881\nAction: correct_fact\nReward: -1.110132\nNext Confusion: 9.71459"}
{"text": "### State\nConfusion: 4.205706\nAction: explain\nReward: -0.234715\nNext Confusion: 4.437668"}
{"text": "### State\nConfusion: 5.271949\nAction: analogize\nReward: -0.57591\nNext Confusion: 5.56199"}
{"text": "### State\nConfusion: 3.972523\nAction: analogize\nReward: 0.4965\nNext Confusion: 4.568244"}
{"text": "### State\nConfusion: 6.749858\nAction: analogize\nReward: -0.236824\nNext Confusion: 7.190533"}
{"text": "### State\nConfusion: 8.942906\nAction: correct_fact\nReward: 0.233019\nNext Confusion: 8.72936"}
{"text": "### State\nConfusion: 3.798457\nAction: analogize\nReward: -0.124735\nNext Confusion: 4.220905"}
{"text": "### State\nConfusion: 4.55122\nAction: analogize\nReward: -1.185639\nNext Confusion: 5.466571"}
{"text": "### State\nConfusion: 3.276233\nAction: analogize\nReward: 0.248292\nNext Confusion: 3.85905"}
{"text": "### State\nConfusion: 4.059147\nAction: correct_fact\nReward: 0.407354\nNext Confusion: 3.503689"}
{"text": "### State\nConfusion: 4.406315\nAction: correct_fact\nReward: -0.603789\nNext Confusion: 4.506692"}
{"text": "### State\nConfusion: 2.644092\nAction: analogize\nReward: -0.019094\nNext Confusion: 3.00082"}
{"text": "### State\nConfusion: 5.603155\nAction: worked_example\nReward: 1.284057\nNext Confusion: 4.500092"}
{"text": "### State\nConfusion: 3.218315\nAction: analogize\nReward: -0.348337\nNext Confusion: 3.556726"}
{"text": "### State\nConfusion: 3.800715\nAction: analogize\nReward: -0.820115\nNext Confusion: 4.452045"}
{"text": "### State\nConfusion: 1.973659\nAction: analogize\nReward: -0.355537\nNext Confusion: 2.784944"}
{"text": "### State\nConfusion: 4.688923\nAction: question\nReward: 1.102507\nNext Confusion: 3.759745"}
{"text": "### State\nConfusion: 3.492448\nAction: analogize\nReward: 0.101092\nNext Confusion: 3.807346"}
{"text": "### State\nConfusion: 2.46282\nAction: analogize\nReward: -1.078748\nNext Confusion: 3.07609"}
{"text": "### State\nConfusion: 6.213417\nAction: analogize\nReward: 0.576302\nNext Confusion: 5.873172"}
{"text": "### State\nConfusion: 3.541289\nAction: explain\nReward: 0.015983\nNext Confusion: 3.708063"}
{"text": "### State\nConfusion: 5.982512\nAction: analogize\nReward: -0.190504\nNext Confusion: 6.230028"}
{"text": "### State\nConfusion: 5.589458\nAction: worked_example\nReward: 1.965483\nNext Confusion: 5.141684"}
{"text": "### State\nConfusion: 3.292279\nAction: question\nReward: 0.632446\nNext Confusion: 2.64234"}
{"text": "### State\nConfusion: 1.361117\nAction: analogize\nReward: -0.449704\nNext Confusion: 2.123296"}
{"text": "### State\nConfusion: 5.606995\nAction: worked_example\nReward: 0.968661\nNext Confusion: 5.075988"}
{"text": "### State\nConfusion: 3.824053\nAction: worked_example\nReward: 1.445538\nNext Confusion: 2.7174"}
{"text": "### State\nConfusion: 3.619457\nAction: analogize\nReward: -0.932034\nNext Confusion: 4.530998"}
{"text": "### State\nConfusion: 4.039429\nAction: question\nReward: 0.626477\nNext Confusion: 3.597227"}
{"text": "### State\nConfusion: 5.48916\nAction: question\nReward: 0.787362\nNext Confusion: 5.328984"}
{"text": "### State\nConfusion: 3.539406\nAction: analogize\nReward: -0.441082\nNext Confusion: 4.028983"}
{"text": "### State\nConfusion: 3.917401\nAction: analogize\nReward: -0.314303\nNext Confusion: 4.375538"}
{"text": "### State\nConfusion: 4.657824\nAction: analogize\nReward: 0.393414\nNext Confusion: 4.629042"}
{"text": "### State\nConfusion: 9.817489\nAction: analogize\nReward: -0.384728\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.5677\nAction: explain\nReward: -0.380941\nNext Confusion: 3.946322"}
{"text": "### State\nConfusion: 3.643777\nAction: analogize\nReward: -1.920887\nNext Confusion: 4.448967"}
{"text": "### State\nConfusion: 4.602626\nAction: explain\nReward: 0.775825\nNext Confusion: 3.995621"}
{"text": "### State\nConfusion: 3.493214\nAction: explain\nReward: -0.128712\nNext Confusion: 3.723056"}
{"text": "### State\nConfusion: 4.024774\nAction: analogize\nReward: -2.039025\nNext Confusion: 4.794768"}
{"text": "### State\nConfusion: 5.567354\nAction: question\nReward: 0.953785\nNext Confusion: 5.143233"}
{"text": "### State\nConfusion: 4.857438\nAction: explain\nReward: 0.443781\nNext Confusion: 4.433139"}
{"text": "### State\nConfusion: 6.066289\nAction: explain\nReward: -0.315992\nNext Confusion: 6.312766"}
{"text": "### State\nConfusion: 7.202041\nAction: analogize\nReward: 0.352557\nNext Confusion: 7.09304"}
{"text": "### State\nConfusion: 6.867551\nAction: explain\nReward: -0.373433\nNext Confusion: 7.375757"}
{"text": "### State\nConfusion: 4.086685\nAction: question\nReward: 0.298342\nNext Confusion: 3.762268"}
{"text": "### State\nConfusion: 8.616289\nAction: question\nReward: 1.143804\nNext Confusion: 7.619493"}
{"text": "### State\nConfusion: 4.24828\nAction: analogize\nReward: -0.861769\nNext Confusion: 5.43898"}
{"text": "### State\nConfusion: 2.615268\nAction: correct_fact\nReward: 1.0784\nNext Confusion: 1.460789"}
{"text": "### State\nConfusion: 5.23111\nAction: correct_fact\nReward: -0.202571\nNext Confusion: 5.344155"}
{"text": "### State\nConfusion: 7.022273\nAction: analogize\nReward: -1.226544\nNext Confusion: 7.900564"}
{"text": "### State\nConfusion: 7.907241\nAction: question\nReward: 1.464275\nNext Confusion: 6.918559"}
{"text": "### State\nConfusion: 7.12112\nAction: correct_fact\nReward: 1.403988\nNext Confusion: 6.659081"}
{"text": "### State\nConfusion: 3.515963\nAction: analogize\nReward: -0.759772\nNext Confusion: 4.787727"}
{"text": "### State\nConfusion: 5.604148\nAction: worked_example\nReward: 1.415625\nNext Confusion: 4.33325"}
{"text": "### State\nConfusion: 5.108318\nAction: analogize\nReward: -1.22629\nNext Confusion: 5.783868"}
{"text": "### State\nConfusion: 4.742907\nAction: analogize\nReward: -0.264648\nNext Confusion: 4.893974"}
{"text": "### State\nConfusion: 2.955016\nAction: explain\nReward: 0.640322\nNext Confusion: 2.963681"}
{"text": "### State\nConfusion: 3.553782\nAction: analogize\nReward: 0.463052\nNext Confusion: 3.616991"}
{"text": "### State\nConfusion: 4.315825\nAction: correct_fact\nReward: 0.128917\nNext Confusion: 4.809463"}
{"text": "### State\nConfusion: 6.324152\nAction: worked_example\nReward: 2.24065\nNext Confusion: 4.650812"}
{"text": "### State\nConfusion: 5.497759\nAction: explain\nReward: 1.417489\nNext Confusion: 4.601767"}
{"text": "### State\nConfusion: 3.482203\nAction: analogize\nReward: -0.362165\nNext Confusion: 3.796918"}
{"text": "### State\nConfusion: 4.651682\nAction: explain\nReward: 0.920595\nNext Confusion: 4.110992"}
{"text": "### State\nConfusion: 3.818045\nAction: worked_example\nReward: 0.974113\nNext Confusion: 2.51135"}
{"text": "### State\nConfusion: 4.387833\nAction: question\nReward: 0.807106\nNext Confusion: 3.467108"}
{"text": "### State\nConfusion: 7.444748\nAction: explain\nReward: 0.535044\nNext Confusion: 7.314575"}
{"text": "### State\nConfusion: 8.638442\nAction: analogize\nReward: -0.385962\nNext Confusion: 9.068825"}
{"text": "### State\nConfusion: 3.031337\nAction: analogize\nReward: 1.474946\nNext Confusion: 2.794966"}
{"text": "### State\nConfusion: 7.27936\nAction: analogize\nReward: 0.607482\nNext Confusion: 6.99242"}
{"text": "### State\nConfusion: 3.787557\nAction: analogize\nReward: -0.636365\nNext Confusion: 4.095346"}
{"text": "### State\nConfusion: 7.239841\nAction: analogize\nReward: -0.911259\nNext Confusion: 8.592769"}
{"text": "### State\nConfusion: 7.590043\nAction: worked_example\nReward: 0.271013\nNext Confusion: 7.004351"}
{"text": "### State\nConfusion: 8.092934\nAction: worked_example\nReward: 1.788613\nNext Confusion: 6.377313"}
{"text": "### State\nConfusion: 5.034043\nAction: explain\nReward: 0.301633\nNext Confusion: 5.184921"}
{"text": "### State\nConfusion: 4.350404\nAction: explain\nReward: 0.861178\nNext Confusion: 3.60923"}
{"text": "### State\nConfusion: 2.270528\nAction: explain\nReward: 0.728106\nNext Confusion: 1.638538"}
{"text": "### State\nConfusion: 4.23141\nAction: analogize\nReward: -0.872446\nNext Confusion: 4.772834"}
{"text": "### State\nConfusion: 4.371902\nAction: worked_example\nReward: 0.463624\nNext Confusion: 3.571019"}
{"text": "### State\nConfusion: 6.633394\nAction: question\nReward: -0.168295\nNext Confusion: 6.514897"}
{"text": "### State\nConfusion: 6.584599\nAction: question\nReward: 0.347541\nNext Confusion: 6.205327"}
{"text": "### State\nConfusion: 10.0\nAction: question\nReward: 1.610989\nNext Confusion: 9.107007"}
{"text": "### State\nConfusion: 5.890399\nAction: analogize\nReward: -0.177367\nNext Confusion: 5.894826"}
{"text": "### State\nConfusion: 3.914789\nAction: question\nReward: 0.963476\nNext Confusion: 3.638644"}
{"text": "### State\nConfusion: 4.606835\nAction: analogize\nReward: -1.59837\nNext Confusion: 5.999383"}
{"text": "### State\nConfusion: 5.992104\nAction: analogize\nReward: -0.850966\nNext Confusion: 6.851071"}
{"text": "### State\nConfusion: 4.461924\nAction: correct_fact\nReward: -0.385522\nNext Confusion: 5.865354"}
{"text": "### State\nConfusion: 6.803428\nAction: analogize\nReward: 0.091275\nNext Confusion: 6.193048"}
{"text": "### State\nConfusion: 4.592856\nAction: analogize\nReward: 0.028521\nNext Confusion: 4.672834"}
{"text": "### State\nConfusion: 2.804246\nAction: analogize\nReward: -0.216599\nNext Confusion: 2.494085"}
{"text": "### State\nConfusion: 3.900469\nAction: analogize\nReward: -0.715619\nNext Confusion: 3.978828"}
{"text": "### State\nConfusion: 4.378933\nAction: analogize\nReward: -0.515609\nNext Confusion: 5.002777"}
{"text": "### State\nConfusion: 7.587787\nAction: question\nReward: 1.416706\nNext Confusion: 6.875957"}
{"text": "### State\nConfusion: 3.742195\nAction: analogize\nReward: 0.076616\nNext Confusion: 4.102513"}
{"text": "### State\nConfusion: 3.479584\nAction: explain\nReward: 0.616633\nNext Confusion: 2.790334"}
{"text": "### State\nConfusion: 4.141684\nAction: analogize\nReward: -0.697507\nNext Confusion: 4.529926"}
{"text": "### State\nConfusion: 5.484036\nAction: analogize\nReward: -0.203655\nNext Confusion: 6.119545"}
{"text": "### State\nConfusion: 5.041966\nAction: analogize\nReward: 0.478271\nNext Confusion: 4.934764"}
{"text": "### State\nConfusion: 5.643482\nAction: analogize\nReward: 0.330683\nNext Confusion: 6.250234"}
{"text": "### State\nConfusion: 3.039198\nAction: analogize\nReward: -1.140458\nNext Confusion: 4.141131"}
{"text": "### State\nConfusion: 6.9517\nAction: analogize\nReward: 1.121652\nNext Confusion: 6.827886"}
{"text": "### State\nConfusion: 4.896638\nAction: analogize\nReward: 0.44984\nNext Confusion: 4.884718"}
{"text": "### State\nConfusion: 6.982846\nAction: analogize\nReward: -1.013628\nNext Confusion: 7.971094"}
{"text": "### State\nConfusion: 6.060497\nAction: explain\nReward: 0.290709\nNext Confusion: 5.825891"}
{"text": "### State\nConfusion: 3.599947\nAction: analogize\nReward: 0.693696\nNext Confusion: 3.53343"}
{"text": "### State\nConfusion: 4.170319\nAction: correct_fact\nReward: 0.898607\nNext Confusion: 3.590391"}
{"text": "### State\nConfusion: 7.032939\nAction: analogize\nReward: -0.729945\nNext Confusion: 8.16857"}
{"text": "### State\nConfusion: 4.053638\nAction: worked_example\nReward: 1.543701\nNext Confusion: 2.689349"}
{"text": "### State\nConfusion: 5.90626\nAction: analogize\nReward: -0.607466\nNext Confusion: 6.439805"}
{"text": "### State\nConfusion: 4.583474\nAction: analogize\nReward: -0.97328\nNext Confusion: 5.106393"}
{"text": "### State\nConfusion: 4.896346\nAction: analogize\nReward: -0.610819\nNext Confusion: 5.848339"}
{"text": "### State\nConfusion: 4.550838\nAction: analogize\nReward: -0.475464\nNext Confusion: 4.483279"}
{"text": "### State\nConfusion: 3.38141\nAction: explain\nReward: 0.504023\nNext Confusion: 3.468134"}
{"text": "### State\nConfusion: 4.350189\nAction: correct_fact\nReward: 0.390332\nNext Confusion: 3.921563"}
{"text": "### State\nConfusion: 6.046617\nAction: explain\nReward: 0.985491\nNext Confusion: 5.920085"}
{"text": "### State\nConfusion: 6.886919\nAction: analogize\nReward: 0.792771\nNext Confusion: 6.498229"}
{"text": "### State\nConfusion: 4.564333\nAction: analogize\nReward: 0.421946\nNext Confusion: 4.38198"}
{"text": "### State\nConfusion: 3.378859\nAction: analogize\nReward: -0.767424\nNext Confusion: 3.908994"}
{"text": "### State\nConfusion: 5.998241\nAction: analogize\nReward: -0.010588\nNext Confusion: 6.310215"}
{"text": "### State\nConfusion: 3.777984\nAction: analogize\nReward: 0.490388\nNext Confusion: 3.513688"}
{"text": "### State\nConfusion: 7.019506\nAction: analogize\nReward: -0.255347\nNext Confusion: 8.119068"}
{"text": "### State\nConfusion: 2.699495\nAction: analogize\nReward: -1.213656\nNext Confusion: 3.973977"}
{"text": "### State\nConfusion: 4.276931\nAction: worked_example\nReward: 2.073769\nNext Confusion: 3.504191"}
{"text": "### State\nConfusion: 7.270848\nAction: question\nReward: 0.602047\nNext Confusion: 6.808021"}
{"text": "### State\nConfusion: 6.406384\nAction: explain\nReward: 0.478975\nNext Confusion: 6.852021"}
{"text": "### State\nConfusion: 6.955819\nAction: explain\nReward: -1.291024\nNext Confusion: 6.745135"}
{"text": "### State\nConfusion: 5.362579\nAction: question\nReward: 0.291444\nNext Confusion: 5.313715"}
{"text": "### State\nConfusion: 7.497689\nAction: analogize\nReward: 0.087838\nNext Confusion: 7.549163"}
{"text": "### State\nConfusion: 3.399238\nAction: explain\nReward: 1.114128\nNext Confusion: 2.626598"}
{"text": "### State\nConfusion: 5.609217\nAction: analogize\nReward: -0.140091\nNext Confusion: 5.979127"}
{"text": "### State\nConfusion: 3.267017\nAction: question\nReward: -0.669656\nNext Confusion: 3.420053"}
{"text": "### State\nConfusion: 4.157508\nAction: correct_fact\nReward: -0.824236\nNext Confusion: 4.648945"}
{"text": "### State\nConfusion: 3.989922\nAction: correct_fact\nReward: -0.101446\nNext Confusion: 4.216505"}
{"text": "### State\nConfusion: 2.22757\nAction: analogize\nReward: -1.115784\nNext Confusion: 3.369227"}
{"text": "### State\nConfusion: 8.759944\nAction: worked_example\nReward: 1.584504\nNext Confusion: 7.617804"}
{"text": "### State\nConfusion: 9.389155\nAction: analogize\nReward: 0.213027\nNext Confusion: 9.195565"}
{"text": "### State\nConfusion: 8.246535\nAction: analogize\nReward: 0.202118\nNext Confusion: 8.268204"}
{"text": "### State\nConfusion: 5.907152\nAction: analogize\nReward: 0.268129\nNext Confusion: 6.087424"}
{"text": "### State\nConfusion: 6.813118\nAction: worked_example\nReward: 0.737036\nNext Confusion: 6.27529"}
{"text": "### State\nConfusion: 6.398956\nAction: explain\nReward: 0.268035\nNext Confusion: 5.872039"}
{"text": "### State\nConfusion: 3.590961\nAction: question\nReward: 1.10202\nNext Confusion: 2.945081"}
{"text": "### State\nConfusion: 6.938659\nAction: worked_example\nReward: 2.218891\nNext Confusion: 5.106211"}
{"text": "### State\nConfusion: 3.289629\nAction: analogize\nReward: -0.180204\nNext Confusion: 3.782614"}
{"text": "### State\nConfusion: 7.283386\nAction: analogize\nReward: 1.03868\nNext Confusion: 7.272589"}
{"text": "### State\nConfusion: 2.666499\nAction: worked_example\nReward: 1.600319\nNext Confusion: 1.500345"}
{"text": "### State\nConfusion: 5.878577\nAction: analogize\nReward: 0.211902\nNext Confusion: 5.616477"}
{"text": "### State\nConfusion: 2.740933\nAction: analogize\nReward: -1.123915\nNext Confusion: 3.60959"}
{"text": "### State\nConfusion: 4.148976\nAction: analogize\nReward: 0.320144\nNext Confusion: 4.609655"}
{"text": "### State\nConfusion: 4.86979\nAction: analogize\nReward: -1.519899\nNext Confusion: 5.834114"}
{"text": "### State\nConfusion: 3.21267\nAction: analogize\nReward: 0.160049\nNext Confusion: 3.229467"}
{"text": "### State\nConfusion: 7.290231\nAction: analogize\nReward: 0.607579\nNext Confusion: 7.191082"}
{"text": "### State\nConfusion: 3.509348\nAction: worked_example\nReward: 2.846924\nNext Confusion: 1.174477"}
{"text": "### State\nConfusion: 3.844179\nAction: question\nReward: 0.169155\nNext Confusion: 3.657271"}
{"text": "### State\nConfusion: 3.89389\nAction: analogize\nReward: 0.439295\nNext Confusion: 4.078422"}
{"text": "### State\nConfusion: 4.463733\nAction: analogize\nReward: -0.699769\nNext Confusion: 5.113838"}
{"text": "### State\nConfusion: 3.780769\nAction: explain\nReward: 0.816708\nNext Confusion: 3.649158"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.198227\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 7.127927\nAction: worked_example\nReward: 0.287837\nNext Confusion: 6.968576"}
{"text": "### State\nConfusion: 3.089982\nAction: analogize\nReward: -1.625967\nNext Confusion: 4.475578"}
{"text": "### State\nConfusion: 5.169077\nAction: explain\nReward: 0.535541\nNext Confusion: 5.222714"}
{"text": "### State\nConfusion: 4.358125\nAction: explain\nReward: 0.380645\nNext Confusion: 3.897497"}
{"text": "### State\nConfusion: 3.767699\nAction: analogize\nReward: -1.200743\nNext Confusion: 4.472256"}
{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.535228\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 8.875999\nAction: worked_example\nReward: 0.76591\nNext Confusion: 7.645343"}
{"text": "### State\nConfusion: 3.52916\nAction: analogize\nReward: 0.344736\nNext Confusion: 2.717469"}
{"text": "### State\nConfusion: 2.586894\nAction: explain\nReward: -0.517557\nNext Confusion: 3.244847"}
{"text": "### State\nConfusion: 1.909156\nAction: analogize\nReward: -1.324682\nNext Confusion: 2.857654"}
{"text": "### State\nConfusion: 4.005799\nAction: analogize\nReward: -0.305104\nNext Confusion: 3.669192"}
{"text": "### State\nConfusion: 4.355139\nAction: analogize\nReward: 0.109617\nNext Confusion: 4.115402"}
{"text": "### State\nConfusion: 6.679668\nAction: analogize\nReward: 0.091741\nNext Confusion: 6.599718"}
{"text": "### State\nConfusion: 4.419393\nAction: question\nReward: 1.469747\nNext Confusion: 3.633803"}
{"text": "### State\nConfusion: 6.384556\nAction: worked_example\nReward: 1.95714\nNext Confusion: 5.177717"}
{"text": "### State\nConfusion: 4.349121\nAction: worked_example\nReward: 1.24314\nNext Confusion: 3.26899"}
{"text": "### State\nConfusion: 3.324754\nAction: analogize\nReward: -1.137162\nNext Confusion: 3.613057"}
{"text": "### State\nConfusion: 4.626129\nAction: question\nReward: -0.010993\nNext Confusion: 4.778299"}
{"text": "### State\nConfusion: 5.514156\nAction: question\nReward: 0.89604\nNext Confusion: 4.711784"}
{"text": "### State\nConfusion: 4.125212\nAction: analogize\nReward: -0.73635\nNext Confusion: 4.89324"}
{"text": "### State\nConfusion: 3.397703\nAction: analogize\nReward: -1.003393\nNext Confusion: 4.007573"}
{"text": "### State\nConfusion: 5.827044\nAction: analogize\nReward: 0.119715\nNext Confusion: 5.987204"}
{"text": "### State\nConfusion: 3.580581\nAction: analogize\nReward: -0.213034\nNext Confusion: 4.094987"}
{"text": "### State\nConfusion: 4.512679\nAction: explain\nReward: 1.103546\nNext Confusion: 3.983216"}
{"text": "### State\nConfusion: 3.377749\nAction: explain\nReward: 0.041333\nNext Confusion: 2.830174"}
{"text": "### State\nConfusion: 7.021051\nAction: explain\nReward: -0.138561\nNext Confusion: 6.639877"}
{"text": "### State\nConfusion: 3.677575\nAction: analogize\nReward: -0.28998\nNext Confusion: 4.96881"}
{"text": "### State\nConfusion: 4.841562\nAction: correct_fact\nReward: -1.341953\nNext Confusion: 5.510637"}
{"text": "### State\nConfusion: 5.860694\nAction: question\nReward: 1.317609\nNext Confusion: 4.7185"}
{"text": "### State\nConfusion: 5.116674\nAction: worked_example\nReward: 0.900413\nNext Confusion: 3.554832"}
{"text": "### State\nConfusion: 4.904849\nAction: explain\nReward: 0.922088\nNext Confusion: 4.84363"}
{"text": "### State\nConfusion: 4.016333\nAction: explain\nReward: 0.102206\nNext Confusion: 3.400156"}
{"text": "### State\nConfusion: 3.335467\nAction: analogize\nReward: -0.579045\nNext Confusion: 4.053781"}
{"text": "### State\nConfusion: 3.975311\nAction: explain\nReward: 0.27217\nNext Confusion: 3.46649"}
{"text": "### State\nConfusion: 4.029582\nAction: correct_fact\nReward: -0.498656\nNext Confusion: 4.146517"}
{"text": "### State\nConfusion: 5.653948\nAction: analogize\nReward: 0.806549\nNext Confusion: 5.142787"}
{"text": "### State\nConfusion: 8.509027\nAction: correct_fact\nReward: -0.040474\nNext Confusion: 8.127693"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.018308\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.399515\nAction: analogize\nReward: 0.051091\nNext Confusion: 3.382378"}
{"text": "### State\nConfusion: 4.03549\nAction: analogize\nReward: -0.74119\nNext Confusion: 4.477384"}
{"text": "### State\nConfusion: 3.929249\nAction: question\nReward: 0.308091\nNext Confusion: 3.854811"}
{"text": "### State\nConfusion: 3.167563\nAction: analogize\nReward: -0.026105\nNext Confusion: 3.739793"}
{"text": "### State\nConfusion: 5.641661\nAction: question\nReward: 0.902291\nNext Confusion: 5.095738"}
{"text": "### State\nConfusion: 5.811388\nAction: analogize\nReward: -0.956837\nNext Confusion: 6.566842"}
{"text": "### State\nConfusion: 4.939756\nAction: analogize\nReward: 1.215886\nNext Confusion: 4.202513"}
{"text": "### State\nConfusion: 5.161591\nAction: question\nReward: 0.582941\nNext Confusion: 4.985189"}
{"text": "### State\nConfusion: 8.777076\nAction: correct_fact\nReward: 0.470891\nNext Confusion: 8.279322"}
{"text": "### State\nConfusion: 4.090277\nAction: worked_example\nReward: 1.559957\nNext Confusion: 2.979815"}
{"text": "### State\nConfusion: 3.339561\nAction: worked_example\nReward: 1.660879\nNext Confusion: 2.123369"}
{"text": "### State\nConfusion: 4.107422\nAction: analogize\nReward: -0.121353\nNext Confusion: 4.283752"}
{"text": "### State\nConfusion: 6.483735\nAction: explain\nReward: -1.197914\nNext Confusion: 7.035842"}
{"text": "### State\nConfusion: 5.573504\nAction: explain\nReward: 0.782266\nNext Confusion: 5.434095"}
{"text": "### State\nConfusion: 3.374913\nAction: analogize\nReward: -0.884737\nNext Confusion: 3.438567"}
{"text": "### State\nConfusion: 3.380767\nAction: explain\nReward: 1.294365\nNext Confusion: 2.789468"}
{"text": "### State\nConfusion: 5.549697\nAction: question\nReward: 1.678421\nNext Confusion: 4.247832"}
{"text": "### State\nConfusion: 3.579567\nAction: correct_fact\nReward: 0.672959\nNext Confusion: 3.633714"}
{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.020142\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.408587\nAction: correct_fact\nReward: 0.253955\nNext Confusion: 3.646089"}
{"text": "### State\nConfusion: 2.241163\nAction: question\nReward: 0.458367\nNext Confusion: 1.820895"}
{"text": "### State\nConfusion: 7.083729\nAction: analogize\nReward: -0.546349\nNext Confusion: 7.203736"}
{"text": "### State\nConfusion: 3.053561\nAction: analogize\nReward: 0.344281\nNext Confusion: 2.925175"}
{"text": "### State\nConfusion: 4.379266\nAction: question\nReward: 0.309251\nNext Confusion: 3.804448"}
{"text": "### State\nConfusion: 3.469964\nAction: worked_example\nReward: -1.176672\nNext Confusion: 4.13511"}
{"text": "### State\nConfusion: 3.022251\nAction: analogize\nReward: -0.90958\nNext Confusion: 3.919632"}
{"text": "### State\nConfusion: 5.641866\nAction: analogize\nReward: -0.135453\nNext Confusion: 5.936108"}
{"text": "### State\nConfusion: 4.732938\nAction: explain\nReward: 0.067745\nNext Confusion: 4.250308"}
{"text": "### State\nConfusion: 4.456509\nAction: correct_fact\nReward: -0.713073\nNext Confusion: 4.695286"}
{"text": "### State\nConfusion: 3.703105\nAction: analogize\nReward: 0.354142\nNext Confusion: 3.48034"}
{"text": "### State\nConfusion: 3.761903\nAction: analogize\nReward: -0.013819\nNext Confusion: 3.421196"}
{"text": "### State\nConfusion: 4.269754\nAction: analogize\nReward: 0.027496\nNext Confusion: 4.287079"}
{"text": "### State\nConfusion: 5.663837\nAction: explain\nReward: -0.360503\nNext Confusion: 6.315889"}
{"text": "### State\nConfusion: 5.372044\nAction: question\nReward: 1.080587\nNext Confusion: 4.743021"}
{"text": "### State\nConfusion: 2.501908\nAction: analogize\nReward: -0.534156\nNext Confusion: 3.061587"}
{"text": "### State\nConfusion: 6.000041\nAction: explain\nReward: 0.183818\nNext Confusion: 5.951022"}
{"text": "### State\nConfusion: 4.515942\nAction: question\nReward: 0.491305\nNext Confusion: 4.483781"}
{"text": "### State\nConfusion: 3.967531\nAction: question\nReward: 0.292389\nNext Confusion: 3.614035"}
{"text": "### State\nConfusion: 6.566327\nAction: question\nReward: 0.468035\nNext Confusion: 6.588855"}
{"text": "### State\nConfusion: 5.682308\nAction: worked_example\nReward: 2.358126\nNext Confusion: 3.606454"}
{"text": "### State\nConfusion: 3.510834\nAction: analogize\nReward: -0.694941\nNext Confusion: 3.978493"}
{"text": "### State\nConfusion: 4.583116\nAction: question\nReward: -0.700366\nNext Confusion: 5.10747"}
{"text": "### State\nConfusion: 7.197812\nAction: analogize\nReward: -0.017298\nNext Confusion: 7.292065"}
{"text": "### State\nConfusion: 6.225952\nAction: analogize\nReward: 0.520952\nNext Confusion: 5.862716"}
{"text": "### State\nConfusion: 4.177542\nAction: explain\nReward: 0.807856\nNext Confusion: 3.491008"}
{"text": "### State\nConfusion: 3.897059\nAction: analogize\nReward: 0.338542\nNext Confusion: 4.205773"}
{"text": "### State\nConfusion: 5.013536\nAction: analogize\nReward: -0.733278\nNext Confusion: 4.34735"}
{"text": "### State\nConfusion: 5.245119\nAction: question\nReward: 1.180365\nNext Confusion: 3.996855"}
{"text": "### State\nConfusion: 3.496643\nAction: analogize\nReward: -0.370052\nNext Confusion: 3.667639"}
{"text": "### State\nConfusion: 3.817\nAction: analogize\nReward: 0.105576\nNext Confusion: 3.961266"}
{"text": "### State\nConfusion: 3.864491\nAction: analogize\nReward: 0.184483\nNext Confusion: 4.035629"}
{"text": "### State\nConfusion: 4.685336\nAction: analogize\nReward: -0.242223\nNext Confusion: 4.233638"}
{"text": "### State\nConfusion: 3.923928\nAction: analogize\nReward: -0.662827\nNext Confusion: 3.973851"}
{"text": "### State\nConfusion: 2.349183\nAction: analogize\nReward: -0.344813\nNext Confusion: 2.246699"}
{"text": "### State\nConfusion: 4.223802\nAction: analogize\nReward: 0.470074\nNext Confusion: 4.347506"}
{"text": "### State\nConfusion: 5.313014\nAction: explain\nReward: -1.549022\nNext Confusion: 5.749806"}
{"text": "### State\nConfusion: 4.683001\nAction: analogize\nReward: -0.010471\nNext Confusion: 4.584553"}
{"text": "### State\nConfusion: 4.45898\nAction: analogize\nReward: 0.368356\nNext Confusion: 4.006063"}
{"text": "### State\nConfusion: 5.791666\nAction: analogize\nReward: -0.187986\nNext Confusion: 6.063031"}
{"text": "### State\nConfusion: 3.376903\nAction: analogize\nReward: -1.143536\nNext Confusion: 4.587175"}
{"text": "### State\nConfusion: 9.335158\nAction: worked_example\nReward: 2.152653\nNext Confusion: 8.317876"}
{"text": "### State\nConfusion: 3.621349\nAction: analogize\nReward: 0.582054\nNext Confusion: 3.243578"}
{"text": "### State\nConfusion: 4.74215\nAction: question\nReward: 1.291047\nNext Confusion: 3.348962"}
{"text": "### State\nConfusion: 5.683502\nAction: worked_example\nReward: 1.535629\nNext Confusion: 4.375916"}
{"text": "### State\nConfusion: 4.821148\nAction: worked_example\nReward: 1.047725\nNext Confusion: 3.557803"}
{"text": "### State\nConfusion: 3.149289\nAction: analogize\nReward: 0.743005\nNext Confusion: 2.77953"}
{"text": "### State\nConfusion: 5.93888\nAction: analogize\nReward: 0.198077\nNext Confusion: 5.99675"}
{"text": "### State\nConfusion: 3.840465\nAction: analogize\nReward: -1.367385\nNext Confusion: 4.494304"}
{"text": "### State\nConfusion: 5.273225\nAction: worked_example\nReward: 2.120614\nNext Confusion: 3.968337"}
{"text": "### State\nConfusion: 3.967179\nAction: analogize\nReward: 0.973108\nNext Confusion: 3.397983"}
{"text": "### State\nConfusion: 4.808587\nAction: correct_fact\nReward: -0.525625\nNext Confusion: 5.590657"}
{"text": "### State\nConfusion: 4.419318\nAction: analogize\nReward: -0.39736\nNext Confusion: 5.171327"}
{"text": "### State\nConfusion: 3.713422\nAction: analogize\nReward: -0.434615\nNext Confusion: 4.516202"}
{"text": "### State\nConfusion: 4.057846\nAction: analogize\nReward: -0.236593\nNext Confusion: 4.158133"}
{"text": "### State\nConfusion: 5.812652\nAction: analogize\nReward: 0.194617\nNext Confusion: 6.174366"}
{"text": "### State\nConfusion: 3.962215\nAction: correct_fact\nReward: -0.493041\nNext Confusion: 4.044711"}
{"text": "### State\nConfusion: 3.95616\nAction: correct_fact\nReward: -0.764486\nNext Confusion: 4.490559"}
{"text": "### State\nConfusion: 4.067618\nAction: explain\nReward: 0.753649\nNext Confusion: 3.377144"}
{"text": "### State\nConfusion: 3.963561\nAction: correct_fact\nReward: -0.03958\nNext Confusion: 4.470316"}
{"text": "### State\nConfusion: 4.72749\nAction: correct_fact\nReward: 0.187432\nNext Confusion: 4.308219"}
{"text": "### State\nConfusion: 4.982907\nAction: analogize\nReward: -0.282559\nNext Confusion: 5.749441"}
{"text": "### State\nConfusion: 4.76151\nAction: correct_fact\nReward: -1.59992\nNext Confusion: 5.914158"}
{"text": "### State\nConfusion: 6.825388\nAction: analogize\nReward: -0.197075\nNext Confusion: 7.36307"}
{"text": "### State\nConfusion: 6.623585\nAction: analogize\nReward: -1.496314\nNext Confusion: 7.01574"}
{"text": "### State\nConfusion: 5.225947\nAction: explain\nReward: 0.299021\nNext Confusion: 5.249215"}
{"text": "### State\nConfusion: 4.701275\nAction: question\nReward: 0.376803\nNext Confusion: 4.108999"}
{"text": "### State\nConfusion: 6.925973\nAction: analogize\nReward: -0.90206\nNext Confusion: 7.930647"}
{"text": "### State\nConfusion: 5.79284\nAction: question\nReward: 1.013098\nNext Confusion: 5.272104"}
{"text": "### State\nConfusion: 6.839365\nAction: question\nReward: 0.121905\nNext Confusion: 7.054125"}
{"text": "### State\nConfusion: 3.869141\nAction: analogize\nReward: -0.052189\nNext Confusion: 4.202905"}
{"text": "### State\nConfusion: 3.541264\nAction: analogize\nReward: -0.124331\nNext Confusion: 3.888026"}
{"text": "### State\nConfusion: 8.227653\nAction: analogize\nReward: 0.535528\nNext Confusion: 8.023672"}
{"text": "### State\nConfusion: 3.389575\nAction: analogize\nReward: 0.083942\nNext Confusion: 4.252772"}
{"text": "### State\nConfusion: 6.12198\nAction: analogize\nReward: -0.017456\nNext Confusion: 6.250541"}
{"text": "### State\nConfusion: 3.908567\nAction: analogize\nReward: 0.852892\nNext Confusion: 3.425391"}
{"text": "### State\nConfusion: 5.787926\nAction: analogize\nReward: -0.61093\nNext Confusion: 6.209976"}
{"text": "### State\nConfusion: 3.852343\nAction: correct_fact\nReward: 0.01279\nNext Confusion: 3.730582"}
{"text": "### State\nConfusion: 3.345473\nAction: analogize\nReward: -0.182761\nNext Confusion: 4.095672"}
{"text": "### State\nConfusion: 6.41593\nAction: correct_fact\nReward: -1.011081\nNext Confusion: 6.550212"}
{"text": "### State\nConfusion: 5.542386\nAction: analogize\nReward: 0.529705\nNext Confusion: 5.213415"}
{"text": "### State\nConfusion: 3.542093\nAction: analogize\nReward: -0.150191\nNext Confusion: 3.909852"}
{"text": "### State\nConfusion: 3.518513\nAction: correct_fact\nReward: -0.261489\nNext Confusion: 3.642212"}
{"text": "### State\nConfusion: 8.754671\nAction: analogize\nReward: -0.524548\nNext Confusion: 9.529492"}
{"text": "### State\nConfusion: 3.811084\nAction: analogize\nReward: -0.528901\nNext Confusion: 4.075631"}
{"text": "### State\nConfusion: 2.964789\nAction: worked_example\nReward: 2.127217\nNext Confusion: 1.143061"}
{"text": "### State\nConfusion: 5.281669\nAction: correct_fact\nReward: -0.382373\nNext Confusion: 5.910728"}
{"text": "### State\nConfusion: 3.745115\nAction: analogize\nReward: -0.282543\nNext Confusion: 4.276707"}
{"text": "### State\nConfusion: 9.596537\nAction: worked_example\nReward: 0.622909\nNext Confusion: 8.939276"}
{"text": "### State\nConfusion: 4.492318\nAction: correct_fact\nReward: -0.717\nNext Confusion: 5.227583"}
{"text": "### State\nConfusion: 3.361198\nAction: analogize\nReward: -0.411737\nNext Confusion: 3.623981"}
{"text": "### State\nConfusion: 8.01341\nAction: explain\nReward: 0.07588\nNext Confusion: 7.766606"}
{"text": "### State\nConfusion: 5.438063\nAction: explain\nReward: 2.170198\nNext Confusion: 3.728784"}
{"text": "### State\nConfusion: 4.408485\nAction: analogize\nReward: -1.024377\nNext Confusion: 5.03083"}
{"text": "### State\nConfusion: 3.786148\nAction: analogize\nReward: -1.506429\nNext Confusion: 4.877437"}
{"text": "### State\nConfusion: 3.779745\nAction: worked_example\nReward: 1.154955\nNext Confusion: 3.201455"}
{"text": "### State\nConfusion: 7.605229\nAction: analogize\nReward: -0.006583\nNext Confusion: 7.633517"}
{"text": "### State\nConfusion: 4.4073\nAction: worked_example\nReward: 0.928354\nNext Confusion: 3.085973"}
{"text": "### State\nConfusion: 3.486942\nAction: question\nReward: -0.344526\nNext Confusion: 3.059474"}
{"text": "### State\nConfusion: 8.268665\nAction: analogize\nReward: -0.228095\nNext Confusion: 8.628764"}
{"text": "### State\nConfusion: 4.041603\nAction: worked_example\nReward: 2.745715\nNext Confusion: 2.3753"}
{"text": "### State\nConfusion: 1.978829\nAction: explain\nReward: 0.773021\nNext Confusion: 1.639348"}
{"text": "### State\nConfusion: 5.565595\nAction: analogize\nReward: -0.93579\nNext Confusion: 6.736066"}
{"text": "### State\nConfusion: 3.291308\nAction: explain\nReward: 0.089945\nNext Confusion: 3.398372"}
{"text": "### State\nConfusion: 5.692618\nAction: question\nReward: 0.291638\nNext Confusion: 5.638468"}
{"text": "### State\nConfusion: 6.097005\nAction: worked_example\nReward: 1.610145\nNext Confusion: 4.944908"}
{"text": "### State\nConfusion: 1.66989\nAction: question\nReward: 1.231649\nNext Confusion: 0.598919"}
{"text": "### State\nConfusion: 3.015389\nAction: question\nReward: -0.368871\nNext Confusion: 3.171424"}
{"text": "### State\nConfusion: 1.648728\nAction: worked_example\nReward: 2.086258\nNext Confusion: 0.0"}
{"text": "### State\nConfusion: 6.370827\nAction: correct_fact\nReward: -0.578435\nNext Confusion: 6.86617"}
{"text": "### State\nConfusion: 3.956089\nAction: analogize\nReward: -0.351387\nNext Confusion: 4.47287"}
{"text": "### State\nConfusion: 3.763646\nAction: analogize\nReward: -0.429977\nNext Confusion: 4.198875"}
{"text": "### State\nConfusion: 3.052423\nAction: question\nReward: 1.034561\nNext Confusion: 1.931299"}
{"text": "### State\nConfusion: 7.488086\nAction: analogize\nReward: -0.887759\nNext Confusion: 7.420434"}
{"text": "### State\nConfusion: 3.754042\nAction: question\nReward: 0.725976\nNext Confusion: 2.907831"}
{"text": "### State\nConfusion: 6.115601\nAction: analogize\nReward: -0.929617\nNext Confusion: 7.082544"}
{"text": "### State\nConfusion: 3.945445\nAction: analogize\nReward: -0.264867\nNext Confusion: 4.259188"}
{"text": "### State\nConfusion: 5.845489\nAction: analogize\nReward: -0.983428\nNext Confusion: 6.855581"}
{"text": "### State\nConfusion: 3.867403\nAction: analogize\nReward: 0.147113\nNext Confusion: 4.162812"}
{"text": "### State\nConfusion: 3.782122\nAction: analogize\nReward: -0.020962\nNext Confusion: 3.530049"}
{"text": "### State\nConfusion: 8.655141\nAction: worked_example\nReward: 1.226595\nNext Confusion: 6.920914"}
{"text": "### State\nConfusion: 2.859175\nAction: analogize\nReward: -0.561934\nNext Confusion: 3.274979"}
{"text": "### State\nConfusion: 3.47642\nAction: analogize\nReward: -0.883036\nNext Confusion: 4.15313"}
{"text": "### State\nConfusion: 7.00052\nAction: worked_example\nReward: 0.545365\nNext Confusion: 7.42833"}
{"text": "### State\nConfusion: 3.649172\nAction: analogize\nReward: -0.320634\nNext Confusion: 3.90222"}
{"text": "### State\nConfusion: 2.461097\nAction: question\nReward: 1.631821\nNext Confusion: 2.2893"}
{"text": "### State\nConfusion: 6.028571\nAction: explain\nReward: 0.061264\nNext Confusion: 5.679068"}
{"text": "### State\nConfusion: 3.460609\nAction: analogize\nReward: 0.141836\nNext Confusion: 3.841259"}
{"text": "### State\nConfusion: 6.036307\nAction: analogize\nReward: -0.882709\nNext Confusion: 7.126093"}
{"text": "### State\nConfusion: 2.88672\nAction: analogize\nReward: -0.273486\nNext Confusion: 3.479902"}
{"text": "### State\nConfusion: 2.820439\nAction: analogize\nReward: -0.92288\nNext Confusion: 3.16166"}
{"text": "### State\nConfusion: 4.613919\nAction: explain\nReward: -0.088816\nNext Confusion: 4.993456"}
{"text": "### State\nConfusion: 3.547463\nAction: explain\nReward: 0.985073\nNext Confusion: 2.734695"}
{"text": "### State\nConfusion: 4.721343\nAction: analogize\nReward: 0.242835\nNext Confusion: 5.540349"}
{"text": "### State\nConfusion: 4.678206\nAction: analogize\nReward: 0.387364\nNext Confusion: 4.468099"}
{"text": "### State\nConfusion: 4.515748\nAction: worked_example\nReward: 1.756244\nNext Confusion: 3.44937"}
{"text": "### State\nConfusion: 6.943506\nAction: question\nReward: 1.439846\nNext Confusion: 5.421192"}
{"text": "### State\nConfusion: 4.314752\nAction: correct_fact\nReward: 0.779412\nNext Confusion: 3.266661"}
{"text": "### State\nConfusion: 5.301826\nAction: analogize\nReward: 0.437882\nNext Confusion: 4.87753"}
{"text": "### State\nConfusion: 9.092433\nAction: analogize\nReward: -0.455093\nNext Confusion: 9.519798"}
{"text": "### State\nConfusion: 5.318067\nAction: correct_fact\nReward: 0.820235\nNext Confusion: 5.713635"}
{"text": "### State\nConfusion: 2.411903\nAction: analogize\nReward: -1.105515\nNext Confusion: 3.279925"}
{"text": "### State\nConfusion: 3.247107\nAction: explain\nReward: 0.429104\nNext Confusion: 2.74135"}
{"text": "### State\nConfusion: 6.919804\nAction: question\nReward: 0.885016\nNext Confusion: 6.092538"}
{"text": "### State\nConfusion: 4.203641\nAction: question\nReward: -0.063637\nNext Confusion: 3.856217"}
{"text": "### State\nConfusion: 3.565731\nAction: correct_fact\nReward: 0.730722\nNext Confusion: 2.730823"}
{"text": "### State\nConfusion: 3.939648\nAction: analogize\nReward: -0.425959\nNext Confusion: 3.892347"}
{"text": "### State\nConfusion: 4.518892\nAction: question\nReward: -1.051303\nNext Confusion: 4.840122"}
{"text": "### State\nConfusion: 3.858027\nAction: explain\nReward: 0.299108\nNext Confusion: 3.981068"}
{"text": "### State\nConfusion: 2.704036\nAction: explain\nReward: -0.342533\nNext Confusion: 3.274198"}
{"text": "### State\nConfusion: 3.906797\nAction: explain\nReward: 0.579545\nNext Confusion: 3.39401"}
{"text": "### State\nConfusion: 4.712357\nAction: analogize\nReward: 0.441648\nNext Confusion: 4.503565"}
{"text": "### State\nConfusion: 3.351565\nAction: question\nReward: 0.226935\nNext Confusion: 3.119711"}
{"text": "### State\nConfusion: 3.833758\nAction: explain\nReward: 0.490659\nNext Confusion: 3.624048"}
{"text": "### State\nConfusion: 3.916031\nAction: analogize\nReward: -0.824355\nNext Confusion: 5.0153"}
{"text": "### State\nConfusion: 4.437003\nAction: explain\nReward: 0.433959\nNext Confusion: 4.287864"}
{"text": "### State\nConfusion: 5.354143\nAction: analogize\nReward: -0.384773\nNext Confusion: 6.078944"}
{"text": "### State\nConfusion: 8.400786\nAction: worked_example\nReward: -0.597962\nNext Confusion: 8.667874"}
{"text": "### State\nConfusion: 7.149515\nAction: correct_fact\nReward: 1.278329\nNext Confusion: 6.462344"}
{"text": "### State\nConfusion: 5.102567\nAction: question\nReward: 0.804076\nNext Confusion: 5.377817"}
{"text": "### State\nConfusion: 3.092537\nAction: analogize\nReward: -0.466619\nNext Confusion: 3.818028"}
{"text": "### State\nConfusion: 5.887365\nAction: analogize\nReward: -1.052783\nNext Confusion: 7.368321"}
{"text": "### State\nConfusion: 4.274332\nAction: analogize\nReward: 1.093872\nNext Confusion: 4.345952"}
{"text": "### State\nConfusion: 9.127607\nAction: analogize\nReward: -0.598524\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 4.085041\nAction: analogize\nReward: -1.321249\nNext Confusion: 5.029472"}
{"text": "### State\nConfusion: 6.516422\nAction: worked_example\nReward: 0.462508\nNext Confusion: 5.468136"}
{"text": "### State\nConfusion: 7.023285\nAction: question\nReward: 1.22947\nNext Confusion: 5.970022"}
{"text": "### State\nConfusion: 3.508864\nAction: analogize\nReward: -0.749677\nNext Confusion: 4.38532"}
{"text": "### State\nConfusion: 4.205511\nAction: analogize\nReward: 0.205528\nNext Confusion: 3.6198"}
{"text": "### State\nConfusion: 5.185164\nAction: analogize\nReward: -1.097998\nNext Confusion: 6.383344"}
{"text": "### State\nConfusion: 6.544308\nAction: explain\nReward: 0.402704\nNext Confusion: 6.435021"}
{"text": "### State\nConfusion: 5.587864\nAction: analogize\nReward: 0.020874\nNext Confusion: 5.902252"}
{"text": "### State\nConfusion: 4.968722\nAction: analogize\nReward: -0.261211\nNext Confusion: 5.042747"}
{"text": "### State\nConfusion: 3.320405\nAction: worked_example\nReward: 1.129541\nNext Confusion: 2.346201"}
{"text": "### State\nConfusion: 4.647589\nAction: explain\nReward: 1.044288\nNext Confusion: 4.038287"}
{"text": "### State\nConfusion: 6.729565\nAction: question\nReward: 0.806191\nNext Confusion: 6.08601"}
{"text": "### State\nConfusion: 5.97209\nAction: analogize\nReward: -0.404765\nNext Confusion: 6.406681"}
{"text": "### State\nConfusion: 3.632277\nAction: analogize\nReward: -0.610554\nNext Confusion: 3.787645"}
{"text": "### State\nConfusion: 5.321507\nAction: analogize\nReward: 0.456858\nNext Confusion: 5.306321"}
{"text": "### State\nConfusion: 3.609982\nAction: explain\nReward: 0.527262\nNext Confusion: 3.772228"}
{"text": "### State\nConfusion: 7.68092\nAction: analogize\nReward: -0.650275\nNext Confusion: 8.177908"}
{"text": "### State\nConfusion: 5.233692\nAction: worked_example\nReward: 1.109516\nNext Confusion: 4.001993"}
{"text": "### State\nConfusion: 5.192301\nAction: analogize\nReward: 0.364094\nNext Confusion: 5.636886"}
{"text": "### State\nConfusion: 5.681577\nAction: explain\nReward: 1.501193\nNext Confusion: 5.096723"}
{"text": "### State\nConfusion: 3.494826\nAction: worked_example\nReward: 0.476455\nNext Confusion: 3.671003"}
{"text": "### State\nConfusion: 4.405832\nAction: analogize\nReward: 0.198525\nNext Confusion: 4.154148"}
{"text": "### State\nConfusion: 1.466139\nAction: worked_example\nReward: 1.668556\nNext Confusion: 0.0"}
{"text": "### State\nConfusion: 4.717231\nAction: analogize\nReward: -0.452539\nNext Confusion: 5.626471"}
{"text": "### State\nConfusion: 3.871939\nAction: analogize\nReward: -0.476882\nNext Confusion: 4.771052"}
{"text": "### State\nConfusion: 3.444428\nAction: explain\nReward: 1.106501\nNext Confusion: 3.333343"}
{"text": "### State\nConfusion: 4.196409\nAction: analogize\nReward: 0.381661\nNext Confusion: 4.17294"}
{"text": "### State\nConfusion: 6.663786\nAction: question\nReward: -0.873553\nNext Confusion: 6.942137"}
{"text": "### State\nConfusion: 5.515987\nAction: analogize\nReward: -1.334938\nNext Confusion: 6.764931"}
{"text": "### State\nConfusion: 5.240704\nAction: analogize\nReward: 0.294774\nNext Confusion: 4.956213"}
{"text": "### State\nConfusion: 3.792751\nAction: question\nReward: 0.624638\nNext Confusion: 3.817527"}
{"text": "### State\nConfusion: 4.063591\nAction: explain\nReward: 0.083103\nNext Confusion: 4.243097"}
{"text": "### State\nConfusion: 4.814852\nAction: analogize\nReward: 0.506158\nNext Confusion: 5.234246"}
{"text": "### State\nConfusion: 3.722708\nAction: question\nReward: 0.660337\nNext Confusion: 3.508"}
{"text": "### State\nConfusion: 5.437449\nAction: explain\nReward: 0.486583\nNext Confusion: 5.524333"}
{"text": "### State\nConfusion: 5.493462\nAction: analogize\nReward: -0.408789\nNext Confusion: 6.130176"}
{"text": "### State\nConfusion: 2.043504\nAction: question\nReward: -0.300682\nNext Confusion: 2.3756"}
{"text": "### State\nConfusion: 2.417647\nAction: analogize\nReward: -0.002908\nNext Confusion: 3.053841"}
{"text": "### State\nConfusion: 4.872236\nAction: analogize\nReward: -0.958127\nNext Confusion: 5.264666"}
{"text": "### State\nConfusion: 4.20766\nAction: worked_example\nReward: 0.891922\nNext Confusion: 3.989266"}
{"text": "### State\nConfusion: 5.122287\nAction: worked_example\nReward: 0.598247\nNext Confusion: 4.653445"}
{"text": "### State\nConfusion: 7.352225\nAction: worked_example\nReward: -1.130684\nNext Confusion: 8.396069"}
{"text": "### State\nConfusion: 3.337869\nAction: question\nReward: 1.275082\nNext Confusion: 2.30635"}
{"text": "### State\nConfusion: 5.843043\nAction: analogize\nReward: -0.530916\nNext Confusion: 6.435978"}
{"text": "### State\nConfusion: 3.31257\nAction: question\nReward: 0.431417\nNext Confusion: 2.247037"}
{"text": "### State\nConfusion: 2.412863\nAction: worked_example\nReward: 2.749443\nNext Confusion: 0.069799"}
{"text": "### State\nConfusion: 8.244789\nAction: analogize\nReward: -0.621143\nNext Confusion: 8.557005"}
{"text": "### State\nConfusion: 2.765021\nAction: explain\nReward: 0.933131\nNext Confusion: 1.681604"}
{"text": "### State\nConfusion: 5.545647\nAction: analogize\nReward: 0.764545\nNext Confusion: 4.779349"}
{"text": "### State\nConfusion: 2.562586\nAction: correct_fact\nReward: 0.358028\nNext Confusion: 1.512542"}
{"text": "### State\nConfusion: 3.947003\nAction: analogize\nReward: 0.585956\nNext Confusion: 3.144356"}
{"text": "### State\nConfusion: 7.056403\nAction: question\nReward: -0.392791\nNext Confusion: 7.174994"}
{"text": "### State\nConfusion: 4.349577\nAction: worked_example\nReward: 2.003419\nNext Confusion: 2.622539"}
{"text": "### State\nConfusion: 4.15257\nAction: question\nReward: 1.022439\nNext Confusion: 3.610753"}
{"text": "### State\nConfusion: 3.867925\nAction: analogize\nReward: -0.732856\nNext Confusion: 4.480718"}
{"text": "### State\nConfusion: 6.072416\nAction: analogize\nReward: -0.400762\nNext Confusion: 7.712231"}
{"text": "### State\nConfusion: 5.692263\nAction: analogize\nReward: -0.409731\nNext Confusion: 6.464062"}
{"text": "### State\nConfusion: 1.97927\nAction: correct_fact\nReward: 0.316701\nNext Confusion: 2.032135"}
{"text": "### State\nConfusion: 4.016954\nAction: analogize\nReward: 0.118688\nNext Confusion: 3.82214"}
{"text": "### State\nConfusion: 3.23223\nAction: explain\nReward: 0.74181\nNext Confusion: 2.84285"}
{"text": "### State\nConfusion: 3.751665\nAction: analogize\nReward: 1.273059\nNext Confusion: 2.951627"}
{"text": "### State\nConfusion: 6.280319\nAction: worked_example\nReward: 1.779139\nNext Confusion: 4.778588"}
{"text": "### State\nConfusion: 9.398644\nAction: analogize\nReward: -0.367722\nNext Confusion: 9.828088"}
{"text": "### State\nConfusion: 4.856673\nAction: analogize\nReward: 0.697777\nNext Confusion: 4.792671"}
{"text": "### State\nConfusion: 7.677342\nAction: analogize\nReward: -1.276954\nNext Confusion: 9.007716"}
{"text": "### State\nConfusion: 5.617439\nAction: analogize\nReward: -0.855311\nNext Confusion: 6.225406"}
{"text": "### State\nConfusion: 6.137228\nAction: analogize\nReward: -0.461357\nNext Confusion: 6.903793"}
{"text": "### State\nConfusion: 2.707235\nAction: analogize\nReward: -1.614262\nNext Confusion: 3.073972"}
{"text": "### State\nConfusion: 3.939227\nAction: analogize\nReward: -0.449349\nNext Confusion: 4.274253"}
{"text": "### State\nConfusion: 5.494228\nAction: explain\nReward: 0.047741\nNext Confusion: 6.083399"}
{"text": "### State\nConfusion: 2.533629\nAction: analogize\nReward: -0.39871\nNext Confusion: 2.856948"}
{"text": "### State\nConfusion: 6.453248\nAction: explain\nReward: 1.17852\nNext Confusion: 5.8642"}
{"text": "### State\nConfusion: 9.124298\nAction: correct_fact\nReward: -0.178922\nNext Confusion: 9.275643"}
{"text": "### State\nConfusion: 4.845806\nAction: explain\nReward: -0.259024\nNext Confusion: 5.530763"}
{"text": "### State\nConfusion: 7.197324\nAction: analogize\nReward: -0.734195\nNext Confusion: 7.665397"}
{"text": "### State\nConfusion: 4.266757\nAction: analogize\nReward: -0.860657\nNext Confusion: 4.993144"}
{"text": "### State\nConfusion: 3.85403\nAction: analogize\nReward: -0.180861\nNext Confusion: 3.252461"}
{"text": "### State\nConfusion: 3.62735\nAction: analogize\nReward: 0.028956\nNext Confusion: 3.763162"}
{"text": "### State\nConfusion: 8.335056\nAction: worked_example\nReward: 1.617562\nNext Confusion: 6.925548"}
{"text": "### State\nConfusion: 3.790295\nAction: analogize\nReward: 0.182729\nNext Confusion: 3.939273"}
{"text": "### State\nConfusion: 3.766018\nAction: analogize\nReward: 0.001838\nNext Confusion: 3.849688"}
{"text": "### State\nConfusion: 4.107547\nAction: analogize\nReward: -0.113302\nNext Confusion: 4.152052"}
{"text": "### State\nConfusion: 7.174244\nAction: analogize\nReward: -1.656013\nNext Confusion: 7.383377"}
{"text": "### State\nConfusion: 3.229383\nAction: analogize\nReward: 0.619507\nNext Confusion: 3.097441"}
{"text": "### State\nConfusion: 4.611253\nAction: analogize\nReward: -0.313888\nNext Confusion: 4.727696"}
{"text": "### State\nConfusion: 8.702278\nAction: worked_example\nReward: 1.775019\nNext Confusion: 7.147692"}
{"text": "### State\nConfusion: 4.831955\nAction: worked_example\nReward: 2.335352\nNext Confusion: 3.308958"}
{"text": "### State\nConfusion: 3.681416\nAction: correct_fact\nReward: 0.502263\nNext Confusion: 3.247155"}
{"text": "### State\nConfusion: 6.803329\nAction: analogize\nReward: -1.358429\nNext Confusion: 7.614142"}
{"text": "### State\nConfusion: 6.14641\nAction: question\nReward: 1.105078\nNext Confusion: 5.013781"}
{"text": "### State\nConfusion: 5.067341\nAction: analogize\nReward: -0.67485\nNext Confusion: 5.623613"}
{"text": "### State\nConfusion: 5.740351\nAction: analogize\nReward: 0.43157\nNext Confusion: 5.821089"}
{"text": "### State\nConfusion: 6.492049\nAction: analogize\nReward: -0.261783\nNext Confusion: 6.899039"}
{"text": "### State\nConfusion: 3.546743\nAction: correct_fact\nReward: -0.021746\nNext Confusion: 4.128534"}
{"text": "### State\nConfusion: 3.470161\nAction: analogize\nReward: 1.411296\nNext Confusion: 2.963803"}
{"text": "### State\nConfusion: 7.305857\nAction: explain\nReward: -0.260928\nNext Confusion: 7.525834"}
{"text": "### State\nConfusion: 4.079695\nAction: analogize\nReward: 0.425925\nNext Confusion: 4.2028"}
{"text": "### State\nConfusion: 3.504302\nAction: explain\nReward: -0.24801\nNext Confusion: 3.646383"}
{"text": "### State\nConfusion: 6.140616\nAction: analogize\nReward: -0.194836\nNext Confusion: 6.193009"}
{"text": "### State\nConfusion: 3.614631\nAction: worked_example\nReward: 2.808057\nNext Confusion: 1.710415"}
{"text": "### State\nConfusion: 4.385109\nAction: explain\nReward: 0.867373\nNext Confusion: 4.039882"}
{"text": "### State\nConfusion: 5.97857\nAction: worked_example\nReward: 0.678469\nNext Confusion: 4.711379"}
{"text": "### State\nConfusion: 5.842787\nAction: explain\nReward: 0.530482\nNext Confusion: 5.485232"}
{"text": "### State\nConfusion: 3.791125\nAction: explain\nReward: -0.120156\nNext Confusion: 3.974842"}
{"text": "### State\nConfusion: 5.301826\nAction: analogize\nReward: -0.177215\nNext Confusion: 5.508161"}
{"text": "### State\nConfusion: 2.616865\nAction: explain\nReward: -0.990273\nNext Confusion: 2.763718"}
{"text": "### State\nConfusion: 5.412301\nAction: analogize\nReward: 0.419836\nNext Confusion: 5.34807"}
{"text": "### State\nConfusion: 6.595836\nAction: worked_example\nReward: 0.79788\nNext Confusion: 5.217594"}
{"text": "### State\nConfusion: 6.025871\nAction: analogize\nReward: 0.350992\nNext Confusion: 6.043433"}
{"text": "### State\nConfusion: 8.022219\nAction: analogize\nReward: -0.212897\nNext Confusion: 7.875199"}
{"text": "### State\nConfusion: 7.210607\nAction: correct_fact\nReward: 0.058529\nNext Confusion: 7.347044"}
{"text": "### State\nConfusion: 3.933006\nAction: analogize\nReward: -0.406958\nNext Confusion: 4.179138"}
{"text": "### State\nConfusion: 2.965985\nAction: analogize\nReward: -0.300177\nNext Confusion: 3.029191"}
{"text": "### State\nConfusion: 6.168064\nAction: analogize\nReward: -0.610067\nNext Confusion: 7.171532"}
{"text": "### State\nConfusion: 3.584667\nAction: analogize\nReward: -0.183735\nNext Confusion: 4.292329"}
{"text": "### State\nConfusion: 3.67187\nAction: analogize\nReward: -1.468257\nNext Confusion: 4.664136"}
{"text": "### State\nConfusion: 9.246445\nAction: analogize\nReward: -0.771256\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 4.244543\nAction: analogize\nReward: -0.932989\nNext Confusion: 5.020196"}
{"text": "### State\nConfusion: 3.598906\nAction: worked_example\nReward: 0.802242\nNext Confusion: 2.522359"}
{"text": "### State\nConfusion: 4.469094\nAction: question\nReward: 0.390748\nNext Confusion: 4.036065"}
{"text": "### State\nConfusion: 2.997087\nAction: analogize\nReward: 0.495354\nNext Confusion: 2.832977"}
{"text": "### State\nConfusion: 3.71337\nAction: analogize\nReward: -1.484861\nNext Confusion: 5.056246"}
{"text": "### State\nConfusion: 3.870778\nAction: explain\nReward: 0.392984\nNext Confusion: 3.912732"}
{"text": "### State\nConfusion: 7.175167\nAction: correct_fact\nReward: -0.247504\nNext Confusion: 7.709682"}
{"text": "### State\nConfusion: 3.68497\nAction: worked_example\nReward: 0.488498\nNext Confusion: 3.046532"}
{"text": "### State\nConfusion: 5.920541\nAction: explain\nReward: 1.570687\nNext Confusion: 4.980819"}
{"text": "### State\nConfusion: 4.128818\nAction: analogize\nReward: -0.559041\nNext Confusion: 4.367249"}
{"text": "### State\nConfusion: 4.818598\nAction: analogize\nReward: 0.198094\nNext Confusion: 5.307376"}
{"text": "### State\nConfusion: 4.356233\nAction: analogize\nReward: 0.28791\nNext Confusion: 4.128079"}
{"text": "### State\nConfusion: 5.0037\nAction: analogize\nReward: -1.000578\nNext Confusion: 5.632165"}
{"text": "### State\nConfusion: 3.368967\nAction: analogize\nReward: 1.246838\nNext Confusion: 2.920736"}
{"text": "### State\nConfusion: 3.198133\nAction: worked_example\nReward: 1.680851\nNext Confusion: 1.77803"}
{"text": "### State\nConfusion: 2.746825\nAction: explain\nReward: 0.707715\nNext Confusion: 2.306501"}
{"text": "### State\nConfusion: 3.790369\nAction: analogize\nReward: -0.041662\nNext Confusion: 3.86117"}
{"text": "### State\nConfusion: 2.891872\nAction: correct_fact\nReward: -1.316577\nNext Confusion: 3.124006"}
{"text": "### State\nConfusion: 4.48038\nAction: analogize\nReward: -0.129744\nNext Confusion: 4.754625"}
{"text": "### State\nConfusion: 3.77296\nAction: correct_fact\nReward: 0.427451\nNext Confusion: 3.604445"}
{"text": "### State\nConfusion: 3.225091\nAction: analogize\nReward: -0.507043\nNext Confusion: 3.829337"}
{"text": "### State\nConfusion: 4.265069\nAction: analogize\nReward: -0.038454\nNext Confusion: 4.813678"}
{"text": "### State\nConfusion: 7.177357\nAction: analogize\nReward: -0.298202\nNext Confusion: 8.014968"}
{"text": "### State\nConfusion: 3.607763\nAction: correct_fact\nReward: -0.138047\nNext Confusion: 3.655758"}
{"text": "### State\nConfusion: 8.976868\nAction: worked_example\nReward: 0.023975\nNext Confusion: 8.534332"}
{"text": "### State\nConfusion: 5.039401\nAction: analogize\nReward: -0.601833\nNext Confusion: 5.407775"}
{"text": "### State\nConfusion: 3.497416\nAction: question\nReward: 0.398961\nNext Confusion: 3.075376"}
{"text": "### State\nConfusion: 3.587401\nAction: analogize\nReward: 0.036509\nNext Confusion: 3.688627"}
{"text": "### State\nConfusion: 4.673002\nAction: analogize\nReward: 0.131562\nNext Confusion: 4.674554"}
{"text": "### State\nConfusion: 3.592728\nAction: analogize\nReward: 0.418171\nNext Confusion: 3.849377"}
{"text": "### State\nConfusion: 7.642635\nAction: analogize\nReward: -0.424847\nNext Confusion: 7.853295"}
{"text": "### State\nConfusion: 8.928662\nAction: analogize\nReward: 0.9627\nNext Confusion: 8.62451"}
{"text": "### State\nConfusion: 4.960207\nAction: analogize\nReward: -5.042938\nNext Confusion: 6.014362"}
{"text": "### State\nConfusion: 2.171726\nAction: analogize\nReward: -0.181946\nNext Confusion: 2.327439"}
{"text": "### State\nConfusion: 6.586604\nAction: worked_example\nReward: 0.59854\nNext Confusion: 5.187266"}
{"text": "### State\nConfusion: 3.143118\nAction: analogize\nReward: 0.17305\nNext Confusion: 2.9831"}
{"text": "### State\nConfusion: 3.265929\nAction: analogize\nReward: 0.667022\nNext Confusion: 2.944679"}
{"text": "### State\nConfusion: 7.445652\nAction: question\nReward: 0.353372\nNext Confusion: 6.919861"}
{"text": "### State\nConfusion: 3.062145\nAction: correct_fact\nReward: -1.195106\nNext Confusion: 3.566605"}
{"text": "### State\nConfusion: 4.143488\nAction: question\nReward: -0.733389\nNext Confusion: 4.834106"}
{"text": "### State\nConfusion: 3.424406\nAction: explain\nReward: -0.602404\nNext Confusion: 3.36164"}
{"text": "### State\nConfusion: 3.387601\nAction: explain\nReward: 0.157426\nNext Confusion: 3.688417"}
{"text": "### State\nConfusion: 7.273157\nAction: explain\nReward: 1.178917\nNext Confusion: 6.322024"}
{"text": "### State\nConfusion: 3.979093\nAction: analogize\nReward: -0.631023\nNext Confusion: 4.222602"}
{"text": "### State\nConfusion: 3.696545\nAction: analogize\nReward: -0.230754\nNext Confusion: 4.253306"}
{"text": "### State\nConfusion: 2.966753\nAction: analogize\nReward: -0.52981\nNext Confusion: 3.338369"}
{"text": "### State\nConfusion: 5.275979\nAction: explain\nReward: 0.51978\nNext Confusion: 4.586684"}
{"text": "### State\nConfusion: 4.143547\nAction: analogize\nReward: -0.556281\nNext Confusion: 4.511189"}
{"text": "### State\nConfusion: 6.479927\nAction: explain\nReward: 0.668874\nNext Confusion: 6.162917"}
{"text": "### State\nConfusion: 3.578943\nAction: question\nReward: 1.043996\nNext Confusion: 2.879315"}
{"text": "### State\nConfusion: 4.859414\nAction: worked_example\nReward: 2.318581\nNext Confusion: 2.760855"}
{"text": "### State\nConfusion: 3.379685\nAction: correct_fact\nReward: -0.36739\nNext Confusion: 3.626151"}
{"text": "### State\nConfusion: 4.285751\nAction: explain\nReward: 0.490226\nNext Confusion: 4.115866"}
{"text": "### State\nConfusion: 6.605411\nAction: analogize\nReward: 0.102846\nNext Confusion: 6.825231"}
{"text": "### State\nConfusion: 3.968445\nAction: analogize\nReward: -1.432025\nNext Confusion: 4.917612"}
{"text": "### State\nConfusion: 7.966238\nAction: correct_fact\nReward: 0.406304\nNext Confusion: 8.120245"}
{"text": "### State\nConfusion: 6.721416\nAction: analogize\nReward: -1.520515\nNext Confusion: 7.218895"}
{"text": "### State\nConfusion: 2.738938\nAction: correct_fact\nReward: 0.085715\nNext Confusion: 2.513217"}
{"text": "### State\nConfusion: 5.238395\nAction: analogize\nReward: 0.63322\nNext Confusion: 5.405862"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.927505\nNext Confusion: 9.517879"}
{"text": "### State\nConfusion: 5.823978\nAction: worked_example\nReward: 1.860738\nNext Confusion: 4.157206"}
{"text": "### State\nConfusion: 5.481368\nAction: analogize\nReward: 0.071183\nNext Confusion: 5.627844"}
{"text": "### State\nConfusion: 8.289637\nAction: analogize\nReward: 0.024586\nNext Confusion: 8.378593"}
{"text": "### State\nConfusion: 5.744939\nAction: analogize\nReward: -1.389301\nNext Confusion: 6.877677"}
{"text": "### State\nConfusion: 6.628249\nAction: analogize\nReward: 0.479303\nNext Confusion: 5.94657"}
{"text": "### State\nConfusion: 6.70891\nAction: explain\nReward: 0.714393\nNext Confusion: 6.231401"}
{"text": "### State\nConfusion: 2.966038\nAction: analogize\nReward: -0.399175\nNext Confusion: 2.929795"}
{"text": "### State\nConfusion: 9.408701\nAction: analogize\nReward: -0.493768\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 9.415164\nAction: worked_example\nReward: 1.486642\nNext Confusion: 8.030042"}
{"text": "### State\nConfusion: 4.254503\nAction: question\nReward: 0.917854\nNext Confusion: 3.972505"}
{"text": "### State\nConfusion: 4.4989\nAction: explain\nReward: -0.321324\nNext Confusion: 4.517928"}
{"text": "### State\nConfusion: 3.953625\nAction: explain\nReward: -0.23264\nNext Confusion: 4.198959"}
{"text": "### State\nConfusion: 5.389431\nAction: analogize\nReward: -0.474144\nNext Confusion: 5.669853"}
{"text": "### State\nConfusion: 4.18871\nAction: question\nReward: -0.195394\nNext Confusion: 4.506213"}
{"text": "### State\nConfusion: 3.939333\nAction: worked_example\nReward: 1.129873\nNext Confusion: 2.415744"}
{"text": "### State\nConfusion: 5.82522\nAction: analogize\nReward: -0.874563\nNext Confusion: 6.778791"}
{"text": "### State\nConfusion: 3.446916\nAction: analogize\nReward: 0.005333\nNext Confusion: 3.194818"}
{"text": "### State\nConfusion: 6.168521\nAction: analogize\nReward: 0.004277\nNext Confusion: 6.380842"}
{"text": "### State\nConfusion: 4.693802\nAction: analogize\nReward: -0.562175\nNext Confusion: 4.965051"}
{"text": "### State\nConfusion: 5.756139\nAction: correct_fact\nReward: 0.139779\nNext Confusion: 6.064686"}
{"text": "### State\nConfusion: 3.910989\nAction: analogize\nReward: -2.469072\nNext Confusion: 5.263532"}
{"text": "### State\nConfusion: 5.079583\nAction: question\nReward: 0.720805\nNext Confusion: 4.772066"}
{"text": "### State\nConfusion: 8.160942\nAction: explain\nReward: -0.003734\nNext Confusion: 8.076292"}
{"text": "### State\nConfusion: 4.063059\nAction: analogize\nReward: -1.248169\nNext Confusion: 5.102006"}
{"text": "### State\nConfusion: 3.923972\nAction: correct_fact\nReward: -0.436689\nNext Confusion: 3.59706"}
{"text": "### State\nConfusion: 4.257745\nAction: analogize\nReward: 0.975398\nNext Confusion: 4.396928"}
{"text": "### State\nConfusion: 4.448721\nAction: correct_fact\nReward: -0.095114\nNext Confusion: 4.54971"}
{"text": "### State\nConfusion: 8.187359\nAction: correct_fact\nReward: -0.402091\nNext Confusion: 8.562424"}
{"text": "### State\nConfusion: 6.941269\nAction: analogize\nReward: -0.082177\nNext Confusion: 6.415106"}
{"text": "### State\nConfusion: 3.733281\nAction: analogize\nReward: -0.557303\nNext Confusion: 4.495642"}
{"text": "### State\nConfusion: 5.054769\nAction: question\nReward: 0.385418\nNext Confusion: 4.25285"}
{"text": "### State\nConfusion: 6.988386\nAction: analogize\nReward: -0.997999\nNext Confusion: 7.872257"}
{"text": "### State\nConfusion: 3.323612\nAction: analogize\nReward: -0.417391\nNext Confusion: 3.446508"}
{"text": "### State\nConfusion: 7.580118\nAction: analogize\nReward: -0.193736\nNext Confusion: 8.119838"}
{"text": "### State\nConfusion: 6.262873\nAction: analogize\nReward: 0.25262\nNext Confusion: 6.57022"}
{"text": "### State\nConfusion: 3.24616\nAction: analogize\nReward: 0.176095\nNext Confusion: 3.061988"}
{"text": "### State\nConfusion: 3.785727\nAction: analogize\nReward: -0.123162\nNext Confusion: 4.260601"}
{"text": "### State\nConfusion: 4.510389\nAction: analogize\nReward: -0.087179\nNext Confusion: 4.489273"}
{"text": "### State\nConfusion: 3.244055\nAction: analogize\nReward: -1.109454\nNext Confusion: 3.376668"}
{"text": "### State\nConfusion: 3.47781\nAction: analogize\nReward: 1.156379\nNext Confusion: 3.43952"}
{"text": "### State\nConfusion: 3.995885\nAction: analogize\nReward: -0.481654\nNext Confusion: 4.774538"}
{"text": "### State\nConfusion: 8.914354\nAction: analogize\nReward: 0.049812\nNext Confusion: 8.771034"}
{"text": "### State\nConfusion: 1.772292\nAction: explain\nReward: 0.297904\nNext Confusion: 1.719932"}
{"text": "### State\nConfusion: 4.163656\nAction: worked_example\nReward: 1.919813\nNext Confusion: 3.011497"}
{"text": "### State\nConfusion: 3.538392\nAction: analogize\nReward: -0.22508\nNext Confusion: 4.106795"}
{"text": "### State\nConfusion: 4.116727\nAction: worked_example\nReward: 2.444091\nNext Confusion: 2.687809"}
{"text": "### State\nConfusion: 6.723317\nAction: explain\nReward: 0.072298\nNext Confusion: 6.793771"}
{"text": "### State\nConfusion: 3.330479\nAction: analogize\nReward: -1.032799\nNext Confusion: 4.53574"}
{"text": "### State\nConfusion: 3.567197\nAction: analogize\nReward: -0.658102\nNext Confusion: 4.478553"}
{"text": "### State\nConfusion: 4.345864\nAction: analogize\nReward: -0.269636\nNext Confusion: 4.495819"}
{"text": "### State\nConfusion: 5.757518\nAction: analogize\nReward: -0.708589\nNext Confusion: 5.357489"}
{"text": "### State\nConfusion: 8.696834\nAction: explain\nReward: 0.505359\nNext Confusion: 8.75677"}
{"text": "### State\nConfusion: 3.847008\nAction: question\nReward: -1.407674\nNext Confusion: 4.419054"}
{"text": "### State\nConfusion: 5.584003\nAction: analogize\nReward: -0.606178\nNext Confusion: 6.05324"}
{"text": "### State\nConfusion: 4.09134\nAction: analogize\nReward: 0.24887\nNext Confusion: 3.440027"}
{"text": "### State\nConfusion: 8.342102\nAction: question\nReward: 0.388513\nNext Confusion: 8.468631"}
{"text": "### State\nConfusion: 4.246235\nAction: analogize\nReward: -0.367984\nNext Confusion: 4.55566"}
{"text": "### State\nConfusion: 5.041572\nAction: question\nReward: -0.049042\nNext Confusion: 4.999399"}
{"text": "### State\nConfusion: 3.843461\nAction: question\nReward: 0.018297\nNext Confusion: 3.998616"}
{"text": "### State\nConfusion: 3.468152\nAction: analogize\nReward: -0.219468\nNext Confusion: 3.57777"}
{"text": "### State\nConfusion: 4.381477\nAction: analogize\nReward: 0.03071\nNext Confusion: 4.940474"}
{"text": "### State\nConfusion: 5.756888\nAction: analogize\nReward: 0.89899\nNext Confusion: 5.094104"}
{"text": "### State\nConfusion: 4.333212\nAction: analogize\nReward: -0.6245\nNext Confusion: 5.334216"}
{"text": "### State\nConfusion: 5.47342\nAction: question\nReward: -0.154752\nNext Confusion: 5.659844"}
{"text": "### State\nConfusion: 8.083508\nAction: analogize\nReward: -0.103334\nNext Confusion: 8.280694"}
{"text": "### State\nConfusion: 4.620574\nAction: explain\nReward: -0.077588\nNext Confusion: 4.464561"}
{"text": "### State\nConfusion: 3.974661\nAction: worked_example\nReward: 1.125675\nNext Confusion: 3.31946"}
{"text": "### State\nConfusion: 4.42193\nAction: analogize\nReward: -0.47474\nNext Confusion: 4.429034"}
{"text": "### State\nConfusion: 5.231293\nAction: analogize\nReward: -0.845958\nNext Confusion: 6.019314"}
{"text": "### State\nConfusion: 3.228954\nAction: analogize\nReward: -0.857821\nNext Confusion: 4.013046"}
{"text": "### State\nConfusion: 3.676788\nAction: analogize\nReward: 1.48361\nNext Confusion: 2.914774"}
{"text": "### State\nConfusion: 6.744595\nAction: analogize\nReward: -3.523492\nNext Confusion: 7.119243"}
{"text": "### State\nConfusion: 6.573047\nAction: analogize\nReward: 0.065792\nNext Confusion: 6.158436"}
{"text": "### State\nConfusion: 4.348007\nAction: analogize\nReward: -1.154604\nNext Confusion: 5.238071"}
{"text": "### State\nConfusion: 5.936844\nAction: analogize\nReward: -1.194396\nNext Confusion: 7.531626"}
{"text": "### State\nConfusion: 5.42589\nAction: analogize\nReward: -0.43157\nNext Confusion: 5.821207"}
{"text": "### State\nConfusion: 6.476246\nAction: explain\nReward: -0.381727\nNext Confusion: 6.038479"}
{"text": "### State\nConfusion: 3.408969\nAction: correct_fact\nReward: -0.29008\nNext Confusion: 3.610456"}
{"text": "### State\nConfusion: 3.137696\nAction: analogize\nReward: -1.614321\nNext Confusion: 4.338411"}
{"text": "### State\nConfusion: 9.981143\nAction: question\nReward: 0.336403\nNext Confusion: 9.127187"}
{"text": "### State\nConfusion: 5.250209\nAction: worked_example\nReward: 1.215982\nNext Confusion: 4.074452"}
{"text": "### State\nConfusion: 3.848411\nAction: correct_fact\nReward: -0.315407\nNext Confusion: 3.798653"}
{"text": "### State\nConfusion: 5.364659\nAction: analogize\nReward: -0.832938\nNext Confusion: 6.078216"}
{"text": "### State\nConfusion: 5.980426\nAction: analogize\nReward: -0.953331\nNext Confusion: 6.469077"}
{"text": "### State\nConfusion: 5.083078\nAction: analogize\nReward: -0.609319\nNext Confusion: 5.623208"}
{"text": "### State\nConfusion: 7.247568\nAction: analogize\nReward: -0.292227\nNext Confusion: 7.618113"}
{"text": "### State\nConfusion: 3.334119\nAction: analogize\nReward: -1.237248\nNext Confusion: 4.051495"}
{"text": "### State\nConfusion: 3.771148\nAction: analogize\nReward: -0.28148\nNext Confusion: 4.278025"}
{"text": "### State\nConfusion: 7.054849\nAction: analogize\nReward: 0.276644\nNext Confusion: 7.069348"}
{"text": "### State\nConfusion: 4.223377\nAction: question\nReward: 0.878895\nNext Confusion: 3.37235"}
{"text": "### State\nConfusion: 6.538994\nAction: analogize\nReward: -0.657308\nNext Confusion: 7.263082"}
{"text": "### State\nConfusion: 4.482307\nAction: analogize\nReward: -0.734999\nNext Confusion: 5.658994"}
{"text": "### State\nConfusion: 7.982437\nAction: worked_example\nReward: 2.063794\nNext Confusion: 5.985748"}
{"text": "### State\nConfusion: 6.038796\nAction: analogize\nReward: -0.428013\nNext Confusion: 6.309379"}
{"text": "### State\nConfusion: 3.535395\nAction: explain\nReward: -0.369274\nNext Confusion: 3.703277"}
{"text": "### State\nConfusion: 2.860002\nAction: explain\nReward: 0.130337\nNext Confusion: 2.802935"}
{"text": "### State\nConfusion: 3.763097\nAction: correct_fact\nReward: -0.823417\nNext Confusion: 4.379292"}
{"text": "### State\nConfusion: 4.180916\nAction: question\nReward: 0.741489\nNext Confusion: 3.423447"}
{"text": "### State\nConfusion: 5.614097\nAction: question\nReward: 0.531301\nNext Confusion: 5.212755"}
{"text": "### State\nConfusion: 8.834068\nAction: analogize\nReward: -0.189005\nNext Confusion: 9.355236"}
{"text": "### State\nConfusion: 6.024891\nAction: correct_fact\nReward: 0.064373\nNext Confusion: 6.602543"}
{"text": "### State\nConfusion: 5.876128\nAction: explain\nReward: 0.267207\nNext Confusion: 5.626973"}
{"text": "### State\nConfusion: 6.593964\nAction: analogize\nReward: 0.138768\nNext Confusion: 6.006979"}
{"text": "### State\nConfusion: 3.408307\nAction: analogize\nReward: -0.436008\nNext Confusion: 3.196113"}
{"text": "### State\nConfusion: 7.686703\nAction: question\nReward: -0.568404\nNext Confusion: 7.67233"}
{"text": "### State\nConfusion: 5.321778\nAction: question\nReward: 1.497982\nNext Confusion: 4.268611"}
{"text": "### State\nConfusion: 4.75159\nAction: analogize\nReward: 1.433057\nNext Confusion: 4.059246"}
{"text": "### State\nConfusion: 3.645793\nAction: question\nReward: 1.585023\nNext Confusion: 1.974554"}
{"text": "### State\nConfusion: 6.01909\nAction: analogize\nReward: 0.32362\nNext Confusion: 6.217828"}
{"text": "### State\nConfusion: 6.016679\nAction: analogize\nReward: 0.081677\nNext Confusion: 6.395025"}
{"text": "### State\nConfusion: 6.743756\nAction: analogize\nReward: -1.098936\nNext Confusion: 7.562526"}
{"text": "### State\nConfusion: 7.090468\nAction: analogize\nReward: 0.011776\nNext Confusion: 7.157492"}
{"text": "### State\nConfusion: 4.080909\nAction: question\nReward: 0.274189\nNext Confusion: 3.741003"}
{"text": "### State\nConfusion: 6.508274\nAction: explain\nReward: -0.447604\nNext Confusion: 6.71945"}
{"text": "### State\nConfusion: 6.877663\nAction: analogize\nReward: -0.5954\nNext Confusion: 7.796532"}
{"text": "### State\nConfusion: 5.203895\nAction: analogize\nReward: -0.700037\nNext Confusion: 5.556193"}
{"text": "### State\nConfusion: 3.591128\nAction: correct_fact\nReward: -0.794202\nNext Confusion: 3.814581"}
{"text": "### State\nConfusion: 4.232285\nAction: explain\nReward: 0.143006\nNext Confusion: 3.782716"}
{"text": "### State\nConfusion: 4.805493\nAction: correct_fact\nReward: -0.240368\nNext Confusion: 5.179041"}
{"text": "### State\nConfusion: 3.191029\nAction: correct_fact\nReward: -4.082677\nNext Confusion: 3.805619"}
{"text": "### State\nConfusion: 3.123191\nAction: analogize\nReward: 0.157324\nNext Confusion: 2.772966"}
{"text": "### State\nConfusion: 5.475059\nAction: analogize\nReward: -0.537958\nNext Confusion: 5.817322"}
{"text": "### State\nConfusion: 8.821889\nAction: analogize\nReward: 0.389415\nNext Confusion: 8.971167"}
{"text": "### State\nConfusion: 5.51871\nAction: question\nReward: -0.490781\nNext Confusion: 5.568502"}
{"text": "### State\nConfusion: 4.363986\nAction: worked_example\nReward: 2.794462\nNext Confusion: 2.385778"}
{"text": "### State\nConfusion: 3.660182\nAction: analogize\nReward: 0.197274\nNext Confusion: 4.073714"}
{"text": "### State\nConfusion: 4.188292\nAction: question\nReward: 2.06589\nNext Confusion: 3.152765"}
{"text": "### State\nConfusion: 4.624219\nAction: explain\nReward: 0.03086\nNext Confusion: 4.495928"}
{"text": "### State\nConfusion: 3.352286\nAction: analogize\nReward: 0.358749\nNext Confusion: 3.544025"}
{"text": "### State\nConfusion: 3.218407\nAction: analogize\nReward: -0.809351\nNext Confusion: 3.86212"}
{"text": "### State\nConfusion: 4.368701\nAction: analogize\nReward: 0.097597\nNext Confusion: 4.579212"}
{"text": "### State\nConfusion: 7.926002\nAction: question\nReward: 1.346095\nNext Confusion: 7.494742"}
{"text": "### State\nConfusion: 10.0\nAction: explain\nReward: 2.632339\nNext Confusion: 8.704373"}
{"text": "### State\nConfusion: 6.556739\nAction: worked_example\nReward: 0.730109\nNext Confusion: 5.347394"}
{"text": "### State\nConfusion: 6.622032\nAction: worked_example\nReward: 1.884973\nNext Confusion: 3.975837"}
{"text": "### State\nConfusion: 6.837491\nAction: explain\nReward: -0.347375\nNext Confusion: 7.207565"}
{"text": "### State\nConfusion: 4.140696\nAction: analogize\nReward: -0.565153\nNext Confusion: 4.848446"}
{"text": "### State\nConfusion: 6.545013\nAction: analogize\nReward: 0.076453\nNext Confusion: 6.966989"}
{"text": "### State\nConfusion: 4.751852\nAction: analogize\nReward: -0.798605\nNext Confusion: 5.513865"}
{"text": "### State\nConfusion: 5.967764\nAction: analogize\nReward: 0.171464\nNext Confusion: 6.42246"}
{"text": "### State\nConfusion: 7.188017\nAction: analogize\nReward: -0.994482\nNext Confusion: 8.0922"}
{"text": "### State\nConfusion: 4.973029\nAction: question\nReward: 0.046659\nNext Confusion: 4.721402"}
{"text": "### State\nConfusion: 6.592906\nAction: correct_fact\nReward: 1.085134\nNext Confusion: 5.660255"}
{"text": "### State\nConfusion: 3.720168\nAction: explain\nReward: 1.271093\nNext Confusion: 3.076503"}
{"text": "### State\nConfusion: 5.986892\nAction: explain\nReward: 0.533297\nNext Confusion: 5.874181"}
{"text": "### State\nConfusion: 3.338895\nAction: analogize\nReward: 0.165743\nNext Confusion: 3.483409"}
{"text": "### State\nConfusion: 6.299437\nAction: question\nReward: 0.090953\nNext Confusion: 5.496086"}
{"text": "### State\nConfusion: 3.233407\nAction: analogize\nReward: 0.193732\nNext Confusion: 3.252628"}
{"text": "### State\nConfusion: 3.767879\nAction: analogize\nReward: -0.519034\nNext Confusion: 4.018177"}
{"text": "### State\nConfusion: 3.284015\nAction: explain\nReward: 0.343419\nNext Confusion: 3.226797"}
{"text": "### State\nConfusion: 4.966618\nAction: analogize\nReward: -1.346464\nNext Confusion: 5.568508"}
{"text": "### State\nConfusion: 7.611811\nAction: analogize\nReward: -1.118271\nNext Confusion: 8.414276"}
{"text": "### State\nConfusion: 3.678779\nAction: analogize\nReward: -1.83396\nNext Confusion: 4.816208"}
{"text": "### State\nConfusion: 9.551186\nAction: worked_example\nReward: 1.757724\nNext Confusion: 7.951963"}
{"text": "### State\nConfusion: 6.259004\nAction: analogize\nReward: 0.637289\nNext Confusion: 5.072351"}
{"text": "### State\nConfusion: 4.111844\nAction: analogize\nReward: 0.086688\nNext Confusion: 4.294176"}
{"text": "### State\nConfusion: 5.955097\nAction: analogize\nReward: -1.022934\nNext Confusion: 5.988747"}
{"text": "### State\nConfusion: 3.42395\nAction: analogize\nReward: -0.086424\nNext Confusion: 3.586034"}
{"text": "### State\nConfusion: 6.418479\nAction: worked_example\nReward: 3.015016\nNext Confusion: 4.592889"}
{"text": "### State\nConfusion: 3.27804\nAction: correct_fact\nReward: 1.052366\nNext Confusion: 2.910743"}
{"text": "### State\nConfusion: 6.8594\nAction: question\nReward: 0.394227\nNext Confusion: 6.030883"}
{"text": "### State\nConfusion: 3.918734\nAction: analogize\nReward: 0.392353\nNext Confusion: 3.580582"}
{"text": "### State\nConfusion: 4.108029\nAction: explain\nReward: 0.186519\nNext Confusion: 4.014627"}
{"text": "### State\nConfusion: 6.534582\nAction: analogize\nReward: -1.341765\nNext Confusion: 7.152132"}
{"text": "### State\nConfusion: 4.526416\nAction: analogize\nReward: 0.041798\nNext Confusion: 4.610605"}
{"text": "### State\nConfusion: 3.207484\nAction: analogize\nReward: -0.274951\nNext Confusion: 3.235347"}
{"text": "### State\nConfusion: 7.274545\nAction: correct_fact\nReward: 0.060256\nNext Confusion: 7.385714"}
{"text": "### State\nConfusion: 6.405168\nAction: analogize\nReward: 1.062377\nNext Confusion: 6.203435"}
{"text": "### State\nConfusion: 6.145315\nAction: analogize\nReward: 0.297888\nNext Confusion: 5.948101"}
{"text": "### State\nConfusion: 4.362407\nAction: question\nReward: -0.376688\nNext Confusion: 4.091491"}
{"text": "### State\nConfusion: 8.439035\nAction: analogize\nReward: -0.300894\nNext Confusion: 8.39113"}
{"text": "### State\nConfusion: 3.67231\nAction: analogize\nReward: -1.550801\nNext Confusion: 4.500672"}
{"text": "### State\nConfusion: 3.833536\nAction: correct_fact\nReward: 0.260055\nNext Confusion: 4.460357"}
{"text": "### State\nConfusion: 4.286399\nAction: analogize\nReward: -0.380031\nNext Confusion: 4.813565"}
{"text": "### State\nConfusion: 3.622745\nAction: analogize\nReward: 1.280211\nNext Confusion: 3.017132"}
{"text": "### State\nConfusion: 3.164635\nAction: analogize\nReward: -1.033433\nNext Confusion: 4.108108"}
{"text": "### State\nConfusion: 4.392075\nAction: correct_fact\nReward: -0.532647\nNext Confusion: 4.885617"}
{"text": "### State\nConfusion: 3.536113\nAction: explain\nReward: 1.498938\nNext Confusion: 2.96965"}
{"text": "### State\nConfusion: 8.397891\nAction: correct_fact\nReward: 1.004038\nNext Confusion: 7.644212"}
{"text": "### State\nConfusion: 2.638306\nAction: analogize\nReward: -0.30316\nNext Confusion: 3.268882"}
{"text": "### State\nConfusion: 2.290366\nAction: worked_example\nReward: 1.073395\nNext Confusion: 1.899378"}
{"text": "### State\nConfusion: 4.104017\nAction: correct_fact\nReward: -0.132089\nNext Confusion: 4.67455"}
{"text": "### State\nConfusion: 4.281032\nAction: explain\nReward: 0.437552\nNext Confusion: 3.920859"}
{"text": "### State\nConfusion: 9.430725\nAction: analogize\nReward: -0.210339\nNext Confusion: 9.636165"}
{"text": "### State\nConfusion: 3.160262\nAction: analogize\nReward: 0.90191\nNext Confusion: 2.309478"}
{"text": "### State\nConfusion: 3.093624\nAction: analogize\nReward: -0.678115\nNext Confusion: 4.155883"}
{"text": "### State\nConfusion: 5.114895\nAction: explain\nReward: 0.544845\nNext Confusion: 5.086789"}
{"text": "### State\nConfusion: 7.723752\nAction: analogize\nReward: -0.464143\nNext Confusion: 8.243782"}
{"text": "### State\nConfusion: 4.042224\nAction: correct_fact\nReward: -0.309778\nNext Confusion: 4.224926"}
{"text": "### State\nConfusion: 1.298197\nAction: analogize\nReward: -0.120681\nNext Confusion: 2.041737"}
{"text": "### State\nConfusion: 3.211188\nAction: question\nReward: 0.94096\nNext Confusion: 2.304281"}
{"text": "### State\nConfusion: 2.177077\nAction: question\nReward: 0.754392\nNext Confusion: 1.588065"}
{"text": "### State\nConfusion: 5.599477\nAction: explain\nReward: 0.427335\nNext Confusion: 5.764095"}
{"text": "### State\nConfusion: 5.271157\nAction: explain\nReward: 1.201553\nNext Confusion: 4.389104"}
{"text": "### State\nConfusion: 5.172612\nAction: explain\nReward: -0.578675\nNext Confusion: 5.641872"}
{"text": "### State\nConfusion: 4.746197\nAction: explain\nReward: -0.415519\nNext Confusion: 4.961803"}
{"text": "### State\nConfusion: 5.20597\nAction: analogize\nReward: -0.188651\nNext Confusion: 4.729918"}
{"text": "### State\nConfusion: 7.899179\nAction: explain\nReward: 0.9233\nNext Confusion: 7.617782"}
{"text": "### State\nConfusion: 4.606825\nAction: question\nReward: 1.332907\nNext Confusion: 3.817736"}
{"text": "### State\nConfusion: 3.545838\nAction: question\nReward: 0.584057\nNext Confusion: 3.080739"}
{"text": "### State\nConfusion: 5.416692\nAction: analogize\nReward: -1.181844\nNext Confusion: 6.291075"}
{"text": "### State\nConfusion: 8.278512\nAction: explain\nReward: 0.65172\nNext Confusion: 8.075535"}
{"text": "### State\nConfusion: 4.03923\nAction: worked_example\nReward: 1.425754\nNext Confusion: 2.703941"}
{"text": "### State\nConfusion: 7.588636\nAction: explain\nReward: 0.627005\nNext Confusion: 6.970175"}
{"text": "### State\nConfusion: 6.207129\nAction: analogize\nReward: -0.164946\nNext Confusion: 6.207433"}
{"text": "### State\nConfusion: 4.607996\nAction: analogize\nReward: -0.501607\nNext Confusion: 4.840988"}
{"text": "### State\nConfusion: 4.281465\nAction: question\nReward: 1.016601\nNext Confusion: 3.84681"}
{"text": "### State\nConfusion: 4.746888\nAction: explain\nReward: 1.408749\nNext Confusion: 3.521709"}
{"text": "### State\nConfusion: 2.90317\nAction: analogize\nReward: -0.593073\nNext Confusion: 3.09674"}
{"text": "### State\nConfusion: 5.362331\nAction: analogize\nReward: -0.147509\nNext Confusion: 5.639575"}
{"text": "### State\nConfusion: 5.548513\nAction: analogize\nReward: -0.657696\nNext Confusion: 6.434372"}
{"text": "### State\nConfusion: 4.179947\nAction: explain\nReward: 1.051147\nNext Confusion: 3.829047"}
{"text": "### State\nConfusion: 6.712979\nAction: question\nReward: 0.125504\nNext Confusion: 6.897026"}
{"text": "### State\nConfusion: 5.161298\nAction: explain\nReward: -0.894121\nNext Confusion: 5.72294"}
{"text": "### State\nConfusion: 4.199243\nAction: analogize\nReward: 1.274532\nNext Confusion: 3.317777"}
{"text": "### State\nConfusion: 2.205262\nAction: worked_example\nReward: 0.85029\nNext Confusion: 1.233461"}
{"text": "### State\nConfusion: 4.533222\nAction: question\nReward: -0.208696\nNext Confusion: 4.21478"}
{"text": "### State\nConfusion: 4.401053\nAction: analogize\nReward: 0.256719\nNext Confusion: 4.846919"}
{"text": "### State\nConfusion: 3.643785\nAction: analogize\nReward: -0.314908\nNext Confusion: 3.947566"}
{"text": "### State\nConfusion: 3.676824\nAction: analogize\nReward: -0.267246\nNext Confusion: 3.87749"}
{"text": "### State\nConfusion: 4.581174\nAction: analogize\nReward: -0.067764\nNext Confusion: 4.506774"}
{"text": "### State\nConfusion: 5.592551\nAction: question\nReward: 0.994469\nNext Confusion: 5.1464"}
{"text": "### State\nConfusion: 4.900575\nAction: analogize\nReward: -1.442941\nNext Confusion: 5.849224"}
{"text": "### State\nConfusion: 4.030148\nAction: explain\nReward: -0.223471\nNext Confusion: 4.296577"}
{"text": "### State\nConfusion: 5.54907\nAction: correct_fact\nReward: 1.283735\nNext Confusion: 4.96947"}
{"text": "### State\nConfusion: 6.399455\nAction: explain\nReward: 0.074439\nNext Confusion: 6.507652"}
{"text": "### State\nConfusion: 4.697805\nAction: analogize\nReward: -1.345821\nNext Confusion: 5.294332"}
{"text": "### State\nConfusion: 7.291886\nAction: analogize\nReward: -0.49404\nNext Confusion: 7.596599"}
{"text": "### State\nConfusion: 8.478653\nAction: worked_example\nReward: 1.67051\nNext Confusion: 7.127231"}
{"text": "### State\nConfusion: 4.146376\nAction: question\nReward: -0.30378\nNext Confusion: 4.132405"}
{"text": "### State\nConfusion: 9.333189\nAction: analogize\nReward: -1.131478\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 5.150481\nAction: explain\nReward: -0.021077\nNext Confusion: 5.147341"}
{"text": "### State\nConfusion: 5.005999\nAction: analogize\nReward: 0.261416\nNext Confusion: 5.043668"}
{"text": "### State\nConfusion: 5.417343\nAction: analogize\nReward: 0.55921\nNext Confusion: 5.474882"}
{"text": "### State\nConfusion: 5.937985\nAction: explain\nReward: 0.105923\nNext Confusion: 5.750947"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: -0.186882\nNext Confusion: 9.814193"}
{"text": "### State\nConfusion: 2.234744\nAction: analogize\nReward: 0.880647\nNext Confusion: 1.544325"}
{"text": "### State\nConfusion: 1.236512\nAction: question\nReward: 1.106881\nNext Confusion: 0.7996"}
{"text": "### State\nConfusion: 4.044295\nAction: analogize\nReward: -1.293793\nNext Confusion: 4.683942"}
{"text": "### State\nConfusion: 4.212736\nAction: explain\nReward: 0.423388\nNext Confusion: 3.542225"}
{"text": "### State\nConfusion: 3.587829\nAction: analogize\nReward: -1.093237\nNext Confusion: 4.790663"}
{"text": "### State\nConfusion: 5.96449\nAction: question\nReward: -0.061454\nNext Confusion: 5.93381"}
{"text": "### State\nConfusion: 3.885393\nAction: analogize\nReward: -0.367448\nNext Confusion: 3.773087"}
{"text": "### State\nConfusion: 6.738736\nAction: explain\nReward: 0.954335\nNext Confusion: 6.322676"}
{"text": "### State\nConfusion: 3.025954\nAction: analogize\nReward: -0.77141\nNext Confusion: 3.877892"}
{"text": "### State\nConfusion: 4.1147\nAction: analogize\nReward: 0.392259\nNext Confusion: 3.516709"}
{"text": "### State\nConfusion: 6.666338\nAction: analogize\nReward: 0.324842\nNext Confusion: 6.57961"}
{"text": "### State\nConfusion: 5.232429\nAction: analogize\nReward: 0.382367\nNext Confusion: 4.214555"}
{"text": "### State\nConfusion: 7.714635\nAction: analogize\nReward: 0.360041\nNext Confusion: 8.002491"}
{"text": "### State\nConfusion: 5.043363\nAction: analogize\nReward: -0.185386\nNext Confusion: 5.162632"}
{"text": "### State\nConfusion: 8.755225\nAction: analogize\nReward: -0.111426\nNext Confusion: 9.15125"}
{"text": "### State\nConfusion: 7.030897\nAction: analogize\nReward: -1.325821\nNext Confusion: 7.800182"}
{"text": "### State\nConfusion: 7.853434\nAction: analogize\nReward: 0.080185\nNext Confusion: 8.146503"}
{"text": "### State\nConfusion: 3.47865\nAction: analogize\nReward: -0.742602\nNext Confusion: 3.932566"}
{"text": "### State\nConfusion: 3.318392\nAction: worked_example\nReward: 1.172948\nNext Confusion: 2.795324"}
{"text": "### State\nConfusion: 3.788924\nAction: question\nReward: 0.853461\nNext Confusion: 3.229144"}
{"text": "### State\nConfusion: 3.487349\nAction: question\nReward: 1.199487\nNext Confusion: 2.967386"}
{"text": "### State\nConfusion: 3.806279\nAction: question\nReward: 2.302437\nNext Confusion: 2.469832"}
{"text": "### State\nConfusion: 3.311562\nAction: analogize\nReward: -0.445051\nNext Confusion: 3.310418"}
{"text": "### State\nConfusion: 5.622833\nAction: analogize\nReward: -1.147916\nNext Confusion: 7.239026"}
{"text": "### State\nConfusion: 4.229888\nAction: analogize\nReward: -0.575123\nNext Confusion: 5.167108"}
{"text": "### State\nConfusion: 4.295042\nAction: analogize\nReward: -0.594404\nNext Confusion: 4.929984"}
{"text": "### State\nConfusion: 3.346937\nAction: analogize\nReward: -1.200558\nNext Confusion: 4.280535"}
{"text": "### State\nConfusion: 6.188107\nAction: analogize\nReward: 0.015743\nNext Confusion: 6.055892"}
{"text": "### State\nConfusion: 4.454732\nAction: question\nReward: 1.193721\nNext Confusion: 3.239344"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.732955\nNext Confusion: 9.617961"}
{"text": "### State\nConfusion: 5.953883\nAction: analogize\nReward: 0.366403\nNext Confusion: 6.130521"}
{"text": "### State\nConfusion: 5.771333\nAction: explain\nReward: -1.45427\nNext Confusion: 6.330766"}
{"text": "### State\nConfusion: 4.92416\nAction: explain\nReward: -0.239389\nNext Confusion: 5.232991"}
{"text": "### State\nConfusion: 2.797066\nAction: analogize\nReward: -1.545631\nNext Confusion: 4.092842"}
{"text": "### State\nConfusion: 3.410682\nAction: explain\nReward: 1.680386\nNext Confusion: 2.625942"}
{"text": "### State\nConfusion: 4.196591\nAction: analogize\nReward: 0.143843\nNext Confusion: 4.218358"}
{"text": "### State\nConfusion: 4.527193\nAction: explain\nReward: 1.279641\nNext Confusion: 3.833264"}
{"text": "### State\nConfusion: 7.109824\nAction: analogize\nReward: -1.056292\nNext Confusion: 7.897732"}
{"text": "### State\nConfusion: 3.4512\nAction: question\nReward: 0.101518\nNext Confusion: 2.863511"}
{"text": "### State\nConfusion: 9.769079\nAction: worked_example\nReward: -0.05826\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 7.047394\nAction: correct_fact\nReward: -0.477517\nNext Confusion: 6.644616"}
{"text": "### State\nConfusion: 5.039032\nAction: analogize\nReward: 0.542779\nNext Confusion: 4.704964"}
{"text": "### State\nConfusion: 2.455086\nAction: analogize\nReward: -0.42512\nNext Confusion: 2.582648"}
{"text": "### State\nConfusion: 8.79387\nAction: explain\nReward: 1.493231\nNext Confusion: 8.595444"}
{"text": "### State\nConfusion: 6.5493\nAction: question\nReward: 1.596032\nNext Confusion: 5.77124"}
{"text": "### State\nConfusion: 3.847614\nAction: analogize\nReward: -0.041072\nNext Confusion: 3.546278"}
{"text": "### State\nConfusion: 3.479255\nAction: worked_example\nReward: 0.772089\nNext Confusion: 2.406743"}
{"text": "### State\nConfusion: 3.405935\nAction: worked_example\nReward: 0.631187\nNext Confusion: 2.943548"}
{"text": "### State\nConfusion: 7.263623\nAction: analogize\nReward: -0.889767\nNext Confusion: 7.725207"}
{"text": "### State\nConfusion: 3.591611\nAction: worked_example\nReward: 1.93213\nNext Confusion: 2.058504"}
{"text": "### State\nConfusion: 5.776166\nAction: explain\nReward: 0.764305\nNext Confusion: 5.656176"}
{"text": "### State\nConfusion: 3.816689\nAction: worked_example\nReward: 2.065074\nNext Confusion: 2.308856"}
{"text": "### State\nConfusion: 6.615742\nAction: analogize\nReward: -1.522533\nNext Confusion: 7.848629"}
{"text": "### State\nConfusion: 3.603578\nAction: analogize\nReward: 0.183606\nNext Confusion: 3.590095"}
{"text": "### State\nConfusion: 6.595288\nAction: analogize\nReward: -0.773955\nNext Confusion: 7.387215"}
{"text": "### State\nConfusion: 3.243061\nAction: analogize\nReward: -0.86511\nNext Confusion: 4.083089"}
{"text": "### State\nConfusion: 6.382723\nAction: correct_fact\nReward: 0.514668\nNext Confusion: 6.59727"}
{"text": "### State\nConfusion: 3.61621\nAction: correct_fact\nReward: 0.218221\nNext Confusion: 3.916531"}
{"text": "### State\nConfusion: 4.585879\nAction: explain\nReward: 0.441228\nNext Confusion: 4.214521"}
{"text": "### State\nConfusion: 5.550642\nAction: analogize\nReward: -0.23505\nNext Confusion: 5.782431"}
{"text": "### State\nConfusion: 3.977782\nAction: analogize\nReward: 0.619823\nNext Confusion: 3.979027"}
{"text": "### State\nConfusion: 3.403716\nAction: analogize\nReward: -0.803805\nNext Confusion: 3.704608"}
{"text": "### State\nConfusion: 3.522781\nAction: analogize\nReward: -0.594497\nNext Confusion: 3.853947"}
{"text": "### State\nConfusion: 3.428078\nAction: worked_example\nReward: 2.908679\nNext Confusion: 0.783914"}
{"text": "### State\nConfusion: 2.765526\nAction: worked_example\nReward: 0.146944\nNext Confusion: 2.475324"}
{"text": "### State\nConfusion: 5.662729\nAction: analogize\nReward: -0.053028\nNext Confusion: 6.267484"}
{"text": "### State\nConfusion: 2.97392\nAction: correct_fact\nReward: 0.207476\nNext Confusion: 2.529974"}
{"text": "### State\nConfusion: 4.33869\nAction: analogize\nReward: -0.048698\nNext Confusion: 4.062849"}
{"text": "### State\nConfusion: 4.013338\nAction: explain\nReward: 0.400833\nNext Confusion: 3.595311"}
{"text": "### State\nConfusion: 4.072468\nAction: analogize\nReward: -1.069305\nNext Confusion: 4.625163"}
{"text": "### State\nConfusion: 3.534719\nAction: analogize\nReward: -0.496206\nNext Confusion: 3.767223"}
{"text": "### State\nConfusion: 3.301023\nAction: analogize\nReward: -0.343358\nNext Confusion: 3.224041"}
{"text": "### State\nConfusion: 5.737082\nAction: question\nReward: 0.787803\nNext Confusion: 5.253803"}
{"text": "### State\nConfusion: 5.034364\nAction: correct_fact\nReward: -0.452177\nNext Confusion: 5.496845"}
{"text": "### State\nConfusion: 8.053705\nAction: analogize\nReward: 0.306109\nNext Confusion: 8.010042"}
{"text": "### State\nConfusion: 4.250006\nAction: analogize\nReward: -1.557089\nNext Confusion: 5.197942"}
{"text": "### State\nConfusion: 2.612885\nAction: analogize\nReward: 0.520245\nNext Confusion: 2.377651"}
{"text": "### State\nConfusion: 4.320713\nAction: question\nReward: 0.08004\nNext Confusion: 4.111726"}
{"text": "### State\nConfusion: 6.200604\nAction: analogize\nReward: 0.148336\nNext Confusion: 6.110532"}
{"text": "### State\nConfusion: 2.905517\nAction: explain\nReward: 0.205134\nNext Confusion: 2.580832"}
{"text": "### State\nConfusion: 7.576805\nAction: analogize\nReward: -1.736052\nNext Confusion: 7.978537"}
{"text": "### State\nConfusion: 4.337534\nAction: analogize\nReward: -0.830804\nNext Confusion: 5.049174"}
{"text": "### State\nConfusion: 4.344432\nAction: analogize\nReward: 0.553108\nNext Confusion: 4.202552"}
{"text": "### State\nConfusion: 4.759101\nAction: analogize\nReward: 1.546088\nNext Confusion: 3.756994"}
{"text": "### State\nConfusion: 5.246162\nAction: question\nReward: 0.419569\nNext Confusion: 5.021464"}
{"text": "### State\nConfusion: 4.600087\nAction: analogize\nReward: -0.103434\nNext Confusion: 4.98533"}
{"text": "### State\nConfusion: 5.103688\nAction: explain\nReward: -1.820209\nNext Confusion: 5.43225"}
{"text": "### State\nConfusion: 4.324837\nAction: analogize\nReward: 0.048282\nNext Confusion: 4.139078"}
{"text": "### State\nConfusion: 2.427948\nAction: analogize\nReward: -0.687189\nNext Confusion: 2.827288"}
{"text": "### State\nConfusion: 5.790867\nAction: explain\nReward: 0.480449\nNext Confusion: 5.232456"}
{"text": "### State\nConfusion: 2.040263\nAction: analogize\nReward: -0.197799\nNext Confusion: 2.24734"}
{"text": "### State\nConfusion: 2.582153\nAction: worked_example\nReward: 0.554199\nNext Confusion: 1.467742"}
{"text": "### State\nConfusion: 4.024901\nAction: explain\nReward: 0.246961\nNext Confusion: 3.849331"}
{"text": "### State\nConfusion: 3.155271\nAction: explain\nReward: 0.93429\nNext Confusion: 2.779514"}
{"text": "### State\nConfusion: 4.333934\nAction: analogize\nReward: -1.382026\nNext Confusion: 5.333732"}
{"text": "### State\nConfusion: 3.711759\nAction: analogize\nReward: -1.054925\nNext Confusion: 4.427508"}
{"text": "### State\nConfusion: 4.197458\nAction: analogize\nReward: -0.672473\nNext Confusion: 4.707225"}
{"text": "### State\nConfusion: 3.595974\nAction: analogize\nReward: -0.30356\nNext Confusion: 3.692486"}
{"text": "### State\nConfusion: 9.424139\nAction: question\nReward: -0.051805\nNext Confusion: 9.827569"}
{"text": "### State\nConfusion: 3.847582\nAction: explain\nReward: -0.545496\nNext Confusion: 4.474236"}
{"text": "### State\nConfusion: 4.257097\nAction: correct_fact\nReward: -1.007093\nNext Confusion: 5.195939"}
{"text": "### State\nConfusion: 3.375196\nAction: analogize\nReward: -0.613092\nNext Confusion: 3.231864"}
{"text": "### State\nConfusion: 4.372277\nAction: analogize\nReward: -0.062679\nNext Confusion: 4.743267"}
{"text": "### State\nConfusion: 5.252807\nAction: analogize\nReward: -1.110442\nNext Confusion: 6.075631"}
{"text": "### State\nConfusion: 4.687475\nAction: analogize\nReward: 0.085918\nNext Confusion: 4.711984"}
{"text": "### State\nConfusion: 4.754649\nAction: analogize\nReward: -0.419951\nNext Confusion: 5.299336"}
{"text": "### State\nConfusion: 6.040562\nAction: analogize\nReward: -0.296113\nNext Confusion: 6.770728"}
{"text": "### State\nConfusion: 5.219658\nAction: explain\nReward: -0.377415\nNext Confusion: 5.787689"}
{"text": "### State\nConfusion: 4.082221\nAction: explain\nReward: 1.391503\nNext Confusion: 3.019216"}
{"text": "### State\nConfusion: 5.875564\nAction: explain\nReward: 0.35298\nNext Confusion: 5.948457"}
{"text": "### State\nConfusion: 3.879623\nAction: analogize\nReward: 0.372709\nNext Confusion: 3.920777"}
{"text": "### State\nConfusion: 4.371979\nAction: analogize\nReward: 0.109884\nNext Confusion: 4.681374"}
{"text": "### State\nConfusion: 4.799329\nAction: analogize\nReward: 0.134149\nNext Confusion: 4.814604"}
{"text": "### State\nConfusion: 2.772295\nAction: analogize\nReward: -1.053458\nNext Confusion: 3.162443"}
{"text": "### State\nConfusion: 4.656308\nAction: analogize\nReward: -0.716218\nNext Confusion: 5.735703"}
{"text": "### State\nConfusion: 3.399582\nAction: analogize\nReward: 0.010502\nNext Confusion: 3.909108"}
{"text": "### State\nConfusion: 4.250917\nAction: analogize\nReward: 0.365275\nNext Confusion: 4.656218"}
{"text": "### State\nConfusion: 3.886501\nAction: analogize\nReward: 0.02291\nNext Confusion: 4.124634"}
{"text": "### State\nConfusion: 7.677275\nAction: analogize\nReward: 0.266499\nNext Confusion: 7.756824"}
{"text": "### State\nConfusion: 4.214218\nAction: analogize\nReward: 0.21845\nNext Confusion: 4.028525"}
{"text": "### State\nConfusion: 3.2064\nAction: correct_fact\nReward: 0.527978\nNext Confusion: 2.366183"}
{"text": "### State\nConfusion: 6.950843\nAction: analogize\nReward: 0.101171\nNext Confusion: 7.08694"}
{"text": "### State\nConfusion: 3.835536\nAction: analogize\nReward: -0.621992\nNext Confusion: 4.231325"}
{"text": "### State\nConfusion: 6.523788\nAction: analogize\nReward: -0.239707\nNext Confusion: 6.37458"}
{"text": "### State\nConfusion: 3.839309\nAction: analogize\nReward: -0.047621\nNext Confusion: 4.649327"}
{"text": "### State\nConfusion: 8.457514\nAction: analogize\nReward: -0.49514\nNext Confusion: 8.68329"}
{"text": "### State\nConfusion: 6.079686\nAction: analogize\nReward: -0.505621\nNext Confusion: 6.997733"}
{"text": "### State\nConfusion: 3.052662\nAction: worked_example\nReward: 2.304519\nNext Confusion: 1.063142"}
{"text": "### State\nConfusion: 4.0362\nAction: worked_example\nReward: 1.032511\nNext Confusion: 2.903929"}
{"text": "### State\nConfusion: 5.259984\nAction: question\nReward: 0.140425\nNext Confusion: 5.599321"}
{"text": "### State\nConfusion: 5.692397\nAction: analogize\nReward: 0.152449\nNext Confusion: 5.766351"}
{"text": "### State\nConfusion: 3.010824\nAction: analogize\nReward: -0.822476\nNext Confusion: 4.061491"}
{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.54809\nNext Confusion: 9.630048"}
{"text": "### State\nConfusion: 6.365635\nAction: correct_fact\nReward: 0.303342\nNext Confusion: 6.359275"}
{"text": "### State\nConfusion: 4.506102\nAction: worked_example\nReward: 3.257079\nNext Confusion: 2.463102"}
{"text": "### State\nConfusion: 6.650249\nAction: question\nReward: 0.689143\nNext Confusion: 5.702714"}
{"text": "### State\nConfusion: 3.872385\nAction: analogize\nReward: -1.4376\nNext Confusion: 5.064346"}
{"text": "### State\nConfusion: 5.604603\nAction: question\nReward: 0.936614\nNext Confusion: 4.981571"}
{"text": "### State\nConfusion: 6.364416\nAction: question\nReward: -0.058641\nNext Confusion: 5.740748"}
{"text": "### State\nConfusion: 5.162976\nAction: analogize\nReward: -0.52851\nNext Confusion: 5.366485"}
{"text": "### State\nConfusion: 4.989271\nAction: question\nReward: 0.420257\nNext Confusion: 3.911954"}
{"text": "### State\nConfusion: 4.054232\nAction: analogize\nReward: -0.821515\nNext Confusion: 4.815986"}
{"text": "### State\nConfusion: 3.795604\nAction: correct_fact\nReward: -0.037172\nNext Confusion: 4.267697"}
{"text": "### State\nConfusion: 7.883514\nAction: analogize\nReward: -0.555406\nNext Confusion: 8.099345"}
{"text": "### State\nConfusion: 4.195034\nAction: analogize\nReward: -0.593037\nNext Confusion: 4.585204"}
{"text": "### State\nConfusion: 3.851079\nAction: question\nReward: -0.757824\nNext Confusion: 3.551079"}
{"text": "### State\nConfusion: 2.634171\nAction: analogize\nReward: -1.035621\nNext Confusion: 3.713475"}
{"text": "### State\nConfusion: 8.174025\nAction: analogize\nReward: -0.735074\nNext Confusion: 8.931459"}
{"text": "### State\nConfusion: 5.127021\nAction: analogize\nReward: -0.197244\nNext Confusion: 5.958104"}
{"text": "### State\nConfusion: 5.421781\nAction: question\nReward: 0.750681\nNext Confusion: 4.586025"}
{"text": "### State\nConfusion: 5.553261\nAction: question\nReward: 0.301218\nNext Confusion: 4.546919"}
{"text": "### State\nConfusion: 7.039034\nAction: analogize\nReward: 0.021269\nNext Confusion: 7.317868"}
{"text": "### State\nConfusion: 4.994831\nAction: analogize\nReward: -0.938323\nNext Confusion: 5.784461"}
{"text": "### State\nConfusion: 3.516691\nAction: worked_example\nReward: 1.905112\nNext Confusion: 2.429983"}
{"text": "### State\nConfusion: 9.025807\nAction: analogize\nReward: -0.680691\nNext Confusion: 9.413644"}
{"text": "### State\nConfusion: 5.016836\nAction: question\nReward: 1.476656\nNext Confusion: 4.174343"}
{"text": "### State\nConfusion: 3.783532\nAction: explain\nReward: 0.61822\nNext Confusion: 3.470704"}
{"text": "### State\nConfusion: 7.004241\nAction: analogize\nReward: 0.271521\nNext Confusion: 6.929998"}
{"text": "### State\nConfusion: 3.788744\nAction: correct_fact\nReward: 0.356577\nNext Confusion: 3.955624"}
{"text": "### State\nConfusion: 3.585034\nAction: analogize\nReward: 0.180996\nNext Confusion: 3.571377"}
{"text": "### State\nConfusion: 5.884622\nAction: analogize\nReward: 0.365228\nNext Confusion: 5.668638"}
{"text": "### State\nConfusion: 3.248542\nAction: analogize\nReward: 0.325974\nNext Confusion: 3.396679"}
{"text": "### State\nConfusion: 4.457415\nAction: question\nReward: -0.504474\nNext Confusion: 5.242377"}
{"text": "### State\nConfusion: 2.183769\nAction: analogize\nReward: -0.43012\nNext Confusion: 2.796507"}
{"text": "### State\nConfusion: 3.337488\nAction: question\nReward: 0.915789\nNext Confusion: 3.028513"}
{"text": "### State\nConfusion: 3.885993\nAction: analogize\nReward: 0.845579\nNext Confusion: 4.075472"}
{"text": "### State\nConfusion: 5.473674\nAction: explain\nReward: 0.411274\nNext Confusion: 5.449886"}
{"text": "### State\nConfusion: 6.636641\nAction: analogize\nReward: 0.105483\nNext Confusion: 6.738043"}
{"text": "### State\nConfusion: 3.585574\nAction: question\nReward: 0.51839\nNext Confusion: 3.376465"}
{"text": "### State\nConfusion: 4.07502\nAction: question\nReward: 1.004779\nNext Confusion: 3.735128"}
{"text": "### State\nConfusion: 2.115222\nAction: analogize\nReward: -1.265686\nNext Confusion: 2.850984"}
{"text": "### State\nConfusion: 3.176735\nAction: explain\nReward: 0.091548\nNext Confusion: 3.295829"}
{"text": "### State\nConfusion: 5.73152\nAction: worked_example\nReward: 0.402069\nNext Confusion: 5.152857"}
{"text": "### State\nConfusion: 2.005058\nAction: analogize\nReward: -1.365192\nNext Confusion: 2.684636"}
{"text": "### State\nConfusion: 3.535708\nAction: explain\nReward: 0.106466\nNext Confusion: 3.094336"}
{"text": "### State\nConfusion: 7.704071\nAction: explain\nReward: 0.721175\nNext Confusion: 6.908477"}
{"text": "### State\nConfusion: 5.243719\nAction: explain\nReward: 1.340814\nNext Confusion: 4.052736"}
{"text": "### State\nConfusion: 7.535902\nAction: analogize\nReward: 0.726927\nNext Confusion: 7.673487"}
{"text": "### State\nConfusion: 4.64706\nAction: analogize\nReward: -0.708878\nNext Confusion: 5.376009"}
{"text": "### State\nConfusion: 7.268917\nAction: correct_fact\nReward: -1.127313\nNext Confusion: 7.566716"}
{"text": "### State\nConfusion: 5.639025\nAction: analogize\nReward: -0.472023\nNext Confusion: 6.138049"}
{"text": "### State\nConfusion: 6.242031\nAction: analogize\nReward: 0.58307\nNext Confusion: 5.923123"}
{"text": "### State\nConfusion: 3.121297\nAction: analogize\nReward: -0.48891\nNext Confusion: 3.906541"}
{"text": "### State\nConfusion: 5.029388\nAction: correct_fact\nReward: -1.002845\nNext Confusion: 6.173575"}
{"text": "### State\nConfusion: 5.053395\nAction: analogize\nReward: -0.313301\nNext Confusion: 5.932772"}
{"text": "### State\nConfusion: 3.194567\nAction: analogize\nReward: 0.039439\nNext Confusion: 3.161913"}
{"text": "### State\nConfusion: 4.130982\nAction: explain\nReward: 0.624081\nNext Confusion: 3.669775"}
{"text": "### State\nConfusion: 7.574032\nAction: analogize\nReward: -0.309397\nNext Confusion: 8.215817"}
{"text": "### State\nConfusion: 6.767454\nAction: analogize\nReward: -0.829196\nNext Confusion: 7.24061"}
{"text": "### State\nConfusion: 2.361092\nAction: analogize\nReward: 0.40583\nNext Confusion: 2.588848"}
{"text": "### State\nConfusion: 3.66922\nAction: analogize\nReward: -0.820735\nNext Confusion: 4.432349"}
{"text": "### State\nConfusion: 6.70331\nAction: correct_fact\nReward: 0.493159\nNext Confusion: 6.04956"}
{"text": "### State\nConfusion: 4.071138\nAction: explain\nReward: -0.054266\nNext Confusion: 3.786052"}
{"text": "### State\nConfusion: 7.498981\nAction: worked_example\nReward: 0.760847\nNext Confusion: 7.090668"}
{"text": "### State\nConfusion: 4.860355\nAction: analogize\nReward: 0.671602\nNext Confusion: 5.004103"}
{"text": "### State\nConfusion: 5.043371\nAction: analogize\nReward: -1.172208\nNext Confusion: 6.048717"}
{"text": "### State\nConfusion: 7.594638\nAction: question\nReward: 0.169686\nNext Confusion: 7.070739"}
{"text": "### State\nConfusion: 4.307424\nAction: question\nReward: 0.69544\nNext Confusion: 3.544158"}
{"text": "### State\nConfusion: 3.624445\nAction: analogize\nReward: -1.039277\nNext Confusion: 4.741978"}
{"text": "### State\nConfusion: 5.519311\nAction: analogize\nReward: -0.172748\nNext Confusion: 6.113349"}
{"text": "### State\nConfusion: 7.701839\nAction: worked_example\nReward: 0.471455\nNext Confusion: 7.059561"}
{"text": "### State\nConfusion: 3.533838\nAction: analogize\nReward: -1.50476\nNext Confusion: 4.605111"}
{"text": "### State\nConfusion: 4.857623\nAction: analogize\nReward: 0.089029\nNext Confusion: 5.381652"}
{"text": "### State\nConfusion: 5.524953\nAction: explain\nReward: 1.281716\nNext Confusion: 4.764929"}
{"text": "### State\nConfusion: 4.382893\nAction: analogize\nReward: 0.915903\nNext Confusion: 4.357092"}
{"text": "### State\nConfusion: 3.510692\nAction: worked_example\nReward: 2.042163\nNext Confusion: 1.725706"}
{"text": "### State\nConfusion: 3.533618\nAction: analogize\nReward: -0.318574\nNext Confusion: 3.606989"}
{"text": "### State\nConfusion: 4.581437\nAction: analogize\nReward: -0.908441\nNext Confusion: 5.250843"}
{"text": "### State\nConfusion: 3.491754\nAction: explain\nReward: 0.09608\nNext Confusion: 3.502002"}
{"text": "### State\nConfusion: 2.910742\nAction: analogize\nReward: -0.185903\nNext Confusion: 2.924014"}
{"text": "### State\nConfusion: 4.220527\nAction: analogize\nReward: 0.157805\nNext Confusion: 4.241376"}
{"text": "### State\nConfusion: 4.236191\nAction: worked_example\nReward: 1.708519\nNext Confusion: 2.755738"}
{"text": "### State\nConfusion: 6.356474\nAction: worked_example\nReward: 1.986318\nNext Confusion: 4.847041"}
{"text": "### State\nConfusion: 6.596906\nAction: explain\nReward: 0.945082\nNext Confusion: 5.777353"}
{"text": "### State\nConfusion: 9.065663\nAction: worked_example\nReward: 2.490255\nNext Confusion: 7.027289"}
{"text": "### State\nConfusion: 5.798057\nAction: question\nReward: 1.632281\nNext Confusion: 4.945551"}
{"text": "### State\nConfusion: 7.547707\nAction: analogize\nReward: -0.470323\nNext Confusion: 8.281344"}
{"text": "### State\nConfusion: 6.886375\nAction: correct_fact\nReward: 0.841331\nNext Confusion: 6.200259"}
{"text": "### State\nConfusion: 2.545521\nAction: analogize\nReward: -0.931296\nNext Confusion: 2.997872"}
{"text": "### State\nConfusion: 3.436865\nAction: question\nReward: 0.605545\nNext Confusion: 3.034606"}
{"text": "### State\nConfusion: 5.48512\nAction: question\nReward: 0.552247\nNext Confusion: 4.395566"}
{"text": "### State\nConfusion: 1.99361\nAction: correct_fact\nReward: 0.398921\nNext Confusion: 2.023035"}
{"text": "### State\nConfusion: 5.620854\nAction: analogize\nReward: -0.537399\nNext Confusion: 6.000232"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.398559\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 4.712558\nAction: explain\nReward: 0.35094\nNext Confusion: 4.100639"}
{"text": "### State\nConfusion: 6.00095\nAction: question\nReward: -0.739487\nNext Confusion: 6.590231"}
{"text": "### State\nConfusion: 3.43446\nAction: analogize\nReward: -0.346556\nNext Confusion: 3.86933"}
{"text": "### State\nConfusion: 4.312879\nAction: correct_fact\nReward: -0.838297\nNext Confusion: 4.408473"}
{"text": "### State\nConfusion: 5.588582\nAction: analogize\nReward: -0.240599\nNext Confusion: 5.828334"}
{"text": "### State\nConfusion: 4.105963\nAction: analogize\nReward: 0.322535\nNext Confusion: 4.015285"}
{"text": "### State\nConfusion: 3.421049\nAction: analogize\nReward: 0.553435\nNext Confusion: 3.008563"}
{"text": "### State\nConfusion: 7.728908\nAction: question\nReward: 1.823694\nNext Confusion: 6.789086"}
{"text": "### State\nConfusion: 3.844443\nAction: worked_example\nReward: 0.401461\nNext Confusion: 2.993685"}
{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: -0.052131\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 4.919034\nAction: analogize\nReward: -1.120059\nNext Confusion: 5.821943"}
{"text": "### State\nConfusion: 6.176862\nAction: analogize\nReward: 0.605978\nNext Confusion: 6.245404"}
{"text": "### State\nConfusion: 3.05765\nAction: analogize\nReward: 0.151237\nNext Confusion: 3.941056"}
{"text": "### State\nConfusion: 4.497888\nAction: analogize\nReward: 0.20143\nNext Confusion: 4.707649"}
{"text": "### State\nConfusion: 2.331526\nAction: explain\nReward: -0.390644\nNext Confusion: 1.887389"}
{"text": "### State\nConfusion: 5.476439\nAction: correct_fact\nReward: -0.625659\nNext Confusion: 5.484992"}
{"text": "### State\nConfusion: 4.749814\nAction: correct_fact\nReward: 0.255413\nNext Confusion: 4.287553"}
{"text": "### State\nConfusion: 4.367284\nAction: correct_fact\nReward: 0.754914\nNext Confusion: 4.831099"}
{"text": "### State\nConfusion: 3.990207\nAction: analogize\nReward: 0.489978\nNext Confusion: 3.777346"}
{"text": "### State\nConfusion: 2.36078\nAction: correct_fact\nReward: 0.131446\nNext Confusion: 2.419431"}
{"text": "### State\nConfusion: 5.181594\nAction: analogize\nReward: -1.023529\nNext Confusion: 6.165031"}
{"text": "### State\nConfusion: 6.155968\nAction: worked_example\nReward: 1.867317\nNext Confusion: 4.540814"}
{"text": "### State\nConfusion: 3.921279\nAction: worked_example\nReward: 1.38055\nNext Confusion: 3.467207"}
{"text": "### State\nConfusion: 4.874241\nAction: analogize\nReward: 0.049841\nNext Confusion: 4.888297"}
{"text": "### State\nConfusion: 3.457221\nAction: analogize\nReward: -0.881108\nNext Confusion: 3.81485"}
{"text": "### State\nConfusion: 3.625238\nAction: analogize\nReward: -1.632032\nNext Confusion: 4.600665"}
{"text": "### State\nConfusion: 3.610027\nAction: correct_fact\nReward: 0.990948\nNext Confusion: 3.138358"}
{"text": "### State\nConfusion: 4.592354\nAction: explain\nReward: -1.103914\nNext Confusion: 4.699113"}
{"text": "### State\nConfusion: 3.965907\nAction: analogize\nReward: 0.427269\nNext Confusion: 4.632581"}
{"text": "### State\nConfusion: 6.224049\nAction: analogize\nReward: 0.403771\nNext Confusion: 6.236999"}
{"text": "### State\nConfusion: 3.513805\nAction: explain\nReward: 0.413231\nNext Confusion: 3.090316"}
{"text": "### State\nConfusion: 3.581797\nAction: analogize\nReward: -1.081045\nNext Confusion: 4.690057"}
{"text": "### State\nConfusion: 4.502285\nAction: analogize\nReward: -0.115318\nNext Confusion: 4.403182"}
{"text": "### State\nConfusion: 1.682893\nAction: analogize\nReward: 0.704574\nNext Confusion: 1.458029"}
{"text": "### State\nConfusion: 3.767197\nAction: explain\nReward: -0.390816\nNext Confusion: 3.399625"}
{"text": "### State\nConfusion: 9.782035\nAction: explain\nReward: 1.040522\nNext Confusion: 9.875731"}
{"text": "### State\nConfusion: 3.654296\nAction: analogize\nReward: 0.143296\nNext Confusion: 3.830031"}
{"text": "### State\nConfusion: 5.027151\nAction: analogize\nReward: -0.585108\nNext Confusion: 5.767681"}
{"text": "### State\nConfusion: 6.183936\nAction: question\nReward: 1.22717\nNext Confusion: 5.29231"}
{"text": "### State\nConfusion: 3.36798\nAction: analogize\nReward: -0.535297\nNext Confusion: 3.329399"}
{"text": "### State\nConfusion: 2.521666\nAction: correct_fact\nReward: -0.191486\nNext Confusion: 2.38397"}
{"text": "### State\nConfusion: 3.486265\nAction: explain\nReward: -1.035973\nNext Confusion: 3.673044"}
{"text": "### State\nConfusion: 2.735054\nAction: analogize\nReward: -0.808363\nNext Confusion: 3.324509"}
{"text": "### State\nConfusion: 3.34873\nAction: explain\nReward: -0.335458\nNext Confusion: 3.526425"}
{"text": "### State\nConfusion: 5.857225\nAction: analogize\nReward: -0.201824\nNext Confusion: 6.406119"}
{"text": "### State\nConfusion: 5.450879\nAction: worked_example\nReward: 0.994392\nNext Confusion: 4.314799"}
{"text": "### State\nConfusion: 6.129226\nAction: explain\nReward: -0.347463\nNext Confusion: 6.183104"}
{"text": "### State\nConfusion: 5.796948\nAction: worked_example\nReward: 1.181532\nNext Confusion: 4.332111"}
{"text": "### State\nConfusion: 4.019942\nAction: analogize\nReward: -1.097066\nNext Confusion: 4.38926"}
{"text": "### State\nConfusion: 6.346842\nAction: analogize\nReward: -0.05843\nNext Confusion: 6.536785"}
{"text": "### State\nConfusion: 4.351801\nAction: analogize\nReward: 1.803047\nNext Confusion: 3.652701"}
{"text": "### State\nConfusion: 3.17073\nAction: question\nReward: 0.985057\nNext Confusion: 2.444106"}
{"text": "### State\nConfusion: 3.334849\nAction: analogize\nReward: -0.158321\nNext Confusion: 3.459907"}
{"text": "### State\nConfusion: 2.858408\nAction: analogize\nReward: -1.283425\nNext Confusion: 4.023058"}
{"text": "### State\nConfusion: 3.845057\nAction: worked_example\nReward: 2.053946\nNext Confusion: 2.178561"}
{"text": "### State\nConfusion: 3.872064\nAction: analogize\nReward: -0.908379\nNext Confusion: 4.351231"}
{"text": "### State\nConfusion: 6.930616\nAction: analogize\nReward: -0.606489\nNext Confusion: 7.437333"}
{"text": "### State\nConfusion: 4.201954\nAction: analogize\nReward: -0.143287\nNext Confusion: 4.545249"}
{"text": "### State\nConfusion: 3.460118\nAction: analogize\nReward: -0.210642\nNext Confusion: 3.958432"}
{"text": "### State\nConfusion: 5.932957\nAction: analogize\nReward: -0.423899\nNext Confusion: 6.711923"}
{"text": "### State\nConfusion: 6.451794\nAction: analogize\nReward: 0.163913\nNext Confusion: 6.371612"}
{"text": "### State\nConfusion: 6.143884\nAction: analogize\nReward: -1.50412\nNext Confusion: 7.402514"}
{"text": "### State\nConfusion: 3.787366\nAction: analogize\nReward: 0.144803\nNext Confusion: 3.96288"}
{"text": "### State\nConfusion: 3.46052\nAction: analogize\nReward: -0.435846\nNext Confusion: 4.18104"}
{"text": "### State\nConfusion: 5.670783\nAction: analogize\nReward: -1.127217\nNext Confusion: 6.834265"}
{"text": "### State\nConfusion: 6.505923\nAction: analogize\nReward: 0.189847\nNext Confusion: 6.595182"}
{"text": "### State\nConfusion: 3.762953\nAction: worked_example\nReward: -0.449213\nNext Confusion: 4.189637"}
{"text": "### State\nConfusion: 7.300265\nAction: analogize\nReward: -0.242874\nNext Confusion: 7.255626"}
{"text": "### State\nConfusion: 3.446495\nAction: explain\nReward: 0.351842\nNext Confusion: 3.319413"}
{"text": "### State\nConfusion: 6.007587\nAction: analogize\nReward: -0.389668\nNext Confusion: 6.238716"}
{"text": "### State\nConfusion: 4.569729\nAction: worked_example\nReward: 1.310543\nNext Confusion: 3.685556"}
{"text": "### State\nConfusion: 4.289463\nAction: question\nReward: -3.159156\nNext Confusion: 3.588308"}
{"text": "### State\nConfusion: 3.805428\nAction: analogize\nReward: -0.137826\nNext Confusion: 4.271468"}
{"text": "### State\nConfusion: 5.339787\nAction: analogize\nReward: -0.725403\nNext Confusion: 5.959084"}
{"text": "### State\nConfusion: 6.367439\nAction: worked_example\nReward: -0.410931\nNext Confusion: 5.970048"}
{"text": "### State\nConfusion: 4.185916\nAction: analogize\nReward: -1.11682\nNext Confusion: 4.895042"}
{"text": "### State\nConfusion: 4.987749\nAction: explain\nReward: -0.259711\nNext Confusion: 5.626599"}
{"text": "### State\nConfusion: 4.17443\nAction: explain\nReward: -0.007389\nNext Confusion: 3.915439"}
{"text": "### State\nConfusion: 7.400229\nAction: analogize\nReward: -0.440491\nNext Confusion: 7.743072"}
{"text": "### State\nConfusion: 5.192929\nAction: correct_fact\nReward: 0.679553\nNext Confusion: 5.149954"}
{"text": "### State\nConfusion: 4.627472\nAction: analogize\nReward: -0.525458\nNext Confusion: 5.036076"}
{"text": "### State\nConfusion: 3.448749\nAction: correct_fact\nReward: -0.686939\nNext Confusion: 4.127151"}
{"text": "### State\nConfusion: 4.950369\nAction: analogize\nReward: -0.497006\nNext Confusion: 5.154066"}
{"text": "### State\nConfusion: 3.633871\nAction: analogize\nReward: -0.05189\nNext Confusion: 4.004499"}
{"text": "### State\nConfusion: 7.003464\nAction: worked_example\nReward: 2.687881\nNext Confusion: 4.899817"}
{"text": "### State\nConfusion: 3.792127\nAction: analogize\nReward: -0.308802\nNext Confusion: 4.434352"}
{"text": "### State\nConfusion: 3.283564\nAction: analogize\nReward: 1.190446\nNext Confusion: 3.436299"}
{"text": "### State\nConfusion: 7.512014\nAction: analogize\nReward: -0.078398\nNext Confusion: 8.393634"}
{"text": "### State\nConfusion: 3.925258\nAction: correct_fact\nReward: 0.514447\nNext Confusion: 3.727096"}
{"text": "### State\nConfusion: 6.571545\nAction: worked_example\nReward: 1.64758\nNext Confusion: 5.171705"}
{"text": "### State\nConfusion: 3.203366\nAction: analogize\nReward: -0.582273\nNext Confusion: 3.776733"}
{"text": "### State\nConfusion: 8.83236\nAction: question\nReward: -1.0229\nNext Confusion: 9.455678"}
{"text": "### State\nConfusion: 6.196302\nAction: analogize\nReward: -0.494157\nNext Confusion: 6.269166"}
{"text": "### State\nConfusion: 7.119833\nAction: analogize\nReward: -0.632348\nNext Confusion: 7.671856"}
{"text": "### State\nConfusion: 3.446799\nAction: analogize\nReward: -0.235273\nNext Confusion: 3.195469"}
{"text": "### State\nConfusion: 7.038426\nAction: worked_example\nReward: 1.144741\nNext Confusion: 5.865378"}
{"text": "### State\nConfusion: 4.291528\nAction: explain\nReward: -1.507047\nNext Confusion: 5.162663"}
{"text": "### State\nConfusion: 6.868341\nAction: question\nReward: 1.142\nNext Confusion: 6.448683"}
{"text": "### State\nConfusion: 3.429575\nAction: question\nReward: 0.260133\nNext Confusion: 3.166194"}
{"text": "### State\nConfusion: 4.569086\nAction: explain\nReward: 1.034115\nNext Confusion: 3.786755"}
{"text": "### State\nConfusion: 4.636881\nAction: analogize\nReward: 1.196279\nNext Confusion: 4.518783"}
{"text": "### State\nConfusion: 4.169616\nAction: explain\nReward: 0.036306\nNext Confusion: 3.886209"}
{"text": "### State\nConfusion: 5.246602\nAction: analogize\nReward: -0.15856\nNext Confusion: 5.307223"}
{"text": "### State\nConfusion: 3.581232\nAction: analogize\nReward: 0.436844\nNext Confusion: 3.550339"}
{"text": "### State\nConfusion: 5.709446\nAction: analogize\nReward: -0.037479\nNext Confusion: 6.23242"}
{"text": "### State\nConfusion: 6.971954\nAction: analogize\nReward: -0.883138\nNext Confusion: 7.784509"}
{"text": "### State\nConfusion: 4.800322\nAction: explain\nReward: -0.156482\nNext Confusion: 5.04152"}
{"text": "### State\nConfusion: 5.968896\nAction: question\nReward: 0.735787\nNext Confusion: 5.41699"}
{"text": "### State\nConfusion: 7.609529\nAction: analogize\nReward: -0.907761\nNext Confusion: 8.138722"}
{"text": "### State\nConfusion: 3.348556\nAction: analogize\nReward: -0.522565\nNext Confusion: 3.955441"}
{"text": "### State\nConfusion: 6.54799\nAction: question\nReward: 0.839793\nNext Confusion: 5.740779"}
{"text": "### State\nConfusion: 5.126441\nAction: explain\nReward: 2.21922\nNext Confusion: 3.600956"}
{"text": "### State\nConfusion: 4.444181\nAction: analogize\nReward: -0.272342\nNext Confusion: 4.869495"}
{"text": "### State\nConfusion: 4.507113\nAction: question\nReward: 0.040065\nNext Confusion: 4.782022"}
{"text": "### State\nConfusion: 4.47993\nAction: question\nReward: 1.40697\nNext Confusion: 3.401652"}
{"text": "### State\nConfusion: 6.068524\nAction: analogize\nReward: -0.164\nNext Confusion: 6.512142"}
{"text": "### State\nConfusion: 8.649179\nAction: analogize\nReward: -0.362601\nNext Confusion: 8.878508"}
{"text": "### State\nConfusion: 2.618763\nAction: analogize\nReward: -0.2353\nNext Confusion: 3.374444"}
{"text": "### State\nConfusion: 3.655497\nAction: analogize\nReward: 0.47788\nNext Confusion: 3.574526"}
{"text": "### State\nConfusion: 6.825738\nAction: question\nReward: 1.524791\nNext Confusion: 5.907894"}
{"text": "### State\nConfusion: 3.849934\nAction: analogize\nReward: -0.454719\nNext Confusion: 4.542527"}
{"text": "### State\nConfusion: 3.52636\nAction: analogize\nReward: -1.335167\nNext Confusion: 4.591458"}
{"text": "### State\nConfusion: 5.653258\nAction: analogize\nReward: 0.425317\nNext Confusion: 6.027638"}
{"text": "### State\nConfusion: 6.79264\nAction: question\nReward: -0.456067\nNext Confusion: 6.680654"}
{"text": "### State\nConfusion: 5.825899\nAction: explain\nReward: 1.137091\nNext Confusion: 4.860346"}
{"text": "### State\nConfusion: 4.024473\nAction: analogize\nReward: -1.130899\nNext Confusion: 5.080507"}
{"text": "### State\nConfusion: 5.027373\nAction: question\nReward: -0.279873\nNext Confusion: 5.103498"}
{"text": "### State\nConfusion: 4.107711\nAction: analogize\nReward: 0.062419\nNext Confusion: 3.716159"}
{"text": "### State\nConfusion: 6.085206\nAction: analogize\nReward: -0.034186\nNext Confusion: 6.579785"}
{"text": "### State\nConfusion: 9.329837\nAction: correct_fact\nReward: -0.34011\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 7.806332\nAction: analogize\nReward: -1.354402\nNext Confusion: 8.877954"}
{"text": "### State\nConfusion: 9.883806\nAction: analogize\nReward: -0.513869\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 6.910408\nAction: question\nReward: -0.016033\nNext Confusion: 6.908116"}
{"text": "### State\nConfusion: 4.543718\nAction: analogize\nReward: 0.746005\nNext Confusion: 4.320139"}
{"text": "### State\nConfusion: 3.82457\nAction: worked_example\nReward: 0.980693\nNext Confusion: 2.952715"}
{"text": "### State\nConfusion: 4.841876\nAction: analogize\nReward: 0.688047\nNext Confusion: 4.906706"}
{"text": "### State\nConfusion: 4.36744\nAction: explain\nReward: 0.149362\nNext Confusion: 4.382951"}
{"text": "### State\nConfusion: 3.4519\nAction: analogize\nReward: -0.289845\nNext Confusion: 3.852726"}
{"text": "### State\nConfusion: 4.559366\nAction: analogize\nReward: 0.248624\nNext Confusion: 4.250278"}
{"text": "### State\nConfusion: 4.871343\nAction: worked_example\nReward: 2.006758\nNext Confusion: 3.021118"}
{"text": "### State\nConfusion: 2.90186\nAction: worked_example\nReward: 0.47045\nNext Confusion: 1.383204"}
{"text": "### State\nConfusion: 3.085492\nAction: explain\nReward: 0.036318\nNext Confusion: 3.096477"}
{"text": "### State\nConfusion: 7.723372\nAction: analogize\nReward: -0.830827\nNext Confusion: 8.442617"}
{"text": "### State\nConfusion: 4.313051\nAction: analogize\nReward: 0.710948\nNext Confusion: 4.525727"}
{"text": "### State\nConfusion: 3.148665\nAction: correct_fact\nReward: 0.685984\nNext Confusion: 2.654194"}
{"text": "### State\nConfusion: 2.622224\nAction: explain\nReward: -0.410488\nNext Confusion: 2.958908"}
{"text": "### State\nConfusion: 4.929009\nAction: explain\nReward: 0.961794\nNext Confusion: 4.239855"}
{"text": "### State\nConfusion: 3.513432\nAction: question\nReward: 1.100215\nNext Confusion: 2.54015"}
{"text": "### State\nConfusion: 6.457459\nAction: analogize\nReward: -0.458103\nNext Confusion: 6.367057"}
{"text": "### State\nConfusion: 6.632654\nAction: analogize\nReward: 0.177102\nNext Confusion: 6.513361"}
{"text": "### State\nConfusion: 6.090064\nAction: question\nReward: 0.381686\nNext Confusion: 5.960601"}
{"text": "### State\nConfusion: 2.783939\nAction: analogize\nReward: 0.773949\nNext Confusion: 2.812135"}
{"text": "### State\nConfusion: 3.035819\nAction: analogize\nReward: -0.798472\nNext Confusion: 4.235182"}
{"text": "### State\nConfusion: 2.786038\nAction: worked_example\nReward: 0.139796\nNext Confusion: 2.822438"}
{"text": "### State\nConfusion: 3.744302\nAction: correct_fact\nReward: 0.623173\nNext Confusion: 3.392101"}
{"text": "### State\nConfusion: 3.43836\nAction: explain\nReward: 0.338272\nNext Confusion: 3.515679"}
{"text": "### State\nConfusion: 4.052148\nAction: analogize\nReward: -0.345972\nNext Confusion: 4.051585"}
{"text": "### State\nConfusion: 5.878376\nAction: analogize\nReward: 0.581745\nNext Confusion: 5.632409"}
{"text": "### State\nConfusion: 7.096414\nAction: worked_example\nReward: 0.754376\nNext Confusion: 6.939009"}
{"text": "### State\nConfusion: 3.708639\nAction: correct_fact\nReward: 0.057415\nNext Confusion: 4.125999"}
{"text": "### State\nConfusion: 7.392665\nAction: worked_example\nReward: 2.514391\nNext Confusion: 5.432892"}
{"text": "### State\nConfusion: 6.486508\nAction: analogize\nReward: -1.212701\nNext Confusion: 7.813913"}
{"text": "### State\nConfusion: 6.889399\nAction: analogize\nReward: -1.428461\nNext Confusion: 8.115419"}
{"text": "### State\nConfusion: 2.396941\nAction: analogize\nReward: 1.012256\nNext Confusion: 2.412092"}
{"text": "### State\nConfusion: 4.669146\nAction: worked_example\nReward: 0.570246\nNext Confusion: 4.402059"}
{"text": "### State\nConfusion: 4.528219\nAction: analogize\nReward: 0.682524\nNext Confusion: 4.172763"}
{"text": "### State\nConfusion: 7.065587\nAction: worked_example\nReward: 0.844311\nNext Confusion: 5.942281"}
{"text": "### State\nConfusion: 8.21628\nAction: explain\nReward: 1.303762\nNext Confusion: 7.426588"}
{"text": "### State\nConfusion: 4.642588\nAction: correct_fact\nReward: 0.030873\nNext Confusion: 4.616276"}
{"text": "### State\nConfusion: 6.436354\nAction: worked_example\nReward: 1.043848\nNext Confusion: 5.170756"}
{"text": "### State\nConfusion: 3.18685\nAction: question\nReward: 0.569739\nNext Confusion: 2.5131"}
{"text": "### State\nConfusion: 4.502162\nAction: analogize\nReward: 0.123031\nNext Confusion: 4.256639"}
{"text": "### State\nConfusion: 3.236621\nAction: analogize\nReward: -1.226024\nNext Confusion: 4.357628"}
{"text": "### State\nConfusion: 2.31076\nAction: analogize\nReward: 0.351427\nNext Confusion: 2.393765"}
{"text": "### State\nConfusion: 5.464483\nAction: analogize\nReward: -0.107958\nNext Confusion: 5.520666"}
{"text": "### State\nConfusion: 3.708102\nAction: explain\nReward: 0.049102\nNext Confusion: 2.566842"}
{"text": "### State\nConfusion: 4.404992\nAction: analogize\nReward: -0.510053\nNext Confusion: 4.818144"}
{"text": "### State\nConfusion: 3.450883\nAction: analogize\nReward: -1.250229\nNext Confusion: 4.223539"}
{"text": "### State\nConfusion: 5.159201\nAction: question\nReward: -0.898699\nNext Confusion: 5.392422"}
{"text": "### State\nConfusion: 5.612621\nAction: analogize\nReward: -1.899629\nNext Confusion: 6.393727"}
{"text": "### State\nConfusion: 2.496055\nAction: analogize\nReward: 0.083258\nNext Confusion: 2.69881"}
{"text": "### State\nConfusion: 4.094158\nAction: analogize\nReward: -0.281068\nNext Confusion: 4.18822"}
{"text": "### State\nConfusion: 6.628239\nAction: question\nReward: 0.187419\nNext Confusion: 6.615895"}
{"text": "### State\nConfusion: 5.738016\nAction: analogize\nReward: -1.199077\nNext Confusion: 6.711353"}
{"text": "### State\nConfusion: 6.312516\nAction: analogize\nReward: 0.42972\nNext Confusion: 6.950694"}
{"text": "### State\nConfusion: 3.805835\nAction: analogize\nReward: -0.01728\nNext Confusion: 3.534842"}
{"text": "### State\nConfusion: 7.522794\nAction: explain\nReward: 0.367578\nNext Confusion: 7.324257"}
{"text": "### State\nConfusion: 8.889452\nAction: analogize\nReward: -0.572201\nNext Confusion: 8.881808"}
{"text": "### State\nConfusion: 4.964946\nAction: analogize\nReward: -0.624785\nNext Confusion: 5.750177"}
{"text": "### State\nConfusion: 4.519431\nAction: analogize\nReward: -0.219609\nNext Confusion: 5.098336"}
{"text": "### State\nConfusion: 6.143557\nAction: question\nReward: 0.970562\nNext Confusion: 5.958963"}
{"text": "### State\nConfusion: 7.568764\nAction: analogize\nReward: -1.154265\nNext Confusion: 8.696467"}
{"text": "### State\nConfusion: 4.139323\nAction: explain\nReward: -0.497746\nNext Confusion: 4.463565"}
{"text": "### State\nConfusion: 8.346292\nAction: explain\nReward: 0.947031\nNext Confusion: 7.685566"}
{"text": "### State\nConfusion: 2.578793\nAction: worked_example\nReward: 0.8092\nNext Confusion: 1.972243"}
{"text": "### State\nConfusion: 3.574151\nAction: question\nReward: -0.001893\nNext Confusion: 2.94033"}
{"text": "### State\nConfusion: 3.391303\nAction: analogize\nReward: 0.159351\nNext Confusion: 2.838915"}
{"text": "### State\nConfusion: 4.434367\nAction: correct_fact\nReward: 0.725639\nNext Confusion: 3.582488"}
{"text": "### State\nConfusion: 4.485438\nAction: analogize\nReward: -1.644401\nNext Confusion: 5.267482"}
{"text": "### State\nConfusion: 3.795971\nAction: worked_example\nReward: 2.16479\nNext Confusion: 1.835349"}
{"text": "### State\nConfusion: 4.48812\nAction: analogize\nReward: -0.252056\nNext Confusion: 4.740558"}
{"text": "### State\nConfusion: 6.62453\nAction: analogize\nReward: -0.599549\nNext Confusion: 7.097754"}
{"text": "### State\nConfusion: 3.436073\nAction: analogize\nReward: -0.128818\nNext Confusion: 3.550781"}
{"text": "### State\nConfusion: 6.385056\nAction: explain\nReward: -0.614619\nNext Confusion: 6.825878"}
{"text": "### State\nConfusion: 4.543844\nAction: analogize\nReward: -0.357182\nNext Confusion: 4.814017"}
{"text": "### State\nConfusion: 5.016735\nAction: correct_fact\nReward: -1.125121\nNext Confusion: 5.913329"}
{"text": "### State\nConfusion: 3.947467\nAction: analogize\nReward: -1.027909\nNext Confusion: 5.059095"}
{"text": "### State\nConfusion: 5.171448\nAction: analogize\nReward: 0.577806\nNext Confusion: 5.198316"}
{"text": "### State\nConfusion: 8.871197\nAction: question\nReward: -0.803998\nNext Confusion: 9.891808"}
{"text": "### State\nConfusion: 4.985002\nAction: correct_fact\nReward: 0.845959\nNext Confusion: 4.625676"}
{"text": "### State\nConfusion: 5.58028\nAction: analogize\nReward: 0.36508\nNext Confusion: 5.53048"}
{"text": "### State\nConfusion: 3.853096\nAction: question\nReward: 1.561354\nNext Confusion: 3.282271"}
{"text": "### State\nConfusion: 5.917102\nAction: analogize\nReward: 0.05721\nNext Confusion: 5.640356"}
{"text": "### State\nConfusion: 3.994782\nAction: analogize\nReward: -0.043592\nNext Confusion: 4.123508"}
{"text": "### State\nConfusion: 4.253486\nAction: analogize\nReward: -1.054318\nNext Confusion: 5.083886"}
{"text": "### State\nConfusion: 3.477302\nAction: correct_fact\nReward: 0.558578\nNext Confusion: 3.341924"}
{"text": "### State\nConfusion: 6.411841\nAction: question\nReward: 0.474881\nNext Confusion: 5.896121"}
{"text": "### State\nConfusion: 5.725423\nAction: explain\nReward: -0.616484\nNext Confusion: 6.39949"}
{"text": "### State\nConfusion: 6.111221\nAction: analogize\nReward: 0.543065\nNext Confusion: 5.609612"}
{"text": "### State\nConfusion: 4.020193\nAction: analogize\nReward: -1.372713\nNext Confusion: 4.147141"}
{"text": "### State\nConfusion: 3.316797\nAction: explain\nReward: 1.090464\nNext Confusion: 2.999543"}
{"text": "### State\nConfusion: 2.233988\nAction: question\nReward: 0.011496\nNext Confusion: 2.114241"}
{"text": "### State\nConfusion: 7.038006\nAction: question\nReward: -0.747112\nNext Confusion: 6.650014"}
{"text": "### State\nConfusion: 5.194492\nAction: question\nReward: 0.315717\nNext Confusion: 4.832783"}
{"text": "### State\nConfusion: 5.63014\nAction: analogize\nReward: 0.03927\nNext Confusion: 5.690156"}
{"text": "### State\nConfusion: 3.946496\nAction: analogize\nReward: 0.064232\nNext Confusion: 5.154892"}
{"text": "### State\nConfusion: 4.438356\nAction: analogize\nReward: 0.268679\nNext Confusion: 3.728327"}
{"text": "### State\nConfusion: 5.453129\nAction: worked_example\nReward: 1.927628\nNext Confusion: 3.642222"}
{"text": "### State\nConfusion: 9.513212\nAction: question\nReward: 0.707923\nNext Confusion: 8.978129"}
{"text": "### State\nConfusion: 4.612859\nAction: question\nReward: 0.298371\nNext Confusion: 3.895462"}
{"text": "### State\nConfusion: 4.710078\nAction: explain\nReward: -0.092907\nNext Confusion: 5.249335"}
{"text": "### State\nConfusion: 5.980734\nAction: analogize\nReward: -0.194105\nNext Confusion: 6.089187"}
{"text": "### State\nConfusion: 2.867381\nAction: analogize\nReward: -0.180572\nNext Confusion: 3.381789"}
{"text": "### State\nConfusion: 3.278762\nAction: analogize\nReward: -0.748468\nNext Confusion: 4.113945"}
{"text": "### State\nConfusion: 5.969358\nAction: correct_fact\nReward: 1.535565\nNext Confusion: 4.871317"}
{"text": "### State\nConfusion: 7.275512\nAction: question\nReward: 0.812077\nNext Confusion: 6.31911"}
{"text": "### State\nConfusion: 2.584528\nAction: analogize\nReward: 1.050292\nNext Confusion: 2.113127"}
{"text": "### State\nConfusion: 3.503871\nAction: explain\nReward: 0.04849\nNext Confusion: 3.540186"}
{"text": "### State\nConfusion: 4.614442\nAction: analogize\nReward: 0.522376\nNext Confusion: 5.167736"}
{"text": "### State\nConfusion: 7.105223\nAction: explain\nReward: 0.918648\nNext Confusion: 6.37787"}
{"text": "### State\nConfusion: 3.646493\nAction: question\nReward: 1.323374\nNext Confusion: 2.361944"}
{"text": "### State\nConfusion: 4.541859\nAction: analogize\nReward: -0.008162\nNext Confusion: 4.688088"}
{"text": "### State\nConfusion: 3.714432\nAction: correct_fact\nReward: -1.29251\nNext Confusion: 5.221759"}
{"text": "### State\nConfusion: 4.098722\nAction: worked_example\nReward: 1.309035\nNext Confusion: 1.963477"}
{"text": "### State\nConfusion: 2.639115\nAction: analogize\nReward: 1.241288\nNext Confusion: 1.959279"}
{"text": "### State\nConfusion: 4.209787\nAction: explain\nReward: -0.674517\nNext Confusion: 4.646072"}
{"text": "### State\nConfusion: 4.237337\nAction: analogize\nReward: 0.299678\nNext Confusion: 3.768544"}
{"text": "### State\nConfusion: 4.216485\nAction: analogize\nReward: -0.08135\nNext Confusion: 4.33587"}
{"text": "### State\nConfusion: 4.031052\nAction: explain\nReward: 0.003192\nNext Confusion: 3.64745"}
{"text": "### State\nConfusion: 4.382634\nAction: worked_example\nReward: 1.73771\nNext Confusion: 2.649923"}
{"text": "### State\nConfusion: 5.132607\nAction: explain\nReward: 0.382928\nNext Confusion: 4.589305"}
{"text": "### State\nConfusion: 3.012049\nAction: correct_fact\nReward: 0.826988\nNext Confusion: 2.747293"}
{"text": "### State\nConfusion: 3.640825\nAction: question\nReward: 0.142369\nNext Confusion: 3.34056"}
{"text": "### State\nConfusion: 7.855445\nAction: analogize\nReward: -0.694125\nNext Confusion: 7.793014"}
{"text": "### State\nConfusion: 3.575469\nAction: analogize\nReward: 0.731786\nNext Confusion: 2.930617"}
{"text": "### State\nConfusion: 5.864926\nAction: correct_fact\nReward: -0.739393\nNext Confusion: 6.226672"}
{"text": "### State\nConfusion: 4.742312\nAction: worked_example\nReward: 2.256657\nNext Confusion: 3.236478"}
{"text": "### State\nConfusion: 3.681698\nAction: worked_example\nReward: 2.565199\nNext Confusion: 1.781532"}
{"text": "### State\nConfusion: 5.45446\nAction: worked_example\nReward: 1.317665\nNext Confusion: 4.860599"}
{"text": "### State\nConfusion: 3.974952\nAction: explain\nReward: 0.611576\nNext Confusion: 3.247132"}
{"text": "### State\nConfusion: 4.964568\nAction: analogize\nReward: -1.248745\nNext Confusion: 5.991206"}
{"text": "### State\nConfusion: 6.005152\nAction: question\nReward: -0.355016\nNext Confusion: 6.667488"}
{"text": "### State\nConfusion: 3.601624\nAction: explain\nReward: 1.362752\nNext Confusion: 2.763752"}
{"text": "### State\nConfusion: 3.35013\nAction: explain\nReward: 1.195844\nNext Confusion: 3.74787"}
{"text": "### State\nConfusion: 2.871308\nAction: question\nReward: 1.241142\nNext Confusion: 1.719885"}
{"text": "### State\nConfusion: 3.469416\nAction: question\nReward: 0.918632\nNext Confusion: 3.167326"}
{"text": "### State\nConfusion: 3.741222\nAction: explain\nReward: 0.985201\nNext Confusion: 3.32284"}
{"text": "### State\nConfusion: 6.040923\nAction: analogize\nReward: -0.280699\nNext Confusion: 6.575557"}
{"text": "### State\nConfusion: 7.684596\nAction: explain\nReward: 0.497976\nNext Confusion: 7.635492"}
{"text": "### State\nConfusion: 3.280561\nAction: question\nReward: 1.638365\nNext Confusion: 1.803264"}
{"text": "### State\nConfusion: 7.663548\nAction: analogize\nReward: -0.226292\nNext Confusion: 7.610589"}
{"text": "### State\nConfusion: 3.984347\nAction: explain\nReward: 0.399343\nNext Confusion: 3.598188"}
{"text": "### State\nConfusion: 4.128422\nAction: analogize\nReward: -0.020421\nNext Confusion: 4.24922"}
{"text": "### State\nConfusion: 4.86738\nAction: explain\nReward: 0.412249\nNext Confusion: 4.323055"}
{"text": "### State\nConfusion: 3.86008\nAction: analogize\nReward: -0.201859\nNext Confusion: 4.178868"}
{"text": "### State\nConfusion: 2.770174\nAction: analogize\nReward: -0.44956\nNext Confusion: 2.613835"}
{"text": "### State\nConfusion: 4.151423\nAction: worked_example\nReward: 1.723241\nNext Confusion: 2.956266"}
{"text": "### State\nConfusion: 6.310332\nAction: analogize\nReward: 0.928881\nNext Confusion: 5.853417"}
{"text": "### State\nConfusion: 3.257129\nAction: analogize\nReward: -0.580045\nNext Confusion: 3.712077"}
{"text": "### State\nConfusion: 3.381224\nAction: analogize\nReward: 0.35832\nNext Confusion: 2.915107"}
{"text": "### State\nConfusion: 5.293595\nAction: analogize\nReward: 0.024167\nNext Confusion: 5.410278"}
{"text": "### State\nConfusion: 3.664918\nAction: analogize\nReward: 0.629177\nNext Confusion: 3.607038"}
{"text": "### State\nConfusion: 3.814348\nAction: analogize\nReward: -0.991218\nNext Confusion: 4.655007"}
{"text": "### State\nConfusion: 5.846762\nAction: worked_example\nReward: 1.942244\nNext Confusion: 4.556337"}
{"text": "### State\nConfusion: 5.780618\nAction: worked_example\nReward: 0.564407\nNext Confusion: 5.13104"}
{"text": "### State\nConfusion: 3.274057\nAction: correct_fact\nReward: -0.765777\nNext Confusion: 3.590828"}
{"text": "### State\nConfusion: 3.975992\nAction: analogize\nReward: -1.591306\nNext Confusion: 4.650961"}
{"text": "### State\nConfusion: 2.629499\nAction: analogize\nReward: 0.559346\nNext Confusion: 2.924919"}
{"text": "### State\nConfusion: 4.576322\nAction: question\nReward: 1.115169\nNext Confusion: 3.710506"}
{"text": "### State\nConfusion: 4.406433\nAction: question\nReward: 0.384513\nNext Confusion: 4.542675"}
{"text": "### State\nConfusion: 3.695796\nAction: explain\nReward: -0.09499\nNext Confusion: 3.903161"}
{"text": "### State\nConfusion: 3.598451\nAction: analogize\nReward: -0.256206\nNext Confusion: 4.140747"}
{"text": "### State\nConfusion: 3.289128\nAction: analogize\nReward: -0.30123\nNext Confusion: 3.63686"}
{"text": "### State\nConfusion: 6.212872\nAction: analogize\nReward: 1.21673\nNext Confusion: 5.301171"}
{"text": "### State\nConfusion: 2.665315\nAction: analogize\nReward: 0.487963\nNext Confusion: 2.517148"}
{"text": "### State\nConfusion: 7.634206\nAction: analogize\nReward: 0.364259\nNext Confusion: 7.525106"}
{"text": "### State\nConfusion: 4.694621\nAction: analogize\nReward: -0.110508\nNext Confusion: 4.591361"}
{"text": "### State\nConfusion: 3.257582\nAction: analogize\nReward: 0.048877\nNext Confusion: 3.63751"}
{"text": "### State\nConfusion: 7.117423\nAction: analogize\nReward: 0.360643\nNext Confusion: 6.834822"}
{"text": "### State\nConfusion: 2.396412\nAction: analogize\nReward: -0.941327\nNext Confusion: 3.360732"}
{"text": "### State\nConfusion: 3.254998\nAction: explain\nReward: 0.913498\nNext Confusion: 2.997523"}
{"text": "### State\nConfusion: 4.529406\nAction: question\nReward: 0.384746\nNext Confusion: 4.120297"}
{"text": "### State\nConfusion: 3.558558\nAction: analogize\nReward: -0.363472\nNext Confusion: 3.638471"}
{"text": "### State\nConfusion: 3.043111\nAction: question\nReward: 0.349095\nNext Confusion: 2.999848"}
{"text": "### State\nConfusion: 7.750188\nAction: analogize\nReward: -0.581808\nNext Confusion: 8.439862"}
{"text": "### State\nConfusion: 3.544147\nAction: analogize\nReward: -0.685245\nNext Confusion: 4.018972"}
{"text": "### State\nConfusion: 3.828862\nAction: question\nReward: 1.050056\nNext Confusion: 3.542843"}
{"text": "### State\nConfusion: 3.389573\nAction: explain\nReward: 0.664424\nNext Confusion: 3.723847"}
{"text": "### State\nConfusion: 4.171442\nAction: analogize\nReward: -1.336033\nNext Confusion: 5.128333"}
{"text": "### State\nConfusion: 3.576818\nAction: worked_example\nReward: 0.172307\nNext Confusion: 3.437141"}
{"text": "### State\nConfusion: 4.544904\nAction: correct_fact\nReward: -0.733044\nNext Confusion: 4.568539"}
{"text": "### State\nConfusion: 7.43306\nAction: worked_example\nReward: 2.463857\nNext Confusion: 5.143341"}
{"text": "### State\nConfusion: 3.439448\nAction: analogize\nReward: -0.571421\nNext Confusion: 4.087216"}
{"text": "### State\nConfusion: 3.704676\nAction: analogize\nReward: -1.422864\nNext Confusion: 4.662764"}
{"text": "### State\nConfusion: 3.942663\nAction: correct_fact\nReward: 0.486764\nNext Confusion: 3.753754"}
{"text": "### State\nConfusion: 6.66982\nAction: worked_example\nReward: 1.728911\nNext Confusion: 4.802022"}
{"text": "### State\nConfusion: 5.246306\nAction: worked_example\nReward: -0.294401\nNext Confusion: 5.037916"}
{"text": "### State\nConfusion: 9.410426\nAction: analogize\nReward: -1.320736\nNext Confusion: 9.686011"}
{"text": "### State\nConfusion: 6.195813\nAction: analogize\nReward: 0.606987\nNext Confusion: 6.49038"}
{"text": "### State\nConfusion: 5.922952\nAction: explain\nReward: 0.232012\nNext Confusion: 5.318613"}
{"text": "### State\nConfusion: 5.650737\nAction: question\nReward: 0.866942\nNext Confusion: 4.455626"}
{"text": "### State\nConfusion: 3.919665\nAction: analogize\nReward: -0.997712\nNext Confusion: 4.632429"}
{"text": "### State\nConfusion: 3.941246\nAction: question\nReward: 0.649643\nNext Confusion: 3.760049"}
{"text": "### State\nConfusion: 6.998311\nAction: analogize\nReward: -0.427791\nNext Confusion: 7.825326"}
{"text": "### State\nConfusion: 3.439216\nAction: analogize\nReward: -0.835926\nNext Confusion: 4.451193"}
{"text": "### State\nConfusion: 6.045847\nAction: analogize\nReward: 0.214939\nNext Confusion: 6.031241"}
{"text": "### State\nConfusion: 6.210586\nAction: question\nReward: -3.694161\nNext Confusion: 6.180187"}
{"text": "### State\nConfusion: 4.026471\nAction: analogize\nReward: -0.220299\nNext Confusion: 4.869419"}
{"text": "### State\nConfusion: 7.577714\nAction: analogize\nReward: -1.174037\nNext Confusion: 8.802045"}
{"text": "### State\nConfusion: 3.543173\nAction: analogize\nReward: -0.365294\nNext Confusion: 3.866641"}
{"text": "### State\nConfusion: 5.09092\nAction: analogize\nReward: 1.13697\nNext Confusion: 4.349956"}
{"text": "### State\nConfusion: 8.079839\nAction: analogize\nReward: 0.226414\nNext Confusion: 8.204414"}
{"text": "### State\nConfusion: 7.353573\nAction: analogize\nReward: -1.642375\nNext Confusion: 8.902788"}
{"text": "### State\nConfusion: 5.057345\nAction: correct_fact\nReward: 0.380797\nNext Confusion: 4.734569"}
{"text": "### State\nConfusion: 9.910346\nAction: analogize\nReward: -1.254739\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.257287\nAction: explain\nReward: 1.209915\nNext Confusion: 2.730555"}
{"text": "### State\nConfusion: 2.891581\nAction: correct_fact\nReward: 1.087497\nNext Confusion: 2.386734"}
{"text": "### State\nConfusion: 3.396667\nAction: analogize\nReward: -0.498005\nNext Confusion: 3.937934"}
{"text": "### State\nConfusion: 6.116508\nAction: analogize\nReward: 0.1768\nNext Confusion: 6.346785"}
{"text": "### State\nConfusion: 9.778449\nAction: analogize\nReward: -0.537666\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.730837\nAction: analogize\nReward: 0.814024\nNext Confusion: 3.501215"}
{"text": "### State\nConfusion: 6.362095\nAction: question\nReward: -0.753607\nNext Confusion: 6.451304"}
{"text": "### State\nConfusion: 3.924527\nAction: question\nReward: 1.359091\nNext Confusion: 2.809764"}
{"text": "### State\nConfusion: 4.227033\nAction: analogize\nReward: -0.552635\nNext Confusion: 4.370997"}
{"text": "### State\nConfusion: 4.017463\nAction: explain\nReward: 0.588962\nNext Confusion: 3.727648"}
{"text": "### State\nConfusion: 3.403827\nAction: analogize\nReward: -0.26206\nNext Confusion: 4.009012"}
{"text": "### State\nConfusion: 5.242961\nAction: analogize\nReward: 0.309312\nNext Confusion: 5.459402"}
{"text": "### State\nConfusion: 5.55746\nAction: analogize\nReward: -0.133931\nNext Confusion: 5.654018"}
{"text": "### State\nConfusion: 7.313769\nAction: analogize\nReward: 1.068778\nNext Confusion: 7.331645"}
{"text": "### State\nConfusion: 4.527921\nAction: analogize\nReward: 0.174773\nNext Confusion: 4.651058"}
{"text": "### State\nConfusion: 4.213427\nAction: explain\nReward: 0.440255\nNext Confusion: 3.94091"}
{"text": "### State\nConfusion: 5.50433\nAction: analogize\nReward: -0.638466\nNext Confusion: 6.355938"}
{"text": "### State\nConfusion: 4.101085\nAction: analogize\nReward: -1.054182\nNext Confusion: 5.146448"}
{"text": "### State\nConfusion: 4.349844\nAction: analogize\nReward: -0.170631\nNext Confusion: 4.653336"}
{"text": "### State\nConfusion: 4.015178\nAction: correct_fact\nReward: 0.047997\nNext Confusion: 4.260065"}
{"text": "### State\nConfusion: 2.358372\nAction: analogize\nReward: -0.598425\nNext Confusion: 2.979395"}
{"text": "### State\nConfusion: 4.52034\nAction: explain\nReward: 0.450091\nNext Confusion: 4.371052"}
{"text": "### State\nConfusion: 6.761803\nAction: analogize\nReward: 1.344641\nNext Confusion: 5.632773"}
{"text": "### State\nConfusion: 5.87241\nAction: question\nReward: 0.377697\nNext Confusion: 5.843579"}
{"text": "### State\nConfusion: 3.923287\nAction: analogize\nReward: -0.657101\nNext Confusion: 4.637989"}
{"text": "### State\nConfusion: 9.313203\nAction: question\nReward: 0.856034\nNext Confusion: 8.638969"}
{"text": "### State\nConfusion: 3.786844\nAction: analogize\nReward: 0.06864\nNext Confusion: 4.0975"}
{"text": "### State\nConfusion: 5.629326\nAction: explain\nReward: 0.078317\nNext Confusion: 6.028976"}
{"text": "### State\nConfusion: 10.0\nAction: question\nReward: -0.123322\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 5.23106\nAction: analogize\nReward: -0.280209\nNext Confusion: 5.898155"}
{"text": "### State\nConfusion: 4.382555\nAction: analogize\nReward: -1.659106\nNext Confusion: 5.728748"}
{"text": "### State\nConfusion: 6.25581\nAction: question\nReward: 0.21789\nNext Confusion: 5.880257"}
{"text": "### State\nConfusion: 2.868613\nAction: analogize\nReward: 0.909742\nNext Confusion: 2.372117"}
{"text": "### State\nConfusion: 3.684495\nAction: correct_fact\nReward: -0.745467\nNext Confusion: 4.462787"}
{"text": "### State\nConfusion: 3.525275\nAction: analogize\nReward: -0.111069\nNext Confusion: 4.110827"}
{"text": "### State\nConfusion: 3.225854\nAction: analogize\nReward: -0.629036\nNext Confusion: 3.792701"}
{"text": "### State\nConfusion: 4.270598\nAction: analogize\nReward: 0.480308\nNext Confusion: 3.756942"}
{"text": "### State\nConfusion: 4.836082\nAction: analogize\nReward: -0.761899\nNext Confusion: 5.27111"}
{"text": "### State\nConfusion: 8.947462\nAction: analogize\nReward: -1.034251\nNext Confusion: 8.811069"}
{"text": "### State\nConfusion: 6.235946\nAction: explain\nReward: 0.433649\nNext Confusion: 5.740307"}
{"text": "### State\nConfusion: 7.087507\nAction: question\nReward: 1.290575\nNext Confusion: 5.952477"}
{"text": "### State\nConfusion: 3.511142\nAction: correct_fact\nReward: 0.069971\nNext Confusion: 3.650214"}
{"text": "### State\nConfusion: 3.852046\nAction: question\nReward: 0.918159\nNext Confusion: 3.057334"}
{"text": "### State\nConfusion: 6.66341\nAction: analogize\nReward: 0.553079\nNext Confusion: 6.19827"}
{"text": "### State\nConfusion: 4.063512\nAction: analogize\nReward: -0.20142\nNext Confusion: 4.057294"}
{"text": "### State\nConfusion: 9.915004\nAction: analogize\nReward: 0.468495\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 4.206673\nAction: question\nReward: 1.256381\nNext Confusion: 3.352159"}
{"text": "### State\nConfusion: 8.36885\nAction: analogize\nReward: -1.800544\nNext Confusion: 9.007835"}
{"text": "### State\nConfusion: 5.879703\nAction: explain\nReward: 0.015996\nNext Confusion: 5.779414"}
{"text": "### State\nConfusion: 3.387135\nAction: worked_example\nReward: -0.285351\nNext Confusion: 3.143748"}
{"text": "### State\nConfusion: 3.42407\nAction: explain\nReward: -1.038417\nNext Confusion: 4.218859"}
{"text": "### State\nConfusion: 5.131143\nAction: explain\nReward: 0.547275\nNext Confusion: 4.681826"}
{"text": "### State\nConfusion: 6.287015\nAction: question\nReward: 2.149246\nNext Confusion: 5.357315"}
{"text": "### State\nConfusion: 3.635542\nAction: correct_fact\nReward: 0.099958\nNext Confusion: 3.996667"}
{"text": "### State\nConfusion: 4.814803\nAction: analogize\nReward: -0.395849\nNext Confusion: 4.792355"}
{"text": "### State\nConfusion: 8.044463\nAction: explain\nReward: -0.297554\nNext Confusion: 7.542534"}
{"text": "### State\nConfusion: 8.401709\nAction: analogize\nReward: -0.967848\nNext Confusion: 9.894008"}
{"text": "### State\nConfusion: 6.648936\nAction: explain\nReward: 1.280506\nNext Confusion: 5.837753"}
{"text": "### State\nConfusion: 2.353691\nAction: analogize\nReward: 1.116544\nNext Confusion: 1.840919"}
{"text": "### State\nConfusion: 2.624949\nAction: analogize\nReward: 0.572625\nNext Confusion: 2.075723"}
{"text": "### State\nConfusion: 2.680676\nAction: worked_example\nReward: 0.813016\nNext Confusion: 1.673576"}
{"text": "### State\nConfusion: 6.013613\nAction: explain\nReward: 0.225777\nNext Confusion: 5.772486"}
{"text": "### State\nConfusion: 6.153138\nAction: analogize\nReward: 0.394142\nNext Confusion: 6.72213"}
{"text": "### State\nConfusion: 3.991692\nAction: analogize\nReward: -0.533825\nNext Confusion: 3.808707"}
{"text": "### State\nConfusion: 4.422607\nAction: analogize\nReward: 0.446483\nNext Confusion: 4.097887"}
{"text": "### State\nConfusion: 3.941558\nAction: analogize\nReward: -0.217683\nNext Confusion: 4.405933"}
{"text": "### State\nConfusion: 4.22295\nAction: analogize\nReward: 0.082211\nNext Confusion: 4.529451"}
{"text": "### State\nConfusion: 5.897951\nAction: analogize\nReward: -0.363261\nNext Confusion: 6.494216"}
{"text": "### State\nConfusion: 3.695955\nAction: analogize\nReward: 0.097225\nNext Confusion: 3.346347"}
{"text": "### State\nConfusion: 3.633662\nAction: worked_example\nReward: 0.364471\nNext Confusion: 3.045426"}
{"text": "### State\nConfusion: 8.276167\nAction: analogize\nReward: -0.287066\nNext Confusion: 8.230134"}
{"text": "### State\nConfusion: 7.056122\nAction: analogize\nReward: 0.076491\nNext Confusion: 7.140624"}
{"text": "### State\nConfusion: 6.252013\nAction: analogize\nReward: -0.740944\nNext Confusion: 7.121952"}
{"text": "### State\nConfusion: 9.184165\nAction: analogize\nReward: -0.583071\nNext Confusion: 9.831767"}
{"text": "### State\nConfusion: 7.184398\nAction: explain\nReward: -0.123245\nNext Confusion: 7.505643"}
{"text": "### State\nConfusion: 3.933074\nAction: analogize\nReward: -0.656493\nNext Confusion: 4.151541"}
{"text": "### State\nConfusion: 4.39857\nAction: correct_fact\nReward: 0.819409\nNext Confusion: 3.612621"}
{"text": "### State\nConfusion: 7.368159\nAction: question\nReward: -0.305729\nNext Confusion: 6.645311"}
{"text": "### State\nConfusion: 6.044369\nAction: explain\nReward: -1.484875\nNext Confusion: 6.056402"}
{"text": "### State\nConfusion: 3.630255\nAction: explain\nReward: 0.14536\nNext Confusion: 3.800318"}
{"text": "### State\nConfusion: 3.970609\nAction: explain\nReward: 0.544449\nNext Confusion: 3.713812"}
{"text": "### State\nConfusion: 6.995214\nAction: analogize\nReward: -0.404806\nNext Confusion: 7.366908"}
{"text": "### State\nConfusion: 2.539684\nAction: question\nReward: 0.684036\nNext Confusion: 1.758763"}
{"text": "### State\nConfusion: 4.291467\nAction: analogize\nReward: 0.211242\nNext Confusion: 4.725763"}
{"text": "### State\nConfusion: 3.88243\nAction: analogize\nReward: 1.228902\nNext Confusion: 3.252029"}
{"text": "### State\nConfusion: 3.367319\nAction: question\nReward: 0.896723\nNext Confusion: 2.189595"}
{"text": "### State\nConfusion: 4.465155\nAction: explain\nReward: 0.145001\nNext Confusion: 4.276695"}
{"text": "### State\nConfusion: 5.148044\nAction: analogize\nReward: -0.85651\nNext Confusion: 5.814228"}
{"text": "### State\nConfusion: 6.414485\nAction: analogize\nReward: -0.167799\nNext Confusion: 6.776225"}
{"text": "### State\nConfusion: 5.660143\nAction: explain\nReward: 0.666448\nNext Confusion: 5.634968"}
{"text": "### State\nConfusion: 3.073078\nAction: worked_example\nReward: 1.191585\nNext Confusion: 1.84006"}
{"text": "### State\nConfusion: 4.392269\nAction: explain\nReward: -0.13347\nNext Confusion: 4.073843"}
{"text": "### State\nConfusion: 5.864765\nAction: analogize\nReward: -1.350922\nNext Confusion: 6.857552"}
{"text": "### State\nConfusion: 4.392626\nAction: analogize\nReward: -1.048435\nNext Confusion: 4.934402"}
{"text": "### State\nConfusion: 7.997655\nAction: correct_fact\nReward: 0.441853\nNext Confusion: 7.844183"}
{"text": "### State\nConfusion: 4.253817\nAction: analogize\nReward: -0.180777\nNext Confusion: 4.751152"}
{"text": "### State\nConfusion: 4.194475\nAction: analogize\nReward: -0.13063\nNext Confusion: 4.203718"}
{"text": "### State\nConfusion: 3.775372\nAction: analogize\nReward: -0.102879\nNext Confusion: 4.218565"}
{"text": "### State\nConfusion: 3.37428\nAction: explain\nReward: -0.147437\nNext Confusion: 3.824593"}
{"text": "### State\nConfusion: 5.427704\nAction: explain\nReward: 0.223987\nNext Confusion: 4.968712"}
{"text": "### State\nConfusion: 3.636294\nAction: analogize\nReward: -1.018032\nNext Confusion: 4.057275"}
{"text": "### State\nConfusion: 6.214283\nAction: worked_example\nReward: 2.571471\nNext Confusion: 5.29688"}
{"text": "### State\nConfusion: 3.93336\nAction: analogize\nReward: -0.744836\nNext Confusion: 4.682724"}
{"text": "### State\nConfusion: 4.802226\nAction: analogize\nReward: -0.819463\nNext Confusion: 6.311615"}
{"text": "### State\nConfusion: 3.885915\nAction: analogize\nReward: 0.055154\nNext Confusion: 3.762716"}
{"text": "### State\nConfusion: 4.20142\nAction: analogize\nReward: -0.829035\nNext Confusion: 4.799394"}
{"text": "### State\nConfusion: 3.003221\nAction: analogize\nReward: -0.314853\nNext Confusion: 3.496412"}
{"text": "### State\nConfusion: 4.048622\nAction: analogize\nReward: -1.332944\nNext Confusion: 5.046173"}
{"text": "### State\nConfusion: 5.844535\nAction: analogize\nReward: 0.587102\nNext Confusion: 5.329001"}
{"text": "### State\nConfusion: 2.72501\nAction: analogize\nReward: -0.226736\nNext Confusion: 2.524352"}
{"text": "### State\nConfusion: 7.002024\nAction: correct_fact\nReward: 0.509639\nNext Confusion: 6.743065"}
{"text": "### State\nConfusion: 3.325475\nAction: question\nReward: 0.06805\nNext Confusion: 3.499943"}
{"text": "### State\nConfusion: 3.802268\nAction: analogize\nReward: -0.0121\nNext Confusion: 4.107089"}
{"text": "### State\nConfusion: 2.142409\nAction: correct_fact\nReward: 0.629906\nNext Confusion: 1.889285"}
{"text": "### State\nConfusion: 4.549087\nAction: analogize\nReward: -0.264564\nNext Confusion: 5.161147"}
{"text": "### State\nConfusion: 3.565095\nAction: question\nReward: -0.083092\nNext Confusion: 3.521617"}
{"text": "### State\nConfusion: 6.562612\nAction: analogize\nReward: -0.482755\nNext Confusion: 7.10455"}
{"text": "### State\nConfusion: 4.241969\nAction: explain\nReward: -0.754929\nNext Confusion: 5.114543"}
{"text": "### State\nConfusion: 2.703632\nAction: analogize\nReward: -0.503238\nNext Confusion: 3.434828"}
{"text": "### State\nConfusion: 4.048455\nAction: worked_example\nReward: 1.670669\nNext Confusion: 3.173782"}
{"text": "### State\nConfusion: 3.324484\nAction: correct_fact\nReward: -0.601735\nNext Confusion: 4.07383"}
{"text": "### State\nConfusion: 5.943546\nAction: analogize\nReward: -0.626702\nNext Confusion: 6.768705"}
{"text": "### State\nConfusion: 7.54099\nAction: worked_example\nReward: -0.038655\nNext Confusion: 7.901897"}
{"text": "### State\nConfusion: 5.921177\nAction: analogize\nReward: -0.518356\nNext Confusion: 6.339199"}
{"text": "### State\nConfusion: 3.627352\nAction: question\nReward: 1.265732\nNext Confusion: 2.68943"}
{"text": "### State\nConfusion: 4.268603\nAction: analogize\nReward: 0.224228\nNext Confusion: 4.325311"}
{"text": "### State\nConfusion: 7.438092\nAction: analogize\nReward: 0.769506\nNext Confusion: 6.760568"}
{"text": "### State\nConfusion: 5.561396\nAction: analogize\nReward: 0.192368\nNext Confusion: 6.595321"}
{"text": "### State\nConfusion: 3.728523\nAction: analogize\nReward: 0.288576\nNext Confusion: 4.402052"}
{"text": "### State\nConfusion: 2.824914\nAction: explain\nReward: -0.298342\nNext Confusion: 3.015885"}
{"text": "### State\nConfusion: 3.5681\nAction: analogize\nReward: -1.062345\nNext Confusion: 3.871775"}
{"text": "### State\nConfusion: 5.129294\nAction: correct_fact\nReward: 0.217271\nNext Confusion: 4.869125"}
{"text": "### State\nConfusion: 3.513123\nAction: analogize\nReward: 0.348811\nNext Confusion: 3.595584"}
{"text": "### State\nConfusion: 3.052817\nAction: worked_example\nReward: 2.832272\nNext Confusion: 0.91998"}
{"text": "### State\nConfusion: 7.955312\nAction: analogize\nReward: -0.117205\nNext Confusion: 8.157463"}
{"text": "### State\nConfusion: 3.807566\nAction: explain\nReward: 0.103445\nNext Confusion: 3.867911"}
{"text": "### State\nConfusion: 3.244115\nAction: worked_example\nReward: 1.05523\nNext Confusion: 1.530957"}
{"text": "### State\nConfusion: 2.521808\nAction: analogize\nReward: -0.867435\nNext Confusion: 2.434005"}
{"text": "### State\nConfusion: 9.124791\nAction: analogize\nReward: -0.841239\nNext Confusion: 9.679058"}
{"text": "### State\nConfusion: 5.325014\nAction: analogize\nReward: -1.782482\nNext Confusion: 6.970171"}
{"text": "### State\nConfusion: 4.339715\nAction: explain\nReward: 0.008463\nNext Confusion: 3.846507"}
{"text": "### State\nConfusion: 6.615342\nAction: question\nReward: 1.733143\nNext Confusion: 5.407709"}
{"text": "### State\nConfusion: 4.24576\nAction: explain\nReward: -0.29446\nNext Confusion: 4.733876"}
{"text": "### State\nConfusion: 4.335828\nAction: analogize\nReward: -0.146346\nNext Confusion: 4.271027"}
{"text": "### State\nConfusion: 4.58664\nAction: analogize\nReward: -0.103805\nNext Confusion: 4.874441"}
{"text": "### State\nConfusion: 4.006128\nAction: analogize\nReward: -0.725112\nNext Confusion: 4.798853"}
{"text": "### State\nConfusion: 5.253432\nAction: worked_example\nReward: 2.352804\nNext Confusion: 3.767527"}
{"text": "### State\nConfusion: 3.005701\nAction: explain\nReward: 0.139484\nNext Confusion: 2.093722"}
{"text": "### State\nConfusion: 3.874975\nAction: explain\nReward: -0.787808\nNext Confusion: 3.802166"}
{"text": "### State\nConfusion: 3.41289\nAction: explain\nReward: 1.219302\nNext Confusion: 2.298687"}
{"text": "### State\nConfusion: 3.578395\nAction: question\nReward: 0.811461\nNext Confusion: 2.339393"}
{"text": "### State\nConfusion: 3.207485\nAction: analogize\nReward: 0.315275\nNext Confusion: 2.610947"}
{"text": "### State\nConfusion: 2.575161\nAction: worked_example\nReward: 2.987322\nNext Confusion: 0.201491"}
{"text": "### State\nConfusion: 4.277697\nAction: analogize\nReward: 0.129558\nNext Confusion: 4.751849"}
{"text": "### State\nConfusion: 8.911267\nAction: correct_fact\nReward: -0.124284\nNext Confusion: 8.876928"}
{"text": "### State\nConfusion: 3.571608\nAction: explain\nReward: -0.07214\nNext Confusion: 3.462044"}
{"text": "### State\nConfusion: 6.763837\nAction: analogize\nReward: -0.857645\nNext Confusion: 7.317293"}
{"text": "### State\nConfusion: 9.618838\nAction: analogize\nReward: -0.535427\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.973634\nAction: explain\nReward: -0.52296\nNext Confusion: 4.014759"}
{"text": "### State\nConfusion: 4.366366\nAction: analogize\nReward: 0.616717\nNext Confusion: 3.759719"}
{"text": "### State\nConfusion: 3.682107\nAction: correct_fact\nReward: -0.336892\nNext Confusion: 3.918612"}
{"text": "### State\nConfusion: 5.293279\nAction: worked_example\nReward: 0.835641\nNext Confusion: 4.078385"}
{"text": "### State\nConfusion: 4.117586\nAction: analogize\nReward: -1.002237\nNext Confusion: 4.322363"}
{"text": "### State\nConfusion: 4.685913\nAction: question\nReward: 0.069347\nNext Confusion: 4.376846"}
{"text": "### State\nConfusion: 7.71473\nAction: worked_example\nReward: 2.620265\nNext Confusion: 6.301953"}
{"text": "### State\nConfusion: 4.964281\nAction: correct_fact\nReward: 0.475344\nNext Confusion: 5.341691"}
{"text": "### State\nConfusion: 3.69864\nAction: explain\nReward: 0.306914\nNext Confusion: 3.666771"}
{"text": "### State\nConfusion: 4.640393\nAction: correct_fact\nReward: 0.83143\nNext Confusion: 4.648771"}
{"text": "### State\nConfusion: 3.663903\nAction: explain\nReward: 0.146509\nNext Confusion: 3.056985"}
{"text": "### State\nConfusion: 4.372581\nAction: analogize\nReward: 1.235951\nNext Confusion: 3.717001"}
{"text": "### State\nConfusion: 4.812475\nAction: question\nReward: 0.287407\nNext Confusion: 4.611772"}
{"text": "### State\nConfusion: 4.098743\nAction: correct_fact\nReward: 1.399562\nNext Confusion: 3.505099"}
{"text": "### State\nConfusion: 3.51753\nAction: correct_fact\nReward: 0.518148\nNext Confusion: 3.218788"}
{"text": "### State\nConfusion: 7.673138\nAction: analogize\nReward: -0.506786\nNext Confusion: 8.241877"}
{"text": "### State\nConfusion: 5.163452\nAction: correct_fact\nReward: 0.662886\nNext Confusion: 4.834873"}
{"text": "### State\nConfusion: 3.634682\nAction: explain\nReward: 1.605538\nNext Confusion: 2.658165"}
{"text": "### State\nConfusion: 4.804548\nAction: analogize\nReward: -0.526519\nNext Confusion: 5.3719"}
{"text": "### State\nConfusion: 5.730246\nAction: analogize\nReward: -0.369718\nNext Confusion: 6.35551"}
{"text": "### State\nConfusion: 3.63205\nAction: analogize\nReward: -0.231718\nNext Confusion: 4.320949"}
{"text": "### State\nConfusion: 6.188833\nAction: explain\nReward: -0.086604\nNext Confusion: 6.12198"}
{"text": "### State\nConfusion: 3.921316\nAction: question\nReward: 0.490084\nNext Confusion: 3.494523"}
{"text": "### State\nConfusion: 3.867314\nAction: question\nReward: 0.795442\nNext Confusion: 2.899061"}
{"text": "### State\nConfusion: 6.886967\nAction: explain\nReward: -0.57278\nNext Confusion: 6.923401"}
{"text": "### State\nConfusion: 3.680679\nAction: analogize\nReward: -0.210942\nNext Confusion: 3.925346"}
{"text": "### State\nConfusion: 8.736197\nAction: explain\nReward: -0.914355\nNext Confusion: 9.615757"}
{"text": "### State\nConfusion: 3.546871\nAction: question\nReward: 1.117404\nNext Confusion: 2.736447"}
{"text": "### State\nConfusion: 6.864706\nAction: analogize\nReward: 0.214425\nNext Confusion: 6.507564"}
{"text": "### State\nConfusion: 5.739324\nAction: explain\nReward: 0.335455\nNext Confusion: 5.200863"}
{"text": "### State\nConfusion: 6.305572\nAction: analogize\nReward: 0.075181\nNext Confusion: 6.520935"}
{"text": "### State\nConfusion: 4.454816\nAction: analogize\nReward: -0.289589\nNext Confusion: 4.773785"}
{"text": "### State\nConfusion: 3.731757\nAction: explain\nReward: 0.873925\nNext Confusion: 2.932281"}
{"text": "### State\nConfusion: 5.496557\nAction: correct_fact\nReward: 0.868323\nNext Confusion: 5.627773"}
{"text": "### State\nConfusion: 4.492536\nAction: analogize\nReward: -0.193267\nNext Confusion: 4.587128"}
{"text": "### State\nConfusion: 3.143882\nAction: explain\nReward: 0.736479\nNext Confusion: 2.814084"}
{"text": "### State\nConfusion: 6.209274\nAction: question\nReward: -0.687091\nNext Confusion: 6.21104"}
{"text": "### State\nConfusion: 4.661629\nAction: explain\nReward: 0.805475\nNext Confusion: 4.795467"}
{"text": "### State\nConfusion: 3.818923\nAction: analogize\nReward: -0.621434\nNext Confusion: 4.386176"}
{"text": "### State\nConfusion: 3.080188\nAction: correct_fact\nReward: 0.555658\nNext Confusion: 2.880363"}
{"text": "### State\nConfusion: 3.506171\nAction: analogize\nReward: 0.546359\nNext Confusion: 3.254782"}
{"text": "### State\nConfusion: 6.788947\nAction: question\nReward: 1.500851\nNext Confusion: 5.293974"}
{"text": "### State\nConfusion: 4.457938\nAction: analogize\nReward: -0.031805\nNext Confusion: 4.962243"}
{"text": "### State\nConfusion: 6.438289\nAction: explain\nReward: 0.217682\nNext Confusion: 6.198248"}
{"text": "### State\nConfusion: 4.113327\nAction: worked_example\nReward: 2.453192\nNext Confusion: 2.296527"}
{"text": "### State\nConfusion: 4.691368\nAction: analogize\nReward: -0.428181\nNext Confusion: 5.063083"}
{"text": "### State\nConfusion: 6.270288\nAction: analogize\nReward: 0.043957\nNext Confusion: 7.006448"}
{"text": "### State\nConfusion: 3.941789\nAction: analogize\nReward: -1.714923\nNext Confusion: 3.669449"}
{"text": "### State\nConfusion: 6.964881\nAction: analogize\nReward: -0.724558\nNext Confusion: 7.170291"}
{"text": "### State\nConfusion: 6.03055\nAction: explain\nReward: -0.30925\nNext Confusion: 5.772726"}
{"text": "### State\nConfusion: 4.264237\nAction: question\nReward: 1.085219\nNext Confusion: 3.315322"}
{"text": "### State\nConfusion: 3.291669\nAction: analogize\nReward: -0.695105\nNext Confusion: 3.578062"}
{"text": "### State\nConfusion: 2.852946\nAction: explain\nReward: 0.896694\nNext Confusion: 2.766197"}
{"text": "### State\nConfusion: 7.655255\nAction: analogize\nReward: -0.602577\nNext Confusion: 8.688943"}
{"text": "### State\nConfusion: 7.109231\nAction: analogize\nReward: -1.82364\nNext Confusion: 7.303028"}
{"text": "### State\nConfusion: 6.423125\nAction: analogize\nReward: -0.296018\nNext Confusion: 7.129556"}
{"text": "### State\nConfusion: 3.713895\nAction: correct_fact\nReward: 1.061697\nNext Confusion: 3.08923"}
{"text": "### State\nConfusion: 5.848164\nAction: explain\nReward: 0.196832\nNext Confusion: 5.547879"}
{"text": "### State\nConfusion: 3.044751\nAction: analogize\nReward: 0.050225\nNext Confusion: 2.399746"}
{"text": "### State\nConfusion: 3.267077\nAction: correct_fact\nReward: 0.065062\nNext Confusion: 3.377568"}
{"text": "### State\nConfusion: 3.677239\nAction: analogize\nReward: 0.623853\nNext Confusion: 3.404431"}
{"text": "### State\nConfusion: 3.430977\nAction: analogize\nReward: -0.637506\nNext Confusion: 4.219885"}
{"text": "### State\nConfusion: 3.54131\nAction: analogize\nReward: -0.296704\nNext Confusion: 3.700912"}
{"text": "### State\nConfusion: 3.03738\nAction: analogize\nReward: -0.444641\nNext Confusion: 3.498177"}
{"text": "### State\nConfusion: 4.582879\nAction: analogize\nReward: -0.954809\nNext Confusion: 5.478829"}
{"text": "### State\nConfusion: 5.151244\nAction: worked_example\nReward: 2.380252\nNext Confusion: 3.726861"}
{"text": "### State\nConfusion: 5.093281\nAction: correct_fact\nReward: 0.95357\nNext Confusion: 4.533796"}
{"text": "### State\nConfusion: 7.686016\nAction: analogize\nReward: -0.938085\nNext Confusion: 8.366612"}
{"text": "### State\nConfusion: 4.178343\nAction: worked_example\nReward: 0.374517\nNext Confusion: 3.552812"}
{"text": "### State\nConfusion: 6.83574\nAction: question\nReward: 0.747584\nNext Confusion: 6.205879"}
{"text": "### State\nConfusion: 4.541863\nAction: analogize\nReward: -0.557367\nNext Confusion: 4.824588"}
{"text": "### State\nConfusion: 3.081633\nAction: explain\nReward: 0.136932\nNext Confusion: 2.914456"}
{"text": "### State\nConfusion: 2.926489\nAction: worked_example\nReward: 1.157325\nNext Confusion: 1.859197"}
{"text": "### State\nConfusion: 4.609409\nAction: analogize\nReward: 0.394905\nNext Confusion: 4.347848"}
{"text": "### State\nConfusion: 5.320403\nAction: correct_fact\nReward: 0.135497\nNext Confusion: 5.200538"}
{"text": "### State\nConfusion: 4.829214\nAction: explain\nReward: -0.83525\nNext Confusion: 5.605744"}
{"text": "### State\nConfusion: 6.698942\nAction: analogize\nReward: 0.293412\nNext Confusion: 6.224935"}
{"text": "### State\nConfusion: 3.408383\nAction: analogize\nReward: -1.13421\nNext Confusion: 4.121609"}
{"text": "### State\nConfusion: 2.550534\nAction: explain\nReward: 1.595885\nNext Confusion: 2.337572"}
{"text": "### State\nConfusion: 4.26689\nAction: analogize\nReward: -0.923485\nNext Confusion: 4.911927"}
{"text": "### State\nConfusion: 4.54758\nAction: analogize\nReward: 0.028784\nNext Confusion: 4.688157"}
{"text": "### State\nConfusion: 3.963094\nAction: analogize\nReward: 0.601362\nNext Confusion: 3.715459"}
{"text": "### State\nConfusion: 4.264097\nAction: analogize\nReward: 0.165267\nNext Confusion: 4.050135"}
{"text": "### State\nConfusion: 4.775359\nAction: analogize\nReward: 0.805797\nNext Confusion: 4.071631"}
{"text": "### State\nConfusion: 4.309736\nAction: analogize\nReward: -1.096488\nNext Confusion: 4.92112"}
{"text": "### State\nConfusion: 3.836866\nAction: analogize\nReward: 0.243327\nNext Confusion: 3.894223"}
{"text": "### State\nConfusion: 3.551428\nAction: analogize\nReward: -0.240741\nNext Confusion: 3.635041"}
{"text": "### State\nConfusion: 6.219377\nAction: analogize\nReward: -0.440877\nNext Confusion: 6.443679"}
{"text": "### State\nConfusion: 8.134678\nAction: analogize\nReward: -0.952597\nNext Confusion: 9.351403"}
{"text": "### State\nConfusion: 7.455897\nAction: worked_example\nReward: 3.000091\nNext Confusion: 5.743115"}
{"text": "### State\nConfusion: 3.591007\nAction: analogize\nReward: 0.415582\nNext Confusion: 3.694337"}
{"text": "### State\nConfusion: 5.798405\nAction: correct_fact\nReward: 0.444779\nNext Confusion: 5.077715"}
{"text": "### State\nConfusion: 3.156499\nAction: correct_fact\nReward: 0.71058\nNext Confusion: 3.226074"}
{"text": "### State\nConfusion: 2.320361\nAction: analogize\nReward: -0.406777\nNext Confusion: 2.661581"}
{"text": "### State\nConfusion: 3.254967\nAction: explain\nReward: 0.564712\nNext Confusion: 2.827411"}
{"text": "### State\nConfusion: 3.106249\nAction: analogize\nReward: -0.246119\nNext Confusion: 3.193833"}
{"text": "### State\nConfusion: 6.7713\nAction: analogize\nReward: -0.996812\nNext Confusion: 7.599147"}
{"text": "### State\nConfusion: 3.82842\nAction: analogize\nReward: -0.442356\nNext Confusion: 3.9427"}
{"text": "### State\nConfusion: 3.163116\nAction: analogize\nReward: -0.57468\nNext Confusion: 3.300192"}
{"text": "### State\nConfusion: 3.769666\nAction: analogize\nReward: -1.033696\nNext Confusion: 3.965607"}
{"text": "### State\nConfusion: 4.149986\nAction: explain\nReward: 2.104363\nNext Confusion: 3.151176"}
{"text": "### State\nConfusion: 3.644197\nAction: correct_fact\nReward: 0.883312\nNext Confusion: 2.699045"}
{"text": "### State\nConfusion: 3.353661\nAction: worked_example\nReward: -1.517132\nNext Confusion: 3.120078"}
{"text": "### State\nConfusion: 3.097823\nAction: worked_example\nReward: 0.987777\nNext Confusion: 2.541394"}
{"text": "### State\nConfusion: 1.986863\nAction: analogize\nReward: -0.166395\nNext Confusion: 2.263733"}
{"text": "### State\nConfusion: 3.803623\nAction: explain\nReward: -0.599474\nNext Confusion: 4.320712"}
{"text": "### State\nConfusion: 2.989635\nAction: analogize\nReward: -0.391854\nNext Confusion: 3.153213"}
{"text": "### State\nConfusion: 3.825881\nAction: analogize\nReward: -0.304546\nNext Confusion: 4.124488"}
{"text": "### State\nConfusion: 3.925646\nAction: analogize\nReward: 1.280908\nNext Confusion: 3.250851"}
{"text": "### State\nConfusion: 3.520161\nAction: correct_fact\nReward: 1.316759\nNext Confusion: 3.189941"}
{"text": "### State\nConfusion: 5.792756\nAction: worked_example\nReward: 1.211052\nNext Confusion: 4.469619"}
{"text": "### State\nConfusion: 5.700114\nAction: worked_example\nReward: 1.746722\nNext Confusion: 4.20747"}
{"text": "### State\nConfusion: 3.441217\nAction: analogize\nReward: -1.146556\nNext Confusion: 4.742431"}
{"text": "### State\nConfusion: 6.980942\nAction: worked_example\nReward: 2.354988\nNext Confusion: 5.094528"}
{"text": "### State\nConfusion: 4.636788\nAction: worked_example\nReward: 1.395544\nNext Confusion: 2.763631"}
{"text": "### State\nConfusion: 4.57248\nAction: worked_example\nReward: -0.893071\nNext Confusion: 5.374165"}
{"text": "### State\nConfusion: 4.616354\nAction: correct_fact\nReward: -0.458687\nNext Confusion: 4.978016"}
{"text": "### State\nConfusion: 9.871258\nAction: worked_example\nReward: 0.951345\nNext Confusion: 8.626527"}
{"text": "### State\nConfusion: 5.400334\nAction: analogize\nReward: -3.107368\nNext Confusion: 4.699464"}
{"text": "### State\nConfusion: 3.301483\nAction: analogize\nReward: -0.056496\nNext Confusion: 3.928677"}
{"text": "### State\nConfusion: 6.662239\nAction: explain\nReward: -0.308954\nNext Confusion: 7.263927"}
{"text": "### State\nConfusion: 4.49765\nAction: analogize\nReward: -0.627163\nNext Confusion: 4.901896"}
{"text": "### State\nConfusion: 7.890362\nAction: correct_fact\nReward: -1.032236\nNext Confusion: 8.792729"}
{"text": "### State\nConfusion: 3.356429\nAction: analogize\nReward: 0.527811\nNext Confusion: 3.568363"}
{"text": "### State\nConfusion: 3.184431\nAction: analogize\nReward: 0.474491\nNext Confusion: 2.551207"}
{"text": "### State\nConfusion: 4.585125\nAction: analogize\nReward: 0.313417\nNext Confusion: 4.652011"}
{"text": "### State\nConfusion: 7.036767\nAction: analogize\nReward: -0.249813\nNext Confusion: 8.359595"}
{"text": "### State\nConfusion: 4.977924\nAction: analogize\nReward: -0.39375\nNext Confusion: 5.75664"}
{"text": "### State\nConfusion: 4.899474\nAction: analogize\nReward: 0.324894\nNext Confusion: 4.782554"}
{"text": "### State\nConfusion: 5.46263\nAction: analogize\nReward: 0.041644\nNext Confusion: 5.704385"}
{"text": "### State\nConfusion: 3.728646\nAction: explain\nReward: 0.553889\nNext Confusion: 3.630325"}
{"text": "### State\nConfusion: 3.874559\nAction: analogize\nReward: 0.23354\nNext Confusion: 3.770426"}
{"text": "### State\nConfusion: 4.150585\nAction: analogize\nReward: -0.386776\nNext Confusion: 4.294056"}
{"text": "### State\nConfusion: 3.750043\nAction: analogize\nReward: -0.098659\nNext Confusion: 3.729299"}
{"text": "### State\nConfusion: 4.046793\nAction: analogize\nReward: 1.120879\nNext Confusion: 3.69392"}
{"text": "### State\nConfusion: 4.282802\nAction: analogize\nReward: 0.229984\nNext Confusion: 4.534772"}
{"text": "### State\nConfusion: 9.815567\nAction: explain\nReward: 0.113322\nNext Confusion: 9.897961"}
{"text": "### State\nConfusion: 8.447412\nAction: question\nReward: 0.905137\nNext Confusion: 7.587751"}
{"text": "### State\nConfusion: 8.702408\nAction: correct_fact\nReward: 0.327523\nNext Confusion: 8.957442"}
{"text": "### State\nConfusion: 4.547598\nAction: question\nReward: 0.19703\nNext Confusion: 4.136933"}
{"text": "### State\nConfusion: 3.093116\nAction: analogize\nReward: -0.690177\nNext Confusion: 3.494307"}
{"text": "### State\nConfusion: 6.10634\nAction: analogize\nReward: -0.06686\nNext Confusion: 6.080076"}
{"text": "### State\nConfusion: 5.061502\nAction: explain\nReward: -0.08977\nNext Confusion: 5.423969"}
{"text": "### State\nConfusion: 6.275469\nAction: analogize\nReward: -0.406191\nNext Confusion: 6.53923"}
{"text": "### State\nConfusion: 2.794288\nAction: explain\nReward: -0.477082\nNext Confusion: 3.057377"}
{"text": "### State\nConfusion: 7.193281\nAction: analogize\nReward: -0.987684\nNext Confusion: 8.28434"}
{"text": "### State\nConfusion: 4.47008\nAction: analogize\nReward: 2.03497\nNext Confusion: 4.062569"}
{"text": "### State\nConfusion: 7.197754\nAction: explain\nReward: 0.563753\nNext Confusion: 6.897256"}
{"text": "### State\nConfusion: 6.664058\nAction: analogize\nReward: -1.519211\nNext Confusion: 7.664808"}
{"text": "### State\nConfusion: 9.278358\nAction: analogize\nReward: -0.130705\nNext Confusion: 9.787616"}
{"text": "### State\nConfusion: 3.831932\nAction: analogize\nReward: 0.197891\nNext Confusion: 4.277423"}
{"text": "### State\nConfusion: 5.387299\nAction: correct_fact\nReward: 0.229777\nNext Confusion: 5.204511"}
{"text": "### State\nConfusion: 3.000513\nAction: question\nReward: 0.870176\nNext Confusion: 2.886877"}
{"text": "### State\nConfusion: 4.005711\nAction: explain\nReward: 0.577621\nNext Confusion: 3.404496"}
{"text": "### State\nConfusion: 3.297642\nAction: correct_fact\nReward: 0.209132\nNext Confusion: 3.396185"}
{"text": "### State\nConfusion: 3.705981\nAction: question\nReward: -0.267627\nNext Confusion: 3.913401"}
{"text": "### State\nConfusion: 2.617965\nAction: analogize\nReward: 0.747505\nNext Confusion: 1.880171"}
{"text": "### State\nConfusion: 7.499238\nAction: analogize\nReward: 0.395212\nNext Confusion: 7.566947"}
{"text": "### State\nConfusion: 7.075433\nAction: explain\nReward: 0.394626\nNext Confusion: 6.730183"}
{"text": "### State\nConfusion: 5.561482\nAction: explain\nReward: 0.444965\nNext Confusion: 5.071474"}
{"text": "### State\nConfusion: 4.43107\nAction: question\nReward: 1.475692\nNext Confusion: 3.331222"}
{"text": "### State\nConfusion: 4.206248\nAction: worked_example\nReward: 1.131951\nNext Confusion: 2.576638"}
{"text": "### State\nConfusion: 4.046033\nAction: analogize\nReward: 0.078906\nNext Confusion: 3.6898"}
{"text": "### State\nConfusion: 3.234105\nAction: explain\nReward: 1.041857\nNext Confusion: 2.462756"}
{"text": "### State\nConfusion: 3.184991\nAction: analogize\nReward: -0.740992\nNext Confusion: 3.408039"}
{"text": "### State\nConfusion: 7.069261\nAction: worked_example\nReward: 1.829486\nNext Confusion: 5.310039"}
{"text": "### State\nConfusion: 4.233379\nAction: correct_fact\nReward: -0.757781\nNext Confusion: 4.137143"}
{"text": "### State\nConfusion: 5.110789\nAction: analogize\nReward: 0.68815\nNext Confusion: 4.324455"}
{"text": "### State\nConfusion: 6.596481\nAction: explain\nReward: -0.336485\nNext Confusion: 6.980236"}
{"text": "### State\nConfusion: 4.242512\nAction: analogize\nReward: -0.919348\nNext Confusion: 4.882627"}
{"text": "### State\nConfusion: 4.002249\nAction: analogize\nReward: -0.165824\nNext Confusion: 4.386279"}
{"text": "### State\nConfusion: 3.579485\nAction: worked_example\nReward: 1.023444\nNext Confusion: 2.80122"}
{"text": "### State\nConfusion: 3.234276\nAction: analogize\nReward: -0.679574\nNext Confusion: 4.246226"}
{"text": "### State\nConfusion: 4.081505\nAction: analogize\nReward: -0.179928\nNext Confusion: 4.543588"}
{"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 0.626965\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.283777\nAction: worked_example\nReward: 1.920165\nNext Confusion: 1.740775"}
{"text": "### State\nConfusion: 4.382779\nAction: analogize\nReward: 0.254878\nNext Confusion: 4.928416"}
{"text": "### State\nConfusion: 6.469818\nAction: analogize\nReward: 0.10987\nNext Confusion: 6.650122"}
{"text": "### State\nConfusion: 3.401842\nAction: analogize\nReward: -0.199118\nNext Confusion: 3.205163"}
{"text": "### State\nConfusion: 4.53089\nAction: analogize\nReward: -1.084081\nNext Confusion: 4.936621"}
{"text": "### State\nConfusion: 7.181551\nAction: explain\nReward: 0.857128\nNext Confusion: 7.342774"}
{"text": "### State\nConfusion: 4.468257\nAction: analogize\nReward: -0.448214\nNext Confusion: 5.072941"}
{"text": "### State\nConfusion: 3.547867\nAction: analogize\nReward: -0.621887\nNext Confusion: 4.220783"}
{"text": "### State\nConfusion: 8.407737\nAction: explain\nReward: 0.888179\nNext Confusion: 7.801633"}
{"text": "### State\nConfusion: 3.330818\nAction: analogize\nReward: 0.427229\nNext Confusion: 3.057645"}
{"text": "### State\nConfusion: 9.220737\nAction: question\nReward: 1.039978\nNext Confusion: 8.286354"}
{"text": "### State\nConfusion: 6.95472\nAction: correct_fact\nReward: 0.03733\nNext Confusion: 7.105115"}
{"text": "### State\nConfusion: 3.038147\nAction: explain\nReward: -0.052942\nNext Confusion: 3.534167"}
{"text": "### State\nConfusion: 4.203768\nAction: analogize\nReward: 0.44414\nNext Confusion: 3.748747"}
{"text": "### State\nConfusion: 4.394054\nAction: analogize\nReward: 0.863723\nNext Confusion: 4.08186"}
{"text": "### State\nConfusion: 7.25743\nAction: correct_fact\nReward: 0.615971\nNext Confusion: 6.862161"}
{"text": "### State\nConfusion: 3.332897\nAction: worked_example\nReward: 1.561058\nNext Confusion: 2.125503"}
{"text": "### State\nConfusion: 2.435858\nAction: analogize\nReward: 0.474768\nNext Confusion: 2.184254"}
{"text": "### State\nConfusion: 3.347793\nAction: question\nReward: 0.604531\nNext Confusion: 3.077612"}
{"text": "### State\nConfusion: 4.475967\nAction: analogize\nReward: -0.318223\nNext Confusion: 4.587817"}
{"text": "### State\nConfusion: 2.567078\nAction: analogize\nReward: 0.606041\nNext Confusion: 2.992178"}
{"text": "### State\nConfusion: 3.475035\nAction: correct_fact\nReward: 0.14881\nNext Confusion: 3.128379"}
{"text": "### State\nConfusion: 7.965955\nAction: analogize\nReward: -0.464932\nNext Confusion: 8.695142"}
{"text": "### State\nConfusion: 6.437036\nAction: worked_example\nReward: 1.469581\nNext Confusion: 5.73296"}
{"text": "### State\nConfusion: 5.183217\nAction: question\nReward: 0.797294\nNext Confusion: 4.91528"}
{"text": "### State\nConfusion: 4.545303\nAction: analogize\nReward: -0.524589\nNext Confusion: 4.333676"}
{"text": "### State\nConfusion: 3.575009\nAction: analogize\nReward: -0.159195\nNext Confusion: 3.466167"}
{"text": "### State\nConfusion: 3.788009\nAction: correct_fact\nReward: 0.762772\nNext Confusion: 3.053306"}
{"text": "### State\nConfusion: 9.45097\nAction: analogize\nReward: 0.388164\nNext Confusion: 9.557405"}
{"text": "### State\nConfusion: 2.62543\nAction: analogize\nReward: 0.251797\nNext Confusion: 2.618268"}
{"text": "### State\nConfusion: 2.320139\nAction: analogize\nReward: 0.979596\nNext Confusion: 1.804903"}
{"text": "### State\nConfusion: 6.174851\nAction: analogize\nReward: -0.47453\nNext Confusion: 6.922316"}
{"text": "### State\nConfusion: 4.333913\nAction: analogize\nReward: -0.327869\nNext Confusion: 4.996931"}
{"text": "### State\nConfusion: 5.262707\nAction: correct_fact\nReward: -0.606454\nNext Confusion: 5.759839"}
{"text": "### State\nConfusion: 5.174058\nAction: correct_fact\nReward: -0.344999\nNext Confusion: 5.809796"}
{"text": "### State\nConfusion: 4.217094\nAction: correct_fact\nReward: -1.139956\nNext Confusion: 4.768854"}
{"text": "### State\nConfusion: 2.933767\nAction: explain\nReward: 0.647692\nNext Confusion: 2.456029"}
{"text": "### State\nConfusion: 3.497738\nAction: correct_fact\nReward: 0.961823\nNext Confusion: 2.937699"}
{"text": "### State\nConfusion: 4.59177\nAction: analogize\nReward: 0.722301\nNext Confusion: 4.028663"}
{"text": "### State\nConfusion: 4.772787\nAction: explain\nReward: -0.079586\nNext Confusion: 5.282044"}
{"text": "### State\nConfusion: 2.663238\nAction: correct_fact\nReward: 0.744137\nNext Confusion: 2.511148"}
{"text": "### State\nConfusion: 3.442076\nAction: worked_example\nReward: 1.852468\nNext Confusion: 1.87048"}
{"text": "### State\nConfusion: 3.418484\nAction: correct_fact\nReward: -1.434631\nNext Confusion: 4.678504"}
{"text": "### State\nConfusion: 7.50074\nAction: analogize\nReward: -1.03624\nNext Confusion: 7.782828"}
{"text": "### State\nConfusion: 3.662425\nAction: analogize\nReward: 0.288273\nNext Confusion: 3.881542"}
{"text": "### State\nConfusion: 5.825604\nAction: explain\nReward: 0.167269\nNext Confusion: 5.945172"}
{"text": "### State\nConfusion: 8.298186\nAction: explain\nReward: -0.642418\nNext Confusion: 9.145937"}
{"text": "### State\nConfusion: 3.544472\nAction: analogize\nReward: 1.496793\nNext Confusion: 3.059354"}
{"text": "### State\nConfusion: 5.426896\nAction: worked_example\nReward: 2.035843\nNext Confusion: 3.913632"}
{"text": "### State\nConfusion: 7.057615\nAction: worked_example\nReward: 1.300917\nNext Confusion: 6.035897"}
{"text": "### State\nConfusion: 5.308103\nAction: worked_example\nReward: 0.597929\nNext Confusion: 4.281115"}
{"text": "### State\nConfusion: 4.660218\nAction: correct_fact\nReward: -1.198085\nNext Confusion: 5.178064"}
{"text": "### State\nConfusion: 4.182568\nAction: analogize\nReward: -0.669219\nNext Confusion: 4.998801"}
{"text": "### State\nConfusion: 3.374448\nAction: question\nReward: 0.148296\nNext Confusion: 3.367422"}
{"text": "### State\nConfusion: 2.244774\nAction: analogize\nReward: 0.139914\nNext Confusion: 2.010998"}
{"text": "### State\nConfusion: 7.57388\nAction: worked_example\nReward: 2.927079\nNext Confusion: 5.321944"}
{"text": "### State\nConfusion: 5.053628\nAction: analogize\nReward: -0.848654\nNext Confusion: 5.590656"}
{"text": "### State\nConfusion: 4.58681\nAction: question\nReward: 0.398616\nNext Confusion: 3.826893"}
{"text": "### State\nConfusion: 4.385522\nAction: analogize\nReward: 0.196747\nNext Confusion: 4.364781"}
{"text": "### State\nConfusion: 5.258719\nAction: analogize\nReward: -0.478197\nNext Confusion: 5.745747"}
{"text": "### State\nConfusion: 4.160741\nAction: analogize\nReward: -0.373814\nNext Confusion: 4.572683"}
{"text": "### State\nConfusion: 4.594178\nAction: explain\nReward: -0.379261\nNext Confusion: 4.967944"}
{"text": "### State\nConfusion: 3.734883\nAction: explain\nReward: -0.135488\nNext Confusion: 3.877711"}
{"text": "### State\nConfusion: 8.422437\nAction: correct_fact\nReward: 1.419393\nNext Confusion: 8.090842"}
{"text": "### State\nConfusion: 4.626043\nAction: question\nReward: 0.781404\nNext Confusion: 4.271732"}
{"text": "### State\nConfusion: 4.303492\nAction: analogize\nReward: -0.486873\nNext Confusion: 5.130393"}
{"text": "### State\nConfusion: 6.131956\nAction: analogize\nReward: -0.443139\nNext Confusion: 5.680384"}
{"text": "### State\nConfusion: 3.401462\nAction: analogize\nReward: -0.628829\nNext Confusion: 3.549128"}
{"text": "### State\nConfusion: 4.063388\nAction: question\nReward: 0.669799\nNext Confusion: 3.684196"}
{"text": "### State\nConfusion: 4.185434\nAction: analogize\nReward: 0.082712\nNext Confusion: 4.268066"}
{"text": "### State\nConfusion: 5.392101\nAction: analogize\nReward: -0.201228\nNext Confusion: 5.383713"}
{"text": "### State\nConfusion: 3.389313\nAction: analogize\nReward: -1.13185\nNext Confusion: 4.473208"}
{"text": "### State\nConfusion: 3.382116\nAction: explain\nReward: -0.083349\nNext Confusion: 3.077605"}
{"text": "### State\nConfusion: 6.708632\nAction: analogize\nReward: 0.235717\nNext Confusion: 6.575146"}
{"text": "### State\nConfusion: 4.120906\nAction: question\nReward: -0.201961\nNext Confusion: 4.228239"}
{"text": "### State\nConfusion: 5.375295\nAction: analogize\nReward: -0.566432\nNext Confusion: 6.098354"}
{"text": "### State\nConfusion: 5.3075\nAction: explain\nReward: 0.840567\nNext Confusion: 4.593798"}
{"text": "### State\nConfusion: 3.732203\nAction: analogize\nReward: 0.019524\nNext Confusion: 3.975943"}
{"text": "### State\nConfusion: 6.234458\nAction: question\nReward: -0.545665\nNext Confusion: 5.8335"}
{"text": "### State\nConfusion: 2.99104\nAction: analogize\nReward: -0.644436\nNext Confusion: 3.476136"}
{"text": "### State\nConfusion: 2.404935\nAction: explain\nReward: 0.535707\nNext Confusion: 2.225598"}
{"text": "### State\nConfusion: 3.773187\nAction: analogize\nReward: -0.455111\nNext Confusion: 4.499895"}
{"text": "### State\nConfusion: 3.483044\nAction: explain\nReward: 0.347968\nNext Confusion: 2.844657"}
{"text": "### State\nConfusion: 4.503647\nAction: analogize\nReward: -0.185608\nNext Confusion: 4.902484"}
{"text": "### State\nConfusion: 5.3724\nAction: explain\nReward: 0.451463\nNext Confusion: 5.077805"}
{"text": "### State\nConfusion: 3.309671\nAction: analogize\nReward: -0.143453\nNext Confusion: 3.113236"}
{"text": "### State\nConfusion: 4.513547\nAction: question\nReward: 0.606718\nNext Confusion: 4.085341"}
{"text": "### State\nConfusion: 3.603875\nAction: analogize\nReward: -1.52553\nNext Confusion: 4.786602"}
{"text": "### State\nConfusion: 7.368618\nAction: analogize\nReward: -0.765855\nNext Confusion: 7.418029"}
{"text": "### State\nConfusion: 7.257043\nAction: worked_example\nReward: 0.905552\nNext Confusion: 6.61073"}
{"text": "### State\nConfusion: 5.561694\nAction: analogize\nReward: -0.508069\nNext Confusion: 6.053774"}
{"text": "### State\nConfusion: 2.828592\nAction: analogize\nReward: -0.277313\nNext Confusion: 2.854013"}
{"text": "### State\nConfusion: 3.715696\nAction: analogize\nReward: 0.745882\nNext Confusion: 4.146977"}
{"text": "### State\nConfusion: 3.437749\nAction: analogize\nReward: 0.142635\nNext Confusion: 3.554578"}
{"text": "### State\nConfusion: 2.441164\nAction: analogize\nReward: 0.021128\nNext Confusion: 2.135772"}
{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.631064\nNext Confusion: 9.471717"}
{"text": "### State\nConfusion: 3.799863\nAction: analogize\nReward: -0.517685\nNext Confusion: 4.380528"}
{"text": "### State\nConfusion: 4.649355\nAction: analogize\nReward: -0.19733\nNext Confusion: 5.630637"}
{"text": "### State\nConfusion: 4.754737\nAction: analogize\nReward: 0.957142\nNext Confusion: 4.203445"}
{"text": "### State\nConfusion: 3.006349\nAction: analogize\nReward: 0.189458\nNext Confusion: 3.092292"}
{"text": "### State\nConfusion: 3.176708\nAction: analogize\nReward: 0.504138\nNext Confusion: 3.474577"}
{"text": "### State\nConfusion: 10.0\nAction: analogize\nReward: 0.068862\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.380745\nAction: analogize\nReward: 0.278275\nNext Confusion: 3.226712"}
{"text": "### State\nConfusion: 4.818123\nAction: correct_fact\nReward: -0.642482\nNext Confusion: 5.129877"}
{"text": "### State\nConfusion: 1.902845\nAction: question\nReward: -0.081912\nNext Confusion: 2.182037"}
{"text": "### State\nConfusion: 6.238435\nAction: question\nReward: -0.37372\nNext Confusion: 5.979242"}
{"text": "### State\nConfusion: 3.918744\nAction: explain\nReward: -0.178161\nNext Confusion: 3.932915"}
{"text": "### State\nConfusion: 6.612762\nAction: analogize\nReward: 0.452281\nNext Confusion: 6.863951"}
{"text": "### State\nConfusion: 3.425594\nAction: analogize\nReward: -0.412629\nNext Confusion: 3.878789"}
{"text": "### State\nConfusion: 4.775546\nAction: analogize\nReward: -0.009128\nNext Confusion: 4.696767"}
{"text": "### State\nConfusion: 7.393962\nAction: analogize\nReward: -0.722784\nNext Confusion: 7.283319"}
{"text": "### State\nConfusion: 5.283406\nAction: analogize\nReward: 0.286061\nNext Confusion: 5.141407"}
{"text": "### State\nConfusion: 6.754779\nAction: worked_example\nReward: 2.055535\nNext Confusion: 5.141479"}
{"text": "### State\nConfusion: 4.003936\nAction: question\nReward: -0.513979\nNext Confusion: 4.440584"}
{"text": "### State\nConfusion: 10.0\nAction: explain\nReward: 0.83318\nNext Confusion: 9.88262"}
{"text": "### State\nConfusion: 6.166142\nAction: worked_example\nReward: 0.421776\nNext Confusion: 5.762331"}
{"text": "### State\nConfusion: 3.861957\nAction: question\nReward: 0.724535\nNext Confusion: 3.452488"}
{"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 1.315561\nNext Confusion: 7.715245"}
{"text": "### State\nConfusion: 8.259786\nAction: explain\nReward: 0.558777\nNext Confusion: 7.904824"}
{"text": "### State\nConfusion: 3.152868\nAction: correct_fact\nReward: 0.234806\nNext Confusion: 3.087118"}
{"text": "### State\nConfusion: 6.291952\nAction: analogize\nReward: -1.134346\nNext Confusion: 6.971364"}
{"text": "### State\nConfusion: 3.391283\nAction: explain\nReward: -0.301607\nNext Confusion: 3.773489"}
{"text": "### State\nConfusion: 7.058365\nAction: question\nReward: 0.096737\nNext Confusion: 6.488666"}
{"text": "### State\nConfusion: 5.330192\nAction: question\nReward: 0.434085\nNext Confusion: 4.503624"}
{"text": "### State\nConfusion: 7.3815\nAction: explain\nReward: -0.624081\nNext Confusion: 7.829628"}
{"text": "### State\nConfusion: 3.936956\nAction: explain\nReward: 0.156011\nNext Confusion: 4.77193"}
{"text": "### State\nConfusion: 8.507776\nAction: analogize\nReward: -0.267206\nNext Confusion: 8.839627"}
{"text": "### State\nConfusion: 3.966226\nAction: analogize\nReward: 0.07544\nNext Confusion: 3.734597"}
{"text": "### State\nConfusion: 3.389673\nAction: analogize\nReward: -0.385492\nNext Confusion: 3.757603"}
{"text": "### State\nConfusion: 4.152238\nAction: explain\nReward: 0.194201\nNext Confusion: 3.124795"}
{"text": "### State\nConfusion: 3.671381\nAction: explain\nReward: -0.851671\nNext Confusion: 4.85722"}
{"text": "### State\nConfusion: 3.87501\nAction: analogize\nReward: 0.963411\nNext Confusion: 4.011062"}
{"text": "### State\nConfusion: 3.591396\nAction: correct_fact\nReward: -0.060506\nNext Confusion: 3.749811"}
{"text": "### State\nConfusion: 5.843234\nAction: explain\nReward: 1.538094\nNext Confusion: 4.893917"}
{"text": "### State\nConfusion: 6.427896\nAction: worked_example\nReward: 0.989949\nNext Confusion: 4.926106"}
{"text": "### State\nConfusion: 6.490404\nAction: analogize\nReward: -0.035181\nNext Confusion: 6.649741"}
{"text": "### State\nConfusion: 4.298608\nAction: analogize\nReward: 0.51613\nNext Confusion: 4.339033"}
{"text": "### State\nConfusion: 2.794963\nAction: explain\nReward: -0.328953\nNext Confusion: 3.284198"}
{"text": "### State\nConfusion: 0.692194\nAction: worked_example\nReward: 0.527875\nNext Confusion: 0.0"}
{"text": "### State\nConfusion: 3.597074\nAction: explain\nReward: -0.452491\nNext Confusion: 4.077954"}
{"text": "### State\nConfusion: 3.767463\nAction: analogize\nReward: -1.452971\nNext Confusion: 4.617874"}
{"text": "### State\nConfusion: 4.362273\nAction: worked_example\nReward: 0.933517\nNext Confusion: 3.596099"}
{"text": "### State\nConfusion: 4.670532\nAction: analogize\nReward: -1.184355\nNext Confusion: 4.818303"}
{"text": "### State\nConfusion: 3.000032\nAction: analogize\nReward: -0.435179\nNext Confusion: 4.011406"}
{"text": "### State\nConfusion: 3.608845\nAction: explain\nReward: 0.150313\nNext Confusion: 3.592252"}
{"text": "### State\nConfusion: 3.324292\nAction: question\nReward: -0.160978\nNext Confusion: 3.667143"}
{"text": "### State\nConfusion: 3.206405\nAction: analogize\nReward: 0.718679\nNext Confusion: 2.95461"}
{"text": "### State\nConfusion: 6.566045\nAction: analogize\nReward: -0.764221\nNext Confusion: 7.17747"}
{"text": "### State\nConfusion: 4.402418\nAction: analogize\nReward: -1.400358\nNext Confusion: 5.781072"}
{"text": "### State\nConfusion: 3.839939\nAction: analogize\nReward: 1.133303\nNext Confusion: 3.523669"}
{"text": "### State\nConfusion: 4.051802\nAction: analogize\nReward: 0.732211\nNext Confusion: 3.626338"}
{"text": "### State\nConfusion: 3.779393\nAction: correct_fact\nReward: -0.882573\nNext Confusion: 4.401586"}
{"text": "### State\nConfusion: 4.234075\nAction: analogize\nReward: 0.006339\nNext Confusion: 4.259672"}
{"text": "### State\nConfusion: 5.705486\nAction: analogize\nReward: -0.177564\nNext Confusion: 5.695643"}
{"text": "### State\nConfusion: 3.404266\nAction: question\nReward: 0.306554\nNext Confusion: 3.231444"}
{"text": "### State\nConfusion: 3.485604\nAction: analogize\nReward: 0.322089\nNext Confusion: 3.532859"}
{"text": "### State\nConfusion: 5.956239\nAction: analogize\nReward: 0.175887\nNext Confusion: 5.894433"}
{"text": "### State\nConfusion: 3.64884\nAction: correct_fact\nReward: 0.186027\nNext Confusion: 4.024583"}
{"text": "### State\nConfusion: 4.357332\nAction: analogize\nReward: 0.094647\nNext Confusion: 4.234993"}
{"text": "### State\nConfusion: 3.956645\nAction: analogize\nReward: 0.143816\nNext Confusion: 2.74001"}
{"text": "### State\nConfusion: 4.275213\nAction: analogize\nReward: -0.112997\nNext Confusion: 4.803157"}
{"text": "### State\nConfusion: 7.308207\nAction: question\nReward: -0.002123\nNext Confusion: 6.719409"}
{"text": "### State\nConfusion: 6.641357\nAction: analogize\nReward: -0.387628\nNext Confusion: 7.074882"}
{"text": "### State\nConfusion: 7.486015\nAction: analogize\nReward: -0.275908\nNext Confusion: 7.823219"}
{"text": "### State\nConfusion: 7.737795\nAction: worked_example\nReward: 0.754587\nNext Confusion: 7.086144"}
{"text": "### State\nConfusion: 6.91396\nAction: worked_example\nReward: -0.301934\nNext Confusion: 7.024792"}
{"text": "### State\nConfusion: 3.362437\nAction: analogize\nReward: -0.823035\nNext Confusion: 3.973793"}
{"text": "### State\nConfusion: 4.437325\nAction: analogize\nReward: -0.477802\nNext Confusion: 5.210613"}
{"text": "### State\nConfusion: 3.576501\nAction: analogize\nReward: -0.928897\nNext Confusion: 3.691247"}
{"text": "### State\nConfusion: 3.308704\nAction: analogize\nReward: 0.099583\nNext Confusion: 3.11851"}
{"text": "### State\nConfusion: 4.28096\nAction: analogize\nReward: -0.935757\nNext Confusion: 4.505311"}
{"text": "### State\nConfusion: 7.126233\nAction: analogize\nReward: -2.046618\nNext Confusion: 8.965673"}
{"text": "### State\nConfusion: 5.618792\nAction: explain\nReward: -0.070551\nNext Confusion: 6.015597"}
{"text": "### State\nConfusion: 7.323637\nAction: analogize\nReward: 0.798525\nNext Confusion: 7.287678"}
{"text": "### State\nConfusion: 3.949625\nAction: analogize\nReward: -0.422096\nNext Confusion: 3.592318"}
{"text": "### State\nConfusion: 5.734334\nAction: analogize\nReward: -0.061426\nNext Confusion: 5.932269"}
{"text": "### State\nConfusion: 4.095278\nAction: explain\nReward: 0.150416\nNext Confusion: 3.920164"}
{"text": "### State\nConfusion: 4.267157\nAction: correct_fact\nReward: -0.55467\nNext Confusion: 4.589067"}
{"text": "### State\nConfusion: 3.476372\nAction: analogize\nReward: 0.434812\nNext Confusion: 3.368467"}
{"text": "### State\nConfusion: 3.845721\nAction: analogize\nReward: -0.44389\nNext Confusion: 3.958671"}
{"text": "### State\nConfusion: 5.89789\nAction: explain\nReward: 0.289961\nNext Confusion: 5.80951"}
{"text": "### State\nConfusion: 3.30418\nAction: question\nReward: 0.561687\nNext Confusion: 2.938793"}
{"text": "### State\nConfusion: 5.601084\nAction: analogize\nReward: 0.090898\nNext Confusion: 6.04158"}
{"text": "### State\nConfusion: 3.804507\nAction: explain\nReward: -0.214608\nNext Confusion: 4.102091"}
{"text": "### State\nConfusion: 5.699248\nAction: analogize\nReward: -0.86557\nNext Confusion: 7.082009"}
{"text": "### State\nConfusion: 3.841494\nAction: question\nReward: 1.661304\nNext Confusion: 2.886172"}
{"text": "### State\nConfusion: 6.02223\nAction: analogize\nReward: -0.655605\nNext Confusion: 6.230086"}
{"text": "### State\nConfusion: 5.195364\nAction: explain\nReward: -0.001099\nNext Confusion: 4.825065"}
{"text": "### State\nConfusion: 4.841122\nAction: question\nReward: 1.924098\nNext Confusion: 4.41556"}
{"text": "### State\nConfusion: 3.552158\nAction: explain\nReward: 1.212741\nNext Confusion: 1.933405"}
{"text": "### State\nConfusion: 7.492652\nAction: analogize\nReward: -0.788627\nNext Confusion: 8.387055"}
{"text": "### State\nConfusion: 2.427369\nAction: analogize\nReward: -0.966672\nNext Confusion: 2.26519"}
{"text": "### State\nConfusion: 5.075143\nAction: analogize\nReward: -0.249792\nNext Confusion: 5.690755"}
{"text": "### State\nConfusion: 6.168964\nAction: analogize\nReward: -0.402024\nNext Confusion: 6.470056"}
{"text": "### State\nConfusion: 9.379551\nAction: correct_fact\nReward: 0.264547\nNext Confusion: 9.748802"}
{"text": "### State\nConfusion: 5.41286\nAction: analogize\nReward: 0.3176\nNext Confusion: 5.959353"}
{"text": "### State\nConfusion: 3.271821\nAction: analogize\nReward: -0.490908\nNext Confusion: 2.909299"}
{"text": "### State\nConfusion: 3.993392\nAction: explain\nReward: 0.02944\nNext Confusion: 4.182259"}
{"text": "### State\nConfusion: 3.636055\nAction: analogize\nReward: 0.567075\nNext Confusion: 3.196599"}
{"text": "### State\nConfusion: 4.865194\nAction: question\nReward: 0.345734\nNext Confusion: 4.333092"}
{"text": "### State\nConfusion: 3.210343\nAction: worked_example\nReward: 1.467505\nNext Confusion: 1.968105"}
{"text": "### State\nConfusion: 3.497403\nAction: analogize\nReward: -0.218089\nNext Confusion: 3.823413"}
{"text": "### State\nConfusion: 7.678603\nAction: question\nReward: 1.366405\nNext Confusion: 6.973949"}
{"text": "### State\nConfusion: 5.769523\nAction: question\nReward: 1.414712\nNext Confusion: 4.295852"}
{"text": "### State\nConfusion: 6.377737\nAction: analogize\nReward: 1.021054\nNext Confusion: 5.484272"}
{"text": "### State\nConfusion: 4.100323\nAction: analogize\nReward: 0.385391\nNext Confusion: 3.907769"}
{"text": "### State\nConfusion: 3.378142\nAction: analogize\nReward: -0.65456\nNext Confusion: 3.887522"}
{"text": "### State\nConfusion: 2.248417\nAction: analogize\nReward: 1.451595\nNext Confusion: 1.826454"}
{"text": "### State\nConfusion: 5.806881\nAction: analogize\nReward: 0.423723\nNext Confusion: 5.425294"}
{"text": "### State\nConfusion: 9.48094\nAction: question\nReward: -0.237491\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 7.971427\nAction: analogize\nReward: -0.560404\nNext Confusion: 8.549126"}
{"text": "### State\nConfusion: 4.571994\nAction: analogize\nReward: -0.816992\nNext Confusion: 4.862229"}
{"text": "### State\nConfusion: 7.443913\nAction: analogize\nReward: -0.611569\nNext Confusion: 7.605993"}
{"text": "### State\nConfusion: 7.047747\nAction: worked_example\nReward: 2.416605\nNext Confusion: 4.614026"}
{"text": "### State\nConfusion: 4.453329\nAction: analogize\nReward: -0.823522\nNext Confusion: 4.277873"}
{"text": "### State\nConfusion: 4.519389\nAction: analogize\nReward: 0.845583\nNext Confusion: 3.553505"}
{"text": "### State\nConfusion: 3.012619\nAction: worked_example\nReward: 1.78637\nNext Confusion: 1.214761"}
{"text": "### State\nConfusion: 6.717383\nAction: analogize\nReward: 0.672834\nNext Confusion: 6.077421"}
{"text": "### State\nConfusion: 8.516796\nAction: analogize\nReward: 0.896699\nNext Confusion: 8.070201"}
{"text": "### State\nConfusion: 6.385722\nAction: worked_example\nReward: 1.639703\nNext Confusion: 5.6651"}
{"text": "### State\nConfusion: 3.420088\nAction: worked_example\nReward: 1.214258\nNext Confusion: 1.911085"}
{"text": "### State\nConfusion: 3.304491\nAction: correct_fact\nReward: -0.659982\nNext Confusion: 3.89256"}
{"text": "### State\nConfusion: 4.34604\nAction: analogize\nReward: 0.367453\nNext Confusion: 4.545939"}
{"text": "### State\nConfusion: 5.70637\nAction: explain\nReward: 0.986834\nNext Confusion: 5.318554"}
{"text": "### State\nConfusion: 4.569731\nAction: explain\nReward: -0.251817\nNext Confusion: 5.206977"}
{"text": "### State\nConfusion: 6.623116\nAction: question\nReward: 0.813307\nNext Confusion: 6.747557"}
{"text": "### State\nConfusion: 2.478569\nAction: question\nReward: 0.624874\nNext Confusion: 2.662309"}
{"text": "### State\nConfusion: 2.731783\nAction: explain\nReward: -0.700712\nNext Confusion: 3.661917"}
{"text": "### State\nConfusion: 2.529542\nAction: analogize\nReward: -0.754609\nNext Confusion: 3.141305"}
{"text": "### State\nConfusion: 4.801964\nAction: explain\nReward: 0.7866\nNext Confusion: 3.92003"}
{"text": "### State\nConfusion: 5.041254\nAction: worked_example\nReward: 2.127081\nNext Confusion: 3.543361"}
{"text": "### State\nConfusion: 2.69548\nAction: analogize\nReward: -1.212333\nNext Confusion: 3.334755"}
{"text": "### State\nConfusion: 6.504969\nAction: correct_fact\nReward: -0.521073\nNext Confusion: 6.356454"}
{"text": "### State\nConfusion: 6.60377\nAction: question\nReward: 1.630758\nNext Confusion: 5.570652"}
{"text": "### State\nConfusion: 6.362903\nAction: question\nReward: 0.464984\nNext Confusion: 6.482695"}
{"text": "### State\nConfusion: 3.440521\nAction: analogize\nReward: 0.046752\nNext Confusion: 3.519933"}
{"text": "### State\nConfusion: 5.921055\nAction: correct_fact\nReward: -0.238961\nNext Confusion: 6.003142"}
{"text": "### State\nConfusion: 3.715529\nAction: analogize\nReward: 0.378563\nNext Confusion: 3.28992"}
{"text": "### State\nConfusion: 5.372323\nAction: analogize\nReward: -0.712887\nNext Confusion: 5.683921"}
{"text": "### State\nConfusion: 5.731329\nAction: analogize\nReward: -0.035359\nNext Confusion: 6.064223"}
{"text": "### State\nConfusion: 6.944568\nAction: analogize\nReward: 0.637097\nNext Confusion: 6.785502"}
{"text": "### State\nConfusion: 5.343425\nAction: question\nReward: 1.413053\nNext Confusion: 4.877586"}
{"text": "### State\nConfusion: 4.044001\nAction: correct_fact\nReward: 0.791123\nNext Confusion: 4.111125"}
{"text": "### State\nConfusion: 2.885599\nAction: question\nReward: 0.741619\nNext Confusion: 2.525219"}
{"text": "### State\nConfusion: 4.033328\nAction: analogize\nReward: -0.324879\nNext Confusion: 3.825331"}
{"text": "### State\nConfusion: 3.583706\nAction: analogize\nReward: 0.5554\nNext Confusion: 2.749167"}
{"text": "### State\nConfusion: 6.577643\nAction: analogize\nReward: 0.07324\nNext Confusion: 6.620709"}
{"text": "### State\nConfusion: 3.076465\nAction: explain\nReward: 0.322549\nNext Confusion: 2.944134"}
{"text": "### State\nConfusion: 4.1453\nAction: analogize\nReward: -0.292769\nNext Confusion: 4.125049"}
{"text": "### State\nConfusion: 3.86788\nAction: worked_example\nReward: 2.352502\nNext Confusion: 2.928135"}
{"text": "### State\nConfusion: 3.783789\nAction: analogize\nReward: 0.076052\nNext Confusion: 4.21214"}
{"text": "### State\nConfusion: 2.774392\nAction: analogize\nReward: 0.770279\nNext Confusion: 2.368991"}
{"text": "### State\nConfusion: 5.348946\nAction: explain\nReward: 0.926692\nNext Confusion: 4.922467"}
{"text": "### State\nConfusion: 6.317099\nAction: worked_example\nReward: 1.679236\nNext Confusion: 5.322797"}
{"text": "### State\nConfusion: 3.215445\nAction: worked_example\nReward: 1.053004\nNext Confusion: 1.814326"}
{"text": "### State\nConfusion: 3.391156\nAction: analogize\nReward: -0.276477\nNext Confusion: 3.740653"}
{"text": "### State\nConfusion: 2.778777\nAction: analogize\nReward: 0.684112\nNext Confusion: 2.150054"}
{"text": "### State\nConfusion: 5.19486\nAction: analogize\nReward: 1.399383\nNext Confusion: 4.791388"}
{"text": "### State\nConfusion: 6.92237\nAction: worked_example\nReward: 2.276795\nNext Confusion: 5.880061"}
{"text": "### State\nConfusion: 2.999296\nAction: correct_fact\nReward: -0.39091\nNext Confusion: 3.133839"}
{"text": "### State\nConfusion: 3.52445\nAction: worked_example\nReward: 1.497804\nNext Confusion: 2.473041"}
{"text": "### State\nConfusion: 3.491305\nAction: analogize\nReward: -1.428821\nNext Confusion: 4.530365"}
{"text": "### State\nConfusion: 7.938798\nAction: question\nReward: 1.634904\nNext Confusion: 7.270522"}
{"text": "### State\nConfusion: 3.63357\nAction: explain\nReward: -1.215149\nNext Confusion: 4.452176"}
{"text": "### State\nConfusion: 7.658671\nAction: correct_fact\nReward: -0.057792\nNext Confusion: 7.543473"}
{"text": "### State\nConfusion: 4.069772\nAction: analogize\nReward: 0.751406\nNext Confusion: 3.895314"}
{"text": "### State\nConfusion: 5.882581\nAction: correct_fact\nReward: -0.371607\nNext Confusion: 5.911959"}
{"text": "### State\nConfusion: 4.669287\nAction: explain\nReward: -0.120479\nNext Confusion: 4.880317"}
{"text": "### State\nConfusion: 4.291442\nAction: correct_fact\nReward: -0.798368\nNext Confusion: 4.442684"}
{"text": "### State\nConfusion: 5.854513\nAction: analogize\nReward: -0.261615\nNext Confusion: 6.171615"}
{"text": "### State\nConfusion: 6.836235\nAction: explain\nReward: -0.649459\nNext Confusion: 6.768502"}
{"text": "### State\nConfusion: 3.439055\nAction: analogize\nReward: -0.147351\nNext Confusion: 3.926904"}
{"text": "### State\nConfusion: 3.48054\nAction: correct_fact\nReward: 0.060255\nNext Confusion: 3.50379"}
{"text": "### State\nConfusion: 4.569834\nAction: analogize\nReward: -1.409163\nNext Confusion: 5.57295"}
{"text": "### State\nConfusion: 6.003526\nAction: question\nReward: 0.689142\nNext Confusion: 5.342512"}
{"text": "### State\nConfusion: 4.746644\nAction: analogize\nReward: -1.093781\nNext Confusion: 5.532921"}
{"text": "### State\nConfusion: 7.746864\nAction: analogize\nReward: -0.978909\nNext Confusion: 8.778952"}
{"text": "### State\nConfusion: 8.820141\nAction: explain\nReward: 0.943031\nNext Confusion: 7.812218"}
{"text": "### State\nConfusion: 8.521159\nAction: analogize\nReward: 0.563675\nNext Confusion: 9.113123"}
{"text": "### State\nConfusion: 2.340023\nAction: explain\nReward: 0.847821\nNext Confusion: 1.814186"}
{"text": "### State\nConfusion: 4.345832\nAction: question\nReward: 1.241657\nNext Confusion: 3.922095"}
{"text": "### State\nConfusion: 4.367841\nAction: analogize\nReward: -3.81745\nNext Confusion: 4.413113"}
{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.006\nNext Confusion: 10.0"}
{"text": "### State\nConfusion: 3.855926\nAction: worked_example\nReward: 1.359279\nNext Confusion: 2.140503"}
{"text": "### State\nConfusion: 5.947092\nAction: analogize\nReward: -1.062392\nNext Confusion: 6.499333"}
{"text": "### State\nConfusion: 4.267658\nAction: explain\nReward: -0.076467\nNext Confusion: 4.075721"}
{"text": "### State\nConfusion: 5.994388\nAction: worked_example\nReward: 1.125528\nNext Confusion: 4.426499"}
{"text": "### State\nConfusion: 10.0\nAction: worked_example\nReward: 0.999445\nNext Confusion: 8.992741"}
{"text": "### State\nConfusion: 5.015469\nAction: question\nReward: -0.550988\nNext Confusion: 5.527272"}
{"text": "### State\nConfusion: 4.494957\nAction: analogize\nReward: -1.693285\nNext Confusion: 5.166589"}
{"text": "### State\nConfusion: 3.628634\nAction: analogize\nReward: -1.178641\nNext Confusion: 3.971706"}
{"text": "### State\nConfusion: 3.521578\nAction: explain\nReward: 0.53535\nNext Confusion: 4.013176"}
{"text": "### State\nConfusion: 3.901601\nAction: analogize\nReward: -0.547104\nNext Confusion: 4.631837"}
{"text": "### State\nConfusion: 4.647506\nAction: explain\nReward: 0.631395\nNext Confusion: 4.224959"}
{"text": "### State\nConfusion: 5.711406\nAction: explain\nReward: 0.49222\nNext Confusion: 6.024854"}
{"text": "### State\nConfusion: 4.07159\nAction: analogize\nReward: -0.424504\nNext Confusion: 4.798265"}
{"text": "### State\nConfusion: 4.140967\nAction: correct_fact\nReward: -1.231822\nNext Confusion: 5.130908"}
{"text": "### State\nConfusion: 6.352411\nAction: analogize\nReward: -1.18697\nNext Confusion: 7.181125"}
{"text": "### State\nConfusion: 7.185508\nAction: worked_example\nReward: 3.034571\nNext Confusion: 4.742269"}
{"text": "### State\nConfusion: 6.044193\nAction: correct_fact\nReward: -0.131452\nNext Confusion: 6.259499"}
{"text": "### State\nConfusion: 3.850863\nAction: correct_fact\nReward: -1.165361\nNext Confusion: 4.437674"}
{"text": "### State\nConfusion: 5.709232\nAction: analogize\nReward: -0.335393\nNext Confusion: 6.03693"}
{"text": "### State\nConfusion: 2.790189\nAction: question\nReward: -0.015608\nNext Confusion: 2.420717"}
{"text": "### State\nConfusion: 2.813883\nAction: analogize\nReward: -0.325674\nNext Confusion: 3.097643"}
{"text": "### State\nConfusion: 4.600974\nAction: analogize\nReward: -0.689222\nNext Confusion: 5.273267"}
{"text": "### State\nConfusion: 6.823961\nAction: question\nReward: 0.231637\nNext Confusion: 6.530535"}
{"text": "### State\nConfusion: 4.187769\nAction: analogize\nReward: 0.41784\nNext Confusion: 4.147315"}
{"text": "### State\nConfusion: 3.270071\nAction: analogize\nReward: -0.701233\nNext Confusion: 3.924204"}
{"text": "### State\nConfusion: 3.892113\nAction: worked_example\nReward: 1.771034\nNext Confusion: 3.077213"}
{"text": "### State\nConfusion: 3.897737\nAction: analogize\nReward: -0.880082\nNext Confusion: 4.28867"}
{"text": "### State\nConfusion: 4.182186\nAction: question\nReward: 1.025072\nNext Confusion: 3.581476"}
{"text": "### State\nConfusion: 3.280212\nAction: analogize\nReward: -0.230556\nNext Confusion: 3.718891"}
{"text": "### State\nConfusion: 5.115473\nAction: analogize\nReward: -0.052009\nNext Confusion: 5.389236"}
{"text": "### State\nConfusion: 3.25951\nAction: explain\nReward: 0.456638\nNext Confusion: 2.630789"}
{"text": "### State\nConfusion: 4.461349\nAction: explain\nReward: -0.238552\nNext Confusion: 4.220826"}
{"text": "### State\nConfusion: 3.37934\nAction: explain\nReward: 1.300042\nNext Confusion: 2.233323"}
{"text": "### State\nConfusion: 7.708539\nAction: explain\nReward: 0.779534\nNext Confusion: 7.262962"}
{"text": "### State\nConfusion: 4.487832\nAction: correct_fact\nReward: -0.640726\nNext Confusion: 4.850804"}
{"text": "### State\nConfusion: 7.527032\nAction: explain\nReward: 0.188903\nNext Confusion: 7.260336"}
{"text": "### State\nConfusion: 5.608341\nAction: correct_fact\nReward: 0.029112\nNext Confusion: 5.848946"}
{"text": "### State\nConfusion: 2.400969\nAction: analogize\nReward: -1.50911\nNext Confusion: 4.221312"}
{"text": "### State\nConfusion: 4.617443\nAction: explain\nReward: 0.140889\nNext Confusion: 4.101977"}
{"text": "### State\nConfusion: 1.713455\nAction: analogize\nReward: -1.625323\nNext Confusion: 3.040552"}
{"text": "### State\nConfusion: 3.207202\nAction: worked_example\nReward: 0.662639\nNext Confusion: 2.216147"}
{"text": "### State\nConfusion: 6.116821\nAction: explain\nReward: 0.739173\nNext Confusion: 5.414842"}
{"text": "### State\nConfusion: 7.716421\nAction: question\nReward: -0.244427\nNext Confusion: 7.80996"}
{"text": "### State\nConfusion: 4.312419\nAction: analogize\nReward: 0.045445\nNext Confusion: 4.849009"}
{"text": "### State\nConfusion: 2.497267\nAction: analogize\nReward: -0.53257\nNext Confusion: 3.211382"}
{"text": "### State\nConfusion: 4.186003\nAction: explain\nReward: -0.100915\nNext Confusion: 4.1359"}
{"text": "### State\nConfusion: 6.124384\nAction: correct_fact\nReward: -0.222745\nNext Confusion: 6.004959"}
{"text": "### State\nConfusion: 4.838198\nAction: analogize\nReward: -0.537104\nNext Confusion: 5.21931"}
{"text": "### State\nConfusion: 4.336282\nAction: analogize\nReward: 0.362864\nNext Confusion: 4.121377"}
{"text": "### State\nConfusion: 6.199643\nAction: analogize\nReward: 0.089828\nNext Confusion: 6.90752"}
{"text": "### State\nConfusion: 6.246179\nAction: worked_example\nReward: 0.016199\nNext Confusion: 6.216495"}
{"text": "### State\nConfusion: 3.753611\nAction: worked_example\nReward: 2.03355\nNext Confusion: 1.908327"}
{"text": "### State\nConfusion: 3.90575\nAction: analogize\nReward: -0.864023\nNext Confusion: 4.875975"}
{"text": "### State\nConfusion: 3.125639\nAction: analogize\nReward: -0.59186\nNext Confusion: 3.940023"}
{"text": "### State\nConfusion: 4.659585\nAction: analogize\nReward: -0.154829\nNext Confusion: 4.735131"}
{"text": "### State\nConfusion: 2.798429\nAction: analogize\nReward: -0.435412\nNext Confusion: 3.529701"}
{"text": "### State\nConfusion: 3.181256\nAction: explain\nReward: 0.548676\nNext Confusion: 3.000934"}
{"text": "### State\nConfusion: 4.824714\nAction: worked_example\nReward: 0.915687\nNext Confusion: 3.833705"}
{"text": "### State\nConfusion: 3.840567\nAction: explain\nReward: 0.957436\nNext Confusion: 3.357632"}
{"text": "### State\nConfusion: 7.498173\nAction: analogize\nReward: 0.597861\nNext Confusion: 7.415368"}
{"text": "### State\nConfusion: 4.988446\nAction: explain\nReward: 0.252937\nNext Confusion: 4.888795"}
{"text": "### State\nConfusion: 4.628919\nAction: analogize\nReward: -0.793281\nNext Confusion: 5.547164"}
{"text": "### State\nConfusion: 2.532769\nAction: analogize\nReward: 0.608471\nNext Confusion: 2.095517"}
{"text": "### State\nConfusion: 6.552197\nAction: analogize\nReward: -0.143695\nNext Confusion: 6.75866"}
{"text": "### State\nConfusion: 6.521643\nAction: analogize\nReward: 0.625005\nNext Confusion: 6.203167"}
{"text": "### State\nConfusion: 4.492259\nAction: worked_example\nReward: 2.173731\nNext Confusion: 2.631922"}
{"text": "### State\nConfusion: 7.241621\nAction: question\nReward: 1.406091\nNext Confusion: 5.86407"}
{"text": "### State\nConfusion: 3.693046\nAction: explain\nReward: 1.068534\nNext Confusion: 2.440671"}
{"text": "### State\nConfusion: 3.962626\nAction: explain\nReward: -0.730675\nNext Confusion: 4.393443"}
{"text": "### State\nConfusion: 6.47488\nAction: analogize\nReward: -0.811074\nNext Confusion: 7.624598"}
{"text": "### State\nConfusion: 7.081945\nAction: analogize\nReward: 0.137783\nNext Confusion: 6.88515"}
{"text": "### State\nConfusion: 6.003791\nAction: analogize\nReward: 0.25566\nNext Confusion: 6.172901"}
{"text": "### State\nConfusion: 4.536524\nAction: analogize\nReward: -0.882808\nNext Confusion: 5.352852"}
{"text": "### State\nConfusion: 7.325707\nAction: analogize\nReward: 0.390309\nNext Confusion: 6.489121"}
{"text": "### State\nConfusion: 3.344648\nAction: analogize\nReward: -3.003033\nNext Confusion: 2.995784"}
{"text": "### State\nConfusion: 4.936317\nAction: analogize\nReward: 0.065177\nNext Confusion: 4.409051"}
{"text": "### State\nConfusion: 5.412323\nAction: worked_example\nReward: 0.69626\nNext Confusion: 4.851012"}
{"text": "### State\nConfusion: 3.733434\nAction: correct_fact\nReward: 1.215873\nNext Confusion: 3.299668"}
{"text": "### State\nConfusion: 3.594582\nAction: analogize\nReward: -0.245522\nNext Confusion: 4.261171"}
{"text": "### State\nConfusion: 5.702906\nAction: analogize\nReward: -1.517803\nNext Confusion: 7.966025"}
{"text": "### State\nConfusion: 4.458236\nAction: analogize\nReward: -0.448019\nNext Confusion: 4.331777"}
{"text": "### State\nConfusion: 3.877192\nAction: correct_fact\nReward: -0.606948\nNext Confusion: 4.709797"}
{"text": "### State\nConfusion: 4.004076\nAction: explain\nReward: 1.357555\nNext Confusion: 3.258263"}
{"text": "### State\nConfusion: 7.576537\nAction: analogize\nReward: -1.53424\nNext Confusion: 8.860916"}
{"text": "### State\nConfusion: 3.554901\nAction: question\nReward: 0.474365\nNext Confusion: 3.006228"}
{"text": "### State\nConfusion: 3.251683\nAction: correct_fact\nReward: 0.518483\nNext Confusion: 2.64814"}
{"text": "### State\nConfusion: 3.569207\nAction: analogize\nReward: -0.410212\nNext Confusion: 3.989583"}
{"text": "### State\nConfusion: 3.670016\nAction: explain\nReward: 1.762957\nNext Confusion: 2.516897"}
{"text": "### State\nConfusion: 2.353776\nAction: worked_example\nReward: 2.261622\nNext Confusion: 0.0"}
{"text": "### State\nConfusion: 5.180178\nAction: worked_example\nReward: 1.90911\nNext Confusion: 3.493268"}
{"text": "### State\nConfusion: 4.770649\nAction: analogize\nReward: -0.145658\nNext Confusion: 4.804286"}
{"text": "### State\nConfusion: 3.80372\nAction: analogize\nReward: -1.000576\nNext Confusion: 4.46522"}
{"text": "### State\nConfusion: 6.532478\nAction: analogize\nReward: -0.946917\nNext Confusion: 7.290173"}
{"text": "### State\nConfusion: 6.0253\nAction: analogize\nReward: -0.456855\nNext Confusion: 6.661849"}
{"text": "### State\nConfusion: 5.025003\nAction: correct_fact\nReward: 0.758891\nNext Confusion: 4.576565"}
{"text": "### State\nConfusion: 2.845722\nAction: analogize\nReward: 1.140977\nNext Confusion: 2.995232"}
{"text": "### State\nConfusion: 7.297413\nAction: analogize\nReward: 1.366689\nNext Confusion: 7.151979"}
{"text": "### State\nConfusion: 9.557812\nAction: analogize\nReward: -0.288579\nNext Confusion: 9.169003"}
{"text": "### State\nConfusion: 6.37981\nAction: explain\nReward: 1.050409\nNext Confusion: 6.148158"}
{"text": "### State\nConfusion: 3.714115\nAction: worked_example\nReward: 2.349328\nNext Confusion: 1.744698"}
{"text": "### State\nConfusion: 3.174654\nAction: analogize\nReward: 0.685784\nNext Confusion: 3.14699"}
{"text": "### State\nConfusion: 6.137669\nAction: question\nReward: 0.671082\nNext Confusion: 5.555594"}
{"text": "### State\nConfusion: 3.0194\nAction: question\nReward: 0.66125\nNext Confusion: 2.640104"}
{"text": "### State\nConfusion: 5.61132\nAction: worked_example\nReward: 1.569835\nNext Confusion: 3.932574"}
{"text": "### State\nConfusion: 5.073577\nAction: analogize\nReward: -0.484999\nNext Confusion: 5.577819"}
{"text": "### State\nConfusion: 6.558275\nAction: analogize\nReward: 0.953879\nNext Confusion: 6.599469"}
{"text": "### State\nConfusion: 3.460505\nAction: analogize\nReward: -1.369673\nNext Confusion: 4.018704"}
{"text": "### State\nConfusion: 4.546068\nAction: analogize\nReward: 0.296617\nNext Confusion: 3.999932"}
{"text": "### State\nConfusion: 2.789076\nAction: correct_fact\nReward: -0.139055\nNext Confusion: 3.162709"}
{"text": "### State\nConfusion: 4.722394\nAction: analogize\nReward: -0.272211\nNext Confusion: 5.144951"}
{"text": "### State\nConfusion: 6.894866\nAction: analogize\nReward: 0.365742\nNext Confusion: 6.84301"}
{"text": "### State\nConfusion: 6.848865\nAction: analogize\nReward: -0.199233\nNext Confusion: 6.911935"}
{"text": "### State\nConfusion: 7.232148\nAction: analogize\nReward: 0.043035\nNext Confusion: 7.288547"}
{"text": "### State\nConfusion: 3.149628\nAction: explain\nReward: -0.427429\nNext Confusion: 3.316867"}
{"text": "### State\nConfusion: 3.926137\nAction: question\nReward: 1.42821\nNext Confusion: 2.881466"}
{"text": "### State\nConfusion: 3.196238\nAction: analogize\nReward: 1.17027\nNext Confusion: 2.384046"}
{"text": "### State\nConfusion: 5.307725\nAction: correct_fact\nReward: -1.086515\nNext Confusion: 5.717581"}
{"text": "### State\nConfusion: 5.070612\nAction: question\nReward: -0.603608\nNext Confusion: 5.819668"}
{"text": "### State\nConfusion: 7.103436\nAction: analogize\nReward: -1.143868\nNext Confusion: 8.541348"}
{"text": "### State\nConfusion: 6.538433\nAction: analogize\nReward: 0.299716\nNext Confusion: 7.164129"}
{"text": "### State\nConfusion: 5.592933\nAction: analogize\nReward: -0.571434\nNext Confusion: 6.062175"}
{"text": "### State\nConfusion: 3.944029\nAction: analogize\nReward: -0.0834\nNext Confusion: 4.574897"}
{"text": "### State\nConfusion: 2.918218\nAction: analogize\nReward: 0.128027\nNext Confusion: 3.573923"}
{"text": "### State\nConfusion: 4.998973\nAction: analogize\nReward: 0.099739\nNext Confusion: 4.395178"}
{"text": "### State\nConfusion: 4.694908\nAction: analogize\nReward: 0.148056\nNext Confusion: 5.204955"}
{"text": "### State\nConfusion: 3.781684\nAction: analogize\nReward: -0.263645\nNext Confusion: 4.12463"}
{"text": "### State\nConfusion: 6.591872\nAction: analogize\nReward: -1.118598\nNext Confusion: 7.24768"}
{"text": "### State\nConfusion: 3.995603\nAction: explain\nReward: 1.786482\nNext Confusion: 2.911538"}
{"text": "### State\nConfusion: 5.361206\nAction: question\nReward: 0.458134\nNext Confusion: 5.455734"}
{"text": "### State\nConfusion: 3.610285\nAction: analogize\nReward: 0.819867\nNext Confusion: 2.220746"}
{"text": "### State\nConfusion: 6.07102\nAction: explain\nReward: 0.175273\nNext Confusion: 6.668109"}
{"text": "### State\nConfusion: 3.806757\nAction: question\nReward: 0.064293\nNext Confusion: 3.739635"}
{"text": "### State\nConfusion: 4.38536\nAction: analogize\nReward: -1.787108\nNext Confusion: 5.131299"}
{"text": "### State\nConfusion: 7.341646\nAction: correct_fact\nReward: -0.362596\nNext Confusion: 8.125962"}
{"text": "### State\nConfusion: 4.368033\nAction: question\nReward: -0.037405\nNext Confusion: 4.460615"}
{"text": "### State\nConfusion: 4.966889\nAction: analogize\nReward: 0.38992\nNext Confusion: 4.741419"}
{"text": "### State\nConfusion: 1.737497\nAction: explain\nReward: -0.588786\nNext Confusion: 1.845481"}
{"text": "### State\nConfusion: 4.904597\nAction: analogize\nReward: -0.504161\nNext Confusion: 5.030465"}
{"text": "### State\nConfusion: 3.854805\nAction: correct_fact\nReward: 0.143636\nNext Confusion: 4.084965"}
{"text": "### State\nConfusion: 10.0\nAction: correct_fact\nReward: 0.161128\nNext Confusion: 9.897921"}
{"text": "### State\nConfusion: 6.948946\nAction: explain\nReward: 0.019254\nNext Confusion: 7.208404"}
{"text": "### State\nConfusion: 5.288423\nAction: worked_example\nReward: 0.99203\nNext Confusion: 3.951557"}
{"text": "### State\nConfusion: 6.829811\nAction: analogize\nReward: -0.455124\nNext Confusion: 7.361673"}
{"text": "### State\nConfusion: 3.869974\nAction: analogize\nReward: 0.530551\nNext Confusion: 3.98222"}
{"text": "### State\nConfusion: 7.334596\nAction: analogize\nReward: -0.876755\nNext Confusion: 8.262695"}
{"text": "### State\nConfusion: 7.459252\nAction: analogize\nReward: -0.095127\nNext Confusion: 7.275376"}
{"text": "### State\nConfusion: 2.977803\nAction: correct_fact\nReward: -0.305983\nNext Confusion: 3.23653"}
{"text": "### State\nConfusion: 4.767777\nAction: analogize\nReward: -0.346948\nNext Confusion: 4.939303"}
{"text": "### State\nConfusion: 9.385743\nAction: correct_fact\nReward: -1.353628\nNext Confusion: 9.932337"}
{"text": "### State\nConfusion: 4.616424\nAction: worked_example\nReward: 1.173654\nNext Confusion: 2.993332"}
{"text": "### State\nConfusion: 3.982618\nAction: question\nReward: 1.465169\nNext Confusion: 3.196667"}
{"text": "### State\nConfusion: 3.731763\nAction: worked_example\nReward: 0.96159\nNext Confusion: 2.827726"}
{"text": "### State\nConfusion: 3.987744\nAction: analogize\nReward: -0.768711\nNext Confusion: 4.888868"}
{"text": "### State\nConfusion: 4.67894\nAction: question\nReward: 0.242026\nNext Confusion: 3.964318"}
{"text": "### State\nConfusion: 2.752789\nAction: analogize\nReward: -1.218773\nNext Confusion: 2.988962"}
{"text": "### State\nConfusion: 6.775572\nAction: analogize\nReward: 0.72213\nNext Confusion: 6.405007"}
{"text": "### State\nConfusion: 2.37896\nAction: worked_example\nReward: 1.470493\nNext Confusion: 1.213226"}
{"text": "### State\nConfusion: 4.411928\nAction: analogize\nReward: 2.35412\nNext Confusion: 3.227744"}
{"text": "### State\nConfusion: 4.708626\nAction: analogize\nReward: -0.456251\nNext Confusion: 4.466039"}