Text Generation
PEFT
Safetensors
llama
grpo
reinforcement-learning
devops
incident-response
openenv
unsloth
conversational
Instructions to use Arijit-07/aria-devops-llama8b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Arijit-07/aria-devops-llama8b with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- Unsloth Studio new
How to use Arijit-07/aria-devops-llama8b with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Arijit-07/aria-devops-llama8b to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Arijit-07/aria-devops-llama8b to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for Arijit-07/aria-devops-llama8b to start chatting
Load model with FastModel
pip install unsloth

from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
    model_name="Arijit-07/aria-devops-llama8b",
    max_seq_length=2048,
)
| [ | |
| { | |
| "episode": 1, | |
| "task_id": "easy", | |
| "score": 0.54, | |
| "rolling_avg": 0.54, | |
| "loss": -0.12143471837043762, | |
| "elapsed_min": 0.2 | |
| }, | |
| { | |
| "episode": 2, | |
| "task_id": "easy", | |
| "score": 1.6400000000000001, | |
| "rolling_avg": 1.09, | |
| "loss": -0.12805303931236267, | |
| "elapsed_min": 1.2 | |
| }, | |
| { | |
| "episode": 3, | |
| "task_id": "easy", | |
| "score": 0.44, | |
| "rolling_avg": 0.8733333333333334, | |
| "loss": -0.06693907082080841, | |
| "elapsed_min": 1.4 | |
| }, | |
| { | |
| "episode": 4, | |
| "task_id": "easy", | |
| "score": 0.78, | |
| "rolling_avg": 0.8500000000000001, | |
| "loss": -0.16535498201847076, | |
| "elapsed_min": 1.7 | |
| }, | |
| { | |
| "episode": 5, | |
| "task_id": "easy", | |
| "score": 0.88, | |
| "rolling_avg": 0.8560000000000001, | |
| "loss": -0.11092036217451096, | |
| "elapsed_min": 2.1 | |
| }, | |
| { | |
| "episode": 6, | |
| "task_id": "easy", | |
| "score": 0.37, | |
| "rolling_avg": 0.775, | |
| "loss": -0.18090753257274628, | |
| "elapsed_min": 2.2 | |
| }, | |
| { | |
| "episode": 7, | |
| "task_id": "easy", | |
| "score": 0.37, | |
| "rolling_avg": 0.7171428571428572, | |
| "loss": -0.18480174243450165, | |
| "elapsed_min": 2.3 | |
| }, | |
| { | |
| "episode": 8, | |
| "task_id": "easy", | |
| "score": 0.54, | |
| "rolling_avg": 0.6950000000000001, | |
| "loss": -0.1352842003107071, | |
| "elapsed_min": 2.4 | |
| }, | |
| { | |
| "episode": 9, | |
| "task_id": "easy", | |
| "score": 0.37, | |
| "rolling_avg": 0.658888888888889, | |
| "loss": -0.10219474881887436, | |
| "elapsed_min": 2.5 | |
| }, | |
| { | |
| "episode": 10, | |
| "task_id": "easy", | |
| "score": 1.74, | |
| "rolling_avg": 0.7670000000000001, | |
| "loss": -0.12895138561725616, | |
| "elapsed_min": 3.5 | |
| }, | |
| { | |
| "episode": 11, | |
| "task_id": "easy", | |
| "score": 0.44, | |
| "rolling_avg": 0.7570000000000001, | |
| "loss": -0.1572495400905609, | |
| "elapsed_min": 3.7 | |
| }, | |
| { | |
| "episode": 12, | |
| "task_id": "easy", | |
| "score": 0.39, | |
| "rolling_avg": 0.632, | |
| "loss": -0.08590440452098846, | |
| "elapsed_min": 3.9 | |
| }, | |
| { | |
| "episode": 13, | |
| "task_id": "easy", | |
| "score": 0.5800000000000001, | |
| "rolling_avg": 0.6460000000000001, | |
| "loss": -0.07493701577186584, | |
| "elapsed_min": 4.3 | |
| }, | |
| { | |
| "episode": 14, | |
| "task_id": "easy", | |
| "score": 0.46, | |
| "rolling_avg": 0.6140000000000001, | |
| "loss": -0.1000489741563797, | |
| "elapsed_min": 4.5 | |
| }, | |
| { | |
| "episode": 15, | |
| "task_id": "easy", | |
| "score": 0.37, | |
| "rolling_avg": 0.563, | |
| "loss": -0.24669808149337769, | |
| "elapsed_min": 4.6 | |
| }, | |
| { | |
| "episode": 16, | |
| "task_id": "easy", | |
| "score": 1.6899999999999997, | |
| "rolling_avg": 0.695, | |
| "loss": -0.13417300581932068, | |
| "elapsed_min": 5.6 | |
| }, | |
| { | |
| "episode": 17, | |
| "task_id": "easy", | |
| "score": 0.7400000000000002, | |
| "rolling_avg": 0.732, | |
| "loss": -0.02531382441520691, | |
| "elapsed_min": 6.7 | |
| }, | |
| { | |
| "episode": 18, | |
| "task_id": "easy", | |
| "score": 0.8900000000000001, | |
| "rolling_avg": 0.767, | |
| "loss": -0.02539382129907608, | |
| "elapsed_min": 7.7 | |
| }, | |
| { | |
| "episode": 19, | |
| "task_id": "easy", | |
| "score": 0.7900000000000003, | |
| "rolling_avg": 0.8090000000000002, | |
| "loss": -0.024298425763845444, | |
| "elapsed_min": 8.8 | |
| }, | |
| { | |
| "episode": 20, | |
| "task_id": "easy", | |
| "score": 0.7400000000000002, | |
| "rolling_avg": 0.7090000000000001, | |
| "loss": -0.05224194377660751, | |
| "elapsed_min": 9.8 | |
| }, | |
| { | |
| "episode": 21, | |
| "task_id": "easy", | |
| "score": 0.7400000000000002, | |
| "rolling_avg": 0.7390000000000001, | |
| "loss": -0.058588918298482895, | |
| "elapsed_min": 10.9 | |
| }, | |
| { | |
| "episode": 22, | |
| "task_id": "easy", | |
| "score": 0.8900000000000001, | |
| "rolling_avg": 0.789, | |
| "loss": -0.10121776908636093, | |
| "elapsed_min": 11.9 | |
| }, | |
| { | |
| "episode": 23, | |
| "task_id": "easy", | |
| "score": 0.7400000000000002, | |
| "rolling_avg": 0.805, | |
| "loss": -0.05905468389391899, | |
| "elapsed_min": 13.0 | |
| }, | |
| { | |
| "episode": 24, | |
| "task_id": "easy", | |
| "score": 0.94, | |
| "rolling_avg": 0.8530000000000001, | |
| "loss": -0.11711429059505463, | |
| "elapsed_min": 14.0 | |
| }, | |
| { | |
| "episode": 25, | |
| "task_id": "easy", | |
| "score": 0.8400000000000001, | |
| "rolling_avg": 0.9, | |
| "loss": -0.09598871320486069, | |
| "elapsed_min": 15.1 | |
| }, | |
| { | |
| "episode": 26, | |
| "task_id": "easy", | |
| "score": 0.6900000000000002, | |
| "rolling_avg": 0.8, | |
| "loss": -0.0566844567656517, | |
| "elapsed_min": 16.1 | |
| }, | |
| { | |
| "episode": 27, | |
| "task_id": "easy", | |
| "score": 0.6400000000000001, | |
| "rolling_avg": 0.79, | |
| "loss": -0.05957688018679619, | |
| "elapsed_min": 17.2 | |
| }, | |
| { | |
| "episode": 28, | |
| "task_id": "easy", | |
| "score": 0.29, | |
| "rolling_avg": 0.7300000000000002, | |
| "loss": 0.0405953973531723, | |
| "elapsed_min": 18.2 | |
| }, | |
| { | |
| "episode": 29, | |
| "task_id": "easy", | |
| "score": 0.23999999999999996, | |
| "rolling_avg": 0.675, | |
| "loss": 0.0662434920668602, | |
| "elapsed_min": 19.3 | |
| }, | |
| { | |
| "episode": 30, | |
| "task_id": "easy", | |
| "score": 0.29000000000000004, | |
| "rolling_avg": 0.6300000000000001, | |
| "loss": 0.04871468245983124, | |
| "elapsed_min": 20.3 | |
| }, | |
| { | |
| "episode": 31, | |
| "task_id": "easy", | |
| "score": 0.3400000000000001, | |
| "rolling_avg": 0.5900000000000001, | |
| "loss": 0.0225782822817564, | |
| "elapsed_min": 21.3 | |
| }, | |
| { | |
| "episode": 32, | |
| "task_id": "easy", | |
| "score": 0.39, | |
| "rolling_avg": 0.54, | |
| "loss": 0.0005533260991796851, | |
| "elapsed_min": 22.3 | |
| }, | |
| { | |
| "episode": 33, | |
| "task_id": "easy", | |
| "score": 0.39000000000000007, | |
| "rolling_avg": 0.505, | |
| "loss": -0.00933866761624813, | |
| "elapsed_min": 23.4 | |
| }, | |
| { | |
| "episode": 34, | |
| "task_id": "easy", | |
| "score": 0.29, | |
| "rolling_avg": 0.44000000000000006, | |
| "loss": 0.025673745200037956, | |
| "elapsed_min": 24.4 | |
| }, | |
| { | |
| "episode": 35, | |
| "task_id": "easy", | |
| "score": 0.6900000000000002, | |
| "rolling_avg": 0.4250000000000001, | |
| "loss": -0.084134541451931, | |
| "elapsed_min": 25.4 | |
| }, | |
| { | |
| "episode": 36, | |
| "task_id": "easy", | |
| "score": 0.7400000000000002, | |
| "rolling_avg": 0.43000000000000005, | |
| "loss": -0.053947098553180695, | |
| "elapsed_min": 26.4 | |
| }, | |
| { | |
| "episode": 37, | |
| "task_id": "easy", | |
| "score": 0.9400000000000002, | |
| "rolling_avg": 0.46000000000000013, | |
| "loss": -0.1245078295469284, | |
| "elapsed_min": 27.4 | |
| }, | |
| { | |
| "episode": 38, | |
| "task_id": "easy", | |
| "score": 1.09, | |
| "rolling_avg": 0.5400000000000001, | |
| "loss": -0.1214509904384613, | |
| "elapsed_min": 28.5 | |
| }, | |
| { | |
| "episode": 39, | |
| "task_id": "easy", | |
| "score": 0.6900000000000002, | |
| "rolling_avg": 0.5850000000000002, | |
| "loss": -0.07190271466970444, | |
| "elapsed_min": 29.5 | |
| }, | |
| { | |
| "episode": 40, | |
| "task_id": "easy", | |
| "score": 0.78, | |
| "rolling_avg": 0.6340000000000001, | |
| "loss": -0.11039736866950989, | |
| "elapsed_min": 30.3 | |
| }, | |
| { | |
| "episode": 41, | |
| "task_id": "medium", | |
| "score": 0.49000000000000005, | |
| "rolling_avg": 0.49000000000000005, | |
| "loss": -0.09235180914402008, | |
| "elapsed_min": 31.4 | |
| }, | |
| { | |
| "episode": 42, | |
| "task_id": "medium", | |
| "score": 0.8400000000000001, | |
| "rolling_avg": 0.665, | |
| "loss": -0.176436185836792, | |
| "elapsed_min": 32.4 | |
| }, | |
| { | |
| "episode": 43, | |
| "task_id": "medium", | |
| "score": 0.39000000000000007, | |
| "rolling_avg": 0.5733333333333334, | |
| "loss": -0.047593407332897186, | |
| "elapsed_min": 33.6 | |
| }, | |
| { | |
| "episode": 44, | |
| "task_id": "medium", | |
| "score": 0.3400000000000001, | |
| "rolling_avg": 0.5150000000000001, | |
| "loss": -0.040770307183265686, | |
| "elapsed_min": 34.7 | |
| }, | |
| { | |
| "episode": 45, | |
| "task_id": "medium", | |
| "score": 0.29, | |
| "rolling_avg": 0.4700000000000001, | |
| "loss": -0.02725624106824398, | |
| "elapsed_min": 35.7 | |
| }, | |
| { | |
| "episode": 46, | |
| "task_id": "medium", | |
| "score": 0.39000000000000007, | |
| "rolling_avg": 0.4566666666666668, | |
| "loss": -0.046115946024656296, | |
| "elapsed_min": 36.8 | |
| }, | |
| { | |
| "episode": 47, | |
| "task_id": "medium", | |
| "score": 0.23999999999999996, | |
| "rolling_avg": 0.42571428571428577, | |
| "loss": 0.00903060007840395, | |
| "elapsed_min": 37.9 | |
| }, | |
| { | |
| "episode": 48, | |
| "task_id": "medium", | |
| "score": 0.23999999999999996, | |
| "rolling_avg": 0.4025, | |
| "loss": 0.01689656637609005, | |
| "elapsed_min": 39.0 | |
| }, | |
| { | |
| "episode": 49, | |
| "task_id": "medium", | |
| "score": 0.34, | |
| "rolling_avg": 0.39555555555555555, | |
| "loss": -0.028721345588564873, | |
| "elapsed_min": 40.1 | |
| }, | |
| { | |
| "episode": 50, | |
| "task_id": "medium", | |
| "score": 0.39000000000000007, | |
| "rolling_avg": 0.395, | |
| "loss": -0.07144424319267273, | |
| "elapsed_min": 41.2 | |
| }, | |
| { | |
| "episode": 51, | |
| "task_id": "medium", | |
| "score": 0.5400000000000001, | |
| "rolling_avg": 0.4, | |
| "loss": -0.10756971687078476, | |
| "elapsed_min": 42.2 | |
| }, | |
| { | |
| "episode": 52, | |
| "task_id": "medium", | |
| "score": 0.4400000000000001, | |
| "rolling_avg": 0.36000000000000004, | |
| "loss": -0.10540562868118286, | |
| "elapsed_min": 43.3 | |
| }, | |
| { | |
| "episode": 53, | |
| "task_id": "medium", | |
| "score": 0.3400000000000001, | |
| "rolling_avg": 0.3550000000000001, | |
| "loss": -0.01990152895450592, | |
| "elapsed_min": 44.4 | |
| }, | |
| { | |
| "episode": 54, | |
| "task_id": "medium", | |
| "score": 0.23999999999999996, | |
| "rolling_avg": 0.345, | |
| "loss": 0.011199951171875, | |
| "elapsed_min": 45.4 | |
| }, | |
| { | |
| "episode": 55, | |
| "task_id": "medium", | |
| "score": 0.34, | |
| "rolling_avg": 0.35, | |
| "loss": -0.0704483762383461, | |
| "elapsed_min": 46.5 | |
| }, | |
| { | |
| "episode": 56, | |
| "task_id": "medium", | |
| "score": 0.38999999999999996, | |
| "rolling_avg": 0.35, | |
| "loss": -0.08460726588964462, | |
| "elapsed_min": 47.6 | |
| }, | |
| { | |
| "episode": 57, | |
| "task_id": "medium", | |
| "score": 0.4900000000000001, | |
| "rolling_avg": 0.37500000000000006, | |
| "loss": -0.09814709424972534, | |
| "elapsed_min": 48.7 | |
| }, | |
| { | |
| "episode": 58, | |
| "task_id": "medium", | |
| "score": 0.39, | |
| "rolling_avg": 0.39000000000000007, | |
| "loss": -0.040027916431427, | |
| "elapsed_min": 49.7 | |
| }, | |
| { | |
| "episode": 59, | |
| "task_id": "medium", | |
| "score": 0.38999999999999996, | |
| "rolling_avg": 0.39500000000000013, | |
| "loss": -0.08392232656478882, | |
| "elapsed_min": 50.8 | |
| }, | |
| { | |
| "episode": 60, | |
| "task_id": "medium", | |
| "score": 0.23999999999999996, | |
| "rolling_avg": 0.38000000000000006, | |
| "loss": -0.009092062711715698, | |
| "elapsed_min": 51.9 | |
| }, | |
| { | |
| "episode": 61, | |
| "task_id": "medium", | |
| "score": 0.44000000000000006, | |
| "rolling_avg": 0.37, | |
| "loss": -0.058374952524900436, | |
| "elapsed_min": 53.0 | |
| }, | |
| { | |
| "episode": 62, | |
| "task_id": "medium", | |
| "score": 0.43999999999999995, | |
| "rolling_avg": 0.37, | |
| "loss": -0.14105059206485748, | |
| "elapsed_min": 54.1 | |
| }, | |
| { | |
| "episode": 63, | |
| "task_id": "medium", | |
| "score": 0.4400000000000001, | |
| "rolling_avg": 0.38, | |
| "loss": -0.08380116522312164, | |
| "elapsed_min": 55.1 | |
| }, | |
| { | |
| "episode": 64, | |
| "task_id": "medium", | |
| "score": 0.49000000000000005, | |
| "rolling_avg": 0.40499999999999997, | |
| "loss": -0.11519451439380646, | |
| "elapsed_min": 56.2 | |
| }, | |
| { | |
| "episode": 65, | |
| "task_id": "medium", | |
| "score": 0.29, | |
| "rolling_avg": 0.4, | |
| "loss": -0.017708923667669296, | |
| "elapsed_min": 57.2 | |
| }, | |
| { | |
| "episode": 66, | |
| "task_id": "medium", | |
| "score": 0.23999999999999996, | |
| "rolling_avg": 0.385, | |
| "loss": -0.007058671675622463, | |
| "elapsed_min": 58.3 | |
| }, | |
| { | |
| "episode": 67, | |
| "task_id": "medium", | |
| "score": 0.29000000000000004, | |
| "rolling_avg": 0.365, | |
| "loss": -0.028480907902121544, | |
| "elapsed_min": 59.4 | |
| }, | |
| { | |
| "episode": 68, | |
| "task_id": "medium", | |
| "score": 0.23999999999999996, | |
| "rolling_avg": 0.35, | |
| "loss": 0.0003400370478630066, | |
| "elapsed_min": 60.4 | |
| }, | |
| { | |
| "episode": 69, | |
| "task_id": "medium", | |
| "score": 0.3400000000000001, | |
| "rolling_avg": 0.34500000000000003, | |
| "loss": -0.034816063940525055, | |
| "elapsed_min": 61.5 | |
| }, | |
| { | |
| "episode": 70, | |
| "task_id": "medium", | |
| "score": 0.4400000000000001, | |
| "rolling_avg": 0.365, | |
| "loss": -0.12672019004821777, | |
| "elapsed_min": 62.5 | |
| }, | |
| { | |
| "episode": 71, | |
| "task_id": "medium", | |
| "score": 0.54, | |
| "rolling_avg": 0.37500000000000006, | |
| "loss": -0.1321611851453781, | |
| "elapsed_min": 63.6 | |
| }, | |
| { | |
| "episode": 72, | |
| "task_id": "medium", | |
| "score": 0.4900000000000001, | |
| "rolling_avg": 0.38, | |
| "loss": -0.11640733480453491, | |
| "elapsed_min": 64.7 | |
| }, | |
| { | |
| "episode": 73, | |
| "task_id": "medium", | |
| "score": 0.39000000000000007, | |
| "rolling_avg": 0.37500000000000006, | |
| "loss": -0.08983750641345978, | |
| "elapsed_min": 65.7 | |
| }, | |
| { | |
| "episode": 74, | |
| "task_id": "medium", | |
| "score": 0.39000000000000007, | |
| "rolling_avg": 0.36500000000000005, | |
| "loss": -0.06033878028392792, | |
| "elapsed_min": 66.8 | |
| }, | |
| { | |
| "episode": 75, | |
| "task_id": "medium", | |
| "score": 0.33999999999999997, | |
| "rolling_avg": 0.37000000000000005, | |
| "loss": -0.046499669551849365, | |
| "elapsed_min": 67.9 | |
| }, | |
| { | |
| "episode": 76, | |
| "task_id": "medium", | |
| "score": 0.3400000000000001, | |
| "rolling_avg": 0.38000000000000006, | |
| "loss": -0.029506457969546318, | |
| "elapsed_min": 68.9 | |
| }, | |
| { | |
| "episode": 77, | |
| "task_id": "medium", | |
| "score": 0.39000000000000007, | |
| "rolling_avg": 0.39000000000000007, | |
| "loss": -0.08039389550685883, | |
| "elapsed_min": 70.0 | |
| }, | |
| { | |
| "episode": 78, | |
| "task_id": "medium", | |
| "score": 0.34, | |
| "rolling_avg": 0.4000000000000001, | |
| "loss": -0.0734604224562645, | |
| "elapsed_min": 71.0 | |
| }, | |
| { | |
| "episode": 79, | |
| "task_id": "medium", | |
| "score": 0.23999999999999996, | |
| "rolling_avg": 0.39, | |
| "loss": -0.020788073539733887, | |
| "elapsed_min": 72.1 | |
| }, | |
| { | |
| "episode": 80, | |
| "task_id": "medium", | |
| "score": 0.23999999999999996, | |
| "rolling_avg": 0.37, | |
| "loss": -0.009029777720570564, | |
| "elapsed_min": 73.2 | |
| }, | |
| { | |
| "episode": 81, | |
| "task_id": "hard", | |
| "score": 1.5899999999999999, | |
| "rolling_avg": 1.5899999999999999, | |
| "loss": -0.10761424154043198, | |
| "elapsed_min": 74.3 | |
| }, | |
| { | |
| "episode": 82, | |
| "task_id": "hard", | |
| "score": 1.6399999999999997, | |
| "rolling_avg": 1.6149999999999998, | |
| "loss": -0.07337230443954468, | |
| "elapsed_min": 75.3 | |
| }, | |
| { | |
| "episode": 83, | |
| "task_id": "hard", | |
| "score": 1.5399999999999996, | |
| "rolling_avg": 1.5899999999999999, | |
| "loss": -0.10122022032737732, | |
| "elapsed_min": 76.4 | |
| }, | |
| { | |
| "episode": 84, | |
| "task_id": "hard", | |
| "score": 1.7399999999999995, | |
| "rolling_avg": 1.6274999999999997, | |
| "loss": -0.05785955488681793, | |
| "elapsed_min": 77.5 | |
| }, | |
| { | |
| "episode": 85, | |
| "task_id": "hard", | |
| "score": 2.0399999999999996, | |
| "rolling_avg": 1.7099999999999997, | |
| "loss": -0.09229159355163574, | |
| "elapsed_min": 78.6 | |
| }, | |
| { | |
| "episode": 86, | |
| "task_id": "hard", | |
| "score": 1.7899999999999996, | |
| "rolling_avg": 1.723333333333333, | |
| "loss": -0.1031956821680069, | |
| "elapsed_min": 79.6 | |
| }, | |
| { | |
| "episode": 87, | |
| "task_id": "hard", | |
| "score": 1.5399999999999998, | |
| "rolling_avg": 1.6971428571428568, | |
| "loss": -0.11315083503723145, | |
| "elapsed_min": 80.7 | |
| }, | |
| { | |
| "episode": 88, | |
| "task_id": "hard", | |
| "score": 1.14, | |
| "rolling_avg": 1.6274999999999997, | |
| "loss": -0.06293876469135284, | |
| "elapsed_min": 81.8 | |
| }, | |
| { | |
| "episode": 89, | |
| "task_id": "hard", | |
| "score": 0.9900000000000002, | |
| "rolling_avg": 1.5566666666666664, | |
| "loss": -0.09899002313613892, | |
| "elapsed_min": 82.9 | |
| }, | |
| { | |
| "episode": 90, | |
| "task_id": "hard", | |
| "score": 1.3399999999999999, | |
| "rolling_avg": 1.5349999999999997, | |
| "loss": -0.07810334116220474, | |
| "elapsed_min": 84.0 | |
| }, | |
| { | |
| "episode": 91, | |
| "task_id": "hard", | |
| "score": 0.8900000000000001, | |
| "rolling_avg": 1.4649999999999999, | |
| "loss": -0.10680361092090607, | |
| "elapsed_min": 85.1 | |
| }, | |
| { | |
| "episode": 92, | |
| "task_id": "hard", | |
| "score": 1.4899999999999998, | |
| "rolling_avg": 1.4499999999999997, | |
| "loss": -0.1284235566854477, | |
| "elapsed_min": 86.2 | |
| }, | |
| { | |
| "episode": 93, | |
| "task_id": "hard", | |
| "score": 1.09, | |
| "rolling_avg": 1.4049999999999998, | |
| "loss": -0.11051454395055771, | |
| "elapsed_min": 87.3 | |
| }, | |
| { | |
| "episode": 94, | |
| "task_id": "hard", | |
| "score": 1.14, | |
| "rolling_avg": 1.3450000000000002, | |
| "loss": -0.15035489201545715, | |
| "elapsed_min": 88.4 | |
| }, | |
| { | |
| "episode": 95, | |
| "task_id": "hard", | |
| "score": 0.94, | |
| "rolling_avg": 1.2349999999999999, | |
| "loss": -0.11060954630374908, | |
| "elapsed_min": 89.4 | |
| }, | |
| { | |
| "episode": 96, | |
| "task_id": "hard", | |
| "score": 0.44000000000000006, | |
| "rolling_avg": 1.1, | |
| "loss": -0.05425233766436577, | |
| "elapsed_min": 90.5 | |
| }, | |
| { | |
| "episode": 97, | |
| "task_id": "hard", | |
| "score": 0.4900000000000001, | |
| "rolling_avg": 0.9949999999999999, | |
| "loss": -0.08663400262594223, | |
| "elapsed_min": 91.6 | |
| }, | |
| { | |
| "episode": 98, | |
| "task_id": "hard", | |
| "score": 0.8399999999999999, | |
| "rolling_avg": 0.9649999999999999, | |
| "loss": -0.059657029807567596, | |
| "elapsed_min": 92.7 | |
| }, | |
| { | |
| "episode": 99, | |
| "task_id": "hard", | |
| "score": 0.6400000000000001, | |
| "rolling_avg": 0.93, | |
| "loss": -0.06711545586585999, | |
| "elapsed_min": 93.8 | |
| }, | |
| { | |
| "episode": 100, | |
| "task_id": "hard", | |
| "score": 0.44000000000000017, | |
| "rolling_avg": 0.8399999999999999, | |
| "loss": -0.02081288956105709, | |
| "elapsed_min": 94.8 | |
| }, | |
| { | |
| "episode": 101, | |
| "task_id": "hard", | |
| "score": 0.54, | |
| "rolling_avg": 0.805, | |
| "loss": -0.07743717730045319, | |
| "elapsed_min": 95.9 | |
| }, | |
| { | |
| "episode": 102, | |
| "task_id": "hard", | |
| "score": 0.44000000000000006, | |
| "rolling_avg": 0.7000000000000001, | |
| "loss": -0.018033726140856743, | |
| "elapsed_min": 97.0 | |
| }, | |
| { | |
| "episode": 103, | |
| "task_id": "hard", | |
| "score": 0.54, | |
| "rolling_avg": 0.6450000000000001, | |
| "loss": -0.07679533958435059, | |
| "elapsed_min": 98.1 | |
| }, | |
| { | |
| "episode": 104, | |
| "task_id": "hard", | |
| "score": 0.54, | |
| "rolling_avg": 0.5850000000000001, | |
| "loss": -0.06460466980934143, | |
| "elapsed_min": 99.2 | |
| }, | |
| { | |
| "episode": 105, | |
| "task_id": "hard", | |
| "score": 0.5400000000000001, | |
| "rolling_avg": 0.5450000000000002, | |
| "loss": -0.07421746850013733, | |
| "elapsed_min": 100.2 | |
| }, | |
| { | |
| "episode": 106, | |
| "task_id": "hard", | |
| "score": 0.4900000000000001, | |
| "rolling_avg": 0.55, | |
| "loss": -0.10693149268627167, | |
| "elapsed_min": 101.3 | |
| }, | |
| { | |
| "episode": 107, | |
| "task_id": "hard", | |
| "score": 0.39, | |
| "rolling_avg": 0.54, | |
| "loss": -0.041160814464092255, | |
| "elapsed_min": 102.4 | |
| }, | |
| { | |
| "episode": 108, | |
| "task_id": "hard", | |
| "score": 0.23999999999999996, | |
| "rolling_avg": 0.4800000000000001, | |
| "loss": 0.0243326835334301, | |
| "elapsed_min": 103.5 | |
| }, | |
| { | |
| "episode": 109, | |
| "task_id": "hard", | |
| "score": 0.34, | |
| "rolling_avg": 0.45000000000000007, | |
| "loss": -0.025716159492731094, | |
| "elapsed_min": 104.5 | |
| }, | |
| { | |
| "episode": 110, | |
| "task_id": "hard", | |
| "score": 0.5900000000000001, | |
| "rolling_avg": 0.465, | |
| "loss": -0.0469183623790741, | |
| "elapsed_min": 105.6 | |
| }, | |
| { | |
| "episode": 111, | |
| "task_id": "hard", | |
| "score": 0.29, | |
| "rolling_avg": 0.44000000000000006, | |
| "loss": -0.003872685134410858, | |
| "elapsed_min": 106.7 | |
| }, | |
| { | |
| "episode": 112, | |
| "task_id": "hard", | |
| "score": 0.43999999999999995, | |
| "rolling_avg": 0.44000000000000006, | |
| "loss": -0.01721161976456642, | |
| "elapsed_min": 107.8 | |
| }, | |
| { | |
| "episode": 113, | |
| "task_id": "hard", | |
| "score": 0.6900000000000002, | |
| "rolling_avg": 0.45499999999999996, | |
| "loss": -0.10440249741077423, | |
| "elapsed_min": 108.9 | |
| }, | |
| { | |
| "episode": 114, | |
| "task_id": "hard", | |
| "score": 0.33999999999999997, | |
| "rolling_avg": 0.43500000000000005, | |
| "loss": -0.031179871410131454, | |
| "elapsed_min": 109.9 | |
| }, | |
| { | |
| "episode": 115, | |
| "task_id": "hard", | |
| "score": 0.49000000000000005, | |
| "rolling_avg": 0.43000000000000005, | |
| "loss": -0.05355419963598251, | |
| "elapsed_min": 111.0 | |
| }, | |
| { | |
| "episode": 116, | |
| "task_id": "hard", | |
| "score": 0.5900000000000001, | |
| "rolling_avg": 0.44000000000000006, | |
| "loss": -0.04942004010081291, | |
| "elapsed_min": 112.1 | |
| }, | |
| { | |
| "episode": 117, | |
| "task_id": "hard", | |
| "score": 0.49000000000000016, | |
| "rolling_avg": 0.45, | |
| "loss": -0.04643632099032402, | |
| "elapsed_min": 113.2 | |
| }, | |
| { | |
| "episode": 118, | |
| "task_id": "hard", | |
| "score": 0.8400000000000001, | |
| "rolling_avg": 0.51, | |
| "loss": -0.05764473229646683, | |
| "elapsed_min": 114.3 | |
| }, | |
| { | |
| "episode": 119, | |
| "task_id": "hard", | |
| "score": 1.19, | |
| "rolling_avg": 0.5950000000000001, | |
| "loss": -0.12574931979179382, | |
| "elapsed_min": 115.4 | |
| }, | |
| { | |
| "episode": 120, | |
| "task_id": "hard", | |
| "score": 0.99, | |
| "rolling_avg": 0.6350000000000001, | |
| "loss": -0.07021882385015488, | |
| "elapsed_min": 116.4 | |
| }, | |
| { | |
| "episode": 121, | |
| "task_id": "bonus", | |
| "score": 0.72, | |
| "rolling_avg": 0.72, | |
| "loss": -0.1563481241464615, | |
| "elapsed_min": 117.6 | |
| }, | |
| { | |
| "episode": 122, | |
| "task_id": "bonus", | |
| "score": 0.6800000000000002, | |
| "rolling_avg": 0.7000000000000001, | |
| "loss": -0.13133162260055542, | |
| "elapsed_min": 118.7 | |
| }, | |
| { | |
| "episode": 123, | |
| "task_id": "bonus", | |
| "score": 0.76, | |
| "rolling_avg": 0.7200000000000001, | |
| "loss": -0.11883395165205002, | |
| "elapsed_min": 119.9 | |
| }, | |
| { | |
| "episode": 124, | |
| "task_id": "bonus", | |
| "score": 0.62, | |
| "rolling_avg": 0.6950000000000001, | |
| "loss": -0.12456952035427094, | |
| "elapsed_min": 121.0 | |
| }, | |
| { | |
| "episode": 125, | |
| "task_id": "bonus", | |
| "score": 0.49000000000000016, | |
| "rolling_avg": 0.6540000000000001, | |
| "loss": -0.10143512487411499, | |
| "elapsed_min": 122.1 | |
| }, | |
| { | |
| "episode": 126, | |
| "task_id": "bonus", | |
| "score": 0.7000000000000001, | |
| "rolling_avg": 0.6616666666666667, | |
| "loss": -0.13113725185394287, | |
| "elapsed_min": 123.2 | |
| }, | |
| { | |
| "episode": 127, | |
| "task_id": "bonus", | |
| "score": 0.7700000000000002, | |
| "rolling_avg": 0.6771428571428573, | |
| "loss": -0.11259196698665619, | |
| "elapsed_min": 124.3 | |
| }, | |
| { | |
| "episode": 128, | |
| "task_id": "bonus", | |
| "score": 0.81, | |
| "rolling_avg": 0.6937500000000001, | |
| "loss": -0.16549530625343323, | |
| "elapsed_min": 125.4 | |
| }, | |
| { | |
| "episode": 129, | |
| "task_id": "bonus", | |
| "score": 0.6700000000000002, | |
| "rolling_avg": 0.6911111111111112, | |
| "loss": -0.08555784821510315, | |
| "elapsed_min": 126.6 | |
| }, | |
| { | |
| "episode": 130, | |
| "task_id": "bonus", | |
| "score": 0.7300000000000002, | |
| "rolling_avg": 0.6950000000000001, | |
| "loss": -0.1284562349319458, | |
| "elapsed_min": 127.7 | |
| }, | |
| { | |
| "episode": 131, | |
| "task_id": "bonus", | |
| "score": 0.75, | |
| "rolling_avg": 0.6980000000000001, | |
| "loss": -0.13779829442501068, | |
| "elapsed_min": 128.8 | |
| }, | |
| { | |
| "episode": 132, | |
| "task_id": "bonus", | |
| "score": 0.7500000000000001, | |
| "rolling_avg": 0.7050000000000001, | |
| "loss": -0.10122223943471909, | |
| "elapsed_min": 130.0 | |
| }, | |
| { | |
| "episode": 133, | |
| "task_id": "bonus", | |
| "score": 0.6200000000000001, | |
| "rolling_avg": 0.6910000000000001, | |
| "loss": -0.10923080146312714, | |
| "elapsed_min": 131.1 | |
| }, | |
| { | |
| "episode": 134, | |
| "task_id": "bonus", | |
| "score": 0.5200000000000001, | |
| "rolling_avg": 0.6810000000000002, | |
| "loss": -0.13451352715492249, | |
| "elapsed_min": 132.3 | |
| }, | |
| { | |
| "episode": 135, | |
| "task_id": "bonus", | |
| "score": 0.7300000000000002, | |
| "rolling_avg": 0.7050000000000002, | |
| "loss": -0.16815370321273804, | |
| "elapsed_min": 133.5 | |
| }, | |
| { | |
| "episode": 136, | |
| "task_id": "bonus", | |
| "score": 0.9600000000000002, | |
| "rolling_avg": 0.7310000000000001, | |
| "loss": -0.1660919487476349, | |
| "elapsed_min": 134.6 | |
| }, | |
| { | |
| "episode": 137, | |
| "task_id": "bonus", | |
| "score": 0.6900000000000002, | |
| "rolling_avg": 0.7230000000000001, | |
| "loss": -0.13483691215515137, | |
| "elapsed_min": 135.7 | |
| }, | |
| { | |
| "episode": 138, | |
| "task_id": "bonus", | |
| "score": 0.5000000000000001, | |
| "rolling_avg": 0.6920000000000002, | |
| "loss": -0.10621734708547592, | |
| "elapsed_min": 136.8 | |
| }, | |
| { | |
| "episode": 139, | |
| "task_id": "bonus", | |
| "score": 0.76, | |
| "rolling_avg": 0.7010000000000001, | |
| "loss": -0.14708703756332397, | |
| "elapsed_min": 138.0 | |
| }, | |
| { | |
| "episode": 140, | |
| "task_id": "bonus", | |
| "score": 0.52, | |
| "rolling_avg": 0.68, | |
| "loss": -0.058200687170028687, | |
| "elapsed_min": 139.1 | |
| }, | |
| { | |
| "episode": 141, | |
| "task_id": "bonus", | |
| "score": 0.6000000000000001, | |
| "rolling_avg": 0.665, | |
| "loss": -0.09626239538192749, | |
| "elapsed_min": 140.2 | |
| }, | |
| { | |
| "episode": 142, | |
| "task_id": "bonus", | |
| "score": 0.8800000000000001, | |
| "rolling_avg": 0.678, | |
| "loss": -0.19182077050209045, | |
| "elapsed_min": 141.4 | |
| }, | |
| { | |
| "episode": 143, | |
| "task_id": "bonus", | |
| "score": 0.6800000000000002, | |
| "rolling_avg": 0.6840000000000002, | |
| "loss": -0.15185901522636414, | |
| "elapsed_min": 142.4 | |
| }, | |
| { | |
| "episode": 144, | |
| "task_id": "bonus", | |
| "score": 0.6700000000000002, | |
| "rolling_avg": 0.6990000000000001, | |
| "loss": -0.13845515251159668, | |
| "elapsed_min": 143.5 | |
| }, | |
| { | |
| "episode": 145, | |
| "task_id": "bonus", | |
| "score": 0.4200000000000001, | |
| "rolling_avg": 0.6679999999999999, | |
| "loss": -0.055254243314266205, | |
| "elapsed_min": 144.6 | |
| }, | |
| { | |
| "episode": 146, | |
| "task_id": "bonus", | |
| "score": 0.81, | |
| "rolling_avg": 0.6530000000000001, | |
| "loss": -0.0969042181968689, | |
| "elapsed_min": 145.9 | |
| }, | |
| { | |
| "episode": 147, | |
| "task_id": "bonus", | |
| "score": 0.6000000000000001, | |
| "rolling_avg": 0.6440000000000001, | |
| "loss": -0.07441800832748413, | |
| "elapsed_min": 147.0 | |
| }, | |
| { | |
| "episode": 148, | |
| "task_id": "bonus", | |
| "score": 0.8200000000000001, | |
| "rolling_avg": 0.6759999999999999, | |
| "loss": -0.18915283679962158, | |
| "elapsed_min": 148.1 | |
| }, | |
| { | |
| "episode": 149, | |
| "task_id": "bonus", | |
| "score": 0.5700000000000001, | |
| "rolling_avg": 0.657, | |
| "loss": -0.10145045816898346, | |
| "elapsed_min": 149.3 | |
| }, | |
| { | |
| "episode": 150, | |
| "task_id": "bonus", | |
| "score": 0.81, | |
| "rolling_avg": 0.6860000000000002, | |
| "loss": -0.1386324167251587, | |
| "elapsed_min": 150.4 | |
| }, | |
| { | |
| "episode": 151, | |
| "task_id": "bonus", | |
| "score": 0.6900000000000001, | |
| "rolling_avg": 0.6950000000000002, | |
| "loss": -0.12235265225172043, | |
| "elapsed_min": 151.5 | |
| }, | |
| { | |
| "episode": 152, | |
| "task_id": "bonus", | |
| "score": 0.5400000000000001, | |
| "rolling_avg": 0.6610000000000001, | |
| "loss": -0.15657515823841095, | |
| "elapsed_min": 152.6 | |
| }, | |
| { | |
| "episode": 153, | |
| "task_id": "bonus", | |
| "score": 0.5800000000000002, | |
| "rolling_avg": 0.6510000000000001, | |
| "loss": -0.1349593997001648, | |
| "elapsed_min": 153.8 | |
| }, | |
| { | |
| "episode": 154, | |
| "task_id": "bonus", | |
| "score": 0.6900000000000001, | |
| "rolling_avg": 0.6530000000000002, | |
| "loss": -0.11009057611227036, | |
| "elapsed_min": 154.9 | |
| }, | |
| { | |
| "episode": 155, | |
| "task_id": "bonus", | |
| "score": 0.6400000000000001, | |
| "rolling_avg": 0.6750000000000002, | |
| "loss": -0.13187479972839355, | |
| "elapsed_min": 156.0 | |
| }, | |
| { | |
| "episode": 156, | |
| "task_id": "bonus", | |
| "score": 0.9100000000000004, | |
| "rolling_avg": 0.6850000000000002, | |
| "loss": -0.2191038727760315, | |
| "elapsed_min": 157.2 | |
| }, | |
| { | |
| "episode": 157, | |
| "task_id": "bonus", | |
| "score": 0.7000000000000002, | |
| "rolling_avg": 0.6950000000000002, | |
| "loss": -0.14810220897197723, | |
| "elapsed_min": 158.3 | |
| }, | |
| { | |
| "episode": 158, | |
| "task_id": "bonus", | |
| "score": 0.6200000000000001, | |
| "rolling_avg": 0.675, | |
| "loss": -0.12571939826011658, | |
| "elapsed_min": 159.4 | |
| }, | |
| { | |
| "episode": 159, | |
| "task_id": "bonus", | |
| "score": 0.7000000000000002, | |
| "rolling_avg": 0.6880000000000001, | |
| "loss": -0.11064934730529785, | |
| "elapsed_min": 160.5 | |
| }, | |
| { | |
| "episode": 160, | |
| "task_id": "bonus", | |
| "score": 0.5900000000000001, | |
| "rolling_avg": 0.6660000000000001, | |
| "loss": -0.09303998947143555, | |
| "elapsed_min": 161.6 | |
| } | |
| ] |