{"shakespeare_char": {"means": {"final_train_loss_mean": 1.3379985094070435, "best_val_loss_mean": 1.4917181332906086, "total_train_time_mean": 106.32513523101807, "avg_inference_tokens_per_second_mean": 411.92593001257757}, "stderrs": {"final_train_loss_stderr": 0.003201916558833036, "best_val_loss_stderr": 0.003391268757171876, "total_train_time_stderr": 1.914600122872072, "avg_inference_tokens_per_second_stderr": 0.28394073235731665}, "final_info_dict": {"final_train_loss": [1.3515650033950806, 1.3306083679199219, 1.331822156906128], "best_val_loss": [1.4789913892745972, 1.5038937330245972, 1.4922692775726318], "total_train_time": [114.43608832359314, 102.65201187133789, 101.88730549812317], "avg_inference_tokens_per_second": [413.0958464706976, 411.58971501237374, 411.0922285546616]}}, "enwik8": {"means": {"final_train_loss_mean": 1.0732988119125366, "best_val_loss_mean": 0.9487595558166504, "total_train_time_mean": 1195.967306137085, "avg_inference_tokens_per_second_mean": 403.99181531961773}, "stderrs": {"final_train_loss_stderr": 0.0, "best_val_loss_stderr": 0.0, "total_train_time_stderr": 0.0, "avg_inference_tokens_per_second_stderr": 0.0}, "final_info_dict": {"final_train_loss": [1.0732988119125366], "best_val_loss": [0.9487595558166504], "total_train_time": [1195.967306137085], "avg_inference_tokens_per_second": [403.99181531961773]}}, "text8": {"means": {"final_train_loss_mean": 1.126334309577942, "best_val_loss_mean": 0.9436998963356018, "total_train_time_mean": 1178.6216180324554, "avg_inference_tokens_per_second_mean": 406.6921961557513}, "stderrs": {"final_train_loss_stderr": 0.0, "best_val_loss_stderr": 0.0, "total_train_time_stderr": 0.0, "avg_inference_tokens_per_second_stderr": 0.0}, "final_info_dict": {"final_train_loss": [1.126334309577942], "best_val_loss": [0.9436998963356018], "total_train_time": [1178.6216180324554], "avg_inference_tokens_per_second": [406.6921961557513]}}}