pradachan's picture
Upload folder using huggingface_hub
f71c233 verified
{"shakespeare_char": {"means": {"final_train_loss_mean": 1.2994411389033, "best_val_loss_mean": 1.488146464029948, "total_train_time_mean": 103.04783916473389, "avg_inference_tokens_per_second_mean": 399.31698367113546, "style_consistency_scores": {"mean_consistency": 0.9888888888888889, "std_consistency": 0.022222222222222216}}, "stderrs": {"final_train_loss_stderr": 0.014617790386462446, "best_val_loss_stderr": 0.003449464433022718, "total_train_time_stderr": 0.4264296812716999, "avg_inference_tokens_per_second_stderr": 2.685008524977055, "style_consistency_scores": {"mean_consistency": 0.005237828008789223, "std_consistency": 0.01047565601757848}}, "final_info_dict": {"final_train_loss": [1.323727011680603, 1.2378783226013184, 1.3367180824279785], "best_val_loss": [1.4816443920135498, 1.4800429344177246, 1.5027520656585693], "total_train_time": [104.74996542930603, 101.66579079627991, 102.72776126861572], "avg_inference_tokens_per_second": [404.83341990134625, 405.1902112059607, 387.92731990609946], "style_consistency_scores": [{"mean_consistency": 1.0, "std_consistency": 0.0}, {"mean_consistency": 0.9666666666666668, "std_consistency": 0.06666666666666665}, {"mean_consistency": 1.0, "std_consistency": 0.0}]}}, "enwik8": {"means": {"final_train_loss_mean": 1.0756142139434814, "best_val_loss_mean": 0.9364517331123352, "total_train_time_mean": 1176.2822530269623, "avg_inference_tokens_per_second_mean": 414.74111234255105, "style_consistency_scores": {"mean_consistency": 1.0, "std_consistency": 0.0}}, "stderrs": {"final_train_loss_stderr": 0.0, "best_val_loss_stderr": 0.0, "total_train_time_stderr": 0.0, "avg_inference_tokens_per_second_stderr": 0.0, "style_consistency_scores": {"mean_consistency": 0.0, "std_consistency": 0.0}}, "final_info_dict": {"final_train_loss": [1.0756142139434814], "best_val_loss": [0.9364517331123352], "total_train_time": [1176.2822530269623], "avg_inference_tokens_per_second": [414.74111234255105], "style_consistency_scores": [{"mean_consistency": 1.0, "std_consistency": 0.0}]}}, "text8": {"means": {"final_train_loss_mean": 1.1109744310379028, "best_val_loss_mean": 0.9226217865943909, "total_train_time_mean": 1178.823966741562, "avg_inference_tokens_per_second_mean": 410.1106885446575, "style_consistency_scores": {"mean_consistency": 0.9833333333333334, "std_consistency": 0.04999999999999999}}, "stderrs": {"final_train_loss_stderr": 0.0, "best_val_loss_stderr": 0.0, "total_train_time_stderr": 0.0, "avg_inference_tokens_per_second_stderr": 0.0, "style_consistency_scores": {"mean_consistency": 0.0, "std_consistency": 0.0}}, "final_info_dict": {"final_train_loss": [1.1109744310379028], "best_val_loss": [0.9226217865943909], "total_train_time": [1178.823966741562], "avg_inference_tokens_per_second": [410.1106885446575], "style_consistency_scores": [{"mean_consistency": 0.9833333333333334, "std_consistency": 0.04999999999999999}]}}}