diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,57546 @@ +{ + "best_metric": 1.542074203491211, + "best_model_checkpoint": "/mnt/data/alanhshao/vita-e2e/outputs/lucy_deepseek_adaptive_s3/checkpoint-13200", + "epoch": 1.6468039003250272, + "eval_steps": 400, + "global_step": 22800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "audio_loss_0": 4.7188, + "audio_loss_1": 5.0312, + "audio_loss_2": 4.7188, + "audio_loss_3": 5.8438, + "audio_loss_4": 5.7188, + "audio_loss_5": 5.625, + "audio_loss_6": 5.4375, + "epoch": 0, + "loss": 4.875, + "loss_text": 1.6797, + "state_loss_0": 0.0, + "step": 0 + }, + { + "audio_loss_0": 5.1562, + "audio_loss_1": 4.6875, + "audio_loss_2": 4.6875, + "audio_loss_3": 6.0, + "audio_loss_4": 5.7812, + "audio_loss_5": 5.6562, + "audio_loss_6": 5.5938, + "epoch": 0, + "loss": 4.7812, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 0 + }, + { + "epoch": 0.0018057060310581437, + "grad_norm": 10.090036392211914, + "learning_rate": 1.5042117930204574e-06, + "loss": 4.7359, + "step": 25 + }, + { + "audio_loss_0": 4.4062, + "audio_loss_1": 4.75, + "audio_loss_2": 4.375, + "audio_loss_3": 5.4062, + "audio_loss_4": 5.1562, + "audio_loss_5": 5.1562, + "audio_loss_6": 5.0312, + "epoch": 0.0018057060310581437, + "loss": 4.4375, + "loss_text": 1.375, + "state_loss_0": 0.0, + "step": 25 + }, + { + "audio_loss_0": 3.6094, + "audio_loss_1": 4.3438, + "audio_loss_2": 3.9844, + "audio_loss_3": 5.0938, + "audio_loss_4": 5.0, + "audio_loss_5": 4.9062, + "audio_loss_6": 4.875, + "epoch": 0.0018057060310581437, + "loss": 4.0625, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 25 + }, + { + "epoch": 0.0036114120621162874, + "grad_norm": 1.443138837814331, + "learning_rate": 3.0084235860409147e-06, + "loss": 4.0784, + "step": 50 + }, + { + "audio_loss_0": 3.7344, + "audio_loss_1": 4.0, + "audio_loss_2": 3.7812, + "audio_loss_3": 4.7812, + "audio_loss_4": 4.6562, + "audio_loss_5": 4.625, + "audio_loss_6": 4.3125, + "epoch": 0.0036114120621162874, + "loss": 3.9375, + "loss_text": 1.6484, + "state_loss_0": 0.0, + "step": 50 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.9688, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.5312, + "audio_loss_6": 4.2812, + "epoch": 0.0036114120621162874, + "loss": 3.75, + "loss_text": 1.0156, + "state_loss_0": 0.0, + "step": 50 + }, + { + "epoch": 0.005417118093174431, + "grad_norm": 0.9063960909843445, + "learning_rate": 4.512635379061372e-06, + "loss": 3.845, + "step": 75 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.3438, + "epoch": 0.005417118093174431, + "loss": 3.7344, + "loss_text": 1.125, + "state_loss_0": 0.0, + "step": 75 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.125, + "epoch": 0.005417118093174431, + "loss": 3.7031, + "loss_text": 1.2734, + "state_loss_0": 0.0, + "step": 75 + }, + { + "epoch": 0.007222824124232575, + "grad_norm": 1.1464556455612183, + "learning_rate": 6.0168471720818295e-06, + "loss": 3.7785, + "step": 100 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.2188, + "epoch": 0.007222824124232575, + "loss": 3.6562, + "loss_text": 0.9453, + "state_loss_0": 0.0, + "step": 100 + }, + { + "audio_loss_0": 3.7344, + "audio_loss_1": 4.125, + "audio_loss_2": 3.8281, + "audio_loss_3": 4.7812, + "audio_loss_4": 4.6875, + "audio_loss_5": 4.625, + "audio_loss_6": 4.375, + "epoch": 0.007222824124232575, + "loss": 3.9375, + "loss_text": 1.4141, + "state_loss_0": 0.0, + "step": 100 + }, + { + "epoch": 0.00902853015529072, + "grad_norm": 1.0235897302627563, + "learning_rate": 7.521058965102287e-06, + "loss": 3.7359, + "step": 125 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1562, + "epoch": 0.00902853015529072, + "loss": 3.5, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 125 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.125, + "epoch": 0.00902853015529072, + "loss": 3.5781, + "loss_text": 0.6367, + "state_loss_0": 0.0, + "step": 125 + }, + { + "epoch": 0.010834236186348862, + "grad_norm": 1.1745377779006958, + "learning_rate": 9.025270758122744e-06, + "loss": 3.7222, + "step": 150 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.25, + "epoch": 0.010834236186348862, + "loss": 3.7031, + "loss_text": 1.0859, + "state_loss_0": 0.0, + "step": 150 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2812, + "epoch": 0.010834236186348862, + "loss": 3.7344, + "loss_text": 1.2969, + "state_loss_0": 0.0, + "step": 150 + }, + { + "epoch": 0.012639942217407007, + "grad_norm": 1.0553183555603027, + "learning_rate": 1.0529482551143202e-05, + "loss": 3.7158, + "step": 175 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.25, + "epoch": 0.012639942217407007, + "loss": 3.625, + "loss_text": 0.8945, + "state_loss_0": 0.0, + "step": 175 + }, + { + "audio_loss_0": 3.6875, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5938, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.3438, + "epoch": 0.012639942217407007, + "loss": 3.7656, + "loss_text": 1.2812, + "state_loss_0": 0.0, + "step": 175 + }, + { + "epoch": 0.01444564824846515, + "grad_norm": 1.2357053756713867, + "learning_rate": 1.2033694344163659e-05, + "loss": 3.7021, + "step": 200 + }, + { + "audio_loss_0": 3.6562, + "audio_loss_1": 4.0625, + "audio_loss_2": 3.6875, + "audio_loss_3": 4.75, + "audio_loss_4": 4.6875, + "audio_loss_5": 4.5625, + "audio_loss_6": 4.4375, + "epoch": 0.01444564824846515, + "loss": 3.875, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 200 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.3125, + "epoch": 0.01444564824846515, + "loss": 3.6875, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 200 + }, + { + "epoch": 0.016251354279523293, + "grad_norm": 1.285864233970642, + "learning_rate": 1.3537906137184117e-05, + "loss": 3.6905, + "step": 225 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.375, + "audio_loss_6": 4.25, + "epoch": 0.016251354279523293, + "loss": 3.6406, + "loss_text": 0.668, + "state_loss_0": 0.0, + "step": 225 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2812, + "epoch": 0.016251354279523293, + "loss": 3.75, + "loss_text": 1.125, + "state_loss_0": 0.0, + "step": 225 + }, + { + "epoch": 0.01805706031058144, + "grad_norm": 1.2042717933654785, + "learning_rate": 1.5042117930204574e-05, + "loss": 3.6845, + "step": 250 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1562, + "epoch": 0.01805706031058144, + "loss": 3.5938, + "loss_text": 0.6094, + "state_loss_0": 0.0, + "step": 250 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.01805706031058144, + "loss": 3.7031, + "loss_text": 1.1484, + "state_loss_0": 0.0, + "step": 250 + }, + { + "epoch": 0.01986276634163958, + "grad_norm": 1.3100931644439697, + "learning_rate": 1.6546329723225032e-05, + "loss": 3.6916, + "step": 275 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.01986276634163958, + "loss": 3.6562, + "loss_text": 0.9961, + "state_loss_0": 0.0, + "step": 275 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.2188, + "epoch": 0.01986276634163958, + "loss": 3.625, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 275 + }, + { + "epoch": 0.021668472372697724, + "grad_norm": 1.164901614189148, + "learning_rate": 1.805054151624549e-05, + "loss": 3.6784, + "step": 300 + }, + { + "audio_loss_0": 3.7344, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.6562, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2812, + "epoch": 0.021668472372697724, + "loss": 3.7188, + "loss_text": 0.6133, + "state_loss_0": 0.0, + "step": 300 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.9531, + "audio_loss_2": 3.7656, + "audio_loss_3": 4.75, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.5, + "audio_loss_6": 4.3125, + "epoch": 0.021668472372697724, + "loss": 3.7812, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 300 + }, + { + "epoch": 0.023474178403755867, + "grad_norm": 1.3732990026474, + "learning_rate": 1.955475330926595e-05, + "loss": 3.6745, + "step": 325 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0625, + "epoch": 0.023474178403755867, + "loss": 3.5, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 325 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 4.1875, + "audio_loss_2": 3.7812, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.3438, + "epoch": 0.023474178403755867, + "loss": 3.8281, + "loss_text": 0.9375, + "state_loss_0": 0.0, + "step": 325 + }, + { + "epoch": 0.025279884434814014, + "grad_norm": 1.6501582860946655, + "learning_rate": 2.1058965102286405e-05, + "loss": 3.6904, + "step": 350 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.3438, + "epoch": 0.025279884434814014, + "loss": 3.7188, + "loss_text": 0.9219, + "state_loss_0": 0.0, + "step": 350 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2188, + "epoch": 0.025279884434814014, + "loss": 3.7031, + "loss_text": 1.2969, + "state_loss_0": 0.0, + "step": 350 + }, + { + "epoch": 0.027085590465872156, + "grad_norm": 1.3624229431152344, + "learning_rate": 2.2563176895306858e-05, + "loss": 3.6771, + "step": 375 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2812, + "epoch": 0.027085590465872156, + "loss": 3.7031, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 375 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2812, + "epoch": 0.027085590465872156, + "loss": 3.6094, + "loss_text": 0.8125, + "state_loss_0": 0.0, + "step": 375 + }, + { + "epoch": 0.0288912964969303, + "grad_norm": 1.2217912673950195, + "learning_rate": 2.4067388688327318e-05, + "loss": 3.6787, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_AQACONVA": 3.5781, + "eval_audio_loss_1_AQACONVA": 3.8906, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5938, + "eval_audio_loss_6_AQACONVA": 4.375, + "eval_loss": 3.9219, + "eval_loss_AQACONVA": 3.9219, + "eval_loss_text_AQACONVA": 1.9609, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.9531, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 1.6641, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_AQACONVA": 3.5312, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.2812, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 1.3672, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_AQACONVA": 3.4375, + "eval_audio_loss_1_AQACONVA": 3.8438, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.9062, + "eval_loss_AQACONVA": 3.9062, + "eval_loss_text_AQACONVA": 2.5312, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_AQACONVA": 3.4375, + "eval_audio_loss_1_AQACONVA": 4.0, + "eval_audio_loss_2_AQACONVA": 3.7031, + "eval_audio_loss_3_AQACONVA": 4.7188, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.5312, + "eval_audio_loss_6_AQACONVA": 4.3438, + "eval_loss": 3.9844, + "eval_loss_AQACONVA": 3.9844, + "eval_loss_text_AQACONVA": 2.5781, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_AQACONVA": 3.7812, + "eval_audio_loss_1_AQACONVA": 3.9375, + "eval_audio_loss_2_AQACONVA": 3.7656, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5938, + "eval_audio_loss_6_AQACONVA": 4.375, + "eval_loss": 3.9844, + "eval_loss_AQACONVA": 3.9844, + "eval_loss_text_AQACONVA": 2.0781, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_AQACONVA": 3.7969, + "eval_audio_loss_1_AQACONVA": 4.0, + "eval_audio_loss_2_AQACONVA": 3.7031, + "eval_audio_loss_3_AQACONVA": 4.75, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5938, + "eval_audio_loss_6_AQACONVA": 4.3438, + "eval_loss": 3.9531, + "eval_loss_AQACONVA": 3.9531, + "eval_loss_text_AQACONVA": 1.7891, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.8906, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.6328, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_RQACONVA": 3.6094, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 1.9062, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_RQACONVA": 3.8125, + "eval_audio_loss_1_RQACONVA": 4.0312, + "eval_audio_loss_2_RQACONVA": 3.7969, + "eval_audio_loss_3_RQACONVA": 4.8125, + "eval_audio_loss_4_RQACONVA": 4.6562, + "eval_audio_loss_5_RQACONVA": 4.5938, + "eval_audio_loss_6_RQACONVA": 4.375, + "eval_loss": 4.0312, + "eval_loss_RQACONVA": 4.0312, + "eval_loss_text_RQACONVA": 2.1094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_RQACONVA": 3.4844, + "eval_audio_loss_1_RQACONVA": 3.9688, + "eval_audio_loss_2_RQACONVA": 3.7031, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.5625, + "eval_audio_loss_5_RQACONVA": 4.5312, + "eval_audio_loss_6_RQACONVA": 4.3125, + "eval_loss": 3.9688, + "eval_loss_RQACONVA": 3.9688, + "eval_loss_text_RQACONVA": 2.5, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_RQACONVA": 3.5938, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.6875, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.5625, + "eval_audio_loss_5_RQACONVA": 4.5, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.875, + "eval_loss_RQACONVA": 3.875, + "eval_loss_text_RQACONVA": 1.7734, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.9531, + "eval_audio_loss_2_RQACONVA": 3.625, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4688, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 1.8047, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.9219, + "eval_audio_loss_2_RQACONVA": 3.6562, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 1.9531, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.5703, + "eval_loss_RQACONV": 0.5703, + "eval_loss_text_RQACONV": 1.1406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.4492, + "eval_loss_RQACONV": 0.4492, + "eval_loss_text_RQACONV": 0.8984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.6602, + "eval_loss_RQACONV": 0.6602, + "eval_loss_text_RQACONV": 1.3203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.7852, + "eval_loss_RQACONV": 0.7852, + "eval_loss_text_RQACONV": 1.5703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.7227, + "eval_loss_RQACONV": 0.7227, + "eval_loss_text_RQACONV": 1.4453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.8555, + "eval_loss_RQACONV": 0.8555, + "eval_loss_text_RQACONV": 1.7109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.4414, + "eval_loss_RQACONV": 0.4414, + "eval_loss_text_RQACONV": 0.8828, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.498, + "eval_loss_RQACONV": 0.498, + "eval_loss_text_RQACONV": 0.9961, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.4102, + "eval_loss_RQACONV": 0.4102, + "eval_loss_text_RQACONV": 0.8203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.4785, + "eval_loss_RQACONV": 0.4785, + "eval_loss_text_RQACONV": 0.957, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.459, + "eval_loss_RQACONV": 0.459, + "eval_loss_text_RQACONV": 0.918, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 1.1484, + "eval_loss_RQACONV": 1.1484, + "eval_loss_text_RQACONV": 2.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.1426, + "eval_loss_RQACONV": 0.1426, + "eval_loss_text_RQACONV": 0.2852, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.1621, + "eval_loss_RQACONV": 0.1621, + "eval_loss_text_RQACONV": 0.3242, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.0811, + "eval_loss_RQACONV": 0.0811, + "eval_loss_text_RQACONV": 0.1621, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.1484, + "eval_loss_RQACONV": 0.1484, + "eval_loss_text_RQACONV": 0.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.0332, + "eval_loss_RQACONV": 0.0332, + "eval_loss_text_RQACONV": 0.0664, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.1387, + "eval_loss_RQACONV": 0.1387, + "eval_loss_text_RQACONV": 0.2773, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.1016, + "eval_loss_RQACONV": 0.1016, + "eval_loss_text_RQACONV": 0.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.1147, + "eval_loss_RQACONV": 0.1147, + "eval_loss_text_RQACONV": 0.2295, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 0.5781, + "eval_loss_RQACONV": 0.5781, + "eval_loss_text_RQACONV": 1.1562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 400 + }, + { + "epoch": 0.0288912964969303, + "eval_loss": 1.648964762687683, + "eval_runtime": 28.4575, + "eval_samples_per_second": 187.964, + "eval_steps_per_second": 1.476, + "step": 400 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.1875, + "epoch": 0.0288912964969303, + "loss": 3.6562, + "loss_text": 1.0156, + "state_loss_0": 0.0, + "step": 400 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.0288912964969303, + "loss": 3.6719, + "loss_text": 1.0703, + "state_loss_0": 0.0, + "step": 400 + }, + { + "epoch": 0.030697002527988442, + "grad_norm": 1.2179944515228271, + "learning_rate": 2.5571600481347774e-05, + "loss": 3.6715, + "step": 425 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2188, + "epoch": 0.030697002527988442, + "loss": 3.7031, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 425 + }, + { + "audio_loss_0": 3.6406, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.25, + "epoch": 0.030697002527988442, + "loss": 3.75, + "loss_text": 0.9492, + "state_loss_0": 0.0, + "step": 425 + }, + { + "epoch": 0.032502708559046585, + "grad_norm": 1.5326566696166992, + "learning_rate": 2.7075812274368234e-05, + "loss": 3.6846, + "step": 450 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.1562, + "epoch": 0.032502708559046585, + "loss": 3.6719, + "loss_text": 0.8984, + "state_loss_0": 0.0, + "step": 450 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.7188, + "audio_loss_3": 4.75, + "audio_loss_4": 4.375, + "audio_loss_5": 4.5, + "audio_loss_6": 4.2188, + "epoch": 0.032502708559046585, + "loss": 3.6719, + "loss_text": 0.5156, + "state_loss_0": 0.0, + "step": 450 + }, + { + "epoch": 0.03430841459010473, + "grad_norm": 1.34181809425354, + "learning_rate": 2.8580024067388687e-05, + "loss": 3.6848, + "step": 475 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.9219, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.625, + "audio_loss_5": 4.5, + "audio_loss_6": 4.3125, + "epoch": 0.03430841459010473, + "loss": 3.7188, + "loss_text": 0.6523, + "state_loss_0": 0.0, + "step": 475 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.9844, + "audio_loss_2": 3.6875, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.25, + "epoch": 0.03430841459010473, + "loss": 3.75, + "loss_text": 0.8984, + "state_loss_0": 0.0, + "step": 475 + }, + { + "epoch": 0.03611412062116288, + "grad_norm": 1.41166353225708, + "learning_rate": 3.0084235860409147e-05, + "loss": 3.6758, + "step": 500 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.125, + "epoch": 0.03611412062116288, + "loss": 3.5156, + "loss_text": 1.0156, + "state_loss_0": 0.0, + "step": 500 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.625, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.375, + "audio_loss_4": 4.625, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.0312, + "epoch": 0.03611412062116288, + "loss": 3.5625, + "loss_text": 0.6016, + "state_loss_0": 0.0, + "step": 500 + }, + { + "epoch": 0.03791982665222102, + "grad_norm": 1.3465031385421753, + "learning_rate": 3.15884476534296e-05, + "loss": 3.6786, + "step": 525 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.625, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.625, + "audio_loss_5": 4.6562, + "audio_loss_6": 4.2812, + "epoch": 0.03791982665222102, + "loss": 3.7188, + "loss_text": 0.7266, + "state_loss_0": 0.0, + "step": 525 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2812, + "epoch": 0.03791982665222102, + "loss": 3.7188, + "loss_text": 1.1562, + "state_loss_0": 0.0, + "step": 525 + }, + { + "epoch": 0.03972553268327916, + "grad_norm": 1.6188503503799438, + "learning_rate": 3.3092659446450064e-05, + "loss": 3.6814, + "step": 550 + }, + { + "audio_loss_0": 3.8438, + "audio_loss_1": 3.9062, + "audio_loss_2": 3.7031, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5938, + "audio_loss_5": 4.5, + "audio_loss_6": 4.2812, + "epoch": 0.03972553268327916, + "loss": 3.7969, + "loss_text": 0.832, + "state_loss_0": 0.0, + "step": 550 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.875, + "audio_loss_2": 3.6562, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.03972553268327916, + "loss": 3.7031, + "loss_text": 0.7617, + "state_loss_0": 0.0, + "step": 550 + }, + { + "epoch": 0.0415312387143373, + "grad_norm": 1.34479558467865, + "learning_rate": 3.459687123947052e-05, + "loss": 3.6775, + "step": 575 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.875, + "audio_loss_2": 3.625, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2188, + "epoch": 0.0415312387143373, + "loss": 3.6875, + "loss_text": 0.8477, + "state_loss_0": 0.0, + "step": 575 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.25, + "epoch": 0.0415312387143373, + "loss": 3.6875, + "loss_text": 0.8047, + "state_loss_0": 0.0, + "step": 575 + }, + { + "epoch": 0.04333694474539545, + "grad_norm": 1.3514716625213623, + "learning_rate": 3.610108303249098e-05, + "loss": 3.6795, + "step": 600 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.625, + "audio_loss_5": 4.5, + "audio_loss_6": 4.3125, + "epoch": 0.04333694474539545, + "loss": 3.6875, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 600 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0938, + "epoch": 0.04333694474539545, + "loss": 3.5469, + "loss_text": 1.0078, + "state_loss_0": 0.0, + "step": 600 + }, + { + "epoch": 0.045142650776453595, + "grad_norm": 1.5722086429595947, + "learning_rate": 3.760529482551143e-05, + "loss": 3.6763, + "step": 625 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.9531, + "audio_loss_2": 3.625, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.1875, + "epoch": 0.045142650776453595, + "loss": 3.6875, + "loss_text": 0.9297, + "state_loss_0": 0.0, + "step": 625 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.045142650776453595, + "loss": 3.5938, + "loss_text": 0.7188, + "state_loss_0": 0.0, + "step": 625 + }, + { + "epoch": 0.046948356807511735, + "grad_norm": 1.6579945087432861, + "learning_rate": 3.91095066185319e-05, + "loss": 3.6781, + "step": 650 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 4.0, + "audio_loss_2": 3.75, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.3125, + "epoch": 0.046948356807511735, + "loss": 3.75, + "loss_text": 0.9414, + "state_loss_0": 0.0, + "step": 650 + }, + { + "audio_loss_0": 3.6719, + "audio_loss_1": 4.0312, + "audio_loss_2": 3.8438, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.5312, + "audio_loss_6": 4.2812, + "epoch": 0.046948356807511735, + "loss": 3.8125, + "loss_text": 1.0938, + "state_loss_0": 0.0, + "step": 650 + }, + { + "epoch": 0.04875406283856988, + "grad_norm": 1.4397574663162231, + "learning_rate": 4.0613718411552346e-05, + "loss": 3.6799, + "step": 675 + }, + { + "audio_loss_0": 3.9062, + "audio_loss_1": 4.0312, + "audio_loss_2": 3.8906, + "audio_loss_3": 4.8125, + "audio_loss_4": 4.625, + "audio_loss_5": 4.5938, + "audio_loss_6": 4.4688, + "epoch": 0.04875406283856988, + "loss": 3.875, + "loss_text": 0.625, + "state_loss_0": 0.0, + "step": 675 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.125, + "epoch": 0.04875406283856988, + "loss": 3.6875, + "loss_text": 1.0938, + "state_loss_0": 0.0, + "step": 675 + }, + { + "epoch": 0.05055976886962803, + "grad_norm": 1.76947820186615, + "learning_rate": 4.211793020457281e-05, + "loss": 3.687, + "step": 700 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.0938, + "epoch": 0.05055976886962803, + "loss": 3.6094, + "loss_text": 0.6758, + "state_loss_0": 0.0, + "step": 700 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.875, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2188, + "epoch": 0.05055976886962803, + "loss": 3.6875, + "loss_text": 0.7852, + "state_loss_0": 0.0, + "step": 700 + }, + { + "epoch": 0.05236547490068617, + "grad_norm": 1.6433513164520264, + "learning_rate": 4.3622141997593266e-05, + "loss": 3.6862, + "step": 725 + }, + { + "audio_loss_0": 3.6719, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2188, + "epoch": 0.05236547490068617, + "loss": 3.6719, + "loss_text": 0.7305, + "state_loss_0": 0.0, + "step": 725 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.9844, + "audio_loss_2": 3.7344, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0625, + "epoch": 0.05236547490068617, + "loss": 3.6562, + "loss_text": 1.0312, + "state_loss_0": 0.0, + "step": 725 + }, + { + "epoch": 0.05417118093174431, + "grad_norm": 1.6864027976989746, + "learning_rate": 4.5126353790613716e-05, + "loss": 3.6942, + "step": 750 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.1875, + "epoch": 0.05417118093174431, + "loss": 3.7188, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 750 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1875, + "epoch": 0.05417118093174431, + "loss": 3.6562, + "loss_text": 1.2812, + "state_loss_0": 0.0, + "step": 750 + }, + { + "epoch": 0.05597688696280245, + "grad_norm": 1.5095385313034058, + "learning_rate": 4.663056558363418e-05, + "loss": 3.6903, + "step": 775 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5, + "audio_loss_5": 4.5312, + "audio_loss_6": 4.3438, + "epoch": 0.05597688696280245, + "loss": 3.7969, + "loss_text": 1.4531, + "state_loss_0": 0.0, + "step": 775 + }, + { + "audio_loss_0": 3.5781, + "audio_loss_1": 3.75, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.75, + "audio_loss_4": 4.5, + "audio_loss_5": 4.5, + "audio_loss_6": 4.1562, + "epoch": 0.05597688696280245, + "loss": 3.7031, + "loss_text": 1.0156, + "state_loss_0": 0.0, + "step": 775 + }, + { + "epoch": 0.0577825929938606, + "grad_norm": 1.372632384300232, + "learning_rate": 4.8134777376654636e-05, + "loss": 3.6948, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_AQACONVA": 3.6094, + "eval_audio_loss_1_AQACONVA": 3.9219, + "eval_audio_loss_2_AQACONVA": 3.625, + "eval_audio_loss_3_AQACONVA": 4.7188, + "eval_audio_loss_4_AQACONVA": 4.6562, + "eval_audio_loss_5_AQACONVA": 4.5938, + "eval_audio_loss_6_AQACONVA": 4.4062, + "eval_loss": 3.9375, + "eval_loss_AQACONVA": 3.9375, + "eval_loss_text_AQACONVA": 2.0, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_AQACONVA": 3.3906, + "eval_audio_loss_1_AQACONVA": 3.9688, + "eval_audio_loss_2_AQACONVA": 3.6406, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.5, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 1.6641, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.7969, + "eval_loss_AQACONVA": 3.7969, + "eval_loss_text_AQACONVA": 1.4062, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_AQACONVA": 3.4688, + "eval_audio_loss_1_AQACONVA": 3.8281, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.5, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.9375, + "eval_loss_AQACONVA": 3.9375, + "eval_loss_text_AQACONVA": 2.625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_AQACONVA": 3.4531, + "eval_audio_loss_1_AQACONVA": 4.0, + "eval_audio_loss_2_AQACONVA": 3.7188, + "eval_audio_loss_3_AQACONVA": 4.7188, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5625, + "eval_audio_loss_6_AQACONVA": 4.3438, + "eval_loss": 4.0, + "eval_loss_AQACONVA": 4.0, + "eval_loss_text_AQACONVA": 2.6094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_AQACONVA": 3.7969, + "eval_audio_loss_1_AQACONVA": 3.9531, + "eval_audio_loss_2_AQACONVA": 3.7969, + "eval_audio_loss_3_AQACONVA": 4.7188, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5625, + "eval_audio_loss_6_AQACONVA": 4.375, + "eval_loss": 3.9844, + "eval_loss_AQACONVA": 3.9844, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_AQACONVA": 3.8438, + "eval_audio_loss_1_AQACONVA": 4.0312, + "eval_audio_loss_2_AQACONVA": 3.7188, + "eval_audio_loss_3_AQACONVA": 4.75, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.625, + "eval_audio_loss_6_AQACONVA": 4.3438, + "eval_loss": 3.9688, + "eval_loss_AQACONVA": 3.9688, + "eval_loss_text_AQACONVA": 1.8047, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.8438, + "eval_audio_loss_2_RQACONVA": 3.6094, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.9531, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.9219, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.6641, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_RQACONVA": 3.6562, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 1.9766, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_RQACONVA": 3.7812, + "eval_audio_loss_1_RQACONVA": 4.0312, + "eval_audio_loss_2_RQACONVA": 3.8125, + "eval_audio_loss_3_RQACONVA": 4.8438, + "eval_audio_loss_4_RQACONVA": 4.5938, + "eval_audio_loss_5_RQACONVA": 4.5625, + "eval_audio_loss_6_RQACONVA": 4.375, + "eval_loss": 4.0, + "eval_loss_RQACONVA": 4.0, + "eval_loss_text_RQACONVA": 2.1562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_RQACONVA": 3.4688, + "eval_audio_loss_1_RQACONVA": 3.9844, + "eval_audio_loss_2_RQACONVA": 3.7031, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.5625, + "eval_audio_loss_5_RQACONVA": 4.5312, + "eval_audio_loss_6_RQACONVA": 4.3125, + "eval_loss": 3.9688, + "eval_loss_RQACONVA": 3.9688, + "eval_loss_text_RQACONVA": 2.5625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_RQACONVA": 3.6406, + "eval_audio_loss_1_RQACONVA": 3.9375, + "eval_audio_loss_2_RQACONVA": 3.7188, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.5938, + "eval_audio_loss_5_RQACONVA": 4.5312, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8906, + "eval_loss_RQACONVA": 3.8906, + "eval_loss_text_RQACONVA": 1.875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.9531, + "eval_audio_loss_2_RQACONVA": 3.6406, + "eval_audio_loss_3_RQACONVA": 4.7188, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.5, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.8594, + "eval_loss_RQACONVA": 3.8594, + "eval_loss_text_RQACONVA": 1.875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_audio_loss_0_RQACONVA": 3.4531, + "eval_audio_loss_1_RQACONVA": 3.9219, + "eval_audio_loss_2_RQACONVA": 3.6562, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 1.9141, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.5781, + "eval_loss_RQACONV": 0.5781, + "eval_loss_text_RQACONV": 1.1562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.4766, + "eval_loss_RQACONV": 0.4766, + "eval_loss_text_RQACONV": 0.9531, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.6875, + "eval_loss_RQACONV": 0.6875, + "eval_loss_text_RQACONV": 1.375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.7812, + "eval_loss_RQACONV": 0.7812, + "eval_loss_text_RQACONV": 1.5625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.6367, + "eval_loss_RQACONV": 0.6367, + "eval_loss_text_RQACONV": 1.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.7656, + "eval_loss_RQACONV": 0.7656, + "eval_loss_text_RQACONV": 1.5312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.8828, + "eval_loss_RQACONV": 0.8828, + "eval_loss_text_RQACONV": 1.7656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.459, + "eval_loss_RQACONV": 0.459, + "eval_loss_text_RQACONV": 0.918, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.4336, + "eval_loss_RQACONV": 0.4336, + "eval_loss_text_RQACONV": 0.8672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.5312, + "eval_loss_RQACONV": 0.5312, + "eval_loss_text_RQACONV": 1.0625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.4746, + "eval_loss_RQACONV": 0.4746, + "eval_loss_text_RQACONV": 0.9492, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 1.1719, + "eval_loss_RQACONV": 1.1719, + "eval_loss_text_RQACONV": 2.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.1367, + "eval_loss_RQACONV": 0.1367, + "eval_loss_text_RQACONV": 0.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.1621, + "eval_loss_RQACONV": 0.1621, + "eval_loss_text_RQACONV": 0.3242, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.0825, + "eval_loss_RQACONV": 0.0825, + "eval_loss_text_RQACONV": 0.165, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.1611, + "eval_loss_RQACONV": 0.1611, + "eval_loss_text_RQACONV": 0.3223, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.0337, + "eval_loss_RQACONV": 0.0337, + "eval_loss_text_RQACONV": 0.0674, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.1436, + "eval_loss_RQACONV": 0.1436, + "eval_loss_text_RQACONV": 0.2871, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.1084, + "eval_loss_RQACONV": 0.1084, + "eval_loss_text_RQACONV": 0.2168, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.1338, + "eval_loss_RQACONV": 0.1338, + "eval_loss_text_RQACONV": 0.2676, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 0.6016, + "eval_loss_RQACONV": 0.6016, + "eval_loss_text_RQACONV": 1.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 800 + }, + { + "epoch": 0.0577825929938606, + "eval_loss": 1.663297176361084, + "eval_runtime": 27.6013, + "eval_samples_per_second": 193.796, + "eval_steps_per_second": 1.522, + "step": 800 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 4.1562, + "audio_loss_2": 3.7812, + "audio_loss_3": 4.75, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.5312, + "audio_loss_6": 4.3438, + "epoch": 0.0577825929938606, + "loss": 3.8594, + "loss_text": 1.1328, + "state_loss_0": 0.0, + "step": 800 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.375, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.0938, + "epoch": 0.0577825929938606, + "loss": 3.6406, + "loss_text": 0.875, + "state_loss_0": 0.0, + "step": 800 + }, + { + "epoch": 0.059588299024918745, + "grad_norm": 1.6172524690628052, + "learning_rate": 4.963898916967509e-05, + "loss": 3.7015, + "step": 825 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5, + "audio_loss_5": 4.5625, + "audio_loss_6": 4.3125, + "epoch": 0.059588299024918745, + "loss": 3.7188, + "loss_text": 0.9062, + "state_loss_0": 0.0, + "step": 825 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.059588299024918745, + "loss": 3.7031, + "loss_text": 1.5156, + "state_loss_0": 0.0, + "step": 825 + }, + { + "epoch": 0.061394005055976884, + "grad_norm": 1.3569648265838623, + "learning_rate": 4.999993826419675e-05, + "loss": 3.6966, + "step": 850 + }, + { + "audio_loss_0": 3.9531, + "audio_loss_1": 3.9375, + "audio_loss_2": 3.6875, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.6875, + "audio_loss_5": 4.5625, + "audio_loss_6": 4.2812, + "epoch": 0.061394005055976884, + "loss": 3.8125, + "loss_text": 0.8945, + "state_loss_0": 0.0, + "step": 850 + }, + { + "audio_loss_0": 3.6719, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.5, + "audio_loss_6": 4.2188, + "epoch": 0.061394005055976884, + "loss": 3.75, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 850 + }, + { + "epoch": 0.06319971108703504, + "grad_norm": 1.4717838764190674, + "learning_rate": 4.999966891883527e-05, + "loss": 3.6952, + "step": 875 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.5, + "audio_loss_6": 4.1875, + "epoch": 0.06319971108703504, + "loss": 3.6875, + "loss_text": 0.9141, + "state_loss_0": 0.0, + "step": 875 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.9062, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.25, + "epoch": 0.06319971108703504, + "loss": 3.6562, + "loss_text": 0.6953, + "state_loss_0": 0.0, + "step": 875 + }, + { + "epoch": 0.06500541711809317, + "grad_norm": 1.4497020244598389, + "learning_rate": 4.999918580973735e-05, + "loss": 3.7039, + "step": 900 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.375, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1875, + "epoch": 0.06500541711809317, + "loss": 3.5625, + "loss_text": 0.8555, + "state_loss_0": 0.0, + "step": 900 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.25, + "epoch": 0.06500541711809317, + "loss": 3.75, + "loss_text": 1.25, + "state_loss_0": 0.0, + "step": 900 + }, + { + "epoch": 0.06681112314915132, + "grad_norm": 1.354184865951538, + "learning_rate": 4.999848894103389e-05, + "loss": 3.698, + "step": 925 + }, + { + "audio_loss_0": 3.6406, + "audio_loss_1": 3.9062, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.06681112314915132, + "loss": 3.7188, + "loss_text": 0.9102, + "state_loss_0": 0.0, + "step": 925 + }, + { + "audio_loss_0": 3.6406, + "audio_loss_1": 3.875, + "audio_loss_2": 3.6562, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.06681112314915132, + "loss": 3.6719, + "loss_text": 0.6094, + "state_loss_0": 0.0, + "step": 925 + }, + { + "epoch": 0.06861682918020946, + "grad_norm": 1.2884694337844849, + "learning_rate": 4.99975783186836e-05, + "loss": 3.6987, + "step": 950 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2188, + "epoch": 0.06861682918020946, + "loss": 3.75, + "loss_text": 1.4609, + "state_loss_0": 0.0, + "step": 950 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.3125, + "epoch": 0.06861682918020946, + "loss": 3.625, + "loss_text": 0.7188, + "state_loss_0": 0.0, + "step": 950 + }, + { + "epoch": 0.07042253521126761, + "grad_norm": 1.1150685548782349, + "learning_rate": 4.999645395047289e-05, + "loss": 3.7024, + "step": 975 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0938, + "epoch": 0.07042253521126761, + "loss": 3.5, + "loss_text": 0.5703, + "state_loss_0": 0.0, + "step": 975 + }, + { + "audio_loss_0": 3.6094, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2188, + "epoch": 0.07042253521126761, + "loss": 3.7344, + "loss_text": 1.0859, + "state_loss_0": 0.0, + "step": 975 + }, + { + "epoch": 0.07222824124232576, + "grad_norm": 1.199799656867981, + "learning_rate": 4.999511584601585e-05, + "loss": 3.7005, + "step": 1000 + }, + { + "audio_loss_0": 3.75, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.5312, + "audio_loss_6": 4.2812, + "epoch": 0.07222824124232576, + "loss": 3.75, + "loss_text": 0.9805, + "state_loss_0": 0.0, + "step": 1000 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.0938, + "epoch": 0.07222824124232576, + "loss": 3.6406, + "loss_text": 0.8711, + "state_loss_0": 0.0, + "step": 1000 + }, + { + "epoch": 0.07403394727338389, + "grad_norm": 1.226834774017334, + "learning_rate": 4.9993564016754166e-05, + "loss": 3.7004, + "step": 1025 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.8906, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2188, + "epoch": 0.07403394727338389, + "loss": 3.6875, + "loss_text": 0.5352, + "state_loss_0": 0.0, + "step": 1025 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.1875, + "epoch": 0.07403394727338389, + "loss": 3.5938, + "loss_text": 0.8672, + "state_loss_0": 0.0, + "step": 1025 + }, + { + "epoch": 0.07583965330444203, + "grad_norm": 1.2618212699890137, + "learning_rate": 4.999179847595701e-05, + "loss": 3.6992, + "step": 1050 + }, + { + "audio_loss_0": 3.6562, + "audio_loss_1": 3.9062, + "audio_loss_2": 3.6562, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.3125, + "epoch": 0.07583965330444203, + "loss": 3.7812, + "loss_text": 1.0156, + "state_loss_0": 0.0, + "step": 1050 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1875, + "epoch": 0.07583965330444203, + "loss": 3.6562, + "loss_text": 1.4062, + "state_loss_0": 0.0, + "step": 1050 + }, + { + "epoch": 0.07764535933550018, + "grad_norm": 1.312597632408142, + "learning_rate": 4.998981923872093e-05, + "loss": 3.6959, + "step": 1075 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.5, + "audio_loss_6": 4.25, + "epoch": 0.07764535933550018, + "loss": 3.625, + "loss_text": 0.9375, + "state_loss_0": 0.0, + "step": 1075 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 4.0312, + "audio_loss_2": 3.9062, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.7188, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.375, + "epoch": 0.07764535933550018, + "loss": 3.7969, + "loss_text": 0.5898, + "state_loss_0": 0.0, + "step": 1075 + }, + { + "epoch": 0.07945106536655833, + "grad_norm": 1.3653581142425537, + "learning_rate": 4.998762632196969e-05, + "loss": 3.6949, + "step": 1100 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.75, + "audio_loss_4": 4.5938, + "audio_loss_5": 4.5312, + "audio_loss_6": 4.375, + "epoch": 0.07945106536655833, + "loss": 3.8594, + "loss_text": 1.6562, + "state_loss_0": 0.0, + "step": 1100 + }, + { + "audio_loss_0": 3.6406, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2812, + "epoch": 0.07945106536655833, + "loss": 3.6719, + "loss_text": 0.6953, + "state_loss_0": 0.0, + "step": 1100 + }, + { + "epoch": 0.08125677139761647, + "grad_norm": 1.3395963907241821, + "learning_rate": 4.9985219744454204e-05, + "loss": 3.7015, + "step": 1125 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.08125677139761647, + "loss": 3.5938, + "loss_text": 1.3516, + "state_loss_0": 0.0, + "step": 1125 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.6875, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2812, + "epoch": 0.08125677139761647, + "loss": 3.6562, + "loss_text": 0.6719, + "state_loss_0": 0.0, + "step": 1125 + }, + { + "epoch": 0.0830624774286746, + "grad_norm": 1.4323700666427612, + "learning_rate": 4.99825995267523e-05, + "loss": 3.6877, + "step": 1150 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2812, + "epoch": 0.0830624774286746, + "loss": 3.7188, + "loss_text": 1.1172, + "state_loss_0": 0.0, + "step": 1150 + }, + { + "audio_loss_0": 4.0312, + "audio_loss_1": 4.0312, + "audio_loss_2": 3.8281, + "audio_loss_3": 4.8125, + "audio_loss_4": 4.6875, + "audio_loss_5": 4.5625, + "audio_loss_6": 4.4375, + "epoch": 0.0830624774286746, + "loss": 3.9062, + "loss_text": 0.9688, + "state_loss_0": 0.0, + "step": 1150 + }, + { + "epoch": 0.08486818345973275, + "grad_norm": 1.3579331636428833, + "learning_rate": 4.9979765691268585e-05, + "loss": 3.6809, + "step": 1175 + }, + { + "audio_loss_0": 3.6719, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2188, + "epoch": 0.08486818345973275, + "loss": 3.7188, + "loss_text": 0.6758, + "state_loss_0": 0.0, + "step": 1175 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.3125, + "epoch": 0.08486818345973275, + "loss": 3.7812, + "loss_text": 1.6797, + "state_loss_0": 0.0, + "step": 1175 + }, + { + "epoch": 0.0866738894907909, + "grad_norm": 1.1774629354476929, + "learning_rate": 4.997671826223421e-05, + "loss": 3.6885, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8906, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5938, + "eval_audio_loss_6_AQACONVA": 4.375, + "eval_loss": 3.9219, + "eval_loss_AQACONVA": 3.9219, + "eval_loss_text_AQACONVA": 2.125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.9531, + "eval_audio_loss_2_AQACONVA": 3.625, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 1.7109, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_AQACONVA": 3.5156, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.2812, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 1.5234, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_AQACONVA": 3.4531, + "eval_audio_loss_1_AQACONVA": 3.8438, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.9062, + "eval_loss_AQACONVA": 3.9062, + "eval_loss_text_AQACONVA": 2.6094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_AQACONVA": 3.4375, + "eval_audio_loss_1_AQACONVA": 3.9844, + "eval_audio_loss_2_AQACONVA": 3.7188, + "eval_audio_loss_3_AQACONVA": 4.7188, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5312, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 4.0, + "eval_loss_AQACONVA": 4.0, + "eval_loss_text_AQACONVA": 2.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_AQACONVA": 3.7656, + "eval_audio_loss_1_AQACONVA": 3.9219, + "eval_audio_loss_2_AQACONVA": 3.7812, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.5625, + "eval_audio_loss_6_AQACONVA": 4.3438, + "eval_loss": 4.0, + "eval_loss_AQACONVA": 4.0, + "eval_loss_text_AQACONVA": 2.2344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_AQACONVA": 3.7969, + "eval_audio_loss_1_AQACONVA": 4.0312, + "eval_audio_loss_2_AQACONVA": 3.7031, + "eval_audio_loss_3_AQACONVA": 4.75, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5625, + "eval_audio_loss_6_AQACONVA": 4.3438, + "eval_loss": 3.9531, + "eval_loss_AQACONVA": 3.9531, + "eval_loss_text_AQACONVA": 1.8438, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.8438, + "eval_audio_loss_2_RQACONVA": 3.625, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.7656, + "eval_loss_RQACONVA": 3.7656, + "eval_loss_text_RQACONVA": 1.9297, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_RQACONVA": 3.375, + "eval_audio_loss_1_RQACONVA": 3.8906, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.7422, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_RQACONVA": 3.6094, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 2.125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_RQACONVA": 3.7188, + "eval_audio_loss_1_RQACONVA": 4.0625, + "eval_audio_loss_2_RQACONVA": 3.8281, + "eval_audio_loss_3_RQACONVA": 4.8125, + "eval_audio_loss_4_RQACONVA": 4.625, + "eval_audio_loss_5_RQACONVA": 4.5938, + "eval_audio_loss_6_RQACONVA": 4.375, + "eval_loss": 4.0625, + "eval_loss_RQACONVA": 4.0625, + "eval_loss_text_RQACONVA": 2.2656, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_RQACONVA": 3.4844, + "eval_audio_loss_1_RQACONVA": 3.9375, + "eval_audio_loss_2_RQACONVA": 3.6719, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.5625, + "eval_audio_loss_5_RQACONVA": 4.5, + "eval_audio_loss_6_RQACONVA": 4.3125, + "eval_loss": 3.9688, + "eval_loss_RQACONVA": 3.9688, + "eval_loss_text_RQACONVA": 2.5938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_RQACONVA": 3.5938, + "eval_audio_loss_1_RQACONVA": 3.9219, + "eval_audio_loss_2_RQACONVA": 3.7031, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.5625, + "eval_audio_loss_5_RQACONVA": 4.5, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.875, + "eval_loss_RQACONVA": 3.875, + "eval_loss_text_RQACONVA": 1.9453, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_RQACONVA": 3.375, + "eval_audio_loss_1_RQACONVA": 3.9531, + "eval_audio_loss_2_RQACONVA": 3.625, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.5, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 1.8984, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.9219, + "eval_audio_loss_2_RQACONVA": 3.6719, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.2812, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 2.0312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.4746, + "eval_loss_RQACONV": 0.4746, + "eval_loss_text_RQACONV": 0.9492, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.6875, + "eval_loss_RQACONV": 0.6875, + "eval_loss_text_RQACONV": 1.375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.6562, + "eval_loss_RQACONV": 0.6562, + "eval_loss_text_RQACONV": 1.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.7383, + "eval_loss_RQACONV": 0.7383, + "eval_loss_text_RQACONV": 1.4766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.625, + "eval_loss_RQACONV": 0.625, + "eval_loss_text_RQACONV": 1.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.7422, + "eval_loss_RQACONV": 0.7422, + "eval_loss_text_RQACONV": 1.4844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.8555, + "eval_loss_RQACONV": 0.8555, + "eval_loss_text_RQACONV": 1.7109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.4707, + "eval_loss_RQACONV": 0.4707, + "eval_loss_text_RQACONV": 0.9414, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.4258, + "eval_loss_RQACONV": 0.4258, + "eval_loss_text_RQACONV": 0.8516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.625, + "eval_loss_RQACONV": 0.625, + "eval_loss_text_RQACONV": 1.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.5234, + "eval_loss_RQACONV": 0.5234, + "eval_loss_text_RQACONV": 1.0469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.4707, + "eval_loss_RQACONV": 0.4707, + "eval_loss_text_RQACONV": 0.9414, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 1.2031, + "eval_loss_RQACONV": 1.2031, + "eval_loss_text_RQACONV": 2.4062, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.1475, + "eval_loss_RQACONV": 0.1475, + "eval_loss_text_RQACONV": 0.2949, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.1709, + "eval_loss_RQACONV": 0.1709, + "eval_loss_text_RQACONV": 0.3418, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.1011, + "eval_loss_RQACONV": 0.1011, + "eval_loss_text_RQACONV": 0.2021, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.1768, + "eval_loss_RQACONV": 0.1768, + "eval_loss_text_RQACONV": 0.3535, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.0457, + "eval_loss_RQACONV": 0.0457, + "eval_loss_text_RQACONV": 0.0913, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.1143, + "eval_loss_RQACONV": 0.1143, + "eval_loss_text_RQACONV": 0.2285, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.1514, + "eval_loss_RQACONV": 0.1514, + "eval_loss_text_RQACONV": 0.3027, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 0.6133, + "eval_loss_RQACONV": 0.6133, + "eval_loss_text_RQACONV": 1.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1200 + }, + { + "epoch": 0.0866738894907909, + "eval_loss": 1.6661080121994019, + "eval_runtime": 27.8158, + "eval_samples_per_second": 192.301, + "eval_steps_per_second": 1.51, + "step": 1200 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.9688, + "audio_loss_2": 3.6562, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5, + "audio_loss_5": 4.375, + "audio_loss_6": 4.25, + "epoch": 0.0866738894907909, + "loss": 3.7188, + "loss_text": 0.8789, + "state_loss_0": 0.0, + "step": 1200 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.9844, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.3125, + "epoch": 0.0866738894907909, + "loss": 3.75, + "loss_text": 1.0781, + "state_loss_0": 0.0, + "step": 1200 + }, + { + "epoch": 0.08847959552184904, + "grad_norm": 1.2743616104125977, + "learning_rate": 4.9973457265706735e-05, + "loss": 3.6931, + "step": 1225 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.9062, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.375, + "epoch": 0.08847959552184904, + "loss": 3.6719, + "loss_text": 0.9883, + "state_loss_0": 0.0, + "step": 1225 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.25, + "epoch": 0.08847959552184904, + "loss": 3.6875, + "loss_text": 0.9102, + "state_loss_0": 0.0, + "step": 1225 + }, + { + "epoch": 0.09028530155290719, + "grad_norm": 1.2911070585250854, + "learning_rate": 4.996998272956982e-05, + "loss": 3.6972, + "step": 1250 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.1562, + "epoch": 0.09028530155290719, + "loss": 3.625, + "loss_text": 1.1406, + "state_loss_0": 0.0, + "step": 1250 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.375, + "audio_loss_6": 4.0938, + "epoch": 0.09028530155290719, + "loss": 3.6406, + "loss_text": 1.2266, + "state_loss_0": 0.0, + "step": 1250 + }, + { + "epoch": 0.09209100758396534, + "grad_norm": 1.1650667190551758, + "learning_rate": 4.9966294683533064e-05, + "loss": 3.6918, + "step": 1275 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.375, + "audio_loss_6": 4.25, + "epoch": 0.09209100758396534, + "loss": 3.5781, + "loss_text": 0.5938, + "state_loss_0": 0.0, + "step": 1275 + }, + { + "audio_loss_0": 3.6406, + "audio_loss_1": 3.9531, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.5, + "audio_loss_6": 4.2188, + "epoch": 0.09209100758396534, + "loss": 3.7188, + "loss_text": 0.7656, + "state_loss_0": 0.0, + "step": 1275 + }, + { + "epoch": 0.09389671361502347, + "grad_norm": 1.241491436958313, + "learning_rate": 4.99623931591317e-05, + "loss": 3.6977, + "step": 1300 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.2188, + "epoch": 0.09389671361502347, + "loss": 3.625, + "loss_text": 0.9609, + "state_loss_0": 0.0, + "step": 1300 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0625, + "epoch": 0.09389671361502347, + "loss": 3.5625, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 1300 + }, + { + "epoch": 0.09570241964608162, + "grad_norm": 1.185113549232483, + "learning_rate": 4.995827818972636e-05, + "loss": 3.6886, + "step": 1325 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.6875, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.09570241964608162, + "loss": 3.7656, + "loss_text": 1.4844, + "state_loss_0": 0.0, + "step": 1325 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.09570241964608162, + "loss": 3.5469, + "loss_text": 0.707, + "state_loss_0": 0.0, + "step": 1325 + }, + { + "epoch": 0.09750812567713976, + "grad_norm": 1.1876832246780396, + "learning_rate": 4.9953949810502744e-05, + "loss": 3.6947, + "step": 1350 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1562, + "epoch": 0.09750812567713976, + "loss": 3.6094, + "loss_text": 0.957, + "state_loss_0": 0.0, + "step": 1350 + }, + { + "audio_loss_0": 3.8594, + "audio_loss_1": 4.125, + "audio_loss_2": 3.8438, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.09750812567713976, + "loss": 3.7031, + "loss_text": 0.3906, + "state_loss_0": 0.0, + "step": 1350 + }, + { + "epoch": 0.09931383170819791, + "grad_norm": 1.6695477962493896, + "learning_rate": 4.994940805847136e-05, + "loss": 3.6845, + "step": 1375 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.09931383170819791, + "loss": 3.5938, + "loss_text": 0.8242, + "state_loss_0": 0.0, + "step": 1375 + }, + { + "audio_loss_0": 3.5781, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.09931383170819791, + "loss": 3.5469, + "loss_text": 0.6094, + "state_loss_0": 0.0, + "step": 1375 + }, + { + "epoch": 0.10111953773925605, + "grad_norm": 1.2463228702545166, + "learning_rate": 4.994465297246721e-05, + "loss": 3.6906, + "step": 1400 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.10111953773925605, + "loss": 3.4688, + "loss_text": 0.6836, + "state_loss_0": 0.0, + "step": 1400 + }, + { + "audio_loss_0": 3.6562, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.625, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5938, + "audio_loss_5": 4.5, + "audio_loss_6": 4.2812, + "epoch": 0.10111953773925605, + "loss": 3.7969, + "loss_text": 1.2969, + "state_loss_0": 0.0, + "step": 1400 + }, + { + "epoch": 0.10292524377031419, + "grad_norm": 1.1842750310897827, + "learning_rate": 4.9939684593149426e-05, + "loss": 3.6871, + "step": 1425 + }, + { + "audio_loss_0": 3.7969, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5938, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.25, + "epoch": 0.10292524377031419, + "loss": 3.7812, + "loss_text": 1.0859, + "state_loss_0": 0.0, + "step": 1425 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.9375, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.3125, + "epoch": 0.10292524377031419, + "loss": 3.75, + "loss_text": 0.9219, + "state_loss_0": 0.0, + "step": 1425 + }, + { + "epoch": 0.10473094980137233, + "grad_norm": 1.2877933979034424, + "learning_rate": 4.9934502963000946e-05, + "loss": 3.6846, + "step": 1450 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.10473094980137233, + "loss": 3.6406, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 1450 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.25, + "epoch": 0.10473094980137233, + "loss": 3.625, + "loss_text": 0.7695, + "state_loss_0": 0.0, + "step": 1450 + }, + { + "epoch": 0.10653665583243048, + "grad_norm": 1.164013385772705, + "learning_rate": 4.992910812632814e-05, + "loss": 3.6791, + "step": 1475 + }, + { + "audio_loss_0": 3.6562, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.25, + "epoch": 0.10653665583243048, + "loss": 3.75, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 1475 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2812, + "epoch": 0.10653665583243048, + "loss": 3.75, + "loss_text": 1.2812, + "state_loss_0": 0.0, + "step": 1475 + }, + { + "epoch": 0.10834236186348863, + "grad_norm": 1.1094772815704346, + "learning_rate": 4.9923500129260423e-05, + "loss": 3.6927, + "step": 1500 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.875, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.625, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.3125, + "epoch": 0.10834236186348863, + "loss": 3.7812, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 1500 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.3125, + "epoch": 0.10834236186348863, + "loss": 3.7188, + "loss_text": 0.957, + "state_loss_0": 0.0, + "step": 1500 + }, + { + "epoch": 0.11014806789454677, + "grad_norm": 1.2582461833953857, + "learning_rate": 4.9917679019749916e-05, + "loss": 3.6858, + "step": 1525 + }, + { + "audio_loss_0": 3.6875, + "audio_loss_1": 3.9688, + "audio_loss_2": 3.6875, + "audio_loss_3": 4.8125, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.5625, + "audio_loss_6": 4.375, + "epoch": 0.11014806789454677, + "loss": 3.8281, + "loss_text": 1.0703, + "state_loss_0": 0.0, + "step": 1525 + }, + { + "audio_loss_0": 3.6562, + "audio_loss_1": 3.875, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.5312, + "audio_loss_6": 4.3438, + "epoch": 0.11014806789454677, + "loss": 3.7812, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 1525 + }, + { + "epoch": 0.1119537739256049, + "grad_norm": 0.9448930025100708, + "learning_rate": 4.991164484757094e-05, + "loss": 3.6859, + "step": 1550 + }, + { + "audio_loss_0": 3.6719, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.25, + "epoch": 0.1119537739256049, + "loss": 3.6875, + "loss_text": 0.625, + "state_loss_0": 0.0, + "step": 1550 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.875, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.25, + "epoch": 0.1119537739256049, + "loss": 3.6875, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 1550 + }, + { + "epoch": 0.11375947995666305, + "grad_norm": 1.3869820833206177, + "learning_rate": 4.990539766431967e-05, + "loss": 3.6791, + "step": 1575 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.25, + "audio_loss_6": 4.1562, + "epoch": 0.11375947995666305, + "loss": 3.5469, + "loss_text": 0.918, + "state_loss_0": 0.0, + "step": 1575 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.1875, + "epoch": 0.11375947995666305, + "loss": 3.6562, + "loss_text": 0.7969, + "state_loss_0": 0.0, + "step": 1575 + }, + { + "epoch": 0.1155651859877212, + "grad_norm": 1.1633530855178833, + "learning_rate": 4.989893752341366e-05, + "loss": 3.6845, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_AQACONVA": 3.5781, + "eval_audio_loss_1_AQACONVA": 3.9062, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.7188, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5938, + "eval_audio_loss_6_AQACONVA": 4.375, + "eval_loss": 3.9531, + "eval_loss_AQACONVA": 3.9531, + "eval_loss_text_AQACONVA": 2.1094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.9375, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.8281, + "eval_loss_AQACONVA": 3.8281, + "eval_loss_text_AQACONVA": 1.7656, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_AQACONVA": 3.5, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 1.4453, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_AQACONVA": 3.4531, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.9062, + "eval_loss_AQACONVA": 3.9062, + "eval_loss_text_AQACONVA": 2.6562, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_AQACONVA": 3.4375, + "eval_audio_loss_1_AQACONVA": 4.0, + "eval_audio_loss_2_AQACONVA": 3.6875, + "eval_audio_loss_3_AQACONVA": 4.7188, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5312, + "eval_audio_loss_6_AQACONVA": 4.3438, + "eval_loss": 4.0, + "eval_loss_AQACONVA": 4.0, + "eval_loss_text_AQACONVA": 2.6875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_AQACONVA": 3.7812, + "eval_audio_loss_1_AQACONVA": 3.9531, + "eval_audio_loss_2_AQACONVA": 3.7812, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.5625, + "eval_audio_loss_6_AQACONVA": 4.375, + "eval_loss": 4.0, + "eval_loss_AQACONVA": 4.0, + "eval_loss_text_AQACONVA": 2.1875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_AQACONVA": 3.7812, + "eval_audio_loss_1_AQACONVA": 4.0, + "eval_audio_loss_2_AQACONVA": 3.6719, + "eval_audio_loss_3_AQACONVA": 4.75, + "eval_audio_loss_4_AQACONVA": 4.625, + "eval_audio_loss_5_AQACONVA": 4.5625, + "eval_audio_loss_6_AQACONVA": 4.3438, + "eval_loss": 3.9531, + "eval_loss_AQACONVA": 3.9531, + "eval_loss_text_AQACONVA": 1.8516, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_RQACONVA": 3.3281, + "eval_audio_loss_1_RQACONVA": 3.8438, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.9688, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.875, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.7344, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_RQACONVA": 3.6094, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.2812, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 2.0781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_RQACONVA": 3.7656, + "eval_audio_loss_1_RQACONVA": 4.0625, + "eval_audio_loss_2_RQACONVA": 3.7969, + "eval_audio_loss_3_RQACONVA": 4.7812, + "eval_audio_loss_4_RQACONVA": 4.5938, + "eval_audio_loss_5_RQACONVA": 4.5625, + "eval_audio_loss_6_RQACONVA": 4.3438, + "eval_loss": 4.0, + "eval_loss_RQACONVA": 4.0, + "eval_loss_text_RQACONVA": 2.2344, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_RQACONVA": 3.5, + "eval_audio_loss_1_RQACONVA": 3.9219, + "eval_audio_loss_2_RQACONVA": 3.6875, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.5625, + "eval_audio_loss_5_RQACONVA": 4.5, + "eval_audio_loss_6_RQACONVA": 4.3125, + "eval_loss": 3.9688, + "eval_loss_RQACONVA": 3.9688, + "eval_loss_text_RQACONVA": 2.6719, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_RQACONVA": 3.5781, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.7031, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.5625, + "eval_audio_loss_5_RQACONVA": 4.4688, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.9062, + "eval_loss_RQACONVA": 3.9062, + "eval_loss_text_RQACONVA": 1.9688, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.6094, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 1.8594, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.6562, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 2.0312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.6016, + "eval_loss_RQACONV": 0.6016, + "eval_loss_text_RQACONV": 1.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.4785, + "eval_loss_RQACONV": 0.4785, + "eval_loss_text_RQACONV": 0.957, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.6914, + "eval_loss_RQACONV": 0.6914, + "eval_loss_text_RQACONV": 1.3828, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.6484, + "eval_loss_RQACONV": 0.6484, + "eval_loss_text_RQACONV": 1.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.8047, + "eval_loss_RQACONV": 0.8047, + "eval_loss_text_RQACONV": 1.6094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.6836, + "eval_loss_RQACONV": 0.6836, + "eval_loss_text_RQACONV": 1.3672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.75, + "eval_loss_RQACONV": 0.75, + "eval_loss_text_RQACONV": 1.5, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.8398, + "eval_loss_RQACONV": 0.8398, + "eval_loss_text_RQACONV": 1.6797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.4766, + "eval_loss_RQACONV": 0.4766, + "eval_loss_text_RQACONV": 0.9531, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.5234, + "eval_loss_RQACONV": 0.5234, + "eval_loss_text_RQACONV": 1.0469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.4414, + "eval_loss_RQACONV": 0.4414, + "eval_loss_text_RQACONV": 0.8828, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.5273, + "eval_loss_RQACONV": 0.5273, + "eval_loss_text_RQACONV": 1.0547, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.4941, + "eval_loss_RQACONV": 0.4941, + "eval_loss_text_RQACONV": 0.9883, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 1.2266, + "eval_loss_RQACONV": 1.2266, + "eval_loss_text_RQACONV": 2.4531, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.1621, + "eval_loss_RQACONV": 0.1621, + "eval_loss_text_RQACONV": 0.3242, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.165, + "eval_loss_RQACONV": 0.165, + "eval_loss_text_RQACONV": 0.3301, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.0918, + "eval_loss_RQACONV": 0.0918, + "eval_loss_text_RQACONV": 0.1836, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.1729, + "eval_loss_RQACONV": 0.1729, + "eval_loss_text_RQACONV": 0.3457, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.1523, + "eval_loss_RQACONV": 0.1523, + "eval_loss_text_RQACONV": 0.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.0505, + "eval_loss_RQACONV": 0.0505, + "eval_loss_text_RQACONV": 0.1011, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.1602, + "eval_loss_RQACONV": 0.1602, + "eval_loss_text_RQACONV": 0.3203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.106, + "eval_loss_RQACONV": 0.106, + "eval_loss_text_RQACONV": 0.2119, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.1445, + "eval_loss_RQACONV": 0.1445, + "eval_loss_text_RQACONV": 0.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 0.6133, + "eval_loss_RQACONV": 0.6133, + "eval_loss_text_RQACONV": 1.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 1600 + }, + { + "epoch": 0.1155651859877212, + "eval_loss": 1.6687062978744507, + "eval_runtime": 27.8014, + "eval_samples_per_second": 192.4, + "eval_steps_per_second": 1.511, + "step": 1600 + }, + { + "audio_loss_0": 3.6562, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2188, + "epoch": 0.1155651859877212, + "loss": 3.6875, + "loss_text": 0.707, + "state_loss_0": 0.0, + "step": 1600 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.5, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.1155651859877212, + "loss": 3.6406, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 1600 + }, + { + "epoch": 0.11737089201877934, + "grad_norm": 1.2584939002990723, + "learning_rate": 4.989226448009141e-05, + "loss": 3.6905, + "step": 1625 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.1562, + "epoch": 0.11737089201877934, + "loss": 3.6406, + "loss_text": 0.8398, + "state_loss_0": 0.0, + "step": 1625 + }, + { + "audio_loss_0": 3.6406, + "audio_loss_1": 4.125, + "audio_loss_2": 3.8281, + "audio_loss_3": 4.7812, + "audio_loss_4": 4.5938, + "audio_loss_5": 4.5, + "audio_loss_6": 4.3125, + "epoch": 0.11737089201877934, + "loss": 3.8438, + "loss_text": 0.8594, + "state_loss_0": 0.0, + "step": 1625 + }, + { + "epoch": 0.11917659804983749, + "grad_norm": 1.254012107849121, + "learning_rate": 4.988537859141185e-05, + "loss": 3.6827, + "step": 1650 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.11917659804983749, + "loss": 3.6094, + "loss_text": 0.498, + "state_loss_0": 0.0, + "step": 1650 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.25, + "epoch": 0.11917659804983749, + "loss": 3.7188, + "loss_text": 1.3047, + "state_loss_0": 0.0, + "step": 1650 + }, + { + "epoch": 0.12098230408089564, + "grad_norm": 1.1437209844589233, + "learning_rate": 4.987827991625389e-05, + "loss": 3.6753, + "step": 1675 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 4.0, + "audio_loss_2": 3.6562, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.3438, + "epoch": 0.12098230408089564, + "loss": 3.7969, + "loss_text": 1.2734, + "state_loss_0": 0.0, + "step": 1675 + }, + { + "audio_loss_0": 3.7031, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.5312, + "audio_loss_6": 4.2812, + "epoch": 0.12098230408089564, + "loss": 3.75, + "loss_text": 1.0859, + "state_loss_0": 0.0, + "step": 1675 + }, + { + "epoch": 0.12278801011195377, + "grad_norm": 0.8917367458343506, + "learning_rate": 4.987096851531592e-05, + "loss": 3.68, + "step": 1700 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.3125, + "epoch": 0.12278801011195377, + "loss": 3.6875, + "loss_text": 1.0391, + "state_loss_0": 0.0, + "step": 1700 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.12278801011195377, + "loss": 3.6094, + "loss_text": 0.9922, + "state_loss_0": 0.0, + "step": 1700 + }, + { + "epoch": 0.12459371614301191, + "grad_norm": 1.4129472970962524, + "learning_rate": 4.986344445111525e-05, + "loss": 3.6789, + "step": 1725 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1875, + "epoch": 0.12459371614301191, + "loss": 3.5938, + "loss_text": 0.5664, + "state_loss_0": 0.0, + "step": 1725 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.9219, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.1562, + "epoch": 0.12459371614301191, + "loss": 3.6406, + "loss_text": 0.9102, + "state_loss_0": 0.0, + "step": 1725 + }, + { + "epoch": 0.12639942217407008, + "grad_norm": 1.3350123167037964, + "learning_rate": 4.985570778798762e-05, + "loss": 3.6732, + "step": 1750 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.12639942217407008, + "loss": 3.625, + "loss_text": 0.9453, + "state_loss_0": 0.0, + "step": 1750 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.9219, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2188, + "epoch": 0.12639942217407008, + "loss": 3.7188, + "loss_text": 1.1719, + "state_loss_0": 0.0, + "step": 1750 + }, + { + "epoch": 0.1282051282051282, + "grad_norm": 1.0977102518081665, + "learning_rate": 4.984775859208663e-05, + "loss": 3.6851, + "step": 1775 + }, + { + "audio_loss_0": 3.75, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.1282051282051282, + "loss": 3.6875, + "loss_text": 0.9688, + "state_loss_0": 0.0, + "step": 1775 + }, + { + "audio_loss_0": 3.625, + "audio_loss_1": 4.0312, + "audio_loss_2": 3.7031, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2812, + "epoch": 0.1282051282051282, + "loss": 3.7812, + "loss_text": 0.9414, + "state_loss_0": 0.0, + "step": 1775 + }, + { + "epoch": 0.13001083423618634, + "grad_norm": 1.0363290309906006, + "learning_rate": 4.9839596931383163e-05, + "loss": 3.6759, + "step": 1800 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0938, + "epoch": 0.13001083423618634, + "loss": 3.5469, + "loss_text": 0.5508, + "state_loss_0": 0.0, + "step": 1800 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.5, + "audio_loss_6": 4.2188, + "epoch": 0.13001083423618634, + "loss": 3.6875, + "loss_text": 0.9883, + "state_loss_0": 0.0, + "step": 1800 + }, + { + "epoch": 0.1318165402672445, + "grad_norm": 1.1761291027069092, + "learning_rate": 4.983122287566484e-05, + "loss": 3.6821, + "step": 1825 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.25, + "epoch": 0.1318165402672445, + "loss": 3.7344, + "loss_text": 1.3672, + "state_loss_0": 0.0, + "step": 1825 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1875, + "epoch": 0.1318165402672445, + "loss": 3.5938, + "loss_text": 0.8555, + "state_loss_0": 0.0, + "step": 1825 + }, + { + "epoch": 0.13362224629830263, + "grad_norm": 1.1241735219955444, + "learning_rate": 4.982263649653538e-05, + "loss": 3.6755, + "step": 1850 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.13362224629830263, + "loss": 3.6406, + "loss_text": 1.3438, + "state_loss_0": 0.0, + "step": 1850 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0938, + "epoch": 0.13362224629830263, + "loss": 3.5781, + "loss_text": 0.8906, + "state_loss_0": 0.0, + "step": 1850 + }, + { + "epoch": 0.13542795232936078, + "grad_norm": 1.0872678756713867, + "learning_rate": 4.981383786741401e-05, + "loss": 3.6761, + "step": 1875 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.375, + "audio_loss_6": 4.0625, + "epoch": 0.13542795232936078, + "loss": 3.5312, + "loss_text": 0.3574, + "state_loss_0": 0.0, + "step": 1875 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.125, + "epoch": 0.13542795232936078, + "loss": 3.6406, + "loss_text": 1.1172, + "state_loss_0": 0.0, + "step": 1875 + }, + { + "epoch": 0.13723365836041893, + "grad_norm": 1.288773536682129, + "learning_rate": 4.9804827063534845e-05, + "loss": 3.6707, + "step": 1900 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.1875, + "epoch": 0.13723365836041893, + "loss": 3.5938, + "loss_text": 0.6289, + "state_loss_0": 0.0, + "step": 1900 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.6562, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.13723365836041893, + "loss": 3.6719, + "loss_text": 0.6328, + "state_loss_0": 0.0, + "step": 1900 + }, + { + "epoch": 0.13903936439147707, + "grad_norm": 1.2342087030410767, + "learning_rate": 4.979560416194623e-05, + "loss": 3.6696, + "step": 1925 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2812, + "epoch": 0.13903936439147707, + "loss": 3.6562, + "loss_text": 0.7969, + "state_loss_0": 0.0, + "step": 1925 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.13903936439147707, + "loss": 3.625, + "loss_text": 0.7266, + "state_loss_0": 0.0, + "step": 1925 + }, + { + "epoch": 0.14084507042253522, + "grad_norm": 0.9407530426979065, + "learning_rate": 4.978616924151008e-05, + "loss": 3.6758, + "step": 1950 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1875, + "epoch": 0.14084507042253522, + "loss": 3.6875, + "loss_text": 1.1797, + "state_loss_0": 0.0, + "step": 1950 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.2188, + "epoch": 0.14084507042253522, + "loss": 3.5938, + "loss_text": 0.9219, + "state_loss_0": 0.0, + "step": 1950 + }, + { + "epoch": 0.14265077645359336, + "grad_norm": 1.0229004621505737, + "learning_rate": 4.977652238290124e-05, + "loss": 3.6729, + "step": 1975 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.2188, + "epoch": 0.14265077645359336, + "loss": 3.5938, + "loss_text": 1.0078, + "state_loss_0": 0.0, + "step": 1975 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 4.0312, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.1875, + "epoch": 0.14265077645359336, + "loss": 3.625, + "loss_text": 0.4785, + "state_loss_0": 0.0, + "step": 1975 + }, + { + "epoch": 0.1444564824846515, + "grad_norm": 1.207064151763916, + "learning_rate": 4.976666366860671e-05, + "loss": 3.6662, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_AQACONVA": 3.5312, + "eval_audio_loss_1_AQACONVA": 3.875, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.5938, + "eval_audio_loss_6_AQACONVA": 4.3438, + "eval_loss": 3.9219, + "eval_loss_AQACONVA": 3.9219, + "eval_loss_text_AQACONVA": 2.0625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_AQACONVA": 3.3281, + "eval_audio_loss_1_AQACONVA": 3.9375, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 1.7578, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_AQACONVA": 3.5, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 1.4688, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_AQACONVA": 3.4219, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.2812, + "eval_loss": 3.9062, + "eval_loss_AQACONVA": 3.9062, + "eval_loss_text_AQACONVA": 2.6875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_AQACONVA": 3.4219, + "eval_audio_loss_1_AQACONVA": 3.9844, + "eval_audio_loss_2_AQACONVA": 3.6875, + "eval_audio_loss_3_AQACONVA": 4.7188, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.5, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 4.0, + "eval_loss_AQACONVA": 4.0, + "eval_loss_text_AQACONVA": 2.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_AQACONVA": 3.75, + "eval_audio_loss_1_AQACONVA": 3.9375, + "eval_audio_loss_2_AQACONVA": 3.7656, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.5312, + "eval_audio_loss_6_AQACONVA": 4.3438, + "eval_loss": 3.9688, + "eval_loss_AQACONVA": 3.9688, + "eval_loss_text_AQACONVA": 2.125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_AQACONVA": 3.7656, + "eval_audio_loss_1_AQACONVA": 4.0, + "eval_audio_loss_2_AQACONVA": 3.6562, + "eval_audio_loss_3_AQACONVA": 4.7188, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.5625, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.9375, + "eval_loss_AQACONVA": 3.9375, + "eval_loss_text_AQACONVA": 1.8203, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_RQACONVA": 3.3125, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.7656, + "eval_loss_RQACONVA": 3.7656, + "eval_loss_text_RQACONVA": 1.9375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_RQACONVA": 3.3281, + "eval_audio_loss_1_RQACONVA": 3.8906, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.7656, + "eval_loss_RQACONVA": 3.7656, + "eval_loss_text_RQACONVA": 1.7031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_RQACONVA": 3.5781, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.9609, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_RQACONVA": 3.7188, + "eval_audio_loss_1_RQACONVA": 3.9688, + "eval_audio_loss_2_RQACONVA": 3.7344, + "eval_audio_loss_3_RQACONVA": 4.8125, + "eval_audio_loss_4_RQACONVA": 4.5625, + "eval_audio_loss_5_RQACONVA": 4.5312, + "eval_audio_loss_6_RQACONVA": 4.3125, + "eval_loss": 3.9688, + "eval_loss_RQACONVA": 3.9688, + "eval_loss_text_RQACONVA": 2.1562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.9375, + "eval_audio_loss_2_RQACONVA": 3.6719, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.5, + "eval_audio_loss_6_RQACONVA": 4.2812, + "eval_loss": 3.9531, + "eval_loss_RQACONVA": 3.9531, + "eval_loss_text_RQACONVA": 2.625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_RQACONVA": 3.6094, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.6562, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.4688, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.8594, + "eval_loss_RQACONVA": 3.8594, + "eval_loss_text_RQACONVA": 1.875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.9219, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 1.8281, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.6406, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8281, + "eval_loss_RQACONVA": 3.8281, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.7148, + "eval_loss_RQACONV": 0.7148, + "eval_loss_text_RQACONV": 1.4297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.8008, + "eval_loss_RQACONV": 0.8008, + "eval_loss_text_RQACONV": 1.6016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.6562, + "eval_loss_RQACONV": 0.6562, + "eval_loss_text_RQACONV": 1.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.7461, + "eval_loss_RQACONV": 0.7461, + "eval_loss_text_RQACONV": 1.4922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.8867, + "eval_loss_RQACONV": 0.8867, + "eval_loss_text_RQACONV": 1.7734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.4746, + "eval_loss_RQACONV": 0.4746, + "eval_loss_text_RQACONV": 0.9492, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.4316, + "eval_loss_RQACONV": 0.4316, + "eval_loss_text_RQACONV": 0.8633, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.625, + "eval_loss_RQACONV": 0.625, + "eval_loss_text_RQACONV": 1.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.4883, + "eval_loss_RQACONV": 0.4883, + "eval_loss_text_RQACONV": 0.9766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 1.2266, + "eval_loss_RQACONV": 1.2266, + "eval_loss_text_RQACONV": 2.4531, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.1572, + "eval_loss_RQACONV": 0.1572, + "eval_loss_text_RQACONV": 0.3145, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.1641, + "eval_loss_RQACONV": 0.1641, + "eval_loss_text_RQACONV": 0.3281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.0923, + "eval_loss_RQACONV": 0.0923, + "eval_loss_text_RQACONV": 0.1846, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.1631, + "eval_loss_RQACONV": 0.1631, + "eval_loss_text_RQACONV": 0.3262, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.1523, + "eval_loss_RQACONV": 0.1523, + "eval_loss_text_RQACONV": 0.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.0515, + "eval_loss_RQACONV": 0.0515, + "eval_loss_text_RQACONV": 0.103, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.1602, + "eval_loss_RQACONV": 0.1602, + "eval_loss_text_RQACONV": 0.3203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.1079, + "eval_loss_RQACONV": 0.1079, + "eval_loss_text_RQACONV": 0.2158, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 0.625, + "eval_loss_RQACONV": 0.625, + "eval_loss_text_RQACONV": 1.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2000 + }, + { + "epoch": 0.1444564824846515, + "eval_loss": 1.66098952293396, + "eval_runtime": 27.8144, + "eval_samples_per_second": 192.31, + "eval_steps_per_second": 1.51, + "step": 2000 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2812, + "epoch": 0.1444564824846515, + "loss": 3.6719, + "loss_text": 1.0703, + "state_loss_0": 0.0, + "step": 2000 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.9375, + "audio_loss_2": 3.625, + "audio_loss_3": 4.5, + "audio_loss_4": 4.5, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.2188, + "epoch": 0.1444564824846515, + "loss": 3.6406, + "loss_text": 0.8086, + "state_loss_0": 0.0, + "step": 2000 + }, + { + "epoch": 0.14626218851570963, + "grad_norm": 1.175110936164856, + "learning_rate": 4.975659318292505e-05, + "loss": 3.6706, + "step": 2025 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.14626218851570963, + "loss": 3.625, + "loss_text": 1.2344, + "state_loss_0": 0.0, + "step": 2025 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.9219, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.25, + "epoch": 0.14626218851570963, + "loss": 3.7344, + "loss_text": 1.1719, + "state_loss_0": 0.0, + "step": 2025 + }, + { + "epoch": 0.14806789454676778, + "grad_norm": 1.2383140325546265, + "learning_rate": 4.9746311011965586e-05, + "loss": 3.6818, + "step": 2050 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 4.1875, + "audio_loss_2": 3.8281, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.625, + "audio_loss_5": 4.5, + "audio_loss_6": 4.2812, + "epoch": 0.14806789454676778, + "loss": 3.8281, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 2050 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.14806789454676778, + "loss": 3.5938, + "loss_text": 0.8477, + "state_loss_0": 0.0, + "step": 2050 + }, + { + "epoch": 0.14987360057782592, + "grad_norm": 1.2143348455429077, + "learning_rate": 4.9735817243647684e-05, + "loss": 3.6699, + "step": 2075 + }, + { + "audio_loss_0": 3.625, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.5, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.3125, + "epoch": 0.14987360057782592, + "loss": 3.7188, + "loss_text": 1.2969, + "state_loss_0": 0.0, + "step": 2075 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.14987360057782592, + "loss": 3.5781, + "loss_text": 1.1406, + "state_loss_0": 0.0, + "step": 2075 + }, + { + "epoch": 0.15167930660888407, + "grad_norm": 1.1613425016403198, + "learning_rate": 4.972511196770002e-05, + "loss": 3.6702, + "step": 2100 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.15167930660888407, + "loss": 3.6875, + "loss_text": 0.8828, + "state_loss_0": 0.0, + "step": 2100 + }, + { + "audio_loss_0": 3.8438, + "audio_loss_1": 3.9219, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.1562, + "epoch": 0.15167930660888407, + "loss": 3.7344, + "loss_text": 0.8477, + "state_loss_0": 0.0, + "step": 2100 + }, + { + "epoch": 0.15348501263994221, + "grad_norm": 1.0744401216506958, + "learning_rate": 4.971419527565981e-05, + "loss": 3.6696, + "step": 2125 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1562, + "epoch": 0.15348501263994221, + "loss": 3.6562, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 2125 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.375, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.125, + "epoch": 0.15348501263994221, + "loss": 3.6094, + "loss_text": 0.6523, + "state_loss_0": 0.0, + "step": 2125 + }, + { + "epoch": 0.15529071867100036, + "grad_norm": 1.263473391532898, + "learning_rate": 4.9703067260872e-05, + "loss": 3.6572, + "step": 2150 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.15529071867100036, + "loss": 3.6094, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 2150 + }, + { + "audio_loss_0": 3.6094, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1875, + "epoch": 0.15529071867100036, + "loss": 3.6562, + "loss_text": 0.8867, + "state_loss_0": 0.0, + "step": 2150 + }, + { + "epoch": 0.1570964247020585, + "grad_norm": 1.1033211946487427, + "learning_rate": 4.969172801848848e-05, + "loss": 3.6611, + "step": 2175 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1562, + "epoch": 0.1570964247020585, + "loss": 3.5469, + "loss_text": 0.9297, + "state_loss_0": 0.0, + "step": 2175 + }, + { + "audio_loss_0": 3.7969, + "audio_loss_1": 4.0, + "audio_loss_2": 3.8125, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.6562, + "audio_loss_5": 4.5625, + "audio_loss_6": 4.3125, + "epoch": 0.1570964247020585, + "loss": 3.8438, + "loss_text": 1.0156, + "state_loss_0": 0.0, + "step": 2175 + }, + { + "epoch": 0.15890213073311665, + "grad_norm": 1.2553898096084595, + "learning_rate": 4.9680177645467305e-05, + "loss": 3.6614, + "step": 2200 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0938, + "epoch": 0.15890213073311665, + "loss": 3.5469, + "loss_text": 0.9219, + "state_loss_0": 0.0, + "step": 2200 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0, + "epoch": 0.15890213073311665, + "loss": 3.4844, + "loss_text": 0.9258, + "state_loss_0": 0.0, + "step": 2200 + }, + { + "epoch": 0.1607078367641748, + "grad_norm": 0.9463462829589844, + "learning_rate": 4.96684162405718e-05, + "loss": 3.657, + "step": 2225 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2812, + "epoch": 0.1607078367641748, + "loss": 3.6719, + "loss_text": 1.0312, + "state_loss_0": 0.0, + "step": 2225 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.875, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.1607078367641748, + "loss": 3.625, + "loss_text": 0.625, + "state_loss_0": 0.0, + "step": 2225 + }, + { + "epoch": 0.16251354279523295, + "grad_norm": 1.2227954864501953, + "learning_rate": 4.965644390436978e-05, + "loss": 3.6635, + "step": 2250 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1562, + "epoch": 0.16251354279523295, + "loss": 3.6094, + "loss_text": 0.9023, + "state_loss_0": 0.0, + "step": 2250 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.25, + "epoch": 0.16251354279523295, + "loss": 3.7031, + "loss_text": 1.1484, + "state_loss_0": 0.0, + "step": 2250 + }, + { + "epoch": 0.1643192488262911, + "grad_norm": 1.0737227201461792, + "learning_rate": 4.964426073923265e-05, + "loss": 3.6566, + "step": 2275 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2812, + "epoch": 0.1643192488262911, + "loss": 3.875, + "loss_text": 2.25, + "state_loss_0": 0.0, + "step": 2275 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1875, + "epoch": 0.1643192488262911, + "loss": 3.5938, + "loss_text": 0.9648, + "state_loss_0": 0.0, + "step": 2275 + }, + { + "epoch": 0.1661249548573492, + "grad_norm": 1.1797856092453003, + "learning_rate": 4.9631866849334555e-05, + "loss": 3.6587, + "step": 2300 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.875, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.25, + "epoch": 0.1661249548573492, + "loss": 3.75, + "loss_text": 1.2422, + "state_loss_0": 0.0, + "step": 2300 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.1661249548573492, + "loss": 3.6406, + "loss_text": 0.9492, + "state_loss_0": 0.0, + "step": 2300 + }, + { + "epoch": 0.16793066088840736, + "grad_norm": 0.9510337114334106, + "learning_rate": 4.961926234065146e-05, + "loss": 3.6612, + "step": 2325 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 4.0, + "audio_loss_2": 3.7656, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2812, + "epoch": 0.16793066088840736, + "loss": 3.7031, + "loss_text": 0.6016, + "state_loss_0": 0.0, + "step": 2325 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.1875, + "epoch": 0.16793066088840736, + "loss": 3.6875, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 2325 + }, + { + "epoch": 0.1697363669194655, + "grad_norm": 1.1856486797332764, + "learning_rate": 4.960644732096027e-05, + "loss": 3.6575, + "step": 2350 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1875, + "epoch": 0.1697363669194655, + "loss": 3.6562, + "loss_text": 1.2266, + "state_loss_0": 0.0, + "step": 2350 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.1697363669194655, + "loss": 3.6406, + "loss_text": 0.957, + "state_loss_0": 0.0, + "step": 2350 + }, + { + "epoch": 0.17154207295052365, + "grad_norm": 1.2292428016662598, + "learning_rate": 4.959342189983789e-05, + "loss": 3.6602, + "step": 2375 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 4.0938, + "audio_loss_2": 3.7031, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.625, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2812, + "epoch": 0.17154207295052365, + "loss": 3.7969, + "loss_text": 1.0391, + "state_loss_0": 0.0, + "step": 2375 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1875, + "epoch": 0.17154207295052365, + "loss": 3.625, + "loss_text": 0.9531, + "state_loss_0": 0.0, + "step": 2375 + }, + { + "epoch": 0.1733477789815818, + "grad_norm": 0.9881263971328735, + "learning_rate": 4.9580186188660304e-05, + "loss": 3.6539, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_AQACONVA": 3.5156, + "eval_audio_loss_1_AQACONVA": 3.8594, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.5625, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.9062, + "eval_loss_AQACONVA": 3.9062, + "eval_loss_text_AQACONVA": 2.0938, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.9375, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 1.8047, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_AQACONVA": 3.4844, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.7656, + "eval_loss_AQACONVA": 3.7656, + "eval_loss_text_AQACONVA": 1.5234, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_AQACONVA": 3.4219, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.2812, + "eval_loss": 3.9062, + "eval_loss_AQACONVA": 3.9062, + "eval_loss_text_AQACONVA": 2.6875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_AQACONVA": 3.4219, + "eval_audio_loss_1_AQACONVA": 3.9531, + "eval_audio_loss_2_AQACONVA": 3.6875, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.5, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.9688, + "eval_loss_AQACONVA": 3.9688, + "eval_loss_text_AQACONVA": 2.6875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_AQACONVA": 3.7188, + "eval_audio_loss_1_AQACONVA": 3.8906, + "eval_audio_loss_2_AQACONVA": 3.75, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.5312, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.9688, + "eval_loss_AQACONVA": 3.9688, + "eval_loss_text_AQACONVA": 2.1562, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_AQACONVA": 3.75, + "eval_audio_loss_1_AQACONVA": 3.9688, + "eval_audio_loss_2_AQACONVA": 3.6562, + "eval_audio_loss_3_AQACONVA": 4.7188, + "eval_audio_loss_4_AQACONVA": 4.5938, + "eval_audio_loss_5_AQACONVA": 4.5625, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.9375, + "eval_loss_AQACONVA": 3.9375, + "eval_loss_text_AQACONVA": 1.8906, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 1.9922, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_RQACONVA": 3.3125, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 1.7734, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_RQACONVA": 3.5938, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8281, + "eval_loss_RQACONVA": 3.8281, + "eval_loss_text_RQACONVA": 2.0781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_RQACONVA": 3.6719, + "eval_audio_loss_1_RQACONVA": 3.9844, + "eval_audio_loss_2_RQACONVA": 3.7656, + "eval_audio_loss_3_RQACONVA": 4.7812, + "eval_audio_loss_4_RQACONVA": 4.5625, + "eval_audio_loss_5_RQACONVA": 4.5625, + "eval_audio_loss_6_RQACONVA": 4.3438, + "eval_loss": 3.9844, + "eval_loss_RQACONVA": 3.9844, + "eval_loss_text_RQACONVA": 2.2031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_RQACONVA": 3.4844, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.6562, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.5, + "eval_audio_loss_6_RQACONVA": 4.2812, + "eval_loss": 3.9375, + "eval_loss_RQACONVA": 3.9375, + "eval_loss_text_RQACONVA": 2.6094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.8438, + "eval_audio_loss_2_RQACONVA": 3.6562, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 1.7969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.9219, + "eval_audio_loss_2_RQACONVA": 3.6094, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.8281, + "eval_loss_RQACONVA": 3.8281, + "eval_loss_text_RQACONVA": 1.9453, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.625, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.8281, + "eval_loss_RQACONVA": 3.8281, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.5781, + "eval_loss_RQACONV": 0.5781, + "eval_loss_text_RQACONV": 1.1562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.7773, + "eval_loss_RQACONV": 0.7773, + "eval_loss_text_RQACONV": 1.5547, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.7188, + "eval_loss_RQACONV": 0.7188, + "eval_loss_text_RQACONV": 1.4375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.8242, + "eval_loss_RQACONV": 0.8242, + "eval_loss_text_RQACONV": 1.6484, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.4668, + "eval_loss_RQACONV": 0.4668, + "eval_loss_text_RQACONV": 0.9336, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.4355, + "eval_loss_RQACONV": 0.4355, + "eval_loss_text_RQACONV": 0.8711, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.6289, + "eval_loss_RQACONV": 0.6289, + "eval_loss_text_RQACONV": 1.2578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.5195, + "eval_loss_RQACONV": 0.5195, + "eval_loss_text_RQACONV": 1.0391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.4883, + "eval_loss_RQACONV": 0.4883, + "eval_loss_text_RQACONV": 0.9766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 1.2422, + "eval_loss_RQACONV": 1.2422, + "eval_loss_text_RQACONV": 2.4844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.1445, + "eval_loss_RQACONV": 0.1445, + "eval_loss_text_RQACONV": 0.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.1738, + "eval_loss_RQACONV": 0.1738, + "eval_loss_text_RQACONV": 0.3477, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.1016, + "eval_loss_RQACONV": 0.1016, + "eval_loss_text_RQACONV": 0.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.1807, + "eval_loss_RQACONV": 0.1807, + "eval_loss_text_RQACONV": 0.3613, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.1689, + "eval_loss_RQACONV": 0.1689, + "eval_loss_text_RQACONV": 0.3379, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.0481, + "eval_loss_RQACONV": 0.0481, + "eval_loss_text_RQACONV": 0.0962, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.1113, + "eval_loss_RQACONV": 0.1113, + "eval_loss_text_RQACONV": 0.2227, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.1416, + "eval_loss_RQACONV": 0.1416, + "eval_loss_text_RQACONV": 0.2832, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 0.6289, + "eval_loss_RQACONV": 0.6289, + "eval_loss_text_RQACONV": 1.2578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2400 + }, + { + "epoch": 0.1733477789815818, + "eval_loss": 1.6573578119277954, + "eval_runtime": 27.8946, + "eval_samples_per_second": 191.757, + "eval_steps_per_second": 1.506, + "step": 2400 + }, + { + "audio_loss_0": 3.9062, + "audio_loss_1": 3.875, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.6562, + "audio_loss_5": 4.5312, + "audio_loss_6": 4.3438, + "epoch": 0.1733477789815818, + "loss": 3.8438, + "loss_text": 1.0547, + "state_loss_0": 0.0, + "step": 2400 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1562, + "epoch": 0.1733477789815818, + "loss": 3.625, + "loss_text": 0.9492, + "state_loss_0": 0.0, + "step": 2400 + }, + { + "epoch": 0.17515348501263994, + "grad_norm": 1.096373438835144, + "learning_rate": 4.9566740300601616e-05, + "loss": 3.6622, + "step": 2425 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0938, + "epoch": 0.17515348501263994, + "loss": 3.5469, + "loss_text": 0.6914, + "state_loss_0": 0.0, + "step": 2425 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.0938, + "epoch": 0.17515348501263994, + "loss": 3.5156, + "loss_text": 0.6406, + "state_loss_0": 0.0, + "step": 2425 + }, + { + "epoch": 0.1769591910436981, + "grad_norm": 1.0731014013290405, + "learning_rate": 4.9553084350633084e-05, + "loss": 3.6607, + "step": 2450 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2812, + "epoch": 0.1769591910436981, + "loss": 3.7188, + "loss_text": 1.3281, + "state_loss_0": 0.0, + "step": 2450 + }, + { + "audio_loss_0": 3.6875, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.25, + "epoch": 0.1769591910436981, + "loss": 3.6719, + "loss_text": 0.7617, + "state_loss_0": 0.0, + "step": 2450 + }, + { + "epoch": 0.17876489707475623, + "grad_norm": 1.5039048194885254, + "learning_rate": 4.953921845552212e-05, + "loss": 3.6496, + "step": 2475 + }, + { + "audio_loss_0": 3.5781, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.625, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5938, + "audio_loss_5": 4.625, + "audio_loss_6": 4.3125, + "epoch": 0.17876489707475623, + "loss": 3.75, + "loss_text": 0.7227, + "state_loss_0": 0.0, + "step": 2475 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.5, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0, + "epoch": 0.17876489707475623, + "loss": 3.5625, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 2475 + }, + { + "epoch": 0.18057060310581438, + "grad_norm": 0.9883484244346619, + "learning_rate": 4.952514273383132e-05, + "loss": 3.6507, + "step": 2500 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.2188, + "epoch": 0.18057060310581438, + "loss": 3.6094, + "loss_text": 0.8281, + "state_loss_0": 0.0, + "step": 2500 + }, + { + "audio_loss_0": 3.5781, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.25, + "epoch": 0.18057060310581438, + "loss": 3.7812, + "loss_text": 1.6328, + "state_loss_0": 0.0, + "step": 2500 + }, + { + "epoch": 0.18237630913687253, + "grad_norm": 0.921057939529419, + "learning_rate": 4.951085730591744e-05, + "loss": 3.6518, + "step": 2525 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.25, + "epoch": 0.18237630913687253, + "loss": 3.5781, + "loss_text": 0.5664, + "state_loss_0": 0.0, + "step": 2525 + }, + { + "audio_loss_0": 3.75, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.18237630913687253, + "loss": 3.6719, + "loss_text": 0.7266, + "state_loss_0": 0.0, + "step": 2525 + }, + { + "epoch": 0.18418201516793067, + "grad_norm": 0.9655124545097351, + "learning_rate": 4.9496362293930345e-05, + "loss": 3.6534, + "step": 2550 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.25, + "audio_loss_6": 4.1875, + "epoch": 0.18418201516793067, + "loss": 3.5625, + "loss_text": 0.8164, + "state_loss_0": 0.0, + "step": 2550 + }, + { + "audio_loss_0": 3.6719, + "audio_loss_1": 4.1562, + "audio_loss_2": 3.6875, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5938, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2812, + "epoch": 0.18418201516793067, + "loss": 3.7812, + "loss_text": 0.8906, + "state_loss_0": 0.0, + "step": 2550 + }, + { + "epoch": 0.1859877211989888, + "grad_norm": 1.1297218799591064, + "learning_rate": 4.9481657821812006e-05, + "loss": 3.6466, + "step": 2575 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.625, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.1859877211989888, + "loss": 3.5781, + "loss_text": 0.918, + "state_loss_0": 0.0, + "step": 2575 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.1859877211989888, + "loss": 3.6094, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 2575 + }, + { + "epoch": 0.18779342723004694, + "grad_norm": 1.1849215030670166, + "learning_rate": 4.946674401529539e-05, + "loss": 3.6437, + "step": 2600 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2812, + "epoch": 0.18779342723004694, + "loss": 3.6875, + "loss_text": 0.8828, + "state_loss_0": 0.0, + "step": 2600 + }, + { + "audio_loss_0": 3.6406, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0625, + "epoch": 0.18779342723004694, + "loss": 3.5938, + "loss_text": 0.9492, + "state_loss_0": 0.0, + "step": 2600 + }, + { + "epoch": 0.18959913326110509, + "grad_norm": 1.3129795789718628, + "learning_rate": 4.945162100190343e-05, + "loss": 3.6396, + "step": 2625 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1562, + "epoch": 0.18959913326110509, + "loss": 3.6562, + "loss_text": 0.9727, + "state_loss_0": 0.0, + "step": 2625 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.25, + "epoch": 0.18959913326110509, + "loss": 3.6875, + "loss_text": 0.707, + "state_loss_0": 0.0, + "step": 2625 + }, + { + "epoch": 0.19140483929216323, + "grad_norm": 1.038933515548706, + "learning_rate": 4.943628891094791e-05, + "loss": 3.6411, + "step": 2650 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 4.0, + "audio_loss_2": 3.6719, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.375, + "epoch": 0.19140483929216323, + "loss": 3.8281, + "loss_text": 1.3281, + "state_loss_0": 0.0, + "step": 2650 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.9844, + "audio_loss_2": 3.6875, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2188, + "epoch": 0.19140483929216323, + "loss": 3.7812, + "loss_text": 1.2969, + "state_loss_0": 0.0, + "step": 2650 + }, + { + "epoch": 0.19321054532322138, + "grad_norm": 1.0012823343276978, + "learning_rate": 4.942074787352838e-05, + "loss": 3.653, + "step": 2675 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.9375, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.19321054532322138, + "loss": 3.6562, + "loss_text": 0.8477, + "state_loss_0": 0.0, + "step": 2675 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.875, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.125, + "epoch": 0.19321054532322138, + "loss": 3.5781, + "loss_text": 0.6211, + "state_loss_0": 0.0, + "step": 2675 + }, + { + "epoch": 0.19501625135427952, + "grad_norm": 1.082421898841858, + "learning_rate": 4.9404998022531e-05, + "loss": 3.6493, + "step": 2700 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.9062, + "audio_loss_2": 3.7188, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.25, + "epoch": 0.19501625135427952, + "loss": 3.7344, + "loss_text": 1.1328, + "state_loss_0": 0.0, + "step": 2700 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.19501625135427952, + "loss": 3.6875, + "loss_text": 1.0703, + "state_loss_0": 0.0, + "step": 2700 + }, + { + "epoch": 0.19682195738533767, + "grad_norm": 0.9718628525733948, + "learning_rate": 4.938903949262744e-05, + "loss": 3.6386, + "step": 2725 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0625, + "epoch": 0.19682195738533767, + "loss": 3.5312, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 2725 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.1875, + "epoch": 0.19682195738533767, + "loss": 3.6875, + "loss_text": 1.0625, + "state_loss_0": 0.0, + "step": 2725 + }, + { + "epoch": 0.19862766341639582, + "grad_norm": 1.0817433595657349, + "learning_rate": 4.93728724202737e-05, + "loss": 3.6449, + "step": 2750 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.125, + "epoch": 0.19862766341639582, + "loss": 3.6406, + "loss_text": 0.8984, + "state_loss_0": 0.0, + "step": 2750 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.625, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0938, + "epoch": 0.19862766341639582, + "loss": 3.5156, + "loss_text": 0.6562, + "state_loss_0": 0.0, + "step": 2750 + }, + { + "epoch": 0.20043336944745396, + "grad_norm": 0.9505408406257629, + "learning_rate": 4.9356496943708954e-05, + "loss": 3.6387, + "step": 2775 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.2188, + "epoch": 0.20043336944745396, + "loss": 3.6875, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 2775 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2188, + "epoch": 0.20043336944745396, + "loss": 3.6875, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 2775 + }, + { + "epoch": 0.2022390754785121, + "grad_norm": 1.1283518075942993, + "learning_rate": 4.9339913202954394e-05, + "loss": 3.6479, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_AQACONVA": 3.5, + "eval_audio_loss_1_AQACONVA": 3.8438, + "eval_audio_loss_2_AQACONVA": 3.5469, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.5312, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 2.0312, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_AQACONVA": 3.2812, + "eval_audio_loss_1_AQACONVA": 3.8906, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 1.7266, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_AQACONVA": 3.4688, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.5469, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 1.4844, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_AQACONVA": 3.4219, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 2.625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_AQACONVA": 3.3906, + "eval_audio_loss_1_AQACONVA": 3.9531, + "eval_audio_loss_2_AQACONVA": 3.6875, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.5, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.9688, + "eval_loss_AQACONVA": 3.9688, + "eval_loss_text_AQACONVA": 2.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_AQACONVA": 3.7188, + "eval_audio_loss_1_AQACONVA": 3.875, + "eval_audio_loss_2_AQACONVA": 3.7188, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.5, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.9375, + "eval_loss_AQACONVA": 3.9375, + "eval_loss_text_AQACONVA": 2.1719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_AQACONVA": 3.7188, + "eval_audio_loss_1_AQACONVA": 3.9688, + "eval_audio_loss_2_AQACONVA": 3.6406, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.5312, + "eval_audio_loss_6_AQACONVA": 4.2812, + "eval_loss": 3.9062, + "eval_loss_AQACONVA": 3.9062, + "eval_loss_text_AQACONVA": 1.8828, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 2.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.7344, + "eval_loss_RQACONVA": 3.7344, + "eval_loss_text_RQACONVA": 1.7188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_RQACONVA": 3.5469, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 2.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_RQACONVA": 3.8125, + "eval_audio_loss_1_RQACONVA": 3.9531, + "eval_audio_loss_2_RQACONVA": 3.7188, + "eval_audio_loss_3_RQACONVA": 4.75, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4688, + "eval_audio_loss_6_RQACONVA": 4.3125, + "eval_loss": 3.9688, + "eval_loss_RQACONVA": 3.9688, + "eval_loss_text_RQACONVA": 2.2812, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_RQACONVA": 3.4844, + "eval_audio_loss_1_RQACONVA": 3.8906, + "eval_audio_loss_2_RQACONVA": 3.625, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.4688, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.9375, + "eval_loss_RQACONVA": 3.9375, + "eval_loss_text_RQACONVA": 2.5781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_RQACONVA": 3.625, + "eval_audio_loss_1_RQACONVA": 3.8906, + "eval_audio_loss_2_RQACONVA": 3.6562, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 1.875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.8906, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 1.9922, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.8906, + "eval_audio_loss_2_RQACONVA": 3.625, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 1.9844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.5586, + "eval_loss_RQACONV": 0.5586, + "eval_loss_text_RQACONV": 1.1172, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.498, + "eval_loss_RQACONV": 0.498, + "eval_loss_text_RQACONV": 0.9961, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.6836, + "eval_loss_RQACONV": 0.6836, + "eval_loss_text_RQACONV": 1.3672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.6445, + "eval_loss_RQACONV": 0.6445, + "eval_loss_text_RQACONV": 1.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.75, + "eval_loss_RQACONV": 0.75, + "eval_loss_text_RQACONV": 1.5, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.6953, + "eval_loss_RQACONV": 0.6953, + "eval_loss_text_RQACONV": 1.3906, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.8398, + "eval_loss_RQACONV": 0.8398, + "eval_loss_text_RQACONV": 1.6797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.4668, + "eval_loss_RQACONV": 0.4668, + "eval_loss_text_RQACONV": 0.9336, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.4375, + "eval_loss_RQACONV": 0.4375, + "eval_loss_text_RQACONV": 0.875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.6289, + "eval_loss_RQACONV": 0.6289, + "eval_loss_text_RQACONV": 1.2578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.4824, + "eval_loss_RQACONV": 0.4824, + "eval_loss_text_RQACONV": 0.9648, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 1.25, + "eval_loss_RQACONV": 1.25, + "eval_loss_text_RQACONV": 2.5, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.1748, + "eval_loss_RQACONV": 0.1748, + "eval_loss_text_RQACONV": 0.3496, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.1045, + "eval_loss_RQACONV": 0.1045, + "eval_loss_text_RQACONV": 0.209, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.1777, + "eval_loss_RQACONV": 0.1777, + "eval_loss_text_RQACONV": 0.3555, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.1611, + "eval_loss_RQACONV": 0.1611, + "eval_loss_text_RQACONV": 0.3223, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.0439, + "eval_loss_RQACONV": 0.0439, + "eval_loss_text_RQACONV": 0.0879, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.1582, + "eval_loss_RQACONV": 0.1582, + "eval_loss_text_RQACONV": 0.3164, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.1064, + "eval_loss_RQACONV": 0.1064, + "eval_loss_text_RQACONV": 0.2129, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 2800 + }, + { + "epoch": 0.2022390754785121, + "eval_loss": 1.6539496183395386, + "eval_runtime": 27.7399, + "eval_samples_per_second": 192.827, + "eval_steps_per_second": 1.514, + "step": 2800 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.25, + "epoch": 0.2022390754785121, + "loss": 3.6719, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 2800 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.2188, + "epoch": 0.2022390754785121, + "loss": 3.5938, + "loss_text": 0.7578, + "state_loss_0": 0.0, + "step": 2800 + }, + { + "epoch": 0.20404478150957026, + "grad_norm": 1.0119855403900146, + "learning_rate": 4.932312133981199e-05, + "loss": 3.6424, + "step": 2825 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 4.0938, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0312, + "epoch": 0.20404478150957026, + "loss": 3.6562, + "loss_text": 0.9922, + "state_loss_0": 0.0, + "step": 2825 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0312, + "epoch": 0.20404478150957026, + "loss": 3.5625, + "loss_text": 0.7891, + "state_loss_0": 0.0, + "step": 2825 + }, + { + "epoch": 0.20585048754062837, + "grad_norm": 0.9101513624191284, + "learning_rate": 4.93061214978633e-05, + "loss": 3.6321, + "step": 2850 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.7188, + "audio_loss_3": 4.625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.0312, + "epoch": 0.20585048754062837, + "loss": 3.6406, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 2850 + }, + { + "audio_loss_0": 3.75, + "audio_loss_1": 3.9531, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.2188, + "epoch": 0.20585048754062837, + "loss": 3.7031, + "loss_text": 0.8008, + "state_loss_0": 0.0, + "step": 2850 + }, + { + "epoch": 0.20765619357168652, + "grad_norm": 1.0912185907363892, + "learning_rate": 4.928891382246823e-05, + "loss": 3.64, + "step": 2875 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.9844, + "epoch": 0.20765619357168652, + "loss": 3.5, + "loss_text": 0.8477, + "state_loss_0": 0.0, + "step": 2875 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.20765619357168652, + "loss": 3.6875, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 2875 + }, + { + "epoch": 0.20946189960274467, + "grad_norm": 0.8376546502113342, + "learning_rate": 4.927149846076382e-05, + "loss": 3.6408, + "step": 2900 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.625, + "audio_loss_2": 3.375, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.20946189960274467, + "loss": 3.5938, + "loss_text": 1.25, + "state_loss_0": 0.0, + "step": 2900 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1875, + "epoch": 0.20946189960274467, + "loss": 3.6562, + "loss_text": 0.9492, + "state_loss_0": 0.0, + "step": 2900 + }, + { + "epoch": 0.2112676056338028, + "grad_norm": 0.8117663860321045, + "learning_rate": 4.925387556166294e-05, + "loss": 3.6434, + "step": 2925 + }, + { + "audio_loss_0": 3.7969, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2188, + "epoch": 0.2112676056338028, + "loss": 3.75, + "loss_text": 1.2656, + "state_loss_0": 0.0, + "step": 2925 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.625, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2188, + "epoch": 0.2112676056338028, + "loss": 3.6406, + "loss_text": 0.7656, + "state_loss_0": 0.0, + "step": 2925 + }, + { + "epoch": 0.21307331166486096, + "grad_norm": 0.8773201704025269, + "learning_rate": 4.923604527585306e-05, + "loss": 3.6387, + "step": 2950 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1875, + "epoch": 0.21307331166486096, + "loss": 3.5938, + "loss_text": 0.8008, + "state_loss_0": 0.0, + "step": 2950 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.25, + "epoch": 0.21307331166486096, + "loss": 3.8125, + "loss_text": 1.6328, + "state_loss_0": 0.0, + "step": 2950 + }, + { + "epoch": 0.2148790176959191, + "grad_norm": 0.854374349117279, + "learning_rate": 4.921800775579495e-05, + "loss": 3.6383, + "step": 2975 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.3125, + "epoch": 0.2148790176959191, + "loss": 3.6562, + "loss_text": 1.0625, + "state_loss_0": 0.0, + "step": 2975 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.2188, + "epoch": 0.2148790176959191, + "loss": 3.6562, + "loss_text": 0.8398, + "state_loss_0": 0.0, + "step": 2975 + }, + { + "epoch": 0.21668472372697725, + "grad_norm": 0.9231103658676147, + "learning_rate": 4.919976315572136e-05, + "loss": 3.6449, + "step": 3000 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.2188, + "epoch": 0.21668472372697725, + "loss": 3.5938, + "loss_text": 0.7969, + "state_loss_0": 0.0, + "step": 3000 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0625, + "epoch": 0.21668472372697725, + "loss": 3.5312, + "loss_text": 0.6719, + "state_loss_0": 0.0, + "step": 3000 + }, + { + "epoch": 0.2184904297580354, + "grad_norm": 1.2828786373138428, + "learning_rate": 4.9181311631635694e-05, + "loss": 3.6386, + "step": 3025 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.2184904297580354, + "loss": 3.6094, + "loss_text": 0.918, + "state_loss_0": 0.0, + "step": 3025 + }, + { + "audio_loss_0": 3.5781, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.25, + "epoch": 0.2184904297580354, + "loss": 3.6719, + "loss_text": 0.8242, + "state_loss_0": 0.0, + "step": 3025 + }, + { + "epoch": 0.22029613578909354, + "grad_norm": 1.018432378768921, + "learning_rate": 4.916265334131073e-05, + "loss": 3.6401, + "step": 3050 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1875, + "epoch": 0.22029613578909354, + "loss": 3.5625, + "loss_text": 0.7891, + "state_loss_0": 0.0, + "step": 3050 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.0938, + "epoch": 0.22029613578909354, + "loss": 3.5781, + "loss_text": 0.9336, + "state_loss_0": 0.0, + "step": 3050 + }, + { + "epoch": 0.2221018418201517, + "grad_norm": 0.9860376119613647, + "learning_rate": 4.9143788444287194e-05, + "loss": 3.6284, + "step": 3075 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2188, + "epoch": 0.2221018418201517, + "loss": 3.7812, + "loss_text": 1.5469, + "state_loss_0": 0.0, + "step": 3075 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.3125, + "epoch": 0.2221018418201517, + "loss": 3.6875, + "loss_text": 0.9258, + "state_loss_0": 0.0, + "step": 3075 + }, + { + "epoch": 0.2239075478512098, + "grad_norm": 1.0472204685211182, + "learning_rate": 4.912471710187248e-05, + "loss": 3.6366, + "step": 3100 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2188, + "epoch": 0.2239075478512098, + "loss": 3.7344, + "loss_text": 1.4141, + "state_loss_0": 0.0, + "step": 3100 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2188, + "epoch": 0.2239075478512098, + "loss": 3.6719, + "loss_text": 1.0078, + "state_loss_0": 0.0, + "step": 3100 + }, + { + "epoch": 0.22571325388226796, + "grad_norm": 0.9122313261032104, + "learning_rate": 4.910543947713919e-05, + "loss": 3.641, + "step": 3125 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.9844, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.5, + "audio_loss_6": 4.2812, + "epoch": 0.22571325388226796, + "loss": 3.7812, + "loss_text": 1.2734, + "state_loss_0": 0.0, + "step": 3125 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.25, + "epoch": 0.22571325388226796, + "loss": 3.625, + "loss_text": 0.5938, + "state_loss_0": 0.0, + "step": 3125 + }, + { + "epoch": 0.2275189599133261, + "grad_norm": 0.8638318181037903, + "learning_rate": 4.908595573492379e-05, + "loss": 3.6197, + "step": 3150 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.2275189599133261, + "loss": 3.6094, + "loss_text": 0.4707, + "state_loss_0": 0.0, + "step": 3150 + }, + { + "audio_loss_0": 3.625, + "audio_loss_1": 3.9531, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1875, + "epoch": 0.2275189599133261, + "loss": 3.75, + "loss_text": 1.2344, + "state_loss_0": 0.0, + "step": 3150 + }, + { + "epoch": 0.22932466594438425, + "grad_norm": 0.801001787185669, + "learning_rate": 4.9066266041825194e-05, + "loss": 3.6292, + "step": 3175 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.2812, + "epoch": 0.22932466594438425, + "loss": 3.7812, + "loss_text": 1.5234, + "state_loss_0": 0.0, + "step": 3175 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.25, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0312, + "epoch": 0.22932466594438425, + "loss": 3.4688, + "loss_text": 0.7383, + "state_loss_0": 0.0, + "step": 3175 + }, + { + "epoch": 0.2311303719754424, + "grad_norm": 0.9539688229560852, + "learning_rate": 4.9046370566203324e-05, + "loss": 3.6289, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_AQACONVA": 3.5, + "eval_audio_loss_1_AQACONVA": 3.8281, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.5312, + "eval_audio_loss_6_AQACONVA": 4.3125, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 2.0781, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_AQACONVA": 3.2812, + "eval_audio_loss_1_AQACONVA": 3.8906, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.7656, + "eval_loss_AQACONVA": 3.7656, + "eval_loss_text_AQACONVA": 1.7344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_AQACONVA": 3.4375, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 1.5, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_AQACONVA": 3.375, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.8594, + "eval_loss_AQACONVA": 3.8594, + "eval_loss_text_AQACONVA": 2.6406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_AQACONVA": 3.375, + "eval_audio_loss_1_AQACONVA": 3.9375, + "eval_audio_loss_2_AQACONVA": 3.6406, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.2812, + "eval_loss": 3.9531, + "eval_loss_AQACONVA": 3.9531, + "eval_loss_text_AQACONVA": 2.7344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_AQACONVA": 3.7031, + "eval_audio_loss_1_AQACONVA": 3.875, + "eval_audio_loss_2_AQACONVA": 3.7031, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5625, + "eval_audio_loss_5_AQACONVA": 4.5, + "eval_audio_loss_6_AQACONVA": 4.2812, + "eval_loss": 3.9375, + "eval_loss_AQACONVA": 3.9375, + "eval_loss_text_AQACONVA": 2.1719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_AQACONVA": 3.7031, + "eval_audio_loss_1_AQACONVA": 3.9688, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.5, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 1.8516, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.2188, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.7344, + "eval_loss_RQACONVA": 3.7344, + "eval_loss_text_RQACONVA": 2.0, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 1.6641, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_RQACONVA": 3.5625, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.4219, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 2.0938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_RQACONVA": 3.8281, + "eval_audio_loss_1_RQACONVA": 3.9688, + "eval_audio_loss_2_RQACONVA": 3.7188, + "eval_audio_loss_3_RQACONVA": 4.75, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.4688, + "eval_audio_loss_6_RQACONVA": 4.3125, + "eval_loss": 4.0, + "eval_loss_RQACONVA": 4.0, + "eval_loss_text_RQACONVA": 2.3906, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.6406, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.9375, + "eval_loss_RQACONVA": 3.9375, + "eval_loss_text_RQACONVA": 2.6562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_RQACONVA": 3.5938, + "eval_audio_loss_1_RQACONVA": 3.8906, + "eval_audio_loss_2_RQACONVA": 3.6406, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.8281, + "eval_loss_RQACONVA": 3.8281, + "eval_loss_text_RQACONVA": 1.9062, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_RQACONVA": 3.375, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.8516, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_audio_loss_0_RQACONVA": 3.375, + "eval_audio_loss_1_RQACONVA": 3.875, + "eval_audio_loss_2_RQACONVA": 3.6094, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 2.0938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.5664, + "eval_loss_RQACONV": 0.5664, + "eval_loss_text_RQACONV": 1.1328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.5234, + "eval_loss_RQACONV": 0.5234, + "eval_loss_text_RQACONV": 1.0469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.6914, + "eval_loss_RQACONV": 0.6914, + "eval_loss_text_RQACONV": 1.3828, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.6289, + "eval_loss_RQACONV": 0.6289, + "eval_loss_text_RQACONV": 1.2578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.8047, + "eval_loss_RQACONV": 0.8047, + "eval_loss_text_RQACONV": 1.6094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.6641, + "eval_loss_RQACONV": 0.6641, + "eval_loss_text_RQACONV": 1.3281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.7422, + "eval_loss_RQACONV": 0.7422, + "eval_loss_text_RQACONV": 1.4844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.8633, + "eval_loss_RQACONV": 0.8633, + "eval_loss_text_RQACONV": 1.7266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.4648, + "eval_loss_RQACONV": 0.4648, + "eval_loss_text_RQACONV": 0.9297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.4414, + "eval_loss_RQACONV": 0.4414, + "eval_loss_text_RQACONV": 0.8828, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.4863, + "eval_loss_RQACONV": 0.4863, + "eval_loss_text_RQACONV": 0.9727, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 1.2578, + "eval_loss_RQACONV": 1.2578, + "eval_loss_text_RQACONV": 2.5156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.166, + "eval_loss_RQACONV": 0.166, + "eval_loss_text_RQACONV": 0.332, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.166, + "eval_loss_RQACONV": 0.166, + "eval_loss_text_RQACONV": 0.332, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.0864, + "eval_loss_RQACONV": 0.0864, + "eval_loss_text_RQACONV": 0.1729, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.1709, + "eval_loss_RQACONV": 0.1709, + "eval_loss_text_RQACONV": 0.3418, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.0493, + "eval_loss_RQACONV": 0.0493, + "eval_loss_text_RQACONV": 0.0986, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.1475, + "eval_loss_RQACONV": 0.1475, + "eval_loss_text_RQACONV": 0.2949, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.1196, + "eval_loss_RQACONV": 0.1196, + "eval_loss_text_RQACONV": 0.2393, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.1455, + "eval_loss_RQACONV": 0.1455, + "eval_loss_text_RQACONV": 0.291, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 0.6211, + "eval_loss_RQACONV": 0.6211, + "eval_loss_text_RQACONV": 1.2422, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3200 + }, + { + "epoch": 0.2311303719754424, + "eval_loss": 1.648955225944519, + "eval_runtime": 29.8317, + "eval_samples_per_second": 179.306, + "eval_steps_per_second": 1.408, + "step": 3200 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1875, + "epoch": 0.2311303719754424, + "loss": 3.6562, + "loss_text": 0.8477, + "state_loss_0": 0.0, + "step": 3200 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.2311303719754424, + "loss": 3.625, + "loss_text": 0.5547, + "state_loss_0": 0.0, + "step": 3200 + }, + { + "epoch": 0.23293607800650054, + "grad_norm": 1.0201008319854736, + "learning_rate": 4.902626947817769e-05, + "loss": 3.6317, + "step": 3225 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.23293607800650054, + "loss": 3.6719, + "loss_text": 1.3516, + "state_loss_0": 0.0, + "step": 3225 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0938, + "epoch": 0.23293607800650054, + "loss": 3.5938, + "loss_text": 1.0547, + "state_loss_0": 0.0, + "step": 3225 + }, + { + "epoch": 0.2347417840375587, + "grad_norm": 1.2099103927612305, + "learning_rate": 4.90059629496259e-05, + "loss": 3.6263, + "step": 3250 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.625, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.2188, + "epoch": 0.2347417840375587, + "loss": 3.6562, + "loss_text": 1.0312, + "state_loss_0": 0.0, + "step": 3250 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1562, + "epoch": 0.2347417840375587, + "loss": 3.5938, + "loss_text": 0.9414, + "state_loss_0": 0.0, + "step": 3250 + }, + { + "epoch": 0.23654749006861683, + "grad_norm": 0.9656897187232971, + "learning_rate": 4.8985451154182236e-05, + "loss": 3.6235, + "step": 3275 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1562, + "epoch": 0.23654749006861683, + "loss": 3.5938, + "loss_text": 1.2656, + "state_loss_0": 0.0, + "step": 3275 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1562, + "epoch": 0.23654749006861683, + "loss": 3.6875, + "loss_text": 1.4062, + "state_loss_0": 0.0, + "step": 3275 + }, + { + "epoch": 0.23835319609967498, + "grad_norm": 0.8648170828819275, + "learning_rate": 4.8964734267236146e-05, + "loss": 3.6264, + "step": 3300 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1562, + "epoch": 0.23835319609967498, + "loss": 3.625, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 3300 + }, + { + "audio_loss_0": 3.6719, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.23835319609967498, + "loss": 3.6875, + "loss_text": 1.1172, + "state_loss_0": 0.0, + "step": 3300 + }, + { + "epoch": 0.24015890213073313, + "grad_norm": 0.9313539862632751, + "learning_rate": 4.894381246593073e-05, + "loss": 3.627, + "step": 3325 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.25, + "epoch": 0.24015890213073313, + "loss": 3.6562, + "loss_text": 1.25, + "state_loss_0": 0.0, + "step": 3325 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.375, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.1875, + "audio_loss_6": 3.9844, + "epoch": 0.24015890213073313, + "loss": 3.4688, + "loss_text": 0.6406, + "state_loss_0": 0.0, + "step": 3325 + }, + { + "epoch": 0.24196460816179127, + "grad_norm": 1.048167109489441, + "learning_rate": 4.892268592916128e-05, + "loss": 3.623, + "step": 3350 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.24196460816179127, + "loss": 3.6406, + "loss_text": 1.4219, + "state_loss_0": 0.0, + "step": 3350 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.24196460816179127, + "loss": 3.5625, + "loss_text": 0.9258, + "state_loss_0": 0.0, + "step": 3350 + }, + { + "epoch": 0.2437703141928494, + "grad_norm": 1.6720383167266846, + "learning_rate": 4.8901354837573656e-05, + "loss": 3.6222, + "step": 3375 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.875, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.2437703141928494, + "loss": 3.7344, + "loss_text": 1.2109, + "state_loss_0": 0.0, + "step": 3375 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.2437703141928494, + "loss": 3.7031, + "loss_text": 1.3125, + "state_loss_0": 0.0, + "step": 3375 + }, + { + "epoch": 0.24557602022390754, + "grad_norm": 0.8912956714630127, + "learning_rate": 4.8879819373562846e-05, + "loss": 3.6205, + "step": 3400 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.25, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0312, + "epoch": 0.24557602022390754, + "loss": 3.4375, + "loss_text": 0.6602, + "state_loss_0": 0.0, + "step": 3400 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.2188, + "epoch": 0.24557602022390754, + "loss": 3.6094, + "loss_text": 0.6523, + "state_loss_0": 0.0, + "step": 3400 + }, + { + "epoch": 0.24738172625496568, + "grad_norm": 0.9694085121154785, + "learning_rate": 4.885807972127133e-05, + "loss": 3.6223, + "step": 3425 + }, + { + "audio_loss_0": 3.6406, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.125, + "epoch": 0.24738172625496568, + "loss": 3.6094, + "loss_text": 0.7734, + "state_loss_0": 0.0, + "step": 3425 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.9844, + "audio_loss_2": 3.625, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.24738172625496568, + "loss": 3.6875, + "loss_text": 0.7617, + "state_loss_0": 0.0, + "step": 3425 + }, + { + "epoch": 0.24918743228602383, + "grad_norm": 0.9947062134742737, + "learning_rate": 4.8836136066587554e-05, + "loss": 3.6224, + "step": 3450 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0938, + "epoch": 0.24918743228602383, + "loss": 3.6406, + "loss_text": 0.8281, + "state_loss_0": 0.0, + "step": 3450 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0312, + "epoch": 0.24918743228602383, + "loss": 3.5, + "loss_text": 0.9141, + "state_loss_0": 0.0, + "step": 3450 + }, + { + "epoch": 0.250993138317082, + "grad_norm": 0.8414862155914307, + "learning_rate": 4.881398859714429e-05, + "loss": 3.6157, + "step": 3475 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.250993138317082, + "loss": 3.6562, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 3475 + }, + { + "audio_loss_0": 3.9375, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1875, + "epoch": 0.250993138317082, + "loss": 3.7188, + "loss_text": 0.8516, + "state_loss_0": 0.0, + "step": 3475 + }, + { + "epoch": 0.25279884434814015, + "grad_norm": 1.124085783958435, + "learning_rate": 4.879163750231709e-05, + "loss": 3.6099, + "step": 3500 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.1562, + "epoch": 0.25279884434814015, + "loss": 3.7031, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 3500 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.25279884434814015, + "loss": 3.6406, + "loss_text": 0.8516, + "state_loss_0": 0.0, + "step": 3500 + }, + { + "epoch": 0.25460455037919827, + "grad_norm": 0.8459397554397583, + "learning_rate": 4.8769082973222626e-05, + "loss": 3.6309, + "step": 3525 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.25460455037919827, + "loss": 3.5625, + "loss_text": 0.7617, + "state_loss_0": 0.0, + "step": 3525 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.9219, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.25460455037919827, + "loss": 3.7031, + "loss_text": 1.25, + "state_loss_0": 0.0, + "step": 3525 + }, + { + "epoch": 0.2564102564102564, + "grad_norm": 0.8770866990089417, + "learning_rate": 4.874632520271707e-05, + "loss": 3.6104, + "step": 3550 + }, + { + "audio_loss_0": 3.8906, + "audio_loss_1": 4.0625, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.1875, + "epoch": 0.2564102564102564, + "loss": 3.75, + "loss_text": 0.8398, + "state_loss_0": 0.0, + "step": 3550 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0, + "epoch": 0.2564102564102564, + "loss": 3.5938, + "loss_text": 1.6328, + "state_loss_0": 0.0, + "step": 3550 + }, + { + "epoch": 0.25821596244131456, + "grad_norm": 0.9018770456314087, + "learning_rate": 4.872336438539443e-05, + "loss": 3.6129, + "step": 3575 + }, + { + "audio_loss_0": 3.7031, + "audio_loss_1": 4.0312, + "audio_loss_2": 3.6719, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5625, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2812, + "epoch": 0.25821596244131456, + "loss": 3.7969, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 3575 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.25821596244131456, + "loss": 3.625, + "loss_text": 0.7578, + "state_loss_0": 0.0, + "step": 3575 + }, + { + "epoch": 0.2600216684723727, + "grad_norm": 0.9941697716712952, + "learning_rate": 4.870020071758492e-05, + "loss": 3.608, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_AQACONVA": 3.4688, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.5, + "eval_audio_loss_6_AQACONVA": 4.2812, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.875, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 1.7344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_AQACONVA": 3.4375, + "eval_audio_loss_1_AQACONVA": 3.7031, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 1.4766, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_AQACONVA": 3.375, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.8594, + "eval_loss_AQACONVA": 3.8594, + "eval_loss_text_AQACONVA": 2.6094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.9219, + "eval_audio_loss_2_AQACONVA": 3.6406, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.9375, + "eval_loss_AQACONVA": 3.9375, + "eval_loss_text_AQACONVA": 2.7188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_AQACONVA": 3.6875, + "eval_audio_loss_1_AQACONVA": 3.8594, + "eval_audio_loss_2_AQACONVA": 3.6875, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.2812, + "eval_loss": 3.9219, + "eval_loss_AQACONVA": 3.9219, + "eval_loss_text_AQACONVA": 2.2188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_AQACONVA": 3.7188, + "eval_audio_loss_1_AQACONVA": 3.9219, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 1.9453, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.2188, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 1.9219, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 1.7109, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_RQACONVA": 3.5469, + "eval_audio_loss_1_RQACONVA": 3.5938, + "eval_audio_loss_2_RQACONVA": 3.3906, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 2.0625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_RQACONVA": 3.6562, + "eval_audio_loss_1_RQACONVA": 3.9375, + "eval_audio_loss_2_RQACONVA": 3.6875, + "eval_audio_loss_3_RQACONVA": 4.7812, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4688, + "eval_audio_loss_6_RQACONVA": 4.2812, + "eval_loss": 3.9375, + "eval_loss_RQACONVA": 3.9375, + "eval_loss_text_RQACONVA": 2.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.8438, + "eval_audio_loss_2_RQACONVA": 3.6094, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.9062, + "eval_loss_RQACONVA": 3.9062, + "eval_loss_text_RQACONVA": 2.5781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.6406, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 1.9453, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 1.8359, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_audio_loss_0_RQACONVA": 3.375, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.5703, + "eval_loss_RQACONV": 0.5703, + "eval_loss_text_RQACONV": 1.1406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.6562, + "eval_loss_RQACONV": 0.6562, + "eval_loss_text_RQACONV": 1.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.7461, + "eval_loss_RQACONV": 0.7461, + "eval_loss_text_RQACONV": 1.4922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.75, + "eval_loss_RQACONV": 0.75, + "eval_loss_text_RQACONV": 1.5, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.875, + "eval_loss_RQACONV": 0.875, + "eval_loss_text_RQACONV": 1.75, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.4668, + "eval_loss_RQACONV": 0.4668, + "eval_loss_text_RQACONV": 0.9336, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.4395, + "eval_loss_RQACONV": 0.4395, + "eval_loss_text_RQACONV": 0.8789, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.6094, + "eval_loss_RQACONV": 0.6094, + "eval_loss_text_RQACONV": 1.2188, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.5234, + "eval_loss_RQACONV": 0.5234, + "eval_loss_text_RQACONV": 1.0469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.498, + "eval_loss_RQACONV": 0.498, + "eval_loss_text_RQACONV": 0.9961, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.4746, + "eval_loss_RQACONV": 0.4746, + "eval_loss_text_RQACONV": 0.9492, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 1.2734, + "eval_loss_RQACONV": 1.2734, + "eval_loss_text_RQACONV": 2.5469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.1523, + "eval_loss_RQACONV": 0.1523, + "eval_loss_text_RQACONV": 0.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.1768, + "eval_loss_RQACONV": 0.1768, + "eval_loss_text_RQACONV": 0.3535, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.0811, + "eval_loss_RQACONV": 0.0811, + "eval_loss_text_RQACONV": 0.1621, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.166, + "eval_loss_RQACONV": 0.166, + "eval_loss_text_RQACONV": 0.332, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.1582, + "eval_loss_RQACONV": 0.1582, + "eval_loss_text_RQACONV": 0.3164, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.0508, + "eval_loss_RQACONV": 0.0508, + "eval_loss_text_RQACONV": 0.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.1572, + "eval_loss_RQACONV": 0.1572, + "eval_loss_text_RQACONV": 0.3145, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.1191, + "eval_loss_RQACONV": 0.1191, + "eval_loss_text_RQACONV": 0.2383, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.1406, + "eval_loss_RQACONV": 0.1406, + "eval_loss_text_RQACONV": 0.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 0.6211, + "eval_loss_RQACONV": 0.6211, + "eval_loss_text_RQACONV": 1.2422, + "eval_state_loss_0_RQACONV": 0.0, + "step": 3600 + }, + { + "epoch": 0.2600216684723727, + "eval_loss": 1.6437972784042358, + "eval_runtime": 27.8213, + "eval_samples_per_second": 192.263, + "eval_steps_per_second": 1.51, + "step": 3600 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.3125, + "epoch": 0.2600216684723727, + "loss": 3.6875, + "loss_text": 0.6875, + "state_loss_0": 0.0, + "step": 3600 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.1875, + "epoch": 0.2600216684723727, + "loss": 3.625, + "loss_text": 0.9883, + "state_loss_0": 0.0, + "step": 3600 + }, + { + "epoch": 0.26182737450343085, + "grad_norm": 1.2481244802474976, + "learning_rate": 4.8676834397353246e-05, + "loss": 3.6076, + "step": 3625 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.26182737450343085, + "loss": 3.6406, + "loss_text": 1.125, + "state_loss_0": 0.0, + "step": 3625 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.1562, + "epoch": 0.26182737450343085, + "loss": 3.4531, + "loss_text": 0.9297, + "state_loss_0": 0.0, + "step": 3625 + }, + { + "epoch": 0.263633080534489, + "grad_norm": 0.9371017813682556, + "learning_rate": 4.865326562449694e-05, + "loss": 3.61, + "step": 3650 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.263633080534489, + "loss": 3.4688, + "loss_text": 0.5781, + "state_loss_0": 0.0, + "step": 3650 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0938, + "epoch": 0.263633080534489, + "loss": 3.5781, + "loss_text": 0.9375, + "state_loss_0": 0.0, + "step": 3650 + }, + { + "epoch": 0.26543878656554715, + "grad_norm": 0.8886885643005371, + "learning_rate": 4.862949460054462e-05, + "loss": 3.6065, + "step": 3675 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0625, + "epoch": 0.26543878656554715, + "loss": 3.5938, + "loss_text": 1.2734, + "state_loss_0": 0.0, + "step": 3675 + }, + { + "audio_loss_0": 3.6719, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0938, + "epoch": 0.26543878656554715, + "loss": 3.6406, + "loss_text": 0.957, + "state_loss_0": 0.0, + "step": 3675 + }, + { + "epoch": 0.26724449259660527, + "grad_norm": 0.964941680431366, + "learning_rate": 4.860552152875429e-05, + "loss": 3.6034, + "step": 3700 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.25, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.26724449259660527, + "loss": 3.5156, + "loss_text": 0.9844, + "state_loss_0": 0.0, + "step": 3700 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.26724449259660527, + "loss": 3.6094, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 3700 + }, + { + "epoch": 0.26905019862766344, + "grad_norm": 0.8998181223869324, + "learning_rate": 4.8581346614111614e-05, + "loss": 3.6123, + "step": 3725 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.125, + "audio_loss_6": 3.9219, + "epoch": 0.26905019862766344, + "loss": 3.4375, + "loss_text": 0.7148, + "state_loss_0": 0.0, + "step": 3725 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.125, + "epoch": 0.26905019862766344, + "loss": 3.6094, + "loss_text": 1.125, + "state_loss_0": 0.0, + "step": 3725 + }, + { + "epoch": 0.27085590465872156, + "grad_norm": 0.8165781497955322, + "learning_rate": 4.855697006332813e-05, + "loss": 3.6075, + "step": 3750 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.27085590465872156, + "loss": 3.4688, + "loss_text": 0.7383, + "state_loss_0": 0.0, + "step": 3750 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1875, + "audio_loss_6": 3.9062, + "epoch": 0.27085590465872156, + "loss": 3.4531, + "loss_text": 1.0078, + "state_loss_0": 0.0, + "step": 3750 + }, + { + "epoch": 0.2726616106897797, + "grad_norm": 0.9410061240196228, + "learning_rate": 4.8532392084839485e-05, + "loss": 3.6113, + "step": 3775 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.2726616106897797, + "loss": 3.5156, + "loss_text": 0.4238, + "state_loss_0": 0.0, + "step": 3775 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1562, + "epoch": 0.2726616106897797, + "loss": 3.6875, + "loss_text": 1.7266, + "state_loss_0": 0.0, + "step": 3775 + }, + { + "epoch": 0.27446731672083785, + "grad_norm": 0.9665400981903076, + "learning_rate": 4.85076128888037e-05, + "loss": 3.6064, + "step": 3800 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.125, + "epoch": 0.27446731672083785, + "loss": 3.6094, + "loss_text": 1.0703, + "state_loss_0": 0.0, + "step": 3800 + }, + { + "audio_loss_0": 3.5781, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2812, + "epoch": 0.27446731672083785, + "loss": 3.6562, + "loss_text": 0.8164, + "state_loss_0": 0.0, + "step": 3800 + }, + { + "epoch": 0.27627302275189597, + "grad_norm": 0.9274227023124695, + "learning_rate": 4.848263268709931e-05, + "loss": 3.6047, + "step": 3825 + }, + { + "audio_loss_0": 3.7188, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.27627302275189597, + "loss": 3.7188, + "loss_text": 0.8672, + "state_loss_0": 0.0, + "step": 3825 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.27627302275189597, + "loss": 3.5625, + "loss_text": 0.832, + "state_loss_0": 0.0, + "step": 3825 + }, + { + "epoch": 0.27807872878295414, + "grad_norm": 0.9280036091804504, + "learning_rate": 4.8457451693323614e-05, + "loss": 3.6005, + "step": 3850 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0625, + "epoch": 0.27807872878295414, + "loss": 3.5625, + "loss_text": 1.0156, + "state_loss_0": 0.0, + "step": 3850 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.625, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.2812, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.27807872878295414, + "loss": 3.4375, + "loss_text": 0.6484, + "state_loss_0": 0.0, + "step": 3850 + }, + { + "epoch": 0.27988443481401226, + "grad_norm": 0.8088470101356506, + "learning_rate": 4.843207012279077e-05, + "loss": 3.6148, + "step": 3875 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2812, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.8906, + "epoch": 0.27988443481401226, + "loss": 3.3594, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 3875 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.1875, + "epoch": 0.27988443481401226, + "loss": 3.6875, + "loss_text": 1.4141, + "state_loss_0": 0.0, + "step": 3875 + }, + { + "epoch": 0.28169014084507044, + "grad_norm": 0.9495465159416199, + "learning_rate": 4.8406488192530045e-05, + "loss": 3.5975, + "step": 3900 + }, + { + "audio_loss_0": 3.7031, + "audio_loss_1": 3.875, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.25, + "epoch": 0.28169014084507044, + "loss": 3.6719, + "loss_text": 0.7734, + "state_loss_0": 0.0, + "step": 3900 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1875, + "epoch": 0.28169014084507044, + "loss": 3.6562, + "loss_text": 1.1172, + "state_loss_0": 0.0, + "step": 3900 + }, + { + "epoch": 0.28349584687612855, + "grad_norm": 0.8003600239753723, + "learning_rate": 4.838070612128389e-05, + "loss": 3.6029, + "step": 3925 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.25, + "audio_loss_6": 4.1562, + "epoch": 0.28349584687612855, + "loss": 3.5938, + "loss_text": 0.8594, + "state_loss_0": 0.0, + "step": 3925 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.625, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.125, + "epoch": 0.28349584687612855, + "loss": 3.6406, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 3925 + }, + { + "epoch": 0.28530155290718673, + "grad_norm": 1.0849804878234863, + "learning_rate": 4.83547241295061e-05, + "loss": 3.6018, + "step": 3950 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.28530155290718673, + "loss": 3.4688, + "loss_text": 0.6953, + "state_loss_0": 0.0, + "step": 3950 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.125, + "epoch": 0.28530155290718673, + "loss": 3.6406, + "loss_text": 1.3125, + "state_loss_0": 0.0, + "step": 3950 + }, + { + "epoch": 0.28710725893824485, + "grad_norm": 0.9131338596343994, + "learning_rate": 4.8328542439359925e-05, + "loss": 3.5898, + "step": 3975 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.2188, + "audio_loss_6": 3.6719, + "epoch": 0.28710725893824485, + "loss": 3.4219, + "loss_text": 0.7305, + "state_loss_0": 0.0, + "step": 3975 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0312, + "epoch": 0.28710725893824485, + "loss": 3.5312, + "loss_text": 0.8125, + "state_loss_0": 0.0, + "step": 3975 + }, + { + "epoch": 0.288912964969303, + "grad_norm": 0.9610629081726074, + "learning_rate": 4.8302161274716156e-05, + "loss": 3.6073, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_AQACONVA": 3.4688, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.2812, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.8594, + "eval_audio_loss_2_AQACONVA": 3.5469, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 1.6953, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_AQACONVA": 3.4062, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.7031, + "eval_loss_AQACONVA": 3.7031, + "eval_loss_text_AQACONVA": 1.4375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.8594, + "eval_loss_AQACONVA": 3.8594, + "eval_loss_text_AQACONVA": 2.5938, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_AQACONVA": 3.3281, + "eval_audio_loss_1_AQACONVA": 3.9219, + "eval_audio_loss_2_AQACONVA": 3.625, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.9375, + "eval_loss_AQACONVA": 3.9375, + "eval_loss_text_AQACONVA": 2.6875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_AQACONVA": 3.6719, + "eval_audio_loss_1_AQACONVA": 3.8281, + "eval_audio_loss_2_AQACONVA": 3.6875, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.9062, + "eval_loss_AQACONVA": 3.9062, + "eval_loss_text_AQACONVA": 2.2188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_AQACONVA": 3.6719, + "eval_audio_loss_1_AQACONVA": 3.9219, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.8594, + "eval_loss_AQACONVA": 3.8594, + "eval_loss_text_AQACONVA": 1.8203, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.1875, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 2.0, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 1.6875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.5938, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_RQACONVA": 3.7344, + "eval_audio_loss_1_RQACONVA": 3.9219, + "eval_audio_loss_2_RQACONVA": 3.6875, + "eval_audio_loss_3_RQACONVA": 4.75, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4688, + "eval_audio_loss_6_RQACONVA": 4.25, + "eval_loss": 3.9531, + "eval_loss_RQACONVA": 3.9531, + "eval_loss_text_RQACONVA": 2.2812, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_RQACONVA": 3.375, + "eval_audio_loss_1_RQACONVA": 3.875, + "eval_audio_loss_2_RQACONVA": 3.6094, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.9062, + "eval_loss_RQACONVA": 3.9062, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_RQACONVA": 3.4688, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7969, + "eval_loss_RQACONVA": 3.7969, + "eval_loss_text_RQACONVA": 1.8828, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.875, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7656, + "eval_loss_RQACONVA": 3.7656, + "eval_loss_text_RQACONVA": 1.8047, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.8438, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 1.9453, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.6523, + "eval_loss_RQACONV": 0.6523, + "eval_loss_text_RQACONV": 1.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.6445, + "eval_loss_RQACONV": 0.6445, + "eval_loss_text_RQACONV": 1.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.7773, + "eval_loss_RQACONV": 0.7773, + "eval_loss_text_RQACONV": 1.5547, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.6367, + "eval_loss_RQACONV": 0.6367, + "eval_loss_text_RQACONV": 1.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.7461, + "eval_loss_RQACONV": 0.7461, + "eval_loss_text_RQACONV": 1.4922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.8281, + "eval_loss_RQACONV": 0.8281, + "eval_loss_text_RQACONV": 1.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.4648, + "eval_loss_RQACONV": 0.4648, + "eval_loss_text_RQACONV": 0.9297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.4336, + "eval_loss_RQACONV": 0.4336, + "eval_loss_text_RQACONV": 0.8672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.6211, + "eval_loss_RQACONV": 0.6211, + "eval_loss_text_RQACONV": 1.2422, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.498, + "eval_loss_RQACONV": 0.498, + "eval_loss_text_RQACONV": 0.9961, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.4805, + "eval_loss_RQACONV": 0.4805, + "eval_loss_text_RQACONV": 0.9609, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 1.2656, + "eval_loss_RQACONV": 1.2656, + "eval_loss_text_RQACONV": 2.5312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.1484, + "eval_loss_RQACONV": 0.1484, + "eval_loss_text_RQACONV": 0.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.1758, + "eval_loss_RQACONV": 0.1758, + "eval_loss_text_RQACONV": 0.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.0972, + "eval_loss_RQACONV": 0.0972, + "eval_loss_text_RQACONV": 0.1943, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.1836, + "eval_loss_RQACONV": 0.1836, + "eval_loss_text_RQACONV": 0.3672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.042, + "eval_loss_RQACONV": 0.042, + "eval_loss_text_RQACONV": 0.084, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.1514, + "eval_loss_RQACONV": 0.1514, + "eval_loss_text_RQACONV": 0.3027, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.1099, + "eval_loss_RQACONV": 0.1099, + "eval_loss_text_RQACONV": 0.2197, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.1367, + "eval_loss_RQACONV": 0.1367, + "eval_loss_text_RQACONV": 0.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 0.625, + "eval_loss_RQACONV": 0.625, + "eval_loss_text_RQACONV": 1.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4000 + }, + { + "epoch": 0.288912964969303, + "eval_loss": 1.6356364488601685, + "eval_runtime": 27.6132, + "eval_samples_per_second": 193.711, + "eval_steps_per_second": 1.521, + "step": 4000 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2188, + "epoch": 0.288912964969303, + "loss": 3.6719, + "loss_text": 0.9062, + "state_loss_0": 0.0, + "step": 4000 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.288912964969303, + "loss": 3.5938, + "loss_text": 0.6523, + "state_loss_0": 0.0, + "step": 4000 + }, + { + "epoch": 0.29071867100036114, + "grad_norm": 1.0071992874145508, + "learning_rate": 4.8275580861151246e-05, + "loss": 3.5973, + "step": 4025 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.125, + "epoch": 0.29071867100036114, + "loss": 3.6719, + "loss_text": 1.1562, + "state_loss_0": 0.0, + "step": 4025 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.25, + "audio_loss_6": 4.125, + "epoch": 0.29071867100036114, + "loss": 3.5625, + "loss_text": 0.8672, + "state_loss_0": 0.0, + "step": 4025 + }, + { + "epoch": 0.29252437703141926, + "grad_norm": 0.8347063064575195, + "learning_rate": 4.824880142594534e-05, + "loss": 3.608, + "step": 4050 + }, + { + "audio_loss_0": 3.6094, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.29252437703141926, + "loss": 3.6406, + "loss_text": 0.6758, + "state_loss_0": 0.0, + "step": 4050 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.29252437703141926, + "loss": 3.5938, + "loss_text": 0.9297, + "state_loss_0": 0.0, + "step": 4050 + }, + { + "epoch": 0.29433008306247743, + "grad_norm": 0.8467749953269958, + "learning_rate": 4.822182319808035e-05, + "loss": 3.5866, + "step": 4075 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 4.0312, + "audio_loss_2": 3.625, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.29433008306247743, + "loss": 3.7344, + "loss_text": 1.0625, + "state_loss_0": 0.0, + "step": 4075 + }, + { + "audio_loss_0": 3.625, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.29433008306247743, + "loss": 3.6406, + "loss_text": 0.7617, + "state_loss_0": 0.0, + "step": 4075 + }, + { + "epoch": 0.29613578909353555, + "grad_norm": 0.8130489587783813, + "learning_rate": 4.819464640823802e-05, + "loss": 3.5948, + "step": 4100 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.125, + "audio_loss_6": 3.9531, + "epoch": 0.29613578909353555, + "loss": 3.5, + "loss_text": 0.9453, + "state_loss_0": 0.0, + "step": 4100 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.1875, + "epoch": 0.29613578909353555, + "loss": 3.5938, + "loss_text": 0.7969, + "state_loss_0": 0.0, + "step": 4100 + }, + { + "epoch": 0.2979414951245937, + "grad_norm": 1.0241819620132446, + "learning_rate": 4.81672712887979e-05, + "loss": 3.595, + "step": 4125 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.75, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.125, + "epoch": 0.2979414951245937, + "loss": 3.6094, + "loss_text": 0.9609, + "state_loss_0": 0.0, + "step": 4125 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.2979414951245937, + "loss": 3.5312, + "loss_text": 0.3223, + "state_loss_0": 0.0, + "step": 4125 + }, + { + "epoch": 0.29974720115565184, + "grad_norm": 1.0117948055267334, + "learning_rate": 4.81396980738354e-05, + "loss": 3.5953, + "step": 4150 + }, + { + "audio_loss_0": 3.8281, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.29974720115565184, + "loss": 3.7031, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 4150 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 4.0312, + "audio_loss_2": 3.6562, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.25, + "epoch": 0.29974720115565184, + "loss": 3.6875, + "loss_text": 0.4824, + "state_loss_0": 0.0, + "step": 4150 + }, + { + "epoch": 0.30155290718671, + "grad_norm": 0.9406443238258362, + "learning_rate": 4.811192699911978e-05, + "loss": 3.5953, + "step": 4175 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0938, + "epoch": 0.30155290718671, + "loss": 3.5781, + "loss_text": 1.0391, + "state_loss_0": 0.0, + "step": 4175 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0312, + "epoch": 0.30155290718671, + "loss": 3.5938, + "loss_text": 1.0156, + "state_loss_0": 0.0, + "step": 4175 + }, + { + "epoch": 0.30335861321776814, + "grad_norm": 0.8224254250526428, + "learning_rate": 4.808395830211215e-05, + "loss": 3.5975, + "step": 4200 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.30335861321776814, + "loss": 3.6562, + "loss_text": 1.3594, + "state_loss_0": 0.0, + "step": 4200 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.25, + "audio_loss_6": 3.9844, + "epoch": 0.30335861321776814, + "loss": 3.5469, + "loss_text": 1.1562, + "state_loss_0": 0.0, + "step": 4200 + }, + { + "epoch": 0.3051643192488263, + "grad_norm": 0.9104959964752197, + "learning_rate": 4.805579222196339e-05, + "loss": 3.5934, + "step": 4225 + }, + { + "audio_loss_0": 3.6562, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.3051643192488263, + "loss": 3.7188, + "loss_text": 1.1875, + "state_loss_0": 0.0, + "step": 4225 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.875, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2812, + "epoch": 0.3051643192488263, + "loss": 3.7344, + "loss_text": 1.1172, + "state_loss_0": 0.0, + "step": 4225 + }, + { + "epoch": 0.30697002527988443, + "grad_norm": 0.8740366101264954, + "learning_rate": 4.802742899951215e-05, + "loss": 3.5942, + "step": 4250 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0938, + "epoch": 0.30697002527988443, + "loss": 3.6094, + "loss_text": 0.9844, + "state_loss_0": 0.0, + "step": 4250 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.9688, + "audio_loss_2": 3.5, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.30697002527988443, + "loss": 3.6406, + "loss_text": 0.6289, + "state_loss_0": 0.0, + "step": 4250 + }, + { + "epoch": 0.3087757313109426, + "grad_norm": 1.0916526317596436, + "learning_rate": 4.7998868877282775e-05, + "loss": 3.5888, + "step": 4275 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0, + "epoch": 0.3087757313109426, + "loss": 3.5938, + "loss_text": 1.4844, + "state_loss_0": 0.0, + "step": 4275 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.0312, + "epoch": 0.3087757313109426, + "loss": 3.5781, + "loss_text": 1.3672, + "state_loss_0": 0.0, + "step": 4275 + }, + { + "epoch": 0.3105814373420007, + "grad_norm": 0.9322145581245422, + "learning_rate": 4.797011209948326e-05, + "loss": 3.5889, + "step": 4300 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0938, + "epoch": 0.3105814373420007, + "loss": 3.625, + "loss_text": 1.0391, + "state_loss_0": 0.0, + "step": 4300 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.0938, + "epoch": 0.3105814373420007, + "loss": 3.7031, + "loss_text": 1.2891, + "state_loss_0": 0.0, + "step": 4300 + }, + { + "epoch": 0.31238714337305884, + "grad_norm": 0.8535451889038086, + "learning_rate": 4.7941158912003084e-05, + "loss": 3.5882, + "step": 4325 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1875, + "audio_loss_6": 3.9375, + "epoch": 0.31238714337305884, + "loss": 3.4375, + "loss_text": 0.4648, + "state_loss_0": 0.0, + "step": 4325 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0625, + "epoch": 0.31238714337305884, + "loss": 3.5625, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 4325 + }, + { + "epoch": 0.314192849404117, + "grad_norm": 0.9114071726799011, + "learning_rate": 4.791200956241119e-05, + "loss": 3.5863, + "step": 4350 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0625, + "epoch": 0.314192849404117, + "loss": 3.5625, + "loss_text": 0.5977, + "state_loss_0": 0.0, + "step": 4350 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.375, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0938, + "epoch": 0.314192849404117, + "loss": 3.4844, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 4350 + }, + { + "epoch": 0.31599855543517513, + "grad_norm": 0.8336448073387146, + "learning_rate": 4.788266429995385e-05, + "loss": 3.5892, + "step": 4375 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1875, + "audio_loss_6": 3.9531, + "epoch": 0.31599855543517513, + "loss": 3.4688, + "loss_text": 0.6719, + "state_loss_0": 0.0, + "step": 4375 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0, + "epoch": 0.31599855543517513, + "loss": 3.5, + "loss_text": 1.1406, + "state_loss_0": 0.0, + "step": 4375 + }, + { + "epoch": 0.3178042614662333, + "grad_norm": 1.021711826324463, + "learning_rate": 4.78531233755525e-05, + "loss": 3.5866, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_AQACONVA": 3.4688, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 2.1094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.8594, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 1.6953, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_AQACONVA": 3.4062, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.7031, + "eval_loss_AQACONVA": 3.7031, + "eval_loss_text_AQACONVA": 1.4219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 2.5469, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_AQACONVA": 3.3281, + "eval_audio_loss_1_AQACONVA": 3.8906, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.9219, + "eval_loss_AQACONVA": 3.9219, + "eval_loss_text_AQACONVA": 2.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_AQACONVA": 3.6719, + "eval_audio_loss_1_AQACONVA": 3.8438, + "eval_audio_loss_2_AQACONVA": 3.6875, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.9062, + "eval_loss_AQACONVA": 3.9062, + "eval_loss_text_AQACONVA": 2.25, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_AQACONVA": 3.6719, + "eval_audio_loss_1_AQACONVA": 3.9219, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.6875, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 1.8672, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.1562, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 2.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 1.6641, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3906, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.7344, + "eval_loss_RQACONVA": 3.7344, + "eval_loss_text_RQACONVA": 2.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_RQACONVA": 3.5938, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.6406, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.5, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.875, + "eval_loss_RQACONVA": 3.875, + "eval_loss_text_RQACONVA": 2.0625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.875, + "eval_loss_RQACONVA": 3.875, + "eval_loss_text_RQACONVA": 2.5156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_RQACONVA": 3.4531, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.9141, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.8438, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7344, + "eval_loss_RQACONVA": 3.7344, + "eval_loss_text_RQACONVA": 1.8359, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.8438, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.7656, + "eval_loss_RQACONVA": 3.7656, + "eval_loss_text_RQACONVA": 2.0312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.5625, + "eval_loss_RQACONV": 0.5625, + "eval_loss_text_RQACONV": 1.125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.6836, + "eval_loss_RQACONV": 0.6836, + "eval_loss_text_RQACONV": 1.3672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.6367, + "eval_loss_RQACONV": 0.6367, + "eval_loss_text_RQACONV": 1.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.8164, + "eval_loss_RQACONV": 0.8164, + "eval_loss_text_RQACONV": 1.6328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.7031, + "eval_loss_RQACONV": 0.7031, + "eval_loss_text_RQACONV": 1.4062, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.8594, + "eval_loss_RQACONV": 0.8594, + "eval_loss_text_RQACONV": 1.7188, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.4609, + "eval_loss_RQACONV": 0.4609, + "eval_loss_text_RQACONV": 0.9219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.4297, + "eval_loss_RQACONV": 0.4297, + "eval_loss_text_RQACONV": 0.8594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.6094, + "eval_loss_RQACONV": 0.6094, + "eval_loss_text_RQACONV": 1.2188, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 1.2656, + "eval_loss_RQACONV": 1.2656, + "eval_loss_text_RQACONV": 2.5312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.166, + "eval_loss_RQACONV": 0.166, + "eval_loss_text_RQACONV": 0.332, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.0879, + "eval_loss_RQACONV": 0.0879, + "eval_loss_text_RQACONV": 0.1758, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.1748, + "eval_loss_RQACONV": 0.1748, + "eval_loss_text_RQACONV": 0.3496, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.1523, + "eval_loss_RQACONV": 0.1523, + "eval_loss_text_RQACONV": 0.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.0457, + "eval_loss_RQACONV": 0.0457, + "eval_loss_text_RQACONV": 0.0913, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.1484, + "eval_loss_RQACONV": 0.1484, + "eval_loss_text_RQACONV": 0.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.1045, + "eval_loss_RQACONV": 0.1045, + "eval_loss_text_RQACONV": 0.209, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.1377, + "eval_loss_RQACONV": 0.1377, + "eval_loss_text_RQACONV": 0.2754, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 0.6211, + "eval_loss_RQACONV": 0.6211, + "eval_loss_text_RQACONV": 1.2422, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4400 + }, + { + "epoch": 0.3178042614662333, + "eval_loss": 1.634151816368103, + "eval_runtime": 28.0881, + "eval_samples_per_second": 190.436, + "eval_steps_per_second": 1.495, + "step": 4400 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.3178042614662333, + "loss": 3.6094, + "loss_text": 0.8828, + "state_loss_0": 0.0, + "step": 4400 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 4.0, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.25, + "epoch": 0.3178042614662333, + "loss": 3.7344, + "loss_text": 1.2422, + "state_loss_0": 0.0, + "step": 4400 + }, + { + "epoch": 0.3196099674972914, + "grad_norm": 0.9549098610877991, + "learning_rate": 4.7823387041801616e-05, + "loss": 3.5864, + "step": 4425 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0625, + "epoch": 0.3196099674972914, + "loss": 3.5, + "loss_text": 0.4727, + "state_loss_0": 0.0, + "step": 4425 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.3196099674972914, + "loss": 3.5938, + "loss_text": 0.9297, + "state_loss_0": 0.0, + "step": 4425 + }, + { + "epoch": 0.3214156735283496, + "grad_norm": 1.0182392597198486, + "learning_rate": 4.779345555296656e-05, + "loss": 3.587, + "step": 4450 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1562, + "epoch": 0.3214156735283496, + "loss": 3.6406, + "loss_text": 0.7656, + "state_loss_0": 0.0, + "step": 4450 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.3214156735283496, + "loss": 3.5938, + "loss_text": 1.0938, + "state_loss_0": 0.0, + "step": 4450 + }, + { + "epoch": 0.3232213795594077, + "grad_norm": 0.9262785315513611, + "learning_rate": 4.776332916498139e-05, + "loss": 3.5852, + "step": 4475 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.2188, + "epoch": 0.3232213795594077, + "loss": 3.5938, + "loss_text": 0.8203, + "state_loss_0": 0.0, + "step": 4475 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.375, + "audio_loss_3": 4.2812, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0938, + "epoch": 0.3232213795594077, + "loss": 3.5156, + "loss_text": 0.6797, + "state_loss_0": 0.0, + "step": 4475 + }, + { + "epoch": 0.3250270855904659, + "grad_norm": 0.8989639282226562, + "learning_rate": 4.77330081354467e-05, + "loss": 3.5843, + "step": 4500 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0312, + "epoch": 0.3250270855904659, + "loss": 3.5312, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 4500 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.1562, + "epoch": 0.3250270855904659, + "loss": 3.5156, + "loss_text": 0.9297, + "state_loss_0": 0.0, + "step": 4500 + }, + { + "epoch": 0.326832791621524, + "grad_norm": 0.8046611547470093, + "learning_rate": 4.770249272362738e-05, + "loss": 3.5808, + "step": 4525 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1875, + "epoch": 0.326832791621524, + "loss": 3.625, + "loss_text": 0.8086, + "state_loss_0": 0.0, + "step": 4525 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.25, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.326832791621524, + "loss": 3.5625, + "loss_text": 0.9219, + "state_loss_0": 0.0, + "step": 4525 + }, + { + "epoch": 0.3286384976525822, + "grad_norm": 1.0469828844070435, + "learning_rate": 4.7671783190450425e-05, + "loss": 3.5886, + "step": 4550 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 4.0625, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.3286384976525822, + "loss": 3.6562, + "loss_text": 0.9414, + "state_loss_0": 0.0, + "step": 4550 + }, + { + "audio_loss_0": 3.7812, + "audio_loss_1": 3.9375, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.5, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.25, + "epoch": 0.3286384976525822, + "loss": 3.875, + "loss_text": 1.75, + "state_loss_0": 0.0, + "step": 4550 + }, + { + "epoch": 0.3304442036836403, + "grad_norm": 0.9045549035072327, + "learning_rate": 4.764087979850269e-05, + "loss": 3.585, + "step": 4575 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0938, + "epoch": 0.3304442036836403, + "loss": 3.625, + "loss_text": 1.0938, + "state_loss_0": 0.0, + "step": 4575 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.375, + "audio_loss_4": 4.375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.125, + "epoch": 0.3304442036836403, + "loss": 3.5781, + "loss_text": 0.8047, + "state_loss_0": 0.0, + "step": 4575 + }, + { + "epoch": 0.3322499097146984, + "grad_norm": 0.8451515436172485, + "learning_rate": 4.760978281202866e-05, + "loss": 3.5805, + "step": 4600 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1875, + "epoch": 0.3322499097146984, + "loss": 3.625, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 4600 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.9062, + "audio_loss_2": 3.6875, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1875, + "epoch": 0.3322499097146984, + "loss": 3.6875, + "loss_text": 0.6758, + "state_loss_0": 0.0, + "step": 4600 + }, + { + "epoch": 0.3340556157457566, + "grad_norm": 0.8398804068565369, + "learning_rate": 4.757849249692817e-05, + "loss": 3.5774, + "step": 4625 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.3340556157457566, + "loss": 3.7031, + "loss_text": 1.2656, + "state_loss_0": 0.0, + "step": 4625 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 3.9844, + "epoch": 0.3340556157457566, + "loss": 3.4688, + "loss_text": 0.8359, + "state_loss_0": 0.0, + "step": 4625 + }, + { + "epoch": 0.3358613217768147, + "grad_norm": 1.0236443281173706, + "learning_rate": 4.754700912075417e-05, + "loss": 3.5815, + "step": 4650 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.3358613217768147, + "loss": 3.6094, + "loss_text": 0.6367, + "state_loss_0": 0.0, + "step": 4650 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0312, + "epoch": 0.3358613217768147, + "loss": 3.4688, + "loss_text": 0.9453, + "state_loss_0": 0.0, + "step": 4650 + }, + { + "epoch": 0.3376670278078729, + "grad_norm": 0.8711819648742676, + "learning_rate": 4.7515332952710367e-05, + "loss": 3.5812, + "step": 4675 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.0625, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.8281, + "epoch": 0.3376670278078729, + "loss": 3.3281, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 4675 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0312, + "epoch": 0.3376670278078729, + "loss": 3.5625, + "loss_text": 0.7891, + "state_loss_0": 0.0, + "step": 4675 + }, + { + "epoch": 0.339472733838931, + "grad_norm": 0.9088438749313354, + "learning_rate": 4.7483464263649e-05, + "loss": 3.5752, + "step": 4700 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.339472733838931, + "loss": 3.5938, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 4700 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.339472733838931, + "loss": 3.5156, + "loss_text": 0.6641, + "state_loss_0": 0.0, + "step": 4700 + }, + { + "epoch": 0.3412784398699892, + "grad_norm": 0.9264962077140808, + "learning_rate": 4.745140332606849e-05, + "loss": 3.5791, + "step": 4725 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.9688, + "audio_loss_2": 3.7812, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1562, + "epoch": 0.3412784398699892, + "loss": 3.7031, + "loss_text": 0.7852, + "state_loss_0": 0.0, + "step": 4725 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1875, + "audio_loss_6": 3.9688, + "epoch": 0.3412784398699892, + "loss": 3.4375, + "loss_text": 0.5156, + "state_loss_0": 0.0, + "step": 4725 + }, + { + "epoch": 0.3430841459010473, + "grad_norm": 0.895003616809845, + "learning_rate": 4.741915041411108e-05, + "loss": 3.5723, + "step": 4750 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.25, + "epoch": 0.3430841459010473, + "loss": 3.5312, + "loss_text": 0.625, + "state_loss_0": 0.0, + "step": 4750 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.625, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0625, + "epoch": 0.3430841459010473, + "loss": 3.625, + "loss_text": 0.7695, + "state_loss_0": 0.0, + "step": 4750 + }, + { + "epoch": 0.3448898519321055, + "grad_norm": 0.8925836086273193, + "learning_rate": 4.738670580356055e-05, + "loss": 3.5674, + "step": 4775 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.3448898519321055, + "loss": 3.5469, + "loss_text": 0.8789, + "state_loss_0": 0.0, + "step": 4775 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.3448898519321055, + "loss": 3.5312, + "loss_text": 0.8359, + "state_loss_0": 0.0, + "step": 4775 + }, + { + "epoch": 0.3466955579631636, + "grad_norm": 0.8994699120521545, + "learning_rate": 4.735406977183982e-05, + "loss": 3.5719, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_AQACONVA": 3.4531, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5312, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.25, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 2.0938, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.8281, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 1.7188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_AQACONVA": 3.3906, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 1.4609, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.7031, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 2.5625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_AQACONVA": 3.3281, + "eval_audio_loss_1_AQACONVA": 3.8906, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.9219, + "eval_loss_AQACONVA": 3.9219, + "eval_loss_text_AQACONVA": 2.7344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_AQACONVA": 3.625, + "eval_audio_loss_1_AQACONVA": 3.8281, + "eval_audio_loss_2_AQACONVA": 3.6719, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.9062, + "eval_loss_AQACONVA": 3.9062, + "eval_loss_text_AQACONVA": 2.2031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_AQACONVA": 3.6719, + "eval_audio_loss_1_AQACONVA": 3.9062, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 1.8984, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.1562, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.0625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 1.7031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3906, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.7344, + "eval_loss_RQACONVA": 3.7344, + "eval_loss_text_RQACONVA": 2.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_RQACONVA": 3.6875, + "eval_audio_loss_1_RQACONVA": 3.9531, + "eval_audio_loss_2_RQACONVA": 3.7188, + "eval_audio_loss_3_RQACONVA": 4.7188, + "eval_audio_loss_4_RQACONVA": 4.5312, + "eval_audio_loss_5_RQACONVA": 4.4688, + "eval_audio_loss_6_RQACONVA": 4.2812, + "eval_loss": 3.9531, + "eval_loss_RQACONVA": 3.9531, + "eval_loss_text_RQACONVA": 2.2812, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.8594, + "eval_loss_RQACONVA": 3.8594, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_RQACONVA": 3.3125, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 1.8672, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_audio_loss_0_RQACONVA": 3.3281, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.4707, + "eval_loss_RQACONV": 0.4707, + "eval_loss_text_RQACONV": 0.9414, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.6133, + "eval_loss_RQACONV": 0.6133, + "eval_loss_text_RQACONV": 1.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.8125, + "eval_loss_RQACONV": 0.8125, + "eval_loss_text_RQACONV": 1.625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.6992, + "eval_loss_RQACONV": 0.6992, + "eval_loss_text_RQACONV": 1.3984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.8359, + "eval_loss_RQACONV": 0.8359, + "eval_loss_text_RQACONV": 1.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.4648, + "eval_loss_RQACONV": 0.4648, + "eval_loss_text_RQACONV": 0.9297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.4297, + "eval_loss_RQACONV": 0.4297, + "eval_loss_text_RQACONV": 0.8594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.6016, + "eval_loss_RQACONV": 0.6016, + "eval_loss_text_RQACONV": 1.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.5273, + "eval_loss_RQACONV": 0.5273, + "eval_loss_text_RQACONV": 1.0547, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.4824, + "eval_loss_RQACONV": 0.4824, + "eval_loss_text_RQACONV": 0.9648, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 1.2812, + "eval_loss_RQACONV": 1.2812, + "eval_loss_text_RQACONV": 2.5625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.1377, + "eval_loss_RQACONV": 0.1377, + "eval_loss_text_RQACONV": 0.2754, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.1709, + "eval_loss_RQACONV": 0.1709, + "eval_loss_text_RQACONV": 0.3418, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.0884, + "eval_loss_RQACONV": 0.0884, + "eval_loss_text_RQACONV": 0.1768, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.1787, + "eval_loss_RQACONV": 0.1787, + "eval_loss_text_RQACONV": 0.3574, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.041, + "eval_loss_RQACONV": 0.041, + "eval_loss_text_RQACONV": 0.082, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.1484, + "eval_loss_RQACONV": 0.1484, + "eval_loss_text_RQACONV": 0.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.1187, + "eval_loss_RQACONV": 0.1187, + "eval_loss_text_RQACONV": 0.2373, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.1377, + "eval_loss_RQACONV": 0.1377, + "eval_loss_text_RQACONV": 0.2754, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 0.6211, + "eval_loss_RQACONV": 0.6211, + "eval_loss_text_RQACONV": 1.2422, + "eval_state_loss_0_RQACONV": 0.0, + "step": 4800 + }, + { + "epoch": 0.3466955579631636, + "eval_loss": 1.6301740407943726, + "eval_runtime": 27.4194, + "eval_samples_per_second": 195.081, + "eval_steps_per_second": 1.532, + "step": 4800 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.5, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.25, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.3466955579631636, + "loss": 3.4688, + "loss_text": 0.4473, + "state_loss_0": 0.0, + "step": 4800 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.1875, + "epoch": 0.3466955579631636, + "loss": 3.8125, + "loss_text": 1.7578, + "state_loss_0": 0.0, + "step": 4800 + }, + { + "epoch": 0.34850126399422177, + "grad_norm": 1.0860153436660767, + "learning_rate": 4.732124259800859e-05, + "loss": 3.5768, + "step": 4825 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 3.9531, + "epoch": 0.34850126399422177, + "loss": 3.4688, + "loss_text": 0.4844, + "state_loss_0": 0.0, + "step": 4825 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0625, + "epoch": 0.34850126399422177, + "loss": 3.5938, + "loss_text": 1.0, + "state_loss_0": 0.0, + "step": 4825 + }, + { + "epoch": 0.3503069700252799, + "grad_norm": 0.8934564590454102, + "learning_rate": 4.728822456276095e-05, + "loss": 3.577, + "step": 4850 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0312, + "epoch": 0.3503069700252799, + "loss": 3.5625, + "loss_text": 1.2188, + "state_loss_0": 0.0, + "step": 4850 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.25, + "audio_loss_6": 4.125, + "epoch": 0.3503069700252799, + "loss": 3.7031, + "loss_text": 1.2969, + "state_loss_0": 0.0, + "step": 4850 + }, + { + "epoch": 0.352112676056338, + "grad_norm": 0.8686373233795166, + "learning_rate": 4.725501594842299e-05, + "loss": 3.5784, + "step": 4875 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.0625, + "epoch": 0.352112676056338, + "loss": 3.5625, + "loss_text": 0.7969, + "state_loss_0": 0.0, + "step": 4875 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1875, + "audio_loss_6": 3.9844, + "epoch": 0.352112676056338, + "loss": 3.5312, + "loss_text": 1.3203, + "state_loss_0": 0.0, + "step": 4875 + }, + { + "epoch": 0.3539183820873962, + "grad_norm": 0.8405879139900208, + "learning_rate": 4.722161703895036e-05, + "loss": 3.5725, + "step": 4900 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.125, + "epoch": 0.3539183820873962, + "loss": 3.6406, + "loss_text": 1.1406, + "state_loss_0": 0.0, + "step": 4900 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0, + "epoch": 0.3539183820873962, + "loss": 3.5312, + "loss_text": 1.2734, + "state_loss_0": 0.0, + "step": 4900 + }, + { + "epoch": 0.3557240881184543, + "grad_norm": 0.8448956608772278, + "learning_rate": 4.7188028119925865e-05, + "loss": 3.5761, + "step": 4925 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.25, + "audio_loss_6": 4.2188, + "epoch": 0.3557240881184543, + "loss": 3.5469, + "loss_text": 0.8555, + "state_loss_0": 0.0, + "step": 4925 + }, + { + "audio_loss_0": 3.7969, + "audio_loss_1": 3.9375, + "audio_loss_2": 3.6562, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.5938, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.3125, + "epoch": 0.3557240881184543, + "loss": 3.8125, + "loss_text": 0.9492, + "state_loss_0": 0.0, + "step": 4925 + }, + { + "epoch": 0.35752979414951247, + "grad_norm": 0.7134974002838135, + "learning_rate": 4.715424947855703e-05, + "loss": 3.5657, + "step": 4950 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0938, + "epoch": 0.35752979414951247, + "loss": 3.5469, + "loss_text": 0.9531, + "state_loss_0": 0.0, + "step": 4950 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.25, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.35752979414951247, + "loss": 3.5625, + "loss_text": 0.5938, + "state_loss_0": 0.0, + "step": 4950 + }, + { + "epoch": 0.3593355001805706, + "grad_norm": 0.8881583213806152, + "learning_rate": 4.7120281403673634e-05, + "loss": 3.5671, + "step": 4975 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0938, + "epoch": 0.3593355001805706, + "loss": 3.6094, + "loss_text": 0.9492, + "state_loss_0": 0.0, + "step": 4975 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.3593355001805706, + "loss": 3.6094, + "loss_text": 1.1328, + "state_loss_0": 0.0, + "step": 4975 + }, + { + "epoch": 0.36114120621162876, + "grad_norm": 0.9710733890533447, + "learning_rate": 4.708612418572521e-05, + "loss": 3.5715, + "step": 5000 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0, + "epoch": 0.36114120621162876, + "loss": 3.4219, + "loss_text": 0.5938, + "state_loss_0": 0.0, + "step": 5000 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.0625, + "epoch": 0.36114120621162876, + "loss": 3.5938, + "loss_text": 0.8242, + "state_loss_0": 0.0, + "step": 5000 + }, + { + "epoch": 0.3629469122426869, + "grad_norm": 0.9413662552833557, + "learning_rate": 4.705177811677861e-05, + "loss": 3.5679, + "step": 5025 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.25, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0, + "epoch": 0.3629469122426869, + "loss": 3.5, + "loss_text": 1.2422, + "state_loss_0": 0.0, + "step": 5025 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.25, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.3629469122426869, + "loss": 3.5469, + "loss_text": 0.7734, + "state_loss_0": 0.0, + "step": 5025 + }, + { + "epoch": 0.36475261827374506, + "grad_norm": 1.2584950923919678, + "learning_rate": 4.70172434905155e-05, + "loss": 3.558, + "step": 5050 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0938, + "epoch": 0.36475261827374506, + "loss": 3.5469, + "loss_text": 0.7578, + "state_loss_0": 0.0, + "step": 5050 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1562, + "epoch": 0.36475261827374506, + "loss": 3.6094, + "loss_text": 0.8125, + "state_loss_0": 0.0, + "step": 5050 + }, + { + "epoch": 0.3665583243048032, + "grad_norm": 0.7811076045036316, + "learning_rate": 4.6982520602229794e-05, + "loss": 3.5683, + "step": 5075 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.2188, + "epoch": 0.3665583243048032, + "loss": 3.6562, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 5075 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0938, + "epoch": 0.3665583243048032, + "loss": 3.5469, + "loss_text": 0.6094, + "state_loss_0": 0.0, + "step": 5075 + }, + { + "epoch": 0.36836403033586135, + "grad_norm": 0.957200825214386, + "learning_rate": 4.694760974882522e-05, + "loss": 3.5761, + "step": 5100 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0, + "epoch": 0.36836403033586135, + "loss": 3.5625, + "loss_text": 1.4297, + "state_loss_0": 0.0, + "step": 5100 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.25, + "audio_loss_6": 4.125, + "epoch": 0.36836403033586135, + "loss": 3.6094, + "loss_text": 1.2734, + "state_loss_0": 0.0, + "step": 5100 + }, + { + "epoch": 0.37016973636691947, + "grad_norm": 1.200783610343933, + "learning_rate": 4.6912511228812695e-05, + "loss": 3.5711, + "step": 5125 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0625, + "epoch": 0.37016973636691947, + "loss": 3.5625, + "loss_text": 0.9727, + "state_loss_0": 0.0, + "step": 5125 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.75, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0, + "epoch": 0.37016973636691947, + "loss": 3.5938, + "loss_text": 1.0625, + "state_loss_0": 0.0, + "step": 5125 + }, + { + "epoch": 0.3719754423979776, + "grad_norm": 0.9100996255874634, + "learning_rate": 4.687722534230784e-05, + "loss": 3.5748, + "step": 5150 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.625, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0938, + "epoch": 0.3719754423979776, + "loss": 3.6094, + "loss_text": 1.1172, + "state_loss_0": 0.0, + "step": 5150 + }, + { + "audio_loss_0": 3.625, + "audio_loss_1": 4.0312, + "audio_loss_2": 3.8281, + "audio_loss_3": 4.75, + "audio_loss_4": 4.5, + "audio_loss_5": 4.5, + "audio_loss_6": 4.25, + "epoch": 0.3719754423979776, + "loss": 3.8438, + "loss_text": 1.3438, + "state_loss_0": 0.0, + "step": 5150 + }, + { + "epoch": 0.37378114842903576, + "grad_norm": 0.8001224994659424, + "learning_rate": 4.684175239102836e-05, + "loss": 3.5613, + "step": 5175 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0312, + "epoch": 0.37378114842903576, + "loss": 3.5, + "loss_text": 0.5859, + "state_loss_0": 0.0, + "step": 5175 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.375, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0938, + "epoch": 0.37378114842903576, + "loss": 3.6094, + "loss_text": 1.3125, + "state_loss_0": 0.0, + "step": 5175 + }, + { + "epoch": 0.3755868544600939, + "grad_norm": 0.7688998579978943, + "learning_rate": 4.680609267829149e-05, + "loss": 3.5636, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_AQACONVA": 3.4375, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4688, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 1.9922, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.8594, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 1.6641, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_AQACONVA": 3.3906, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 1.4375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_AQACONVA": 3.3281, + "eval_audio_loss_1_AQACONVA": 3.7031, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 2.5156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.8906, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.8906, + "eval_loss_AQACONVA": 3.8906, + "eval_loss_text_AQACONVA": 2.6406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_AQACONVA": 3.625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.6562, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 2.2031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_AQACONVA": 3.6719, + "eval_audio_loss_1_AQACONVA": 3.8906, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.6562, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 1.8828, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.1562, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 1.9766, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 1.5938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_RQACONVA": 3.5, + "eval_audio_loss_1_RQACONVA": 3.5625, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 1.9141, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_RQACONVA": 3.5625, + "eval_audio_loss_1_RQACONVA": 3.8438, + "eval_audio_loss_2_RQACONVA": 3.6406, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.8594, + "eval_loss_RQACONVA": 3.8594, + "eval_loss_text_RQACONVA": 1.9844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_RQACONVA": 3.3281, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 2.5312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7812, + "eval_loss_RQACONVA": 3.7812, + "eval_loss_text_RQACONVA": 1.8281, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_RQACONVA": 3.3125, + "eval_audio_loss_1_RQACONVA": 3.875, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 1.75, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_audio_loss_0_RQACONVA": 3.3281, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.2188, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 2.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.4668, + "eval_loss_RQACONV": 0.4668, + "eval_loss_text_RQACONV": 0.9336, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.6797, + "eval_loss_RQACONV": 0.6797, + "eval_loss_text_RQACONV": 1.3594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.7109, + "eval_loss_RQACONV": 0.7109, + "eval_loss_text_RQACONV": 1.4219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.6367, + "eval_loss_RQACONV": 0.6367, + "eval_loss_text_RQACONV": 1.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.707, + "eval_loss_RQACONV": 0.707, + "eval_loss_text_RQACONV": 1.4141, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.8672, + "eval_loss_RQACONV": 0.8672, + "eval_loss_text_RQACONV": 1.7344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.4688, + "eval_loss_RQACONV": 0.4688, + "eval_loss_text_RQACONV": 0.9375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.4336, + "eval_loss_RQACONV": 0.4336, + "eval_loss_text_RQACONV": 0.8672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.6133, + "eval_loss_RQACONV": 0.6133, + "eval_loss_text_RQACONV": 1.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.5195, + "eval_loss_RQACONV": 0.5195, + "eval_loss_text_RQACONV": 1.0391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.4941, + "eval_loss_RQACONV": 0.4941, + "eval_loss_text_RQACONV": 0.9883, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.4785, + "eval_loss_RQACONV": 0.4785, + "eval_loss_text_RQACONV": 0.957, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 1.2812, + "eval_loss_RQACONV": 1.2812, + "eval_loss_text_RQACONV": 2.5625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.1436, + "eval_loss_RQACONV": 0.1436, + "eval_loss_text_RQACONV": 0.2871, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.1689, + "eval_loss_RQACONV": 0.1689, + "eval_loss_text_RQACONV": 0.3379, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.0859, + "eval_loss_RQACONV": 0.0859, + "eval_loss_text_RQACONV": 0.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.1826, + "eval_loss_RQACONV": 0.1826, + "eval_loss_text_RQACONV": 0.3652, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.0452, + "eval_loss_RQACONV": 0.0452, + "eval_loss_text_RQACONV": 0.0903, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.1367, + "eval_loss_RQACONV": 0.1367, + "eval_loss_text_RQACONV": 0.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.1133, + "eval_loss_RQACONV": 0.1133, + "eval_loss_text_RQACONV": 0.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.125, + "eval_loss_RQACONV": 0.125, + "eval_loss_text_RQACONV": 0.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 0.6211, + "eval_loss_RQACONV": 0.6211, + "eval_loss_text_RQACONV": 1.2422, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5200 + }, + { + "epoch": 0.3755868544600939, + "eval_loss": 1.6255038976669312, + "eval_runtime": 27.7107, + "eval_samples_per_second": 193.03, + "eval_steps_per_second": 1.516, + "step": 5200 + }, + { + "audio_loss_0": 3.7031, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.3755868544600939, + "loss": 3.7188, + "loss_text": 0.9414, + "state_loss_0": 0.0, + "step": 5200 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.0938, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.9219, + "epoch": 0.3755868544600939, + "loss": 3.3594, + "loss_text": 0.8711, + "state_loss_0": 0.0, + "step": 5200 + }, + { + "epoch": 0.37739256049115205, + "grad_norm": 0.8059414625167847, + "learning_rate": 4.677024650901142e-05, + "loss": 3.5643, + "step": 5225 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.37739256049115205, + "loss": 3.5781, + "loss_text": 0.7695, + "state_loss_0": 0.0, + "step": 5225 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.37739256049115205, + "loss": 3.5625, + "loss_text": 0.5977, + "state_loss_0": 0.0, + "step": 5225 + }, + { + "epoch": 0.37919826652221017, + "grad_norm": 0.893162190914154, + "learning_rate": 4.673421418969664e-05, + "loss": 3.5654, + "step": 5250 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.25, + "audio_loss_6": 4.125, + "epoch": 0.37919826652221017, + "loss": 3.6094, + "loss_text": 1.0859, + "state_loss_0": 0.0, + "step": 5250 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.25, + "audio_loss_6": 4.1562, + "epoch": 0.37919826652221017, + "loss": 3.625, + "loss_text": 0.707, + "state_loss_0": 0.0, + "step": 5250 + }, + { + "epoch": 0.38100397255326834, + "grad_norm": 0.9233382940292358, + "learning_rate": 4.6697996028447366e-05, + "loss": 3.5649, + "step": 5275 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9688, + "epoch": 0.38100397255326834, + "loss": 3.4219, + "loss_text": 0.8359, + "state_loss_0": 0.0, + "step": 5275 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.875, + "audio_loss_2": 3.8594, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.0938, + "epoch": 0.38100397255326834, + "loss": 3.6875, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 5275 + }, + { + "epoch": 0.38280967858432646, + "grad_norm": 0.9649381041526794, + "learning_rate": 4.6661592334952876e-05, + "loss": 3.5631, + "step": 5300 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2188, + "audio_loss_6": 3.9375, + "epoch": 0.38280967858432646, + "loss": 3.5, + "loss_text": 1.0938, + "state_loss_0": 0.0, + "step": 5300 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.125, + "epoch": 0.38280967858432646, + "loss": 3.7656, + "loss_text": 1.6406, + "state_loss_0": 0.0, + "step": 5300 + }, + { + "epoch": 0.38461538461538464, + "grad_norm": 0.9257931113243103, + "learning_rate": 4.662500342048887e-05, + "loss": 3.5639, + "step": 5325 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.5, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.25, + "audio_loss_6": 4.1562, + "epoch": 0.38461538461538464, + "loss": 3.5156, + "loss_text": 0.5391, + "state_loss_0": 0.0, + "step": 5325 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2188, + "audio_loss_6": 3.9688, + "epoch": 0.38461538461538464, + "loss": 3.4844, + "loss_text": 0.6445, + "state_loss_0": 0.0, + "step": 5325 + }, + { + "epoch": 0.38642109064644276, + "grad_norm": 1.3566089868545532, + "learning_rate": 4.658822959791482e-05, + "loss": 3.5676, + "step": 5350 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0625, + "epoch": 0.38642109064644276, + "loss": 3.5312, + "loss_text": 1.1719, + "state_loss_0": 0.0, + "step": 5350 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0312, + "epoch": 0.38642109064644276, + "loss": 3.5312, + "loss_text": 1.2109, + "state_loss_0": 0.0, + "step": 5350 + }, + { + "epoch": 0.38822679667750093, + "grad_norm": 0.8726391196250916, + "learning_rate": 4.655127118167128e-05, + "loss": 3.5571, + "step": 5375 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1562, + "epoch": 0.38822679667750093, + "loss": 3.625, + "loss_text": 1.4453, + "state_loss_0": 0.0, + "step": 5375 + }, + { + "audio_loss_0": 3.7812, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1875, + "epoch": 0.38822679667750093, + "loss": 3.6562, + "loss_text": 0.6523, + "state_loss_0": 0.0, + "step": 5375 + }, + { + "epoch": 0.39003250270855905, + "grad_norm": 0.8735027313232422, + "learning_rate": 4.651412848777719e-05, + "loss": 3.5557, + "step": 5400 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.875, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0938, + "epoch": 0.39003250270855905, + "loss": 3.6094, + "loss_text": 0.832, + "state_loss_0": 0.0, + "step": 5400 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0625, + "epoch": 0.39003250270855905, + "loss": 3.5938, + "loss_text": 1.0625, + "state_loss_0": 0.0, + "step": 5400 + }, + { + "epoch": 0.39183820873961717, + "grad_norm": 0.8997559547424316, + "learning_rate": 4.64768018338272e-05, + "loss": 3.5588, + "step": 5425 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.5, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0, + "epoch": 0.39183820873961717, + "loss": 3.4375, + "loss_text": 0.6953, + "state_loss_0": 0.0, + "step": 5425 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.39183820873961717, + "loss": 3.6562, + "loss_text": 1.5547, + "state_loss_0": 0.0, + "step": 5425 + }, + { + "epoch": 0.39364391477067534, + "grad_norm": 0.9363760948181152, + "learning_rate": 4.643929153898895e-05, + "loss": 3.5526, + "step": 5450 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0, + "epoch": 0.39364391477067534, + "loss": 3.5469, + "loss_text": 0.7031, + "state_loss_0": 0.0, + "step": 5450 + }, + { + "audio_loss_0": 3.7031, + "audio_loss_1": 3.9531, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.125, + "epoch": 0.39364391477067534, + "loss": 3.6875, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 5450 + }, + { + "epoch": 0.39544962080173346, + "grad_norm": 1.0280267000198364, + "learning_rate": 4.640159792400031e-05, + "loss": 3.5586, + "step": 5475 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0625, + "epoch": 0.39544962080173346, + "loss": 3.6562, + "loss_text": 1.375, + "state_loss_0": 0.0, + "step": 5475 + }, + { + "audio_loss_0": 3.7188, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1562, + "epoch": 0.39544962080173346, + "loss": 3.6875, + "loss_text": 0.8125, + "state_loss_0": 0.0, + "step": 5475 + }, + { + "epoch": 0.39725532683279163, + "grad_norm": 0.8546712398529053, + "learning_rate": 4.636372131116667e-05, + "loss": 3.5455, + "step": 5500 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.4688, + "audio_loss_6": 4.25, + "epoch": 0.39725532683279163, + "loss": 3.6875, + "loss_text": 0.9062, + "state_loss_0": 0.0, + "step": 5500 + }, + { + "audio_loss_0": 3.75, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.625, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.39725532683279163, + "loss": 3.7344, + "loss_text": 0.9922, + "state_loss_0": 0.0, + "step": 5500 + }, + { + "epoch": 0.39906103286384975, + "grad_norm": 0.8034318685531616, + "learning_rate": 4.6325662024358166e-05, + "loss": 3.5511, + "step": 5525 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.9219, + "epoch": 0.39906103286384975, + "loss": 3.4375, + "loss_text": 0.8203, + "state_loss_0": 0.0, + "step": 5525 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0625, + "epoch": 0.39906103286384975, + "loss": 3.5625, + "loss_text": 1.1797, + "state_loss_0": 0.0, + "step": 5525 + }, + { + "epoch": 0.4008667388949079, + "grad_norm": 0.8680459856987, + "learning_rate": 4.628742038900692e-05, + "loss": 3.5521, + "step": 5550 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.4008667388949079, + "loss": 3.375, + "loss_text": 0.375, + "state_loss_0": 0.0, + "step": 5550 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.4008667388949079, + "loss": 3.6875, + "loss_text": 1.7188, + "state_loss_0": 0.0, + "step": 5550 + }, + { + "epoch": 0.40267244492596604, + "grad_norm": 0.8836286067962646, + "learning_rate": 4.624899673210427e-05, + "loss": 3.5523, + "step": 5575 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0312, + "epoch": 0.40267244492596604, + "loss": 3.5625, + "loss_text": 1.2891, + "state_loss_0": 0.0, + "step": 5575 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0312, + "epoch": 0.40267244492596604, + "loss": 3.4219, + "loss_text": 0.4023, + "state_loss_0": 0.0, + "step": 5575 + }, + { + "epoch": 0.4044781509570242, + "grad_norm": 0.9025138020515442, + "learning_rate": 4.621039138219794e-05, + "loss": 3.5617, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_AQACONVA": 3.4375, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 2.0469, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.8281, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.0938, + "eval_loss": 3.7031, + "eval_loss_AQACONVA": 3.7031, + "eval_loss_text_AQACONVA": 1.6953, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_AQACONVA": 3.3906, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.0938, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 1.3828, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.7969, + "eval_loss_AQACONVA": 3.7969, + "eval_loss_text_AQACONVA": 2.5469, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.875, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.5, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 2.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_AQACONVA": 3.625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.6562, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 2.2031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_AQACONVA": 3.6406, + "eval_audio_loss_1_AQACONVA": 3.8906, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 1.8984, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.1562, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 2.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 1.6562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_RQACONVA": 3.5, + "eval_audio_loss_1_RQACONVA": 3.5781, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 1.9922, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_RQACONVA": 3.7188, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.6875, + "eval_audio_loss_3_RQACONVA": 4.7188, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4375, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.9375, + "eval_loss_RQACONVA": 3.9375, + "eval_loss_text_RQACONVA": 2.3438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.375, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 2.5312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7656, + "eval_loss_RQACONVA": 3.7656, + "eval_loss_text_RQACONVA": 1.8672, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 1.8359, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.2188, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 2.0, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.5586, + "eval_loss_RQACONV": 0.5586, + "eval_loss_text_RQACONV": 1.1172, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.6797, + "eval_loss_RQACONV": 0.6797, + "eval_loss_text_RQACONV": 1.3594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.6367, + "eval_loss_RQACONV": 0.6367, + "eval_loss_text_RQACONV": 1.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.7344, + "eval_loss_RQACONV": 0.7344, + "eval_loss_text_RQACONV": 1.4688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.6953, + "eval_loss_RQACONV": 0.6953, + "eval_loss_text_RQACONV": 1.3906, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.8906, + "eval_loss_RQACONV": 0.8906, + "eval_loss_text_RQACONV": 1.7812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.459, + "eval_loss_RQACONV": 0.459, + "eval_loss_text_RQACONV": 0.918, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.4395, + "eval_loss_RQACONV": 0.4395, + "eval_loss_text_RQACONV": 0.8789, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.5195, + "eval_loss_RQACONV": 0.5195, + "eval_loss_text_RQACONV": 1.0391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.4785, + "eval_loss_RQACONV": 0.4785, + "eval_loss_text_RQACONV": 0.957, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 1.2891, + "eval_loss_RQACONV": 1.2891, + "eval_loss_text_RQACONV": 2.5781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.1328, + "eval_loss_RQACONV": 0.1328, + "eval_loss_text_RQACONV": 0.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.1621, + "eval_loss_RQACONV": 0.1621, + "eval_loss_text_RQACONV": 0.3242, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.0908, + "eval_loss_RQACONV": 0.0908, + "eval_loss_text_RQACONV": 0.1816, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.1895, + "eval_loss_RQACONV": 0.1895, + "eval_loss_text_RQACONV": 0.3789, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.1592, + "eval_loss_RQACONV": 0.1592, + "eval_loss_text_RQACONV": 0.3184, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.0469, + "eval_loss_RQACONV": 0.0469, + "eval_loss_text_RQACONV": 0.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.1367, + "eval_loss_RQACONV": 0.1367, + "eval_loss_text_RQACONV": 0.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.1221, + "eval_loss_RQACONV": 0.1221, + "eval_loss_text_RQACONV": 0.2441, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.1143, + "eval_loss_RQACONV": 0.1143, + "eval_loss_text_RQACONV": 0.2285, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 0.6133, + "eval_loss_RQACONV": 0.6133, + "eval_loss_text_RQACONV": 1.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 5600 + }, + { + "epoch": 0.4044781509570242, + "eval_loss": 1.6238104104995728, + "eval_runtime": 28.4836, + "eval_samples_per_second": 187.792, + "eval_steps_per_second": 1.475, + "step": 5600 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1875, + "audio_loss_6": 3.9375, + "epoch": 0.4044781509570242, + "loss": 3.5, + "loss_text": 1.0625, + "state_loss_0": 0.0, + "step": 5600 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.5, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0625, + "epoch": 0.4044781509570242, + "loss": 3.5625, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 5600 + }, + { + "epoch": 0.40628385698808234, + "grad_norm": 1.1104497909545898, + "learning_rate": 4.617160466938925e-05, + "loss": 3.5557, + "step": 5625 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0625, + "epoch": 0.40628385698808234, + "loss": 3.4375, + "loss_text": 0.7266, + "state_loss_0": 0.0, + "step": 5625 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.9375, + "audio_loss_2": 3.625, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.5, + "audio_loss_5": 4.375, + "audio_loss_6": 4.2812, + "epoch": 0.40628385698808234, + "loss": 3.7188, + "loss_text": 1.0312, + "state_loss_0": 0.0, + "step": 5625 + }, + { + "epoch": 0.4080895630191405, + "grad_norm": 0.8458991646766663, + "learning_rate": 4.6132636925330286e-05, + "loss": 3.5478, + "step": 5650 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.4080895630191405, + "loss": 3.5938, + "loss_text": 0.9688, + "state_loss_0": 0.0, + "step": 5650 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.25, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.1562, + "epoch": 0.4080895630191405, + "loss": 3.5312, + "loss_text": 0.8008, + "state_loss_0": 0.0, + "step": 5650 + }, + { + "epoch": 0.40989526905019863, + "grad_norm": 0.7735717296600342, + "learning_rate": 4.609348848322111e-05, + "loss": 3.541, + "step": 5675 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.9531, + "audio_loss_2": 3.6562, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.4375, + "audio_loss_6": 4.2188, + "epoch": 0.40989526905019863, + "loss": 3.7188, + "loss_text": 1.0, + "state_loss_0": 0.0, + "step": 5675 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0312, + "epoch": 0.40989526905019863, + "loss": 3.5625, + "loss_text": 1.0312, + "state_loss_0": 0.0, + "step": 5675 + }, + { + "epoch": 0.41170097508125675, + "grad_norm": 0.836807370185852, + "learning_rate": 4.605415967780684e-05, + "loss": 3.5468, + "step": 5700 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0938, + "epoch": 0.41170097508125675, + "loss": 3.5781, + "loss_text": 1.0703, + "state_loss_0": 0.0, + "step": 5700 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.41170097508125675, + "loss": 3.6875, + "loss_text": 1.0156, + "state_loss_0": 0.0, + "step": 5700 + }, + { + "epoch": 0.4135066811123149, + "grad_norm": 1.0144526958465576, + "learning_rate": 4.6014650845374804e-05, + "loss": 3.5547, + "step": 5725 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.4135066811123149, + "loss": 3.4688, + "loss_text": 0.5391, + "state_loss_0": 0.0, + "step": 5725 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0, + "epoch": 0.4135066811123149, + "loss": 3.4688, + "loss_text": 0.4629, + "state_loss_0": 0.0, + "step": 5725 + }, + { + "epoch": 0.41531238714337304, + "grad_norm": 0.8233093619346619, + "learning_rate": 4.597496232375173e-05, + "loss": 3.5454, + "step": 5750 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.9375, + "audio_loss_2": 3.7188, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1875, + "epoch": 0.41531238714337304, + "loss": 3.7188, + "loss_text": 1.0938, + "state_loss_0": 0.0, + "step": 5750 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0312, + "epoch": 0.41531238714337304, + "loss": 3.4375, + "loss_text": 0.9531, + "state_loss_0": 0.0, + "step": 5750 + }, + { + "epoch": 0.4171180931744312, + "grad_norm": 0.7815775275230408, + "learning_rate": 4.5935094452300764e-05, + "loss": 3.5562, + "step": 5775 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.1562, + "audio_loss_6": 3.9531, + "epoch": 0.4171180931744312, + "loss": 3.5, + "loss_text": 1.3281, + "state_loss_0": 0.0, + "step": 5775 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.375, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0938, + "epoch": 0.4171180931744312, + "loss": 3.5469, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 5775 + }, + { + "epoch": 0.41892379920548933, + "grad_norm": 0.7768523693084717, + "learning_rate": 4.589504757191863e-05, + "loss": 3.5586, + "step": 5800 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.41892379920548933, + "loss": 3.5469, + "loss_text": 0.9531, + "state_loss_0": 0.0, + "step": 5800 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.41892379920548933, + "loss": 3.5, + "loss_text": 0.6602, + "state_loss_0": 0.0, + "step": 5800 + }, + { + "epoch": 0.4207295052365475, + "grad_norm": 0.7672255635261536, + "learning_rate": 4.5854822025032683e-05, + "loss": 3.5445, + "step": 5825 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2188, + "audio_loss_5": 3.9844, + "audio_loss_6": 3.8906, + "epoch": 0.4207295052365475, + "loss": 3.3906, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 5825 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0312, + "epoch": 0.4207295052365475, + "loss": 3.5, + "loss_text": 0.5078, + "state_loss_0": 0.0, + "step": 5825 + }, + { + "epoch": 0.4225352112676056, + "grad_norm": 0.7807490825653076, + "learning_rate": 4.5814418155598023e-05, + "loss": 3.544, + "step": 5850 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0625, + "epoch": 0.4225352112676056, + "loss": 3.5, + "loss_text": 0.6914, + "state_loss_0": 0.0, + "step": 5850 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.4225352112676056, + "loss": 3.5625, + "loss_text": 0.9609, + "state_loss_0": 0.0, + "step": 5850 + }, + { + "epoch": 0.4243409172986638, + "grad_norm": 0.9593159556388855, + "learning_rate": 4.57738363090945e-05, + "loss": 3.5384, + "step": 5875 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.25, + "audio_loss_6": 4.1875, + "epoch": 0.4243409172986638, + "loss": 3.5312, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 5875 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.4243409172986638, + "loss": 3.6719, + "loss_text": 1.3672, + "state_loss_0": 0.0, + "step": 5875 + }, + { + "epoch": 0.4261466233297219, + "grad_norm": 0.9850334525108337, + "learning_rate": 4.57330768325238e-05, + "loss": 3.5499, + "step": 5900 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.4261466233297219, + "loss": 3.5469, + "loss_text": 0.832, + "state_loss_0": 0.0, + "step": 5900 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1562, + "epoch": 0.4261466233297219, + "loss": 3.7031, + "loss_text": 1.6562, + "state_loss_0": 0.0, + "step": 5900 + }, + { + "epoch": 0.4279523293607801, + "grad_norm": 0.775562047958374, + "learning_rate": 4.569214007440646e-05, + "loss": 3.5421, + "step": 5925 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.1562, + "epoch": 0.4279523293607801, + "loss": 3.5469, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 5925 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 3.9844, + "epoch": 0.4279523293607801, + "loss": 3.5156, + "loss_text": 0.7383, + "state_loss_0": 0.0, + "step": 5925 + }, + { + "epoch": 0.4297580353918382, + "grad_norm": 0.8122384548187256, + "learning_rate": 4.565102638477889e-05, + "loss": 3.5318, + "step": 5950 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.25, + "epoch": 0.4297580353918382, + "loss": 3.5, + "loss_text": 0.6602, + "state_loss_0": 0.0, + "step": 5950 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9688, + "epoch": 0.4297580353918382, + "loss": 3.4062, + "loss_text": 0.4082, + "state_loss_0": 0.0, + "step": 5950 + }, + { + "epoch": 0.43156374142289633, + "grad_norm": 0.8264349699020386, + "learning_rate": 4.560973611519037e-05, + "loss": 3.5344, + "step": 5975 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0625, + "epoch": 0.43156374142289633, + "loss": 3.5, + "loss_text": 0.8594, + "state_loss_0": 0.0, + "step": 5975 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.875, + "audio_loss_2": 3.7031, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0312, + "epoch": 0.43156374142289633, + "loss": 3.5625, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 5975 + }, + { + "epoch": 0.4333694474539545, + "grad_norm": 0.7405634522438049, + "learning_rate": 4.556826961870008e-05, + "loss": 3.5386, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_AQACONVA": 3.4219, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.4531, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.4375, + "eval_audio_loss_6_AQACONVA": 4.2188, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 1.9922, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_AQACONVA": 3.1875, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.2812, + "eval_audio_loss_6_AQACONVA": 4.0938, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 1.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_AQACONVA": 3.3906, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.2812, + "eval_audio_loss_6_AQACONVA": 4.0938, + "eval_loss": 3.6406, + "eval_loss_AQACONVA": 3.6406, + "eval_loss_text_AQACONVA": 1.3828, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.2812, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 2.5781, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.8594, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 2.6875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_AQACONVA": 3.625, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.6406, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 2.1719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_AQACONVA": 3.6562, + "eval_audio_loss_1_AQACONVA": 3.875, + "eval_audio_loss_2_AQACONVA": 3.5469, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 1.9062, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.125, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 1.9375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 1.6406, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_RQACONVA": 3.4844, + "eval_audio_loss_1_RQACONVA": 3.5625, + "eval_audio_loss_2_RQACONVA": 3.3438, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 1.9766, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_RQACONVA": 3.6875, + "eval_audio_loss_1_RQACONVA": 3.8906, + "eval_audio_loss_2_RQACONVA": 3.6562, + "eval_audio_loss_3_RQACONVA": 4.7188, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.9219, + "eval_loss_RQACONVA": 3.9219, + "eval_loss_text_RQACONVA": 2.2812, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 2.5469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7656, + "eval_loss_RQACONVA": 3.7656, + "eval_loss_text_RQACONVA": 1.8594, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 1.8516, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_audio_loss_0_RQACONVA": 3.3125, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.1875, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7344, + "eval_loss_RQACONVA": 3.7344, + "eval_loss_text_RQACONVA": 1.9531, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.5703, + "eval_loss_RQACONV": 0.5703, + "eval_loss_text_RQACONV": 1.1406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.6094, + "eval_loss_RQACONV": 0.6094, + "eval_loss_text_RQACONV": 1.2188, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.7227, + "eval_loss_RQACONV": 0.7227, + "eval_loss_text_RQACONV": 1.4453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.7266, + "eval_loss_RQACONV": 0.7266, + "eval_loss_text_RQACONV": 1.4531, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.8789, + "eval_loss_RQACONV": 0.8789, + "eval_loss_text_RQACONV": 1.7578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.4707, + "eval_loss_RQACONV": 0.4707, + "eval_loss_text_RQACONV": 0.9414, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.4434, + "eval_loss_RQACONV": 0.4434, + "eval_loss_text_RQACONV": 0.8867, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.6211, + "eval_loss_RQACONV": 0.6211, + "eval_loss_text_RQACONV": 1.2422, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.4824, + "eval_loss_RQACONV": 0.4824, + "eval_loss_text_RQACONV": 0.9648, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 1.2891, + "eval_loss_RQACONV": 1.2891, + "eval_loss_text_RQACONV": 2.5781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.1318, + "eval_loss_RQACONV": 0.1318, + "eval_loss_text_RQACONV": 0.2637, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.168, + "eval_loss_RQACONV": 0.168, + "eval_loss_text_RQACONV": 0.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.085, + "eval_loss_RQACONV": 0.085, + "eval_loss_text_RQACONV": 0.1699, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.1738, + "eval_loss_RQACONV": 0.1738, + "eval_loss_text_RQACONV": 0.3477, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.1572, + "eval_loss_RQACONV": 0.1572, + "eval_loss_text_RQACONV": 0.3145, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.0461, + "eval_loss_RQACONV": 0.0461, + "eval_loss_text_RQACONV": 0.0923, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.1572, + "eval_loss_RQACONV": 0.1572, + "eval_loss_text_RQACONV": 0.3145, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.1279, + "eval_loss_RQACONV": 0.1279, + "eval_loss_text_RQACONV": 0.2559, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.1187, + "eval_loss_RQACONV": 0.1187, + "eval_loss_text_RQACONV": 0.2373, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 0.6289, + "eval_loss_RQACONV": 0.6289, + "eval_loss_text_RQACONV": 1.2578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6000 + }, + { + "epoch": 0.4333694474539545, + "eval_loss": 1.6226952075958252, + "eval_runtime": 27.626, + "eval_samples_per_second": 193.622, + "eval_steps_per_second": 1.52, + "step": 6000 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.1562, + "epoch": 0.4333694474539545, + "loss": 3.5156, + "loss_text": 0.5859, + "state_loss_0": 0.0, + "step": 6000 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.125, + "audio_loss_6": 3.9844, + "epoch": 0.4333694474539545, + "loss": 3.4375, + "loss_text": 0.6484, + "state_loss_0": 0.0, + "step": 6000 + }, + { + "epoch": 0.4351751534850126, + "grad_norm": 0.7594168782234192, + "learning_rate": 4.552662724987404e-05, + "loss": 3.5271, + "step": 6025 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.4688, + "audio_loss_5": 4.4062, + "audio_loss_6": 4.2188, + "epoch": 0.4351751534850126, + "loss": 3.7188, + "loss_text": 1.3203, + "state_loss_0": 0.0, + "step": 6025 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.25, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0938, + "epoch": 0.4351751534850126, + "loss": 3.4844, + "loss_text": 0.3965, + "state_loss_0": 0.0, + "step": 6025 + }, + { + "epoch": 0.4369808595160708, + "grad_norm": 0.925481379032135, + "learning_rate": 4.548480936478211e-05, + "loss": 3.5403, + "step": 6050 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9531, + "epoch": 0.4369808595160708, + "loss": 3.375, + "loss_text": 0.7188, + "state_loss_0": 0.0, + "step": 6050 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0312, + "epoch": 0.4369808595160708, + "loss": 3.5781, + "loss_text": 1.1406, + "state_loss_0": 0.0, + "step": 6050 + }, + { + "epoch": 0.4387865655471289, + "grad_norm": 1.0160472393035889, + "learning_rate": 4.544281632099491e-05, + "loss": 3.5312, + "step": 6075 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.4387865655471289, + "loss": 3.5469, + "loss_text": 0.625, + "state_loss_0": 0.0, + "step": 6075 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0, + "epoch": 0.4387865655471289, + "loss": 3.4062, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 6075 + }, + { + "epoch": 0.4405922715781871, + "grad_norm": 0.9324707984924316, + "learning_rate": 4.540064847758079e-05, + "loss": 3.5366, + "step": 6100 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.125, + "epoch": 0.4405922715781871, + "loss": 3.5469, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 6100 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.25, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.1875, + "epoch": 0.4405922715781871, + "loss": 3.5625, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 6100 + }, + { + "epoch": 0.4423979776092452, + "grad_norm": 0.7694281339645386, + "learning_rate": 4.535830619510276e-05, + "loss": 3.5361, + "step": 6125 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0312, + "epoch": 0.4423979776092452, + "loss": 3.5, + "loss_text": 0.6406, + "state_loss_0": 0.0, + "step": 6125 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0312, + "epoch": 0.4423979776092452, + "loss": 3.5156, + "loss_text": 1.4375, + "state_loss_0": 0.0, + "step": 6125 + }, + { + "epoch": 0.4442036836403034, + "grad_norm": 0.8006008863449097, + "learning_rate": 4.53157898356154e-05, + "loss": 3.5371, + "step": 6150 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.2188, + "epoch": 0.4442036836403034, + "loss": 3.6875, + "loss_text": 1.2422, + "state_loss_0": 0.0, + "step": 6150 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0, + "epoch": 0.4442036836403034, + "loss": 3.4688, + "loss_text": 1.1328, + "state_loss_0": 0.0, + "step": 6150 + }, + { + "epoch": 0.4460093896713615, + "grad_norm": 0.8194215893745422, + "learning_rate": 4.5273099762661746e-05, + "loss": 3.5335, + "step": 6175 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.4460093896713615, + "loss": 3.5625, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 6175 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0312, + "epoch": 0.4460093896713615, + "loss": 3.5, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 6175 + }, + { + "epoch": 0.4478150957024196, + "grad_norm": 0.7919715642929077, + "learning_rate": 4.523023634127023e-05, + "loss": 3.5286, + "step": 6200 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.9531, + "audio_loss_2": 3.6562, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0625, + "epoch": 0.4478150957024196, + "loss": 3.5938, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 6200 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.9219, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.0625, + "epoch": 0.4478150957024196, + "loss": 3.5625, + "loss_text": 0.625, + "state_loss_0": 0.0, + "step": 6200 + }, + { + "epoch": 0.4496208017334778, + "grad_norm": 0.8863612413406372, + "learning_rate": 4.5187199937951496e-05, + "loss": 3.5358, + "step": 6225 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.125, + "epoch": 0.4496208017334778, + "loss": 3.5781, + "loss_text": 0.8203, + "state_loss_0": 0.0, + "step": 6225 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.25, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.4496208017334778, + "loss": 3.5312, + "loss_text": 1.2109, + "state_loss_0": 0.0, + "step": 6225 + }, + { + "epoch": 0.4514265077645359, + "grad_norm": 1.1762527227401733, + "learning_rate": 4.514399092069532e-05, + "loss": 3.5339, + "step": 6250 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.75, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0, + "epoch": 0.4514265077645359, + "loss": 3.5625, + "loss_text": 1.0391, + "state_loss_0": 0.0, + "step": 6250 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.5312, + "audio_loss_5": 4.375, + "audio_loss_6": 4.1875, + "epoch": 0.4514265077645359, + "loss": 3.6562, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 6250 + }, + { + "epoch": 0.4532322137955941, + "grad_norm": 0.8276361227035522, + "learning_rate": 4.510060965896743e-05, + "loss": 3.5333, + "step": 6275 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.375, + "audio_loss_3": 4.1562, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.8281, + "epoch": 0.4532322137955941, + "loss": 3.3594, + "loss_text": 0.5352, + "state_loss_0": 0.0, + "step": 6275 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0625, + "epoch": 0.4532322137955941, + "loss": 3.5312, + "loss_text": 1.0703, + "state_loss_0": 0.0, + "step": 6275 + }, + { + "epoch": 0.4550379198266522, + "grad_norm": 1.0188590288162231, + "learning_rate": 4.505705652370636e-05, + "loss": 3.5345, + "step": 6300 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.4550379198266522, + "loss": 3.5469, + "loss_text": 0.8203, + "state_loss_0": 0.0, + "step": 6300 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0312, + "epoch": 0.4550379198266522, + "loss": 3.5781, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 6300 + }, + { + "epoch": 0.4568436258577104, + "grad_norm": 1.0857727527618408, + "learning_rate": 4.501333188732029e-05, + "loss": 3.5405, + "step": 6325 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.125, + "epoch": 0.4568436258577104, + "loss": 3.5781, + "loss_text": 0.957, + "state_loss_0": 0.0, + "step": 6325 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.25, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9219, + "epoch": 0.4568436258577104, + "loss": 3.4219, + "loss_text": 0.5977, + "state_loss_0": 0.0, + "step": 6325 + }, + { + "epoch": 0.4586493318887685, + "grad_norm": 0.9018699526786804, + "learning_rate": 4.496943612368383e-05, + "loss": 3.5314, + "step": 6350 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0, + "epoch": 0.4586493318887685, + "loss": 3.5156, + "loss_text": 0.9414, + "state_loss_0": 0.0, + "step": 6350 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0, + "epoch": 0.4586493318887685, + "loss": 3.5781, + "loss_text": 1.6328, + "state_loss_0": 0.0, + "step": 6350 + }, + { + "epoch": 0.46045503791982667, + "grad_norm": 1.1565041542053223, + "learning_rate": 4.492536960813485e-05, + "loss": 3.5303, + "step": 6375 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.625, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.4062, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0, + "epoch": 0.46045503791982667, + "loss": 3.5625, + "loss_text": 0.8242, + "state_loss_0": 0.0, + "step": 6375 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.46045503791982667, + "loss": 3.5625, + "loss_text": 0.8984, + "state_loss_0": 0.0, + "step": 6375 + }, + { + "epoch": 0.4622607439508848, + "grad_norm": 0.8966668248176575, + "learning_rate": 4.4881132717471256e-05, + "loss": 3.5313, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_AQACONVA": 3.4219, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.375, + "eval_audio_loss_5_AQACONVA": 4.2812, + "eval_audio_loss_6_AQACONVA": 4.0938, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 1.7109, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_AQACONVA": 3.375, + "eval_audio_loss_1_AQACONVA": 3.6562, + "eval_audio_loss_2_AQACONVA": 3.4531, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.2812, + "eval_audio_loss_6_AQACONVA": 4.0938, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 1.4453, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.3594, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.375, + "eval_audio_loss_5_AQACONVA": 4.25, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 2.5938, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.8594, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 2.7188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_AQACONVA": 3.6094, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.625, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 2.1406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_AQACONVA": 3.625, + "eval_audio_loss_1_AQACONVA": 3.875, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.625, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 1.875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.2812, + "eval_audio_loss_5_RQACONVA": 4.0938, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 1.9219, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.2188, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 1.6406, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_RQACONVA": 3.4844, + "eval_audio_loss_1_RQACONVA": 3.5781, + "eval_audio_loss_2_RQACONVA": 3.3438, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 1.9922, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_RQACONVA": 3.5625, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.6875, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.4375, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.875, + "eval_loss_RQACONVA": 3.875, + "eval_loss_text_RQACONVA": 2.1406, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.3438, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.8438, + "eval_loss_RQACONVA": 3.8438, + "eval_loss_text_RQACONVA": 2.5312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.4062, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 1.8594, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.2812, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 1.8203, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.1875, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7344, + "eval_loss_RQACONVA": 3.7344, + "eval_loss_text_RQACONVA": 1.9609, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.4824, + "eval_loss_RQACONV": 0.4824, + "eval_loss_text_RQACONV": 0.9648, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.6875, + "eval_loss_RQACONV": 0.6875, + "eval_loss_text_RQACONV": 1.375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.7852, + "eval_loss_RQACONV": 0.7852, + "eval_loss_text_RQACONV": 1.5703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.6641, + "eval_loss_RQACONV": 0.6641, + "eval_loss_text_RQACONV": 1.3281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.7422, + "eval_loss_RQACONV": 0.7422, + "eval_loss_text_RQACONV": 1.4844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.875, + "eval_loss_RQACONV": 0.875, + "eval_loss_text_RQACONV": 1.75, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.4648, + "eval_loss_RQACONV": 0.4648, + "eval_loss_text_RQACONV": 0.9297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.4316, + "eval_loss_RQACONV": 0.4316, + "eval_loss_text_RQACONV": 0.8633, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.5312, + "eval_loss_RQACONV": 0.5312, + "eval_loss_text_RQACONV": 1.0625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.498, + "eval_loss_RQACONV": 0.498, + "eval_loss_text_RQACONV": 0.9961, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.4766, + "eval_loss_RQACONV": 0.4766, + "eval_loss_text_RQACONV": 0.9531, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 1.2969, + "eval_loss_RQACONV": 1.2969, + "eval_loss_text_RQACONV": 2.5938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.1406, + "eval_loss_RQACONV": 0.1406, + "eval_loss_text_RQACONV": 0.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.1709, + "eval_loss_RQACONV": 0.1709, + "eval_loss_text_RQACONV": 0.3418, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.0962, + "eval_loss_RQACONV": 0.0962, + "eval_loss_text_RQACONV": 0.1924, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.1699, + "eval_loss_RQACONV": 0.1699, + "eval_loss_text_RQACONV": 0.3398, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.167, + "eval_loss_RQACONV": 0.167, + "eval_loss_text_RQACONV": 0.334, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.0376, + "eval_loss_RQACONV": 0.0376, + "eval_loss_text_RQACONV": 0.0752, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.1338, + "eval_loss_RQACONV": 0.1338, + "eval_loss_text_RQACONV": 0.2676, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.1133, + "eval_loss_RQACONV": 0.1133, + "eval_loss_text_RQACONV": 0.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.1162, + "eval_loss_RQACONV": 0.1162, + "eval_loss_text_RQACONV": 0.2324, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 0.6133, + "eval_loss_RQACONV": 0.6133, + "eval_loss_text_RQACONV": 1.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6400 + }, + { + "epoch": 0.4622607439508848, + "eval_loss": 1.6198457479476929, + "eval_runtime": 27.8803, + "eval_samples_per_second": 191.856, + "eval_steps_per_second": 1.506, + "step": 6400 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0312, + "epoch": 0.4622607439508848, + "loss": 3.4062, + "loss_text": 0.4941, + "state_loss_0": 0.0, + "step": 6400 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.1562, + "epoch": 0.4622607439508848, + "loss": 3.5781, + "loss_text": 0.8711, + "state_loss_0": 0.0, + "step": 6400 + }, + { + "epoch": 0.46406644998194296, + "grad_norm": 0.7953397631645203, + "learning_rate": 4.4836725829947777e-05, + "loss": 3.5339, + "step": 6425 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.625, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.46406644998194296, + "loss": 3.4844, + "loss_text": 0.7695, + "state_loss_0": 0.0, + "step": 6425 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0312, + "epoch": 0.46406644998194296, + "loss": 3.5, + "loss_text": 0.9219, + "state_loss_0": 0.0, + "step": 6425 + }, + { + "epoch": 0.4658721560130011, + "grad_norm": 0.8167224526405334, + "learning_rate": 4.4792149325272736e-05, + "loss": 3.5304, + "step": 6450 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0938, + "epoch": 0.4658721560130011, + "loss": 3.5625, + "loss_text": 0.918, + "state_loss_0": 0.0, + "step": 6450 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.4658721560130011, + "loss": 3.6406, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 6450 + }, + { + "epoch": 0.4676778620440592, + "grad_norm": 0.868109405040741, + "learning_rate": 4.47474035846048e-05, + "loss": 3.526, + "step": 6475 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.875, + "epoch": 0.4676778620440592, + "loss": 3.3594, + "loss_text": 0.5469, + "state_loss_0": 0.0, + "step": 6475 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.125, + "epoch": 0.4676778620440592, + "loss": 3.5938, + "loss_text": 0.7891, + "state_loss_0": 0.0, + "step": 6475 + }, + { + "epoch": 0.4694835680751174, + "grad_norm": 0.7931364178657532, + "learning_rate": 4.470248899054968e-05, + "loss": 3.5279, + "step": 6500 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1875, + "audio_loss_6": 3.9531, + "epoch": 0.4694835680751174, + "loss": 3.4688, + "loss_text": 0.5664, + "state_loss_0": 0.0, + "step": 6500 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0312, + "epoch": 0.4694835680751174, + "loss": 3.5312, + "loss_text": 0.7227, + "state_loss_0": 0.0, + "step": 6500 + }, + { + "epoch": 0.4712892741061755, + "grad_norm": 0.7382170557975769, + "learning_rate": 4.465740592715693e-05, + "loss": 3.5241, + "step": 6525 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.125, + "audio_loss_5": 4.1562, + "audio_loss_6": 3.9844, + "epoch": 0.4712892741061755, + "loss": 3.4531, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 6525 + }, + { + "audio_loss_0": 3.5781, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.1562, + "epoch": 0.4712892741061755, + "loss": 3.6562, + "loss_text": 1.1172, + "state_loss_0": 0.0, + "step": 6525 + }, + { + "epoch": 0.47309498013723367, + "grad_norm": 0.8431880474090576, + "learning_rate": 4.4612154779916625e-05, + "loss": 3.5291, + "step": 6550 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.7656, + "epoch": 0.47309498013723367, + "loss": 3.2812, + "loss_text": 0.8438, + "state_loss_0": 0.0, + "step": 6550 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.3125, + "audio_loss_6": 3.9844, + "epoch": 0.47309498013723367, + "loss": 3.5625, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 6550 + }, + { + "epoch": 0.4749006861682918, + "grad_norm": 1.0471121072769165, + "learning_rate": 4.456673593575606e-05, + "loss": 3.5245, + "step": 6575 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.5, + "audio_loss_4": 4.25, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0625, + "epoch": 0.4749006861682918, + "loss": 3.4688, + "loss_text": 0.7617, + "state_loss_0": 0.0, + "step": 6575 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.4749006861682918, + "loss": 3.4688, + "loss_text": 0.8125, + "state_loss_0": 0.0, + "step": 6575 + }, + { + "epoch": 0.47670639219934996, + "grad_norm": 0.800956666469574, + "learning_rate": 4.452114978303644e-05, + "loss": 3.5362, + "step": 6600 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0625, + "epoch": 0.47670639219934996, + "loss": 3.625, + "loss_text": 1.1562, + "state_loss_0": 0.0, + "step": 6600 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0, + "epoch": 0.47670639219934996, + "loss": 3.5, + "loss_text": 0.9492, + "state_loss_0": 0.0, + "step": 6600 + }, + { + "epoch": 0.4785120982304081, + "grad_norm": 0.8642157316207886, + "learning_rate": 4.44753967115496e-05, + "loss": 3.5217, + "step": 6625 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.2812, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0938, + "epoch": 0.4785120982304081, + "loss": 3.4375, + "loss_text": 0.8867, + "state_loss_0": 0.0, + "step": 6625 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.25, + "audio_loss_6": 4.125, + "epoch": 0.4785120982304081, + "loss": 3.5469, + "loss_text": 0.8984, + "state_loss_0": 0.0, + "step": 6625 + }, + { + "epoch": 0.48031780426146625, + "grad_norm": 0.8302973508834839, + "learning_rate": 4.442947711251461e-05, + "loss": 3.5302, + "step": 6650 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0312, + "epoch": 0.48031780426146625, + "loss": 3.4062, + "loss_text": 0.6406, + "state_loss_0": 0.0, + "step": 6650 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.125, + "audio_loss_6": 3.9375, + "epoch": 0.48031780426146625, + "loss": 3.4375, + "loss_text": 0.8438, + "state_loss_0": 0.0, + "step": 6650 + }, + { + "epoch": 0.48212351029252437, + "grad_norm": 0.8438173532485962, + "learning_rate": 4.4383391378574465e-05, + "loss": 3.5139, + "step": 6675 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0625, + "epoch": 0.48212351029252437, + "loss": 3.5312, + "loss_text": 1.0781, + "state_loss_0": 0.0, + "step": 6675 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0938, + "epoch": 0.48212351029252437, + "loss": 3.5156, + "loss_text": 1.0938, + "state_loss_0": 0.0, + "step": 6675 + }, + { + "epoch": 0.48392921632358255, + "grad_norm": 0.8365172147750854, + "learning_rate": 4.433713990379273e-05, + "loss": 3.5146, + "step": 6700 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.375, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.25, + "audio_loss_5": 4.125, + "audio_loss_6": 4.125, + "epoch": 0.48392921632358255, + "loss": 3.4844, + "loss_text": 0.4297, + "state_loss_0": 0.0, + "step": 6700 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0, + "epoch": 0.48392921632358255, + "loss": 3.5625, + "loss_text": 1.3516, + "state_loss_0": 0.0, + "step": 6700 + }, + { + "epoch": 0.48573492235464066, + "grad_norm": 0.9219153523445129, + "learning_rate": 4.429072308365019e-05, + "loss": 3.5225, + "step": 6725 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.25, + "epoch": 0.48573492235464066, + "loss": 3.6094, + "loss_text": 1.1484, + "state_loss_0": 0.0, + "step": 6725 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.7344, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3438, + "audio_loss_6": 4.2188, + "epoch": 0.48573492235464066, + "loss": 3.7344, + "loss_text": 1.0859, + "state_loss_0": 0.0, + "step": 6725 + }, + { + "epoch": 0.4875406283856988, + "grad_norm": 0.9778522849082947, + "learning_rate": 4.42441413150414e-05, + "loss": 3.523, + "step": 6750 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.25, + "audio_loss_6": 3.9531, + "epoch": 0.4875406283856988, + "loss": 3.4688, + "loss_text": 0.5703, + "state_loss_0": 0.0, + "step": 6750 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.25, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.9688, + "epoch": 0.4875406283856988, + "loss": 3.4375, + "loss_text": 0.6328, + "state_loss_0": 0.0, + "step": 6750 + }, + { + "epoch": 0.48934633441675696, + "grad_norm": 0.9199130535125732, + "learning_rate": 4.4197394996271345e-05, + "loss": 3.5222, + "step": 6775 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.625, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.25, + "audio_loss_6": 3.9531, + "epoch": 0.48934633441675696, + "loss": 3.5, + "loss_text": 1.0391, + "state_loss_0": 0.0, + "step": 6775 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.25, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0312, + "epoch": 0.48934633441675696, + "loss": 3.5469, + "loss_text": 1.4609, + "state_loss_0": 0.0, + "step": 6775 + }, + { + "epoch": 0.4911520404478151, + "grad_norm": 0.8026413321495056, + "learning_rate": 4.415048452705205e-05, + "loss": 3.5205, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_AQACONVA": 3.4062, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.4062, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 1.9844, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_AQACONVA": 3.1719, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.375, + "eval_audio_loss_5_AQACONVA": 4.25, + "eval_audio_loss_6_AQACONVA": 4.0625, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 1.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_AQACONVA": 3.375, + "eval_audio_loss_1_AQACONVA": 3.6406, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.375, + "eval_audio_loss_5_AQACONVA": 4.25, + "eval_audio_loss_6_AQACONVA": 4.0625, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 1.3984, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3594, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.375, + "eval_audio_loss_5_AQACONVA": 4.25, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 2.5938, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_AQACONVA": 3.2812, + "eval_audio_loss_1_AQACONVA": 3.8438, + "eval_audio_loss_2_AQACONVA": 3.5469, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4688, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.8594, + "eval_loss_AQACONVA": 3.8594, + "eval_loss_text_AQACONVA": 2.7188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_AQACONVA": 3.5781, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.625, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.375, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 2.125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_AQACONVA": 3.6406, + "eval_audio_loss_1_AQACONVA": 3.8594, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 1.9141, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.2812, + "eval_audio_loss_5_RQACONVA": 4.0938, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 1.9219, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.3438, + "eval_audio_loss_5_RQACONVA": 4.2188, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 1.6719, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_RQACONVA": 3.4688, + "eval_audio_loss_1_RQACONVA": 3.5469, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.2812, + "eval_audio_loss_5_RQACONVA": 4.2188, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_RQACONVA": 3.6406, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.6406, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.4688, + "eval_audio_loss_5_RQACONVA": 4.4062, + "eval_audio_loss_6_RQACONVA": 4.2188, + "eval_loss": 3.8906, + "eval_loss_RQACONVA": 3.8906, + "eval_loss_text_RQACONVA": 2.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.1562, + "eval_loss": 3.7969, + "eval_loss_RQACONVA": 3.7969, + "eval_loss_text_RQACONVA": 2.5156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 1.8594, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 1.7422, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.2812, + "eval_audio_loss_5_RQACONVA": 4.1875, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.5859, + "eval_loss_RQACONV": 0.5859, + "eval_loss_text_RQACONV": 1.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.4844, + "eval_loss_RQACONV": 0.4844, + "eval_loss_text_RQACONV": 0.9688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.6406, + "eval_loss_RQACONV": 0.6406, + "eval_loss_text_RQACONV": 1.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.7891, + "eval_loss_RQACONV": 0.7891, + "eval_loss_text_RQACONV": 1.5781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.6641, + "eval_loss_RQACONV": 0.6641, + "eval_loss_text_RQACONV": 1.3281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.7461, + "eval_loss_RQACONV": 0.7461, + "eval_loss_text_RQACONV": 1.4922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.8633, + "eval_loss_RQACONV": 0.8633, + "eval_loss_text_RQACONV": 1.7266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.459, + "eval_loss_RQACONV": 0.459, + "eval_loss_text_RQACONV": 0.918, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.4238, + "eval_loss_RQACONV": 0.4238, + "eval_loss_text_RQACONV": 0.8477, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.6133, + "eval_loss_RQACONV": 0.6133, + "eval_loss_text_RQACONV": 1.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.5273, + "eval_loss_RQACONV": 0.5273, + "eval_loss_text_RQACONV": 1.0547, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.4805, + "eval_loss_RQACONV": 0.4805, + "eval_loss_text_RQACONV": 0.9609, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 1.3047, + "eval_loss_RQACONV": 1.3047, + "eval_loss_text_RQACONV": 2.6094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.1436, + "eval_loss_RQACONV": 0.1436, + "eval_loss_text_RQACONV": 0.2871, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.1709, + "eval_loss_RQACONV": 0.1709, + "eval_loss_text_RQACONV": 0.3418, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.0811, + "eval_loss_RQACONV": 0.0811, + "eval_loss_text_RQACONV": 0.1621, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.1738, + "eval_loss_RQACONV": 0.1738, + "eval_loss_text_RQACONV": 0.3477, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.166, + "eval_loss_RQACONV": 0.166, + "eval_loss_text_RQACONV": 0.332, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.0415, + "eval_loss_RQACONV": 0.0415, + "eval_loss_text_RQACONV": 0.083, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.1484, + "eval_loss_RQACONV": 0.1484, + "eval_loss_text_RQACONV": 0.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.1074, + "eval_loss_RQACONV": 0.1074, + "eval_loss_text_RQACONV": 0.2148, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.1094, + "eval_loss_RQACONV": 0.1094, + "eval_loss_text_RQACONV": 0.2188, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 6800 + }, + { + "epoch": 0.4911520404478151, + "eval_loss": 1.6131680011749268, + "eval_runtime": 27.7304, + "eval_samples_per_second": 192.893, + "eval_steps_per_second": 1.515, + "step": 6800 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.8906, + "epoch": 0.4911520404478151, + "loss": 3.3594, + "loss_text": 0.8828, + "state_loss_0": 0.0, + "step": 6800 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.4911520404478151, + "loss": 3.5938, + "loss_text": 1.3047, + "state_loss_0": 0.0, + "step": 6800 + }, + { + "epoch": 0.49295774647887325, + "grad_norm": 0.9373591542243958, + "learning_rate": 4.4103410308499085e-05, + "loss": 3.5215, + "step": 6825 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9375, + "epoch": 0.49295774647887325, + "loss": 3.4062, + "loss_text": 0.9609, + "state_loss_0": 0.0, + "step": 6825 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0, + "epoch": 0.49295774647887325, + "loss": 3.4219, + "loss_text": 0.6133, + "state_loss_0": 0.0, + "step": 6825 + }, + { + "epoch": 0.49476345250993137, + "grad_norm": 0.8451297283172607, + "learning_rate": 4.405617274312824e-05, + "loss": 3.5163, + "step": 6850 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.375, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0312, + "epoch": 0.49476345250993137, + "loss": 3.4844, + "loss_text": 0.8047, + "state_loss_0": 0.0, + "step": 6850 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0, + "epoch": 0.49476345250993137, + "loss": 3.5, + "loss_text": 0.7852, + "state_loss_0": 0.0, + "step": 6850 + }, + { + "epoch": 0.49656915854098954, + "grad_norm": 0.8422694206237793, + "learning_rate": 4.400877223485199e-05, + "loss": 3.5114, + "step": 6875 + }, + { + "audio_loss_0": 3.5781, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5, + "audio_loss_4": 4.375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.49656915854098954, + "loss": 3.5625, + "loss_text": 0.5469, + "state_loss_0": 0.0, + "step": 6875 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0312, + "epoch": 0.49656915854098954, + "loss": 3.5156, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 6875 + }, + { + "epoch": 0.49837486457204766, + "grad_norm": 0.7791448831558228, + "learning_rate": 4.396120918897609e-05, + "loss": 3.5148, + "step": 6900 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0, + "epoch": 0.49837486457204766, + "loss": 3.6094, + "loss_text": 1.4219, + "state_loss_0": 0.0, + "step": 6900 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.625, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.49837486457204766, + "loss": 3.5312, + "loss_text": 1.0938, + "state_loss_0": 0.0, + "step": 6900 + }, + { + "epoch": 0.5001805706031058, + "grad_norm": 0.9847010374069214, + "learning_rate": 4.3913484012196124e-05, + "loss": 3.515, + "step": 6925 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.25, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.5001805706031058, + "loss": 3.5312, + "loss_text": 0.7852, + "state_loss_0": 0.0, + "step": 6925 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0, + "epoch": 0.5001805706031058, + "loss": 3.4844, + "loss_text": 0.8125, + "state_loss_0": 0.0, + "step": 6925 + }, + { + "epoch": 0.501986276634164, + "grad_norm": 0.9608230590820312, + "learning_rate": 4.3865597112593956e-05, + "loss": 3.5053, + "step": 6950 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0, + "epoch": 0.501986276634164, + "loss": 3.5469, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 6950 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0938, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9375, + "epoch": 0.501986276634164, + "loss": 3.4062, + "loss_text": 0.7461, + "state_loss_0": 0.0, + "step": 6950 + }, + { + "epoch": 0.5037919826652221, + "grad_norm": 0.7750644087791443, + "learning_rate": 4.381754889963432e-05, + "loss": 3.516, + "step": 6975 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.125, + "audio_loss_6": 3.9844, + "epoch": 0.5037919826652221, + "loss": 3.5469, + "loss_text": 1.1875, + "state_loss_0": 0.0, + "step": 6975 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0, + "epoch": 0.5037919826652221, + "loss": 3.5312, + "loss_text": 0.9219, + "state_loss_0": 0.0, + "step": 6975 + }, + { + "epoch": 0.5055976886962803, + "grad_norm": 0.7703509330749512, + "learning_rate": 4.376933978416128e-05, + "loss": 3.5118, + "step": 7000 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.75, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.375, + "audio_loss_4": 4.125, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.5055976886962803, + "loss": 3.5, + "loss_text": 1.2188, + "state_loss_0": 0.0, + "step": 7000 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0312, + "epoch": 0.5055976886962803, + "loss": 3.5625, + "loss_text": 1.2891, + "state_loss_0": 0.0, + "step": 7000 + }, + { + "epoch": 0.5074033947273384, + "grad_norm": 0.7495079636573792, + "learning_rate": 4.372097017839473e-05, + "loss": 3.5048, + "step": 7025 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0312, + "epoch": 0.5074033947273384, + "loss": 3.4844, + "loss_text": 0.875, + "state_loss_0": 0.0, + "step": 7025 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.25, + "audio_loss_5": 4.125, + "audio_loss_6": 3.8594, + "epoch": 0.5074033947273384, + "loss": 3.4844, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 7025 + }, + { + "epoch": 0.5092091007583965, + "grad_norm": 0.8355121612548828, + "learning_rate": 4.367244049592683e-05, + "loss": 3.5097, + "step": 7050 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0312, + "audio_loss_6": 4.0, + "epoch": 0.5092091007583965, + "loss": 3.4375, + "loss_text": 1.0547, + "state_loss_0": 0.0, + "step": 7050 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.2812, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9375, + "epoch": 0.5092091007583965, + "loss": 3.3125, + "loss_text": 0.3477, + "state_loss_0": 0.0, + "step": 7050 + }, + { + "epoch": 0.5110148067894547, + "grad_norm": 0.8897727727890015, + "learning_rate": 4.362375115171854e-05, + "loss": 3.5121, + "step": 7075 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0312, + "epoch": 0.5110148067894547, + "loss": 3.4688, + "loss_text": 0.5742, + "state_loss_0": 0.0, + "step": 7075 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0312, + "epoch": 0.5110148067894547, + "loss": 3.5, + "loss_text": 1.1562, + "state_loss_0": 0.0, + "step": 7075 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.7208817005157471, + "learning_rate": 4.357490256209603e-05, + "loss": 3.5039, + "step": 7100 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.25, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0, + "audio_loss_6": 3.8438, + "epoch": 0.5128205128205128, + "loss": 3.3594, + "loss_text": 0.6133, + "state_loss_0": 0.0, + "step": 7100 + }, + { + "audio_loss_0": 3.5781, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5, + "audio_loss_4": 4.5, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0312, + "epoch": 0.5128205128205128, + "loss": 3.5625, + "loss_text": 0.377, + "state_loss_0": 0.0, + "step": 7100 + }, + { + "epoch": 0.514626218851571, + "grad_norm": 0.7803882360458374, + "learning_rate": 4.35258951447471e-05, + "loss": 3.5059, + "step": 7125 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0, + "epoch": 0.514626218851571, + "loss": 3.4375, + "loss_text": 0.5703, + "state_loss_0": 0.0, + "step": 7125 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.514626218851571, + "loss": 3.5938, + "loss_text": 1.0078, + "state_loss_0": 0.0, + "step": 7125 + }, + { + "epoch": 0.5164319248826291, + "grad_norm": 0.927776575088501, + "learning_rate": 4.3476729318717643e-05, + "loss": 3.505, + "step": 7150 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 4.0938, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.25, + "audio_loss_6": 3.9688, + "epoch": 0.5164319248826291, + "loss": 3.6562, + "loss_text": 1.2422, + "state_loss_0": 0.0, + "step": 7150 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0, + "epoch": 0.5164319248826291, + "loss": 3.4844, + "loss_text": 0.8477, + "state_loss_0": 0.0, + "step": 7150 + }, + { + "epoch": 0.5182376309136872, + "grad_norm": 0.876704216003418, + "learning_rate": 4.3427405504408056e-05, + "loss": 3.5087, + "step": 7175 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.5182376309136872, + "loss": 3.5312, + "loss_text": 1.2188, + "state_loss_0": 0.0, + "step": 7175 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.75, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.375, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.1562, + "epoch": 0.5182376309136872, + "loss": 3.5469, + "loss_text": 0.6875, + "state_loss_0": 0.0, + "step": 7175 + }, + { + "epoch": 0.5200433369447454, + "grad_norm": 0.871898353099823, + "learning_rate": 4.337792412356965e-05, + "loss": 3.5012, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_AQACONVA": 3.4062, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_AQACONVA": 3.1875, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.3125, + "eval_audio_loss_5_AQACONVA": 4.2188, + "eval_audio_loss_6_AQACONVA": 4.0625, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 1.6797, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6406, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.3438, + "eval_audio_loss_5_AQACONVA": 4.2188, + "eval_audio_loss_6_AQACONVA": 4.0312, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 1.4062, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3594, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.3438, + "eval_audio_loss_5_AQACONVA": 4.2188, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 2.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_AQACONVA": 3.2812, + "eval_audio_loss_1_AQACONVA": 3.8281, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4375, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.1875, + "eval_loss": 3.875, + "eval_loss_AQACONVA": 3.875, + "eval_loss_text_AQACONVA": 2.7188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_AQACONVA": 3.5938, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.6406, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.3438, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.8281, + "eval_loss_AQACONVA": 3.8281, + "eval_loss_text_AQACONVA": 2.2188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_AQACONVA": 3.625, + "eval_audio_loss_1_AQACONVA": 3.8594, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.7969, + "eval_loss_AQACONVA": 3.7969, + "eval_loss_text_AQACONVA": 1.9219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.25, + "eval_audio_loss_5_RQACONVA": 4.0625, + "eval_audio_loss_6_RQACONVA": 4.0312, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 2.0312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.3906, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.1875, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 1.6875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_RQACONVA": 3.4531, + "eval_audio_loss_1_RQACONVA": 3.5469, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.2812, + "eval_audio_loss_5_RQACONVA": 4.1875, + "eval_audio_loss_6_RQACONVA": 4.0312, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 2.0312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_RQACONVA": 3.7344, + "eval_audio_loss_1_RQACONVA": 3.9062, + "eval_audio_loss_2_RQACONVA": 3.6719, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.1875, + "eval_loss": 3.9062, + "eval_loss_RQACONVA": 3.9062, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.3125, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.8281, + "eval_loss_RQACONVA": 3.8281, + "eval_loss_text_RQACONVA": 2.5781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_RQACONVA": 3.4844, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.75, + "eval_loss_RQACONVA": 3.75, + "eval_loss_text_RQACONVA": 1.8672, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 1.8984, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.2812, + "eval_audio_loss_5_RQACONVA": 4.1562, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.6914, + "eval_loss_RQACONV": 0.6914, + "eval_loss_text_RQACONV": 1.3828, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.6445, + "eval_loss_RQACONV": 0.6445, + "eval_loss_text_RQACONV": 1.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.7773, + "eval_loss_RQACONV": 0.7773, + "eval_loss_text_RQACONV": 1.5547, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.6406, + "eval_loss_RQACONV": 0.6406, + "eval_loss_text_RQACONV": 1.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.7109, + "eval_loss_RQACONV": 0.7109, + "eval_loss_text_RQACONV": 1.4219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.8281, + "eval_loss_RQACONV": 0.8281, + "eval_loss_text_RQACONV": 1.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.4609, + "eval_loss_RQACONV": 0.4609, + "eval_loss_text_RQACONV": 0.9219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.4219, + "eval_loss_RQACONV": 0.4219, + "eval_loss_text_RQACONV": 0.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.4941, + "eval_loss_RQACONV": 0.4941, + "eval_loss_text_RQACONV": 0.9883, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.4727, + "eval_loss_RQACONV": 0.4727, + "eval_loss_text_RQACONV": 0.9453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 1.3125, + "eval_loss_RQACONV": 1.3125, + "eval_loss_text_RQACONV": 2.625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.1396, + "eval_loss_RQACONV": 0.1396, + "eval_loss_text_RQACONV": 0.2793, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.1709, + "eval_loss_RQACONV": 0.1709, + "eval_loss_text_RQACONV": 0.3418, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.0923, + "eval_loss_RQACONV": 0.0923, + "eval_loss_text_RQACONV": 0.1846, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.1631, + "eval_loss_RQACONV": 0.1631, + "eval_loss_text_RQACONV": 0.3262, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.1602, + "eval_loss_RQACONV": 0.1602, + "eval_loss_text_RQACONV": 0.3203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.0381, + "eval_loss_RQACONV": 0.0381, + "eval_loss_text_RQACONV": 0.0762, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.1572, + "eval_loss_RQACONV": 0.1572, + "eval_loss_text_RQACONV": 0.3145, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.1079, + "eval_loss_RQACONV": 0.1079, + "eval_loss_text_RQACONV": 0.2158, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.1099, + "eval_loss_RQACONV": 0.1099, + "eval_loss_text_RQACONV": 0.2197, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7200 + }, + { + "epoch": 0.5200433369447454, + "eval_loss": 1.6120426654815674, + "eval_runtime": 27.9514, + "eval_samples_per_second": 191.368, + "eval_steps_per_second": 1.503, + "step": 7200 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.375, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0312, + "epoch": 0.5200433369447454, + "loss": 3.5156, + "loss_text": 0.9492, + "state_loss_0": 0.0, + "step": 7200 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.125, + "audio_loss_6": 3.8594, + "epoch": 0.5200433369447454, + "loss": 3.4531, + "loss_text": 1.0625, + "state_loss_0": 0.0, + "step": 7200 + }, + { + "epoch": 0.5218490429758036, + "grad_norm": 0.8099077939987183, + "learning_rate": 4.3328285599301e-05, + "loss": 3.4958, + "step": 7225 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.125, + "audio_loss_6": 3.9688, + "epoch": 0.5218490429758036, + "loss": 3.5, + "loss_text": 0.9102, + "state_loss_0": 0.0, + "step": 7225 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.625, + "audio_loss_2": 3.375, + "audio_loss_3": 4.375, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0, + "epoch": 0.5218490429758036, + "loss": 3.5, + "loss_text": 1.0312, + "state_loss_0": 0.0, + "step": 7225 + }, + { + "epoch": 0.5236547490068617, + "grad_norm": 0.8637711405754089, + "learning_rate": 4.32784903560444e-05, + "loss": 3.5038, + "step": 7250 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9844, + "epoch": 0.5236547490068617, + "loss": 3.3438, + "loss_text": 0.3887, + "state_loss_0": 0.0, + "step": 7250 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9062, + "epoch": 0.5236547490068617, + "loss": 3.4375, + "loss_text": 0.4922, + "state_loss_0": 0.0, + "step": 7250 + }, + { + "epoch": 0.5254604550379198, + "grad_norm": 0.790006160736084, + "learning_rate": 4.322853881958217e-05, + "loss": 3.5012, + "step": 7275 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9844, + "epoch": 0.5254604550379198, + "loss": 3.5, + "loss_text": 1.1562, + "state_loss_0": 0.0, + "step": 7275 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.25, + "audio_loss_6": 4.0625, + "epoch": 0.5254604550379198, + "loss": 3.5312, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 7275 + }, + { + "epoch": 0.527266161068978, + "grad_norm": 0.7571612596511841, + "learning_rate": 4.3178431417033065e-05, + "loss": 3.5016, + "step": 7300 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.625, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0625, + "epoch": 0.527266161068978, + "loss": 3.5625, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 7300 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.9062, + "epoch": 0.527266161068978, + "loss": 3.5156, + "loss_text": 1.3359, + "state_loss_0": 0.0, + "step": 7300 + }, + { + "epoch": 0.5290718671000361, + "grad_norm": 0.7820939421653748, + "learning_rate": 4.312816857684857e-05, + "loss": 3.4957, + "step": 7325 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.125, + "audio_loss_6": 3.9844, + "epoch": 0.5290718671000361, + "loss": 3.5938, + "loss_text": 1.7656, + "state_loss_0": 0.0, + "step": 7325 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.5, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0625, + "audio_loss_6": 4.0625, + "epoch": 0.5290718671000361, + "loss": 3.5312, + "loss_text": 1.2266, + "state_loss_0": 0.0, + "step": 7325 + }, + { + "epoch": 0.5308775731310943, + "grad_norm": 0.789517343044281, + "learning_rate": 4.307775072880927e-05, + "loss": 3.4964, + "step": 7350 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9688, + "epoch": 0.5308775731310943, + "loss": 3.4062, + "loss_text": 1.0391, + "state_loss_0": 0.0, + "step": 7350 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.5308775731310943, + "loss": 3.5625, + "loss_text": 0.7109, + "state_loss_0": 0.0, + "step": 7350 + }, + { + "epoch": 0.5326832791621524, + "grad_norm": 0.7604045271873474, + "learning_rate": 4.30271783040212e-05, + "loss": 3.4965, + "step": 7375 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9375, + "epoch": 0.5326832791621524, + "loss": 3.4062, + "loss_text": 0.6992, + "state_loss_0": 0.0, + "step": 7375 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0938, + "epoch": 0.5326832791621524, + "loss": 3.5781, + "loss_text": 1.3203, + "state_loss_0": 0.0, + "step": 7375 + }, + { + "epoch": 0.5344889851932105, + "grad_norm": 1.0701805353164673, + "learning_rate": 4.297645173491209e-05, + "loss": 3.4964, + "step": 7400 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.25, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0312, + "epoch": 0.5344889851932105, + "loss": 3.5156, + "loss_text": 1.2422, + "state_loss_0": 0.0, + "step": 7400 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.375, + "audio_loss_4": 4.1562, + "audio_loss_5": 3.9844, + "audio_loss_6": 4.0312, + "epoch": 0.5344889851932105, + "loss": 3.4062, + "loss_text": 0.7656, + "state_loss_0": 0.0, + "step": 7400 + }, + { + "epoch": 0.5362946912242686, + "grad_norm": 0.8026243448257446, + "learning_rate": 4.292557145522772e-05, + "loss": 3.494, + "step": 7425 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0625, + "epoch": 0.5362946912242686, + "loss": 3.5625, + "loss_text": 0.9297, + "state_loss_0": 0.0, + "step": 7425 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.5362946912242686, + "loss": 3.5, + "loss_text": 0.875, + "state_loss_0": 0.0, + "step": 7425 + }, + { + "epoch": 0.5381003972553269, + "grad_norm": 0.6969600319862366, + "learning_rate": 4.287453790002821e-05, + "loss": 3.4981, + "step": 7450 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1562, + "audio_loss_6": 3.9688, + "epoch": 0.5381003972553269, + "loss": 3.4844, + "loss_text": 0.6094, + "state_loss_0": 0.0, + "step": 7450 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0625, + "audio_loss_6": 4.0312, + "epoch": 0.5381003972553269, + "loss": 3.4688, + "loss_text": 0.9141, + "state_loss_0": 0.0, + "step": 7450 + }, + { + "epoch": 0.539906103286385, + "grad_norm": 0.8179054260253906, + "learning_rate": 4.282335150568427e-05, + "loss": 3.4915, + "step": 7475 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.375, + "audio_loss_4": 4.25, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0938, + "epoch": 0.539906103286385, + "loss": 3.5312, + "loss_text": 1.2734, + "state_loss_0": 0.0, + "step": 7475 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9375, + "epoch": 0.539906103286385, + "loss": 3.4688, + "loss_text": 0.8516, + "state_loss_0": 0.0, + "step": 7475 + }, + { + "epoch": 0.5417118093174431, + "grad_norm": 0.8506993055343628, + "learning_rate": 4.277201270987349e-05, + "loss": 3.4882, + "step": 7500 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.25, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.25, + "audio_loss_5": 4.25, + "audio_loss_6": 4.125, + "epoch": 0.5417118093174431, + "loss": 3.4688, + "loss_text": 0.625, + "state_loss_0": 0.0, + "step": 7500 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0, + "epoch": 0.5417118093174431, + "loss": 3.4219, + "loss_text": 0.6328, + "state_loss_0": 0.0, + "step": 7500 + }, + { + "epoch": 0.5435175153485012, + "grad_norm": 0.8043085336685181, + "learning_rate": 4.272052195157661e-05, + "loss": 3.4879, + "step": 7525 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.375, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0, + "epoch": 0.5435175153485012, + "loss": 3.3906, + "loss_text": 0.6016, + "state_loss_0": 0.0, + "step": 7525 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9688, + "epoch": 0.5435175153485012, + "loss": 3.4062, + "loss_text": 0.7617, + "state_loss_0": 0.0, + "step": 7525 + }, + { + "epoch": 0.5453232213795594, + "grad_norm": 0.9338936805725098, + "learning_rate": 4.2668879671073714e-05, + "loss": 3.4878, + "step": 7550 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0, + "epoch": 0.5453232213795594, + "loss": 3.5312, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 7550 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0, + "audio_loss_6": 3.8906, + "epoch": 0.5453232213795594, + "loss": 3.3281, + "loss_text": 0.4941, + "state_loss_0": 0.0, + "step": 7550 + }, + { + "epoch": 0.5471289274106176, + "grad_norm": 1.1098376512527466, + "learning_rate": 4.261708630994055e-05, + "loss": 3.4875, + "step": 7575 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0938, + "audio_loss_5": 4.0, + "audio_loss_6": 3.8906, + "epoch": 0.5471289274106176, + "loss": 3.4062, + "loss_text": 1.3125, + "state_loss_0": 0.0, + "step": 7575 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3438, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0312, + "epoch": 0.5471289274106176, + "loss": 3.5469, + "loss_text": 1.3594, + "state_loss_0": 0.0, + "step": 7575 + }, + { + "epoch": 0.5489346334416757, + "grad_norm": 0.7700121402740479, + "learning_rate": 4.2565142311044645e-05, + "loss": 3.4854, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_AQACONVA": 3.4062, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4531, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.1562, + "eval_loss": 3.7656, + "eval_loss_AQACONVA": 3.7656, + "eval_loss_text_AQACONVA": 2.0469, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_AQACONVA": 3.1562, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.3125, + "eval_audio_loss_5_AQACONVA": 4.1875, + "eval_audio_loss_6_AQACONVA": 4.0312, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 1.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_AQACONVA": 3.375, + "eval_audio_loss_1_AQACONVA": 3.6562, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.3125, + "eval_audio_loss_5_AQACONVA": 4.1875, + "eval_audio_loss_6_AQACONVA": 4.0312, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 1.4062, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_AQACONVA": 3.2656, + "eval_audio_loss_1_AQACONVA": 3.6562, + "eval_audio_loss_2_AQACONVA": 3.3438, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.2812, + "eval_audio_loss_5_AQACONVA": 4.1562, + "eval_audio_loss_6_AQACONVA": 4.0938, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 2.5781, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_AQACONVA": 3.2656, + "eval_audio_loss_1_AQACONVA": 3.8281, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.25, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.8438, + "eval_loss_AQACONVA": 3.8438, + "eval_loss_text_AQACONVA": 2.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_AQACONVA": 3.5938, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.6406, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.4062, + "eval_audio_loss_5_AQACONVA": 4.3125, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.7969, + "eval_loss_AQACONVA": 3.7969, + "eval_loss_text_AQACONVA": 2.1094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_AQACONVA": 3.6094, + "eval_audio_loss_1_AQACONVA": 3.8438, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.375, + "eval_audio_loss_5_AQACONVA": 4.2812, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.7656, + "eval_loss_AQACONVA": 3.7656, + "eval_loss_text_AQACONVA": 1.8672, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_RQACONVA": 3.1562, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4219, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.2188, + "eval_audio_loss_5_RQACONVA": 3.9844, + "eval_audio_loss_6_RQACONVA": 4.0312, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 1.9531, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.3906, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.2812, + "eval_audio_loss_5_RQACONVA": 4.1562, + "eval_audio_loss_6_RQACONVA": 4.0312, + "eval_loss": 3.5938, + "eval_loss_RQACONVA": 3.5938, + "eval_loss_text_RQACONVA": 1.6562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_RQACONVA": 3.4688, + "eval_audio_loss_1_RQACONVA": 3.5625, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.25, + "eval_audio_loss_5_RQACONVA": 4.1562, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 1.9922, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_RQACONVA": 3.5625, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7969, + "eval_loss_RQACONVA": 3.7969, + "eval_loss_text_RQACONVA": 2.0625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.7969, + "eval_loss_RQACONVA": 3.7969, + "eval_loss_text_RQACONVA": 2.6094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_RQACONVA": 3.5, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.1875, + "eval_audio_loss_6_RQACONVA": 4.0312, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 1.8984, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.25, + "eval_audio_loss_5_RQACONVA": 4.1562, + "eval_audio_loss_6_RQACONVA": 4.0312, + "eval_loss": 3.6406, + "eval_loss_RQACONVA": 3.6406, + "eval_loss_text_RQACONVA": 1.8438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.25, + "eval_audio_loss_5_RQACONVA": 4.0938, + "eval_audio_loss_6_RQACONVA": 4.0625, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.0938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.625, + "eval_loss_RQACONV": 0.625, + "eval_loss_text_RQACONV": 1.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.8008, + "eval_loss_RQACONV": 0.8008, + "eval_loss_text_RQACONV": 1.6016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.6367, + "eval_loss_RQACONV": 0.6367, + "eval_loss_text_RQACONV": 1.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.7188, + "eval_loss_RQACONV": 0.7188, + "eval_loss_text_RQACONV": 1.4375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.8867, + "eval_loss_RQACONV": 0.8867, + "eval_loss_text_RQACONV": 1.7734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.4512, + "eval_loss_RQACONV": 0.4512, + "eval_loss_text_RQACONV": 0.9023, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.4102, + "eval_loss_RQACONV": 0.4102, + "eval_loss_text_RQACONV": 0.8203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.5195, + "eval_loss_RQACONV": 0.5195, + "eval_loss_text_RQACONV": 1.0391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.4844, + "eval_loss_RQACONV": 0.4844, + "eval_loss_text_RQACONV": 0.9688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.4746, + "eval_loss_RQACONV": 0.4746, + "eval_loss_text_RQACONV": 0.9492, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 1.3047, + "eval_loss_RQACONV": 1.3047, + "eval_loss_text_RQACONV": 2.6094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.1426, + "eval_loss_RQACONV": 0.1426, + "eval_loss_text_RQACONV": 0.2852, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.1582, + "eval_loss_RQACONV": 0.1582, + "eval_loss_text_RQACONV": 0.3164, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.0913, + "eval_loss_RQACONV": 0.0913, + "eval_loss_text_RQACONV": 0.1826, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.1602, + "eval_loss_RQACONV": 0.1602, + "eval_loss_text_RQACONV": 0.3203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.1543, + "eval_loss_RQACONV": 0.1543, + "eval_loss_text_RQACONV": 0.3086, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.0381, + "eval_loss_RQACONV": 0.0381, + "eval_loss_text_RQACONV": 0.0762, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.1475, + "eval_loss_RQACONV": 0.1475, + "eval_loss_text_RQACONV": 0.2949, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.1172, + "eval_loss_RQACONV": 0.1172, + "eval_loss_text_RQACONV": 0.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.1104, + "eval_loss_RQACONV": 0.1104, + "eval_loss_text_RQACONV": 0.2207, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 0.6289, + "eval_loss_RQACONV": 0.6289, + "eval_loss_text_RQACONV": 1.2578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 7600 + }, + { + "epoch": 0.5489346334416757, + "eval_loss": 1.6029332876205444, + "eval_runtime": 28.2126, + "eval_samples_per_second": 189.596, + "eval_steps_per_second": 1.489, + "step": 7600 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5, + "audio_loss_4": 4.25, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0312, + "epoch": 0.5489346334416757, + "loss": 3.5, + "loss_text": 0.4883, + "state_loss_0": 0.0, + "step": 7600 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9844, + "epoch": 0.5489346334416757, + "loss": 3.5, + "loss_text": 1.2578, + "state_loss_0": 0.0, + "step": 7600 + }, + { + "epoch": 0.5507403394727338, + "grad_norm": 0.8950313925743103, + "learning_rate": 4.251304811854165e-05, + "loss": 3.4866, + "step": 7625 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 4.0625, + "audio_loss_2": 3.7031, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0625, + "epoch": 0.5507403394727338, + "loss": 3.625, + "loss_text": 0.6914, + "state_loss_0": 0.0, + "step": 7625 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0312, + "epoch": 0.5507403394727338, + "loss": 3.5312, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 7625 + }, + { + "epoch": 0.5525460455037919, + "grad_norm": 0.9570475816726685, + "learning_rate": 4.246080417787141e-05, + "loss": 3.4803, + "step": 7650 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1875, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.7812, + "epoch": 0.5525460455037919, + "loss": 3.25, + "loss_text": 0.5586, + "state_loss_0": 0.0, + "step": 7650 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0938, + "epoch": 0.5525460455037919, + "loss": 3.5781, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 7650 + }, + { + "epoch": 0.5543517515348502, + "grad_norm": 0.8057247400283813, + "learning_rate": 4.240841093575425e-05, + "loss": 3.4764, + "step": 7675 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.375, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.8906, + "epoch": 0.5543517515348502, + "loss": 3.4062, + "loss_text": 1.125, + "state_loss_0": 0.0, + "step": 7675 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.375, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9219, + "epoch": 0.5543517515348502, + "loss": 3.3906, + "loss_text": 0.7383, + "state_loss_0": 0.0, + "step": 7675 + }, + { + "epoch": 0.5561574575659083, + "grad_norm": 0.816620409488678, + "learning_rate": 4.23558688401871e-05, + "loss": 3.4709, + "step": 7700 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.5561574575659083, + "loss": 3.5312, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 7700 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5, + "audio_loss_4": 4.25, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0312, + "epoch": 0.5561574575659083, + "loss": 3.5781, + "loss_text": 1.2422, + "state_loss_0": 0.0, + "step": 7700 + }, + { + "epoch": 0.5579631635969664, + "grad_norm": 0.8772222399711609, + "learning_rate": 4.230317834043971e-05, + "loss": 3.4811, + "step": 7725 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0312, + "audio_loss_6": 4.0, + "epoch": 0.5579631635969664, + "loss": 3.375, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 7725 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.125, + "audio_loss_6": 4.1875, + "epoch": 0.5579631635969664, + "loss": 3.5625, + "loss_text": 1.1797, + "state_loss_0": 0.0, + "step": 7725 + }, + { + "epoch": 0.5597688696280245, + "grad_norm": 0.8258810639381409, + "learning_rate": 4.225033988705075e-05, + "loss": 3.4784, + "step": 7750 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9375, + "epoch": 0.5597688696280245, + "loss": 3.4062, + "loss_text": 0.6211, + "state_loss_0": 0.0, + "step": 7750 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.1875, + "audio_loss_5": 3.9844, + "audio_loss_6": 3.9062, + "epoch": 0.5597688696280245, + "loss": 3.3906, + "loss_text": 0.7656, + "state_loss_0": 0.0, + "step": 7750 + }, + { + "epoch": 0.5615745756590828, + "grad_norm": 0.7350472807884216, + "learning_rate": 4.2197353931824044e-05, + "loss": 3.4699, + "step": 7775 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0, + "audio_loss_6": 4.0312, + "epoch": 0.5615745756590828, + "loss": 3.5469, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 7775 + }, + { + "audio_loss_0": 3.5781, + "audio_loss_1": 4.0938, + "audio_loss_2": 3.75, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.4375, + "audio_loss_5": 4.3125, + "audio_loss_6": 4.2812, + "epoch": 0.5615745756590828, + "loss": 3.7188, + "loss_text": 0.6953, + "state_loss_0": 0.0, + "step": 7775 + }, + { + "epoch": 0.5633802816901409, + "grad_norm": 0.8630298972129822, + "learning_rate": 4.214422092782459e-05, + "loss": 3.4788, + "step": 7800 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.9531, + "epoch": 0.5633802816901409, + "loss": 3.5, + "loss_text": 1.1797, + "state_loss_0": 0.0, + "step": 7800 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.625, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9844, + "epoch": 0.5633802816901409, + "loss": 3.5, + "loss_text": 1.2891, + "state_loss_0": 0.0, + "step": 7800 + }, + { + "epoch": 0.565185987721199, + "grad_norm": 0.8478107452392578, + "learning_rate": 4.209094132937481e-05, + "loss": 3.468, + "step": 7825 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.375, + "audio_loss_5": 4.2188, + "audio_loss_6": 4.0938, + "epoch": 0.565185987721199, + "loss": 3.5469, + "loss_text": 0.5391, + "state_loss_0": 0.0, + "step": 7825 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.1875, + "audio_loss_5": 3.9219, + "audio_loss_6": 3.8906, + "epoch": 0.565185987721199, + "loss": 3.3438, + "loss_text": 0.7461, + "state_loss_0": 0.0, + "step": 7825 + }, + { + "epoch": 0.5669916937522571, + "grad_norm": 1.0318553447723389, + "learning_rate": 4.203751559205056e-05, + "loss": 3.4758, + "step": 7850 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0938, + "epoch": 0.5669916937522571, + "loss": 3.5938, + "loss_text": 1.1484, + "state_loss_0": 0.0, + "step": 7850 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.2812, + "audio_loss_6": 4.0312, + "epoch": 0.5669916937522571, + "loss": 3.625, + "loss_text": 1.1641, + "state_loss_0": 0.0, + "step": 7850 + }, + { + "epoch": 0.5687973997833152, + "grad_norm": 0.8401951789855957, + "learning_rate": 4.1983944172677306e-05, + "loss": 3.4773, + "step": 7875 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.0625, + "epoch": 0.5687973997833152, + "loss": 3.5312, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 7875 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.1562, + "audio_loss_5": 3.9219, + "audio_loss_6": 3.9219, + "epoch": 0.5687973997833152, + "loss": 3.3438, + "loss_text": 0.6445, + "state_loss_0": 0.0, + "step": 7875 + }, + { + "epoch": 0.5706031058143735, + "grad_norm": 0.7879558801651001, + "learning_rate": 4.193022752932619e-05, + "loss": 3.4725, + "step": 7900 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9375, + "epoch": 0.5706031058143735, + "loss": 3.4688, + "loss_text": 1.0078, + "state_loss_0": 0.0, + "step": 7900 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.125, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0312, + "epoch": 0.5706031058143735, + "loss": 3.4688, + "loss_text": 0.6445, + "state_loss_0": 0.0, + "step": 7900 + }, + { + "epoch": 0.5724088118454316, + "grad_norm": 0.9535936117172241, + "learning_rate": 4.18763661213101e-05, + "loss": 3.4738, + "step": 7925 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.875, + "audio_loss_6": 3.8281, + "epoch": 0.5724088118454316, + "loss": 3.3281, + "loss_text": 0.9609, + "state_loss_0": 0.0, + "step": 7925 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.125, + "audio_loss_5": 4.1562, + "audio_loss_6": 3.9844, + "epoch": 0.5724088118454316, + "loss": 3.375, + "loss_text": 0.4941, + "state_loss_0": 0.0, + "step": 7925 + }, + { + "epoch": 0.5742145178764897, + "grad_norm": 0.8445006012916565, + "learning_rate": 4.1822360409179775e-05, + "loss": 3.4696, + "step": 7950 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.9375, + "epoch": 0.5742145178764897, + "loss": 3.4375, + "loss_text": 0.8789, + "state_loss_0": 0.0, + "step": 7950 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0312, + "audio_loss_6": 4.0, + "epoch": 0.5742145178764897, + "loss": 3.5156, + "loss_text": 1.1406, + "state_loss_0": 0.0, + "step": 7950 + }, + { + "epoch": 0.5760202239075478, + "grad_norm": 1.1891508102416992, + "learning_rate": 4.1768210854719844e-05, + "loss": 3.4611, + "step": 7975 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0625, + "audio_loss_6": 4.0312, + "epoch": 0.5760202239075478, + "loss": 3.5, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 7975 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9062, + "epoch": 0.5760202239075478, + "loss": 3.4844, + "loss_text": 1.1719, + "state_loss_0": 0.0, + "step": 7975 + }, + { + "epoch": 0.577825929938606, + "grad_norm": 0.8512836694717407, + "learning_rate": 4.171391792094488e-05, + "loss": 3.4645, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_AQACONVA": 3.4062, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.375, + "eval_audio_loss_5_AQACONVA": 4.2812, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.0625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_AQACONVA": 3.1719, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.2812, + "eval_audio_loss_5_AQACONVA": 4.125, + "eval_audio_loss_6_AQACONVA": 4.0, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 1.6875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.2812, + "eval_audio_loss_5_AQACONVA": 4.125, + "eval_audio_loss_6_AQACONVA": 4.0, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 1.4766, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_AQACONVA": 3.2656, + "eval_audio_loss_1_AQACONVA": 3.6406, + "eval_audio_loss_2_AQACONVA": 3.3438, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.2188, + "eval_audio_loss_5_AQACONVA": 4.0625, + "eval_audio_loss_6_AQACONVA": 4.0312, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.5312, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_AQACONVA": 3.2656, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.375, + "eval_audio_loss_5_AQACONVA": 4.1875, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.8281, + "eval_loss_AQACONVA": 3.8281, + "eval_loss_text_AQACONVA": 2.7344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_AQACONVA": 3.5781, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.3438, + "eval_audio_loss_5_AQACONVA": 4.25, + "eval_audio_loss_6_AQACONVA": 4.0938, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 2.1406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_AQACONVA": 3.5938, + "eval_audio_loss_1_AQACONVA": 3.8438, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.3438, + "eval_audio_loss_5_AQACONVA": 4.2188, + "eval_audio_loss_6_AQACONVA": 4.125, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 1.8828, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4219, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.1562, + "eval_audio_loss_5_RQACONVA": 3.9375, + "eval_audio_loss_6_RQACONVA": 3.9844, + "eval_loss": 3.5938, + "eval_loss_RQACONVA": 3.5938, + "eval_loss_text_RQACONVA": 1.9766, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.25, + "eval_audio_loss_5_RQACONVA": 4.0938, + "eval_audio_loss_6_RQACONVA": 4.0, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 1.6328, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.5156, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.2188, + "eval_audio_loss_5_RQACONVA": 4.0938, + "eval_audio_loss_6_RQACONVA": 4.0, + "eval_loss": 3.6406, + "eval_loss_RQACONVA": 3.6406, + "eval_loss_text_RQACONVA": 1.9688, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_RQACONVA": 3.6719, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.6406, + "eval_audio_loss_3_RQACONVA": 4.6875, + "eval_audio_loss_4_RQACONVA": 4.375, + "eval_audio_loss_5_RQACONVA": 4.25, + "eval_audio_loss_6_RQACONVA": 4.125, + "eval_loss": 3.875, + "eval_loss_RQACONVA": 3.875, + "eval_loss_text_RQACONVA": 2.375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.1875, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.7969, + "eval_loss_RQACONVA": 3.7969, + "eval_loss_text_RQACONVA": 2.6406, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_RQACONVA": 3.4844, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.3125, + "eval_audio_loss_5_RQACONVA": 4.1562, + "eval_audio_loss_6_RQACONVA": 4.0, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 1.9062, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.25, + "eval_audio_loss_5_RQACONVA": 4.125, + "eval_audio_loss_6_RQACONVA": 4.0, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 1.8438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.2188, + "eval_audio_loss_5_RQACONVA": 4.0312, + "eval_audio_loss_6_RQACONVA": 4.0, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 2.0625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.5742, + "eval_loss_RQACONV": 0.5742, + "eval_loss_text_RQACONV": 1.1484, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.7109, + "eval_loss_RQACONV": 0.7109, + "eval_loss_text_RQACONV": 1.4219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.6094, + "eval_loss_RQACONV": 0.6094, + "eval_loss_text_RQACONV": 1.2188, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.7344, + "eval_loss_RQACONV": 0.7344, + "eval_loss_text_RQACONV": 1.4688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.6406, + "eval_loss_RQACONV": 0.6406, + "eval_loss_text_RQACONV": 1.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.7344, + "eval_loss_RQACONV": 0.7344, + "eval_loss_text_RQACONV": 1.4688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.875, + "eval_loss_RQACONV": 0.875, + "eval_loss_text_RQACONV": 1.75, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.459, + "eval_loss_RQACONV": 0.459, + "eval_loss_text_RQACONV": 0.918, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.4199, + "eval_loss_RQACONV": 0.4199, + "eval_loss_text_RQACONV": 0.8398, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.4863, + "eval_loss_RQACONV": 0.4863, + "eval_loss_text_RQACONV": 0.9727, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.4727, + "eval_loss_RQACONV": 0.4727, + "eval_loss_text_RQACONV": 0.9453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 1.3125, + "eval_loss_RQACONV": 1.3125, + "eval_loss_text_RQACONV": 2.625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.1455, + "eval_loss_RQACONV": 0.1455, + "eval_loss_text_RQACONV": 0.291, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.0806, + "eval_loss_RQACONV": 0.0806, + "eval_loss_text_RQACONV": 0.1611, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.1611, + "eval_loss_RQACONV": 0.1611, + "eval_loss_text_RQACONV": 0.3223, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.1543, + "eval_loss_RQACONV": 0.1543, + "eval_loss_text_RQACONV": 0.3086, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.0327, + "eval_loss_RQACONV": 0.0327, + "eval_loss_text_RQACONV": 0.0654, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.1074, + "eval_loss_RQACONV": 0.1074, + "eval_loss_text_RQACONV": 0.2148, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.0972, + "eval_loss_RQACONV": 0.0972, + "eval_loss_text_RQACONV": 0.1943, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8000 + }, + { + "epoch": 0.577825929938606, + "eval_loss": 1.6007007360458374, + "eval_runtime": 27.8416, + "eval_samples_per_second": 192.123, + "eval_steps_per_second": 1.509, + "step": 8000 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0625, + "audio_loss_6": 4.0, + "epoch": 0.577825929938606, + "loss": 3.4688, + "loss_text": 0.7852, + "state_loss_0": 0.0, + "step": 8000 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9375, + "epoch": 0.577825929938606, + "loss": 3.4375, + "loss_text": 0.6641, + "state_loss_0": 0.0, + "step": 8000 + }, + { + "epoch": 0.5796316359696642, + "grad_norm": 0.7510226368904114, + "learning_rate": 4.165948207209545e-05, + "loss": 3.4655, + "step": 8025 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.2812, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.9688, + "audio_loss_6": 3.8906, + "epoch": 0.5796316359696642, + "loss": 3.3594, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 8025 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0312, + "audio_loss_6": 4.0312, + "epoch": 0.5796316359696642, + "loss": 3.5156, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 8025 + }, + { + "epoch": 0.5814373420007223, + "grad_norm": 0.8986449837684631, + "learning_rate": 4.160490377363412e-05, + "loss": 3.4654, + "step": 8050 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0938, + "audio_loss_6": 4.0, + "epoch": 0.5814373420007223, + "loss": 3.5469, + "loss_text": 0.6055, + "state_loss_0": 0.0, + "step": 8050 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9375, + "epoch": 0.5814373420007223, + "loss": 3.375, + "loss_text": 0.6445, + "state_loss_0": 0.0, + "step": 8050 + }, + { + "epoch": 0.5832430480317804, + "grad_norm": 0.7431805729866028, + "learning_rate": 4.1550183492241536e-05, + "loss": 3.461, + "step": 8075 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.1562, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.9062, + "epoch": 0.5832430480317804, + "loss": 3.4375, + "loss_text": 1.0859, + "state_loss_0": 0.0, + "step": 8075 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.125, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.9375, + "epoch": 0.5832430480317804, + "loss": 3.3438, + "loss_text": 0.6836, + "state_loss_0": 0.0, + "step": 8075 + }, + { + "epoch": 0.5850487540628385, + "grad_norm": 0.7632334232330322, + "learning_rate": 4.149532169581235e-05, + "loss": 3.4635, + "step": 8100 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9531, + "epoch": 0.5850487540628385, + "loss": 3.4375, + "loss_text": 0.3867, + "state_loss_0": 0.0, + "step": 8100 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.1875, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.9375, + "epoch": 0.5850487540628385, + "loss": 3.4688, + "loss_text": 0.875, + "state_loss_0": 0.0, + "step": 8100 + }, + { + "epoch": 0.5868544600938967, + "grad_norm": 0.8157117962837219, + "learning_rate": 4.144031885345128e-05, + "loss": 3.4676, + "step": 8125 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.9375, + "epoch": 0.5868544600938967, + "loss": 3.5, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 8125 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9844, + "epoch": 0.5868544600938967, + "loss": 3.4688, + "loss_text": 0.5781, + "state_loss_0": 0.0, + "step": 8125 + }, + { + "epoch": 0.5886601661249549, + "grad_norm": 0.8629337549209595, + "learning_rate": 4.138517543546908e-05, + "loss": 3.4617, + "step": 8150 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.8906, + "epoch": 0.5886601661249549, + "loss": 3.4219, + "loss_text": 0.4062, + "state_loss_0": 0.0, + "step": 8150 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.375, + "audio_loss_3": 4.375, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.8906, + "epoch": 0.5886601661249549, + "loss": 3.4219, + "loss_text": 0.3594, + "state_loss_0": 0.0, + "step": 8150 + }, + { + "epoch": 0.590465872156013, + "grad_norm": 1.1162105798721313, + "learning_rate": 4.1329891913378525e-05, + "loss": 3.4623, + "step": 8175 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.375, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9531, + "epoch": 0.590465872156013, + "loss": 3.3906, + "loss_text": 0.6367, + "state_loss_0": 0.0, + "step": 8175 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1562, + "audio_loss_5": 3.9844, + "audio_loss_6": 3.9531, + "epoch": 0.590465872156013, + "loss": 3.4062, + "loss_text": 0.6562, + "state_loss_0": 0.0, + "step": 8175 + }, + { + "epoch": 0.5922715781870711, + "grad_norm": 0.9691133499145508, + "learning_rate": 4.127446875989036e-05, + "loss": 3.4596, + "step": 8200 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0, + "epoch": 0.5922715781870711, + "loss": 3.6094, + "loss_text": 0.9297, + "state_loss_0": 0.0, + "step": 8200 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9844, + "epoch": 0.5922715781870711, + "loss": 3.4375, + "loss_text": 0.4883, + "state_loss_0": 0.0, + "step": 8200 + }, + { + "epoch": 0.5940772842181293, + "grad_norm": 0.8447632789611816, + "learning_rate": 4.121890644890927e-05, + "loss": 3.4567, + "step": 8225 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0938, + "epoch": 0.5940772842181293, + "loss": 3.5312, + "loss_text": 1.0781, + "state_loss_0": 0.0, + "step": 8225 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0312, + "audio_loss_6": 4.0625, + "epoch": 0.5940772842181293, + "loss": 3.4375, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 8225 + }, + { + "epoch": 0.5958829902491874, + "grad_norm": 0.7498201131820679, + "learning_rate": 4.116320545552983e-05, + "loss": 3.4595, + "step": 8250 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.0, + "audio_loss_6": 4.0, + "epoch": 0.5958829902491874, + "loss": 3.5, + "loss_text": 0.8086, + "state_loss_0": 0.0, + "step": 8250 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9375, + "epoch": 0.5958829902491874, + "loss": 3.4688, + "loss_text": 1.2344, + "state_loss_0": 0.0, + "step": 8250 + }, + { + "epoch": 0.5976886962802456, + "grad_norm": 0.923409104347229, + "learning_rate": 4.1107366256032465e-05, + "loss": 3.4434, + "step": 8275 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0625, + "audio_loss_6": 4.0625, + "epoch": 0.5976886962802456, + "loss": 3.5, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 8275 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2812, + "audio_loss_5": 3.9531, + "audio_loss_6": 4.0, + "epoch": 0.5976886962802456, + "loss": 3.4844, + "loss_text": 0.4688, + "state_loss_0": 0.0, + "step": 8275 + }, + { + "epoch": 0.5994944023113037, + "grad_norm": 1.0562717914581299, + "learning_rate": 4.1051389327879327e-05, + "loss": 3.4453, + "step": 8300 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9531, + "epoch": 0.5994944023113037, + "loss": 3.4531, + "loss_text": 0.5703, + "state_loss_0": 0.0, + "step": 8300 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.7969, + "epoch": 0.5994944023113037, + "loss": 3.2812, + "loss_text": 0.5664, + "state_loss_0": 0.0, + "step": 8300 + }, + { + "epoch": 0.6013001083423619, + "grad_norm": 0.8258801698684692, + "learning_rate": 4.0995275149710236e-05, + "loss": 3.4431, + "step": 8325 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.1562, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.9531, + "epoch": 0.6013001083423619, + "loss": 3.375, + "loss_text": 0.9453, + "state_loss_0": 0.0, + "step": 8325 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0625, + "audio_loss_6": 4.0625, + "epoch": 0.6013001083423619, + "loss": 3.5625, + "loss_text": 1.0391, + "state_loss_0": 0.0, + "step": 8325 + }, + { + "epoch": 0.60310581437342, + "grad_norm": 0.8762686848640442, + "learning_rate": 4.0939024201338615e-05, + "loss": 3.4357, + "step": 8350 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.25, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9844, + "epoch": 0.60310581437342, + "loss": 3.4844, + "loss_text": 0.5234, + "state_loss_0": 0.0, + "step": 8350 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0312, + "audio_loss_6": 4.0, + "epoch": 0.60310581437342, + "loss": 3.5, + "loss_text": 1.0781, + "state_loss_0": 0.0, + "step": 8350 + }, + { + "epoch": 0.6049115204044782, + "grad_norm": 0.8179141879081726, + "learning_rate": 4.088263696374736e-05, + "loss": 3.447, + "step": 8375 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.875, + "audio_loss_6": 3.7812, + "epoch": 0.6049115204044782, + "loss": 3.3594, + "loss_text": 0.7539, + "state_loss_0": 0.0, + "step": 8375 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.125, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.6875, + "epoch": 0.6049115204044782, + "loss": 3.2812, + "loss_text": 0.6172, + "state_loss_0": 0.0, + "step": 8375 + }, + { + "epoch": 0.6067172264355363, + "grad_norm": 0.9485668540000916, + "learning_rate": 4.082611391908469e-05, + "loss": 3.4354, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_AQACONVA": 3.3906, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.3125, + "eval_audio_loss_5_AQACONVA": 4.2188, + "eval_audio_loss_6_AQACONVA": 4.0625, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.0, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_AQACONVA": 3.1719, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.4531, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.2188, + "eval_audio_loss_5_AQACONVA": 4.0938, + "eval_audio_loss_6_AQACONVA": 3.9531, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 1.7109, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.25, + "eval_audio_loss_5_AQACONVA": 4.0938, + "eval_audio_loss_6_AQACONVA": 3.9688, + "eval_loss": 3.5938, + "eval_loss_AQACONVA": 3.5938, + "eval_loss_text_AQACONVA": 1.4688, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.6406, + "eval_audio_loss_2_AQACONVA": 3.3281, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0312, + "eval_audio_loss_6_AQACONVA": 3.9844, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.5781, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_AQACONVA": 3.2656, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5469, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.3125, + "eval_audio_loss_5_AQACONVA": 4.1562, + "eval_audio_loss_6_AQACONVA": 4.0625, + "eval_loss": 3.8125, + "eval_loss_AQACONVA": 3.8125, + "eval_loss_text_AQACONVA": 2.7812, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_AQACONVA": 3.5781, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.3125, + "eval_audio_loss_5_AQACONVA": 4.2188, + "eval_audio_loss_6_AQACONVA": 4.0625, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 2.1719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_AQACONVA": 3.6094, + "eval_audio_loss_1_AQACONVA": 3.8438, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.3125, + "eval_audio_loss_5_AQACONVA": 4.1875, + "eval_audio_loss_6_AQACONVA": 4.0625, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 1.8984, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.4219, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.9219, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.0781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_RQACONVA": 3.1562, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.1875, + "eval_audio_loss_5_RQACONVA": 4.0312, + "eval_audio_loss_6_RQACONVA": 3.9375, + "eval_loss": 3.5938, + "eval_loss_RQACONVA": 3.5938, + "eval_loss_text_RQACONVA": 1.7188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.5156, + "eval_audio_loss_2_RQACONVA": 3.2969, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.1562, + "eval_audio_loss_5_RQACONVA": 4.0625, + "eval_audio_loss_6_RQACONVA": 3.9688, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 1.9922, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.625, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.2812, + "eval_audio_loss_5_RQACONVA": 4.2188, + "eval_audio_loss_6_RQACONVA": 4.0938, + "eval_loss": 3.8281, + "eval_loss_RQACONVA": 3.8281, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.25, + "eval_audio_loss_5_RQACONVA": 4.125, + "eval_audio_loss_6_RQACONVA": 4.0312, + "eval_loss": 3.7656, + "eval_loss_RQACONVA": 3.7656, + "eval_loss_text_RQACONVA": 2.6875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.25, + "eval_audio_loss_5_RQACONVA": 4.0938, + "eval_audio_loss_6_RQACONVA": 3.9531, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 1.8984, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.1562, + "eval_audio_loss_5_RQACONVA": 4.0312, + "eval_audio_loss_6_RQACONVA": 3.9375, + "eval_loss": 3.5938, + "eval_loss_RQACONVA": 3.5938, + "eval_loss_text_RQACONVA": 1.7812, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.1562, + "eval_audio_loss_5_RQACONVA": 4.0, + "eval_audio_loss_6_RQACONVA": 3.9531, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 2.1719, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.5859, + "eval_loss_RQACONV": 0.5859, + "eval_loss_text_RQACONV": 1.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.498, + "eval_loss_RQACONV": 0.498, + "eval_loss_text_RQACONV": 0.9961, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.6914, + "eval_loss_RQACONV": 0.6914, + "eval_loss_text_RQACONV": 1.3828, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.6523, + "eval_loss_RQACONV": 0.6523, + "eval_loss_text_RQACONV": 1.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.7344, + "eval_loss_RQACONV": 0.7344, + "eval_loss_text_RQACONV": 1.4688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.7109, + "eval_loss_RQACONV": 0.7109, + "eval_loss_text_RQACONV": 1.4219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.8711, + "eval_loss_RQACONV": 0.8711, + "eval_loss_text_RQACONV": 1.7422, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.4551, + "eval_loss_RQACONV": 0.4551, + "eval_loss_text_RQACONV": 0.9102, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.4941, + "eval_loss_RQACONV": 0.4941, + "eval_loss_text_RQACONV": 0.9883, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.4219, + "eval_loss_RQACONV": 0.4219, + "eval_loss_text_RQACONV": 0.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.4824, + "eval_loss_RQACONV": 0.4824, + "eval_loss_text_RQACONV": 0.9648, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.4707, + "eval_loss_RQACONV": 0.4707, + "eval_loss_text_RQACONV": 0.9414, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 1.3125, + "eval_loss_RQACONV": 1.3125, + "eval_loss_text_RQACONV": 2.625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.1396, + "eval_loss_RQACONV": 0.1396, + "eval_loss_text_RQACONV": 0.2793, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.1543, + "eval_loss_RQACONV": 0.1543, + "eval_loss_text_RQACONV": 0.3086, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.0859, + "eval_loss_RQACONV": 0.0859, + "eval_loss_text_RQACONV": 0.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.1611, + "eval_loss_RQACONV": 0.1611, + "eval_loss_text_RQACONV": 0.3223, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.1523, + "eval_loss_RQACONV": 0.1523, + "eval_loss_text_RQACONV": 0.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.033, + "eval_loss_RQACONV": 0.033, + "eval_loss_text_RQACONV": 0.0659, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.1484, + "eval_loss_RQACONV": 0.1484, + "eval_loss_text_RQACONV": 0.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.1045, + "eval_loss_RQACONV": 0.1045, + "eval_loss_text_RQACONV": 0.209, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.1011, + "eval_loss_RQACONV": 0.1011, + "eval_loss_text_RQACONV": 0.2021, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8400 + }, + { + "epoch": 0.6067172264355363, + "eval_loss": 1.5914788246154785, + "eval_runtime": 27.945, + "eval_samples_per_second": 191.412, + "eval_steps_per_second": 1.503, + "step": 8400 + }, + { + "audio_loss_0": 3.625, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.2188, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9219, + "epoch": 0.6067172264355363, + "loss": 3.5156, + "loss_text": 0.8047, + "state_loss_0": 0.0, + "step": 8400 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.8594, + "epoch": 0.6067172264355363, + "loss": 3.4531, + "loss_text": 0.9453, + "state_loss_0": 0.0, + "step": 8400 + }, + { + "epoch": 0.6085229324665944, + "grad_norm": 0.9714045524597168, + "learning_rate": 4.076945555066011e-05, + "loss": 3.4406, + "step": 8425 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.8281, + "epoch": 0.6085229324665944, + "loss": 3.375, + "loss_text": 0.2559, + "state_loss_0": 0.0, + "step": 8425 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0312, + "audio_loss_6": 4.0625, + "epoch": 0.6085229324665944, + "loss": 3.4688, + "loss_text": 0.6719, + "state_loss_0": 0.0, + "step": 8425 + }, + { + "epoch": 0.6103286384976526, + "grad_norm": 0.8150820732116699, + "learning_rate": 4.0712662342940225e-05, + "loss": 3.4405, + "step": 8450 + }, + { + "audio_loss_0": 3.8281, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.125, + "audio_loss_6": 4.0, + "epoch": 0.6103286384976526, + "loss": 3.5781, + "loss_text": 0.6719, + "state_loss_0": 0.0, + "step": 8450 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.625, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.875, + "audio_loss_6": 3.9219, + "epoch": 0.6103286384976526, + "loss": 3.375, + "loss_text": 0.6797, + "state_loss_0": 0.0, + "step": 8450 + }, + { + "epoch": 0.6121343445287107, + "grad_norm": 0.7930060625076294, + "learning_rate": 4.0655734781544586e-05, + "loss": 3.4391, + "step": 8475 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.9062, + "epoch": 0.6121343445287107, + "loss": 3.4531, + "loss_text": 0.7617, + "state_loss_0": 0.0, + "step": 8475 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9531, + "epoch": 0.6121343445287107, + "loss": 3.4844, + "loss_text": 0.7891, + "state_loss_0": 0.0, + "step": 8475 + }, + { + "epoch": 0.6139400505597689, + "grad_norm": 1.0647506713867188, + "learning_rate": 4.059867335324158e-05, + "loss": 3.4306, + "step": 8500 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.9219, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.9688, + "epoch": 0.6139400505597689, + "loss": 3.6094, + "loss_text": 1.0547, + "state_loss_0": 0.0, + "step": 8500 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9688, + "audio_loss_6": 3.9375, + "epoch": 0.6139400505597689, + "loss": 3.4062, + "loss_text": 0.8359, + "state_loss_0": 0.0, + "step": 8500 + }, + { + "epoch": 0.615745756590827, + "grad_norm": 1.1023228168487549, + "learning_rate": 4.054147854594422e-05, + "loss": 3.4335, + "step": 8525 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.9219, + "audio_loss_6": 3.8438, + "epoch": 0.615745756590827, + "loss": 3.3594, + "loss_text": 0.5781, + "state_loss_0": 0.0, + "step": 8525 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.5, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.75, + "audio_loss_6": 3.7188, + "epoch": 0.615745756590827, + "loss": 3.2656, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 8525 + }, + { + "epoch": 0.6175514626218852, + "grad_norm": 1.0671043395996094, + "learning_rate": 4.0484150848706025e-05, + "loss": 3.4305, + "step": 8550 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0938, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.8438, + "epoch": 0.6175514626218852, + "loss": 3.4844, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 8550 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.8281, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9375, + "epoch": 0.6175514626218852, + "loss": 3.5, + "loss_text": 0.9062, + "state_loss_0": 0.0, + "step": 8550 + }, + { + "epoch": 0.6193571686529433, + "grad_norm": 0.7411192059516907, + "learning_rate": 4.042669075171679e-05, + "loss": 3.4293, + "step": 8575 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.0, + "audio_loss_5": 3.875, + "audio_loss_6": 3.625, + "epoch": 0.6193571686529433, + "loss": 3.2812, + "loss_text": 0.6211, + "state_loss_0": 0.0, + "step": 8575 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.9219, + "audio_loss_6": 3.9219, + "epoch": 0.6193571686529433, + "loss": 3.4062, + "loss_text": 1.1328, + "state_loss_0": 0.0, + "step": 8575 + }, + { + "epoch": 0.6211628746840014, + "grad_norm": 0.90294349193573, + "learning_rate": 4.0369098746298416e-05, + "loss": 3.4252, + "step": 8600 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.375, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.8281, + "epoch": 0.6211628746840014, + "loss": 3.3906, + "loss_text": 0.7148, + "state_loss_0": 0.0, + "step": 8600 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.9375, + "audio_loss_2": 3.6719, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.3125, + "audio_loss_5": 4.1562, + "audio_loss_6": 4.125, + "epoch": 0.6211628746840014, + "loss": 3.7188, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 8600 + }, + { + "epoch": 0.6229685807150596, + "grad_norm": 0.8000319600105286, + "learning_rate": 4.0311375324900724e-05, + "loss": 3.4284, + "step": 8625 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.9219, + "audio_loss_6": 3.8281, + "epoch": 0.6229685807150596, + "loss": 3.4531, + "loss_text": 1.2812, + "state_loss_0": 0.0, + "step": 8625 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0938, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9062, + "epoch": 0.6229685807150596, + "loss": 3.4844, + "loss_text": 0.8828, + "state_loss_0": 0.0, + "step": 8625 + }, + { + "epoch": 0.6247742867461177, + "grad_norm": 0.9107332825660706, + "learning_rate": 4.0253520981097214e-05, + "loss": 3.4274, + "step": 8650 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.8906, + "epoch": 0.6247742867461177, + "loss": 3.4531, + "loss_text": 0.6367, + "state_loss_0": 0.0, + "step": 8650 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0938, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9531, + "epoch": 0.6247742867461177, + "loss": 3.4375, + "loss_text": 0.5742, + "state_loss_0": 0.0, + "step": 8650 + }, + { + "epoch": 0.6265799927771759, + "grad_norm": 0.9050159454345703, + "learning_rate": 4.019553620958088e-05, + "loss": 3.4208, + "step": 8675 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.1562, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.8125, + "epoch": 0.6265799927771759, + "loss": 3.4688, + "loss_text": 1.0078, + "state_loss_0": 0.0, + "step": 8675 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2812, + "audio_loss_4": 4.0, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.7031, + "epoch": 0.6265799927771759, + "loss": 3.4062, + "loss_text": 1.4297, + "state_loss_0": 0.0, + "step": 8675 + }, + { + "epoch": 0.628385698808234, + "grad_norm": 1.0124633312225342, + "learning_rate": 4.013742150615993e-05, + "loss": 3.4177, + "step": 8700 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.7344, + "epoch": 0.628385698808234, + "loss": 3.3125, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 8700 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.0625, + "audio_loss_5": 4.0, + "audio_loss_6": 3.8906, + "epoch": 0.628385698808234, + "loss": 3.4844, + "loss_text": 1.1875, + "state_loss_0": 0.0, + "step": 8700 + }, + { + "epoch": 0.6301914048392921, + "grad_norm": 0.7465088367462158, + "learning_rate": 4.0079177367753596e-05, + "loss": 3.4136, + "step": 8725 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0312, + "audio_loss_5": 4.1562, + "audio_loss_6": 3.8594, + "epoch": 0.6301914048392921, + "loss": 3.3281, + "loss_text": 0.5352, + "state_loss_0": 0.0, + "step": 8725 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.8125, + "epoch": 0.6301914048392921, + "loss": 3.4062, + "loss_text": 1.1797, + "state_loss_0": 0.0, + "step": 8725 + }, + { + "epoch": 0.6319971108703503, + "grad_norm": 0.7112841606140137, + "learning_rate": 4.002080429238785e-05, + "loss": 3.4137, + "step": 8750 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.25, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.9375, + "epoch": 0.6319971108703503, + "loss": 3.5, + "loss_text": 0.4746, + "state_loss_0": 0.0, + "step": 8750 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0938, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.8594, + "epoch": 0.6319971108703503, + "loss": 3.5, + "loss_text": 0.9844, + "state_loss_0": 0.0, + "step": 8750 + }, + { + "epoch": 0.6338028169014085, + "grad_norm": 0.8555691242218018, + "learning_rate": 3.9962302779191186e-05, + "loss": 3.4171, + "step": 8775 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.7812, + "epoch": 0.6338028169014085, + "loss": 3.375, + "loss_text": 1.0781, + "state_loss_0": 0.0, + "step": 8775 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.9375, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9531, + "epoch": 0.6338028169014085, + "loss": 3.625, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 8775 + }, + { + "epoch": 0.6356085229324666, + "grad_norm": 0.9281635880470276, + "learning_rate": 3.99036733283903e-05, + "loss": 3.4102, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_AQACONVA": 3.3906, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.2812, + "eval_audio_loss_5_AQACONVA": 4.1562, + "eval_audio_loss_6_AQACONVA": 3.9844, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 1.9844, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_AQACONVA": 3.1719, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.4531, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0312, + "eval_audio_loss_6_AQACONVA": 3.875, + "eval_loss": 3.5781, + "eval_loss_AQACONVA": 3.5781, + "eval_loss_text_AQACONVA": 1.6797, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0312, + "eval_audio_loss_6_AQACONVA": 3.875, + "eval_loss": 3.5469, + "eval_loss_AQACONVA": 3.5469, + "eval_loss_text_AQACONVA": 1.3984, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_AQACONVA": 3.2656, + "eval_audio_loss_1_AQACONVA": 3.6406, + "eval_audio_loss_2_AQACONVA": 3.3438, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.9062, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.6094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.25, + "eval_audio_loss_5_AQACONVA": 4.0938, + "eval_audio_loss_6_AQACONVA": 3.9844, + "eval_loss": 3.7969, + "eval_loss_AQACONVA": 3.7969, + "eval_loss_text_AQACONVA": 2.75, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_AQACONVA": 3.5781, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.2812, + "eval_audio_loss_5_AQACONVA": 4.1875, + "eval_audio_loss_6_AQACONVA": 3.9844, + "eval_loss": 3.7812, + "eval_loss_AQACONVA": 3.7812, + "eval_loss_text_AQACONVA": 2.1406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_AQACONVA": 3.5938, + "eval_audio_loss_1_AQACONVA": 3.8438, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5938, + "eval_audio_loss_4_AQACONVA": 4.25, + "eval_audio_loss_5_AQACONVA": 4.125, + "eval_audio_loss_6_AQACONVA": 3.9688, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 1.9062, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.8281, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.0625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.3906, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 3.9688, + "eval_audio_loss_6_RQACONVA": 3.8594, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 1.6094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.5156, + "eval_audio_loss_2_RQACONVA": 3.2969, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 4.0, + "eval_audio_loss_6_RQACONVA": 3.8906, + "eval_loss": 3.5938, + "eval_loss_RQACONVA": 3.5938, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_RQACONVA": 3.6406, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.1875, + "eval_audio_loss_5_RQACONVA": 4.0938, + "eval_audio_loss_6_RQACONVA": 4.0, + "eval_loss": 3.7969, + "eval_loss_RQACONVA": 3.7969, + "eval_loss_text_RQACONVA": 2.2656, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.2188, + "eval_audio_loss_5_RQACONVA": 4.0938, + "eval_audio_loss_6_RQACONVA": 3.9844, + "eval_loss": 3.7656, + "eval_loss_RQACONVA": 3.7656, + "eval_loss_text_RQACONVA": 2.6406, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.2188, + "eval_audio_loss_5_RQACONVA": 4.0625, + "eval_audio_loss_6_RQACONVA": 3.875, + "eval_loss": 3.6406, + "eval_loss_RQACONVA": 3.6406, + "eval_loss_text_RQACONVA": 1.875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.1562, + "eval_audio_loss_5_RQACONVA": 4.0312, + "eval_audio_loss_6_RQACONVA": 3.875, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 1.8594, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 3.9375, + "eval_audio_loss_6_RQACONVA": 3.875, + "eval_loss": 3.6094, + "eval_loss_RQACONVA": 3.6094, + "eval_loss_text_RQACONVA": 2.1094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.5781, + "eval_loss_RQACONV": 0.5781, + "eval_loss_text_RQACONV": 1.1562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.6797, + "eval_loss_RQACONV": 0.6797, + "eval_loss_text_RQACONV": 1.3594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.6367, + "eval_loss_RQACONV": 0.6367, + "eval_loss_text_RQACONV": 1.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.8203, + "eval_loss_RQACONV": 0.8203, + "eval_loss_text_RQACONV": 1.6406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.6367, + "eval_loss_RQACONV": 0.6367, + "eval_loss_text_RQACONV": 1.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.7031, + "eval_loss_RQACONV": 0.7031, + "eval_loss_text_RQACONV": 1.4062, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.8516, + "eval_loss_RQACONV": 0.8516, + "eval_loss_text_RQACONV": 1.7031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.459, + "eval_loss_RQACONV": 0.459, + "eval_loss_text_RQACONV": 0.918, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.4199, + "eval_loss_RQACONV": 0.4199, + "eval_loss_text_RQACONV": 0.8398, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.6016, + "eval_loss_RQACONV": 0.6016, + "eval_loss_text_RQACONV": 1.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.5195, + "eval_loss_RQACONV": 0.5195, + "eval_loss_text_RQACONV": 1.0391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.4863, + "eval_loss_RQACONV": 0.4863, + "eval_loss_text_RQACONV": 0.9727, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.4707, + "eval_loss_RQACONV": 0.4707, + "eval_loss_text_RQACONV": 0.9414, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 1.3203, + "eval_loss_RQACONV": 1.3203, + "eval_loss_text_RQACONV": 2.6406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.1699, + "eval_loss_RQACONV": 0.1699, + "eval_loss_text_RQACONV": 0.3398, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.0859, + "eval_loss_RQACONV": 0.0859, + "eval_loss_text_RQACONV": 0.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.1738, + "eval_loss_RQACONV": 0.1738, + "eval_loss_text_RQACONV": 0.3477, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.166, + "eval_loss_RQACONV": 0.166, + "eval_loss_text_RQACONV": 0.332, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.0408, + "eval_loss_RQACONV": 0.0408, + "eval_loss_text_RQACONV": 0.0815, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.1396, + "eval_loss_RQACONV": 0.1396, + "eval_loss_text_RQACONV": 0.2793, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.1089, + "eval_loss_RQACONV": 0.1089, + "eval_loss_text_RQACONV": 0.2178, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.1035, + "eval_loss_RQACONV": 0.1035, + "eval_loss_text_RQACONV": 0.207, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 0.6289, + "eval_loss_RQACONV": 0.6289, + "eval_loss_text_RQACONV": 1.2578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 8800 + }, + { + "epoch": 0.6356085229324666, + "eval_loss": 1.5861353874206543, + "eval_runtime": 27.7072, + "eval_samples_per_second": 193.055, + "eval_steps_per_second": 1.516, + "step": 8800 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.875, + "audio_loss_6": 3.9375, + "epoch": 0.6356085229324666, + "loss": 3.4062, + "loss_text": 0.8633, + "state_loss_0": 0.0, + "step": 8800 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.1562, + "audio_loss_5": 3.9688, + "audio_loss_6": 3.8594, + "epoch": 0.6356085229324666, + "loss": 3.4062, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 8800 + }, + { + "epoch": 0.6374142289635247, + "grad_norm": 0.964260458946228, + "learning_rate": 3.984491644130586e-05, + "loss": 3.4086, + "step": 8825 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.7812, + "epoch": 0.6374142289635247, + "loss": 3.3281, + "loss_text": 0.9609, + "state_loss_0": 0.0, + "step": 8825 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.9375, + "epoch": 0.6374142289635247, + "loss": 3.4688, + "loss_text": 0.9453, + "state_loss_0": 0.0, + "step": 8825 + }, + { + "epoch": 0.6392199349945829, + "grad_norm": 0.8003630042076111, + "learning_rate": 3.9786032620348176e-05, + "loss": 3.4156, + "step": 8850 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.6719, + "epoch": 0.6392199349945829, + "loss": 3.2812, + "loss_text": 0.4355, + "state_loss_0": 0.0, + "step": 8850 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.375, + "audio_loss_4": 4.1875, + "audio_loss_5": 3.875, + "audio_loss_6": 3.875, + "epoch": 0.6392199349945829, + "loss": 3.375, + "loss_text": 0.6133, + "state_loss_0": 0.0, + "step": 8850 + }, + { + "epoch": 0.6410256410256411, + "grad_norm": 1.1806368827819824, + "learning_rate": 3.972702236901295e-05, + "loss": 3.4027, + "step": 8875 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.5, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.625, + "epoch": 0.6410256410256411, + "loss": 3.25, + "loss_text": 0.8281, + "state_loss_0": 0.0, + "step": 8875 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5781, + "epoch": 0.6410256410256411, + "loss": 3.2656, + "loss_text": 0.7227, + "state_loss_0": 0.0, + "step": 8875 + }, + { + "epoch": 0.6428313470566992, + "grad_norm": 0.8596497178077698, + "learning_rate": 3.966788619187692e-05, + "loss": 3.404, + "step": 8900 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.875, + "epoch": 0.6428313470566992, + "loss": 3.4062, + "loss_text": 0.4355, + "state_loss_0": 0.0, + "step": 8900 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.375, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.8438, + "epoch": 0.6428313470566992, + "loss": 3.4375, + "loss_text": 1.0625, + "state_loss_0": 0.0, + "step": 8900 + }, + { + "epoch": 0.6446370530877573, + "grad_norm": 1.1435071229934692, + "learning_rate": 3.96086245945936e-05, + "loss": 3.4063, + "step": 8925 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.75, + "epoch": 0.6446370530877573, + "loss": 3.375, + "loss_text": 0.9766, + "state_loss_0": 0.0, + "step": 8925 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.5, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.7812, + "epoch": 0.6446370530877573, + "loss": 3.3594, + "loss_text": 0.6367, + "state_loss_0": 0.0, + "step": 8925 + }, + { + "epoch": 0.6464427591188154, + "grad_norm": 0.8785886168479919, + "learning_rate": 3.954923808388893e-05, + "loss": 3.4016, + "step": 8950 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.0938, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9219, + "epoch": 0.6464427591188154, + "loss": 3.5156, + "loss_text": 1.1797, + "state_loss_0": 0.0, + "step": 8950 + }, + { + "audio_loss_0": 3.6406, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.2812, + "audio_loss_5": 4.0, + "audio_loss_6": 3.9531, + "epoch": 0.6464427591188154, + "loss": 3.5312, + "loss_text": 0.8203, + "state_loss_0": 0.0, + "step": 8950 + }, + { + "epoch": 0.6482484651498736, + "grad_norm": 0.7968937158584595, + "learning_rate": 3.948972716755692e-05, + "loss": 3.4073, + "step": 8975 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.1875, + "audio_loss_6": 4.0312, + "epoch": 0.6482484651498736, + "loss": 3.625, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 8975 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5625, + "epoch": 0.6482484651498736, + "loss": 3.1875, + "loss_text": 0.5312, + "state_loss_0": 0.0, + "step": 8975 + }, + { + "epoch": 0.6500541711809318, + "grad_norm": 0.7994695901870728, + "learning_rate": 3.943009235445535e-05, + "loss": 3.4009, + "step": 9000 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.125, + "audio_loss_6": 3.9844, + "epoch": 0.6500541711809318, + "loss": 3.5781, + "loss_text": 1.0547, + "state_loss_0": 0.0, + "step": 9000 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.2969, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.6406, + "epoch": 0.6500541711809318, + "loss": 3.2031, + "loss_text": 0.6758, + "state_loss_0": 0.0, + "step": 9000 + }, + { + "epoch": 0.6518598772119899, + "grad_norm": 0.9608354568481445, + "learning_rate": 3.9370334154501376e-05, + "loss": 3.4039, + "step": 9025 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.7031, + "epoch": 0.6518598772119899, + "loss": 3.4062, + "loss_text": 0.7891, + "state_loss_0": 0.0, + "step": 9025 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.8281, + "epoch": 0.6518598772119899, + "loss": 3.3438, + "loss_text": 0.7539, + "state_loss_0": 0.0, + "step": 9025 + }, + { + "epoch": 0.653665583243048, + "grad_norm": 0.8028705716133118, + "learning_rate": 3.931045307866722e-05, + "loss": 3.4021, + "step": 9050 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0, + "audio_loss_5": 3.875, + "audio_loss_6": 3.7656, + "epoch": 0.653665583243048, + "loss": 3.4062, + "loss_text": 1.1562, + "state_loss_0": 0.0, + "step": 9050 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.5625, + "audio_loss_3": 4.375, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.8438, + "epoch": 0.653665583243048, + "loss": 3.4219, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 9050 + }, + { + "epoch": 0.6554712892741061, + "grad_norm": 0.8382323980331421, + "learning_rate": 3.925044963897577e-05, + "loss": 3.3982, + "step": 9075 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.9219, + "audio_loss_6": 3.75, + "epoch": 0.6554712892741061, + "loss": 3.3281, + "loss_text": 0.5312, + "state_loss_0": 0.0, + "step": 9075 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.375, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.8125, + "epoch": 0.6554712892741061, + "loss": 3.4375, + "loss_text": 0.9453, + "state_loss_0": 0.0, + "step": 9075 + }, + { + "epoch": 0.6572769953051644, + "grad_norm": 0.8472492098808289, + "learning_rate": 3.919032434849619e-05, + "loss": 3.3921, + "step": 9100 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.9219, + "epoch": 0.6572769953051644, + "loss": 3.5, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 9100 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.9688, + "audio_loss_6": 3.8125, + "epoch": 0.6572769953051644, + "loss": 3.4688, + "loss_text": 1.0859, + "state_loss_0": 0.0, + "step": 9100 + }, + { + "epoch": 0.6590827013362225, + "grad_norm": 1.0447090864181519, + "learning_rate": 3.913007772133956e-05, + "loss": 3.3901, + "step": 9125 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.875, + "audio_loss_6": 3.8125, + "epoch": 0.6590827013362225, + "loss": 3.4375, + "loss_text": 1.0078, + "state_loss_0": 0.0, + "step": 9125 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.7812, + "epoch": 0.6590827013362225, + "loss": 3.3906, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 9125 + }, + { + "epoch": 0.6608884073672806, + "grad_norm": 0.912177562713623, + "learning_rate": 3.906971027265447e-05, + "loss": 3.4014, + "step": 9150 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.7969, + "epoch": 0.6608884073672806, + "loss": 3.3125, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 9150 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.375, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.9219, + "audio_loss_6": 3.6875, + "epoch": 0.6608884073672806, + "loss": 3.3125, + "loss_text": 0.9141, + "state_loss_0": 0.0, + "step": 9150 + }, + { + "epoch": 0.6626941133983387, + "grad_norm": 0.8217678666114807, + "learning_rate": 3.90092225186226e-05, + "loss": 3.3841, + "step": 9175 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0, + "audio_loss_5": 3.875, + "audio_loss_6": 3.7969, + "epoch": 0.6626941133983387, + "loss": 3.2969, + "loss_text": 0.3574, + "state_loss_0": 0.0, + "step": 9175 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.7344, + "epoch": 0.6626941133983387, + "loss": 3.3125, + "loss_text": 1.0625, + "state_loss_0": 0.0, + "step": 9175 + }, + { + "epoch": 0.6644998194293968, + "grad_norm": 1.0825324058532715, + "learning_rate": 3.894861497645435e-05, + "loss": 3.3979, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_AQACONVA": 3.3906, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.25, + "eval_audio_loss_5_AQACONVA": 4.1562, + "eval_audio_loss_6_AQACONVA": 3.9375, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_AQACONVA": 3.1406, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.8125, + "eval_loss": 3.5625, + "eval_loss_AQACONVA": 3.5625, + "eval_loss_text_AQACONVA": 1.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6406, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.8281, + "eval_loss": 3.5469, + "eval_loss_AQACONVA": 3.5469, + "eval_loss_text_AQACONVA": 1.4531, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.6406, + "eval_audio_loss_2_AQACONVA": 3.3281, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9531, + "eval_audio_loss_6_AQACONVA": 3.8281, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.6406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.25, + "eval_audio_loss_5_AQACONVA": 4.0625, + "eval_audio_loss_6_AQACONVA": 3.9219, + "eval_loss": 3.7656, + "eval_loss_AQACONVA": 3.7656, + "eval_loss_text_AQACONVA": 2.75, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_AQACONVA": 3.5781, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.25, + "eval_audio_loss_5_AQACONVA": 4.1562, + "eval_audio_loss_6_AQACONVA": 3.9375, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.2188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_AQACONVA": 3.5781, + "eval_audio_loss_1_AQACONVA": 3.8281, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.2188, + "eval_audio_loss_5_AQACONVA": 4.125, + "eval_audio_loss_6_AQACONVA": 3.9375, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 1.9609, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.625, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.75, + "eval_loss": 3.5, + "eval_loss_RQACONVA": 3.5, + "eval_loss_text_RQACONVA": 1.9609, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.9375, + "eval_audio_loss_6_RQACONVA": 3.7812, + "eval_loss": 3.5, + "eval_loss_RQACONVA": 3.5, + "eval_loss_text_RQACONVA": 1.6172, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.5, + "eval_audio_loss_2_RQACONVA": 3.2969, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 4.0, + "eval_audio_loss_6_RQACONVA": 3.8594, + "eval_loss": 3.5938, + "eval_loss_RQACONVA": 3.5938, + "eval_loss_text_RQACONVA": 2.0, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_RQACONVA": 3.4844, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.2188, + "eval_audio_loss_5_RQACONVA": 4.0625, + "eval_audio_loss_6_RQACONVA": 3.9375, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.1094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.1562, + "eval_audio_loss_5_RQACONVA": 4.0312, + "eval_audio_loss_6_RQACONVA": 3.8906, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.5938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.1875, + "eval_audio_loss_5_RQACONVA": 3.9844, + "eval_audio_loss_6_RQACONVA": 3.7969, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 1.8906, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 4.0, + "eval_audio_loss_6_RQACONVA": 3.7969, + "eval_loss": 3.5938, + "eval_loss_RQACONVA": 3.5938, + "eval_loss_text_RQACONVA": 1.8438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.9062, + "eval_audio_loss_6_RQACONVA": 3.7969, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.6875, + "eval_loss_RQACONV": 0.6875, + "eval_loss_text_RQACONV": 1.375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.8008, + "eval_loss_RQACONV": 0.8008, + "eval_loss_text_RQACONV": 1.6016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.6445, + "eval_loss_RQACONV": 0.6445, + "eval_loss_text_RQACONV": 1.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.7344, + "eval_loss_RQACONV": 0.7344, + "eval_loss_text_RQACONV": 1.4688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.875, + "eval_loss_RQACONV": 0.875, + "eval_loss_text_RQACONV": 1.75, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.4492, + "eval_loss_RQACONV": 0.4492, + "eval_loss_text_RQACONV": 0.8984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.4238, + "eval_loss_RQACONV": 0.4238, + "eval_loss_text_RQACONV": 0.8477, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.5195, + "eval_loss_RQACONV": 0.5195, + "eval_loss_text_RQACONV": 1.0391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.4844, + "eval_loss_RQACONV": 0.4844, + "eval_loss_text_RQACONV": 0.9688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.4727, + "eval_loss_RQACONV": 0.4727, + "eval_loss_text_RQACONV": 0.9453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 1.3203, + "eval_loss_RQACONV": 1.3203, + "eval_loss_text_RQACONV": 2.6406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.1426, + "eval_loss_RQACONV": 0.1426, + "eval_loss_text_RQACONV": 0.2852, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.1699, + "eval_loss_RQACONV": 0.1699, + "eval_loss_text_RQACONV": 0.3398, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.0947, + "eval_loss_RQACONV": 0.0947, + "eval_loss_text_RQACONV": 0.1895, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.1836, + "eval_loss_RQACONV": 0.1836, + "eval_loss_text_RQACONV": 0.3672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.1709, + "eval_loss_RQACONV": 0.1709, + "eval_loss_text_RQACONV": 0.3418, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.0596, + "eval_loss_RQACONV": 0.0596, + "eval_loss_text_RQACONV": 0.1191, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.1152, + "eval_loss_RQACONV": 0.1152, + "eval_loss_text_RQACONV": 0.2305, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.1089, + "eval_loss_RQACONV": 0.1089, + "eval_loss_text_RQACONV": 0.2178, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9200 + }, + { + "epoch": 0.6644998194293968, + "eval_loss": 1.5762474536895752, + "eval_runtime": 28.2713, + "eval_samples_per_second": 189.202, + "eval_steps_per_second": 1.486, + "step": 9200 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.7812, + "epoch": 0.6644998194293968, + "loss": 3.4062, + "loss_text": 0.6523, + "state_loss_0": 0.0, + "step": 9200 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.6719, + "epoch": 0.6644998194293968, + "loss": 3.375, + "loss_text": 0.957, + "state_loss_0": 0.0, + "step": 9200 + }, + { + "epoch": 0.6663055254604551, + "grad_norm": 0.8834190368652344, + "learning_rate": 3.888788816438435e-05, + "loss": 3.3943, + "step": 9225 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.6094, + "epoch": 0.6663055254604551, + "loss": 3.2344, + "loss_text": 0.7266, + "state_loss_0": 0.0, + "step": 9225 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.5, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.75, + "audio_loss_6": 3.6406, + "epoch": 0.6663055254604551, + "loss": 3.2188, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 9225 + }, + { + "epoch": 0.6681112314915132, + "grad_norm": 0.8351423740386963, + "learning_rate": 3.8827042601667114e-05, + "loss": 3.3887, + "step": 9250 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.7812, + "epoch": 0.6681112314915132, + "loss": 3.4688, + "loss_text": 1.3672, + "state_loss_0": 0.0, + "step": 9250 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.7812, + "epoch": 0.6681112314915132, + "loss": 3.4219, + "loss_text": 0.7734, + "state_loss_0": 0.0, + "step": 9250 + }, + { + "epoch": 0.6699169375225713, + "grad_norm": 0.9268518686294556, + "learning_rate": 3.87660788085725e-05, + "loss": 3.3887, + "step": 9275 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.5, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.8281, + "epoch": 0.6699169375225713, + "loss": 3.4531, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 9275 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.7031, + "epoch": 0.6699169375225713, + "loss": 3.3125, + "loss_text": 0.5195, + "state_loss_0": 0.0, + "step": 9275 + }, + { + "epoch": 0.6717226435536294, + "grad_norm": 0.7889868021011353, + "learning_rate": 3.870499730638135e-05, + "loss": 3.3945, + "step": 9300 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.9844, + "audio_loss_6": 3.8125, + "epoch": 0.6717226435536294, + "loss": 3.4688, + "loss_text": 0.8047, + "state_loss_0": 0.0, + "step": 9300 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.75, + "epoch": 0.6717226435536294, + "loss": 3.4219, + "loss_text": 1.2734, + "state_loss_0": 0.0, + "step": 9300 + }, + { + "epoch": 0.6735283495846877, + "grad_norm": 0.8525926470756531, + "learning_rate": 3.864379861738098e-05, + "loss": 3.3877, + "step": 9325 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.7812, + "epoch": 0.6735283495846877, + "loss": 3.375, + "loss_text": 0.8477, + "state_loss_0": 0.0, + "step": 9325 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.8281, + "epoch": 0.6735283495846877, + "loss": 3.4844, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 9325 + }, + { + "epoch": 0.6753340556157458, + "grad_norm": 0.8131090402603149, + "learning_rate": 3.858248326486075e-05, + "loss": 3.3776, + "step": 9350 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.6094, + "audio_loss_3": 4.1562, + "audio_loss_4": 4.125, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.4688, + "epoch": 0.6753340556157458, + "loss": 3.3125, + "loss_text": 0.7383, + "state_loss_0": 0.0, + "step": 9350 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.9688, + "audio_loss_6": 3.8125, + "epoch": 0.6753340556157458, + "loss": 3.4688, + "loss_text": 0.9727, + "state_loss_0": 0.0, + "step": 9350 + }, + { + "epoch": 0.6771397616468039, + "grad_norm": 0.9664331674575806, + "learning_rate": 3.852105177310754e-05, + "loss": 3.3829, + "step": 9375 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.75, + "epoch": 0.6771397616468039, + "loss": 3.3594, + "loss_text": 0.668, + "state_loss_0": 0.0, + "step": 9375 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.6875, + "epoch": 0.6771397616468039, + "loss": 3.3438, + "loss_text": 0.6211, + "state_loss_0": 0.0, + "step": 9375 + }, + { + "epoch": 0.678945467677862, + "grad_norm": 0.9260796904563904, + "learning_rate": 3.845950466740133e-05, + "loss": 3.3824, + "step": 9400 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.6875, + "epoch": 0.678945467677862, + "loss": 3.2812, + "loss_text": 0.6914, + "state_loss_0": 0.0, + "step": 9400 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.5, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5625, + "epoch": 0.678945467677862, + "loss": 3.1875, + "loss_text": 0.4844, + "state_loss_0": 0.0, + "step": 9400 + }, + { + "epoch": 0.6807511737089202, + "grad_norm": 0.7603830099105835, + "learning_rate": 3.839784247401066e-05, + "loss": 3.3785, + "step": 9425 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.7969, + "epoch": 0.6807511737089202, + "loss": 3.4688, + "loss_text": 0.668, + "state_loss_0": 0.0, + "step": 9425 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.7188, + "epoch": 0.6807511737089202, + "loss": 3.3281, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 9425 + }, + { + "epoch": 0.6825568797399784, + "grad_norm": 0.8545697927474976, + "learning_rate": 3.833606572018813e-05, + "loss": 3.3735, + "step": 9450 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.7656, + "epoch": 0.6825568797399784, + "loss": 3.3125, + "loss_text": 0.6875, + "state_loss_0": 0.0, + "step": 9450 + }, + { + "audio_loss_0": 3.5, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.8281, + "epoch": 0.6825568797399784, + "loss": 3.4219, + "loss_text": 0.8711, + "state_loss_0": 0.0, + "step": 9450 + }, + { + "epoch": 0.6843625857710365, + "grad_norm": 0.8402989506721497, + "learning_rate": 3.827417493416595e-05, + "loss": 3.3829, + "step": 9475 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9688, + "audio_loss_6": 3.8125, + "epoch": 0.6843625857710365, + "loss": 3.5, + "loss_text": 0.9727, + "state_loss_0": 0.0, + "step": 9475 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.75, + "epoch": 0.6843625857710365, + "loss": 3.4219, + "loss_text": 0.8125, + "state_loss_0": 0.0, + "step": 9475 + }, + { + "epoch": 0.6861682918020946, + "grad_norm": 0.7921481132507324, + "learning_rate": 3.821217064515133e-05, + "loss": 3.3747, + "step": 9500 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.25, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.7344, + "epoch": 0.6861682918020946, + "loss": 3.4375, + "loss_text": 1.1562, + "state_loss_0": 0.0, + "step": 9500 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.7969, + "epoch": 0.6861682918020946, + "loss": 3.3594, + "loss_text": 0.5273, + "state_loss_0": 0.0, + "step": 9500 + }, + { + "epoch": 0.6879739978331527, + "grad_norm": 0.889171302318573, + "learning_rate": 3.815005338332205e-05, + "loss": 3.3849, + "step": 9525 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.7969, + "epoch": 0.6879739978331527, + "loss": 3.375, + "loss_text": 0.7461, + "state_loss_0": 0.0, + "step": 9525 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.7344, + "epoch": 0.6879739978331527, + "loss": 3.3906, + "loss_text": 0.8438, + "state_loss_0": 0.0, + "step": 9525 + }, + { + "epoch": 0.689779703864211, + "grad_norm": 0.8729430437088013, + "learning_rate": 3.808782367982185e-05, + "loss": 3.3694, + "step": 9550 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.75, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.7969, + "epoch": 0.689779703864211, + "loss": 3.4375, + "loss_text": 0.707, + "state_loss_0": 0.0, + "step": 9550 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.6562, + "epoch": 0.689779703864211, + "loss": 3.3281, + "loss_text": 0.7031, + "state_loss_0": 0.0, + "step": 9550 + }, + { + "epoch": 0.6915854098952691, + "grad_norm": 0.9460113048553467, + "learning_rate": 3.8025482066755935e-05, + "loss": 3.3739, + "step": 9575 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.7344, + "epoch": 0.6915854098952691, + "loss": 3.3281, + "loss_text": 0.4375, + "state_loss_0": 0.0, + "step": 9575 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0625, + "audio_loss_5": 4.0, + "audio_loss_6": 3.6875, + "epoch": 0.6915854098952691, + "loss": 3.4375, + "loss_text": 0.8438, + "state_loss_0": 0.0, + "step": 9575 + }, + { + "epoch": 0.6933911159263272, + "grad_norm": 0.8889758586883545, + "learning_rate": 3.796302907718643e-05, + "loss": 3.3743, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_AQACONVA": 3.375, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.2188, + "eval_audio_loss_5_AQACONVA": 4.125, + "eval_audio_loss_6_AQACONVA": 3.875, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_AQACONVA": 3.1406, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.7656, + "eval_loss": 3.5469, + "eval_loss_AQACONVA": 3.5469, + "eval_loss_text_AQACONVA": 1.7266, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_AQACONVA": 3.3281, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.7812, + "eval_loss": 3.5156, + "eval_loss_AQACONVA": 3.5156, + "eval_loss_text_AQACONVA": 1.4531, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.3281, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.75, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 2.6094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.2188, + "eval_audio_loss_5_AQACONVA": 4.0312, + "eval_audio_loss_6_AQACONVA": 3.875, + "eval_loss": 3.7656, + "eval_loss_AQACONVA": 3.7656, + "eval_loss_text_AQACONVA": 2.7969, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.2188, + "eval_audio_loss_5_AQACONVA": 4.125, + "eval_audio_loss_6_AQACONVA": 3.875, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 2.25, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_AQACONVA": 3.5938, + "eval_audio_loss_1_AQACONVA": 3.8281, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0625, + "eval_audio_loss_6_AQACONVA": 3.875, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 1.9219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.3906, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.7031, + "eval_loss": 3.5156, + "eval_loss_RQACONVA": 3.5156, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_RQACONVA": 3.1562, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.9062, + "eval_audio_loss_6_RQACONVA": 3.75, + "eval_loss": 3.5, + "eval_loss_RQACONVA": 3.5, + "eval_loss_text_RQACONVA": 1.6641, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.5, + "eval_audio_loss_2_RQACONVA": 3.2812, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.9688, + "eval_audio_loss_6_RQACONVA": 3.7969, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.0312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_RQACONVA": 3.6719, + "eval_audio_loss_1_RQACONVA": 3.8594, + "eval_audio_loss_2_RQACONVA": 3.6406, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.2188, + "eval_audio_loss_5_RQACONVA": 4.125, + "eval_audio_loss_6_RQACONVA": 3.9375, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 2.4375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 4.0312, + "eval_audio_loss_6_RQACONVA": 3.8438, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.6562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_RQACONVA": 3.4531, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.1562, + "eval_audio_loss_5_RQACONVA": 3.9844, + "eval_audio_loss_6_RQACONVA": 3.75, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 1.9219, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 4.0, + "eval_audio_loss_6_RQACONVA": 3.7656, + "eval_loss": 3.5938, + "eval_loss_RQACONVA": 3.5938, + "eval_loss_text_RQACONVA": 1.8828, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.7656, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.1562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.5234, + "eval_loss_RQACONV": 0.5234, + "eval_loss_text_RQACONV": 1.0469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.4785, + "eval_loss_RQACONV": 0.4785, + "eval_loss_text_RQACONV": 0.957, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.6875, + "eval_loss_RQACONV": 0.6875, + "eval_loss_text_RQACONV": 1.375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.6484, + "eval_loss_RQACONV": 0.6484, + "eval_loss_text_RQACONV": 1.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.8164, + "eval_loss_RQACONV": 0.8164, + "eval_loss_text_RQACONV": 1.6328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.7422, + "eval_loss_RQACONV": 0.7422, + "eval_loss_text_RQACONV": 1.4844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.8633, + "eval_loss_RQACONV": 0.8633, + "eval_loss_text_RQACONV": 1.7266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.459, + "eval_loss_RQACONV": 0.459, + "eval_loss_text_RQACONV": 0.918, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.4336, + "eval_loss_RQACONV": 0.4336, + "eval_loss_text_RQACONV": 0.8672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.6016, + "eval_loss_RQACONV": 0.6016, + "eval_loss_text_RQACONV": 1.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.4883, + "eval_loss_RQACONV": 0.4883, + "eval_loss_text_RQACONV": 0.9766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.4844, + "eval_loss_RQACONV": 0.4844, + "eval_loss_text_RQACONV": 0.9688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.4766, + "eval_loss_RQACONV": 0.4766, + "eval_loss_text_RQACONV": 0.9531, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 1.3203, + "eval_loss_RQACONV": 1.3203, + "eval_loss_text_RQACONV": 2.6406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.1455, + "eval_loss_RQACONV": 0.1455, + "eval_loss_text_RQACONV": 0.291, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.0991, + "eval_loss_RQACONV": 0.0991, + "eval_loss_text_RQACONV": 0.1982, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.1699, + "eval_loss_RQACONV": 0.1699, + "eval_loss_text_RQACONV": 0.3398, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.1523, + "eval_loss_RQACONV": 0.1523, + "eval_loss_text_RQACONV": 0.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.0481, + "eval_loss_RQACONV": 0.0481, + "eval_loss_text_RQACONV": 0.0962, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.1416, + "eval_loss_RQACONV": 0.1416, + "eval_loss_text_RQACONV": 0.2832, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.1016, + "eval_loss_RQACONV": 0.1016, + "eval_loss_text_RQACONV": 0.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.1089, + "eval_loss_RQACONV": 0.1089, + "eval_loss_text_RQACONV": 0.2178, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 9600 + }, + { + "epoch": 0.6933911159263272, + "eval_loss": 1.5716832876205444, + "eval_runtime": 28.2888, + "eval_samples_per_second": 189.086, + "eval_steps_per_second": 1.485, + "step": 9600 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.0, + "audio_loss_5": 3.9688, + "audio_loss_6": 3.7812, + "epoch": 0.6933911159263272, + "loss": 3.4375, + "loss_text": 1.2734, + "state_loss_0": 0.0, + "step": 9600 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 4.0, + "audio_loss_2": 3.7188, + "audio_loss_3": 4.7188, + "audio_loss_4": 4.1562, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.8906, + "epoch": 0.6933911159263272, + "loss": 3.625, + "loss_text": 0.7695, + "state_loss_0": 0.0, + "step": 9600 + }, + { + "epoch": 0.6951968219573853, + "grad_norm": 0.9036917686462402, + "learning_rate": 3.7900465245127755e-05, + "loss": 3.3684, + "step": 9625 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.25, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5781, + "epoch": 0.6951968219573853, + "loss": 3.2656, + "loss_text": 0.7656, + "state_loss_0": 0.0, + "step": 9625 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.7031, + "epoch": 0.6951968219573853, + "loss": 3.375, + "loss_text": 0.9375, + "state_loss_0": 0.0, + "step": 9625 + }, + { + "epoch": 0.6970025279884435, + "grad_norm": 1.0480875968933105, + "learning_rate": 3.7837791105542156e-05, + "loss": 3.3744, + "step": 9650 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.6562, + "epoch": 0.6970025279884435, + "loss": 3.2656, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 9650 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.7812, + "epoch": 0.6970025279884435, + "loss": 3.4688, + "loss_text": 0.8906, + "state_loss_0": 0.0, + "step": 9650 + }, + { + "epoch": 0.6988082340195017, + "grad_norm": 0.7944151163101196, + "learning_rate": 3.7775007194335056e-05, + "loss": 3.3729, + "step": 9675 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.7031, + "epoch": 0.6988082340195017, + "loss": 3.3906, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 9675 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.625, + "epoch": 0.6988082340195017, + "loss": 3.2969, + "loss_text": 0.9844, + "state_loss_0": 0.0, + "step": 9675 + }, + { + "epoch": 0.7006139400505598, + "grad_norm": 0.8502286672592163, + "learning_rate": 3.7712114048350497e-05, + "loss": 3.3765, + "step": 9700 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.5625, + "epoch": 0.7006139400505598, + "loss": 3.1875, + "loss_text": 0.5742, + "state_loss_0": 0.0, + "step": 9700 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.7812, + "epoch": 0.7006139400505598, + "loss": 3.4688, + "loss_text": 0.9531, + "state_loss_0": 0.0, + "step": 9700 + }, + { + "epoch": 0.7024196460816179, + "grad_norm": 0.8556007146835327, + "learning_rate": 3.764911220536656e-05, + "loss": 3.3752, + "step": 9725 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.5, + "audio_loss_2": 3.25, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.6094, + "epoch": 0.7024196460816179, + "loss": 3.2188, + "loss_text": 0.6016, + "state_loss_0": 0.0, + "step": 9725 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5312, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.9844, + "audio_loss_6": 3.7344, + "epoch": 0.7024196460816179, + "loss": 3.3906, + "loss_text": 0.6172, + "state_loss_0": 0.0, + "step": 9725 + }, + { + "epoch": 0.704225352112676, + "grad_norm": 0.885534405708313, + "learning_rate": 3.758600220409074e-05, + "loss": 3.3632, + "step": 9750 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.625, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.5625, + "epoch": 0.704225352112676, + "loss": 3.3125, + "loss_text": 0.8047, + "state_loss_0": 0.0, + "step": 9750 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.875, + "audio_loss_6": 3.6875, + "epoch": 0.704225352112676, + "loss": 3.2812, + "loss_text": 0.2969, + "state_loss_0": 0.0, + "step": 9750 + }, + { + "epoch": 0.7060310581437342, + "grad_norm": 0.9673547744750977, + "learning_rate": 3.752278458415539e-05, + "loss": 3.3656, + "step": 9775 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.6406, + "epoch": 0.7060310581437342, + "loss": 3.3594, + "loss_text": 0.6484, + "state_loss_0": 0.0, + "step": 9775 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.75, + "audio_loss_6": 3.6562, + "epoch": 0.7060310581437342, + "loss": 3.3125, + "loss_text": 1.0391, + "state_loss_0": 0.0, + "step": 9775 + }, + { + "epoch": 0.7078367641747924, + "grad_norm": 0.7976075410842896, + "learning_rate": 3.745945988611305e-05, + "loss": 3.3724, + "step": 9800 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.6719, + "epoch": 0.7078367641747924, + "loss": 3.2969, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 9800 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.25, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.5938, + "epoch": 0.7078367641747924, + "loss": 3.3125, + "loss_text": 0.6602, + "state_loss_0": 0.0, + "step": 9800 + }, + { + "epoch": 0.7096424702058505, + "grad_norm": 0.8251013159751892, + "learning_rate": 3.739602865143185e-05, + "loss": 3.367, + "step": 9825 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.6406, + "epoch": 0.7096424702058505, + "loss": 3.3281, + "loss_text": 0.5273, + "state_loss_0": 0.0, + "step": 9825 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.6875, + "epoch": 0.7096424702058505, + "loss": 3.4062, + "loss_text": 1.0703, + "state_loss_0": 0.0, + "step": 9825 + }, + { + "epoch": 0.7114481762369086, + "grad_norm": 0.8963384032249451, + "learning_rate": 3.7332491422490884e-05, + "loss": 3.366, + "step": 9850 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.5781, + "epoch": 0.7114481762369086, + "loss": 3.2656, + "loss_text": 1.1797, + "state_loss_0": 0.0, + "step": 9850 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.7969, + "epoch": 0.7114481762369086, + "loss": 3.4219, + "loss_text": 0.6797, + "state_loss_0": 0.0, + "step": 9850 + }, + { + "epoch": 0.7132538822679668, + "grad_norm": 0.7813385128974915, + "learning_rate": 3.7268848742575566e-05, + "loss": 3.3636, + "step": 9875 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.7344, + "epoch": 0.7132538822679668, + "loss": 3.375, + "loss_text": 0.9062, + "state_loss_0": 0.0, + "step": 9875 + }, + { + "audio_loss_0": 3.5312, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.8125, + "epoch": 0.7132538822679668, + "loss": 3.4062, + "loss_text": 0.5742, + "state_loss_0": 0.0, + "step": 9875 + }, + { + "epoch": 0.7150595882990249, + "grad_norm": 0.7895112633705139, + "learning_rate": 3.720510115587298e-05, + "loss": 3.3662, + "step": 9900 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.5625, + "epoch": 0.7150595882990249, + "loss": 3.25, + "loss_text": 0.6719, + "state_loss_0": 0.0, + "step": 9900 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.5781, + "epoch": 0.7150595882990249, + "loss": 3.1719, + "loss_text": 0.3633, + "state_loss_0": 0.0, + "step": 9900 + }, + { + "epoch": 0.7168652943300831, + "grad_norm": 1.220239281654358, + "learning_rate": 3.714124920746724e-05, + "loss": 3.361, + "step": 9925 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.5312, + "epoch": 0.7168652943300831, + "loss": 3.1875, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 9925 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.6719, + "epoch": 0.7168652943300831, + "loss": 3.3125, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 9925 + }, + { + "epoch": 0.7186710003611412, + "grad_norm": 0.7914051413536072, + "learning_rate": 3.7077293443334814e-05, + "loss": 3.3582, + "step": 9950 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.25, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.6406, + "epoch": 0.7186710003611412, + "loss": 3.3125, + "loss_text": 0.7969, + "state_loss_0": 0.0, + "step": 9950 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.5625, + "epoch": 0.7186710003611412, + "loss": 3.3125, + "loss_text": 0.9531, + "state_loss_0": 0.0, + "step": 9950 + }, + { + "epoch": 0.7204767063921993, + "grad_norm": 0.7928208708763123, + "learning_rate": 3.7013234410339886e-05, + "loss": 3.3549, + "step": 9975 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.625, + "epoch": 0.7204767063921993, + "loss": 3.3594, + "loss_text": 0.9453, + "state_loss_0": 0.0, + "step": 9975 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.9219, + "audio_loss_6": 3.6406, + "epoch": 0.7204767063921993, + "loss": 3.375, + "loss_text": 0.5547, + "state_loss_0": 0.0, + "step": 9975 + }, + { + "epoch": 0.7222824124232575, + "grad_norm": 0.7389172315597534, + "learning_rate": 3.694907265622962e-05, + "loss": 3.3536, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_AQACONVA": 3.3906, + "eval_audio_loss_1_AQACONVA": 3.7031, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.125, + "eval_audio_loss_6_AQACONVA": 3.8594, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_AQACONVA": 3.1562, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.7188, + "eval_loss": 3.5312, + "eval_loss_AQACONVA": 3.5312, + "eval_loss_text_AQACONVA": 1.7188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9531, + "eval_audio_loss_6_AQACONVA": 3.7188, + "eval_loss": 3.5156, + "eval_loss_AQACONVA": 3.5156, + "eval_loss_text_AQACONVA": 1.4297, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.7188, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 2.6406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.8438, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 2.75, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_AQACONVA": 3.5781, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0938, + "eval_audio_loss_6_AQACONVA": 3.8281, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.2812, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0625, + "eval_audio_loss_6_AQACONVA": 3.8281, + "eval_loss": 3.7031, + "eval_loss_AQACONVA": 3.7031, + "eval_loss_text_AQACONVA": 1.9609, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_RQACONVA": 3.125, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.4219, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.6562, + "eval_loss": 3.5, + "eval_loss_RQACONVA": 3.5, + "eval_loss_text_RQACONVA": 1.9609, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.7031, + "eval_loss": 3.4844, + "eval_loss_RQACONVA": 3.4844, + "eval_loss_text_RQACONVA": 1.6562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.5, + "eval_audio_loss_2_RQACONVA": 3.2969, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.9531, + "eval_audio_loss_6_RQACONVA": 3.7656, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.0625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_RQACONVA": 3.6094, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.1875, + "eval_audio_loss_5_RQACONVA": 4.0312, + "eval_audio_loss_6_RQACONVA": 3.8594, + "eval_loss": 3.7656, + "eval_loss_RQACONVA": 3.7656, + "eval_loss_text_RQACONVA": 2.3438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 4.0, + "eval_audio_loss_6_RQACONVA": 3.8125, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.6875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 3.9531, + "eval_audio_loss_6_RQACONVA": 3.7031, + "eval_loss": 3.6094, + "eval_loss_RQACONVA": 3.6094, + "eval_loss_text_RQACONVA": 1.9141, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_RQACONVA": 3.1562, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.9219, + "eval_audio_loss_6_RQACONVA": 3.7031, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 1.8125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.7344, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.5781, + "eval_loss_RQACONV": 0.5781, + "eval_loss_text_RQACONV": 1.1562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.5234, + "eval_loss_RQACONV": 0.5234, + "eval_loss_text_RQACONV": 1.0469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.6836, + "eval_loss_RQACONV": 0.6836, + "eval_loss_text_RQACONV": 1.3672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.6367, + "eval_loss_RQACONV": 0.6367, + "eval_loss_text_RQACONV": 1.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.8008, + "eval_loss_RQACONV": 0.8008, + "eval_loss_text_RQACONV": 1.6016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.6602, + "eval_loss_RQACONV": 0.6602, + "eval_loss_text_RQACONV": 1.3203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.6836, + "eval_loss_RQACONV": 0.6836, + "eval_loss_text_RQACONV": 1.3672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.875, + "eval_loss_RQACONV": 0.875, + "eval_loss_text_RQACONV": 1.75, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.4492, + "eval_loss_RQACONV": 0.4492, + "eval_loss_text_RQACONV": 0.8984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.4883, + "eval_loss_RQACONV": 0.4883, + "eval_loss_text_RQACONV": 0.9766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.418, + "eval_loss_RQACONV": 0.418, + "eval_loss_text_RQACONV": 0.8359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.4805, + "eval_loss_RQACONV": 0.4805, + "eval_loss_text_RQACONV": 0.9609, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.4609, + "eval_loss_RQACONV": 0.4609, + "eval_loss_text_RQACONV": 0.9219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 1.3203, + "eval_loss_RQACONV": 1.3203, + "eval_loss_text_RQACONV": 2.6406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.1328, + "eval_loss_RQACONV": 0.1328, + "eval_loss_text_RQACONV": 0.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.1758, + "eval_loss_RQACONV": 0.1758, + "eval_loss_text_RQACONV": 0.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.0933, + "eval_loss_RQACONV": 0.0933, + "eval_loss_text_RQACONV": 0.1865, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.1777, + "eval_loss_RQACONV": 0.1777, + "eval_loss_text_RQACONV": 0.3555, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.042, + "eval_loss_RQACONV": 0.042, + "eval_loss_text_RQACONV": 0.084, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.1021, + "eval_loss_RQACONV": 0.1021, + "eval_loss_text_RQACONV": 0.2041, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.1118, + "eval_loss_RQACONV": 0.1118, + "eval_loss_text_RQACONV": 0.2236, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 0.6211, + "eval_loss_RQACONV": 0.6211, + "eval_loss_text_RQACONV": 1.2422, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10000 + }, + { + "epoch": 0.7222824124232575, + "eval_loss": 1.5643140077590942, + "eval_runtime": 28.0031, + "eval_samples_per_second": 191.015, + "eval_steps_per_second": 1.5, + "step": 10000 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.7969, + "epoch": 0.7222824124232575, + "loss": 3.3594, + "loss_text": 0.4395, + "state_loss_0": 0.0, + "step": 10000 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.875, + "audio_loss_6": 3.7344, + "epoch": 0.7222824124232575, + "loss": 3.4844, + "loss_text": 1.3906, + "state_loss_0": 0.0, + "step": 10000 + }, + { + "epoch": 0.7240881184543156, + "grad_norm": 0.9039463996887207, + "learning_rate": 3.688480872962954e-05, + "loss": 3.3654, + "step": 10025 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.5938, + "epoch": 0.7240881184543156, + "loss": 3.2969, + "loss_text": 0.7539, + "state_loss_0": 0.0, + "step": 10025 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.7812, + "epoch": 0.7240881184543156, + "loss": 3.3906, + "loss_text": 0.4531, + "state_loss_0": 0.0, + "step": 10025 + }, + { + "epoch": 0.7258938244853738, + "grad_norm": 0.874454915523529, + "learning_rate": 3.682044318003881e-05, + "loss": 3.3553, + "step": 10050 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.7812, + "epoch": 0.7258938244853738, + "loss": 3.375, + "loss_text": 0.5898, + "state_loss_0": 0.0, + "step": 10050 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.6562, + "epoch": 0.7258938244853738, + "loss": 3.375, + "loss_text": 0.9648, + "state_loss_0": 0.0, + "step": 10050 + }, + { + "epoch": 0.7276995305164319, + "grad_norm": 0.9626765251159668, + "learning_rate": 3.675597655782554e-05, + "loss": 3.3556, + "step": 10075 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.25, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.625, + "epoch": 0.7276995305164319, + "loss": 3.25, + "loss_text": 0.5898, + "state_loss_0": 0.0, + "step": 10075 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.7188, + "epoch": 0.7276995305164319, + "loss": 3.3438, + "loss_text": 0.6562, + "state_loss_0": 0.0, + "step": 10075 + }, + { + "epoch": 0.7295052365474901, + "grad_norm": 1.0504759550094604, + "learning_rate": 3.669140941422205e-05, + "loss": 3.3461, + "step": 10100 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.75, + "epoch": 0.7295052365474901, + "loss": 3.4688, + "loss_text": 0.9531, + "state_loss_0": 0.0, + "step": 10100 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.625, + "epoch": 0.7295052365474901, + "loss": 3.2969, + "loss_text": 0.6016, + "state_loss_0": 0.0, + "step": 10100 + }, + { + "epoch": 0.7313109425785482, + "grad_norm": 0.9513405561447144, + "learning_rate": 3.662674230132023e-05, + "loss": 3.3494, + "step": 10125 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.5, + "audio_loss_3": 4.5, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9219, + "audio_loss_6": 3.6875, + "epoch": 0.7313109425785482, + "loss": 3.4062, + "loss_text": 0.625, + "state_loss_0": 0.0, + "step": 10125 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5469, + "epoch": 0.7313109425785482, + "loss": 3.1875, + "loss_text": 0.5547, + "state_loss_0": 0.0, + "step": 10125 + }, + { + "epoch": 0.7331166486096063, + "grad_norm": 1.1387850046157837, + "learning_rate": 3.6561975772066735e-05, + "loss": 3.3511, + "step": 10150 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.5469, + "epoch": 0.7331166486096063, + "loss": 3.1719, + "loss_text": 0.4082, + "state_loss_0": 0.0, + "step": 10150 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.875, + "audio_loss_6": 3.6719, + "epoch": 0.7331166486096063, + "loss": 3.3281, + "loss_text": 0.5859, + "state_loss_0": 0.0, + "step": 10150 + }, + { + "epoch": 0.7349223546406645, + "grad_norm": 0.8415699005126953, + "learning_rate": 3.649711038025831e-05, + "loss": 3.3473, + "step": 10175 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.125, + "audio_loss_3": 4.25, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.6562, + "epoch": 0.7349223546406645, + "loss": 3.2656, + "loss_text": 0.6016, + "state_loss_0": 0.0, + "step": 10175 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.625, + "epoch": 0.7349223546406645, + "loss": 3.4219, + "loss_text": 1.0859, + "state_loss_0": 0.0, + "step": 10175 + }, + { + "epoch": 0.7367280606717227, + "grad_norm": 1.0381951332092285, + "learning_rate": 3.643214668053705e-05, + "loss": 3.3489, + "step": 10200 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.6406, + "epoch": 0.7367280606717227, + "loss": 3.3125, + "loss_text": 0.8164, + "state_loss_0": 0.0, + "step": 10200 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.6562, + "epoch": 0.7367280606717227, + "loss": 3.4062, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 10200 + }, + { + "epoch": 0.7385337667027808, + "grad_norm": 0.7791191935539246, + "learning_rate": 3.6367085228385634e-05, + "loss": 3.3486, + "step": 10225 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5469, + "epoch": 0.7385337667027808, + "loss": 3.2188, + "loss_text": 0.5, + "state_loss_0": 0.0, + "step": 10225 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.6406, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.125, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.7188, + "epoch": 0.7385337667027808, + "loss": 3.5312, + "loss_text": 0.9219, + "state_loss_0": 0.0, + "step": 10225 + }, + { + "epoch": 0.7403394727338389, + "grad_norm": 0.9563679695129395, + "learning_rate": 3.630192658012259e-05, + "loss": 3.3422, + "step": 10250 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.7031, + "epoch": 0.7403394727338389, + "loss": 3.4062, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 10250 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.6875, + "epoch": 0.7403394727338389, + "loss": 3.3438, + "loss_text": 0.9297, + "state_loss_0": 0.0, + "step": 10250 + }, + { + "epoch": 0.742145178764897, + "grad_norm": 0.9169381260871887, + "learning_rate": 3.6236671292897555e-05, + "loss": 3.3403, + "step": 10275 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.5781, + "epoch": 0.742145178764897, + "loss": 3.2344, + "loss_text": 0.5, + "state_loss_0": 0.0, + "step": 10275 + }, + { + "audio_loss_0": 3.625, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.0625, + "audio_loss_5": 4.0312, + "audio_loss_6": 3.625, + "epoch": 0.742145178764897, + "loss": 3.4531, + "loss_text": 0.7852, + "state_loss_0": 0.0, + "step": 10275 + }, + { + "epoch": 0.7439508847959552, + "grad_norm": 0.9767013192176819, + "learning_rate": 3.6171319924686464e-05, + "loss": 3.3359, + "step": 10300 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.5469, + "epoch": 0.7439508847959552, + "loss": 3.3594, + "loss_text": 0.5898, + "state_loss_0": 0.0, + "step": 10300 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.6406, + "epoch": 0.7439508847959552, + "loss": 3.3594, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 10300 + }, + { + "epoch": 0.7457565908270134, + "grad_norm": 0.8202961683273315, + "learning_rate": 3.610587303428683e-05, + "loss": 3.3442, + "step": 10325 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.6719, + "epoch": 0.7457565908270134, + "loss": 3.2969, + "loss_text": 0.4258, + "state_loss_0": 0.0, + "step": 10325 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.5469, + "epoch": 0.7457565908270134, + "loss": 3.1562, + "loss_text": 0.4707, + "state_loss_0": 0.0, + "step": 10325 + }, + { + "epoch": 0.7475622968580715, + "grad_norm": 0.8343178033828735, + "learning_rate": 3.604033118131294e-05, + "loss": 3.3436, + "step": 10350 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.625, + "epoch": 0.7475622968580715, + "loss": 3.2656, + "loss_text": 0.418, + "state_loss_0": 0.0, + "step": 10350 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.6719, + "audio_loss_3": 4.6875, + "audio_loss_4": 4.2188, + "audio_loss_5": 3.9844, + "audio_loss_6": 3.9844, + "epoch": 0.7475622968580715, + "loss": 3.5781, + "loss_text": 0.6133, + "state_loss_0": 0.0, + "step": 10350 + }, + { + "epoch": 0.7493680028891296, + "grad_norm": 0.9145658612251282, + "learning_rate": 3.597469492619106e-05, + "loss": 3.3389, + "step": 10375 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.5938, + "epoch": 0.7493680028891296, + "loss": 3.2812, + "loss_text": 0.5312, + "state_loss_0": 0.0, + "step": 10375 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.6875, + "epoch": 0.7493680028891296, + "loss": 3.3438, + "loss_text": 0.6055, + "state_loss_0": 0.0, + "step": 10375 + }, + { + "epoch": 0.7511737089201878, + "grad_norm": 0.8919581174850464, + "learning_rate": 3.590896483015468e-05, + "loss": 3.3332, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_AQACONVA": 3.375, + "eval_audio_loss_1_AQACONVA": 3.7031, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0625, + "eval_audio_loss_6_AQACONVA": 3.7969, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.0469, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_AQACONVA": 3.1406, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6875, + "eval_loss": 3.5312, + "eval_loss_AQACONVA": 3.5312, + "eval_loss_text_AQACONVA": 1.7266, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_AQACONVA": 3.3281, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.4375, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9531, + "eval_audio_loss_6_AQACONVA": 3.6875, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.4375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.5938, + "eval_loss_AQACONVA": 3.5938, + "eval_loss_text_AQACONVA": 2.625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_AQACONVA": 3.25, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.7969, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.8438, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0938, + "eval_audio_loss_6_AQACONVA": 3.8281, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.2969, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_AQACONVA": 3.5781, + "eval_audio_loss_1_AQACONVA": 3.8438, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 4.0625, + "eval_audio_loss_6_AQACONVA": 3.8125, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 1.9688, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_RQACONVA": 3.125, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7031, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.4844, + "eval_loss_RQACONVA": 3.4844, + "eval_loss_text_RQACONVA": 1.9766, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.6562, + "eval_loss": 3.4688, + "eval_loss_RQACONVA": 3.4688, + "eval_loss_text_RQACONVA": 1.6562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.5, + "eval_audio_loss_2_RQACONVA": 3.2812, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.9219, + "eval_audio_loss_6_RQACONVA": 3.7031, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.0312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_RQACONVA": 3.625, + "eval_audio_loss_1_RQACONVA": 3.8906, + "eval_audio_loss_2_RQACONVA": 3.6562, + "eval_audio_loss_3_RQACONVA": 4.6562, + "eval_audio_loss_4_RQACONVA": 4.2188, + "eval_audio_loss_5_RQACONVA": 4.0938, + "eval_audio_loss_6_RQACONVA": 3.875, + "eval_loss": 3.8125, + "eval_loss_RQACONVA": 3.8125, + "eval_loss_text_RQACONVA": 2.4375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.9531, + "eval_audio_loss_6_RQACONVA": 3.75, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.6875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 3.9219, + "eval_audio_loss_6_RQACONVA": 3.6719, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 1.9844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4219, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.9219, + "eval_audio_loss_6_RQACONVA": 3.6875, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 1.9453, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.6875, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.1406, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.5703, + "eval_loss_RQACONV": 0.5703, + "eval_loss_text_RQACONV": 1.1406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.4941, + "eval_loss_RQACONV": 0.4941, + "eval_loss_text_RQACONV": 0.9883, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.6875, + "eval_loss_RQACONV": 0.6875, + "eval_loss_text_RQACONV": 1.375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.6562, + "eval_loss_RQACONV": 0.6562, + "eval_loss_text_RQACONV": 1.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.7773, + "eval_loss_RQACONV": 0.7773, + "eval_loss_text_RQACONV": 1.5547, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.6523, + "eval_loss_RQACONV": 0.6523, + "eval_loss_text_RQACONV": 1.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.6992, + "eval_loss_RQACONV": 0.6992, + "eval_loss_text_RQACONV": 1.3984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.8867, + "eval_loss_RQACONV": 0.8867, + "eval_loss_text_RQACONV": 1.7734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.457, + "eval_loss_RQACONV": 0.457, + "eval_loss_text_RQACONV": 0.9141, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.4297, + "eval_loss_RQACONV": 0.4297, + "eval_loss_text_RQACONV": 0.8594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.5195, + "eval_loss_RQACONV": 0.5195, + "eval_loss_text_RQACONV": 1.0391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.4707, + "eval_loss_RQACONV": 0.4707, + "eval_loss_text_RQACONV": 0.9414, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 1.3281, + "eval_loss_RQACONV": 1.3281, + "eval_loss_text_RQACONV": 2.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.1357, + "eval_loss_RQACONV": 0.1357, + "eval_loss_text_RQACONV": 0.2715, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.167, + "eval_loss_RQACONV": 0.167, + "eval_loss_text_RQACONV": 0.334, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.0903, + "eval_loss_RQACONV": 0.0903, + "eval_loss_text_RQACONV": 0.1807, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.1689, + "eval_loss_RQACONV": 0.1689, + "eval_loss_text_RQACONV": 0.3379, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.1494, + "eval_loss_RQACONV": 0.1494, + "eval_loss_text_RQACONV": 0.2988, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.0364, + "eval_loss_RQACONV": 0.0364, + "eval_loss_text_RQACONV": 0.0728, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.1494, + "eval_loss_RQACONV": 0.1494, + "eval_loss_text_RQACONV": 0.2988, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.1064, + "eval_loss_RQACONV": 0.1064, + "eval_loss_text_RQACONV": 0.2129, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.1064, + "eval_loss_RQACONV": 0.1064, + "eval_loss_text_RQACONV": 0.2129, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 0.6211, + "eval_loss_RQACONV": 0.6211, + "eval_loss_text_RQACONV": 1.2422, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10400 + }, + { + "epoch": 0.7511737089201878, + "eval_loss": 1.5642621517181396, + "eval_runtime": 27.7046, + "eval_samples_per_second": 193.073, + "eval_steps_per_second": 1.516, + "step": 10400 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.5469, + "epoch": 0.7511737089201878, + "loss": 3.2812, + "loss_text": 0.6953, + "state_loss_0": 0.0, + "step": 10400 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.4531, + "epoch": 0.7511737089201878, + "loss": 3.2031, + "loss_text": 0.7461, + "state_loss_0": 0.0, + "step": 10400 + }, + { + "epoch": 0.752979414951246, + "grad_norm": 0.8412373065948486, + "learning_rate": 3.584314145523967e-05, + "loss": 3.3405, + "step": 10425 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.4844, + "epoch": 0.752979414951246, + "loss": 3.2344, + "loss_text": 0.7422, + "state_loss_0": 0.0, + "step": 10425 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.6875, + "epoch": 0.752979414951246, + "loss": 3.3125, + "loss_text": 0.8555, + "state_loss_0": 0.0, + "step": 10425 + }, + { + "epoch": 0.7547851209823041, + "grad_norm": 0.7730754613876343, + "learning_rate": 3.577722536427952e-05, + "loss": 3.3391, + "step": 10450 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.6406, + "epoch": 0.7547851209823041, + "loss": 3.4219, + "loss_text": 1.3359, + "state_loss_0": 0.0, + "step": 10450 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.6719, + "epoch": 0.7547851209823041, + "loss": 3.4062, + "loss_text": 0.6602, + "state_loss_0": 0.0, + "step": 10450 + }, + { + "epoch": 0.7565908270133622, + "grad_norm": 0.8989689946174622, + "learning_rate": 3.571121712090047e-05, + "loss": 3.3352, + "step": 10475 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.6406, + "epoch": 0.7565908270133622, + "loss": 3.4219, + "loss_text": 1.2578, + "state_loss_0": 0.0, + "step": 10475 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5938, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.1562, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.5938, + "epoch": 0.7565908270133622, + "loss": 3.4531, + "loss_text": 0.707, + "state_loss_0": 0.0, + "step": 10475 + }, + { + "epoch": 0.7583965330444203, + "grad_norm": 0.8697414994239807, + "learning_rate": 3.564511728951677e-05, + "loss": 3.3367, + "step": 10500 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.875, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4219, + "epoch": 0.7583965330444203, + "loss": 3.2188, + "loss_text": 0.8008, + "state_loss_0": 0.0, + "step": 10500 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.625, + "epoch": 0.7583965330444203, + "loss": 3.2656, + "loss_text": 0.6172, + "state_loss_0": 0.0, + "step": 10500 + }, + { + "epoch": 0.7602022390754785, + "grad_norm": 0.8138471841812134, + "learning_rate": 3.557892643532576e-05, + "loss": 3.3307, + "step": 10525 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.4531, + "epoch": 0.7602022390754785, + "loss": 3.25, + "loss_text": 0.5703, + "state_loss_0": 0.0, + "step": 10525 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.5312, + "epoch": 0.7602022390754785, + "loss": 3.2344, + "loss_text": 0.4883, + "state_loss_0": 0.0, + "step": 10525 + }, + { + "epoch": 0.7620079451065367, + "grad_norm": 1.0003538131713867, + "learning_rate": 3.551264512430312e-05, + "loss": 3.335, + "step": 10550 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.875, + "audio_loss_6": 3.6875, + "epoch": 0.7620079451065367, + "loss": 3.4062, + "loss_text": 0.9141, + "state_loss_0": 0.0, + "step": 10550 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.375, + "audio_loss_4": 4.125, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.6406, + "epoch": 0.7620079451065367, + "loss": 3.375, + "loss_text": 0.8281, + "state_loss_0": 0.0, + "step": 10550 + }, + { + "epoch": 0.7638136511375948, + "grad_norm": 0.9207282662391663, + "learning_rate": 3.5446273923197986e-05, + "loss": 3.3284, + "step": 10575 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.5938, + "epoch": 0.7638136511375948, + "loss": 3.2969, + "loss_text": 0.6758, + "state_loss_0": 0.0, + "step": 10575 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.5625, + "epoch": 0.7638136511375948, + "loss": 3.2812, + "loss_text": 0.9883, + "state_loss_0": 0.0, + "step": 10575 + }, + { + "epoch": 0.7656193571686529, + "grad_norm": 0.8645801544189453, + "learning_rate": 3.53798133995281e-05, + "loss": 3.335, + "step": 10600 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.6094, + "epoch": 0.7656193571686529, + "loss": 3.3438, + "loss_text": 1.1484, + "state_loss_0": 0.0, + "step": 10600 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.375, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.875, + "audio_loss_6": 3.5625, + "epoch": 0.7656193571686529, + "loss": 3.2812, + "loss_text": 0.6758, + "state_loss_0": 0.0, + "step": 10600 + }, + { + "epoch": 0.767425063199711, + "grad_norm": 0.8362615704536438, + "learning_rate": 3.531326412157499e-05, + "loss": 3.3314, + "step": 10625 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.375, + "audio_loss_4": 3.875, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.5312, + "epoch": 0.767425063199711, + "loss": 3.3438, + "loss_text": 1.3672, + "state_loss_0": 0.0, + "step": 10625 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.5938, + "epoch": 0.767425063199711, + "loss": 3.2344, + "loss_text": 0.5234, + "state_loss_0": 0.0, + "step": 10625 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 1.1545565128326416, + "learning_rate": 3.524662665837908e-05, + "loss": 3.3198, + "step": 10650 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.375, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5312, + "epoch": 0.7692307692307693, + "loss": 3.3281, + "loss_text": 0.957, + "state_loss_0": 0.0, + "step": 10650 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4219, + "epoch": 0.7692307692307693, + "loss": 3.1875, + "loss_text": 0.3457, + "state_loss_0": 0.0, + "step": 10650 + }, + { + "epoch": 0.7710364752618274, + "grad_norm": 0.8324057459831238, + "learning_rate": 3.517990157973484e-05, + "loss": 3.3286, + "step": 10675 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.7344, + "epoch": 0.7710364752618274, + "loss": 3.375, + "loss_text": 0.7148, + "state_loss_0": 0.0, + "step": 10675 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.6875, + "epoch": 0.7710364752618274, + "loss": 3.4062, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 10675 + }, + { + "epoch": 0.7728421812928855, + "grad_norm": 0.7487447261810303, + "learning_rate": 3.511308945618592e-05, + "loss": 3.3348, + "step": 10700 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.6562, + "epoch": 0.7728421812928855, + "loss": 3.3594, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 10700 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.7188, + "epoch": 0.7728421812928855, + "loss": 3.4062, + "loss_text": 0.832, + "state_loss_0": 0.0, + "step": 10700 + }, + { + "epoch": 0.7746478873239436, + "grad_norm": 0.8994874358177185, + "learning_rate": 3.504619085902024e-05, + "loss": 3.3293, + "step": 10725 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.625, + "epoch": 0.7746478873239436, + "loss": 3.4062, + "loss_text": 0.8867, + "state_loss_0": 0.0, + "step": 10725 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.7188, + "epoch": 0.7746478873239436, + "loss": 3.4062, + "loss_text": 0.6172, + "state_loss_0": 0.0, + "step": 10725 + }, + { + "epoch": 0.7764535933550019, + "grad_norm": 0.8162523508071899, + "learning_rate": 3.497920636026516e-05, + "loss": 3.3186, + "step": 10750 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.4844, + "epoch": 0.7764535933550019, + "loss": 3.125, + "loss_text": 0.6367, + "state_loss_0": 0.0, + "step": 10750 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.75, + "epoch": 0.7764535933550019, + "loss": 3.4688, + "loss_text": 1.0625, + "state_loss_0": 0.0, + "step": 10750 + }, + { + "epoch": 0.77825929938606, + "grad_norm": 0.8000311851501465, + "learning_rate": 3.4912136532682506e-05, + "loss": 3.3267, + "step": 10775 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.7344, + "epoch": 0.77825929938606, + "loss": 3.4375, + "loss_text": 0.8828, + "state_loss_0": 0.0, + "step": 10775 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.6562, + "epoch": 0.77825929938606, + "loss": 3.4062, + "loss_text": 0.9805, + "state_loss_0": 0.0, + "step": 10775 + }, + { + "epoch": 0.7800650054171181, + "grad_norm": 0.8509671092033386, + "learning_rate": 3.484498194976377e-05, + "loss": 3.3396, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_AQACONVA": 3.375, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1875, + "eval_audio_loss_5_AQACONVA": 4.0312, + "eval_audio_loss_6_AQACONVA": 3.7969, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.0625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_AQACONVA": 3.1406, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_AQACONVA": 3.3281, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.4844, + "eval_loss_AQACONVA": 3.4844, + "eval_loss_text_AQACONVA": 1.4219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.5938, + "eval_loss_AQACONVA": 3.5938, + "eval_loss_text_AQACONVA": 2.625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.7656, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.8594, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 4.0625, + "eval_audio_loss_6_AQACONVA": 3.7969, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.2344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.7656, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.625, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6875, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.4844, + "eval_loss_RQACONVA": 3.4844, + "eval_loss_text_RQACONVA": 2.1094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.3438, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.625, + "eval_loss": 3.4688, + "eval_loss_RQACONVA": 3.4688, + "eval_loss_text_RQACONVA": 1.7109, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4844, + "eval_audio_loss_2_RQACONVA": 3.2812, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.6719, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.0, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_RQACONVA": 3.5312, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.1562, + "eval_audio_loss_5_RQACONVA": 4.0, + "eval_audio_loss_6_RQACONVA": 3.8125, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.3438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.9531, + "eval_audio_loss_6_RQACONVA": 3.7344, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.7656, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_RQACONVA": 3.375, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.6406, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 1.9141, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_RQACONVA": 3.1562, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.6406, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 1.8438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.6719, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.7031, + "eval_loss_RQACONV": 0.7031, + "eval_loss_text_RQACONV": 1.4062, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.6523, + "eval_loss_RQACONV": 0.6523, + "eval_loss_text_RQACONV": 1.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.8164, + "eval_loss_RQACONV": 0.8164, + "eval_loss_text_RQACONV": 1.6328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.6562, + "eval_loss_RQACONV": 0.6562, + "eval_loss_text_RQACONV": 1.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.7227, + "eval_loss_RQACONV": 0.7227, + "eval_loss_text_RQACONV": 1.4453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.8984, + "eval_loss_RQACONV": 0.8984, + "eval_loss_text_RQACONV": 1.7969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.4512, + "eval_loss_RQACONV": 0.4512, + "eval_loss_text_RQACONV": 0.9023, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.4258, + "eval_loss_RQACONV": 0.4258, + "eval_loss_text_RQACONV": 0.8516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.4941, + "eval_loss_RQACONV": 0.4941, + "eval_loss_text_RQACONV": 0.9883, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.4863, + "eval_loss_RQACONV": 0.4863, + "eval_loss_text_RQACONV": 0.9727, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.4727, + "eval_loss_RQACONV": 0.4727, + "eval_loss_text_RQACONV": 0.9453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 1.3359, + "eval_loss_RQACONV": 1.3359, + "eval_loss_text_RQACONV": 2.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.1436, + "eval_loss_RQACONV": 0.1436, + "eval_loss_text_RQACONV": 0.2871, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.1729, + "eval_loss_RQACONV": 0.1729, + "eval_loss_text_RQACONV": 0.3457, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.0884, + "eval_loss_RQACONV": 0.0884, + "eval_loss_text_RQACONV": 0.1768, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.168, + "eval_loss_RQACONV": 0.168, + "eval_loss_text_RQACONV": 0.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.0371, + "eval_loss_RQACONV": 0.0371, + "eval_loss_text_RQACONV": 0.0742, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.1553, + "eval_loss_RQACONV": 0.1553, + "eval_loss_text_RQACONV": 0.3105, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.1084, + "eval_loss_RQACONV": 0.1084, + "eval_loss_text_RQACONV": 0.2168, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.1069, + "eval_loss_RQACONV": 0.1069, + "eval_loss_text_RQACONV": 0.2139, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 10800 + }, + { + "epoch": 0.7800650054171181, + "eval_loss": 1.5610883235931396, + "eval_runtime": 27.8814, + "eval_samples_per_second": 191.848, + "eval_steps_per_second": 1.506, + "step": 10800 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.5, + "epoch": 0.7800650054171181, + "loss": 3.2812, + "loss_text": 0.7188, + "state_loss_0": 0.0, + "step": 10800 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.5938, + "epoch": 0.7800650054171181, + "loss": 3.3125, + "loss_text": 0.8242, + "state_loss_0": 0.0, + "step": 10800 + }, + { + "epoch": 0.7818707114481762, + "grad_norm": 0.7850077748298645, + "learning_rate": 3.4777743185725135e-05, + "loss": 3.3256, + "step": 10825 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5, + "epoch": 0.7818707114481762, + "loss": 3.25, + "loss_text": 0.5195, + "state_loss_0": 0.0, + "step": 10825 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.5469, + "epoch": 0.7818707114481762, + "loss": 3.2031, + "loss_text": 0.3945, + "state_loss_0": 0.0, + "step": 10825 + }, + { + "epoch": 0.7836764174792343, + "grad_norm": 0.8170210123062134, + "learning_rate": 3.471042081550259e-05, + "loss": 3.3262, + "step": 10850 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.6719, + "epoch": 0.7836764174792343, + "loss": 3.3125, + "loss_text": 0.5625, + "state_loss_0": 0.0, + "step": 10850 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.5938, + "epoch": 0.7836764174792343, + "loss": 3.2969, + "loss_text": 0.5547, + "state_loss_0": 0.0, + "step": 10850 + }, + { + "epoch": 0.7854821235102926, + "grad_norm": 0.8771389722824097, + "learning_rate": 3.4643015414747014e-05, + "loss": 3.3273, + "step": 10875 + }, + { + "audio_loss_0": 3.6562, + "audio_loss_1": 3.9219, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.1875, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.7031, + "epoch": 0.7854821235102926, + "loss": 3.6094, + "loss_text": 1.2656, + "state_loss_0": 0.0, + "step": 10875 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.375, + "audio_loss_4": 3.875, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.6094, + "epoch": 0.7854821235102926, + "loss": 3.2969, + "loss_text": 0.4531, + "state_loss_0": 0.0, + "step": 10875 + }, + { + "epoch": 0.7872878295413507, + "grad_norm": 0.8197487592697144, + "learning_rate": 3.457552755981926e-05, + "loss": 3.325, + "step": 10900 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.4375, + "epoch": 0.7872878295413507, + "loss": 3.2812, + "loss_text": 0.6719, + "state_loss_0": 0.0, + "step": 10900 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.5312, + "epoch": 0.7872878295413507, + "loss": 3.2812, + "loss_text": 1.0312, + "state_loss_0": 0.0, + "step": 10900 + }, + { + "epoch": 0.7890935355724088, + "grad_norm": 0.9648930430412292, + "learning_rate": 3.45079578277852e-05, + "loss": 3.327, + "step": 10925 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.6406, + "epoch": 0.7890935355724088, + "loss": 3.4219, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 10925 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.6562, + "epoch": 0.7890935355724088, + "loss": 3.5156, + "loss_text": 0.9102, + "state_loss_0": 0.0, + "step": 10925 + }, + { + "epoch": 0.7908992416034669, + "grad_norm": 0.9600905179977417, + "learning_rate": 3.444030679641083e-05, + "loss": 3.3213, + "step": 10950 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5156, + "epoch": 0.7908992416034669, + "loss": 3.2188, + "loss_text": 0.6016, + "state_loss_0": 0.0, + "step": 10950 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.375, + "audio_loss_4": 3.875, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.375, + "epoch": 0.7908992416034669, + "loss": 3.1562, + "loss_text": 0.4961, + "state_loss_0": 0.0, + "step": 10950 + }, + { + "epoch": 0.7927049476345251, + "grad_norm": 0.8263586163520813, + "learning_rate": 3.4372575044157307e-05, + "loss": 3.3157, + "step": 10975 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.2812, + "epoch": 0.7927049476345251, + "loss": 3.1094, + "loss_text": 0.6094, + "state_loss_0": 0.0, + "step": 10975 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.5, + "epoch": 0.7927049476345251, + "loss": 3.2969, + "loss_text": 1.0, + "state_loss_0": 0.0, + "step": 10975 + }, + { + "epoch": 0.7945106536655833, + "grad_norm": 0.907014012336731, + "learning_rate": 3.4304763150175976e-05, + "loss": 3.3242, + "step": 11000 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.5625, + "epoch": 0.7945106536655833, + "loss": 3.3438, + "loss_text": 0.9922, + "state_loss_0": 0.0, + "step": 11000 + }, + { + "audio_loss_0": 3.6562, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.8281, + "epoch": 0.7945106536655833, + "loss": 3.5312, + "loss_text": 1.0703, + "state_loss_0": 0.0, + "step": 11000 + }, + { + "epoch": 0.7963163596966414, + "grad_norm": 0.784678041934967, + "learning_rate": 3.42368716943035e-05, + "loss": 3.324, + "step": 11025 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.5625, + "epoch": 0.7963163596966414, + "loss": 3.3438, + "loss_text": 1.0391, + "state_loss_0": 0.0, + "step": 11025 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.625, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.4062, + "epoch": 0.7963163596966414, + "loss": 3.1562, + "loss_text": 0.9453, + "state_loss_0": 0.0, + "step": 11025 + }, + { + "epoch": 0.7981220657276995, + "grad_norm": 0.8105549812316895, + "learning_rate": 3.4168901257056787e-05, + "loss": 3.3166, + "step": 11050 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.625, + "epoch": 0.7981220657276995, + "loss": 3.4062, + "loss_text": 0.7969, + "state_loss_0": 0.0, + "step": 11050 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.7188, + "epoch": 0.7981220657276995, + "loss": 3.3906, + "loss_text": 0.918, + "state_loss_0": 0.0, + "step": 11050 + }, + { + "epoch": 0.7999277717587576, + "grad_norm": 0.7550969123840332, + "learning_rate": 3.410085241962813e-05, + "loss": 3.3185, + "step": 11075 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.25, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5938, + "epoch": 0.7999277717587576, + "loss": 3.3125, + "loss_text": 0.8555, + "state_loss_0": 0.0, + "step": 11075 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.6094, + "epoch": 0.7999277717587576, + "loss": 3.2969, + "loss_text": 0.9727, + "state_loss_0": 0.0, + "step": 11075 + }, + { + "epoch": 0.8017334777898159, + "grad_norm": 0.8604311943054199, + "learning_rate": 3.4032725763880196e-05, + "loss": 3.3173, + "step": 11100 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.5156, + "epoch": 0.8017334777898159, + "loss": 3.1875, + "loss_text": 0.5078, + "state_loss_0": 0.0, + "step": 11100 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.6562, + "epoch": 0.8017334777898159, + "loss": 3.3281, + "loss_text": 0.6641, + "state_loss_0": 0.0, + "step": 11100 + }, + { + "epoch": 0.803539183820874, + "grad_norm": 0.8586398363113403, + "learning_rate": 3.3964521872341025e-05, + "loss": 3.3125, + "step": 11125 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.6562, + "epoch": 0.803539183820874, + "loss": 3.3281, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 11125 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.625, + "epoch": 0.803539183820874, + "loss": 3.4219, + "loss_text": 1.0312, + "state_loss_0": 0.0, + "step": 11125 + }, + { + "epoch": 0.8053448898519321, + "grad_norm": 0.9365116953849792, + "learning_rate": 3.38962413281991e-05, + "loss": 3.3138, + "step": 11150 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.875, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.5469, + "epoch": 0.8053448898519321, + "loss": 3.375, + "loss_text": 1.2188, + "state_loss_0": 0.0, + "step": 11150 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.75, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5, + "audio_loss_4": 4.125, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.75, + "epoch": 0.8053448898519321, + "loss": 3.4375, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 11150 + }, + { + "epoch": 0.8071505958829902, + "grad_norm": 0.8373432159423828, + "learning_rate": 3.382788471529831e-05, + "loss": 3.3115, + "step": 11175 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.375, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.4688, + "epoch": 0.8071505958829902, + "loss": 3.2812, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 11175 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.6719, + "epoch": 0.8071505958829902, + "loss": 3.4219, + "loss_text": 0.9883, + "state_loss_0": 0.0, + "step": 11175 + }, + { + "epoch": 0.8089563019140484, + "grad_norm": 0.9682813286781311, + "learning_rate": 3.3759452618133e-05, + "loss": 3.3055, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 4.0312, + "eval_audio_loss_6_AQACONVA": 3.7812, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 2.0625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.4844, + "eval_loss_AQACONVA": 3.4844, + "eval_loss_text_AQACONVA": 1.4297, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.2969, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.5938, + "eval_loss_AQACONVA": 3.5938, + "eval_loss_text_AQACONVA": 2.6875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.9531, + "eval_audio_loss_6_AQACONVA": 3.7344, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.8125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 4.0625, + "eval_audio_loss_6_AQACONVA": 3.75, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.2344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.7344, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 1.9609, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_RQACONVA": 3.125, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3906, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9062, + "eval_audio_loss_5_RQACONVA": 3.6719, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.0938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.3438, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.6094, + "eval_loss": 3.4688, + "eval_loss_RQACONVA": 3.4688, + "eval_loss_text_RQACONVA": 1.6406, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4844, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.6406, + "eval_loss": 3.5156, + "eval_loss_RQACONVA": 3.5156, + "eval_loss_text_RQACONVA": 1.9766, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_RQACONVA": 3.5312, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 3.9688, + "eval_audio_loss_6_RQACONVA": 3.7812, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.9375, + "eval_audio_loss_6_RQACONVA": 3.7031, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.7344, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.6094, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 1.8906, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.9062, + "eval_audio_loss_6_RQACONVA": 3.625, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 1.8906, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.625, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.1719, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.5859, + "eval_loss_RQACONV": 0.5859, + "eval_loss_text_RQACONV": 1.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.6992, + "eval_loss_RQACONV": 0.6992, + "eval_loss_text_RQACONV": 1.3984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.6289, + "eval_loss_RQACONV": 0.6289, + "eval_loss_text_RQACONV": 1.2578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.8047, + "eval_loss_RQACONV": 0.8047, + "eval_loss_text_RQACONV": 1.6094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.6641, + "eval_loss_RQACONV": 0.6641, + "eval_loss_text_RQACONV": 1.3281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.7148, + "eval_loss_RQACONV": 0.7148, + "eval_loss_text_RQACONV": 1.4297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.8398, + "eval_loss_RQACONV": 0.8398, + "eval_loss_text_RQACONV": 1.6797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.457, + "eval_loss_RQACONV": 0.457, + "eval_loss_text_RQACONV": 0.9141, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.4941, + "eval_loss_RQACONV": 0.4941, + "eval_loss_text_RQACONV": 0.9883, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.4258, + "eval_loss_RQACONV": 0.4258, + "eval_loss_text_RQACONV": 0.8516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.6016, + "eval_loss_RQACONV": 0.6016, + "eval_loss_text_RQACONV": 1.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.5234, + "eval_loss_RQACONV": 0.5234, + "eval_loss_text_RQACONV": 1.0469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.4883, + "eval_loss_RQACONV": 0.4883, + "eval_loss_text_RQACONV": 0.9766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.4668, + "eval_loss_RQACONV": 0.4668, + "eval_loss_text_RQACONV": 0.9336, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 1.3281, + "eval_loss_RQACONV": 1.3281, + "eval_loss_text_RQACONV": 2.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.1729, + "eval_loss_RQACONV": 0.1729, + "eval_loss_text_RQACONV": 0.3457, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.084, + "eval_loss_RQACONV": 0.084, + "eval_loss_text_RQACONV": 0.168, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.1787, + "eval_loss_RQACONV": 0.1787, + "eval_loss_text_RQACONV": 0.3574, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.1494, + "eval_loss_RQACONV": 0.1494, + "eval_loss_text_RQACONV": 0.2988, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.041, + "eval_loss_RQACONV": 0.041, + "eval_loss_text_RQACONV": 0.082, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.1016, + "eval_loss_RQACONV": 0.1016, + "eval_loss_text_RQACONV": 0.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.0962, + "eval_loss_RQACONV": 0.0962, + "eval_loss_text_RQACONV": 0.1924, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11200 + }, + { + "epoch": 0.8089563019140484, + "eval_loss": 1.5578036308288574, + "eval_runtime": 27.8056, + "eval_samples_per_second": 192.371, + "eval_steps_per_second": 1.51, + "step": 11200 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.1562, + "audio_loss_5": 3.9531, + "audio_loss_6": 3.7344, + "epoch": 0.8089563019140484, + "loss": 3.4219, + "loss_text": 0.3887, + "state_loss_0": 0.0, + "step": 11200 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5625, + "epoch": 0.8089563019140484, + "loss": 3.2188, + "loss_text": 0.4648, + "state_loss_0": 0.0, + "step": 11200 + }, + { + "epoch": 0.8107620079451066, + "grad_norm": 0.7486904263496399, + "learning_rate": 3.369094562184296e-05, + "loss": 3.3034, + "step": 11225 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.6406, + "epoch": 0.8107620079451066, + "loss": 3.4062, + "loss_text": 0.668, + "state_loss_0": 0.0, + "step": 11225 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.5625, + "epoch": 0.8107620079451066, + "loss": 3.3125, + "loss_text": 0.8242, + "state_loss_0": 0.0, + "step": 11225 + }, + { + "epoch": 0.8125677139761647, + "grad_norm": 0.7830896973609924, + "learning_rate": 3.362236431220839e-05, + "loss": 3.308, + "step": 11250 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.6562, + "epoch": 0.8125677139761647, + "loss": 3.3906, + "loss_text": 1.0781, + "state_loss_0": 0.0, + "step": 11250 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.4688, + "epoch": 0.8125677139761647, + "loss": 3.2344, + "loss_text": 0.9023, + "state_loss_0": 0.0, + "step": 11250 + }, + { + "epoch": 0.8143734200072228, + "grad_norm": 1.0723496675491333, + "learning_rate": 3.3553709275644946e-05, + "loss": 3.3058, + "step": 11275 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.6406, + "epoch": 0.8143734200072228, + "loss": 3.3438, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 11275 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.625, + "audio_loss_6": 3.3906, + "epoch": 0.8143734200072228, + "loss": 3.1562, + "loss_text": 0.7227, + "state_loss_0": 0.0, + "step": 11275 + }, + { + "epoch": 0.816179126038281, + "grad_norm": 0.9139533638954163, + "learning_rate": 3.3484981099198695e-05, + "loss": 3.2983, + "step": 11300 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.625, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.5781, + "epoch": 0.816179126038281, + "loss": 3.2969, + "loss_text": 0.5352, + "state_loss_0": 0.0, + "step": 11300 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.6562, + "epoch": 0.816179126038281, + "loss": 3.2969, + "loss_text": 0.8398, + "state_loss_0": 0.0, + "step": 11300 + }, + { + "epoch": 0.8179848320693391, + "grad_norm": 0.8863880038261414, + "learning_rate": 3.3416180370541074e-05, + "loss": 3.3081, + "step": 11325 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.875, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.5938, + "epoch": 0.8179848320693391, + "loss": 3.2344, + "loss_text": 0.5547, + "state_loss_0": 0.0, + "step": 11325 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.5469, + "epoch": 0.8179848320693391, + "loss": 3.3125, + "loss_text": 0.9023, + "state_loss_0": 0.0, + "step": 11325 + }, + { + "epoch": 0.8197905381003973, + "grad_norm": 0.9034544825553894, + "learning_rate": 3.3347307677963915e-05, + "loss": 3.3072, + "step": 11350 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5469, + "epoch": 0.8197905381003973, + "loss": 3.2656, + "loss_text": 0.7891, + "state_loss_0": 0.0, + "step": 11350 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.625, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.6406, + "epoch": 0.8197905381003973, + "loss": 3.3125, + "loss_text": 0.3828, + "state_loss_0": 0.0, + "step": 11350 + }, + { + "epoch": 0.8215962441314554, + "grad_norm": 0.8095229864120483, + "learning_rate": 3.327836361037439e-05, + "loss": 3.3034, + "step": 11375 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.5, + "epoch": 0.8215962441314554, + "loss": 3.2656, + "loss_text": 1.0312, + "state_loss_0": 0.0, + "step": 11375 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.4219, + "epoch": 0.8215962441314554, + "loss": 3.2812, + "loss_text": 0.9531, + "state_loss_0": 0.0, + "step": 11375 + }, + { + "epoch": 0.8234019501625135, + "grad_norm": 0.9564022421836853, + "learning_rate": 3.320934875728996e-05, + "loss": 3.3049, + "step": 11400 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.5156, + "epoch": 0.8234019501625135, + "loss": 3.3438, + "loss_text": 0.7148, + "state_loss_0": 0.0, + "step": 11400 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.25, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.4844, + "epoch": 0.8234019501625135, + "loss": 3.2188, + "loss_text": 0.3613, + "state_loss_0": 0.0, + "step": 11400 + }, + { + "epoch": 0.8252076561935717, + "grad_norm": 0.822413444519043, + "learning_rate": 3.314026370883335e-05, + "loss": 3.301, + "step": 11425 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5781, + "epoch": 0.8252076561935717, + "loss": 3.3594, + "loss_text": 0.832, + "state_loss_0": 0.0, + "step": 11425 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4688, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.5781, + "epoch": 0.8252076561935717, + "loss": 3.4062, + "loss_text": 0.7109, + "state_loss_0": 0.0, + "step": 11425 + }, + { + "epoch": 0.8270133622246298, + "grad_norm": 0.8696854710578918, + "learning_rate": 3.307110905572751e-05, + "loss": 3.3023, + "step": 11450 + }, + { + "audio_loss_0": 3.7656, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.375, + "audio_loss_3": 4.5938, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.625, + "epoch": 0.8270133622246298, + "loss": 3.4688, + "loss_text": 0.7148, + "state_loss_0": 0.0, + "step": 11450 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.375, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.6719, + "epoch": 0.8270133622246298, + "loss": 3.375, + "loss_text": 0.957, + "state_loss_0": 0.0, + "step": 11450 + }, + { + "epoch": 0.828819068255688, + "grad_norm": 0.8119823336601257, + "learning_rate": 3.3001885389290554e-05, + "loss": 3.306, + "step": 11475 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.4844, + "epoch": 0.828819068255688, + "loss": 3.2344, + "loss_text": 0.4512, + "state_loss_0": 0.0, + "step": 11475 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.5781, + "epoch": 0.828819068255688, + "loss": 3.3281, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 11475 + }, + { + "epoch": 0.8306247742867461, + "grad_norm": 0.7153850793838501, + "learning_rate": 3.2932593301430706e-05, + "loss": 3.3046, + "step": 11500 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.5312, + "epoch": 0.8306247742867461, + "loss": 3.1875, + "loss_text": 0.8789, + "state_loss_0": 0.0, + "step": 11500 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.4062, + "epoch": 0.8306247742867461, + "loss": 3.2188, + "loss_text": 0.6445, + "state_loss_0": 0.0, + "step": 11500 + }, + { + "epoch": 0.8324304803178043, + "grad_norm": 0.7442590594291687, + "learning_rate": 3.286323338464122e-05, + "loss": 3.3052, + "step": 11525 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.3594, + "epoch": 0.8324304803178043, + "loss": 3.125, + "loss_text": 0.4922, + "state_loss_0": 0.0, + "step": 11525 + }, + { + "audio_loss_0": 3.4219, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.5781, + "epoch": 0.8324304803178043, + "loss": 3.375, + "loss_text": 0.6328, + "state_loss_0": 0.0, + "step": 11525 + }, + { + "epoch": 0.8342361863488624, + "grad_norm": 0.848821222782135, + "learning_rate": 3.279380623199537e-05, + "loss": 3.2995, + "step": 11550 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.625, + "audio_loss_6": 3.4062, + "epoch": 0.8342361863488624, + "loss": 3.2031, + "loss_text": 0.8008, + "state_loss_0": 0.0, + "step": 11550 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3125, + "audio_loss_4": 4.0, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.5938, + "epoch": 0.8342361863488624, + "loss": 3.375, + "loss_text": 1.3359, + "state_loss_0": 0.0, + "step": 11550 + }, + { + "epoch": 0.8360418923799205, + "grad_norm": 0.8765903115272522, + "learning_rate": 3.272431243714131e-05, + "loss": 3.2999, + "step": 11575 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4688, + "epoch": 0.8360418923799205, + "loss": 3.2656, + "loss_text": 0.832, + "state_loss_0": 0.0, + "step": 11575 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.625, + "epoch": 0.8360418923799205, + "loss": 3.4219, + "loss_text": 1.3047, + "state_loss_0": 0.0, + "step": 11575 + }, + { + "epoch": 0.8378475984109787, + "grad_norm": 0.7993263602256775, + "learning_rate": 3.265475259429706e-05, + "loss": 3.3043, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.75, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 2.125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.6953, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.5938, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.4453, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 3.9844, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.5625, + "eval_loss": 3.5625, + "eval_loss_AQACONVA": 3.5625, + "eval_loss_text_AQACONVA": 2.625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.7188, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0312, + "eval_audio_loss_6_AQACONVA": 3.7344, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.2812, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.7188, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 1.9922, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9062, + "eval_audio_loss_5_RQACONVA": 3.6406, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.0781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.3438, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 1.6719, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4844, + "eval_audio_loss_2_RQACONVA": 3.2812, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.6562, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.0625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_RQACONVA": 3.5938, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 3.9688, + "eval_audio_loss_6_RQACONVA": 3.7344, + "eval_loss": 3.7344, + "eval_loss_RQACONVA": 3.7344, + "eval_loss_text_RQACONVA": 2.375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.9219, + "eval_audio_loss_6_RQACONVA": 3.6875, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 2.7188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 1.9609, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.625, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 1.9531, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.6094, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.5781, + "eval_loss_RQACONV": 0.5781, + "eval_loss_text_RQACONV": 1.1562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.6914, + "eval_loss_RQACONV": 0.6914, + "eval_loss_text_RQACONV": 1.3828, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.6445, + "eval_loss_RQACONV": 0.6445, + "eval_loss_text_RQACONV": 1.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.7969, + "eval_loss_RQACONV": 0.7969, + "eval_loss_text_RQACONV": 1.5938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.6445, + "eval_loss_RQACONV": 0.6445, + "eval_loss_text_RQACONV": 1.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.7539, + "eval_loss_RQACONV": 0.7539, + "eval_loss_text_RQACONV": 1.5078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.8477, + "eval_loss_RQACONV": 0.8477, + "eval_loss_text_RQACONV": 1.6953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.4473, + "eval_loss_RQACONV": 0.4473, + "eval_loss_text_RQACONV": 0.8945, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.4863, + "eval_loss_RQACONV": 0.4863, + "eval_loss_text_RQACONV": 0.9727, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.418, + "eval_loss_RQACONV": 0.418, + "eval_loss_text_RQACONV": 0.8359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.4844, + "eval_loss_RQACONV": 0.4844, + "eval_loss_text_RQACONV": 0.9688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.4844, + "eval_loss_RQACONV": 0.4844, + "eval_loss_text_RQACONV": 0.9688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.457, + "eval_loss_RQACONV": 0.457, + "eval_loss_text_RQACONV": 0.9141, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 1.3281, + "eval_loss_RQACONV": 1.3281, + "eval_loss_text_RQACONV": 2.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.1396, + "eval_loss_RQACONV": 0.1396, + "eval_loss_text_RQACONV": 0.2793, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.165, + "eval_loss_RQACONV": 0.165, + "eval_loss_text_RQACONV": 0.3301, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.083, + "eval_loss_RQACONV": 0.083, + "eval_loss_text_RQACONV": 0.166, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.167, + "eval_loss_RQACONV": 0.167, + "eval_loss_text_RQACONV": 0.334, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.1494, + "eval_loss_RQACONV": 0.1494, + "eval_loss_text_RQACONV": 0.2988, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.0425, + "eval_loss_RQACONV": 0.0425, + "eval_loss_text_RQACONV": 0.085, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.1475, + "eval_loss_RQACONV": 0.1475, + "eval_loss_text_RQACONV": 0.2949, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.1016, + "eval_loss_RQACONV": 0.1016, + "eval_loss_text_RQACONV": 0.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.1006, + "eval_loss_RQACONV": 0.1006, + "eval_loss_text_RQACONV": 0.2012, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 11600 + }, + { + "epoch": 0.8378475984109787, + "eval_loss": 1.5521892309188843, + "eval_runtime": 27.8961, + "eval_samples_per_second": 191.747, + "eval_steps_per_second": 1.506, + "step": 11600 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4844, + "epoch": 0.8378475984109787, + "loss": 3.1875, + "loss_text": 0.5156, + "state_loss_0": 0.0, + "step": 11600 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4531, + "epoch": 0.8378475984109787, + "loss": 3.25, + "loss_text": 0.6172, + "state_loss_0": 0.0, + "step": 11600 + }, + { + "epoch": 0.8396533044420368, + "grad_norm": 0.9053290486335754, + "learning_rate": 3.258512729824534e-05, + "loss": 3.2945, + "step": 11625 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.4688, + "epoch": 0.8396533044420368, + "loss": 3.2344, + "loss_text": 0.5195, + "state_loss_0": 0.0, + "step": 11625 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.5312, + "epoch": 0.8396533044420368, + "loss": 3.2188, + "loss_text": 0.3184, + "state_loss_0": 0.0, + "step": 11625 + }, + { + "epoch": 0.841459010473095, + "grad_norm": 0.9611077308654785, + "learning_rate": 3.2515437144328585e-05, + "loss": 3.2925, + "step": 11650 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4375, + "epoch": 0.841459010473095, + "loss": 3.1875, + "loss_text": 0.3945, + "state_loss_0": 0.0, + "step": 11650 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.5312, + "epoch": 0.841459010473095, + "loss": 3.25, + "loss_text": 0.8086, + "state_loss_0": 0.0, + "step": 11650 + }, + { + "epoch": 0.8432647165041531, + "grad_norm": 0.8398139476776123, + "learning_rate": 3.2445682728443806e-05, + "loss": 3.2978, + "step": 11675 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.125, + "audio_loss_5": 3.9375, + "audio_loss_6": 3.7031, + "epoch": 0.8432647165041531, + "loss": 3.4375, + "loss_text": 1.25, + "state_loss_0": 0.0, + "step": 11675 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.875, + "audio_loss_6": 3.5, + "epoch": 0.8432647165041531, + "loss": 3.3281, + "loss_text": 0.8633, + "state_loss_0": 0.0, + "step": 11675 + }, + { + "epoch": 0.8450704225352113, + "grad_norm": 0.8390135169029236, + "learning_rate": 3.237586464703747e-05, + "loss": 3.2964, + "step": 11700 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5938, + "epoch": 0.8450704225352113, + "loss": 3.2812, + "loss_text": 0.7578, + "state_loss_0": 0.0, + "step": 11700 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5625, + "epoch": 0.8450704225352113, + "loss": 3.3125, + "loss_text": 1.0078, + "state_loss_0": 0.0, + "step": 11700 + }, + { + "epoch": 0.8468761285662694, + "grad_norm": 0.7851021885871887, + "learning_rate": 3.230598349710045e-05, + "loss": 3.2866, + "step": 11725 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.6094, + "epoch": 0.8468761285662694, + "loss": 3.3906, + "loss_text": 1.1562, + "state_loss_0": 0.0, + "step": 11725 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.125, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.5781, + "epoch": 0.8468761285662694, + "loss": 3.2969, + "loss_text": 0.8594, + "state_loss_0": 0.0, + "step": 11725 + }, + { + "epoch": 0.8486818345973276, + "grad_norm": 0.853674054145813, + "learning_rate": 3.223603987616288e-05, + "loss": 3.2967, + "step": 11750 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.4844, + "epoch": 0.8486818345973276, + "loss": 3.25, + "loss_text": 0.668, + "state_loss_0": 0.0, + "step": 11750 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5312, + "epoch": 0.8486818345973276, + "loss": 3.2812, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 11750 + }, + { + "epoch": 0.8504875406283857, + "grad_norm": 0.7495729923248291, + "learning_rate": 3.2166034382289066e-05, + "loss": 3.293, + "step": 11775 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.6562, + "epoch": 0.8504875406283857, + "loss": 3.375, + "loss_text": 0.8906, + "state_loss_0": 0.0, + "step": 11775 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5156, + "epoch": 0.8504875406283857, + "loss": 3.2812, + "loss_text": 0.8008, + "state_loss_0": 0.0, + "step": 11775 + }, + { + "epoch": 0.8522932466594438, + "grad_norm": 0.8424703478813171, + "learning_rate": 3.2095967614072394e-05, + "loss": 3.2952, + "step": 11800 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.8438, + "audio_loss_2": 3.625, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.6875, + "epoch": 0.8522932466594438, + "loss": 3.4062, + "loss_text": 0.5898, + "state_loss_0": 0.0, + "step": 11800 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.4531, + "epoch": 0.8522932466594438, + "loss": 3.1406, + "loss_text": 0.6602, + "state_loss_0": 0.0, + "step": 11800 + }, + { + "epoch": 0.854098952690502, + "grad_norm": 0.7170383930206299, + "learning_rate": 3.202584017063015e-05, + "loss": 3.2931, + "step": 11825 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4375, + "epoch": 0.854098952690502, + "loss": 3.25, + "loss_text": 0.6758, + "state_loss_0": 0.0, + "step": 11825 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.75, + "audio_loss_6": 3.4531, + "epoch": 0.854098952690502, + "loss": 3.2656, + "loss_text": 0.8281, + "state_loss_0": 0.0, + "step": 11825 + }, + { + "epoch": 0.8559046587215602, + "grad_norm": 0.8743582367897034, + "learning_rate": 3.195565265159846e-05, + "loss": 3.2877, + "step": 11850 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.375, + "epoch": 0.8559046587215602, + "loss": 3.1094, + "loss_text": 0.5, + "state_loss_0": 0.0, + "step": 11850 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.6562, + "epoch": 0.8559046587215602, + "loss": 3.3594, + "loss_text": 0.6602, + "state_loss_0": 0.0, + "step": 11850 + }, + { + "epoch": 0.8577103647526183, + "grad_norm": 1.171193242073059, + "learning_rate": 3.18854056571271e-05, + "loss": 3.2929, + "step": 11875 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.5, + "epoch": 0.8577103647526183, + "loss": 3.2812, + "loss_text": 0.8672, + "state_loss_0": 0.0, + "step": 11875 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.25, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.5, + "epoch": 0.8577103647526183, + "loss": 3.2188, + "loss_text": 0.582, + "state_loss_0": 0.0, + "step": 11875 + }, + { + "epoch": 0.8595160707836764, + "grad_norm": 0.8230201601982117, + "learning_rate": 3.1815099787874456e-05, + "loss": 3.2915, + "step": 11900 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.5312, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.75, + "audio_loss_6": 3.6406, + "epoch": 0.8595160707836764, + "loss": 3.3438, + "loss_text": 0.4141, + "state_loss_0": 0.0, + "step": 11900 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.125, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2969, + "epoch": 0.8595160707836764, + "loss": 3.0938, + "loss_text": 0.2559, + "state_loss_0": 0.0, + "step": 11900 + }, + { + "epoch": 0.8613217768147345, + "grad_norm": 1.1896560192108154, + "learning_rate": 3.174473564500228e-05, + "loss": 3.2916, + "step": 11925 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.125, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5312, + "epoch": 0.8613217768147345, + "loss": 3.2344, + "loss_text": 0.7461, + "state_loss_0": 0.0, + "step": 11925 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.5, + "epoch": 0.8613217768147345, + "loss": 3.2031, + "loss_text": 0.3164, + "state_loss_0": 0.0, + "step": 11925 + }, + { + "epoch": 0.8631274828457927, + "grad_norm": 0.7426737546920776, + "learning_rate": 3.167431383017063e-05, + "loss": 3.2868, + "step": 11950 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.5938, + "epoch": 0.8631274828457927, + "loss": 3.4062, + "loss_text": 1.1094, + "state_loss_0": 0.0, + "step": 11950 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.5, + "epoch": 0.8631274828457927, + "loss": 3.2188, + "loss_text": 0.7891, + "state_loss_0": 0.0, + "step": 11950 + }, + { + "epoch": 0.8649331888768509, + "grad_norm": 0.7933710217475891, + "learning_rate": 3.1603834945532684e-05, + "loss": 3.2848, + "step": 11975 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4062, + "epoch": 0.8649331888768509, + "loss": 3.2656, + "loss_text": 0.9883, + "state_loss_0": 0.0, + "step": 11975 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5, + "epoch": 0.8649331888768509, + "loss": 3.2656, + "loss_text": 0.707, + "state_loss_0": 0.0, + "step": 11975 + }, + { + "epoch": 0.866738894907909, + "grad_norm": 0.8430262804031372, + "learning_rate": 3.1533299593729624e-05, + "loss": 3.2907, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.375, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.7188, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 2.0781, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.5781, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_AQACONVA": 3.2812, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.5938, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.4453, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 3.9844, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.5469, + "eval_loss": 3.5625, + "eval_loss_AQACONVA": 3.5625, + "eval_loss_text_AQACONVA": 2.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.7031, + "eval_loss": 3.7031, + "eval_loss_AQACONVA": 3.7031, + "eval_loss_text_AQACONVA": 2.8594, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0312, + "eval_audio_loss_6_AQACONVA": 3.7031, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.25, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9531, + "eval_audio_loss_6_AQACONVA": 3.7031, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.0, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.8906, + "eval_audio_loss_5_RQACONVA": 3.6406, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.0938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_RQACONVA": 3.125, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.6797, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4844, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.6406, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_RQACONVA": 3.5312, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.9375, + "eval_audio_loss_6_RQACONVA": 3.7344, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.3281, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.6562, + "eval_loss": 3.6406, + "eval_loss_RQACONVA": 3.6406, + "eval_loss_text_RQACONVA": 2.7188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5625, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 1.9766, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 1.9141, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.2031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.6133, + "eval_loss_RQACONV": 0.6133, + "eval_loss_text_RQACONV": 1.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.7031, + "eval_loss_RQACONV": 0.7031, + "eval_loss_text_RQACONV": 1.4062, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.6562, + "eval_loss_RQACONV": 0.6562, + "eval_loss_text_RQACONV": 1.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.8164, + "eval_loss_RQACONV": 0.8164, + "eval_loss_text_RQACONV": 1.6328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.6445, + "eval_loss_RQACONV": 0.6445, + "eval_loss_text_RQACONV": 1.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.7695, + "eval_loss_RQACONV": 0.7695, + "eval_loss_text_RQACONV": 1.5391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.9023, + "eval_loss_RQACONV": 0.9023, + "eval_loss_text_RQACONV": 1.8047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.4453, + "eval_loss_RQACONV": 0.4453, + "eval_loss_text_RQACONV": 0.8906, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.4863, + "eval_loss_RQACONV": 0.4863, + "eval_loss_text_RQACONV": 0.9727, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.418, + "eval_loss_RQACONV": 0.418, + "eval_loss_text_RQACONV": 0.8359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.4805, + "eval_loss_RQACONV": 0.4805, + "eval_loss_text_RQACONV": 0.9609, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.4551, + "eval_loss_RQACONV": 0.4551, + "eval_loss_text_RQACONV": 0.9102, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 1.3281, + "eval_loss_RQACONV": 1.3281, + "eval_loss_text_RQACONV": 2.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.1406, + "eval_loss_RQACONV": 0.1406, + "eval_loss_text_RQACONV": 0.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.167, + "eval_loss_RQACONV": 0.167, + "eval_loss_text_RQACONV": 0.334, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.0767, + "eval_loss_RQACONV": 0.0767, + "eval_loss_text_RQACONV": 0.1533, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.1553, + "eval_loss_RQACONV": 0.1553, + "eval_loss_text_RQACONV": 0.3105, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.1523, + "eval_loss_RQACONV": 0.1523, + "eval_loss_text_RQACONV": 0.3047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.0381, + "eval_loss_RQACONV": 0.0381, + "eval_loss_text_RQACONV": 0.0762, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.1436, + "eval_loss_RQACONV": 0.1436, + "eval_loss_text_RQACONV": 0.2871, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.1079, + "eval_loss_RQACONV": 0.1079, + "eval_loss_text_RQACONV": 0.2158, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.1011, + "eval_loss_RQACONV": 0.1011, + "eval_loss_text_RQACONV": 0.2021, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 0.5664, + "eval_loss_RQACONV": 0.5664, + "eval_loss_text_RQACONV": 1.1328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12000 + }, + { + "epoch": 0.866738894907909, + "eval_loss": 1.5512888431549072, + "eval_runtime": 27.7071, + "eval_samples_per_second": 193.055, + "eval_steps_per_second": 1.516, + "step": 12000 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4375, + "epoch": 0.866738894907909, + "loss": 3.25, + "loss_text": 0.8203, + "state_loss_0": 0.0, + "step": 12000 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5156, + "epoch": 0.866738894907909, + "loss": 3.2969, + "loss_text": 0.7773, + "state_loss_0": 0.0, + "step": 12000 + }, + { + "epoch": 0.8685446009389671, + "grad_norm": 0.9360340237617493, + "learning_rate": 3.1462708377885455e-05, + "loss": 3.2898, + "step": 12025 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.6562, + "epoch": 0.8685446009389671, + "loss": 3.3906, + "loss_text": 0.7578, + "state_loss_0": 0.0, + "step": 12025 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.125, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4219, + "epoch": 0.8685446009389671, + "loss": 3.1406, + "loss_text": 0.3574, + "state_loss_0": 0.0, + "step": 12025 + }, + { + "epoch": 0.8703503069700252, + "grad_norm": 0.8480640053749084, + "learning_rate": 3.1392061901601846e-05, + "loss": 3.2822, + "step": 12050 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.625, + "audio_loss_2": 3.375, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.5781, + "epoch": 0.8703503069700252, + "loss": 3.375, + "loss_text": 0.7461, + "state_loss_0": 0.0, + "step": 12050 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.5625, + "epoch": 0.8703503069700252, + "loss": 3.25, + "loss_text": 0.5234, + "state_loss_0": 0.0, + "step": 12050 + }, + { + "epoch": 0.8721560130010835, + "grad_norm": 0.8787268996238708, + "learning_rate": 3.1321360768952974e-05, + "loss": 3.2891, + "step": 12075 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.4844, + "epoch": 0.8721560130010835, + "loss": 3.2969, + "loss_text": 0.6914, + "state_loss_0": 0.0, + "step": 12075 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0938, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.4688, + "epoch": 0.8721560130010835, + "loss": 3.4062, + "loss_text": 1.2031, + "state_loss_0": 0.0, + "step": 12075 + }, + { + "epoch": 0.8739617190321416, + "grad_norm": 0.808261513710022, + "learning_rate": 3.12506055844804e-05, + "loss": 3.2878, + "step": 12100 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.5156, + "epoch": 0.8739617190321416, + "loss": 3.2969, + "loss_text": 0.8203, + "state_loss_0": 0.0, + "step": 12100 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.625, + "audio_loss_6": 3.3125, + "epoch": 0.8739617190321416, + "loss": 3.1875, + "loss_text": 0.6523, + "state_loss_0": 0.0, + "step": 12100 + }, + { + "epoch": 0.8757674250631997, + "grad_norm": 0.8738330602645874, + "learning_rate": 3.117979695318781e-05, + "loss": 3.2745, + "step": 12125 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.5, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.6094, + "epoch": 0.8757674250631997, + "loss": 3.3438, + "loss_text": 0.8086, + "state_loss_0": 0.0, + "step": 12125 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.5469, + "epoch": 0.8757674250631997, + "loss": 3.3125, + "loss_text": 1.1641, + "state_loss_0": 0.0, + "step": 12125 + }, + { + "epoch": 0.8775731310942578, + "grad_norm": 0.7676719427108765, + "learning_rate": 3.110893548053596e-05, + "loss": 3.2852, + "step": 12150 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.5156, + "epoch": 0.8775731310942578, + "loss": 3.2656, + "loss_text": 0.5586, + "state_loss_0": 0.0, + "step": 12150 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.5, + "audio_loss_2": 3.125, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.4844, + "epoch": 0.8775731310942578, + "loss": 3.1875, + "loss_text": 0.4961, + "state_loss_0": 0.0, + "step": 12150 + }, + { + "epoch": 0.879378837125316, + "grad_norm": 0.8094620704650879, + "learning_rate": 3.103802177243738e-05, + "loss": 3.2836, + "step": 12175 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.75, + "audio_loss_6": 3.4219, + "epoch": 0.879378837125316, + "loss": 3.2656, + "loss_text": 0.5312, + "state_loss_0": 0.0, + "step": 12175 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.4844, + "epoch": 0.879378837125316, + "loss": 3.2656, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 12175 + }, + { + "epoch": 0.8811845431563742, + "grad_norm": 0.8654184937477112, + "learning_rate": 3.096705643525127e-05, + "loss": 3.2777, + "step": 12200 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5156, + "epoch": 0.8811845431563742, + "loss": 3.25, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 12200 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.5312, + "epoch": 0.8811845431563742, + "loss": 3.2969, + "loss_text": 1.0, + "state_loss_0": 0.0, + "step": 12200 + }, + { + "epoch": 0.8829902491874323, + "grad_norm": 0.7707675099372864, + "learning_rate": 3.089604007577829e-05, + "loss": 3.2755, + "step": 12225 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.625, + "audio_loss_6": 3.4375, + "epoch": 0.8829902491874323, + "loss": 3.2188, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 12225 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.3594, + "epoch": 0.8829902491874323, + "loss": 3.1406, + "loss_text": 0.6836, + "state_loss_0": 0.0, + "step": 12225 + }, + { + "epoch": 0.8847959552184904, + "grad_norm": 1.0320194959640503, + "learning_rate": 3.0824973301255396e-05, + "loss": 3.2798, + "step": 12250 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.5312, + "epoch": 0.8847959552184904, + "loss": 3.375, + "loss_text": 0.6289, + "state_loss_0": 0.0, + "step": 12250 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.125, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5312, + "epoch": 0.8847959552184904, + "loss": 3.2031, + "loss_text": 0.582, + "state_loss_0": 0.0, + "step": 12250 + }, + { + "epoch": 0.8866016612495485, + "grad_norm": 0.7944374084472656, + "learning_rate": 3.075385671935058e-05, + "loss": 3.2699, + "step": 12275 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.6562, + "epoch": 0.8866016612495485, + "loss": 3.4062, + "loss_text": 0.5898, + "state_loss_0": 0.0, + "step": 12275 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.6094, + "epoch": 0.8866016612495485, + "loss": 3.3281, + "loss_text": 0.5625, + "state_loss_0": 0.0, + "step": 12275 + }, + { + "epoch": 0.8884073672806068, + "grad_norm": 0.8683745265007019, + "learning_rate": 3.068269093815776e-05, + "loss": 3.2831, + "step": 12300 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.625, + "audio_loss_6": 3.4688, + "epoch": 0.8884073672806068, + "loss": 3.25, + "loss_text": 0.7578, + "state_loss_0": 0.0, + "step": 12300 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.375, + "audio_loss_4": 3.875, + "audio_loss_5": 3.625, + "audio_loss_6": 3.4688, + "epoch": 0.8884073672806068, + "loss": 3.25, + "loss_text": 0.8867, + "state_loss_0": 0.0, + "step": 12300 + }, + { + "epoch": 0.8902130733116649, + "grad_norm": 0.8754615187644958, + "learning_rate": 3.061147656619152e-05, + "loss": 3.2688, + "step": 12325 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.375, + "epoch": 0.8902130733116649, + "loss": 3.1719, + "loss_text": 0.2812, + "state_loss_0": 0.0, + "step": 12325 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5625, + "audio_loss_4": 4.1562, + "audio_loss_5": 3.9062, + "audio_loss_6": 3.7188, + "epoch": 0.8902130733116649, + "loss": 3.4688, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 12325 + }, + { + "epoch": 0.892018779342723, + "grad_norm": 1.0876353979110718, + "learning_rate": 3.054021421238193e-05, + "loss": 3.2809, + "step": 12350 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.4531, + "epoch": 0.892018779342723, + "loss": 3.2188, + "loss_text": 0.5547, + "state_loss_0": 0.0, + "step": 12350 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4219, + "epoch": 0.892018779342723, + "loss": 3.2188, + "loss_text": 0.875, + "state_loss_0": 0.0, + "step": 12350 + }, + { + "epoch": 0.8938244853737811, + "grad_norm": 0.9894729256629944, + "learning_rate": 3.0468904486069362e-05, + "loss": 3.2789, + "step": 12375 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.5312, + "epoch": 0.8938244853737811, + "loss": 3.2656, + "loss_text": 1.0156, + "state_loss_0": 0.0, + "step": 12375 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.625, + "audio_loss_6": 3.4375, + "epoch": 0.8938244853737811, + "loss": 3.2812, + "loss_text": 0.957, + "state_loss_0": 0.0, + "step": 12375 + }, + { + "epoch": 0.8956301914048392, + "grad_norm": 0.9832631945610046, + "learning_rate": 3.0397547996999202e-05, + "loss": 3.2692, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.375, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.7031, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.1406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.5469, + "eval_loss": 3.4844, + "eval_loss_AQACONVA": 3.4844, + "eval_loss_text_AQACONVA": 1.6953, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.5781, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.4609, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.2969, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.5312, + "eval_loss": 3.5625, + "eval_loss_AQACONVA": 3.5625, + "eval_loss_text_AQACONVA": 2.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.6875, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.5781, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0312, + "eval_audio_loss_6_AQACONVA": 3.7031, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.2188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9531, + "eval_audio_loss_6_AQACONVA": 3.7031, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 1.9922, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.875, + "eval_audio_loss_5_RQACONVA": 3.6094, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 2.0312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3438, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.6484, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.25, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.5156, + "eval_loss_RQACONVA": 3.5156, + "eval_loss_text_RQACONVA": 2.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_RQACONVA": 3.5469, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.9062, + "eval_audio_loss_6_RQACONVA": 3.6875, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.2656, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.6562, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 2.7188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4219, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.5156, + "eval_loss_RQACONVA": 3.5156, + "eval_loss_text_RQACONVA": 1.8828, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.5625, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.2031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.4824, + "eval_loss_RQACONV": 0.4824, + "eval_loss_text_RQACONV": 0.9648, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.6875, + "eval_loss_RQACONV": 0.6875, + "eval_loss_text_RQACONV": 1.375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.832, + "eval_loss_RQACONV": 0.832, + "eval_loss_text_RQACONV": 1.6641, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.6797, + "eval_loss_RQACONV": 0.6797, + "eval_loss_text_RQACONV": 1.3594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.7422, + "eval_loss_RQACONV": 0.7422, + "eval_loss_text_RQACONV": 1.4844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.8828, + "eval_loss_RQACONV": 0.8828, + "eval_loss_text_RQACONV": 1.7656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.4414, + "eval_loss_RQACONV": 0.4414, + "eval_loss_text_RQACONV": 0.8828, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.4844, + "eval_loss_RQACONV": 0.4844, + "eval_loss_text_RQACONV": 0.9688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.4141, + "eval_loss_RQACONV": 0.4141, + "eval_loss_text_RQACONV": 0.8281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.4824, + "eval_loss_RQACONV": 0.4824, + "eval_loss_text_RQACONV": 0.9648, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.459, + "eval_loss_RQACONV": 0.459, + "eval_loss_text_RQACONV": 0.918, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 1.3359, + "eval_loss_RQACONV": 1.3359, + "eval_loss_text_RQACONV": 2.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.1396, + "eval_loss_RQACONV": 0.1396, + "eval_loss_text_RQACONV": 0.2793, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.1543, + "eval_loss_RQACONV": 0.1543, + "eval_loss_text_RQACONV": 0.3086, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.0825, + "eval_loss_RQACONV": 0.0825, + "eval_loss_text_RQACONV": 0.165, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.1621, + "eval_loss_RQACONV": 0.1621, + "eval_loss_text_RQACONV": 0.3242, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.1582, + "eval_loss_RQACONV": 0.1582, + "eval_loss_text_RQACONV": 0.3164, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.0337, + "eval_loss_RQACONV": 0.0337, + "eval_loss_text_RQACONV": 0.0674, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.1406, + "eval_loss_RQACONV": 0.1406, + "eval_loss_text_RQACONV": 0.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.1069, + "eval_loss_RQACONV": 0.1069, + "eval_loss_text_RQACONV": 0.2139, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.1011, + "eval_loss_RQACONV": 0.1011, + "eval_loss_text_RQACONV": 0.2021, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 0.5664, + "eval_loss_RQACONV": 0.5664, + "eval_loss_text_RQACONV": 1.1328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12400 + }, + { + "epoch": 0.8956301914048392, + "eval_loss": 1.5471291542053223, + "eval_runtime": 27.7236, + "eval_samples_per_second": 192.941, + "eval_steps_per_second": 1.515, + "step": 12400 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.375, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4844, + "epoch": 0.8956301914048392, + "loss": 3.2344, + "loss_text": 0.7656, + "state_loss_0": 0.0, + "step": 12400 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.3906, + "epoch": 0.8956301914048392, + "loss": 3.1562, + "loss_text": 0.8438, + "state_loss_0": 0.0, + "step": 12400 + }, + { + "epoch": 0.8974358974358975, + "grad_norm": 0.7645571231842041, + "learning_rate": 3.032614535531675e-05, + "loss": 3.2762, + "step": 12425 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.375, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5312, + "epoch": 0.8974358974358975, + "loss": 3.4062, + "loss_text": 1.5703, + "state_loss_0": 0.0, + "step": 12425 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.625, + "epoch": 0.8974358974358975, + "loss": 3.4062, + "loss_text": 0.8125, + "state_loss_0": 0.0, + "step": 12425 + }, + { + "epoch": 0.8992416034669556, + "grad_norm": 1.0044018030166626, + "learning_rate": 3.0254697171561884e-05, + "loss": 3.2698, + "step": 12450 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.3906, + "epoch": 0.8992416034669556, + "loss": 3.2188, + "loss_text": 0.8633, + "state_loss_0": 0.0, + "step": 12450 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.2812, + "epoch": 0.8992416034669556, + "loss": 3.2656, + "loss_text": 0.8672, + "state_loss_0": 0.0, + "step": 12450 + }, + { + "epoch": 0.9010473094980137, + "grad_norm": 0.8783119916915894, + "learning_rate": 3.018320405666394e-05, + "loss": 3.2718, + "step": 12475 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5, + "epoch": 0.9010473094980137, + "loss": 3.2812, + "loss_text": 0.5938, + "state_loss_0": 0.0, + "step": 12475 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.75, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.5, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.6562, + "epoch": 0.9010473094980137, + "loss": 3.4219, + "loss_text": 0.7852, + "state_loss_0": 0.0, + "step": 12475 + }, + { + "epoch": 0.9028530155290718, + "grad_norm": 0.718848705291748, + "learning_rate": 3.0111666621936417e-05, + "loss": 3.2868, + "step": 12500 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.3438, + "epoch": 0.9028530155290718, + "loss": 3.2031, + "loss_text": 0.5898, + "state_loss_0": 0.0, + "step": 12500 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.6094, + "epoch": 0.9028530155290718, + "loss": 3.3438, + "loss_text": 0.7617, + "state_loss_0": 0.0, + "step": 12500 + }, + { + "epoch": 0.90465872156013, + "grad_norm": 0.907256543636322, + "learning_rate": 3.0040085479071806e-05, + "loss": 3.2761, + "step": 12525 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.5, + "epoch": 0.90465872156013, + "loss": 3.25, + "loss_text": 0.5977, + "state_loss_0": 0.0, + "step": 12525 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.6094, + "epoch": 0.90465872156013, + "loss": 3.375, + "loss_text": 0.918, + "state_loss_0": 0.0, + "step": 12525 + }, + { + "epoch": 0.9064644275911882, + "grad_norm": 0.847044825553894, + "learning_rate": 2.9968461240136296e-05, + "loss": 3.274, + "step": 12550 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4844, + "epoch": 0.9064644275911882, + "loss": 3.25, + "loss_text": 0.625, + "state_loss_0": 0.0, + "step": 12550 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.5156, + "epoch": 0.9064644275911882, + "loss": 3.2344, + "loss_text": 0.416, + "state_loss_0": 0.0, + "step": 12550 + }, + { + "epoch": 0.9082701336222463, + "grad_norm": 0.8439131379127502, + "learning_rate": 2.9896794517564607e-05, + "loss": 3.2641, + "step": 12575 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.4688, + "epoch": 0.9082701336222463, + "loss": 3.25, + "loss_text": 1.0, + "state_loss_0": 0.0, + "step": 12575 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5, + "epoch": 0.9082701336222463, + "loss": 3.2656, + "loss_text": 0.7461, + "state_loss_0": 0.0, + "step": 12575 + }, + { + "epoch": 0.9100758396533044, + "grad_norm": 0.8504288196563721, + "learning_rate": 2.982508592415471e-05, + "loss": 3.2651, + "step": 12600 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.5781, + "epoch": 0.9100758396533044, + "loss": 3.3594, + "loss_text": 0.7656, + "state_loss_0": 0.0, + "step": 12600 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.4375, + "epoch": 0.9100758396533044, + "loss": 3.1875, + "loss_text": 0.4434, + "state_loss_0": 0.0, + "step": 12600 + }, + { + "epoch": 0.9118815456843626, + "grad_norm": 0.999519944190979, + "learning_rate": 2.975333607306259e-05, + "loss": 3.2714, + "step": 12625 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.125, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.4219, + "epoch": 0.9118815456843626, + "loss": 3.125, + "loss_text": 0.5391, + "state_loss_0": 0.0, + "step": 12625 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 4.0625, + "audio_loss_2": 3.875, + "audio_loss_3": 4.6562, + "audio_loss_4": 4.25, + "audio_loss_5": 4.0625, + "audio_loss_6": 3.8281, + "epoch": 0.9118815456843626, + "loss": 3.6719, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 12625 + }, + { + "epoch": 0.9136872517154208, + "grad_norm": 0.9852985143661499, + "learning_rate": 2.9681545577797033e-05, + "loss": 3.2769, + "step": 12650 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4375, + "epoch": 0.9136872517154208, + "loss": 3.25, + "loss_text": 0.6484, + "state_loss_0": 0.0, + "step": 12650 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4688, + "epoch": 0.9136872517154208, + "loss": 3.2031, + "loss_text": 0.5898, + "state_loss_0": 0.0, + "step": 12650 + }, + { + "epoch": 0.9154929577464789, + "grad_norm": 0.7289363145828247, + "learning_rate": 2.960971505221435e-05, + "loss": 3.2644, + "step": 12675 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.9688, + "audio_loss_2": 3.6875, + "audio_loss_3": 4.7812, + "audio_loss_4": 4.25, + "audio_loss_5": 4.0938, + "audio_loss_6": 3.8594, + "epoch": 0.9154929577464789, + "loss": 3.6562, + "loss_text": 0.8906, + "state_loss_0": 0.0, + "step": 12675 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5625, + "epoch": 0.9154929577464789, + "loss": 3.3125, + "loss_text": 0.6875, + "state_loss_0": 0.0, + "step": 12675 + }, + { + "epoch": 0.917298663777537, + "grad_norm": 0.9100626111030579, + "learning_rate": 2.9537845110513124e-05, + "loss": 3.2629, + "step": 12700 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.4531, + "epoch": 0.917298663777537, + "loss": 3.2656, + "loss_text": 0.6562, + "state_loss_0": 0.0, + "step": 12700 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.4531, + "epoch": 0.917298663777537, + "loss": 3.1875, + "loss_text": 0.6406, + "state_loss_0": 0.0, + "step": 12700 + }, + { + "epoch": 0.9191043698085951, + "grad_norm": 0.7805112600326538, + "learning_rate": 2.946593636722899e-05, + "loss": 3.2632, + "step": 12725 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4531, + "epoch": 0.9191043698085951, + "loss": 3.1406, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 12725 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.625, + "audio_loss_6": 3.4062, + "epoch": 0.9191043698085951, + "loss": 3.25, + "loss_text": 0.9336, + "state_loss_0": 0.0, + "step": 12725 + }, + { + "epoch": 0.9209100758396533, + "grad_norm": 0.7736445665359497, + "learning_rate": 2.9393989437229364e-05, + "loss": 3.2709, + "step": 12750 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.4375, + "epoch": 0.9209100758396533, + "loss": 3.1875, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 12750 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.4688, + "epoch": 0.9209100758396533, + "loss": 3.2656, + "loss_text": 0.6758, + "state_loss_0": 0.0, + "step": 12750 + }, + { + "epoch": 0.9227157818707115, + "grad_norm": 0.8486165404319763, + "learning_rate": 2.9322004935708168e-05, + "loss": 3.2769, + "step": 12775 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.4844, + "epoch": 0.9227157818707115, + "loss": 3.2344, + "loss_text": 0.707, + "state_loss_0": 0.0, + "step": 12775 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.4844, + "epoch": 0.9227157818707115, + "loss": 3.1875, + "loss_text": 0.4395, + "state_loss_0": 0.0, + "step": 12775 + }, + { + "epoch": 0.9245214879017696, + "grad_norm": 0.7842081785202026, + "learning_rate": 2.92499834781806e-05, + "loss": 3.2586, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.375, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9531, + "eval_audio_loss_6_AQACONVA": 3.7031, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 2.0938, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.5469, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.7109, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_AQACONVA": 3.2812, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.5312, + "eval_loss": 3.4375, + "eval_loss_AQACONVA": 3.4375, + "eval_loss_text_AQACONVA": 1.4531, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.2969, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.7656, + "eval_audio_loss_6_AQACONVA": 3.5156, + "eval_loss": 3.5625, + "eval_loss_AQACONVA": 3.5625, + "eval_loss_text_AQACONVA": 2.6875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.8594, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7031, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.2344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.6406, + "eval_loss_AQACONVA": 3.6406, + "eval_loss_text_AQACONVA": 1.9922, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.6094, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 2.0781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.6875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_RQACONVA": 3.375, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.25, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.5, + "eval_loss_RQACONVA": 3.5, + "eval_loss_text_RQACONVA": 2.0781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.9062, + "eval_audio_loss_6_RQACONVA": 3.6719, + "eval_loss": 3.6406, + "eval_loss_RQACONVA": 3.6406, + "eval_loss_text_RQACONVA": 2.1562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.6094, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 2.7344, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_RQACONVA": 3.125, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5, + "eval_loss_RQACONVA": 3.5, + "eval_loss_text_RQACONVA": 1.9219, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.5859, + "eval_loss_RQACONV": 0.5859, + "eval_loss_text_RQACONV": 1.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.4883, + "eval_loss_RQACONV": 0.4883, + "eval_loss_text_RQACONV": 0.9766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.6641, + "eval_loss_RQACONV": 0.6641, + "eval_loss_text_RQACONV": 1.3281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.8203, + "eval_loss_RQACONV": 0.8203, + "eval_loss_text_RQACONV": 1.6406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.75, + "eval_loss_RQACONV": 0.75, + "eval_loss_text_RQACONV": 1.5, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.9102, + "eval_loss_RQACONV": 0.9102, + "eval_loss_text_RQACONV": 1.8203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.4434, + "eval_loss_RQACONV": 0.4434, + "eval_loss_text_RQACONV": 0.8867, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.4121, + "eval_loss_RQACONV": 0.4121, + "eval_loss_text_RQACONV": 0.8242, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.4824, + "eval_loss_RQACONV": 0.4824, + "eval_loss_text_RQACONV": 0.9648, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.4609, + "eval_loss_RQACONV": 0.4609, + "eval_loss_text_RQACONV": 0.9219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 1.3359, + "eval_loss_RQACONV": 1.3359, + "eval_loss_text_RQACONV": 2.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.1406, + "eval_loss_RQACONV": 0.1406, + "eval_loss_text_RQACONV": 0.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.1631, + "eval_loss_RQACONV": 0.1631, + "eval_loss_text_RQACONV": 0.3262, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.085, + "eval_loss_RQACONV": 0.085, + "eval_loss_text_RQACONV": 0.1699, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.1543, + "eval_loss_RQACONV": 0.1543, + "eval_loss_text_RQACONV": 0.3086, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.0339, + "eval_loss_RQACONV": 0.0339, + "eval_loss_text_RQACONV": 0.0679, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.1406, + "eval_loss_RQACONV": 0.1406, + "eval_loss_text_RQACONV": 0.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.1172, + "eval_loss_RQACONV": 0.1172, + "eval_loss_text_RQACONV": 0.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.0972, + "eval_loss_RQACONV": 0.0972, + "eval_loss_text_RQACONV": 0.1943, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 0.5664, + "eval_loss_RQACONV": 0.5664, + "eval_loss_text_RQACONV": 1.1328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 12800 + }, + { + "epoch": 0.9245214879017696, + "eval_loss": 1.5443482398986816, + "eval_runtime": 28.1702, + "eval_samples_per_second": 189.881, + "eval_steps_per_second": 1.491, + "step": 12800 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.3906, + "epoch": 0.9245214879017696, + "loss": 3.2031, + "loss_text": 0.3516, + "state_loss_0": 0.0, + "step": 12800 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.4219, + "epoch": 0.9245214879017696, + "loss": 3.2188, + "loss_text": 0.5195, + "state_loss_0": 0.0, + "step": 12800 + }, + { + "epoch": 0.9263271939328277, + "grad_norm": 0.8354002237319946, + "learning_rate": 2.9177925680477842e-05, + "loss": 3.258, + "step": 12825 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.6094, + "epoch": 0.9263271939328277, + "loss": 3.4219, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 12825 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.4531, + "epoch": 0.9263271939328277, + "loss": 3.2969, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 12825 + }, + { + "epoch": 0.9281328999638859, + "grad_norm": 0.8756522536277771, + "learning_rate": 2.9105832158741824e-05, + "loss": 3.2689, + "step": 12850 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.375, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.5625, + "epoch": 0.9281328999638859, + "loss": 3.3125, + "loss_text": 0.6875, + "state_loss_0": 0.0, + "step": 12850 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.375, + "epoch": 0.9281328999638859, + "loss": 3.2656, + "loss_text": 0.9336, + "state_loss_0": 0.0, + "step": 12850 + }, + { + "epoch": 0.929938605994944, + "grad_norm": 0.8531259298324585, + "learning_rate": 2.903370352941993e-05, + "loss": 3.2585, + "step": 12875 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.3594, + "epoch": 0.929938605994944, + "loss": 3.25, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 12875 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4688, + "epoch": 0.929938605994944, + "loss": 3.2031, + "loss_text": 0.6055, + "state_loss_0": 0.0, + "step": 12875 + }, + { + "epoch": 0.9317443120260022, + "grad_norm": 0.8014244437217712, + "learning_rate": 2.896154040925974e-05, + "loss": 3.2629, + "step": 12900 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.625, + "audio_loss_6": 3.375, + "epoch": 0.9317443120260022, + "loss": 3.2031, + "loss_text": 1.1016, + "state_loss_0": 0.0, + "step": 12900 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.625, + "epoch": 0.9317443120260022, + "loss": 3.3906, + "loss_text": 0.875, + "state_loss_0": 0.0, + "step": 12900 + }, + { + "epoch": 0.9335500180570603, + "grad_norm": 0.8492245674133301, + "learning_rate": 2.888934341530376e-05, + "loss": 3.2624, + "step": 12925 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5469, + "epoch": 0.9335500180570603, + "loss": 3.2969, + "loss_text": 0.7305, + "state_loss_0": 0.0, + "step": 12925 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.4844, + "epoch": 0.9335500180570603, + "loss": 3.3125, + "loss_text": 0.9023, + "state_loss_0": 0.0, + "step": 12925 + }, + { + "epoch": 0.9353557240881184, + "grad_norm": 0.8143634796142578, + "learning_rate": 2.8817113164884122e-05, + "loss": 3.2582, + "step": 12950 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4531, + "epoch": 0.9353557240881184, + "loss": 3.2031, + "loss_text": 0.8281, + "state_loss_0": 0.0, + "step": 12950 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.625, + "audio_loss_6": 3.4219, + "epoch": 0.9353557240881184, + "loss": 3.2188, + "loss_text": 0.8281, + "state_loss_0": 0.0, + "step": 12950 + }, + { + "epoch": 0.9371614301191766, + "grad_norm": 0.769343376159668, + "learning_rate": 2.874485027561734e-05, + "loss": 3.2623, + "step": 12975 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4219, + "epoch": 0.9371614301191766, + "loss": 3.2031, + "loss_text": 0.5586, + "state_loss_0": 0.0, + "step": 12975 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3594, + "epoch": 0.9371614301191766, + "loss": 3.1875, + "loss_text": 0.6055, + "state_loss_0": 0.0, + "step": 12975 + }, + { + "epoch": 0.9389671361502347, + "grad_norm": 0.7142559885978699, + "learning_rate": 2.8672555365399e-05, + "loss": 3.2611, + "step": 13000 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.25, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4375, + "epoch": 0.9389671361502347, + "loss": 3.3125, + "loss_text": 1.1953, + "state_loss_0": 0.0, + "step": 13000 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.25, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3438, + "epoch": 0.9389671361502347, + "loss": 3.1719, + "loss_text": 0.4043, + "state_loss_0": 0.0, + "step": 13000 + }, + { + "epoch": 0.9407728421812929, + "grad_norm": 0.6855766177177429, + "learning_rate": 2.860022905239851e-05, + "loss": 3.2613, + "step": 13025 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.5, + "epoch": 0.9407728421812929, + "loss": 3.3125, + "loss_text": 0.7891, + "state_loss_0": 0.0, + "step": 13025 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.125, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.5469, + "epoch": 0.9407728421812929, + "loss": 3.25, + "loss_text": 0.6836, + "state_loss_0": 0.0, + "step": 13025 + }, + { + "epoch": 0.942578548212351, + "grad_norm": 0.8233514428138733, + "learning_rate": 2.8527871955053747e-05, + "loss": 3.2602, + "step": 13050 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.375, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.875, + "audio_loss_6": 3.5469, + "epoch": 0.942578548212351, + "loss": 3.3438, + "loss_text": 0.6445, + "state_loss_0": 0.0, + "step": 13050 + }, + { + "audio_loss_0": 3.5938, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.25, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0, + "audio_loss_5": 3.8594, + "audio_loss_6": 3.6562, + "epoch": 0.942578548212351, + "loss": 3.4062, + "loss_text": 0.7695, + "state_loss_0": 0.0, + "step": 13050 + }, + { + "epoch": 0.9443842542434092, + "grad_norm": 0.912275493144989, + "learning_rate": 2.8455484692065876e-05, + "loss": 3.2554, + "step": 13075 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.5156, + "epoch": 0.9443842542434092, + "loss": 3.2812, + "loss_text": 0.5508, + "state_loss_0": 0.0, + "step": 13075 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.5469, + "epoch": 0.9443842542434092, + "loss": 3.3594, + "loss_text": 0.9922, + "state_loss_0": 0.0, + "step": 13075 + }, + { + "epoch": 0.9461899602744673, + "grad_norm": 0.816412627696991, + "learning_rate": 2.838306788239395e-05, + "loss": 3.2667, + "step": 13100 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.4844, + "epoch": 0.9461899602744673, + "loss": 3.3125, + "loss_text": 0.7305, + "state_loss_0": 0.0, + "step": 13100 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.5312, + "epoch": 0.9461899602744673, + "loss": 3.2812, + "loss_text": 0.5117, + "state_loss_0": 0.0, + "step": 13100 + }, + { + "epoch": 0.9479956663055255, + "grad_norm": 0.7976250052452087, + "learning_rate": 2.8310622145249684e-05, + "loss": 3.2539, + "step": 13125 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.5781, + "epoch": 0.9479956663055255, + "loss": 3.3594, + "loss_text": 0.6367, + "state_loss_0": 0.0, + "step": 13125 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.5781, + "epoch": 0.9479956663055255, + "loss": 3.3906, + "loss_text": 0.5781, + "state_loss_0": 0.0, + "step": 13125 + }, + { + "epoch": 0.9498013723365836, + "grad_norm": 0.8463626503944397, + "learning_rate": 2.8238148100092136e-05, + "loss": 3.2529, + "step": 13150 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.4531, + "epoch": 0.9498013723365836, + "loss": 3.2656, + "loss_text": 0.5625, + "state_loss_0": 0.0, + "step": 13150 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5469, + "epoch": 0.9498013723365836, + "loss": 3.2812, + "loss_text": 0.8828, + "state_loss_0": 0.0, + "step": 13150 + }, + { + "epoch": 0.9516070783676418, + "grad_norm": 0.8386203050613403, + "learning_rate": 2.816564636662243e-05, + "loss": 3.2497, + "step": 13175 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.375, + "epoch": 0.9516070783676418, + "loss": 3.125, + "loss_text": 0.4902, + "state_loss_0": 0.0, + "step": 13175 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.75, + "audio_loss_6": 3.4375, + "epoch": 0.9516070783676418, + "loss": 3.25, + "loss_text": 0.8242, + "state_loss_0": 0.0, + "step": 13175 + }, + { + "epoch": 0.9534127843986999, + "grad_norm": 0.8651671409606934, + "learning_rate": 2.8093117564778425e-05, + "loss": 3.2511, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_AQACONVA": 3.3281, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.375, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.5781, + "eval_loss_AQACONVA": 3.5781, + "eval_loss_text_AQACONVA": 2.1094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.5156, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.7188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_AQACONVA": 3.2812, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.3594, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.5312, + "eval_loss": 3.4219, + "eval_loss_AQACONVA": 3.4219, + "eval_loss_text_AQACONVA": 1.4219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.2969, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.75, + "eval_audio_loss_6_AQACONVA": 3.5156, + "eval_loss": 3.5625, + "eval_loss_AQACONVA": 3.5625, + "eval_loss_text_AQACONVA": 2.7188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_AQACONVA": 3.5312, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.2344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.4688, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.0, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5938, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 2.0938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.75, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.6875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4688, + "eval_audio_loss_2_RQACONVA": 3.25, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.1094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_RQACONVA": 3.4844, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.6406, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 2.25, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.6094, + "eval_loss": 3.6406, + "eval_loss_RQACONVA": 3.6406, + "eval_loss_text_RQACONVA": 2.75, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5, + "eval_loss_RQACONVA": 3.5, + "eval_loss_text_RQACONVA": 1.9531, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4219, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.5156, + "eval_loss_RQACONVA": 3.5156, + "eval_loss_text_RQACONVA": 1.9297, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7188, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.2344, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.6289, + "eval_loss_RQACONV": 0.6289, + "eval_loss_text_RQACONV": 1.2578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.6992, + "eval_loss_RQACONV": 0.6992, + "eval_loss_text_RQACONV": 1.3984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.6367, + "eval_loss_RQACONV": 0.6367, + "eval_loss_text_RQACONV": 1.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.8242, + "eval_loss_RQACONV": 0.8242, + "eval_loss_text_RQACONV": 1.6484, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.6836, + "eval_loss_RQACONV": 0.6836, + "eval_loss_text_RQACONV": 1.3672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.7695, + "eval_loss_RQACONV": 0.7695, + "eval_loss_text_RQACONV": 1.5391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.9219, + "eval_loss_RQACONV": 0.9219, + "eval_loss_text_RQACONV": 1.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.4453, + "eval_loss_RQACONV": 0.4453, + "eval_loss_text_RQACONV": 0.8906, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.418, + "eval_loss_RQACONV": 0.418, + "eval_loss_text_RQACONV": 0.8359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.4844, + "eval_loss_RQACONV": 0.4844, + "eval_loss_text_RQACONV": 0.9688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.457, + "eval_loss_RQACONV": 0.457, + "eval_loss_text_RQACONV": 0.9141, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 1.3359, + "eval_loss_RQACONV": 1.3359, + "eval_loss_text_RQACONV": 2.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.1338, + "eval_loss_RQACONV": 0.1338, + "eval_loss_text_RQACONV": 0.2676, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.1572, + "eval_loss_RQACONV": 0.1572, + "eval_loss_text_RQACONV": 0.3145, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.0747, + "eval_loss_RQACONV": 0.0747, + "eval_loss_text_RQACONV": 0.1494, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.168, + "eval_loss_RQACONV": 0.168, + "eval_loss_text_RQACONV": 0.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.1494, + "eval_loss_RQACONV": 0.1494, + "eval_loss_text_RQACONV": 0.2988, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.0396, + "eval_loss_RQACONV": 0.0396, + "eval_loss_text_RQACONV": 0.0791, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.1064, + "eval_loss_RQACONV": 0.1064, + "eval_loss_text_RQACONV": 0.2129, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.0972, + "eval_loss_RQACONV": 0.0972, + "eval_loss_text_RQACONV": 0.1943, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 0.5703, + "eval_loss_RQACONV": 0.5703, + "eval_loss_text_RQACONV": 1.1406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13200 + }, + { + "epoch": 0.9534127843986999, + "eval_loss": 1.542074203491211, + "eval_runtime": 27.9418, + "eval_samples_per_second": 191.434, + "eval_steps_per_second": 1.503, + "step": 13200 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.5, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5625, + "epoch": 0.9534127843986999, + "loss": 3.375, + "loss_text": 0.7656, + "state_loss_0": 0.0, + "step": 13200 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.625, + "epoch": 0.9534127843986999, + "loss": 3.3125, + "loss_text": 0.3301, + "state_loss_0": 0.0, + "step": 13200 + }, + { + "epoch": 0.955218490429758, + "grad_norm": 0.9686676263809204, + "learning_rate": 2.8020562314729447e-05, + "loss": 3.2533, + "step": 13225 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.5, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.4219, + "epoch": 0.955218490429758, + "loss": 3.2344, + "loss_text": 0.9805, + "state_loss_0": 0.0, + "step": 13225 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.75, + "audio_loss_2": 3.5781, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.5156, + "epoch": 0.955218490429758, + "loss": 3.375, + "loss_text": 0.6172, + "state_loss_0": 0.0, + "step": 13225 + }, + { + "epoch": 0.9570241964608162, + "grad_norm": 0.7662353515625, + "learning_rate": 2.7947981236870957e-05, + "loss": 3.2489, + "step": 13250 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.25, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.4375, + "epoch": 0.9570241964608162, + "loss": 3.2188, + "loss_text": 0.6289, + "state_loss_0": 0.0, + "step": 13250 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4062, + "epoch": 0.9570241964608162, + "loss": 3.2656, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 13250 + }, + { + "epoch": 0.9588299024918743, + "grad_norm": 0.7703453302383423, + "learning_rate": 2.7875374951819284e-05, + "loss": 3.2515, + "step": 13275 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.5938, + "epoch": 0.9588299024918743, + "loss": 3.4219, + "loss_text": 1.2422, + "state_loss_0": 0.0, + "step": 13275 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.5781, + "epoch": 0.9588299024918743, + "loss": 3.3594, + "loss_text": 0.9609, + "state_loss_0": 0.0, + "step": 13275 + }, + { + "epoch": 0.9606356085229325, + "grad_norm": 0.8118528723716736, + "learning_rate": 2.7802744080406274e-05, + "loss": 3.2541, + "step": 13300 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.25, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3594, + "epoch": 0.9606356085229325, + "loss": 3.1719, + "loss_text": 0.4863, + "state_loss_0": 0.0, + "step": 13300 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4062, + "epoch": 0.9606356085229325, + "loss": 3.2656, + "loss_text": 0.7578, + "state_loss_0": 0.0, + "step": 13300 + }, + { + "epoch": 0.9624413145539906, + "grad_norm": 0.8029466867446899, + "learning_rate": 2.7730089243674005e-05, + "loss": 3.2536, + "step": 13325 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.3125, + "epoch": 0.9624413145539906, + "loss": 3.2188, + "loss_text": 0.8164, + "state_loss_0": 0.0, + "step": 13325 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.5312, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.875, + "audio_loss_6": 3.5625, + "epoch": 0.9624413145539906, + "loss": 3.3438, + "loss_text": 0.6562, + "state_loss_0": 0.0, + "step": 13325 + }, + { + "epoch": 0.9642470205850487, + "grad_norm": 0.8151109218597412, + "learning_rate": 2.7657411062869486e-05, + "loss": 3.2513, + "step": 13350 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.5, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.8438, + "audio_loss_6": 3.625, + "epoch": 0.9642470205850487, + "loss": 3.4375, + "loss_text": 0.7695, + "state_loss_0": 0.0, + "step": 13350 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.625, + "audio_loss_6": 3.3438, + "epoch": 0.9642470205850487, + "loss": 3.2188, + "loss_text": 0.5117, + "state_loss_0": 0.0, + "step": 13350 + }, + { + "epoch": 0.9660527266161069, + "grad_norm": 0.91599440574646, + "learning_rate": 2.758471015943933e-05, + "loss": 3.2582, + "step": 13375 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.375, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.25, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.4219, + "epoch": 0.9660527266161069, + "loss": 3.1406, + "loss_text": 0.7383, + "state_loss_0": 0.0, + "step": 13375 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.4844, + "epoch": 0.9660527266161069, + "loss": 3.3125, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 13375 + }, + { + "epoch": 0.9678584326471651, + "grad_norm": 0.7344107627868652, + "learning_rate": 2.7511987155024425e-05, + "loss": 3.2539, + "step": 13400 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5469, + "epoch": 0.9678584326471651, + "loss": 3.2812, + "loss_text": 1.1641, + "state_loss_0": 0.0, + "step": 13400 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4219, + "epoch": 0.9678584326471651, + "loss": 3.25, + "loss_text": 0.8164, + "state_loss_0": 0.0, + "step": 13400 + }, + { + "epoch": 0.9696641386782232, + "grad_norm": 0.7218741178512573, + "learning_rate": 2.7439242671454667e-05, + "loss": 3.2534, + "step": 13425 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4062, + "epoch": 0.9696641386782232, + "loss": 3.1719, + "loss_text": 0.4453, + "state_loss_0": 0.0, + "step": 13425 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.4531, + "epoch": 0.9696641386782232, + "loss": 3.2031, + "loss_text": 0.3652, + "state_loss_0": 0.0, + "step": 13425 + }, + { + "epoch": 0.9714698447092813, + "grad_norm": 0.8031677603721619, + "learning_rate": 2.736647733074359e-05, + "loss": 3.2448, + "step": 13450 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.5156, + "audio_loss_3": 4.4062, + "audio_loss_4": 4.0, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5, + "epoch": 0.9714698447092813, + "loss": 3.3594, + "loss_text": 0.5312, + "state_loss_0": 0.0, + "step": 13450 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4531, + "epoch": 0.9714698447092813, + "loss": 3.25, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 13450 + }, + { + "epoch": 0.9732755507403394, + "grad_norm": 0.7814299464225769, + "learning_rate": 2.7293691755083074e-05, + "loss": 3.2463, + "step": 13475 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.3281, + "epoch": 0.9732755507403394, + "loss": 3.1719, + "loss_text": 0.7539, + "state_loss_0": 0.0, + "step": 13475 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5, + "epoch": 0.9732755507403394, + "loss": 3.25, + "loss_text": 0.4395, + "state_loss_0": 0.0, + "step": 13475 + }, + { + "epoch": 0.9750812567713976, + "grad_norm": 1.0927953720092773, + "learning_rate": 2.722088656683804e-05, + "loss": 3.2412, + "step": 13500 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.3594, + "epoch": 0.9750812567713976, + "loss": 3.2656, + "loss_text": 0.5547, + "state_loss_0": 0.0, + "step": 13500 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.4531, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5156, + "epoch": 0.9750812567713976, + "loss": 3.3906, + "loss_text": 1.0234, + "state_loss_0": 0.0, + "step": 13500 + }, + { + "epoch": 0.9768869628024558, + "grad_norm": 0.8132412433624268, + "learning_rate": 2.7148062388541085e-05, + "loss": 3.2452, + "step": 13525 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.5156, + "epoch": 0.9768869628024558, + "loss": 3.2812, + "loss_text": 0.9844, + "state_loss_0": 0.0, + "step": 13525 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.8281, + "audio_loss_6": 3.5156, + "epoch": 0.9768869628024558, + "loss": 3.3125, + "loss_text": 0.3809, + "state_loss_0": 0.0, + "step": 13525 + }, + { + "epoch": 0.9786926688335139, + "grad_norm": 0.7717610001564026, + "learning_rate": 2.7075219842887195e-05, + "loss": 3.2451, + "step": 13550 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4375, + "audio_loss_4": 4.0, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.6094, + "epoch": 0.9786926688335139, + "loss": 3.3281, + "loss_text": 0.6641, + "state_loss_0": 0.0, + "step": 13550 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9688, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.5938, + "epoch": 0.9786926688335139, + "loss": 3.3594, + "loss_text": 0.9023, + "state_loss_0": 0.0, + "step": 13550 + }, + { + "epoch": 0.980498374864572, + "grad_norm": 0.7793428301811218, + "learning_rate": 2.7002359552728408e-05, + "loss": 3.2379, + "step": 13575 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.4062, + "epoch": 0.980498374864572, + "loss": 3.25, + "loss_text": 0.6406, + "state_loss_0": 0.0, + "step": 13575 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.5, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5469, + "epoch": 0.980498374864572, + "loss": 3.3125, + "loss_text": 0.4961, + "state_loss_0": 0.0, + "step": 13575 + }, + { + "epoch": 0.9823040808956301, + "grad_norm": 0.7056400179862976, + "learning_rate": 2.692948214106849e-05, + "loss": 3.245, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_AQACONVA": 3.3281, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.375, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.5781, + "eval_loss_AQACONVA": 3.5781, + "eval_loss_text_AQACONVA": 2.1719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_AQACONVA": 3.0938, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.5156, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.5625, + "eval_audio_loss_2_AQACONVA": 3.375, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.5156, + "eval_loss": 3.4062, + "eval_loss_AQACONVA": 3.4062, + "eval_loss_text_AQACONVA": 1.3906, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.2969, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.7656, + "eval_audio_loss_6_AQACONVA": 3.5, + "eval_loss": 3.5781, + "eval_loss_AQACONVA": 3.5781, + "eval_loss_text_AQACONVA": 2.7812, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.7656, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.9219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_AQACONVA": 3.5312, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.5625, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.2969, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.4844, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 1.9922, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5781, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.9922, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.6641, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.4375, + "eval_audio_loss_2_RQACONVA": 3.25, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5625, + "eval_loss": 3.4688, + "eval_loss_RQACONVA": 3.4688, + "eval_loss_text_RQACONVA": 2.0781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_RQACONVA": 3.5469, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.6406, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.6406, + "eval_loss_RQACONVA": 3.6406, + "eval_loss_text_RQACONVA": 2.7188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.0, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4219, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5156, + "eval_loss_RQACONVA": 3.5156, + "eval_loss_text_RQACONVA": 1.9141, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7188, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.2812, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.6406, + "eval_loss_RQACONV": 0.6406, + "eval_loss_text_RQACONV": 1.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.5195, + "eval_loss_RQACONV": 0.5195, + "eval_loss_text_RQACONV": 1.0391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.7109, + "eval_loss_RQACONV": 0.7109, + "eval_loss_text_RQACONV": 1.4219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.6172, + "eval_loss_RQACONV": 0.6172, + "eval_loss_text_RQACONV": 1.2344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.8359, + "eval_loss_RQACONV": 0.8359, + "eval_loss_text_RQACONV": 1.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.6445, + "eval_loss_RQACONV": 0.6445, + "eval_loss_text_RQACONV": 1.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.7305, + "eval_loss_RQACONV": 0.7305, + "eval_loss_text_RQACONV": 1.4609, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.8594, + "eval_loss_RQACONV": 0.8594, + "eval_loss_text_RQACONV": 1.7188, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.4492, + "eval_loss_RQACONV": 0.4492, + "eval_loss_text_RQACONV": 0.8984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.4883, + "eval_loss_RQACONV": 0.4883, + "eval_loss_text_RQACONV": 0.9766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.418, + "eval_loss_RQACONV": 0.418, + "eval_loss_text_RQACONV": 0.8359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.4883, + "eval_loss_RQACONV": 0.4883, + "eval_loss_text_RQACONV": 0.9766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.459, + "eval_loss_RQACONV": 0.459, + "eval_loss_text_RQACONV": 0.918, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 1.3359, + "eval_loss_RQACONV": 1.3359, + "eval_loss_text_RQACONV": 2.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.1279, + "eval_loss_RQACONV": 0.1279, + "eval_loss_text_RQACONV": 0.2559, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.1582, + "eval_loss_RQACONV": 0.1582, + "eval_loss_text_RQACONV": 0.3164, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.0757, + "eval_loss_RQACONV": 0.0757, + "eval_loss_text_RQACONV": 0.1514, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.1621, + "eval_loss_RQACONV": 0.1621, + "eval_loss_text_RQACONV": 0.3242, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.0347, + "eval_loss_RQACONV": 0.0347, + "eval_loss_text_RQACONV": 0.0693, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.1377, + "eval_loss_RQACONV": 0.1377, + "eval_loss_text_RQACONV": 0.2754, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.1118, + "eval_loss_RQACONV": 0.1118, + "eval_loss_text_RQACONV": 0.2236, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.0977, + "eval_loss_RQACONV": 0.0977, + "eval_loss_text_RQACONV": 0.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 0.5742, + "eval_loss_RQACONV": 0.5742, + "eval_loss_text_RQACONV": 1.1484, + "eval_state_loss_0_RQACONV": 0.0, + "step": 13600 + }, + { + "epoch": 0.9823040808956301, + "eval_loss": 1.5430461168289185, + "eval_runtime": 27.4913, + "eval_samples_per_second": 194.57, + "eval_steps_per_second": 1.528, + "step": 13600 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.7969, + "audio_loss_6": 3.5469, + "epoch": 0.9823040808956301, + "loss": 3.375, + "loss_text": 0.918, + "state_loss_0": 0.0, + "step": 13600 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.9062, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2656, + "epoch": 0.9823040808956301, + "loss": 2.9688, + "loss_text": 0.293, + "state_loss_0": 0.0, + "step": 13600 + }, + { + "epoch": 0.9841097869266884, + "grad_norm": 0.9491550922393799, + "learning_rate": 2.6856588231057605e-05, + "loss": 3.2394, + "step": 13625 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.4531, + "epoch": 0.9841097869266884, + "loss": 3.2656, + "loss_text": 0.4082, + "state_loss_0": 0.0, + "step": 13625 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.125, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.3281, + "epoch": 0.9841097869266884, + "loss": 3.1719, + "loss_text": 1.0469, + "state_loss_0": 0.0, + "step": 13625 + }, + { + "epoch": 0.9859154929577465, + "grad_norm": 0.7830501198768616, + "learning_rate": 2.6783678445986993e-05, + "loss": 3.2381, + "step": 13650 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.4062, + "epoch": 0.9859154929577465, + "loss": 3.2969, + "loss_text": 1.1172, + "state_loss_0": 0.0, + "step": 13650 + }, + { + "audio_loss_0": 3.3906, + "audio_loss_1": 3.7812, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5312, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.8906, + "audio_loss_6": 3.6094, + "epoch": 0.9859154929577465, + "loss": 3.4062, + "loss_text": 0.6523, + "state_loss_0": 0.0, + "step": 13650 + }, + { + "epoch": 0.9877211989888046, + "grad_norm": 0.7291406989097595, + "learning_rate": 2.671075340928363e-05, + "loss": 3.2411, + "step": 13675 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.3906, + "epoch": 0.9877211989888046, + "loss": 3.2344, + "loss_text": 0.7578, + "state_loss_0": 0.0, + "step": 13675 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.875, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4375, + "epoch": 0.9877211989888046, + "loss": 3.25, + "loss_text": 0.5586, + "state_loss_0": 0.0, + "step": 13675 + }, + { + "epoch": 0.9895269050198627, + "grad_norm": 0.8766806125640869, + "learning_rate": 2.6637813744504908e-05, + "loss": 3.2373, + "step": 13700 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0312, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.375, + "epoch": 0.9895269050198627, + "loss": 3.2656, + "loss_text": 0.6289, + "state_loss_0": 0.0, + "step": 13700 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.5, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3906, + "epoch": 0.9895269050198627, + "loss": 3.2344, + "loss_text": 0.7695, + "state_loss_0": 0.0, + "step": 13700 + }, + { + "epoch": 0.991332611050921, + "grad_norm": 0.8677784204483032, + "learning_rate": 2.65648600753333e-05, + "loss": 3.2412, + "step": 13725 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.75, + "audio_loss_6": 3.4375, + "epoch": 0.991332611050921, + "loss": 3.2812, + "loss_text": 0.457, + "state_loss_0": 0.0, + "step": 13725 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.3906, + "epoch": 0.991332611050921, + "loss": 3.2031, + "loss_text": 0.6172, + "state_loss_0": 0.0, + "step": 13725 + }, + { + "epoch": 0.9931383170819791, + "grad_norm": 0.8278300166130066, + "learning_rate": 2.6491893025571017e-05, + "loss": 3.2424, + "step": 13750 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.4219, + "epoch": 0.9931383170819791, + "loss": 3.2656, + "loss_text": 0.7344, + "state_loss_0": 0.0, + "step": 13750 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4688, + "epoch": 0.9931383170819791, + "loss": 3.2656, + "loss_text": 0.6836, + "state_loss_0": 0.0, + "step": 13750 + }, + { + "epoch": 0.9949440231130372, + "grad_norm": 0.91463303565979, + "learning_rate": 2.64189132191347e-05, + "loss": 3.2373, + "step": 13775 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.625, + "audio_loss_6": 3.5, + "epoch": 0.9949440231130372, + "loss": 3.2031, + "loss_text": 0.6211, + "state_loss_0": 0.0, + "step": 13775 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.875, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3438, + "epoch": 0.9949440231130372, + "loss": 3.2031, + "loss_text": 0.5391, + "state_loss_0": 0.0, + "step": 13775 + }, + { + "epoch": 0.9967497291440953, + "grad_norm": 0.7658233642578125, + "learning_rate": 2.634592128005005e-05, + "loss": 3.2332, + "step": 13800 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.4219, + "epoch": 0.9967497291440953, + "loss": 3.2656, + "loss_text": 0.75, + "state_loss_0": 0.0, + "step": 13800 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.4688, + "epoch": 0.9967497291440953, + "loss": 3.2656, + "loss_text": 0.9336, + "state_loss_0": 0.0, + "step": 13800 + }, + { + "epoch": 0.9985554351751534, + "grad_norm": 0.7888367176055908, + "learning_rate": 2.627291783244652e-05, + "loss": 3.2372, + "step": 13825 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5, + "epoch": 0.9985554351751534, + "loss": 3.3438, + "loss_text": 1.0312, + "state_loss_0": 0.0, + "step": 13825 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.5625, + "epoch": 0.9985554351751534, + "loss": 3.25, + "loss_text": 0.6719, + "state_loss_0": 0.0, + "step": 13825 + }, + { + "epoch": 1.0003611412062117, + "grad_norm": 0.7769561409950256, + "learning_rate": 2.6199903500551963e-05, + "loss": 3.2173, + "step": 13850 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5312, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1406, + "epoch": 1.0003611412062117, + "loss": 3.0, + "loss_text": 0.2988, + "state_loss_0": 0.0, + "step": 13850 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.75, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.75, + "audio_loss_6": 3.5938, + "epoch": 1.0003611412062117, + "loss": 3.3125, + "loss_text": 0.3828, + "state_loss_0": 0.0, + "step": 13850 + }, + { + "epoch": 1.0021668472372698, + "grad_norm": 0.7762817144393921, + "learning_rate": 2.6126878908687302e-05, + "loss": 3.1093, + "step": 13875 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.3594, + "epoch": 1.0021668472372698, + "loss": 3.125, + "loss_text": 0.418, + "state_loss_0": 0.0, + "step": 13875 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.2344, + "epoch": 1.0021668472372698, + "loss": 3.0312, + "loss_text": 0.5625, + "state_loss_0": 0.0, + "step": 13875 + }, + { + "epoch": 1.003972553268328, + "grad_norm": 0.7212494611740112, + "learning_rate": 2.6053844681261198e-05, + "loss": 3.1084, + "step": 13900 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2656, + "epoch": 1.003972553268328, + "loss": 3.0156, + "loss_text": 0.3418, + "state_loss_0": 0.0, + "step": 13900 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.625, + "audio_loss_6": 3.375, + "epoch": 1.003972553268328, + "loss": 3.1562, + "loss_text": 0.2793, + "state_loss_0": 0.0, + "step": 13900 + }, + { + "epoch": 1.005778259299386, + "grad_norm": 0.8538562059402466, + "learning_rate": 2.5980801442764684e-05, + "loss": 3.1086, + "step": 13925 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4688, + "epoch": 1.005778259299386, + "loss": 3.2031, + "loss_text": 0.3789, + "state_loss_0": 0.0, + "step": 13925 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.3594, + "epoch": 1.005778259299386, + "loss": 3.1719, + "loss_text": 0.3281, + "state_loss_0": 0.0, + "step": 13925 + }, + { + "epoch": 1.0075839653304441, + "grad_norm": 0.8126384019851685, + "learning_rate": 2.590774981776587e-05, + "loss": 3.125, + "step": 13950 + }, + { + "audio_loss_0": 2.6406, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2344, + "epoch": 1.0075839653304441, + "loss": 3.0469, + "loss_text": 0.4219, + "state_loss_0": 0.0, + "step": 13950 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.7031, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.4531, + "epoch": 1.0075839653304441, + "loss": 3.2969, + "loss_text": 0.5391, + "state_loss_0": 0.0, + "step": 13950 + }, + { + "epoch": 1.0093896713615023, + "grad_norm": 0.7983788847923279, + "learning_rate": 2.583469043090455e-05, + "loss": 3.1114, + "step": 13975 + }, + { + "audio_loss_0": 3.5625, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.375, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.4688, + "epoch": 1.0093896713615023, + "loss": 3.2812, + "loss_text": 0.249, + "state_loss_0": 0.0, + "step": 13975 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.3906, + "epoch": 1.0093896713615023, + "loss": 3.1719, + "loss_text": 0.4883, + "state_loss_0": 0.0, + "step": 13975 + }, + { + "epoch": 1.0111953773925606, + "grad_norm": 0.879227340221405, + "learning_rate": 2.57616239068869e-05, + "loss": 3.1152, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.7031, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.2969, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.5312, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.8359, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.5312, + "eval_loss": 3.4531, + "eval_loss_AQACONVA": 3.4531, + "eval_loss_text_AQACONVA": 1.5312, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 3.9844, + "eval_audio_loss_5_AQACONVA": 3.75, + "eval_audio_loss_6_AQACONVA": 3.5, + "eval_loss": 3.5938, + "eval_loss_AQACONVA": 3.5938, + "eval_loss_text_AQACONVA": 2.9688, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 3.0625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0312, + "eval_audio_loss_6_AQACONVA": 3.6875, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.5625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9531, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.1562, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.625, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5781, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.1406, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.3438, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.75, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.75, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4844, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.3594, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_RQACONVA": 3.4688, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.625, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 2.4375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.6094, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 3.0, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.1719, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_RQACONVA": 3.1562, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.0, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7031, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.6133, + "eval_loss_RQACONV": 0.6133, + "eval_loss_text_RQACONV": 1.2266, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.5664, + "eval_loss_RQACONV": 0.5664, + "eval_loss_text_RQACONV": 1.1328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.7656, + "eval_loss_RQACONV": 0.7656, + "eval_loss_text_RQACONV": 1.5312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.6797, + "eval_loss_RQACONV": 0.6797, + "eval_loss_text_RQACONV": 1.3594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.8242, + "eval_loss_RQACONV": 0.8242, + "eval_loss_text_RQACONV": 1.6484, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.7461, + "eval_loss_RQACONV": 0.7461, + "eval_loss_text_RQACONV": 1.4922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.8047, + "eval_loss_RQACONV": 0.8047, + "eval_loss_text_RQACONV": 1.6094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.9648, + "eval_loss_RQACONV": 0.9648, + "eval_loss_text_RQACONV": 1.9297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.4668, + "eval_loss_RQACONV": 0.4668, + "eval_loss_text_RQACONV": 0.9336, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.6602, + "eval_loss_RQACONV": 0.6602, + "eval_loss_text_RQACONV": 1.3203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.5781, + "eval_loss_RQACONV": 0.5781, + "eval_loss_text_RQACONV": 1.1562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.4941, + "eval_loss_RQACONV": 0.4941, + "eval_loss_text_RQACONV": 0.9883, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 1.4062, + "eval_loss_RQACONV": 1.4062, + "eval_loss_text_RQACONV": 2.8125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.1328, + "eval_loss_RQACONV": 0.1328, + "eval_loss_text_RQACONV": 0.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.0762, + "eval_loss_RQACONV": 0.0762, + "eval_loss_text_RQACONV": 0.1523, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.1631, + "eval_loss_RQACONV": 0.1631, + "eval_loss_text_RQACONV": 0.3262, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.0322, + "eval_loss_RQACONV": 0.0322, + "eval_loss_text_RQACONV": 0.0645, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.1475, + "eval_loss_RQACONV": 0.1475, + "eval_loss_text_RQACONV": 0.2949, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.104, + "eval_loss_RQACONV": 0.104, + "eval_loss_text_RQACONV": 0.208, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.0972, + "eval_loss_RQACONV": 0.0972, + "eval_loss_text_RQACONV": 0.1943, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14000 + }, + { + "epoch": 1.0111953773925606, + "eval_loss": 1.5813148021697998, + "eval_runtime": 27.7855, + "eval_samples_per_second": 192.51, + "eval_steps_per_second": 1.512, + "step": 14000 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5156, + "epoch": 1.0111953773925606, + "loss": 3.1875, + "loss_text": 0.332, + "state_loss_0": 0.0, + "step": 14000 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.3125, + "epoch": 1.0111953773925606, + "loss": 3.1562, + "loss_text": 0.2402, + "state_loss_0": 0.0, + "step": 14000 + }, + { + "epoch": 1.0130010834236187, + "grad_norm": 0.8214014172554016, + "learning_rate": 2.568855087048012e-05, + "loss": 3.1062, + "step": 14025 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.3281, + "epoch": 1.0130010834236187, + "loss": 3.1094, + "loss_text": 0.2773, + "state_loss_0": 0.0, + "step": 14025 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.375, + "epoch": 1.0130010834236187, + "loss": 3.2031, + "loss_text": 0.4844, + "state_loss_0": 0.0, + "step": 14025 + }, + { + "epoch": 1.0148067894546768, + "grad_norm": 0.8458222150802612, + "learning_rate": 2.5615471946507097e-05, + "loss": 3.1101, + "step": 14050 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2812, + "epoch": 1.0148067894546768, + "loss": 3.125, + "loss_text": 0.3945, + "state_loss_0": 0.0, + "step": 14050 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.2656, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.375, + "epoch": 1.0148067894546768, + "loss": 3.0781, + "loss_text": 0.2832, + "state_loss_0": 0.0, + "step": 14050 + }, + { + "epoch": 1.016612495485735, + "grad_norm": 0.7832428216934204, + "learning_rate": 2.5542387759841062e-05, + "loss": 3.1094, + "step": 14075 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.3125, + "epoch": 1.016612495485735, + "loss": 3.0938, + "loss_text": 0.3887, + "state_loss_0": 0.0, + "step": 14075 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5, + "audio_loss_6": 3.3438, + "epoch": 1.016612495485735, + "loss": 3.0781, + "loss_text": 0.4141, + "state_loss_0": 0.0, + "step": 14075 + }, + { + "epoch": 1.018418201516793, + "grad_norm": 0.8357475996017456, + "learning_rate": 2.5469298935400237e-05, + "loss": 3.1048, + "step": 14100 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4844, + "epoch": 1.018418201516793, + "loss": 3.25, + "loss_text": 0.4473, + "state_loss_0": 0.0, + "step": 14100 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.5, + "audio_loss_6": 3.3125, + "epoch": 1.018418201516793, + "loss": 3.0625, + "loss_text": 0.4062, + "state_loss_0": 0.0, + "step": 14100 + }, + { + "epoch": 1.0202239075478512, + "grad_norm": 0.6850032806396484, + "learning_rate": 2.5396206098142515e-05, + "loss": 3.1135, + "step": 14125 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4062, + "epoch": 1.0202239075478512, + "loss": 3.2031, + "loss_text": 0.4922, + "state_loss_0": 0.0, + "step": 14125 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.4531, + "epoch": 1.0202239075478512, + "loss": 3.2812, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 14125 + }, + { + "epoch": 1.0220296135789093, + "grad_norm": 0.7799955010414124, + "learning_rate": 2.532310987306009e-05, + "loss": 3.1068, + "step": 14150 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.2969, + "epoch": 1.0220296135789093, + "loss": 3.1406, + "loss_text": 0.6641, + "state_loss_0": 0.0, + "step": 14150 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.7344, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.4375, + "epoch": 1.0220296135789093, + "loss": 3.2812, + "loss_text": 0.832, + "state_loss_0": 0.0, + "step": 14150 + }, + { + "epoch": 1.0238353196099674, + "grad_norm": 0.774755597114563, + "learning_rate": 2.525001088517413e-05, + "loss": 3.1065, + "step": 14175 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.2344, + "epoch": 1.0238353196099674, + "loss": 3.0, + "loss_text": 0.2637, + "state_loss_0": 0.0, + "step": 14175 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.375, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.3281, + "epoch": 1.0238353196099674, + "loss": 3.2031, + "loss_text": 0.5469, + "state_loss_0": 0.0, + "step": 14175 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.7990733981132507, + "learning_rate": 2.5176909759529417e-05, + "loss": 3.1023, + "step": 14200 + }, + { + "audio_loss_0": 3.5156, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.4375, + "epoch": 1.0256410256410255, + "loss": 3.25, + "loss_text": 0.2891, + "state_loss_0": 0.0, + "step": 14200 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.8906, + "audio_loss_3": 3.9219, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.3594, + "audio_loss_6": 2.875, + "epoch": 1.0256410256410255, + "loss": 2.8906, + "loss_text": 0.3945, + "state_loss_0": 0.0, + "step": 14200 + }, + { + "epoch": 1.027446731672084, + "grad_norm": 0.7825935482978821, + "learning_rate": 2.5103807121189026e-05, + "loss": 3.1017, + "step": 14225 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.25, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2188, + "epoch": 1.027446731672084, + "loss": 3.0312, + "loss_text": 0.2051, + "state_loss_0": 0.0, + "step": 14225 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2969, + "epoch": 1.027446731672084, + "loss": 3.125, + "loss_text": 0.5742, + "state_loss_0": 0.0, + "step": 14225 + }, + { + "epoch": 1.029252437703142, + "grad_norm": 0.8570442795753479, + "learning_rate": 2.5030703595228953e-05, + "loss": 3.1091, + "step": 14250 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0938, + "epoch": 1.029252437703142, + "loss": 2.9688, + "loss_text": 0.4473, + "state_loss_0": 0.0, + "step": 14250 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2812, + "epoch": 1.029252437703142, + "loss": 3.0781, + "loss_text": 0.3984, + "state_loss_0": 0.0, + "step": 14250 + }, + { + "epoch": 1.0310581437342001, + "grad_norm": 0.7040374279022217, + "learning_rate": 2.495759980673279e-05, + "loss": 3.1069, + "step": 14275 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2812, + "epoch": 1.0310581437342001, + "loss": 3.0938, + "loss_text": 0.5195, + "state_loss_0": 0.0, + "step": 14275 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.3906, + "epoch": 1.0310581437342001, + "loss": 3.0781, + "loss_text": 0.2119, + "state_loss_0": 0.0, + "step": 14275 + }, + { + "epoch": 1.0328638497652582, + "grad_norm": 0.6803432703018188, + "learning_rate": 2.4884496380786383e-05, + "loss": 3.1018, + "step": 14300 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1875, + "epoch": 1.0328638497652582, + "loss": 3.0312, + "loss_text": 0.3789, + "state_loss_0": 0.0, + "step": 14300 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2969, + "epoch": 1.0328638497652582, + "loss": 3.1094, + "loss_text": 0.2988, + "state_loss_0": 0.0, + "step": 14300 + }, + { + "epoch": 1.0346695557963164, + "grad_norm": 0.7093298435211182, + "learning_rate": 2.4811393942472464e-05, + "loss": 3.1027, + "step": 14325 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3281, + "epoch": 1.0346695557963164, + "loss": 3.1562, + "loss_text": 0.4375, + "state_loss_0": 0.0, + "step": 14325 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.5, + "audio_loss_2": 3.25, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.3906, + "epoch": 1.0346695557963164, + "loss": 3.2031, + "loss_text": 0.4102, + "state_loss_0": 0.0, + "step": 14325 + }, + { + "epoch": 1.0364752618273745, + "grad_norm": 0.7349820137023926, + "learning_rate": 2.4738293116865323e-05, + "loss": 3.1017, + "step": 14350 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.3438, + "epoch": 1.0364752618273745, + "loss": 3.1406, + "loss_text": 0.6641, + "state_loss_0": 0.0, + "step": 14350 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1875, + "epoch": 1.0364752618273745, + "loss": 3.0312, + "loss_text": 0.4512, + "state_loss_0": 0.0, + "step": 14350 + }, + { + "epoch": 1.0382809678584326, + "grad_norm": 0.8741734623908997, + "learning_rate": 2.4665194529025465e-05, + "loss": 3.1025, + "step": 14375 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.375, + "epoch": 1.0382809678584326, + "loss": 3.1406, + "loss_text": 0.5234, + "state_loss_0": 0.0, + "step": 14375 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.1719, + "epoch": 1.0382809678584326, + "loss": 2.9219, + "loss_text": 0.2139, + "state_loss_0": 0.0, + "step": 14375 + }, + { + "epoch": 1.0400866738894907, + "grad_norm": 0.8751253485679626, + "learning_rate": 2.459209880399426e-05, + "loss": 3.1058, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 2.3438, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.5312, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.8828, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.5312, + "eval_loss": 3.4531, + "eval_loss_AQACONVA": 3.4531, + "eval_loss_text_AQACONVA": 1.5469, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.7656, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 2.9844, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 3.0938, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.2188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.875, + "eval_audio_loss_5_RQACONVA": 3.5781, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.4688, + "eval_loss_RQACONVA": 3.4688, + "eval_loss_text_RQACONVA": 2.2812, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.7969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.4844, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.3906, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_RQACONVA": 3.5625, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.6719, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.5, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.6094, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 3.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.1094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.0312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7188, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.4219, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.7695, + "eval_loss_RQACONV": 0.7695, + "eval_loss_text_RQACONV": 1.5391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.7344, + "eval_loss_RQACONV": 0.7344, + "eval_loss_text_RQACONV": 1.4688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.8672, + "eval_loss_RQACONV": 0.8672, + "eval_loss_text_RQACONV": 1.7344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.7227, + "eval_loss_RQACONV": 0.7227, + "eval_loss_text_RQACONV": 1.4453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.8672, + "eval_loss_RQACONV": 0.8672, + "eval_loss_text_RQACONV": 1.7344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.9375, + "eval_loss_RQACONV": 0.9375, + "eval_loss_text_RQACONV": 1.875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.4941, + "eval_loss_RQACONV": 0.4941, + "eval_loss_text_RQACONV": 0.9883, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.457, + "eval_loss_RQACONV": 0.457, + "eval_loss_text_RQACONV": 0.9141, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.6562, + "eval_loss_RQACONV": 0.6562, + "eval_loss_text_RQACONV": 1.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.5703, + "eval_loss_RQACONV": 0.5703, + "eval_loss_text_RQACONV": 1.1406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 1.4062, + "eval_loss_RQACONV": 1.4062, + "eval_loss_text_RQACONV": 2.8125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.1387, + "eval_loss_RQACONV": 0.1387, + "eval_loss_text_RQACONV": 0.2773, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.1543, + "eval_loss_RQACONV": 0.1543, + "eval_loss_text_RQACONV": 0.3086, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.0771, + "eval_loss_RQACONV": 0.0771, + "eval_loss_text_RQACONV": 0.1543, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.1572, + "eval_loss_RQACONV": 0.1572, + "eval_loss_text_RQACONV": 0.3145, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.1543, + "eval_loss_RQACONV": 0.1543, + "eval_loss_text_RQACONV": 0.3086, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.0286, + "eval_loss_RQACONV": 0.0286, + "eval_loss_text_RQACONV": 0.0571, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.1484, + "eval_loss_RQACONV": 0.1484, + "eval_loss_text_RQACONV": 0.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.1035, + "eval_loss_RQACONV": 0.1035, + "eval_loss_text_RQACONV": 0.207, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.0967, + "eval_loss_RQACONV": 0.0967, + "eval_loss_text_RQACONV": 0.1934, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14400 + }, + { + "epoch": 1.0400866738894907, + "eval_loss": 1.5818662643432617, + "eval_runtime": 27.575, + "eval_samples_per_second": 193.98, + "eval_steps_per_second": 1.523, + "step": 14400 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.3438, + "epoch": 1.0400866738894907, + "loss": 3.0781, + "loss_text": 0.4941, + "state_loss_0": 0.0, + "step": 14400 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.4844, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5, + "audio_loss_6": 3.4375, + "epoch": 1.0400866738894907, + "loss": 3.2812, + "loss_text": 0.8125, + "state_loss_0": 0.0, + "step": 14400 + }, + { + "epoch": 1.0418923799205488, + "grad_norm": 0.7927209734916687, + "learning_rate": 2.4519006566788593e-05, + "loss": 3.108, + "step": 14425 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.25, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.3594, + "epoch": 1.0418923799205488, + "loss": 3.1406, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 14425 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2344, + "epoch": 1.0418923799205488, + "loss": 3.0625, + "loss_text": 0.4941, + "state_loss_0": 0.0, + "step": 14425 + }, + { + "epoch": 1.0436980859516072, + "grad_norm": 0.7212491035461426, + "learning_rate": 2.4445918442395535e-05, + "loss": 3.1099, + "step": 14450 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.3594, + "epoch": 1.0436980859516072, + "loss": 3.125, + "loss_text": 0.4199, + "state_loss_0": 0.0, + "step": 14450 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.75, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.25, + "epoch": 1.0436980859516072, + "loss": 3.1094, + "loss_text": 0.4707, + "state_loss_0": 0.0, + "step": 14450 + }, + { + "epoch": 1.0455037919826653, + "grad_norm": 0.8275578022003174, + "learning_rate": 2.4372835055766983e-05, + "loss": 3.095, + "step": 14475 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.3125, + "epoch": 1.0455037919826653, + "loss": 3.1562, + "loss_text": 0.5664, + "state_loss_0": 0.0, + "step": 14475 + }, + { + "audio_loss_0": 2.7656, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2031, + "epoch": 1.0455037919826653, + "loss": 3.0312, + "loss_text": 0.4414, + "state_loss_0": 0.0, + "step": 14475 + }, + { + "epoch": 1.0473094980137234, + "grad_norm": 0.8033379912376404, + "learning_rate": 2.4299757031814323e-05, + "loss": 3.102, + "step": 14500 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.2031, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2031, + "epoch": 1.0473094980137234, + "loss": 2.9844, + "loss_text": 0.1758, + "state_loss_0": 0.0, + "step": 14500 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.75, + "audio_loss_6": 3.4062, + "epoch": 1.0473094980137234, + "loss": 3.1719, + "loss_text": 0.3652, + "state_loss_0": 0.0, + "step": 14500 + }, + { + "epoch": 1.0491152040447815, + "grad_norm": 0.7116935849189758, + "learning_rate": 2.422668499540309e-05, + "loss": 3.0987, + "step": 14525 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.25, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.625, + "audio_loss_6": 3.4844, + "epoch": 1.0491152040447815, + "loss": 3.2656, + "loss_text": 0.6328, + "state_loss_0": 0.0, + "step": 14525 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2812, + "epoch": 1.0491152040447815, + "loss": 3.1094, + "loss_text": 0.3418, + "state_loss_0": 0.0, + "step": 14525 + }, + { + "epoch": 1.0509209100758397, + "grad_norm": 0.7498179078102112, + "learning_rate": 2.4153619571347617e-05, + "loss": 3.1043, + "step": 14550 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.3281, + "epoch": 1.0509209100758397, + "loss": 3.1094, + "loss_text": 0.4922, + "state_loss_0": 0.0, + "step": 14550 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4062, + "epoch": 1.0509209100758397, + "loss": 3.2188, + "loss_text": 0.6484, + "state_loss_0": 0.0, + "step": 14550 + }, + { + "epoch": 1.0527266161068978, + "grad_norm": 0.6912081837654114, + "learning_rate": 2.4080561384405697e-05, + "loss": 3.0992, + "step": 14575 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.1719, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2031, + "epoch": 1.0527266161068978, + "loss": 2.9375, + "loss_text": 0.2021, + "state_loss_0": 0.0, + "step": 14575 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.375, + "epoch": 1.0527266161068978, + "loss": 3.1719, + "loss_text": 0.3438, + "state_loss_0": 0.0, + "step": 14575 + }, + { + "epoch": 1.054532322137956, + "grad_norm": 0.7175329327583313, + "learning_rate": 2.4007511059273255e-05, + "loss": 3.11, + "step": 14600 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.625, + "audio_loss_6": 3.2812, + "epoch": 1.054532322137956, + "loss": 3.0469, + "loss_text": 0.3047, + "state_loss_0": 0.0, + "step": 14600 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.3906, + "epoch": 1.054532322137956, + "loss": 3.125, + "loss_text": 0.2852, + "state_loss_0": 0.0, + "step": 14600 + }, + { + "epoch": 1.056338028169014, + "grad_norm": 0.7912352681159973, + "learning_rate": 2.393446922057897e-05, + "loss": 3.102, + "step": 14625 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.3125, + "epoch": 1.056338028169014, + "loss": 3.125, + "loss_text": 0.2656, + "state_loss_0": 0.0, + "step": 14625 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3438, + "epoch": 1.056338028169014, + "loss": 3.1406, + "loss_text": 0.5273, + "state_loss_0": 0.0, + "step": 14625 + }, + { + "epoch": 1.0581437342000721, + "grad_norm": 0.7097601890563965, + "learning_rate": 2.386143649287898e-05, + "loss": 3.0984, + "step": 14650 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.375, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.3906, + "epoch": 1.0581437342000721, + "loss": 3.1562, + "loss_text": 0.6211, + "state_loss_0": 0.0, + "step": 14650 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.125, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.4062, + "epoch": 1.0581437342000721, + "loss": 3.1719, + "loss_text": 0.3633, + "state_loss_0": 0.0, + "step": 14650 + }, + { + "epoch": 1.0599494402311305, + "grad_norm": 0.7987929582595825, + "learning_rate": 2.3788413500651495e-05, + "loss": 3.0974, + "step": 14675 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.75, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.375, + "epoch": 1.0599494402311305, + "loss": 3.1562, + "loss_text": 0.457, + "state_loss_0": 0.0, + "step": 14675 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.625, + "audio_loss_6": 3.3906, + "epoch": 1.0599494402311305, + "loss": 3.1562, + "loss_text": 0.4414, + "state_loss_0": 0.0, + "step": 14675 + }, + { + "epoch": 1.0617551462621886, + "grad_norm": 1.011500597000122, + "learning_rate": 2.371540086829149e-05, + "loss": 3.1001, + "step": 14700 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.125, + "audio_loss_4": 3.625, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2344, + "epoch": 1.0617551462621886, + "loss": 3.0, + "loss_text": 0.3242, + "state_loss_0": 0.0, + "step": 14700 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2812, + "epoch": 1.0617551462621886, + "loss": 3.0938, + "loss_text": 0.5859, + "state_loss_0": 0.0, + "step": 14700 + }, + { + "epoch": 1.0635608522932467, + "grad_norm": 0.8004282116889954, + "learning_rate": 2.364239922010536e-05, + "loss": 3.0908, + "step": 14725 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.0781, + "epoch": 1.0635608522932467, + "loss": 3.0156, + "loss_text": 0.1826, + "state_loss_0": 0.0, + "step": 14725 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2188, + "epoch": 1.0635608522932467, + "loss": 3.1094, + "loss_text": 0.127, + "state_loss_0": 0.0, + "step": 14725 + }, + { + "epoch": 1.0653665583243048, + "grad_norm": 0.7359916567802429, + "learning_rate": 2.3569409180305575e-05, + "loss": 3.0893, + "step": 14750 + }, + { + "audio_loss_0": 2.7656, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.4844, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0938, + "epoch": 1.0653665583243048, + "loss": 2.9375, + "loss_text": 0.4609, + "state_loss_0": 0.0, + "step": 14750 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.375, + "epoch": 1.0653665583243048, + "loss": 3.0312, + "loss_text": 0.2441, + "state_loss_0": 0.0, + "step": 14750 + }, + { + "epoch": 1.067172264355363, + "grad_norm": 0.820371687412262, + "learning_rate": 2.3496431373005347e-05, + "loss": 3.0935, + "step": 14775 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2812, + "epoch": 1.067172264355363, + "loss": 3.0938, + "loss_text": 0.4492, + "state_loss_0": 0.0, + "step": 14775 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2344, + "epoch": 1.067172264355363, + "loss": 3.0625, + "loss_text": 0.3184, + "state_loss_0": 0.0, + "step": 14775 + }, + { + "epoch": 1.068977970386421, + "grad_norm": 0.8345602750778198, + "learning_rate": 2.342346642221328e-05, + "loss": 3.09, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_AQACONVA": 3.375, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.6406, + "eval_loss_AQACONVA": 3.6406, + "eval_loss_text_AQACONVA": 2.3594, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.5156, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.8906, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.5312, + "eval_loss": 3.4844, + "eval_loss_AQACONVA": 3.4844, + "eval_loss_text_AQACONVA": 1.5859, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.7656, + "eval_audio_loss_6_AQACONVA": 3.5, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.0, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 3.1094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.6094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.2656, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.875, + "eval_audio_loss_5_RQACONVA": 3.5781, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.8047, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4688, + "eval_audio_loss_2_RQACONVA": 3.2812, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.4375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_RQACONVA": 3.5, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.625, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.9062, + "eval_audio_loss_6_RQACONVA": 3.6562, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 2.5, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_RQACONVA": 3.2812, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 3.0625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.3906, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5, + "eval_loss_RQACONVA": 3.5, + "eval_loss_text_RQACONVA": 2.0781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7031, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.3438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.5586, + "eval_loss_RQACONV": 0.5586, + "eval_loss_text_RQACONV": 1.1172, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.7969, + "eval_loss_RQACONV": 0.7969, + "eval_loss_text_RQACONV": 1.5938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.7422, + "eval_loss_RQACONV": 0.7422, + "eval_loss_text_RQACONV": 1.4844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.9141, + "eval_loss_RQACONV": 0.9141, + "eval_loss_text_RQACONV": 1.8281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.707, + "eval_loss_RQACONV": 0.707, + "eval_loss_text_RQACONV": 1.4141, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.8359, + "eval_loss_RQACONV": 0.8359, + "eval_loss_text_RQACONV": 1.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.9453, + "eval_loss_RQACONV": 0.9453, + "eval_loss_text_RQACONV": 1.8906, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.4648, + "eval_loss_RQACONV": 0.4648, + "eval_loss_text_RQACONV": 0.9297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.6641, + "eval_loss_RQACONV": 0.6641, + "eval_loss_text_RQACONV": 1.3281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 1.4219, + "eval_loss_RQACONV": 1.4219, + "eval_loss_text_RQACONV": 2.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.1396, + "eval_loss_RQACONV": 0.1396, + "eval_loss_text_RQACONV": 0.2793, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.1543, + "eval_loss_RQACONV": 0.1543, + "eval_loss_text_RQACONV": 0.3086, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.0762, + "eval_loss_RQACONV": 0.0762, + "eval_loss_text_RQACONV": 0.1523, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.1592, + "eval_loss_RQACONV": 0.1592, + "eval_loss_text_RQACONV": 0.3184, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.0339, + "eval_loss_RQACONV": 0.0339, + "eval_loss_text_RQACONV": 0.0679, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.1416, + "eval_loss_RQACONV": 0.1416, + "eval_loss_text_RQACONV": 0.2832, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.0981, + "eval_loss_RQACONV": 0.0981, + "eval_loss_text_RQACONV": 0.1963, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.0947, + "eval_loss_RQACONV": 0.0947, + "eval_loss_text_RQACONV": 0.1895, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 14800 + }, + { + "epoch": 1.068977970386421, + "eval_loss": 1.5855716466903687, + "eval_runtime": 28.5077, + "eval_samples_per_second": 187.634, + "eval_steps_per_second": 1.473, + "step": 14800 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2031, + "epoch": 1.068977970386421, + "loss": 3.125, + "loss_text": 0.3828, + "state_loss_0": 0.0, + "step": 14800 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.2812, + "epoch": 1.068977970386421, + "loss": 3.1094, + "loss_text": 0.2676, + "state_loss_0": 0.0, + "step": 14800 + }, + { + "epoch": 1.0707836764174792, + "grad_norm": 0.7974959015846252, + "learning_rate": 2.3350514951828075e-05, + "loss": 3.0929, + "step": 14825 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1406, + "epoch": 1.0707836764174792, + "loss": 2.9844, + "loss_text": 0.1982, + "state_loss_0": 0.0, + "step": 14825 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.4531, + "epoch": 1.0707836764174792, + "loss": 3.2344, + "loss_text": 0.668, + "state_loss_0": 0.0, + "step": 14825 + }, + { + "epoch": 1.0725893824485373, + "grad_norm": 0.7585076093673706, + "learning_rate": 2.3277577585633134e-05, + "loss": 3.0955, + "step": 14850 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2812, + "epoch": 1.0725893824485373, + "loss": 3.0781, + "loss_text": 0.3809, + "state_loss_0": 0.0, + "step": 14850 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2969, + "epoch": 1.0725893824485373, + "loss": 3.0312, + "loss_text": 0.4043, + "state_loss_0": 0.0, + "step": 14850 + }, + { + "epoch": 1.0743950884795956, + "grad_norm": 0.7560344338417053, + "learning_rate": 2.320465494729128e-05, + "loss": 3.092, + "step": 14875 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.4062, + "epoch": 1.0743950884795956, + "loss": 3.125, + "loss_text": 0.3047, + "state_loss_0": 0.0, + "step": 14875 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2656, + "epoch": 1.0743950884795956, + "loss": 3.0156, + "loss_text": 0.3105, + "state_loss_0": 0.0, + "step": 14875 + }, + { + "epoch": 1.0762007945106538, + "grad_norm": 0.7716652750968933, + "learning_rate": 2.3131747660339394e-05, + "loss": 3.0968, + "step": 14900 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.3281, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.0, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1406, + "epoch": 1.0762007945106538, + "loss": 2.9375, + "loss_text": 0.2793, + "state_loss_0": 0.0, + "step": 14900 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.375, + "epoch": 1.0762007945106538, + "loss": 3.1875, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 14900 + }, + { + "epoch": 1.0780065005417119, + "grad_norm": 0.8226339817047119, + "learning_rate": 2.3058856348183098e-05, + "loss": 3.0916, + "step": 14925 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2812, + "epoch": 1.0780065005417119, + "loss": 3.0625, + "loss_text": 0.2754, + "state_loss_0": 0.0, + "step": 14925 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.7188, + "audio_loss_2": 3.375, + "audio_loss_3": 4.375, + "audio_loss_4": 4.0625, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5156, + "epoch": 1.0780065005417119, + "loss": 3.2812, + "loss_text": 0.3926, + "state_loss_0": 0.0, + "step": 14925 + }, + { + "epoch": 1.07981220657277, + "grad_norm": 0.7639069557189941, + "learning_rate": 2.298598163409141e-05, + "loss": 3.0831, + "step": 14950 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.3125, + "epoch": 1.07981220657277, + "loss": 3.1562, + "loss_text": 0.334, + "state_loss_0": 0.0, + "step": 14950 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.4219, + "epoch": 1.07981220657277, + "loss": 3.1562, + "loss_text": 0.3281, + "state_loss_0": 0.0, + "step": 14950 + }, + { + "epoch": 1.0816179126038281, + "grad_norm": 0.6817963719367981, + "learning_rate": 2.2913124141191432e-05, + "loss": 3.0988, + "step": 14975 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.625, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2969, + "epoch": 1.0816179126038281, + "loss": 3.1406, + "loss_text": 0.3203, + "state_loss_0": 0.0, + "step": 14975 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.6719, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.3594, + "epoch": 1.0816179126038281, + "loss": 3.2188, + "loss_text": 0.6562, + "state_loss_0": 0.0, + "step": 14975 + }, + { + "epoch": 1.0834236186348862, + "grad_norm": 0.687295138835907, + "learning_rate": 2.2840284492463006e-05, + "loss": 3.088, + "step": 15000 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.1094, + "audio_loss_2": 2.9375, + "audio_loss_3": 3.8594, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.2656, + "audio_loss_6": 3.1406, + "epoch": 1.0834236186348862, + "loss": 2.9219, + "loss_text": 0.5508, + "state_loss_0": 0.0, + "step": 15000 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.375, + "audio_loss_4": 3.875, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.4688, + "epoch": 1.0834236186348862, + "loss": 3.25, + "loss_text": 0.5508, + "state_loss_0": 0.0, + "step": 15000 + }, + { + "epoch": 1.0852293246659444, + "grad_norm": 0.712755560874939, + "learning_rate": 2.276746331073341e-05, + "loss": 3.0973, + "step": 15025 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2969, + "epoch": 1.0852293246659444, + "loss": 3.1094, + "loss_text": 0.625, + "state_loss_0": 0.0, + "step": 15025 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2656, + "epoch": 1.0852293246659444, + "loss": 3.0156, + "loss_text": 0.2852, + "state_loss_0": 0.0, + "step": 15025 + }, + { + "epoch": 1.0870350306970025, + "grad_norm": 0.7528886198997498, + "learning_rate": 2.2694661218672002e-05, + "loss": 3.0933, + "step": 15050 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2812, + "epoch": 1.0870350306970025, + "loss": 3.0625, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 15050 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.25, + "epoch": 1.0870350306970025, + "loss": 3.0469, + "loss_text": 0.3301, + "state_loss_0": 0.0, + "step": 15050 + }, + { + "epoch": 1.0888407367280606, + "grad_norm": 0.6697736382484436, + "learning_rate": 2.2621878838784914e-05, + "loss": 3.0964, + "step": 15075 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.4531, + "epoch": 1.0888407367280606, + "loss": 3.2656, + "loss_text": 0.4082, + "state_loss_0": 0.0, + "step": 15075 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.625, + "audio_loss_6": 3.2969, + "epoch": 1.0888407367280606, + "loss": 3.1562, + "loss_text": 0.6797, + "state_loss_0": 0.0, + "step": 15075 + }, + { + "epoch": 1.0906464427591187, + "grad_norm": 0.6843962073326111, + "learning_rate": 2.2549116793409722e-05, + "loss": 3.0871, + "step": 15100 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2031, + "epoch": 1.0906464427591187, + "loss": 3.0, + "loss_text": 0.2148, + "state_loss_0": 0.0, + "step": 15100 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.375, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5, + "audio_loss_6": 3.5, + "epoch": 1.0906464427591187, + "loss": 3.1562, + "loss_text": 0.2852, + "state_loss_0": 0.0, + "step": 15100 + }, + { + "epoch": 1.092452148790177, + "grad_norm": 0.8142296075820923, + "learning_rate": 2.2476375704710137e-05, + "loss": 3.0853, + "step": 15125 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.4062, + "epoch": 1.092452148790177, + "loss": 3.1406, + "loss_text": 0.2383, + "state_loss_0": 0.0, + "step": 15125 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.3125, + "epoch": 1.092452148790177, + "loss": 3.0625, + "loss_text": 0.2656, + "state_loss_0": 0.0, + "step": 15125 + }, + { + "epoch": 1.0942578548212352, + "grad_norm": 0.8052785396575928, + "learning_rate": 2.2403656194670675e-05, + "loss": 3.0865, + "step": 15150 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.3438, + "epoch": 1.0942578548212352, + "loss": 3.1719, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 15150 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1562, + "epoch": 1.0942578548212352, + "loss": 2.9844, + "loss_text": 0.4551, + "state_loss_0": 0.0, + "step": 15150 + }, + { + "epoch": 1.0960635608522933, + "grad_norm": 0.6941478848457336, + "learning_rate": 2.2330958885091332e-05, + "loss": 3.0888, + "step": 15175 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2656, + "epoch": 1.0960635608522933, + "loss": 3.125, + "loss_text": 0.5352, + "state_loss_0": 0.0, + "step": 15175 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2812, + "epoch": 1.0960635608522933, + "loss": 3.0781, + "loss_text": 0.3984, + "state_loss_0": 0.0, + "step": 15175 + }, + { + "epoch": 1.0978692668833514, + "grad_norm": 0.8305199146270752, + "learning_rate": 2.2258284397582286e-05, + "loss": 3.0909, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 2.3438, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.5, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.8984, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.5156, + "eval_loss": 3.4375, + "eval_loss_AQACONVA": 3.4375, + "eval_loss_text_AQACONVA": 1.6094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.75, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 3.0625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 3.1406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.75, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.625, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.2656, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.625, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5781, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3438, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 1.8438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4688, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.4375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_RQACONVA": 3.5312, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.6406, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 2.4375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 3.0156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_RQACONVA": 3.3281, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.1562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.6875, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.3438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.6328, + "eval_loss_RQACONV": 0.6328, + "eval_loss_text_RQACONV": 1.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.5195, + "eval_loss_RQACONV": 0.5195, + "eval_loss_text_RQACONV": 1.0391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.7773, + "eval_loss_RQACONV": 0.7773, + "eval_loss_text_RQACONV": 1.5547, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.7148, + "eval_loss_RQACONV": 0.7148, + "eval_loss_text_RQACONV": 1.4297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.8867, + "eval_loss_RQACONV": 0.8867, + "eval_loss_text_RQACONV": 1.7734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.6797, + "eval_loss_RQACONV": 0.6797, + "eval_loss_text_RQACONV": 1.3594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.8242, + "eval_loss_RQACONV": 0.8242, + "eval_loss_text_RQACONV": 1.6484, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.9453, + "eval_loss_RQACONV": 0.9453, + "eval_loss_text_RQACONV": 1.8906, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.4668, + "eval_loss_RQACONV": 0.4668, + "eval_loss_text_RQACONV": 0.9336, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.6602, + "eval_loss_RQACONV": 0.6602, + "eval_loss_text_RQACONV": 1.3203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 1.4141, + "eval_loss_RQACONV": 1.4141, + "eval_loss_text_RQACONV": 2.8281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.1396, + "eval_loss_RQACONV": 0.1396, + "eval_loss_text_RQACONV": 0.2793, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.0776, + "eval_loss_RQACONV": 0.0776, + "eval_loss_text_RQACONV": 0.1553, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.1631, + "eval_loss_RQACONV": 0.1631, + "eval_loss_text_RQACONV": 0.3262, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.1494, + "eval_loss_RQACONV": 0.1494, + "eval_loss_text_RQACONV": 0.2988, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.0352, + "eval_loss_RQACONV": 0.0352, + "eval_loss_text_RQACONV": 0.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.1377, + "eval_loss_RQACONV": 0.1377, + "eval_loss_text_RQACONV": 0.2754, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.1035, + "eval_loss_RQACONV": 0.1035, + "eval_loss_text_RQACONV": 0.207, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.0938, + "eval_loss_RQACONV": 0.0938, + "eval_loss_text_RQACONV": 0.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15200 + }, + { + "epoch": 1.0978692668833514, + "eval_loss": 1.58280611038208, + "eval_runtime": 28.0534, + "eval_samples_per_second": 190.672, + "eval_steps_per_second": 1.497, + "step": 15200 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.3594, + "epoch": 1.0978692668833514, + "loss": 3.1406, + "loss_text": 0.3555, + "state_loss_0": 0.0, + "step": 15200 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.0781, + "audio_loss_2": 2.8125, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.1562, + "epoch": 1.0978692668833514, + "loss": 2.9844, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 15200 + }, + { + "epoch": 1.0996749729144095, + "grad_norm": 0.713141143321991, + "learning_rate": 2.218563335355856e-05, + "loss": 3.0921, + "step": 15225 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2031, + "epoch": 1.0996749729144095, + "loss": 3.0625, + "loss_text": 0.4062, + "state_loss_0": 0.0, + "step": 15225 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.2812, + "epoch": 1.0996749729144095, + "loss": 3.0312, + "loss_text": 0.1514, + "state_loss_0": 0.0, + "step": 15225 + }, + { + "epoch": 1.1014806789454676, + "grad_norm": 0.7654790878295898, + "learning_rate": 2.211300637423473e-05, + "loss": 3.0842, + "step": 15250 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.7656, + "audio_loss_2": 3.4219, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.375, + "epoch": 1.1014806789454676, + "loss": 3.2969, + "loss_text": 0.4219, + "state_loss_0": 0.0, + "step": 15250 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1406, + "epoch": 1.1014806789454676, + "loss": 3.0469, + "loss_text": 0.6836, + "state_loss_0": 0.0, + "step": 15250 + }, + { + "epoch": 1.1032863849765258, + "grad_norm": 0.7570105791091919, + "learning_rate": 2.2040404080619604e-05, + "loss": 3.0938, + "step": 15275 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.2344, + "epoch": 1.1032863849765258, + "loss": 3.125, + "loss_text": 0.332, + "state_loss_0": 0.0, + "step": 15275 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2969, + "epoch": 1.1032863849765258, + "loss": 3.0781, + "loss_text": 0.5312, + "state_loss_0": 0.0, + "step": 15275 + }, + { + "epoch": 1.1050920910075839, + "grad_norm": 0.7896836996078491, + "learning_rate": 2.19678270935109e-05, + "loss": 3.0888, + "step": 15300 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.875, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.4062, + "epoch": 1.1050920910075839, + "loss": 3.1875, + "loss_text": 0.3828, + "state_loss_0": 0.0, + "step": 15300 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.3906, + "epoch": 1.1050920910075839, + "loss": 3.2344, + "loss_text": 0.7812, + "state_loss_0": 0.0, + "step": 15300 + }, + { + "epoch": 1.1068977970386422, + "grad_norm": 0.8328891396522522, + "learning_rate": 2.1895276033489953e-05, + "loss": 3.0938, + "step": 15325 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2031, + "epoch": 1.1068977970386422, + "loss": 3.0, + "loss_text": 0.1855, + "state_loss_0": 0.0, + "step": 15325 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.2031, + "epoch": 1.1068977970386422, + "loss": 2.9844, + "loss_text": 0.2314, + "state_loss_0": 0.0, + "step": 15325 + }, + { + "epoch": 1.1087035030697003, + "grad_norm": 0.7370375990867615, + "learning_rate": 2.1822751520916413e-05, + "loss": 3.0871, + "step": 15350 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4844, + "epoch": 1.1087035030697003, + "loss": 3.25, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 15350 + }, + { + "audio_loss_0": 2.7656, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2031, + "epoch": 1.1087035030697003, + "loss": 2.9531, + "loss_text": 0.2129, + "state_loss_0": 0.0, + "step": 15350 + }, + { + "epoch": 1.1105092091007585, + "grad_norm": 0.7380526661872864, + "learning_rate": 2.1750254175922914e-05, + "loss": 3.089, + "step": 15375 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.8906, + "audio_loss_2": 3.4375, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.875, + "audio_loss_5": 3.625, + "audio_loss_6": 3.4375, + "epoch": 1.1105092091007585, + "loss": 3.2812, + "loss_text": 0.3262, + "state_loss_0": 0.0, + "step": 15375 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.125, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.25, + "epoch": 1.1105092091007585, + "loss": 3.125, + "loss_text": 0.4082, + "state_loss_0": 0.0, + "step": 15375 + }, + { + "epoch": 1.1123149151318166, + "grad_norm": 0.9343123435974121, + "learning_rate": 2.1677784618409804e-05, + "loss": 3.091, + "step": 15400 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2031, + "epoch": 1.1123149151318166, + "loss": 3.0312, + "loss_text": 0.2246, + "state_loss_0": 0.0, + "step": 15400 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.0938, + "epoch": 1.1123149151318166, + "loss": 3.0156, + "loss_text": 0.2432, + "state_loss_0": 0.0, + "step": 15400 + }, + { + "epoch": 1.1141206211628747, + "grad_norm": 0.6871147751808167, + "learning_rate": 2.160534346803982e-05, + "loss": 3.0873, + "step": 15425 + }, + { + "audio_loss_0": 2.7188, + "audio_loss_1": 3.2656, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1875, + "epoch": 1.1141206211628747, + "loss": 2.9688, + "loss_text": 0.5703, + "state_loss_0": 0.0, + "step": 15425 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2812, + "epoch": 1.1141206211628747, + "loss": 3.0469, + "loss_text": 0.4785, + "state_loss_0": 0.0, + "step": 15425 + }, + { + "epoch": 1.1159263271939328, + "grad_norm": 0.6757208108901978, + "learning_rate": 2.1532931344232817e-05, + "loss": 3.0894, + "step": 15450 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1562, + "epoch": 1.1159263271939328, + "loss": 3.0469, + "loss_text": 0.418, + "state_loss_0": 0.0, + "step": 15450 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.3125, + "epoch": 1.1159263271939328, + "loss": 3.1562, + "loss_text": 0.4492, + "state_loss_0": 0.0, + "step": 15450 + }, + { + "epoch": 1.117732033224991, + "grad_norm": 0.7229690551757812, + "learning_rate": 2.1460548866160432e-05, + "loss": 3.0815, + "step": 15475 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2188, + "epoch": 1.117732033224991, + "loss": 2.9844, + "loss_text": 0.2598, + "state_loss_0": 0.0, + "step": 15475 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2031, + "epoch": 1.117732033224991, + "loss": 3.0312, + "loss_text": 0.4141, + "state_loss_0": 0.0, + "step": 15475 + }, + { + "epoch": 1.119537739256049, + "grad_norm": 0.7445337176322937, + "learning_rate": 2.1388196652740823e-05, + "loss": 3.0823, + "step": 15500 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.4062, + "epoch": 1.119537739256049, + "loss": 3.1875, + "loss_text": 0.4727, + "state_loss_0": 0.0, + "step": 15500 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.1094, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2188, + "epoch": 1.119537739256049, + "loss": 3.0, + "loss_text": 0.1758, + "state_loss_0": 0.0, + "step": 15500 + }, + { + "epoch": 1.1213434452871072, + "grad_norm": 0.7346187233924866, + "learning_rate": 2.1315875322633357e-05, + "loss": 3.0706, + "step": 15525 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.125, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.3438, + "epoch": 1.1213434452871072, + "loss": 3.1094, + "loss_text": 0.3887, + "state_loss_0": 0.0, + "step": 15525 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.2656, + "audio_loss_2": 2.9062, + "audio_loss_3": 4.0, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1406, + "epoch": 1.1213434452871072, + "loss": 2.9219, + "loss_text": 0.2314, + "state_loss_0": 0.0, + "step": 15525 + }, + { + "epoch": 1.1231491513181655, + "grad_norm": 0.6594767570495605, + "learning_rate": 2.1243585494233337e-05, + "loss": 3.0775, + "step": 15550 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.4062, + "epoch": 1.1231491513181655, + "loss": 3.2031, + "loss_text": 0.8672, + "state_loss_0": 0.0, + "step": 15550 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.3281, + "epoch": 1.1231491513181655, + "loss": 3.0938, + "loss_text": 0.5391, + "state_loss_0": 0.0, + "step": 15550 + }, + { + "epoch": 1.1249548573492236, + "grad_norm": 0.6566407084465027, + "learning_rate": 2.1171327785666707e-05, + "loss": 3.0787, + "step": 15575 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.3438, + "epoch": 1.1249548573492236, + "loss": 3.1562, + "loss_text": 0.582, + "state_loss_0": 0.0, + "step": 15575 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5156, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.1875, + "epoch": 1.1249548573492236, + "loss": 2.9062, + "loss_text": 0.2871, + "state_loss_0": 0.0, + "step": 15575 + }, + { + "epoch": 1.1267605633802817, + "grad_norm": 0.7590958476066589, + "learning_rate": 2.1099102814784748e-05, + "loss": 3.088, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.6406, + "eval_loss_AQACONVA": 3.6406, + "eval_loss_text_AQACONVA": 2.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.5, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.9219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.5, + "eval_loss": 3.4531, + "eval_loss_AQACONVA": 3.4531, + "eval_loss_text_AQACONVA": 1.6016, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.75, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.0781, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 3.1719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.6406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.2812, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5781, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.4688, + "eval_loss_RQACONVA": 3.4688, + "eval_loss_text_RQACONVA": 2.3125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.75, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 1.8594, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4844, + "eval_audio_loss_2_RQACONVA": 3.2969, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.5938, + "eval_loss_RQACONVA": 3.5938, + "eval_loss_text_RQACONVA": 2.4375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_RQACONVA": 3.4531, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.625, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 2.3594, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 3.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2656, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_RQACONVA": 3.125, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.6875, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.5, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.6875, + "eval_loss_RQACONV": 0.6875, + "eval_loss_text_RQACONV": 1.375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.5625, + "eval_loss_RQACONV": 0.5625, + "eval_loss_text_RQACONV": 1.125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.8125, + "eval_loss_RQACONV": 0.8125, + "eval_loss_text_RQACONV": 1.625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.7148, + "eval_loss_RQACONV": 0.7148, + "eval_loss_text_RQACONV": 1.4297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.8359, + "eval_loss_RQACONV": 0.8359, + "eval_loss_text_RQACONV": 1.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.7148, + "eval_loss_RQACONV": 0.7148, + "eval_loss_text_RQACONV": 1.4297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.8164, + "eval_loss_RQACONV": 0.8164, + "eval_loss_text_RQACONV": 1.6328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 1.0078, + "eval_loss_RQACONV": 1.0078, + "eval_loss_text_RQACONV": 2.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.498, + "eval_loss_RQACONV": 0.498, + "eval_loss_text_RQACONV": 0.9961, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.4648, + "eval_loss_RQACONV": 0.4648, + "eval_loss_text_RQACONV": 0.9297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.6602, + "eval_loss_RQACONV": 0.6602, + "eval_loss_text_RQACONV": 1.3203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 1.4219, + "eval_loss_RQACONV": 1.4219, + "eval_loss_text_RQACONV": 2.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.1357, + "eval_loss_RQACONV": 0.1357, + "eval_loss_text_RQACONV": 0.2715, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.1514, + "eval_loss_RQACONV": 0.1514, + "eval_loss_text_RQACONV": 0.3027, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.0771, + "eval_loss_RQACONV": 0.0771, + "eval_loss_text_RQACONV": 0.1543, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.1562, + "eval_loss_RQACONV": 0.1562, + "eval_loss_text_RQACONV": 0.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.033, + "eval_loss_RQACONV": 0.033, + "eval_loss_text_RQACONV": 0.0659, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.1377, + "eval_loss_RQACONV": 0.1377, + "eval_loss_text_RQACONV": 0.2754, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.1025, + "eval_loss_RQACONV": 0.1025, + "eval_loss_text_RQACONV": 0.2051, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.0967, + "eval_loss_RQACONV": 0.0967, + "eval_loss_text_RQACONV": 0.1934, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 0.6094, + "eval_loss_RQACONV": 0.6094, + "eval_loss_text_RQACONV": 1.2188, + "eval_state_loss_0_RQACONV": 0.0, + "step": 15600 + }, + { + "epoch": 1.1267605633802817, + "eval_loss": 1.5852196216583252, + "eval_runtime": 27.9989, + "eval_samples_per_second": 191.043, + "eval_steps_per_second": 1.5, + "step": 15600 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.375, + "epoch": 1.1267605633802817, + "loss": 3.1875, + "loss_text": 0.7148, + "state_loss_0": 0.0, + "step": 15600 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.625, + "audio_loss_6": 3.3281, + "epoch": 1.1267605633802817, + "loss": 3.1562, + "loss_text": 0.3965, + "state_loss_0": 0.0, + "step": 15600 + }, + { + "epoch": 1.1285662694113399, + "grad_norm": 0.8040897250175476, + "learning_rate": 2.1026911199158835e-05, + "loss": 3.081, + "step": 15625 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.2188, + "audio_loss_2": 3.0, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2969, + "epoch": 1.1285662694113399, + "loss": 3.0312, + "loss_text": 0.2715, + "state_loss_0": 0.0, + "step": 15625 + }, + { + "audio_loss_0": 3.4688, + "audio_loss_1": 3.6406, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.3125, + "epoch": 1.1285662694113399, + "loss": 3.2188, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 15625 + }, + { + "epoch": 1.130371975442398, + "grad_norm": 0.7710487246513367, + "learning_rate": 2.0954753556075113e-05, + "loss": 3.0849, + "step": 15650 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2812, + "epoch": 1.130371975442398, + "loss": 3.0938, + "loss_text": 0.2275, + "state_loss_0": 0.0, + "step": 15650 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.4219, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.2344, + "epoch": 1.130371975442398, + "loss": 3.0469, + "loss_text": 0.5742, + "state_loss_0": 0.0, + "step": 15650 + }, + { + "epoch": 1.132177681473456, + "grad_norm": 0.7805773019790649, + "learning_rate": 2.088263050252926e-05, + "loss": 3.0814, + "step": 15675 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.1875, + "epoch": 1.132177681473456, + "loss": 3.0938, + "loss_text": 0.8281, + "state_loss_0": 0.0, + "step": 15675 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2188, + "epoch": 1.132177681473456, + "loss": 3.0469, + "loss_text": 0.3477, + "state_loss_0": 0.0, + "step": 15675 + }, + { + "epoch": 1.1339833875045142, + "grad_norm": 0.7545686364173889, + "learning_rate": 2.0810542655221172e-05, + "loss": 3.0863, + "step": 15700 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.2031, + "audio_loss_2": 3.0, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.375, + "audio_loss_6": 3.0781, + "epoch": 1.1339833875045142, + "loss": 2.9844, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 15700 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.7344, + "audio_loss_3": 3.9531, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.3125, + "audio_loss_6": 2.9375, + "epoch": 1.1339833875045142, + "loss": 2.8438, + "loss_text": 0.2598, + "state_loss_0": 0.0, + "step": 15700 + }, + { + "epoch": 1.1357890935355723, + "grad_norm": 0.7275266647338867, + "learning_rate": 2.0738490630549724e-05, + "loss": 3.0914, + "step": 15725 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1406, + "epoch": 1.1357890935355723, + "loss": 2.9531, + "loss_text": 0.2637, + "state_loss_0": 0.0, + "step": 15725 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.3281, + "epoch": 1.1357890935355723, + "loss": 3.0781, + "loss_text": 0.4941, + "state_loss_0": 0.0, + "step": 15725 + }, + { + "epoch": 1.1375947995666307, + "grad_norm": 0.6978769302368164, + "learning_rate": 2.0666475044607463e-05, + "loss": 3.0792, + "step": 15750 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.875, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.4531, + "epoch": 1.1375947995666307, + "loss": 3.25, + "loss_text": 0.6328, + "state_loss_0": 0.0, + "step": 15750 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.3281, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.0938, + "epoch": 1.1375947995666307, + "loss": 3.0, + "loss_text": 0.3516, + "state_loss_0": 0.0, + "step": 15750 + }, + { + "epoch": 1.1394005055976888, + "grad_norm": 0.8125790357589722, + "learning_rate": 2.0594496513175385e-05, + "loss": 3.0747, + "step": 15775 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.2812, + "epoch": 1.1394005055976888, + "loss": 3.2031, + "loss_text": 0.3906, + "state_loss_0": 0.0, + "step": 15775 + }, + { + "audio_loss_0": 2.6719, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2031, + "epoch": 1.1394005055976888, + "loss": 3.0625, + "loss_text": 0.7461, + "state_loss_0": 0.0, + "step": 15775 + }, + { + "epoch": 1.141206211628747, + "grad_norm": 0.7378820776939392, + "learning_rate": 2.0522555651717625e-05, + "loss": 3.0758, + "step": 15800 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.3125, + "epoch": 1.141206211628747, + "loss": 3.125, + "loss_text": 0.3145, + "state_loss_0": 0.0, + "step": 15800 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.2969, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2031, + "epoch": 1.141206211628747, + "loss": 3.0312, + "loss_text": 0.5742, + "state_loss_0": 0.0, + "step": 15800 + }, + { + "epoch": 1.143011917659805, + "grad_norm": 0.7417800426483154, + "learning_rate": 2.045065307537623e-05, + "loss": 3.078, + "step": 15825 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.3281, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3438, + "audio_loss_6": 2.9688, + "epoch": 1.143011917659805, + "loss": 2.9375, + "loss_text": 0.1689, + "state_loss_0": 0.0, + "step": 15825 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.9062, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0625, + "epoch": 1.143011917659805, + "loss": 2.9062, + "loss_text": 0.3477, + "state_loss_0": 0.0, + "step": 15825 + }, + { + "epoch": 1.1448176236908632, + "grad_norm": 0.7309858798980713, + "learning_rate": 2.0378789398965873e-05, + "loss": 3.0777, + "step": 15850 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.625, + "audio_loss_6": 3.3281, + "epoch": 1.1448176236908632, + "loss": 3.0938, + "loss_text": 0.252, + "state_loss_0": 0.0, + "step": 15850 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2656, + "epoch": 1.1448176236908632, + "loss": 3.0938, + "loss_text": 0.416, + "state_loss_0": 0.0, + "step": 15850 + }, + { + "epoch": 1.1466233297219213, + "grad_norm": 0.7042123079299927, + "learning_rate": 2.0306965236968605e-05, + "loss": 3.0715, + "step": 15875 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1875, + "epoch": 1.1466233297219213, + "loss": 3.0625, + "loss_text": 0.7188, + "state_loss_0": 0.0, + "step": 15875 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.1875, + "epoch": 1.1466233297219213, + "loss": 3.0312, + "loss_text": 0.1953, + "state_loss_0": 0.0, + "step": 15875 + }, + { + "epoch": 1.1484290357529794, + "grad_norm": 0.7317967414855957, + "learning_rate": 2.0235181203528624e-05, + "loss": 3.0706, + "step": 15900 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.8906, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.375, + "audio_loss_6": 3.0938, + "epoch": 1.1484290357529794, + "loss": 2.9219, + "loss_text": 0.2139, + "state_loss_0": 0.0, + "step": 15900 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2812, + "epoch": 1.1484290357529794, + "loss": 3.125, + "loss_text": 0.7109, + "state_loss_0": 0.0, + "step": 15900 + }, + { + "epoch": 1.1502347417840375, + "grad_norm": 0.7246168851852417, + "learning_rate": 2.0163437912446976e-05, + "loss": 3.0829, + "step": 15925 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.5, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.25, + "epoch": 1.1502347417840375, + "loss": 3.0781, + "loss_text": 0.1348, + "state_loss_0": 0.0, + "step": 15925 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.3281, + "epoch": 1.1502347417840375, + "loss": 3.0469, + "loss_text": 0.3359, + "state_loss_0": 0.0, + "step": 15925 + }, + { + "epoch": 1.1520404478150956, + "grad_norm": 0.6937533617019653, + "learning_rate": 2.009173597717635e-05, + "loss": 3.0731, + "step": 15950 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.8125, + "audio_loss_6": 3.4688, + "epoch": 1.1520404478150956, + "loss": 3.3125, + "loss_text": 0.418, + "state_loss_0": 0.0, + "step": 15950 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2188, + "epoch": 1.1520404478150956, + "loss": 3.0156, + "loss_text": 0.4473, + "state_loss_0": 0.0, + "step": 15950 + }, + { + "epoch": 1.1538461538461537, + "grad_norm": 0.9202188849449158, + "learning_rate": 2.0020076010815804e-05, + "loss": 3.0745, + "step": 15975 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.4375, + "epoch": 1.1538461538461537, + "loss": 3.1562, + "loss_text": 0.7734, + "state_loss_0": 0.0, + "step": 15975 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.25, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.4062, + "epoch": 1.1538461538461537, + "loss": 3.1406, + "loss_text": 0.3848, + "state_loss_0": 0.0, + "step": 15975 + }, + { + "epoch": 1.155651859877212, + "grad_norm": 0.7236653566360474, + "learning_rate": 1.9948458626105537e-05, + "loss": 3.0722, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 2.3594, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.9375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.5, + "eval_loss": 3.4375, + "eval_loss_AQACONVA": 3.4375, + "eval_loss_text_AQACONVA": 1.6172, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.75, + "eval_audio_loss_6_AQACONVA": 3.4688, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 3.125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 3.1719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.5938, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.6406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.2656, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.625, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5625, + "eval_audio_loss_6_RQACONVA": 3.4062, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.3125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.75, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 1.8516, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4844, + "eval_audio_loss_2_RQACONVA": 3.2812, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_RQACONVA": 3.5, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.6406, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.5156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 3.0469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_RQACONVA": 3.3125, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2344, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7031, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.4375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.5156, + "eval_loss_RQACONV": 0.5156, + "eval_loss_text_RQACONV": 1.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.7695, + "eval_loss_RQACONV": 0.7695, + "eval_loss_text_RQACONV": 1.5391, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.7109, + "eval_loss_RQACONV": 0.7109, + "eval_loss_text_RQACONV": 1.4219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.9258, + "eval_loss_RQACONV": 0.9258, + "eval_loss_text_RQACONV": 1.8516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.6953, + "eval_loss_RQACONV": 0.6953, + "eval_loss_text_RQACONV": 1.3906, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.8359, + "eval_loss_RQACONV": 0.8359, + "eval_loss_text_RQACONV": 1.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.9922, + "eval_loss_RQACONV": 0.9922, + "eval_loss_text_RQACONV": 1.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.4688, + "eval_loss_RQACONV": 0.4688, + "eval_loss_text_RQACONV": 0.9375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.6641, + "eval_loss_RQACONV": 0.6641, + "eval_loss_text_RQACONV": 1.3281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 1.4141, + "eval_loss_RQACONV": 1.4141, + "eval_loss_text_RQACONV": 2.8281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.1328, + "eval_loss_RQACONV": 0.1328, + "eval_loss_text_RQACONV": 0.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.1484, + "eval_loss_RQACONV": 0.1484, + "eval_loss_text_RQACONV": 0.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.0747, + "eval_loss_RQACONV": 0.0747, + "eval_loss_text_RQACONV": 0.1494, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.1631, + "eval_loss_RQACONV": 0.1631, + "eval_loss_text_RQACONV": 0.3262, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.1582, + "eval_loss_RQACONV": 0.1582, + "eval_loss_text_RQACONV": 0.3164, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.0352, + "eval_loss_RQACONV": 0.0352, + "eval_loss_text_RQACONV": 0.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.1328, + "eval_loss_RQACONV": 0.1328, + "eval_loss_text_RQACONV": 0.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.1006, + "eval_loss_RQACONV": 0.1006, + "eval_loss_text_RQACONV": 0.2012, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.0962, + "eval_loss_RQACONV": 0.0962, + "eval_loss_text_RQACONV": 0.1924, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 0.6016, + "eval_loss_RQACONV": 0.6016, + "eval_loss_text_RQACONV": 1.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16000 + }, + { + "epoch": 1.155651859877212, + "eval_loss": 1.582816243171692, + "eval_runtime": 28.5265, + "eval_samples_per_second": 187.51, + "eval_steps_per_second": 1.472, + "step": 16000 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.2812, + "epoch": 1.155651859877212, + "loss": 3.1094, + "loss_text": 0.4922, + "state_loss_0": 0.0, + "step": 16000 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.5, + "audio_loss_6": 3.3594, + "epoch": 1.155651859877212, + "loss": 3.0781, + "loss_text": 0.4199, + "state_loss_0": 0.0, + "step": 16000 + }, + { + "epoch": 1.1574575659082702, + "grad_norm": 0.6383086442947388, + "learning_rate": 1.987688443542166e-05, + "loss": 3.0799, + "step": 16025 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.3281, + "epoch": 1.1574575659082702, + "loss": 3.125, + "loss_text": 0.4785, + "state_loss_0": 0.0, + "step": 16025 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.1094, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.125, + "epoch": 1.1574575659082702, + "loss": 2.9219, + "loss_text": 0.2637, + "state_loss_0": 0.0, + "step": 16025 + }, + { + "epoch": 1.1592632719393283, + "grad_norm": 0.831063449382782, + "learning_rate": 1.980535405077092e-05, + "loss": 3.0719, + "step": 16050 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.4844, + "audio_loss_5": 3.2031, + "audio_loss_6": 3.2188, + "epoch": 1.1592632719393283, + "loss": 2.9375, + "loss_text": 0.4062, + "state_loss_0": 0.0, + "step": 16050 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.4062, + "epoch": 1.1592632719393283, + "loss": 3.0938, + "loss_text": 0.3906, + "state_loss_0": 0.0, + "step": 16050 + }, + { + "epoch": 1.1610689779703864, + "grad_norm": 0.7414517998695374, + "learning_rate": 1.9733868083785518e-05, + "loss": 3.0875, + "step": 16075 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.2969, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2656, + "epoch": 1.1610689779703864, + "loss": 3.0781, + "loss_text": 0.3926, + "state_loss_0": 0.0, + "step": 16075 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.1875, + "epoch": 1.1610689779703864, + "loss": 3.0938, + "loss_text": 0.793, + "state_loss_0": 0.0, + "step": 16075 + }, + { + "epoch": 1.1628746840014446, + "grad_norm": 0.7556141018867493, + "learning_rate": 1.9662427145717847e-05, + "loss": 3.0761, + "step": 16100 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.25, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2188, + "epoch": 1.1628746840014446, + "loss": 3.0312, + "loss_text": 0.4766, + "state_loss_0": 0.0, + "step": 16100 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.3281, + "epoch": 1.1628746840014446, + "loss": 3.2031, + "loss_text": 0.8008, + "state_loss_0": 0.0, + "step": 16100 + }, + { + "epoch": 1.1646803900325027, + "grad_norm": 0.9407621026039124, + "learning_rate": 1.9591031847435265e-05, + "loss": 3.0696, + "step": 16125 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2344, + "epoch": 1.1646803900325027, + "loss": 3.0469, + "loss_text": 0.4355, + "state_loss_0": 0.0, + "step": 16125 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.3125, + "epoch": 1.1646803900325027, + "loss": 3.125, + "loss_text": 0.5078, + "state_loss_0": 0.0, + "step": 16125 + }, + { + "epoch": 1.1664860960635608, + "grad_norm": 0.792091965675354, + "learning_rate": 1.951968279941489e-05, + "loss": 3.0788, + "step": 16150 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.2188, + "epoch": 1.1664860960635608, + "loss": 3.0469, + "loss_text": 0.4922, + "state_loss_0": 0.0, + "step": 16150 + }, + { + "audio_loss_0": 2.6719, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.375, + "audio_loss_6": 3.2344, + "epoch": 1.1664860960635608, + "loss": 3.0312, + "loss_text": 0.6211, + "state_loss_0": 0.0, + "step": 16150 + }, + { + "epoch": 1.168291802094619, + "grad_norm": 0.7654048800468445, + "learning_rate": 1.9448380611738365e-05, + "loss": 3.0725, + "step": 16175 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5, + "audio_loss_6": 3.1875, + "epoch": 1.168291802094619, + "loss": 3.0312, + "loss_text": 0.2852, + "state_loss_0": 0.0, + "step": 16175 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2812, + "epoch": 1.168291802094619, + "loss": 3.1094, + "loss_text": 0.4043, + "state_loss_0": 0.0, + "step": 16175 + }, + { + "epoch": 1.1700975081256773, + "grad_norm": 0.729767382144928, + "learning_rate": 1.9377125894086643e-05, + "loss": 3.0669, + "step": 16200 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.5312, + "epoch": 1.1700975081256773, + "loss": 3.2188, + "loss_text": 0.5273, + "state_loss_0": 0.0, + "step": 16200 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.25, + "epoch": 1.1700975081256773, + "loss": 3.0469, + "loss_text": 0.373, + "state_loss_0": 0.0, + "step": 16200 + }, + { + "epoch": 1.1719032141567354, + "grad_norm": 0.6464732885360718, + "learning_rate": 1.9305919255734788e-05, + "loss": 3.0673, + "step": 16225 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.2969, + "audio_loss_6": 3.2031, + "epoch": 1.1719032141567354, + "loss": 2.9688, + "loss_text": 0.293, + "state_loss_0": 0.0, + "step": 16225 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.2188, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1875, + "epoch": 1.1719032141567354, + "loss": 3.0, + "loss_text": 0.2598, + "state_loss_0": 0.0, + "step": 16225 + }, + { + "epoch": 1.1737089201877935, + "grad_norm": 0.7647759914398193, + "learning_rate": 1.9234761305546733e-05, + "loss": 3.0745, + "step": 16250 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2656, + "epoch": 1.1737089201877935, + "loss": 3.0781, + "loss_text": 0.377, + "state_loss_0": 0.0, + "step": 16250 + }, + { + "audio_loss_0": 3.6562, + "audio_loss_1": 3.625, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.7031, + "audio_loss_6": 3.5469, + "epoch": 1.1737089201877935, + "loss": 3.3281, + "loss_text": 0.3926, + "state_loss_0": 0.0, + "step": 16250 + }, + { + "epoch": 1.1755146262188516, + "grad_norm": 0.7439690232276917, + "learning_rate": 1.9163652651970122e-05, + "loss": 3.0714, + "step": 16275 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.3281, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.2969, + "epoch": 1.1755146262188516, + "loss": 3.0938, + "loss_text": 0.2275, + "state_loss_0": 0.0, + "step": 16275 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.625, + "audio_loss_6": 3.2969, + "epoch": 1.1755146262188516, + "loss": 3.125, + "loss_text": 0.4746, + "state_loss_0": 0.0, + "step": 16275 + }, + { + "epoch": 1.1773203322499097, + "grad_norm": 0.6373063921928406, + "learning_rate": 1.909259390303105e-05, + "loss": 3.0774, + "step": 16300 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.1562, + "epoch": 1.1773203322499097, + "loss": 3.0781, + "loss_text": 0.6328, + "state_loss_0": 0.0, + "step": 16300 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.125, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2188, + "epoch": 1.1773203322499097, + "loss": 3.0625, + "loss_text": 0.4141, + "state_loss_0": 0.0, + "step": 16300 + }, + { + "epoch": 1.1791260382809678, + "grad_norm": 0.7170313000679016, + "learning_rate": 1.9021585666328927e-05, + "loss": 3.0695, + "step": 16325 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.25, + "epoch": 1.1791260382809678, + "loss": 3.1562, + "loss_text": 0.3438, + "state_loss_0": 0.0, + "step": 16325 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1562, + "epoch": 1.1791260382809678, + "loss": 2.9844, + "loss_text": 0.3125, + "state_loss_0": 0.0, + "step": 16325 + }, + { + "epoch": 1.180931744312026, + "grad_norm": 0.7159942984580994, + "learning_rate": 1.895062854903123e-05, + "loss": 3.0693, + "step": 16350 + }, + { + "audio_loss_0": 2.6406, + "audio_loss_1": 3.25, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.1562, + "epoch": 1.180931744312026, + "loss": 2.9219, + "loss_text": 0.2139, + "state_loss_0": 0.0, + "step": 16350 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2969, + "epoch": 1.180931744312026, + "loss": 3.1406, + "loss_text": 0.4082, + "state_loss_0": 0.0, + "step": 16350 + }, + { + "epoch": 1.182737450343084, + "grad_norm": 0.6740960478782654, + "learning_rate": 1.887972315786833e-05, + "loss": 3.0784, + "step": 16375 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2188, + "epoch": 1.182737450343084, + "loss": 3.0625, + "loss_text": 0.293, + "state_loss_0": 0.0, + "step": 16375 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.25, + "epoch": 1.182737450343084, + "loss": 3.0469, + "loss_text": 0.3398, + "state_loss_0": 0.0, + "step": 16375 + }, + { + "epoch": 1.1845431563741422, + "grad_norm": 0.8438175320625305, + "learning_rate": 1.8808870099128317e-05, + "loss": 3.06, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6875, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9375, + "eval_audio_loss_6_AQACONVA": 3.6719, + "eval_loss": 3.6406, + "eval_loss_AQACONVA": 3.6406, + "eval_loss_text_AQACONVA": 2.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.8281, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.9375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.6562, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.7344, + "eval_audio_loss_6_AQACONVA": 3.4688, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 3.125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 3.2188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.6562, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.2812, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_RQACONVA": 3.125, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5625, + "eval_audio_loss_6_RQACONVA": 3.4062, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.4062, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 1.8203, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4844, + "eval_audio_loss_2_RQACONVA": 3.2812, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_RQACONVA": 3.4688, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_RQACONVA": 3.3125, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 3.1094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.7031, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.3438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.6562, + "eval_loss_RQACONV": 0.6562, + "eval_loss_text_RQACONV": 1.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.7773, + "eval_loss_RQACONV": 0.7773, + "eval_loss_text_RQACONV": 1.5547, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.707, + "eval_loss_RQACONV": 0.707, + "eval_loss_text_RQACONV": 1.4141, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.7891, + "eval_loss_RQACONV": 0.7891, + "eval_loss_text_RQACONV": 1.5781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.7266, + "eval_loss_RQACONV": 0.7266, + "eval_loss_text_RQACONV": 1.4531, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.8164, + "eval_loss_RQACONV": 0.8164, + "eval_loss_text_RQACONV": 1.6328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.9883, + "eval_loss_RQACONV": 0.9883, + "eval_loss_text_RQACONV": 1.9766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.4746, + "eval_loss_RQACONV": 0.4746, + "eval_loss_text_RQACONV": 0.9492, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 1.4297, + "eval_loss_RQACONV": 1.4297, + "eval_loss_text_RQACONV": 2.8594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.1299, + "eval_loss_RQACONV": 0.1299, + "eval_loss_text_RQACONV": 0.2598, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.1465, + "eval_loss_RQACONV": 0.1465, + "eval_loss_text_RQACONV": 0.293, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.0723, + "eval_loss_RQACONV": 0.0723, + "eval_loss_text_RQACONV": 0.1445, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.1543, + "eval_loss_RQACONV": 0.1543, + "eval_loss_text_RQACONV": 0.3086, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.1553, + "eval_loss_RQACONV": 0.1553, + "eval_loss_text_RQACONV": 0.3105, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.0349, + "eval_loss_RQACONV": 0.0349, + "eval_loss_text_RQACONV": 0.0698, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.1348, + "eval_loss_RQACONV": 0.1348, + "eval_loss_text_RQACONV": 0.2695, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.1011, + "eval_loss_RQACONV": 0.1011, + "eval_loss_text_RQACONV": 0.2021, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.0952, + "eval_loss_RQACONV": 0.0952, + "eval_loss_text_RQACONV": 0.1904, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 0.6016, + "eval_loss_RQACONV": 0.6016, + "eval_loss_text_RQACONV": 1.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16400 + }, + { + "epoch": 1.1845431563741422, + "eval_loss": 1.5834896564483643, + "eval_runtime": 27.7122, + "eval_samples_per_second": 193.02, + "eval_steps_per_second": 1.516, + "step": 16400 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2188, + "epoch": 1.1845431563741422, + "loss": 3.0938, + "loss_text": 0.4434, + "state_loss_0": 0.0, + "step": 16400 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2812, + "epoch": 1.1845431563741422, + "loss": 3.0469, + "loss_text": 0.3008, + "state_loss_0": 0.0, + "step": 16400 + }, + { + "epoch": 1.1863488624052003, + "grad_norm": 0.7175026535987854, + "learning_rate": 1.873806997865179e-05, + "loss": 3.0688, + "step": 16425 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.2969, + "epoch": 1.1863488624052003, + "loss": 3.1094, + "loss_text": 0.3438, + "state_loss_0": 0.0, + "step": 16425 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2812, + "epoch": 1.1863488624052003, + "loss": 3.0625, + "loss_text": 0.375, + "state_loss_0": 0.0, + "step": 16425 + }, + { + "epoch": 1.1881545684362587, + "grad_norm": 0.696071445941925, + "learning_rate": 1.86673234018267e-05, + "loss": 3.0649, + "step": 16450 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2656, + "epoch": 1.1881545684362587, + "loss": 3.0625, + "loss_text": 0.2344, + "state_loss_0": 0.0, + "step": 16450 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2812, + "epoch": 1.1881545684362587, + "loss": 3.1094, + "loss_text": 0.6328, + "state_loss_0": 0.0, + "step": 16450 + }, + { + "epoch": 1.1899602744673168, + "grad_norm": 0.7572211027145386, + "learning_rate": 1.8596630973583166e-05, + "loss": 3.0692, + "step": 16475 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.3281, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1875, + "epoch": 1.1899602744673168, + "loss": 3.0, + "loss_text": 0.3789, + "state_loss_0": 0.0, + "step": 16475 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.25, + "audio_loss_2": 2.9062, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.1562, + "epoch": 1.1899602744673168, + "loss": 2.9531, + "loss_text": 0.3809, + "state_loss_0": 0.0, + "step": 16475 + }, + { + "epoch": 1.191765980498375, + "grad_norm": 1.2347127199172974, + "learning_rate": 1.8525993298388287e-05, + "loss": 3.0654, + "step": 16500 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.2188, + "epoch": 1.191765980498375, + "loss": 3.0625, + "loss_text": 0.5117, + "state_loss_0": 0.0, + "step": 16500 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.1875, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2344, + "epoch": 1.191765980498375, + "loss": 3.0625, + "loss_text": 0.124, + "state_loss_0": 0.0, + "step": 16500 + }, + { + "epoch": 1.193571686529433, + "grad_norm": 0.6958622336387634, + "learning_rate": 1.8455410980241008e-05, + "loss": 3.0624, + "step": 16525 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2812, + "epoch": 1.193571686529433, + "loss": 3.0312, + "loss_text": 0.416, + "state_loss_0": 0.0, + "step": 16525 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.3438, + "epoch": 1.193571686529433, + "loss": 3.1094, + "loss_text": 0.3145, + "state_loss_0": 0.0, + "step": 16525 + }, + { + "epoch": 1.1953773925604911, + "grad_norm": 0.6756531000137329, + "learning_rate": 1.8384884622666927e-05, + "loss": 3.0743, + "step": 16550 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2188, + "epoch": 1.1953773925604911, + "loss": 3.1094, + "loss_text": 0.6797, + "state_loss_0": 0.0, + "step": 16550 + }, + { + "audio_loss_0": 3.4375, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.375, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.4062, + "epoch": 1.1953773925604911, + "loss": 3.2656, + "loss_text": 0.3906, + "state_loss_0": 0.0, + "step": 16550 + }, + { + "epoch": 1.1971830985915493, + "grad_norm": 0.7867107391357422, + "learning_rate": 1.8314414828713126e-05, + "loss": 3.0635, + "step": 16575 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.375, + "audio_loss_6": 3.0781, + "epoch": 1.1971830985915493, + "loss": 2.9844, + "loss_text": 0.3672, + "state_loss_0": 0.0, + "step": 16575 + }, + { + "audio_loss_0": 2.7188, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.3281, + "epoch": 1.1971830985915493, + "loss": 3.1406, + "loss_text": 0.6758, + "state_loss_0": 0.0, + "step": 16575 + }, + { + "epoch": 1.1989888046226074, + "grad_norm": 0.7439194321632385, + "learning_rate": 1.824400220094305e-05, + "loss": 3.0688, + "step": 16600 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2031, + "epoch": 1.1989888046226074, + "loss": 3.0625, + "loss_text": 0.2178, + "state_loss_0": 0.0, + "step": 16600 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1562, + "epoch": 1.1989888046226074, + "loss": 3.0469, + "loss_text": 0.4805, + "state_loss_0": 0.0, + "step": 16600 + }, + { + "epoch": 1.2007945106536655, + "grad_norm": 0.8084712028503418, + "learning_rate": 1.817364734143133e-05, + "loss": 3.0677, + "step": 16625 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2812, + "epoch": 1.2007945106536655, + "loss": 3.0938, + "loss_text": 0.4844, + "state_loss_0": 0.0, + "step": 16625 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2969, + "epoch": 1.2007945106536655, + "loss": 3.0625, + "loss_text": 0.3086, + "state_loss_0": 0.0, + "step": 16625 + }, + { + "epoch": 1.2026002166847238, + "grad_norm": 0.7253180742263794, + "learning_rate": 1.8103350851758636e-05, + "loss": 3.0684, + "step": 16650 + }, + { + "audio_loss_0": 2.7656, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2188, + "epoch": 1.2026002166847238, + "loss": 3.0625, + "loss_text": 0.5273, + "state_loss_0": 0.0, + "step": 16650 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.0938, + "epoch": 1.2026002166847238, + "loss": 3.0625, + "loss_text": 0.8711, + "state_loss_0": 0.0, + "step": 16650 + }, + { + "epoch": 1.204405922715782, + "grad_norm": 0.715740442276001, + "learning_rate": 1.8033113333006542e-05, + "loss": 3.0743, + "step": 16675 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.2031, + "epoch": 1.204405922715782, + "loss": 3.125, + "loss_text": 0.7539, + "state_loss_0": 0.0, + "step": 16675 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1562, + "epoch": 1.204405922715782, + "loss": 2.9688, + "loss_text": 0.3477, + "state_loss_0": 0.0, + "step": 16675 + }, + { + "epoch": 1.20621162874684, + "grad_norm": 0.7083131074905396, + "learning_rate": 1.7962935385752373e-05, + "loss": 3.0609, + "step": 16700 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.1094, + "audio_loss_2": 2.8594, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.4844, + "audio_loss_5": 3.2969, + "audio_loss_6": 3.0, + "epoch": 1.20621162874684, + "loss": 2.8594, + "loss_text": 0.2295, + "state_loss_0": 0.0, + "step": 16700 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1719, + "epoch": 1.20621162874684, + "loss": 2.9688, + "loss_text": 0.3496, + "state_loss_0": 0.0, + "step": 16700 + }, + { + "epoch": 1.2080173347778982, + "grad_norm": 0.7313473224639893, + "learning_rate": 1.789281761006409e-05, + "loss": 3.0707, + "step": 16725 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.3438, + "epoch": 1.2080173347778982, + "loss": 3.0938, + "loss_text": 0.3066, + "state_loss_0": 0.0, + "step": 16725 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.2188, + "epoch": 1.2080173347778982, + "loss": 3.0469, + "loss_text": 0.3613, + "state_loss_0": 0.0, + "step": 16725 + }, + { + "epoch": 1.2098230408089563, + "grad_norm": 0.6631115674972534, + "learning_rate": 1.7822760605495145e-05, + "loss": 3.0705, + "step": 16750 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.2969, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.125, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2656, + "epoch": 1.2098230408089563, + "loss": 3.0312, + "loss_text": 0.3555, + "state_loss_0": 0.0, + "step": 16750 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.3125, + "epoch": 1.2098230408089563, + "loss": 3.1562, + "loss_text": 0.2324, + "state_loss_0": 0.0, + "step": 16750 + }, + { + "epoch": 1.2116287468400144, + "grad_norm": 0.7117000818252563, + "learning_rate": 1.775276497107935e-05, + "loss": 3.0631, + "step": 16775 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2656, + "epoch": 1.2116287468400144, + "loss": 3.0781, + "loss_text": 0.5977, + "state_loss_0": 0.0, + "step": 16775 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.375, + "epoch": 1.2116287468400144, + "loss": 3.25, + "loss_text": 0.5508, + "state_loss_0": 0.0, + "step": 16775 + }, + { + "epoch": 1.2134344528710725, + "grad_norm": 0.6863119006156921, + "learning_rate": 1.7682831305325766e-05, + "loss": 3.0621, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.3906, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.4844, + "eval_loss_AQACONVA": 3.4844, + "eval_loss_text_AQACONVA": 1.9219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.2969, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.75, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 3.1094, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 3.2188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.7188, + "eval_loss_AQACONVA": 3.7188, + "eval_loss_text_AQACONVA": 2.6562, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.3125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.625, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5469, + "eval_audio_loss_6_RQACONVA": 3.4062, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 2.2344, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7188, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.8672, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4688, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_RQACONVA": 3.4688, + "eval_audio_loss_1_RQACONVA": 3.8281, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.125, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.625, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 2.5312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5625, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 3.0312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.25, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6875, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.5156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.7734, + "eval_loss_RQACONV": 0.7734, + "eval_loss_text_RQACONV": 1.5469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.7383, + "eval_loss_RQACONV": 0.7383, + "eval_loss_text_RQACONV": 1.4766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.9023, + "eval_loss_RQACONV": 0.9023, + "eval_loss_text_RQACONV": 1.8047, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.7461, + "eval_loss_RQACONV": 0.7461, + "eval_loss_text_RQACONV": 1.4922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.7969, + "eval_loss_RQACONV": 0.7969, + "eval_loss_text_RQACONV": 1.5938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.9492, + "eval_loss_RQACONV": 0.9492, + "eval_loss_text_RQACONV": 1.8984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.4648, + "eval_loss_RQACONV": 0.4648, + "eval_loss_text_RQACONV": 0.9297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.6602, + "eval_loss_RQACONV": 0.6602, + "eval_loss_text_RQACONV": 1.3203, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.498, + "eval_loss_RQACONV": 0.498, + "eval_loss_text_RQACONV": 0.9961, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 1.4141, + "eval_loss_RQACONV": 1.4141, + "eval_loss_text_RQACONV": 2.8281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.1299, + "eval_loss_RQACONV": 0.1299, + "eval_loss_text_RQACONV": 0.2598, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.1426, + "eval_loss_RQACONV": 0.1426, + "eval_loss_text_RQACONV": 0.2852, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.0762, + "eval_loss_RQACONV": 0.0762, + "eval_loss_text_RQACONV": 0.1523, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.1621, + "eval_loss_RQACONV": 0.1621, + "eval_loss_text_RQACONV": 0.3242, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.0337, + "eval_loss_RQACONV": 0.0337, + "eval_loss_text_RQACONV": 0.0674, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.1387, + "eval_loss_RQACONV": 0.1387, + "eval_loss_text_RQACONV": 0.2773, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.1025, + "eval_loss_RQACONV": 0.1025, + "eval_loss_text_RQACONV": 0.2051, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.0967, + "eval_loss_RQACONV": 0.0967, + "eval_loss_text_RQACONV": 0.1934, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 16800 + }, + { + "epoch": 1.2134344528710725, + "eval_loss": 1.5816025733947754, + "eval_runtime": 27.4624, + "eval_samples_per_second": 194.775, + "eval_steps_per_second": 1.529, + "step": 16800 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.1719, + "epoch": 1.2134344528710725, + "loss": 2.9531, + "loss_text": 0.3047, + "state_loss_0": 0.0, + "step": 16800 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.625, + "audio_loss_6": 3.1562, + "epoch": 1.2134344528710725, + "loss": 3.0625, + "loss_text": 0.2949, + "state_loss_0": 0.0, + "step": 16800 + }, + { + "epoch": 1.2152401589021307, + "grad_norm": 0.7430329322814941, + "learning_rate": 1.761296020621358e-05, + "loss": 3.0684, + "step": 16825 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.3906, + "epoch": 1.2152401589021307, + "loss": 3.125, + "loss_text": 0.4082, + "state_loss_0": 0.0, + "step": 16825 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5, + "audio_loss_6": 3.375, + "epoch": 1.2152401589021307, + "loss": 3.0938, + "loss_text": 0.1943, + "state_loss_0": 0.0, + "step": 16825 + }, + { + "epoch": 1.2170458649331888, + "grad_norm": 0.7932845950126648, + "learning_rate": 1.754315227118699e-05, + "loss": 3.0577, + "step": 16850 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1719, + "epoch": 1.2170458649331888, + "loss": 2.9688, + "loss_text": 0.3008, + "state_loss_0": 0.0, + "step": 16850 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.3438, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1406, + "epoch": 1.2170458649331888, + "loss": 2.9844, + "loss_text": 0.2969, + "state_loss_0": 0.0, + "step": 16850 + }, + { + "epoch": 1.218851570964247, + "grad_norm": 0.7270514965057373, + "learning_rate": 1.7473408097150108e-05, + "loss": 3.06, + "step": 16875 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.3438, + "epoch": 1.218851570964247, + "loss": 3.0625, + "loss_text": 0.1064, + "state_loss_0": 0.0, + "step": 16875 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1406, + "epoch": 1.218851570964247, + "loss": 3.0469, + "loss_text": 0.4863, + "state_loss_0": 0.0, + "step": 16875 + }, + { + "epoch": 1.2206572769953052, + "grad_norm": 0.7562538385391235, + "learning_rate": 1.7403728280461835e-05, + "loss": 3.0655, + "step": 16900 + }, + { + "audio_loss_0": 2.6094, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.5, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.0625, + "epoch": 1.2206572769953052, + "loss": 2.9688, + "loss_text": 0.5195, + "state_loss_0": 0.0, + "step": 16900 + }, + { + "audio_loss_0": 2.6875, + "audio_loss_1": 3.25, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.125, + "epoch": 1.2206572769953052, + "loss": 2.9531, + "loss_text": 0.3633, + "state_loss_0": 0.0, + "step": 16900 + }, + { + "epoch": 1.2224629830263634, + "grad_norm": 0.6541838645935059, + "learning_rate": 1.7334113416930775e-05, + "loss": 3.0487, + "step": 16925 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2656, + "epoch": 1.2224629830263634, + "loss": 3.1094, + "loss_text": 0.3125, + "state_loss_0": 0.0, + "step": 16925 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.2188, + "epoch": 1.2224629830263634, + "loss": 3.0469, + "loss_text": 0.2773, + "state_loss_0": 0.0, + "step": 16925 + }, + { + "epoch": 1.2242686890574215, + "grad_norm": 0.7422059178352356, + "learning_rate": 1.7264564101810154e-05, + "loss": 3.0618, + "step": 16950 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.2031, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.375, + "audio_loss_6": 3.0781, + "epoch": 1.2242686890574215, + "loss": 2.9688, + "loss_text": 0.3789, + "state_loss_0": 0.0, + "step": 16950 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2656, + "epoch": 1.2242686890574215, + "loss": 3.0312, + "loss_text": 0.2754, + "state_loss_0": 0.0, + "step": 16950 + }, + { + "epoch": 1.2260743950884796, + "grad_norm": 0.8118678331375122, + "learning_rate": 1.719508092979271e-05, + "loss": 3.0581, + "step": 16975 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.2188, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1406, + "epoch": 1.2260743950884796, + "loss": 3.0156, + "loss_text": 0.2559, + "state_loss_0": 0.0, + "step": 16975 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2188, + "epoch": 1.2260743950884796, + "loss": 3.0781, + "loss_text": 0.498, + "state_loss_0": 0.0, + "step": 16975 + }, + { + "epoch": 1.2278801011195377, + "grad_norm": 0.6683110594749451, + "learning_rate": 1.7125664495005606e-05, + "loss": 3.0592, + "step": 17000 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.3125, + "epoch": 1.2278801011195377, + "loss": 3.1562, + "loss_text": 0.416, + "state_loss_0": 0.0, + "step": 17000 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2969, + "epoch": 1.2278801011195377, + "loss": 3.1094, + "loss_text": 0.6914, + "state_loss_0": 0.0, + "step": 17000 + }, + { + "epoch": 1.2296858071505958, + "grad_norm": 0.6193154454231262, + "learning_rate": 1.7056315391005356e-05, + "loss": 3.0546, + "step": 17025 + }, + { + "audio_loss_0": 2.7188, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.8906, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.5, + "audio_loss_5": 3.2344, + "audio_loss_6": 2.9688, + "epoch": 1.2296858071505958, + "loss": 2.875, + "loss_text": 0.4551, + "state_loss_0": 0.0, + "step": 17025 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.5, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.1719, + "epoch": 1.2296858071505958, + "loss": 3.0156, + "loss_text": 0.4023, + "state_loss_0": 0.0, + "step": 17025 + }, + { + "epoch": 1.231491513181654, + "grad_norm": 0.8179510831832886, + "learning_rate": 1.698703421077276e-05, + "loss": 3.0569, + "step": 17050 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.3438, + "epoch": 1.231491513181654, + "loss": 3.1562, + "loss_text": 0.2734, + "state_loss_0": 0.0, + "step": 17050 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5312, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.0312, + "epoch": 1.231491513181654, + "loss": 3.0156, + "loss_text": 0.6797, + "state_loss_0": 0.0, + "step": 17050 + }, + { + "epoch": 1.2332972192127123, + "grad_norm": 0.7208225727081299, + "learning_rate": 1.691782154670782e-05, + "loss": 3.0555, + "step": 17075 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.3281, + "epoch": 1.2332972192127123, + "loss": 3.0938, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 17075 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.375, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.3438, + "epoch": 1.2332972192127123, + "loss": 3.1875, + "loss_text": 0.5977, + "state_loss_0": 0.0, + "step": 17075 + }, + { + "epoch": 1.2351029252437704, + "grad_norm": 0.6426492929458618, + "learning_rate": 1.6848677990624687e-05, + "loss": 3.0588, + "step": 17100 + }, + { + "audio_loss_0": 2.7188, + "audio_loss_1": 3.3125, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.1094, + "epoch": 1.2351029252437704, + "loss": 2.9688, + "loss_text": 0.373, + "state_loss_0": 0.0, + "step": 17100 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0, + "audio_loss_3": 4.0, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.2656, + "audio_loss_6": 3.0781, + "epoch": 1.2351029252437704, + "loss": 2.9375, + "loss_text": 0.4395, + "state_loss_0": 0.0, + "step": 17100 + }, + { + "epoch": 1.2369086312748285, + "grad_norm": 0.7697896361351013, + "learning_rate": 1.677960413374658e-05, + "loss": 3.0708, + "step": 17125 + }, + { + "audio_loss_0": 2.7188, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.5312, + "audio_loss_5": 3.2656, + "audio_loss_6": 3.0625, + "epoch": 1.2369086312748285, + "loss": 2.9062, + "loss_text": 0.5977, + "state_loss_0": 0.0, + "step": 17125 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2656, + "epoch": 1.2369086312748285, + "loss": 3.0938, + "loss_text": 0.6484, + "state_loss_0": 0.0, + "step": 17125 + }, + { + "epoch": 1.2387143373058866, + "grad_norm": 0.7562309503555298, + "learning_rate": 1.6710600566700745e-05, + "loss": 3.0467, + "step": 17150 + }, + { + "audio_loss_0": 2.7656, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1562, + "epoch": 1.2387143373058866, + "loss": 2.9844, + "loss_text": 0.2656, + "state_loss_0": 0.0, + "step": 17150 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2344, + "epoch": 1.2387143373058866, + "loss": 3.0781, + "loss_text": 0.2969, + "state_loss_0": 0.0, + "step": 17150 + }, + { + "epoch": 1.2405200433369448, + "grad_norm": 0.6694947481155396, + "learning_rate": 1.6641667879513422e-05, + "loss": 3.0559, + "step": 17175 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.625, + "audio_loss_6": 3.3125, + "epoch": 1.2405200433369448, + "loss": 3.1875, + "loss_text": 0.5547, + "state_loss_0": 0.0, + "step": 17175 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2812, + "epoch": 1.2405200433369448, + "loss": 3.125, + "loss_text": 0.3379, + "state_loss_0": 0.0, + "step": 17175 + }, + { + "epoch": 1.2423257493680029, + "grad_norm": 0.6921038627624512, + "learning_rate": 1.657280666160476e-05, + "loss": 3.0603, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.4375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.4219, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.9609, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.4375, + "eval_loss_AQACONVA": 3.4375, + "eval_loss_text_AQACONVA": 1.6406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.75, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 3.125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.7656, + "eval_loss_AQACONVA": 3.7656, + "eval_loss_text_AQACONVA": 3.2969, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 2.7344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.3438, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_RQACONVA": 3.125, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5625, + "eval_audio_loss_6_RQACONVA": 3.3906, + "eval_loss": 3.4844, + "eval_loss_RQACONVA": 3.4844, + "eval_loss_text_RQACONVA": 2.4375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_RQACONVA": 3.125, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 1.8672, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4688, + "eval_audio_loss_2_RQACONVA": 3.2812, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.5156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_RQACONVA": 3.5312, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 3.125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.6875, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.7031, + "eval_loss_RQACONV": 0.7031, + "eval_loss_text_RQACONV": 1.4062, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.5664, + "eval_loss_RQACONV": 0.5664, + "eval_loss_text_RQACONV": 1.1328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.8398, + "eval_loss_RQACONV": 0.8398, + "eval_loss_text_RQACONV": 1.6797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.7305, + "eval_loss_RQACONV": 0.7305, + "eval_loss_text_RQACONV": 1.4609, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.918, + "eval_loss_RQACONV": 0.918, + "eval_loss_text_RQACONV": 1.8359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.7461, + "eval_loss_RQACONV": 0.7461, + "eval_loss_text_RQACONV": 1.4922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.8672, + "eval_loss_RQACONV": 0.8672, + "eval_loss_text_RQACONV": 1.7344, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 1.0234, + "eval_loss_RQACONV": 1.0234, + "eval_loss_text_RQACONV": 2.0469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.4688, + "eval_loss_RQACONV": 0.4688, + "eval_loss_text_RQACONV": 0.9375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.6797, + "eval_loss_RQACONV": 0.6797, + "eval_loss_text_RQACONV": 1.3594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.5586, + "eval_loss_RQACONV": 0.5586, + "eval_loss_text_RQACONV": 1.1172, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 1.4297, + "eval_loss_RQACONV": 1.4297, + "eval_loss_text_RQACONV": 2.8594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.1299, + "eval_loss_RQACONV": 0.1299, + "eval_loss_text_RQACONV": 0.2598, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.1455, + "eval_loss_RQACONV": 0.1455, + "eval_loss_text_RQACONV": 0.291, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.0723, + "eval_loss_RQACONV": 0.0723, + "eval_loss_text_RQACONV": 0.1445, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.1611, + "eval_loss_RQACONV": 0.1611, + "eval_loss_text_RQACONV": 0.3223, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.1494, + "eval_loss_RQACONV": 0.1494, + "eval_loss_text_RQACONV": 0.2988, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.0364, + "eval_loss_RQACONV": 0.0364, + "eval_loss_text_RQACONV": 0.0728, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.1387, + "eval_loss_RQACONV": 0.1387, + "eval_loss_text_RQACONV": 0.2773, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.0957, + "eval_loss_RQACONV": 0.0957, + "eval_loss_text_RQACONV": 0.1914, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.0967, + "eval_loss_RQACONV": 0.0967, + "eval_loss_text_RQACONV": 0.1934, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17200 + }, + { + "epoch": 1.2423257493680029, + "eval_loss": 1.590382695198059, + "eval_runtime": 28.1866, + "eval_samples_per_second": 189.771, + "eval_steps_per_second": 1.49, + "step": 17200 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.375, + "epoch": 1.2423257493680029, + "loss": 3.1562, + "loss_text": 0.2324, + "state_loss_0": 0.0, + "step": 17200 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.25, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.0781, + "epoch": 1.2423257493680029, + "loss": 2.9531, + "loss_text": 0.293, + "state_loss_0": 0.0, + "step": 17200 + }, + { + "epoch": 1.244131455399061, + "grad_norm": 0.7726364731788635, + "learning_rate": 1.6504017501783815e-05, + "loss": 3.055, + "step": 17225 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.4375, + "epoch": 1.244131455399061, + "loss": 3.1875, + "loss_text": 0.3164, + "state_loss_0": 0.0, + "step": 17225 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.3125, + "epoch": 1.244131455399061, + "loss": 3.1562, + "loss_text": 0.373, + "state_loss_0": 0.0, + "step": 17225 + }, + { + "epoch": 1.2459371614301191, + "grad_norm": 0.6612791419029236, + "learning_rate": 1.6435300988243475e-05, + "loss": 3.0589, + "step": 17250 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.1875, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2969, + "epoch": 1.2459371614301191, + "loss": 3.0469, + "loss_text": 0.6055, + "state_loss_0": 0.0, + "step": 17250 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.125, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2344, + "epoch": 1.2459371614301191, + "loss": 3.0781, + "loss_text": 0.3535, + "state_loss_0": 0.0, + "step": 17250 + }, + { + "epoch": 1.2477428674611772, + "grad_norm": 0.772638201713562, + "learning_rate": 1.6366657708555484e-05, + "loss": 3.0542, + "step": 17275 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0781, + "epoch": 1.2477428674611772, + "loss": 3.0156, + "loss_text": 0.2754, + "state_loss_0": 0.0, + "step": 17275 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2656, + "epoch": 1.2477428674611772, + "loss": 3.0625, + "loss_text": 0.5078, + "state_loss_0": 0.0, + "step": 17275 + }, + { + "epoch": 1.2495485734922354, + "grad_norm": 0.739829957485199, + "learning_rate": 1.629808824966536e-05, + "loss": 3.0503, + "step": 17300 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.3594, + "epoch": 1.2495485734922354, + "loss": 3.25, + "loss_text": 0.6914, + "state_loss_0": 0.0, + "step": 17300 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2031, + "epoch": 1.2495485734922354, + "loss": 3.0312, + "loss_text": 0.3066, + "state_loss_0": 0.0, + "step": 17300 + }, + { + "epoch": 1.2513542795232935, + "grad_norm": 0.8378864526748657, + "learning_rate": 1.622959319788742e-05, + "loss": 3.0486, + "step": 17325 + }, + { + "audio_loss_0": 2.7188, + "audio_loss_1": 3.5, + "audio_loss_2": 3.3594, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4375, + "epoch": 1.2513542795232935, + "loss": 3.1875, + "loss_text": 0.6523, + "state_loss_0": 0.0, + "step": 17325 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.375, + "epoch": 1.2513542795232935, + "loss": 3.1719, + "loss_text": 0.5664, + "state_loss_0": 0.0, + "step": 17325 + }, + { + "epoch": 1.2531599855543518, + "grad_norm": 0.7523669600486755, + "learning_rate": 1.616117313889975e-05, + "loss": 3.048, + "step": 17350 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2344, + "epoch": 1.2531599855543518, + "loss": 2.9844, + "loss_text": 0.2119, + "state_loss_0": 0.0, + "step": 17350 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.3594, + "epoch": 1.2531599855543518, + "loss": 3.1406, + "loss_text": 0.2891, + "state_loss_0": 0.0, + "step": 17350 + }, + { + "epoch": 1.25496569158541, + "grad_norm": 0.8651571869850159, + "learning_rate": 1.60928286577392e-05, + "loss": 3.0534, + "step": 17375 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.3438, + "epoch": 1.25496569158541, + "loss": 3.1719, + "loss_text": 0.4355, + "state_loss_0": 0.0, + "step": 17375 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.3906, + "epoch": 1.25496569158541, + "loss": 3.1562, + "loss_text": 0.3633, + "state_loss_0": 0.0, + "step": 17375 + }, + { + "epoch": 1.256771397616468, + "grad_norm": 0.7395412921905518, + "learning_rate": 1.6024560338796377e-05, + "loss": 3.0532, + "step": 17400 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.875, + "audio_loss_5": 3.75, + "audio_loss_6": 3.3594, + "epoch": 1.256771397616468, + "loss": 3.2031, + "loss_text": 0.2559, + "state_loss_0": 0.0, + "step": 17400 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.2188, + "epoch": 1.256771397616468, + "loss": 3.0, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 17400 + }, + { + "epoch": 1.2585771036475262, + "grad_norm": 0.744809627532959, + "learning_rate": 1.595636876581064e-05, + "loss": 3.0598, + "step": 17425 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.8594, + "audio_loss_2": 3.5469, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.5781, + "epoch": 1.2585771036475262, + "loss": 3.375, + "loss_text": 0.6562, + "state_loss_0": 0.0, + "step": 17425 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1562, + "epoch": 1.2585771036475262, + "loss": 2.9844, + "loss_text": 0.1758, + "state_loss_0": 0.0, + "step": 17425 + }, + { + "epoch": 1.2603828096785843, + "grad_norm": 0.7585452198982239, + "learning_rate": 1.5888254521865133e-05, + "loss": 3.0534, + "step": 17450 + }, + { + "audio_loss_0": 2.6406, + "audio_loss_1": 3.0781, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0938, + "epoch": 1.2603828096785843, + "loss": 2.9062, + "loss_text": 0.2031, + "state_loss_0": 0.0, + "step": 17450 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2656, + "epoch": 1.2603828096785843, + "loss": 3.0312, + "loss_text": 0.2578, + "state_loss_0": 0.0, + "step": 17450 + }, + { + "epoch": 1.2621885157096424, + "grad_norm": 0.7531628012657166, + "learning_rate": 1.5820218189381782e-05, + "loss": 3.0436, + "step": 17475 + }, + { + "audio_loss_0": 2.6562, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.0469, + "epoch": 1.2621885157096424, + "loss": 2.875, + "loss_text": 0.2637, + "state_loss_0": 0.0, + "step": 17475 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1875, + "epoch": 1.2621885157096424, + "loss": 3.0625, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 17475 + }, + { + "epoch": 1.2639942217407008, + "grad_norm": 0.6914001107215881, + "learning_rate": 1.5752260350116317e-05, + "loss": 3.0573, + "step": 17500 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2656, + "epoch": 1.2639942217407008, + "loss": 3.0938, + "loss_text": 0.5117, + "state_loss_0": 0.0, + "step": 17500 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.875, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1562, + "epoch": 1.2639942217407008, + "loss": 3.0938, + "loss_text": 0.3477, + "state_loss_0": 0.0, + "step": 17500 + }, + { + "epoch": 1.2657999277717589, + "grad_norm": 0.731653094291687, + "learning_rate": 1.5684381585153297e-05, + "loss": 3.0437, + "step": 17525 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1562, + "epoch": 1.2657999277717589, + "loss": 2.9688, + "loss_text": 0.3887, + "state_loss_0": 0.0, + "step": 17525 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0781, + "epoch": 1.2657999277717589, + "loss": 2.9688, + "loss_text": 0.2617, + "state_loss_0": 0.0, + "step": 17525 + }, + { + "epoch": 1.267605633802817, + "grad_norm": 0.6788240075111389, + "learning_rate": 1.561658247490114e-05, + "loss": 3.0606, + "step": 17550 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.125, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1562, + "epoch": 1.267605633802817, + "loss": 3.0156, + "loss_text": 0.4023, + "state_loss_0": 0.0, + "step": 17550 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.7969, + "audio_loss_2": 3.3906, + "audio_loss_3": 4.5, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.6875, + "audio_loss_6": 3.5, + "epoch": 1.267605633802817, + "loss": 3.375, + "loss_text": 1.0, + "state_loss_0": 0.0, + "step": 17550 + }, + { + "epoch": 1.269411339833875, + "grad_norm": 0.7620913982391357, + "learning_rate": 1.5548863599087178e-05, + "loss": 3.0513, + "step": 17575 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.125, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.125, + "epoch": 1.269411339833875, + "loss": 2.9219, + "loss_text": 0.3574, + "state_loss_0": 0.0, + "step": 17575 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.25, + "audio_loss_6": 3.125, + "epoch": 1.269411339833875, + "loss": 2.9219, + "loss_text": 0.4121, + "state_loss_0": 0.0, + "step": 17575 + }, + { + "epoch": 1.2712170458649332, + "grad_norm": 0.6688727736473083, + "learning_rate": 1.5481225536752675e-05, + "loss": 3.0468, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6562, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.4062, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.4844, + "eval_loss_AQACONVA": 3.4844, + "eval_loss_text_AQACONVA": 1.9219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.4844, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.6484, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 3.9688, + "eval_audio_loss_5_AQACONVA": 3.7344, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 3.125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 3.2031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.625, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 2.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.3281, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.5938, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5469, + "eval_audio_loss_6_RQACONVA": 3.3906, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 2.3125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.8359, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4688, + "eval_audio_loss_2_RQACONVA": 3.2812, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_RQACONVA": 3.5312, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 2.375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 3.0938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.2812, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6875, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.5469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.6992, + "eval_loss_RQACONV": 0.6992, + "eval_loss_text_RQACONV": 1.3984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.7773, + "eval_loss_RQACONV": 0.7773, + "eval_loss_text_RQACONV": 1.5547, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.7422, + "eval_loss_RQACONV": 0.7422, + "eval_loss_text_RQACONV": 1.4844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.8984, + "eval_loss_RQACONV": 0.8984, + "eval_loss_text_RQACONV": 1.7969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.6992, + "eval_loss_RQACONV": 0.6992, + "eval_loss_text_RQACONV": 1.3984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.8359, + "eval_loss_RQACONV": 0.8359, + "eval_loss_text_RQACONV": 1.6719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.9922, + "eval_loss_RQACONV": 0.9922, + "eval_loss_text_RQACONV": 1.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.4668, + "eval_loss_RQACONV": 0.4668, + "eval_loss_text_RQACONV": 0.9336, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.5859, + "eval_loss_RQACONV": 0.5859, + "eval_loss_text_RQACONV": 1.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 1.4219, + "eval_loss_RQACONV": 1.4219, + "eval_loss_text_RQACONV": 2.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.1289, + "eval_loss_RQACONV": 0.1289, + "eval_loss_text_RQACONV": 0.2578, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.1475, + "eval_loss_RQACONV": 0.1475, + "eval_loss_text_RQACONV": 0.2949, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.0776, + "eval_loss_RQACONV": 0.0776, + "eval_loss_text_RQACONV": 0.1553, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.1553, + "eval_loss_RQACONV": 0.1553, + "eval_loss_text_RQACONV": 0.3105, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.1484, + "eval_loss_RQACONV": 0.1484, + "eval_loss_text_RQACONV": 0.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.0298, + "eval_loss_RQACONV": 0.0298, + "eval_loss_text_RQACONV": 0.0596, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.1387, + "eval_loss_RQACONV": 0.1387, + "eval_loss_text_RQACONV": 0.2773, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.0952, + "eval_loss_RQACONV": 0.0952, + "eval_loss_text_RQACONV": 0.1904, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.0972, + "eval_loss_RQACONV": 0.0972, + "eval_loss_text_RQACONV": 0.1943, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 17600 + }, + { + "epoch": 1.2712170458649332, + "eval_loss": 1.5848537683486938, + "eval_runtime": 27.5805, + "eval_samples_per_second": 193.941, + "eval_steps_per_second": 1.523, + "step": 17600 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2188, + "epoch": 1.2712170458649332, + "loss": 3.0938, + "loss_text": 0.4258, + "state_loss_0": 0.0, + "step": 17600 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2344, + "epoch": 1.2712170458649332, + "loss": 3.0625, + "loss_text": 0.5664, + "state_loss_0": 0.0, + "step": 17600 + }, + { + "epoch": 1.2730227518959913, + "grad_norm": 0.7891496419906616, + "learning_rate": 1.5413668866247887e-05, + "loss": 3.0549, + "step": 17625 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.2188, + "audio_loss_2": 3.0, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1875, + "epoch": 1.2730227518959913, + "loss": 3.0312, + "loss_text": 0.5312, + "state_loss_0": 0.0, + "step": 17625 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0938, + "epoch": 1.2730227518959913, + "loss": 2.9219, + "loss_text": 0.2334, + "state_loss_0": 0.0, + "step": 17625 + }, + { + "epoch": 1.2748284579270495, + "grad_norm": 0.7245773077011108, + "learning_rate": 1.5346194165227115e-05, + "loss": 3.0458, + "step": 17650 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2656, + "epoch": 1.2748284579270495, + "loss": 3.0938, + "loss_text": 0.4648, + "state_loss_0": 0.0, + "step": 17650 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1875, + "epoch": 1.2748284579270495, + "loss": 3.0938, + "loss_text": 0.375, + "state_loss_0": 0.0, + "step": 17650 + }, + { + "epoch": 1.2766341639581076, + "grad_norm": 0.6719074845314026, + "learning_rate": 1.5278802010643768e-05, + "loss": 3.0439, + "step": 17675 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.625, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.3906, + "epoch": 1.2766341639581076, + "loss": 3.1094, + "loss_text": 0.168, + "state_loss_0": 0.0, + "step": 17675 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.8594, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1875, + "epoch": 1.2766341639581076, + "loss": 2.9375, + "loss_text": 0.1641, + "state_loss_0": 0.0, + "step": 17675 + }, + { + "epoch": 1.2784398699891657, + "grad_norm": 0.665128767490387, + "learning_rate": 1.5211492978745428e-05, + "loss": 3.0487, + "step": 17700 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.125, + "epoch": 1.2784398699891657, + "loss": 2.9531, + "loss_text": 0.3262, + "state_loss_0": 0.0, + "step": 17700 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.25, + "epoch": 1.2784398699891657, + "loss": 3.0938, + "loss_text": 0.3887, + "state_loss_0": 0.0, + "step": 17700 + }, + { + "epoch": 1.2802455760202238, + "grad_norm": 0.6513350009918213, + "learning_rate": 1.5144267645068922e-05, + "loss": 3.0555, + "step": 17725 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4375, + "epoch": 1.2802455760202238, + "loss": 3.2188, + "loss_text": 0.668, + "state_loss_0": 0.0, + "step": 17725 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2969, + "epoch": 1.2802455760202238, + "loss": 3.0312, + "loss_text": 0.3574, + "state_loss_0": 0.0, + "step": 17725 + }, + { + "epoch": 1.282051282051282, + "grad_norm": 0.6886066198348999, + "learning_rate": 1.5077126584435406e-05, + "loss": 3.0557, + "step": 17750 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.2656, + "epoch": 1.282051282051282, + "loss": 3.1094, + "loss_text": 0.4492, + "state_loss_0": 0.0, + "step": 17750 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.625, + "audio_loss_6": 3.3594, + "epoch": 1.282051282051282, + "loss": 3.125, + "loss_text": 0.2773, + "state_loss_0": 0.0, + "step": 17750 + }, + { + "epoch": 1.2838569880823403, + "grad_norm": 0.6562083959579468, + "learning_rate": 1.5010070370945433e-05, + "loss": 3.0489, + "step": 17775 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2656, + "epoch": 1.2838569880823403, + "loss": 3.0938, + "loss_text": 0.5469, + "state_loss_0": 0.0, + "step": 17775 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.0625, + "epoch": 1.2838569880823403, + "loss": 2.9688, + "loss_text": 0.2393, + "state_loss_0": 0.0, + "step": 17775 + }, + { + "epoch": 1.2856626941133984, + "grad_norm": 0.7384065985679626, + "learning_rate": 1.4943099577974082e-05, + "loss": 3.0482, + "step": 17800 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.1875, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1562, + "epoch": 1.2856626941133984, + "loss": 3.0156, + "loss_text": 0.6016, + "state_loss_0": 0.0, + "step": 17800 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2344, + "epoch": 1.2856626941133984, + "loss": 3.0625, + "loss_text": 0.1162, + "state_loss_0": 0.0, + "step": 17800 + }, + { + "epoch": 1.2874684001444565, + "grad_norm": 0.6809717416763306, + "learning_rate": 1.4876214778166004e-05, + "loss": 3.0447, + "step": 17825 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.3281, + "epoch": 1.2874684001444565, + "loss": 3.0938, + "loss_text": 0.3125, + "state_loss_0": 0.0, + "step": 17825 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2031, + "epoch": 1.2874684001444565, + "loss": 2.9688, + "loss_text": 0.2217, + "state_loss_0": 0.0, + "step": 17825 + }, + { + "epoch": 1.2892741061755146, + "grad_norm": 1.0657081604003906, + "learning_rate": 1.4809416543430565e-05, + "loss": 3.0465, + "step": 17850 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.375, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.9531, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3594, + "epoch": 1.2892741061755146, + "loss": 3.25, + "loss_text": 0.4648, + "state_loss_0": 0.0, + "step": 17850 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.1875, + "epoch": 1.2892741061755146, + "loss": 3.125, + "loss_text": 0.4844, + "state_loss_0": 0.0, + "step": 17850 + }, + { + "epoch": 1.2910798122065728, + "grad_norm": 0.7266601324081421, + "learning_rate": 1.4742705444936938e-05, + "loss": 3.0448, + "step": 17875 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2969, + "epoch": 1.2910798122065728, + "loss": 3.0938, + "loss_text": 0.1216, + "state_loss_0": 0.0, + "step": 17875 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.2031, + "epoch": 1.2910798122065728, + "loss": 3.0625, + "loss_text": 0.3789, + "state_loss_0": 0.0, + "step": 17875 + }, + { + "epoch": 1.2928855182376309, + "grad_norm": 0.7996474504470825, + "learning_rate": 1.4676082053109224e-05, + "loss": 3.0357, + "step": 17900 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.75, + "audio_loss_5": 3.625, + "audio_loss_6": 3.25, + "epoch": 1.2928855182376309, + "loss": 3.1094, + "loss_text": 0.291, + "state_loss_0": 0.0, + "step": 17900 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2656, + "epoch": 1.2928855182376309, + "loss": 3.0938, + "loss_text": 0.3105, + "state_loss_0": 0.0, + "step": 17900 + }, + { + "epoch": 1.294691224268689, + "grad_norm": 0.6678273677825928, + "learning_rate": 1.4609546937621571e-05, + "loss": 3.0527, + "step": 17925 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.25, + "epoch": 1.294691224268689, + "loss": 3.1406, + "loss_text": 0.2451, + "state_loss_0": 0.0, + "step": 17925 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.4062, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4219, + "epoch": 1.294691224268689, + "loss": 3.1719, + "loss_text": 0.2031, + "state_loss_0": 0.0, + "step": 17925 + }, + { + "epoch": 1.2964969302997473, + "grad_norm": 0.7938993573188782, + "learning_rate": 1.4543100667393312e-05, + "loss": 3.0489, + "step": 17950 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.125, + "audio_loss_3": 4.125, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2188, + "epoch": 1.2964969302997473, + "loss": 3.0312, + "loss_text": 0.3867, + "state_loss_0": 0.0, + "step": 17950 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.5156, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.1875, + "epoch": 1.2964969302997473, + "loss": 2.9688, + "loss_text": 0.5508, + "state_loss_0": 0.0, + "step": 17950 + }, + { + "epoch": 1.2983026363308054, + "grad_norm": 0.7633681297302246, + "learning_rate": 1.4476743810584099e-05, + "loss": 3.0412, + "step": 17975 + }, + { + "audio_loss_0": 2.6562, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0312, + "epoch": 1.2983026363308054, + "loss": 2.9219, + "loss_text": 0.2471, + "state_loss_0": 0.0, + "step": 17975 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2031, + "epoch": 1.2983026363308054, + "loss": 3.0938, + "loss_text": 0.3672, + "state_loss_0": 0.0, + "step": 17975 + }, + { + "epoch": 1.3001083423618636, + "grad_norm": 0.6111509799957275, + "learning_rate": 1.4410476934589013e-05, + "loss": 3.0442, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.3906, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4688, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.9766, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.4688, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9531, + "eval_audio_loss_5_AQACONVA": 3.7344, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 3.1719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_AQACONVA": 3.2344, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5625, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.7656, + "eval_loss_AQACONVA": 3.7656, + "eval_loss_text_AQACONVA": 3.2656, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 4.0, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 2.7188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.3594, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.625, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5625, + "eval_audio_loss_6_RQACONVA": 3.3906, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7031, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.8672, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.4375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.6406, + "eval_loss_RQACONVA": 3.6406, + "eval_loss_text_RQACONVA": 2.4688, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 3.1562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.3281, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2344, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6719, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.6562, + "eval_loss_RQACONV": 0.6562, + "eval_loss_text_RQACONV": 1.3125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.7891, + "eval_loss_RQACONV": 0.7891, + "eval_loss_text_RQACONV": 1.5781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.7109, + "eval_loss_RQACONV": 0.7109, + "eval_loss_text_RQACONV": 1.4219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.8984, + "eval_loss_RQACONV": 0.8984, + "eval_loss_text_RQACONV": 1.7969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.7344, + "eval_loss_RQACONV": 0.7344, + "eval_loss_text_RQACONV": 1.4688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.8242, + "eval_loss_RQACONV": 0.8242, + "eval_loss_text_RQACONV": 1.6484, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.9922, + "eval_loss_RQACONV": 0.9922, + "eval_loss_text_RQACONV": 1.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.4727, + "eval_loss_RQACONV": 0.4727, + "eval_loss_text_RQACONV": 0.9453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.6797, + "eval_loss_RQACONV": 0.6797, + "eval_loss_text_RQACONV": 1.3594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.5586, + "eval_loss_RQACONV": 0.5586, + "eval_loss_text_RQACONV": 1.1172, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 1.4297, + "eval_loss_RQACONV": 1.4297, + "eval_loss_text_RQACONV": 2.8594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.123, + "eval_loss_RQACONV": 0.123, + "eval_loss_text_RQACONV": 0.2461, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.1387, + "eval_loss_RQACONV": 0.1387, + "eval_loss_text_RQACONV": 0.2773, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.0742, + "eval_loss_RQACONV": 0.0742, + "eval_loss_text_RQACONV": 0.1484, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.1543, + "eval_loss_RQACONV": 0.1543, + "eval_loss_text_RQACONV": 0.3086, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.1445, + "eval_loss_RQACONV": 0.1445, + "eval_loss_text_RQACONV": 0.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.0322, + "eval_loss_RQACONV": 0.0322, + "eval_loss_text_RQACONV": 0.0645, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.1396, + "eval_loss_RQACONV": 0.1396, + "eval_loss_text_RQACONV": 0.2793, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.1011, + "eval_loss_RQACONV": 0.1011, + "eval_loss_text_RQACONV": 0.2021, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.0942, + "eval_loss_RQACONV": 0.0942, + "eval_loss_text_RQACONV": 0.1885, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 0.6055, + "eval_loss_RQACONV": 0.6055, + "eval_loss_text_RQACONV": 1.2109, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18000 + }, + { + "epoch": 1.3001083423618636, + "eval_loss": 1.5862237215042114, + "eval_runtime": 27.3776, + "eval_samples_per_second": 195.379, + "eval_steps_per_second": 1.534, + "step": 18000 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2969, + "epoch": 1.3001083423618636, + "loss": 3.0781, + "loss_text": 0.2812, + "state_loss_0": 0.0, + "step": 18000 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.25, + "epoch": 1.3001083423618636, + "loss": 3.0156, + "loss_text": 0.3613, + "state_loss_0": 0.0, + "step": 18000 + }, + { + "epoch": 1.3019140483929217, + "grad_norm": 0.7780880928039551, + "learning_rate": 1.4344300606033784e-05, + "loss": 3.0484, + "step": 18025 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.1719, + "epoch": 1.3019140483929217, + "loss": 3.0469, + "loss_text": 0.5508, + "state_loss_0": 0.0, + "step": 18025 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.4219, + "epoch": 1.3019140483929217, + "loss": 3.1875, + "loss_text": 0.4434, + "state_loss_0": 0.0, + "step": 18025 + }, + { + "epoch": 1.3037197544239798, + "grad_norm": 0.6836293935775757, + "learning_rate": 1.4278215390769847e-05, + "loss": 3.0427, + "step": 18050 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.2969, + "epoch": 1.3037197544239798, + "loss": 3.1562, + "loss_text": 0.5273, + "state_loss_0": 0.0, + "step": 18050 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.1875, + "epoch": 1.3037197544239798, + "loss": 2.9375, + "loss_text": 0.3633, + "state_loss_0": 0.0, + "step": 18050 + }, + { + "epoch": 1.305525460455038, + "grad_norm": 0.6984586715698242, + "learning_rate": 1.4212221853869614e-05, + "loss": 3.0431, + "step": 18075 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1875, + "epoch": 1.305525460455038, + "loss": 3.0, + "loss_text": 0.3438, + "state_loss_0": 0.0, + "step": 18075 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.375, + "audio_loss_6": 3.125, + "epoch": 1.305525460455038, + "loss": 3.0781, + "loss_text": 0.4707, + "state_loss_0": 0.0, + "step": 18075 + }, + { + "epoch": 1.307331166486096, + "grad_norm": 0.672048807144165, + "learning_rate": 1.4146320559621538e-05, + "loss": 3.0434, + "step": 18100 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.0469, + "audio_loss_2": 2.7344, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3906, + "audio_loss_6": 2.9688, + "epoch": 1.307331166486096, + "loss": 2.8906, + "loss_text": 0.3438, + "state_loss_0": 0.0, + "step": 18100 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.3594, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1875, + "epoch": 1.307331166486096, + "loss": 3.0625, + "loss_text": 0.5742, + "state_loss_0": 0.0, + "step": 18100 + }, + { + "epoch": 1.3091368725171542, + "grad_norm": 0.7038462162017822, + "learning_rate": 1.4080512071525364e-05, + "loss": 3.0391, + "step": 18125 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.25, + "audio_loss_3": 4.375, + "audio_loss_4": 3.875, + "audio_loss_5": 3.7812, + "audio_loss_6": 3.4375, + "epoch": 1.3091368725171542, + "loss": 3.25, + "loss_text": 0.4336, + "state_loss_0": 0.0, + "step": 18125 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.25, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.9219, + "audio_loss_5": 3.625, + "audio_loss_6": 3.3281, + "epoch": 1.3091368725171542, + "loss": 3.2031, + "loss_text": 0.2168, + "state_loss_0": 0.0, + "step": 18125 + }, + { + "epoch": 1.3109425785482123, + "grad_norm": 0.6545125246047974, + "learning_rate": 1.401479695228729e-05, + "loss": 3.0413, + "step": 18150 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.5, + "audio_loss_6": 3.25, + "epoch": 1.3109425785482123, + "loss": 3.0312, + "loss_text": 0.3105, + "state_loss_0": 0.0, + "step": 18150 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.0156, + "epoch": 1.3109425785482123, + "loss": 2.9844, + "loss_text": 0.4551, + "state_loss_0": 0.0, + "step": 18150 + }, + { + "epoch": 1.3127482845792704, + "grad_norm": 0.8710009455680847, + "learning_rate": 1.3949175763815115e-05, + "loss": 3.0407, + "step": 18175 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.125, + "audio_loss_3": 4.125, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.25, + "epoch": 1.3127482845792704, + "loss": 3.0625, + "loss_text": 0.3516, + "state_loss_0": 0.0, + "step": 18175 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.3125, + "epoch": 1.3127482845792704, + "loss": 3.0312, + "loss_text": 0.2256, + "state_loss_0": 0.0, + "step": 18175 + }, + { + "epoch": 1.3145539906103285, + "grad_norm": 0.7770014405250549, + "learning_rate": 1.38836490672135e-05, + "loss": 3.0416, + "step": 18200 + }, + { + "audio_loss_0": 3.6875, + "audio_loss_1": 3.8125, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.875, + "audio_loss_5": 3.7188, + "audio_loss_6": 3.3906, + "epoch": 1.3145539906103285, + "loss": 3.3438, + "loss_text": 0.4121, + "state_loss_0": 0.0, + "step": 18200 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.25, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2812, + "epoch": 1.3145539906103285, + "loss": 3.0938, + "loss_text": 0.3496, + "state_loss_0": 0.0, + "step": 18200 + }, + { + "epoch": 1.3163596966413869, + "grad_norm": 0.9247949719429016, + "learning_rate": 1.3818217422779115e-05, + "loss": 3.0425, + "step": 18225 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.375, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3438, + "epoch": 1.3163596966413869, + "loss": 3.25, + "loss_text": 0.5703, + "state_loss_0": 0.0, + "step": 18225 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.25, + "epoch": 1.3163596966413869, + "loss": 3.125, + "loss_text": 0.334, + "state_loss_0": 0.0, + "step": 18225 + }, + { + "epoch": 1.318165402672445, + "grad_norm": 0.6530346870422363, + "learning_rate": 1.3752881389995897e-05, + "loss": 3.0401, + "step": 18250 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.25, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.1719, + "epoch": 1.318165402672445, + "loss": 3.0781, + "loss_text": 0.4219, + "state_loss_0": 0.0, + "step": 18250 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.2969, + "epoch": 1.318165402672445, + "loss": 3.1094, + "loss_text": 0.4473, + "state_loss_0": 0.0, + "step": 18250 + }, + { + "epoch": 1.319971108703503, + "grad_norm": 0.6913058161735535, + "learning_rate": 1.3687641527530202e-05, + "loss": 3.0406, + "step": 18275 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.9062, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.625, + "audio_loss_5": 3.2969, + "audio_loss_6": 3.0938, + "epoch": 1.319971108703503, + "loss": 2.9531, + "loss_text": 0.377, + "state_loss_0": 0.0, + "step": 18275 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.1406, + "epoch": 1.319971108703503, + "loss": 3.0312, + "loss_text": 0.1221, + "state_loss_0": 0.0, + "step": 18275 + }, + { + "epoch": 1.3217768147345612, + "grad_norm": 0.7104440331459045, + "learning_rate": 1.362249839322611e-05, + "loss": 3.0404, + "step": 18300 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.1719, + "epoch": 1.3217768147345612, + "loss": 3.0156, + "loss_text": 0.1826, + "state_loss_0": 0.0, + "step": 18300 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.25, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.0, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.2188, + "epoch": 1.3217768147345612, + "loss": 2.9531, + "loss_text": 0.1118, + "state_loss_0": 0.0, + "step": 18300 + }, + { + "epoch": 1.3235825207656193, + "grad_norm": 0.6724638342857361, + "learning_rate": 1.355745254410057e-05, + "loss": 3.0402, + "step": 18325 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.125, + "epoch": 1.3235825207656193, + "loss": 2.9531, + "loss_text": 0.2754, + "state_loss_0": 0.0, + "step": 18325 + }, + { + "audio_loss_0": 3.3438, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.25, + "epoch": 1.3235825207656193, + "loss": 3.1406, + "loss_text": 0.3262, + "state_loss_0": 0.0, + "step": 18325 + }, + { + "epoch": 1.3253882267966774, + "grad_norm": 0.6807225942611694, + "learning_rate": 1.349250453633871e-05, + "loss": 3.0444, + "step": 18350 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.125, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.25, + "epoch": 1.3253882267966774, + "loss": 3.0625, + "loss_text": 0.3184, + "state_loss_0": 0.0, + "step": 18350 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.375, + "epoch": 1.3253882267966774, + "loss": 3.1094, + "loss_text": 0.4199, + "state_loss_0": 0.0, + "step": 18350 + }, + { + "epoch": 1.3271939328277356, + "grad_norm": 0.6872591972351074, + "learning_rate": 1.3427654925289043e-05, + "loss": 3.0395, + "step": 18375 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.375, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.375, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.625, + "audio_loss_6": 3.4375, + "epoch": 1.3271939328277356, + "loss": 3.2188, + "loss_text": 0.4492, + "state_loss_0": 0.0, + "step": 18375 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.8281, + "audio_loss_3": 3.9531, + "audio_loss_4": 3.4375, + "audio_loss_5": 3.2969, + "audio_loss_6": 3.0312, + "epoch": 1.3271939328277356, + "loss": 2.8438, + "loss_text": 0.3047, + "state_loss_0": 0.0, + "step": 18375 + }, + { + "epoch": 1.328999638858794, + "grad_norm": 0.9695538282394409, + "learning_rate": 1.33629042654587e-05, + "loss": 3.0438, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.4219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.4844, + "eval_loss_AQACONVA": 3.4844, + "eval_loss_text_AQACONVA": 1.9453, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.4688, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.6484, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.2969, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9531, + "eval_audio_loss_5_AQACONVA": 3.7188, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 3.1719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 3.25, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 2.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8594, + "eval_audio_loss_5_RQACONVA": 3.5312, + "eval_audio_loss_6_RQACONVA": 3.375, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 2.3438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.8906, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.25, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.6094, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 2.5781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 3.0938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7656, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.6719, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.6992, + "eval_loss_RQACONV": 0.6992, + "eval_loss_text_RQACONV": 1.3984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.8125, + "eval_loss_RQACONV": 0.8125, + "eval_loss_text_RQACONV": 1.625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.7227, + "eval_loss_RQACONV": 0.7227, + "eval_loss_text_RQACONV": 1.4453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.8281, + "eval_loss_RQACONV": 0.8281, + "eval_loss_text_RQACONV": 1.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.6953, + "eval_loss_RQACONV": 0.6953, + "eval_loss_text_RQACONV": 1.3906, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.7969, + "eval_loss_RQACONV": 0.7969, + "eval_loss_text_RQACONV": 1.5938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 1.0078, + "eval_loss_RQACONV": 1.0078, + "eval_loss_text_RQACONV": 2.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.4629, + "eval_loss_RQACONV": 0.4629, + "eval_loss_text_RQACONV": 0.9258, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.5859, + "eval_loss_RQACONV": 0.5859, + "eval_loss_text_RQACONV": 1.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 1.4219, + "eval_loss_RQACONV": 1.4219, + "eval_loss_text_RQACONV": 2.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.125, + "eval_loss_RQACONV": 0.125, + "eval_loss_text_RQACONV": 0.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.1455, + "eval_loss_RQACONV": 0.1455, + "eval_loss_text_RQACONV": 0.291, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.0713, + "eval_loss_RQACONV": 0.0713, + "eval_loss_text_RQACONV": 0.1426, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.1533, + "eval_loss_RQACONV": 0.1533, + "eval_loss_text_RQACONV": 0.3066, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.1445, + "eval_loss_RQACONV": 0.1445, + "eval_loss_text_RQACONV": 0.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.0347, + "eval_loss_RQACONV": 0.0347, + "eval_loss_text_RQACONV": 0.0693, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.1436, + "eval_loss_RQACONV": 0.1436, + "eval_loss_text_RQACONV": 0.2871, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.0986, + "eval_loss_RQACONV": 0.0986, + "eval_loss_text_RQACONV": 0.1973, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.0923, + "eval_loss_RQACONV": 0.0923, + "eval_loss_text_RQACONV": 0.1846, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 0.5859, + "eval_loss_RQACONV": 0.5859, + "eval_loss_text_RQACONV": 1.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18400 + }, + { + "epoch": 1.328999638858794, + "eval_loss": 1.5841739177703857, + "eval_runtime": 28.4853, + "eval_samples_per_second": 187.781, + "eval_steps_per_second": 1.474, + "step": 18400 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.25, + "epoch": 1.328999638858794, + "loss": 3.0625, + "loss_text": 0.1533, + "state_loss_0": 0.0, + "step": 18400 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.25, + "epoch": 1.328999638858794, + "loss": 3.0625, + "loss_text": 0.3848, + "state_loss_0": 0.0, + "step": 18400 + }, + { + "epoch": 1.330805344889852, + "grad_norm": 0.6805699467658997, + "learning_rate": 1.3298253110508754e-05, + "loss": 3.0369, + "step": 18425 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1562, + "epoch": 1.330805344889852, + "loss": 3.0312, + "loss_text": 0.1562, + "state_loss_0": 0.0, + "step": 18425 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4062, + "epoch": 1.330805344889852, + "loss": 3.2188, + "loss_text": 0.668, + "state_loss_0": 0.0, + "step": 18425 + }, + { + "epoch": 1.3326110509209101, + "grad_norm": 0.7521597146987915, + "learning_rate": 1.3233702013249402e-05, + "loss": 3.0303, + "step": 18450 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2344, + "epoch": 1.3326110509209101, + "loss": 3.1094, + "loss_text": 0.4199, + "state_loss_0": 0.0, + "step": 18450 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1875, + "epoch": 1.3326110509209101, + "loss": 3.0312, + "loss_text": 0.2891, + "state_loss_0": 0.0, + "step": 18450 + }, + { + "epoch": 1.3344167569519683, + "grad_norm": 0.8090732097625732, + "learning_rate": 1.3169251525635327e-05, + "loss": 3.0349, + "step": 18475 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.0625, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.25, + "audio_loss_6": 2.9688, + "epoch": 1.3344167569519683, + "loss": 2.875, + "loss_text": 0.2852, + "state_loss_0": 0.0, + "step": 18475 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1406, + "epoch": 1.3344167569519683, + "loss": 2.9531, + "loss_text": 0.2949, + "state_loss_0": 0.0, + "step": 18475 + }, + { + "epoch": 1.3362224629830264, + "grad_norm": 0.7226414084434509, + "learning_rate": 1.3104902198760881e-05, + "loss": 3.0392, + "step": 18500 + }, + { + "audio_loss_0": 2.6562, + "audio_loss_1": 3.0312, + "audio_loss_2": 2.875, + "audio_loss_3": 3.9688, + "audio_loss_4": 3.5, + "audio_loss_5": 3.25, + "audio_loss_6": 2.875, + "epoch": 1.3362224629830264, + "loss": 2.8125, + "loss_text": 0.291, + "state_loss_0": 0.0, + "step": 18500 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5, + "audio_loss_6": 3.3281, + "epoch": 1.3362224629830264, + "loss": 3.0938, + "loss_text": 0.2754, + "state_loss_0": 0.0, + "step": 18500 + }, + { + "epoch": 1.3380281690140845, + "grad_norm": 0.6338071227073669, + "learning_rate": 1.3040654582855475e-05, + "loss": 3.0268, + "step": 18525 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.8906, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.1719, + "epoch": 1.3380281690140845, + "loss": 2.9844, + "loss_text": 0.1943, + "state_loss_0": 0.0, + "step": 18525 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2031, + "epoch": 1.3380281690140845, + "loss": 3.0625, + "loss_text": 0.1553, + "state_loss_0": 0.0, + "step": 18525 + }, + { + "epoch": 1.3398338750451426, + "grad_norm": 0.6913328170776367, + "learning_rate": 1.2976509227278799e-05, + "loss": 3.0389, + "step": 18550 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2344, + "epoch": 1.3398338750451426, + "loss": 3.0156, + "loss_text": 0.1982, + "state_loss_0": 0.0, + "step": 18550 + }, + { + "audio_loss_0": 2.6562, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0938, + "epoch": 1.3398338750451426, + "loss": 2.9375, + "loss_text": 0.2734, + "state_loss_0": 0.0, + "step": 18550 + }, + { + "epoch": 1.3416395810762007, + "grad_norm": 0.6980537176132202, + "learning_rate": 1.2912466680516144e-05, + "loss": 3.0398, + "step": 18575 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.1875, + "epoch": 1.3416395810762007, + "loss": 3.0312, + "loss_text": 0.4395, + "state_loss_0": 0.0, + "step": 18575 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2656, + "epoch": 1.3416395810762007, + "loss": 3.0625, + "loss_text": 0.3066, + "state_loss_0": 0.0, + "step": 18575 + }, + { + "epoch": 1.3434452871072589, + "grad_norm": 0.7022261619567871, + "learning_rate": 1.2848527490173734e-05, + "loss": 3.0315, + "step": 18600 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1875, + "epoch": 1.3434452871072589, + "loss": 3.0312, + "loss_text": 0.334, + "state_loss_0": 0.0, + "step": 18600 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2969, + "epoch": 1.3434452871072589, + "loss": 3.0781, + "loss_text": 0.4434, + "state_loss_0": 0.0, + "step": 18600 + }, + { + "epoch": 1.345250993138317, + "grad_norm": 0.8272330164909363, + "learning_rate": 1.2784692202974008e-05, + "loss": 3.0392, + "step": 18625 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.2031, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.1406, + "epoch": 1.345250993138317, + "loss": 3.0156, + "loss_text": 0.4258, + "state_loss_0": 0.0, + "step": 18625 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2188, + "epoch": 1.345250993138317, + "loss": 3.0781, + "loss_text": 0.3984, + "state_loss_0": 0.0, + "step": 18625 + }, + { + "epoch": 1.347056699169375, + "grad_norm": 0.714826226234436, + "learning_rate": 1.2720961364750989e-05, + "loss": 3.0254, + "step": 18650 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1875, + "epoch": 1.347056699169375, + "loss": 3.0, + "loss_text": 0.3398, + "state_loss_0": 0.0, + "step": 18650 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.2969, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.0469, + "epoch": 1.347056699169375, + "loss": 2.9375, + "loss_text": 0.2598, + "state_loss_0": 0.0, + "step": 18650 + }, + { + "epoch": 1.3488624052004334, + "grad_norm": 0.8333682417869568, + "learning_rate": 1.265733552044556e-05, + "loss": 3.0306, + "step": 18675 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.1719, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.2188, + "audio_loss_6": 3.1094, + "epoch": 1.3488624052004334, + "loss": 2.9688, + "loss_text": 0.25, + "state_loss_0": 0.0, + "step": 18675 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2344, + "epoch": 1.3488624052004334, + "loss": 3.0156, + "loss_text": 0.1396, + "state_loss_0": 0.0, + "step": 18675 + }, + { + "epoch": 1.3506681112314916, + "grad_norm": 0.7275634407997131, + "learning_rate": 1.2593815214100873e-05, + "loss": 3.0275, + "step": 18700 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.3125, + "epoch": 1.3506681112314916, + "loss": 3.125, + "loss_text": 0.5391, + "state_loss_0": 0.0, + "step": 18700 + }, + { + "audio_loss_0": 2.7188, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.4844, + "audio_loss_5": 3.2188, + "audio_loss_6": 3.1094, + "epoch": 1.3506681112314916, + "loss": 2.8906, + "loss_text": 0.1885, + "state_loss_0": 0.0, + "step": 18700 + }, + { + "epoch": 1.3524738172625497, + "grad_norm": 0.7358893156051636, + "learning_rate": 1.2530400988857626e-05, + "loss": 3.0346, + "step": 18725 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.4375, + "epoch": 1.3524738172625497, + "loss": 3.2188, + "loss_text": 0.6289, + "state_loss_0": 0.0, + "step": 18725 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.25, + "epoch": 1.3524738172625497, + "loss": 3.0469, + "loss_text": 0.2041, + "state_loss_0": 0.0, + "step": 18725 + }, + { + "epoch": 1.3542795232936078, + "grad_norm": 0.684475302696228, + "learning_rate": 1.2467093386949477e-05, + "loss": 3.0413, + "step": 18750 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1562, + "epoch": 1.3542795232936078, + "loss": 2.9844, + "loss_text": 0.1982, + "state_loss_0": 0.0, + "step": 18750 + }, + { + "audio_loss_0": 2.4219, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.8125, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.4375, + "audio_loss_5": 3.1094, + "audio_loss_6": 2.9531, + "epoch": 1.3542795232936078, + "loss": 2.7969, + "loss_text": 0.3574, + "state_loss_0": 0.0, + "step": 18750 + }, + { + "epoch": 1.356085229324666, + "grad_norm": 0.7515304088592529, + "learning_rate": 1.2403892949698387e-05, + "loss": 3.0378, + "step": 18775 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.125, + "audio_loss_4": 3.4375, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.0625, + "epoch": 1.356085229324666, + "loss": 2.9375, + "loss_text": 0.2266, + "state_loss_0": 0.0, + "step": 18775 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2344, + "epoch": 1.356085229324666, + "loss": 3.125, + "loss_text": 0.4883, + "state_loss_0": 0.0, + "step": 18775 + }, + { + "epoch": 1.357890935355724, + "grad_norm": 0.6739569902420044, + "learning_rate": 1.2340800217509952e-05, + "loss": 3.0308, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.4688, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7031, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.4844, + "eval_loss_AQACONVA": 3.4844, + "eval_loss_text_AQACONVA": 1.9453, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.8125, + "eval_audio_loss_6_AQACONVA": 3.4688, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9531, + "eval_audio_loss_5_AQACONVA": 3.7188, + "eval_audio_loss_6_AQACONVA": 3.4219, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.1406, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 3.25, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 2.7344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.3281, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.5938, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5469, + "eval_audio_loss_6_RQACONVA": 3.3594, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.7031, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.8594, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.5156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_RQACONVA": 3.4688, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.5, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 2.375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_RQACONVA": 3.2656, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.7031, + "eval_loss_RQACONVA": 3.7031, + "eval_loss_text_RQACONVA": 3.1562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_RQACONVA": 3.1562, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.75, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.2188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6719, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.6797, + "eval_loss_RQACONV": 0.6797, + "eval_loss_text_RQACONV": 1.3594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.582, + "eval_loss_RQACONV": 0.582, + "eval_loss_text_RQACONV": 1.1641, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.8047, + "eval_loss_RQACONV": 0.8047, + "eval_loss_text_RQACONV": 1.6094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.7305, + "eval_loss_RQACONV": 0.7305, + "eval_loss_text_RQACONV": 1.4609, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.918, + "eval_loss_RQACONV": 0.918, + "eval_loss_text_RQACONV": 1.8359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.7188, + "eval_loss_RQACONV": 0.7188, + "eval_loss_text_RQACONV": 1.4375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.8281, + "eval_loss_RQACONV": 0.8281, + "eval_loss_text_RQACONV": 1.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.957, + "eval_loss_RQACONV": 0.957, + "eval_loss_text_RQACONV": 1.9141, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.4766, + "eval_loss_RQACONV": 0.4766, + "eval_loss_text_RQACONV": 0.9531, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 1.4375, + "eval_loss_RQACONV": 1.4375, + "eval_loss_text_RQACONV": 2.875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.1318, + "eval_loss_RQACONV": 0.1318, + "eval_loss_text_RQACONV": 0.2637, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.1465, + "eval_loss_RQACONV": 0.1465, + "eval_loss_text_RQACONV": 0.293, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.0718, + "eval_loss_RQACONV": 0.0718, + "eval_loss_text_RQACONV": 0.1436, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.1436, + "eval_loss_RQACONV": 0.1436, + "eval_loss_text_RQACONV": 0.2871, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.0325, + "eval_loss_RQACONV": 0.0325, + "eval_loss_text_RQACONV": 0.0649, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.1357, + "eval_loss_RQACONV": 0.1357, + "eval_loss_text_RQACONV": 0.2715, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.0918, + "eval_loss_RQACONV": 0.0918, + "eval_loss_text_RQACONV": 0.1836, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.0918, + "eval_loss_RQACONV": 0.0918, + "eval_loss_text_RQACONV": 0.1836, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 18800 + }, + { + "epoch": 1.357890935355724, + "eval_loss": 1.5862106084823608, + "eval_runtime": 27.5962, + "eval_samples_per_second": 193.831, + "eval_steps_per_second": 1.522, + "step": 18800 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2031, + "epoch": 1.357890935355724, + "loss": 3.0938, + "loss_text": 0.6211, + "state_loss_0": 0.0, + "step": 18800 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.1562, + "epoch": 1.357890935355724, + "loss": 3.0312, + "loss_text": 0.3867, + "state_loss_0": 0.0, + "step": 18800 + }, + { + "epoch": 1.3596966413867824, + "grad_norm": 0.7255478501319885, + "learning_rate": 1.2277815729868869e-05, + "loss": 3.0284, + "step": 18825 + }, + { + "audio_loss_0": 2.7656, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2812, + "epoch": 1.3596966413867824, + "loss": 3.0, + "loss_text": 0.208, + "state_loss_0": 0.0, + "step": 18825 + }, + { + "audio_loss_0": 2.7656, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0625, + "epoch": 1.3596966413867824, + "loss": 2.9062, + "loss_text": 0.2227, + "state_loss_0": 0.0, + "step": 18825 + }, + { + "epoch": 1.3615023474178405, + "grad_norm": 0.7205430865287781, + "learning_rate": 1.2214940025334227e-05, + "loss": 3.0339, + "step": 18850 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.3125, + "epoch": 1.3615023474178405, + "loss": 3.0625, + "loss_text": 0.3496, + "state_loss_0": 0.0, + "step": 18850 + }, + { + "audio_loss_0": 3.4531, + "audio_loss_1": 3.375, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.4844, + "epoch": 1.3615023474178405, + "loss": 3.2188, + "loss_text": 0.4102, + "state_loss_0": 0.0, + "step": 18850 + }, + { + "epoch": 1.3633080534488986, + "grad_norm": 0.7553040981292725, + "learning_rate": 1.215217364153498e-05, + "loss": 3.0343, + "step": 18875 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2969, + "epoch": 1.3633080534488986, + "loss": 3.1094, + "loss_text": 0.5273, + "state_loss_0": 0.0, + "step": 18875 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.3125, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2656, + "epoch": 1.3633080534488986, + "loss": 3.0938, + "loss_text": 0.3203, + "state_loss_0": 0.0, + "step": 18875 + }, + { + "epoch": 1.3651137594799567, + "grad_norm": 0.6768721342086792, + "learning_rate": 1.2089517115165295e-05, + "loss": 3.0343, + "step": 18900 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2031, + "epoch": 1.3651137594799567, + "loss": 3.0938, + "loss_text": 0.4258, + "state_loss_0": 0.0, + "step": 18900 + }, + { + "audio_loss_0": 2.6719, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.0938, + "epoch": 1.3651137594799567, + "loss": 2.9531, + "loss_text": 0.2578, + "state_loss_0": 0.0, + "step": 18900 + }, + { + "epoch": 1.3669194655110148, + "grad_norm": 0.6530569791793823, + "learning_rate": 1.2026970981980007e-05, + "loss": 3.0302, + "step": 18925 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2188, + "epoch": 1.3669194655110148, + "loss": 3.0781, + "loss_text": 0.4297, + "state_loss_0": 0.0, + "step": 18925 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.25, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2969, + "epoch": 1.3669194655110148, + "loss": 3.0938, + "loss_text": 0.5938, + "state_loss_0": 0.0, + "step": 18925 + }, + { + "epoch": 1.368725171542073, + "grad_norm": 0.7265365719795227, + "learning_rate": 1.1964535776789987e-05, + "loss": 3.0347, + "step": 18950 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.25, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.25, + "epoch": 1.368725171542073, + "loss": 3.0938, + "loss_text": 0.4609, + "state_loss_0": 0.0, + "step": 18950 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.2031, + "epoch": 1.368725171542073, + "loss": 3.0625, + "loss_text": 0.3809, + "state_loss_0": 0.0, + "step": 18950 + }, + { + "epoch": 1.370530877573131, + "grad_norm": 0.6858474612236023, + "learning_rate": 1.1902212033457624e-05, + "loss": 3.0291, + "step": 18975 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.2812, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.375, + "audio_loss_6": 3.0781, + "epoch": 1.370530877573131, + "loss": 2.9375, + "loss_text": 0.4668, + "state_loss_0": 0.0, + "step": 18975 + }, + { + "audio_loss_0": 2.625, + "audio_loss_1": 3.0781, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0, + "audio_loss_4": 3.4375, + "audio_loss_5": 3.2812, + "audio_loss_6": 2.9844, + "epoch": 1.370530877573131, + "loss": 2.8438, + "loss_text": 0.4453, + "state_loss_0": 0.0, + "step": 18975 + }, + { + "epoch": 1.3723365836041892, + "grad_norm": 0.7194188237190247, + "learning_rate": 1.1840000284892239e-05, + "loss": 3.0252, + "step": 19000 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.25, + "epoch": 1.3723365836041892, + "loss": 3.1094, + "loss_text": 0.3633, + "state_loss_0": 0.0, + "step": 19000 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.0156, + "epoch": 1.3723365836041892, + "loss": 2.9688, + "loss_text": 0.2656, + "state_loss_0": 0.0, + "step": 19000 + }, + { + "epoch": 1.3741422896352473, + "grad_norm": 0.659325897693634, + "learning_rate": 1.1777901063045483e-05, + "loss": 3.0319, + "step": 19025 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1875, + "epoch": 1.3741422896352473, + "loss": 3.0312, + "loss_text": 0.416, + "state_loss_0": 0.0, + "step": 19025 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2344, + "epoch": 1.3741422896352473, + "loss": 3.0156, + "loss_text": 0.2334, + "state_loss_0": 0.0, + "step": 19025 + }, + { + "epoch": 1.3759479956663054, + "grad_norm": 0.6732504367828369, + "learning_rate": 1.1715914898906877e-05, + "loss": 3.0302, + "step": 19050 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2656, + "epoch": 1.3759479956663054, + "loss": 3.0625, + "loss_text": 0.2041, + "state_loss_0": 0.0, + "step": 19050 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.4688, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.3438, + "epoch": 1.3759479956663054, + "loss": 3.2188, + "loss_text": 0.6953, + "state_loss_0": 0.0, + "step": 19050 + }, + { + "epoch": 1.3777537016973636, + "grad_norm": 0.8259382247924805, + "learning_rate": 1.1654042322499188e-05, + "loss": 3.0371, + "step": 19075 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1094, + "epoch": 1.3777537016973636, + "loss": 3.0469, + "loss_text": 0.3613, + "state_loss_0": 0.0, + "step": 19075 + }, + { + "audio_loss_0": 2.7188, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1406, + "epoch": 1.3777537016973636, + "loss": 2.9844, + "loss_text": 0.4121, + "state_loss_0": 0.0, + "step": 19075 + }, + { + "epoch": 1.379559407728422, + "grad_norm": 0.851152241230011, + "learning_rate": 1.1592283862873962e-05, + "loss": 3.0401, + "step": 19100 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.25, + "audio_loss_2": 2.875, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5, + "audio_loss_5": 3.2188, + "audio_loss_6": 2.9844, + "epoch": 1.379559407728422, + "loss": 2.8906, + "loss_text": 0.2754, + "state_loss_0": 0.0, + "step": 19100 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.1406, + "epoch": 1.379559407728422, + "loss": 2.9844, + "loss_text": 0.2949, + "state_loss_0": 0.0, + "step": 19100 + }, + { + "epoch": 1.38136511375948, + "grad_norm": 0.6754907369613647, + "learning_rate": 1.1530640048106935e-05, + "loss": 3.0273, + "step": 19125 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9375, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.3125, + "epoch": 1.38136511375948, + "loss": 3.2188, + "loss_text": 0.5312, + "state_loss_0": 0.0, + "step": 19125 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.1875, + "epoch": 1.38136511375948, + "loss": 2.9688, + "loss_text": 0.2852, + "state_loss_0": 0.0, + "step": 19125 + }, + { + "epoch": 1.3831708197905381, + "grad_norm": 0.6409271955490112, + "learning_rate": 1.14691114052936e-05, + "loss": 3.0295, + "step": 19150 + }, + { + "audio_loss_0": 2.6406, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1719, + "epoch": 1.3831708197905381, + "loss": 2.9531, + "loss_text": 0.3379, + "state_loss_0": 0.0, + "step": 19150 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.1094, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.125, + "epoch": 1.3831708197905381, + "loss": 2.9062, + "loss_text": 0.1924, + "state_loss_0": 0.0, + "step": 19150 + }, + { + "epoch": 1.3849765258215962, + "grad_norm": 0.6751378774642944, + "learning_rate": 1.1407698460544602e-05, + "loss": 3.0227, + "step": 19175 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.25, + "epoch": 1.3849765258215962, + "loss": 3.0312, + "loss_text": 0.4082, + "state_loss_0": 0.0, + "step": 19175 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.0938, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.125, + "epoch": 1.3849765258215962, + "loss": 2.9531, + "loss_text": 0.3828, + "state_loss_0": 0.0, + "step": 19175 + }, + { + "epoch": 1.3867822318526544, + "grad_norm": 0.718890368938446, + "learning_rate": 1.134640173898133e-05, + "loss": 3.0239, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.4844, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.9688, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.6797, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9375, + "eval_audio_loss_5_AQACONVA": 3.7188, + "eval_audio_loss_6_AQACONVA": 3.4219, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.1875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 3.3281, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 2.7656, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.6562, + "eval_loss_AQACONVA": 3.6562, + "eval_loss_text_AQACONVA": 2.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5469, + "eval_audio_loss_6_RQACONVA": 3.375, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.3906, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7188, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.9219, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4688, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.5, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_RQACONVA": 3.5312, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.8906, + "eval_audio_loss_6_RQACONVA": 3.6094, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.5938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 3.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_RQACONVA": 3.3125, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.25, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.25, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6562, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.707, + "eval_loss_RQACONV": 0.707, + "eval_loss_text_RQACONV": 1.4141, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.793, + "eval_loss_RQACONV": 0.793, + "eval_loss_text_RQACONV": 1.5859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.7148, + "eval_loss_RQACONV": 0.7148, + "eval_loss_text_RQACONV": 1.4297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.9141, + "eval_loss_RQACONV": 0.9141, + "eval_loss_text_RQACONV": 1.8281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.7305, + "eval_loss_RQACONV": 0.7305, + "eval_loss_text_RQACONV": 1.4609, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.8242, + "eval_loss_RQACONV": 0.8242, + "eval_loss_text_RQACONV": 1.6484, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.9844, + "eval_loss_RQACONV": 0.9844, + "eval_loss_text_RQACONV": 1.9688, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.4824, + "eval_loss_RQACONV": 0.4824, + "eval_loss_text_RQACONV": 0.9648, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.6836, + "eval_loss_RQACONV": 0.6836, + "eval_loss_text_RQACONV": 1.3672, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.5664, + "eval_loss_RQACONV": 0.5664, + "eval_loss_text_RQACONV": 1.1328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.6016, + "eval_loss_RQACONV": 0.6016, + "eval_loss_text_RQACONV": 1.2031, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.5117, + "eval_loss_RQACONV": 0.5117, + "eval_loss_text_RQACONV": 1.0234, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 1.4453, + "eval_loss_RQACONV": 1.4453, + "eval_loss_text_RQACONV": 2.8906, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.126, + "eval_loss_RQACONV": 0.126, + "eval_loss_text_RQACONV": 0.252, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.1455, + "eval_loss_RQACONV": 0.1455, + "eval_loss_text_RQACONV": 0.291, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.0723, + "eval_loss_RQACONV": 0.0723, + "eval_loss_text_RQACONV": 0.1445, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.1514, + "eval_loss_RQACONV": 0.1514, + "eval_loss_text_RQACONV": 0.3027, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.1445, + "eval_loss_RQACONV": 0.1445, + "eval_loss_text_RQACONV": 0.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.0298, + "eval_loss_RQACONV": 0.0298, + "eval_loss_text_RQACONV": 0.0596, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.1357, + "eval_loss_RQACONV": 0.1357, + "eval_loss_text_RQACONV": 0.2715, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.0977, + "eval_loss_RQACONV": 0.0977, + "eval_loss_text_RQACONV": 0.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.0913, + "eval_loss_RQACONV": 0.0913, + "eval_loss_text_RQACONV": 0.1826, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19200 + }, + { + "epoch": 1.3867822318526544, + "eval_loss": 1.5875295400619507, + "eval_runtime": 27.9737, + "eval_samples_per_second": 191.215, + "eval_steps_per_second": 1.501, + "step": 19200 + }, + { + "audio_loss_0": 2.6562, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2656, + "epoch": 1.3867822318526544, + "loss": 3.0156, + "loss_text": 0.3926, + "state_loss_0": 0.0, + "step": 19200 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1406, + "epoch": 1.3867822318526544, + "loss": 3.0, + "loss_text": 0.334, + "state_loss_0": 0.0, + "step": 19200 + }, + { + "epoch": 1.3885879378837125, + "grad_norm": 0.7641562819480896, + "learning_rate": 1.1285221764731383e-05, + "loss": 3.0229, + "step": 19225 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.375, + "epoch": 1.3885879378837125, + "loss": 3.1875, + "loss_text": 0.3848, + "state_loss_0": 0.0, + "step": 19225 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.3125, + "epoch": 1.3885879378837125, + "loss": 3.2188, + "loss_text": 0.6641, + "state_loss_0": 0.0, + "step": 19225 + }, + { + "epoch": 1.3903936439147706, + "grad_norm": 0.6727105975151062, + "learning_rate": 1.1224159060924067e-05, + "loss": 3.0261, + "step": 19250 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.2031, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1562, + "epoch": 1.3903936439147706, + "loss": 3.0156, + "loss_text": 0.2734, + "state_loss_0": 0.0, + "step": 19250 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.0625, + "epoch": 1.3903936439147706, + "loss": 3.0, + "loss_text": 0.5273, + "state_loss_0": 0.0, + "step": 19250 + }, + { + "epoch": 1.392199349945829, + "grad_norm": 0.6369966268539429, + "learning_rate": 1.116321414968598e-05, + "loss": 3.025, + "step": 19275 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.2969, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2812, + "epoch": 1.392199349945829, + "loss": 3.0625, + "loss_text": 0.4062, + "state_loss_0": 0.0, + "step": 19275 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.1719, + "epoch": 1.392199349945829, + "loss": 2.9688, + "loss_text": 0.2852, + "state_loss_0": 0.0, + "step": 19275 + }, + { + "epoch": 1.394005055976887, + "grad_norm": 0.6614473462104797, + "learning_rate": 1.1102387552136487e-05, + "loss": 3.0246, + "step": 19300 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.3438, + "epoch": 1.394005055976887, + "loss": 3.0781, + "loss_text": 0.2168, + "state_loss_0": 0.0, + "step": 19300 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2656, + "epoch": 1.394005055976887, + "loss": 3.125, + "loss_text": 0.332, + "state_loss_0": 0.0, + "step": 19300 + }, + { + "epoch": 1.3958107620079452, + "grad_norm": 0.8347592949867249, + "learning_rate": 1.1041679788383324e-05, + "loss": 3.024, + "step": 19325 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1406, + "epoch": 1.3958107620079452, + "loss": 2.9844, + "loss_text": 0.5352, + "state_loss_0": 0.0, + "step": 19325 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.0781, + "epoch": 1.3958107620079452, + "loss": 3.0156, + "loss_text": 0.2695, + "state_loss_0": 0.0, + "step": 19325 + }, + { + "epoch": 1.3976164680390033, + "grad_norm": 0.6893362402915955, + "learning_rate": 1.0981091377518083e-05, + "loss": 3.0227, + "step": 19350 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.2188, + "epoch": 1.3976164680390033, + "loss": 3.0156, + "loss_text": 0.3828, + "state_loss_0": 0.0, + "step": 19350 + }, + { + "audio_loss_0": 2.7188, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.2344, + "epoch": 1.3976164680390033, + "loss": 3.0, + "loss_text": 0.3086, + "state_loss_0": 0.0, + "step": 19350 + }, + { + "epoch": 1.3994221740700614, + "grad_norm": 0.8349085450172424, + "learning_rate": 1.0920622837611835e-05, + "loss": 3.0175, + "step": 19375 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2188, + "epoch": 1.3994221740700614, + "loss": 3.0781, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 19375 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2344, + "epoch": 1.3994221740700614, + "loss": 3.0781, + "loss_text": 0.3828, + "state_loss_0": 0.0, + "step": 19375 + }, + { + "epoch": 1.4012278801011195, + "grad_norm": 0.7408316135406494, + "learning_rate": 1.0860274685710683e-05, + "loss": 3.0193, + "step": 19400 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.2656, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.0781, + "epoch": 1.4012278801011195, + "loss": 2.9531, + "loss_text": 0.291, + "state_loss_0": 0.0, + "step": 19400 + }, + { + "audio_loss_0": 2.6875, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.3438, + "epoch": 1.4012278801011195, + "loss": 3.0469, + "loss_text": 0.3594, + "state_loss_0": 0.0, + "step": 19400 + }, + { + "epoch": 1.4030335861321777, + "grad_norm": 0.6263195872306824, + "learning_rate": 1.0800047437831295e-05, + "loss": 3.0206, + "step": 19425 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2031, + "epoch": 1.4030335861321777, + "loss": 3.0469, + "loss_text": 0.2832, + "state_loss_0": 0.0, + "step": 19425 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.2812, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.3125, + "audio_loss_6": 2.9844, + "epoch": 1.4030335861321777, + "loss": 2.9688, + "loss_text": 0.457, + "state_loss_0": 0.0, + "step": 19425 + }, + { + "epoch": 1.4048392921632358, + "grad_norm": 0.7096551656723022, + "learning_rate": 1.0739941608956578e-05, + "loss": 3.0292, + "step": 19450 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5, + "audio_loss_6": 3.25, + "epoch": 1.4048392921632358, + "loss": 3.0312, + "loss_text": 0.2988, + "state_loss_0": 0.0, + "step": 19450 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2344, + "epoch": 1.4048392921632358, + "loss": 3.0625, + "loss_text": 0.3223, + "state_loss_0": 0.0, + "step": 19450 + }, + { + "epoch": 1.406644998194294, + "grad_norm": 0.713141918182373, + "learning_rate": 1.0679957713031171e-05, + "loss": 3.022, + "step": 19475 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2344, + "epoch": 1.406644998194294, + "loss": 3.0469, + "loss_text": 0.2373, + "state_loss_0": 0.0, + "step": 19475 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.25, + "epoch": 1.406644998194294, + "loss": 3.0625, + "loss_text": 0.2539, + "state_loss_0": 0.0, + "step": 19475 + }, + { + "epoch": 1.408450704225352, + "grad_norm": 0.6830669641494751, + "learning_rate": 1.0620096262957166e-05, + "loss": 3.0211, + "step": 19500 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5312, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.0625, + "epoch": 1.408450704225352, + "loss": 2.9062, + "loss_text": 0.4121, + "state_loss_0": 0.0, + "step": 19500 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.875, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0938, + "epoch": 1.408450704225352, + "loss": 2.9375, + "loss_text": 0.2061, + "state_loss_0": 0.0, + "step": 19500 + }, + { + "epoch": 1.4102564102564101, + "grad_norm": 0.634128987789154, + "learning_rate": 1.0560357770589605e-05, + "loss": 3.025, + "step": 19525 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.25, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.2812, + "audio_loss_6": 3.125, + "epoch": 1.4102564102564101, + "loss": 2.8906, + "loss_text": 0.2227, + "state_loss_0": 0.0, + "step": 19525 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2969, + "epoch": 1.4102564102564101, + "loss": 3.0781, + "loss_text": 0.2422, + "state_loss_0": 0.0, + "step": 19525 + }, + { + "epoch": 1.4120621162874685, + "grad_norm": 0.6565881967544556, + "learning_rate": 1.0500742746732206e-05, + "loss": 3.0324, + "step": 19550 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1094, + "epoch": 1.4120621162874685, + "loss": 3.0312, + "loss_text": 0.3906, + "state_loss_0": 0.0, + "step": 19550 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.1875, + "epoch": 1.4120621162874685, + "loss": 3.0312, + "loss_text": 0.1914, + "state_loss_0": 0.0, + "step": 19550 + }, + { + "epoch": 1.4138678223185266, + "grad_norm": 0.7219380140304565, + "learning_rate": 1.0441251701132912e-05, + "loss": 3.0177, + "step": 19575 + }, + { + "audio_loss_0": 3.375, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.25, + "audio_loss_3": 4.4375, + "audio_loss_4": 3.9844, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.4375, + "epoch": 1.4138678223185266, + "loss": 3.3125, + "loss_text": 0.7031, + "state_loss_0": 0.0, + "step": 19575 + }, + { + "audio_loss_0": 2.6406, + "audio_loss_1": 3.0625, + "audio_loss_2": 2.8906, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.5156, + "audio_loss_5": 3.1719, + "audio_loss_6": 3.0, + "epoch": 1.4138678223185266, + "loss": 2.8125, + "loss_text": 0.2158, + "state_loss_0": 0.0, + "step": 19575 + }, + { + "epoch": 1.4156735283495847, + "grad_norm": 0.6710458993911743, + "learning_rate": 1.0381885142479597e-05, + "loss": 3.0187, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.4844, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_AQACONVA": 3.125, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.9844, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.6641, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9531, + "eval_audio_loss_5_AQACONVA": 3.7188, + "eval_audio_loss_6_AQACONVA": 3.4219, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 3.25, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 3.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.625, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9844, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.7969, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5469, + "eval_audio_loss_6_RQACONVA": 3.375, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 2.3125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_RQACONVA": 3.1094, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3281, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7188, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.4375, + "eval_loss_RQACONVA": 3.4375, + "eval_loss_text_RQACONVA": 1.9297, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.25, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_RQACONVA": 3.4375, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.8438, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 2.3906, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 3.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.75, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.3438, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.3125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6719, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.7891, + "eval_loss_RQACONV": 0.7891, + "eval_loss_text_RQACONV": 1.5781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.7656, + "eval_loss_RQACONV": 0.7656, + "eval_loss_text_RQACONV": 1.5312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.9414, + "eval_loss_RQACONV": 0.9414, + "eval_loss_text_RQACONV": 1.8828, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.6875, + "eval_loss_RQACONV": 0.6875, + "eval_loss_text_RQACONV": 1.375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.793, + "eval_loss_RQACONV": 0.793, + "eval_loss_text_RQACONV": 1.5859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.9883, + "eval_loss_RQACONV": 0.9883, + "eval_loss_text_RQACONV": 1.9766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.4707, + "eval_loss_RQACONV": 0.4707, + "eval_loss_text_RQACONV": 0.9414, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 1.4219, + "eval_loss_RQACONV": 1.4219, + "eval_loss_text_RQACONV": 2.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.127, + "eval_loss_RQACONV": 0.127, + "eval_loss_text_RQACONV": 0.2539, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.1426, + "eval_loss_RQACONV": 0.1426, + "eval_loss_text_RQACONV": 0.2852, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.0718, + "eval_loss_RQACONV": 0.0718, + "eval_loss_text_RQACONV": 0.1436, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.1504, + "eval_loss_RQACONV": 0.1504, + "eval_loss_text_RQACONV": 0.3008, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.1416, + "eval_loss_RQACONV": 0.1416, + "eval_loss_text_RQACONV": 0.2832, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.028, + "eval_loss_RQACONV": 0.028, + "eval_loss_text_RQACONV": 0.0559, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.1367, + "eval_loss_RQACONV": 0.1367, + "eval_loss_text_RQACONV": 0.2734, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.0923, + "eval_loss_RQACONV": 0.0923, + "eval_loss_text_RQACONV": 0.1846, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.0903, + "eval_loss_RQACONV": 0.0903, + "eval_loss_text_RQACONV": 0.1807, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 19600 + }, + { + "epoch": 1.4156735283495847, + "eval_loss": 1.5871191024780273, + "eval_runtime": 28.179, + "eval_samples_per_second": 189.822, + "eval_steps_per_second": 1.49, + "step": 19600 + }, + { + "audio_loss_0": 3.3594, + "audio_loss_1": 3.6875, + "audio_loss_2": 3.375, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.9062, + "audio_loss_5": 3.7656, + "audio_loss_6": 3.4531, + "epoch": 1.4156735283495847, + "loss": 3.2969, + "loss_text": 0.4551, + "state_loss_0": 0.0, + "step": 19600 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1719, + "epoch": 1.4156735283495847, + "loss": 3.0156, + "loss_text": 0.3086, + "state_loss_0": 0.0, + "step": 19600 + }, + { + "epoch": 1.4174792343806428, + "grad_norm": 0.6490464806556702, + "learning_rate": 1.0322643578395685e-05, + "loss": 3.0251, + "step": 19625 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.0156, + "epoch": 1.4174792343806428, + "loss": 2.9062, + "loss_text": 0.1719, + "state_loss_0": 0.0, + "step": 19625 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.8906, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.625, + "audio_loss_5": 3.375, + "audio_loss_6": 3.125, + "epoch": 1.4174792343806428, + "loss": 2.9688, + "loss_text": 0.6055, + "state_loss_0": 0.0, + "step": 19625 + }, + { + "epoch": 1.419284940411701, + "grad_norm": 0.7464128136634827, + "learning_rate": 1.0263527515435798e-05, + "loss": 3.0285, + "step": 19650 + }, + { + "audio_loss_0": 2.6562, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1562, + "epoch": 1.419284940411701, + "loss": 2.9844, + "loss_text": 0.4512, + "state_loss_0": 0.0, + "step": 19650 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1562, + "epoch": 1.419284940411701, + "loss": 2.9844, + "loss_text": 0.4043, + "state_loss_0": 0.0, + "step": 19650 + }, + { + "epoch": 1.421090646442759, + "grad_norm": 0.7721043229103088, + "learning_rate": 1.0204537459081459e-05, + "loss": 3.0216, + "step": 19675 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1406, + "epoch": 1.421090646442759, + "loss": 3.0, + "loss_text": 0.2354, + "state_loss_0": 0.0, + "step": 19675 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.3438, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.0781, + "epoch": 1.421090646442759, + "loss": 3.0156, + "loss_text": 0.3516, + "state_loss_0": 0.0, + "step": 19675 + }, + { + "epoch": 1.4228963524738172, + "grad_norm": 0.6692510843276978, + "learning_rate": 1.014567391373673e-05, + "loss": 3.0219, + "step": 19700 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0938, + "epoch": 1.4228963524738172, + "loss": 3.0, + "loss_text": 0.375, + "state_loss_0": 0.0, + "step": 19700 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1719, + "epoch": 1.4228963524738172, + "loss": 2.9844, + "loss_text": 0.3535, + "state_loss_0": 0.0, + "step": 19700 + }, + { + "epoch": 1.4247020585048755, + "grad_norm": 0.8199650645256042, + "learning_rate": 1.0086937382723949e-05, + "loss": 3.0187, + "step": 19725 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.25, + "epoch": 1.4247020585048755, + "loss": 3.0156, + "loss_text": 0.2773, + "state_loss_0": 0.0, + "step": 19725 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.3438, + "epoch": 1.4247020585048755, + "loss": 3.1094, + "loss_text": 0.3965, + "state_loss_0": 0.0, + "step": 19725 + }, + { + "epoch": 1.4265077645359336, + "grad_norm": 0.7435491681098938, + "learning_rate": 1.002832836827936e-05, + "loss": 3.021, + "step": 19750 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2344, + "epoch": 1.4265077645359336, + "loss": 3.0469, + "loss_text": 0.3691, + "state_loss_0": 0.0, + "step": 19750 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2969, + "epoch": 1.4265077645359336, + "loss": 3.0625, + "loss_text": 0.2969, + "state_loss_0": 0.0, + "step": 19750 + }, + { + "epoch": 1.4283134705669918, + "grad_norm": 0.7874850034713745, + "learning_rate": 9.9698473715489e-06, + "loss": 3.026, + "step": 19775 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1562, + "epoch": 1.4283134705669918, + "loss": 3.0156, + "loss_text": 0.2598, + "state_loss_0": 0.0, + "step": 19775 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.1094, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.1094, + "epoch": 1.4283134705669918, + "loss": 3.0, + "loss_text": 0.3262, + "state_loss_0": 0.0, + "step": 19775 + }, + { + "epoch": 1.4301191765980499, + "grad_norm": 0.6537978053092957, + "learning_rate": 9.911494892583823e-06, + "loss": 3.0225, + "step": 19800 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.2656, + "epoch": 1.4301191765980499, + "loss": 3.0469, + "loss_text": 0.2617, + "state_loss_0": 0.0, + "step": 19800 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.2812, + "epoch": 1.4301191765980499, + "loss": 3.125, + "loss_text": 0.3418, + "state_loss_0": 0.0, + "step": 19800 + }, + { + "epoch": 1.431924882629108, + "grad_norm": 0.6661292314529419, + "learning_rate": 9.853271430336505e-06, + "loss": 3.022, + "step": 19825 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1406, + "epoch": 1.431924882629108, + "loss": 3.0469, + "loss_text": 0.5273, + "state_loss_0": 0.0, + "step": 19825 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.6562, + "audio_loss_2": 3.3438, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2656, + "epoch": 1.431924882629108, + "loss": 3.1562, + "loss_text": 0.2578, + "state_loss_0": 0.0, + "step": 19825 + }, + { + "epoch": 1.4337305886601661, + "grad_norm": 0.6571654081344604, + "learning_rate": 9.795177482656142e-06, + "loss": 3.0194, + "step": 19850 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.375, + "epoch": 1.4337305886601661, + "loss": 3.1875, + "loss_text": 0.3652, + "state_loss_0": 0.0, + "step": 19850 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.2188, + "epoch": 1.4337305886601661, + "loss": 3.0312, + "loss_text": 0.4473, + "state_loss_0": 0.0, + "step": 19850 + }, + { + "epoch": 1.4355362946912242, + "grad_norm": 0.6501863598823547, + "learning_rate": 9.737213546284468e-06, + "loss": 3.0161, + "step": 19875 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3281, + "epoch": 1.4355362946912242, + "loss": 3.1406, + "loss_text": 0.5195, + "state_loss_0": 0.0, + "step": 19875 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1719, + "epoch": 1.4355362946912242, + "loss": 3.0156, + "loss_text": 0.4551, + "state_loss_0": 0.0, + "step": 19875 + }, + { + "epoch": 1.4373420007222824, + "grad_norm": 0.621985673904419, + "learning_rate": 9.679380116851569e-06, + "loss": 3.0193, + "step": 19900 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.25, + "epoch": 1.4373420007222824, + "loss": 3.125, + "loss_text": 0.3691, + "state_loss_0": 0.0, + "step": 19900 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.2656, + "epoch": 1.4373420007222824, + "loss": 3.1562, + "loss_text": 0.252, + "state_loss_0": 0.0, + "step": 19900 + }, + { + "epoch": 1.4391477067533405, + "grad_norm": 0.6688472628593445, + "learning_rate": 9.621677688871574e-06, + "loss": 3.0222, + "step": 19925 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.6094, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.4062, + "epoch": 1.4391477067533405, + "loss": 3.2344, + "loss_text": 0.5312, + "state_loss_0": 0.0, + "step": 19925 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1875, + "epoch": 1.4391477067533405, + "loss": 3.0312, + "loss_text": 0.6562, + "state_loss_0": 0.0, + "step": 19925 + }, + { + "epoch": 1.4409534127843986, + "grad_norm": 0.7305036187171936, + "learning_rate": 9.564106755738498e-06, + "loss": 3.0132, + "step": 19950 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.3438, + "epoch": 1.4409534127843986, + "loss": 3.1406, + "loss_text": 0.6875, + "state_loss_0": 0.0, + "step": 19950 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.25, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.25, + "epoch": 1.4409534127843986, + "loss": 3.1094, + "loss_text": 0.252, + "state_loss_0": 0.0, + "step": 19950 + }, + { + "epoch": 1.4427591188154567, + "grad_norm": 0.7242705821990967, + "learning_rate": 9.506667809721955e-06, + "loss": 3.0235, + "step": 19975 + }, + { + "audio_loss_0": 2.6406, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.9375, + "audio_loss_3": 3.9844, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.125, + "epoch": 1.4427591188154567, + "loss": 2.875, + "loss_text": 0.2158, + "state_loss_0": 0.0, + "step": 19975 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1719, + "epoch": 1.4427591188154567, + "loss": 3.0, + "loss_text": 0.2949, + "state_loss_0": 0.0, + "step": 19975 + }, + { + "epoch": 1.444564824846515, + "grad_norm": 0.6063821315765381, + "learning_rate": 9.449361341963018e-06, + "loss": 3.0224, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9219, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.4531, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.4844, + "eval_loss_AQACONVA": 3.4844, + "eval_loss_text_AQACONVA": 1.9688, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.4375, + "eval_loss_AQACONVA": 3.4375, + "eval_loss_text_AQACONVA": 1.6562, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9531, + "eval_audio_loss_5_AQACONVA": 3.7188, + "eval_audio_loss_6_AQACONVA": 3.4219, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.2344, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.1562, + "eval_audio_loss_5_AQACONVA": 3.8594, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 3.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7344, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.7656, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.625, + "eval_audio_loss_2_RQACONVA": 3.375, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5312, + "eval_audio_loss_6_RQACONVA": 3.375, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.3906, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7188, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.9141, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.4688, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_RQACONVA": 3.5781, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5625, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 3.2031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.75, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.2812, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_RQACONVA": 3.1875, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4062, + "eval_audio_loss_4_RQACONVA": 3.9219, + "eval_audio_loss_5_RQACONVA": 3.6719, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.5312, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.7031, + "eval_loss_RQACONV": 0.7031, + "eval_loss_text_RQACONV": 1.4062, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.5703, + "eval_loss_RQACONV": 0.5703, + "eval_loss_text_RQACONV": 1.1406, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.8008, + "eval_loss_RQACONV": 0.8008, + "eval_loss_text_RQACONV": 1.6016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.7461, + "eval_loss_RQACONV": 0.7461, + "eval_loss_text_RQACONV": 1.4922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.9219, + "eval_loss_RQACONV": 0.9219, + "eval_loss_text_RQACONV": 1.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.7422, + "eval_loss_RQACONV": 0.7422, + "eval_loss_text_RQACONV": 1.4844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.8281, + "eval_loss_RQACONV": 0.8281, + "eval_loss_text_RQACONV": 1.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 1.0234, + "eval_loss_RQACONV": 1.0234, + "eval_loss_text_RQACONV": 2.0469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.498, + "eval_loss_RQACONV": 0.498, + "eval_loss_text_RQACONV": 0.9961, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.4746, + "eval_loss_RQACONV": 0.4746, + "eval_loss_text_RQACONV": 0.9492, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 1.4297, + "eval_loss_RQACONV": 1.4297, + "eval_loss_text_RQACONV": 2.8594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.127, + "eval_loss_RQACONV": 0.127, + "eval_loss_text_RQACONV": 0.2539, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.1406, + "eval_loss_RQACONV": 0.1406, + "eval_loss_text_RQACONV": 0.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.0713, + "eval_loss_RQACONV": 0.0713, + "eval_loss_text_RQACONV": 0.1426, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.1465, + "eval_loss_RQACONV": 0.1465, + "eval_loss_text_RQACONV": 0.293, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.1455, + "eval_loss_RQACONV": 0.1455, + "eval_loss_text_RQACONV": 0.291, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.0266, + "eval_loss_RQACONV": 0.0266, + "eval_loss_text_RQACONV": 0.0532, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.1318, + "eval_loss_RQACONV": 0.1318, + "eval_loss_text_RQACONV": 0.2637, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.0981, + "eval_loss_RQACONV": 0.0981, + "eval_loss_text_RQACONV": 0.1963, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.0903, + "eval_loss_RQACONV": 0.0903, + "eval_loss_text_RQACONV": 0.1807, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20000 + }, + { + "epoch": 1.444564824846515, + "eval_loss": 1.586337685585022, + "eval_runtime": 28.3884, + "eval_samples_per_second": 188.422, + "eval_steps_per_second": 1.479, + "step": 20000 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.125, + "audio_loss_2": 2.8594, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.2656, + "audio_loss_6": 3.0625, + "epoch": 1.444564824846515, + "loss": 2.875, + "loss_text": 0.2139, + "state_loss_0": 0.0, + "step": 20000 + }, + { + "audio_loss_0": 2.5781, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1094, + "epoch": 1.444564824846515, + "loss": 2.8594, + "loss_text": 0.1992, + "state_loss_0": 0.0, + "step": 20000 + }, + { + "epoch": 1.4463705308775732, + "grad_norm": 0.6519666314125061, + "learning_rate": 9.392187842469941e-06, + "loss": 3.0207, + "step": 20025 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1562, + "epoch": 1.4463705308775732, + "loss": 3.0469, + "loss_text": 0.1738, + "state_loss_0": 0.0, + "step": 20025 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5312, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0625, + "epoch": 1.4463705308775732, + "loss": 2.9375, + "loss_text": 0.3203, + "state_loss_0": 0.0, + "step": 20025 + }, + { + "epoch": 1.4481762369086313, + "grad_norm": 1.0148183107376099, + "learning_rate": 9.335147800114044e-06, + "loss": 3.0155, + "step": 20050 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0781, + "epoch": 1.4481762369086313, + "loss": 2.9844, + "loss_text": 0.3008, + "state_loss_0": 0.0, + "step": 20050 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.1094, + "audio_loss_2": 2.875, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.375, + "audio_loss_6": 3.0781, + "epoch": 1.4481762369086313, + "loss": 2.9531, + "loss_text": 0.5078, + "state_loss_0": 0.0, + "step": 20050 + }, + { + "epoch": 1.4499819429396894, + "grad_norm": 0.6529830694198608, + "learning_rate": 9.278241702625498e-06, + "loss": 3.0168, + "step": 20075 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2188, + "epoch": 1.4499819429396894, + "loss": 3.0781, + "loss_text": 0.2812, + "state_loss_0": 0.0, + "step": 20075 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.1875, + "audio_loss_6": 3.0312, + "epoch": 1.4499819429396894, + "loss": 2.9062, + "loss_text": 0.1338, + "state_loss_0": 0.0, + "step": 20075 + }, + { + "epoch": 1.4517876489707475, + "grad_norm": 0.7028025984764099, + "learning_rate": 9.221470036589132e-06, + "loss": 3.014, + "step": 20100 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.125, + "epoch": 1.4517876489707475, + "loss": 2.9219, + "loss_text": 0.2598, + "state_loss_0": 0.0, + "step": 20100 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.8906, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.375, + "audio_loss_6": 3.0, + "epoch": 1.4517876489707475, + "loss": 2.8906, + "loss_text": 0.1875, + "state_loss_0": 0.0, + "step": 20100 + }, + { + "epoch": 1.4535933550018056, + "grad_norm": 0.5879977345466614, + "learning_rate": 9.16483328744033e-06, + "loss": 3.0238, + "step": 20125 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2188, + "epoch": 1.4535933550018056, + "loss": 3.0781, + "loss_text": 0.459, + "state_loss_0": 0.0, + "step": 20125 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1719, + "epoch": 1.4535933550018056, + "loss": 3.0469, + "loss_text": 0.2988, + "state_loss_0": 0.0, + "step": 20125 + }, + { + "epoch": 1.455399061032864, + "grad_norm": 0.7832474708557129, + "learning_rate": 9.108331939460812e-06, + "loss": 3.0121, + "step": 20150 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1875, + "epoch": 1.455399061032864, + "loss": 3.0312, + "loss_text": 0.167, + "state_loss_0": 0.0, + "step": 20150 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.2812, + "audio_loss_6": 3.125, + "epoch": 1.455399061032864, + "loss": 2.9219, + "loss_text": 0.208, + "state_loss_0": 0.0, + "step": 20150 + }, + { + "epoch": 1.457204767063922, + "grad_norm": 0.7484167218208313, + "learning_rate": 9.051966475774562e-06, + "loss": 3.0142, + "step": 20175 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0, + "audio_loss_3": 4.25, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1562, + "epoch": 1.457204767063922, + "loss": 3.0469, + "loss_text": 0.377, + "state_loss_0": 0.0, + "step": 20175 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.2812, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.0781, + "epoch": 1.457204767063922, + "loss": 2.9531, + "loss_text": 0.1826, + "state_loss_0": 0.0, + "step": 20175 + }, + { + "epoch": 1.4590104730949802, + "grad_norm": 0.7750857472419739, + "learning_rate": 8.995737378343633e-06, + "loss": 3.0164, + "step": 20200 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.1719, + "epoch": 1.4590104730949802, + "loss": 2.9844, + "loss_text": 0.2412, + "state_loss_0": 0.0, + "step": 20200 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.2031, + "epoch": 1.4590104730949802, + "loss": 3.0312, + "loss_text": 0.4648, + "state_loss_0": 0.0, + "step": 20200 + }, + { + "epoch": 1.4608161791260383, + "grad_norm": 0.7010588645935059, + "learning_rate": 8.939645127964077e-06, + "loss": 3.0143, + "step": 20225 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.125, + "epoch": 1.4608161791260383, + "loss": 3.0156, + "loss_text": 0.3359, + "state_loss_0": 0.0, + "step": 20225 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.125, + "audio_loss_2": 2.875, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.2656, + "audio_loss_6": 3.0781, + "epoch": 1.4608161791260383, + "loss": 2.9062, + "loss_text": 0.4219, + "state_loss_0": 0.0, + "step": 20225 + }, + { + "epoch": 1.4626218851570965, + "grad_norm": 0.6188965439796448, + "learning_rate": 8.88369020426182e-06, + "loss": 3.0094, + "step": 20250 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.3281, + "epoch": 1.4626218851570965, + "loss": 3.1406, + "loss_text": 0.2871, + "state_loss_0": 0.0, + "step": 20250 + }, + { + "audio_loss_0": 2.625, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.5312, + "audio_loss_5": 3.1562, + "audio_loss_6": 3.0469, + "epoch": 1.4626218851570965, + "loss": 2.875, + "loss_text": 0.252, + "state_loss_0": 0.0, + "step": 20250 + }, + { + "epoch": 1.4644275911881546, + "grad_norm": 0.6738746762275696, + "learning_rate": 8.827873085688521e-06, + "loss": 3.0114, + "step": 20275 + }, + { + "audio_loss_0": 2.7656, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1406, + "epoch": 1.4644275911881546, + "loss": 2.9531, + "loss_text": 0.3281, + "state_loss_0": 0.0, + "step": 20275 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2656, + "epoch": 1.4644275911881546, + "loss": 3.0625, + "loss_text": 0.375, + "state_loss_0": 0.0, + "step": 20275 + }, + { + "epoch": 1.4662332972192127, + "grad_norm": 0.8085736632347107, + "learning_rate": 8.772194249517545e-06, + "loss": 3.0099, + "step": 20300 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.3125, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.0781, + "epoch": 1.4662332972192127, + "loss": 2.9531, + "loss_text": 0.4492, + "state_loss_0": 0.0, + "step": 20300 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2344, + "epoch": 1.4662332972192127, + "loss": 3.0312, + "loss_text": 0.3125, + "state_loss_0": 0.0, + "step": 20300 + }, + { + "epoch": 1.4680390032502708, + "grad_norm": 0.7361783981323242, + "learning_rate": 8.716654171839834e-06, + "loss": 3.0128, + "step": 20325 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.3594, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1719, + "epoch": 1.4680390032502708, + "loss": 3.0938, + "loss_text": 1.2344, + "state_loss_0": 0.0, + "step": 20325 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1719, + "epoch": 1.4680390032502708, + "loss": 2.9688, + "loss_text": 0.4785, + "state_loss_0": 0.0, + "step": 20325 + }, + { + "epoch": 1.469844709281329, + "grad_norm": 0.8110805749893188, + "learning_rate": 8.661253327559864e-06, + "loss": 3.0111, + "step": 20350 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2656, + "epoch": 1.469844709281329, + "loss": 3.0938, + "loss_text": 0.2773, + "state_loss_0": 0.0, + "step": 20350 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2344, + "epoch": 1.469844709281329, + "loss": 3.125, + "loss_text": 0.4668, + "state_loss_0": 0.0, + "step": 20350 + }, + { + "epoch": 1.471650415312387, + "grad_norm": 0.67500901222229, + "learning_rate": 8.60599219039156e-06, + "loss": 3.0081, + "step": 20375 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.9062, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.625, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1719, + "epoch": 1.471650415312387, + "loss": 2.9375, + "loss_text": 0.4336, + "state_loss_0": 0.0, + "step": 20375 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2656, + "epoch": 1.471650415312387, + "loss": 3.0781, + "loss_text": 0.5781, + "state_loss_0": 0.0, + "step": 20375 + }, + { + "epoch": 1.4734561213434452, + "grad_norm": 0.6516327261924744, + "learning_rate": 8.550871232854277e-06, + "loss": 3.0248, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.4531, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.9766, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.6719, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9375, + "eval_audio_loss_5_AQACONVA": 3.7031, + "eval_audio_loss_6_AQACONVA": 3.4062, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.2031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 3.3438, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.7344, + "eval_loss_AQACONVA": 3.7344, + "eval_loss_text_AQACONVA": 2.7812, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.6719, + "eval_loss_AQACONVA": 3.6719, + "eval_loss_text_AQACONVA": 2.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5312, + "eval_audio_loss_6_RQACONVA": 3.3594, + "eval_loss": 3.4688, + "eval_loss_RQACONVA": 3.4688, + "eval_loss_text_RQACONVA": 2.4531, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6875, + "eval_audio_loss_6_RQACONVA": 3.4062, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.9297, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_RQACONVA": 3.5312, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.5156, + "eval_audio_loss_3_RQACONVA": 4.5312, + "eval_audio_loss_4_RQACONVA": 4.0312, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5625, + "eval_loss": 3.625, + "eval_loss_RQACONVA": 3.625, + "eval_loss_text_RQACONVA": 2.3281, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_RQACONVA": 3.2344, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.6719, + "eval_loss_RQACONVA": 3.6719, + "eval_loss_text_RQACONVA": 3.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.3906, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.2031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9219, + "eval_audio_loss_5_RQACONVA": 3.6719, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.4688, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.6953, + "eval_loss_RQACONV": 0.6953, + "eval_loss_text_RQACONV": 1.3906, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.832, + "eval_loss_RQACONV": 0.832, + "eval_loss_text_RQACONV": 1.6641, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.7383, + "eval_loss_RQACONV": 0.7383, + "eval_loss_text_RQACONV": 1.4766, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.8281, + "eval_loss_RQACONV": 0.8281, + "eval_loss_text_RQACONV": 1.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.6992, + "eval_loss_RQACONV": 0.6992, + "eval_loss_text_RQACONV": 1.3984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.8281, + "eval_loss_RQACONV": 0.8281, + "eval_loss_text_RQACONV": 1.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.9688, + "eval_loss_RQACONV": 0.9688, + "eval_loss_text_RQACONV": 1.9375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.4668, + "eval_loss_RQACONV": 0.4668, + "eval_loss_text_RQACONV": 0.9336, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.5312, + "eval_loss_RQACONV": 0.5312, + "eval_loss_text_RQACONV": 1.0625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 1.4219, + "eval_loss_RQACONV": 1.4219, + "eval_loss_text_RQACONV": 2.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.1235, + "eval_loss_RQACONV": 0.1235, + "eval_loss_text_RQACONV": 0.2471, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.1445, + "eval_loss_RQACONV": 0.1445, + "eval_loss_text_RQACONV": 0.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.0684, + "eval_loss_RQACONV": 0.0684, + "eval_loss_text_RQACONV": 0.1367, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.1484, + "eval_loss_RQACONV": 0.1484, + "eval_loss_text_RQACONV": 0.2969, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.1455, + "eval_loss_RQACONV": 0.1455, + "eval_loss_text_RQACONV": 0.291, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.0297, + "eval_loss_RQACONV": 0.0297, + "eval_loss_text_RQACONV": 0.0593, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.1328, + "eval_loss_RQACONV": 0.1328, + "eval_loss_text_RQACONV": 0.2656, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.0986, + "eval_loss_RQACONV": 0.0986, + "eval_loss_text_RQACONV": 0.1973, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.0898, + "eval_loss_RQACONV": 0.0898, + "eval_loss_text_RQACONV": 0.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20400 + }, + { + "epoch": 1.4734561213434452, + "eval_loss": 1.5809015035629272, + "eval_runtime": 27.5109, + "eval_samples_per_second": 194.432, + "eval_steps_per_second": 1.527, + "step": 20400 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2812, + "epoch": 1.4734561213434452, + "loss": 3.0781, + "loss_text": 0.6094, + "state_loss_0": 0.0, + "step": 20400 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.4688, + "audio_loss_5": 3.2969, + "audio_loss_6": 3.0469, + "epoch": 1.4734561213434452, + "loss": 2.9219, + "loss_text": 0.5664, + "state_loss_0": 0.0, + "step": 20400 + }, + { + "epoch": 1.4752618273745035, + "grad_norm": 0.6022971868515015, + "learning_rate": 8.495890926268712e-06, + "loss": 3.0165, + "step": 20425 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.0, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1719, + "epoch": 1.4752618273745035, + "loss": 3.0, + "loss_text": 0.2109, + "state_loss_0": 0.0, + "step": 20425 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.125, + "epoch": 1.4752618273745035, + "loss": 3.0156, + "loss_text": 0.4238, + "state_loss_0": 0.0, + "step": 20425 + }, + { + "epoch": 1.4770675334055616, + "grad_norm": 0.6978294849395752, + "learning_rate": 8.441051740752937e-06, + "loss": 3.0167, + "step": 20450 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.2031, + "epoch": 1.4770675334055616, + "loss": 3.1094, + "loss_text": 0.3867, + "state_loss_0": 0.0, + "step": 20450 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.125, + "epoch": 1.4770675334055616, + "loss": 2.9531, + "loss_text": 0.4883, + "state_loss_0": 0.0, + "step": 20450 + }, + { + "epoch": 1.4788732394366197, + "grad_norm": 0.6669027209281921, + "learning_rate": 8.386354145218334e-06, + "loss": 3.0186, + "step": 20475 + }, + { + "audio_loss_0": 2.6094, + "audio_loss_1": 3.2812, + "audio_loss_2": 2.9062, + "audio_loss_3": 3.9531, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.1719, + "epoch": 1.4788732394366197, + "loss": 2.8906, + "loss_text": 0.3105, + "state_loss_0": 0.0, + "step": 20475 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2656, + "epoch": 1.4788732394366197, + "loss": 3.1562, + "loss_text": 0.7969, + "state_loss_0": 0.0, + "step": 20475 + }, + { + "epoch": 1.4806789454676779, + "grad_norm": 0.7299829125404358, + "learning_rate": 8.331798607365573e-06, + "loss": 3.0059, + "step": 20500 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.1562, + "epoch": 1.4806789454676779, + "loss": 3.0, + "loss_text": 0.4844, + "state_loss_0": 0.0, + "step": 20500 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1875, + "epoch": 1.4806789454676779, + "loss": 3.0938, + "loss_text": 0.3945, + "state_loss_0": 0.0, + "step": 20500 + }, + { + "epoch": 1.482484651498736, + "grad_norm": 0.6940511465072632, + "learning_rate": 8.277385593680679e-06, + "loss": 3.0157, + "step": 20525 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2188, + "epoch": 1.482484651498736, + "loss": 3.0, + "loss_text": 0.3047, + "state_loss_0": 0.0, + "step": 20525 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2188, + "epoch": 1.482484651498736, + "loss": 3.0312, + "loss_text": 0.1279, + "state_loss_0": 0.0, + "step": 20525 + }, + { + "epoch": 1.484290357529794, + "grad_norm": 0.6060506701469421, + "learning_rate": 8.223115569430959e-06, + "loss": 3.0092, + "step": 20550 + }, + { + "audio_loss_0": 2.5938, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1719, + "epoch": 1.484290357529794, + "loss": 2.9844, + "loss_text": 0.2324, + "state_loss_0": 0.0, + "step": 20550 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.3594, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0781, + "epoch": 1.484290357529794, + "loss": 2.9844, + "loss_text": 0.4629, + "state_loss_0": 0.0, + "step": 20550 + }, + { + "epoch": 1.4860960635608522, + "grad_norm": 0.7730219960212708, + "learning_rate": 8.168988998661101e-06, + "loss": 3.0079, + "step": 20575 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.375, + "epoch": 1.4860960635608522, + "loss": 3.1562, + "loss_text": 0.3633, + "state_loss_0": 0.0, + "step": 20575 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.2812, + "audio_loss_6": 3.125, + "epoch": 1.4860960635608522, + "loss": 2.9375, + "loss_text": 0.3242, + "state_loss_0": 0.0, + "step": 20575 + }, + { + "epoch": 1.4879017695919106, + "grad_norm": 0.6449101567268372, + "learning_rate": 8.115006344189139e-06, + "loss": 3.0067, + "step": 20600 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.1562, + "epoch": 1.4879017695919106, + "loss": 3.0312, + "loss_text": 0.7148, + "state_loss_0": 0.0, + "step": 20600 + }, + { + "audio_loss_0": 3.1562, + "audio_loss_1": 3.2812, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0156, + "epoch": 1.4879017695919106, + "loss": 2.9688, + "loss_text": 0.2695, + "state_loss_0": 0.0, + "step": 20600 + }, + { + "epoch": 1.4897074756229687, + "grad_norm": 0.7509922385215759, + "learning_rate": 8.061168067602556e-06, + "loss": 3.0108, + "step": 20625 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1094, + "epoch": 1.4897074756229687, + "loss": 2.9531, + "loss_text": 0.4609, + "state_loss_0": 0.0, + "step": 20625 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.5, + "audio_loss_2": 3.25, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1875, + "epoch": 1.4897074756229687, + "loss": 3.0938, + "loss_text": 0.2676, + "state_loss_0": 0.0, + "step": 20625 + }, + { + "epoch": 1.4915131816540268, + "grad_norm": 0.6580446362495422, + "learning_rate": 8.007474629254286e-06, + "loss": 3.0041, + "step": 20650 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1562, + "epoch": 1.4915131816540268, + "loss": 3.0, + "loss_text": 0.127, + "state_loss_0": 0.0, + "step": 20650 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.5625, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2812, + "epoch": 1.4915131816540268, + "loss": 3.0938, + "loss_text": 0.3301, + "state_loss_0": 0.0, + "step": 20650 + }, + { + "epoch": 1.493318887685085, + "grad_norm": 0.6453419923782349, + "learning_rate": 7.953926488258808e-06, + "loss": 3.0162, + "step": 20675 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.2031, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.1406, + "epoch": 1.493318887685085, + "loss": 2.9688, + "loss_text": 0.2832, + "state_loss_0": 0.0, + "step": 20675 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0156, + "epoch": 1.493318887685085, + "loss": 2.9375, + "loss_text": 0.2539, + "state_loss_0": 0.0, + "step": 20675 + }, + { + "epoch": 1.495124593716143, + "grad_norm": 0.6932626962661743, + "learning_rate": 7.900524102488221e-06, + "loss": 3.0161, + "step": 20700 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1562, + "epoch": 1.495124593716143, + "loss": 3.0469, + "loss_text": 0.3047, + "state_loss_0": 0.0, + "step": 20700 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.25, + "epoch": 1.495124593716143, + "loss": 3.0625, + "loss_text": 0.2832, + "state_loss_0": 0.0, + "step": 20700 + }, + { + "epoch": 1.4969302997472012, + "grad_norm": 0.6678116321563721, + "learning_rate": 7.84726792856832e-06, + "loss": 3.0147, + "step": 20725 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9062, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.0625, + "epoch": 1.4969302997472012, + "loss": 2.9375, + "loss_text": 0.5547, + "state_loss_0": 0.0, + "step": 20725 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2656, + "epoch": 1.4969302997472012, + "loss": 3.0781, + "loss_text": 0.4395, + "state_loss_0": 0.0, + "step": 20725 + }, + { + "epoch": 1.4987360057782593, + "grad_norm": 0.7817378640174866, + "learning_rate": 7.794158421874667e-06, + "loss": 3.0048, + "step": 20750 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.1875, + "epoch": 1.4987360057782593, + "loss": 2.9688, + "loss_text": 0.3203, + "state_loss_0": 0.0, + "step": 20750 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2188, + "epoch": 1.4987360057782593, + "loss": 3.0469, + "loss_text": 0.2441, + "state_loss_0": 0.0, + "step": 20750 + }, + { + "epoch": 1.5005417118093174, + "grad_norm": 0.6076989769935608, + "learning_rate": 7.741196036528747e-06, + "loss": 3.01, + "step": 20775 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.0938, + "epoch": 1.5005417118093174, + "loss": 2.9531, + "loss_text": 0.1338, + "state_loss_0": 0.0, + "step": 20775 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.0938, + "audio_loss_2": 2.8438, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5156, + "audio_loss_5": 3.2031, + "audio_loss_6": 2.9219, + "epoch": 1.5005417118093174, + "loss": 2.8125, + "loss_text": 0.1084, + "state_loss_0": 0.0, + "step": 20775 + }, + { + "epoch": 1.5023474178403755, + "grad_norm": 0.6007022857666016, + "learning_rate": 7.688381225394062e-06, + "loss": 3.0117, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_AQACONVA": 3.3594, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.6406, + "eval_loss_AQACONVA": 3.6406, + "eval_loss_text_AQACONVA": 2.5, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.6875, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.625, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9375, + "eval_audio_loss_5_AQACONVA": 3.7188, + "eval_audio_loss_6_AQACONVA": 3.4062, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.25, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 3.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9531, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.8125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.4219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.5938, + "eval_audio_loss_2_RQACONVA": 3.3438, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5312, + "eval_audio_loss_6_RQACONVA": 3.3594, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6875, + "eval_audio_loss_6_RQACONVA": 3.4062, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.9141, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_RQACONVA": 3.5625, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.5469, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.6562, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 3.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.75, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.3281, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.2656, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6719, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.5859, + "eval_loss_RQACONV": 0.5859, + "eval_loss_text_RQACONV": 1.1719, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.8164, + "eval_loss_RQACONV": 0.8164, + "eval_loss_text_RQACONV": 1.6328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.7188, + "eval_loss_RQACONV": 0.7188, + "eval_loss_text_RQACONV": 1.4375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.9219, + "eval_loss_RQACONV": 0.9219, + "eval_loss_text_RQACONV": 1.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.7422, + "eval_loss_RQACONV": 0.7422, + "eval_loss_text_RQACONV": 1.4844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.8164, + "eval_loss_RQACONV": 0.8164, + "eval_loss_text_RQACONV": 1.6328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 1.0156, + "eval_loss_RQACONV": 1.0156, + "eval_loss_text_RQACONV": 2.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.4727, + "eval_loss_RQACONV": 0.4727, + "eval_loss_text_RQACONV": 0.9453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 1.4297, + "eval_loss_RQACONV": 1.4297, + "eval_loss_text_RQACONV": 2.8594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.126, + "eval_loss_RQACONV": 0.126, + "eval_loss_text_RQACONV": 0.252, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.1377, + "eval_loss_RQACONV": 0.1377, + "eval_loss_text_RQACONV": 0.2754, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.0688, + "eval_loss_RQACONV": 0.0688, + "eval_loss_text_RQACONV": 0.1377, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.1416, + "eval_loss_RQACONV": 0.1416, + "eval_loss_text_RQACONV": 0.2832, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.1465, + "eval_loss_RQACONV": 0.1465, + "eval_loss_text_RQACONV": 0.293, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.0315, + "eval_loss_RQACONV": 0.0315, + "eval_loss_text_RQACONV": 0.063, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.127, + "eval_loss_RQACONV": 0.127, + "eval_loss_text_RQACONV": 0.2539, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.0972, + "eval_loss_RQACONV": 0.0972, + "eval_loss_text_RQACONV": 0.1943, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.0923, + "eval_loss_RQACONV": 0.0923, + "eval_loss_text_RQACONV": 0.1846, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 20800 + }, + { + "epoch": 1.5023474178403755, + "eval_loss": 1.587096095085144, + "eval_runtime": 28.2384, + "eval_samples_per_second": 189.423, + "eval_steps_per_second": 1.487, + "step": 20800 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.0312, + "epoch": 1.5023474178403755, + "loss": 2.9844, + "loss_text": 0.5039, + "state_loss_0": 0.0, + "step": 20800 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.2969, + "epoch": 1.5023474178403755, + "loss": 3.0, + "loss_text": 0.2871, + "state_loss_0": 0.0, + "step": 20800 + }, + { + "epoch": 1.5041531238714336, + "grad_norm": 0.6676250100135803, + "learning_rate": 7.635714440072231e-06, + "loss": 2.9984, + "step": 20825 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.5156, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2969, + "epoch": 1.5041531238714336, + "loss": 3.2188, + "loss_text": 0.8164, + "state_loss_0": 0.0, + "step": 20825 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.0156, + "audio_loss_2": 2.8438, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.0, + "epoch": 1.5041531238714336, + "loss": 2.875, + "loss_text": 0.2002, + "state_loss_0": 0.0, + "step": 20825 + }, + { + "epoch": 1.5059588299024917, + "grad_norm": 0.6153017282485962, + "learning_rate": 7.583196130899184e-06, + "loss": 3.0132, + "step": 20850 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.375, + "audio_loss_6": 3.2031, + "epoch": 1.5059588299024917, + "loss": 3.0156, + "loss_text": 0.5234, + "state_loss_0": 0.0, + "step": 20850 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.5, + "audio_loss_2": 3.2344, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2188, + "epoch": 1.5059588299024917, + "loss": 3.0938, + "loss_text": 0.2129, + "state_loss_0": 0.0, + "step": 20850 + }, + { + "epoch": 1.5077645359335499, + "grad_norm": 0.6552121043205261, + "learning_rate": 7.530826746941258e-06, + "loss": 3.008, + "step": 20875 + }, + { + "audio_loss_0": 2.5938, + "audio_loss_1": 3.2656, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.2812, + "audio_loss_6": 3.0312, + "epoch": 1.5077645359335499, + "loss": 2.8594, + "loss_text": 0.2695, + "state_loss_0": 0.0, + "step": 20875 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.1094, + "audio_loss_2": 2.8906, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0781, + "epoch": 1.5077645359335499, + "loss": 2.9531, + "loss_text": 0.3906, + "state_loss_0": 0.0, + "step": 20875 + }, + { + "epoch": 1.5095702419646082, + "grad_norm": 0.6586565971374512, + "learning_rate": 7.478606735991401e-06, + "loss": 3.0062, + "step": 20900 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1406, + "epoch": 1.5095702419646082, + "loss": 2.9688, + "loss_text": 0.2812, + "state_loss_0": 0.0, + "step": 20900 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5312, + "audio_loss_5": 3.3281, + "audio_loss_6": 2.9688, + "epoch": 1.5095702419646082, + "loss": 2.9062, + "loss_text": 0.3398, + "state_loss_0": 0.0, + "step": 20900 + }, + { + "epoch": 1.5113759479956663, + "grad_norm": 0.6133041977882385, + "learning_rate": 7.426536544565299e-06, + "loss": 3.0055, + "step": 20925 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.125, + "audio_loss_2": 2.8906, + "audio_loss_3": 3.9688, + "audio_loss_4": 3.5156, + "audio_loss_5": 3.2812, + "audio_loss_6": 3.0625, + "epoch": 1.5113759479956663, + "loss": 2.8438, + "loss_text": 0.3066, + "state_loss_0": 0.0, + "step": 20925 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2969, + "epoch": 1.5113759479956663, + "loss": 3.125, + "loss_text": 0.4688, + "state_loss_0": 0.0, + "step": 20925 + }, + { + "epoch": 1.5131816540267244, + "grad_norm": 0.668273389339447, + "learning_rate": 7.3746166178976145e-06, + "loss": 3.0038, + "step": 20950 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.0, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2031, + "epoch": 1.5131816540267244, + "loss": 3.0469, + "loss_text": 0.2715, + "state_loss_0": 0.0, + "step": 20950 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.125, + "epoch": 1.5131816540267244, + "loss": 3.0156, + "loss_text": 0.2178, + "state_loss_0": 0.0, + "step": 20950 + }, + { + "epoch": 1.5149873600577826, + "grad_norm": 0.6775100231170654, + "learning_rate": 7.322847399938104e-06, + "loss": 3.0092, + "step": 20975 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5625, + "audio_loss_6": 3.2812, + "epoch": 1.5149873600577826, + "loss": 3.125, + "loss_text": 0.2129, + "state_loss_0": 0.0, + "step": 20975 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1562, + "epoch": 1.5149873600577826, + "loss": 3.0, + "loss_text": 0.3047, + "state_loss_0": 0.0, + "step": 20975 + }, + { + "epoch": 1.516793066088841, + "grad_norm": 0.6876776218414307, + "learning_rate": 7.271229333347904e-06, + "loss": 3.0052, + "step": 21000 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2812, + "epoch": 1.516793066088841, + "loss": 3.0781, + "loss_text": 0.4199, + "state_loss_0": 0.0, + "step": 21000 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1875, + "epoch": 1.516793066088841, + "loss": 3.0312, + "loss_text": 0.6055, + "state_loss_0": 0.0, + "step": 21000 + }, + { + "epoch": 1.518598772119899, + "grad_norm": 0.7809168100357056, + "learning_rate": 7.219762859495696e-06, + "loss": 3.003, + "step": 21025 + }, + { + "audio_loss_0": 3.4062, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.4062, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.3438, + "epoch": 1.518598772119899, + "loss": 3.2031, + "loss_text": 0.3145, + "state_loss_0": 0.0, + "step": 21025 + }, + { + "audio_loss_0": 3.25, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2031, + "epoch": 1.518598772119899, + "loss": 3.0781, + "loss_text": 0.2832, + "state_loss_0": 0.0, + "step": 21025 + }, + { + "epoch": 1.5204044781509571, + "grad_norm": 0.5823636651039124, + "learning_rate": 7.1684484184539215e-06, + "loss": 2.9962, + "step": 21050 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.0625, + "audio_loss_2": 2.8906, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.0625, + "epoch": 1.5204044781509571, + "loss": 2.9062, + "loss_text": 0.2598, + "state_loss_0": 0.0, + "step": 21050 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2656, + "epoch": 1.5204044781509571, + "loss": 3.0625, + "loss_text": 0.2578, + "state_loss_0": 0.0, + "step": 21050 + }, + { + "epoch": 1.5222101841820153, + "grad_norm": 0.6301290392875671, + "learning_rate": 7.117286448995073e-06, + "loss": 3.0081, + "step": 21075 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1406, + "epoch": 1.5222101841820153, + "loss": 2.9844, + "loss_text": 0.2295, + "state_loss_0": 0.0, + "step": 21075 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 2.7656, + "audio_loss_2": 2.7031, + "audio_loss_3": 3.8438, + "audio_loss_4": 3.1875, + "audio_loss_5": 3.0469, + "audio_loss_6": 2.6562, + "epoch": 1.5222101841820153, + "loss": 2.6406, + "loss_text": 0.1738, + "state_loss_0": 0.0, + "step": 21075 + }, + { + "epoch": 1.5240158902130734, + "grad_norm": 0.6477930545806885, + "learning_rate": 7.0662773885878774e-06, + "loss": 3.0011, + "step": 21100 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.8906, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.25, + "audio_loss_6": 3.0938, + "epoch": 1.5240158902130734, + "loss": 2.9219, + "loss_text": 0.3359, + "state_loss_0": 0.0, + "step": 21100 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1719, + "epoch": 1.5240158902130734, + "loss": 3.0469, + "loss_text": 0.5781, + "state_loss_0": 0.0, + "step": 21100 + }, + { + "epoch": 1.5258215962441315, + "grad_norm": 0.6396780014038086, + "learning_rate": 7.015421673393624e-06, + "loss": 3.0023, + "step": 21125 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.5938, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.2812, + "epoch": 1.5258215962441315, + "loss": 3.2031, + "loss_text": 0.3984, + "state_loss_0": 0.0, + "step": 21125 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.2188, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2344, + "epoch": 1.5258215962441315, + "loss": 3.0625, + "loss_text": 0.4238, + "state_loss_0": 0.0, + "step": 21125 + }, + { + "epoch": 1.5276273022751896, + "grad_norm": 0.5841837525367737, + "learning_rate": 6.96471973826236e-06, + "loss": 3.0168, + "step": 21150 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.2344, + "epoch": 1.5276273022751896, + "loss": 3.0, + "loss_text": 0.2363, + "state_loss_0": 0.0, + "step": 21150 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.0, + "epoch": 1.5276273022751896, + "loss": 2.8906, + "loss_text": 0.1514, + "state_loss_0": 0.0, + "step": 21150 + }, + { + "epoch": 1.5294330083062477, + "grad_norm": 0.6426196098327637, + "learning_rate": 6.914172016729242e-06, + "loss": 3.0056, + "step": 21175 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.2344, + "epoch": 1.5294330083062477, + "loss": 3.0, + "loss_text": 0.2559, + "state_loss_0": 0.0, + "step": 21175 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2344, + "epoch": 1.5294330083062477, + "loss": 3.125, + "loss_text": 0.8672, + "state_loss_0": 0.0, + "step": 21175 + }, + { + "epoch": 1.5312387143373059, + "grad_norm": 0.6183873414993286, + "learning_rate": 6.863778941010793e-06, + "loss": 3.0118, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6562, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.5, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7031, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 1.9844, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_AQACONVA": 3.3125, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.4375, + "eval_loss_AQACONVA": 3.4375, + "eval_loss_text_AQACONVA": 1.6953, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9375, + "eval_audio_loss_5_AQACONVA": 3.7188, + "eval_audio_loss_6_AQACONVA": 3.4062, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.2188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 3.3281, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.8125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.4219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5312, + "eval_audio_loss_6_RQACONVA": 3.3594, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.4062, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_RQACONVA": 3.0781, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.7188, + "eval_audio_loss_6_RQACONVA": 3.4062, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.8984, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_RQACONVA": 3.5, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.5938, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8281, + "eval_audio_loss_6_RQACONVA": 3.5469, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 3.2031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7188, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.3281, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_RQACONVA": 3.1562, + "eval_audio_loss_1_RQACONVA": 3.7031, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.75, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.1875, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6875, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6562, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.6797, + "eval_loss_RQACONV": 0.6797, + "eval_loss_text_RQACONV": 1.3594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.5625, + "eval_loss_RQACONV": 0.5625, + "eval_loss_text_RQACONV": 1.125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.8164, + "eval_loss_RQACONV": 0.8164, + "eval_loss_text_RQACONV": 1.6328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.6992, + "eval_loss_RQACONV": 0.6992, + "eval_loss_text_RQACONV": 1.3984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.918, + "eval_loss_RQACONV": 0.918, + "eval_loss_text_RQACONV": 1.8359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.7188, + "eval_loss_RQACONV": 0.7188, + "eval_loss_text_RQACONV": 1.4375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.7852, + "eval_loss_RQACONV": 0.7852, + "eval_loss_text_RQACONV": 1.5703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.9961, + "eval_loss_RQACONV": 0.9961, + "eval_loss_text_RQACONV": 1.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.4727, + "eval_loss_RQACONV": 0.4727, + "eval_loss_text_RQACONV": 0.9453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.5078, + "eval_loss_RQACONV": 0.5078, + "eval_loss_text_RQACONV": 1.0156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 1.4375, + "eval_loss_RQACONV": 1.4375, + "eval_loss_text_RQACONV": 2.875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.125, + "eval_loss_RQACONV": 0.125, + "eval_loss_text_RQACONV": 0.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.1406, + "eval_loss_RQACONV": 0.1406, + "eval_loss_text_RQACONV": 0.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.0679, + "eval_loss_RQACONV": 0.0679, + "eval_loss_text_RQACONV": 0.1357, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.1426, + "eval_loss_RQACONV": 0.1426, + "eval_loss_text_RQACONV": 0.2852, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.1436, + "eval_loss_RQACONV": 0.1436, + "eval_loss_text_RQACONV": 0.2871, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.0297, + "eval_loss_RQACONV": 0.0297, + "eval_loss_text_RQACONV": 0.0593, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.1279, + "eval_loss_RQACONV": 0.1279, + "eval_loss_text_RQACONV": 0.2559, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.0977, + "eval_loss_RQACONV": 0.0977, + "eval_loss_text_RQACONV": 0.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.0889, + "eval_loss_RQACONV": 0.0889, + "eval_loss_text_RQACONV": 0.1777, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21200 + }, + { + "epoch": 1.5312387143373059, + "eval_loss": 1.5831489562988281, + "eval_runtime": 28.7828, + "eval_samples_per_second": 185.84, + "eval_steps_per_second": 1.459, + "step": 21200 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.0469, + "epoch": 1.5312387143373059, + "loss": 2.9531, + "loss_text": 0.1338, + "state_loss_0": 0.0, + "step": 21200 + }, + { + "audio_loss_0": 3.0938, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.2969, + "epoch": 1.5312387143373059, + "loss": 3.0781, + "loss_text": 0.25, + "state_loss_0": 0.0, + "step": 21200 + }, + { + "epoch": 1.533044420368364, + "grad_norm": 0.6848045587539673, + "learning_rate": 6.81354094200119e-06, + "loss": 2.9985, + "step": 21225 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.0781, + "epoch": 1.533044420368364, + "loss": 2.9844, + "loss_text": 0.5234, + "state_loss_0": 0.0, + "step": 21225 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.3125, + "audio_loss_6": 2.9688, + "epoch": 1.533044420368364, + "loss": 2.9375, + "loss_text": 0.375, + "state_loss_0": 0.0, + "step": 21225 + }, + { + "epoch": 1.534850126399422, + "grad_norm": 0.6291896104812622, + "learning_rate": 6.763458449268631e-06, + "loss": 3.0012, + "step": 21250 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.125, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.3125, + "epoch": 1.534850126399422, + "loss": 3.1562, + "loss_text": 0.2891, + "state_loss_0": 0.0, + "step": 21250 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.0, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.1094, + "epoch": 1.534850126399422, + "loss": 3.0, + "loss_text": 0.4883, + "state_loss_0": 0.0, + "step": 21250 + }, + { + "epoch": 1.5366558324304802, + "grad_norm": 0.730059802532196, + "learning_rate": 6.713531891051608e-06, + "loss": 2.9984, + "step": 21275 + }, + { + "audio_loss_0": 3.2812, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.375, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.2812, + "epoch": 1.5366558324304802, + "loss": 3.125, + "loss_text": 0.25, + "state_loss_0": 0.0, + "step": 21275 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.8906, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1094, + "epoch": 1.5366558324304802, + "loss": 2.9219, + "loss_text": 0.2305, + "state_loss_0": 0.0, + "step": 21275 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.6617820262908936, + "learning_rate": 6.663761694255288e-06, + "loss": 2.9979, + "step": 21300 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1719, + "epoch": 1.5384615384615383, + "loss": 3.0312, + "loss_text": 0.2852, + "state_loss_0": 0.0, + "step": 21300 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.125, + "audio_loss_2": 2.875, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.0938, + "epoch": 1.5384615384615383, + "loss": 2.9219, + "loss_text": 0.0776, + "state_loss_0": 0.0, + "step": 21300 + }, + { + "epoch": 1.5402672444925964, + "grad_norm": 0.6749392747879028, + "learning_rate": 6.614148284447827e-06, + "loss": 3.0025, + "step": 21325 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.25, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.3125, + "epoch": 1.5402672444925964, + "loss": 3.0938, + "loss_text": 0.5117, + "state_loss_0": 0.0, + "step": 21325 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.125, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.2031, + "epoch": 1.5402672444925964, + "loss": 3.0625, + "loss_text": 0.1885, + "state_loss_0": 0.0, + "step": 21325 + }, + { + "epoch": 1.5420729505236548, + "grad_norm": 0.643330991268158, + "learning_rate": 6.564692085856772e-06, + "loss": 3.0136, + "step": 21350 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.1875, + "epoch": 1.5420729505236548, + "loss": 3.0312, + "loss_text": 0.252, + "state_loss_0": 0.0, + "step": 21350 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.2344, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1406, + "epoch": 1.5420729505236548, + "loss": 3.0, + "loss_text": 0.4707, + "state_loss_0": 0.0, + "step": 21350 + }, + { + "epoch": 1.543878656554713, + "grad_norm": 0.6808464527130127, + "learning_rate": 6.515393521365376e-06, + "loss": 3.0039, + "step": 21375 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.0781, + "epoch": 1.543878656554713, + "loss": 2.9688, + "loss_text": 0.3613, + "state_loss_0": 0.0, + "step": 21375 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.125, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1875, + "epoch": 1.543878656554713, + "loss": 3.0, + "loss_text": 0.2559, + "state_loss_0": 0.0, + "step": 21375 + }, + { + "epoch": 1.545684362585771, + "grad_norm": 0.6220929026603699, + "learning_rate": 6.466253012509046e-06, + "loss": 3.018, + "step": 21400 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.3438, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.1406, + "epoch": 1.545684362585771, + "loss": 2.9688, + "loss_text": 0.2197, + "state_loss_0": 0.0, + "step": 21400 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.125, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2344, + "epoch": 1.545684362585771, + "loss": 3.0938, + "loss_text": 0.5469, + "state_loss_0": 0.0, + "step": 21400 + }, + { + "epoch": 1.5474900686168291, + "grad_norm": 0.6823073625564575, + "learning_rate": 6.4172709794717036e-06, + "loss": 3.0049, + "step": 21425 + }, + { + "audio_loss_0": 3.5469, + "audio_loss_1": 3.4688, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 4.0, + "audio_loss_5": 3.7344, + "audio_loss_6": 3.5312, + "epoch": 1.5474900686168291, + "loss": 3.2656, + "loss_text": 0.3125, + "state_loss_0": 0.0, + "step": 21425 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5312, + "audio_loss_5": 3.2656, + "audio_loss_6": 3.0469, + "epoch": 1.5474900686168291, + "loss": 2.875, + "loss_text": 0.0942, + "state_loss_0": 0.0, + "step": 21425 + }, + { + "epoch": 1.5492957746478875, + "grad_norm": 0.6786304116249084, + "learning_rate": 6.368447841082181e-06, + "loss": 3.0055, + "step": 21450 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.3438, + "epoch": 1.5492957746478875, + "loss": 3.0625, + "loss_text": 0.2217, + "state_loss_0": 0.0, + "step": 21450 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.2031, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.2031, + "epoch": 1.5492957746478875, + "loss": 2.9688, + "loss_text": 0.1377, + "state_loss_0": 0.0, + "step": 21450 + }, + { + "epoch": 1.5511014806789456, + "grad_norm": 0.6875502467155457, + "learning_rate": 6.319784014810676e-06, + "loss": 3.0033, + "step": 21475 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2188, + "epoch": 1.5511014806789456, + "loss": 3.0781, + "loss_text": 0.2363, + "state_loss_0": 0.0, + "step": 21475 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0938, + "epoch": 1.5511014806789456, + "loss": 2.9688, + "loss_text": 0.1758, + "state_loss_0": 0.0, + "step": 21475 + }, + { + "epoch": 1.5529071867100037, + "grad_norm": 0.7159177660942078, + "learning_rate": 6.271279916765141e-06, + "loss": 3.0011, + "step": 21500 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1562, + "epoch": 1.5529071867100037, + "loss": 3.0312, + "loss_text": 0.2617, + "state_loss_0": 0.0, + "step": 21500 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2969, + "epoch": 1.5529071867100037, + "loss": 3.125, + "loss_text": 0.2949, + "state_loss_0": 0.0, + "step": 21500 + }, + { + "epoch": 1.5547128927410618, + "grad_norm": 0.6003041863441467, + "learning_rate": 6.22293596168777e-06, + "loss": 3.005, + "step": 21525 + }, + { + "audio_loss_0": 2.5156, + "audio_loss_1": 3.125, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.2969, + "audio_loss_6": 3.0156, + "epoch": 1.5547128927410618, + "loss": 2.8594, + "loss_text": 0.3887, + "state_loss_0": 0.0, + "step": 21525 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0156, + "epoch": 1.5547128927410618, + "loss": 3.0, + "loss_text": 0.2441, + "state_loss_0": 0.0, + "step": 21525 + }, + { + "epoch": 1.55651859877212, + "grad_norm": 0.648446798324585, + "learning_rate": 6.1747525629514054e-06, + "loss": 3.0023, + "step": 21550 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.2031, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2188, + "epoch": 1.55651859877212, + "loss": 2.9688, + "loss_text": 0.2158, + "state_loss_0": 0.0, + "step": 21550 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.8281, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.0312, + "epoch": 1.55651859877212, + "loss": 2.875, + "loss_text": 0.2402, + "state_loss_0": 0.0, + "step": 21550 + }, + { + "epoch": 1.558324304803178, + "grad_norm": 0.6985082030296326, + "learning_rate": 6.126730132556046e-06, + "loss": 3.0057, + "step": 21575 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.0469, + "epoch": 1.558324304803178, + "loss": 2.9688, + "loss_text": 0.3223, + "state_loss_0": 0.0, + "step": 21575 + }, + { + "audio_loss_0": 2.6875, + "audio_loss_1": 3.0625, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.4844, + "audio_loss_5": 2.9219, + "audio_loss_6": 3.3906, + "epoch": 1.558324304803178, + "loss": 2.8594, + "loss_text": 0.1934, + "state_loss_0": 0.0, + "step": 21575 + }, + { + "epoch": 1.5601300108342362, + "grad_norm": 0.6626188158988953, + "learning_rate": 6.078869081125288e-06, + "loss": 3.0025, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.625, + "eval_loss_AQACONVA": 3.625, + "eval_loss_text_AQACONVA": 2.5, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 2.0312, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.7969, + "eval_audio_loss_6_AQACONVA": 3.4531, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.7109, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9375, + "eval_audio_loss_5_AQACONVA": 3.7031, + "eval_audio_loss_6_AQACONVA": 3.4062, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.25, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 3.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.8125, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.4219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5312, + "eval_audio_loss_6_RQACONVA": 3.3594, + "eval_loss": 3.4688, + "eval_loss_RQACONVA": 3.4688, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6875, + "eval_audio_loss_6_RQACONVA": 3.4062, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.9219, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.2344, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5156, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_RQACONVA": 3.5156, + "eval_audio_loss_1_RQACONVA": 3.7969, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.875, + "eval_audio_loss_6_RQACONVA": 3.5938, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.6094, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 3.2031, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_RQACONVA": 3.3438, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.75, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5781, + "eval_loss_RQACONVA": 3.5781, + "eval_loss_text_RQACONVA": 2.4219, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.3125, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9219, + "eval_audio_loss_5_RQACONVA": 3.6562, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.5469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.5742, + "eval_loss_RQACONV": 0.5742, + "eval_loss_text_RQACONV": 1.1484, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.8086, + "eval_loss_RQACONV": 0.8086, + "eval_loss_text_RQACONV": 1.6172, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.7227, + "eval_loss_RQACONV": 0.7227, + "eval_loss_text_RQACONV": 1.4453, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.9219, + "eval_loss_RQACONV": 0.9219, + "eval_loss_text_RQACONV": 1.8438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.7305, + "eval_loss_RQACONV": 0.7305, + "eval_loss_text_RQACONV": 1.4609, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.7734, + "eval_loss_RQACONV": 0.7734, + "eval_loss_text_RQACONV": 1.5469, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.9492, + "eval_loss_RQACONV": 0.9492, + "eval_loss_text_RQACONV": 1.8984, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.4922, + "eval_loss_RQACONV": 0.4922, + "eval_loss_text_RQACONV": 0.9844, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.4688, + "eval_loss_RQACONV": 0.4688, + "eval_loss_text_RQACONV": 0.9375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 1.4297, + "eval_loss_RQACONV": 1.4297, + "eval_loss_text_RQACONV": 2.8594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.1226, + "eval_loss_RQACONV": 0.1226, + "eval_loss_text_RQACONV": 0.2451, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.1396, + "eval_loss_RQACONV": 0.1396, + "eval_loss_text_RQACONV": 0.2793, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.0693, + "eval_loss_RQACONV": 0.0693, + "eval_loss_text_RQACONV": 0.1387, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.1465, + "eval_loss_RQACONV": 0.1465, + "eval_loss_text_RQACONV": 0.293, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.1416, + "eval_loss_RQACONV": 0.1416, + "eval_loss_text_RQACONV": 0.2832, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.033, + "eval_loss_RQACONV": 0.033, + "eval_loss_text_RQACONV": 0.0659, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.1309, + "eval_loss_RQACONV": 0.1309, + "eval_loss_text_RQACONV": 0.2617, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.0996, + "eval_loss_RQACONV": 0.0996, + "eval_loss_text_RQACONV": 0.1992, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.0903, + "eval_loss_RQACONV": 0.0903, + "eval_loss_text_RQACONV": 0.1807, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 21600 + }, + { + "epoch": 1.5601300108342362, + "eval_loss": 1.5827144384384155, + "eval_runtime": 28.1805, + "eval_samples_per_second": 189.812, + "eval_steps_per_second": 1.49, + "step": 21600 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.2812, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1406, + "epoch": 1.5601300108342362, + "loss": 3.0469, + "loss_text": 0.293, + "state_loss_0": 0.0, + "step": 21600 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.2188, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.0469, + "epoch": 1.5601300108342362, + "loss": 2.9375, + "loss_text": 0.2695, + "state_loss_0": 0.0, + "step": 21600 + }, + { + "epoch": 1.5619357168652943, + "grad_norm": 0.6427469253540039, + "learning_rate": 6.031169817902841e-06, + "loss": 2.9953, + "step": 21625 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2031, + "epoch": 1.5619357168652943, + "loss": 3.0312, + "loss_text": 0.3164, + "state_loss_0": 0.0, + "step": 21625 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.125, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1094, + "epoch": 1.5619357168652943, + "loss": 2.9844, + "loss_text": 0.5352, + "state_loss_0": 0.0, + "step": 21625 + }, + { + "epoch": 1.5637414228963524, + "grad_norm": 0.6740164756774902, + "learning_rate": 5.983632750749024e-06, + "loss": 2.9973, + "step": 21650 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2344, + "epoch": 1.5637414228963524, + "loss": 3.0156, + "loss_text": 0.207, + "state_loss_0": 0.0, + "step": 21650 + }, + { + "audio_loss_0": 2.75, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.125, + "audio_loss_4": 3.625, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.125, + "epoch": 1.5637414228963524, + "loss": 2.9688, + "loss_text": 0.3945, + "state_loss_0": 0.0, + "step": 21650 + }, + { + "epoch": 1.5655471289274105, + "grad_norm": 0.7165602445602417, + "learning_rate": 5.93625828613725e-06, + "loss": 2.9933, + "step": 21675 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.2812, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.8125, + "audio_loss_5": 3.5156, + "audio_loss_6": 3.1094, + "epoch": 1.5655471289274105, + "loss": 3.0312, + "loss_text": 0.2305, + "state_loss_0": 0.0, + "step": 21675 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.0156, + "epoch": 1.5655471289274105, + "loss": 2.9375, + "loss_text": 0.1895, + "state_loss_0": 0.0, + "step": 21675 + }, + { + "epoch": 1.5673528349584687, + "grad_norm": 0.5839782953262329, + "learning_rate": 5.889046829150599e-06, + "loss": 3.0032, + "step": 21700 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.3281, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1875, + "epoch": 1.5673528349584687, + "loss": 3.0, + "loss_text": 0.3027, + "state_loss_0": 0.0, + "step": 21700 + }, + { + "audio_loss_0": 3.2344, + "audio_loss_1": 3.1562, + "audio_loss_2": 3.0, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2031, + "epoch": 1.5673528349584687, + "loss": 3.0781, + "loss_text": 0.3945, + "state_loss_0": 0.0, + "step": 21700 + }, + { + "epoch": 1.5691585409895268, + "grad_norm": 0.6500712633132935, + "learning_rate": 5.841998783478306e-06, + "loss": 3.0083, + "step": 21725 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.1562, + "audio_loss_2": 3.0, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1719, + "epoch": 1.5691585409895268, + "loss": 2.9844, + "loss_text": 0.1797, + "state_loss_0": 0.0, + "step": 21725 + }, + { + "audio_loss_0": 2.6875, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.0938, + "epoch": 1.5691585409895268, + "loss": 2.9688, + "loss_text": 0.1143, + "state_loss_0": 0.0, + "step": 21725 + }, + { + "epoch": 1.570964247020585, + "grad_norm": 0.6839341521263123, + "learning_rate": 5.795114551412348e-06, + "loss": 3.0038, + "step": 21750 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.125, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.375, + "audio_loss_6": 3.0469, + "epoch": 1.570964247020585, + "loss": 2.9531, + "loss_text": 0.2393, + "state_loss_0": 0.0, + "step": 21750 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.875, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.2969, + "audio_loss_6": 2.9219, + "epoch": 1.570964247020585, + "loss": 2.8906, + "loss_text": 0.2412, + "state_loss_0": 0.0, + "step": 21750 + }, + { + "epoch": 1.5727699530516432, + "grad_norm": 0.7360494136810303, + "learning_rate": 5.748394533843968e-06, + "loss": 2.9978, + "step": 21775 + }, + { + "audio_loss_0": 3.1875, + "audio_loss_1": 3.2812, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1406, + "epoch": 1.5727699530516432, + "loss": 3.0, + "loss_text": 0.3672, + "state_loss_0": 0.0, + "step": 21775 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.875, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3906, + "audio_loss_6": 2.9375, + "epoch": 1.5727699530516432, + "loss": 2.875, + "loss_text": 0.2715, + "state_loss_0": 0.0, + "step": 21775 + }, + { + "epoch": 1.5745756590827014, + "grad_norm": 0.667145848274231, + "learning_rate": 5.7018391302602895e-06, + "loss": 2.9955, + "step": 21800 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.5, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8438, + "audio_loss_5": 3.6094, + "audio_loss_6": 3.3594, + "epoch": 1.5745756590827014, + "loss": 3.1562, + "loss_text": 0.252, + "state_loss_0": 0.0, + "step": 21800 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2188, + "epoch": 1.5745756590827014, + "loss": 3.0312, + "loss_text": 0.168, + "state_loss_0": 0.0, + "step": 21800 + }, + { + "epoch": 1.5763813651137595, + "grad_norm": 0.6918382048606873, + "learning_rate": 5.655448738740854e-06, + "loss": 3.0001, + "step": 21825 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1875, + "epoch": 1.5763813651137595, + "loss": 3.125, + "loss_text": 0.7305, + "state_loss_0": 0.0, + "step": 21825 + }, + { + "audio_loss_0": 2.7031, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1094, + "epoch": 1.5763813651137595, + "loss": 2.9844, + "loss_text": 0.3828, + "state_loss_0": 0.0, + "step": 21825 + }, + { + "epoch": 1.5781870711448176, + "grad_norm": 0.6573619246482849, + "learning_rate": 5.6092237559542575e-06, + "loss": 2.9904, + "step": 21850 + }, + { + "audio_loss_0": 2.6562, + "audio_loss_1": 3.0625, + "audio_loss_2": 2.9062, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.4688, + "audio_loss_5": 3.375, + "audio_loss_6": 2.9844, + "epoch": 1.5781870711448176, + "loss": 2.8438, + "loss_text": 0.2236, + "state_loss_0": 0.0, + "step": 21850 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1562, + "epoch": 1.5781870711448176, + "loss": 2.9375, + "loss_text": 0.2773, + "state_loss_0": 0.0, + "step": 21850 + }, + { + "epoch": 1.5799927771758757, + "grad_norm": 0.6750558614730835, + "learning_rate": 5.563164577154742e-06, + "loss": 2.9946, + "step": 21875 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.2812, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.0625, + "epoch": 1.5799927771758757, + "loss": 2.9688, + "loss_text": 0.1787, + "state_loss_0": 0.0, + "step": 21875 + }, + { + "audio_loss_0": 2.6562, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0, + "audio_loss_4": 3.4688, + "audio_loss_5": 3.1562, + "audio_loss_6": 3.0312, + "epoch": 1.5799927771758757, + "loss": 2.8594, + "loss_text": 0.4023, + "state_loss_0": 0.0, + "step": 21875 + }, + { + "epoch": 1.581798483206934, + "grad_norm": 0.6186392307281494, + "learning_rate": 5.517271596178797e-06, + "loss": 2.9911, + "step": 21900 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.25, + "audio_loss_4": 3.8281, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2344, + "epoch": 1.581798483206934, + "loss": 3.1094, + "loss_text": 0.3848, + "state_loss_0": 0.0, + "step": 21900 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.3906, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.2344, + "epoch": 1.581798483206934, + "loss": 3.0469, + "loss_text": 0.4492, + "state_loss_0": 0.0, + "step": 21900 + }, + { + "epoch": 1.5836041892379922, + "grad_norm": 0.6534333229064941, + "learning_rate": 5.471545205441836e-06, + "loss": 3.0037, + "step": 21925 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.125, + "epoch": 1.5836041892379922, + "loss": 2.9688, + "loss_text": 0.1582, + "state_loss_0": 0.0, + "step": 21925 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1406, + "epoch": 1.5836041892379922, + "loss": 3.0156, + "loss_text": 0.3965, + "state_loss_0": 0.0, + "step": 21925 + }, + { + "epoch": 1.5854098952690503, + "grad_norm": 0.637391984462738, + "learning_rate": 5.425985795934788e-06, + "loss": 3.0021, + "step": 21950 + }, + { + "audio_loss_0": 2.9844, + "audio_loss_1": 3.25, + "audio_loss_2": 2.9844, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.0781, + "epoch": 1.5854098952690503, + "loss": 2.9688, + "loss_text": 0.2812, + "state_loss_0": 0.0, + "step": 21950 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.125, + "epoch": 1.5854098952690503, + "loss": 2.9844, + "loss_text": 0.1836, + "state_loss_0": 0.0, + "step": 21950 + }, + { + "epoch": 1.5872156013001084, + "grad_norm": 0.678966224193573, + "learning_rate": 5.380593757220811e-06, + "loss": 2.9915, + "step": 21975 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.9688, + "audio_loss_3": 3.9844, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.375, + "audio_loss_6": 3.0938, + "epoch": 1.5872156013001084, + "loss": 2.9531, + "loss_text": 0.332, + "state_loss_0": 0.0, + "step": 21975 + }, + { + "audio_loss_0": 3.125, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.1875, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5, + "audio_loss_6": 3.1562, + "epoch": 1.5872156013001084, + "loss": 3.0938, + "loss_text": 0.2305, + "state_loss_0": 0.0, + "step": 21975 + }, + { + "epoch": 1.5890213073311665, + "grad_norm": 0.6121324300765991, + "learning_rate": 5.335369477431904e-06, + "loss": 3.0058, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.6406, + "eval_loss": 3.6406, + "eval_loss_AQACONVA": 3.6406, + "eval_loss_text_AQACONVA": 2.5156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 2.0156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.5938, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.7031, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9375, + "eval_audio_loss_5_AQACONVA": 3.7031, + "eval_audio_loss_6_AQACONVA": 3.4062, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.2969, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.7812, + "eval_audio_loss_2_AQACONVA": 3.5156, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 3.375, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.8281, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.4531, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5156, + "eval_audio_loss_6_RQACONVA": 3.3594, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.4062, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_RQACONVA": 3.0781, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6875, + "eval_audio_loss_6_RQACONVA": 3.3906, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.8906, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_RQACONVA": 3.3906, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.2344, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.4844, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.5312, + "eval_audio_loss_3_RQACONVA": 4.5625, + "eval_audio_loss_4_RQACONVA": 4.0625, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5938, + "eval_loss_RQACONVA": 3.5938, + "eval_loss_text_RQACONVA": 2.3594, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 4.0, + "eval_audio_loss_5_RQACONVA": 3.8125, + "eval_audio_loss_6_RQACONVA": 3.5625, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 3.25, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.625, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.2656, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_RQACONVA": 3.1406, + "eval_audio_loss_1_RQACONVA": 3.7344, + "eval_audio_loss_2_RQACONVA": 3.4375, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7656, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.2188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9219, + "eval_audio_loss_5_RQACONVA": 3.6562, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.5625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.6875, + "eval_loss_RQACONV": 0.6875, + "eval_loss_text_RQACONV": 1.375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.5625, + "eval_loss_RQACONV": 0.5625, + "eval_loss_text_RQACONV": 1.125, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.8047, + "eval_loss_RQACONV": 0.8047, + "eval_loss_text_RQACONV": 1.6094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.7188, + "eval_loss_RQACONV": 0.7188, + "eval_loss_text_RQACONV": 1.4375, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.9258, + "eval_loss_RQACONV": 0.9258, + "eval_loss_text_RQACONV": 1.8516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.7461, + "eval_loss_RQACONV": 0.7461, + "eval_loss_text_RQACONV": 1.4922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.8398, + "eval_loss_RQACONV": 0.8398, + "eval_loss_text_RQACONV": 1.6797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 1.0312, + "eval_loss_RQACONV": 1.0312, + "eval_loss_text_RQACONV": 2.0625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.4707, + "eval_loss_RQACONV": 0.4707, + "eval_loss_text_RQACONV": 0.9414, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.5547, + "eval_loss_RQACONV": 0.5547, + "eval_loss_text_RQACONV": 1.1094, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.5039, + "eval_loss_RQACONV": 0.5039, + "eval_loss_text_RQACONV": 1.0078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 1.4375, + "eval_loss_RQACONV": 1.4375, + "eval_loss_text_RQACONV": 2.875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.125, + "eval_loss_RQACONV": 0.125, + "eval_loss_text_RQACONV": 0.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.1406, + "eval_loss_RQACONV": 0.1406, + "eval_loss_text_RQACONV": 0.2812, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.0674, + "eval_loss_RQACONV": 0.0674, + "eval_loss_text_RQACONV": 0.1348, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.1445, + "eval_loss_RQACONV": 0.1445, + "eval_loss_text_RQACONV": 0.2891, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.1426, + "eval_loss_RQACONV": 0.1426, + "eval_loss_text_RQACONV": 0.2852, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.0291, + "eval_loss_RQACONV": 0.0291, + "eval_loss_text_RQACONV": 0.0581, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.127, + "eval_loss_RQACONV": 0.127, + "eval_loss_text_RQACONV": 0.2539, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.0947, + "eval_loss_RQACONV": 0.0947, + "eval_loss_text_RQACONV": 0.1895, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.0923, + "eval_loss_RQACONV": 0.0923, + "eval_loss_text_RQACONV": 0.1846, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22000 + }, + { + "epoch": 1.5890213073311665, + "eval_loss": 1.5858433246612549, + "eval_runtime": 28.2018, + "eval_samples_per_second": 189.669, + "eval_steps_per_second": 1.489, + "step": 22000 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.4844, + "audio_loss_5": 3.2188, + "audio_loss_6": 2.9375, + "epoch": 1.5890213073311665, + "loss": 2.875, + "loss_text": 0.293, + "state_loss_0": 0.0, + "step": 22000 + }, + { + "audio_loss_0": 2.5781, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.8125, + "audio_loss_3": 3.9531, + "audio_loss_4": 3.4531, + "audio_loss_5": 3.2031, + "audio_loss_6": 2.9062, + "epoch": 1.5890213073311665, + "loss": 2.7812, + "loss_text": 0.2246, + "state_loss_0": 0.0, + "step": 22000 + }, + { + "epoch": 1.5908270133622247, + "grad_norm": 0.6231251955032349, + "learning_rate": 5.290313343265635e-06, + "loss": 2.9924, + "step": 22025 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.125, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.0469, + "epoch": 1.5908270133622247, + "loss": 2.9219, + "loss_text": 0.3164, + "state_loss_0": 0.0, + "step": 22025 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 2.9844, + "audio_loss_2": 2.75, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.2344, + "audio_loss_6": 3.0625, + "epoch": 1.5908270133622247, + "loss": 2.9062, + "loss_text": 0.6172, + "state_loss_0": 0.0, + "step": 22025 + }, + { + "epoch": 1.5926327193932828, + "grad_norm": 0.6193037033081055, + "learning_rate": 5.245425739981819e-06, + "loss": 2.9979, + "step": 22050 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.0312, + "epoch": 1.5926327193932828, + "loss": 2.9219, + "loss_text": 0.3262, + "state_loss_0": 0.0, + "step": 22050 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1719, + "epoch": 1.5926327193932828, + "loss": 3.0156, + "loss_text": 0.5625, + "state_loss_0": 0.0, + "step": 22050 + }, + { + "epoch": 1.5944384254243409, + "grad_norm": 0.5837889909744263, + "learning_rate": 5.20070705139919e-06, + "loss": 3.0064, + "step": 22075 + }, + { + "audio_loss_0": 3.4844, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.2031, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8906, + "audio_loss_5": 3.6719, + "audio_loss_6": 3.2812, + "epoch": 1.5944384254243409, + "loss": 3.1875, + "loss_text": 0.2285, + "state_loss_0": 0.0, + "step": 22075 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.2656, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.0625, + "epoch": 1.5944384254243409, + "loss": 2.9688, + "loss_text": 0.1787, + "state_loss_0": 0.0, + "step": 22075 + }, + { + "epoch": 1.596244131455399, + "grad_norm": 0.6529595851898193, + "learning_rate": 5.156157659892191e-06, + "loss": 3.0008, + "step": 22100 + }, + { + "audio_loss_0": 3.2656, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1094, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6562, + "audio_loss_6": 3.375, + "epoch": 1.596244131455399, + "loss": 3.1875, + "loss_text": 0.6445, + "state_loss_0": 0.0, + "step": 22100 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7969, + "audio_loss_5": 3.5469, + "audio_loss_6": 3.2031, + "epoch": 1.596244131455399, + "loss": 3.0625, + "loss_text": 0.4004, + "state_loss_0": 0.0, + "step": 22100 + }, + { + "epoch": 1.5980498374864571, + "grad_norm": 0.6961444616317749, + "learning_rate": 5.111777946387628e-06, + "loss": 2.9975, + "step": 22125 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.2656, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.3281, + "audio_loss_6": 2.9844, + "epoch": 1.5980498374864571, + "loss": 2.9219, + "loss_text": 0.2051, + "state_loss_0": 0.0, + "step": 22125 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1406, + "epoch": 1.5980498374864571, + "loss": 2.9375, + "loss_text": 0.25, + "state_loss_0": 0.0, + "step": 22125 + }, + { + "epoch": 1.5998555435175152, + "grad_norm": 0.5938643217086792, + "learning_rate": 5.067568290361474e-06, + "loss": 2.9968, + "step": 22150 + }, + { + "audio_loss_0": 2.9531, + "audio_loss_1": 3.3438, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.2031, + "epoch": 1.5998555435175152, + "loss": 3.0469, + "loss_text": 0.4688, + "state_loss_0": 0.0, + "step": 22150 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.9062, + "audio_loss_3": 4.125, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1094, + "epoch": 1.5998555435175152, + "loss": 2.9219, + "loss_text": 0.2422, + "state_loss_0": 0.0, + "step": 22150 + }, + { + "epoch": 1.6016612495485734, + "grad_norm": 0.7044111490249634, + "learning_rate": 5.02352906983557e-06, + "loss": 3.0088, + "step": 22175 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.1094, + "epoch": 1.6016612495485734, + "loss": 2.9219, + "loss_text": 0.1338, + "state_loss_0": 0.0, + "step": 22175 + }, + { + "audio_loss_0": 3.1406, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1719, + "epoch": 1.6016612495485734, + "loss": 3.0469, + "loss_text": 0.3594, + "state_loss_0": 0.0, + "step": 22175 + }, + { + "epoch": 1.6034669555796315, + "grad_norm": 0.5954722762107849, + "learning_rate": 4.979660661374452e-06, + "loss": 2.9991, + "step": 22200 + }, + { + "audio_loss_0": 3.0156, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.0938, + "epoch": 1.6034669555796315, + "loss": 3.0469, + "loss_text": 0.2949, + "state_loss_0": 0.0, + "step": 22200 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.5, + "audio_loss_6": 3.1875, + "epoch": 1.6034669555796315, + "loss": 3.0625, + "loss_text": 0.4512, + "state_loss_0": 0.0, + "step": 22200 + }, + { + "epoch": 1.6052726616106898, + "grad_norm": 0.6581910848617554, + "learning_rate": 4.93596344008207e-06, + "loss": 3.005, + "step": 22225 + }, + { + "audio_loss_0": 3.0781, + "audio_loss_1": 3.5, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.8594, + "audio_loss_5": 3.6406, + "audio_loss_6": 3.3125, + "epoch": 1.6052726616106898, + "loss": 3.1562, + "loss_text": 0.4277, + "state_loss_0": 0.0, + "step": 22225 + }, + { + "audio_loss_0": 2.9062, + "audio_loss_1": 3.2188, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3125, + "audio_loss_6": 3.0781, + "epoch": 1.6052726616106898, + "loss": 2.9531, + "loss_text": 0.3281, + "state_loss_0": 0.0, + "step": 22225 + }, + { + "epoch": 1.607078367641748, + "grad_norm": 0.6613566875457764, + "learning_rate": 4.892437779598629e-06, + "loss": 2.9889, + "step": 22250 + }, + { + "audio_loss_0": 2.8125, + "audio_loss_1": 3.0625, + "audio_loss_2": 2.9531, + "audio_loss_3": 3.9688, + "audio_loss_4": 3.5, + "audio_loss_5": 3.3281, + "audio_loss_6": 3.0469, + "epoch": 1.607078367641748, + "loss": 2.9062, + "loss_text": 0.6172, + "state_loss_0": 0.0, + "step": 22250 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.125, + "audio_loss_2": 2.8594, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.2812, + "audio_loss_6": 2.9844, + "epoch": 1.607078367641748, + "loss": 2.8906, + "loss_text": 0.3926, + "state_loss_0": 0.0, + "step": 22250 + }, + { + "epoch": 1.608884073672806, + "grad_norm": 0.634445309638977, + "learning_rate": 4.84908405209738e-06, + "loss": 2.995, + "step": 22275 + }, + { + "audio_loss_0": 3.0312, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.5, + "audio_loss_6": 3.1562, + "epoch": 1.608884073672806, + "loss": 3.0156, + "loss_text": 0.3438, + "state_loss_0": 0.0, + "step": 22275 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.3125, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.75, + "audio_loss_5": 3.5, + "audio_loss_6": 3.2969, + "epoch": 1.608884073672806, + "loss": 3.0469, + "loss_text": 0.3398, + "state_loss_0": 0.0, + "step": 22275 + }, + { + "epoch": 1.6106897797038642, + "grad_norm": 0.67802894115448, + "learning_rate": 4.805902628281405e-06, + "loss": 3.0078, + "step": 22300 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.0781, + "epoch": 1.6106897797038642, + "loss": 2.9219, + "loss_text": 0.1494, + "state_loss_0": 0.0, + "step": 22300 + }, + { + "audio_loss_0": 3.2031, + "audio_loss_1": 3.25, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.125, + "epoch": 1.6106897797038642, + "loss": 2.9844, + "loss_text": 0.2539, + "state_loss_0": 0.0, + "step": 22300 + }, + { + "epoch": 1.6124954857349225, + "grad_norm": 0.6139829158782959, + "learning_rate": 4.762893877380515e-06, + "loss": 2.9912, + "step": 22325 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1875, + "epoch": 1.6124954857349225, + "loss": 3.0312, + "loss_text": 0.3711, + "state_loss_0": 0.0, + "step": 22325 + }, + { + "audio_loss_0": 2.9219, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1562, + "epoch": 1.6124954857349225, + "loss": 3.0, + "loss_text": 0.4062, + "state_loss_0": 0.0, + "step": 22325 + }, + { + "epoch": 1.6143011917659806, + "grad_norm": 0.6254326701164246, + "learning_rate": 4.720058167148014e-06, + "loss": 2.9992, + "step": 22350 + }, + { + "audio_loss_0": 2.6094, + "audio_loss_1": 3.1875, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.5, + "audio_loss_5": 3.3594, + "audio_loss_6": 2.9844, + "epoch": 1.6143011917659806, + "loss": 2.8906, + "loss_text": 0.3672, + "state_loss_0": 0.0, + "step": 22350 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.5781, + "audio_loss_2": 3.3281, + "audio_loss_3": 4.375, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2188, + "epoch": 1.6143011917659806, + "loss": 3.1094, + "loss_text": 0.2559, + "state_loss_0": 0.0, + "step": 22350 + }, + { + "epoch": 1.6161068977970388, + "grad_norm": 0.7013593912124634, + "learning_rate": 4.677395863857623e-06, + "loss": 2.9892, + "step": 22375 + }, + { + "audio_loss_0": 2.7969, + "audio_loss_1": 3.2188, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1562, + "epoch": 1.6161068977970388, + "loss": 2.9531, + "loss_text": 0.2578, + "state_loss_0": 0.0, + "step": 22375 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.5312, + "audio_loss_2": 3.2188, + "audio_loss_3": 4.3438, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.2344, + "epoch": 1.6161068977970388, + "loss": 3.1719, + "loss_text": 0.8867, + "state_loss_0": 0.0, + "step": 22375 + }, + { + "epoch": 1.6179126038280969, + "grad_norm": 0.651037871837616, + "learning_rate": 4.634907332300298e-06, + "loss": 2.9893, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.6406, + "eval_loss_AQACONVA": 3.6406, + "eval_loss_text_AQACONVA": 2.5156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 4.0, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 2.0312, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4219, + "eval_loss": 3.4531, + "eval_loss_AQACONVA": 3.4531, + "eval_loss_text_AQACONVA": 1.7109, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9375, + "eval_audio_loss_5_AQACONVA": 3.7031, + "eval_audio_loss_6_AQACONVA": 3.3906, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.2969, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.7656, + "eval_loss_AQACONVA": 3.7656, + "eval_loss_text_AQACONVA": 3.4062, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.8594, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.8125, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.8906, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.4844, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6094, + "eval_audio_loss_2_RQACONVA": 3.3594, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5312, + "eval_audio_loss_6_RQACONVA": 3.3594, + "eval_loss": 3.4688, + "eval_loss_RQACONVA": 3.4688, + "eval_loss_text_RQACONVA": 2.5625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.7031, + "eval_audio_loss_6_RQACONVA": 3.4062, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.9297, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_RQACONVA": 3.4219, + "eval_audio_loss_1_RQACONVA": 3.4688, + "eval_audio_loss_2_RQACONVA": 3.2656, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5312, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_RQACONVA": 3.4531, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5781, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.5625, + "eval_loss": 3.6875, + "eval_loss_RQACONVA": 3.6875, + "eval_loss_text_RQACONVA": 2.5625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.7188, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 3.2188, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_RQACONVA": 3.2969, + "eval_audio_loss_1_RQACONVA": 3.625, + "eval_audio_loss_2_RQACONVA": 3.4844, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7344, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.3281, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_RQACONVA": 3.1719, + "eval_audio_loss_1_RQACONVA": 3.7812, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.4688, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.25, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9219, + "eval_audio_loss_5_RQACONVA": 3.6562, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.5, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.6641, + "eval_loss_RQACONV": 0.6641, + "eval_loss_text_RQACONV": 1.3281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.7969, + "eval_loss_RQACONV": 0.7969, + "eval_loss_text_RQACONV": 1.5938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.7578, + "eval_loss_RQACONV": 0.7578, + "eval_loss_text_RQACONV": 1.5156, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.8281, + "eval_loss_RQACONV": 0.8281, + "eval_loss_text_RQACONV": 1.6562, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.7539, + "eval_loss_RQACONV": 0.7539, + "eval_loss_text_RQACONV": 1.5078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.8164, + "eval_loss_RQACONV": 0.8164, + "eval_loss_text_RQACONV": 1.6328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.9609, + "eval_loss_RQACONV": 0.9609, + "eval_loss_text_RQACONV": 1.9219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.4961, + "eval_loss_RQACONV": 0.4961, + "eval_loss_text_RQACONV": 0.9922, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.543, + "eval_loss_RQACONV": 0.543, + "eval_loss_text_RQACONV": 1.0859, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.4707, + "eval_loss_RQACONV": 0.4707, + "eval_loss_text_RQACONV": 0.9414, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.6719, + "eval_loss_RQACONV": 0.6719, + "eval_loss_text_RQACONV": 1.3438, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.5508, + "eval_loss_RQACONV": 0.5508, + "eval_loss_text_RQACONV": 1.1016, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.5977, + "eval_loss_RQACONV": 0.5977, + "eval_loss_text_RQACONV": 1.1953, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.5352, + "eval_loss_RQACONV": 0.5352, + "eval_loss_text_RQACONV": 1.0703, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 1.4297, + "eval_loss_RQACONV": 1.4297, + "eval_loss_text_RQACONV": 2.8594, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.1226, + "eval_loss_RQACONV": 0.1226, + "eval_loss_text_RQACONV": 0.2451, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.1387, + "eval_loss_RQACONV": 0.1387, + "eval_loss_text_RQACONV": 0.2773, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.0693, + "eval_loss_RQACONV": 0.0693, + "eval_loss_text_RQACONV": 0.1387, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.1426, + "eval_loss_RQACONV": 0.1426, + "eval_loss_text_RQACONV": 0.2852, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.1426, + "eval_loss_RQACONV": 0.1426, + "eval_loss_text_RQACONV": 0.2852, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.0271, + "eval_loss_RQACONV": 0.0271, + "eval_loss_text_RQACONV": 0.0542, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.127, + "eval_loss_RQACONV": 0.127, + "eval_loss_text_RQACONV": 0.2539, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.0972, + "eval_loss_RQACONV": 0.0972, + "eval_loss_text_RQACONV": 0.1943, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.0864, + "eval_loss_RQACONV": 0.0864, + "eval_loss_text_RQACONV": 0.1729, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 0.5938, + "eval_loss_RQACONV": 0.5938, + "eval_loss_text_RQACONV": 1.1875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22400 + }, + { + "epoch": 1.6179126038280969, + "eval_loss": 1.5848073959350586, + "eval_runtime": 27.7486, + "eval_samples_per_second": 192.767, + "eval_steps_per_second": 1.514, + "step": 22400 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1094, + "epoch": 1.6179126038280969, + "loss": 2.9688, + "loss_text": 0.1318, + "state_loss_0": 0.0, + "step": 22400 + }, + { + "audio_loss_0": 2.5625, + "audio_loss_1": 3.1562, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.6094, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1094, + "epoch": 1.6179126038280969, + "loss": 2.8906, + "loss_text": 0.2734, + "state_loss_0": 0.0, + "step": 22400 + }, + { + "epoch": 1.619718309859155, + "grad_norm": 0.6246947050094604, + "learning_rate": 4.592592935781151e-06, + "loss": 2.9921, + "step": 22425 + }, + { + "audio_loss_0": 2.8281, + "audio_loss_1": 3.3281, + "audio_loss_2": 3.1562, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.625, + "audio_loss_5": 3.3906, + "audio_loss_6": 3.1094, + "epoch": 1.619718309859155, + "loss": 3.0, + "loss_text": 0.4785, + "state_loss_0": 0.0, + "step": 22425 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.2812, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.25, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.3438, + "audio_loss_6": 3.1406, + "epoch": 1.619718309859155, + "loss": 3.0, + "loss_text": 0.3164, + "state_loss_0": 0.0, + "step": 22425 + }, + { + "epoch": 1.621524015890213, + "grad_norm": 0.6403222680091858, + "learning_rate": 4.5504530361163e-06, + "loss": 2.9954, + "step": 22450 + }, + { + "audio_loss_0": 2.8594, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.2969, + "audio_loss_6": 3.125, + "epoch": 1.621524015890213, + "loss": 2.9375, + "loss_text": 0.1855, + "state_loss_0": 0.0, + "step": 22450 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.4531, + "audio_loss_2": 3.1719, + "audio_loss_3": 4.2812, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.2656, + "epoch": 1.621524015890213, + "loss": 3.0781, + "loss_text": 0.2773, + "state_loss_0": 0.0, + "step": 22450 + }, + { + "epoch": 1.6233297219212712, + "grad_norm": 0.6074870824813843, + "learning_rate": 4.508487993629823e-06, + "loss": 3.0008, + "step": 22475 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.3281, + "audio_loss_2": 2.9219, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.5469, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1875, + "epoch": 1.6233297219212712, + "loss": 2.9688, + "loss_text": 0.2314, + "state_loss_0": 0.0, + "step": 22475 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.4375, + "audio_loss_2": 3.2656, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1875, + "epoch": 1.6233297219212712, + "loss": 3.0625, + "loss_text": 0.1582, + "state_loss_0": 0.0, + "step": 22475 + }, + { + "epoch": 1.6251354279523293, + "grad_norm": 0.6309900283813477, + "learning_rate": 4.466698167150649e-06, + "loss": 2.9998, + "step": 22500 + }, + { + "audio_loss_0": 2.875, + "audio_loss_1": 3.1719, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.1719, + "audio_loss_6": 3.0469, + "epoch": 1.6251354279523293, + "loss": 2.8906, + "loss_text": 0.2949, + "state_loss_0": 0.0, + "step": 22500 + }, + { + "audio_loss_0": 3.2188, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.75, + "audio_loss_5": 3.4844, + "audio_loss_6": 3.1875, + "epoch": 1.6251354279523293, + "loss": 3.0625, + "loss_text": 0.252, + "state_loss_0": 0.0, + "step": 22500 + }, + { + "epoch": 1.6269411339833875, + "grad_norm": 0.6440161466598511, + "learning_rate": 4.425083914009476e-06, + "loss": 2.9984, + "step": 22525 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.2969, + "audio_loss_2": 3.125, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.5938, + "audio_loss_6": 3.3438, + "epoch": 1.6269411339833875, + "loss": 3.0938, + "loss_text": 0.3398, + "state_loss_0": 0.0, + "step": 22525 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.4062, + "audio_loss_6": 3.1562, + "epoch": 1.6269411339833875, + "loss": 3.0469, + "loss_text": 0.3379, + "state_loss_0": 0.0, + "step": 22525 + }, + { + "epoch": 1.6287468400144456, + "grad_norm": 0.666053295135498, + "learning_rate": 4.3836455900357535e-06, + "loss": 2.99, + "step": 22550 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.4844, + "audio_loss_2": 3.25, + "audio_loss_3": 4.3125, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.25, + "epoch": 1.6287468400144456, + "loss": 3.0938, + "loss_text": 0.1748, + "state_loss_0": 0.0, + "step": 22550 + }, + { + "audio_loss_0": 2.7344, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.1094, + "epoch": 1.6287468400144456, + "loss": 2.9844, + "loss_text": 0.3652, + "state_loss_0": 0.0, + "step": 22550 + }, + { + "epoch": 1.6305525460455037, + "grad_norm": 0.6750335693359375, + "learning_rate": 4.342383549554607e-06, + "loss": 2.9966, + "step": 22575 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.375, + "audio_loss_2": 3.1406, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.4531, + "audio_loss_6": 3.2188, + "epoch": 1.6305525460455037, + "loss": 3.0469, + "loss_text": 0.3711, + "state_loss_0": 0.0, + "step": 22575 + }, + { + "audio_loss_0": 2.8906, + "audio_loss_1": 3.1406, + "audio_loss_2": 2.9375, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6406, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.125, + "epoch": 1.6305525460455037, + "loss": 2.9375, + "loss_text": 0.2598, + "state_loss_0": 0.0, + "step": 22575 + }, + { + "epoch": 1.6323582520765618, + "grad_norm": 0.5683905482292175, + "learning_rate": 4.3012981453838365e-06, + "loss": 2.9935, + "step": 22600 + }, + { + "audio_loss_0": 2.9375, + "audio_loss_1": 3.2031, + "audio_loss_2": 3.0156, + "audio_loss_3": 4.0938, + "audio_loss_4": 3.5781, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0781, + "epoch": 1.6323582520765618, + "loss": 2.9531, + "loss_text": 0.2158, + "state_loss_0": 0.0, + "step": 22600 + }, + { + "audio_loss_0": 3.3125, + "audio_loss_1": 3.5469, + "audio_loss_2": 3.3125, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7344, + "audio_loss_5": 3.625, + "audio_loss_6": 3.2031, + "epoch": 1.6323582520765618, + "loss": 3.2031, + "loss_text": 0.5547, + "state_loss_0": 0.0, + "step": 22600 + }, + { + "epoch": 1.63416395810762, + "grad_norm": 0.6073514223098755, + "learning_rate": 4.2603897288308605e-06, + "loss": 2.9812, + "step": 22625 + }, + { + "audio_loss_0": 2.4688, + "audio_loss_1": 3.25, + "audio_loss_2": 2.8125, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.4688, + "audio_loss_5": 3.0156, + "audio_loss_6": 2.9375, + "epoch": 1.63416395810762, + "loss": 2.7812, + "loss_text": 0.2695, + "state_loss_0": 0.0, + "step": 22625 + }, + { + "audio_loss_0": 3.2969, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0781, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.375, + "audio_loss_6": 3.1562, + "epoch": 1.63416395810762, + "loss": 3.125, + "loss_text": 0.8359, + "state_loss_0": 0.0, + "step": 22625 + }, + { + "epoch": 1.635969664138678, + "grad_norm": 0.6286430358886719, + "learning_rate": 4.219658649689762e-06, + "loss": 2.9867, + "step": 22650 + }, + { + "audio_loss_0": 2.7656, + "audio_loss_1": 3.2188, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.0312, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.3594, + "audio_loss_6": 3.0469, + "epoch": 1.635969664138678, + "loss": 2.9062, + "loss_text": 0.2715, + "state_loss_0": 0.0, + "step": 22650 + }, + { + "audio_loss_0": 2.7812, + "audio_loss_1": 3.0625, + "audio_loss_2": 2.9062, + "audio_loss_3": 4.0625, + "audio_loss_4": 3.5625, + "audio_loss_5": 3.3438, + "audio_loss_6": 2.9688, + "epoch": 1.635969664138678, + "loss": 2.875, + "loss_text": 0.2676, + "state_loss_0": 0.0, + "step": 22650 + }, + { + "epoch": 1.6377753701697364, + "grad_norm": 0.7367401719093323, + "learning_rate": 4.1791052562382525e-06, + "loss": 2.9937, + "step": 22675 + }, + { + "audio_loss_0": 3.1094, + "audio_loss_1": 3.1094, + "audio_loss_2": 2.8438, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7031, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.125, + "epoch": 1.6377753701697364, + "loss": 2.9844, + "loss_text": 0.3223, + "state_loss_0": 0.0, + "step": 22675 + }, + { + "audio_loss_0": 3.0625, + "audio_loss_1": 3.2031, + "audio_loss_2": 2.875, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.6719, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1875, + "epoch": 1.6377753701697364, + "loss": 3.0, + "loss_text": 0.3105, + "state_loss_0": 0.0, + "step": 22675 + }, + { + "epoch": 1.6395810762007945, + "grad_norm": 0.6730944514274597, + "learning_rate": 4.138729895234725e-06, + "loss": 2.9848, + "step": 22700 + }, + { + "audio_loss_0": 2.9688, + "audio_loss_1": 3.3594, + "audio_loss_2": 3.0469, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.1875, + "epoch": 1.6395810762007945, + "loss": 3.0625, + "loss_text": 0.5352, + "state_loss_0": 0.0, + "step": 22700 + }, + { + "audio_loss_0": 3.1719, + "audio_loss_1": 3.4219, + "audio_loss_2": 3.0938, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.7812, + "audio_loss_5": 3.5781, + "audio_loss_6": 3.2969, + "epoch": 1.6395810762007945, + "loss": 3.1094, + "loss_text": 0.2578, + "state_loss_0": 0.0, + "step": 22700 + }, + { + "epoch": 1.6413867822318526, + "grad_norm": 0.6652769446372986, + "learning_rate": 4.098532911915276e-06, + "loss": 2.9916, + "step": 22725 + }, + { + "audio_loss_0": 2.8438, + "audio_loss_1": 3.2344, + "audio_loss_2": 2.9688, + "audio_loss_3": 4.2188, + "audio_loss_4": 3.5938, + "audio_loss_5": 3.4219, + "audio_loss_6": 3.1562, + "epoch": 1.6413867822318526, + "loss": 2.9688, + "loss_text": 0.3516, + "state_loss_0": 0.0, + "step": 22725 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.375, + "audio_loss_2": 3.0625, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7188, + "audio_loss_5": 3.4688, + "audio_loss_6": 3.125, + "epoch": 1.6413867822318526, + "loss": 3.0469, + "loss_text": 0.4434, + "state_loss_0": 0.0, + "step": 22725 + }, + { + "epoch": 1.6431924882629108, + "grad_norm": 0.7023254036903381, + "learning_rate": 4.058514649990741e-06, + "loss": 2.9905, + "step": 22750 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.1562, + "audio_loss_2": 2.9531, + "audio_loss_3": 4.1562, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5312, + "audio_loss_6": 3.2344, + "epoch": 1.6431924882629108, + "loss": 3.0156, + "loss_text": 0.2891, + "state_loss_0": 0.0, + "step": 22750 + }, + { + "audio_loss_0": 3.3281, + "audio_loss_1": 3.4062, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.25, + "audio_loss_4": 3.7656, + "audio_loss_5": 3.5, + "audio_loss_6": 3.25, + "epoch": 1.6431924882629108, + "loss": 3.0938, + "loss_text": 0.2441, + "state_loss_0": 0.0, + "step": 22750 + }, + { + "epoch": 1.644998194293969, + "grad_norm": 0.7063782811164856, + "learning_rate": 4.018675451643791e-06, + "loss": 2.9987, + "step": 22775 + }, + { + "audio_loss_0": 3.0, + "audio_loss_1": 3.3125, + "audio_loss_2": 3.0312, + "audio_loss_3": 4.125, + "audio_loss_4": 3.6875, + "audio_loss_5": 3.4375, + "audio_loss_6": 3.1406, + "epoch": 1.644998194293969, + "loss": 2.9844, + "loss_text": 0.2061, + "state_loss_0": 0.0, + "step": 22775 + }, + { + "audio_loss_0": 3.0469, + "audio_loss_1": 3.25, + "audio_loss_2": 3.0, + "audio_loss_3": 4.1875, + "audio_loss_4": 3.6562, + "audio_loss_5": 3.375, + "audio_loss_6": 3.0781, + "epoch": 1.644998194293969, + "loss": 3.0156, + "loss_text": 0.5195, + "state_loss_0": 0.0, + "step": 22775 + }, + { + "epoch": 1.6468039003250272, + "grad_norm": 0.6276195049285889, + "learning_rate": 3.979015657525962e-06, + "loss": 2.9926, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_AQACONVA": 3.3438, + "eval_audio_loss_1_AQACONVA": 3.6719, + "eval_audio_loss_2_AQACONVA": 3.3906, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.0938, + "eval_audio_loss_5_AQACONVA": 3.9062, + "eval_audio_loss_6_AQACONVA": 3.625, + "eval_loss": 3.6406, + "eval_loss_AQACONVA": 3.6406, + "eval_loss_text_AQACONVA": 2.5156, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_AQACONVA": 3.1094, + "eval_audio_loss_1_AQACONVA": 3.7031, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4688, + "eval_audio_loss_4_AQACONVA": 3.9844, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.5, + "eval_loss_AQACONVA": 3.5, + "eval_loss_text_AQACONVA": 2.0312, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_AQACONVA": 3.2969, + "eval_audio_loss_1_AQACONVA": 3.5781, + "eval_audio_loss_2_AQACONVA": 3.4062, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 4.0312, + "eval_audio_loss_5_AQACONVA": 3.7812, + "eval_audio_loss_6_AQACONVA": 3.4375, + "eval_loss": 3.4688, + "eval_loss_AQACONVA": 3.4688, + "eval_loss_text_AQACONVA": 1.7188, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_AQACONVA": 3.2031, + "eval_audio_loss_1_AQACONVA": 3.6094, + "eval_audio_loss_2_AQACONVA": 3.3125, + "eval_audio_loss_3_AQACONVA": 4.4375, + "eval_audio_loss_4_AQACONVA": 3.9375, + "eval_audio_loss_5_AQACONVA": 3.7031, + "eval_audio_loss_6_AQACONVA": 3.3906, + "eval_loss": 3.6094, + "eval_loss_AQACONVA": 3.6094, + "eval_loss_text_AQACONVA": 3.2969, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_AQACONVA": 3.2188, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5312, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.8438, + "eval_audio_loss_6_AQACONVA": 3.5938, + "eval_loss": 3.7656, + "eval_loss_AQACONVA": 3.7656, + "eval_loss_text_AQACONVA": 3.4219, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_AQACONVA": 3.5469, + "eval_audio_loss_1_AQACONVA": 3.7188, + "eval_audio_loss_2_AQACONVA": 3.6094, + "eval_audio_loss_3_AQACONVA": 4.5, + "eval_audio_loss_4_AQACONVA": 4.125, + "eval_audio_loss_5_AQACONVA": 3.9688, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.75, + "eval_loss_AQACONVA": 3.75, + "eval_loss_text_AQACONVA": 2.8594, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_AQACONVA": 3.5625, + "eval_audio_loss_1_AQACONVA": 3.7969, + "eval_audio_loss_2_AQACONVA": 3.5, + "eval_audio_loss_3_AQACONVA": 4.5312, + "eval_audio_loss_4_AQACONVA": 4.0625, + "eval_audio_loss_5_AQACONVA": 3.875, + "eval_audio_loss_6_AQACONVA": 3.6094, + "eval_loss": 3.6875, + "eval_loss_AQACONVA": 3.6875, + "eval_loss_text_AQACONVA": 2.4688, + "eval_state_loss_0_AQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.5938, + "eval_audio_loss_2_RQACONVA": 3.3438, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.8438, + "eval_audio_loss_5_RQACONVA": 3.5156, + "eval_audio_loss_6_RQACONVA": 3.3438, + "eval_loss": 3.4531, + "eval_loss_RQACONVA": 3.4531, + "eval_loss_text_RQACONVA": 2.4844, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_RQACONVA": 3.0938, + "eval_audio_loss_1_RQACONVA": 3.6406, + "eval_audio_loss_2_RQACONVA": 3.3125, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.7031, + "eval_audio_loss_6_RQACONVA": 3.4062, + "eval_loss": 3.4219, + "eval_loss_RQACONVA": 3.4219, + "eval_loss_text_RQACONVA": 1.9531, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_RQACONVA": 3.4062, + "eval_audio_loss_1_RQACONVA": 3.4531, + "eval_audio_loss_2_RQACONVA": 3.25, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9531, + "eval_audio_loss_5_RQACONVA": 3.7969, + "eval_audio_loss_6_RQACONVA": 3.5156, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.5469, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_RQACONVA": 3.4531, + "eval_audio_loss_1_RQACONVA": 3.8125, + "eval_audio_loss_2_RQACONVA": 3.5938, + "eval_audio_loss_3_RQACONVA": 4.5938, + "eval_audio_loss_4_RQACONVA": 4.0938, + "eval_audio_loss_5_RQACONVA": 3.8594, + "eval_audio_loss_6_RQACONVA": 3.5781, + "eval_loss": 3.7188, + "eval_loss_RQACONVA": 3.7188, + "eval_loss_text_RQACONVA": 2.625, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_RQACONVA": 3.25, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4219, + "eval_audio_loss_3_RQACONVA": 4.4375, + "eval_audio_loss_4_RQACONVA": 3.9688, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.5, + "eval_loss": 3.6562, + "eval_loss_RQACONVA": 3.6562, + "eval_loss_text_RQACONVA": 3.1719, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_RQACONVA": 3.3594, + "eval_audio_loss_1_RQACONVA": 3.6562, + "eval_audio_loss_2_RQACONVA": 3.4688, + "eval_audio_loss_3_RQACONVA": 4.4688, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7188, + "eval_audio_loss_6_RQACONVA": 3.4219, + "eval_loss": 3.5625, + "eval_loss_RQACONVA": 3.5625, + "eval_loss_text_RQACONVA": 2.375, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_RQACONVA": 3.2031, + "eval_audio_loss_1_RQACONVA": 3.75, + "eval_audio_loss_2_RQACONVA": 3.4062, + "eval_audio_loss_3_RQACONVA": 4.5, + "eval_audio_loss_4_RQACONVA": 3.9844, + "eval_audio_loss_5_RQACONVA": 3.7812, + "eval_audio_loss_6_RQACONVA": 3.4531, + "eval_loss": 3.5469, + "eval_loss_RQACONVA": 3.5469, + "eval_loss_text_RQACONVA": 2.2969, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_audio_loss_0_RQACONVA": 3.2188, + "eval_audio_loss_1_RQACONVA": 3.6719, + "eval_audio_loss_2_RQACONVA": 3.4531, + "eval_audio_loss_3_RQACONVA": 4.375, + "eval_audio_loss_4_RQACONVA": 3.9375, + "eval_audio_loss_5_RQACONVA": 3.6562, + "eval_audio_loss_6_RQACONVA": 3.4375, + "eval_loss": 3.5312, + "eval_loss_RQACONVA": 3.5312, + "eval_loss_text_RQACONVA": 2.5781, + "eval_state_loss_0_RQACONVA": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.6758, + "eval_loss_RQACONV": 0.6758, + "eval_loss_text_RQACONV": 1.3516, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.5664, + "eval_loss_RQACONV": 0.5664, + "eval_loss_text_RQACONV": 1.1328, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.8086, + "eval_loss_RQACONV": 0.8086, + "eval_loss_text_RQACONV": 1.6172, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.7109, + "eval_loss_RQACONV": 0.7109, + "eval_loss_text_RQACONV": 1.4219, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.9141, + "eval_loss_RQACONV": 0.9141, + "eval_loss_text_RQACONV": 1.8281, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.7539, + "eval_loss_RQACONV": 0.7539, + "eval_loss_text_RQACONV": 1.5078, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.8594, + "eval_loss_RQACONV": 0.8594, + "eval_loss_text_RQACONV": 1.7188, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 1.0156, + "eval_loss_RQACONV": 1.0156, + "eval_loss_text_RQACONV": 2.0312, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.4902, + "eval_loss_RQACONV": 0.4902, + "eval_loss_text_RQACONV": 0.9805, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.5391, + "eval_loss_RQACONV": 0.5391, + "eval_loss_text_RQACONV": 1.0781, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.4648, + "eval_loss_RQACONV": 0.4648, + "eval_loss_text_RQACONV": 0.9297, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.668, + "eval_loss_RQACONV": 0.668, + "eval_loss_text_RQACONV": 1.3359, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.5469, + "eval_loss_RQACONV": 0.5469, + "eval_loss_text_RQACONV": 1.0938, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.5312, + "eval_loss_RQACONV": 0.5312, + "eval_loss_text_RQACONV": 1.0625, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.5, + "eval_loss_RQACONV": 0.5, + "eval_loss_text_RQACONV": 1.0, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 1.4375, + "eval_loss_RQACONV": 1.4375, + "eval_loss_text_RQACONV": 2.875, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.1206, + "eval_loss_RQACONV": 0.1206, + "eval_loss_text_RQACONV": 0.2412, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.1387, + "eval_loss_RQACONV": 0.1387, + "eval_loss_text_RQACONV": 0.2773, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.0688, + "eval_loss_RQACONV": 0.0688, + "eval_loss_text_RQACONV": 0.1377, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.1436, + "eval_loss_RQACONV": 0.1436, + "eval_loss_text_RQACONV": 0.2871, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.1416, + "eval_loss_RQACONV": 0.1416, + "eval_loss_text_RQACONV": 0.2832, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.0271, + "eval_loss_RQACONV": 0.0271, + "eval_loss_text_RQACONV": 0.0542, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.125, + "eval_loss_RQACONV": 0.125, + "eval_loss_text_RQACONV": 0.25, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.0957, + "eval_loss_RQACONV": 0.0957, + "eval_loss_text_RQACONV": 0.1914, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.084, + "eval_loss_RQACONV": 0.084, + "eval_loss_text_RQACONV": 0.168, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 0.5898, + "eval_loss_RQACONV": 0.5898, + "eval_loss_text_RQACONV": 1.1797, + "eval_state_loss_0_RQACONV": 0.0, + "step": 22800 + }, + { + "epoch": 1.6468039003250272, + "eval_loss": 1.5838485956192017, + "eval_runtime": 28.626, + "eval_samples_per_second": 186.858, + "eval_steps_per_second": 1.467, + "step": 22800 + } + ], + "logging_steps": 25, + "max_steps": 27690, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 400, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.334964282160654e+21, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}