PLUME-Qwen2-VL-2B / trainer_state.json
CUDAOUTOFMEMORY's picture
Upload folder using huggingface_hub
aaa3161 verified
raw
history blame contribute delete
885 Bytes
{
"log_history": [
{
"avg_latent_steps": 6.0,
"avg_suffix_tokens": 7.0,
"ce_loss": 0.0023984569124877453,
"contrastive_pairs_global_disc": 64.0,
"contrastive_pairs_global_gen": 64.0,
"contrastive_pairs_local_disc": 8.0,
"contrastive_pairs_local_gen": 8.0,
"curriculum_latent_tokens": 6.0,
"curriculum_ratio": 1.0,
"curriculum_stage": 0.0,
"curriculum_total_stages": 1.0,
"debug_disc_oracle_pos_from_qry": 0.0,
"disc_contrastive_loss": 1.3125,
"epoch": 1.474169741697417,
"gen_contrastive_loss": 1.1875,
"latent_moe_balance_loss": 0.010107994079589844,
"latent_moe_enable": 1.0,
"latent_moe_router_entropy": 1.2998046875,
"loss": 2.5023984909057617,
"pos_ce_loss": 0.001024903729557991,
"qry_ce_loss": 0.0013735531829297543,
"step": 799
}
]
}