Ctrl+K
- 1.52 kB initial commit
- 597 Bytes upload checkpoint step=99200 best_val_loss=3.678879
- 1.73 kB upload checkpoint step=99200 best_val_loss=3.678879
- 685 Bytes upload checkpoint step=99200 best_val_loss=3.678879
- 199 Bytes upload checkpoint step=99200 best_val_loss=3.678879
- 8.23 kB upload checkpoint step=-1 best_val_loss=nan
- 761 MB xetupload checkpoint step=99200 best_val_loss=3.678879
- model_state.pt761 MB
Detected Pickle imports (442)
- "torch.ByteStorage",
- "torch._utils._rebuild_tensor_v2",
- "torch.FloatStorage",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict",
- "torch._utils._rebuild_tensor_v2",
- "torch.storage._load_from_bytes",
- "transformer_engine.common.recipe._FormatHelper",
- "transformer_engine.common.recipe.Format",
- "transformer_engine.common.recipe.DelayedScaling",
- "collections.OrderedDict"
xetupload checkpoint step=99200 best_val_loss=3.678879 - 131 Bytes upload checkpoint step=99200 best_val_loss=3.678879
- 225 Bytes upload checkpoint step=99200 best_val_loss=3.678879
- 193 Bytes upload checkpoint step=99200 best_val_loss=3.678879