Training in progress, epoch 1
Browse files- 6a3d37d565774c52a8a95e75642966bf/config.json +48 -0
- 6a3d37d565774c52a8a95e75642966bf/model.safetensors +3 -0
- cc81c2c05e6d49e7b2919c447ff5f6fe/config.json +48 -0
- cc81c2c05e6d49e7b2919c447ff5f6fe/model.safetensors +3 -0
- gpu_insight/internal/gpu_data_202405301944 +8 -0
- gpu_insight/user/gpu_data_202405301944 +9 -0
- hsperfdata_root/2725970 +0 -0
- model.safetensors +1 -1
- runs/May30_19-32-13_one/events.out.tfevents.1717065649.one.682896.29 +3 -0
- runs/May30_19-40-50_one/events.out.tfevents.1717065651.one.682896.30 +3 -0
- runs/May30_19-44-08_one/events.out.tfevents.1717065850.one.2160426.0 +3 -0
- tmplk3ewan2/__pycache__/_remote_module_non_scriptable.cpython-311.pyc +0 -0
- tmplk3ewan2/_remote_module_non_scriptable.py +81 -0
- training_args.bin +1 -1
6a3d37d565774c52a8a95e75642966bf/config.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "ibm/TTM",
|
| 3 |
+
"adaptive_patching_levels": 3,
|
| 4 |
+
"architectures": [
|
| 5 |
+
"TinyTimeMixerForPrediction"
|
| 6 |
+
],
|
| 7 |
+
"context_length": 1024,
|
| 8 |
+
"d_model": 192,
|
| 9 |
+
"d_model_scale": 3,
|
| 10 |
+
"decoder_adaptive_patching_levels": 0,
|
| 11 |
+
"decoder_d_model": 128,
|
| 12 |
+
"decoder_d_model_scale": 2,
|
| 13 |
+
"decoder_mode": "common_channel",
|
| 14 |
+
"decoder_num_layers": 2,
|
| 15 |
+
"decoder_raw_residual": false,
|
| 16 |
+
"dropout": 0.2,
|
| 17 |
+
"expansion_factor": 2,
|
| 18 |
+
"frequency_token_vocab_size": 5,
|
| 19 |
+
"gated_attn": true,
|
| 20 |
+
"head_dropout": 0.2,
|
| 21 |
+
"init_processing": true,
|
| 22 |
+
"init_std": 0.02,
|
| 23 |
+
"loss": "mse",
|
| 24 |
+
"mode": "common_channel",
|
| 25 |
+
"model_type": "tinytimemixer",
|
| 26 |
+
"norm_eps": 1e-05,
|
| 27 |
+
"norm_mlp": "LayerNorm",
|
| 28 |
+
"num_input_channels": 1,
|
| 29 |
+
"num_layers": 2,
|
| 30 |
+
"num_patches": 16,
|
| 31 |
+
"patch_last": true,
|
| 32 |
+
"patch_length": 64,
|
| 33 |
+
"patch_stride": 64,
|
| 34 |
+
"positional_encoding_type": "sincos",
|
| 35 |
+
"post_init": false,
|
| 36 |
+
"prediction_channel_indices": null,
|
| 37 |
+
"prediction_filter_length": null,
|
| 38 |
+
"prediction_length": 96,
|
| 39 |
+
"resolution_prefix_tuning": false,
|
| 40 |
+
"scaling": "std",
|
| 41 |
+
"self_attn": false,
|
| 42 |
+
"self_attn_heads": 1,
|
| 43 |
+
"stride_ratio": 1,
|
| 44 |
+
"torch_dtype": "float32",
|
| 45 |
+
"transformers_version": "4.41.1",
|
| 46 |
+
"use_decoder": true,
|
| 47 |
+
"use_positional_encoding": false
|
| 48 |
+
}
|
6a3d37d565774c52a8a95e75642966bf/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a58455dd4af9762c0bec4676cf933d3af9101055dfbbe3cd3f00075b16796db
|
| 3 |
+
size 3804872
|
cc81c2c05e6d49e7b2919c447ff5f6fe/config.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "ibm/TTM",
|
| 3 |
+
"adaptive_patching_levels": 3,
|
| 4 |
+
"architectures": [
|
| 5 |
+
"TinyTimeMixerForPrediction"
|
| 6 |
+
],
|
| 7 |
+
"context_length": 1024,
|
| 8 |
+
"d_model": 192,
|
| 9 |
+
"d_model_scale": 3,
|
| 10 |
+
"decoder_adaptive_patching_levels": 0,
|
| 11 |
+
"decoder_d_model": 128,
|
| 12 |
+
"decoder_d_model_scale": 2,
|
| 13 |
+
"decoder_mode": "common_channel",
|
| 14 |
+
"decoder_num_layers": 2,
|
| 15 |
+
"decoder_raw_residual": false,
|
| 16 |
+
"dropout": 0.2,
|
| 17 |
+
"expansion_factor": 2,
|
| 18 |
+
"frequency_token_vocab_size": 5,
|
| 19 |
+
"gated_attn": true,
|
| 20 |
+
"head_dropout": 0.2,
|
| 21 |
+
"init_processing": true,
|
| 22 |
+
"init_std": 0.02,
|
| 23 |
+
"loss": "mse",
|
| 24 |
+
"mode": "common_channel",
|
| 25 |
+
"model_type": "tinytimemixer",
|
| 26 |
+
"norm_eps": 1e-05,
|
| 27 |
+
"norm_mlp": "LayerNorm",
|
| 28 |
+
"num_input_channels": 1,
|
| 29 |
+
"num_layers": 2,
|
| 30 |
+
"num_patches": 16,
|
| 31 |
+
"patch_last": true,
|
| 32 |
+
"patch_length": 64,
|
| 33 |
+
"patch_stride": 64,
|
| 34 |
+
"positional_encoding_type": "sincos",
|
| 35 |
+
"post_init": false,
|
| 36 |
+
"prediction_channel_indices": null,
|
| 37 |
+
"prediction_filter_length": null,
|
| 38 |
+
"prediction_length": 96,
|
| 39 |
+
"resolution_prefix_tuning": false,
|
| 40 |
+
"scaling": "std",
|
| 41 |
+
"self_attn": false,
|
| 42 |
+
"self_attn_heads": 1,
|
| 43 |
+
"stride_ratio": 1,
|
| 44 |
+
"torch_dtype": "float32",
|
| 45 |
+
"transformers_version": "4.41.1",
|
| 46 |
+
"use_decoder": true,
|
| 47 |
+
"use_positional_encoding": false
|
| 48 |
+
}
|
cc81c2c05e6d49e7b2919c447ff5f6fe/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d146ccc6e7cd4c5b12bedc6b059435d021526a7846972227070336b83d825160
|
| 3 |
+
size 3804872
|
gpu_insight/internal/gpu_data_202405301944
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"type":"gpu","timestamp":1717065841821,"core_clk":"1410","gpu_idx":0,"power_draw":61,"temp":"32","vmem_clk":"1593"}
|
| 2 |
+
{"power_draw":61,"temp":"33","vmem_clk":"1593","type":"gpu","timestamp":1717065841821,"core_clk":"1410","gpu_idx":1}
|
| 3 |
+
{"vmem_clk":"1593","power_draw":58,"temp":"30","core_clk":"1410","gpu_idx":2,"type":"gpu","timestamp":1717065841821}
|
| 4 |
+
{"type":"gpu","timestamp":1717065841821,"core_clk":"1410","gpu_idx":3,"power_draw":62,"temp":"33","vmem_clk":"1593"}
|
| 5 |
+
{"timestamp":1717065841821,"type":"gpu","gpu_idx":4,"core_clk":"1410","temp":"30","power_draw":64,"vmem_clk":"1593"}
|
| 6 |
+
{"type":"gpu","timestamp":1717065841821,"core_clk":"1410","gpu_idx":5,"power_draw":61,"temp":"32","vmem_clk":"1593"}
|
| 7 |
+
{"power_draw":61,"temp":"31","vmem_clk":"1593","type":"gpu","timestamp":1717065841821,"core_clk":"1410","gpu_idx":6}
|
| 8 |
+
{"timestamp":1717065841821,"type":"gpu","gpu_idx":7,"core_clk":"1410","temp":"32","power_draw":62,"vmem_clk":"1593"}
|
gpu_insight/user/gpu_data_202405301944
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"gpu_idx":0,"timestamp":1717065841821,"vmem_usage_mib":"0","type":"gpu","vmem_usage":"0","usage":"0"}
|
| 2 |
+
{"usage":"0","vmem_usage":"0","type":"gpu","vmem_usage_mib":"0","timestamp":1717065841821,"gpu_idx":1}
|
| 3 |
+
{"usage":"0","vmem_usage":"0","vmem_usage_mib":"0","timestamp":1717065841821,"type":"gpu","gpu_idx":2}
|
| 4 |
+
{"vmem_usage":"0","usage":"0","gpu_idx":3,"vmem_usage_mib":"0","timestamp":1717065841821,"type":"gpu"}
|
| 5 |
+
{"vmem_usage_mib":"0","timestamp":1717065841821,"type":"gpu","gpu_idx":4,"usage":"0","vmem_usage":"0"}
|
| 6 |
+
{"vmem_usage":"0","usage":"0","gpu_idx":5,"vmem_usage_mib":"0","timestamp":1717065841821,"type":"gpu"}
|
| 7 |
+
{"timestamp":1717065841821,"vmem_usage_mib":"0","type":"gpu","gpu_idx":6,"usage":"0","vmem_usage":"0"}
|
| 8 |
+
{"gpu_idx":7,"type":"gpu","vmem_usage_mib":"0","timestamp":1717065841821,"vmem_usage":"0","usage":"0"}
|
| 9 |
+
{"gpu_count":"8","type":"svr","timestamp":1717065841821}
|
hsperfdata_root/2725970
CHANGED
|
Binary files a/hsperfdata_root/2725970 and b/hsperfdata_root/2725970 differ
|
|
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3804872
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a10d15dc96c794e0fb0efe39c58b9f7118ce3eb873a6c29a432b2cab271cef0f
|
| 3 |
size 3804872
|
runs/May30_19-32-13_one/events.out.tfevents.1717065649.one.682896.29
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37c12d2db9c7a29881dbdaae16df8e20b911b58bea51e1b87c7a560191f0c160
|
| 3 |
+
size 359
|
runs/May30_19-40-50_one/events.out.tfevents.1717065651.one.682896.30
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7cfb16e73a6dd2a106edaa2512e7f94efe535f4b51232f6e38e375e07dc3420
|
| 3 |
+
size 8056
|
runs/May30_19-44-08_one/events.out.tfevents.1717065850.one.2160426.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3e27ef1ef5b614269e1d0228ed3b0a7737f9428dc09306257334a807f19de9d
|
| 3 |
+
size 5816
|
tmplk3ewan2/__pycache__/_remote_module_non_scriptable.cpython-311.pyc
ADDED
|
Binary file (2.77 kB). View file
|
|
|
tmplk3ewan2/_remote_module_non_scriptable.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import *
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torch.distributed.rpc as rpc
|
| 5 |
+
from torch import Tensor
|
| 6 |
+
from torch._jit_internal import Future
|
| 7 |
+
from torch.distributed.rpc import RRef
|
| 8 |
+
from typing import Tuple # pyre-ignore: unused import
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
module_interface_cls = None
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def forward_async(self, *args, **kwargs):
|
| 15 |
+
args = (self.module_rref, self.device, self.is_device_map_set, *args)
|
| 16 |
+
kwargs = {**kwargs}
|
| 17 |
+
return rpc.rpc_async(
|
| 18 |
+
self.module_rref.owner(),
|
| 19 |
+
_remote_forward,
|
| 20 |
+
args,
|
| 21 |
+
kwargs,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def forward(self, *args, **kwargs):
|
| 26 |
+
args = (self.module_rref, self.device, self.is_device_map_set, *args)
|
| 27 |
+
kwargs = {**kwargs}
|
| 28 |
+
ret_fut = rpc.rpc_async(
|
| 29 |
+
self.module_rref.owner(),
|
| 30 |
+
_remote_forward,
|
| 31 |
+
args,
|
| 32 |
+
kwargs,
|
| 33 |
+
)
|
| 34 |
+
return ret_fut.wait()
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
_generated_methods = [
|
| 38 |
+
forward_async,
|
| 39 |
+
forward,
|
| 40 |
+
]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _remote_forward(
|
| 46 |
+
module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
|
| 47 |
+
module = module_rref.local_value()
|
| 48 |
+
device = torch.device(device)
|
| 49 |
+
|
| 50 |
+
if device.type != "cuda":
|
| 51 |
+
return module.forward(*args, **kwargs)
|
| 52 |
+
|
| 53 |
+
# If the module is on a cuda device,
|
| 54 |
+
# move any CPU tensor in args or kwargs to the same cuda device.
|
| 55 |
+
# Since torch script does not support generator expression,
|
| 56 |
+
# have to use concatenation instead of
|
| 57 |
+
# ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
|
| 58 |
+
args = (*args,)
|
| 59 |
+
out_args: Tuple[()] = ()
|
| 60 |
+
for arg in args:
|
| 61 |
+
arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
|
| 62 |
+
out_args = out_args + arg
|
| 63 |
+
|
| 64 |
+
kwargs = {**kwargs}
|
| 65 |
+
for k, v in kwargs.items():
|
| 66 |
+
if isinstance(v, Tensor):
|
| 67 |
+
kwargs[k] = kwargs[k].to(device)
|
| 68 |
+
|
| 69 |
+
if is_device_map_set:
|
| 70 |
+
return module.forward(*out_args, **kwargs)
|
| 71 |
+
|
| 72 |
+
# If the device map is empty, then only CPU tensors are allowed to send over wire,
|
| 73 |
+
# so have to move any GPU tensor to CPU in the output.
|
| 74 |
+
# Since torch script does not support generator expression,
|
| 75 |
+
# have to use concatenation instead of
|
| 76 |
+
# ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
|
| 77 |
+
ret: Tuple[()] = ()
|
| 78 |
+
for i in module.forward(*out_args, **kwargs):
|
| 79 |
+
i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
|
| 80 |
+
ret = ret + i
|
| 81 |
+
return ret
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5048
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b27962ac8e469ac1c66fbf58f433c3ca5739ac324e0db3e16cafd87de64f050
|
| 3 |
size 5048
|