thanks to mingyuan-zhang ❤
Browse files- data/database/generate_kit.py +63 -0
- data/database/generate_t2m.py +63 -0
- data/database/kit_text_train.npz +3 -0
- data/database/t2m_text_train.npz +3 -0
- data/datasets/human_ml3d.zip +3 -0
- data/datasets/kit_ml.zip +3 -0
- data/evaluators/human_ml3d/finest.tar +3 -0
- data/evaluators/kit_ml/finest.tar +3 -0
- data/glove/our_vab_data.npy +3 -0
- data/glove/our_vab_idx.pkl +3 -0
- data/glove/our_vab_words.pkl +3 -0
- logs/finemogen/finemogen_kit/latest.pth +3 -0
- logs/finemogen/finemogen_t2m/latest.pth +3 -0
- logs/mdm/mdm_t2m/latest.pth +3 -0
- logs/motiondiffuse/motiondiffuse_kit/latest.pth +3 -0
- logs/motiondiffuse/motiondiffuse_t2m/latest.pth +3 -0
- logs/remodiffuse/remodiffuse_kit/latest.pth +3 -0
- logs/remodiffuse/remodiffuse_t2m/latest.pth +3 -0
data/database/generate_kit.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Build the KIT-ML retrieval database (``kit_text_train.npz``).

For every sequence listed in ``train.txt`` this script collects:
  * the CLIP ViT-B/32 text feature of the sequence's first caption,
  * the raw caption string,
  * the z-normalized motion, zero-padded to 196 frames x 251 dims,
  * the true (pre-padding) motion length,
  * the precomputed per-token CLIP sequence features,
and writes everything into one compressed ``.npz`` archive.
"""
import os

import torch
import numpy as np
import clip
from tqdm import tqdm

# Frame cap and KIT-ML motion feature dimensionality.
MAX_FRAMES = 196
MOTION_DIM = 251

device = 'cpu'
clip_model, _ = clip.load('ViT-B/32', device)

data_root_dir = "../datasets/kit_ml"
data_clip_dir = os.path.join(data_root_dir, "clip_feats")
data_caption_dir = os.path.join(data_root_dir, "texts")
data_motion_dir = os.path.join(data_root_dir, "motions")
train_split = os.path.join(data_root_dir, "train.txt")

all_text_features = []
all_captions = []
all_motions = []
all_m_lengths = []
all_clip_seq_features = []

std = np.load(os.path.join(data_root_dir, "std.npy"))
mean = np.load(os.path.join(data_root_dir, "mean.npy"))

with open(train_split, encoding="utf-8") as split_file:
    for filename in tqdm(split_file):
        filename = filename.strip()
        if not filename:
            continue  # tolerate blank/trailing lines in the split file

        # Only the first caption of each sequence is used.
        caption_file = os.path.join(data_caption_dir, filename + ".txt")
        with open(caption_file, encoding="utf-8") as f:
            caption = f.readline().strip()

        text = clip.tokenize([caption], truncate=True).to(device)
        with torch.no_grad():
            text_feature = clip_model.encode_text(text)[0].numpy()
        all_text_features.append(text_feature)
        all_captions.append(caption)

        motion_file = os.path.join(data_motion_dir, filename + ".npy")
        motion_data = np.load(motion_file)
        # Z-normalize; epsilon guards against zero-variance channels.
        motion_data = (motion_data - mean) / (std + 1e-9)
        motion_data = motion_data[:MAX_FRAMES]
        # Zero-pad to a fixed tensor; m_length records the real frame count.
        motion = np.zeros((MAX_FRAMES, MOTION_DIM))
        motion[:motion_data.shape[0], :] = motion_data
        all_motions.append(motion)
        all_m_lengths.append(motion_data.shape[0])

        clip_feat_file = os.path.join(data_clip_dir, filename + ".npy")
        clip_feat = np.load(clip_feat_file)[0]
        all_clip_seq_features.append(clip_feat)

output = {
    'text_features': np.array(all_text_features),
    'captions': np.array(all_captions),
    'motions': np.array(all_motions),
    'm_lengths': np.array(all_m_lengths),
    'clip_seq_features': np.array(all_clip_seq_features),
}

npz_path = "kit_text_train.npz"
np.savez_compressed(npz_path, **output)
data/database/generate_t2m.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Build the HumanML3D retrieval database (``t2m_text_train.npz``).

For every sequence listed in ``train.txt`` this script collects:
  * the CLIP ViT-B/32 text feature of the sequence's first caption,
  * the raw caption string,
  * the z-normalized motion, zero-padded to 196 frames x 263 dims,
  * the true (pre-padding) motion length,
  * the precomputed per-token CLIP sequence features,
and writes everything into one compressed ``.npz`` archive.
"""
import os

import torch
import numpy as np
import clip
from tqdm import tqdm

# Frame cap and HumanML3D motion feature dimensionality.
MAX_FRAMES = 196
MOTION_DIM = 263

device = 'cpu'
clip_model, _ = clip.load('ViT-B/32', device)

data_root_dir = "../datasets/human_ml3d"
data_clip_dir = os.path.join(data_root_dir, "clip_feats")
data_caption_dir = os.path.join(data_root_dir, "texts")
data_motion_dir = os.path.join(data_root_dir, "motions")
train_split = os.path.join(data_root_dir, "train.txt")

all_text_features = []
all_captions = []
all_motions = []
all_m_lengths = []
all_clip_seq_features = []

std = np.load(os.path.join(data_root_dir, "std.npy"))
mean = np.load(os.path.join(data_root_dir, "mean.npy"))

with open(train_split, encoding="utf-8") as split_file:
    for filename in tqdm(split_file):
        filename = filename.strip()
        if not filename:
            continue  # tolerate blank/trailing lines in the split file

        # Only the first caption of each sequence is used.
        caption_file = os.path.join(data_caption_dir, filename + ".txt")
        with open(caption_file, encoding="utf-8") as f:
            caption = f.readline().strip()

        text = clip.tokenize([caption], truncate=True).to(device)
        with torch.no_grad():
            text_feature = clip_model.encode_text(text)[0].numpy()
        all_text_features.append(text_feature)
        all_captions.append(caption)

        motion_file = os.path.join(data_motion_dir, filename + ".npy")
        motion_data = np.load(motion_file)
        # Z-normalize; epsilon guards against zero-variance channels.
        motion_data = (motion_data - mean) / (std + 1e-9)
        motion_data = motion_data[:MAX_FRAMES]
        # Zero-pad to a fixed tensor; m_length records the real frame count.
        motion = np.zeros((MAX_FRAMES, MOTION_DIM))
        motion[:motion_data.shape[0], :] = motion_data
        all_motions.append(motion)
        all_m_lengths.append(motion_data.shape[0])

        clip_feat_file = os.path.join(data_clip_dir, filename + ".npy")
        clip_feat = np.load(clip_feat_file)[0]
        all_clip_seq_features.append(clip_feat)

output = {
    'text_features': np.array(all_text_features),
    'captions': np.array(all_captions),
    'motions': np.array(all_motions),
    'm_lengths': np.array(all_m_lengths),
    'clip_seq_features': np.array(all_clip_seq_features),
}

npz_path = "t2m_text_train.npz"
np.savez_compressed(npz_path, **output)
data/database/kit_text_train.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6cb18cdf63734d9e34c2abf402269d7a4aaea02c727c894cb7e0161de10fe053
|
| 3 |
+
size 962043519
|
data/database/t2m_text_train.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e597ca6ab524a73f20074d8e7f0fa013d99872e5b590e778aecaefcee1172b77
|
| 3 |
+
size 5852293680
|
data/datasets/human_ml3d.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77b13f18bbd01f6d052b70b5fa27a11fc474aa58107dd5f97f687bd991a71017
|
| 3 |
+
size 10397657625
|
data/datasets/kit_ml.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1475d2c8b54ea110c7b731ec63f06bcc0dac0751eda5a551b9bc3c54d6b7666d
|
| 3 |
+
size 1326887901
|
data/evaluators/human_ml3d/finest.tar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:881e45bea6fb84b15eca6e7cdd75d602b58a72f2038c223ee96ab75d4f684ada
|
| 3 |
+
size 245580211
|
data/evaluators/kit_ml/finest.tar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f786d733ca1bfa5dc1474554f167e1f608ec629868061e51ccfdb812ea61e6a
|
| 3 |
+
size 245481907
|
data/glove/our_vab_data.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73ed9b73ee5bc5ab683661c132b9f98fe305672dffdf70ceeac1bc447543d668
|
| 3 |
+
size 10077728
|
data/glove/our_vab_idx.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d85dce83d2c27a92bb94bb51dc1f55a04fcaa328c359d23eccf648e296c16493
|
| 3 |
+
size 79811
|
data/glove/our_vab_words.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fac88da1be6f00d36b72b88c9745c8ac35a94a7bd6ccf1fa3bab380faf8c2e0d
|
| 3 |
+
size 67470
|
logs/finemogen/finemogen_kit/latest.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66b7ea74dca87a1940ad29205b66032edeba0fcf62ef4a007ed3764f415b4c1d
|
| 3 |
+
size 261983599
|
logs/finemogen/finemogen_t2m/latest.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fd39808c1f0a9f57374e76ec99388359bd639a85273c5d8bd6f93f2097e0749
|
| 3 |
+
size 261996143
|
logs/mdm/mdm_t2m/latest.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8810255fb8df9eed6211537de9826f07ff73862f367cbf91532d84fd4c9a497e
|
| 3 |
+
size 81791550
|
logs/motiondiffuse/motiondiffuse_kit/latest.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4056909b968615df88a8a81d1566dcbe2de499ac816832181ceeac2a453aedf3
|
| 3 |
+
size 953909508
|
logs/motiondiffuse/motiondiffuse_t2m/latest.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc5cc2269fb42237910e75b2d04357ccc25d8ad59ae5d27d8a1134e8f68e3860
|
| 3 |
+
size 953958724
|
logs/remodiffuse/remodiffuse_kit/latest.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7da46a5ac84be587c715325ac33f2c837060ee6b7d212876406678d7f1c3c64b
|
| 3 |
+
size 1168873949
|
logs/remodiffuse/remodiffuse_t2m/latest.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d36ca2f5372a0aa8a4be054e61af9a36529254ad94944e440e8a8e7dfe5e8327
|
| 3 |
+
size 1169095901
|