# File size: 4,043 Bytes (revision e744d68)
import torch
import torchvision.transforms as transforms
import os
import logging
import pickle
def read_pkl_data(pkl_path, img_path):
    """Load per-video frame paths and phase/step labels from a label pickle.

    The pickle must contain a mapping ``{video_name: [item, ...]}`` in which
    every ``item`` supports lookup of the keys ``'Frame_id'``, ``'Phase_gt'``
    and ``'Step_gt'``.

    Args:
        pkl_path: Location of the label pickle (``str`` or ``os.PathLike``).
        img_path: Root directory of the frames (``str`` or ``os.PathLike``).
            When this directory does not exist, every occurrence of the
            substrings ``'train'``, ``'val'`` and ``'test'`` is removed from
            it and the resulting string is used as the root instead.

    Returns:
        A tuple ``(imgs, phases, steps)`` of three parallel lists holding one
        inner list per video, ordered by sorted video name:
        ``imgs`` contains ``<root>/<video_name>/<Frame_id>.jpg`` paths,
        ``phases`` the ``'Phase_gt'`` values and ``steps`` the ``'Step_gt'``
        values, all in the item order stored in the pickle.
    """
    # Normalise once so pathlib.Path arguments behave exactly like strings
    # (string concatenation and str.replace below would misbehave on a Path).
    pkl_path = os.fspath(pkl_path)
    root_dir = os.fspath(img_path)

    logging.info('reading pickle file: %s', pkl_path)
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(pkl_path, "rb") as fp:
        data = pickle.load(fp)

    if not os.path.exists(root_dir):
        # Fall back to the path with the split names stripped out.
        root_dir = root_dir.replace('train', '').replace('val', '').replace('test', '')

    imgs, phases, steps = [], [], []
    for vid_name in sorted(data):
        items = data[vid_name]
        imgs.append(
            [os.path.join(root_dir, vid_name, f"{item['Frame_id']}.jpg") for item in items]
        )
        phases.append([item['Phase_gt'] for item in items])
        steps.append([item['Step_gt'] for item in items])
    return imgs, phases, steps
## Read the label pickle of every split (train / val / test).
## All three splits share the same frame root; `labels` and `images` are
## rebound for each split, so after this section they hold the TEST values.

#### TRAIN ####
labels = os.path.join(
    '/gpfswork/rech/okw/ukw13bv/MultiBypass140/labels',
    'bern',
    'labels_by70_splits/labels',
    'train',
    '1fps_100_0.pickle',
)
images = '/gpfsscratch/rech/okw/ukw13bv/bypass/BernBypass70/frames'
videos_train, phase_labels_train, step_labels_train = read_pkl_data(labels, images)

#### VAL ####
labels = os.path.join(
    '/gpfswork/rech/okw/ukw13bv/MultiBypass140/labels',
    'bern',
    'labels_by70_splits/labels',
    'val',
    '1fps_0.pickle',
)
images = '/gpfsscratch/rech/okw/ukw13bv/bypass/BernBypass70/frames'
videos_val, phase_labels_val, step_labels_val = read_pkl_data(labels, images)

#### TEST ####
labels = os.path.join(
    '/gpfswork/rech/okw/ukw13bv/MultiBypass140/labels',
    'bern',
    'labels_by70_splits/labels',
    'test',
    '1fps_0.pickle',
)
images = '/gpfsscratch/rech/okw/ukw13bv/bypass/BernBypass70/frames'
videos_test, phase_labels_test, step_labels_test = read_pkl_data(labels, images)
# NOTE(review): presumably read by the config loader as the parent config to
# inherit from — confirm against the loader.
_base_ = ['../base.py']

# Dataset entries are built per video: each entry pairs one video's frame
# paths with its phase labels. The step labels loaded above are not used here.
# Every entry gets its own freshly constructed transform pipeline.
config = {
    'train_config': [
        {
            'type': 'Recognition_frame_bypass',
            'img_list': frame_paths,
            'label_list': phase_labels,
            'transforms': transforms.Compose(
                [
                    transforms.Resize((360, 640)),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ]
            ),
        }
        for frame_paths, phase_labels in zip(videos_train, phase_labels_train)
    ],
    'val_config': [
        {
            'type': 'Recognition_frame_bypass',
            'img_list': frame_paths,
            'label_list': phase_labels,
            'transforms': transforms.Compose(
                [
                    transforms.Resize((360, 640)),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ]
            ),
        }
        for frame_paths, phase_labels in zip(videos_val, phase_labels_val)
    ],
    'test_config': [
        {
            'type': 'Recognition_frame_bypass',
            'img_list': frame_paths,
            'label_list': phase_labels,
            'transforms': transforms.Compose(
                [
                    transforms.Resize((360, 640)),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ]
            ),
        }
        for frame_paths, phase_labels in zip(videos_test, phase_labels_test)
    ],
    'model_config': {
        'type': 'MVNet_feature_extractor',
        'backbone_img': {
            # Alternative noted by the author: 'img_backbones/ImageEncoder_CLIPVISUAL'
            'type': 'img_backbones/ImageEncoder_feature_extractor',
            'num_classes': 768,
            'pretrained': 'imagenet',  # imagenet/ssl/random
            # Alternative noted by the author: 'resnet_50_clip'
            'backbone_name': 'resnet_50',
            'img_norm': False,
        },
        'backbone_text': {
            'type': 'text_backbones/BertEncoder',
            'text_bert_type': '/gpfswork/rech/okw/ukw13bv/mmsl/biobert_pretrain_output_all_notes_150000',
            'text_last_n_layers': 4,
            'text_aggregate_method': 'sum',
            'text_norm': False,
            'text_embedding_dim': 768,
            'text_freeze_bert': False,
            'text_agg_tokens': True,
        },
    },
}
|