File size: 4,043 Bytes
e744d68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import torch
import torchvision.transforms as transforms
import os
import logging
import pickle

def read_pkl_data(pkl_path, img_path):
    """Load per-video frame paths and phase/step labels from a label pickle.

    The pickle must contain a mapping from video name to a sequence of
    per-frame records; each record is indexed with the keys ``'Frame_id'``,
    ``'Phase_gt'`` and ``'Step_gt'``.

    Args:
        pkl_path: Location of the label pickle (``str`` or ``os.PathLike``).
        img_path: Root directory holding one sub-directory of frames per
            video (``str`` or ``os.PathLike``). If this path does not exist,
            every occurrence of the substrings ``'train'``, ``'val'`` and
            ``'test'`` is removed from it and the result is used instead.

    Returns:
        A tuple ``(imgs, phases, steps)`` of three lists with one entry per
        video, ordered by sorted video name. ``imgs[i]`` is the list of
        ``<root>/<video>/<Frame_id>.jpg`` paths, ``phases[i]`` the matching
        ``'Phase_gt'`` values and ``steps[i]`` the matching ``'Step_gt'``
        values.
    """
    # Lazy %-formatting also accepts path-like objects, unlike `str + path`.
    logging.info('reading pickle file: %s', pkl_path)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(pkl_path, "rb") as fp:
        data = pickle.load(fp)

    # Normalise to a plain string so the substring fallback below works for
    # path-like inputs as well.
    root_dir = os.fspath(img_path)
    if not os.path.exists(root_dir):
        # Fallback: retry with the split names stripped out of the path.
        root_dir = root_dir.replace('train', '').replace('val', '').replace('test', '')

    imgs, phases, steps = [], [], []
    for vid_name in sorted(data):
        records = data[vid_name]
        imgs.append(
            [
                os.path.join(root_dir, vid_name, f"{item['Frame_id']}.jpg")
                for item in records
            ]
        )
        phases.append([item['Phase_gt'] for item in records])
        steps.append([item['Step_gt'] for item in records])

    return imgs, phases, steps


## Read the train / val / test label pickles
# Every split reads its own label pickle but shares one frame directory.
# `labels` is reassigned per split, so after this section it holds the TEST
# label path.
images = '/gpfsscratch/rech/okw/ukw13bv/bypass/BernBypass70/frames'

#### TRAIN ####
labels = os.path.join(
    '/gpfswork/rech/okw/ukw13bv/MultiBypass140/labels',
    'bern', 'labels_by70_splits/labels', 'train', '1fps_100_0.pickle',
)
videos_train, phase_labels_train, step_labels_train = read_pkl_data(labels, images)

#### VAL ####
labels = os.path.join(
    '/gpfswork/rech/okw/ukw13bv/MultiBypass140/labels',
    'bern', 'labels_by70_splits/labels', 'val', '1fps_0.pickle',
)
videos_val, phase_labels_val, step_labels_val = read_pkl_data(labels, images)

#### TEST ####
labels = os.path.join(
    '/gpfswork/rech/okw/ukw13bv/MultiBypass140/labels',
    'bern', 'labels_by70_splits/labels', 'test', '1fps_0.pickle',
)
videos_test, phase_labels_test, step_labels_test = read_pkl_data(labels, images)

# Presumably read by the config loader as the parent config to inherit from
# -- verify against the loader in use.
_base_ = ['../base.py']

# Each split gets one dataset entry per video, pairing that video's frame
# paths with its phase labels. A fresh transform pipeline is built per entry:
# resize to (360, 640), centre-crop to 224, convert to tensor, normalise.
config = dict(
    train_config=[
        dict(
            type='Recognition_frame_bypass',
            img_list=frame_paths,
            label_list=phase_ids,
            transforms=transforms.Compose([
                transforms.Resize((360, 640)),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                ),
            ]),
        )
        for frame_paths, phase_ids in zip(videos_train, phase_labels_train)
    ],
    val_config=[
        dict(
            type='Recognition_frame_bypass',
            img_list=frame_paths,
            label_list=phase_ids,
            transforms=transforms.Compose([
                transforms.Resize((360, 640)),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                ),
            ]),
        )
        for frame_paths, phase_ids in zip(videos_val, phase_labels_val)
    ],
    test_config=[
        dict(
            type='Recognition_frame_bypass',
            img_list=frame_paths,
            label_list=phase_ids,
            transforms=transforms.Compose([
                transforms.Resize((360, 640)),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                ),
            ]),
        )
        for frame_paths, phase_ids in zip(videos_test, phase_labels_test)
    ],
    model_config=dict(
        type='MVNet_feature_extractor',
        # Image branch. Alternative encoder type noted by the author:
        #   'img_backbones/ImageEncoder_CLIPVISUAL'
        backbone_img=dict(
            type='img_backbones/ImageEncoder_feature_extractor',
            num_classes=768,
            # Options listed by the author: imagenet / ssl / random.
            pretrained='imagenet',
            # Alternative backbone noted by the author: 'resnet_50_clip'.
            backbone_name='resnet_50',
            img_norm=False,
        ),
        # Text branch.
        backbone_text=dict(
            type='text_backbones/BertEncoder',
            text_bert_type='/gpfswork/rech/okw/ukw13bv/mmsl/biobert_pretrain_output_all_notes_150000',
            text_last_n_layers=4,
            text_aggregate_method='sum',
            text_norm=False,
            text_embedding_dim=768,
            text_freeze_bert=False,
            text_agg_tokens=True,
        ),
    ),
)