| LOG_MODEL_INFO: False |
| TRAIN: |
| DATASET: ava |
| BATCH_SIZE: 8 |
|
|
| EVAL_PERIOD: 2 |
| CHECKPOINT_PERIOD: 1 |
| AUTO_RESUME: True |
| CHECKPOINT_EPOCH_RESET: True |
| CHECKPOINT_IN_INIT: True |
| CHECKPOINT_FILE_PATH: "" |
| CHECKPOINT_TYPE: pytorch |
|
|
| DATA: |
| PATH_TO_DATA_DIR: "" |
| DECODING_BACKEND: torchvision |
| INPUT_CHANNEL_NUM: [3] |
| NUM_FRAMES: 40 |
| SAMPLING_RATE: 3 |
| USE_OFFSET_SAMPLING: True |
| TRAIN_CROP_SIZE: 312 |
| TEST_CROP_SIZE: 312 |
| TRAIN_JITTER_SCALES: [356, 446] |
| TRAIN_JITTER_SCALES_RELATIVE: [0.08, 1.0] |
| TRAIN_JITTER_ASPECT_RELATIVE: [0.75, 1.3333] |
| MEAN: [0.485, 0.456, 0.406] |
| STD: [0.229, 0.224, 0.225] |
| MVIT: |
| ZERO_DECAY_POS_CLS: False |
| SEP_POS_EMBED: True |
| PATCH_KERNEL: (3, 7, 7) |
| PATCH_STRIDE: (2, 4, 4) |
| PATCH_PADDING: (1, 3, 3) |
| MLP_RATIO: 4.0 |
| QKV_BIAS: True |
| NORM: "layernorm" |
| EMBED_DIM: 144 |
| NUM_HEADS: 2 |
| DEPTH: 48 |
| DIM_MUL: [[2, 2.0], [8, 2.0], [44, 2.0]] |
| HEAD_MUL: [[2, 2.0], [8, 2.0], [44, 2.0]] |
| DROPPATH_RATE: 0.0 |
|
|
| POOL_KV_STRIDE_ADAPTIVE: [1, 8, 8] |
| POOL_KVQ_KERNEL: [3, 3, 3] |
| USE_ABS_POS: False |
| REL_POS_SPATIAL: True |
| REL_POS_TEMPORAL: True |
|
|
| POOL_Q_STRIDE: [[0, 1, 1, 1], [1, 1, 1, 1], [2, 1, 2, 2], [3, 1, 1, 1], [4, 1, 1, 1], [5, 1, 1, 1], [6, 1, 1, 1], [7, 1, 1, 1], [8, 1, 2, 2], [9, 1, 1, 1], [10, 1, 1, 1], |
| [11, 1, 1, 1], [12, 1, 1, 1], [13, 1, 1, 1], [14, 1, 1, 1], [15, 1, 1, 1], [16, 1, 1, 1], [17, 1, 1, 1], [18, 1, 1, 1], [19, 1, 1, 1], [20, 1, 1, 1], |
| [21, 1, 1, 1], [22, 1, 1, 1], [23, 1, 1, 1], [24, 1, 1, 1], [25, 1, 1, 1], [26, 1, 1, 1], [27, 1, 1, 1], [28, 1, 1, 1], [29, 1, 1, 1], [30, 1, 1, 1], |
| [31, 1, 1, 1], [32, 1, 1, 1], [33, 1, 1, 1], [34, 1, 1, 1], [35, 1, 1, 1], [36, 1, 1, 1], [37, 1, 1, 1], [38, 1, 1, 1], [39, 1, 1, 1], [40, 1, 1, 1], |
| [41, 1, 1, 1], [42, 1, 1, 1], [43, 1, 1, 1], [44, 1, 2, 2], [45, 1, 1, 1], [46, 1, 1, 1], [47, 1, 1, 1] ] |
| MODE: "conv" |
| RESIDUAL_POOLING: True |
| SEPARATE_QKV: True |
| CLS_EMBED_ON: False |
|
|
| BN: |
| NUM_BATCHES_PRECISE: 200 |
| USE_PRECISE_STATS: False |
|
|
| DETECTION: |
| ENABLE: True |
| SPATIAL_SCALE_FACTOR: 32 |
| ALIGNED: True |
| AVA: |
| FRAME_DIR: /datasets01/AVA/080720/frames/ |
| FRAME_LIST_DIR: /datasets01/AVA/080720/frame_list/ |
| ANNOTATION_DIR: /datasets01/AVA/080720/frame_list/ |
| TRAIN_PREDICT_BOX_LISTS: [ |
| "ava_train_v2.2.csv", |
| "person_box_67091280_iou90/ava_detection_train_boxes_and_labels_include_negative_v2.2.csv" |
| ] |
| TEST_PREDICT_BOX_LISTS: [ |
| "person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv" |
| ] |
| DETECTION_SCORE_THRESH: 0.9 |
| FULL_TEST_ON_VAL: True |
| BGR: False |
|
|
|
|
| SOLVER: |
| CLIP_GRAD_L2NORM: 2.0 |
| ZERO_WD_1D_PARAM: True |
| BASE_LR_SCALE_NUM_SHARDS: True |
| BASE_LR: 0.075 |
| COSINE_AFTER_WARMUP: True |
| COSINE_END_LR: 1.0e-7 |
| WARMUP_START_LR: 1.0e-8 |
| WARMUP_EPOCHS: 5.0 |
| LR_POLICY: cosine |
| MAX_EPOCH: 20 |
| MOMENTUM: 0.9 |
| WEIGHT_DECAY: 1.0e-8 |
| OPTIMIZING_METHOD: sgd |
| MODEL: |
| ARCH: mvit |
| MODEL_NAME: MViT |
| NUM_CLASSES: 80 |
| HEAD_ACT: sigmoid |
| LOSS_FUNC: bce |
| DROPOUT_RATE: 0.0 |
| ACT_CHECKPOINT: True |
|
|
| TEST: |
| ENABLE: True |
| CHECKPOINT_FILE_PATH: /home/jathu/mvit.pyth |
| DATASET: ava |
| NUM_SPATIAL_CROPS: 1 |
| BATCH_SIZE: 1 |
|
|
| DATA_LOADER: |
| PIN_MEMORY: True |
| NUM_WORKERS: 4 |
| OUTPUT_DIR: . |
| RNG_SEED: 0 |
| NUM_GPUS: 1 |
| NUM_SHARDS: 1 |
| SHARD_ID: 0 |
|
|
| DEMO: |
| ENABLE: True |
| WEBCAM: -1 |
| INPUT_VIDEO: /private/home/jathushan/datasets/ttv/webm2/82FE8F069F1354550003607470080_1fcf1757309.4.7.mp4 |
| OUTPUT_FILE: output.mp4 |
| LABEL_FILE_PATH: /private/home/jathushan/3D/slowfast/ava_names.json |
|
|
|
|
| |
| |
| |
| |