thanks to brjathu ❤

Browse files

Files changed (5) hide show

.gitattributes +1 -0
lart_mvit.ckpt +3 -0
lart_mvit.config +206 -0
mvit.pyth +3 -0
mvit.yaml +142 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+mvit.pyth filter=lfs diff=lfs merge=lfs -text

lart_mvit.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f9985bc712a1ca1341ae1c4cfe26c0124be0222f08e1ca9b08afcc764be6a84d
+size 582097545

lart_mvit.config ADDED Viewed

	@@ -0,0 +1,206 @@

+trainer:  # keyword arguments for the class named in `_target_` (presumably instantiated by Hydra — confirm)
+  _target_: lightning.pytorch.Trainer
+  default_root_dir: ${paths.output_dir}  # interpolated from `paths.output_dir` in this file
+  min_epochs: 1
+  max_epochs: 30
+  accelerator: gpu
+  devices: 8  # with `num_nodes: 8` this requests 8 x 8 = 64 GPUs, assuming `devices` is a per-node count — confirm
+  num_nodes: 8
+  check_val_every_n_epoch: 1
+  deterministic: false  # `seed` is also null in this file, so nothing here pins reproducibility
+  benchmark: true
+  accumulate_grad_batches: 1
+  gradient_clip_val: 2.0
+  precision: 32
+  num_sanity_val_steps: 0
+  limit_train_batches: 1.0
+  limit_val_batches: 1.0
+  sync_batchnorm: true
+  strategy: ddp_find_unused_parameters_true
+callbacks:  # one entry per callback; each entry names its class in `_target_`
+  model_checkpoint:
+    _target_: lart.utils.ema_checkpoint.EMACheckpoint  # project-local class, not visible from this file
+    dirpath: ${paths.output_dir}/checkpoints
+    filename: epoch_{epoch:03d}
+    monitor: step
+    mode: max
+    save_last: true
+    auto_insert_metric_name: false
+    verbose: false
+    save_top_k: -1  # NOTE(review): presumably "keep every checkpoint", as in Lightning's ModelCheckpoint — confirm for EMACheckpoint
+    save_weights_only: false
+    every_n_train_steps: null
+    train_time_interval: null
+    every_n_epochs: 1  # the only save trigger that is set; the step- and time-based triggers above are null
+    save_on_train_epoch_end: true
+  model_summary:
+    _target_: lightning.pytorch.callbacks.RichModelSummary
+    max_depth: 1
+  rich_progress_bar:
+    _target_: lightning.pytorch.callbacks.RichProgressBar
+    refresh_rate: 1
+  learning_rate_monitor:
+    _target_: lightning.pytorch.callbacks.LearningRateMonitor
+  timer:
+    _target_: lightning.pytorch.callbacks.Timer
+  ema:  # settings for the project-local EMA callback; semantics live in lart.utils.ema
+    _target_: lart.utils.ema.EMA
+    decay: 0.9999
+    cpu_offload: false
+    validate_original_weights: false
+    every_n_steps: 1
+logger:  # a single logger is configured: TensorBoard
+  tensorboard:
+    _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
+    save_dir: ${paths.output_dir}/tensorboard/  # written under the run's output directory
+    version: 0  # fixed version value rather than an auto-generated one
+paths:  # directory roots that other sections reference through ${paths.*}
+  root_dir: ${oc.env:PROJECT_ROOT}  # read from the PROJECT_ROOT environment variable; no fallback is given here
+  data_dir: ${paths.root_dir}/data/
+  log_dir: ${paths.root_dir}/logs/  # note the trailing slash when composing further paths from this value
+  output_dir: ${hydra:runtime.output_dir}  # supplied by Hydra at run time
+  work_dir: ${hydra:runtime.cwd}  # supplied by Hydra at run time
+extras:  # run-level switches start here
+  print_config: true
+hydra_logging: colorlog
+job_logging: colorlog
+task_name: LART_mvit_1  # also used to build `configs.storage_folder`
+tags:
+- dev
+train: true  # also passed to the datamodule as ${train}
+test: true
+ckpt_path: null  # no checkpoint path is set in this config
+seed: null  # no seed is set in this config
+datamodule:  # project-local data module; receives the whole `configs` section plus the `train` flag
+  _target_: lart.datamodules.phalp_datamodule.PHALPDataModule
+  cfg: ${configs}
+  train: ${train}
+model:  # project-local Lightning module; receives the same `configs` section as the datamodule
+  _target_: lart.models.lart.LART_LitModule
+  cfg: ${configs}
+configs:  # shared settings; referenced as ${configs} by `datamodule.cfg` and `model.cfg`
+  data_dir: ${paths.data_dir}
+  storage_folder: ${paths.log_dir}/${task_name}/${hydra:sweep.subdir}  # NOTE(review): paths.log_dir already ends in "/", so this expands with a doubled slash
+  train_dataset: ava_train,kinetics_train  # several names packed into one comma-separated string
+  test_dataset: ava_val
+  map_on: AVA  # same value as `configs.ava.map_on` further down
+  train_batch_size: 8  # NOTE(review): presumably per process — confirm against the datamodule
+  train_num_workers: 8
+  test_batch_size: 8
+  test_num_workers: 8
+  test_class: ''
+  test_batch_id: -1
+  number_of_processes: 25
+  pin_memory: true
+  full_seq_render: false
+  frame_length: 125
+  max_people: 1
+  load_other_tracks: false
+  img_size: 256  # same value as smpl_cfg.MODEL.IMAGE_SIZE
+  load_images: false
+  use_mean_std: true
+  use_mean_std_mid: false
+  frame_rate_range: 1
+  num_smpl_heads: 1
+  finetune: false
+  bottle_neck: conv
+  pos_embedding: learned
+  mask_ratio: 0.4
+  in_feat: 512
+  one_euro_filter: pred_loca,pred_pose  # comma-separated string, like `train_dataset`
+  loss_type: action_BCE
+  mask_type: random
+  mask_type_test: zero
+  test_type: track.fullframe@
+  encode_type: 4c
+  masked: false
+  weights_path: null
+  loss_on_others_action: true
+  debug: false
+  load_strict: true
+  mixed_training: 0
+  compute_map: true
+  compute_acc: true
+  log_frequency: 100
+  hmr_model: hmr2018
+  loca_l1_weight: 1
+  action_space: ava
+  solver:  # optimizer and learning-rate schedule settings
+    name: AdamW
+    lr: 0.00012
+    momentum: 0.9  # NOTE(review): presumably unused when `name` is AdamW — confirm
+    decay_steps:  # NOTE(review): step-decay settings sit alongside `scheduler: cosine`; confirm which schedule reads them
+    - 10
+    - 20
+    decay_gamma: 0.1
+    layer_decay: null
+    ZERO_WD_1D_PARAM: true
+    warmup_epochs: 5
+    weight_decay: 0.05
+    scheduler: cosine
+    apply_linear_scaling: true  # NOTE(review): scaling rule is defined in project code — confirm how `lr` is adjusted
+  ava:  # settings for the AVA action space
+    sampling_factor: 1
+    num_action_classes: 80
+    num_valid_action_classes: 60  # smaller than num_action_classes (80)
+    gt_type: all
+    head_dropout: 0.0
+    predict_valid: true
+    map_on: AVA  # duplicates the top-level `configs.map_on` value
+  kinetics:  # settings for the Kinetics action space
+    sampling_factor: 1
+    num_action_classes: 400
+  loss:
+    focal:  # NOTE(review): `configs.loss_type` is action_BCE; confirm whether these focal parameters are read
+      gamma: 2
+      alpha: 0.25
+  extra_feat:  # per-feature dimension tables; `enable` names which entries are switched on
+    enable: joints_3D,apperance  # names must match the keys below exactly, including the "apperance" spelling
+    pose_shape:  # defined here but not named in `enable`
+      dim: 229
+      mid_dim: 256
+      en_dim: 128
+    joints_3D:
+      dim: 135
+      mid_dim: 256
+      en_dim: 128
+    apperance:  # spelling is part of the runtime key; do not rename without updating `enable` and the consuming code
+      dim: 1152
+      mid_dim: 512
+      en_dim: 256
+  transformer:  # transformer hyper-parameters
+    model: legacy
+    depth: 16
+    heads: 16
+    mlp_dim: 512
+    dim_head: 64
+    dropout: 0.1
+    emb_dropout: 0.1
+    droppath: 0.4
+    use_interaction_module: false
+    use_perceiver: false
+    use_interaction_module_action_only: false
+    conv:  # NOTE(review): presumably parameters of the `bottle_neck: conv` layer — confirm
+      pad: 1
+      stride: 5
+  smpl_cfg:  # SMPL body-model settings; asset paths are relative ("data/3D/...")
+    SMPL:
+      MODEL_PATH: data/3D  # NOTE(review): relative path — presumably resolved against the working directory; confirm
+      GENDER: neutral
+      MODEL_TYPE: smpl
+      NUM_BODY_JOINTS: 23
+      JOINT_REGRESSOR_H36M: data/3D/J_regressor_h36m.npy
+      JOINT_REGRESSOR_EXTRA: data/3D/SMPL_to_J19.pkl
+      TEXTURE: data/3D/texture.npz
+    MODEL:
+      IMAGE_SIZE: 256  # same value as `configs.img_size`
+      SMPL_HEAD:
+        TYPE: basic
+        POOL: max
+        SMPL_MEAN_PARAMS: data/3D/smpl_mean_params.npz
+        IN_CHANNELS: 2048
+      BACKBONE:
+        TYPE: resnet
+        NUM_LAYERS: 50
+    EXTRA:
+      FOCAL_LENGTH: 5000

mvit.pyth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:acff8a726a3c2ee8e8b75914f2cc17d0ec14808a428f6ce05afceb6ca00732f7
+size 1739882689

mvit.yaml ADDED Viewed

	@@ -0,0 +1,142 @@

+LOG_MODEL_INFO: False
+TRAIN:  # training-time settings; each key appears exactly once
+  DATASET: ava
+  BATCH_SIZE: 8  # a shadowed earlier entry (16) was removed; loaders that tolerate duplicate keys keep the last value, i.e. 8
+  EVAL_PERIOD: 2
+  CHECKPOINT_PERIOD: 1
+  AUTO_RESUME: True
+  CHECKPOINT_EPOCH_RESET: True  # was listed twice with the same value; one copy kept
+  CHECKPOINT_IN_INIT: True
+  CHECKPOINT_FILE_PATH: ""  # empty here; TEST.CHECKPOINT_FILE_PATH carries the checkpoint location
+  CHECKPOINT_TYPE: pytorch
+DATA:  # input decoding, sampling and augmentation settings
+  USE_OFFSET_SAMPLING: True
+  DECODING_BACKEND: torchvision
+  NUM_FRAMES: 40
+  SAMPLING_RATE: 3
+  TRAIN_JITTER_SCALES: [356, 446]
+  TRAIN_CROP_SIZE: 312  # same size as TEST_CROP_SIZE below
+  TEST_CROP_SIZE: 312 # use if TEST.NUM_SPATIAL_CROPS: 1
+  INPUT_CHANNEL_NUM: [3]
+  PATH_TO_DATA_DIR: ""  # empty here; the AVA section holds the dataset directories
+  TRAIN_JITTER_SCALES_RELATIVE: [0.08, 1.0]
+  TRAIN_JITTER_ASPECT_RELATIVE: [0.75, 1.3333]
+  MEAN: [0.485, 0.456, 0.406]  # three values, one per channel (INPUT_CHANNEL_NUM is [3])
+  STD: [0.229, 0.224, 0.225]
+MVIT:  # backbone settings; each key appears exactly once (shadowed duplicates were removed, last value kept)
+  ZERO_DECAY_POS_CLS: False
+  SEP_POS_EMBED: True
+  PATCH_KERNEL: (3, 7, 7)  # parenthesised values are plain strings to YAML; presumably converted by the config loader — confirm
+  PATCH_STRIDE: (2, 4, 4)
+  PATCH_PADDING: (1, 3, 3)
+  MLP_RATIO: 4.0
+  QKV_BIAS: True
+  NORM: "layernorm"
+  EMBED_DIM: 144  # a shadowed earlier entry (96) was removed
+  NUM_HEADS: 2  # a shadowed earlier entry (1) was removed
+  DEPTH: 48 # [2, 6, 36, 2]
+  DIM_MUL: [[2, 2.0], [8, 2.0], [44, 2.0]]
+  HEAD_MUL: [[2, 2.0], [8, 2.0], [44, 2.0]]
+  DROPPATH_RATE: 0.0  # was listed twice with the same value; one copy kept
+  POOL_KV_STRIDE_ADAPTIVE: [1, 8, 8]
+  POOL_KVQ_KERNEL: [3, 3, 3]
+  USE_ABS_POS: False # default: True
+  REL_POS_SPATIAL: True # default: false
+  REL_POS_TEMPORAL: True # default: false
+  POOL_Q_STRIDE: [[0, 1, 1, 1], [1, 1, 1, 1], [2, 1, 2, 2], [3, 1, 1, 1], [4, 1, 1, 1], [5, 1, 1, 1], [6, 1, 1, 1], [7, 1, 1, 1], [8, 1, 2, 2], [9, 1, 1, 1], [10, 1, 1, 1],
+    [11, 1, 1, 1], [12, 1, 1, 1], [13, 1, 1, 1], [14, 1, 1, 1], [15, 1, 1, 1], [16, 1, 1, 1], [17, 1, 1, 1], [18, 1, 1, 1], [19, 1, 1, 1], [20, 1, 1, 1],
+    [21, 1, 1, 1], [22, 1, 1, 1], [23, 1, 1, 1], [24, 1, 1, 1], [25, 1, 1, 1], [26, 1, 1, 1], [27, 1, 1, 1], [28, 1, 1, 1], [29, 1, 1, 1], [30, 1, 1, 1],
+    [31, 1, 1, 1], [32, 1, 1, 1], [33, 1, 1, 1], [34, 1, 1, 1], [35, 1, 1, 1], [36, 1, 1, 1], [37, 1, 1, 1], [38, 1, 1, 1], [39, 1, 1, 1], [40, 1, 1, 1],
+    [41, 1, 1, 1], [42, 1, 1, 1], [43, 1, 1, 1], [44, 1, 2, 2], [45, 1, 1, 1], [46, 1, 1, 1], [47, 1, 1, 1] ]  # one entry per layer index 0-47; a shadowed three-entry POOL_Q_STRIDE was removed; continuation lines are indented deeper than the key so stricter parsers accept them
+  MODE: "conv"  # a shadowed earlier entry ("conv_unshared") was removed
+  RESIDUAL_POOLING: True
+  SEPARATE_QKV: True
+  CLS_EMBED_ON: False # default: True
+BN:  # batch-norm statistics settings
+  USE_PRECISE_STATS: False
+  NUM_BATCHES_PRECISE: 200  # NOTE(review): presumably only read when USE_PRECISE_STATS is True — confirm
+DETECTION:  # detection head switches
+  ENABLE: True
+  ALIGNED: True
+  SPATIAL_SCALE_FACTOR: 32
+AVA:  # AVA dataset locations and box lists; directories are absolute, machine-specific paths
+  BGR: False
+  DETECTION_SCORE_THRESH: 0.9
+  TRAIN_PREDICT_BOX_LISTS: [
+    "ava_train_v2.2.csv",
+    "person_box_67091280_iou90/ava_detection_train_boxes_and_labels_include_negative_v2.2.csv",
+  ]
+  TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"]
+  ANNOTATION_DIR: /datasets01/AVA/080720/frame_list/  # NOTE(review): identical to FRAME_LIST_DIR below — confirm this is intended
+  FRAME_LIST_DIR: /datasets01/AVA/080720/frame_list/
+  FRAME_DIR: /datasets01/AVA/080720/frames/
+  FULL_TEST_ON_VAL: True
+SOLVER:  # optimizer and learning-rate schedule settings
+  CLIP_GRAD_L2NORM: 2.0
+  ZERO_WD_1D_PARAM: True
+  BASE_LR_SCALE_NUM_SHARDS: True
+  BASE_LR: 0.075
+  COSINE_AFTER_WARMUP: True
+  COSINE_END_LR: 1e-7  # NOTE(review): exponent form without a decimal point; some YAML 1.1 loaders read this as a string — confirm the consumer converts it
+  WARMUP_START_LR: 1e-8  # same dot-less exponent form as COSINE_END_LR
+  WARMUP_EPOCHS: 5.0
+  LR_POLICY: cosine
+  MAX_EPOCH: 20
+  MOMENTUM: 0.9
+  WEIGHT_DECAY: 1e-8  # same dot-less exponent form as COSINE_END_LR
+  OPTIMIZING_METHOD: sgd
+MODEL:  # model head and loss settings
+  NUM_CLASSES: 80  # active value; the 600-class alternative below is commented out
+  HEAD_ACT: sigmoid
+  # NUM_CLASSES: 600
+  ARCH: mvit
+  MODEL_NAME: MViT
+  LOSS_FUNC: bce # soft_cross_entropy # default cross_entropy
+  DROPOUT_RATE: 0.0
+  ACT_CHECKPOINT: True # for test flops
+TEST:  # evaluation settings
+  ENABLE: True
+  DATASET: ava
+  BATCH_SIZE: 1
+  NUM_SPATIAL_CROPS: 1  # DATA.TEST_CROP_SIZE carries a note that it applies for this value
+  CHECKPOINT_FILE_PATH:  /home/jathu/mvit.pyth  # NOTE(review): absolute path in one user's home directory; must be changed to wherever mvit.pyth is stored locally
+DATA_LOADER:  # data-loading worker settings
+  NUM_WORKERS: 4
+  PIN_MEMORY: True
+NUM_GPUS: 1  # top-level run settings start here: a single GPU and a single shard
+NUM_SHARDS: 1
+SHARD_ID: 0
+RNG_SEED: 0
+OUTPUT_DIR: .  # the current directory
+DEMO:  # demo-mode settings; the file paths below are absolute and specific to one user's machine
+  ENABLE: True
+  LABEL_FILE_PATH: /private/home/jathushan/3D/slowfast/ava_names.json  # NOTE(review): user-specific path; replace before running elsewhere
+  WEBCAM: -1
+  INPUT_VIDEO: /private/home/jathushan/datasets/ttv/webm2/82FE8F069F1354550003607470080_1fcf1757309.4.7.mp4  # NOTE(review): user-specific path; replace before running elsewhere
+  OUTPUT_FILE: output.mp4
+# #dbg
+# DATA_LOADER:
+#   NUM_WORKERS: 0
+# NUM_GPUS: 1