AZIIIIIIIIZ committed on
Commit
d670799
·
verified ·
1 Parent(s): fe51e89

Upload 1039 files

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .circleci/config.yml +34 -0
  2. .circleci/docker/Dockerfile +11 -0
  3. .circleci/test.yml +211 -0
  4. .gitattributes +12 -0
  5. .gitignore +151 -0
  6. .owners.yml +16 -0
  7. .pre-commit-config.yaml +52 -0
  8. .pylintrc +624 -0
  9. .python-version +1 -0
  10. .readthedocs.yml +14 -0
  11. 0.7.1 +0 -0
  12. 2.0.0 +0 -0
  13. 3.0.0 +0 -0
  14. CITATION.cff +8 -0
  15. DEPLOYMENT.md +186 -0
  16. LICENSE +203 -0
  17. MANIFEST.in +4 -0
  18. README.md +92 -12
  19. README_zh-CN.md +398 -0
  20. app.py +115 -0
  21. checkpoints/tsn_r50_8xb32-1x1x8-100e_kinetics400-rgb_20220818-2692d16c.pth +3 -0
  22. configs/_base_/default_runtime.py +24 -0
  23. configs/_base_/models/audioonly_r50.py +16 -0
  24. configs/_base_/models/bmn_400x100.py +12 -0
  25. configs/_base_/models/bsn_pem.py +13 -0
  26. configs/_base_/models/bsn_tem.py +8 -0
  27. configs/_base_/models/c2d_r50.py +20 -0
  28. configs/_base_/models/c3d_sports1m_pretrained.py +28 -0
  29. configs/_base_/models/i3d_r50.py +30 -0
  30. configs/_base_/models/ircsn_r152.py +28 -0
  31. configs/_base_/models/mvit_small.py +14 -0
  32. configs/_base_/models/r2plus1d_r34.py +31 -0
  33. configs/_base_/models/slowfast_r50.py +42 -0
  34. configs/_base_/models/slowonly_r50.py +24 -0
  35. configs/_base_/models/swin_tiny.py +28 -0
  36. configs/_base_/models/tanet_r50.py +23 -0
  37. configs/_base_/models/tin_r50.py +29 -0
  38. configs/_base_/models/tpn_slowonly_r50.py +45 -0
  39. configs/_base_/models/tpn_tsm_r50.py +40 -0
  40. configs/_base_/models/trn_r50.py +25 -0
  41. configs/_base_/models/tsm_mobilenet_v2.py +27 -0
  42. configs/_base_/models/tsm_mobileone_s4.py +31 -0
  43. configs/_base_/models/tsm_r50.py +24 -0
  44. configs/_base_/models/tsn_mobileone_s0.py +26 -0
  45. configs/_base_/models/tsn_r50.py +23 -0
  46. configs/_base_/models/x3d.py +20 -0
  47. configs/_base_/schedules/adam_20e.py +20 -0
  48. configs/_base_/schedules/sgd_100e.py +18 -0
  49. configs/_base_/schedules/sgd_150e_warmup.py +19 -0
  50. configs/_base_/schedules/sgd_50e.py +18 -0
.circleci/config.yml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 2.1
2
+
3
+ # this allows you to use CircleCI's dynamic configuration feature
4
+ setup: true
5
+
6
+ # the path-filtering orb is required to continue a pipeline based on
7
+ # the path of an updated fileset
8
+ orbs:
9
+ path-filtering: circleci/path-filtering@0.1.2
10
+
11
+ workflows:
12
+ # the always-run workflow is always triggered, regardless of the pipeline parameters.
13
+ always-run:
14
+ jobs:
15
+ # the path-filtering/filter job determines which pipeline
16
+ # parameters to update.
17
+ - path-filtering/filter:
18
+ name: check-updated-files
19
+ # 3-column, whitespace-delimited mapping. One mapping per
20
+ # line:
21
+ # <regex path-to-test> <parameter-to-set> <value-of-pipeline-parameter>
22
+ mapping: |
23
+ mmaction/.* lint_only false
24
+ requirements/.* lint_only false
25
+ tests/.* lint_only false
26
+ tools/.* lint_only false
27
+ configs/.* lint_only false
28
+ .circleci/.* lint_only false
29
+ base-revision: dev-1.x
30
+ # this is the path of the configuration we should trigger once
31
+ # path filtering and pipeline parameter value updates are
32
+ # complete. In this case, we are using the parent dynamic
33
+ # configuration itself.
34
+ config-path: .circleci/test.yml
.circleci/docker/Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG PYTORCH="1.8.1"
2
+ ARG CUDA="11.1"
3
+ ARG CUDNN="8"
4
+
5
+ FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
6
+
7
+ # To fix GPG key error when running apt-get update
8
+ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
9
+ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
10
+
11
+ RUN apt-get update && apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx
.circleci/test.yml ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 2.1
2
+
3
+ # the default pipeline parameters, which will be updated according to
4
+ # the results of the path-filtering orb
5
+ parameters:
6
+ lint_only:
7
+ type: boolean
8
+ default: true
9
+
10
+ jobs:
11
+ lint:
12
+ docker:
13
+ - image: cimg/python:3.7.4
14
+ steps:
15
+ - checkout
16
+ - run:
17
+ name: Install pre-commit hook
18
+ command: |
19
+ pip install pre-commit
20
+ pre-commit install
21
+ - run:
22
+ name: Linting
23
+ command: pre-commit run --all-files
24
+ - run:
25
+ name: Check docstring coverage
26
+ command: |
27
+ pip install interrogate
28
+ interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 50 mmaction
29
+ build_cpu:
30
+ parameters:
31
+ # The python version must match available image tags in
32
+ # https://circleci.com/developer/images/image/cimg/python
33
+ python:
34
+ type: string
35
+ torch:
36
+ type: string
37
+ torchvision:
38
+ type: string
39
+ docker:
40
+ - image: cimg/python:<< parameters.python >>
41
+ resource_class: large
42
+ steps:
43
+ - checkout
44
+ - run:
45
+ name: Install Libraries
46
+ command: |
47
+ sudo apt-get update
48
+ sudo apt-get upgrade
49
+ sudo apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libturbojpeg pkg-config
50
+ sudo apt-get install -y libavdevice-dev libavfilter-dev libopus-dev libvpx-dev libsrtp2-dev libsndfile1
51
+ - run:
52
+ name: Configure Python & pip
53
+ command: |
54
+ pip install --upgrade pip
55
+ pip install wheel
56
+ - run:
57
+ name: Install PyTorch
58
+ command: |
59
+ python -V
60
+ pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
61
+ - run:
62
+ name: Install mmaction dependencies
63
+ command: |
64
+ pip install git+ssh://git@github.com/open-mmlab/mmengine.git@main
65
+ pip install -U openmim
66
+ mim install 'mmcv >= 2.0.0'
67
+ pip install git+https://git@github.com/open-mmlab/mmdetection.git@dev-3.x
68
+ pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
69
+ pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
70
+ pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
71
+ pip install -r requirements.txt
72
+ - run:
73
+ name: Install timm
74
+ command: |
75
+ pip install timm
76
+ - run:
77
+ name: Install transformers
78
+ command: |
79
+ pip install transformers
80
+ - when:
81
+ condition:
82
+ equal: [ "0.10.0", << parameters.torchvision >> ]
83
+ steps:
84
+ - run: python -m pip install pytorchvideo
85
+ - run:
86
+ name: Build and install
87
+ command: |
88
+ pip install -e .
89
+ - run:
90
+ name: Run unittests
91
+ command: |
92
+ coverage run --branch --source mmaction -m pytest tests/
93
+ coverage xml
94
+ coverage report -m
95
+ build_cuda:
96
+ parameters:
97
+ torch:
98
+ type: string
99
+ cuda:
100
+ type: enum
101
+ enum: ["11.1"]
102
+ cudnn:
103
+ type: integer
104
+ default: 8
105
+ machine:
106
+ image: ubuntu-2004-cuda-11.4:202110-01
107
+ # docker_layer_caching: true
108
+ resource_class: gpu.nvidia.small
109
+ steps:
110
+ - checkout
111
+ - run:
112
+ name: Build Docker image
113
+ command: |
114
+ docker build .circleci/docker -t mmaction:gpu --build-arg PYTORCH=<< parameters.torch >> --build-arg CUDA=<< parameters.cuda >> --build-arg CUDNN=<< parameters.cudnn >>
115
+ docker run --gpus all -t -d -v /home/circleci/project:/mmaction -w /mmaction --name mmaction mmaction:gpu
116
+ docker exec mmaction apt-get update
117
+ docker exec mmaction apt-get upgrade -y
118
+ docker exec mmaction apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libturbojpeg pkg-config
119
+ docker exec mmaction apt-get install -y libavdevice-dev libavfilter-dev libopus-dev libvpx-dev libsrtp2-dev libsndfile1
120
+ - run:
121
+ name: Install PytorchVideo and timm
122
+ command: |
123
+ docker exec mmaction pip install timm
124
+ docker exec mmaction python -m pip install pytorchvideo
125
+ - run:
126
+ name: Install transformers
127
+ command: |
128
+ docker exec mmaction pip install transformers
129
+ - run:
130
+ name: Install mmaction dependencies
131
+ command: |
132
+ docker exec mmaction pip install git+https://git@github.com/open-mmlab/mmengine.git@main
133
+ docker exec mmaction pip install -U openmim
134
+ docker exec mmaction mim install 'mmcv >= 2.0.0'
135
+ docker exec mmaction pip install git+https://git@github.com/open-mmlab/mmdetection.git@dev-3.x
136
+ docker exec mmaction pip install git+https://git@github.com/open-mmlab/mmpose.git@dev-1.x
137
+ docker exec mmaction pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
138
+ docker exec mmaction pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
139
+ docker exec mmaction pip install -r requirements.txt
140
+ - run:
141
+ name: Build and install
142
+ command: |
143
+ docker exec mmaction pip install -e .
144
+ - run:
145
+ name: Run unittests
146
+ command: |
147
+ docker exec mmaction pytest tests/
148
+ workflows:
149
+ pr_stage_lint:
150
+ jobs:
151
+ - lint:
152
+ name: lint
153
+ filters:
154
+ branches:
155
+ ignore:
156
+ - dev-1.x
157
+ - main
158
+ pr_stage_test:
159
+ when:
160
+ not:
161
+ << pipeline.parameters.lint_only >>
162
+ jobs:
163
+ - lint:
164
+ name: lint
165
+ filters:
166
+ branches:
167
+ ignore:
168
+ - dev-1.x
169
+ - main
170
+ - build_cpu:
171
+ name: minimum_version_cpu
172
+ torch: 1.8.1
173
+ torchvision: 0.9.1
174
+ python: 3.7.4
175
+ requires:
176
+ - lint
177
+ - build_cpu:
178
+ name: maximum_version_cpu
179
+ torch: 1.13.0
180
+ torchvision: 0.14.0
181
+ python: 3.9.0
182
+ requires:
183
+ - minimum_version_cpu
184
+ - hold:
185
+ type: approval
186
+ requires:
187
+ - maximum_version_cpu
188
+ - build_cuda:
189
+ name: mainstream_version_gpu
190
+ torch: 1.8.1
191
+ # Use double quotation mark to explicitly specify its type
192
+ # as string instead of number
193
+ cuda: "11.1"
194
+ requires:
195
+ - hold
196
+ merge_stage_test:
197
+ when:
198
+ not:
199
+ << pipeline.parameters.lint_only >>
200
+ jobs:
201
+ - build_cuda:
202
+ name: minimum_version_gpu
203
+ torch: 1.8.1
204
+ # Use double quotation mark to explicitly specify its type
205
+ # as string instead of number
206
+ cuda: "11.1"
207
+ filters:
208
+ branches:
209
+ only:
210
+ - dev-1.x
211
+ - main
.gitattributes CHANGED
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ demo/demo.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ resources/data_pipeline.png filter=lfs diff=lfs merge=lfs -text
38
+ resources/miaomiao_qrcode.jpg filter=lfs diff=lfs merge=lfs -text
39
+ resources/mmaction2_overview.gif filter=lfs diff=lfs merge=lfs -text
40
+ resources/qq_group_qrcode.jpg filter=lfs diff=lfs merge=lfs -text
41
+ resources/spatio-temporal-det.gif filter=lfs diff=lfs merge=lfs -text
42
+ resources/zhihu_qrcode.jpg filter=lfs diff=lfs merge=lfs -text
43
+ tests/data/rawvideo_dataset/part_1.mp4 filter=lfs diff=lfs merge=lfs -text
44
+ tests/data/test.avi filter=lfs diff=lfs merge=lfs -text
45
+ tests/data/test.mp4 filter=lfs diff=lfs merge=lfs -text
46
+ tests/data/test.wav filter=lfs diff=lfs merge=lfs -text
47
+ tools/data/skeleton/S001C001P001R001A001_rgb.avi filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ **/*.pyc
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ .hypothesis/
49
+ .pytest_cache/
50
+
51
+ # Translations
52
+ *.mo
53
+ *.pot
54
+
55
+ # Django stuff:
56
+ *.log
57
+ local_settings.py
58
+ db.sqlite3
59
+
60
+ # Flask stuff:
61
+ instance/
62
+ .webassets-cache
63
+
64
+ # Scrapy stuff:
65
+ .scrapy
66
+
67
+ # Auto generate documentation
68
+ docs/*/_build/
69
+ docs/*/model_zoo/
70
+ docs/*/dataset_zoo/
71
+ docs/*/_model_zoo.rst
72
+ docs/*/modelzoo_statistics.md
73
+ docs/*/datasetzoo_statistics.md
74
+ docs/*/projectzoo.md
75
+ docs/*/papers/
76
+ docs/*/api/generated/
77
+
78
+ # PyBuilder
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # celery beat schedule file
88
+ celerybeat-schedule
89
+
90
+ # SageMath parsed files
91
+ *.sage.py
92
+
93
+ # Environments
94
+ .env
95
+ .venv
96
+ env/
97
+ venv/
98
+ ENV/
99
+ env.bak/
100
+ venv.bak/
101
+
102
+ # Spyder project settings
103
+ .spyderproject
104
+ .spyproject
105
+
106
+ # Rope project settings
107
+ .ropeproject
108
+
109
+ # mkdocs documentation
110
+ /site
111
+
112
+ # mypy
113
+ .mypy_cache/
114
+
115
+ # custom
116
+ /data
117
+ .vscode
118
+ .idea
119
+ *.pkl
120
+ *.pkl.json
121
+ *.log.json
122
+ benchlist.txt
123
+ work_dirs/
124
+ /projects/*/work_dirs
125
+ /projects/*/data
126
+ .DS_Store
127
+
128
+ # Pytorch
129
+ *.pth
130
+
131
+ # Profile
132
+ *.prof
133
+
134
+ # lmdb
135
+ *.mdb
136
+
137
+ # unignore some data file in tests/data
138
+ !tests/data/**/*.pkl
139
+ !tests/data/**/*.pkl.json
140
+ !tests/data/**/*.log.json
141
+ !tests/data/**/*.pth
142
+
143
+ # avoid soft links created by MIM
144
+ mmaction/tools/*
145
+
146
+ *.ipynb
147
+
148
+ # unignore ipython notebook files in demo
149
+ !demo/*.ipynb
150
+ !projects/stad_tutorial/*.ipynb
151
+ mmaction/.mim
.owners.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ assign:
2
+ issues: enabled
3
+ pull_requests: disabled
4
+ strategy:
5
+ # random
6
+ daily-shift-based
7
+ schedule:
8
+ '*/1 * * * *'
9
+ assignees:
10
+ - hukkai
11
+ - Dai-Wenxun
12
+ - cir7
13
+ - Dai-Wenxun
14
+ - cir7
15
+ - hukkai
16
+ - hukkai
.pre-commit-config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exclude: ^tests/data/
2
+ repos:
3
+ - repo: https://github.com/PyCQA/flake8
4
+ rev: 5.0.4
5
+ hooks:
6
+ - id: flake8
7
+ - repo: https://github.com/PyCQA/isort
8
+ rev: 5.11.5
9
+ hooks:
10
+ - id: isort
11
+ - repo: https://github.com/pre-commit/mirrors-yapf
12
+ rev: v0.32.0
13
+ hooks:
14
+ - id: yapf
15
+ - repo: https://github.com/pre-commit/pre-commit-hooks
16
+ rev: v4.3.0
17
+ hooks:
18
+ - id: trailing-whitespace
19
+ - id: check-yaml
20
+ - id: end-of-file-fixer
21
+ - id: requirements-txt-fixer
22
+ - id: double-quote-string-fixer
23
+ - id: check-merge-conflict
24
+ - id: fix-encoding-pragma
25
+ args: ["--remove"]
26
+ - id: mixed-line-ending
27
+ args: ["--fix=lf"]
28
+ - repo: https://github.com/myint/docformatter
29
+ rev: v1.3.1
30
+ hooks:
31
+ - id: docformatter
32
+ args: ["--in-place", "--wrap-descriptions", "79"]
33
+ - repo: https://github.com/codespell-project/codespell
34
+ rev: v2.1.0
35
+ hooks:
36
+ - id: codespell
37
+ args: ["--skip", "*.ipynb", "-L", "ECT,Gool,tread,gool,mot"]
38
+ - repo: https://github.com/executablebooks/mdformat
39
+ rev: 0.7.14
40
+ hooks:
41
+ - id: mdformat
42
+ args: ["--number", "--table-width", "200"]
43
+ additional_dependencies:
44
+ - mdformat-openmmlab
45
+ - mdformat_frontmatter
46
+ - linkify-it-py
47
+ - repo: https://github.com/open-mmlab/pre-commit-hooks
48
+ rev: v0.2.0
49
+ hooks:
50
+ - id: check-algo-readme
51
+ - id: check-copyright
52
+ args: ["mmaction", "tests", "demo", "tools"]
.pylintrc ADDED
@@ -0,0 +1,624 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [MASTER]
2
+
3
+ # A comma-separated list of package or module names from where C extensions may
4
+ # be loaded. Extensions are loading into the active Python interpreter and may
5
+ # run arbitrary code.
6
+ extension-pkg-whitelist=
7
+
8
+ # Specify a score threshold to be exceeded before program exits with error.
9
+ fail-under=10
10
+
11
+ # Add files or directories to the blacklist. They should be base names, not
12
+ # paths.
13
+ ignore=CVS,configs
14
+
15
+ # Add files or directories matching the regex patterns to the blacklist. The
16
+ # regex matches against base names, not paths.
17
+ ignore-patterns=
18
+
19
+ # Python code to execute, usually for sys.path manipulation such as
20
+ # pygtk.require().
21
+ #init-hook=
22
+
23
+ # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
24
+ # number of processors available to use.
25
+ jobs=1
26
+
27
+ # Control the amount of potential inferred values when inferring a single
28
+ # object. This can help the performance when dealing with large functions or
29
+ # complex, nested conditions.
30
+ limit-inference-results=100
31
+
32
+ # List of plugins (as comma separated values of python module names) to load,
33
+ # usually to register additional checkers.
34
+ load-plugins=
35
+
36
+ # Pickle collected data for later comparisons.
37
+ persistent=yes
38
+
39
+ # When enabled, pylint would attempt to guess common misconfiguration and emit
40
+ # user-friendly hints instead of false-positive error messages.
41
+ suggestion-mode=yes
42
+
43
+ # Allow loading of arbitrary C extensions. Extensions are imported into the
44
+ # active Python interpreter and may run arbitrary code.
45
+ unsafe-load-any-extension=no
46
+
47
+
48
+ [MESSAGES CONTROL]
49
+
50
+ # Only show warnings with the listed confidence levels. Leave empty to show
51
+ # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
52
+ confidence=
53
+
54
+ # Disable the message, report, category or checker with the given id(s). You
55
+ # can either give multiple identifiers separated by comma (,) or put this
56
+ # option multiple times (only on the command line, not in the configuration
57
+ # file where it should appear only once). You can also use "--disable=all" to
58
+ # disable everything first and then reenable specific checks. For example, if
59
+ # you want to run only the similarities checker, you can use "--disable=all
60
+ # --enable=similarities". If you want to run only the classes checker, but have
61
+ # no Warning level messages displayed, use "--disable=all --enable=classes
62
+ # --disable=W".
63
+ disable=import-outside-toplevel,
64
+ redefined-outer-name,
65
+ print-statement,
66
+ parameter-unpacking,
67
+ unpacking-in-except,
68
+ old-raise-syntax,
69
+ backtick,
70
+ long-suffix,
71
+ old-ne-operator,
72
+ old-octal-literal,
73
+ import-star-module-level,
74
+ non-ascii-bytes-literal,
75
+ raw-checker-failed,
76
+ bad-inline-option,
77
+ locally-disabled,
78
+ file-ignored,
79
+ suppressed-message,
80
+ useless-suppression,
81
+ deprecated-pragma,
82
+ use-symbolic-message-instead,
83
+ apply-builtin,
84
+ basestring-builtin,
85
+ buffer-builtin,
86
+ cmp-builtin,
87
+ coerce-builtin,
88
+ execfile-builtin,
89
+ file-builtin,
90
+ long-builtin,
91
+ raw_input-builtin,
92
+ reduce-builtin,
93
+ standarderror-builtin,
94
+ unicode-builtin,
95
+ xrange-builtin,
96
+ coerce-method,
97
+ delslice-method,
98
+ getslice-method,
99
+ setslice-method,
100
+ no-absolute-import,
101
+ old-division,
102
+ dict-iter-method,
103
+ dict-view-method,
104
+ next-method-called,
105
+ metaclass-assignment,
106
+ indexing-exception,
107
+ raising-string,
108
+ reload-builtin,
109
+ oct-method,
110
+ hex-method,
111
+ nonzero-method,
112
+ cmp-method,
113
+ input-builtin,
114
+ round-builtin,
115
+ intern-builtin,
116
+ unichr-builtin,
117
+ map-builtin-not-iterating,
118
+ zip-builtin-not-iterating,
119
+ range-builtin-not-iterating,
120
+ filter-builtin-not-iterating,
121
+ using-cmp-argument,
122
+ eq-without-hash,
123
+ div-method,
124
+ idiv-method,
125
+ rdiv-method,
126
+ exception-message-attribute,
127
+ invalid-str-codec,
128
+ sys-max-int,
129
+ bad-python3-import,
130
+ deprecated-string-function,
131
+ deprecated-str-translate-call,
132
+ deprecated-itertools-function,
133
+ deprecated-types-field,
134
+ next-method-defined,
135
+ dict-items-not-iterating,
136
+ dict-keys-not-iterating,
137
+ dict-values-not-iterating,
138
+ deprecated-operator-function,
139
+ deprecated-urllib-function,
140
+ xreadlines-attribute,
141
+ deprecated-sys-function,
142
+ exception-escape,
143
+ comprehension-escape,
144
+ no-member,
145
+ invalid-name,
146
+ too-many-branches,
147
+ wrong-import-order,
148
+ too-many-arguments,
149
+ missing-function-docstring,
150
+ missing-module-docstring,
151
+ too-many-locals,
152
+ too-few-public-methods,
153
+ abstract-method,
154
+ broad-except,
155
+ too-many-nested-blocks,
156
+ too-many-instance-attributes,
157
+ missing-class-docstring,
158
+ duplicate-code,
159
+ not-callable,
160
+ protected-access,
161
+ dangerous-default-value,
162
+ no-name-in-module,
163
+ logging-fstring-interpolation,
164
+ super-init-not-called,
165
+ redefined-builtin,
166
+ attribute-defined-outside-init,
167
+ arguments-differ,
168
+ cyclic-import,
169
+ bad-super-call,
170
+ too-many-statements,
171
+ line-too-long
172
+
173
+ # Enable the message, report, category or checker with the given id(s). You can
174
+ # either give multiple identifier separated by comma (,) or put this option
175
+ # multiple time (only on the command line, not in the configuration file where
176
+ # it should appear only once). See also the "--disable" option for examples.
177
+ enable=c-extension-no-member
178
+
179
+
180
+ [REPORTS]
181
+
182
+ # Python expression which should return a score less than or equal to 10. You
183
+ # have access to the variables 'error', 'warning', 'refactor', and 'convention'
184
+ # which contain the number of messages in each category, as well as 'statement'
185
+ # which is the total number of statements analyzed. This score is used by the
186
+ # global evaluation report (RP0004).
187
+ evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
188
+
189
+ # Template used to display messages. This is a python new-style format string
190
+ # used to format the message information. See doc for all details.
191
+ #msg-template=
192
+
193
+ # Set the output format. Available formats are text, parseable, colorized, json
194
+ # and msvs (visual studio). You can also give a reporter class, e.g.
195
+ # mypackage.mymodule.MyReporterClass.
196
+ output-format=text
197
+
198
+ # Tells whether to display a full report or only the messages.
199
+ reports=no
200
+
201
+ # Activate the evaluation score.
202
+ score=yes
203
+
204
+
205
+ [REFACTORING]
206
+
207
+ # Maximum number of nested blocks for function / method body
208
+ max-nested-blocks=5
209
+
210
+ # Complete name of functions that never returns. When checking for
211
+ # inconsistent-return-statements if a never returning function is called then
212
+ # it will be considered as an explicit return statement and no message will be
213
+ # printed.
214
+ never-returning-functions=sys.exit
215
+
216
+
217
+ [TYPECHECK]
218
+
219
+ # List of decorators that produce context managers, such as
220
+ # contextlib.contextmanager. Add to this list to register other decorators that
221
+ # produce valid context managers.
222
+ contextmanager-decorators=contextlib.contextmanager
223
+
224
+ # List of members which are set dynamically and missed by pylint inference
225
+ # system, and so shouldn't trigger E1101 when accessed. Python regular
226
+ # expressions are accepted.
227
+ generated-members=
228
+
229
+ # Tells whether missing members accessed in mixin class should be ignored. A
230
+ # mixin class is detected if its name ends with "mixin" (case insensitive).
231
+ ignore-mixin-members=yes
232
+
233
+ # Tells whether to warn about missing members when the owner of the attribute
234
+ # is inferred to be None.
235
+ ignore-none=yes
236
+
237
+ # This flag controls whether pylint should warn about no-member and similar
238
+ # checks whenever an opaque object is returned when inferring. The inference
239
+ # can return multiple potential results while evaluating a Python object, but
240
+ # some branches might not be evaluated, which results in partial inference. In
241
+ # that case, it might be useful to still emit no-member and other checks for
242
+ # the rest of the inferred objects.
243
+ ignore-on-opaque-inference=yes
244
+
245
+ # List of class names for which member attributes should not be checked (useful
246
+ # for classes with dynamically set attributes). This supports the use of
247
+ # qualified names.
248
+ ignored-classes=optparse.Values,thread._local,_thread._local
249
+
250
+ # List of module names for which member attributes should not be checked
251
+ # (useful for modules/projects where namespaces are manipulated during runtime
252
+ # and thus existing member attributes cannot be deduced by static analysis). It
253
+ # supports qualified module names, as well as Unix pattern matching.
254
+ ignored-modules=
255
+
256
+ # Show a hint with possible names when a member name was not found. The aspect
257
+ # of finding the hint is based on edit distance.
258
+ missing-member-hint=yes
259
+
260
+ # The minimum edit distance a name should have in order to be considered a
261
+ # similar match for a missing member name.
262
+ missing-member-hint-distance=1
263
+
264
+ # The total number of similar names that should be taken in consideration when
265
+ # showing a hint for a missing member.
266
+ missing-member-max-choices=1
267
+
268
+ # List of decorators that change the signature of a decorated function.
269
+ signature-mutators=
270
+
271
+
272
+ [SPELLING]
273
+
274
+ # Limits count of emitted suggestions for spelling mistakes.
275
+ max-spelling-suggestions=4
276
+
277
+ # Spelling dictionary name. Available dictionaries: none. To make it work,
278
+ # install the python-enchant package.
279
+ spelling-dict=
280
+
281
+ # List of comma separated words that should not be checked.
282
+ spelling-ignore-words=
283
+
284
+ # A path to a file that contains the private dictionary; one word per line.
285
+ spelling-private-dict-file=
286
+
287
+ # Tells whether to store unknown words to the private dictionary (see the
288
+ # --spelling-private-dict-file option) instead of raising a message.
289
+ spelling-store-unknown-words=no
290
+
291
+
292
+ [LOGGING]
293
+
294
+ # The type of string formatting that logging methods do. `old` means using %
295
+ # formatting, `new` is for `{}` formatting.
296
+ logging-format-style=old
297
+
298
+ # Logging modules to check that the string format arguments are in logging
299
+ # function parameter format.
300
+ logging-modules=logging
301
+
302
+
303
+ [VARIABLES]
304
+
305
+ # List of additional names supposed to be defined in builtins. Remember that
306
+ # you should avoid defining new builtins when possible.
307
+ additional-builtins=
308
+
309
+ # Tells whether unused global variables should be treated as a violation.
310
+ allow-global-unused-variables=yes
311
+
312
+ # List of strings which can identify a callback function by name. A callback
313
+ # name must start or end with one of those strings.
314
+ callbacks=cb_,
315
+ _cb
316
+
317
+ # A regular expression matching the name of dummy variables (i.e. expected to
318
+ # not be used).
319
+ dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
320
+
321
+ # Argument names that match this expression will be ignored. Default to name
322
+ # with leading underscore.
323
+ ignored-argument-names=_.*|^ignored_|^unused_
324
+
325
+ # Tells whether we should check for unused import in __init__ files.
326
+ init-import=no
327
+
328
+ # List of qualified module names which can have objects that can redefine
329
+ # builtins.
330
+ redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
331
+
332
+
333
+ [FORMAT]
334
+
335
+ # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
336
+ expected-line-ending-format=
337
+
338
+ # Regexp for a line that is allowed to be longer than the limit.
339
+ ignore-long-lines=^\s*(# )?<?https?://\S+>?$
340
+
341
+ # Number of spaces of indent required inside a hanging or continued line.
342
+ indent-after-paren=4
343
+
344
+ # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
345
+ # tab).
346
+ indent-string=' '
347
+
348
+ # Maximum number of characters on a single line.
349
+ max-line-length=100
350
+
351
+ # Maximum number of lines in a module.
352
+ max-module-lines=1000
353
+
354
+ # Allow the body of a class to be on the same line as the declaration if body
355
+ # contains single statement.
356
+ single-line-class-stmt=no
357
+
358
+ # Allow the body of an if to be on the same line as the test if there is no
359
+ # else.
360
+ single-line-if-stmt=no
361
+
362
+
363
+ [STRING]
364
+
365
+ # This flag controls whether inconsistent-quotes generates a warning when the
366
+ # character used as a quote delimiter is used inconsistently within a module.
367
+ check-quote-consistency=no
368
+
369
+ # This flag controls whether the implicit-str-concat should generate a warning
370
+ # on implicit string concatenation in sequences defined over several lines.
371
+ check-str-concat-over-line-jumps=no
372
+
373
+
374
+ [SIMILARITIES]
375
+
376
+ # Ignore comments when computing similarities.
377
+ ignore-comments=yes
378
+
379
+ # Ignore docstrings when computing similarities.
380
+ ignore-docstrings=yes
381
+
382
+ # Ignore imports when computing similarities.
383
+ ignore-imports=no
384
+
385
+ # Minimum lines number of a similarity.
386
+ min-similarity-lines=4
387
+
388
+
389
+ [MISCELLANEOUS]
390
+
391
+ # List of note tags to take in consideration, separated by a comma.
392
+ notes=FIXME,
393
+ XXX,
394
+ TODO
395
+
396
+ # Regular expression of note tags to take in consideration.
397
+ #notes-rgx=
398
+
399
+
400
+ [BASIC]
401
+
402
+ # Naming style matching correct argument names.
403
+ argument-naming-style=snake_case
404
+
405
+ # Regular expression matching correct argument names. Overrides argument-
406
+ # naming-style.
407
+ #argument-rgx=
408
+
409
+ # Naming style matching correct attribute names.
410
+ attr-naming-style=snake_case
411
+
412
+ # Regular expression matching correct attribute names. Overrides attr-naming-
413
+ # style.
414
+ #attr-rgx=
415
+
416
+ # Bad variable names which should always be refused, separated by a comma.
417
+ bad-names=foo,
418
+ bar,
419
+ baz,
420
+ toto,
421
+ tutu,
422
+ tata
423
+
424
+ # Bad variable names regexes, separated by a comma. If names match any regex,
425
+ # they will always be refused
426
+ bad-names-rgxs=
427
+
428
+ # Naming style matching correct class attribute names.
429
+ class-attribute-naming-style=any
430
+
431
+ # Regular expression matching correct class attribute names. Overrides class-
432
+ # attribute-naming-style.
433
+ #class-attribute-rgx=
434
+
435
+ # Naming style matching correct class names.
436
+ class-naming-style=PascalCase
437
+
438
+ # Regular expression matching correct class names. Overrides class-naming-
439
+ # style.
440
+ #class-rgx=
441
+
442
+ # Naming style matching correct constant names.
443
+ const-naming-style=UPPER_CASE
444
+
445
+ # Regular expression matching correct constant names. Overrides const-naming-
446
+ # style.
447
+ #const-rgx=
448
+
449
+ # Minimum line length for functions/classes that require docstrings, shorter
450
+ # ones are exempt.
451
+ docstring-min-length=-1
452
+
453
+ # Naming style matching correct function names.
454
+ function-naming-style=snake_case
455
+
456
+ # Regular expression matching correct function names. Overrides function-
457
+ # naming-style.
458
+ #function-rgx=
459
+
460
+ # Good variable names which should always be accepted, separated by a comma.
461
+ good-names=i,
462
+ j,
463
+ k,
464
+ ex,
465
+ Run,
466
+ _,
467
+ x,
468
+ y,
469
+ w,
470
+ h,
471
+ a,
472
+ b
473
+
474
+ # Good variable names regexes, separated by a comma. If names match any regex,
475
+ # they will always be accepted
476
+ good-names-rgxs=
477
+
478
+ # Include a hint for the correct naming format with invalid-name.
479
+ include-naming-hint=no
480
+
481
+ # Naming style matching correct inline iteration names.
482
+ inlinevar-naming-style=any
483
+
484
+ # Regular expression matching correct inline iteration names. Overrides
485
+ # inlinevar-naming-style.
486
+ #inlinevar-rgx=
487
+
488
+ # Naming style matching correct method names.
489
+ method-naming-style=snake_case
490
+
491
+ # Regular expression matching correct method names. Overrides method-naming-
492
+ # style.
493
+ #method-rgx=
494
+
495
+ # Naming style matching correct module names.
496
+ module-naming-style=snake_case
497
+
498
+ # Regular expression matching correct module names. Overrides module-naming-
499
+ # style.
500
+ #module-rgx=
501
+
502
+ # Colon-delimited sets of names that determine each other's naming style when
503
+ # the name regexes allow several styles.
504
+ name-group=
505
+
506
+ # Regular expression which should only match function or class names that do
507
+ # not require a docstring.
508
+ no-docstring-rgx=^_
509
+
510
+ # List of decorators that produce properties, such as abc.abstractproperty. Add
511
+ # to this list to register other decorators that produce valid properties.
512
+ # These decorators are taken in consideration only for invalid-name.
513
+ property-classes=abc.abstractproperty
514
+
515
+ # Naming style matching correct variable names.
516
+ variable-naming-style=snake_case
517
+
518
+ # Regular expression matching correct variable names. Overrides variable-
519
+ # naming-style.
520
+ #variable-rgx=
521
+
522
+
523
+ [DESIGN]
524
+
525
+ # Maximum number of arguments for function / method.
526
+ max-args=5
527
+
528
+ # Maximum number of attributes for a class (see R0902).
529
+ max-attributes=7
530
+
531
+ # Maximum number of boolean expressions in an if statement (see R0916).
532
+ max-bool-expr=5
533
+
534
+ # Maximum number of branch for function / method body.
535
+ max-branches=12
536
+
537
+ # Maximum number of locals for function / method body.
538
+ max-locals=15
539
+
540
+ # Maximum number of parents for a class (see R0901).
541
+ max-parents=7
542
+
543
+ # Maximum number of public methods for a class (see R0904).
544
+ max-public-methods=20
545
+
546
+ # Maximum number of return / yield for function / method body.
547
+ max-returns=6
548
+
549
+ # Maximum number of statements in function / method body.
550
+ max-statements=50
551
+
552
+ # Minimum number of public methods for a class (see R0903).
553
+ min-public-methods=2
554
+
555
+
556
+ [IMPORTS]
557
+
558
+ # List of modules that can be imported at any level, not just the top level
559
+ # one.
560
+ allow-any-import-level=
561
+
562
+ # Allow wildcard imports from modules that define __all__.
563
+ allow-wildcard-with-all=no
564
+
565
+ # Analyse import fallback blocks. This can be used to support both Python 2 and
566
+ # 3 compatible code, which means that the block might have code that exists
567
+ # only in one or another interpreter, leading to false positives when analysed.
568
+ analyse-fallback-blocks=no
569
+
570
+ # Deprecated modules which should not be used, separated by a comma.
571
+ deprecated-modules=optparse,tkinter.tix
572
+
573
+ # Create a graph of external dependencies in the given file (report RP0402 must
574
+ # not be disabled).
575
+ ext-import-graph=
576
+
577
+ # Create a graph of every (i.e. internal and external) dependencies in the
578
+ # given file (report RP0402 must not be disabled).
579
+ import-graph=
580
+
581
+ # Create a graph of internal dependencies in the given file (report RP0402 must
582
+ # not be disabled).
583
+ int-import-graph=
584
+
585
+ # Force import order to recognize a module as part of the standard
586
+ # compatibility libraries.
587
+ known-standard-library=
588
+
589
+ # Force import order to recognize a module as part of a third party library.
590
+ known-third-party=enchant
591
+
592
+ # Couples of modules and preferred modules, separated by a comma.
593
+ preferred-modules=
594
+
595
+
596
+ [CLASSES]
597
+
598
+ # List of method names used to declare (i.e. assign) instance attributes.
599
+ defining-attr-methods=__init__,
600
+ __new__,
601
+ setUp,
602
+ __post_init__
603
+
604
+ # List of member names, which should be excluded from the protected access
605
+ # warning.
606
+ exclude-protected=_asdict,
607
+ _fields,
608
+ _replace,
609
+ _source,
610
+ _make
611
+
612
+ # List of valid names for the first argument in a class method.
613
+ valid-classmethod-first-arg=cls
614
+
615
+ # List of valid names for the first argument in a metaclass class method.
616
+ valid-metaclass-classmethod-first-arg=cls
617
+
618
+
619
+ [EXCEPTIONS]
620
+
621
+ # Exceptions that will emit a warning when being caught. Defaults to
622
+ # "BaseException, Exception".
623
+ overgeneral-exceptions=BaseException,
624
+ Exception
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
.readthedocs.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-22.04
5
+ tools:
6
+ python: "3.9"
7
+
8
+ formats:
9
+ - epub
10
+
11
+ python:
12
+ install:
13
+ - requirements: requirements/docs.txt
14
+ - requirements: requirements/readthedocs.txt
0.7.1 ADDED
File without changes
2.0.0 ADDED
File without changes
3.0.0 ADDED
File without changes
CITATION.cff ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ cff-version: 1.2.0
2
+ message: "If you use this software, please cite it as below."
3
+ authors:
4
+ - name: "MMAction2 Contributors"
5
+ title: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark"
6
+ date-released: 2020-07-21
7
+ url: "https://github.com/open-mmlab/mmaction2"
8
+ license: Apache-2.0
DEPLOYMENT.md ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deployment Guide for Hugging Face Spaces
2
+
3
+ This guide will help you deploy the GenVidBench project on Hugging Face Spaces.
4
+
5
+ ## 🚀 Quick Start
6
+
7
+ ### 1. Prepare Your Repository
8
+
9
+ Make sure your repository contains these files:
10
+ - `app.py` - Main Gradio application
11
+ - `requirements.txt` - Python dependencies
12
+ - `README.md` - Space metadata and description
13
+ - `download_model.py` - Model download script
14
+ - `setup.py` - Setup script (optional)
15
+
16
+ ### 2. Create a Hugging Face Space
17
+
18
+ 1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
19
+ 2. Click "Create new Space"
20
+ 3. Fill in the details:
21
+ - **Space name**: `genvidbench` (or your preferred name)
22
+ - **License**: Apache 2.0
23
+ - **SDK**: Gradio
24
+ - **Hardware**: CPU Basic (or GPU if needed)
25
+ - **Visibility**: Public or Private
26
+
27
+ ### 3. Upload Your Code
28
+
29
+ You can either:
30
+ - **Option A**: Push your code to a GitHub repository and connect it
31
+ - **Option B**: Upload files directly through the web interface
32
+
33
+ ### 4. Configure the Space
34
+
35
+ The space will automatically:
36
+ - Install dependencies from `requirements.txt`
37
+ - Run `app.py` as the main application
38
+ - Display the Gradio interface
39
+
40
+ ## 📁 Required Files
41
+
42
+ ### `app.py`
43
+ Your main Gradio application file. Should contain:
44
+ - Model initialization
45
+ - Gradio interface definition
46
+ - Video processing logic
47
+
48
+ ### `requirements.txt`
49
+ List of Python packages needed:
50
+ ```
51
+ torch>=1.13.0
52
+ torchvision>=0.14.0
53
+ mmcv>=2.0.0
54
+ mmengine>=0.7.1
55
+ gradio>=4.0.0
56
+ opencv-python>=4.6.0
57
+ decord>=0.6.0
58
+ # ... other dependencies
59
+ ```
60
+
61
+ ### `README.md`
62
+ Space metadata and description:
63
+ ```yaml
64
+ ---
65
+ title: GenVidBench - Video Action Recognition
66
+ emoji: 🎬
67
+ colorFrom: blue
68
+ colorTo: purple
69
+ sdk: gradio
70
+ sdk_version: 4.0.0
71
+ app_file: app.py
72
+ pinned: false
73
+ license: apache-2.0
74
+ ---
75
+ ```
76
+
77
+ ## 🔧 Model Setup
78
+
79
+ ### Option 1: Automatic Download
80
+ The app will try to download the model checkpoint automatically using `download_model.py`.
81
+
82
+ ### Option 2: Manual Upload
83
+ 1. Download the model checkpoint manually
84
+ 2. Upload it to your space's file system
85
+ 3. Place it in the `checkpoints/` directory
86
+
87
+ ### Option 3: Hugging Face Hub
88
+ Store the model on Hugging Face Hub and load it programmatically:
89
+ ```python
90
+ from huggingface_hub import hf_hub_download
91
+ checkpoint_path = hf_hub_download(repo_id="your-username/your-model", filename="model.pth")
92
+ ```
93
+
94
+ ## 🐛 Troubleshooting
95
+
96
+ ### Common Issues
97
+
98
+ 1. **Model not found**
99
+ - Ensure the checkpoint file is in the correct location
100
+ - Check file permissions
101
+ - Verify the download completed successfully
102
+
103
+ 2. **Import errors**
104
+ - Check that all dependencies are in `requirements.txt`
105
+ - Verify package versions are compatible
106
+
107
+ 3. **Memory issues**
108
+ - Consider using a smaller model
109
+ - Optimize batch size
110
+ - Use CPU instead of GPU if needed
111
+
112
+ 4. **Slow loading**
113
+ - Pre-download models during setup
114
+ - Use model caching
115
+ - Optimize model size
116
+
117
+ ### Debug Mode
118
+
119
+ Add debug information to your app:
120
+ ```python
121
+ import logging
122
+ logging.basicConfig(level=logging.DEBUG)
123
+ ```
124
+
125
+ ## 📊 Performance Optimization
126
+
127
+ ### For Hugging Face Spaces
128
+
129
+ 1. **Model Size**: Keep models under 2GB for faster loading
130
+ 2. **Dependencies**: Minimize the number of packages
131
+ 3. **Caching**: Use model caching to avoid re-downloading
132
+ 4. **Lazy Loading**: Load models only when needed
133
+
134
+ ### Example Optimizations
135
+
136
+ ```python
137
+ # Lazy model loading
138
+ model = None
139
+
140
+ def get_model():
141
+ global model
142
+ if model is None:
143
+ model = init_recognizer(config_file, checkpoint_file, device='cpu')
144
+ return model
145
+
146
+ def analyze_video(video):
147
+ model = get_model() # Load only when needed
148
+ # ... rest of the function
149
+ ```
150
+
151
+ ## 🔒 Security Considerations
152
+
153
+ 1. **File Upload Limits**: Set appropriate limits for video uploads
154
+ 2. **Input Validation**: Validate video formats and sizes
155
+ 3. **Resource Limits**: Monitor CPU/memory usage
156
+ 4. **Error Handling**: Graceful error handling for edge cases
157
+
158
+ ## 📈 Monitoring
159
+
160
+ Monitor your space:
161
+ - Check logs in the Hugging Face interface
162
+ - Monitor resource usage
163
+ - Track user interactions
164
+ - Set up alerts for failures
165
+
166
+ ## 🚀 Going Live
167
+
168
+ Once everything is working:
169
+
170
+ 1. **Test thoroughly** with different video types
171
+ 2. **Optimize performance** for your target audience
172
+ 3. **Add documentation** for users
173
+ 4. **Monitor usage** and gather feedback
174
+ 5. **Iterate and improve** based on user needs
175
+
176
+ ## 📞 Support
177
+
178
+ If you encounter issues:
179
+ - Check the Hugging Face Spaces documentation
180
+ - Review the logs in your space
181
+ - Test locally first
182
+ - Ask for help in the Hugging Face community
183
+
184
+ ---
185
+
186
+ **Happy deploying! 🎉**
LICENSE ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2018-2019 Open-MMLab. All rights reserved.
2
+
3
+ Apache License
4
+ Version 2.0, January 2004
5
+ http://www.apache.org/licenses/
6
+
7
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
+
9
+ 1. Definitions.
10
+
11
+ "License" shall mean the terms and conditions for use, reproduction,
12
+ and distribution as defined by Sections 1 through 9 of this document.
13
+
14
+ "Licensor" shall mean the copyright owner or entity authorized by
15
+ the copyright owner that is granting the License.
16
+
17
+ "Legal Entity" shall mean the union of the acting entity and all
18
+ other entities that control, are controlled by, or are under common
19
+ control with that entity. For the purposes of this definition,
20
+ "control" means (i) the power, direct or indirect, to cause the
21
+ direction or management of such entity, whether by contract or
22
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
23
+ outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ "You" (or "Your") shall mean an individual or Legal Entity
26
+ exercising permissions granted by this License.
27
+
28
+ "Source" form shall mean the preferred form for making modifications,
29
+ including but not limited to software source code, documentation
30
+ source, and configuration files.
31
+
32
+ "Object" form shall mean any form resulting from mechanical
33
+ transformation or translation of a Source form, including but
34
+ not limited to compiled object code, generated documentation,
35
+ and conversions to other media types.
36
+
37
+ "Work" shall mean the work of authorship, whether in Source or
38
+ Object form, made available under the License, as indicated by a
39
+ copyright notice that is included in or attached to the work
40
+ (an example is provided in the Appendix below).
41
+
42
+ "Derivative Works" shall mean any work, whether in Source or Object
43
+ form, that is based on (or derived from) the Work and for which the
44
+ editorial revisions, annotations, elaborations, or other modifications
45
+ represent, as a whole, an original work of authorship. For the purposes
46
+ of this License, Derivative Works shall not include works that remain
47
+ separable from, or merely link (or bind by name) to the interfaces of,
48
+ the Work and Derivative Works thereof.
49
+
50
+ "Contribution" shall mean any work of authorship, including
51
+ the original version of the Work and any modifications or additions
52
+ to that Work or Derivative Works thereof, that is intentionally
53
+ submitted to Licensor for inclusion in the Work by the copyright owner
54
+ or by an individual or Legal Entity authorized to submit on behalf of
55
+ the copyright owner. For the purposes of this definition, "submitted"
56
+ means any form of electronic, verbal, or written communication sent
57
+ to the Licensor or its representatives, including but not limited to
58
+ communication on electronic mailing lists, source code control systems,
59
+ and issue tracking systems that are managed by, or on behalf of, the
60
+ Licensor for the purpose of discussing and improving the Work, but
61
+ excluding communication that is conspicuously marked or otherwise
62
+ designated in writing by the copyright owner as "Not a Contribution."
63
+
64
+ "Contributor" shall mean Licensor and any individual or Legal Entity
65
+ on behalf of whom a Contribution has been received by Licensor and
66
+ subsequently incorporated within the Work.
67
+
68
+ 2. Grant of Copyright License. Subject to the terms and conditions of
69
+ this License, each Contributor hereby grants to You a perpetual,
70
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71
+ copyright license to reproduce, prepare Derivative Works of,
72
+ publicly display, publicly perform, sublicense, and distribute the
73
+ Work and such Derivative Works in Source or Object form.
74
+
75
+ 3. Grant of Patent License. Subject to the terms and conditions of
76
+ this License, each Contributor hereby grants to You a perpetual,
77
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78
+ (except as stated in this section) patent license to make, have made,
79
+ use, offer to sell, sell, import, and otherwise transfer the Work,
80
+ where such license applies only to those patent claims licensable
81
+ by such Contributor that are necessarily infringed by their
82
+ Contribution(s) alone or by combination of their Contribution(s)
83
+ with the Work to which such Contribution(s) was submitted. If You
84
+ institute patent litigation against any entity (including a
85
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
86
+ or a Contribution incorporated within the Work constitutes direct
87
+ or contributory patent infringement, then any patent licenses
88
+ granted to You under this License for that Work shall terminate
89
+ as of the date such litigation is filed.
90
+
91
+ 4. Redistribution. You may reproduce and distribute copies of the
92
+ Work or Derivative Works thereof in any medium, with or without
93
+ modifications, and in Source or Object form, provided that You
94
+ meet the following conditions:
95
+
96
+ (a) You must give any other recipients of the Work or
97
+ Derivative Works a copy of this License; and
98
+
99
+ (b) You must cause any modified files to carry prominent notices
100
+ stating that You changed the files; and
101
+
102
+ (c) You must retain, in the Source form of any Derivative Works
103
+ that You distribute, all copyright, patent, trademark, and
104
+ attribution notices from the Source form of the Work,
105
+ excluding those notices that do not pertain to any part of
106
+ the Derivative Works; and
107
+
108
+ (d) If the Work includes a "NOTICE" text file as part of its
109
+ distribution, then any Derivative Works that You distribute must
110
+ include a readable copy of the attribution notices contained
111
+ within such NOTICE file, excluding those notices that do not
112
+ pertain to any part of the Derivative Works, in at least one
113
+ of the following places: within a NOTICE text file distributed
114
+ as part of the Derivative Works; within the Source form or
115
+ documentation, if provided along with the Derivative Works; or,
116
+ within a display generated by the Derivative Works, if and
117
+ wherever such third-party notices normally appear. The contents
118
+ of the NOTICE file are for informational purposes only and
119
+ do not modify the License. You may add Your own attribution
120
+ notices within Derivative Works that You distribute, alongside
121
+ or as an addendum to the NOTICE text from the Work, provided
122
+ that such additional attribution notices cannot be construed
123
+ as modifying the License.
124
+
125
+ You may add Your own copyright statement to Your modifications and
126
+ may provide additional or different license terms and conditions
127
+ for use, reproduction, or distribution of Your modifications, or
128
+ for any such Derivative Works as a whole, provided Your use,
129
+ reproduction, and distribution of the Work otherwise complies with
130
+ the conditions stated in this License.
131
+
132
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
133
+ any Contribution intentionally submitted for inclusion in the Work
134
+ by You to the Licensor shall be under the terms and conditions of
135
+ this License, without any additional terms or conditions.
136
+ Notwithstanding the above, nothing herein shall supersede or modify
137
+ the terms of any separate license agreement you may have executed
138
+ with Licensor regarding such Contributions.
139
+
140
+ 6. Trademarks. This License does not grant permission to use the trade
141
+ names, trademarks, service marks, or product names of the Licensor,
142
+ except as required for reasonable and customary use in describing the
143
+ origin of the Work and reproducing the content of the NOTICE file.
144
+
145
+ 7. Disclaimer of Warranty. Unless required by applicable law or
146
+ agreed to in writing, Licensor provides the Work (and each
147
+ Contributor provides its Contributions) on an "AS IS" BASIS,
148
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149
+ implied, including, without limitation, any warranties or conditions
150
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151
+ PARTICULAR PURPOSE. You are solely responsible for determining the
152
+ appropriateness of using or redistributing the Work and assume any
153
+ risks associated with Your exercise of permissions under this License.
154
+
155
+ 8. Limitation of Liability. In no event and under no legal theory,
156
+ whether in tort (including negligence), contract, or otherwise,
157
+ unless required by applicable law (such as deliberate and grossly
158
+ negligent acts) or agreed to in writing, shall any Contributor be
159
+ liable to You for damages, including any direct, indirect, special,
160
+ incidental, or consequential damages of any character arising as a
161
+ result of this License or out of the use or inability to use the
162
+ Work (including but not limited to damages for loss of goodwill,
163
+ work stoppage, computer failure or malfunction, or any and all
164
+ other commercial damages or losses), even if such Contributor
165
+ has been advised of the possibility of such damages.
166
+
167
+ 9. Accepting Warranty or Additional Liability. While redistributing
168
+ the Work or Derivative Works thereof, You may choose to offer,
169
+ and charge a fee for, acceptance of support, warranty, indemnity,
170
+ or other liability obligations and/or rights consistent with this
171
+ License. However, in accepting such obligations, You may act only
172
+ on Your own behalf and on Your sole responsibility, not on behalf
173
+ of any other Contributor, and only if You agree to indemnify,
174
+ defend, and hold each Contributor harmless for any liability
175
+ incurred by, or claims asserted against, such Contributor by reason
176
+ of your accepting any such warranty or additional liability.
177
+
178
+ END OF TERMS AND CONDITIONS
179
+
180
+ APPENDIX: How to apply the Apache License to your work.
181
+
182
+ To apply the Apache License to your work, attach the following
183
+ boilerplate notice, with the fields enclosed by brackets "[]"
184
+ replaced with your own identifying information. (Don't include
185
+ the brackets!) The text should be enclosed in the appropriate
186
+ comment syntax for the file format. We also recommend that a
187
+ file or class name and description of purpose be included on the
188
+ same "printed page" as the copyright notice for easier
189
+ identification within third-party archives.
190
+
191
+ Copyright 2018-2019 Open-MMLab.
192
+
193
+ Licensed under the Apache License, Version 2.0 (the "License");
194
+ you may not use this file except in compliance with the License.
195
+ You may obtain a copy of the License at
196
+
197
+ http://www.apache.org/licenses/LICENSE-2.0
198
+
199
+ Unless required by applicable law or agreed to in writing, software
200
+ distributed under the License is distributed on an "AS IS" BASIS,
201
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202
+ See the License for the specific language governing permissions and
203
+ limitations under the License.
MANIFEST.in ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ include mmaction/.mim/model-index.yml
2
+ include mmaction/.mim/dataset-index.yml
3
+ recursive-include mmaction/.mim/configs *.py *.yml
4
+ recursive-include mmaction/.mim/tools *.sh *.py
README.md CHANGED
@@ -1,12 +1,92 @@
1
- ---
2
- title: Deepfake Detector
3
- emoji: 💻
4
- colorFrom: pink
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.47.2
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: GenVidBench - Video Action Recognition
3
+ emoji: 🎬
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ short_description: State-of-the-art video action recognition using MMAction2
12
+ ---
13
+
14
+ # GenVidBench - Video Action Recognition
15
+
16
+ A powerful video analysis tool that uses state-of-the-art deep learning models to recognize actions and activities in videos. Built on top of MMAction2 framework with a user-friendly Gradio interface.
17
+
18
+ ## 🚀 Features
19
+
20
+ - **Action Recognition**: Identify actions and activities in videos using TSN (Temporal Segment Networks)
21
+ - **Top-5 Predictions**: Get the most likely actions with confidence scores
22
+ - **Multiple Formats**: Support for MP4, AVI, MOV, and other video formats
23
+ - **Real-time Processing**: Fast inference optimized for web deployment
24
+ - **User-friendly Interface**: Clean and intuitive Gradio web interface
25
+
26
+ ## 🎯 Model Details
27
+
28
+ This demo uses:
29
+ - **Model**: TSN (Temporal Segment Networks) with ResNet-50 backbone
30
+ - **Dataset**: Trained on Kinetics-400 dataset (400 action classes)
31
+ - **Framework**: MMAction2 (OpenMMLab)
32
+ - **Input**: RGB video frames
33
+ - **Output**: Top-5 action predictions with confidence scores
34
+
35
+ ## 🛠️ Technical Stack
36
+
37
+ - **Backend**: Python, PyTorch, MMAction2
38
+ - **Frontend**: Gradio
39
+ - **Video Processing**: OpenCV, Decord
40
+ - **Deployment**: Hugging Face Spaces
41
+
42
+ ## 📖 How to Use
43
+
44
+ 1. **Upload Video**: Click the upload area or drag and drop your video file
45
+ 2. **Wait for Processing**: The model will analyze your video (usually takes a few seconds)
46
+ 3. **View Results**: See the top 5 predicted actions with confidence scores
47
+
48
+ ## 💡 Tips for Best Results
49
+
50
+ - **Video Length**: Shorter videos (under 30 seconds) process faster
51
+ - **Video Quality**: Clear, well-lit videos work best
52
+ - **Action Clarity**: Videos with clear, distinct actions yield better results
53
+ - **Supported Formats**: MP4, AVI, MOV, and other common video formats
54
+
55
+ ## 🔬 Supported Actions
56
+
57
+ The model can recognize 400 different action classes from the Kinetics-400 dataset, including:
58
+ - Sports activities (basketball, soccer, tennis, etc.)
59
+ - Daily activities (cooking, cleaning, reading, etc.)
60
+ - Physical exercises (push-ups, jumping jacks, etc.)
61
+ - Musical activities (playing instruments, singing, etc.)
62
+ - And many more!
63
+
64
+ ## 🏗️ Architecture
65
+
66
+ ```
67
+ Video Input → Frame Sampling → Feature Extraction → Classification → Top-5 Predictions
68
+ ```
69
+
70
+ ## 📊 Performance
71
+
72
+ - **Accuracy**: State-of-the-art performance on Kinetics-400
73
+ - **Speed**: Optimized for real-time inference
74
+ - **Memory**: Efficient GPU/CPU utilization
75
+
76
+ ## 🤝 Contributing
77
+
78
+ This project is part of the GenVidBench framework. Contributions are welcome!
79
+
80
+ ## 📄 License
81
+
82
+ This project is licensed under the Apache License 2.0 - see the LICENSE file for details.
83
+
84
+ ## 🙏 Acknowledgments
85
+
86
+ - [MMAction2](https://github.com/open-mmlab/mmaction2) - The underlying framework
87
+ - [OpenMMLab](https://openmmlab.com/) - For the excellent computer vision tools
88
+ - [Hugging Face](https://huggingface.co/) - For the deployment platform
89
+
90
+ ---
91
+
92
+ **Note**: This is a demonstration of video action recognition capabilities. For production use, consider additional validation and error handling.
README_zh-CN.md ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <img src="https://github.com/open-mmlab/mmaction2/raw/main/resources/mmaction2_logo.png" width="600"/>
3
+ <div>&nbsp;</div>
4
+ <div align="center">
5
+ <b><font size="5">OpenMMLab 官网</font></b>
6
+ <sup>
7
+ <a href="https://openmmlab.com">
8
+ <i><font size="4">HOT</font></i>
9
+ </a>
10
+ </sup>
11
+ &nbsp;&nbsp;&nbsp;&nbsp;
12
+ <b><font size="5">OpenMMLab 开放平台</font></b>
13
+ <sup>
14
+ <a href="https://platform.openmmlab.com">
15
+ <i><font size="4">TRY IT OUT</font></i>
16
+ </a>
17
+ </sup>
18
+ </div>
19
+
20
+ [![Documentation](https://readthedocs.org/projects/mmaction2/badge/?version=latest)](https://mmaction2.readthedocs.io/en/latest/)
21
+ [![actions](https://github.com/open-mmlab/mmaction2/workflows/build/badge.svg)](https://github.com/open-mmlab/mmaction2/actions)
22
+ [![codecov](https://codecov.io/gh/open-mmlab/mmaction2/branch/main/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmaction2)
23
+ [![PyPI](https://img.shields.io/pypi/v/mmaction2)](https://pypi.org/project/mmaction2/)
24
+ [![LICENSE](https://img.shields.io/github/license/open-mmlab/mmaction2.svg)](https://github.com/open-mmlab/mmaction2/blob/main/LICENSE)
25
+ [![Average time to resolve an issue](https://isitmaintained.com/badge/resolution/open-mmlab/mmaction2.svg)](https://github.com/open-mmlab/mmaction2/issues)
26
+ [![Percentage of issues still open](https://isitmaintained.com/badge/open/open-mmlab/mmaction2.svg)](https://github.com/open-mmlab/mmaction2/issues)
27
+
28
+ [📘中文文档](https://mmaction2.readthedocs.io/zh_CN/latest/index.html) |
29
+ [🛠️安装指南](https://mmaction2.readthedocs.io/zh_CN/latest/get_started/installation.html) |
30
+ [👀模型库](https://mmaction2.readthedocs.io/zh_CN/latest/modelzoo_statistics.html) |
31
+ [🆕更新日志](https://mmaction2.readthedocs.io/en/latest/notes/changelog.html) |
32
+ [🚀进行中项目](https://github.com/open-mmlab/mmaction2/projects) |
33
+ [🤔报告问题](https://github.com/open-mmlab/mmaction2/issues/new/choose)
34
+
35
+ </div>
36
+
37
+ <div align="center">
38
+ <a href="https://openmmlab.medium.com/" style="text-decoration:none;">
39
+ <img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
40
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
41
+ <a href="https://discord.com/channels/1037617289144569886/1046608014234370059" style="text-decoration:none;">
42
+ <img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
43
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
44
+ <a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
45
+ <img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
46
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
47
+ <a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
48
+ <img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
49
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
50
+ <a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
51
+ <img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
52
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
53
+ <a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
54
+ <img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
55
+ </div>
56
+
57
+ [English](/README.md) | 简体中文
58
+
59
+ ## 📄 目录
60
+
61
+ - [📄 目录](#-目录)
62
+ - [🥳 🚀 最新进展](#--最新进展-)
63
+ - [📖 简介](#-简介-)
64
+ - [🎁 主要功能](#-主要功能-)
65
+ - [🛠️ 安装](#️-安装-)
66
+ - [👀 模型库](#-模型库-)
67
+ - [👨‍🏫 新手入门](#-新手入门-)
68
+ - [🎫 许可证](#-许可证-)
69
+ - [🖊️ 引用](#️-引用-)
70
+ - [🙌 参与贡献](#-参与贡献-)
71
+ - [🤝 致谢](#-致谢-)
72
+ - [🏗️ OpenMMLab 的其他项目](#️-openmmlab-的其他项目-)
73
+ - [❤️ 欢迎加入 OpenMMLab 社区](#️-欢迎加入-openmmlab-社区-)
74
+
75
+ ## 🥳 🚀 最新进展 [🔝](#-table-of-contents)
76
+
77
+ **默认分支已经从 `master` (当前的`0.x`) 切换到 `main`(之前的 `1.x`),我们建议用户更新至最新版本,其支持更多模型,更强的预训练权重,以及更简洁的代码实现。详情请参阅[迁移指南](https://mmaction2.readthedocs.io/zh_cn/latest/migration.html)**
78
+
79
+ **Release (2023.07.04)**: v1.1.0 支持以下新功能:
80
+
81
+ - 支持基于 CLIP 的多模态模型: ActionCLIP(Arxiv'2021) 和 CLIP4clip(ArXiv'2022)
82
+ - 支持丰富的 project: 手势识别, 时空行为检测 tutorial, 以及基于 [MMRazor](https://github.com/open-mmlab/mmrazor) 的知识蒸馏
83
+ - 支持 HACS-segments 数据集(ICCV'2019), MultiSports 数据集(ICCV'2021), Kinetics-710 数据集(Arxiv'2022)
84
+ - 支持 VideoMAE V2(CVPR'2023), VideoMAE(NeurIPS'2022) 支持时空行为检测任务
85
+ - 支持 TCANet(CVPR'2021)
86
+ - 支持 [纯 Python 风格的配置文件](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta) 和使用 MIM 一键下载数据集
87
+
88
+ ## 📖 简介 [🔝](#-table-of-contents)
89
+
90
+ MMAction2 是一款基于 PyTorch 开发的行为识别开源工具包,是 [open-mmlab](https://github.com/open-mmlab) 项目的一个子项目。
91
+
92
+ <div align="center">
93
+ <img src="https://github.com/open-mmlab/mmaction2/raw/main/resources/mmaction2_overview.gif" width="380px">
94
+ <img src="https://user-images.githubusercontent.com/34324155/123989146-2ecae680-d9fb-11eb-916b-b9db5563a9e5.gif" width="380px">
95
+ <p style="font-size:1.5vw;"> Kinetics-400 数据集行为识别结果(左) 和 NTU-RGB+D-120 数据集基于骨架的行为识别结果(右)</p>
96
+ </div>
97
+
98
+ <div align="center">
99
+ <img src="https://user-images.githubusercontent.com/30782254/155710881-bb26863e-fcb4-458e-b0c4-33cd79f96901.gif" width="580px"/><br>
100
+ <p style="font-size:1.5vw;">Kinetics-400 数据集基于骨骼点的时空行为检测及视频行为识别结果</p>
101
+ </div>
102
+ <div align="center">
103
+ <img src="https://github.com/open-mmlab/mmaction2/raw/main/resources/spatio-temporal-det.gif" width="800px"/><br>
104
+ <p style="font-size:1.5vw;">AVA-2.1 数据集时空行为检测结果</p>
105
+ </div>
106
+
107
+ ## 🎁 主要功能 [🔝](#-table-of-contents)
108
+
109
+ - **模块化设计**: 我们将视频理解框架拆分成了不同模块,用户可以很方便地通过组合不同的模块来构建出自定义的视频理解框架。
110
+
111
+ - **支持五种主要的视频理解任务**: MMAction2 为视频理解任务实现了多种多样的算法,包括行为识别,时序动作定位,时空动作检测,基于骨骼点的行为识别,以及视频检索。
112
+
113
+ - **详尽的单元测试和文档**:我们提供了详尽的文档和 API 参考手册,以及单元测试。
114
+
115
+ ## 🛠️ 安装 [🔝](#-table-of-contents)
116
+
117
+ MMAction2依赖于 [PyTorch](https://pytorch.org/),[MMCV](https://github.com/open-mmlab/mmcv),[MMEngine](https://github.com/open-mmlab/mmengine),[MMDetection](https://github.com/open-mmlab/mmdetection) (可选)和 [MMPose](https://github.com/open-mmlab/mmpose) (可选)
118
+
119
+ 具体步骤请参考 [安装文档](https://mmaction2.readthedocs.io/zh_cn/latest/get_started/installation.html)。
120
+
121
+ <details close>
122
+ <summary>快速安装</summary>
123
+
124
+ ```shell
125
+ conda create --name openmmlab python=3.8 -y
126
+ conda activate openmmlab
127
+ conda install pytorch torchvision -c pytorch # 该命令将自动安装最新版的 PyTorch 和 cudatoolkit,请确认此是否匹配你的当前环境。
128
+ pip install -U openmim
129
+ mim install mmengine
130
+ mim install mmcv
131
+ mim install mmdet # 可选
132
+ mim install mmpose # 可选
133
+ git clone https://github.com/open-mmlab/mmaction2.git
134
+ cd mmaction2
135
+ pip install -v -e .
136
+ ```
137
+
138
+ </details>
139
+
140
+ ## 👀 模型库 [🔝](#-table-of-contents)
141
+
142
+ 结果及模型位于[模型库](https://mmaction2.readthedocs.io/zh_cn/latest/modelzoo_statistics.html)
143
+
144
+ <details close>
145
+
146
+ <summary>模型支持</summary>
147
+
148
+ <table style="margin-left:auto;margin-right:auto;font-size:1.3vw;padding:3px 5px;text-align:center;vertical-align:center;">
149
+ <tr>
150
+ <td colspan="5" style="font-weight:bold;">行为识别</td>
151
+ </tr>
152
+ <tr>
153
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/c3d/README.md">C3D</a> (CVPR'2014)</td>
154
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tsn/README.md">TSN</a> (ECCV'2016)</td>
155
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/i3d/README.md">I3D</a> (CVPR'2017)</td>
156
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/c2d/README.md">C2D</a> (CVPR'2018)</td>
157
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/i3d/README.md">I3D Non-Local</a> (CVPR'2018)</td>
158
+ </tr>
159
+ <tr>
160
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/r2plus1d/README.md">R(2+1)D</a> (CVPR'2018)</td>
161
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/trn/README.md">TRN</a> (ECCV'2018)</td>
162
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tsm/README.md">TSM</a> (ICCV'2019)</td>
163
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tsm/README.md">TSM Non-Local</a> (ICCV'2019)</td>
164
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/slowonly/README.md">SlowOnly</a> (ICCV'2019)</td>
165
+ </tr>
166
+ <tr>
167
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/slowfast/README.md">SlowFast</a> (ICCV'2019)</td>
168
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/csn/README.md">CSN</a> (ICCV'2019)</td>
169
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tin/README.md">TIN</a> (AAAI'2020)</td>
170
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tpn/README.md">TPN</a> (CVPR'2020)</td>
171
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/x3d/README.md">X3D</a> (CVPR'2020)</td>
172
+ </tr>
173
+ <tr>
174
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition_audio/resnet/README.md">MultiModality: Audio</a> (ArXiv'2020)</td>
175
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tanet/README.md">TANet</a> (ArXiv'2020)</td>
176
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/timesformer/README.md">TimeSformer</a> (ICML'2021)</td>
177
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/projects/actionclip/README.md">ActionCLIP</a> (ArXiv'2021)</td>
178
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/swin/README.md">VideoSwin</a> (CVPR'2022)</td>
179
+ </tr>
180
+ <tr>
181
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/videomae/README.md">VideoMAE</a> (NeurIPS'2022)</td>
182
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/mvit/README.md">MViT V2</a> (CVPR'2022)</td>
183
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/uniformer/README.md">UniFormer V1</a> (ICLR'2022)</td>
184
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/uniformerv2/README.md">UniFormer V2</a> (Arxiv'2022)</td>
185
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/videomaev2/README.md">VideoMAE V2</a> (CVPR'2023)</td>
186
+ </tr>
187
+ <tr>
188
+ <td colspan="5" style="font-weight:bold;">时序动作定位</td>
189
+ </tr>
190
+ <tr>
191
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/localization/bsn/README.md">BSN</a> (ECCV'2018)</td>
192
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/localization/bmn/README.md">BMN</a> (ICCV'2019)</td>
193
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/localization/tcanet/README.md">TCANet</a> (CVPR'2021)</td>
194
+ <td></td>
195
+ <td></td>
196
+ </tr>
197
+ <tr>
198
+ <td colspan="5" style="font-weight:bold;">时空行为检测</td>
199
+ </tr>
200
+ <tr>
201
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/detection/acrn/README.md">ACRN</a> (ECCV'2018)</td>
202
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/detection/slowonly/README.md">SlowOnly+Fast R-CNN</a> (ICCV'2019)</td>
203
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/detection/slowfast/README.md">SlowFast+Fast R-CNN</a> (ICCV'2019)</td>
204
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/detection/lfb/README.md">LFB</a> (CVPR'2019)</td>
205
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/videomae/README.md">VideoMAE</a> (NeurIPS'2022)</td>
206
+ </tr>
207
+ <tr>
208
+ <td colspan="5" style="font-weight:bold;">基于骨骼点的行为识别</td>
209
+ </tr>
210
+ <tr>
211
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/skeleton/stgcn/README.md">ST-GCN</a> (AAAI'2018)</td>
212
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/skeleton/2s-agcn/README.md">2s-AGCN</a> (CVPR'2019)</td>
213
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/skeleton/posec3d/README.md">PoseC3D</a> (CVPR'2022)</td>
214
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/skeleton/stgcnpp/README.md">STGCN++</a> (ArXiv'2022)</td>
215
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/projects/ctrgcn/README.md">CTRGCN</a> (CVPR'2021)</td>
216
+ </tr>
217
+ <tr>
218
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/projects/msg3d/README.md">MSG3D</a> (CVPR'2020)</td>
219
+ <td></td>
220
+ <td></td>
221
+ <td></td>
222
+ <td></td>
223
+ </tr>
224
+ <tr>
225
+ <td colspan="5" style="font-weight:bold;">视频检索</td>
226
+ </tr>
227
+ <tr>
228
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/retrieval/clip4clip/README.md">CLIP4Clip</a> (ArXiv'2022)</td>
229
+ <td></td>
230
+ <td></td>
231
+ <td></td>
232
+ <td></td>
233
+ </tr>
234
+
235
+ </table>
236
+
237
+ </details>
238
+
239
+ <details close>
240
+
241
+ <summary>数据集支持</summary>
242
+
243
+ <table style="margin-left:auto;margin-right:auto;font-size:1.3vw;padding:3px 5px;text-align:center;vertical-align:center;">
244
+ <tr>
245
+ <td colspan="4" style="font-weight:bold;">行为识别</td>
246
+ </tr>
247
+ <tr>
248
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/hmdb51/README.md">HMDB51</a> (<a href="https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/">官网</a>) (ICCV'2011)</td>
249
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/ucf101/README.md">UCF101</a> (<a href="https://www.crcv.ucf.edu/research/data-sets/ucf101/">官网</a>) (CRCV-IR-12-01)</td>
250
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/activitynet/README.md">ActivityNet</a> (<a href="http://activity-net.org/">官网</a>) (CVPR'2015)</td>
251
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/kinetics/README.md">Kinetics-[400/600/700]</a> (<a href="https://deepmind.com/research/open-source/kinetics/">官网</a>) (CVPR'2017)</td>
252
+ </tr>
253
+ <tr>
254
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/sthv1/README.md">SthV1</a> (ICCV'2017)</td>
255
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/sthv2/README.md">SthV2</a> (<a href="https://developer.qualcomm.com/software/ai-datasets/something-something">官网</a>) (ICCV'2017)</td>
256
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/diving48/README.md">Diving48</a> (<a href="http://www.svcl.ucsd.edu/projects/resound/dataset.html">官网</a>) (ECCV'2018)</td>
257
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/jester/README.md">Jester</a> (<a href="https://developer.qualcomm.com/software/ai-datasets/jester">官网</a>) (ICCV'2019)</td>
258
+ </tr>
259
+ <tr>
260
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/mit/README.md">Moments in Time</a> (<a href="http://moments.csail.mit.edu/">官网</a>) (TPAMI'2019)</td>
261
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/mmit/README.md">Multi-Moments in Time</a> (<a href="http://moments.csail.mit.edu/challenge_iccv_2019.html">官网</a>) (ArXiv'2019)</td>
262
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/hvu/README.md">HVU</a> (<a href="https://github.com/holistic-video-understanding/HVU-Dataset">官网</a>) (ECCV'2020)</td>
263
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/omnisource/README.md">OmniSource</a> (<a href="https://kennymckormick.github.io/omnisource/">官网</a>) (ECCV'2020)</td>
264
+ </tr>
265
+ <tr>
266
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/gym/README.md">FineGYM</a> (<a href="https://sdolivia.github.io/FineGym/">官网</a>) (CVPR'2020)</td>
267
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/kinetics710/README.md">Kinetics-710</a> (<a href="https://arxiv.org/pdf/2211.09552.pdf">官网</a>) (Arxiv'2022)</td>
268
+ <td></td>
269
+ <td></td>
270
+ </tr>
271
+ <tr>
272
+ <td colspan="4" style="font-weight:bold;">时序动作定位</td>
273
+ </tr>
274
+ <tr>
275
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/thumos14/README.md">THUMOS14</a> (<a href="https://www.crcv.ucf.edu/THUMOS14/download.html">官网</a>) (THUMOS Challenge 2014)</td>
276
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/activitynet/README.md">ActivityNet</a> (<a href="http://activity-net.org/">官网</a>) (CVPR'2015)</td>
277
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/hacs/README.md">HACS</a> (<a href="https://github.com/hangzhaomit/HACS-dataset">官网</a>) (ICCV'2019)</td>
278
+ <td></td>
279
+ </tr>
280
+ <tr>
281
+ <td colspan="4" style="font-weight:bold;">时空行为检测</td>
282
+ </tr>
283
+ <tr>
284
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/ucf101_24/README.md">UCF101-24*</a> (<a href="http://www.thumos.info/download.html">官网</a>) (CRCV-IR-12-01)</td>
285
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/jhmdb/README.md">JHMDB*</a> (<a href="http://jhmdb.is.tue.mpg.de/">官网</a>) (ICCV'2015)</td>
286
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/ava/README.md">AVA</a> (<a href="https://research.google.com/ava/index.html">官网</a>) (CVPR'2018)</td>
287
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/ava_kinetics/README.md">AVA-Kinetics</a> (<a href="https://research.google.com/ava/index.html">官网</a>) (Arxiv'2020)</td>
288
+ </tr>
289
+ <tr>
290
+ <td colspan="4" style="font-weight:bold;">基于骨架的行为识别</td>
291
+ </tr>
292
+ <tr>
293
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/skeleton/README.md">PoseC3D-FineGYM</a> (<a href="https://kennymckormick.github.io/posec3d/">官网</a>) (ArXiv'2021)</td>
294
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/skeleton/README.md">PoseC3D-NTURGB+D</a> (<a href="https://kennymckormick.github.io/posec3d/">官网</a>) (ArXiv'2021)</td>
295
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/skeleton/README.md">PoseC3D-UCF101</a> (<a href="https://kennymckormick.github.io/posec3d/">官网</a>) (ArXiv'2021)</td>
296
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/skeleton/README.md">PoseC3D-HMDB51</a> (<a href="https://kennymckormick.github.io/posec3d/">官网</a>) (ArXiv'2021)</td>
297
+ </tr>
298
+ <tr>
299
+ <td colspan="4" style="font-weight:bold;">视频检索</td>
300
+ </tr>
301
+ <tr>
302
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/video_retrieval/README.md">MSRVTT</a> (<a href="https://www.microsoft.com/en-us/research/publication/msr-vtt-a-large-video-description-dataset-for-bridging-video-and-language/">官网</a>) (CVPR'2016)</td>
303
+ <td></td>
304
+ <td></td>
305
+ <td></td>
306
+ </tr>
307
+ </table>
308
+
309
+ </details>
310
+
311
+ ## 👨‍🏫 新手入门 [🔝](#-table-of-contents)
312
+
313
+ 我们提供了一系列简明的教程,帮助新用户轻松上手使用:
314
+
315
+ - [从 MMAction2 0.X 迁移](https://mmaction2.readthedocs.io/zh_cn/latest/migration.html)
316
+ - [学习配置相关知识](https://mmaction2.readthedocs.io/zh_cn/latest/user_guides/config.html)
317
+ - [准备数据集](https://mmaction2.readthedocs.io/zh_cn/latest/user_guides/prepare_dataset.html)
318
+ - [使用现有模型进行推理](https://mmaction2.readthedocs.io/zh_cn/latest/user_guides/inference.html)
319
+ - [训练与测试](https://mmaction2.readthedocs.io/zh_cn/latest/user_guides/train_test.html)
320
+
321
+ <details close>
322
+ <summary>基于 MMAction2 的社区工作</summary>
323
+
324
+ - Video Swin Transformer. [\[paper\]](https://arxiv.org/abs/2106.13230)[\[github\]](https://github.com/SwinTransformer/Video-Swin-Transformer)
325
+ - Evidential Deep Learning for Open Set Action Recognition, ICCV 2021 **Oral**. [\[paper\]](https://arxiv.org/abs/2107.10161)[\[github\]](https://github.com/Cogito2012/DEAR)
326
+ - Rethinking Self-supervised Correspondence Learning: A Video Frame-level Similarity Perspective, ICCV 2021 **Oral**. [\[paper\]](https://arxiv.org/abs/2103.17263)[\[github\]](https://github.com/xvjiarui/VFS)
327
+
328
+ </details>
329
+
330
+ ## 🎫 许可证 [🔝](#-table-of-contents)
331
+
332
+ 本项目基于 [Apache 2.0 license](LICENSE) 发布。
333
+
334
+ ## 🖊️ 引用 [🔝](#-table-of-contents)
335
+
336
+ 如你发现本项目对你的研究有帮助,请参考如下 bibtex 引用 MMAction2。
337
+
338
+ ```BibTeX
339
+ @misc{2020mmaction2,
340
+ title={OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark},
341
+ author={MMAction2 Contributors},
342
+ howpublished = {\url{https://github.com/open-mmlab/mmaction2}},
343
+ year={2020}
344
+ }
345
+ ```
346
+
347
+ ## 🙌 参与贡献 [🔝](#-table-of-contents)
348
+
349
+ 我们感谢所有的贡献者为改进和提升 MMAction2 所作出的努力。请参考[贡献指南](https://github.com/open-mmlab/mmcv/blob/2.x/CONTRIBUTING.md)来了解参与项目贡献的相关指引。
350
+
351
+ ## 🤝 致谢 [🔝](#-table-of-contents)
352
+
353
+ MMAction2 是一款由来自不同高校和企业的研发人员共同参与贡献的开源项目。我们感谢所有为项目提供算法复现和新功能支持的贡献者,以及提供宝贵反馈的用户。 我们希望此工具箱可以帮助大家来复现已有的方法和开发新的方法,从而为研究社区贡献力量。
354
+
355
+ ## 🏗️ OpenMMLab 的其他项目 [🔝](#-table-of-contents)
356
+
357
+ - [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab 深度学习模型训练基础库
358
+ - [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab 计算机视觉基础库
359
+ - [MIM](https://github.com/open-mmlab/mim): MIM 是 OpenMMlab 项目、算法、模型的统一入口
360
+ - [MMEval](https://github.com/open-mmlab/mmeval): 统一开放的跨框架算法评测库
361
+ - [MMPreTrain](https://github.com/open-mmlab/mmpretrain): OpenMMLab 深度学习预训练工具箱
362
+ - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱
363
+ - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D 目标检测平台
364
+ - [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab 旋转框检测工具箱与测试基准
365
+ - [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO 系列工具箱与测试基准
366
+ - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱
367
+ - [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包
368
+ - [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱
369
+ - [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 人体参数化模型工具箱与测试基准
370
+ - [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监督学习工具箱与测试基准
371
+ - [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准
372
+ - [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准
373
+ - [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱
374
+ - [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台
375
+ - [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab 光流估计工具箱与测试基准
376
+ - [MMagic](https://github.com/open-mmlab/mmagic): OpenMMLab 新一代人工智能内容生成(AIGC)工具箱
377
+ - [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 图片视频生成模型工具箱
378
+ - [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架
379
+ - [Playground](https://github.com/open-mmlab/playground): 收集和展示 OpenMMLab 相关的前沿、有趣的社区项目
380
+
381
+ ## ❤️ 欢迎加入 OpenMMLab 社区 [🔝](#-table-of-contents)
382
+
383
+ 扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab),扫描下方微信二维码添加喵喵好友,进入 MMAction2 微信交流社群。【加好友申请格式:研究方向+地区+学校/公司+姓名】
384
+
385
+ <div align="center">
386
+ <img src="./resources/zhihu_qrcode.jpg" height="400"/> <img src="./resources/miaomiao_qrcode.jpg" height="400"/>
387
+ </div>
388
+
389
+ 我们会在 OpenMMLab 社区为大家
390
+
391
+ - 📢 分享 AI 框架的前沿核心技术
392
+ - 💻 解读 PyTorch 常用模块源码
393
+ - 📰 发布 OpenMMLab 的相关新闻
394
+ - 🚀 介绍 OpenMMLab 开发的前沿算法
395
+ - 🏃 获取更高效的问题答疑和意见反馈
396
+ - 🔥 提供与各行各业开发者充分交流的平台
397
+
398
+ 干货满满 📘,等你来撩 💗,OpenMMLab 社区期待您的加入 👬
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from operator import itemgetter
4
+ from mmaction.apis import init_recognizer, inference_recognizer
5
+ import gradio as gr
6
+
7
+ # Set paths for Hugging Face Spaces
8
+ config_file = 'demo/demo_configs/tsn_r50_1x1x8_video_infer.py'
9
+ checkpoint_file = 'checkpoints/tsn_r50_8xb32-1x1x8-100e_kinetics400-rgb_20220818-2692d16c.pth'
10
+
11
+ # Download model checkpoint if it doesn't exist
12
def download_checkpoint(path=None):
    """Verify that the model checkpoint exists on disk.

    Args:
        path (str | None): Checkpoint file path to check. Defaults to the
            module-level ``checkpoint_file`` when not given (backward
            compatible with the original zero-argument call).

    Returns:
        bool: True if the checkpoint is present, False otherwise. When the
        file is missing, its parent directory is created so a later manual
        or scripted download has somewhere to land, and instructions are
        printed for the user.
    """
    if path is None:
        path = checkpoint_file
    if os.path.exists(path):
        return True
    # Create the checkpoint's own parent directory (the original hard-coded
    # 'checkpoints', which broke for any other path); fall back to
    # 'checkpoints' when the path has no directory component.
    os.makedirs(os.path.dirname(path) or 'checkpoints', exist_ok=True)
    print("Model checkpoint not found. Please run 'python download_model.py' to download it.")
    print("Or place the checkpoint file manually at:", path)
    return False
19
+
20
# Initialize model
# NOTE: this runs at import time — the model is loaded once and reused by
# every Gradio request. If the checkpoint or config is missing/broken the
# process exits immediately rather than serving a half-initialized app.
print("Initializing model...")
if not download_checkpoint():
    print("❌ Cannot initialize model without checkpoint. Exiting...")
    exit(1)

try:
    # device='cpu' — presumably chosen for Hugging Face Spaces CPU-only
    # hardware; TODO confirm before enabling GPU inference.
    model = init_recognizer(config_file, checkpoint_file, device='cpu')
    print("✅ Model loaded successfully!")
except Exception as e:
    # Broad catch is deliberate here: any failure mode (bad config, corrupt
    # checkpoint, missing dependency) should abort startup with a message.
    print(f"❌ Error loading model: {e}")
    print("Please check that the config file and checkpoint are correct.")
    exit(1)
33
+ # test a single video and show the result:
34
+ # video = 'demo.mp4'
35
+ # label = '../tools/data/kinetics/label_map_k400.txt'
36
+ # results = inference_recognizer(model, video)
37
+
38
+ # pred_scores = results.pred_score.tolist()
39
+ # score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))
40
+ # score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)
41
+ # top5_label = score_sorted[:5]
42
+
43
+ # labels = open(label).readlines()
44
+ # labels = [x.strip() for x in labels]
45
+ # results = [(labels[k[0]], k[1]) for k in top5_label]
46
+
47
+
48
+ # # show the results
49
+ # for result in results:
50
+ # print(f'{result[0]}: ', result[1])
51
+
52
+
53
def analyze_video(video):
    """Run action recognition on an uploaded clip.

    Args:
        video: Path to the uploaded video file (Gradio passes a filepath),
            or None when nothing was uploaded.

    Returns:
        str: Human-readable top-5 predictions, or an error message.
    """
    if video is None:
        return "Please upload a video file."

    try:
        print(f"Processing video: {video}")
        results = inference_recognizer(model, video)

        # Without per-class scores we can only echo the raw result object.
        if not hasattr(results, 'pred_score'):
            return f"Analysis complete. Raw result: {results}"

        scores = results.pred_score.tolist()
        # Rank (class_index, score) pairs by score, keep the best five.
        ranked = sorted(enumerate(scores), key=itemgetter(1), reverse=True)[:5]

        # Map class indices to names when the label map is available.
        label_file = 'tools/data/kinetics/label_map_k400.txt'
        if os.path.exists(label_file):
            with open(label_file, 'r') as f:
                names = [line.strip() for line in f.readlines()]
            top5 = [(names[idx], f"{score:.4f}") for idx, score in ranked]
        else:
            top5 = [(f"Class {idx}", f"{score:.4f}") for idx, score in ranked]

        pieces = ["Top 5 Predictions:"]
        for rank, (name, score) in enumerate(top5, 1):
            pieces.append(f"{rank}. {name}: {score}")
        return "\n".join(pieces) + "\n"

    except Exception as e:
        # Surface any failure to the UI textbox instead of crashing the app.
        return f"Error processing video: {str(e)}"
88
+
89
+ # Create Gradio interface
90
# Gradio UI definition. Built at import time; `demo.launch()` starts the
# web server only when this file is executed directly (e.g. on HF Spaces).
demo = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(label="Upload Video", height=300),
    outputs=gr.Textbox(label="Analysis Results", lines=10),
    title="🎬 GenVidBench - Video Action Recognition",
    description="""
    Upload a video to analyze its content using state-of-the-art action recognition models.
    This demo uses TSN (Temporal Segment Networks) trained on Kinetics-400 dataset.

    **Supported formats:** MP4, AVI, MOV, etc.
    **Max duration:** Recommended under 30 seconds for faster processing.
    """,
    # BUG FIX: the original `[ [...] if os.path.exists(...) else None ]`
    # always produced a one-element list, so a missing demo clip yielded
    # `examples=[None]` and Gradio rendered a broken empty example. Build
    # the list conditionally and pass None to disable examples entirely.
    examples=[["demo/demo.mp4"]] if os.path.exists("demo/demo.mp4") else None,
    cache_examples=False,
    theme=gr.themes.Soft(),
    allow_flagging="never")

if __name__ == "__main__":
    demo.launch()
112
+
113
+
114
+
115
+
checkpoints/tsn_r50_8xb32-1x1x8-100e_kinetics400-rgb_20220818-2692d16c.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2692d16c712e24994aaa3cfb48f957a521e053ffb81c474e2c0b3e579c888650
3
+ size 97641409
configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Default runtime settings shared by MMAction2 configs via `_base_`.
default_scope = 'mmaction'  # registry scope for resolving unprefixed types

default_hooks = dict(
    runtime_info=dict(type='RuntimeInfoHook'),
    timer=dict(type='IterTimerHook'),
    # Log every 20 iterations; do not suppress the last partial window.
    logger=dict(type='LoggerHook', interval=20, ignore_last=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    # Save a checkpoint every epoch and track the best one automatically.
    checkpoint=dict(type='CheckpointHook', interval=1, save_best='auto'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    sync_buffers=dict(type='SyncBuffersHook'))

env_cfg = dict(
    cudnn_benchmark=False,  # disable cudnn autotuning (input sizes may vary)
    # opencv_num_threads=0 avoids oversubscription with dataloader workers.
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))

# Smooth logged scalars over a 20-iteration window, report by epoch.
log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(type='ActionVisualizer', vis_backends=vis_backends)

log_level = 'INFO'
load_from = None  # optional checkpoint to initialize weights from
resume = False    # do not auto-resume an interrupted run
configs/_base_/models/audioonly_r50.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# model settings
# Audio-only recognizer: ResNet-50-style audio backbone feeding a TSN-style
# audio classification head.
model = dict(
    type='RecognizerAudio',
    backbone=dict(
        type='ResNetAudio',
        depth=50,
        pretrained=None,  # train from scratch
        in_channels=1,    # single-channel audio feature input
        norm_eval=False),
    cls_head=dict(
        type='TSNAudioHead',
        num_classes=400,  # presumably Kinetics-400 — confirm with dataset config
        in_channels=1024,
        dropout_ratio=0.5,
        init_std=0.01,    # std for classifier weight initialization
        average_clips='prob'))  # how multi-clip scores are fused at test time
configs/_base_/models/bmn_400x100.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# model settings
# BMN (Boundary-Matching Network) temporal action proposal generator;
# filename suggests 400-dim features over 100 temporal positions.
model = dict(
    type='BMN',
    temporal_dim=100,       # temporal length of the input feature sequence
    boundary_ratio=0.5,
    num_samples=32,
    num_samples_per_bin=3,
    feat_dim=400,           # input feature dimensionality
    # Soft-NMS parameters for proposal post-processing:
    soft_nms_alpha=0.4,
    soft_nms_low_threshold=0.5,
    soft_nms_high_threshold=0.9,
    post_process_top_k=100)  # keep at most 100 proposals
configs/_base_/models/bsn_pem.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='PEM',
4
+ pem_feat_dim=32,
5
+ pem_hidden_dim=256,
6
+ pem_u_ratio_m=1,
7
+ pem_u_ratio_l=2,
8
+ pem_high_temporal_iou_threshold=0.6,
9
+ pem_low_temporal_iou_threshold=0.2,
10
+ soft_nms_alpha=0.75,
11
+ soft_nms_low_threshold=0.65,
12
+ soft_nms_high_threshold=0.9,
13
+ post_process_top_k=100)
configs/_base_/models/bsn_tem.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='TEM',
4
+ temporal_dim=100,
5
+ boundary_ratio=0.1,
6
+ tem_feat_dim=400,
7
+ tem_hidden_dim=512,
8
+ tem_match_threshold=0.5)
configs/_base_/models/c2d_r50.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer3D',
3
+ backbone=dict(
4
+ type='C2D',
5
+ depth=50,
6
+ pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',
7
+ norm_eval=False),
8
+ cls_head=dict(
9
+ type='I3DHead',
10
+ num_classes=400,
11
+ in_channels=2048,
12
+ spatial_type='avg',
13
+ dropout_ratio=0.5,
14
+ init_std=0.01,
15
+ average_clips='prob'),
16
+ data_preprocessor=dict(
17
+ type='ActionDataPreprocessor',
18
+ mean=[123.675, 116.28, 103.53],
19
+ std=[58.395, 57.12, 57.375],
20
+ format_shape='NCTHW'))
configs/_base_/models/c3d_sports1m_pretrained.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(
5
+ type='C3D',
6
+ pretrained= # noqa: E251
7
+ 'https://download.openmmlab.com/mmaction/recognition/c3d/c3d_sports1m_pretrain_20201016-dcc47ddc.pth', # noqa: E501
8
+ style='pytorch',
9
+ conv_cfg=dict(type='Conv3d'),
10
+ norm_cfg=None,
11
+ act_cfg=dict(type='ReLU'),
12
+ dropout_ratio=0.5,
13
+ init_std=0.005),
14
+ cls_head=dict(
15
+ type='I3DHead',
16
+ num_classes=101,
17
+ in_channels=4096,
18
+ spatial_type=None,
19
+ dropout_ratio=0.5,
20
+ init_std=0.01,
21
+ average_clips='prob'),
22
+ data_preprocessor=dict(
23
+ type='ActionDataPreprocessor',
24
+ mean=[104, 117, 128],
25
+ std=[1, 1, 1],
26
+ format_shape='NCTHW'),
27
+ train_cfg=None,
28
+ test_cfg=None)
configs/_base_/models/i3d_r50.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(
5
+ type='ResNet3d',
6
+ pretrained2d=True,
7
+ pretrained='torchvision://resnet50',
8
+ depth=50,
9
+ conv1_kernel=(5, 7, 7),
10
+ conv1_stride_t=2,
11
+ pool1_stride_t=2,
12
+ conv_cfg=dict(type='Conv3d'),
13
+ norm_eval=False,
14
+ inflate=((1, 1, 1), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 1, 0)),
15
+ zero_init_residual=False),
16
+ cls_head=dict(
17
+ type='I3DHead',
18
+ num_classes=400,
19
+ in_channels=2048,
20
+ spatial_type='avg',
21
+ dropout_ratio=0.5,
22
+ init_std=0.01,
23
+ average_clips='prob'),
24
+ data_preprocessor=dict(
25
+ type='ActionDataPreprocessor',
26
+ mean=[123.675, 116.28, 103.53],
27
+ std=[58.395, 57.12, 57.375],
28
+ format_shape='NCTHW'))
29
+
30
+ # This setting refers to https://github.com/open-mmlab/mmaction/blob/master/mmaction/models/tenons/backbones/resnet_i3d.py#L329-L332 # noqa: E501
configs/_base_/models/ircsn_r152.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(
5
+ type='ResNet3dCSN',
6
+ pretrained2d=False,
7
+ pretrained=None,
8
+ depth=152,
9
+ with_pool2=False,
10
+ bottleneck_mode='ir',
11
+ norm_eval=False,
12
+ zero_init_residual=False),
13
+ cls_head=dict(
14
+ type='I3DHead',
15
+ num_classes=400,
16
+ in_channels=2048,
17
+ spatial_type='avg',
18
+ dropout_ratio=0.5,
19
+ init_std=0.01,
20
+ average_clips='prob'),
21
+ data_preprocessor=dict(
22
+ type='ActionDataPreprocessor',
23
+ mean=[123.675, 116.28, 103.53],
24
+ std=[58.395, 57.12, 57.375],
25
+ format_shape='NCTHW'),
26
+ # model training and testing settings
27
+ train_cfg=None,
28
+ test_cfg=dict(max_testing_views=10))
configs/_base_/models/mvit_small.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer3D',
3
+ backbone=dict(type='MViT', arch='small', drop_path_rate=0.2),
4
+ data_preprocessor=dict(
5
+ type='ActionDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ format_shape='NCTHW'),
9
+ cls_head=dict(
10
+ type='MViTHead',
11
+ in_channels=768,
12
+ num_classes=400,
13
+ label_smooth_eps=0.1,
14
+ average_clips='prob'))
configs/_base_/models/r2plus1d_r34.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(
5
+ type='ResNet2Plus1d',
6
+ depth=34,
7
+ pretrained=None,
8
+ pretrained2d=False,
9
+ norm_eval=False,
10
+ conv_cfg=dict(type='Conv2plus1d'),
11
+ norm_cfg=dict(type='SyncBN', requires_grad=True, eps=1e-3),
12
+ conv1_kernel=(3, 7, 7),
13
+ conv1_stride_t=1,
14
+ pool1_stride_t=1,
15
+ inflate=(1, 1, 1, 1),
16
+ spatial_strides=(1, 2, 2, 2),
17
+ temporal_strides=(1, 2, 2, 2),
18
+ zero_init_residual=False),
19
+ cls_head=dict(
20
+ type='I3DHead',
21
+ num_classes=400,
22
+ in_channels=512,
23
+ spatial_type='avg',
24
+ dropout_ratio=0.5,
25
+ init_std=0.01,
26
+ average_clips='prob'),
27
+ data_preprocessor=dict(
28
+ type='ActionDataPreprocessor',
29
+ mean=[123.675, 116.28, 103.53],
30
+ std=[58.395, 57.12, 57.375],
31
+ format_shape='NCTHW'))
configs/_base_/models/slowfast_r50.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(
5
+ type='ResNet3dSlowFast',
6
+ pretrained=None,
7
+ resample_rate=8, # tau
8
+ speed_ratio=8, # alpha
9
+ channel_ratio=8, # beta_inv
10
+ slow_pathway=dict(
11
+ type='resnet3d',
12
+ depth=50,
13
+ pretrained=None,
14
+ lateral=True,
15
+ conv1_kernel=(1, 7, 7),
16
+ dilations=(1, 1, 1, 1),
17
+ conv1_stride_t=1,
18
+ pool1_stride_t=1,
19
+ inflate=(0, 0, 1, 1),
20
+ norm_eval=False),
21
+ fast_pathway=dict(
22
+ type='resnet3d',
23
+ depth=50,
24
+ pretrained=None,
25
+ lateral=False,
26
+ base_channels=8,
27
+ conv1_kernel=(5, 7, 7),
28
+ conv1_stride_t=1,
29
+ pool1_stride_t=1,
30
+ norm_eval=False)),
31
+ cls_head=dict(
32
+ type='SlowFastHead',
33
+ in_channels=2304, # 2048+256
34
+ num_classes=400,
35
+ spatial_type='avg',
36
+ dropout_ratio=0.5,
37
+ average_clips='prob'),
38
+ data_preprocessor=dict(
39
+ type='ActionDataPreprocessor',
40
+ mean=[123.675, 116.28, 103.53],
41
+ std=[58.395, 57.12, 57.375],
42
+ format_shape='NCTHW'))
configs/_base_/models/slowonly_r50.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer3D',
3
+ backbone=dict(
4
+ type='ResNet3dSlowOnly',
5
+ depth=50,
6
+ pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',
7
+ lateral=False,
8
+ conv1_kernel=(1, 7, 7),
9
+ conv1_stride_t=1,
10
+ pool1_stride_t=1,
11
+ inflate=(0, 0, 1, 1),
12
+ norm_eval=False),
13
+ cls_head=dict(
14
+ type='I3DHead',
15
+ in_channels=2048,
16
+ num_classes=400,
17
+ spatial_type='avg',
18
+ dropout_ratio=0.5,
19
+ average_clips='prob'),
20
+ data_preprocessor=dict(
21
+ type='ActionDataPreprocessor',
22
+ mean=[123.675, 116.28, 103.53],
23
+ std=[58.395, 57.12, 57.375],
24
+ format_shape='NCTHW'))
configs/_base_/models/swin_tiny.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer3D',
3
+ backbone=dict(
4
+ type='SwinTransformer3D',
5
+ arch='tiny',
6
+ pretrained=None,
7
+ pretrained2d=True,
8
+ patch_size=(2, 4, 4),
9
+ window_size=(8, 7, 7),
10
+ mlp_ratio=4.,
11
+ qkv_bias=True,
12
+ qk_scale=None,
13
+ drop_rate=0.,
14
+ attn_drop_rate=0.,
15
+ drop_path_rate=0.1,
16
+ patch_norm=True),
17
+ data_preprocessor=dict(
18
+ type='ActionDataPreprocessor',
19
+ mean=[123.675, 116.28, 103.53],
20
+ std=[58.395, 57.12, 57.375],
21
+ format_shape='NCTHW'),
22
+ cls_head=dict(
23
+ type='I3DHead',
24
+ in_channels=768,
25
+ num_classes=400,
26
+ spatial_type='avg',
27
+ dropout_ratio=0.5,
28
+ average_clips='prob'))
configs/_base_/models/tanet_r50.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer2D',
4
+ data_preprocessor=dict(
5
+ type='ActionDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.5],
7
+ std=[58.395, 57.12, 57.375],
8
+ format_shape='NCHW'),
9
+ backbone=dict(
10
+ type='TANet',
11
+ pretrained='torchvision://resnet50',
12
+ depth=50,
13
+ num_segments=8,
14
+ tam_cfg=None),
15
+ cls_head=dict(
16
+ type='TSMHead',
17
+ num_classes=400,
18
+ in_channels=2048,
19
+ spatial_type='avg',
20
+ consensus=dict(type='AvgConsensus', dim=1),
21
+ dropout_ratio=0.5,
22
+ init_std=0.001,
23
+ average_clips='prob'))
configs/_base_/models/tin_r50.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+
3
+ preprocess_cfg = dict(
4
+ mean=[123.675, 116.28, 103.53],
5
+ std=[58.395, 57.12, 57.375],
6
+ format_shape='NCHW')
7
+
8
+ model = dict(
9
+ type='Recognizer2D',
10
+ backbone=dict(
11
+ type='ResNetTIN',
12
+ pretrained='torchvision://resnet50',
13
+ depth=50,
14
+ norm_eval=False,
15
+ shift_div=4),
16
+ cls_head=dict(
17
+ type='TSMHead',
18
+ num_classes=400,
19
+ in_channels=2048,
20
+ spatial_type='avg',
21
+ consensus=dict(type='AvgConsensus', dim=1),
22
+ dropout_ratio=0.5,
23
+ init_std=0.001,
24
+ is_shift=False,
25
+ average_clips='prob'),
26
+ data_preprocessor=dict(type='ActionDataPreprocessor', **preprocess_cfg),
27
+ # model training and testing settings
28
+ train_cfg=None,
29
+ test_cfg=None)
configs/_base_/models/tpn_slowonly_r50.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer3D',
3
+ backbone=dict(
4
+ type='ResNet3dSlowOnly',
5
+ depth=50,
6
+ pretrained='torchvision://resnet50',
7
+ lateral=False,
8
+ out_indices=(2, 3),
9
+ conv1_kernel=(1, 7, 7),
10
+ conv1_stride_t=1,
11
+ pool1_stride_t=1,
12
+ inflate=(0, 0, 1, 1),
13
+ norm_eval=False),
14
+ neck=dict(
15
+ type='TPN',
16
+ in_channels=(1024, 2048),
17
+ out_channels=1024,
18
+ spatial_modulation_cfg=dict(
19
+ in_channels=(1024, 2048), out_channels=2048),
20
+ temporal_modulation_cfg=dict(downsample_scales=(8, 8)),
21
+ upsample_cfg=dict(scale_factor=(1, 1, 1)),
22
+ downsample_cfg=dict(downsample_scale=(1, 1, 1)),
23
+ level_fusion_cfg=dict(
24
+ in_channels=(1024, 1024),
25
+ mid_channels=(1024, 1024),
26
+ out_channels=2048,
27
+ downsample_scales=((1, 1, 1), (1, 1, 1))),
28
+ aux_head_cfg=dict(out_channels=400, loss_weight=0.5)),
29
+ cls_head=dict(
30
+ type='TPNHead',
31
+ num_classes=400,
32
+ in_channels=2048,
33
+ spatial_type='avg',
34
+ consensus=dict(type='AvgConsensus', dim=1),
35
+ dropout_ratio=0.5,
36
+ init_std=0.01,
37
+ average_clips='prob'),
38
+ data_preprocessor=dict(
39
+ type='ActionDataPreprocessor',
40
+ mean=[123.675, 116.28, 103.53],
41
+ std=[58.395, 57.12, 57.375],
42
+ format_shape='NCTHW'),
43
+ # model training and testing settings
44
+ train_cfg=None,
45
+ test_cfg=dict(fcn_test=True))
configs/_base_/models/tpn_tsm_r50.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer2D',
3
+ backbone=dict(
4
+ type='ResNetTSM',
5
+ pretrained='torchvision://resnet50',
6
+ depth=50,
7
+ out_indices=(2, 3),
8
+ norm_eval=False,
9
+ shift_div=8),
10
+ neck=dict(
11
+ type='TPN',
12
+ in_channels=(1024, 2048),
13
+ out_channels=1024,
14
+ spatial_modulation_cfg=dict(
15
+ in_channels=(1024, 2048), out_channels=2048),
16
+ temporal_modulation_cfg=dict(downsample_scales=(8, 8)),
17
+ upsample_cfg=dict(scale_factor=(1, 1, 1)),
18
+ downsample_cfg=dict(downsample_scale=(1, 1, 1)),
19
+ level_fusion_cfg=dict(
20
+ in_channels=(1024, 1024),
21
+ mid_channels=(1024, 1024),
22
+ out_channels=2048,
23
+ downsample_scales=((1, 1, 1), (1, 1, 1))),
24
+ aux_head_cfg=dict(out_channels=174, loss_weight=0.5)),
25
+ cls_head=dict(
26
+ type='TPNHead',
27
+ num_classes=174,
28
+ in_channels=2048,
29
+ spatial_type='avg',
30
+ consensus=dict(type='AvgConsensus', dim=1),
31
+ dropout_ratio=0.5,
32
+ init_std=0.01,
33
+ average_clips='prob'),
34
+ data_preprocessor=dict(
35
+ type='ActionDataPreprocessor',
36
+ mean=[123.675, 116.28, 103.53],
37
+ std=[58.395, 57.12, 57.375],
38
+ format_shape='NCHW'),
39
+ train_cfg=None,
40
+ test_cfg=dict(fcn_test=True))
configs/_base_/models/trn_r50.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer2D',
4
+ backbone=dict(
5
+ type='ResNet',
6
+ pretrained='torchvision://resnet50',
7
+ depth=50,
8
+ norm_eval=False,
9
+ partial_bn=True),
10
+ cls_head=dict(
11
+ type='TRNHead',
12
+ num_classes=400,
13
+ in_channels=2048,
14
+ num_segments=8,
15
+ spatial_type='avg',
16
+ relation_type='TRNMultiScale',
17
+ hidden_dim=256,
18
+ dropout_ratio=0.8,
19
+ init_std=0.001,
20
+ average_clips='prob'),
21
+ data_preprocessor=dict(
22
+ type='ActionDataPreprocessor',
23
+ mean=[123.675, 116.28, 103.53],
24
+ std=[58.395, 57.12, 57.375],
25
+ format_shape='NCHW'))
configs/_base_/models/tsm_mobilenet_v2.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ preprocess_cfg = dict(
3
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
4
+
5
+ model = dict(
6
+ type='Recognizer2D',
7
+ backbone=dict(
8
+ type='MobileNetV2TSM',
9
+ shift_div=8,
10
+ num_segments=8,
11
+ is_shift=True,
12
+ pretrained='mmcls://mobilenet_v2'),
13
+ cls_head=dict(
14
+ type='TSMHead',
15
+ num_segments=8,
16
+ num_classes=400,
17
+ in_channels=1280,
18
+ spatial_type='avg',
19
+ consensus=dict(type='AvgConsensus', dim=1),
20
+ dropout_ratio=0.5,
21
+ init_std=0.001,
22
+ is_shift=True,
23
+ average_clips='prob'),
24
+ # model training and testing settings
25
+ data_preprocessor=dict(type='ActionDataPreprocessor', **preprocess_cfg),
26
+ train_cfg=None,
27
+ test_cfg=None)
configs/_base_/models/tsm_mobileone_s4.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ preprocess_cfg = dict(
3
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
4
+
5
+ checkpoint = ('https://download.openmmlab.com/mmclassification/'
6
+ 'v0/mobileone/mobileone-s4_8xb32_in1k_20221110-28d888cb.pth')
7
+ model = dict(
8
+ type='Recognizer2D',
9
+ backbone=dict(
10
+ type='MobileOneTSM',
11
+ arch='s4',
12
+ shift_div=8,
13
+ num_segments=8,
14
+ is_shift=True,
15
+ init_cfg=dict(
16
+ type='Pretrained', checkpoint=checkpoint, prefix='backbone')),
17
+ cls_head=dict(
18
+ type='TSMHead',
19
+ num_segments=8,
20
+ num_classes=400,
21
+ in_channels=2048,
22
+ spatial_type='avg',
23
+ consensus=dict(type='AvgConsensus', dim=1),
24
+ dropout_ratio=0.5,
25
+ init_std=0.001,
26
+ is_shift=True,
27
+ average_clips='prob'),
28
+ # model training and testing settings
29
+ data_preprocessor=dict(type='ActionDataPreprocessor', **preprocess_cfg),
30
+ train_cfg=None,
31
+ test_cfg=None)
configs/_base_/models/tsm_r50.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ preprocess_cfg = dict(
2
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
3
+
4
+ model = dict(
5
+ type='Recognizer2D',
6
+ backbone=dict(
7
+ type='ResNetTSM',
8
+ pretrained='torchvision://resnet50',
9
+ depth=50,
10
+ norm_eval=False,
11
+ shift_div=8),
12
+ cls_head=dict(
13
+ type='TSMHead',
14
+ num_classes=400,
15
+ in_channels=2048,
16
+ spatial_type='avg',
17
+ consensus=dict(type='AvgConsensus', dim=1),
18
+ dropout_ratio=0.5,
19
+ init_std=0.001,
20
+ is_shift=True,
21
+ average_clips='prob'),
22
+ data_preprocessor=dict(type='ActionDataPreprocessor', **preprocess_cfg),
23
+ train_cfg=None,
24
+ test_cfg=None)
configs/_base_/models/tsn_mobileone_s0.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint = ('https://download.openmmlab.com/mmclassification/'
2
+ 'v0/mobileone/mobileone-s0_8xb32_in1k_20221110-0bc94952.pth')
3
+ model = dict(
4
+ type='Recognizer2D',
5
+ backbone=dict(
6
+ type='mmpretrain.MobileOne',
7
+ arch='s0',
8
+ init_cfg=dict(
9
+ type='Pretrained', checkpoint=checkpoint, prefix='backbone'),
10
+ norm_eval=False),
11
+ cls_head=dict(
12
+ type='TSNHead',
13
+ num_classes=400,
14
+ in_channels=1024,
15
+ spatial_type='avg',
16
+ consensus=dict(type='AvgConsensus', dim=1),
17
+ dropout_ratio=0.4,
18
+ init_std=0.01,
19
+ average_clips='prob'),
20
+ data_preprocessor=dict(
21
+ type='ActionDataPreprocessor',
22
+ mean=[123.675, 116.28, 103.53],
23
+ std=[58.395, 57.12, 57.375],
24
+ format_shape='NCHW'),
25
+ train_cfg=None,
26
+ test_cfg=None)
configs/_base_/models/tsn_r50.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer2D',
3
+ backbone=dict(
4
+ type='ResNet',
5
+ pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',
6
+ depth=50,
7
+ norm_eval=False),
8
+ cls_head=dict(
9
+ type='TSNHead',
10
+ num_classes=400,
11
+ in_channels=2048,
12
+ spatial_type='avg',
13
+ consensus=dict(type='AvgConsensus', dim=1),
14
+ dropout_ratio=0.4,
15
+ init_std=0.01,
16
+ average_clips='prob'),
17
+ data_preprocessor=dict(
18
+ type='ActionDataPreprocessor',
19
+ mean=[123.675, 116.28, 103.53],
20
+ std=[58.395, 57.12, 57.375],
21
+ format_shape='NCHW'),
22
+ train_cfg=None,
23
+ test_cfg=None)
configs/_base_/models/x3d.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(type='X3D', gamma_w=1, gamma_b=2.25, gamma_d=2.2),
5
+ cls_head=dict(
6
+ type='X3DHead',
7
+ in_channels=432,
8
+ num_classes=400,
9
+ spatial_type='avg',
10
+ dropout_ratio=0.5,
11
+ fc1_bias=False,
12
+ average_clips='prob'),
13
+ data_preprocessor=dict(
14
+ type='ActionDataPreprocessor',
15
+ mean=[114.75, 114.75, 114.75],
16
+ std=[57.38, 57.38, 57.38],
17
+ format_shape='NCTHW'),
18
+ # model training and testing settings
19
+ train_cfg=None,
20
+ test_cfg=None)
configs/_base_/schedules/adam_20e.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train_cfg = dict(
2
+ type='EpochBasedTrainLoop', max_epochs=20, val_begin=1, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ param_scheduler = [
7
+ dict(
8
+ type='MultiStepLR',
9
+ begin=0,
10
+ end=20,
11
+ by_epoch=True,
12
+ milestones=[10],
13
+ gamma=0.1)
14
+ ]
15
+
16
+ optimizer = dict(
17
+ type='Adam', lr=0.01, weight_decay=0.00001) # this lr is used for 1 gpus
18
+
19
+ optim_wrapper = dict(
20
+ optimizer=optimizer, clip_grad=dict(max_norm=40, norm_type=2))
configs/_base_/schedules/sgd_100e.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train_cfg = dict(
2
+ type='EpochBasedTrainLoop', max_epochs=100, val_begin=1, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ param_scheduler = [
7
+ dict(
8
+ type='MultiStepLR',
9
+ begin=0,
10
+ end=100,
11
+ by_epoch=True,
12
+ milestones=[40, 80],
13
+ gamma=0.1)
14
+ ]
15
+
16
+ optim_wrapper = dict(
17
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
18
+ clip_grad=dict(max_norm=40, norm_type=2))
configs/_base_/schedules/sgd_150e_warmup.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train_cfg = dict(
2
+ type='EpochBasedTrainLoop', max_epochs=150, val_begin=1, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ param_scheduler = [
7
+ dict(type='LinearLR', start_factor=0.1, by_epoch=True, begin=0, end=10),
8
+ dict(
9
+ type='MultiStepLR',
10
+ begin=0,
11
+ end=150,
12
+ by_epoch=True,
13
+ milestones=[90, 130],
14
+ gamma=0.1)
15
+ ]
16
+
17
+ optim_wrapper = dict(
18
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
19
+ clip_grad=dict(max_norm=40, norm_type=2))
configs/_base_/schedules/sgd_50e.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train_cfg = dict(
2
+ type='EpochBasedTrainLoop', max_epochs=50, val_begin=1, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ param_scheduler = [
7
+ dict(
8
+ type='MultiStepLR',
9
+ begin=0,
10
+ end=50,
11
+ by_epoch=True,
12
+ milestones=[20, 40],
13
+ gamma=0.1)
14
+ ]
15
+
16
+ optim_wrapper = dict(
17
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
18
+ clip_grad=dict(max_norm=40, norm_type=2))