AZIIIIIIIIZ committed on
Commit
d670799
·
verified ·
1 Parent(s): fe51e89

Upload 1039 files

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .circleci/config.yml +34 -0
  2. .circleci/docker/Dockerfile +11 -0
  3. .circleci/test.yml +211 -0
  4. .gitattributes +12 -0
  5. .gitignore +151 -0
  6. .owners.yml +16 -0
  7. .pre-commit-config.yaml +52 -0
  8. .pylintrc +624 -0
  9. .python-version +1 -0
  10. .readthedocs.yml +14 -0
  11. 0.7.1 +0 -0
  12. 2.0.0 +0 -0
  13. 3.0.0 +0 -0
  14. CITATION.cff +8 -0
  15. DEPLOYMENT.md +186 -0
  16. LICENSE +203 -0
  17. MANIFEST.in +4 -0
  18. README.md +92 -12
  19. README_zh-CN.md +398 -0
  20. app.py +115 -0
  21. checkpoints/tsn_r50_8xb32-1x1x8-100e_kinetics400-rgb_20220818-2692d16c.pth +3 -0
  22. configs/_base_/default_runtime.py +24 -0
  23. configs/_base_/models/audioonly_r50.py +16 -0
  24. configs/_base_/models/bmn_400x100.py +12 -0
  25. configs/_base_/models/bsn_pem.py +13 -0
  26. configs/_base_/models/bsn_tem.py +8 -0
  27. configs/_base_/models/c2d_r50.py +20 -0
  28. configs/_base_/models/c3d_sports1m_pretrained.py +28 -0
  29. configs/_base_/models/i3d_r50.py +30 -0
  30. configs/_base_/models/ircsn_r152.py +28 -0
  31. configs/_base_/models/mvit_small.py +14 -0
  32. configs/_base_/models/r2plus1d_r34.py +31 -0
  33. configs/_base_/models/slowfast_r50.py +42 -0
  34. configs/_base_/models/slowonly_r50.py +24 -0
  35. configs/_base_/models/swin_tiny.py +28 -0
  36. configs/_base_/models/tanet_r50.py +23 -0
  37. configs/_base_/models/tin_r50.py +29 -0
  38. configs/_base_/models/tpn_slowonly_r50.py +45 -0
  39. configs/_base_/models/tpn_tsm_r50.py +40 -0
  40. configs/_base_/models/trn_r50.py +25 -0
  41. configs/_base_/models/tsm_mobilenet_v2.py +27 -0
  42. configs/_base_/models/tsm_mobileone_s4.py +31 -0
  43. configs/_base_/models/tsm_r50.py +24 -0
  44. configs/_base_/models/tsn_mobileone_s0.py +26 -0
  45. configs/_base_/models/tsn_r50.py +23 -0
  46. configs/_base_/models/x3d.py +20 -0
  47. configs/_base_/schedules/adam_20e.py +20 -0
  48. configs/_base_/schedules/sgd_100e.py +18 -0
  49. configs/_base_/schedules/sgd_150e_warmup.py +19 -0
  50. configs/_base_/schedules/sgd_50e.py +18 -0
.circleci/config.yml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 2.1
2
+
3
+ # this allows you to use CircleCI's dynamic configuration feature
4
+ setup: true
5
+
6
+ # the path-filtering orb is required to continue a pipeline based on
7
+ # the path of an updated fileset
8
+ orbs:
9
+ path-filtering: circleci/path-filtering@0.1.2
10
+
11
+ workflows:
12
+ # the always-run workflow is always triggered, regardless of the pipeline parameters.
13
+ always-run:
14
+ jobs:
15
+ # the path-filtering/filter job determines which pipeline
16
+ # parameters to update.
17
+ - path-filtering/filter:
18
+ name: check-updated-files
19
+ # 3-column, whitespace-delimited mapping. One mapping per
20
+ # line:
21
+ # <regex path-to-test> <parameter-to-set> <value-of-pipeline-parameter>
22
+ mapping: |
23
+ mmaction/.* lint_only false
24
+ requirements/.* lint_only false
25
+ tests/.* lint_only false
26
+ tools/.* lint_only false
27
+ configs/.* lint_only false
28
+ .circleci/.* lint_only false
29
+ base-revision: dev-1.x
30
+ # this is the path of the configuration we should trigger once
31
+ # path filtering and pipeline parameter value updates are
32
+ # complete. In this case, we are using the parent dynamic
33
+ # configuration itself.
34
+ config-path: .circleci/test.yml
.circleci/docker/Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG PYTORCH="1.8.1"
2
+ ARG CUDA="11.1"
3
+ ARG CUDNN="8"
4
+
5
+ FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
6
+
7
+ # To fix GPG key error when running apt-get update
8
+ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
9
+ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
10
+
11
+ RUN apt-get update && apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx
.circleci/test.yml ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 2.1
2
+
3
+ # the default pipeline parameters, which will be updated according to
4
+ # the results of the path-filtering orb
5
+ parameters:
6
+ lint_only:
7
+ type: boolean
8
+ default: true
9
+
10
+ jobs:
11
+ lint:
12
+ docker:
13
+ - image: cimg/python:3.7.4
14
+ steps:
15
+ - checkout
16
+ - run:
17
+ name: Install pre-commit hook
18
+ command: |
19
+ pip install pre-commit
20
+ pre-commit install
21
+ - run:
22
+ name: Linting
23
+ command: pre-commit run --all-files
24
+ - run:
25
+ name: Check docstring coverage
26
+ command: |
27
+ pip install interrogate
28
+ interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 50 mmaction
29
+ build_cpu:
30
+ parameters:
31
+ # The python version must match available image tags in
32
+ # https://circleci.com/developer/images/image/cimg/python
33
+ python:
34
+ type: string
35
+ torch:
36
+ type: string
37
+ torchvision:
38
+ type: string
39
+ docker:
40
+ - image: cimg/python:<< parameters.python >>
41
+ resource_class: large
42
+ steps:
43
+ - checkout
44
+ - run:
45
+ name: Install Libraries
46
+ command: |
47
+ sudo apt-get update
48
+ sudo apt-get upgrade
49
+ sudo apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libturbojpeg pkg-config
50
+ sudo apt-get install -y libavdevice-dev libavfilter-dev libopus-dev libvpx-dev libsrtp2-dev libsndfile1
51
+ - run:
52
+ name: Configure Python & pip
53
+ command: |
54
+ pip install --upgrade pip
55
+ pip install wheel
56
+ - run:
57
+ name: Install PyTorch
58
+ command: |
59
+ python -V
60
+ pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
61
+ - run:
62
+ name: Install mmaction dependencies
63
+ command: |
64
+ pip install git+ssh://git@github.com/open-mmlab/mmengine.git@main
65
+ pip install -U openmim
66
+ mim install 'mmcv >= 2.0.0'
67
+ pip install git+https://git@github.com/open-mmlab/mmdetection.git@dev-3.x
68
+ pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
69
+ pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
70
+ pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
71
+ pip install -r requirements.txt
72
+ - run:
73
+ name: Install timm
74
+ command: |
75
+ pip install timm
76
+ - run:
77
+ name: Install transformers
78
+ command: |
79
+ pip install transformers
80
+ - when:
81
+ condition:
82
+ equal: [ "0.10.0", << parameters.torchvision >> ]
83
+ steps:
84
+ - run: python -m pip install pytorchvideo
85
+ - run:
86
+ name: Build and install
87
+ command: |
88
+ pip install -e .
89
+ - run:
90
+ name: Run unittests
91
+ command: |
92
+ coverage run --branch --source mmaction -m pytest tests/
93
+ coverage xml
94
+ coverage report -m
95
+ build_cuda:
96
+ parameters:
97
+ torch:
98
+ type: string
99
+ cuda:
100
+ type: enum
101
+ enum: ["11.1"]
102
+ cudnn:
103
+ type: integer
104
+ default: 8
105
+ machine:
106
+ image: ubuntu-2004-cuda-11.4:202110-01
107
+ # docker_layer_caching: true
108
+ resource_class: gpu.nvidia.small
109
+ steps:
110
+ - checkout
111
+ - run:
112
+ name: Build Docker image
113
+ command: |
114
+ docker build .circleci/docker -t mmaction:gpu --build-arg PYTORCH=<< parameters.torch >> --build-arg CUDA=<< parameters.cuda >> --build-arg CUDNN=<< parameters.cudnn >>
115
+ docker run --gpus all -t -d -v /home/circleci/project:/mmaction -w /mmaction --name mmaction mmaction:gpu
116
+ docker exec mmaction apt-get update
117
+ docker exec mmaction apt-get upgrade -y
118
+ docker exec mmaction apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libturbojpeg pkg-config
119
+ docker exec mmaction apt-get install -y libavdevice-dev libavfilter-dev libopus-dev libvpx-dev libsrtp2-dev libsndfile1
120
+ - run:
121
+ name: Install PytorchVideo and timm
122
+ command: |
123
+ docker exec mmaction pip install timm
124
+ docker exec mmaction python -m pip install pytorchvideo
125
+ - run:
126
+ name: Install transformers
127
+ command: |
128
+ docker exec mmaction pip install transformers
129
+ - run:
130
+ name: Install mmaction dependencies
131
+ command: |
132
+ docker exec mmaction pip install git+https://git@github.com/open-mmlab/mmengine.git@main
133
+ docker exec mmaction pip install -U openmim
134
+ docker exec mmaction mim install 'mmcv >= 2.0.0'
135
+ docker exec mmaction pip install git+https://git@github.com/open-mmlab/mmdetection.git@dev-3.x
136
+ docker exec mmaction pip install git+https://git@github.com/open-mmlab/mmpose.git@dev-1.x
137
+ docker exec mmaction pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
138
+ docker exec mmaction pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
139
+ docker exec mmaction pip install -r requirements.txt
140
+ - run:
141
+ name: Build and install
142
+ command: |
143
+ docker exec mmaction pip install -e .
144
+ - run:
145
+ name: Run unittests
146
+ command: |
147
+ docker exec mmaction pytest tests/
148
+ workflows:
149
+ pr_stage_lint:
150
+ jobs:
151
+ - lint:
152
+ name: lint
153
+ filters:
154
+ branches:
155
+ ignore:
156
+ - dev-1.x
157
+ - main
158
+ pr_stage_test:
159
+ when:
160
+ not:
161
+ << pipeline.parameters.lint_only >>
162
+ jobs:
163
+ - lint:
164
+ name: lint
165
+ filters:
166
+ branches:
167
+ ignore:
168
+ - dev-1.x
169
+ - main
170
+ - build_cpu:
171
+ name: minimum_version_cpu
172
+ torch: 1.8.1
173
+ torchvision: 0.9.1
174
+ python: 3.7.4
175
+ requires:
176
+ - lint
177
+ - build_cpu:
178
+ name: maximum_version_cpu
179
+ torch: 1.13.0
180
+ torchvision: 0.14.0
181
+ python: 3.9.0
182
+ requires:
183
+ - minimum_version_cpu
184
+ - hold:
185
+ type: approval
186
+ requires:
187
+ - maximum_version_cpu
188
+ - build_cuda:
189
+ name: mainstream_version_gpu
190
+ torch: 1.8.1
191
+ # Use double quotation mark to explicitly specify its type
192
+ # as string instead of number
193
+ cuda: "11.1"
194
+ requires:
195
+ - hold
196
+ merge_stage_test:
197
+ when:
198
+ not:
199
+ << pipeline.parameters.lint_only >>
200
+ jobs:
201
+ - build_cuda:
202
+ name: minimum_version_gpu
203
+ torch: 1.8.1
204
+ # Use double quotation mark to explicitly specify its type
205
+ # as string instead of number
206
+ cuda: "11.1"
207
+ filters:
208
+ branches:
209
+ only:
210
+ - dev-1.x
211
+ - main
.gitattributes CHANGED
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ demo/demo.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ resources/data_pipeline.png filter=lfs diff=lfs merge=lfs -text
38
+ resources/miaomiao_qrcode.jpg filter=lfs diff=lfs merge=lfs -text
39
+ resources/mmaction2_overview.gif filter=lfs diff=lfs merge=lfs -text
40
+ resources/qq_group_qrcode.jpg filter=lfs diff=lfs merge=lfs -text
41
+ resources/spatio-temporal-det.gif filter=lfs diff=lfs merge=lfs -text
42
+ resources/zhihu_qrcode.jpg filter=lfs diff=lfs merge=lfs -text
43
+ tests/data/rawvideo_dataset/part_1.mp4 filter=lfs diff=lfs merge=lfs -text
44
+ tests/data/test.avi filter=lfs diff=lfs merge=lfs -text
45
+ tests/data/test.mp4 filter=lfs diff=lfs merge=lfs -text
46
+ tests/data/test.wav filter=lfs diff=lfs merge=lfs -text
47
+ tools/data/skeleton/S001C001P001R001A001_rgb.avi filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ **/*.pyc
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ .hypothesis/
49
+ .pytest_cache/
50
+
51
+ # Translations
52
+ *.mo
53
+ *.pot
54
+
55
+ # Django stuff:
56
+ *.log
57
+ local_settings.py
58
+ db.sqlite3
59
+
60
+ # Flask stuff:
61
+ instance/
62
+ .webassets-cache
63
+
64
+ # Scrapy stuff:
65
+ .scrapy
66
+
67
+ # Auto generate documentation
68
+ docs/*/_build/
69
+ docs/*/model_zoo/
70
+ docs/*/dataset_zoo/
71
+ docs/*/_model_zoo.rst
72
+ docs/*/modelzoo_statistics.md
73
+ docs/*/datasetzoo_statistics.md
74
+ docs/*/projectzoo.md
75
+ docs/*/papers/
76
+ docs/*/api/generated/
77
+
78
+ # PyBuilder
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # celery beat schedule file
88
+ celerybeat-schedule
89
+
90
+ # SageMath parsed files
91
+ *.sage.py
92
+
93
+ # Environments
94
+ .env
95
+ .venv
96
+ env/
97
+ venv/
98
+ ENV/
99
+ env.bak/
100
+ venv.bak/
101
+
102
+ # Spyder project settings
103
+ .spyderproject
104
+ .spyproject
105
+
106
+ # Rope project settings
107
+ .ropeproject
108
+
109
+ # mkdocs documentation
110
+ /site
111
+
112
+ # mypy
113
+ .mypy_cache/
114
+
115
+ # custom
116
+ /data
117
+ .vscode
118
+ .idea
119
+ *.pkl
120
+ *.pkl.json
121
+ *.log.json
122
+ benchlist.txt
123
+ work_dirs/
124
+ /projects/*/work_dirs
125
+ /projects/*/data
126
+ .DS_Store
127
+
128
+ # Pytorch
129
+ *.pth
130
+
131
+ # Profile
132
+ *.prof
133
+
134
+ # lmdb
135
+ *.mdb
136
+
137
+ # unignore some data file in tests/data
138
+ !tests/data/**/*.pkl
139
+ !tests/data/**/*.pkl.json
140
+ !tests/data/**/*.log.json
141
+ !tests/data/**/*.pth
142
+
143
+ # avoid soft links created by MIM
144
+ mmaction/tools/*
145
+
146
+ *.ipynb
147
+
148
+ # unignore ipython notebook files in demo
149
+ !demo/*.ipynb
150
+ !projects/stad_tutorial/*.ipynb
151
+ mmaction/.mim
.owners.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ assign:
2
+ issues: enabled
3
+ pull_requests: disabled
4
+ strategy:
5
+ # random
6
+ daily-shift-based
7
+ schedule:
8
+ '*/1 * * * *'
9
+ assignees:
10
+ - hukkai
11
+ - Dai-Wenxun
12
+ - cir7
13
+ - Dai-Wenxun
14
+ - cir7
15
+ - hukkai
16
+ - hukkai
.pre-commit-config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exclude: ^tests/data/
2
+ repos:
3
+ - repo: https://github.com/PyCQA/flake8
4
+ rev: 5.0.4
5
+ hooks:
6
+ - id: flake8
7
+ - repo: https://github.com/PyCQA/isort
8
+ rev: 5.11.5
9
+ hooks:
10
+ - id: isort
11
+ - repo: https://github.com/pre-commit/mirrors-yapf
12
+ rev: v0.32.0
13
+ hooks:
14
+ - id: yapf
15
+ - repo: https://github.com/pre-commit/pre-commit-hooks
16
+ rev: v4.3.0
17
+ hooks:
18
+ - id: trailing-whitespace
19
+ - id: check-yaml
20
+ - id: end-of-file-fixer
21
+ - id: requirements-txt-fixer
22
+ - id: double-quote-string-fixer
23
+ - id: check-merge-conflict
24
+ - id: fix-encoding-pragma
25
+ args: ["--remove"]
26
+ - id: mixed-line-ending
27
+ args: ["--fix=lf"]
28
+ - repo: https://github.com/myint/docformatter
29
+ rev: v1.3.1
30
+ hooks:
31
+ - id: docformatter
32
+ args: ["--in-place", "--wrap-descriptions", "79"]
33
+ - repo: https://github.com/codespell-project/codespell
34
+ rev: v2.1.0
35
+ hooks:
36
+ - id: codespell
37
+ args: ["--skip", "*.ipynb", "-L", "ECT,Gool,tread,gool,mot"]
38
+ - repo: https://github.com/executablebooks/mdformat
39
+ rev: 0.7.14
40
+ hooks:
41
+ - id: mdformat
42
+ args: ["--number", "--table-width", "200"]
43
+ additional_dependencies:
44
+ - mdformat-openmmlab
45
+ - mdformat_frontmatter
46
+ - linkify-it-py
47
+ - repo: https://github.com/open-mmlab/pre-commit-hooks
48
+ rev: v0.2.0
49
+ hooks:
50
+ - id: check-algo-readme
51
+ - id: check-copyright
52
+ args: ["mmaction", "tests", "demo", "tools"]
.pylintrc ADDED
@@ -0,0 +1,624 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [MASTER]
2
+
3
+ # A comma-separated list of package or module names from where C extensions may
4
+ # be loaded. Extensions are loading into the active Python interpreter and may
5
+ # run arbitrary code.
6
+ extension-pkg-whitelist=
7
+
8
+ # Specify a score threshold to be exceeded before program exits with error.
9
+ fail-under=10
10
+
11
+ # Add files or directories to the blacklist. They should be base names, not
12
+ # paths.
13
+ ignore=CVS,configs
14
+
15
+ # Add files or directories matching the regex patterns to the blacklist. The
16
+ # regex matches against base names, not paths.
17
+ ignore-patterns=
18
+
19
+ # Python code to execute, usually for sys.path manipulation such as
20
+ # pygtk.require().
21
+ #init-hook=
22
+
23
+ # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
24
+ # number of processors available to use.
25
+ jobs=1
26
+
27
+ # Control the amount of potential inferred values when inferring a single
28
+ # object. This can help the performance when dealing with large functions or
29
+ # complex, nested conditions.
30
+ limit-inference-results=100
31
+
32
+ # List of plugins (as comma separated values of python module names) to load,
33
+ # usually to register additional checkers.
34
+ load-plugins=
35
+
36
+ # Pickle collected data for later comparisons.
37
+ persistent=yes
38
+
39
+ # When enabled, pylint would attempt to guess common misconfiguration and emit
40
+ # user-friendly hints instead of false-positive error messages.
41
+ suggestion-mode=yes
42
+
43
+ # Allow loading of arbitrary C extensions. Extensions are imported into the
44
+ # active Python interpreter and may run arbitrary code.
45
+ unsafe-load-any-extension=no
46
+
47
+
48
+ [MESSAGES CONTROL]
49
+
50
+ # Only show warnings with the listed confidence levels. Leave empty to show
51
+ # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
52
+ confidence=
53
+
54
+ # Disable the message, report, category or checker with the given id(s). You
55
+ # can either give multiple identifiers separated by comma (,) or put this
56
+ # option multiple times (only on the command line, not in the configuration
57
+ # file where it should appear only once). You can also use "--disable=all" to
58
+ # disable everything first and then reenable specific checks. For example, if
59
+ # you want to run only the similarities checker, you can use "--disable=all
60
+ # --enable=similarities". If you want to run only the classes checker, but have
61
+ # no Warning level messages displayed, use "--disable=all --enable=classes
62
+ # --disable=W".
63
+ disable=import-outside-toplevel,
64
+ redefined-outer-name,
65
+ print-statement,
66
+ parameter-unpacking,
67
+ unpacking-in-except,
68
+ old-raise-syntax,
69
+ backtick,
70
+ long-suffix,
71
+ old-ne-operator,
72
+ old-octal-literal,
73
+ import-star-module-level,
74
+ non-ascii-bytes-literal,
75
+ raw-checker-failed,
76
+ bad-inline-option,
77
+ locally-disabled,
78
+ file-ignored,
79
+ suppressed-message,
80
+ useless-suppression,
81
+ deprecated-pragma,
82
+ use-symbolic-message-instead,
83
+ apply-builtin,
84
+ basestring-builtin,
85
+ buffer-builtin,
86
+ cmp-builtin,
87
+ coerce-builtin,
88
+ execfile-builtin,
89
+ file-builtin,
90
+ long-builtin,
91
+ raw_input-builtin,
92
+ reduce-builtin,
93
+ standarderror-builtin,
94
+ unicode-builtin,
95
+ xrange-builtin,
96
+ coerce-method,
97
+ delslice-method,
98
+ getslice-method,
99
+ setslice-method,
100
+ no-absolute-import,
101
+ old-division,
102
+ dict-iter-method,
103
+ dict-view-method,
104
+ next-method-called,
105
+ metaclass-assignment,
106
+ indexing-exception,
107
+ raising-string,
108
+ reload-builtin,
109
+ oct-method,
110
+ hex-method,
111
+ nonzero-method,
112
+ cmp-method,
113
+ input-builtin,
114
+ round-builtin,
115
+ intern-builtin,
116
+ unichr-builtin,
117
+ map-builtin-not-iterating,
118
+ zip-builtin-not-iterating,
119
+ range-builtin-not-iterating,
120
+ filter-builtin-not-iterating,
121
+ using-cmp-argument,
122
+ eq-without-hash,
123
+ div-method,
124
+ idiv-method,
125
+ rdiv-method,
126
+ exception-message-attribute,
127
+ invalid-str-codec,
128
+ sys-max-int,
129
+ bad-python3-import,
130
+ deprecated-string-function,
131
+ deprecated-str-translate-call,
132
+ deprecated-itertools-function,
133
+ deprecated-types-field,
134
+ next-method-defined,
135
+ dict-items-not-iterating,
136
+ dict-keys-not-iterating,
137
+ dict-values-not-iterating,
138
+ deprecated-operator-function,
139
+ deprecated-urllib-function,
140
+ xreadlines-attribute,
141
+ deprecated-sys-function,
142
+ exception-escape,
143
+ comprehension-escape,
144
+ no-member,
145
+ invalid-name,
146
+ too-many-branches,
147
+ wrong-import-order,
148
+ too-many-arguments,
149
+ missing-function-docstring,
150
+ missing-module-docstring,
151
+ too-many-locals,
152
+ too-few-public-methods,
153
+ abstract-method,
154
+ broad-except,
155
+ too-many-nested-blocks,
156
+ too-many-instance-attributes,
157
+ missing-class-docstring,
158
+ duplicate-code,
159
+ not-callable,
160
+ protected-access,
161
+ dangerous-default-value,
162
+ no-name-in-module,
163
+ logging-fstring-interpolation,
164
+ super-init-not-called,
165
+ redefined-builtin,
166
+ attribute-defined-outside-init,
167
+ arguments-differ,
168
+ cyclic-import,
169
+ bad-super-call,
170
+ too-many-statements,
171
+ line-too-long
172
+
173
+ # Enable the message, report, category or checker with the given id(s). You can
174
+ # either give multiple identifier separated by comma (,) or put this option
175
+ # multiple time (only on the command line, not in the configuration file where
176
+ # it should appear only once). See also the "--disable" option for examples.
177
+ enable=c-extension-no-member
178
+
179
+
180
+ [REPORTS]
181
+
182
+ # Python expression which should return a score less than or equal to 10. You
183
+ # have access to the variables 'error', 'warning', 'refactor', and 'convention'
184
+ # which contain the number of messages in each category, as well as 'statement'
185
+ # which is the total number of statements analyzed. This score is used by the
186
+ # global evaluation report (RP0004).
187
+ evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
188
+
189
+ # Template used to display messages. This is a python new-style format string
190
+ # used to format the message information. See doc for all details.
191
+ #msg-template=
192
+
193
+ # Set the output format. Available formats are text, parseable, colorized, json
194
+ # and msvs (visual studio). You can also give a reporter class, e.g.
195
+ # mypackage.mymodule.MyReporterClass.
196
+ output-format=text
197
+
198
+ # Tells whether to display a full report or only the messages.
199
+ reports=no
200
+
201
+ # Activate the evaluation score.
202
+ score=yes
203
+
204
+
205
+ [REFACTORING]
206
+
207
+ # Maximum number of nested blocks for function / method body
208
+ max-nested-blocks=5
209
+
210
+ # Complete name of functions that never returns. When checking for
211
+ # inconsistent-return-statements if a never returning function is called then
212
+ # it will be considered as an explicit return statement and no message will be
213
+ # printed.
214
+ never-returning-functions=sys.exit
215
+
216
+
217
+ [TYPECHECK]
218
+
219
+ # List of decorators that produce context managers, such as
220
+ # contextlib.contextmanager. Add to this list to register other decorators that
221
+ # produce valid context managers.
222
+ contextmanager-decorators=contextlib.contextmanager
223
+
224
+ # List of members which are set dynamically and missed by pylint inference
225
+ # system, and so shouldn't trigger E1101 when accessed. Python regular
226
+ # expressions are accepted.
227
+ generated-members=
228
+
229
+ # Tells whether missing members accessed in mixin class should be ignored. A
230
+ # mixin class is detected if its name ends with "mixin" (case insensitive).
231
+ ignore-mixin-members=yes
232
+
233
+ # Tells whether to warn about missing members when the owner of the attribute
234
+ # is inferred to be None.
235
+ ignore-none=yes
236
+
237
+ # This flag controls whether pylint should warn about no-member and similar
238
+ # checks whenever an opaque object is returned when inferring. The inference
239
+ # can return multiple potential results while evaluating a Python object, but
240
+ # some branches might not be evaluated, which results in partial inference. In
241
+ # that case, it might be useful to still emit no-member and other checks for
242
+ # the rest of the inferred objects.
243
+ ignore-on-opaque-inference=yes
244
+
245
+ # List of class names for which member attributes should not be checked (useful
246
+ # for classes with dynamically set attributes). This supports the use of
247
+ # qualified names.
248
+ ignored-classes=optparse.Values,thread._local,_thread._local
249
+
250
+ # List of module names for which member attributes should not be checked
251
+ # (useful for modules/projects where namespaces are manipulated during runtime
252
+ # and thus existing member attributes cannot be deduced by static analysis). It
253
+ # supports qualified module names, as well as Unix pattern matching.
254
+ ignored-modules=
255
+
256
+ # Show a hint with possible names when a member name was not found. The aspect
257
+ # of finding the hint is based on edit distance.
258
+ missing-member-hint=yes
259
+
260
+ # The minimum edit distance a name should have in order to be considered a
261
+ # similar match for a missing member name.
262
+ missing-member-hint-distance=1
263
+
264
+ # The total number of similar names that should be taken in consideration when
265
+ # showing a hint for a missing member.
266
+ missing-member-max-choices=1
267
+
268
+ # List of decorators that change the signature of a decorated function.
269
+ signature-mutators=
270
+
271
+
272
+ [SPELLING]
273
+
274
+ # Limits count of emitted suggestions for spelling mistakes.
275
+ max-spelling-suggestions=4
276
+
277
+ # Spelling dictionary name. Available dictionaries: none. To make it work,
278
+ # install the python-enchant package.
279
+ spelling-dict=
280
+
281
+ # List of comma separated words that should not be checked.
282
+ spelling-ignore-words=
283
+
284
+ # A path to a file that contains the private dictionary; one word per line.
285
+ spelling-private-dict-file=
286
+
287
+ # Tells whether to store unknown words to the private dictionary (see the
288
+ # --spelling-private-dict-file option) instead of raising a message.
289
+ spelling-store-unknown-words=no
290
+
291
+
292
+ [LOGGING]
293
+
294
+ # The type of string formatting that logging methods do. `old` means using %
295
+ # formatting, `new` is for `{}` formatting.
296
+ logging-format-style=old
297
+
298
+ # Logging modules to check that the string format arguments are in logging
299
+ # function parameter format.
300
+ logging-modules=logging
301
+
302
+
303
+ [VARIABLES]
304
+
305
+ # List of additional names supposed to be defined in builtins. Remember that
306
+ # you should avoid defining new builtins when possible.
307
+ additional-builtins=
308
+
309
+ # Tells whether unused global variables should be treated as a violation.
310
+ allow-global-unused-variables=yes
311
+
312
+ # List of strings which can identify a callback function by name. A callback
313
+ # name must start or end with one of those strings.
314
+ callbacks=cb_,
315
+ _cb
316
+
317
+ # A regular expression matching the name of dummy variables (i.e. expected to
318
+ # not be used).
319
+ dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
320
+
321
+ # Argument names that match this expression will be ignored. Default to name
322
+ # with leading underscore.
323
+ ignored-argument-names=_.*|^ignored_|^unused_
324
+
325
+ # Tells whether we should check for unused import in __init__ files.
326
+ init-import=no
327
+
328
+ # List of qualified module names which can have objects that can redefine
329
+ # builtins.
330
+ redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
331
+
332
+
333
+ [FORMAT]
334
+
335
+ # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
336
+ expected-line-ending-format=
337
+
338
+ # Regexp for a line that is allowed to be longer than the limit.
339
+ ignore-long-lines=^\s*(# )?<?https?://\S+>?$
340
+
341
+ # Number of spaces of indent required inside a hanging or continued line.
342
+ indent-after-paren=4
343
+
344
+ # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
345
+ # tab).
346
+ indent-string=' '
347
+
348
+ # Maximum number of characters on a single line.
349
+ max-line-length=100
350
+
351
+ # Maximum number of lines in a module.
352
+ max-module-lines=1000
353
+
354
+ # Allow the body of a class to be on the same line as the declaration if body
355
+ # contains single statement.
356
+ single-line-class-stmt=no
357
+
358
+ # Allow the body of an if to be on the same line as the test if there is no
359
+ # else.
360
+ single-line-if-stmt=no
361
+
362
+
363
+ [STRING]
364
+
365
+ # This flag controls whether inconsistent-quotes generates a warning when the
366
+ # character used as a quote delimiter is used inconsistently within a module.
367
+ check-quote-consistency=no
368
+
369
+ # This flag controls whether the implicit-str-concat should generate a warning
370
+ # on implicit string concatenation in sequences defined over several lines.
371
+ check-str-concat-over-line-jumps=no
372
+
373
+
374
+ [SIMILARITIES]
375
+
376
+ # Ignore comments when computing similarities.
377
+ ignore-comments=yes
378
+
379
+ # Ignore docstrings when computing similarities.
380
+ ignore-docstrings=yes
381
+
382
+ # Ignore imports when computing similarities.
383
+ ignore-imports=no
384
+
385
+ # Minimum lines number of a similarity.
386
+ min-similarity-lines=4
387
+
388
+
389
+ [MISCELLANEOUS]
390
+
391
+ # List of note tags to take in consideration, separated by a comma.
392
+ notes=FIXME,
393
+ XXX,
394
+ TODO
395
+
396
+ # Regular expression of note tags to take in consideration.
397
+ #notes-rgx=
398
+
399
+
400
+ [BASIC]
401
+
402
+ # Naming style matching correct argument names.
403
+ argument-naming-style=snake_case
404
+
405
+ # Regular expression matching correct argument names. Overrides argument-
406
+ # naming-style.
407
+ #argument-rgx=
408
+
409
+ # Naming style matching correct attribute names.
410
+ attr-naming-style=snake_case
411
+
412
+ # Regular expression matching correct attribute names. Overrides attr-naming-
413
+ # style.
414
+ #attr-rgx=
415
+
416
+ # Bad variable names which should always be refused, separated by a comma.
417
+ bad-names=foo,
418
+ bar,
419
+ baz,
420
+ toto,
421
+ tutu,
422
+ tata
423
+
424
+ # Bad variable names regexes, separated by a comma. If names match any regex,
425
+ # they will always be refused
426
+ bad-names-rgxs=
427
+
428
+ # Naming style matching correct class attribute names.
429
+ class-attribute-naming-style=any
430
+
431
+ # Regular expression matching correct class attribute names. Overrides class-
432
+ # attribute-naming-style.
433
+ #class-attribute-rgx=
434
+
435
+ # Naming style matching correct class names.
436
+ class-naming-style=PascalCase
437
+
438
+ # Regular expression matching correct class names. Overrides class-naming-
439
+ # style.
440
+ #class-rgx=
441
+
442
+ # Naming style matching correct constant names.
443
+ const-naming-style=UPPER_CASE
444
+
445
+ # Regular expression matching correct constant names. Overrides const-naming-
446
+ # style.
447
+ #const-rgx=
448
+
449
+ # Minimum line length for functions/classes that require docstrings, shorter
450
+ # ones are exempt.
451
+ docstring-min-length=-1
452
+
453
+ # Naming style matching correct function names.
454
+ function-naming-style=snake_case
455
+
456
+ # Regular expression matching correct function names. Overrides function-
457
+ # naming-style.
458
+ #function-rgx=
459
+
460
+ # Good variable names which should always be accepted, separated by a comma.
461
+ good-names=i,
462
+ j,
463
+ k,
464
+ ex,
465
+ Run,
466
+ _,
467
+ x,
468
+ y,
469
+ w,
470
+ h,
471
+ a,
472
+ b
473
+
474
+ # Good variable names regexes, separated by a comma. If names match any regex,
475
+ # they will always be accepted
476
+ good-names-rgxs=
477
+
478
+ # Include a hint for the correct naming format with invalid-name.
479
+ include-naming-hint=no
480
+
481
+ # Naming style matching correct inline iteration names.
482
+ inlinevar-naming-style=any
483
+
484
+ # Regular expression matching correct inline iteration names. Overrides
485
+ # inlinevar-naming-style.
486
+ #inlinevar-rgx=
487
+
488
+ # Naming style matching correct method names.
489
+ method-naming-style=snake_case
490
+
491
+ # Regular expression matching correct method names. Overrides method-naming-
492
+ # style.
493
+ #method-rgx=
494
+
495
+ # Naming style matching correct module names.
496
+ module-naming-style=snake_case
497
+
498
+ # Regular expression matching correct module names. Overrides module-naming-
499
+ # style.
500
+ #module-rgx=
501
+
502
+ # Colon-delimited sets of names that determine each other's naming style when
503
+ # the name regexes allow several styles.
504
+ name-group=
505
+
506
+ # Regular expression which should only match function or class names that do
507
+ # not require a docstring.
508
+ no-docstring-rgx=^_
509
+
510
+ # List of decorators that produce properties, such as abc.abstractproperty. Add
511
+ # to this list to register other decorators that produce valid properties.
512
+ # These decorators are taken in consideration only for invalid-name.
513
+ property-classes=abc.abstractproperty
514
+
515
+ # Naming style matching correct variable names.
516
+ variable-naming-style=snake_case
517
+
518
+ # Regular expression matching correct variable names. Overrides variable-
519
+ # naming-style.
520
+ #variable-rgx=
521
+
522
+
523
+ [DESIGN]
524
+
525
+ # Maximum number of arguments for function / method.
526
+ max-args=5
527
+
528
+ # Maximum number of attributes for a class (see R0902).
529
+ max-attributes=7
530
+
531
+ # Maximum number of boolean expressions in an if statement (see R0916).
532
+ max-bool-expr=5
533
+
534
+ # Maximum number of branch for function / method body.
535
+ max-branches=12
536
+
537
+ # Maximum number of locals for function / method body.
538
+ max-locals=15
539
+
540
+ # Maximum number of parents for a class (see R0901).
541
+ max-parents=7
542
+
543
+ # Maximum number of public methods for a class (see R0904).
544
+ max-public-methods=20
545
+
546
+ # Maximum number of return / yield for function / method body.
547
+ max-returns=6
548
+
549
+ # Maximum number of statements in function / method body.
550
+ max-statements=50
551
+
552
+ # Minimum number of public methods for a class (see R0903).
553
+ min-public-methods=2
554
+
555
+
556
+ [IMPORTS]
557
+
558
+ # List of modules that can be imported at any level, not just the top level
559
+ # one.
560
+ allow-any-import-level=
561
+
562
+ # Allow wildcard imports from modules that define __all__.
563
+ allow-wildcard-with-all=no
564
+
565
+ # Analyse import fallback blocks. This can be used to support both Python 2 and
566
+ # 3 compatible code, which means that the block might have code that exists
567
+ # only in one or another interpreter, leading to false positives when analysed.
568
+ analyse-fallback-blocks=no
569
+
570
+ # Deprecated modules which should not be used, separated by a comma.
571
+ deprecated-modules=optparse,tkinter.tix
572
+
573
+ # Create a graph of external dependencies in the given file (report RP0402 must
574
+ # not be disabled).
575
+ ext-import-graph=
576
+
577
+ # Create a graph of every (i.e. internal and external) dependencies in the
578
+ # given file (report RP0402 must not be disabled).
579
+ import-graph=
580
+
581
+ # Create a graph of internal dependencies in the given file (report RP0402 must
582
+ # not be disabled).
583
+ int-import-graph=
584
+
585
+ # Force import order to recognize a module as part of the standard
586
+ # compatibility libraries.
587
+ known-standard-library=
588
+
589
+ # Force import order to recognize a module as part of a third party library.
590
+ known-third-party=enchant
591
+
592
+ # Couples of modules and preferred modules, separated by a comma.
593
+ preferred-modules=
594
+
595
+
596
+ [CLASSES]
597
+
598
+ # List of method names used to declare (i.e. assign) instance attributes.
599
+ defining-attr-methods=__init__,
600
+ __new__,
601
+ setUp,
602
+ __post_init__
603
+
604
+ # List of member names, which should be excluded from the protected access
605
+ # warning.
606
+ exclude-protected=_asdict,
607
+ _fields,
608
+ _replace,
609
+ _source,
610
+ _make
611
+
612
+ # List of valid names for the first argument in a class method.
613
+ valid-classmethod-first-arg=cls
614
+
615
+ # List of valid names for the first argument in a metaclass class method.
616
+ valid-metaclass-classmethod-first-arg=cls
617
+
618
+
619
+ [EXCEPTIONS]
620
+
621
+ # Exceptions that will emit a warning when being caught. Defaults to
622
+ # "BaseException, Exception".
623
+ overgeneral-exceptions=BaseException,
624
+ Exception
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
.readthedocs.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-22.04
5
+ tools:
6
+ python: "3.9"
7
+
8
+ formats:
9
+ - epub
10
+
11
+ python:
12
+ install:
13
+ - requirements: requirements/docs.txt
14
+ - requirements: requirements/readthedocs.txt
0.7.1 ADDED
File without changes
2.0.0 ADDED
File without changes
3.0.0 ADDED
File without changes
CITATION.cff ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ cff-version: 1.2.0
2
+ message: "If you use this software, please cite it as below."
3
+ authors:
4
+ - name: "MMAction2 Contributors"
5
+ title: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark"
6
+ date-released: 2020-07-21
7
+ url: "https://github.com/open-mmlab/mmaction2"
8
+ license: Apache-2.0
DEPLOYMENT.md ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deployment Guide for Hugging Face Spaces
2
+
3
+ This guide will help you deploy the GenVidBench project on Hugging Face Spaces.
4
+
5
+ ## 🚀 Quick Start
6
+
7
+ ### 1. Prepare Your Repository
8
+
9
+ Make sure your repository contains these files:
10
+ - `app.py` - Main Gradio application
11
+ - `requirements.txt` - Python dependencies
12
+ - `README.md` - Space metadata and description
13
+ - `download_model.py` - Model download script
14
+ - `setup.py` - Setup script (optional)
15
+
16
+ ### 2. Create a Hugging Face Space
17
+
18
+ 1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
19
+ 2. Click "Create new Space"
20
+ 3. Fill in the details:
21
+ - **Space name**: `genvidbench` (or your preferred name)
22
+ - **License**: Apache 2.0
23
+ - **SDK**: Gradio
24
+ - **Hardware**: CPU Basic (or GPU if needed)
25
+ - **Visibility**: Public or Private
26
+
27
+ ### 3. Upload Your Code
28
+
29
+ You can either:
30
+ - **Option A**: Push your code to a GitHub repository and connect it
31
+ - **Option B**: Upload files directly through the web interface
32
+
33
+ ### 4. Configure the Space
34
+
35
+ The space will automatically:
36
+ - Install dependencies from `requirements.txt`
37
+ - Run `app.py` as the main application
38
+ - Display the Gradio interface
39
+
40
+ ## 📁 Required Files
41
+
42
+ ### `app.py`
43
+ Your main Gradio application file. Should contain:
44
+ - Model initialization
45
+ - Gradio interface definition
46
+ - Video processing logic
47
+
48
+ ### `requirements.txt`
49
+ List of Python packages needed:
50
+ ```
51
+ torch>=1.13.0
52
+ torchvision>=0.14.0
53
+ mmcv>=2.0.0
54
+ mmengine>=0.7.1
55
+ gradio>=4.0.0
56
+ opencv-python>=4.6.0
57
+ decord>=0.6.0
58
+ # ... other dependencies
59
+ ```
60
+
61
+ ### `README.md`
62
+ Space metadata and description:
63
+ ```yaml
64
+ ---
65
+ title: GenVidBench - Video Action Recognition
66
+ emoji: 🎬
67
+ colorFrom: blue
68
+ colorTo: purple
69
+ sdk: gradio
70
+ sdk_version: 4.0.0
71
+ app_file: app.py
72
+ pinned: false
73
+ license: apache-2.0
74
+ ---
75
+ ```
76
+
77
+ ## 🔧 Model Setup
78
+
79
+ ### Option 1: Automatic Download
80
+ The app will try to download the model checkpoint automatically using `download_model.py`.
81
+
82
+ ### Option 2: Manual Upload
83
+ 1. Download the model checkpoint manually
84
+ 2. Upload it to your space's file system
85
+ 3. Place it in the `checkpoints/` directory
86
+
87
+ ### Option 3: Hugging Face Hub
88
+ Store the model on Hugging Face Hub and load it programmatically:
89
+ ```python
90
+ from huggingface_hub import hf_hub_download
91
+ checkpoint_path = hf_hub_download(repo_id="your-username/your-model", filename="model.pth")
92
+ ```
93
+
94
+ ## 🐛 Troubleshooting
95
+
96
+ ### Common Issues
97
+
98
+ 1. **Model not found**
99
+ - Ensure the checkpoint file is in the correct location
100
+ - Check file permissions
101
+ - Verify the download completed successfully
102
+
103
+ 2. **Import errors**
104
+ - Check that all dependencies are in `requirements.txt`
105
+ - Verify package versions are compatible
106
+
107
+ 3. **Memory issues**
108
+ - Consider using a smaller model
109
+ - Optimize batch size
110
+ - Use CPU instead of GPU if needed
111
+
112
+ 4. **Slow loading**
113
+ - Pre-download models during setup
114
+ - Use model caching
115
+ - Optimize model size
116
+
117
+ ### Debug Mode
118
+
119
+ Add debug information to your app:
120
+ ```python
121
+ import logging
122
+ logging.basicConfig(level=logging.DEBUG)
123
+ ```
124
+
125
+ ## 📊 Performance Optimization
126
+
127
+ ### For Hugging Face Spaces
128
+
129
+ 1. **Model Size**: Keep models under 2GB for faster loading
130
+ 2. **Dependencies**: Minimize the number of packages
131
+ 3. **Caching**: Use model caching to avoid re-downloading
132
+ 4. **Lazy Loading**: Load models only when needed
133
+
134
+ ### Example Optimizations
135
+
136
+ ```python
137
+ # Lazy model loading
138
+ model = None
139
+
140
+ def get_model():
141
+ global model
142
+ if model is None:
143
+ model = init_recognizer(config_file, checkpoint_file, device='cpu')
144
+ return model
145
+
146
+ def analyze_video(video):
147
+ model = get_model() # Load only when needed
148
+ # ... rest of the function
149
+ ```
150
+
151
+ ## 🔒 Security Considerations
152
+
153
+ 1. **File Upload Limits**: Set appropriate limits for video uploads
154
+ 2. **Input Validation**: Validate video formats and sizes
155
+ 3. **Resource Limits**: Monitor CPU/memory usage
156
+ 4. **Error Handling**: Graceful error handling for edge cases
157
+
158
+ ## 📈 Monitoring
159
+
160
+ Monitor your space:
161
+ - Check logs in the Hugging Face interface
162
+ - Monitor resource usage
163
+ - Track user interactions
164
+ - Set up alerts for failures
165
+
166
+ ## 🚀 Going Live
167
+
168
+ Once everything is working:
169
+
170
+ 1. **Test thoroughly** with different video types
171
+ 2. **Optimize performance** for your target audience
172
+ 3. **Add documentation** for users
173
+ 4. **Monitor usage** and gather feedback
174
+ 5. **Iterate and improve** based on user needs
175
+
176
+ ## 📞 Support
177
+
178
+ If you encounter issues:
179
+ - Check the Hugging Face Spaces documentation
180
+ - Review the logs in your space
181
+ - Test locally first
182
+ - Ask for help in the Hugging Face community
183
+
184
+ ---
185
+
186
+ **Happy deploying! 🎉**
LICENSE ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2018-2019 Open-MMLab. All rights reserved.
2
+
3
+ Apache License
4
+ Version 2.0, January 2004
5
+ http://www.apache.org/licenses/
6
+
7
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
+
9
+ 1. Definitions.
10
+
11
+ "License" shall mean the terms and conditions for use, reproduction,
12
+ and distribution as defined by Sections 1 through 9 of this document.
13
+
14
+ "Licensor" shall mean the copyright owner or entity authorized by
15
+ the copyright owner that is granting the License.
16
+
17
+ "Legal Entity" shall mean the union of the acting entity and all
18
+ other entities that control, are controlled by, or are under common
19
+ control with that entity. For the purposes of this definition,
20
+ "control" means (i) the power, direct or indirect, to cause the
21
+ direction or management of such entity, whether by contract or
22
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
23
+ outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ "You" (or "Your") shall mean an individual or Legal Entity
26
+ exercising permissions granted by this License.
27
+
28
+ "Source" form shall mean the preferred form for making modifications,
29
+ including but not limited to software source code, documentation
30
+ source, and configuration files.
31
+
32
+ "Object" form shall mean any form resulting from mechanical
33
+ transformation or translation of a Source form, including but
34
+ not limited to compiled object code, generated documentation,
35
+ and conversions to other media types.
36
+
37
+ "Work" shall mean the work of authorship, whether in Source or
38
+ Object form, made available under the License, as indicated by a
39
+ copyright notice that is included in or attached to the work
40
+ (an example is provided in the Appendix below).
41
+
42
+ "Derivative Works" shall mean any work, whether in Source or Object
43
+ form, that is based on (or derived from) the Work and for which the
44
+ editorial revisions, annotations, elaborations, or other modifications
45
+ represent, as a whole, an original work of authorship. For the purposes
46
+ of this License, Derivative Works shall not include works that remain
47
+ separable from, or merely link (or bind by name) to the interfaces of,
48
+ the Work and Derivative Works thereof.
49
+
50
+ "Contribution" shall mean any work of authorship, including
51
+ the original version of the Work and any modifications or additions
52
+ to that Work or Derivative Works thereof, that is intentionally
53
+ submitted to Licensor for inclusion in the Work by the copyright owner
54
+ or by an individual or Legal Entity authorized to submit on behalf of
55
+ the copyright owner. For the purposes of this definition, "submitted"
56
+ means any form of electronic, verbal, or written communication sent
57
+ to the Licensor or its representatives, including but not limited to
58
+ communication on electronic mailing lists, source code control systems,
59
+ and issue tracking systems that are managed by, or on behalf of, the
60
+ Licensor for the purpose of discussing and improving the Work, but
61
+ excluding communication that is conspicuously marked or otherwise
62
+ designated in writing by the copyright owner as "Not a Contribution."
63
+
64
+ "Contributor" shall mean Licensor and any individual or Legal Entity
65
+ on behalf of whom a Contribution has been received by Licensor and
66
+ subsequently incorporated within the Work.
67
+
68
+ 2. Grant of Copyright License. Subject to the terms and conditions of
69
+ this License, each Contributor hereby grants to You a perpetual,
70
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71
+ copyright license to reproduce, prepare Derivative Works of,
72
+ publicly display, publicly perform, sublicense, and distribute the
73
+ Work and such Derivative Works in Source or Object form.
74
+
75
+ 3. Grant of Patent License. Subject to the terms and conditions of
76
+ this License, each Contributor hereby grants to You a perpetual,
77
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78
+ (except as stated in this section) patent license to make, have made,
79
+ use, offer to sell, sell, import, and otherwise transfer the Work,
80
+ where such license applies only to those patent claims licensable
81
+ by such Contributor that are necessarily infringed by their
82
+ Contribution(s) alone or by combination of their Contribution(s)
83
+ with the Work to which such Contribution(s) was submitted. If You
84
+ institute patent litigation against any entity (including a
85
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
86
+ or a Contribution incorporated within the Work constitutes direct
87
+ or contributory patent infringement, then any patent licenses
88
+ granted to You under this License for that Work shall terminate
89
+ as of the date such litigation is filed.
90
+
91
+ 4. Redistribution. You may reproduce and distribute copies of the
92
+ Work or Derivative Works thereof in any medium, with or without
93
+ modifications, and in Source or Object form, provided that You
94
+ meet the following conditions:
95
+
96
+ (a) You must give any other recipients of the Work or
97
+ Derivative Works a copy of this License; and
98
+
99
+ (b) You must cause any modified files to carry prominent notices
100
+ stating that You changed the files; and
101
+
102
+ (c) You must retain, in the Source form of any Derivative Works
103
+ that You distribute, all copyright, patent, trademark, and
104
+ attribution notices from the Source form of the Work,
105
+ excluding those notices that do not pertain to any part of
106
+ the Derivative Works; and
107
+
108
+ (d) If the Work includes a "NOTICE" text file as part of its
109
+ distribution, then any Derivative Works that You distribute must
110
+ include a readable copy of the attribution notices contained
111
+ within such NOTICE file, excluding those notices that do not
112
+ pertain to any part of the Derivative Works, in at least one
113
+ of the following places: within a NOTICE text file distributed
114
+ as part of the Derivative Works; within the Source form or
115
+ documentation, if provided along with the Derivative Works; or,
116
+ within a display generated by the Derivative Works, if and
117
+ wherever such third-party notices normally appear. The contents
118
+ of the NOTICE file are for informational purposes only and
119
+ do not modify the License. You may add Your own attribution
120
+ notices within Derivative Works that You distribute, alongside
121
+ or as an addendum to the NOTICE text from the Work, provided
122
+ that such additional attribution notices cannot be construed
123
+ as modifying the License.
124
+
125
+ You may add Your own copyright statement to Your modifications and
126
+ may provide additional or different license terms and conditions
127
+ for use, reproduction, or distribution of Your modifications, or
128
+ for any such Derivative Works as a whole, provided Your use,
129
+ reproduction, and distribution of the Work otherwise complies with
130
+ the conditions stated in this License.
131
+
132
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
133
+ any Contribution intentionally submitted for inclusion in the Work
134
+ by You to the Licensor shall be under the terms and conditions of
135
+ this License, without any additional terms or conditions.
136
+ Notwithstanding the above, nothing herein shall supersede or modify
137
+ the terms of any separate license agreement you may have executed
138
+ with Licensor regarding such Contributions.
139
+
140
+ 6. Trademarks. This License does not grant permission to use the trade
141
+ names, trademarks, service marks, or product names of the Licensor,
142
+ except as required for reasonable and customary use in describing the
143
+ origin of the Work and reproducing the content of the NOTICE file.
144
+
145
+ 7. Disclaimer of Warranty. Unless required by applicable law or
146
+ agreed to in writing, Licensor provides the Work (and each
147
+ Contributor provides its Contributions) on an "AS IS" BASIS,
148
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149
+ implied, including, without limitation, any warranties or conditions
150
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151
+ PARTICULAR PURPOSE. You are solely responsible for determining the
152
+ appropriateness of using or redistributing the Work and assume any
153
+ risks associated with Your exercise of permissions under this License.
154
+
155
+ 8. Limitation of Liability. In no event and under no legal theory,
156
+ whether in tort (including negligence), contract, or otherwise,
157
+ unless required by applicable law (such as deliberate and grossly
158
+ negligent acts) or agreed to in writing, shall any Contributor be
159
+ liable to You for damages, including any direct, indirect, special,
160
+ incidental, or consequential damages of any character arising as a
161
+ result of this License or out of the use or inability to use the
162
+ Work (including but not limited to damages for loss of goodwill,
163
+ work stoppage, computer failure or malfunction, or any and all
164
+ other commercial damages or losses), even if such Contributor
165
+ has been advised of the possibility of such damages.
166
+
167
+ 9. Accepting Warranty or Additional Liability. While redistributing
168
+ the Work or Derivative Works thereof, You may choose to offer,
169
+ and charge a fee for, acceptance of support, warranty, indemnity,
170
+ or other liability obligations and/or rights consistent with this
171
+ License. However, in accepting such obligations, You may act only
172
+ on Your own behalf and on Your sole responsibility, not on behalf
173
+ of any other Contributor, and only if You agree to indemnify,
174
+ defend, and hold each Contributor harmless for any liability
175
+ incurred by, or claims asserted against, such Contributor by reason
176
+ of your accepting any such warranty or additional liability.
177
+
178
+ END OF TERMS AND CONDITIONS
179
+
180
+ APPENDIX: How to apply the Apache License to your work.
181
+
182
+ To apply the Apache License to your work, attach the following
183
+ boilerplate notice, with the fields enclosed by brackets "[]"
184
+ replaced with your own identifying information. (Don't include
185
+ the brackets!) The text should be enclosed in the appropriate
186
+ comment syntax for the file format. We also recommend that a
187
+ file or class name and description of purpose be included on the
188
+ same "printed page" as the copyright notice for easier
189
+ identification within third-party archives.
190
+
191
+ Copyright 2018-2019 Open-MMLab.
192
+
193
+ Licensed under the Apache License, Version 2.0 (the "License");
194
+ you may not use this file except in compliance with the License.
195
+ You may obtain a copy of the License at
196
+
197
+ http://www.apache.org/licenses/LICENSE-2.0
198
+
199
+ Unless required by applicable law or agreed to in writing, software
200
+ distributed under the License is distributed on an "AS IS" BASIS,
201
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202
+ See the License for the specific language governing permissions and
203
+ limitations under the License.
MANIFEST.in ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ include mmaction/.mim/model-index.yml
2
+ include mmaction/.mim/dataset-index.yml
3
+ recursive-include mmaction/.mim/configs *.py *.yml
4
+ recursive-include mmaction/.mim/tools *.sh *.py
README.md CHANGED
@@ -1,12 +1,92 @@
1
- ---
2
- title: Deepfake Detector
3
- emoji: 💻
4
- colorFrom: pink
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.47.2
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: GenVidBench - Video Action Recognition
3
+ emoji: 🎬
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ short_description: State-of-the-art video action recognition using MMAction2
12
+ ---
13
+
14
+ # GenVidBench - Video Action Recognition
15
+
16
+ A powerful video analysis tool that uses state-of-the-art deep learning models to recognize actions and activities in videos. Built on top of MMAction2 framework with a user-friendly Gradio interface.
17
+
18
+ ## 🚀 Features
19
+
20
+ - **Action Recognition**: Identify actions and activities in videos using TSN (Temporal Segment Networks)
21
+ - **Top-5 Predictions**: Get the most likely actions with confidence scores
22
+ - **Multiple Formats**: Support for MP4, AVI, MOV, and other video formats
23
+ - **Real-time Processing**: Fast inference optimized for web deployment
24
+ - **User-friendly Interface**: Clean and intuitive Gradio web interface
25
+
26
+ ## 🎯 Model Details
27
+
28
+ This demo uses:
29
+ - **Model**: TSN (Temporal Segment Networks) with ResNet-50 backbone
30
+ - **Dataset**: Trained on Kinetics-400 dataset (400 action classes)
31
+ - **Framework**: MMAction2 (OpenMMLab)
32
+ - **Input**: RGB video frames
33
+ - **Output**: Top-5 action predictions with confidence scores
34
+
35
+ ## 🛠️ Technical Stack
36
+
37
+ - **Backend**: Python, PyTorch, MMAction2
38
+ - **Frontend**: Gradio
39
+ - **Video Processing**: OpenCV, Decord
40
+ - **Deployment**: Hugging Face Spaces
41
+
42
+ ## 📖 How to Use
43
+
44
+ 1. **Upload Video**: Click the upload area or drag and drop your video file
45
+ 2. **Wait for Processing**: The model will analyze your video (usually takes a few seconds)
46
+ 3. **View Results**: See the top 5 predicted actions with confidence scores
47
+
48
+ ## 💡 Tips for Best Results
49
+
50
+ - **Video Length**: Shorter videos (under 30 seconds) process faster
51
+ - **Video Quality**: Clear, well-lit videos work best
52
+ - **Action Clarity**: Videos with clear, distinct actions yield better results
53
+ - **Supported Formats**: MP4, AVI, MOV, and other common video formats
54
+
55
+ ## 🔬 Supported Actions
56
+
57
+ The model can recognize 400 different action classes from the Kinetics-400 dataset, including:
58
+ - Sports activities (basketball, soccer, tennis, etc.)
59
+ - Daily activities (cooking, cleaning, reading, etc.)
60
+ - Physical exercises (push-ups, jumping jacks, etc.)
61
+ - Musical activities (playing instruments, singing, etc.)
62
+ - And many more!
63
+
64
+ ## 🏗️ Architecture
65
+
66
+ ```
67
+ Video Input → Frame Sampling → Feature Extraction → Classification → Top-5 Predictions
68
+ ```
69
+
70
+ ## 📊 Performance
71
+
72
+ - **Accuracy**: State-of-the-art performance on Kinetics-400
73
+ - **Speed**: Optimized for real-time inference
74
+ - **Memory**: Efficient GPU/CPU utilization
75
+
76
+ ## 🤝 Contributing
77
+
78
+ This project is part of the GenVidBench framework. Contributions are welcome!
79
+
80
+ ## 📄 License
81
+
82
+ This project is licensed under the Apache License 2.0 - see the LICENSE file for details.
83
+
84
+ ## 🙏 Acknowledgments
85
+
86
+ - [MMAction2](https://github.com/open-mmlab/mmaction2) - The underlying framework
87
+ - [OpenMMLab](https://openmmlab.com/) - For the excellent computer vision tools
88
+ - [Hugging Face](https://huggingface.co/) - For the deployment platform
89
+
90
+ ---
91
+
92
+ **Note**: This is a demonstration of video action recognition capabilities. For production use, consider additional validation and error handling.
README_zh-CN.md ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <img src="https://github.com/open-mmlab/mmaction2/raw/main/resources/mmaction2_logo.png" width="600"/>
3
+ <div>&nbsp;</div>
4
+ <div align="center">
5
+ <b><font size="5">OpenMMLab 官网</font></b>
6
+ <sup>
7
+ <a href="https://openmmlab.com">
8
+ <i><font size="4">HOT</font></i>
9
+ </a>
10
+ </sup>
11
+ &nbsp;&nbsp;&nbsp;&nbsp;
12
+ <b><font size="5">OpenMMLab 开放平台</font></b>
13
+ <sup>
14
+ <a href="https://platform.openmmlab.com">
15
+ <i><font size="4">TRY IT OUT</font></i>
16
+ </a>
17
+ </sup>
18
+ </div>
19
+
20
+ [![Documentation](https://readthedocs.org/projects/mmaction2/badge/?version=latest)](https://mmaction2.readthedocs.io/en/latest/)
21
+ [![actions](https://github.com/open-mmlab/mmaction2/workflows/build/badge.svg)](https://github.com/open-mmlab/mmaction2/actions)
22
+ [![codecov](https://codecov.io/gh/open-mmlab/mmaction2/branch/main/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmaction2)
23
+ [![PyPI](https://img.shields.io/pypi/v/mmaction2)](https://pypi.org/project/mmaction2/)
24
+ [![LICENSE](https://img.shields.io/github/license/open-mmlab/mmaction2.svg)](https://github.com/open-mmlab/mmaction2/blob/main/LICENSE)
25
+ [![Average time to resolve an issue](https://isitmaintained.com/badge/resolution/open-mmlab/mmaction2.svg)](https://github.com/open-mmlab/mmaction2/issues)
26
+ [![Percentage of issues still open](https://isitmaintained.com/badge/open/open-mmlab/mmaction2.svg)](https://github.com/open-mmlab/mmaction2/issues)
27
+
28
+ [📘中文文档](https://mmaction2.readthedocs.io/zh_CN/latest/index.html) |
29
+ [🛠️安装指南](https://mmaction2.readthedocs.io/zh_CN/latest/get_started/installation.html) |
30
+ [👀模型库](https://mmaction2.readthedocs.io/zh_CN/latest/modelzoo_statistics.html) |
31
+ [🆕更新日志](https://mmaction2.readthedocs.io/en/latest/notes/changelog.html) |
32
+ [🚀进行中项目](https://github.com/open-mmlab/mmaction2/projects) |
33
+ [🤔报告问题](https://github.com/open-mmlab/mmaction2/issues/new/choose)
34
+
35
+ </div>
36
+
37
+ <div align="center">
38
+ <a href="https://openmmlab.medium.com/" style="text-decoration:none;">
39
+ <img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
40
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
41
+ <a href="https://discord.com/channels/1037617289144569886/1046608014234370059" style="text-decoration:none;">
42
+ <img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
43
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
44
+ <a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
45
+ <img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
46
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
47
+ <a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
48
+ <img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
49
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
50
+ <a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
51
+ <img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
52
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
53
+ <a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
54
+ <img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
55
+ </div>
56
+
57
+ [English](/README.md) | 简体中文
58
+
59
+ ## 📄 目录
60
+
61
+ - [📄 目录](#-目录)
62
+ - [🥳 🚀 最新进展](#--最新进展-)
63
+ - [📖 简介](#-简介-)
64
+ - [🎁 主要功能](#-主要功能-)
65
+ - [🛠️ 安装](#️-安装-)
66
+ - [👀 模型库](#-模型库-)
67
+ - [👨‍🏫 新手入门](#-新手入门-)
68
+ - [🎫 许可证](#-许可证-)
69
+ - [🖊️ 引用](#️-引用-)
70
+ - [🙌 参与贡献](#-参与贡献-)
71
+ - [🤝 致谢](#-致谢-)
72
+ - [🏗️ OpenMMLab 的其他项目](#️-openmmlab-的其他项目-)
73
+ - [❤️ 欢迎加入 OpenMMLab 社区](#️-欢迎加入-openmmlab-社区-)
74
+
75
+ ## 🥳 🚀 最新进展 [🔝](#-table-of-contents)
76
+
77
+ **默认分支已经从 `master` (当前的`0.x`) 切换到 `main`(之前的 `1.x`),我们建议用户更新至最新版本,其支持更多模型,更强的预训练权重,以及更简洁的代码实现。详情请参阅[迁移指南](https://mmaction2.readthedocs.io/zh_cn/latest/migration.html)**
78
+
79
+ **Release (2023.07.04)**: v1.1.0 支持以下新功能:
80
+
81
+ - 支持基于 CLIP 的多模态模型: ActionCLIP(Arxiv'2021) 和 CLIP4clip(ArXiv'2022)
82
+ - 支持丰富的 project: 手势识别, 时空行为检测 tutorial, 以及基于 [MMRazor](https://github.com/open-mmlab/mmrazor) 的知识蒸馏
83
+ - 支持 HACS-segments 数据集(ICCV'2019), MultiSports 数据集(ICCV'2021), Kinetics-710 数据集(Arxiv'2022)
84
+ - 支持 VideoMAE V2(CVPR'2023), VideoMAE(NeurIPS'2022) 支持时空行为检测任务
85
+ - 支持 TCANet(CVPR'2021)
86
+ - 支持 [纯 Python 风格的配置文件](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta) 和使用 MIM 一键下载数据集
87
+
88
+ ## 📖 简介 [🔝](#-table-of-contents)
89
+
90
+ MMAction2 是一款基于 PyTorch 开发的行为识别开源工具包,是 [open-mmlab](https://github.com/open-mmlab) 项目的一个子项目。
91
+
92
+ <div align="center">
93
+ <img src="https://github.com/open-mmlab/mmaction2/raw/main/resources/mmaction2_overview.gif" width="380px">
94
+ <img src="https://user-images.githubusercontent.com/34324155/123989146-2ecae680-d9fb-11eb-916b-b9db5563a9e5.gif" width="380px">
95
+ <p style="font-size:1.5vw;"> Kinetics-400 数据集行为识别结果(左) 和 NTU-RGB+D-120 数据集基于骨架的行为识别结果(右)</p>
96
+ </div>
97
+
98
+ <div align="center">
99
+ <img src="https://user-images.githubusercontent.com/30782254/155710881-bb26863e-fcb4-458e-b0c4-33cd79f96901.gif" width="580px"/><br>
100
+ <p style="font-size:1.5vw;">Kinetics-400 数据集基于骨骼点的时空行为检测及视频行为识别结果</p>
101
+ </div>
102
+ <div align="center">
103
+ <img src="https://github.com/open-mmlab/mmaction2/raw/main/resources/spatio-temporal-det.gif" width="800px"/><br>
104
+ <p style="font-size:1.5vw;">AVA-2.1 数据集时空行为检测结果</p>
105
+ </div>
106
+
107
+ ## 🎁 主要功能 [🔝](#-table-of-contents)
108
+
109
+ - **模块化设计**: 我们将视频理解框架拆分成了不同模块,用户可以很方便地通过组合不同的模块来构建出自定义的视频理解框架。
110
+
111
+ - **支持五种主要的视频理解任务**: MMAction2 为视频理解任务实现了多种多样的算法,包括行为识别,时序动作定位,时空动作检测,基于骨骼点的行为识别,以及视频检索。
112
+
113
+ - **详尽的单元测试和文档**:我们提供了详尽的文档和 API 参考手册,以及单元测试。
114
+
115
+ ## 🛠️ 安装 [🔝](#-table-of-contents)
116
+
117
+ MMAction2依赖于 [PyTorch](https://pytorch.org/),[MMCV](https://github.com/open-mmlab/mmcv),[MMEngine](https://github.com/open-mmlab/mmengine),[MMDetection](https://github.com/open-mmlab/mmdetection) (可选)和 [MMPose](https://github.com/open-mmlab/mmpose) (可选)
118
+
119
+ 具体步骤请参考 [安装文档](https://mmaction2.readthedocs.io/zh_cn/latest/get_started/installation.html)。
120
+
121
+ <details close>
122
+ <summary>快速安装</summary>
123
+
124
+ ```shell
125
+ conda create --name openmmlab python=3.8 -y
126
+ conda activate openmmlab
127
+ conda install pytorch torchvision -c pytorch # 该命令将自动安装最新版的 PyTorch 和 cudatoolkit,请确认此是否匹配你的当前环境。
128
+ pip install -U openmim
129
+ mim install mmengine
130
+ mim install mmcv
131
+ mim install mmdet # 可选
132
+ mim install mmpose # 可选
133
+ git clone https://github.com/open-mmlab/mmaction2.git
134
+ cd mmaction2
135
+ pip install -v -e .
136
+ ```
137
+
138
+ </details>
139
+
140
+ ## 👀 模型库 [🔝](#-table-of-contents)
141
+
142
+ 结果及模型位于[模型库](https://mmaction2.readthedocs.io/zh_cn/latest/modelzoo_statistics.html)
143
+
144
+ <details close>
145
+
146
+ <summary>模型支持</summary>
147
+
148
+ <table style="margin-left:auto;margin-right:auto;font-size:1.3vw;padding:3px 5px;text-align:center;vertical-align:center;">
149
+ <tr>
150
+ <td colspan="5" style="font-weight:bold;">行为识别</td>
151
+ </tr>
152
+ <tr>
153
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/c3d/README.md">C3D</a> (CVPR'2014)</td>
154
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tsn/README.md">TSN</a> (ECCV'2016)</td>
155
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/i3d/README.md">I3D</a> (CVPR'2017)</td>
156
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/c2d/README.md">C2D</a> (CVPR'2018)</td>
157
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/i3d/README.md">I3D Non-Local</a> (CVPR'2018)</td>
158
+ </tr>
159
+ <tr>
160
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/r2plus1d/README.md">R(2+1)D</a> (CVPR'2018)</td>
161
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/trn/README.md">TRN</a> (ECCV'2018)</td>
162
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tsm/README.md">TSM</a> (ICCV'2019)</td>
163
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tsm/README.md">TSM Non-Local</a> (ICCV'2019)</td>
164
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/slowonly/README.md">SlowOnly</a> (ICCV'2019)</td>
165
+ </tr>
166
+ <tr>
167
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/slowfast/README.md">SlowFast</a> (ICCV'2019)</td>
168
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/csn/README.md">CSN</a> (ICCV'2019)</td>
169
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tin/README.md">TIN</a> (AAAI'2020)</td>
170
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tpn/README.md">TPN</a> (CVPR'2020)</td>
171
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/x3d/README.md">X3D</a> (CVPR'2020)</td>
172
+ </tr>
173
+ <tr>
174
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition_audio/resnet/README.md">MultiModality: Audio</a> (ArXiv'2020)</td>
175
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/tanet/README.md">TANet</a> (ArXiv'2020)</td>
176
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/timesformer/README.md">TimeSformer</a> (ICML'2021)</td>
177
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/projects/actionclip/README.md">ActionCLIP</a> (ArXiv'2021)</td>
178
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/swin/README.md">VideoSwin</a> (CVPR'2022)</td>
179
+ </tr>
180
+ <tr>
181
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/videomae/README.md">VideoMAE</a> (NeurIPS'2022)</td>
182
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/mvit/README.md">MViT V2</a> (CVPR'2022)</td>
183
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/uniformer/README.md">UniFormer V1</a> (ICLR'2022)</td>
184
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/uniformerv2/README.md">UniFormer V2</a> (Arxiv'2022)</td>
185
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/videomaev2/README.md">VideoMAE V2</a> (CVPR'2023)</td>
186
+ </tr>
187
+ <tr>
188
+ <td colspan="5" style="font-weight:bold;">时序动作定位</td>
189
+ </tr>
190
+ <tr>
191
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/localization/bsn/README.md">BSN</a> (ECCV'2018)</td>
192
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/localization/bmn/README.md">BMN</a> (ICCV'2019)</td>
193
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/localization/tcanet/README.md">TCANet</a> (CVPR'2021)</td>
194
+ <td></td>
195
+ <td></td>
196
+ </tr>
197
+ <tr>
198
+ <td colspan="5" style="font-weight:bold;">时空行为检测</td>
199
+ </tr>
200
+ <tr>
201
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/detection/acrn/README.md">ACRN</a> (ECCV'2018)</td>
202
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/detection/slowonly/README.md">SlowOnly+Fast R-CNN</a> (ICCV'2019)</td>
203
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/detection/slowfast/README.md">SlowFast+Fast R-CNN</a> (ICCV'2019)</td>
204
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/detection/lfb/README.md">LFB</a> (CVPR'2019)</td>
205
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/recognition/videomae/README.md">VideoMAE</a> (NeurIPS'2022)</td>
206
+ </tr>
207
+ <tr>
208
+ <td colspan="5" style="font-weight:bold;">基于骨骼点的行为识别</td>
209
+ </tr>
210
+ <tr>
211
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/skeleton/stgcn/README.md">ST-GCN</a> (AAAI'2018)</td>
212
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/skeleton/2s-agcn/README.md">2s-AGCN</a> (CVPR'2019)</td>
213
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/skeleton/posec3d/README.md">PoseC3D</a> (CVPR'2022)</td>
214
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/skeleton/stgcnpp/README.md">STGCN++</a> (ArXiv'2022)</td>
215
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/projects/ctrgcn/README.md">CTRGCN</a> (CVPR'2021)</td>
216
+ </tr>
217
+ <tr>
218
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/projects/msg3d/README.md">MSG3D</a> (CVPR'2020)</td>
219
+ <td></td>
220
+ <td></td>
221
+ <td></td>
222
+ <td></td>
223
+ </tr>
224
+ <tr>
225
+ <td colspan="5" style="font-weight:bold;">视频检索</td>
226
+ </tr>
227
+ <tr>
228
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/configs/retrieval/clip4clip/README.md">CLIP4Clip</a> (ArXiv'2022)</td>
229
+ <td></td>
230
+ <td></td>
231
+ <td></td>
232
+ <td></td>
233
+ </tr>
234
+
235
+ </table>
236
+
237
+ </details>
238
+
239
+ <details close>
240
+
241
+ <summary>数据集支持</summary>
242
+
243
+ <table style="margin-left:auto;margin-right:auto;font-size:1.3vw;padding:3px 5px;text-align:center;vertical-align:center;">
244
+ <tr>
245
+ <td colspan="4" style="font-weight:bold;">行为识别</td>
246
+ </tr>
247
+ <tr>
248
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/hmdb51/README.md">HMDB51</a> (<a href="https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/">官网</a>) (ICCV'2011)</td>
249
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/ucf101/README.md">UCF101</a> (<a href="https://www.crcv.ucf.edu/research/data-sets/ucf101/">官网</a>) (CRCV-IR-12-01)</td>
250
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/activitynet/README.md">ActivityNet</a> (<a href="http://activity-net.org/">官网</a>) (CVPR'2015)</td>
251
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/kinetics/README.md">Kinetics-[400/600/700]</a> (<a href="https://deepmind.com/research/open-source/kinetics/">官网</a>) (CVPR'2017)</td>
252
+ </tr>
253
+ <tr>
254
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/sthv1/README.md">SthV1</a> (ICCV'2017)</td>
255
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/sthv2/README.md">SthV2</a> (<a href="https://developer.qualcomm.com/software/ai-datasets/something-something">官网</a>) (ICCV'2017)</td>
256
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/diving48/README.md">Diving48</a> (<a href="http://www.svcl.ucsd.edu/projects/resound/dataset.html">官网</a>) (ECCV'2018)</td>
257
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/jester/README.md">Jester</a> (<a href="https://developer.qualcomm.com/software/ai-datasets/jester">官网</a>) (ICCV'2019)</td>
258
+ </tr>
259
+ <tr>
260
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/mit/README.md">Moments in Time</a> (<a href="http://moments.csail.mit.edu/">官网</a>) (TPAMI'2019)</td>
261
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/mmit/README.md">Multi-Moments in Time</a> (<a href="http://moments.csail.mit.edu/challenge_iccv_2019.html">官网</a>) (ArXiv'2019)</td>
262
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/hvu/README.md">HVU</a> (<a href="https://github.com/holistic-video-understanding/HVU-Dataset">官网</a>) (ECCV'2020)</td>
263
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/omnisource/README.md">OmniSource</a> (<a href="https://kennymckormick.github.io/omnisource/">官网</a>) (ECCV'2020)</td>
264
+ </tr>
265
+ <tr>
266
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/gym/README.md">FineGYM</a> (<a href="https://sdolivia.github.io/FineGym/">官网</a>) (CVPR'2020)</td>
267
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/kinetics710/README.md">Kinetics-710</a> (<a href="https://arxiv.org/pdf/2211.09552.pdf">官网</a>) (Arxiv'2022)</td>
268
+ <td></td>
269
+ <td></td>
270
+ </tr>
271
+ <tr>
272
+ <td colspan="4" style="font-weight:bold;">时序动作定位</td>
273
+ </tr>
274
+ <tr>
275
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/thumos14/README.md">THUMOS14</a> (<a href="https://www.crcv.ucf.edu/THUMOS14/download.html">官网</a>) (THUMOS Challenge 2014)</td>
276
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/activitynet/README.md">ActivityNet</a> (<a href="http://activity-net.org/">官网</a>) (CVPR'2015)</td>
277
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/hacs/README.md">HACS</a> (<a href="https://github.com/hangzhaomit/HACS-dataset">官网</a>) (ICCV'2019)</td>
278
+ <td></td>
279
+ </tr>
280
+ <tr>
281
+ <td colspan="4" style="font-weight:bold;">时空行为检测</td>
282
+ </tr>
283
+ <tr>
284
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/ucf101_24/README.md">UCF101-24*</a> (<a href="http://www.thumos.info/download.html">官网</a>) (CRCV-IR-12-01)</td>
285
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/jhmdb/README.md">JHMDB*</a> (<a href="http://jhmdb.is.tue.mpg.de/">官网</a>) (ICCV'2015)</td>
286
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/ava/README.md">AVA</a> (<a href="https://research.google.com/ava/index.html">官网</a>) (CVPR'2018)</td>
287
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/ava_kinetics/README.md">AVA-Kinetics</a> (<a href="https://research.google.com/ava/index.html">官网</a>) (Arxiv'2020)</td>
288
+ </tr>
289
+ <tr>
290
+ <td colspan="4" style="font-weight:bold;">基于骨架的行为识别</td>
291
+ </tr>
292
+ <tr>
293
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/skeleton/README.md">PoseC3D-FineGYM</a> (<a href="https://kennymckormick.github.io/posec3d/">官网</a>) (ArXiv'2021)</td>
294
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/skeleton/README.md">PoseC3D-NTURGB+D</a> (<a href="https://kennymckormick.github.io/posec3d/">官网</a>) (ArXiv'2021)</td>
295
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/skeleton/README.md">PoseC3D-UCF101</a> (<a href="https://kennymckormick.github.io/posec3d/">官网</a>) (ArXiv'2021)</td>
296
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/skeleton/README.md">PoseC3D-HMDB51</a> (<a href="https://kennymckormick.github.io/posec3d/">官网</a>) (ArXiv'2021)</td>
297
+ </tr>
298
+ <tr>
299
+ <td colspan="4" style="font-weight:bold;">视频检索</td>
300
+ </tr>
301
+ <tr>
302
+ <td><a href="https://github.com/open-mmlab/mmaction2/blob/main/tools/data/video_retrieval/README.md">MSRVTT</a> (<a href="https://www.microsoft.com/en-us/research/publication/msr-vtt-a-large-video-description-dataset-for-bridging-video-and-language/">官网</a>) (CVPR'2016)</td>
303
+ <td></td>
304
+ <td></td>
305
+ <td></td>
306
+ </tr>
307
+ </table>
308
+
309
+ </details>
310
+
311
+ ## 👨‍🏫 新手入门 [🔝](#-table-of-contents)
312
+
313
+ 我们提供了一系列简明的教程,帮助新用户轻松上手使用:
314
+
315
+ - [从 MMAction2 0.X 迁移](https://mmaction2.readthedocs.io/zh_cn/latest/migration.html)
316
+ - [学习配置相关知识](https://mmaction2.readthedocs.io/zh_cn/latest/user_guides/config.html)
317
+ - [准备数据集](https://mmaction2.readthedocs.io/zh_cn/latest/user_guides/prepare_dataset.html)
318
+ - [使用现有模型进行推理](https://mmaction2.readthedocs.io/zh_cn/latest/user_guides/inference.html)
319
+ - [训练与测试](https://mmaction2.readthedocs.io/zh_cn/latest/user_guides/train_test.html)
320
+
321
+ <details close>
322
+ <summary>基于 MMAction2 的社区工作</summary>
323
+
324
+ - Video Swin Transformer. [\[paper\]](https://arxiv.org/abs/2106.13230)[\[github\]](https://github.com/SwinTransformer/Video-Swin-Transformer)
325
+ - Evidential Deep Learning for Open Set Action Recognition, ICCV 2021 **Oral**. [\[paper\]](https://arxiv.org/abs/2107.10161)[\[github\]](https://github.com/Cogito2012/DEAR)
326
+ - Rethinking Self-supervised Correspondence Learning: A Video Frame-level Similarity Perspective, ICCV 2021 **Oral**. [\[paper\]](https://arxiv.org/abs/2103.17263)[\[github\]](https://github.com/xvjiarui/VFS)
327
+
328
+ </details>
329
+
330
+ ## 🎫 许可证 [🔝](#-table-of-contents)
331
+
332
+ 本项目基于 [Apache 2.0 license](LICENSE) 发布。
333
+
334
+ ## 🖊️ 引用 [🔝](#-table-of-contents)
335
+
336
+ 如你发现本项目对你的研究有帮助,请参考如下 bibtex 引用 MMAction2。
337
+
338
+ ```BibTeX
339
+ @misc{2020mmaction2,
340
+ title={OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark},
341
+ author={MMAction2 Contributors},
342
+ howpublished = {\url{https://github.com/open-mmlab/mmaction2}},
343
+ year={2020}
344
+ }
345
+ ```
346
+
347
+ ## 🙌 参与贡献 [🔝](#-table-of-contents)
348
+
349
+ 我们感谢所有的贡献者为改进和提升 MMAction2 所作出的努力。请参考[贡献指南](https://github.com/open-mmlab/mmcv/blob/2.x/CONTRIBUTING.md)来了解参与项目贡献的相关指引。
350
+
351
+ ## 🤝 致谢 [🔝](#-table-of-contents)
352
+
353
+ MMAction2 是一款由来自不同高校和企业的研发人员共同参与贡献的开源项目。我们感谢所有为项目提供算法复现和新功能支持的贡献者,以及提供宝贵反馈的用户。 我们希望此工具箱可以帮助大家来复现已有的方法和开发新的方法,从而为研究社区贡献力量。
354
+
355
+ ## 🏗️ OpenMMLab 的其他项目 [🔝](#-table-of-contents)
356
+
357
+ - [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab 深度学习模型训练基础库
358
+ - [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab 计算机视觉基础库
359
+ - [MIM](https://github.com/open-mmlab/mim): MIM 是 OpenMMlab 项目、算法、模型的统一入口
360
+ - [MMEval](https://github.com/open-mmlab/mmeval): 统一开放的跨框架算法评测库
361
+ - [MMPreTrain](https://github.com/open-mmlab/mmpretrain): OpenMMLab 深度学习预训练工具箱
362
+ - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱
363
+ - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D 目标检测平台
364
+ - [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab 旋转框检测工具箱与测试基准
365
+ - [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO 系列工具箱与测试基准
366
+ - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱
367
+ - [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包
368
+ - [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱
369
+ - [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 人体参数化模型工具箱与测试基准
370
+ - [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监督学习工具箱与测试基准
371
+ - [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准
372
+ - [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准
373
+ - [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱
374
+ - [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台
375
+ - [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab 光流估计工具箱与测试基准
376
+ - [MMagic](https://github.com/open-mmlab/mmagic): OpenMMLab 新一代人工智能内容生成(AIGC)工具箱
377
+ - [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 图片视频生成模型工具箱
378
+ - [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架
379
+ - [Playground](https://github.com/open-mmlab/playground): 收集和展示 OpenMMLab 相关的前沿、有趣的社区项目
380
+
381
+ ## ❤️ 欢迎加入 OpenMMLab 社区 [🔝](#-table-of-contents)
382
+
383
+ 扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab),扫描下方微信二维码添加喵喵好友,进入 MMAction2 微信交流社群。【加好友申请格式:研究方向+地区+学校/公司+姓名】
384
+
385
+ <div align="center">
386
+ <img src="./resources/zhihu_qrcode.jpg" height="400"/> <img src="./resources/miaomiao_qrcode.jpg" height="400"/>
387
+ </div>
388
+
389
+ 我们会在 OpenMMLab 社区为大家
390
+
391
+ - 📢 分享 AI 框架的前沿核心技术
392
+ - 💻 解读 PyTorch 常用模块源码
393
+ - 📰 发布 OpenMMLab 的相关新闻
394
+ - 🚀 介绍 OpenMMLab 开发的前沿算法
395
+ - 🏃 获取更高效的问题答疑和意见反馈
396
+ - 🔥 提供与各行各业开发者充分交流的平台
397
+
398
+ 干货满满 📘,等你来撩 💗,OpenMMLab 社区期待您的加入 👬
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from operator import itemgetter
4
+ from mmaction.apis import init_recognizer, inference_recognizer
5
+ import gradio as gr
6
+
7
+ # Set paths for Hugging Face Spaces
8
+ config_file = 'demo/demo_configs/tsn_r50_1x1x8_video_infer.py'
9
+ checkpoint_file = 'checkpoints/tsn_r50_8xb32-1x1x8-100e_kinetics400-rgb_20220818-2692d16c.pth'
10
+
11
+ # Download model checkpoint if it doesn't exist
12
def download_checkpoint(path=None):
    """Verify that the model checkpoint exists on disk.

    Args:
        path (str | None): Checkpoint file path to check. Defaults to the
            module-level ``checkpoint_file`` when not given (backward
            compatible with the original zero-argument call).

    Returns:
        bool: True if the checkpoint is present, False otherwise. When the
        file is missing, its parent directory is created so a later manual
        or scripted download has somewhere to land, and instructions are
        printed for the user.
    """
    if path is None:
        path = checkpoint_file
    if os.path.exists(path):
        return True
    # Create the checkpoint's own parent directory (the original hard-coded
    # 'checkpoints', which broke for any other path); fall back to
    # 'checkpoints' when the path has no directory component.
    os.makedirs(os.path.dirname(path) or 'checkpoints', exist_ok=True)
    print("Model checkpoint not found. Please run 'python download_model.py' to download it.")
    print("Or place the checkpoint file manually at:", path)
    return False
19
+
20
# Initialize model
# NOTE: this runs at import time — the model is loaded once and reused by
# every Gradio request. If the checkpoint or config is missing/broken the
# process exits immediately rather than serving a half-initialized app.
print("Initializing model...")
if not download_checkpoint():
    print("❌ Cannot initialize model without checkpoint. Exiting...")
    exit(1)

try:
    # device='cpu' — presumably chosen for Hugging Face Spaces CPU-only
    # hardware; TODO confirm before enabling GPU inference.
    model = init_recognizer(config_file, checkpoint_file, device='cpu')
    print("✅ Model loaded successfully!")
except Exception as e:
    # Broad catch is deliberate here: any failure mode (bad config, corrupt
    # checkpoint, missing dependency) should abort startup with a message.
    print(f"❌ Error loading model: {e}")
    print("Please check that the config file and checkpoint are correct.")
    exit(1)
33
+ # test a single video and show the result:
34
+ # video = 'demo.mp4'
35
+ # label = '../tools/data/kinetics/label_map_k400.txt'
36
+ # results = inference_recognizer(model, video)
37
+
38
+ # pred_scores = results.pred_score.tolist()
39
+ # score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))
40
+ # score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)
41
+ # top5_label = score_sorted[:5]
42
+
43
+ # labels = open(label).readlines()
44
+ # labels = [x.strip() for x in labels]
45
+ # results = [(labels[k[0]], k[1]) for k in top5_label]
46
+
47
+
48
+ # # show the results
49
+ # for result in results:
50
+ # print(f'{result[0]}: ', result[1])
51
+
52
+
53
def analyze_video(video):
    """Run action recognition on an uploaded clip.

    Args:
        video: Path to the uploaded video file (Gradio passes a filepath),
            or None when nothing was uploaded.

    Returns:
        str: Human-readable top-5 predictions, or an error message.
    """
    if video is None:
        return "Please upload a video file."

    try:
        print(f"Processing video: {video}")
        results = inference_recognizer(model, video)

        # Without per-class scores we can only echo the raw result object.
        if not hasattr(results, 'pred_score'):
            return f"Analysis complete. Raw result: {results}"

        scores = results.pred_score.tolist()
        # Rank (class_index, score) pairs by score, keep the best five.
        ranked = sorted(enumerate(scores), key=itemgetter(1), reverse=True)[:5]

        # Map class indices to names when the label map is available.
        label_file = 'tools/data/kinetics/label_map_k400.txt'
        if os.path.exists(label_file):
            with open(label_file, 'r') as f:
                names = [line.strip() for line in f.readlines()]
            top5 = [(names[idx], f"{score:.4f}") for idx, score in ranked]
        else:
            top5 = [(f"Class {idx}", f"{score:.4f}") for idx, score in ranked]

        pieces = ["Top 5 Predictions:"]
        for rank, (name, score) in enumerate(top5, 1):
            pieces.append(f"{rank}. {name}: {score}")
        return "\n".join(pieces) + "\n"

    except Exception as e:
        # Surface any failure to the UI textbox instead of crashing the app.
        return f"Error processing video: {str(e)}"
88
+
89
+ # Create Gradio interface
90
# Gradio UI definition. Built at import time; `demo.launch()` starts the
# web server only when this file is executed directly (e.g. on HF Spaces).
demo = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(label="Upload Video", height=300),
    outputs=gr.Textbox(label="Analysis Results", lines=10),
    title="🎬 GenVidBench - Video Action Recognition",
    description="""
    Upload a video to analyze its content using state-of-the-art action recognition models.
    This demo uses TSN (Temporal Segment Networks) trained on Kinetics-400 dataset.

    **Supported formats:** MP4, AVI, MOV, etc.
    **Max duration:** Recommended under 30 seconds for faster processing.
    """,
    # BUG FIX: the original `[ [...] if os.path.exists(...) else None ]`
    # always produced a one-element list, so a missing demo clip yielded
    # `examples=[None]` and Gradio rendered a broken empty example. Build
    # the list conditionally and pass None to disable examples entirely.
    examples=[["demo/demo.mp4"]] if os.path.exists("demo/demo.mp4") else None,
    cache_examples=False,
    theme=gr.themes.Soft(),
    allow_flagging="never")

if __name__ == "__main__":
    demo.launch()
112
+
113
+
114
+
115
+
checkpoints/tsn_r50_8xb32-1x1x8-100e_kinetics400-rgb_20220818-2692d16c.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2692d16c712e24994aaa3cfb48f957a521e053ffb81c474e2c0b3e579c888650
3
+ size 97641409
configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Default runtime settings shared by MMAction2 configs via `_base_`.
default_scope = 'mmaction'  # registry scope for resolving unprefixed types

default_hooks = dict(
    runtime_info=dict(type='RuntimeInfoHook'),
    timer=dict(type='IterTimerHook'),
    # Log every 20 iterations; do not suppress the last partial window.
    logger=dict(type='LoggerHook', interval=20, ignore_last=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    # Save a checkpoint every epoch and track the best one automatically.
    checkpoint=dict(type='CheckpointHook', interval=1, save_best='auto'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    sync_buffers=dict(type='SyncBuffersHook'))

env_cfg = dict(
    cudnn_benchmark=False,  # disable cudnn autotuning (input sizes may vary)
    # opencv_num_threads=0 avoids oversubscription with dataloader workers.
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))

# Smooth logged scalars over a 20-iteration window, report by epoch.
log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(type='ActionVisualizer', vis_backends=vis_backends)

log_level = 'INFO'
load_from = None  # optional checkpoint to initialize weights from
resume = False    # do not auto-resume an interrupted run
configs/_base_/models/audioonly_r50.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# model settings
# Audio-only recognizer: ResNet-50-style audio backbone feeding a TSN-style
# audio classification head.
model = dict(
    type='RecognizerAudio',
    backbone=dict(
        type='ResNetAudio',
        depth=50,
        pretrained=None,  # train from scratch
        in_channels=1,    # single-channel audio feature input
        norm_eval=False),
    cls_head=dict(
        type='TSNAudioHead',
        num_classes=400,  # presumably Kinetics-400 — confirm with dataset config
        in_channels=1024,
        dropout_ratio=0.5,
        init_std=0.01,    # std for classifier weight initialization
        average_clips='prob'))  # how multi-clip scores are fused at test time
configs/_base_/models/bmn_400x100.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# model settings
# BMN (Boundary-Matching Network) temporal action proposal generator;
# filename suggests 400-dim features over 100 temporal positions.
model = dict(
    type='BMN',
    temporal_dim=100,       # temporal length of the input feature sequence
    boundary_ratio=0.5,
    num_samples=32,
    num_samples_per_bin=3,
    feat_dim=400,           # input feature dimensionality
    # Soft-NMS parameters for proposal post-processing:
    soft_nms_alpha=0.4,
    soft_nms_low_threshold=0.5,
    soft_nms_high_threshold=0.9,
    post_process_top_k=100)  # keep at most 100 proposals
configs/_base_/models/bsn_pem.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='PEM',
4
+ pem_feat_dim=32,
5
+ pem_hidden_dim=256,
6
+ pem_u_ratio_m=1,
7
+ pem_u_ratio_l=2,
8
+ pem_high_temporal_iou_threshold=0.6,
9
+ pem_low_temporal_iou_threshold=0.2,
10
+ soft_nms_alpha=0.75,
11
+ soft_nms_low_threshold=0.65,
12
+ soft_nms_high_threshold=0.9,
13
+ post_process_top_k=100)
configs/_base_/models/bsn_tem.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='TEM',
4
+ temporal_dim=100,
5
+ boundary_ratio=0.1,
6
+ tem_feat_dim=400,
7
+ tem_hidden_dim=512,
8
+ tem_match_threshold=0.5)
configs/_base_/models/c2d_r50.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer3D',
3
+ backbone=dict(
4
+ type='C2D',
5
+ depth=50,
6
+ pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',
7
+ norm_eval=False),
8
+ cls_head=dict(
9
+ type='I3DHead',
10
+ num_classes=400,
11
+ in_channels=2048,
12
+ spatial_type='avg',
13
+ dropout_ratio=0.5,
14
+ init_std=0.01,
15
+ average_clips='prob'),
16
+ data_preprocessor=dict(
17
+ type='ActionDataPreprocessor',
18
+ mean=[123.675, 116.28, 103.53],
19
+ std=[58.395, 57.12, 57.375],
20
+ format_shape='NCTHW'))
configs/_base_/models/c3d_sports1m_pretrained.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(
5
+ type='C3D',
6
+ pretrained= # noqa: E251
7
+ 'https://download.openmmlab.com/mmaction/recognition/c3d/c3d_sports1m_pretrain_20201016-dcc47ddc.pth', # noqa: E501
8
+ style='pytorch',
9
+ conv_cfg=dict(type='Conv3d'),
10
+ norm_cfg=None,
11
+ act_cfg=dict(type='ReLU'),
12
+ dropout_ratio=0.5,
13
+ init_std=0.005),
14
+ cls_head=dict(
15
+ type='I3DHead',
16
+ num_classes=101,
17
+ in_channels=4096,
18
+ spatial_type=None,
19
+ dropout_ratio=0.5,
20
+ init_std=0.01,
21
+ average_clips='prob'),
22
+ data_preprocessor=dict(
23
+ type='ActionDataPreprocessor',
24
+ mean=[104, 117, 128],
25
+ std=[1, 1, 1],
26
+ format_shape='NCTHW'),
27
+ train_cfg=None,
28
+ test_cfg=None)
configs/_base_/models/i3d_r50.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(
5
+ type='ResNet3d',
6
+ pretrained2d=True,
7
+ pretrained='torchvision://resnet50',
8
+ depth=50,
9
+ conv1_kernel=(5, 7, 7),
10
+ conv1_stride_t=2,
11
+ pool1_stride_t=2,
12
+ conv_cfg=dict(type='Conv3d'),
13
+ norm_eval=False,
14
+ inflate=((1, 1, 1), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 1, 0)),
15
+ zero_init_residual=False),
16
+ cls_head=dict(
17
+ type='I3DHead',
18
+ num_classes=400,
19
+ in_channels=2048,
20
+ spatial_type='avg',
21
+ dropout_ratio=0.5,
22
+ init_std=0.01,
23
+ average_clips='prob'),
24
+ data_preprocessor=dict(
25
+ type='ActionDataPreprocessor',
26
+ mean=[123.675, 116.28, 103.53],
27
+ std=[58.395, 57.12, 57.375],
28
+ format_shape='NCTHW'))
29
+
30
+ # This setting refers to https://github.com/open-mmlab/mmaction/blob/master/mmaction/models/tenons/backbones/resnet_i3d.py#L329-L332 # noqa: E501
configs/_base_/models/ircsn_r152.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(
5
+ type='ResNet3dCSN',
6
+ pretrained2d=False,
7
+ pretrained=None,
8
+ depth=152,
9
+ with_pool2=False,
10
+ bottleneck_mode='ir',
11
+ norm_eval=False,
12
+ zero_init_residual=False),
13
+ cls_head=dict(
14
+ type='I3DHead',
15
+ num_classes=400,
16
+ in_channels=2048,
17
+ spatial_type='avg',
18
+ dropout_ratio=0.5,
19
+ init_std=0.01,
20
+ average_clips='prob'),
21
+ data_preprocessor=dict(
22
+ type='ActionDataPreprocessor',
23
+ mean=[123.675, 116.28, 103.53],
24
+ std=[58.395, 57.12, 57.375],
25
+ format_shape='NCTHW'),
26
+ # model training and testing settings
27
+ train_cfg=None,
28
+ test_cfg=dict(max_testing_views=10))
configs/_base_/models/mvit_small.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer3D',
3
+ backbone=dict(type='MViT', arch='small', drop_path_rate=0.2),
4
+ data_preprocessor=dict(
5
+ type='ActionDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ format_shape='NCTHW'),
9
+ cls_head=dict(
10
+ type='MViTHead',
11
+ in_channels=768,
12
+ num_classes=400,
13
+ label_smooth_eps=0.1,
14
+ average_clips='prob'))
configs/_base_/models/r2plus1d_r34.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(
5
+ type='ResNet2Plus1d',
6
+ depth=34,
7
+ pretrained=None,
8
+ pretrained2d=False,
9
+ norm_eval=False,
10
+ conv_cfg=dict(type='Conv2plus1d'),
11
+ norm_cfg=dict(type='SyncBN', requires_grad=True, eps=1e-3),
12
+ conv1_kernel=(3, 7, 7),
13
+ conv1_stride_t=1,
14
+ pool1_stride_t=1,
15
+ inflate=(1, 1, 1, 1),
16
+ spatial_strides=(1, 2, 2, 2),
17
+ temporal_strides=(1, 2, 2, 2),
18
+ zero_init_residual=False),
19
+ cls_head=dict(
20
+ type='I3DHead',
21
+ num_classes=400,
22
+ in_channels=512,
23
+ spatial_type='avg',
24
+ dropout_ratio=0.5,
25
+ init_std=0.01,
26
+ average_clips='prob'),
27
+ data_preprocessor=dict(
28
+ type='ActionDataPreprocessor',
29
+ mean=[123.675, 116.28, 103.53],
30
+ std=[58.395, 57.12, 57.375],
31
+ format_shape='NCTHW'))
configs/_base_/models/slowfast_r50.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(
5
+ type='ResNet3dSlowFast',
6
+ pretrained=None,
7
+ resample_rate=8, # tau
8
+ speed_ratio=8, # alpha
9
+ channel_ratio=8, # beta_inv
10
+ slow_pathway=dict(
11
+ type='resnet3d',
12
+ depth=50,
13
+ pretrained=None,
14
+ lateral=True,
15
+ conv1_kernel=(1, 7, 7),
16
+ dilations=(1, 1, 1, 1),
17
+ conv1_stride_t=1,
18
+ pool1_stride_t=1,
19
+ inflate=(0, 0, 1, 1),
20
+ norm_eval=False),
21
+ fast_pathway=dict(
22
+ type='resnet3d',
23
+ depth=50,
24
+ pretrained=None,
25
+ lateral=False,
26
+ base_channels=8,
27
+ conv1_kernel=(5, 7, 7),
28
+ conv1_stride_t=1,
29
+ pool1_stride_t=1,
30
+ norm_eval=False)),
31
+ cls_head=dict(
32
+ type='SlowFastHead',
33
+ in_channels=2304, # 2048+256
34
+ num_classes=400,
35
+ spatial_type='avg',
36
+ dropout_ratio=0.5,
37
+ average_clips='prob'),
38
+ data_preprocessor=dict(
39
+ type='ActionDataPreprocessor',
40
+ mean=[123.675, 116.28, 103.53],
41
+ std=[58.395, 57.12, 57.375],
42
+ format_shape='NCTHW'))
configs/_base_/models/slowonly_r50.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer3D',
3
+ backbone=dict(
4
+ type='ResNet3dSlowOnly',
5
+ depth=50,
6
+ pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',
7
+ lateral=False,
8
+ conv1_kernel=(1, 7, 7),
9
+ conv1_stride_t=1,
10
+ pool1_stride_t=1,
11
+ inflate=(0, 0, 1, 1),
12
+ norm_eval=False),
13
+ cls_head=dict(
14
+ type='I3DHead',
15
+ in_channels=2048,
16
+ num_classes=400,
17
+ spatial_type='avg',
18
+ dropout_ratio=0.5,
19
+ average_clips='prob'),
20
+ data_preprocessor=dict(
21
+ type='ActionDataPreprocessor',
22
+ mean=[123.675, 116.28, 103.53],
23
+ std=[58.395, 57.12, 57.375],
24
+ format_shape='NCTHW'))
configs/_base_/models/swin_tiny.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer3D',
3
+ backbone=dict(
4
+ type='SwinTransformer3D',
5
+ arch='tiny',
6
+ pretrained=None,
7
+ pretrained2d=True,
8
+ patch_size=(2, 4, 4),
9
+ window_size=(8, 7, 7),
10
+ mlp_ratio=4.,
11
+ qkv_bias=True,
12
+ qk_scale=None,
13
+ drop_rate=0.,
14
+ attn_drop_rate=0.,
15
+ drop_path_rate=0.1,
16
+ patch_norm=True),
17
+ data_preprocessor=dict(
18
+ type='ActionDataPreprocessor',
19
+ mean=[123.675, 116.28, 103.53],
20
+ std=[58.395, 57.12, 57.375],
21
+ format_shape='NCTHW'),
22
+ cls_head=dict(
23
+ type='I3DHead',
24
+ in_channels=768,
25
+ num_classes=400,
26
+ spatial_type='avg',
27
+ dropout_ratio=0.5,
28
+ average_clips='prob'))
configs/_base_/models/tanet_r50.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer2D',
4
+ data_preprocessor=dict(
5
+ type='ActionDataPreprocessor',
6
+ mean=[123.675, 116.28, 103.5],
7
+ std=[58.395, 57.12, 57.375],
8
+ format_shape='NCHW'),
9
+ backbone=dict(
10
+ type='TANet',
11
+ pretrained='torchvision://resnet50',
12
+ depth=50,
13
+ num_segments=8,
14
+ tam_cfg=None),
15
+ cls_head=dict(
16
+ type='TSMHead',
17
+ num_classes=400,
18
+ in_channels=2048,
19
+ spatial_type='avg',
20
+ consensus=dict(type='AvgConsensus', dim=1),
21
+ dropout_ratio=0.5,
22
+ init_std=0.001,
23
+ average_clips='prob'))
configs/_base_/models/tin_r50.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+
3
+ preprocess_cfg = dict(
4
+ mean=[123.675, 116.28, 103.53],
5
+ std=[58.395, 57.12, 57.375],
6
+ format_shape='NCHW')
7
+
8
+ model = dict(
9
+ type='Recognizer2D',
10
+ backbone=dict(
11
+ type='ResNetTIN',
12
+ pretrained='torchvision://resnet50',
13
+ depth=50,
14
+ norm_eval=False,
15
+ shift_div=4),
16
+ cls_head=dict(
17
+ type='TSMHead',
18
+ num_classes=400,
19
+ in_channels=2048,
20
+ spatial_type='avg',
21
+ consensus=dict(type='AvgConsensus', dim=1),
22
+ dropout_ratio=0.5,
23
+ init_std=0.001,
24
+ is_shift=False,
25
+ average_clips='prob'),
26
+ data_preprocessor=dict(type='ActionDataPreprocessor', **preprocess_cfg),
27
+ # model training and testing settings
28
+ train_cfg=None,
29
+ test_cfg=None)
configs/_base_/models/tpn_slowonly_r50.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer3D',
3
+ backbone=dict(
4
+ type='ResNet3dSlowOnly',
5
+ depth=50,
6
+ pretrained='torchvision://resnet50',
7
+ lateral=False,
8
+ out_indices=(2, 3),
9
+ conv1_kernel=(1, 7, 7),
10
+ conv1_stride_t=1,
11
+ pool1_stride_t=1,
12
+ inflate=(0, 0, 1, 1),
13
+ norm_eval=False),
14
+ neck=dict(
15
+ type='TPN',
16
+ in_channels=(1024, 2048),
17
+ out_channels=1024,
18
+ spatial_modulation_cfg=dict(
19
+ in_channels=(1024, 2048), out_channels=2048),
20
+ temporal_modulation_cfg=dict(downsample_scales=(8, 8)),
21
+ upsample_cfg=dict(scale_factor=(1, 1, 1)),
22
+ downsample_cfg=dict(downsample_scale=(1, 1, 1)),
23
+ level_fusion_cfg=dict(
24
+ in_channels=(1024, 1024),
25
+ mid_channels=(1024, 1024),
26
+ out_channels=2048,
27
+ downsample_scales=((1, 1, 1), (1, 1, 1))),
28
+ aux_head_cfg=dict(out_channels=400, loss_weight=0.5)),
29
+ cls_head=dict(
30
+ type='TPNHead',
31
+ num_classes=400,
32
+ in_channels=2048,
33
+ spatial_type='avg',
34
+ consensus=dict(type='AvgConsensus', dim=1),
35
+ dropout_ratio=0.5,
36
+ init_std=0.01,
37
+ average_clips='prob'),
38
+ data_preprocessor=dict(
39
+ type='ActionDataPreprocessor',
40
+ mean=[123.675, 116.28, 103.53],
41
+ std=[58.395, 57.12, 57.375],
42
+ format_shape='NCTHW'),
43
+ # model training and testing settings
44
+ train_cfg=None,
45
+ test_cfg=dict(fcn_test=True))
configs/_base_/models/tpn_tsm_r50.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer2D',
3
+ backbone=dict(
4
+ type='ResNetTSM',
5
+ pretrained='torchvision://resnet50',
6
+ depth=50,
7
+ out_indices=(2, 3),
8
+ norm_eval=False,
9
+ shift_div=8),
10
+ neck=dict(
11
+ type='TPN',
12
+ in_channels=(1024, 2048),
13
+ out_channels=1024,
14
+ spatial_modulation_cfg=dict(
15
+ in_channels=(1024, 2048), out_channels=2048),
16
+ temporal_modulation_cfg=dict(downsample_scales=(8, 8)),
17
+ upsample_cfg=dict(scale_factor=(1, 1, 1)),
18
+ downsample_cfg=dict(downsample_scale=(1, 1, 1)),
19
+ level_fusion_cfg=dict(
20
+ in_channels=(1024, 1024),
21
+ mid_channels=(1024, 1024),
22
+ out_channels=2048,
23
+ downsample_scales=((1, 1, 1), (1, 1, 1))),
24
+ aux_head_cfg=dict(out_channels=174, loss_weight=0.5)),
25
+ cls_head=dict(
26
+ type='TPNHead',
27
+ num_classes=174,
28
+ in_channels=2048,
29
+ spatial_type='avg',
30
+ consensus=dict(type='AvgConsensus', dim=1),
31
+ dropout_ratio=0.5,
32
+ init_std=0.01,
33
+ average_clips='prob'),
34
+ data_preprocessor=dict(
35
+ type='ActionDataPreprocessor',
36
+ mean=[123.675, 116.28, 103.53],
37
+ std=[58.395, 57.12, 57.375],
38
+ format_shape='NCHW'),
39
+ train_cfg=None,
40
+ test_cfg=dict(fcn_test=True))
configs/_base_/models/trn_r50.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer2D',
4
+ backbone=dict(
5
+ type='ResNet',
6
+ pretrained='torchvision://resnet50',
7
+ depth=50,
8
+ norm_eval=False,
9
+ partial_bn=True),
10
+ cls_head=dict(
11
+ type='TRNHead',
12
+ num_classes=400,
13
+ in_channels=2048,
14
+ num_segments=8,
15
+ spatial_type='avg',
16
+ relation_type='TRNMultiScale',
17
+ hidden_dim=256,
18
+ dropout_ratio=0.8,
19
+ init_std=0.001,
20
+ average_clips='prob'),
21
+ data_preprocessor=dict(
22
+ type='ActionDataPreprocessor',
23
+ mean=[123.675, 116.28, 103.53],
24
+ std=[58.395, 57.12, 57.375],
25
+ format_shape='NCHW'))
configs/_base_/models/tsm_mobilenet_v2.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ preprocess_cfg = dict(
3
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
4
+
5
+ model = dict(
6
+ type='Recognizer2D',
7
+ backbone=dict(
8
+ type='MobileNetV2TSM',
9
+ shift_div=8,
10
+ num_segments=8,
11
+ is_shift=True,
12
+ pretrained='mmcls://mobilenet_v2'),
13
+ cls_head=dict(
14
+ type='TSMHead',
15
+ num_segments=8,
16
+ num_classes=400,
17
+ in_channels=1280,
18
+ spatial_type='avg',
19
+ consensus=dict(type='AvgConsensus', dim=1),
20
+ dropout_ratio=0.5,
21
+ init_std=0.001,
22
+ is_shift=True,
23
+ average_clips='prob'),
24
+ # model training and testing settings
25
+ data_preprocessor=dict(type='ActionDataPreprocessor', **preprocess_cfg),
26
+ train_cfg=None,
27
+ test_cfg=None)
configs/_base_/models/tsm_mobileone_s4.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ preprocess_cfg = dict(
3
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
4
+
5
+ checkpoint = ('https://download.openmmlab.com/mmclassification/'
6
+ 'v0/mobileone/mobileone-s4_8xb32_in1k_20221110-28d888cb.pth')
7
+ model = dict(
8
+ type='Recognizer2D',
9
+ backbone=dict(
10
+ type='MobileOneTSM',
11
+ arch='s4',
12
+ shift_div=8,
13
+ num_segments=8,
14
+ is_shift=True,
15
+ init_cfg=dict(
16
+ type='Pretrained', checkpoint=checkpoint, prefix='backbone')),
17
+ cls_head=dict(
18
+ type='TSMHead',
19
+ num_segments=8,
20
+ num_classes=400,
21
+ in_channels=2048,
22
+ spatial_type='avg',
23
+ consensus=dict(type='AvgConsensus', dim=1),
24
+ dropout_ratio=0.5,
25
+ init_std=0.001,
26
+ is_shift=True,
27
+ average_clips='prob'),
28
+ # model training and testing settings
29
+ data_preprocessor=dict(type='ActionDataPreprocessor', **preprocess_cfg),
30
+ train_cfg=None,
31
+ test_cfg=None)
configs/_base_/models/tsm_r50.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ preprocess_cfg = dict(
2
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
3
+
4
+ model = dict(
5
+ type='Recognizer2D',
6
+ backbone=dict(
7
+ type='ResNetTSM',
8
+ pretrained='torchvision://resnet50',
9
+ depth=50,
10
+ norm_eval=False,
11
+ shift_div=8),
12
+ cls_head=dict(
13
+ type='TSMHead',
14
+ num_classes=400,
15
+ in_channels=2048,
16
+ spatial_type='avg',
17
+ consensus=dict(type='AvgConsensus', dim=1),
18
+ dropout_ratio=0.5,
19
+ init_std=0.001,
20
+ is_shift=True,
21
+ average_clips='prob'),
22
+ data_preprocessor=dict(type='ActionDataPreprocessor', **preprocess_cfg),
23
+ train_cfg=None,
24
+ test_cfg=None)
configs/_base_/models/tsn_mobileone_s0.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint = ('https://download.openmmlab.com/mmclassification/'
2
+ 'v0/mobileone/mobileone-s0_8xb32_in1k_20221110-0bc94952.pth')
3
+ model = dict(
4
+ type='Recognizer2D',
5
+ backbone=dict(
6
+ type='mmpretrain.MobileOne',
7
+ arch='s0',
8
+ init_cfg=dict(
9
+ type='Pretrained', checkpoint=checkpoint, prefix='backbone'),
10
+ norm_eval=False),
11
+ cls_head=dict(
12
+ type='TSNHead',
13
+ num_classes=400,
14
+ in_channels=1024,
15
+ spatial_type='avg',
16
+ consensus=dict(type='AvgConsensus', dim=1),
17
+ dropout_ratio=0.4,
18
+ init_std=0.01,
19
+ average_clips='prob'),
20
+ data_preprocessor=dict(
21
+ type='ActionDataPreprocessor',
22
+ mean=[123.675, 116.28, 103.53],
23
+ std=[58.395, 57.12, 57.375],
24
+ format_shape='NCHW'),
25
+ train_cfg=None,
26
+ test_cfg=None)
configs/_base_/models/tsn_r50.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='Recognizer2D',
3
+ backbone=dict(
4
+ type='ResNet',
5
+ pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',
6
+ depth=50,
7
+ norm_eval=False),
8
+ cls_head=dict(
9
+ type='TSNHead',
10
+ num_classes=400,
11
+ in_channels=2048,
12
+ spatial_type='avg',
13
+ consensus=dict(type='AvgConsensus', dim=1),
14
+ dropout_ratio=0.4,
15
+ init_std=0.01,
16
+ average_clips='prob'),
17
+ data_preprocessor=dict(
18
+ type='ActionDataPreprocessor',
19
+ mean=[123.675, 116.28, 103.53],
20
+ std=[58.395, 57.12, 57.375],
21
+ format_shape='NCHW'),
22
+ train_cfg=None,
23
+ test_cfg=None)
configs/_base_/models/x3d.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='Recognizer3D',
4
+ backbone=dict(type='X3D', gamma_w=1, gamma_b=2.25, gamma_d=2.2),
5
+ cls_head=dict(
6
+ type='X3DHead',
7
+ in_channels=432,
8
+ num_classes=400,
9
+ spatial_type='avg',
10
+ dropout_ratio=0.5,
11
+ fc1_bias=False,
12
+ average_clips='prob'),
13
+ data_preprocessor=dict(
14
+ type='ActionDataPreprocessor',
15
+ mean=[114.75, 114.75, 114.75],
16
+ std=[57.38, 57.38, 57.38],
17
+ format_shape='NCTHW'),
18
+ # model training and testing settings
19
+ train_cfg=None,
20
+ test_cfg=None)
configs/_base_/schedules/adam_20e.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train_cfg = dict(
2
+ type='EpochBasedTrainLoop', max_epochs=20, val_begin=1, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ param_scheduler = [
7
+ dict(
8
+ type='MultiStepLR',
9
+ begin=0,
10
+ end=20,
11
+ by_epoch=True,
12
+ milestones=[10],
13
+ gamma=0.1)
14
+ ]
15
+
16
+ optimizer = dict(
17
+ type='Adam', lr=0.01, weight_decay=0.00001) # this lr is used for 1 gpus
18
+
19
+ optim_wrapper = dict(
20
+ optimizer=optimizer, clip_grad=dict(max_norm=40, norm_type=2))
configs/_base_/schedules/sgd_100e.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train_cfg = dict(
2
+ type='EpochBasedTrainLoop', max_epochs=100, val_begin=1, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ param_scheduler = [
7
+ dict(
8
+ type='MultiStepLR',
9
+ begin=0,
10
+ end=100,
11
+ by_epoch=True,
12
+ milestones=[40, 80],
13
+ gamma=0.1)
14
+ ]
15
+
16
+ optim_wrapper = dict(
17
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
18
+ clip_grad=dict(max_norm=40, norm_type=2))
configs/_base_/schedules/sgd_150e_warmup.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train_cfg = dict(
2
+ type='EpochBasedTrainLoop', max_epochs=150, val_begin=1, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ param_scheduler = [
7
+ dict(type='LinearLR', start_factor=0.1, by_epoch=True, begin=0, end=10),
8
+ dict(
9
+ type='MultiStepLR',
10
+ begin=0,
11
+ end=150,
12
+ by_epoch=True,
13
+ milestones=[90, 130],
14
+ gamma=0.1)
15
+ ]
16
+
17
+ optim_wrapper = dict(
18
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
19
+ clip_grad=dict(max_norm=40, norm_type=2))
configs/_base_/schedules/sgd_50e.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train_cfg = dict(
2
+ type='EpochBasedTrainLoop', max_epochs=50, val_begin=1, val_interval=1)
3
+ val_cfg = dict(type='ValLoop')
4
+ test_cfg = dict(type='TestLoop')
5
+
6
+ param_scheduler = [
7
+ dict(
8
+ type='MultiStepLR',
9
+ begin=0,
10
+ end=50,
11
+ by_epoch=True,
12
+ milestones=[20, 40],
13
+ gamma=0.1)
14
+ ]
15
+
16
+ optim_wrapper = dict(
17
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
18
+ clip_grad=dict(max_norm=40, norm_type=2))