File size: 4,710 Bytes
9f83ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional

from utils import MODELS, VIDEO_EXTENSIONS


@dataclass
class TransformConfig:
    """Augmentation and preprocessing knobs, grouped by pipeline.

    One flat config shared by every data pipeline; each section below is
    consumed only by the model family named in its comment. Only the
    AugMix and MixUp augmentation families are accepted for ``aug_type``.
    """

    # --- RGB pipeline ---
    horizontal_flip_prob: float = 0.5
    aug_type: str = "augmix"
    # default_factory gives every instance its own dict (no shared mutable
    # default between configs)
    aug_paras: dict = field(
        default_factory=lambda: {
            "magnitude": 3,
            "alpha": 1.0,
            "width": 5,
            "depth": -1,
        }
    )
    sample_rate: int = 4

    # --- Pose pipeline ---
    normalization: bool = True

    # --- SL-GCN / DSTA-SLR ---
    random_choose: bool = False
    random_shift: bool = False
    random_move: bool = False
    random_mirror: bool = False
    random_mirror_p: float = 0.5
    bone_stream: bool = False
    motion_stream: bool = False

    # --- SPOTER ---
    augmentation: bool = True
    aug_prob: float = 0.5
    noise: bool = True

    def __post_init__(self):
        """Reject augmentation families other than AugMix / MixUp."""
        assert self.aug_type in ("augmix", "mixup"), \
            "Only AugMix and MixUp are supported for now"


@dataclass
class DataConfig:
    """Dataset selection and loading options.

    Fixes over the previous revision:

    * ``transform`` was declared twice (once as ``Any = None``, once as a
      ``TransformConfig`` factory); the duplicate is collapsed into a
      single definition, kept in its original slot so the dataclass field
      order (and thus positional-argument order) is unchanged.
    * The old default ``dataset="vsl"`` failed this class's own
      ``__post_init__`` validation, making ``DataConfig()`` raise; the
      default is now the valid ``"vsl_98"``.
    """

    dataset: str = "vsl_98"  # one of {"vsl_98", "vsl_400"}
    modality: str = "rgb"    # "rgb" or "pose"
    subset: Optional[str] = None  # optional split name; None means "all"
    data_dir: str = "data/processed/vsl"
    transform: TransformConfig = field(default_factory=TransformConfig)
    fps: int = 30
    debug: bool = False

    def __post_init__(self):
        # Validate eagerly so a bad config fails at construction time
        # rather than deep inside the data pipeline.
        assert self.dataset in ["vsl_98", "vsl_400"], \
            "Only VSL dataset is supported for now"
        assert self.modality in ["rgb", "pose"], \
            "Only RGB and Pose modalities are supported for now"


@dataclass
class ModelConfig:
    """Architecture choice plus per-architecture hyperparameters.

    Only the fields in the section matching ``arch`` are consumed by any
    given model; the rest are ignored. ``arch`` must be a key of the
    project-level ``MODELS`` registry (imported from ``utils``).
    """

    arch: str = "sl_gcn"
    pretrained: str = "vsltranslation/sl_gcn_joint_v3_0"
    num_frozen_layers: int = 0
    # `list` itself is the idiomatic default_factory (was `lambda: []`);
    # each instance still gets its own fresh list.
    ignored_weights: list = field(default_factory=list)
    num_frames: int = 16

    # SL-GCN specific
    num_points: int = 27
    groups: int = 8
    block_size: int = 41
    in_channels: int = 3
    labeling_mode: str = "spatial"
    is_vector: bool = False

    # DSTA-SLR specific
    graph: str = "wlasl"
    inner_dim: int = 64
    drop_layers: int = 2
    depth: int = 4
    s_num_heads: int = 1
    window_size: int = 120

    # SPOTER specific
    hidden_dim: int = 108

    def __post_init__(self):
        # Fail fast on architectures the registry does not know about.
        assert self.arch in MODELS, f"Model {self.arch} is not supported"


@dataclass
class TrainingConfig:
    """HuggingFace-Trainer-style training arguments.

    ``__post_init__`` materializes the output directory on disk and
    derives Hub push settings from ``hub_model_id``.
    """

    output_dir: str = "experiments"  # converted to Path in __post_init__
    remove_unused_columns: bool = False
    do_train: bool = True
    use_cpu: bool = False

    # Evaluation / logging / checkpoint cadence
    eval_strategy: str = "epoch"
    logging_strategy: str = "epoch"
    save_strategy: str = "epoch"
    logging_steps: int = 1
    save_steps: int = 1
    eval_steps: int = 1

    # Optimizer (AdamW) settings
    learning_rate: float = 5e-5
    weight_decay: float = 0
    adam_beta1: float = 0.9
    adam_beta2: float = 0.999
    adam_epsilon: float = 1e-8
    warmup_ratio: float = 0.1

    # Schedule / batching
    num_train_epochs: int = 10
    per_device_train_batch_size: int = 8
    per_device_eval_batch_size: int = 8
    dataloader_num_workers: int = 0

    # Checkpointing / model selection
    load_best_model_at_end: bool = True
    metric_for_best_model: str = "accuracy"
    resume_from_checkpoint: str = None

    # Experiment tracking / Hub upload
    run_name: str = "swin3d"
    report_to: str = None
    push_to_hub: bool = False
    hub_model_id: str = None
    hub_strategy: str = "checkpoint"
    hub_private_repo: bool = True

    def __post_init__(self):
        """Create the output directory and normalize Hub settings."""
        out = Path(self.output_dir)
        # The bare default "experiments" gets a per-run subdirectory.
        if str(out) == "experiments":
            out = out / self.run_name
        out.mkdir(parents=True, exist_ok=True)
        self.output_dir = out

        if self.hub_model_id is not None:
            # Providing a model id implies pushing; a bare namespace
            # (no "/") is completed with the run name.
            self.push_to_hub = True
            if "/" not in self.hub_model_id:
                self.hub_model_id = f"{self.hub_model_id}/{self.run_name}"


@dataclass
class InferenceConfig:
    """Settings for running the demo / inference pipeline.

    ``source`` is either the literal string "webcam" or a path to an
    existing video file whose suffix is in ``VIDEO_EXTENSIONS``.
    """

    source: str = "webcam"   # converted to Path in __post_init__
    output_dir: str = "demo"  # created on disk in __post_init__
    use_onnx: bool = False
    device: str = "cpu"
    cache_dir: str = "models/huggingface"

    # Rendering options
    visualize: bool = False
    show_skeleton: bool = False

    # Sign-segmentation heuristics
    visibility: float = 0.5
    angle_threshold: int = 140
    min_num_up_frames: int = 10
    min_num_down_frames: int = 10
    delay: int = 400

    top_k: int = 3
    # SL-GCN, DSTA-SLR specific
    bone_stream: bool = False
    motion_stream: bool = False

    def __post_init__(self):
        """Validate the source and materialize the output directory."""
        src = Path(self.source)
        self.source = src
        is_webcam = str(src) == "webcam"
        is_video = src.exists() and str(src).endswith(VIDEO_EXTENSIONS)
        assert is_webcam or is_video, \
            f"Only Webcam and Video sources are supported for now (got {self.source})"
        out = Path(self.output_dir)
        out.mkdir(parents=True, exist_ok=True)
        self.output_dir = out