# ------------------------------------------------------------------------
# RF-DETR
# Copyright (c) 2025 Roboflow. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
from pydantic import BaseModel
from typing import List, Optional, Literal, Type
import torch
# Default compute device: prefer CUDA, then Apple-Silicon MPS, else CPU.
# Evaluated once at import time and used as the ModelConfig.device default.
DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
class ModelConfig(BaseModel):
    """Architecture configuration shared by all RF-DETR model variants.

    Fields without defaults (encoder, out_feature_indexes, dec_layers, ...)
    are required; the concrete variant subclasses below supply them.
    """
    # DINOv2 windowed backbone variant used as the encoder.
    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"]
    # Backbone block indices whose feature maps are exposed to the projector.
    out_feature_indexes: List[int]
    # Number of transformer decoder layers.
    dec_layers: int
    # Presumably enables two-stage DETR (encoder proposals seed the decoder) — TODO confirm.
    two_stage: bool = True
    # Feature-pyramid levels produced by the projector.
    projector_scale: List[Literal["P3", "P4", "P5"]]
    # Transformer embedding dimension.
    hidden_dim: int
    # ViT patch size of the backbone.
    patch_size: int
    # Number of attention windows in the windowed backbone.
    num_windows: int
    # Decoder self-attention heads.
    sa_nheads: int
    # Decoder cross-attention heads.
    ca_nheads: int
    # Sampling points for (presumably deformable) decoder cross-attention — TODO confirm.
    dec_n_points: int
    bbox_reparam: bool = True
    lite_refpoint_refine: bool = True
    layer_norm: bool = True
    # Automatic mixed precision on/off.
    amp: bool = True
    # Number of object classes (COCO-style default).
    num_classes: int = 90
    # Checkpoint file to load; None means no pretrained weights.
    pretrain_weights: Optional[str] = None
    # Defaults to the best accelerator detected at import time (see DEVICE above).
    device: Literal["cpu", "cuda", "mps"] = DEVICE
    # Input image resolution in pixels.
    resolution: int
    # Number of query groups (Group DETR); mirrored in TrainConfig.group_detr.
    group_detr: int = 13
    gradient_checkpointing: bool = False
    # Side length of the positional-encoding grid. In the P16 subclasses this
    # equals resolution // patch_size, but Base uses 37 (not 560 // 14 = 40) —
    # NOTE(review): relationship to resolution/patch_size unclear, verify.
    positional_encoding_size: int
class RFDETRBaseConfig(ModelConfig):
    """
    The configuration for an RF-DETR Base model.

    Supplies concrete defaults for every required ModelConfig field and is
    the parent of the Nano/Small/Medium/Large variants.
    """
    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_small"
    hidden_dim: int = 256
    # 14x14 DINOv2 patches (the other variants below override to 16).
    patch_size: int = 14
    num_windows: int = 4
    dec_layers: int = 3
    sa_nheads: int = 8
    ca_nheads: int = 16
    dec_n_points: int = 2
    # Introduced here rather than on ModelConfig: number of object queries
    # and of top predictions kept — presumably post-processing top-k; TODO confirm.
    num_queries: int = 300
    num_select: int = 300
    # Single-scale projector at P4.
    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P4"]
    out_feature_indexes: List[int] = [2, 5, 8, 11]
    pretrain_weights: Optional[str] = "rf-detr-base.pth"
    resolution: int = 560
    positional_encoding_size: int = 37
class RFDETRLargeConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Large model.

    Widens the Base config: bigger backbone, wider transformer, more
    attention heads and sampling points, and a two-level (P3+P5) projector.
    """
    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_base"
    hidden_dim: int = 384
    sa_nheads: int = 12
    ca_nheads: int = 24
    dec_n_points: int = 4
    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P3", "P5"]
    pretrain_weights: Optional[str] = "rf-detr-large.pth"
class RFDETRNanoConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Nano model.

    Smallest variant: 16px patches, 2 decoder layers, 384px inputs.
    """
    out_feature_indexes: List[int] = [3, 6, 9, 12]
    num_windows: int = 2
    dec_layers: int = 2
    patch_size: int = 16
    resolution: int = 384
    # 384 // 16 = 24 patch positions per side.
    positional_encoding_size: int = 24
    pretrain_weights: Optional[str] = "rf-detr-nano.pth"
class RFDETRSmallConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Small model.

    Like Nano but with 3 decoder layers and 512px inputs.
    """
    out_feature_indexes: List[int] = [3, 6, 9, 12]
    num_windows: int = 2
    dec_layers: int = 3
    patch_size: int = 16
    resolution: int = 512
    # 512 // 16 = 32 patch positions per side.
    positional_encoding_size: int = 32
    pretrain_weights: Optional[str] = "rf-detr-small.pth"
class RFDETRMediumConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Medium model.

    Like Small but with 4 decoder layers and 576px inputs.
    """
    out_feature_indexes: List[int] = [3, 6, 9, 12]
    num_windows: int = 2
    dec_layers: int = 4
    patch_size: int = 16
    resolution: int = 576
    # 576 // 16 = 36 patch positions per side.
    positional_encoding_size: int = 36
    pretrain_weights: Optional[str] = "rf-detr-medium.pth"
class TrainConfig(BaseModel):
    """Hyperparameters for an RF-DETR training run.

    ``dataset_dir`` is the only required field; everything else has a
    sensible default. Fields mirrored from ModelConfig (``group_detr``,
    ``num_select``) should be kept consistent with the model config in use.
    """
    # Base learning rate (detector head).
    lr: float = 1e-4
    # Learning rate for the encoder/backbone parameters.
    lr_encoder: float = 1.5e-4
    batch_size: int = 4
    # Effective batch size = batch_size * grad_accum_steps.
    grad_accum_steps: int = 4
    epochs: int = 100
    # EMA decay rate for model-weight averaging (used when use_ema is True).
    ema_decay: float = 0.993
    ema_tau: int = 100
    # Epoch at which the learning rate is dropped — presumably a step schedule; TODO confirm.
    lr_drop: int = 100
    # Save a checkpoint every N epochs.
    checkpoint_interval: int = 10
    warmup_epochs: int = 0
    # Layer-wise learning-rate decay across ViT blocks.
    lr_vit_layer_decay: float = 0.8
    lr_component_decay: float = 0.7
    # Drop-path (stochastic depth) rate; disabled by default.
    drop_path: float = 0.0
    # Number of query groups; must match ModelConfig.group_detr.
    group_detr: int = 13
    # IoU-aware BCE classification loss — presumably; verify against the loss code.
    ia_bce_loss: bool = True
    cls_loss_coef: float = 1.0
    # Top-k predictions kept; should match the model config's num_select.
    num_select: int = 300
    dataset_file: Literal["coco", "o365", "roboflow"] = "roboflow"
    square_resize_div_64: bool = True
    # Required: root directory of the training dataset.
    dataset_dir: str
    output_dir: str = "output"
    multi_scale: bool = True
    expanded_scales: bool = True
    do_random_resize_via_padding: bool = False
    use_ema: bool = True
    # DataLoader worker processes.
    num_workers: int = 2
    weight_decay: float = 1e-4
    early_stopping: bool = False
    early_stopping_patience: int = 10
    early_stopping_min_delta: float = 0.001
    early_stopping_use_ema: bool = False
    tensorboard: bool = True
    wandb: bool = False
    # W&B project / run names (only used when wandb is True).
    project: Optional[str] = None
    run: Optional[str] = None
    # Fixed: default is None, so the annotation must be Optional
    # (was `List[str] = None`, contradicting the declared type).
    class_names: Optional[List[str]] = None
    # Evaluate on the test split after training.
    run_test: bool = True