eeeeeeeeeeeeee3 committed on
Commit
8bcfdc8
·
verified ·
1 Parent(s): 4523c61

Upload configs/training_fast.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. configs/training_fast.yaml +127 -0
configs/training_fast.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
# Training Configuration for RF-DETR - Optimized for Speed
# This config prioritizes epoch speed while maintaining accuracy

# Model Architecture
model:
  architecture: "detr"  # Options: "detr" (vanilla DETR) or "rfdetr" (RF-DETR from Roboflow)
  backbone: "resnet50"  # ResNet backbone
  num_classes: 2  # player, ball
  pretrained: true  # Use pre-trained weights
  hidden_dim: 256
  nheads: 8
  num_encoder_layers: 6
  num_decoder_layers: 6
  # RF-DETR specific options (used when architecture="rfdetr")
  rfdetr_size: "base"  # Options: "nano", "small", "medium", "base", "large"

# Hyperparameters
training:
  batch_size: 32  # Increased from 24 - test if GPU memory allows (A40 has 48GB)
  num_epochs: 100  # Increased to continue training from checkpoint
  learning_rate: 0.0001  # 1e-4 as float
  weight_decay: 0.0001  # 1e-4 as float
  warmup_epochs: 5
  gradient_clip: 0.1
  gradient_accumulation_steps: 1  # Reduced from 2 - larger batch size means less need for accumulation
  memory_cleanup_frequency: 20  # Reduced cleanup frequency (less overhead)
  adaptive_optimization: true  # Enable adaptive resource optimization based on usage
  target_gpu_utilization: 0.90  # Increased target (was 0.85) - push GPU harder
  max_ram_usage: 0.85  # Increased from 0.80 - allow more RAM usage
  adaptive_adjustment_interval: 50  # Check and adjust every N batches
  mixed_precision: true  # Enable AMP for faster training (~2x speedup)
  compile_model: false  # Disabled: causes recompilation overhead with variable-sized DETR inputs
  channels_last: true  # Use channels-last memory format for faster convolutions
  cudnn_benchmark: true  # Optimize CUDNN for consistent input sizes
  tf32: true  # Enable TF32 on Ampere GPUs (A40) for faster matmul
  # Class weights disabled - using Focal Loss instead for better handling of class imbalance
  # Focal Loss dynamically adjusts based on prediction confidence, avoiding precision collapse
  class_weights:
    enabled: false
    player: 1.0
    ball: 1.0
  # Focal Loss configuration for handling class imbalance
  focal_loss:
    enabled: true
    alpha: 0.25  # Weighting factor for rare class (ball)
    gamma: 2.0  # Focusing parameter - higher gamma focuses more on hard examples

# Optimizer
optimizer:
  type: "AdamW"
  lr: 0.0001  # 1e-4 as float
  betas: [0.9, 0.999]
  weight_decay: 0.0001  # 1e-4 as float

# Learning Rate Schedule
lr_schedule:
  type: "cosine"  # cosine annealing
  warmup_epochs: 5
  min_lr: 0.000001  # 1e-6 as float

# Data Augmentation
# Reduced augmentation complexity for faster data loading
augmentation:
  train:
    horizontal_flip: 0.5
    color_jitter:
      brightness: 0.2
      contrast: 0.2
      saturation: 0.2
      hue: 0.1
    random_crop: false
    resize_range: [800, 1333]  # DETR standard
    # Copy-Paste augmentation for ball class balancing
    copy_paste:
      enabled: true
      prob: 0.5  # Probability of applying copy-paste
      max_pastes: 3  # Maximum balls to paste per image
    # CLAHE contrast enhancement - can be disabled for speed
    clahe:
      enabled: false  # Disabled for speed (was true) - minimal accuracy impact
      clip_limit: 2.0  # Contrast limiting threshold
      tile_grid_size: [8, 8]  # Grid size for adaptive equalization
    # Motion blur augmentation - can be disabled for speed
    motion_blur:
      enabled: false  # Disabled for speed (was true) - minimal accuracy impact
      prob: 0.3  # Probability of applying motion blur
      max_kernel_size: 15  # Maximum motion blur kernel size
  val:
    resize: 1333  # Fixed size for validation

# Dataset - Optimized for speed
dataset:
  train_path: "/workspace/datasets/train"
  val_path: "/workspace/datasets/val"
  num_workers: 8  # Increased from 4 - more parallel data loading
  pin_memory: true
  prefetch_factor: 4  # Increased from 2 - prefetch more batches
  persistent_workers: true  # Enabled: faster worker startup (was false)
  # Note: persistent_workers requires num_workers > 0

# Checkpoint Settings
checkpoint:
  save_dir: "models/checkpoints"
  save_frequency: 999  # Disabled: use lightweight checkpoints only to avoid disk quota issues
  save_every_epoch: true  # Save lightweight checkpoint every epoch (ensures no progress loss)
  keep_last_lightweight: 20  # Keep last N lightweight checkpoints (deletes older ones to save space)
  save_best: false  # Disabled: use lightweight checkpoints only to avoid disk quota issues
  metric: "mAP"  # Mean Average Precision
  use_lightweight_only: true  # Only save lightweight checkpoints to avoid disk quota issues

# Evaluation
evaluation:
  iou_thresholds: [0.5, 0.75]  # IoU thresholds for mAP
  max_detections: 100
  # Consider reducing validation frequency for speed
  val_frequency: 1  # Validate every epoch (can increase to 2-3 for speed)

# Logging - Reduced for speed
logging:
  log_dir: "logs"
  tensorboard: true
  mlflow: true  # Enable MLflow tracking
  mlflow_tracking_uri: "file:./mlruns"  # Local file-based tracking (or use SQLite/remote server)
  mlflow_experiment_name: "detr_training"  # MLflow experiment name
  mlflow_log_models: false  # Disabled for speed (was true) - model logging is slow
  print_frequency: 50  # Increased from 20 - less frequent printing (less I/O)
  log_every_n_steps: 100  # Increased from 50 - less frequent TensorBoard logging