Upload configs/resume_20_epochs_low_memory.yaml with huggingface_hub
Browse files
configs/resume_20_epochs_low_memory.yaml
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Resume Training Configuration - 20% Memory Reduction
|
| 2 |
+
# Optimized to reduce system memory requirements by ~20%
|
| 3 |
+
|
| 4 |
+
model:
|
| 5 |
+
architecture: detr
|
| 6 |
+
backbone: resnet50
|
| 7 |
+
num_classes: 2
|
| 8 |
+
pretrained: true
|
| 9 |
+
hidden_dim: 256
|
| 10 |
+
nheads: 8
|
| 11 |
+
num_encoder_layers: 6
|
| 12 |
+
num_decoder_layers: 6
|
| 13 |
+
rfdetr_size: base
|
| 14 |
+
remap_mscoco_category: false
|
| 15 |
+
|
| 16 |
+
training:
|
| 17 |
+
batch_size: 2 # Keep at 2 (already minimum)
|
| 18 |
+
learning_rate: 0.0002
|
| 19 |
+
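epochs: 40 # Total epochs. NOTE(review): checkpoint start_epoch is 38, which leaves only ~2 epochs, not "resume from 20, train 20 more" - confirm which is intended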
|
| 20 |
+
weight_decay: 0.0001
|
| 21 |
+
gradient_clip: 0.1
|
| 22 |
+
grad_accum_steps: 20 # Increased from 16; effective batch size grows from 2*16=32 to 2*20=40
|
| 23 |
+
# Note: Higher grad_accum_steps doesn't increase memory, just computation time
|
| 24 |
+
resolution: 1120 # Reduced from 1288 (13% reduction, divisible by 56: 1120 = 56*20)
|
| 25 |
+
num_workers: 1 # Reduced from 2 (50% reduction in data loading memory)
|
| 26 |
+
device: cuda
|
| 27 |
+
mixed_precision: true # Already enabled (AMP)
|
| 28 |
+
# Memory optimizations - these are passed to RF-DETR
|
| 29 |
+
multi_scale: false # Disable multi-scale training (significant memory savings)
|
| 30 |
+
expanded_scales: false # Disable expanded scales (memory savings)
|
| 31 |
+
|
| 32 |
+
dataset:
|
| 33 |
+
# Use existing COCO format dataset (skip YOLO conversion)
|
| 34 |
+
coco_train_path: /workspace/soccer_cv_ball/models/ball_detection_combined_optimized/dataset/train
|
| 35 |
+
coco_val_path: /workspace/soccer_cv_ball/models/ball_detection_combined_optimized/dataset/valid
|
| 36 |
+
# Category configuration (required even for COCO)
|
| 37 |
+
category_name: "ball"
|
| 38 |
+
category_id: 0
|
| 39 |
+
ball_class_id: 1 # Ball class ID (for reference)
|
| 40 |
+
# Data loading optimizations for memory
|
| 41 |
+
pin_memory: false # Disable pin_memory to save RAM
|
| 42 |
+
prefetch_factor: 1 # Reduced from default (less prefetched data)
|
| 43 |
+
persistent_workers: false # Disable persistent workers to save memory
|
| 44 |
+
|
| 45 |
+
checkpoint:
|
| 46 |
+
resume_from: /workspace/soccer_cv_ball/models/checkpoint.pth
|
| 47 |
+
start_epoch: 38
|
| 48 |
+
save_dir: models/checkpoints
|
| 49 |
+
|
| 50 |
+
# Memory reduction summary:
|
| 51 |
+
# - Resolution: 1288 -> 1120 (~13% shorter side, ~24% less image area; activation memory should shrink roughly in proportion)
|
| 52 |
+
# - Multi-scale: disabled (significant memory savings)
|
| 53 |
+
# - Expanded scales: disabled (memory savings)
|
| 54 |
+
# - num_workers: 2 -> 1 (reduces data loading memory)
|
| 55 |
+
# - pin_memory: disabled (saves RAM)
|
| 56 |
+
# - prefetch_factor: reduced (less prefetched data)
|
| 57 |
+
# Expected total memory reduction: ~20-25%
|