AlbeRota committed on
Commit
0475949
·
verified ·
1 Parent(s): 679c502

Upload weights, notebooks, sample images

Browse files
Files changed (1) hide show
  1. configs/pretrained_config.yaml +5 -5
configs/pretrained_config.yaml CHANGED
@@ -9,7 +9,7 @@ parameters:
9
  MODEL_MODULE: "models" # Module name to import model classes from (default: "models")
10
  RGB_ENCODER:
11
  ENCODER: "facebook/dinov3-vitl16-pretrain-lvd1689m" # DINOv3 encoder model name (HuggingFace format)
12
- IMAGE_SIZE: 448 # Input image size (height and width in pixels)
13
  RETURN_SELECTED_LAYERS: [3, 6, 9, 12] # Transformer layer indices to extract features from (0-indexed)
14
  RGB_ENCODER_LR: 0.0 # Learning rate for RGB encoder (0.0 = frozen, must be explicitly set)
15
  DECODERS:
@@ -19,10 +19,10 @@ parameters:
19
  REASSEMBLE_OUT_CHANNELS: [768,1024,1536,2048] # Output channels for each decoder stage (DPT-style reassembly)
20
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
21
  READOUT_TYPE: "ignore" # Readout type for DPT decoder ("ignore", "project", etc.)
22
- FROM_PRETRAINED: "weights/rgb_decoder.pth" # Path to pretrained decoder weights (optional)
23
  USE_BN: False # Use batch normalization in decoder
24
  DROPOUT: 0.1 # Dropout rate in decoder layers
25
- OUTPUT_IMAGE_SIZE: [448,448] # Output image resolution [height, width]
26
  OUTPUT_CHANNELS: 3 # Number of output channels (3 for RGB diffuse image)
27
  DECODER_LR: 1.0e-5 # Custom learning rate for decoder (0.0 = frozen, 1.0 = same as base LR)
28
  NUM_FUSION_BLOCKS_TRAINABLE: 1 # Number of fusion blocks to train (0-4, null = train all if DECODER_LR != 0)
@@ -35,14 +35,14 @@ parameters:
35
  READOUT_TYPE: "ignore" # Readout type for DPT decoder
36
  USE_BN: False # Use batch normalization in decoder
37
  DROPOUT: 0.1 # Dropout rate in decoder layers
38
- OUTPUT_IMAGE_SIZE: [448,448] # Output image resolution [height, width]
39
  OUTPUT_CHANNELS: 1 # Number of output channels (1 for highlight mask)
40
  DECODER_LR: 5.0e-4 # Custom learning rate for decoder (0.0 = frozen, 1.0 = same as base LR)
41
  NUM_FUSION_BLOCKS_TRAINABLE: null # Number of fusion blocks to train (0-4, null = train all if DECODER_LR != 0)
42
  TOKEN_INPAINTER:
43
  TOKEN_INPAINTER_CLASS: "TokenInpainter_Prior" # Token inpainter class name
44
  TOKEN_INPAINTER_MODULE: "token_inpainters" # Module name to import token inpainter from
45
- FROM_PRETRAINED: "weights/token_inpainter.pth" # Path to pretrained token inpainter weights
46
  TOKEN_INPAINTER_LR: 1.0e-5 # Learning rate for token inpainter (can differ from base LR)
47
  DEPTH: 6 # Number of transformer blocks
48
  HEADS: 16 # Number of attention heads
 
9
  MODEL_MODULE: "models" # Module name to import model classes from (default: "models")
10
  RGB_ENCODER:
11
  ENCODER: "facebook/dinov3-vitl16-pretrain-lvd1689m" # DINOv3 encoder model name (HuggingFace format)
12
+ IMAGE_SIZE: 896 # Input image size (height and width in pixels)
13
  RETURN_SELECTED_LAYERS: [3, 6, 9, 12] # Transformer layer indices to extract features from (0-indexed)
14
  RGB_ENCODER_LR: 0.0 # Learning rate for RGB encoder (0.0 = frozen, must be explicitly set)
15
  DECODERS:
 
19
  REASSEMBLE_OUT_CHANNELS: [768,1024,1536,2048] # Output channels for each decoder stage (DPT-style reassembly)
20
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
21
  READOUT_TYPE: "ignore" # Readout type for DPT decoder ("ignore", "project", etc.)
22
+ # FROM_PRETRAINED: "weights/rgb_decoder.pth" # Path to pretrained decoder weights (optional)
23
  USE_BN: False # Use batch normalization in decoder
24
  DROPOUT: 0.1 # Dropout rate in decoder layers
25
+ OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
26
  OUTPUT_CHANNELS: 3 # Number of output channels (3 for RGB diffuse image)
27
  DECODER_LR: 1.0e-5 # Custom learning rate for decoder (0.0 = frozen, 1.0 = same as base LR)
28
  NUM_FUSION_BLOCKS_TRAINABLE: 1 # Number of fusion blocks to train (0-4, null = train all if DECODER_LR != 0)
 
35
  READOUT_TYPE: "ignore" # Readout type for DPT decoder
36
  USE_BN: False # Use batch normalization in decoder
37
  DROPOUT: 0.1 # Dropout rate in decoder layers
38
+ OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
39
  OUTPUT_CHANNELS: 1 # Number of output channels (1 for highlight mask)
40
  DECODER_LR: 5.0e-4 # Custom learning rate for decoder (0.0 = frozen, 1.0 = same as base LR)
41
  NUM_FUSION_BLOCKS_TRAINABLE: null # Number of fusion blocks to train (0-4, null = train all if DECODER_LR != 0)
42
  TOKEN_INPAINTER:
43
  TOKEN_INPAINTER_CLASS: "TokenInpainter_Prior" # Token inpainter class name
44
  TOKEN_INPAINTER_MODULE: "token_inpainters" # Module name to import token inpainter from
45
+ # FROM_PRETRAINED: "weights/token_inpainter.pth" # Path to pretrained token inpainter weights
46
  TOKEN_INPAINTER_LR: 1.0e-5 # Learning rate for token inpainter (can differ from base LR)
47
  DEPTH: 6 # Number of transformer blocks
48
  HEADS: 16 # Number of attention heads