---
# CLIPSeg fine-tune — single unified model, both classes via prompt.
# Loss: focal BCE (γ=2) + Dice. Best CLIPSeg variant from ablation.
#
# NOTE(review): the nesting below was reconstructed from key order after the
# file lost its indentation; confirm section membership (in particular that
# `neg_prob` / `balanced_sampler` belong under `train` and that `loss`,
# `augmentation`, `inference`, `eval` are top-level) against the config loader.

seed: 42

# One entry per target class. Each task has its own dataset directory, a list
# of prompt variants, and a single `eval_prompt` (which is also the first
# entry of that task's `prompts` list).
tasks:
  - name: crack
    dataset_dir: "data/processed/crack"
    prompts:
      - "segment crack"
      - "segment wall crack"
      - "a crack on the wall"
      - "thin crack line"
    eval_prompt: "segment crack"
  - name: taping
    dataset_dir: "data/processed/taping"
    prompts:
      - "segment taping area"
      - "segment joint tape"
      - "segment drywall seam"
      - "drywall joint"
    eval_prompt: "segment taping area"

model:
  name: clipseg
  pretrained: "CIDAS/clipseg-rd64-refined"
  # Freeze/unfreeze switches per sub-module; how each flag is applied is
  # defined by the training code, not visible here.
  freeze_clip: true
  unfreeze_decoder: true
  unfreeze_film: true
  unfreeze_visual_adapter: false

data:
  # Same value as augmentation.resize / augmentation.crop_size below.
  input_size: 352
  full_res_eval: true
  num_workers: 4
  pin_memory: true

train:
  epochs: 30
  batch_size: 16
  optimizer: adamw
  # Written with an explicit mantissa dot and signed exponent so that
  # YAML 1.1 loaders also resolve these as floats rather than strings.
  lr: 1.0e-4
  weight_decay: 1.0e-4
  scheduler: cosine
  warmup_steps: 300
  amp: true
  log_every: 20
  ckpt_dir: "outputs/checkpoints/clipseg"
  log_dir: "outputs/logs/clipseg"
  early_stop_patience: 6
  monitor: "val/dice_mean"
  neg_prob: 0.3
  balanced_sampler: true

loss:
  type: "focal_dice"
  # γ=2, as stated in the header comment.
  gamma: 2.0
  bce_weight: 0.5
  dice_weight: 0.5
  pos_weight: 5.0

augmentation:
  resize: 352
  crop_size: 352
  hflip_p: 0.5
  vflip_p: 0.2
  rotate_limit: 15
  brightness_contrast_p: 0.3
  blur_p: 0.1
  noise_p: 0.1

inference:
  threshold: 0.5
  tta: true
  multi_prompt_avg: true

eval:
  metrics: [iou, dice, precision, recall]