Spaces:

Mr7Explorer
/

backdrop-studio-backend

Runtime error

App Files Files Community

Mr7Explorer committed on Oct 17, 2025

Commit

06e6b8c

verified ·

1 Parent(s): 67390a4

Upload 12 files

Browse files

Files changed (12) hide show

eval_existingOnes.py +73 -0
gen_best_ep.py +85 -0
inference.py +120 -0
loss.py +248 -0
make_a_copy.sh +16 -0
rm_cache.sh +25 -0
sub.sh +17 -0
test.sh +25 -0
train.py +262 -0
train.sh +42 -0
train_test.sh +12 -0
utils.py +100 -0

eval_existingOnes.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import os
+import argparse
+from glob import glob
+import prettytable as pt
+from evaluation.metrics import evaluator, sort_and_round_scores
+from config import Config
+config = Config()
def _format_score(score):
    """Render one metric value as a compact table cell.

    Values that print as ``0.xyz`` are shortened to ``.xyz``. Every other value
    (an exact 1, an integer-like count such as HCE, ...) is left-aligned in a
    field of width 4 so that its integer part is never dropped.
    """
    text = format(score, '.3f')
    if text.startswith('0.'):
        return text[1:]
    return format(score, '<4')


def do_eval(args):
    """Evaluate every model in ``args.model_lst`` on every dataset in ``args.data_lst``.

    For each dataset (names joined by '+'), predictions are looked up under
    ``<pred_root>/<model_name>/<dataset>`` and compared against the ground
    truth under ``<gt_root>/<dataset>/gt``. One '&'-separated PrettyTable per
    dataset is printed and written to ``<save_dir>/<dataset>_eval.txt``.

    Args:
        args: parsed command-line namespace providing ``data_lst``,
            ``pred_root``, ``gt_root``, ``model_lst``, ``metrics`` and
            ``save_dir``.
    """
    task_to_field_names = {
        'DIS5K': ["Dataset", "Method", "maxFm", "wFmeasure", 'MAE', "Smeasure", "meanEm", "HCE", "maxEm", "meanFm", "adpEm", "adpFm", 'mBA', 'maxBIoU', 'meanBIoU'],
        'COD': ["Dataset", "Method", "Smeasure", "wFmeasure", "meanFm", "meanEm", "maxEm", 'MAE', "maxFm", "adpEm", "adpFm", "HCE", 'mBA', 'maxBIoU', 'meanBIoU'],
        'HRSOD': ["Dataset", "Method", "Smeasure", "maxFm", "meanEm", 'MAE', "maxEm", "meanFm", "wFmeasure", "adpEm", "adpFm", "HCE", 'mBA', 'maxBIoU', 'meanBIoU'],
        'General': ["Dataset", "Method", "maxFm", "wFmeasure", 'MAE', "Smeasure", "meanEm", "HCE", "maxEm", "meanFm", "adpEm", "adpFm", 'mBA', 'maxBIoU', 'meanBIoU'],
        'Matting': ["Dataset", "Method", "Smeasure", "maxFm", "meanEm", 'MSE', "maxEm", "meanFm", "wFmeasure", "adpEm", "adpFm", "HCE", 'mBA', 'maxBIoU', 'meanBIoU'],
        'General-2K': ["Dataset", "Method", "maxFm", "wFmeasure", 'MAE', "Smeasure", "meanEm", "HCE", "maxEm", "meanFm", "adpEm", "adpFm", 'mBA', 'maxBIoU', 'meanBIoU'],
        'Others': ["Dataset", "Method", "Smeasure", 'MAE', "maxEm", "meanEm", "maxFm", "meanFm", "wFmeasure", "adpEm", "adpFm", "HCE", 'mBA', 'maxBIoU', 'meanBIoU'],
    }
    # Unknown tasks fall back to the generic column order.
    field_names = task_to_field_names.get(config.task, task_to_field_names['Others'])

    if not args.model_lst:
        # Nothing to evaluate; avoids an IndexError on model_lst[0] below.
        print('No predictions found in {}.'.format(args.pred_root))
        return

    metrics = args.metrics.split('+')
    # os.cpu_count() may return None when the count cannot be determined.
    num_workers = min(8, int((os.cpu_count() or 1) * 0.9))

    for data_name in args.data_lst.split('+'):
        print('#' * 20, data_name, '#' * 20)
        # The first model is used as a probe for whether this dataset was predicted at all.
        if not glob(os.path.join(args.pred_root, args.model_lst[0], data_name)):
            print('Skip dataset {}.'.format(data_name))
            continue
        gt_paths = sorted(glob(os.path.join(args.gt_root, data_name, 'gt', '*')))
        tb = pt.PrettyTable()
        tb.vertical_char = '&'
        tb.field_names = field_names
        os.makedirs(args.save_dir, exist_ok=True)
        result_path = os.path.join(args.save_dir, '{}_eval.txt'.format(data_name))
        for model_name in args.model_lst:
            print('\t', 'Evaluating model: {}...'.format(model_name))
            # Mirror each ground-truth path into the model's prediction folder (dropping the 'gt' level).
            pred_paths = [
                p.replace(args.gt_root, os.path.join(args.pred_root, model_name)).replace('/gt/', '/')
                for p in gt_paths
            ]
            em, sm, fm, mae, mse, wfm, hce, mba, biou = evaluator(
                gt_paths=gt_paths,
                pred_paths=pred_paths,
                metrics=metrics,
                verbose=config.verbose_eval,
                num_workers=num_workers,
            )
            scores = sort_and_round_scores(config.task, [em, sm, fm, mae, mse, wfm, hce, mba, biou])
            records = [data_name, model_name] + [_format_score(score) for score in scores]
            tb.add_row(records)
            # Rewrite the table after every model so partial results survive an interruption.
            with open(result_path, 'w') as file_to_write:
                file_to_write.write(str(tb) + '\n')
        print(tb)
if __name__ == '__main__':
    # Command-line entry point: collect the prediction folders and evaluate them.
    parser = argparse.ArgumentParser()
    parser.add_argument('--gt_root', type=str, help='ground-truth root', default=os.path.join(config.data_root_dir, config.task))
    parser.add_argument('--pred_root', type=str, help='prediction root', default='./e_preds')
    parser.add_argument('--data_lst', type=str, help='test datasets', default=config.testsets.replace(',', '+'))
    parser.add_argument('--save_dir', type=str, help='directory to save results', default='e_results')
    parser.add_argument('--metrics', type=str, help="metrics to compute, joined by '+', or 'all'", default='+'.join(['S', 'MAE']))
    args = parser.parse_args()

    if args.metrics == 'all':
        all_metrics = ['S', 'MAE', 'E', 'F', 'WF', 'MBA', 'BIoU', 'MSE', 'HCE']
        # HCE (last entry) is only kept when at least one DIS-* dataset is evaluated.
        if not any('DIS-' in _data for _data in args.data_lst.split('+')):
            all_metrics = all_metrics[:-1]
        args.metrics = '+'.join(all_metrics)

    def _epoch_of(name):
        """Return the integer that follows 'epoch_' in a prediction folder name."""
        return int(name.split('epoch_')[-1].split('-')[0])

    candidates = os.listdir(args.pred_root)
    try:
        # Newest epoch first.
        args.model_lst = sorted(candidates, key=_epoch_of, reverse=True)
    except ValueError as e:
        # Folder names without a parsable epoch: fall back to alphabetical order.
        print(f"Exception: {type(e).__name__} at line {e.__traceback__.tb_lineno} of {__file__}: {e}")
        args.model_lst = sorted(candidates)
    do_eval(args)

gen_best_ep.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import os
+from glob import glob
+import numpy as np
+from config import Config
+config = Config()
# Select, for each metric, the checkpoint(s) with the best summed score over all
# per-dataset tables in e_results/, and write them to
# e_results/eval-<task>_best_on_<metric>.txt.
eval_txts = sorted(glob('e_results/*_eval.txt'))
print('eval_txts:', [_.split(os.sep)[-1] for _ in eval_txts])
if not eval_txts:
    raise SystemExit('No *_eval.txt files found in e_results/; nothing to rank.')

score_panel = {}
sep = '&'
metrics = ['sm', 'wfm', 'hce']    # we used HCE for DIS and wFm for others.
if 'DIS5K' not in config.task:
    metrics.remove('hce')
metrics_min = ['hce', 'mae']      # metrics where a lower value is better

# Column of each metric inside a data row, per task (-1: metric not used for that task).
target_indices = {
    'sm': [5, 2, 2, 5, 5, 2],
    'wfm': [3, 3, 8, 3, 3, 8],
    'hce': [7, -1, -1, 7, 7, -1]
}
task_idx = ['DIS5K', 'COD', 'HRSOD', 'General', 'General-2K', 'Matting'].index(config.task)

# Read every table once; the first three lines are the table header and data rows contain a '.'.
eval_lines = {}
for eval_txt in eval_txts:
    with open(eval_txt, 'r') as f:
        eval_lines[eval_txt] = f.readlines()
data_rows = {eval_txt: [l for l in lines[3:] if '.' in l] for eval_txt, lines in eval_lines.items()}
# Keep row counts consistent across files by truncating to the shortest table.
num_rows = min(len(rows) for rows in data_rows.values())
# Header and closing border are taken from the last table.
last_lines = eval_lines[eval_txts[-1]]

for metric in metrics:
    print('Metric:', metric)
    score_panel = {}    # fresh per metric so scores of different metrics never mix
    target_idx = target_indices[metric][task_idx]
    for eval_txt in eval_txts:
        for line in data_rows[eval_txt][:num_rows]:
            properties = line.strip().strip(sep).split(sep)
            ckpt = properties[1].strip()
            # Method names may carry a suffix after the epoch (e.g. '-reso_1024x1024'),
            # so only the part before the next '-' is parsed as the epoch number.
            if int(ckpt.split('--epoch_')[-1].split('-')[0].strip()) < 0:
                continue
            cell = properties[target_idx].strip()
            score = float(cell) if metric != 'hce' else int(cell.strip('.'))
            score_panel.setdefault(ckpt, []).append(score)

    lower_is_better = metric in metrics_min
    max_or_min = min if lower_is_better else max
    best_total = np.sum(max_or_min(score_panel.values(), key=np.sum))
    good_models = []
    for k, v in score_panel.items():
        total = np.sum(v)
        if (total <= best_total) if lower_is_better else (total >= best_total):
            print(k, v)
            good_models.append(k)

    # Write
    info4good_models = last_lines[:3]
    metric_names = [m.strip() for m in last_lines[1].strip().strip('&').split('&')[2:]]
    testset_mean_values = {metric_name: [] for metric_name in metric_names}
    for good_model in good_models:
        for eval_txt in eval_txts:
            for line in eval_lines[eval_txt]:
                if good_model in {_.strip() for _ in line.split(sep)}:
                    info4good_models.append(line)
                    metric_scores = [float(m.strip()) for m in line.strip().strip('&').split('&')[2:]]
                    for metric_name, metric_score in zip(metric_names, metric_scores):
                        testset_mean_values[metric_name].append(metric_score)

    if 'DIS5K' in config.task:
        # [:-1] to remove DIS-VD (presumably the last table in sorted order -- TODO confirm
        # this still holds when more than one checkpoint ties for best).
        testset_mean_values_lst = [
            '{:<4}'.format(int(np.mean(v_lst[:-1]).round())) if name == 'HCE'
            else '{:.3f}'.format(np.mean(v_lst[:-1])).lstrip('0')
            for name, v_lst in testset_mean_values.items()
        ]
        # Reuse an existing row as a layout template so the mean values line up with the columns.
        sample_line_for_placing_mean_values = info4good_models[-2]
        relabelled_line = sample_line_for_placing_mean_values.replace(
            sample_line_for_placing_mean_values.split('&')[1].strip(), 'DIS-TEs'
        )
        numbers_placed_well = relabelled_line.strip().split('&')[3:]
        for idx_number, (number_placed_well, testset_mean_value) in enumerate(zip(numbers_placed_well, testset_mean_values_lst)):
            numbers_placed_well[idx_number] = number_placed_well.replace(number_placed_well.strip(), testset_mean_value)
        testset_mean_line = '&'.join(relabelled_line.split('&')[:3] + numbers_placed_well) + '\n'
        info4good_models.append(testset_mean_line)

    info4good_models.append(last_lines[-1])
    info = ''.join(info4good_models)
    print(info)
    with open(os.path.join('e_results', 'eval-{}_best_on_{}.txt'.format(config.task, metric)), 'w') as f:
        f.write(info + '\n')

inference.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import os
+import argparse
+from glob import glob
+from tqdm import tqdm
+import cv2
+import torch
+from contextlib import nullcontext
+from dataset import MyData
+from models.birefnet import BiRefNet
+from utils import save_tensor_img, check_state_dict
+from config import Config
+config = Config()
# Translate the configured precision name into an autocast context;
# anything other than 'fp16' / 'bf16' disables autocast.
mixed_precision = config.mixed_precision
mixed_dtype = {'fp16': torch.float16, 'bf16': torch.bfloat16}.get(mixed_precision)
if mixed_dtype:
    autocast_ctx = torch.amp.autocast(device_type='cuda', dtype=mixed_dtype)
else:
    autocast_ctx = nullcontext()
def inference(model, data_loader_test, pred_root, method, testset, device=0):
    """Run ``model`` over ``data_loader_test`` and save one prediction map per sample.

    Each prediction is the sigmoid of the model's last output, resized to the
    size of the label image found at the sample's label path, and saved as
    ``<pred_root>/<method>/<testset>/<label file name>``.

    Args:
        model: network to evaluate; its training/eval mode is restored on exit.
        data_loader_test: loader whose batches hold inputs at index 0 and label paths at index -1.
        pred_root: root folder for predictions.
        method: sub-folder name identifying the checkpoint/setting.
        testset: sub-folder name identifying the test set.
        device: device the inputs are moved to.

    Raises:
        FileNotFoundError: if a label image cannot be read.
    """
    save_dir = os.path.join(pred_root, method, testset)
    os.makedirs(save_dir, exist_ok=True)

    model_training = model.training
    if model_training:
        model.eval()
    batches = tqdm(data_loader_test, total=len(data_loader_test)) if config.verbose_eval else data_loader_test
    try:
        for batch in batches:
            inputs = batch[0].to(device)
            label_paths = batch[-1]
            with autocast_ctx, torch.no_grad():
                scaled_preds = model(inputs)[-1].sigmoid().to(torch.float32)
            for idx_sample in range(scaled_preds.shape[0]):
                label_path = label_paths[idx_sample]
                label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
                if label is None:
                    # cv2.imread signals failure by returning None instead of raising.
                    raise FileNotFoundError('Cannot read label image: {}'.format(label_path))
                res = torch.nn.functional.interpolate(
                    scaled_preds[idx_sample].unsqueeze(0),
                    size=label.shape[:2],
                    mode='bilinear',
                    align_corners=True
                )
                # test set dir + file name (handles both '/' and '\\' separators)
                file_name = label_path.replace('\\', '/').split('/')[-1]
                save_tensor_img(res, os.path.join(save_dir, file_name))
    finally:
        # Put the model back into training mode even if inference failed half-way.
        if model_training:
            model.train()
    return None
def _strip_pth(name):
    """Remove a trailing '.pth' suffix (and only that suffix) from ``name``."""
    return name[:-len('.pth')] if name.endswith('.pth') else name


def _checkpoint_epoch(path):
    """Return the integer after the last 'epoch_' in the checkpoint file name, or -1 if there is none."""
    try:
        return int(_strip_pth(os.path.basename(path)).split('epoch_')[-1])
    except ValueError:
        return -1


def _parse_resolution(resolution):
    """Translate the ``--resolution`` option into a data size (None means original resolution)."""
    if resolution in [None, 'None', 0, '']:
        # Use original resolution for inference.
        return None
    if resolution in ['config.size']:
        return config.size
    try:
        return [int(l) for l in resolution.split('x')]
    except (AttributeError, ValueError) as e:
        print(f"Exception: {type(e).__name__} at line {e.__traceback__.tb_lineno} of {__file__}: {e}")
        # default as the config.size.
        return config.size


def main(args):
    """Run inference with one checkpoint or every checkpoint of a folder on all requested test sets.

    Args:
        args: parsed command-line namespace providing ``ckpt``, ``ckpt_folder``,
            ``pred_root``, ``resolution`` and ``testsets`` (names joined by '+').

    Returns:
        None. Predictions are written under ``args.pred_root``.
    """
    device = config.device
    if args.ckpt_folder:
        print('Testing with models in {}'.format(args.ckpt_folder))
        candidates = glob(os.path.join(args.ckpt_folder, '*.pth'))
    elif args.ckpt:
        print('Testing with model {}'.format(args.ckpt))
        candidates = [args.ckpt]
    else:
        print('No checkpoint given: set --ckpt_folder or --ckpt.')
        return None

    if config.model == 'BiRefNet':
        model = BiRefNet(bb_pretrained=False)
    else:
        print('Undefined model: {}.'.format(config.model))
        return None

    # Latest epoch first; names without a parsable epoch sort last.
    weights_lst = sorted(candidates, key=_checkpoint_epoch, reverse=True)

    data_size = _parse_resolution(args.resolution)
    # data_size is None when the original resolution is used.
    reso_tag = 'x'.join([str(s) for s in data_size]) if data_size else 'None'

    for testset in args.testsets.split('+'):
        print('>>>> Testset: {}...'.format(testset))
        data_loader_test = torch.utils.data.DataLoader(
            dataset=MyData(testset, data_size=data_size, is_train=False),
            batch_size=config.batch_size_valid, shuffle=False, num_workers=config.num_workers, pin_memory=True
        )
        for weights in weights_lst:
            print('\tInferencing {}...'.format(weights))
            state_dict = torch.load(weights, map_location='cpu', weights_only=True)
            state_dict = check_state_dict(state_dict)
            model.load_state_dict(state_dict)
            model = model.to(device)
            # '<ckpt folder>--<ckpt file stem>-reso_<size>'. Only the '.pth' suffix is removed;
            # str.rstrip('.pth') would also eat trailing 'p', 't', 'h' and '.' of the folder name.
            path_parts = weights.split(os.sep)[-2:]
            method = '--'.join(path_parts[:-1] + [_strip_pth(path_parts[-1])]) + '-reso_{}'.format(reso_tag)
            inference(
                model, data_loader_test=data_loader_test, pred_root=args.pred_root,
                method=method,
                testset=testset, device=device
            )
if __name__ == '__main__':
    # Parameter from command line
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--ckpt', type=str, help='single checkpoint file; only used when --ckpt_folder is empty')
    # Default to the last entry (sorted by name) under ckpts/, if there is one.
    ckpt_dirs = sorted(glob(os.path.join('ckpts', '*')))
    parser.add_argument('--ckpt_folder', default=ckpt_dirs[-1] if ckpt_dirs else None, type=str, help='model folder')
    parser.add_argument('--pred_root', default='e_preds', type=str, help='Output folder')
    parser.add_argument('--resolution', default='default', type=str,
                        help="WidthxHeight, 'config.size', or 'None' for the original resolution; anything else falls back to config.size")
    parser.add_argument('--testsets',
                        default=config.testsets.replace(',', '+'),
                        type=str,
                        help="Test all sets: DIS5K -> 'DIS-VD+DIS-TE1+DIS-TE2+DIS-TE3+DIS-TE4'")
    args = parser.parse_args()
    if not args.ckpt_folder and not args.ckpt:
        parser.error('no checkpoint found: pass --ckpt_folder or --ckpt')
    if config.precisionHigh:
        torch.set_float32_matmul_precision('high')
    main(args)

loss.py ADDED Viewed

	@@ -0,0 +1,248 @@

+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.autograd import Variable
+from math import exp
+from config import Config
class ContourLoss(torch.nn.Module):
    """Contour loss made of a length term (smoothness of the prediction) and a region term."""

    def __init__(self):
        super().__init__()

    def forward(self, pred, target, weight=10):
        '''
        target, pred: tensor of shape (B, C, H, W), where target[:,:,region_in_contour] == 1,
                        target[:,:,region_out_contour] == 0.
        weight: scalar, length term weight.

        Returns the scalar ``weight * length + region``.
        '''
        # length term: finite differences of the prediction
        delta_r = pred[:, :, 1:, :] - pred[:, :, :-1, :]  # difference between adjacent rows    (B, C, H-1, W)
        delta_c = pred[:, :, :, 1:] - pred[:, :, :, :-1]  # difference between adjacent columns (B, C, H,   W-1)

        # Crop both maps to a common (B, C, H-2, W-2) window before combining them.
        delta_r = delta_r[:, :, 1:, :-2] ** 2
        delta_c = delta_c[:, :, :-2, 1:] ** 2
        delta_pred = torch.abs(delta_r + delta_c)

        epsilon = 1e-8  # keeps the square root (and its gradient) finite where the gradient is zero
        length = torch.mean(torch.sqrt(delta_pred + epsilon))  # eq.(11) in the paper, mean is used instead of sum.

        # region term, with constants c_in = 1 and c_out = 0
        region_in = torch.mean(pred * (target - 1) ** 2)  # equ.(12) in the paper, mean is used instead of sum.
        region_out = torch.mean((1 - pred) * target ** 2)
        region = region_in + region_out

        loss = weight * length + region
        return loss
class IoULoss(torch.nn.Module):
    """Soft IoU loss: the sum over the batch of ``1 - IoU`` of each sample."""

    def __init__(self):
        super().__init__()

    def forward(self, pred, target):
        """Return the summed (not averaged) IoU loss of a batch.

        Args:
            pred: 4-D tensor of predictions, batch first.
            target: 4-D tensor of targets, batch first.

        Returns:
            0-dim tensor ``sum_i (1 - IoU_i)``. A sample whose union is exactly
            zero (empty prediction and empty target) counts as a perfect match
            instead of producing NaN.
        """
        per_sample = (1, 2, 3)
        # compute the IoU of the foreground
        inter = (target * pred).sum(dim=per_sample)
        union = target.sum(dim=per_sample) + pred.sum(dim=per_sample) - inter
        valid = union != 0
        # Substitute 1 in the denominator where the union is zero so no 0/0 is ever evaluated.
        safe_union = torch.where(valid, union, torch.ones_like(union))
        iou = torch.where(valid, inter / safe_union, torch.ones_like(union))
        # IoU loss is (1 - IoU); summed over the batch, not averaged.
        return (1 - iou).sum()
class StructureLoss(torch.nn.Module):
    """Weighted BCE plus weighted IoU, with extra weight where the target differs from its local average."""

    def __init__(self):
        super(StructureLoss, self).__init__()

    def forward(self, pred, target):
        """Return the mean over batch and channel of weighted BCE + weighted IoU.

        ``pred`` is passed to a BCE-with-logits call and to ``torch.sigmoid``,
        i.e. it is treated as logits here.
        """
        spatial = (2, 3)
        # Pixels that deviate from the 31x31 local mean of the target get up to 6x weight.
        local_mean = F.avg_pool2d(target, kernel_size=31, stride=1, padding=15)
        weit = 1 + 5 * torch.abs(local_mean - target)

        bce_map = F.binary_cross_entropy_with_logits(pred, target, reduction='none')
        wbce = (weit * bce_map).sum(dim=spatial) / weit.sum(dim=spatial)

        prob = torch.sigmoid(pred)
        inter = ((prob * target) * weit).sum(dim=spatial)
        union = ((prob + target) * weit).sum(dim=spatial)
        wiou = 1 - (inter + 1) / (union - inter + 1)

        return (wbce + wiou).mean()
class PatchIoULoss(torch.nn.Module):
    """IoU loss accumulated over non-overlapping 64x64 spatial patches."""

    def __init__(self):
        super().__init__()
        self.iou_loss = IoULoss()

    def forward(self, pred, target):
        """Return the sum of ``self.iou_loss`` over all spatial patches.

        Args:
            pred, target: tensors indexed as (B, C, H, W).

        Patches are anchored every 64 pixels along H and W; patches at the
        bottom/right border are smaller when H or W is not a multiple of 64.
        """
        win_y, win_x = 64, 64
        # Tile over the spatial dimensions (dims 2 and 3), not over batch/channel.
        height, width = target.shape[2], target.shape[3]
        iou_loss = 0.
        for anchor_y in range(0, height, win_y):
            for anchor_x in range(0, width, win_x):
                patch_pred = pred[:, :, anchor_y:anchor_y+win_y, anchor_x:anchor_x+win_x]
                patch_target = target[:, :, anchor_y:anchor_y+win_y, anchor_x:anchor_x+win_x]
                iou_loss += self.iou_loss(patch_pred, patch_target)
        return iou_loss
class ThrReg_loss(torch.nn.Module):
    """Regulariser that is smallest when predictions sit at 0 or 1 and largest at 0.5."""

    def __init__(self):
        super(ThrReg_loss, self).__init__()

    def forward(self, pred, gt=None):
        """Return ``mean(1 - (pred^2 + (pred - 1)^2))``; ``gt`` is accepted but not used."""
        dist_to_zero = (pred - 0) ** 2
        dist_to_one = (pred - 1) ** 2
        penalty = 1 - (dist_to_zero + dist_to_one)
        return torch.mean(penalty)
class ClsLoss(nn.Module):
    """
    Auxiliary classification loss for each refined class output.
    """

    def __init__(self):
        super(ClsLoss, self).__init__()
        self.config = Config()
        # Per-criterion weights, keyed by criterion name.
        self.lambdas_cls = self.config.lambdas_cls
        self.criterions_last = {'ce': nn.CrossEntropyLoss()}

    def forward(self, preds, gt):
        """Sum the weighted criteria over every non-None entry of ``preds``.

        Returns the float ``0.`` when every entry is None.
        """
        loss = 0.
        for pred_lvl in preds:
            if pred_lvl is not None:
                for criterion_name, criterion in self.criterions_last.items():
                    weight = self.lambdas_cls[criterion_name]
                    loss += criterion(pred_lvl, gt) * weight
        return loss
class PixLoss(nn.Module):
    """
    Pixel loss for each refined map output.

    The active criteria are those whose weight in ``config.lambdas_pix_last``
    is present and truthy.

    NOTE(review): every criterion, including 'structure', receives
    ``pred.sigmoid()``, while StructureLoss applies a BCE-with-logits to its
    input -- confirm this is intended.
    """

    def __init__(self):
        super().__init__()
        self.config = Config()
        self.lambdas_pix_last = self.config.lambdas_pix_last
        # Built at call time (not at class level) because several criteria are
        # defined further down in this module. Order fixes the iteration order
        # of ``criterions_last``.
        criterion_factories = [
            ('bce', nn.BCELoss),
            ('iou', IoULoss),
            ('iou_patch', PatchIoULoss),
            ('ssim', SSIMLoss),
            ('mae', nn.L1Loss),
            ('mse', nn.MSELoss),
            ('reg', ThrReg_loss),
            ('cnt', ContourLoss),
            ('structure', StructureLoss),
        ]
        self.criterions_last = {}
        for criterion_name, factory in criterion_factories:
            if criterion_name in self.lambdas_pix_last and self.lambdas_pix_last[criterion_name]:
                self.criterions_last[criterion_name] = factory()

    def forward(self, scaled_preds, gt, pix_loss_lambda=1.0):
        """Accumulate all active criteria over every prediction level.

        Args:
            scaled_preds: sequence of prediction maps (pre-sigmoid); maps whose
                shape differs from ``gt`` are bilinearly resized to ``gt``'s spatial size.
            gt: target tensor.
            pix_loss_lambda: global multiplier applied to every term.

        Returns:
            ``(loss, loss_dict)``: the summed loss and, per criterion name, the
            weighted loss value averaged over the levels (plain floats).
        """
        loss = 0.
        loss_dict = {}
        num_levels = len(scaled_preds)
        for pred_lvl in scaled_preds:
            if pred_lvl.shape != gt.shape:
                pred_lvl = nn.functional.interpolate(pred_lvl, size=gt.shape[2:], mode='bilinear', align_corners=True)
            # The sigmoid does not depend on the criterion, so compute it once per level.
            prob_lvl = pred_lvl.sigmoid()
            for criterion_name, criterion in self.criterions_last.items():
                _loss = criterion(prob_lvl, gt) * self.lambdas_pix_last[criterion_name] * pix_loss_lambda
                loss += _loss
                loss_dict[criterion_name] = loss_dict.get(criterion_name, 0.) + _loss.item() / num_levels
        return loss, loss_dict
class SSIMLoss(torch.nn.Module):
    """SSIM-based loss: ``1 - (1 + SSIM) / 2``, computed with a cached Gaussian window."""

    def __init__(self, window_size=11, size_average=True):
        super().__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, img1, img2):
        """Return the SSIM loss between two 4-D image batches.

        The Gaussian window is rebuilt (and cached) whenever the channel count,
        device or dtype of ``img1`` differs from the cached window.
        """
        (_, channel, _, _) = img1.size()
        window = self.window
        # Compare device and dtype explicitly: a type-string comparison cannot
        # tell two GPUs apart and would reuse a window living on another device.
        if channel != self.channel or window.device != img1.device or window.dtype != img1.dtype:
            window = create_window(self.window_size, channel).to(device=img1.device, dtype=img1.dtype)
            self.window = window
            self.channel = channel
        return 1 - (1 + _ssim(img1, img2, window, self.window_size, channel, self.size_average)) / 2
def gaussian(window_size, sigma):
    """Return a 1-D Gaussian kernel of length ``window_size`` that sums to 1.

    The kernel is centred on index ``window_size // 2``.
    """
    center = window_size // 2
    two_sigma_sq = float(2 * sigma ** 2)
    weights = [exp(-(pos - center) ** 2 / two_sigma_sq) for pos in range(window_size)]
    gauss = torch.Tensor(weights)
    return gauss / gauss.sum()
def create_window(window_size, channel):
    """Return a ``(channel, 1, window_size, window_size)`` Gaussian window (sigma 1.5).

    The 2-D kernel is the outer product of the 1-D kernel with itself and is
    repeated once per channel. No ``Variable`` wrapper is used: it is
    deprecated and plain tensors behave the same.
    """
    kernel_1d = gaussian(window_size, 1.5).unsqueeze(1)
    kernel_2d = kernel_1d.mm(kernel_1d.t()).float().unsqueeze(0).unsqueeze(0)
    window = kernel_2d.expand(channel, 1, window_size, window_size).contiguous()
    return window
+def _ssim(img1, img2, window, window_size, channel, size_average=True):
+    mu1 = F.conv2d(img1, window, padding = window_size//2, groups=channel)
+    mu2 = F.conv2d(img2, window, padding = window_size//2, groups=channel)
+    mu1_sq = mu1.pow(2)
+    mu2_sq = mu2.pow(2)
+    mu1_mu2 = mu1*mu2
+    sigma1_sq = F.conv2d(img1*img1, window, padding=window_size//2, groups=channel) - mu1_sq
+    sigma2_sq = F.conv2d(img2*img2, window, padding=window_size//2, groups=channel) - mu2_sq
+    sigma12 = F.conv2d(img1*img2, window, padding=window_size//2, groups=channel) - mu1_mu2
+    C1 = 0.01**2
+    C2 = 0.03**2
+    ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2))
+    if size_average:
+        return ssim_map.mean()
+    else:
+        return ssim_map.mean(1).mean(1).mean(1)
def SSIM(x, y):
    """Return the per-pixel SSIM dissimilarity ``clamp((1 - SSIM) / 2, 0, 1)``.

    Local statistics are taken with a 3x3 average pool (stride 1, padding 1),
    so the output has the same shape as the inputs.
    """
    C1 = 0.01 ** 2
    C2 = 0.03 ** 2
    pool = nn.AvgPool2d(3, 1, 1)

    mu_x = pool(x)
    mu_y = pool(y)
    mu_x_mu_y = mu_x * mu_y
    mu_x_sq = mu_x.pow(2)
    mu_y_sq = mu_y.pow(2)

    sigma_x = pool(x * x) - mu_x_sq
    sigma_y = pool(y * y) - mu_y_sq
    sigma_xy = pool(x * y) - mu_x_mu_y

    numerator = (2 * mu_x_mu_y + C1) * (2 * sigma_xy + C2)
    denominator = (mu_x_sq + mu_y_sq + C1) * (sigma_x + sigma_y + C2)
    ssim_map = numerator / denominator
    return torch.clamp((1 - ssim_map) / 2, 0, 1)
def saliency_structure_consistency(x, y):
    """Return the mean of the per-pixel ``SSIM(x, y)`` dissimilarity map."""
    return torch.mean(SSIM(x, y))

make_a_copy.sh ADDED Viewed

	@@ -0,0 +1,16 @@

#!/bin/bash
# Copy the scripts, Python sources and .git* entries of this repo into ../<repo>.
# Usage: ./make_a_copy.sh <repo>

# Set dst repo here.
repo=$1
if [ -z "${repo}" ]; then
    # Without a name everything would be copied straight into the parent directory.
    echo "Usage: $0 <repo>" >&2
    exit 1
fi
dst="../${repo}"

# -p creates the parents as needed and does not fail when re-running on an existing copy.
mkdir -p "${dst}/evaluation" "${dst}/models/backbones" "${dst}/models/modules"

cp ./*.sh "${dst}"
cp ./*.py "${dst}"
cp ./evaluation/*.py "${dst}/evaluation"
cp ./models/*.py "${dst}/models"
cp ./models/backbones/*.py "${dst}/models/backbones"
cp ./models/modules/*.py "${dst}/models/modules"
cp -r ./.git* "${dst}"

rm_cache.sh ADDED Viewed

	@@ -0,0 +1,25 @@

#!/bin/bash
# Remove caches, logs, checkpoints and temporary outputs from the working tree.
# -f keeps the script quiet when a target does not exist.
# WARNING: this deletes the ckpts/ folder.

rm -rf __pycache__ */__pycache__ */*/__pycache__

# Val
rm -rf tmp*

# Train
rm -f slurm*
rm -rf ckpts
rm -f nohup.out*
rm -f nohup.log*

# Eval
rm -rf evaluation/eval-*
rm -rf e_logs/

# System
rm -f core-*-python-*

# Inference cache
rm -rf images_todo/
rm -rf predictions/

clear

sub.sh ADDED Viewed

	@@ -0,0 +1,17 @@

#!/bin/bash
# Submit train_test.sh as a single-node Slurm job.
# Example: ./sub.sh tmp_proj 0,1,2,3 3 --> Use 0,1,2,3 for training, release GPUs, use GPU:3 for inference.
# NOTE(review): only the first two arguments are read by this script.

# module load gcc/11.2.0 cuda/11.8 cudnn/8.6.0_cu11x && cpu_core_num=6
module load compilers/cuda/11.8 compilers/gcc/12.2.0 cudnn/8.4.0.27_cuda11.x && cpu_core_num=32
# If `module load` failed, cpu_core_num is unset and --cpus-per-task would evaluate to 0.
cpu_core_num=${cpu_core_num:-32}

export PYTHONUNBUFFERED=1

method=${1:-"BSL"}
devices=${2:-"0,1"}

# Number of GPUs = number of comma-separated device ids (a trailing comma is ignored).
IFS=',' read -ra device_ids <<< "${devices%%,}"
gpu_num=${#device_ids[@]}

sbatch --nodes=1 -p vip_gpu_ailab -A ai4bio \
    --gres=gpu:${gpu_num} --ntasks-per-node=1 --cpus-per-task=$((gpu_num*cpu_core_num)) \
    ./train_test.sh "${method}" "${devices}"

hostname

test.sh ADDED Viewed

	@@ -0,0 +1,25 @@

#!/bin/bash
# Run inference for every requested resolution, then evaluate the predictions per test set.
# Usage: ./test.sh [devices] [pred_root] ["resolution ..."]
devices=${1:-0}
pred_root=${2:-e_preds}
resolutions=${3:-"config.size"}

# Inference

# resolutions="1024x1024 None"
# ${resolutions} is left unquoted on purpose: it is a space-separated list.
for resolution in ${resolutions}; do
    CUDA_VISIBLE_DEVICES="${devices}" python inference.py --pred_root "${pred_root}" --resolution "${resolution}"
done

echo "Inference finished at $(date)"

# Evaluation
log_dir=e_logs
mkdir -p "${log_dir}"    # -p: do not fail when the folder is left over from a previous run

task=$(python3 config.py --print_task)    # not used below; kept as in the original script
testsets=$(python3 config.py --print_testsets)

# config.py prints the test sets comma-separated; iterate over them one by one.
for testset in ${testsets//,/ }; do
    python eval_existingOnes.py --pred_root "${pred_root}" --data_lst "${testset}" --metrics 'all' > "${log_dir}/eval_${testset}.out"
done

echo "Evaluation finished at $(date)"

train.py ADDED Viewed

	@@ -0,0 +1,262 @@

+import os
+import datetime
+from contextlib import nullcontext
+import argparse
+import torch
+import torch.nn as nn
+import torch.optim as optim
# Enable expandable CUDA allocator segments on PyTorch >= 2.5.
# Only the leading "major.minor[.patch]" digits are parsed, so local or
# pre-release version strings such as '2.5.0a0+gitabc' do not break int().
import re
_version_match = re.match(r'(\d+)\.(\d+)(?:\.(\d+))?', torch.__version__)
if _version_match and tuple(int(part or 0) for part in _version_match.groups()) >= (2, 5, 0):
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
+from config import Config
+from loss import PixLoss, ClsLoss
+from dataset import MyData
+from models.birefnet import BiRefNet
+from utils import Logger, AverageMeter, set_seed, check_state_dict
+from torch.utils.data.distributed import DistributedSampler
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.distributed import init_process_group, destroy_process_group
# ---- Command-line options ----
parser = argparse.ArgumentParser(description='')
parser.add_argument('--resume', default=None, type=str, help='path to latest checkpoint')
parser.add_argument('--epochs', default=120, type=int)
parser.add_argument('--ckpt_dir', default='ckpts/tmp', help='Temporary folder')
parser.add_argument('--dist', default=False, type=lambda x: x == 'True')
parser.add_argument('--use_accelerate', action='store_true', help='`accelerate launch --multi_gpu train.py --use_accelerate`. Use accelerate for training, good for FP16/BF16/...')
args = parser.parse_args()

config = Config()

# ---- Optional accelerate setup (replaces the hand-written DDP path) ----
if args.use_accelerate:
    from accelerate import Accelerator, utils
    mixed_precision = config.mixed_precision
    kwargs_handlers = [
        utils.InitProcessGroupKwargs(backend="nccl", timeout=datetime.timedelta(seconds=3600*10)),
        utils.DistributedDataParallelKwargs(find_unused_parameters=False),
        utils.GradScalerKwargs(backoff_factor=0.5),
    ]
    if mixed_precision == 'fp8':
        kwargs_handlers.append(utils.AORecipeKwargs())
    accelerator = Accelerator(
        mixed_precision=mixed_precision,
        gradient_accumulation_steps=1,
        kwargs_handlers=kwargs_handlers,
    )
    accelerator.print(accelerator.state)
    accelerator.print('backbone:', config.bb, ', freeze_bb:', config.freeze_bb)
    # accelerate does the process-group handling itself, so the manual DDP path is switched off.
    args.dist = False

# ---- DDP / device selection ----
to_be_distributed = args.dist
if to_be_distributed:
    init_process_group(backend="nccl", timeout=datetime.timedelta(seconds=3600*10))
    device = int(os.environ["LOCAL_RANK"])
elif args.use_accelerate:
    device = accelerator.local_process_index
else:
    device = config.device

# Offset the seed by the device so each process gets its own seed.
# NOTE(review): this addition assumes `device` is an int -- confirm for config.device.
if config.rand_seed:
    set_seed(config.rand_seed + device)

# First epoch to run; raised by init_models_optimizers when resuming.
epoch_st = 1

# make dir for ckpt
os.makedirs(args.ckpt_dir, exist_ok=True)

# Init log file
logger = Logger(os.path.join(args.ckpt_dir, "log.txt"))
logger_loss_idx = 1

# log model and optimizer params
logger.info("datasets: load_all={}, compile={}.".format(config.load_all, config.compile))
logger.info("Other hyperparameters:")
logger.info(args)
print('batch size:', config.batch_size)
+from dataset import custom_collate_fn
def prepare_dataloader(dataset: torch.utils.data.Dataset, batch_size: int, to_be_distributed=False, is_train=True):
    """Build the DataLoader for ``dataset``.

    In distributed mode the samples are drawn through a ``DistributedSampler``
    and loader-level shuffling is off; otherwise the loader shuffles exactly
    when ``is_train`` is true. Incomplete last batches are always dropped.
    """
    # Prepare dataloaders
    if to_be_distributed:
        shuffle, sampler = False, DistributedSampler(dataset)
    else:
        shuffle, sampler = is_train, None
    # The custom collate function is only used for training with dynamic input sizes.
    collate_fn = custom_collate_fn if is_train and config.dynamic_size else None
    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        num_workers=min(config.num_workers, batch_size),
        pin_memory=True,
        shuffle=shuffle,
        sampler=sampler,
        drop_last=True,
        collate_fn=collate_fn,
    )
def init_data_loaders(to_be_distributed):
    """Create and return the training DataLoader for ``config.training_set``."""
    # Prepare datasets
    # With dynamic sizing no fixed data size is passed to the dataset.
    data_size = None if config.dynamic_size else config.size
    train_set = MyData(datasets=config.training_set, data_size=data_size, is_train=True)
    train_loader = prepare_dataloader(
        train_set, config.batch_size, to_be_distributed=to_be_distributed, is_train=True
    )
    print(len(train_loader), "batches of train dataloader {} have been created.".format(config.training_set))
    return train_loader
def init_models_optimizers(epochs, to_be_distributed):
    """Create the model, its optimizer and the learning-rate scheduler.

    When ``args.resume`` points to an existing file, its weights are loaded and
    the module-level ``epoch_st`` is set to the epoch after the one encoded in
    the file name (``...epoch_<N>.pth``).

    Args:
        epochs: total number of epochs; used to resolve non-positive entries of
            ``config.lr_decay_epochs`` relative to the end of training.
        to_be_distributed: wrap the model in DDP (ignored when accelerate is used).

    Returns:
        ``(model, optimizer, lr_scheduler)``, or None when ``config.model`` is unknown.

    Raises:
        ValueError: if ``config.optimizer`` is neither 'AdamW' nor 'Adam'.
    """
    global epoch_st
    # Init models
    if config.model == 'BiRefNet':
        # Pretrained backbone weights are skipped when a checkpoint is about to be loaded.
        model = BiRefNet(bb_pretrained=True and not os.path.isfile(str(args.resume)))
    else:
        print('Undefined model: {}.'.format(config.model))
        return None
    if args.resume:
        if os.path.isfile(args.resume):
            logger.info("=> loading checkpoint '{}'".format(args.resume))
            state_dict = torch.load(args.resume, map_location='cpu', weights_only=True)
            state_dict = check_state_dict(state_dict)
            model.load_state_dict(state_dict)
            # Remove exactly the '.pth' suffix; str.rstrip('.pth') strips a character set instead.
            ckpt_stem = args.resume[:-len('.pth')] if args.resume.endswith('.pth') else args.resume
            epoch_st = int(ckpt_stem.split('epoch_')[-1]) + 1
        else:
            logger.info("=> no checkpoint found at '{}'".format(args.resume))
    if not args.use_accelerate:
        model = model.to(device)
        if to_be_distributed:
            model = DDP(model, device_ids=[device])
    if config.compile:
        model = torch.compile(model, mode=['default', 'reduce-overhead', 'max-autotune'][0])
    if config.precisionHigh:
        torch.set_float32_matmul_precision('high')

    # Setting optimizer: only parameters that require gradients are optimized.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    if config.optimizer == 'AdamW':
        optimizer = optim.AdamW(params=trainable_params, lr=config.lr, weight_decay=1e-2)
    elif config.optimizer == 'Adam':
        optimizer = optim.Adam(params=trainable_params, lr=config.lr, weight_decay=0)
    else:
        raise ValueError('Unsupported optimizer: {}.'.format(config.optimizer))
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        # Non-positive milestones count backwards from the last epoch.
        milestones=[lde if lde > 0 else epochs + lde + 1 for lde in config.lr_decay_epochs],
        gamma=config.lr_decay_rate
    )
    return model, optimizer, lr_scheduler
+class Trainer:
+    def __init__(
+        self, data_loaders, model_opt_lrsch,
+    ):
+        self.model, self.optimizer, self.lr_scheduler = model_opt_lrsch
+        self.train_loader = data_loaders
+        if args.use_accelerate:
+            self.train_loader, self.model, self.optimizer = accelerator.prepare(self.train_loader, self.model, self.optimizer)
+        if config.out_ref:
+            self.criterion_gdt = nn.BCELoss()
+        # Setting Losses
+        self.pix_loss = PixLoss()
+        self.cls_loss = ClsLoss()
+        # Others
+        self.loss_log = AverageMeter()
+    def _train_batch(self, batch):
+        if args.use_accelerate:
+            inputs = batch[0]#.to(device)
+            gts = batch[1]#.to(device)
+            class_labels = batch[2]#.to(device)
+        else:
+            inputs = batch[0].to(device)
+            gts = batch[1].to(device)
+            class_labels = batch[2].to(device)
+        self.optimizer.zero_grad()
+        scaled_preds, class_preds_lst = self.model(inputs)
+        if config.out_ref:
+            (outs_gdt_pred, outs_gdt_label), scaled_preds = scaled_preds
+            for _idx, (_gdt_pred, _gdt_label) in enumerate(zip(outs_gdt_pred, outs_gdt_label)):
+                _gdt_pred = nn.functional.interpolate(_gdt_pred, size=_gdt_label.shape[2:], mode='bilinear', align_corners=True).sigmoid()
+                _gdt_label = _gdt_label.sigmoid()
+                loss_gdt = self.criterion_gdt(_gdt_pred, _gdt_label) if _idx == 0 else self.criterion_gdt(_gdt_pred, _gdt_label) + loss_gdt
+            # self.loss_dict['loss_gdt'] = loss_gdt.item()
+        if None in class_preds_lst:
+            loss_cls = 0.
+        else:
+            loss_cls = self.cls_loss(class_preds_lst, class_labels)
+            self.loss_dict['loss_cls'] = loss_cls.item()
+        # Loss
+        loss_pix, loss_dict_pix = self.pix_loss(scaled_preds, torch.clamp(gts, 0, 1), pix_loss_lambda=1.0)
+        self.loss_dict.update(loss_dict_pix)
+        self.loss_dict['loss_pix'] = loss_pix.item()
+        # since there may be several losses for sal, the lambdas for them (lambdas_pix) are inside the loss.py
+        loss = loss_pix + loss_cls
+        if config.out_ref:
+            loss = loss + loss_gdt * 1.0
+        self.loss_log.update(loss.item(), inputs.size(0))
+        if args.use_accelerate:
+            loss = loss / accelerator.gradient_accumulation_steps
+            accelerator.backward(loss)
+        else:
+            loss.backward()
+        self.optimizer.step()
+    def train_epoch(self, epoch):
+        global logger_loss_idx
+        self.model.train()
+        self.loss_dict = {}
+        if epoch > args.epochs + config.finetune_last_epochs:
+            if config.task == 'Matting':
+                self.pix_loss.lambdas_pix_last['mae'] *= 1
+                self.pix_loss.lambdas_pix_last['mse'] *= 0.9
+                self.pix_loss.lambdas_pix_last['ssim'] *= 0.9
+            else:
+                self.pix_loss.lambdas_pix_last['bce'] *= 0
+                self.pix_loss.lambdas_pix_last['ssim'] *= 1
+                self.pix_loss.lambdas_pix_last['iou'] *= 0.5
+                self.pix_loss.lambdas_pix_last['mae'] *= 0.9
+        for batch_idx, batch in enumerate(self.train_loader):
+            # with nullcontext if not args.use_accelerate or accelerator.gradient_accumulation_steps <= 1 else accelerator.accumulate(self.model):
+            self._train_batch(batch)
+            # Logger
+            if (epoch < 2 and batch_idx < 100 and batch_idx % 20 == 0) or batch_idx % max(100, len(self.train_loader) / 100 // 100 * 100) == 0:
+                info_progress = f'Epoch[{epoch}/{args.epochs}] Iter[{batch_idx}/{len(self.train_loader)}].'
+                info_loss = 'Training Losses:'
+                for loss_name, loss_value in self.loss_dict.items():
+                    info_loss += f' {loss_name}: {loss_value:.5g} |'
+                logger.info(' '.join((info_progress, info_loss)))
+        info_loss = f'@==Final== Epoch[{epoch}/{args.epochs}]  Training Loss: {self.loss_log.avg:.5g}  '
+        logger.info(info_loss)
+        self.lr_scheduler.step()
+        return self.loss_log.avg
+def main():
+    trainer = Trainer(
+        data_loaders=init_data_loaders(to_be_distributed),
+        model_opt_lrsch=init_models_optimizers(args.epochs, to_be_distributed)
+    )
+    for epoch in range(epoch_st, args.epochs+1):
+        train_loss = trainer.train_epoch(epoch)
+        # Save checkpoint
+        if epoch >= args.epochs - config.save_last and epoch % config.save_step == 0:
+            if args.use_accelerate:
+                state_dict = trainer.model.state_dict()
+            else:
+                state_dict = trainer.model.module.state_dict() if to_be_distributed else trainer.model.state_dict()
+            torch.save(state_dict, os.path.join(args.ckpt_dir, 'epoch_{}.pth'.format(epoch)))
+    if to_be_distributed:
+        destroy_process_group()
+if __name__ == '__main__':
+    main()

train.sh ADDED Viewed

	@@ -0,0 +1,42 @@

+#!/bin/bash
+# Run script
+# Settings of training & test for different tasks.
+method="$1"
+task=$(python3 config.py --print_task)
+case "${task}" in
+    'DIS5K') epochs=500 && val_last=50 && step=5 ;;
+    'COD') epochs=150 && val_last=50 && step=5 ;;
+    'HRSOD') epochs=150 && val_last=50 && step=5 ;;
+    'General') epochs=200 && val_last=50 && step=5 ;;
+    'General-2K') epochs=250 && val_last=30 && step=2 ;;
+    'Matting') epochs=150 && val_last=50 && step=5 ;;
+esac
+# Train
+devices=$2
+nproc_per_node=$(echo ${devices%%,} | grep -o "," | wc -l)
+to_be_distributed=`echo ${nproc_per_node} | awk '{if($e > 0) print "True"; else print "False";}'`
+echo Training started at $(date)
+resume_weights_path='path_to_a_pth'
+if [ ${to_be_distributed} == "True" ]
+then
+    # Adapt the nproc_per_node by the number of GPUs. Give 8989 as the default value of master_port.
+    echo "Multi-GPU mode received..."
+    CUDA_VISIBLE_DEVICES=${devices} \
+    torchrun --standalone --nproc_per_node $((nproc_per_node+1)) \
+    train.py --ckpt_dir ckpts/${method} --epochs ${epochs} \
+        --dist ${to_be_distributed} \
+        --resume ${resume_weights_path} \
+        --use_accelerate
+else
+    echo "Single-GPU mode received..."
+    CUDA_VISIBLE_DEVICES=${devices} \
+    python train.py --ckpt_dir ckpts/${method} --epochs ${epochs} \
+        --dist ${to_be_distributed} \
+        --resume ${resume_weights_path} \
+        --use_accelerate
+fi
+echo Training finished at $(date)

train_test.sh ADDED Viewed

	@@ -0,0 +1,12 @@

+#!/bin/bash
+# Example: `setsid nohup ./train_test.sh BiRefNet 0,1,2,3,4,5,6,7 0 &>nohup.log &`
+method=${1:-"BSL"}
+devices=${2:-"0,1,2,3,4,5,6,7"}
+bash train.sh ${method} ${devices}
+devices_test=${3:-0}
+bash test.sh ${devices_test}
+hostname

utils.py ADDED Viewed

	@@ -0,0 +1,100 @@

+import logging
+import os
+import torch
+from torchvision import transforms
+import numpy as np
+import random
+import cv2
+from PIL import Image
+def path_to_image(path, size=(1024, 1024), color_type=['rgb', 'gray'][0]):
+    if color_type.lower() == 'rgb':
+        image = cv2.imread(path)
+    elif color_type.lower() == 'gray':
+        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
+    else:
+        print('Select the color_type to return, either to RGB or gray image.')
+        return
+    if size:
+        image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR)
+    if color_type.lower() == 'rgb':
+        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).convert('RGB')
+    else:
+        image = Image.fromarray(image).convert('L')
+    return image
+def check_state_dict(state_dict, unwanted_prefixes=['module.', '_orig_mod.']):
+    for k, v in list(state_dict.items()):
+        prefix_length = 0
+        for unwanted_prefix in unwanted_prefixes:
+            if k[prefix_length:].startswith(unwanted_prefix):
+                prefix_length += len(unwanted_prefix)
+        state_dict[k[prefix_length:]] = state_dict.pop(k)
+    return state_dict
+def generate_smoothed_gt(gts):
+    epsilon = 0.001
+    new_gts = (1-epsilon)*gts+epsilon/2
+    return new_gts
+class Logger():
+    def __init__(self, path="log.txt"):
+        self.logger = logging.getLogger('BiRefNet')
+        self.file_handler = logging.FileHandler(path, "w")
+        self.stdout_handler = logging.StreamHandler()
+        self.stdout_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+        self.file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+        self.logger.addHandler(self.file_handler)
+        self.logger.addHandler(self.stdout_handler)
+        self.logger.setLevel(logging.INFO)
+        self.logger.propagate = False
+    def info(self, txt):
+        self.logger.info(txt)
+    def close(self):
+        self.file_handler.close()
+        self.stdout_handler.close()
+class AverageMeter(object):
+    """Computes and stores the average and current value"""
+    def __init__(self):
+        self.reset()
+    def reset(self):
+        self.val = 0.0
+        self.avg = 0.0
+        self.sum = 0.0
+        self.count = 0.0
+    def update(self, val, n=1):
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+def save_checkpoint(state, path, filename="latest.pth"):
+    torch.save(state, os.path.join(path, filename))
+def save_tensor_img(tenor_im, path):
+    im = tenor_im.cpu().clone()
+    im = im.squeeze(0)
+    tensor2pil = transforms.ToPILImage()
+    im = tensor2pil(im)
+    im.save(path)
+def set_seed(seed):
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    np.random.seed(seed)
+    random.seed(seed)
+    torch.backends.cudnn.deterministic = True