Spaces:

gyrojeff
/

YuzuMarker.FontDetection

Running

App Files Community

gyrojeff committed on Mar 31, 2023

Commit

855e240

1 Parent(s): 416c7bb

feat: add crop roi bbox

Browse files

Files changed (2) hide show

detector/data.py +16 -3
train.py +7 -0

detector/data.py CHANGED Viewed

@@ -96,11 +96,13 @@ class FontDataset(Dataset):
         config_path: str = "configs/font.yml",
         regression_use_tanh: bool = False,
         transforms: bool = False,
     ):
         self.path = path
         self.fonts = load_font_with_exclusion(config_path)
         self.regression_use_tanh = regression_use_tanh
         self.transforms = transforms
         self.images = [
             os.path.join(path, f) for f in os.listdir(path) if f.endswith(".jpg")
@@ -146,6 +148,12 @@ class FontDataset(Dataset):
         with open(label_path, "rb") as f:
             label: FontLabel = pickle.load(f)
         # encode label
         label = self.fontlabel2tensor(label, label_path)
@@ -188,6 +196,7 @@ class FontDataModule(LightningDataModule):
         train_transforms: bool = False,
         val_transforms: bool = False,
         test_transforms: bool = False,
         regression_use_tanh: bool = False,
         **kwargs,
     ):
@@ -197,13 +206,17 @@ class FontDataModule(LightningDataModule):
         self.val_shuffle = val_shuffle
         self.test_shuffle = test_shuffle
         self.train_dataset = FontDataset(
-            train_path, config_path, regression_use_tanh, train_transforms
         )
         self.val_dataset = FontDataset(
-            val_path, config_path, regression_use_tanh, val_transforms
         )
         self.test_dataset = FontDataset(
-            test_path, config_path, regression_use_tanh, test_transforms
         )
     def get_train_num_iter(self, num_device: int) -> int:

         config_path: str = "configs/font.yml",
         regression_use_tanh: bool = False,
         transforms: bool = False,
+        crop_roi_bbox: bool = False,
     ):
         self.path = path
         self.fonts = load_font_with_exclusion(config_path)
         self.regression_use_tanh = regression_use_tanh
         self.transforms = transforms
+        self.crop_roi_bbox = crop_roi_bbox
         self.images = [
             os.path.join(path, f) for f in os.listdir(path) if f.endswith(".jpg")
         with open(label_path, "rb") as f:
             label: FontLabel = pickle.load(f)
+        if self.crop_roi_bbox:
+            left, top, width, height = label.bbox
+            image = TF.crop(image, top, left, height, width)
+            label.image_width = width
+            label.image_height = height
         # encode label
         label = self.fontlabel2tensor(label, label_path)
         train_transforms: bool = False,
         val_transforms: bool = False,
         test_transforms: bool = False,
+        crop_roi_bbox: bool = False,
         regression_use_tanh: bool = False,
         **kwargs,
     ):
         self.val_shuffle = val_shuffle
         self.test_shuffle = test_shuffle
         self.train_dataset = FontDataset(
+            train_path,
+            config_path,
+            regression_use_tanh,
+            train_transforms,
+            crop_roi_bbox,
         )
         self.val_dataset = FontDataset(
+            val_path, config_path, regression_use_tanh, val_transforms, crop_roi_bbox
         )
         self.test_dataset = FontDataset(
+            test_path, config_path, regression_use_tanh, test_transforms, crop_roi_bbox
         )
     def get_train_num_iter(self, num_device: int) -> int:

train.py CHANGED Viewed

@@ -48,6 +48,12 @@ parser.add_argument(
     action="store_true",
     help="Use pretrained model for ResNet (default: False)",
 )
 args = parser.parse_args()
@@ -85,6 +91,7 @@ data_module = FontDataModule(
     test_shuffle=False,
     regression_use_tanh=regression_use_tanh,
     train_transforms=augmentation,
 )
 num_iters = data_module.get_train_num_iter(num_device) * num_epochs

     action="store_true",
     help="Use pretrained model for ResNet (default: False)",
 )
+parser.add_argument(
+    "-i",
+    "--crop-roi-bbox",
+    action="store_true",
+    help="Crop ROI bounding box (default: False)",
+)
 args = parser.parse_args()
     test_shuffle=False,
     regression_use_tanh=regression_use_tanh,
     train_transforms=augmentation,
+    crop_roi_bbox=args.crop_roi_bbox,
 )
 num_iters = data_module.get_train_num_iter(num_device) * num_epochs