dikdimon commited on
Commit
194b4ef
·
verified ·
1 Parent(s): 2699174

Upload exhm using SD-Hub extension

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +14 -0
  2. exhm/detailer/dddetailer/.gitignore +10 -0
  3. exhm/detailer/dddetailer/README.md +62 -0
  4. exhm/detailer/dddetailer/config/coco_panoptic.py +98 -0
  5. exhm/detailer/dddetailer/config/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py +265 -0
  6. exhm/detailer/dddetailer/config/mmdet_anime-face_yolov3.py +177 -0
  7. exhm/detailer/dddetailer/config/mmdet_dd-person_mask2former.py +105 -0
  8. exhm/detailer/dddetailer/install.py +71 -0
  9. exhm/detailer/dddetailer/misc/ddetailer_example_1.png +0 -0
  10. exhm/detailer/dddetailer/misc/ddetailer_example_2.png +0 -0
  11. exhm/detailer/dddetailer/misc/ddetailer_example_3.gif +0 -0
  12. exhm/detailer/dddetailer/pyproject.toml +29 -0
  13. exhm/detailer/dddetailer/scripts/dddetailer.py +1057 -0
  14. exhm/detailer/ddetailer/.gitignore +8 -0
  15. exhm/detailer/ddetailer/README.md +44 -0
  16. exhm/detailer/ddetailer/misc/ddetailer_example_1.png +0 -0
  17. exhm/detailer/ddetailer/misc/ddetailer_example_2.png +0 -0
  18. exhm/detailer/ddetailer/misc/ddetailer_example_3.gif +0 -0
  19. exhm/detailer/ddetailer/scripts/__pycache__/ddetailer.cpython-310.pyc +0 -0
  20. exhm/detailer/ddetailer/scripts/ddetailer.py +536 -0
  21. exhm/detailer/sd-webui-ddsd-orig/.gitignore +170 -0
  22. exhm/detailer/sd-webui-ddsd-orig/README.md +108 -0
  23. exhm/detailer/sd-webui-ddsd-orig/config/Empty.ddcfg +1 -0
  24. exhm/detailer/sd-webui-ddsd-orig/install.py +100 -0
  25. exhm/detailer/sd-webui-ddsd-orig/requirements.txt +8 -0
  26. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd.cpython-310.pyc +0 -0
  27. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_bs.cpython-310.pyc +0 -0
  28. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_dino.cpython-310.pyc +0 -0
  29. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_postprocess.cpython-310.pyc +0 -0
  30. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_sam.cpython-310.pyc +0 -0
  31. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_utils.cpython-310.pyc +0 -0
  32. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd.py +0 -0
  33. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_bs.py +71 -0
  34. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_dino.py +99 -0
  35. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_postprocess.py +83 -0
  36. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_sam.py +89 -0
  37. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_utils.py +383 -0
  38. exhm/detailer/sd-webui-ddsd/.gitignore +170 -0
  39. exhm/detailer/sd-webui-ddsd/README.md +108 -0
  40. exhm/detailer/sd-webui-ddsd/config/Empty.ddcfg +1 -0
  41. exhm/detailer/sd-webui-ddsd/install.py +110 -0
  42. exhm/detailer/sd-webui-ddsd/requirements.txt +8 -0
  43. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd.cpython-310.pyc +0 -0
  44. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_bs.cpython-310.pyc +0 -0
  45. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_dino.cpython-310.pyc +0 -0
  46. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_postprocess.cpython-310.pyc +0 -0
  47. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_sam.cpython-310.pyc +0 -0
  48. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_utils.cpython-310.pyc +0 -0
  49. exhm/detailer/sd-webui-ddsd/scripts/ddsd.py +0 -0
  50. exhm/detailer/sd-webui-ddsd/scripts/ddsd_bs.py +71 -0
.gitattributes CHANGED
@@ -33,3 +33,17 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ exhm/detailer/stable-diffusion-webui-eyemask/models/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
37
+ exhm/extensions[[:space:]]img2/ComfyUI-nodes-hnmr/examples/workflow_mbw_multi.png filter=lfs diff=lfs merge=lfs -text
38
+ exhm/extensions[[:space:]]img2/ComfyUI-nodes-hnmr/examples/workflow_xyz.png filter=lfs diff=lfs merge=lfs -text
39
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/default.png filter=lfs diff=lfs merge=lfs -text
40
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/img2img_latent_upscale_process.png filter=lfs diff=lfs merge=lfs -text
41
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/nearest-exact-normal1.png filter=lfs diff=lfs merge=lfs -text
42
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/nearest-exact-normal2.png filter=lfs diff=lfs merge=lfs -text
43
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/nearest-exact-simple1.png filter=lfs diff=lfs merge=lfs -text
44
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/nearest-exact-simple2.png filter=lfs diff=lfs merge=lfs -text
45
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/nearest-exact-simple8.png filter=lfs diff=lfs merge=lfs -text
46
+ exhm/extensions[[:space:]]img2/sd-webui-img2txt/sd-webui-img2txt.gif filter=lfs diff=lfs merge=lfs -text
47
+ exhm/extensions[[:space:]]img2/sd-webui-inpaint-anything/images/inpaint_anything_ui_image_1.png filter=lfs diff=lfs merge=lfs -text
48
+ exhm/extensions[[:space:]]img2/sd-webui-manga-inpainting/manga_inpainting/repo/examples/representative.png filter=lfs diff=lfs merge=lfs -text
49
+ exhm/extensions[[:space:]]img2/sd-webui-real-image-artifacts/examples/before.png filter=lfs diff=lfs merge=lfs -text
exhm/detailer/dddetailer/.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.ckpt
3
+ *.pth
4
+ /tmp
5
+ /outputs
6
+ /log
7
+ .vscode
8
+ /test-cases
9
+ .mypy_cache/
10
+ .ruff_cache/
exhm/detailer/dddetailer/README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 돚거 Detection Detailer
2
+
3
+ Dotgeo(hijack) Detection Detailer
4
+
5
+ ddetailer with torch 2.0, mmcv 2.0, mmdet 3.0
6
+
7
+ integrated with [noahge4/ddetailer](https://github.com/noahge4/ddetailer)
8
+
9
+ AI실사채널 ChatGPT23님의 [ddetailer 수정본](https://arca.live/b/aireal/72297207) 병합됨
10
+
11
+ ## Installation
12
+
13
+ 1. remove original ddetailer extension - `stable-diffusion-webui/extensions/ddetailer` folder
14
+ 2. remove original model files - `stable-diffusion-webui/models/mmdet` folder
15
+ 3. install from the extensions tab with url `https://github.com/Bing-su/dddetailer`
16
+
17
+ ## Problem
18
+
19
+ The predictive accuracy of the segmentation model has become very poor.
20
+
21
+ # Detection Detailer
22
+ An object detection and auto-mask extension for [Stable Diffusion web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui). See [Installation](https://github.com/dustysys/ddetailer#installation).
23
+
24
+ ![adoringfan](/misc/ddetailer_example_1.png)
25
+
26
+ ### Segmentation
27
+ Default models enable person and face instance segmentation.
28
+
29
+ ![amgothic](/misc/ddetailer_example_2.png)
30
+
31
+ ### Detailing
32
+ With full-resolution inpainting, the extension is handy for improving faces without the hassle of manual masking.
33
+
34
+ ![zion](/misc/ddetailer_example_3.gif)
35
+
36
+ ## Installation
37
+ 1. Use `git clone https://github.com/dustysys/ddetailer.git` from your SD web UI `/extensions` folder. Alternatively, install from the extensions tab with url `https://github.com/dustysys/ddetailer`
38
+ 2. Start or reload SD web UI.
39
+
40
+ The models and dependencies should download automatically. To install them manually, follow the [official instructions for installing mmdet](https://mmcv.readthedocs.io/en/latest/get_started/installation.html#install-with-mim-recommended). The models can be [downloaded here](https://huggingface.co/dustysys/ddetailer) and should be placed in `/models/mmdet/bbox` for bounding box (`anime-face_yolov3`) or `/models/mmdet/segm` for instance segmentation models (`dd-person_mask2former`). See the [MMDetection docs](https://mmdetection.readthedocs.io/en/latest/1_exist_data_model.html) for guidance on training your own models. For using official MMDetection pretrained models see [here](https://github.com/dustysys/ddetailer/issues/5#issuecomment-1311231989), these are trained for photorealism. See [Troubleshooting](https://github.com/dustysys/ddetailer#troubleshooting) if you encounter issues during installation.
41
+
42
+ ## Usage
43
+ Select Detection Detailer as the script in SD web UI to use the extension. Click 'Generate' to run the script. Here are some tips:
44
+ - `anime-face_yolov3` can detect the bounding box of faces as the primary model while `dd-person_mask2former` isolates the head's silhouette as the secondary model by using the bitwise AND option. Refer to [this example](https://github.com/dustysys/ddetailer/issues/4#issuecomment-1311200268).
45
+ - The dilation factor expands the mask, while the x & y offsets move the mask around.
46
+ - The script is available in txt2img mode as well and can improve the quality of your 10 pulls with moderate settings (low denoise).
47
+
48
+ ## Troubleshooting
49
+ If you get the message ERROR: 'Failed building wheel for pycocotools' follow [these steps](https://github.com/dustysys/ddetailer/issues/1#issuecomment-1309415543).
50
+
51
+ Any other issues installing, open an [issue](https://github.com/dustysys/ddetailer/issues).
52
+
53
+ ## Credits
54
+ hysts/[anime-face-detector](https://github.com/hysts/anime-face-detector) - Creator of `anime-face_yolov3`, which has impressive performance on a variety of art styles.
55
+
56
+ skytnt/[anime-segmentation](https://huggingface.co/datasets/skytnt/anime-segmentation) - Synthetic dataset used to train `dd-person_mask2former`.
57
+
58
+ jerryli27/[AniSeg](https://github.com/jerryli27/AniSeg) - Annotated dataset used to train `dd-person_mask2former`.
59
+
60
+ open-mmlab/[mmdetection](https://github.com/open-mmlab/mmdetection) - Object detection toolset. `dd-person_mask2former` was trained via transfer learning using their [R-50 Mask2Former instance segmentation model](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask2former#instance-segmentation) as a base.
61
+
62
+ AUTOMATIC1111/[stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) - Web UI for Stable Diffusion, base application for this extension.
exhm/detailer/dddetailer/config/coco_panoptic.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = "CocoPanopticDataset"
3
+ data_root = 'data/coco/'
4
+
5
+ # Example to use different file client
6
+ # Method 1: simply set the data root and let the file I/O module
7
+ # automatically infer from prefix (not support LMDB and Memcache yet)
8
+
9
+ # data_root = "s3://openmmlab/datasets/detection/coco/"
10
+
11
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
12
+ # backend_args = dict(
13
+ # backend='petrel',
14
+ # path_mapping=dict({
15
+ # './data/': 's3://openmmlab/datasets/detection/',
16
+ # 'data/': 's3://openmmlab/datasets/detection/'
17
+ # }))
18
+ backend_args = None
19
+
20
+ train_pipeline = [
21
+ dict(type="LoadImageFromFile", backend_args=backend_args),
22
+ dict(type="LoadPanopticAnnotations", backend_args=backend_args),
23
+ dict(type="Resize", scale=(1333, 800), keep_ratio=True),
24
+ dict(type="RandomFlip", prob=0.5),
25
+ dict(type="PackDetInputs"),
26
+ ]
27
+ test_pipeline = [
28
+ dict(type="LoadImageFromFile", backend_args=backend_args),
29
+ dict(type="Resize", scale=(1333, 800), keep_ratio=True),
30
+ dict(type="LoadPanopticAnnotations", backend_args=backend_args),
31
+ dict(
32
+ type="PackDetInputs",
33
+ meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"),
34
+ ),
35
+ ]
36
+
37
+ train_dataloader = dict(
38
+ batch_size=2,
39
+ num_workers=2,
40
+ persistent_workers=True,
41
+ sampler=dict(type="DefaultSampler", shuffle=True),
42
+ batch_sampler=dict(type="AspectRatioBatchSampler"),
43
+ dataset=dict(
44
+ type=dataset_type,
45
+ data_root=data_root,
46
+ ann_file="annotations/panoptic_train2017.json",
47
+ data_prefix=dict(img="train2017/", seg="annotations/panoptic_train2017/"),
48
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
49
+ pipeline=train_pipeline,
50
+ backend_args=backend_args,
51
+ ),
52
+ )
53
+ val_dataloader = dict(
54
+ batch_size=1,
55
+ num_workers=2,
56
+ persistent_workers=True,
57
+ drop_last=False,
58
+ sampler=dict(type="DefaultSampler", shuffle=False),
59
+ dataset=dict(
60
+ type=dataset_type,
61
+ data_root=data_root,
62
+ ann_file="annotations/panoptic_val2017.json",
63
+ data_prefix=dict(img="val2017/", seg="annotations/panoptic_val2017/"),
64
+ test_mode=True,
65
+ pipeline=test_pipeline,
66
+ backend_args=backend_args,
67
+ ),
68
+ )
69
+ test_dataloader = val_dataloader
70
+
71
+ val_evaluator = dict(
72
+ type="CocoPanopticMetric",
73
+ ann_file=data_root + "annotations/panoptic_val2017.json",
74
+ seg_prefix=data_root + "annotations/panoptic_val2017/",
75
+ backend_args=backend_args,
76
+ )
77
+ test_evaluator = val_evaluator
78
+
79
+ # inference on test dataset and
80
+ # format the output results for submission.
81
+ # test_dataloader = dict(
82
+ # batch_size=1,
83
+ # num_workers=1,
84
+ # persistent_workers=True,
85
+ # drop_last=False,
86
+ # sampler=dict(type='DefaultSampler', shuffle=False),
87
+ # dataset=dict(
88
+ # type=dataset_type,
89
+ # data_root=data_root,
90
+ # ann_file='annotations/panoptic_image_info_test-dev2017.json',
91
+ # data_prefix=dict(img='test2017/'),
92
+ # test_mode=True,
93
+ # pipeline=test_pipeline))
94
+ # test_evaluator = dict(
95
+ # type='CocoPanopticMetric',
96
+ # format_only=True,
97
+ # ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
98
+ # outfile_prefix='./work_dirs/coco_panoptic/test')
exhm/detailer/dddetailer/config/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["./coco_panoptic.py"]
2
+ image_size = (1024, 1024)
3
+ batch_augments = [
4
+ dict(
5
+ type="BatchFixedSizePad",
6
+ size=image_size,
7
+ img_pad_value=0,
8
+ pad_mask=True,
9
+ mask_pad_value=0,
10
+ pad_seg=True,
11
+ seg_pad_value=255,
12
+ )
13
+ ]
14
+ data_preprocessor = dict(
15
+ type="DetDataPreprocessor",
16
+ mean=[123.675, 116.28, 103.53],
17
+ std=[58.395, 57.12, 57.375],
18
+ bgr_to_rgb=True,
19
+ pad_size_divisor=32,
20
+ pad_mask=True,
21
+ mask_pad_value=0,
22
+ pad_seg=True,
23
+ seg_pad_value=255,
24
+ batch_augments=batch_augments,
25
+ )
26
+
27
+ num_things_classes = 1
28
+ num_stuff_classes = 0
29
+ num_classes = num_things_classes + num_stuff_classes
30
+ model = dict(
31
+ type="Mask2Former",
32
+ data_preprocessor=data_preprocessor,
33
+ backbone=dict(
34
+ type="ResNet",
35
+ depth=50,
36
+ num_stages=4,
37
+ out_indices=(0, 1, 2, 3),
38
+ frozen_stages=-1,
39
+ norm_cfg=dict(type="BN", requires_grad=False),
40
+ norm_eval=True,
41
+ style="pytorch",
42
+ init_cfg=dict(type="Pretrained", checkpoint="torchvision://resnet50"),
43
+ ),
44
+ panoptic_head=dict(
45
+ type="Mask2FormerHead",
46
+ in_channels=[256, 512, 1024, 2048], # pass to pixel_decoder inside
47
+ strides=[4, 8, 16, 32],
48
+ feat_channels=256,
49
+ out_channels=256,
50
+ num_things_classes=num_things_classes,
51
+ num_stuff_classes=num_stuff_classes,
52
+ num_queries=100,
53
+ num_transformer_feat_level=3,
54
+ pixel_decoder=dict(
55
+ type="MSDeformAttnPixelDecoder",
56
+ num_outs=3,
57
+ norm_cfg=dict(type="GN", num_groups=32),
58
+ act_cfg=dict(type="ReLU"),
59
+ encoder=dict( # DeformableDetrTransformerEncoder
60
+ num_layers=6,
61
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
62
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
63
+ embed_dims=256,
64
+ num_heads=8,
65
+ num_levels=3,
66
+ num_points=4,
67
+ dropout=0.0,
68
+ batch_first=True,
69
+ ),
70
+ ffn_cfg=dict(
71
+ embed_dims=256,
72
+ feedforward_channels=1024,
73
+ num_fcs=2,
74
+ ffn_drop=0.0,
75
+ act_cfg=dict(type="ReLU", inplace=True),
76
+ ),
77
+ ),
78
+ ),
79
+ positional_encoding=dict(num_feats=128, normalize=True),
80
+ ),
81
+ enforce_decoder_input_project=False,
82
+ positional_encoding=dict(num_feats=128, normalize=True),
83
+ transformer_decoder=dict( # Mask2FormerTransformerDecoder
84
+ return_intermediate=True,
85
+ num_layers=9,
86
+ layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
87
+ self_attn_cfg=dict( # MultiheadAttention
88
+ embed_dims=256, num_heads=8, dropout=0.0, batch_first=True
89
+ ),
90
+ cross_attn_cfg=dict( # MultiheadAttention
91
+ embed_dims=256, num_heads=8, dropout=0.0, batch_first=True
92
+ ),
93
+ ffn_cfg=dict(
94
+ embed_dims=256,
95
+ feedforward_channels=2048,
96
+ num_fcs=2,
97
+ ffn_drop=0.0,
98
+ act_cfg=dict(type="ReLU", inplace=True),
99
+ ),
100
+ ),
101
+ init_cfg=None,
102
+ ),
103
+ loss_cls=dict(
104
+ type="CrossEntropyLoss",
105
+ use_sigmoid=False,
106
+ loss_weight=2.0,
107
+ reduction="mean",
108
+ class_weight=[1.0] * num_classes + [0.1],
109
+ ),
110
+ loss_mask=dict(
111
+ type="CrossEntropyLoss", use_sigmoid=True, reduction="mean", loss_weight=5.0
112
+ ),
113
+ loss_dice=dict(
114
+ type="DiceLoss",
115
+ use_sigmoid=True,
116
+ activate=True,
117
+ reduction="mean",
118
+ naive_dice=True,
119
+ eps=1.0,
120
+ loss_weight=5.0,
121
+ ),
122
+ ),
123
+ panoptic_fusion_head=dict(
124
+ type="MaskFormerFusionHead",
125
+ num_things_classes=num_things_classes,
126
+ num_stuff_classes=num_stuff_classes,
127
+ loss_panoptic=None,
128
+ init_cfg=None,
129
+ ),
130
+ train_cfg=dict(
131
+ num_points=12544,
132
+ oversample_ratio=3.0,
133
+ importance_sample_ratio=0.75,
134
+ assigner=dict(
135
+ type="HungarianAssigner",
136
+ match_costs=[
137
+ dict(type="ClassificationCost", weight=2.0),
138
+ dict(type="CrossEntropyLossCost", weight=5.0, use_sigmoid=True),
139
+ dict(type="DiceCost", weight=5.0, pred_act=True, eps=1.0),
140
+ ],
141
+ ),
142
+ sampler=dict(type="MaskPseudoSampler"),
143
+ ),
144
+ test_cfg=dict(
145
+ panoptic_on=True,
146
+ # For now, the dataset does not support
147
+ # evaluating semantic segmentation metric.
148
+ semantic_on=False,
149
+ instance_on=True,
150
+ # max_per_image is for instance segmentation.
151
+ max_per_image=100,
152
+ iou_thr=0.8,
153
+ # In Mask2Former's panoptic postprocessing,
154
+ # it will filter mask area where score is less than 0.5 .
155
+ filter_low_score=True,
156
+ ),
157
+ init_cfg=None,
158
+ )
159
+
160
+ # dataset settings
161
+ data_root = "data/coco/"
162
+ train_pipeline = [
163
+ dict(
164
+ type="LoadImageFromFile", to_float32=True, backend_args={{_base_.backend_args}}
165
+ ),
166
+ dict(
167
+ type="LoadPanopticAnnotations",
168
+ with_bbox=True,
169
+ with_mask=True,
170
+ with_seg=True,
171
+ backend_args={{_base_.backend_args}},
172
+ ),
173
+ dict(type="RandomFlip", prob=0.5),
174
+ # large scale jittering
175
+ dict(
176
+ type="RandomResize", scale=image_size, ratio_range=(0.1, 2.0), keep_ratio=True
177
+ ),
178
+ dict(
179
+ type="RandomCrop",
180
+ crop_size=image_size,
181
+ crop_type="absolute",
182
+ recompute_bbox=True,
183
+ allow_negative_crop=True,
184
+ ),
185
+ dict(type="PackDetInputs"),
186
+ ]
187
+
188
+ train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
189
+
190
+ val_evaluator = [
191
+ dict(
192
+ type="CocoPanopticMetric",
193
+ ann_file=data_root + "annotations/panoptic_val2017.json",
194
+ seg_prefix=data_root + "annotations/panoptic_val2017/",
195
+ backend_args={{_base_.backend_args}},
196
+ ),
197
+ dict(
198
+ type="CocoMetric",
199
+ ann_file=data_root + "annotations/instances_val2017.json",
200
+ metric=["bbox", "segm"],
201
+ backend_args={{_base_.backend_args}},
202
+ ),
203
+ ]
204
+ test_evaluator = val_evaluator
205
+
206
+ # optimizer
207
+ embed_multi = dict(lr_mult=1.0, decay_mult=0.0)
208
+ optim_wrapper = dict(
209
+ type="OptimWrapper",
210
+ optimizer=dict(
211
+ type="AdamW", lr=0.0001, weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999)
212
+ ),
213
+ paramwise_cfg=dict(
214
+ custom_keys={
215
+ "backbone": dict(lr_mult=0.1, decay_mult=1.0),
216
+ "query_embed": embed_multi,
217
+ "query_feat": embed_multi,
218
+ "level_embed": embed_multi,
219
+ },
220
+ norm_decay_mult=0.0,
221
+ ),
222
+ clip_grad=dict(max_norm=0.01, norm_type=2),
223
+ )
224
+
225
+ # learning policy
226
+ max_iters = 368750
227
+ param_scheduler = dict(
228
+ type="MultiStepLR",
229
+ begin=0,
230
+ end=max_iters,
231
+ by_epoch=False,
232
+ milestones=[327778, 355092],
233
+ gamma=0.1,
234
+ )
235
+
236
+ # Before 365001th iteration, we do evaluation every 5000 iterations.
237
+ # After 365000th iteration, we do evaluation every 368750 iterations,
238
+ # which means that we do evaluation at the end of training.
239
+ interval = 5000
240
+ dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
241
+ train_cfg = dict(
242
+ type="IterBasedTrainLoop",
243
+ max_iters=max_iters,
244
+ val_interval=interval,
245
+ dynamic_intervals=dynamic_intervals,
246
+ )
247
+ val_cfg = dict(type="ValLoop")
248
+ test_cfg = dict(type="TestLoop")
249
+
250
+ default_hooks = dict(
251
+ checkpoint=dict(
252
+ type="CheckpointHook",
253
+ by_epoch=False,
254
+ save_last=True,
255
+ max_keep_ckpts=3,
256
+ interval=interval,
257
+ )
258
+ )
259
+ log_processor = dict(type="LogProcessor", window_size=50, by_epoch=False)
260
+
261
+ # Default setting for scaling LR automatically
262
+ # - `enable` means enable scaling LR automatically
263
+ # or not by default.
264
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
265
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
exhm/detailer/dddetailer/config/mmdet_anime-face_yolov3.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # _base_ = ["../_base_/schedules/schedule_1x.py", "../_base_/default_runtime.py"]
2
+ # model settings
3
+ data_preprocessor = dict(
4
+ type="DetDataPreprocessor",
5
+ mean=[0, 0, 0],
6
+ std=[255.0, 255.0, 255.0],
7
+ bgr_to_rgb=True,
8
+ pad_size_divisor=32,
9
+ )
10
+ model = dict(
11
+ type="YOLOV3",
12
+ data_preprocessor=data_preprocessor,
13
+ backbone=dict(
14
+ type="Darknet",
15
+ depth=53,
16
+ out_indices=(3, 4, 5),
17
+ init_cfg=dict(type="Pretrained", checkpoint="open-mmlab://darknet53"),
18
+ ),
19
+ neck=dict(
20
+ type="YOLOV3Neck",
21
+ num_scales=3,
22
+ in_channels=[1024, 512, 256],
23
+ out_channels=[512, 256, 128],
24
+ ),
25
+ bbox_head=dict(
26
+ type="YOLOV3Head",
27
+ num_classes=1,
28
+ in_channels=[512, 256, 128],
29
+ out_channels=[1024, 512, 256],
30
+ anchor_generator=dict(
31
+ type="YOLOAnchorGenerator",
32
+ base_sizes=[
33
+ [(116, 90), (156, 198), (373, 326)],
34
+ [(30, 61), (62, 45), (59, 119)],
35
+ [(10, 13), (16, 30), (33, 23)],
36
+ ],
37
+ strides=[32, 16, 8],
38
+ ),
39
+ bbox_coder=dict(type="YOLOBBoxCoder"),
40
+ featmap_strides=[32, 16, 8],
41
+ loss_cls=dict(
42
+ type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0, reduction="sum"
43
+ ),
44
+ loss_conf=dict(
45
+ type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0, reduction="sum"
46
+ ),
47
+ loss_xy=dict(
48
+ type="CrossEntropyLoss", use_sigmoid=True, loss_weight=2.0, reduction="sum"
49
+ ),
50
+ loss_wh=dict(type="MSELoss", loss_weight=2.0, reduction="sum"),
51
+ ),
52
+ # training and testing settings
53
+ train_cfg=dict(
54
+ assigner=dict(
55
+ type="GridAssigner", pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0
56
+ )
57
+ ),
58
+ test_cfg=dict(
59
+ nms_pre=1000,
60
+ min_bbox_size=0,
61
+ score_thr=0.05,
62
+ conf_thr=0.005,
63
+ nms=dict(type="nms", iou_threshold=0.45),
64
+ max_per_img=100,
65
+ ),
66
+ )
67
+ # dataset settings
68
+ dataset_type = "CocoDataset"
69
+ data_root = "data/coco/"
70
+
71
+ # Example to use different file client
72
+ # Method 1: simply set the data root and let the file I/O module
73
+ # automatically infer from prefix (not support LMDB and Memcache yet)
74
+
75
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
76
+
77
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
78
+ # backend_args = dict(
79
+ # backend='petrel',
80
+ # path_mapping=dict({
81
+ # './data/': 's3://openmmlab/datasets/detection/',
82
+ # 'data/': 's3://openmmlab/datasets/detection/'
83
+ # }))
84
+ backend_args = None
85
+
86
+ train_pipeline = [
87
+ dict(type="LoadImageFromFile", backend_args=backend_args),
88
+ dict(type="LoadAnnotations", with_bbox=True),
89
+ dict(
90
+ type="Expand",
91
+ mean=data_preprocessor["mean"],
92
+ to_rgb=data_preprocessor["bgr_to_rgb"],
93
+ ratio_range=(1, 2),
94
+ ),
95
+ dict(
96
+ type="MinIoURandomCrop",
97
+ min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
98
+ min_crop_size=0.3,
99
+ ),
100
+ dict(type="RandomResize", scale=[(320, 320), (608, 608)], keep_ratio=True),
101
+ dict(type="RandomFlip", prob=0.5),
102
+ dict(type="PhotoMetricDistortion"),
103
+ dict(type="PackDetInputs"),
104
+ ]
105
+ test_pipeline = [
106
+ dict(type="LoadImageFromFile", backend_args=backend_args),
107
+ dict(type="Resize", scale=(608, 608), keep_ratio=True),
108
+ dict(type="LoadAnnotations", with_bbox=True),
109
+ dict(
110
+ type="PackDetInputs",
111
+ meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"),
112
+ ),
113
+ ]
114
+
115
+ train_dataloader = dict(
116
+ batch_size=8,
117
+ num_workers=4,
118
+ persistent_workers=True,
119
+ sampler=dict(type="DefaultSampler", shuffle=True),
120
+ batch_sampler=dict(type="AspectRatioBatchSampler"),
121
+ dataset=dict(
122
+ type=dataset_type,
123
+ data_root=data_root,
124
+ ann_file="annotations/instances_train2017.json",
125
+ data_prefix=dict(img="train2017/"),
126
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
127
+ pipeline=train_pipeline,
128
+ backend_args=backend_args,
129
+ ),
130
+ )
131
+ val_dataloader = dict(
132
+ batch_size=1,
133
+ num_workers=2,
134
+ persistent_workers=True,
135
+ drop_last=False,
136
+ sampler=dict(type="DefaultSampler", shuffle=False),
137
+ dataset=dict(
138
+ type=dataset_type,
139
+ data_root=data_root,
140
+ ann_file="annotations/instances_val2017.json",
141
+ data_prefix=dict(img="val2017/"),
142
+ test_mode=True,
143
+ pipeline=test_pipeline,
144
+ backend_args=backend_args,
145
+ ),
146
+ )
147
+ test_dataloader = val_dataloader
148
+
149
+ val_evaluator = dict(
150
+ type="CocoMetric",
151
+ ann_file=data_root + "annotations/instances_val2017.json",
152
+ metric="bbox",
153
+ backend_args=backend_args,
154
+ )
155
+ test_evaluator = val_evaluator
156
+
157
+ train_cfg = dict(max_epochs=273, val_interval=7)
158
+
159
+ # optimizer
160
+ optim_wrapper = dict(
161
+ type="OptimWrapper",
162
+ optimizer=dict(type="SGD", lr=0.001, momentum=0.9, weight_decay=0.0005),
163
+ clip_grad=dict(max_norm=35, norm_type=2),
164
+ )
165
+
166
+ # learning policy
167
+ param_scheduler = [
168
+ dict(type="LinearLR", start_factor=0.1, by_epoch=False, begin=0, end=2000),
169
+ dict(type="MultiStepLR", by_epoch=True, milestones=[218, 246], gamma=0.1),
170
+ ]
171
+
172
+ default_hooks = dict(checkpoint=dict(type="CheckpointHook", interval=7))
173
+
174
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
175
+ # USER SHOULD NOT CHANGE ITS VALUES.
176
+ # base_batch_size = (8 GPUs) x (8 samples per GPU)
177
+ auto_scale_lr = dict(base_batch_size=64)
exhm/detailer/dddetailer/config/mmdet_dd-person_mask2former.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["./mask2former_r50_8xb2-lsj-50e_coco-panoptic.py"]
2
+
3
+ num_things_classes = 1
4
+ num_stuff_classes = 0
5
+ num_classes = num_things_classes + num_stuff_classes
6
+ image_size = (1024, 1024)
7
+ batch_augments = [
8
+ dict(
9
+ type="BatchFixedSizePad",
10
+ size=image_size,
11
+ img_pad_value=0,
12
+ pad_mask=True,
13
+ mask_pad_value=0,
14
+ pad_seg=False,
15
+ )
16
+ ]
17
+ data_preprocessor = dict(
18
+ type="DetDataPreprocessor",
19
+ mean=[123.675, 116.28, 103.53],
20
+ std=[58.395, 57.12, 57.375],
21
+ bgr_to_rgb=True,
22
+ pad_size_divisor=32,
23
+ pad_mask=True,
24
+ mask_pad_value=0,
25
+ pad_seg=False,
26
+ batch_augments=batch_augments,
27
+ )
28
+ model = dict(
29
+ data_preprocessor=data_preprocessor,
30
+ panoptic_head=dict(
31
+ num_things_classes=num_things_classes,
32
+ num_stuff_classes=num_stuff_classes,
33
+ loss_cls=dict(class_weight=[1.0] * num_classes + [0.1]),
34
+ ),
35
+ panoptic_fusion_head=dict(
36
+ num_things_classes=num_things_classes, num_stuff_classes=num_stuff_classes
37
+ ),
38
+ test_cfg=dict(panoptic_on=False),
39
+ )
40
+
41
+ # dataset settings
42
+ train_pipeline = [
43
+ dict(type="LoadImageFromFile", to_float32=True, backend_args=None),
44
+ dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
45
+ dict(type="RandomFlip", prob=0.5),
46
+ # large scale jittering
47
+ dict(
48
+ type="RandomResize",
49
+ scale=image_size,
50
+ ratio_range=(0.1, 2.0),
51
+ resize_type="Resize",
52
+ keep_ratio=True,
53
+ ),
54
+ dict(
55
+ type="RandomCrop",
56
+ crop_size=image_size,
57
+ crop_type="absolute",
58
+ recompute_bbox=True,
59
+ allow_negative_crop=True,
60
+ ),
61
+ dict(type="FilterAnnotations", min_gt_bbox_wh=(1e-5, 1e-5), by_mask=True),
62
+ dict(type="PackDetInputs"),
63
+ ]
64
+
65
+ test_pipeline = [
66
+ dict(type="LoadImageFromFile", to_float32=True, backend_args=None),
67
+ dict(type="Resize", scale=(1333, 800), keep_ratio=True),
68
+ # If you don't have a gt annotation, delete the pipeline
69
+ dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
70
+ dict(
71
+ type="PackDetInputs",
72
+ meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"),
73
+ ),
74
+ ]
75
+
76
+ dataset_type = "CocoDataset"
77
+ data_root = "data/coco/"
78
+
79
+ train_dataloader = dict(
80
+ dataset=dict(
81
+ type=dataset_type,
82
+ ann_file="annotations/instances_train2017.json",
83
+ data_prefix=dict(img="train2017/"),
84
+ pipeline=train_pipeline,
85
+ )
86
+ )
87
+ val_dataloader = dict(
88
+ dataset=dict(
89
+ type=dataset_type,
90
+ ann_file="annotations/instances_val2017.json",
91
+ data_prefix=dict(img="val2017/"),
92
+ pipeline=test_pipeline,
93
+ )
94
+ )
95
+ test_dataloader = val_dataloader
96
+
97
+ val_evaluator = dict(
98
+ _delete_=True,
99
+ type="CocoMetric",
100
+ ann_file=data_root + "annotations/instances_val2017.json",
101
+ metric=["bbox", "segm"],
102
+ format_only=False,
103
+ backend_args=None,
104
+ )
105
+ test_evaluator = val_evaluator
exhm/detailer/dddetailer/install.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+ from textwrap import dedent
4
+
5
+ from packaging import version
6
+
7
+ import launch
8
+ from launch import is_installed, run, run_pip
9
+
10
# Honor the webui's --skip-install flag when the running webui exposes it.
try:
    skip_install = launch.args.skip_install
except Exception:
    skip_install = False

# Interpreter used for the pip/mim subprocess calls below.
python = sys.executable
16
+
17
def check_ddetailer() -> bool:
    """Return True when the legacy 'ddetailer' extension is NOT installed.

    dddetailer cannot coexist with the original extension, so the caller
    aborts when this returns False.
    """
    try:
        from modules.paths import extensions_dir

        ext_root = Path(extensions_dir)
    except ImportError:
        # Older webui versions only expose data_path.
        from modules.paths import data_path

        ext_root = Path(data_path, "extensions")

    for entry in ext_root.iterdir():
        if entry.is_dir() and entry.name.startswith("ddetailer"):
            return False
    return True
29
+
30
+
31
def check_install() -> bool:
    """Return True when working mmcv>=2.0 and mmdet>=3.0 installs are present."""
    try:
        import mmcv
        import mmdet
        # Probing the mmdet 3.x API surface; import failure means a 2.x install.
        from mmdet.evaluation import get_classes  # noqa: F401
    except Exception:
        return False

    if not (hasattr(mmcv, "__version__") and hasattr(mmdet, "__version__")):
        return False

    return (
        version.parse(mmcv.__version__) >= version.parse("2.0.0")
        and version.parse(mmdet.__version__) >= version.parse("3.0.0")
    )
45
+
46
+
47
def install():
    """Install the extension's dependencies (pycocotools, openmim, mmcv/mmdet).

    Commands run in a subprocess through the webui's `run` helper, which
    executes via a shell — so both the interpreter path and any version
    specifiers must be quoted. mmcv/mmdet are reinstalled only when
    check_install() reports them missing or outdated.
    """
    if not is_installed("pycocotools"):
        # Quote the interpreter path: it may contain spaces (consistent with
        # the other run() calls below).
        run(f'"{python}" -m pip install pycocotools', live=True)

    if not is_installed("mim"):
        run_pip("install openmim", desc="openmim")

    if not check_install():
        print("Uninstalling mmcv mmdet... (if installed)")
        run(f'"{python}" -m pip uninstall -y mmcv mmcv-full mmdet mmengine', live=True)
        print("Installing mmcv mmdet...")
        # The version specifiers must be quoted: the command goes through a
        # shell, where an unquoted ">" is output redirection and would strip
        # the ">=2.0.0"/">=3.0.0" pins.
        run(f'"{python}" -m mim install -U "mmcv>=2.0.0" "mmdet>=3.0.0"', live=True)
59
+
60
+
61
# Refuse to proceed while the original ddetailer extension (or its models)
# are still installed — the two extensions conflict.
if not check_ddetailer():
    message = """
    [-] dddetailer: Please remove the following:
    1. the original ddetailer extension - "stable-diffusion-webui/extensions/ddetailer" folder.
    2. original model files - "stable-diffusion-webui/models/mmdet" folder.
    """
    message = dedent(message)
    raise RuntimeError(message)

if not skip_install:
    install()
exhm/detailer/dddetailer/misc/ddetailer_example_1.png ADDED
exhm/detailer/dddetailer/misc/ddetailer_example_2.png ADDED
exhm/detailer/dddetailer/misc/ddetailer_example_3.gif ADDED
exhm/detailer/dddetailer/pyproject.toml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Project metadata for the dddetailer Stable Diffusion webui extension.
[project]
name = "dddetailer"
version = "23.8.0"
description = "An object detection and auto-mask extension for Stable Diffusion web UI."
authors = [
    {name = "dowon", email = "ks2515@naver.com"},
]
requires-python = ">=3.8,<3.12"
readme = "README.md"
license = {text = "MIT"}

[project.urls]
repository = "https://github.com/Bing-su/dddetailer"

# Lint/format settings; "modules" and "launch" are provided by the host webui,
# hence treated as first-party by isort/ruff.
[tool.isort]
profile = "black"
known_first_party = ["modules", "launch"]

[tool.black]
line-length = 120

[tool.ruff]
select = ["A", "B", "C4", "E", "F", "I001", "ISC", "N", "PIE", "PT", "RET", "SIM", "UP", "W"]
ignore = ["B008", "B905", "E501"]
unfixable = ["F401"]
line-length = 120

[tool.ruff.isort]
known-first-party = ["modules", "launch"]
exhm/detailer/dddetailer/scripts/dddetailer.py ADDED
@@ -0,0 +1,1057 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from copy import copy
4
+ from pathlib import Path
5
+ from textwrap import dedent
6
+
7
+ import cv2
8
+ import gradio as gr
9
+ import numpy as np
10
+ from basicsr.utils.download_util import load_file_from_url
11
+ from packaging.version import parse
12
+ from PIL import Image
13
+
14
+ from launch import run
15
+ from modules import (
16
+ devices,
17
+ images,
18
+ modelloader,
19
+ processing,
20
+ script_callbacks,
21
+ scripts,
22
+ shared,
23
+ )
24
+ from modules.paths import data_path, models_path
25
+ from modules.processing import (
26
+ Processed,
27
+ StableDiffusionProcessingImg2Img,
28
+ StableDiffusionProcessingTxt2Img,
29
+ )
30
+ from modules.sd_models import model_hash
31
+ from modules.shared import cmd_opts, opts, state
32
+
33
# Script title shown in the webui "Script" dropdown and written to infotext.
DETECTION_DETAILER = "Detection Detailer"
# Detection models live under <webui>/models/mmdet (bbox/ and segm/ subfolders).
dd_models_path = os.path.join(models_path, "mmdet")
# Interpreter used for the pip/mim subprocess calls in startup().
python = sys.executable
36
+
37
+
38
def check_ddetailer() -> bool:
    """Return True when the legacy 'ddetailer' extension is NOT installed."""
    try:
        from modules.paths import extensions_dir

        ext_root = Path(extensions_dir)
    except ImportError:
        # Older webui versions only expose data_path.
        from modules.paths import data_path

        ext_root = Path(data_path, "extensions")

    for entry in ext_root.iterdir():
        if entry.is_dir() and entry.name.startswith("ddetailer"):
            return False
    return True
50
+
51
+
52
def check_install() -> bool:
    """Return True when working mmcv>=2.0 and mmdet>=3.0 installs are present."""
    try:
        import mmcv
        import mmdet
        # Probing the mmdet 3.x API surface; import failure means a 2.x install.
        from mmdet.evaluation import get_classes  # noqa: F401
    except Exception:
        return False

    if not (hasattr(mmcv, "__version__") and hasattr(mmdet, "__version__")):
        return False

    return (
        parse(mmcv.__version__) >= parse("2.0.0")
        and parse(mmdet.__version__) >= parse("3.0.0")
    )
66
+
67
+
68
def list_models(model_path):
    """Return dropdown titles ("relative/name.pth [hash]") for every .pth model
    found under model_path."""
    found = modelloader.load_models(model_path=model_path, ext_filter=[".pth"])

    def make_title(path, shorthash):
        # Prefer the path relative to model_path; fall back to the bare filename.
        abspath = os.path.abspath(path)
        if abspath.startswith(model_path):
            name = abspath.replace(model_path, "")
        else:
            name = os.path.basename(path)

        if name.startswith(("\\", "/")):
            name = name[1:]

        shortname = os.path.splitext(name.replace("/", "_").replace("\\", "_"))[0]
        return f"{name} [{shorthash}]", shortname

    return [make_title(filename, model_hash(filename))[0] for filename in found]
93
+
94
+
95
def startup():
    """Validate the environment and fetch the default detection models.

    Aborts when the legacy ddetailer extension is present, (re)installs
    mmcv/mmdet when check_install() fails, and on first run downloads the
    default bbox (anime-face yolov3) and segm (mask2former) models together
    with their mmdet config files.
    """
    if not check_ddetailer():
        message = """
        [-] dddetailer: dddetailer doesn't work with the original ddetailer extension.
        dddetailer는 원본 ddetailer 확장이 있을 때 동작하지 않습니다.
        """
        raise RuntimeError(dedent(message))

    if not check_install():
        run(f'"{python}" -m pip uninstall -y mmcv mmcv-full mmdet mmengine')
        run(f'"{python}" -m pip install openmim', desc="Installing openmim", errdesc="Couldn't install openmim")
        run(
            # The version specifiers must be quoted: run() executes through a
            # shell, where an unquoted ">" is output redirection and would
            # strip the ">=2.0.0"/">=3.0.0" pins.
            f'"{python}" -m mim install "mmcv>=2.0.0" "mmdet>=3.0.0"',
            desc="Installing mmdet",
            errdesc="Couldn't install mmdet",
        )

    if len(list_models(dd_models_path)) == 0:
        print("No detection models found, downloading...")
        bbox_path = os.path.join(dd_models_path, "bbox")
        segm_path = os.path.join(dd_models_path, "segm")
        # bbox
        load_file_from_url(
            "https://huggingface.co/dustysys/ddetailer/resolve/main/mmdet/bbox/mmdet_anime-face_yolov3.pth",
            bbox_path,
        )
        load_file_from_url(
            "https://raw.githubusercontent.com/Bing-su/dddetailer/master/config/mmdet_anime-face_yolov3.py",
            bbox_path,
        )
        # segm
        load_file_from_url(
            "https://github.com/Bing-su/dddetailer/releases/download/segm/mmdet_dd-person_mask2former.pth",
            segm_path,
        )
        load_file_from_url(
            "https://raw.githubusercontent.com/Bing-su/dddetailer/master/config/mmdet_dd-person_mask2former.py",
            segm_path,
        )
        load_file_from_url(
            "https://raw.githubusercontent.com/Bing-su/dddetailer/master/config/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py",
            segm_path,
        )
        load_file_from_url(
            "https://raw.githubusercontent.com/Bing-su/dddetailer/master/config/coco_panoptic.py",
            segm_path,
        )


startup()
145
+
146
+
147
def gr_show(visible=True):
    """Gradio update payload that toggles a component's visibility."""
    return dict(visible=visible, __type__="update")
149
+
150
+
151
def ddetailer_extra_generation_params(
    dd_prompt,
    dd_neg_prompt,
    dd_model_a,
    dd_conf_a,
    dd_dilation_factor_a,
    dd_offset_x_a,
    dd_offset_y_a,
    dd_preprocess_b,
    dd_bitwise_op,
    dd_model_b,
    dd_conf_b,
    dd_dilation_factor_b,
    dd_offset_x_b,
    dd_offset_y_b,
    dd_mask_blur,
    dd_denoising_strength,
    dd_inpaint_full_res,
    dd_inpaint_full_res_padding,
    dd_cfg_scale,
):
    """Build the infotext key/value pairs recorded for a DDetailer run.

    Key order is preserved in the generated infotext; empty prompt overrides
    are omitted entirely.
    """
    params = {
        "DDetailer prompt": dd_prompt,
        "DDetailer neg prompt": dd_neg_prompt,
        "DDetailer model a": dd_model_a,
        "DDetailer conf a": dd_conf_a,
        "DDetailer dilation a": dd_dilation_factor_a,
        "DDetailer offset x a": dd_offset_x_a,
        "DDetailer offset y a": dd_offset_y_a,
        "DDetailer preprocess b": dd_preprocess_b,
        "DDetailer bitwise": dd_bitwise_op,
        "DDetailer model b": dd_model_b,
        "DDetailer conf b": dd_conf_b,
        "DDetailer dilation b": dd_dilation_factor_b,
        "DDetailer offset x b": dd_offset_x_b,
        "DDetailer offset y b": dd_offset_y_b,
        "DDetailer mask blur": dd_mask_blur,
        "DDetailer denoising": dd_denoising_strength,
        "DDetailer inpaint full": dd_inpaint_full_res,
        "DDetailer inpaint padding": dd_inpaint_full_res_padding,
        "DDetailer cfg": dd_cfg_scale,
        "Script": DETECTION_DETAILER,
    }
    # Drop the prompt overrides when they were left empty in the UI.
    for key, value in (("DDetailer prompt", dd_prompt), ("DDetailer neg prompt", dd_neg_prompt)):
        if not value:
            del params[key]
    return params
199
+
200
+
201
class DetectionDetailerScript(scripts.Script):
    """Detect regions with mmdet models and inpaint each detection."""

    def title(self):
        # Name listed in the webui "Script" dropdown.
        return DETECTION_DETAILER
204
+
205
+ def show(self, is_img2img):
206
+ return True
207
+
208
    def ui(self, is_img2img):
        """Build the script's gradio controls.

        Returns the component list whose values are forwarded positionally to
        run(). txt2img additionally gets prompt/negative-prompt overrides for
        the inpaint passes.
        NOTE(review): nesting reconstructed from a diff dump — verify against
        upstream dddetailer.
        """
        import modules.ui

        model_list = list_models(dd_models_path)
        model_list.insert(0, "None")
        if is_img2img:
            info = gr.HTML(
                '<p style="margin-bottom:0.75em">Recommended settings: Use from inpaint tab, inpaint at full res ON, denoise < 0.5</p>'
            )
        else:
            info = gr.HTML("")
        dd_prompt = None
        with gr.Group():
            if not is_img2img:
                # txt2img only: optional prompt overrides for the inpaint passes.
                with gr.Row():
                    dd_prompt = gr.Textbox(
                        label="dd_prompt",
                        elem_id="t2i_dd_prompt",
                        show_label=False,
                        lines=3,
                        placeholder="Ddetailer Prompt",
                    )

                with gr.Row():
                    dd_neg_prompt = gr.Textbox(
                        label="dd_neg_prompt",
                        elem_id="t2i_dd_neg_prompt",
                        show_label=False,
                        lines=2,
                        placeholder="Ddetailer Negative prompt",
                    )

            # Primary (A) detection model and its mask post-processing knobs.
            with gr.Row():
                dd_model_a = gr.Dropdown(
                    label="Primary detection model (A)",
                    choices=model_list,
                    value="None",
                    visible=True,
                    type="value",
                )

            with gr.Row():
                dd_conf_a = gr.Slider(
                    label="Detection confidence threshold % (A)",
                    minimum=0,
                    maximum=100,
                    step=1,
                    value=30,
                    visible=True,
                )
                dd_dilation_factor_a = gr.Slider(
                    label="Dilation factor (A)",
                    minimum=0,
                    maximum=255,
                    step=1,
                    value=4,
                    visible=True,
                )

            with gr.Row():
                dd_offset_x_a = gr.Slider(
                    label="X offset (A)",
                    minimum=-200,
                    maximum=200,
                    step=1,
                    value=0,
                    visible=True,
                )
                dd_offset_y_a = gr.Slider(
                    label="Y offset (A)",
                    minimum=-200,
                    maximum=200,
                    step=1,
                    value=0,
                    visible=True,
                )

            with gr.Row():
                dd_preprocess_b = gr.Checkbox(
                    label="Inpaint model B detections before model A runs",
                    value=False,
                    visible=True,
                )
                dd_bitwise_op = gr.Radio(
                    label="Bitwise operation",
                    choices=["None", "A&B", "A-B"],
                    value="None",
                    visible=True,
                )

        br = gr.HTML("<br>")

        # Secondary (B) detection model, used for preprocessing or mask algebra.
        with gr.Group():
            with gr.Row():
                dd_model_b = gr.Dropdown(
                    label="Secondary detection model (B) (optional)",
                    choices=model_list,
                    value="None",
                    visible=True,
                    type="value",
                )

            with gr.Row():
                dd_conf_b = gr.Slider(
                    label="Detection confidence threshold % (B)",
                    minimum=0,
                    maximum=100,
                    step=1,
                    value=30,
                    visible=True,
                )
                dd_dilation_factor_b = gr.Slider(
                    label="Dilation factor (B)",
                    minimum=0,
                    maximum=255,
                    step=1,
                    value=4,
                    visible=True,
                )

            with gr.Row():
                dd_offset_x_b = gr.Slider(
                    label="X offset (B)",
                    minimum=-200,
                    maximum=200,
                    step=1,
                    value=0,
                    visible=True,
                )
                dd_offset_y_b = gr.Slider(
                    label="Y offset (B)",
                    minimum=-200,
                    maximum=200,
                    step=1,
                    value=0,
                    visible=True,
                )

        # Inpainting parameters; several are txt2img-only (img2img supplies its own).
        with gr.Group():
            with gr.Row():
                dd_mask_blur = gr.Slider(
                    label="Mask blur ",
                    minimum=0,
                    maximum=64,
                    step=1,
                    value=4,
                    visible=(not is_img2img),
                )
                dd_denoising_strength = gr.Slider(
                    label="Denoising strength (Inpaint)",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=0.4,
                    visible=(not is_img2img),
                )

            with gr.Row():
                dd_inpaint_full_res = gr.Checkbox(
                    label="Inpaint at full resolution ",
                    value=True,
                    visible=(not is_img2img),
                )
                dd_inpaint_full_res_padding = gr.Slider(
                    label="Inpaint at full resolution padding, pixels ",
                    minimum=0,
                    maximum=256,
                    step=4,
                    value=32,
                    visible=(not is_img2img),
                )

            with gr.Row():
                dd_cfg_scale = gr.Slider(
                    label="CFG Scale",
                    minimum=0,
                    maximum=30,
                    step=0.5,
                    value=7,
                    visible=True,
                )

        # Hide the A-dependent controls while no primary model is selected.
        dd_model_a.change(
            lambda modelname: {
                dd_model_b: gr_show(modelname != "None"),
                dd_conf_a: gr_show(modelname != "None"),
                dd_dilation_factor_a: gr_show(modelname != "None"),
                dd_offset_x_a: gr_show(modelname != "None"),
                dd_offset_y_a: gr_show(modelname != "None"),
            },
            inputs=[dd_model_a],
            outputs=[
                dd_model_b,
                dd_conf_a,
                dd_dilation_factor_a,
                dd_offset_x_a,
                dd_offset_y_a,
            ],
        )

        # Same for the B-dependent controls.
        dd_model_b.change(
            lambda modelname: {
                dd_preprocess_b: gr_show(modelname != "None"),
                dd_bitwise_op: gr_show(modelname != "None"),
                dd_conf_b: gr_show(modelname != "None"),
                dd_dilation_factor_b: gr_show(modelname != "None"),
                dd_offset_x_b: gr_show(modelname != "None"),
                dd_offset_y_b: gr_show(modelname != "None"),
            },
            inputs=[dd_model_b],
            outputs=[
                dd_preprocess_b,
                dd_bitwise_op,
                dd_conf_b,
                dd_dilation_factor_b,
                dd_offset_x_b,
                dd_offset_y_b,
            ],
        )
        # Register infotext fields so "Send to ..." restores these settings
        # (only possible on txt2img, where the prompt boxes exist).
        if dd_prompt:
            self.infotext_fields = (
                (dd_prompt, "DDetailer prompt"),
                (dd_neg_prompt, "DDetailer neg prompt"),
                (dd_model_a, "DDetailer model a"),
                (dd_conf_a, "DDetailer conf a"),
                (dd_dilation_factor_a, "DDetailer dilation a"),
                (dd_offset_x_a, "DDetailer offset x a"),
                (dd_offset_y_a, "DDetailer offset y a"),
                (dd_preprocess_b, "DDetailer preprocess b"),
                (dd_bitwise_op, "DDetailer bitwise"),
                (dd_model_b, "DDetailer model b"),
                (dd_conf_b, "DDetailer conf b"),
                (dd_dilation_factor_b, "DDetailer dilation b"),
                (dd_offset_x_b, "DDetailer offset x b"),
                (dd_offset_y_b, "DDetailer offset y b"),
                (dd_mask_blur, "DDetailer mask blur"),
                (dd_denoising_strength, "DDetailer denoising"),
                (dd_inpaint_full_res, "DDetailer inpaint full"),
                (dd_inpaint_full_res_padding, "DDetailer inpaint padding"),
                (dd_cfg_scale, "DDetailer cfg"),
            )

        # Order must match run()'s positional parameters.
        ret = [
            info,
            dd_model_a,
            dd_conf_a,
            dd_dilation_factor_a,
            dd_offset_x_a,
            dd_offset_y_a,
            dd_preprocess_b,
            dd_bitwise_op,
            br,
            dd_model_b,
            dd_conf_b,
            dd_dilation_factor_b,
            dd_offset_x_b,
            dd_offset_y_b,
            dd_mask_blur,
            dd_denoising_strength,
            dd_inpaint_full_res,
            dd_inpaint_full_res_padding,
            dd_cfg_scale,
        ]
        if not is_img2img:
            ret += [dd_prompt, dd_neg_prompt]
        return ret
474
+
475
    def run(
        self,
        p,
        info,
        dd_model_a,
        dd_conf_a,
        dd_dilation_factor_a,
        dd_offset_x_a,
        dd_offset_y_a,
        dd_preprocess_b,
        dd_bitwise_op,
        br,
        dd_model_b,
        dd_conf_b,
        dd_dilation_factor_b,
        dd_offset_x_b,
        dd_offset_y_b,
        dd_mask_blur,
        dd_denoising_strength,
        dd_inpaint_full_res,
        dd_inpaint_full_res_padding,
        dd_cfg_scale,
        dd_prompt=None,
        dd_neg_prompt=None,
    ):
        """Generate, detect, and inpaint.

        For each of n_iter generations: produce (txt2img) or take (img2img)
        an initial image, optionally inpaint model B detections first, then
        detect with model A (optionally combined with B via A&B / A-B mask
        algebra) and inpaint every remaining detection in turn.
        NOTE(review): statement nesting reconstructed from a diff dump —
        verify against upstream dddetailer before relying on branch details.
        """
        processing.fix_seed(p)
        seed = p.seed
        subseed = p.subseed
        # One image at a time; the original batch count drives the outer loop.
        p.batch_size = 1
        ddetail_count = p.n_iter
        p.n_iter = 1
        p.do_not_save_grid = True
        p.do_not_save_samples = True
        is_txt2img = isinstance(p, StableDiffusionProcessingTxt2Img)
        info = ""

        # ddetailer info
        extra_generation_params = ddetailer_extra_generation_params(
            dd_prompt,
            dd_neg_prompt,
            dd_model_a,
            dd_conf_a,
            dd_dilation_factor_a,
            dd_offset_x_a,
            dd_offset_y_a,
            dd_preprocess_b,
            dd_bitwise_op,
            dd_model_b,
            dd_conf_b,
            dd_dilation_factor_b,
            dd_offset_x_b,
            dd_offset_y_b,
            dd_mask_blur,
            dd_denoising_strength,
            dd_inpaint_full_res,
            dd_inpaint_full_res_padding,
            dd_cfg_scale,
        )
        p.extra_generation_params.update(extra_generation_params)

        p_txt = copy(p)
        if not is_txt2img:
            orig_image = p.init_images[0]
        else:
            img2img_sampler_name = p_txt.sampler_name
            # PLMS/UniPC do not support img2img so we just silently switch to DDIM
            if p_txt.sampler_name in ["PLMS", "UniPC"]:
                img2img_sampler_name = "DDIM"
            p_txt_prompt = dd_prompt if dd_prompt else p_txt.prompt
            p_txt_neg_prompt = dd_neg_prompt if dd_neg_prompt else p_txt.negative_prompt
            # Build the img2img pipeline used for the inpaint passes from the
            # txt2img settings.
            p = StableDiffusionProcessingImg2Img(
                init_images=None,
                resize_mode=0,
                denoising_strength=dd_denoising_strength,
                mask=None,
                mask_blur=dd_mask_blur,
                inpainting_fill=1,
                inpaint_full_res=dd_inpaint_full_res,
                inpaint_full_res_padding=dd_inpaint_full_res_padding,
                inpainting_mask_invert=0,
                sd_model=p_txt.sd_model,
                outpath_samples=p_txt.outpath_samples,
                outpath_grids=p_txt.outpath_grids,
                prompt=p_txt_prompt,
                negative_prompt=p_txt_neg_prompt,
                styles=p_txt.styles,
                seed=p_txt.seed,
                subseed=p_txt.subseed,
                subseed_strength=p_txt.subseed_strength,
                seed_resize_from_h=p_txt.seed_resize_from_h,
                seed_resize_from_w=p_txt.seed_resize_from_w,
                sampler_name=img2img_sampler_name,
                n_iter=p_txt.n_iter,
                steps=p_txt.steps,
                cfg_scale=p_txt.cfg_scale,
                width=p_txt.width,
                height=p_txt.height,
                tiling=p_txt.tiling,
                extra_generation_params=p_txt.extra_generation_params,
            )
            p.do_not_save_grid = True
            p.do_not_save_samples = True
            p.cached_c = [None, None]
            p.cached_uc = [None, None]

        p.scripts = p_txt.scripts
        p.script_args = p_txt.script_args

        # output info
        all_prompts = []
        all_negative_prompts = []
        all_seeds = []
        all_subseeds = []
        infotexts = []
        output_images = []

        state.job_count = ddetail_count
        for n in range(ddetail_count):
            devices.torch_gc()
            start_seed = seed + n

            all_prompts.append(p_txt.prompt)
            all_negative_prompts.append(p_txt.negative_prompt)
            all_seeds.append(start_seed)
            all_subseeds.append(subseed + n)

            if is_txt2img:
                print(f"Processing initial image for output generation {n + 1}.")
                p_txt.seed = start_seed
                processed = processing.process_images(p_txt)
                init_image = processed.images[0]
                info = processed.info
                # Without explicit overrides, inpaint with the resolved prompts.
                if not dd_prompt:
                    p.prompt = processed.all_prompts[0]
                if not dd_neg_prompt:
                    p.negative_prompt = processed.all_negative_prompts[0]
                all_prompts[n] = processed.all_prompts[0]
                all_negative_prompts[n] = processed.all_negative_prompts[0]
            else:
                init_image = orig_image
                p.prompt = p_txt.prompt
                p.negative_prompt = p_txt.negative_prompt
                p.cfg_scale = dd_cfg_scale

            if opts.enable_pnginfo:
                init_image.info["parameters"] = info

            infotexts.append(info)
            output_images.append(init_image)

            masks_a = []
            masks_b_pre = []

            # Optional secondary pre-processing run
            if dd_model_b != "None" and dd_preprocess_b:
                label_b_pre = "B"
                results_b_pre = inference(init_image, dd_model_b, dd_conf_b / 100.0, label_b_pre)
                masks_b_pre = create_segmasks(results_b_pre)
                masks_b_pre = dilate_masks(masks_b_pre, dd_dilation_factor_b, 1)
                masks_b_pre = offset_masks(masks_b_pre, dd_offset_x_b, dd_offset_y_b)
                if len(masks_b_pre) > 0:
                    results_b_pre = update_result_masks(results_b_pre, masks_b_pre)
                    segmask_preview_b = create_segmask_preview(results_b_pre, init_image)
                    shared.state.current_image = segmask_preview_b
                    if opts.dd_save_previews:
                        images.save_image(
                            segmask_preview_b,
                            opts.outdir_ddetailer_previews,
                            "",
                            start_seed,
                            p.prompt,
                            opts.samples_format,
                            p=p,
                        )
                    gen_count = len(masks_b_pre)
                    state.job_count += gen_count
                    print(f"Processing {gen_count} model {label_b_pre} detections for output generation {n + 1}.")
                    p.seed = start_seed
                    p.init_images = [init_image]

                    # Inpaint each B detection, feeding the result into the next.
                    for i in range(gen_count):
                        p.image_mask = masks_b_pre[i]
                        if opts.dd_save_masks:
                            images.save_image(
                                masks_b_pre[i],
                                opts.outdir_ddetailer_masks,
                                "",
                                start_seed,
                                p.prompt,
                                opts.samples_format,
                                p=p,
                            )
                        processed = processing.process_images(p)
                        if not is_txt2img:
                            p.prompt = processed.all_prompts[0]
                            p.negative_prompt = processed.all_negative_prompts[0]
                        p.seed = processed.seed + 1
                        p.subseed = processed.subseed + 1
                        p.init_images = [processed.images[0]]

                    if gen_count > 0:
                        output_images[n] = processed.images[0]
                        init_image = processed.images[0]

                else:
                    print(f"No model B detections for output generation {n} with current settings.")

            # Primary run
            if dd_model_a != "None":
                label_a = "A"
                if dd_model_b != "None" and dd_bitwise_op != "None":
                    label_a = dd_bitwise_op
                results_a = inference(init_image, dd_model_a, dd_conf_a / 100.0, label_a)
                masks_a = create_segmasks(results_a)
                masks_a = dilate_masks(masks_a, dd_dilation_factor_a, 1)
                masks_a = offset_masks(masks_a, dd_offset_x_a, dd_offset_y_a)
                if dd_model_b != "None" and dd_bitwise_op != "None":
                    # Combine A masks with the union of B masks (A&B or A-B).
                    label_b = "B"
                    results_b = inference(init_image, dd_model_b, dd_conf_b / 100.0, label_b)
                    masks_b = create_segmasks(results_b)
                    masks_b = dilate_masks(masks_b, dd_dilation_factor_b, 1)
                    masks_b = offset_masks(masks_b, dd_offset_x_b, dd_offset_y_b)
                    if len(masks_b) > 0:
                        combined_mask_b = combine_masks(masks_b)
                        # Reversed so deletions don't shift pending indices.
                        for i in reversed(range(len(masks_a))):
                            if dd_bitwise_op == "A&B":
                                masks_a[i] = bitwise_and_masks(masks_a[i], combined_mask_b)
                            elif dd_bitwise_op == "A-B":
                                masks_a[i] = subtract_masks(masks_a[i], combined_mask_b)
                            if is_allblack(masks_a[i]):
                                del masks_a[i]
                                for result in results_a:
                                    del result[i]

                    else:
                        print("No model B detections to overlap with model A masks")
                        results_a = []
                        masks_a = []

                if len(masks_a) > 0:
                    results_a = update_result_masks(results_a, masks_a)
                    segmask_preview_a = create_segmask_preview(results_a, init_image)
                    shared.state.current_image = segmask_preview_a
                    if opts.dd_save_previews:
                        images.save_image(
                            segmask_preview_a,
                            opts.outdir_ddetailer_previews,
                            "",
                            start_seed,
                            p.prompt,
                            opts.samples_format,
                            p=p,
                        )
                    gen_count = len(masks_a)
                    state.job_count += gen_count
                    print(f"Processing {gen_count} model {label_a} detections for output generation {n + 1}.")
                    p.seed = start_seed
                    p.init_images = [init_image]

                    for i in range(gen_count):
                        p.image_mask = masks_a[i]
                        if opts.dd_save_masks:
                            images.save_image(
                                masks_a[i],
                                opts.outdir_ddetailer_masks,
                                "",
                                start_seed,
                                p.prompt,
                                opts.samples_format,
                                p=p,
                            )

                        processed = processing.process_images(p)
                        if not is_txt2img:
                            p.prompt = processed.all_prompts[0]
                            p.negative_prompt = processed.all_negative_prompts[0]
                        info = processed.info
                        all_prompts[n] = processed.all_prompts[0]
                        all_negative_prompts[n] = processed.all_negative_prompts[0]
                        p.seed = processed.seed + 1
                        p.subseed = processed.subseed + 1
                        p.init_images = [processed.images[0]]

                    if gen_count > 0:
                        final_image = processed.images[0]

                        if opts.enable_pnginfo:
                            final_image.info["parameters"] = info
                        output_images[n] = final_image
                        infotexts[n] = info

                        if opts.samples_save:
                            images.save_image(
                                final_image,
                                p.outpath_samples,
                                "",
                                start_seed,
                                p.prompt,
                                opts.samples_format,
                                info=info,
                                p=p,
                            )

                else:
                    print(f"No model {label_a} detections for output generation {n} with current settings.")

                    # Nothing to inpaint: save the untouched initial image.
                    if opts.samples_save:
                        images.save_image(
                            init_image,
                            p.outpath_samples,
                            "",
                            start_seed,
                            p.prompt,
                            opts.samples_format,
                            info=info,
                            p=p,
                        )

            state.job = f"Generation {n + 1} out of {state.job_count}"

        # Keep params.txt in sync so "paste last parameters" reflects overrides.
        if dd_prompt or dd_neg_prompt:
            params_txt = os.path.join(data_path, "params.txt")
            with open(params_txt, "w", encoding="utf-8") as file:
                file.write(infotexts[0])

        return Processed(
            p,
            output_images,
            seed,
            infotexts[0],
            all_prompts=all_prompts,
            all_negative_prompts=all_negative_prompts,
            all_seeds=all_seeds,
            all_subseeds=all_subseeds,
            infotexts=infotexts,
        )
811
+
812
+
813
def modeldataset(model_shortname):
    """Return the label dataset key for a model: "coco" for segm models
    stored under mmdet/segm, otherwise "bbox"."""
    path = modelpath(model_shortname)
    if "mmdet" in path and "segm" in path:
        return "coco"
    return "bbox"
817
+
818
+
819
def modelpath(model_shortname):
    """Resolve a dropdown entry of the form "name [hash]" back to the model
    file path, or None when no model matches the hash."""
    candidates = modelloader.load_models(model_path=dd_models_path, ext_filter=[".pth"])
    wanted = model_shortname.split("[")[-1].split("]")[0]
    return next((path for path in candidates if model_hash(path) == wanted), None)
826
+
827
+
828
def update_result_masks(results, masks):
    """Overwrite the segmentation entries (results[2]) with boolean versions
    of the post-processed masks; returns the mutated results."""
    for idx, mask in enumerate(masks):
        results[2][idx] = np.array(mask, dtype=bool)
    return results
833
+
834
+
835
def create_segmask_preview(results, image):
    """Render a labeled overlay preview of the detections on `image`.

    `results` is the inference tuple (labels, bboxes, segms, scores).
    Returns a new PIL image, or `image` unchanged when there are no masks.
    """
    labels = results[0]
    bboxes = results[1]
    segms = results[2]
    scores = results[3]

    # PIL (RGB) -> OpenCV (BGR) working copy.
    cv2_image = np.array(image)
    cv2_image = cv2_image[:, :, ::-1].copy()

    for i in range(len(segms)):
        # Random bright color per detection, alpha-blended over the image.
        color = np.full_like(cv2_image, np.random.randint(100, 256, (1, 3), dtype=np.uint8))
        alpha = 0.2
        color_image = cv2.addWeighted(cv2_image, alpha, color, 1 - alpha, 0)
        cv2_mask = segms[i].astype(np.uint8) * 255
        cv2_mask_bool = np.array(segms[i], dtype=bool)
        # The mask centroid anchors the "label:score" text.
        centroid = np.mean(np.argwhere(cv2_mask_bool), axis=0)
        centroid_x, centroid_y = int(centroid[1]), int(centroid[0])

        cv2_mask_rgb = cv2.merge((cv2_mask, cv2_mask, cv2_mask))
        cv2_image = np.where(cv2_mask_rgb == 255, color_image, cv2_image)
        # Darker shade of the overlay color keeps the text readable
        # (color channels are in [100, 255], so subtracting 100 cannot wrap).
        text_color = tuple([int(x) for x in (color[0][0] - 100)])
        name = labels[i]
        score = scores[i]
        score = str(score)[:4]
        text = name + ":" + score
        cv2.putText(
            cv2_image,
            text,
            (centroid_x - 30, centroid_y),
            cv2.FONT_HERSHEY_DUPLEX,
            0.4,
            text_color,
            1,
            cv2.LINE_AA,
        )

    if len(segms) > 0:
        preview_image = Image.fromarray(cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB))
    else:
        preview_image = image

    return preview_image
877
+
878
+
879
def is_allblack(mask):
    """True when the mask contains no nonzero pixel."""
    return cv2.countNonZero(np.array(mask)) == 0
882
+
883
+
884
def bitwise_and_masks(mask1, mask2):
    """Pixelwise intersection of two masks, returned as a PIL image."""
    combined = cv2.bitwise_and(np.array(mask1), np.array(mask2))
    return Image.fromarray(combined)
890
+
891
+
892
def subtract_masks(mask1, mask2):
    """Saturating subtraction mask1 - mask2, returned as a PIL image."""
    difference = cv2.subtract(np.array(mask1), np.array(mask2))
    return Image.fromarray(difference)
898
+
899
+
900
def dilate_masks(masks, dilation_factor, iter=1):
    """Grow every mask with a square kernel of size dilation_factor.

    Returns the input list unchanged when dilation_factor is 0; otherwise a
    new list of dilated PIL masks. `iter` is the cv2.dilate iteration count.
    """
    if dilation_factor == 0:
        return masks
    dilated_masks = []
    kernel = np.ones((dilation_factor, dilation_factor), np.uint8)
    for mask in masks:
        cv2_mask = np.array(mask)
        # Bug fix: cv2.dilate's third positional parameter is `dst`, not the
        # iteration count — pass it by keyword so `iter` actually takes effect.
        dilated = cv2.dilate(cv2_mask, kernel, iterations=iter)
        dilated_masks.append(Image.fromarray(dilated))
    return dilated_masks
910
+
911
+
912
def offset_masks(masks, offset_x, offset_y):
    """Shift every mask by (offset_x, offset_y) pixels with wrap-around.

    A positive offset_y rolls the mask upward (rows shift by -offset_y);
    a positive offset_x rolls it to the right. No-op when both offsets are 0.
    """
    if offset_x == 0 and offset_y == 0:
        return masks
    shifted = []
    for mask in masks:
        rolled = np.roll(np.array(mask), (-offset_y, offset_x), axis=(0, 1))
        shifted.append(Image.fromarray(rolled))
    return shifted
924
+
925
+
926
def combine_masks(masks):
    """OR-combine a list of masks into one PIL mask.

    Returns None for an empty input instead of raising IndexError on
    masks[0]; every call site in this script already checks for detections
    first, so this only hardens the edge case.
    """
    if not masks:
        return None
    combined_cv2_mask = np.array(masks[0])
    for mask in masks[1:]:
        combined_cv2_mask = cv2.bitwise_or(combined_cv2_mask, np.array(mask))
    return Image.fromarray(combined_cv2_mask)
935
+
936
+
937
def on_ui_settings():
    """Register dddetailer's mask-saving options in the WebUI settings panel."""
    section = ("ddetailer", DETECTION_DETAILER)
    option_specs = (
        ("dd_save_previews", False, "Save mask previews"),
        (
            "outdir_ddetailer_previews",
            "extensions/dddetailer/outputs/masks-previews",
            "Output directory for mask previews",
        ),
        ("dd_save_masks", False, "Save masks"),
        (
            "outdir_ddetailer_masks",
            "extensions/dddetailer/outputs/masks",
            "Output directory for masks",
        ),
    )
    for key, default, label in option_specs:
        shared.opts.add_option(key, shared.OptionInfo(default, label, section=section))
962
+
963
+
964
def create_segmasks(results):
    """Convert the boolean segmentation arrays in results[2] into 8-bit
    (0/255) PIL mask images."""
    return [Image.fromarray(seg.astype(np.uint8) * 255) for seg in results[2]]
973
+
974
+
975
+ from mmdet.apis import inference_detector, init_detector
976
+ from mmdet.evaluation import get_classes
977
+
978
+
979
def get_device():
    """Choose the torch device for mmdet inference.

    Returns the WebUI's optimal device, except that low/medium-VRAM modes
    force CPU (mps is always kept as-is).
    """
    name = devices.get_optimal_device_name()
    if name != "mps":
        for flag in ("lowvram", "medvram"):
            if getattr(cmd_opts, flag, False):
                return "cpu"
    return name
986
+
987
+
988
def inference(image, modelname, conf_thres, label):
    """Dispatch detection to the bbox or segm mmdet pipeline.

    The choice is made from the resolved checkpoint path: paths under
    mmdet/bbox use rectangle detection, paths under mmdet/segm use instance
    segmentation.

    Raises:
        ValueError: when the model path matches neither pipeline.
            (Previously this fell through and raised NameError on the
            unbound local `results`.)
    """
    path = modelpath(modelname)
    if "mmdet" in path and "bbox" in path:
        return inference_mmdet_bbox(image, modelname, conf_thres, label)
    if "mmdet" in path and "segm" in path:
        return inference_mmdet_segm(image, modelname, conf_thres, label)
    raise ValueError(f"Unsupported detection model path: {path}")
995
+
996
+
997
def inference_mmdet_segm(image, modelname, conf_thres, label):
    """Run mmdet (v3 API) instance segmentation on a PIL image.

    Returns a 4-list [names, bboxes, masks, scores] for detections whose
    score exceeds conf_thres; names are prefixed with `label` ("A"/"B"/op).
    NOTE(review): the detector is re-initialized on every call — confirm
    whether caching the model would be safe here.
    """
    model_checkpoint = modelpath(modelname)
    # mmdet convention: the config file sits next to the checkpoint.
    model_config = os.path.splitext(model_checkpoint)[0] + ".py"
    model_device = get_device()
    model = init_detector(model_config, model_checkpoint, device=model_device)
    # mmdet v3 returns a DetDataSample; pred_instances holds tensors.
    mmdet_results = inference_detector(model, np.array(image)).pred_instances
    bboxes = mmdet_results.bboxes.cpu().numpy()
    segms = mmdet_results.masks.cpu().numpy()
    scores = mmdet_results.scores.cpu().numpy()
    dataset = modeldataset(modelname)
    classes = get_classes(dataset)

    n, m = bboxes.shape
    if n == 0:
        # No detections at all: empty [names, bboxes, masks, scores].
        return [[], [], [], []]
    labels = mmdet_results.labels
    # Keep only detections above the confidence threshold.
    filter_inds = np.where(scores > conf_thres)[0]
    results = [[], [], [], []]
    for i in filter_inds:
        results[0].append(label + "-" + classes[labels[i]])
        results[1].append(bboxes[i])
        results[2].append(segms[i])
        results[3].append(scores[i])

    return results
1022
+
1023
+
1024
def inference_mmdet_bbox(image, modelname, conf_thres, label):
    """Run mmdet (v3 API) bounding-box detection on a PIL image.

    Each detection's box is rasterized into a full-image boolean mask so the
    return shape matches inference_mmdet_segm: [names, bboxes, masks, scores].
    """
    model_checkpoint = modelpath(modelname)
    # mmdet convention: the config file sits next to the checkpoint.
    model_config = os.path.splitext(model_checkpoint)[0] + ".py"
    model_device = get_device()
    model = init_detector(model_config, model_checkpoint, device=model_device)
    output = inference_detector(model, np.array(image)).pred_instances
    cv2_image = np.array(image)
    # PIL is RGB; OpenCV expects BGR.
    cv2_image = cv2_image[:, :, ::-1].copy()
    # Grayscale only provides the (H, W) shape for the mask canvases.
    cv2_gray = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2GRAY)

    segms = []
    for x0, y0, x1, y1 in output.bboxes:
        cv2_mask = np.zeros((cv2_gray.shape), np.uint8)
        # Filled (-1 thickness) white rectangle over the detection area.
        cv2.rectangle(cv2_mask, (int(x0), int(y0)), (int(x1), int(y1)), 255, -1)
        cv2_mask_bool = cv2_mask.astype(bool)
        segms.append(cv2_mask_bool)

    n, m = output.bboxes.shape
    if n == 0:
        # No detections at all: empty [names, bboxes, masks, scores].
        return [[], [], [], []]
    bboxes = output.bboxes.cpu().numpy()
    scores = output.scores.cpu().numpy()
    # Keep only detections above the confidence threshold.
    filter_inds = np.where(scores > conf_thres)[0]
    results = [[], [], [], []]
    for i in filter_inds:
        results[0].append(label)
        results[1].append(bboxes[i])
        results[2].append(segms[i])
        results[3].append(scores[i])

    return results
1055
+
1056
+
1057
+ script_callbacks.on_ui_settings(on_ui_settings)
exhm/detailer/ddetailer/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.ckpt
3
+ *.pth
4
+ /tmp
5
+ /outputs
6
+ /log
7
+ .vscode
8
+ /test-cases
exhm/detailer/ddetailer/README.md ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Detection and img2img have come a long way. This project is no longer maintained and there are now several alternatives for this function. See [μ Detection Detailer](https://github.com/wkpark/uddetailer) or [adetailer](https://github.com/Bing-su/adetailer) implementations.
2
+
3
+ # Detection Detailer
4
+ An object detection and auto-mask extension for [Stable Diffusion web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui). See [Installation](https://github.com/dustysys/ddetailer#installation).
5
+
6
+ ![adoringfan](/misc/ddetailer_example_1.png)
7
+
8
+ ### Segmentation
9
+ Default models enable person and face instance segmentation.
10
+
11
+ ![amgothic](/misc/ddetailer_example_2.png)
12
+
13
+ ### Detailing
14
+ With full-resolution inpainting, the extension is handy for improving faces without the hassle of manual masking.
15
+
16
+ ![zion](/misc/ddetailer_example_3.gif)
17
+
18
+ ## Installation
19
+ 1. Use `git clone https://github.com/dustysys/ddetailer.git` from your SD web UI `/extensions` folder. Alternatively, install from the extensions tab with url `https://github.com/dustysys/ddetailer`
20
+ 2. Start or reload SD web UI.
21
+
22
+ The models and dependencies should download automatically. To install them manually, follow the [official instructions for installing mmdet](https://mmcv.readthedocs.io/en/latest/get_started/installation.html#install-with-mim-recommended). The models can be [downloaded here](https://huggingface.co/dustysys/ddetailer) and should be placed in `/models/mmdet/bbox` for bounding box (`anime-face_yolov3`) or `/models/mmdet/segm` for instance segmentation models (`dd-person_mask2former`). See the [MMDetection docs](https://mmdetection.readthedocs.io/en/latest/1_exist_data_model.html) for guidance on training your own models. For using official MMDetection pretrained models see [here](https://github.com/dustysys/ddetailer/issues/5#issuecomment-1311231989), these are trained for photorealism. See [Troubleshooting](https://github.com/dustysys/ddetailer#troubleshooting) if you encounter issues during installation.
23
+
24
+ ## Usage
25
+ Select Detection Detailer as the script in SD web UI to use the extension. Click 'Generate' to run the script. Here are some tips:
26
+ - `anime-face_yolov3` can detect the bounding box of faces as the primary model while `dd-person_mask2former` isolates the head's silhouette as the secondary model by using the bitwise AND option. Refer to [this example](https://github.com/dustysys/ddetailer/issues/4#issuecomment-1311200268).
27
+ - The dilation factor expands the mask, while the x & y offsets move the mask around.
28
+ - The script is available in txt2img mode as well and can improve the quality of your 10 pulls with moderate settings (low denoise).
29
+
30
+ ## Troubleshooting
31
+ If you get the message ERROR: 'Failed building wheel for pycocotools' follow [these steps](https://github.com/dustysys/ddetailer/issues/1#issuecomment-1309415543).
32
+
33
+ Any other issues installing, open an [issue](https://github.com/dustysys/ddetailer/issues).
34
+
35
+ ## Credits
36
+ hysts/[anime-face-detector](https://github.com/hysts/anime-face-detector) - Creator of `anime-face_yolov3`, which has impressive performance on a variety of art styles.
37
+
38
+ skytnt/[anime-segmentation](https://huggingface.co/datasets/skytnt/anime-segmentation) - Synthetic dataset used to train `dd-person_mask2former`.
39
+
40
+ jerryli27/[AniSeg](https://github.com/jerryli27/AniSeg) - Annotated dataset used to train `dd-person_mask2former`.
41
+
42
+ open-mmlab/[mmdetection](https://github.com/open-mmlab/mmdetection) - Object detection toolset. `dd-person_mask2former` was trained via transfer learning using their [R-50 Mask2Former instance segmentation model](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask2former#instance-segmentation) as a base.
43
+
44
+ AUTOMATIC1111/[stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) - Web UI for Stable Diffusion, base application for this extension.
exhm/detailer/ddetailer/misc/ddetailer_example_1.png ADDED
exhm/detailer/ddetailer/misc/ddetailer_example_2.png ADDED
exhm/detailer/ddetailer/misc/ddetailer_example_3.gif ADDED
exhm/detailer/ddetailer/scripts/__pycache__/ddetailer.cpython-310.pyc ADDED
Binary file (16.5 kB). View file
 
exhm/detailer/ddetailer/scripts/ddetailer.py ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import cv2
4
+ from PIL import Image
5
+ import numpy as np
6
+ import gradio as gr
7
+
8
+ from modules import processing, images
9
+ from modules import scripts, script_callbacks, shared, devices, modelloader
10
+ from modules.processing import Processed, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img
11
+ from modules.shared import opts, cmd_opts, state
12
+ from modules.sd_models import model_hash
13
+ from modules.paths import models_path
14
+ from basicsr.utils.download_util import load_file_from_url
15
+
16
+ dd_models_path = os.path.join(models_path, "mmdet")
17
+
18
def list_models(model_path):
    """Return dropdown titles ('relative/name.pth [hash]') for every .pth
    checkpoint found under model_path."""
    found = modelloader.load_models(model_path=model_path, ext_filter=[".pth"])

    def modeltitle(path, shorthash):
        # Prefer a path relative to the models directory when possible.
        abspath = os.path.abspath(path)
        if abspath.startswith(model_path):
            name = abspath.replace(model_path, '')
        else:
            name = os.path.basename(path)

        if name[:1] in ("\\", "/"):
            name = name[1:]

        shortname = os.path.splitext(name.replace("/", "_").replace("\\", "_"))[0]

        return f'{name} [{shorthash}]', shortname

    titles = []
    for filename in found:
        title, _shortname = modeltitle(filename, model_hash(filename))
        titles.append(title)

    return titles
43
+
44
def startup():
    """One-time extension bootstrap: install mmdet if missing and download
    the default detection models when none are present.

    Runs at import time (called right below the definition), so any failure
    here surfaces during WebUI startup.
    """
    from launch import is_installed, run
    if not is_installed("mmdet"):
        python = sys.executable
        # openmim provides `mim`, which resolves the correct mmcv-full wheel.
        run(f'"{python}" -m pip install -U openmim', desc="Installing openmim", errdesc="Couldn't install openmim")
        run(f'"{python}" -m mim install mmcv-full', desc=f"Installing mmcv-full", errdesc=f"Couldn't install mmcv-full")
        run(f'"{python}" -m pip install mmdet', desc=f"Installing mmdet", errdesc=f"Couldn't install mmdet")

    if (len(list_models(dd_models_path)) == 0):
        print("No detection models found, downloading...")
        # Each model needs its checkpoint (.pth) plus the mmdet config (.py)
        # with the same basename in the same directory.
        bbox_path = os.path.join(dd_models_path, "bbox")
        segm_path = os.path.join(dd_models_path, "segm")
        load_file_from_url("https://huggingface.co/dustysys/ddetailer/resolve/main/mmdet/bbox/mmdet_anime-face_yolov3.pth", bbox_path)
        load_file_from_url("https://huggingface.co/dustysys/ddetailer/raw/main/mmdet/bbox/mmdet_anime-face_yolov3.py", bbox_path)
        load_file_from_url("https://huggingface.co/dustysys/ddetailer/resolve/main/mmdet/segm/mmdet_dd-person_mask2former.pth", segm_path)
        load_file_from_url("https://huggingface.co/dustysys/ddetailer/raw/main/mmdet/segm/mmdet_dd-person_mask2former.py", segm_path)
60
+
61
+ startup()
62
+
63
def gr_show(visible=True):
    """Build a Gradio update payload that toggles a component's visibility."""
    return {"__type__": "update", "visible": visible}
65
+
66
class DetectionDetailerScript(scripts.Script):
    """WebUI script: detect regions (faces/people) with mmdet models and
    re-inpaint each detection at full resolution.

    Supports a primary model (A), an optional secondary model (B) used either
    as a pre-processing inpaint pass or combined with A via a bitwise mask
    operation (A&B / A-B).
    """

    def title(self):
        """Name shown in the WebUI script dropdown."""
        return "Detection Detailer"

    def show(self, is_img2img):
        """Available in both txt2img and img2img tabs."""
        return True

    def ui(self, is_img2img):
        """Build the Gradio controls; B-model controls stay hidden until a
        model is selected (see the .change handlers below)."""
        import modules.ui

        model_list = list_models(dd_models_path)
        model_list.insert(0, "None")
        if is_img2img:
            info = gr.HTML("<p style=\"margin-bottom:0.75em\">Recommended settings: Use from inpaint tab, inpaint at full res ON, denoise <0.5</p>")
        else:
            info = gr.HTML("")
        with gr.Group():
            with gr.Row():
                dd_model_a = gr.Dropdown(label="Primary detection model (A)", choices=model_list,value = "None", visible=True, type="value")

            with gr.Row():
                dd_conf_a = gr.Slider(label='Detection confidence threshold % (A)', minimum=0, maximum=100, step=1, value=30, visible=False)
                dd_dilation_factor_a = gr.Slider(label='Dilation factor (A)', minimum=0, maximum=255, step=1, value=4, visible=False)

            with gr.Row():
                dd_offset_x_a = gr.Slider(label='X offset (A)', minimum=-200, maximum=200, step=1, value=0, visible=False)
                dd_offset_y_a = gr.Slider(label='Y offset (A)', minimum=-200, maximum=200, step=1, value=0, visible=False)

            with gr.Row():
                dd_preprocess_b = gr.Checkbox(label='Inpaint model B detections before model A runs', value=False, visible=False)
                dd_bitwise_op = gr.Radio(label='Bitwise operation', choices=['None', 'A&B', 'A-B'], value="None", visible=False)

        br = gr.HTML("<br>")

        with gr.Group():
            with gr.Row():
                dd_model_b = gr.Dropdown(label="Secondary detection model (B) (optional)", choices=model_list,value = "None", visible =False, type="value")

            with gr.Row():
                dd_conf_b = gr.Slider(label='Detection confidence threshold % (B)', minimum=0, maximum=100, step=1, value=30, visible=False)
                dd_dilation_factor_b = gr.Slider(label='Dilation factor (B)', minimum=0, maximum=255, step=1, value=4, visible=False)

            with gr.Row():
                dd_offset_x_b = gr.Slider(label='X offset (B)', minimum=-200, maximum=200, step=1, value=0, visible=False)
                dd_offset_y_b = gr.Slider(label='Y offset (B)', minimum=-200, maximum=200, step=1, value=0, visible=False)

        with gr.Group():
            with gr.Row():
                dd_mask_blur = gr.Slider(label='Mask blur ', minimum=0, maximum=64, step=1, value=4, visible=(not is_img2img))
                dd_denoising_strength = gr.Slider(label='Denoising strength (Inpaint)', minimum=0.0, maximum=1.0, step=0.01, value=0.4, visible=(not is_img2img))

            with gr.Row():
                dd_inpaint_full_res = gr.Checkbox(label='Inpaint at full resolution ', value=True, visible = (not is_img2img))
                dd_inpaint_full_res_padding = gr.Slider(label='Inpaint at full resolution padding, pixels ', minimum=0, maximum=256, step=4, value=32, visible=(not is_img2img))

        # Reveal model-A options (and the B dropdown) once a model is picked.
        dd_model_a.change(
            lambda modelname: {
                dd_model_b:gr_show( modelname != "None" ),
                dd_conf_a:gr_show( modelname != "None" ),
                dd_dilation_factor_a:gr_show( modelname != "None"),
                dd_offset_x_a:gr_show( modelname != "None" ),
                dd_offset_y_a:gr_show( modelname != "None" )

            },
            inputs= [dd_model_a],
            outputs =[dd_model_b, dd_conf_a, dd_dilation_factor_a, dd_offset_x_a, dd_offset_y_a]
        )

        # Reveal model-B options once a secondary model is picked.
        dd_model_b.change(
            lambda modelname: {
                dd_preprocess_b:gr_show( modelname != "None" ),
                dd_bitwise_op:gr_show( modelname != "None" ),
                dd_conf_b:gr_show( modelname != "None" ),
                dd_dilation_factor_b:gr_show( modelname != "None"),
                dd_offset_x_b:gr_show( modelname != "None" ),
                dd_offset_y_b:gr_show( modelname != "None" )
            },
            inputs= [dd_model_b],
            outputs =[dd_preprocess_b, dd_bitwise_op, dd_conf_b, dd_dilation_factor_b, dd_offset_x_b, dd_offset_y_b]
        )

        return [info,
                dd_model_a,
                dd_conf_a, dd_dilation_factor_a,
                dd_offset_x_a, dd_offset_y_a,
                dd_preprocess_b, dd_bitwise_op,
                br,
                dd_model_b,
                dd_conf_b, dd_dilation_factor_b,
                dd_offset_x_b, dd_offset_y_b,
                dd_mask_blur, dd_denoising_strength,
                dd_inpaint_full_res, dd_inpaint_full_res_padding
        ]

    def run(self, p, info,
            dd_model_a,
            dd_conf_a, dd_dilation_factor_a,
            dd_offset_x_a, dd_offset_y_a,
            dd_preprocess_b, dd_bitwise_op,
            br,
            dd_model_b,
            dd_conf_b, dd_dilation_factor_b,
            dd_offset_x_b, dd_offset_y_b,
            dd_mask_blur, dd_denoising_strength,
            dd_inpaint_full_res, dd_inpaint_full_res_padding):
        """Generate images and inpaint each detection.

        txt2img: generates the base image with the original parameters, then
        builds a fresh img2img/inpaint pipeline for the detection passes.
        img2img: reuses p directly on its init image.

        Returns a Processed with one output image per original n_iter.
        """

        processing.fix_seed(p)
        initial_info = None
        seed = p.seed
        # Batch/iteration control is taken over: one image at a time, and
        # the original n_iter becomes the outer loop count.
        p.batch_size = 1
        ddetail_count = p.n_iter
        p.n_iter = 1
        p.do_not_save_grid = True
        p.do_not_save_samples = True
        is_txt2img = isinstance(p, StableDiffusionProcessingTxt2Img)
        if (not is_txt2img):
            orig_image = p.init_images[0]
        else:
            # Keep the txt2img pipeline and clone its settings into an
            # inpainting pipeline for the detection passes.
            p_txt = p
            p = StableDiffusionProcessingImg2Img(
                    init_images = None,
                    resize_mode = 0,
                    denoising_strength = dd_denoising_strength,
                    mask = None,
                    mask_blur= dd_mask_blur,
                    inpainting_fill = 1,
                    inpaint_full_res = dd_inpaint_full_res,
                    inpaint_full_res_padding= dd_inpaint_full_res_padding,
                    inpainting_mask_invert= 0,
                    sd_model=p_txt.sd_model,
                    outpath_samples=p_txt.outpath_samples,
                    outpath_grids=p_txt.outpath_grids,
                    prompt=p_txt.prompt,
                    negative_prompt=p_txt.negative_prompt,
                    styles=p_txt.styles,
                    seed=p_txt.seed,
                    subseed=p_txt.subseed,
                    subseed_strength=p_txt.subseed_strength,
                    seed_resize_from_h=p_txt.seed_resize_from_h,
                    seed_resize_from_w=p_txt.seed_resize_from_w,
                    sampler_name=p_txt.sampler_name,
                    n_iter=p_txt.n_iter,
                    steps=p_txt.steps,
                    cfg_scale=p_txt.cfg_scale,
                    width=p_txt.width,
                    height=p_txt.height,
                    tiling=p_txt.tiling,
                )
            p.do_not_save_grid = True
            p.do_not_save_samples = True
        output_images = []
        state.job_count = ddetail_count
        for n in range(ddetail_count):
            devices.torch_gc()
            start_seed = seed + n
            if ( is_txt2img ):
                print(f"Processing initial image for output generation {n + 1}.")
                p_txt.seed = start_seed
                processed = processing.process_images(p_txt)
                init_image = processed.images[0]
            else:
                init_image = orig_image

            output_images.append(init_image)
            masks_a = []
            masks_b_pre = []

            # Optional secondary pre-processing run
            if (dd_model_b != "None" and dd_preprocess_b):
                label_b_pre = "B"
                results_b_pre = inference(init_image, dd_model_b, dd_conf_b/100.0, label_b_pre)
                masks_b_pre = create_segmasks(results_b_pre)
                masks_b_pre = dilate_masks(masks_b_pre, dd_dilation_factor_b, 1)
                masks_b_pre = offset_masks(masks_b_pre,dd_offset_x_b, dd_offset_y_b)
                if (len(masks_b_pre) > 0):
                    results_b_pre = update_result_masks(results_b_pre, masks_b_pre)
                    segmask_preview_b = create_segmask_preview(results_b_pre, init_image)
                    shared.state.current_image = segmask_preview_b
                    if ( opts.dd_save_previews):
                        images.save_image(segmask_preview_b, opts.outdir_ddetailer_previews, "", start_seed, p.prompt, opts.samples_format, p=p)
                    gen_count = len(masks_b_pre)
                    state.job_count += gen_count
                    print(f"Processing {gen_count} model {label_b_pre} detections for output generation {n + 1}.")
                    p.seed = start_seed
                    p.init_images = [init_image]

                    # Inpaint each B detection in sequence, feeding the
                    # previous result into the next pass.
                    for i in range(gen_count):
                        p.image_mask = masks_b_pre[i]
                        if ( opts.dd_save_masks):
                            images.save_image(masks_b_pre[i], opts.outdir_ddetailer_masks, "", start_seed, p.prompt, opts.samples_format, p=p)
                        processed = processing.process_images(p)
                        p.seed = processed.seed + 1
                        p.init_images = processed.images

                    if (gen_count > 0):
                        output_images[n] = processed.images[0]
                        init_image = processed.images[0]

                else:
                    print(f"No model B detections for output generation {n} with current settings.")

            # Primary run
            if (dd_model_a != "None"):
                label_a = "A"
                if (dd_model_b != "None" and dd_bitwise_op != "None"):
                    label_a = dd_bitwise_op
                results_a = inference(init_image, dd_model_a, dd_conf_a/100.0, label_a)
                masks_a = create_segmasks(results_a)
                masks_a = dilate_masks(masks_a, dd_dilation_factor_a, 1)
                masks_a = offset_masks(masks_a,dd_offset_x_a, dd_offset_y_a)
                if (dd_model_b != "None" and dd_bitwise_op != "None"):
                    label_b = "B"
                    results_b = inference(init_image, dd_model_b, dd_conf_b/100.0, label_b)
                    masks_b = create_segmasks(results_b)
                    masks_b = dilate_masks(masks_b, dd_dilation_factor_b, 1)
                    masks_b = offset_masks(masks_b,dd_offset_x_b, dd_offset_y_b)
                    if (len(masks_b) > 0):
                        combined_mask_b = combine_masks(masks_b)
                        # Iterate in reverse so deleting emptied masks keeps
                        # the remaining indices valid.
                        for i in reversed(range(len(masks_a))):
                            if (dd_bitwise_op == "A&B"):
                                masks_a[i] = bitwise_and_masks(masks_a[i], combined_mask_b)
                            elif (dd_bitwise_op == "A-B"):
                                masks_a[i] = subtract_masks(masks_a[i], combined_mask_b)
                            if (is_allblack(masks_a[i])):
                                del masks_a[i]
                                for result in results_a:
                                    del result[i]

                    else:
                        print("No model B detections to overlap with model A masks")
                        results_a = []
                        masks_a = []

                if (len(masks_a) > 0):
                    results_a = update_result_masks(results_a, masks_a)
                    segmask_preview_a = create_segmask_preview(results_a, init_image)
                    shared.state.current_image = segmask_preview_a
                    if ( opts.dd_save_previews):
                        images.save_image(segmask_preview_a, opts.outdir_ddetailer_previews, "", start_seed, p.prompt, opts.samples_format, p=p)
                    gen_count = len(masks_a)
                    state.job_count += gen_count
                    print(f"Processing {gen_count} model {label_a} detections for output generation {n + 1}.")
                    p.seed = start_seed
                    p.init_images = [init_image]

                    for i in range(gen_count):
                        p.image_mask = masks_a[i]
                        if ( opts.dd_save_masks):
                            images.save_image(masks_a[i], opts.outdir_ddetailer_masks, "", start_seed, p.prompt, opts.samples_format, p=p)

                        processed = processing.process_images(p)
                        if initial_info is None:
                            initial_info = processed.info
                        p.seed = processed.seed + 1
                        p.init_images = processed.images

                    if (gen_count > 0):
                        output_images[n] = processed.images[0]
                        if ( opts.samples_save ):
                            images.save_image(processed.images[0], p.outpath_samples, "", start_seed, p.prompt, opts.samples_format, info=initial_info, p=p)

                else:
                    print(f"No model {label_a} detections for output generation {n} with current settings.")
            state.job = f"Generation {n + 1} out of {state.job_count}"
        if (initial_info is None):
            initial_info = "No detections found."

        return Processed(p, output_images, seed, initial_info)
334
+
335
def modeldataset(model_shortname):
    """Return the class-label dataset key for a model: 'coco' for mmdet
    segmentation checkpoints, 'bbox' otherwise."""
    path = modelpath(model_shortname)
    is_segm = "mmdet" in path and "segm" in path
    return 'coco' if is_segm else 'bbox'
342
+
343
def modelpath(model_shortname):
    """Resolve a dropdown title like 'name [hash]' back to its checkpoint
    path by matching the bracketed short hash (returns None if no model
    matches)."""
    model_list = modelloader.load_models(model_path=dd_models_path, ext_filter=[".pth"])
    target_hash = model_shortname.split("[")[-1].split("]")[0]
    for candidate in model_list:
        if model_hash(candidate) == target_hash:
            return candidate
349
+
350
def update_result_masks(results, masks):
    """Overwrite the segmentation entries of `results` (results[2]) with
    boolean versions of `masks`, in place, and return `results`."""
    for idx, mask in enumerate(masks):
        results[2][idx] = np.array(mask, dtype=bool)
    return results
355
+
356
def create_segmask_preview(results, image):
    """Render a preview image with each detection tinted in a random color
    and labeled with "name:score" near its centroid.

    `results` is the [names, bboxes, masks] list produced by inference;
    returns the original image unchanged when there are no detections.
    """
    labels = results[0]
    bboxes = results[1]
    segms = results[2]

    cv2_image = np.array(image)
    # PIL is RGB; OpenCV drawing expects BGR.
    cv2_image = cv2_image[:, :, ::-1].copy()

    for i in range(len(segms)):
        # One random bright color per detection, blended over the image.
        color = np.full_like(cv2_image, np.random.randint(100, 256, (1, 3), dtype=np.uint8))
        alpha = 0.2
        color_image = cv2.addWeighted(cv2_image, alpha, color, 1-alpha, 0)
        cv2_mask = segms[i].astype(np.uint8) * 255
        cv2_mask_bool = np.array(segms[i], dtype=bool)
        # Centroid of the mask pixels; argwhere yields (row, col) pairs.
        centroid = np.mean(np.argwhere(cv2_mask_bool),axis=0)
        centroid_x, centroid_y = int(centroid[1]), int(centroid[0])

        cv2_mask_rgb = cv2.merge((cv2_mask, cv2_mask, cv2_mask))
        cv2_image = np.where(cv2_mask_rgb == 255, color_image, cv2_image)
        # NOTE(review): color components start at 100, so -100 can reach 0
        # but relies on uint8 values >= 100 to avoid wrap-around — confirm.
        text_color = tuple([int(x) for x in ( color[0][0] - 100 )])
        name = labels[i]
        # mmdet v2 bboxes carry the confidence as the 5th element.
        score = bboxes[i][4]
        score = str(score)[:4]
        text = name + ":" + score
        cv2.putText(cv2_image, text, (centroid_x - 30, centroid_y), cv2.FONT_HERSHEY_DUPLEX, 0.4, text_color, 1, cv2.LINE_AA)

    if ( len(segms) > 0):
        preview_image = Image.fromarray(cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB))
    else:
        preview_image = image

    return preview_image
388
+
389
def is_allblack(mask):
    """Return True when *mask* has no nonzero (white) pixels at all."""
    return not np.count_nonzero(np.array(mask))
392
+
393
def bitwise_and_masks(mask1, mask2):
    """Intersect two masks pixel-wise (bitwise AND) and return a PIL image."""
    a = np.array(mask1)
    b = np.array(mask2)
    return Image.fromarray(cv2.bitwise_and(a, b))
399
+
400
def subtract_masks(mask1, mask2):
    """Remove mask2's area from mask1 (saturating subtraction) and return a
    PIL image."""
    minuend = np.array(mask1)
    subtrahend = np.array(mask2)
    return Image.fromarray(cv2.subtract(minuend, subtrahend))
406
+
407
def dilate_masks(masks, dilation_factor, iter=1):
    """Grow each mask with a square morphological dilation kernel.

    Args:
        masks: sequence of 8-bit masks (PIL images or arrays).
        dilation_factor: side length of the square kernel; 0 returns the
            input unchanged.
        iter: number of dilation passes (name kept for interface
            compatibility even though it shadows the builtin).

    Returns:
        List of dilated masks as PIL images (or the original sequence when
        dilation_factor is 0).
    """
    if dilation_factor == 0:
        return masks
    kernel = np.ones((dilation_factor, dilation_factor), np.uint8)
    dilated_masks = []
    for mask in masks:
        cv2_mask = np.array(mask)
        # BUG FIX: cv2.dilate's third positional parameter is `dst`, not
        # `iterations`, so the iteration count was previously ignored.
        dilated_mask = cv2.dilate(cv2_mask, kernel, iterations=iter)
        dilated_masks.append(Image.fromarray(dilated_mask))
    return dilated_masks
417
+
418
def offset_masks(masks, offset_x, offset_y):
    """Translate every mask by (offset_x, offset_y).

    Rows roll by -offset_y (positive y moves mask content up) and columns
    roll by offset_x; content wraps around the image edges.
    """
    if offset_x == 0 and offset_y == 0:
        return masks
    shifted = []
    for mask in masks:
        arr = np.array(mask)
        arr = np.roll(arr, -offset_y, axis=0)
        arr = np.roll(arr, offset_x, axis=1)
        shifted.append(Image.fromarray(arr))
    return shifted
430
+
431
def combine_masks(masks):
    """OR-combine a list of masks into one PIL mask.

    Returns None for an empty input instead of raising IndexError on
    masks[0]; every call site in this script already checks for detections
    first, so this only hardens the edge case.
    """
    if not masks:
        return None
    combined_cv2_mask = np.array(masks[0])
    for mask in masks[1:]:
        combined_cv2_mask = cv2.bitwise_or(combined_cv2_mask, np.array(mask))
    return Image.fromarray(combined_cv2_mask)
440
+
441
def on_ui_settings():
    """Register ddetailer's mask/preview saving options in the settings UI."""
    section = ("ddetailer", "Detection Detailer")
    shared.opts.add_option("dd_save_previews", shared.OptionInfo(False, "Save mask previews", section=section))
    shared.opts.add_option("outdir_ddetailer_previews", shared.OptionInfo("extensions/ddetailer/outputs/masks-previews", 'Output directory for mask previews', section=section))
    shared.opts.add_option("dd_save_masks", shared.OptionInfo(False, "Save masks", section=section))
    shared.opts.add_option("outdir_ddetailer_masks", shared.OptionInfo("extensions/ddetailer/outputs/masks", 'Output directory for masks', section=section))
446
+
447
def create_segmasks(results):
    """Convert the boolean segmentation arrays in results[2] into 8-bit
    (0/255) PIL mask images."""
    return [Image.fromarray(seg.astype(np.uint8) * 255) for seg in results[2]]
456
+
457
+ import mmcv
458
+ from mmdet.core import get_classes
459
+ from mmdet.apis import (inference_detector,
460
+ init_detector)
461
+
462
def get_device():
    """Return the torch device string used for mmdet inference.

    "cuda:<id>" when --device-id was passed on the command line, otherwise
    "cpu".
    """
    # NOTE(review): without --device-id this always falls back to CPU even
    # when CUDA is available — confirm this is intended (the newer
    # dddetailer fork queries devices.get_optimal_device_name() instead).
    device_id = shared.cmd_opts.device_id
    if device_id is not None:
        cuda_device = f"cuda:{device_id}"
    else:
        cuda_device = "cpu"
    return cuda_device
469
+
470
def inference(image, modelname, conf_thres, label):
    """Dispatch detection to the bbox or segm mmdet pipeline.

    The choice is made from the resolved checkpoint path: paths under
    mmdet/bbox use rectangle detection, paths under mmdet/segm use instance
    segmentation.

    Raises:
        ValueError: when the model path matches neither pipeline.
            (Previously this fell through and raised NameError on the
            unbound local `results`.)
    """
    path = modelpath(modelname)
    if "mmdet" in path and "bbox" in path:
        return inference_mmdet_bbox(image, modelname, conf_thres, label)
    if "mmdet" in path and "segm" in path:
        return inference_mmdet_segm(image, modelname, conf_thres, label)
    raise ValueError(f"Unsupported detection model path: {path}")
477
+
478
def inference_mmdet_segm(image, modelname, conf_thres, label):
    """Run mmdet (v2 API) instance segmentation on a PIL image.

    Returns a 3-list [names, bboxes, masks] for detections whose bbox score
    exceeds conf_thres; names are prefixed with `label`.
    """
    model_checkpoint = modelpath(modelname)
    # mmdet convention: the config file sits next to the checkpoint.
    model_config = os.path.splitext(model_checkpoint)[0] + ".py"
    model_device = get_device()
    model = init_detector(model_config, model_checkpoint, device=model_device)
    # mmdet v2 returns (per-class bbox arrays, per-class mask lists).
    mmdet_results = inference_detector(model, np.array(image))
    bbox_results, segm_results = mmdet_results
    dataset = modeldataset(modelname)
    classes = get_classes(dataset)
    # Expand per-class arrays into one class index per detection.
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_results)
    ]
    # NOTE(review): emptiness is judged from class 0 only; detections in
    # other classes alone would be discarded — confirm intended.
    n,m = bbox_results[0].shape
    if (n == 0):
        return [[],[],[]]
    labels = np.concatenate(labels)
    bboxes = np.vstack(bbox_results)
    segms = mmcv.concat_list(segm_results)
    # Last bbox column is the confidence score.
    filter_inds = np.where(bboxes[:,-1] > conf_thres)[0]
    results = [[],[],[]]
    for i in filter_inds:
        results[0].append(label + "-" + classes[labels[i]])
        results[1].append(bboxes[i])
        results[2].append(segms[i])

    return results
505
+
506
def inference_mmdet_bbox(image, modelname, conf_thres, label):
    """Run mmdet (v2 API) bounding-box detection on a PIL image.

    Each detection's box is rasterized into a full-image boolean mask so the
    return shape matches inference_mmdet_segm: [names, bboxes, masks].
    """
    model_checkpoint = modelpath(modelname)
    # mmdet convention: the config file sits next to the checkpoint.
    model_config = os.path.splitext(model_checkpoint)[0] + ".py"
    model_device = get_device()
    model = init_detector(model_config, model_checkpoint, device=model_device)
    results = inference_detector(model, np.array(image))
    cv2_image = np.array(image)
    # PIL is RGB; OpenCV expects BGR.
    cv2_image = cv2_image[:, :, ::-1].copy()
    # Grayscale only provides the (H, W) shape for the mask canvases.
    cv2_gray = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2GRAY)

    segms = []
    # results[0] holds class-0 boxes as (x0, y0, x1, y1, confidence).
    for (x0, y0, x1, y1, conf) in results[0]:
        cv2_mask = np.zeros((cv2_gray.shape), np.uint8)
        # Filled (-1 thickness) white rectangle over the detection area.
        cv2.rectangle(cv2_mask, (int(x0), int(y0)), (int(x1), int(y1)), 255, -1)
        cv2_mask_bool = cv2_mask.astype(bool)
        segms.append(cv2_mask_bool)

    n,m = results[0].shape
    if (n == 0):
        return [[],[],[]]
    bboxes = np.vstack(results[0])
    # Last bbox column is the confidence score.
    filter_inds = np.where(bboxes[:,-1] > conf_thres)[0]
    results = [[],[],[]]
    for i in filter_inds:
        results[0].append(label)
        results[1].append(bboxes[i])
        results[2].append(segms[i])

    return results
535
+
536
+ script_callbacks.on_ui_settings(on_ui_settings)
exhm/detailer/sd-webui-ddsd-orig/.gitignore ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea
161
+ *.pt
162
+ *.pth
163
+ *.ckpt
164
+ *.safetensors
165
+ models/control_sd15_scribble.pth
166
+ detected_maps/
167
+
168
+ # Ignore all .ddcfg files except for Empty.ddcfg
169
+ config/*.ddcfg
170
+ !config/Empty.ddcfg
exhm/detailer/sd-webui-ddsd-orig/README.md ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # sd-webui-ddsd
2
+ 자동으로 동작하는 후보정 작업 확장.
3
+
4
+ ## What is
5
+ ### Outpaint
6
+ #### Outpaint How to use
7
+ 1. 증가시킬 픽셀을 선택
8
+ 2. 증가시킬 방향 선택
9
+ 1. 방향이 None이면 미동작
10
+ 3. 증가시킬때 사용할 프롬프트 작성(전체 인페인팅시 이용)
11
+ 1. 비어있을때 원본 프롬프트 사용
12
+ 4. Denoise, CFG, Step 선택
13
+ 1. Step은 최소 원본 Step 2 ~ 3배 이상 적절한 값 요구
14
+ 5. 생성!
15
+ ### Upscale
16
+ 이미지를 특정 크기로 잘라내어 타일별 업스케일을 하는 도구. 업스케일시 VRAM을 적게 소모.
17
+ #### Upscale How to use
18
+ 1. 크기를 키울때 사용할 upscaler 모델 선택
19
+ 2. 크기를 키울 배수 선택
20
+ 3. 가로, 세로를 내가 단일로 생성할 수 있는 이미지의 최대 크기로 선택(이미지 생성 속도를 최대한 빠르게 하기 위하여)
21
+ 1. 가로 또는 세로중 한개를 0으로 세팅시 업스케일만 동작(세부 구조를 디테일하게하는 인페인팅이 동작하지 않음)
22
+ 4. before running 체크
23
+ 1. 체크시 업스케일을 먼저 돌려서 인페인팅의 퀄리티 상승. 단, 인페인팅시 더 많은 VRAM 요구
24
+ 5. 생성!
25
+ ### Detect Detailer
26
+ 특정 키워드로 이미지를 탐색 후 인페인팅하는 도구.
27
+ #### Detect Detailer How to use
28
+ 0. 인페인팅의 범위 제한(I2I 전용)
29
+ 1. Inner 옵션은 I2I의 인페인팅에서 칠한 범위 내부만 이미지를 탐색
30
+ 2. Outer 옵션은 I2I의 인페인팅에서 칠한 범위 외부만 이미지를 탐색
31
+ 1. 탐색 키워드 작성
32
+ 1. 탐색할 키워드를 작성(face, person 등등)
33
+ 1. 탐색할 키워드는 문장형도 가능(happy face, running dog)
34
+ 2. 탐색할 키워드를 .으로 분할 가능(face. arm, face. chest)
35
+ 2. 탐색할 키워드에 사용 가능한 추가 옵션 존재
36
+ 1. &lt;area:type&gt;을 이용하여 특정 범위 탐색 가능
37
+ 1. 범위 종류는 left, right, top, bottom, all이 존재
38
+ 2. &lt;file:filename&gt;을 이용하여 특정 파일 탐색 가능
39
+ 1. 특정 파일의 위치는 models/ddsdmask
40
+ 3. &lt;model:type&gt;을 이용하여 특정 모델 탐색 가능
41
+ 1. type은 face_media_full, face_media_short와 파일명이 존재
42
+ 2. 파일은 models/yolo에 위치
43
+ 4. &lt;type1:type2:dilation:confidence&gt; 같이 type1과 type2외에 dilation과 confidence도 추가 입력 가능
44
+ 1. confidence는 model 타입에서만 사용되는 값
45
+ 3. 탐색한 범위를 AND, OR, XOR, NAND, NOR 등의 게이트 옵션으로 연산 가능
46
+ 1. face OR (body NAND outfit) -> 괄호안의 body NAND outfit을 먼저 한 후에 face와 OR 연산을 동작
47
+ 2. 괄호는 최대한 적게 이용. 많이 이용시 많은 VRAM 소모.
48
+ 3. 동작은 왼쪽에서 오른쪽으로 순차적 동작.
49
+ 4. 탐색할 키워드에 옵션으로 여러가지 옵션 조절 가능
50
+ 1. face:0:0.4:4 OR outfit:2:0.5:8
51
+ 2. 순서대로 탐색할 프롬프트, SAM 탐색 레벨(0-2), 민감도(0-1), 팽창값(0-512)을 가짐
52
+ 3. 값을 생략하면 초기값으로 세팅
53
+ 2. 긍정 프롬프트 입력
54
+ 1. 인페인팅시 동작시킬 긍정 프롬프트 입력
55
+ 3. 부정 프롬프트 입력
56
+ 1. 인페인팅시 동작시킬 부정 프롬프트 입력
57
+ 4. Denoising, CFG, Steps, Clip skip, Ckpt, Vae 수정
58
+ 1. 인페인팅시 동작에 영향을 주는 옵션
59
+ 5. Split Mask 옵션 체크
60
+ 1. 체크시 마스크가 떨어져 있는것이 존재한다면 따로 인페인팅.
61
+ 1. 따로 인페인팅시 퀄리티 상승. 하지만 더 많은 인페인팅을 요구하여 생성속도 하락.
62
+ 6. Remove Area 옵션 체크
63
+ 1. Split Mask 옵션이 Enable 되어야만 동작
64
+ 2. 분할 인페인팅시 일정 크기 이하의 면적은 인페인팅에서 제외
65
+ 7. 생성!
66
+ ### Postprocessing
67
+ 최종적으로 생성된 이미지에 가하는 후보정
68
+ #### Postprocessing How to use
69
+ 1. 가하고자 하는 후보정을 선택
70
+ 2. 생성!
71
+ ### Watermark
72
+ 이미지 생성 최종본에 자신의 증명을 기입하는 기능
73
+ #### Watermark How to use
74
+ 1. 기입할 증명의 종류 선택(글자, 이미지)
75
+ 2. 선택한 종류를 입력
76
+ 3. 선택한 종류의 크기와 위치를 지정
77
+ 4. Padding으로 해당 위치에서 얼만큼 떨어져 있을지 설정
78
+ 5. Alpha로 얼만큼 투명할지 결정
79
+ 6. 생성!
80
+
81
+ ### Video
82
+ [![Stable Diffusion - DDSD 확장 기능 (No - Talking)](http://img.youtube.com/vi/9wfZyJhPPho/0.jpg)](https://youtu.be/9wfZyJhPPho)
83
+
84
+ ## Installation
85
+ 1. 다운로드 [CUDA](https://developer.nvidia.com/cuda-toolkit-archive)와 [cuDNN](https://developer.nvidia.com/rdp/cudnn-archive)
86
+ 1. 자신이 가진 WebUI와 동일한 버전의 `CUDA`와 `cuDNN`버전으로 설치
87
+ 1. 이것은 다운로드를 편하게 하기위한 구글링크. [CUDA 117](https://drive.google.com/file/d/1HRTOLTB44-pRcrwIw9lQak2OC2ohNle3/view?usp=share_link)와 [cuDNN](https://drive.google.com/file/d/1QcgaxUra0WnCWrCLjsWp_QKw1PKcvqpj/view?usp=share_link)
88
+ 2. `CUDA` 설치 후 해당 폴더에 `cuDNN` 덮어쓰기
89
+ 3. 일정 버전은 Easy Install을 지원. `CUDA`와 `cuDNN` 불필요.
90
+ 1. 지원버전 (torch == 1.13.1+cu117, torch==2.0.0+cu117 , torch==2.0.0+cu118)
91
+ 2. 확장탭에서 설치 `https://github.com/NeoGraph-K/sd-webui-ddsd` 또는 다운로드 후 `extension/` 에 풀어넣기
92
+ 3. WebUI를 완전히 재시작
93
+
94
+ ## Credits
95
+
96
+ dustysys/[ddetailer](https://github.com/dustysys/ddetailer)
97
+
98
+ AUTOMATIC1111/[stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
99
+
100
+ facebookresearch/[Segment Anything](https://github.com/facebookresearch/segment-anything)
101
+
102
+ IDEA-Research/[GroundingDINO](https://github.com/IDEA-Research/GroundingDINO)
103
+
104
+ IDEA-Research/[Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything)
105
+
106
+ continue-revolution/[sd-webui-segment-anything](https://github.com/continue-revolution/sd-webui-segment-anything)
107
+
108
+ Bing-su/[adetailer](https://github.com/Bing-su/adetailer)
exhm/detailer/sd-webui-ddsd-orig/config/Empty.ddcfg ADDED
@@ -0,0 +1 @@
 
 
1
+ {"enable_script_names": "dynamic_thresholding;dynamic_prompting", "disable_watermark": true, "disable_postprocess": true, "disable_upscaler": true, "ddetailer_before_upscaler": false, "scalevalue": 2, "upscaler_sample": "Original", "overlap": 32, "upscaler_index": "SwinIR_4x", "rewidth": 512, "reheight": 512, "denoising_strength": 0.1, "upscaler_ckpt": "Original", "upscaler_vae": "Original", "disable_detailer": true, "disable_mask_paint_mode": true, "inpaint_mask_mode": "Inner", "detailer_sample": "Original", "detailer_sam_model": "sam_vit_b_01ec64.pth", "detailer_dino_model": "groundingdino_swinb_cogcoor.pth", "dino_full_res_inpaint": true, "dino_inpaint_padding": 0, "detailer_mask_blur": 4, "disable_outpaint": true, "outpaint_sample": "Original", "outpaint_mask_blur": 8, "dino_detect_count": 5, "dino_detection_ckpt_1": "Original", "dino_detection_vae_1": "Original", "dino_detection_prompt_1": "", "dino_detection_positive_1": "", "dino_detection_negative_1": "", "dino_detection_denoise_1": 0.4, "dino_detection_cfg_1": 0, "dino_detection_steps_1": 0, "dino_detection_spliter_disable_1": true, "dino_detection_spliter_remove_area_1": 16, "dino_detection_clip_skip_1": 0, "dino_detection_ckpt_2": "Original", "dino_detection_vae_2": "Original", "dino_detection_prompt_2": "", "dino_detection_positive_2": "", "dino_detection_negative_2": "", "dino_detection_denoise_2": 0.4, "dino_detection_cfg_2": 0, "dino_detection_steps_2": 0, "dino_detection_spliter_disable_2": true, "dino_detection_spliter_remove_area_2": 16, "dino_detection_clip_skip_2": 0, "dino_detection_ckpt_3": "Original", "dino_detection_vae_3": "Original", "dino_detection_prompt_3": "", "dino_detection_positive_3": "", "dino_detection_negative_3": "", "dino_detection_denoise_3": 0.4, "dino_detection_cfg_3": 0, "dino_detection_steps_3": 0, "dino_detection_spliter_disable_3": true, "dino_detection_spliter_remove_area_3": 16, "dino_detection_clip_skip_3": 0, "dino_detection_ckpt_4": "Original", 
"dino_detection_vae_4": "Original", "dino_detection_prompt_4": "", "dino_detection_positive_4": "", "dino_detection_negative_4": "", "dino_detection_denoise_4": 0.4, "dino_detection_cfg_4": 0, "dino_detection_steps_4": 0, "dino_detection_spliter_disable_4": true, "dino_detection_spliter_remove_area_4": 16, "dino_detection_clip_skip_4": 0, "dino_detection_ckpt_5": "Original", "dino_detection_vae_5": "Original", "dino_detection_prompt_5": "", "dino_detection_positive_5": "", "dino_detection_negative_5": "", "dino_detection_denoise_5": 0.4, "dino_detection_cfg_5": 0, "dino_detection_steps_5": 0, "dino_detection_spliter_disable_5": true, "dino_detection_spliter_remove_area_5": 16, "dino_detection_clip_skip_5": 0, "watermark_count": 2, "watermark_type_1": "Text", "watermark_position_1": "Center", "watermark_image_1": null, "watermark_image_size_width_1": 100, "watermark_image_size_height_1": 100, "watermark_text_1": "", "watermark_text_color_1": null, "watermark_text_font_1": "Courier New", "watermark_text_size_1": 50, "watermark_padding_1": 10, "watermark_alpha_1": 0.4, "watermark_type_2": "Text", "watermark_position_2": "Center", "watermark_image_2": null, "watermark_image_size_width_2": 100, "watermark_image_size_height_2": 100, "watermark_text_2": "", "watermark_text_color_2": null, "watermark_text_font_2": "Courier New", "watermark_text_size_2": 50, "watermark_padding_2": 10, "watermark_alpha_2": 0.4, "postprocessing_count": 2, "pp_type_1": "none", "pp_saturation_strength_1": 1.1, "pp_sharpening_radius_1": 2, "pp_sharpening_percent_1": 150, "pp_sharpening_threshold_1": 3, "pp_gaussian_radius_1": 2, "pp_brightness_strength_1": 1.1, "pp_color_strength_1": 1.1, "pp_contrast_strength_1": 1.1, "pp_hue_strength_1": 0, "pp_bilateral_sigmaC_1": 10, "pp_bilateral_sigmaS_1": 10, "pp_color_tint_type_name_1": "warm", "pp_color_tint_lut_name_1": "FGCineBasic.cube", "pp_type_2": "none", "pp_saturation_strength_2": 1.1, "pp_sharpening_radius_2": 2, "pp_sharpening_percent_2": 150, 
"pp_sharpening_threshold_2": 3, "pp_gaussian_radius_2": 2, "pp_brightness_strength_2": 1.1, "pp_color_strength_2": 1.1, "pp_contrast_strength_2": 1.1, "pp_hue_strength_2": 0, "pp_bilateral_sigmaC_2": 10, "pp_bilateral_sigmaS_2": 10, "pp_color_tint_type_name_2": "warm", "pp_color_tint_lut_name_2": "FGCineBasic.cube", "outpaint_count": 4, "outpaint_positive_1": "FGCineBasic.cube", "outpaint_negative_1": "", "outpaint_denoise_1": "", "outpaint_cfg_1": 0.8, "outpaint_steps_1": 0, "outpaint_pixels_1": 80, "outpaint_direction_1": 128, "outpaint_positive_2": "FGCineBasic.cube", "outpaint_negative_2": "", "outpaint_denoise_2": "", "outpaint_cfg_2": 0.8, "outpaint_steps_2": 0, "outpaint_pixels_2": 80, "outpaint_direction_2": 128, "outpaint_positive_3": "", "outpaint_negative_3": "", "outpaint_denoise_3": 0.8, "outpaint_cfg_3": 0, "outpaint_steps_3": 80, "outpaint_pixels_3": 128, "outpaint_direction_3": "None", "outpaint_positive_4": "", "outpaint_negative_4": "", "outpaint_denoise_4": 0.8, "outpaint_cfg_4": 0, "outpaint_steps_4": 80, "outpaint_pixels_4": 128, "outpaint_direction_4": "None"}
exhm/detailer/sd-webui-ddsd-orig/install.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import platform
3
+
4
+ import launch
5
+
6
+
7
+ def check_system_machine():
8
+ system = platform.system()
9
+ machine = platform.machine()
10
+ return (system, machine) in [('Windows', 'AMD64'), ('Linux', 'x86_64')]
11
+
12
+
13
+ def check_python_version(low: int, high: int):
14
+ ver = platform.python_version_tuple()
15
+ if int(ver[0]) == 3 and low <= int(ver[1]) <= high:
16
+ return ver[0] + ver[1]
17
+ return None
18
+
19
+
20
+ def install_pycocotools():
21
+ base = 'https://github.com/Bing-su/dddetailer/releases/download/pycocotools/'
22
+ urls = {
23
+ 'Windows': 'pycocotools-2.0.6-cp{ver}-cp{ver}-win_amd64.whl',
24
+ 'Linux': 'pycocotools-2.0.6-cp{ver}-cp{ver}-manylinux_2_17_x86_64.manylinux2014_x86_64.whl',
25
+ }
26
+
27
+ python_version = check_python_version(8, 11)
28
+ if not check_system_machine() or not python_version:
29
+ launch.run_pip('install pycocotools', 'sd-webui-ddsd requirement: pycocotools')
30
+ return
31
+
32
+ url = urls[platform.system()].format(ver=python_version)
33
+ launch.run_pip(f'install {base + url}', 'sd-webui-ddsd requirement: pycocotools')
34
+
35
+
36
+ def install_groundingdino():
37
+ import torch
38
+ from packaging.version import parse
39
+
40
+ # torch_version: '1.13.1' or '2.0.0' or ...
41
+ torch_version = parse(torch.__version__).base_version
42
+ # cuda_version: '117' or '118' or 'None'
43
+ cuda_version = torch.version.cuda.replace('.', '')
44
+ python_version = check_python_version(9, 10)
45
+
46
+ if (
47
+ not check_system_machine()
48
+ or (torch_version, cuda_version)
49
+ not in [('1.13.1', '117'), ('2.0.0', '117'), ('2.0.0', '118')]
50
+ or not python_version
51
+ ):
52
+ launch.run_pip('install git+https://github.com/IDEA-Research/GroundingDINO', 'sd-webui-ddsd requirement: groundingdino')
53
+ return
54
+
55
+ system = 'win' if platform.system() == 'Windows' else 'linux'
56
+ machine = 'amd64' if platform.machine() == 'AMD64' else 'x86_64'
57
+
58
+ url = 'https://github.com/Bing-su/GroundingDINO/releases/download/wheel-0.1.0/groundingdino-0.1.0+torch{torch}.cu{cuda}-cp{py}-cp{py}-{system}_{machine}.whl'
59
+ url = url.format(
60
+ torch=torch_version,
61
+ cuda=cuda_version,
62
+ py=python_version,
63
+ system=system,
64
+ machine=machine,
65
+ )
66
+
67
+ launch.run_pip(f'install {url}', 'sd-webui-ddsd requirement: groundingdino')
68
+
69
+
70
+ current_dir = os.path.dirname(os.path.realpath(__file__))
71
+ req_file = os.path.join(current_dir, 'requirements.txt')
72
+
73
+ with open(req_file) as file:
74
+ for lib in file:
75
+ version = None
76
+ lib = lib.strip()
77
+ lib = 'skimage' if lib == 'scikit-image' else lib
78
+ if '==' in lib:
79
+ lib, version = [x.strip() for x in lib.split('==')]
80
+ if not launch.is_installed(lib):
81
+ if lib == 'pycocotools':
82
+ install_pycocotools()
83
+ elif lib == 'groundingdino':
84
+ install_groundingdino()
85
+ elif lib == 'skimage':
86
+ launch.run_pip(
87
+ f'install scikit-image',
88
+ f'sd-webui-ddsd requirement: scikit-image'
89
+ )
90
+ elif lib == 'pillow_lut':
91
+ launch.run_pip(
92
+ f'install pillow_lut',
93
+ f'sd-webui-ddsd requirement: pillow_lut'
94
+ )
95
+ else:
96
+ lib = lib if version is None else lib + '==' + version
97
+ launch.run_pip(
98
+ f'install {lib}',
99
+ f'sd-webui-ddsd requirement: {lib}'
100
+ )
exhm/detailer/sd-webui-ddsd-orig/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pycocotools
2
+ segment_anything
3
+ groundingdino
4
+ scipy
5
+ scikit-image
6
+ pillow_lut
7
+ ultralytics==8.0.87
8
+ mediapipe==0.9.3.0
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd.cpython-310.pyc ADDED
Binary file (53.1 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_bs.cpython-310.pyc ADDED
Binary file (2.6 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_dino.cpython-310.pyc ADDED
Binary file (3.43 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_postprocess.cpython-310.pyc ADDED
Binary file (4.74 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_sam.cpython-310.pyc ADDED
Binary file (3.2 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_utils.cpython-310.pyc ADDED
Binary file (13.2 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd.py ADDED
The diff for this file is too large to render. See raw diff
 
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_bs.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import torch
5
+
6
+ import mediapipe as mp
7
+ import numpy as np
8
+
9
+ from PIL import Image, ImageDraw
10
+ from ultralytics import YOLO
11
+
12
+ from modules import safe
13
+ from modules.shared import cmd_opts
14
+ from modules.paths import models_path
15
+
16
+ yolo_models_path = os.path.join(models_path, 'yolo')
17
+
18
+ def mediapipe_face_detect(image, model_type, confidence):
19
+ width, height = image.size
20
+ image_np = np.array(image)
21
+
22
+ mp_face_detection = mp.solutions.face_detection
23
+ with mp_face_detection.FaceDetection(model_selection=model_type, min_detection_confidence=confidence) as face_detector:
24
+ predictor = face_detector.process(image_np)
25
+
26
+ if predictor.detections is None: return None
27
+
28
+ bboxes = []
29
+ for detection in predictor.detections:
30
+
31
+ bbox = detection.location_data.relative_bounding_box
32
+ x1 = bbox.xmin * width
33
+ y1 = bbox.ymin * height
34
+ x2 = x1 + bbox.width * width
35
+ y2 = y1 + bbox.height * height
36
+ bboxes.append([x1,y1,x2,y2])
37
+
38
+ return create_mask_from_bbox(image, bboxes)
39
+
40
+ def ultralytics_predict(image, model_type, confidence, device):
41
+ models = [os.path.join(yolo_models_path,x) for x in os.listdir(yolo_models_path) if (x.endswith('.pt') or x.endswith('.pth')) and os.path.splitext(os.path.basename(x))[0].upper() == model_type]
42
+ if len(models) == 0: return None
43
+ model = YOLO(models[0])
44
+ predictor = model(image, conf=confidence, show_labels=False, device=device)
45
+ bboxes = predictor[0].boxes.xyxy.cpu().numpy()
46
+ if bboxes.size == 0: return None
47
+ bboxes = bboxes.tolist()
48
+ return create_mask_from_bbox(image, bboxes)
49
+
50
+ def create_mask_from_bbox(image, bboxes):
51
+ mask = Image.new('L', image.size, 0)
52
+ draw = ImageDraw.Draw(mask)
53
+ for bbox in bboxes:
54
+ draw.rectangle(bbox, fill=255)
55
+ return np.array(mask)
56
+
57
+ def bs_model(image, model_type, confidence):
58
+ image = Image.fromarray(image)
59
+ orig = torch.load
60
+ torch.load = safe.unsafe_torch_load
61
+ if model_type == 'FACE_MEDIA_FULL':
62
+ mask = mediapipe_face_detect(image, 1, confidence)
63
+ elif model_type == 'FACE_MEDIA_SHORT':
64
+ mask = mediapipe_face_detect(image, 0, confidence)
65
+ else:
66
+ device = ''
67
+ if getattr(cmd_opts, 'lowvram', False) or getattr(cmd_opts, 'medvram', False):
68
+ device = 'cpu'
69
+ mask = ultralytics_predict(image, model_type, confidence, device)
70
+ torch.load = orig
71
+ return mask
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_dino.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import torch
4
+ import copy
5
+ import cv2
6
+ from collections import OrderedDict
7
+
8
+ from modules import shared
9
+ from modules.devices import device, torch_gc, cpu
10
+
11
+ import groundingdino.datasets.transforms as T
12
+ from groundingdino.models import build_model
13
+ from groundingdino.util.slconfig import SLConfig
14
+ from modules.paths import models_path
15
+ from groundingdino.util.utils import clean_state_dict
16
+
17
+ dino_model_cache = OrderedDict()
18
+ grounding_models_dir = os.path.join(models_path, "grounding")
19
+
20
+ def dino_model_list():
21
+ return [x for x in os.listdir(grounding_models_dir) if x.endswith('.pth')]
22
+
23
+ def dino_config_file_name(dino_model_name:str):
24
+ return dino_model_name.replace('.pth','.py')
25
+
26
+ def clear_dino_cache():
27
+ dino_model_cache.clear()
28
+ gc.collect()
29
+ torch_gc()
30
+
31
+ def load_dino_model(dino_checkpoint):
32
+ print(f"Initializing GroundingDINO {dino_checkpoint}")
33
+ if dino_checkpoint in dino_model_cache:
34
+ dino = dino_model_cache[dino_checkpoint]
35
+ if shared.cmd_opts.lowvram:
36
+ dino.to(device=device)
37
+ else:
38
+ clear_dino_cache()
39
+ args = SLConfig.fromfile(os.path.join(grounding_models_dir,dino_config_file_name(dino_checkpoint)))
40
+ dino = build_model(args)
41
+ checkpoint = torch.load(os.path.join(grounding_models_dir,dino_checkpoint),map_location='cpu')
42
+ dino.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
43
+ dino.to(device=device)
44
+ dino_model_cache[dino_checkpoint] = dino
45
+ dino.eval()
46
+ return dino
47
+
48
+
49
+ def load_dino_image(image_pil):
50
+ transform = T.Compose(
51
+ [
52
+ T.RandomResize([800], max_size=1333),
53
+ T.ToTensor(),
54
+ T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
55
+ ]
56
+ )
57
+ image, _ = transform(image_pil, None) # 3, h, w
58
+ return image
59
+
60
+
61
+ def get_grounding_output(model, image, caption, box_threshold):
62
+ caption = caption.lower()
63
+ caption = caption.strip()
64
+ if not caption.endswith("."):
65
+ caption = caption + "."
66
+ image = image.to(device)
67
+ with torch.no_grad():
68
+ outputs = model(image[None], captions=[caption])
69
+ if shared.cmd_opts.lowvram:
70
+ model.to(cpu)
71
+ logits = outputs["pred_logits"].sigmoid()[0] # (nq, 256)
72
+ boxes = outputs["pred_boxes"][0] # (nq, 4)
73
+
74
+ # filter output
75
+ logits_filt = logits.clone()
76
+ boxes_filt = boxes.clone()
77
+ filt_mask = logits_filt.max(dim=1)[0] > box_threshold
78
+ logits_filt = logits_filt[filt_mask] # num_filt, 256
79
+ boxes_filt = boxes_filt[filt_mask] # num_filt, 4
80
+
81
+ return boxes_filt.cpu()
82
+
83
+
84
+ def dino_predict_internal(input_image, dino_model_name, text_prompt, box_threshold):
85
+ print("Running GroundingDINO Inference")
86
+ dino_image = load_dino_image(input_image.convert("RGB"))
87
+ dino_model = load_dino_model(dino_model_name)
88
+
89
+ boxes_filt = get_grounding_output(
90
+ dino_model, dino_image, text_prompt, box_threshold
91
+ )
92
+
93
+ H, W = input_image.size[1], input_image.size[0]
94
+ for i in range(boxes_filt.size(0)):
95
+ boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
96
+ boxes_filt[i][:2] -= boxes_filt[i][2:] / 2
97
+ boxes_filt[i][2:] += boxes_filt[i][:2]
98
+ clear_dino_cache()
99
+ return boxes_filt
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_postprocess.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import cv2
4
+ from PIL import Image, ImageEnhance, ImageFilter, ImageOps
5
+ from pillow_lut import load_cube_file
6
+ from scipy.interpolate import UnivariateSpline
7
+
8
+ from modules.paths import models_path
9
+
10
+ lut_model_dir = os.path.join(models_path, "lut")
11
+
12
+ def lut_model_list():
13
+ return [x for x in os.listdir(lut_model_dir) if x.lower().endswith('.cube')]
14
+
15
+ def saturation_image(image:Image.Image, strength:float) -> Image.Image: # 채도 조절
16
+ return ImageEnhance.Color(image).enhance(strength)
17
+ def sharpening_image(image:Image.Image, radius:float, percent:int, threshold:float) -> Image.Image: # 선명도 조절
18
+ return image.filter(ImageFilter.UnsharpMask(radius=radius, percent=percent, threshold=threshold))
19
+ def gaussian_blur_image(image:Image.Image, radius:float) -> Image.Image: # 흐림도 조절
20
+ return image.filter(ImageFilter.GaussianBlur(radius=radius))
21
+ def brightness_image(image:Image.Image, strength:float) -> Image.Image: # 밝기 조절
22
+ return ImageEnhance.Brightness(image).enhance(strength)
23
+ def color_image(image:Image.Image, strength:float) -> Image.Image: # 색조 조절
24
+ return ImageEnhance.Color(image).enhance(strength)
25
+ def contrast_image(image:Image.Image, strength:float) -> Image.Image: # 대비 조절
26
+ return ImageEnhance.Contrast(image).enhance(strength)
27
+ def color_extraction_image(image:Image.Image, lower:tuple[int,int,int], upper:tuple[int,int,int], strength:float) -> Image.Image: # 색상 추출 및 변화
28
+ image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2HSV)
29
+ mask = cv2.inRange(image_np, lower, upper)
30
+ image_np = image_np.astype(np.float64)
31
+ image_np[mask != 0] *= strength
32
+ image_np = image_np.astype(np.uint8)
33
+ return Image.fromarray(cv2.cvtColor(image_np, cv2.COLOR_HSV2RGB))
34
+ def hue_image(image:Image.Image, strength:float) -> Image.Image: # Hue 조절
35
+ image_np = np.array(image)
36
+ image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2HSV)
37
+ image_np[..., 0] = (image_np[..., 0] + strength * 180) % 180
38
+ return Image.fromarray(cv2.cvtColor(image_np, cv2.COLOR_HSV2RGB))
39
+ def inversion_image(image:Image.Image) -> Image.Image: # 반전
40
+ return ImageOps.invert(image)
41
+ def bilateral_image(image:Image.Image, sigmaC:int, sigmaS:int) -> Image.Image: # 양방향 필터
42
+ image_np = np.array(image)
43
+ return Image.fromarray(cv2.bilateralFilter(image_np, -1, sigmaC, sigmaS))
44
+ def color_tint_lut_image(image:Image.Image, lut_file:str) -> Image.Image: # 색상 조절
45
+ lut = load_cube_file(os.path.join(lut_model_dir, lut_file))
46
+ return image.filter(lut)
47
+ def color_tint_type_image(image:Image.Image, type:str) -> Image.Image: # 색온도 조절(Warm, Cool)
48
+ increase = UnivariateSpline([0,64,128,192,256],[0,70,140,210,256])(range(256))
49
+ decrease = UnivariateSpline([0,64,128,192,256],[0,30,80,120,192])(range(256))
50
+ image_np = np.array(image)
51
+ r, g, b = cv2.split(image_np)
52
+ r = cv2.LUT(r, increase if type == 'warm' else decrease).astype(np.uint8)
53
+ b = cv2.LUT(b, decrease if type == 'warm' else increase).astype(np.uint8)
54
+ image_np = cv2.merge((r, g, b))
55
+ h, s, v = cv2.split(cv2.cvtColor(image_np, cv2.COLOR_RGB2HSV))
56
+ s = cv2.LUT(s, increase if type == 'warm' else decrease).astype(np.uint8)
57
+ return Image.fromarray(cv2.cvtColor(cv2.merge((h, s, v)), cv2.COLOR_HSV2RGB))
58
+
59
+ def ddsd_postprocess(image:Image.Image, pptype:str,
60
+ saturation_strength:float,
61
+ sharpening_radius:float, sharpening_percent:int, sharpening_threshold:float,
62
+ gaussian_blur_radius:float,
63
+ brightness_strength:float,
64
+ color_strength:float,
65
+ contrast_strength:float,
66
+ #color_extraction_lower:tuple[int,int,int], color_extraction_upper:tuple[int,int,int], color_extraction_strength:float,
67
+ hue_strength:float,
68
+ bilateral_sigmaC:int, bilateral_sigmaS:int,
69
+ color_tint_lut_file:str,
70
+ color_tint_type_name:str) -> Image.Image:
71
+ if pptype == 'saturation': return saturation_image(image, saturation_strength)
72
+ if pptype == 'sharpening': return sharpening_image(image, sharpening_radius, sharpening_percent, sharpening_threshold)
73
+ if pptype == 'gaussian blur': return gaussian_blur_image(image, gaussian_blur_radius)
74
+ if pptype == 'brightness': return brightness_image(image, brightness_strength)
75
+ if pptype == 'color': return color_image(image, color_strength)
76
+ if pptype == 'contrast': return contrast_image(image, contrast_strength)
77
+ #if pptype == 'color extraction': return color_extraction_image(image, color_extraction_lower, color_extraction_upper, color_extraction_strength)
78
+ if pptype == 'hue': return hue_image(hue_strength)
79
+ if pptype == 'inversion': return inversion_image(image)
80
+ if pptype == 'bilateral': return bilateral_image(image, bilateral_sigmaC, bilateral_sigmaS)
81
+ if pptype == 'color tint(type)': return color_tint_type_image(image, color_tint_type_name)
82
+ if pptype == 'color tint(lut)': return color_tint_lut_image(image, color_tint_lut_file)
83
+ return image
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_sam.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import gc
5
+ import cv2
6
+
7
+ from modules import shared
8
+ from modules.paths import models_path
9
+ from modules.safe import unsafe_torch_load, load
10
+ from modules.devices import device, torch_gc, cpu
11
+
12
+ from PIL import Image
13
+ from collections import OrderedDict
14
+ from scipy.ndimage import binary_dilation
15
+ from segment_anything import SamPredictor, sam_model_registry
16
+ from scripts.ddsd_dino import dino_predict_internal, clear_dino_cache
17
+
18
+ sam_model_cache = OrderedDict()
19
+ sam_model_dir = os.path.join(models_path, "sam")
20
+
21
+ def sam_model_list():
22
+ return [x for x in os.listdir(sam_model_dir) if x.endswith('.pth')]
23
+
24
+ def load_sam_model(sam_checkpoint):
25
+ model_type = '_'.join(sam_checkpoint.split('_')[1:-1])
26
+ sam_checkpoint = os.path.join(sam_model_dir, sam_checkpoint)
27
+ torch.load = unsafe_torch_load
28
+ sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
29
+ sam.to(device=device)
30
+ sam.eval()
31
+ torch.load = load
32
+ return sam
33
+
34
+ def clear_sam_cache():
35
+ sam_model_cache.clear()
36
+ gc.collect()
37
+ torch_gc()
38
+
39
+ def clear_cache():
40
+ clear_sam_cache()
41
+ clear_dino_cache()
42
+
43
+ def dilate_mask(mask, dilation):
44
+ dilation_kernel = np.ones((dilation, dilation), np.uint8)
45
+ return cv2.dilate(mask, dilation_kernel)
46
+
47
+ def init_sam_model(sam_model_name):
48
+ print('Initializing SAM')
49
+ if sam_model_name in sam_model_cache:
50
+ sam = sam_model_cache[sam_model_name]
51
+ if(shared.cmd_opts.lowvram):
52
+ sam.to(device=device)
53
+ return sam
54
+ elif sam_model_name in sam_model_list():
55
+ clear_sam_cache()
56
+ sam_model_cache[sam_model_name] = load_sam_model(sam_model_name)
57
+ return sam_model_cache[sam_model_name]
58
+ else:
59
+ Exception(f'{sam_model_name} not found, please download model to models/sam')
60
+
61
+ def sam_predict(sam_model_name, dino_model_name, image, image_np, image_np_rgb, dino_text, dino_box_threshold, dilation, sam_level):
62
+ print('Start SAM Processing')
63
+
64
+ assert dino_text, 'Please input dino text'
65
+
66
+ boxes = dino_predict_internal(image, dino_model_name, dino_text, dino_box_threshold)
67
+
68
+ if boxes.shape[0] < 1: return None
69
+
70
+ sam = init_sam_model(sam_model_name)
71
+
72
+ print(f'Running SAM Inference {image_np_rgb.shape}')
73
+ predictor = SamPredictor(sam)
74
+ predictor.set_image(image_np_rgb)
75
+ transformed_boxes = predictor.transform.apply_boxes_torch(boxes, image_np.shape[:2])
76
+ masks, _, _ = predictor.predict_torch(
77
+ point_coords = None,
78
+ point_labels = None,
79
+ boxes = transformed_boxes.to(device),
80
+ multimask_output = True
81
+ )
82
+
83
+ masks = masks.permute(1,0,2,3).cpu().numpy()
84
+
85
+ if shared.cmd_opts.lowvram:
86
+ sam.to(cpu)
87
+ clear_sam_cache()
88
+
89
+ return dilate_mask(np.any(masks[sam_level], axis=0).astype(np.uint8) * 255,dilation)
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_utils.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import numpy as np
4
+ import cv2
5
+ import gc
6
+ import matplotlib.font_manager
7
+ from glob import glob
8
+ from PIL import Image, ImageDraw, ImageFont
9
+ from scripts.ddsd_sam import sam_predict, clear_cache, dilate_mask
10
+ from scripts.ddsd_bs import bs_model
11
+ from modules.devices import torch_gc
12
+ from skimage import measure, exposure
13
+
14
+ from modules.paths import models_path
15
+ from modules.processing import StableDiffusionProcessingImg2Img
16
+
17
# Gate keywords used to combine detection masks (e.g. "face OR outfit").
token_split = re.compile(r"(AND|OR|NOR|XOR|NAND)")
# Innermost parenthesized sub-expression; evaluated before the gates.
token_first = re.compile(r'\(([^()]+)\)')
# Placeholder injected in place of an already-evaluated parenthesized group.
token_match = re.compile(r'(\d+)GROUPMASK')
# "<type:value:...>" special tokens (area / file / model selectors).
token_file = re.compile(r'\s*<(.*)>\s*')

# Directory holding user-supplied mask images.
ddsd_mask_path = os.path.join(models_path, "ddsdmask")
# UPPERCASED file stem -> absolute path, populated by startup().
mask_embed = {}
24
+
25
def startup():
    """Build the mask-file registry from models/ddsdmask.

    Populates the module-level `mask_embed` dict with
    UPPERCASED-file-stem -> path entries for every image file found under
    the mask directory, creating the directory on first run.
    """
    global mask_embed
    if not os.path.exists(ddsd_mask_path):
        os.makedirs(ddsd_mask_path)
        # Marker file so users can locate the directory in the models tree.
        with open(os.path.join(ddsd_mask_path, 'put_in_mask_here.txt'), 'w') as f: pass

    # Bug fix: the original pattern '**\\*' hard-coded a Windows path
    # separator (broken on Linux) and never enabled recursive globbing,
    # so '**' only matched a single directory level.
    masks = glob(os.path.join(ddsd_mask_path, '**', '*'), recursive=True)
    masks = [(x, *os.path.splitext(os.path.basename(x))) for x in masks if os.path.isfile(x)]
    masks = [(x, y) for x, y, z in masks if z in ['.png', '.jpg', '.jpeg', '.webp']]
    mask_embed = {y.upper(): x for x, y in masks}

startup()
37
+
38
def try_convert(data, type, default, min, max):
    """Convert `data` with the callable `type`, clamping the result to
    [min, max]. Falls back to `default` when the conversion raises."""
    try:
        value = type(data)
    except (ValueError, TypeError):
        return default
    if value < min:
        return min
    if value > max:
        return max
    return value
46
+
47
def prompt_spliter(prompt:str, split_token:str, count:int):
    """Split `prompt` on `split_token`, padding with '' so the result has
    exactly `count` parts (extras are truncated)."""
    parts = prompt.split(split_token)
    if len(parts) < count:
        parts.extend([''] * (count - len(parts)))
    return parts[:count]
52
+
53
def combine_masks(mask, combine_masks_option, mask2):
    """Combine two uint8 masks with a boolean gate.

    Supported gates: AND, OR, XOR, NOR, NAND. Returns None (implicitly)
    for an unknown gate name, matching the original behavior.
    """
    gates = {
        'AND': lambda a, b: cv2.bitwise_and(a, b),
        'OR': lambda a, b: cv2.bitwise_or(a, b),
        'XOR': lambda a, b: cv2.bitwise_xor(a, b),
        'NOR': lambda a, b: cv2.bitwise_not(cv2.bitwise_or(a, b)),
        'NAND': lambda a, b: cv2.bitwise_not(cv2.bitwise_and(a, b)),
    }
    gate = gates.get(combine_masks_option)
    if gate is not None:
        return gate(mask, mask2)
59
+
60
def dino_detect_from_prompt(prompt:str, detailer_sam_model, detailer_dino_model, init_image, disable_mask_paint_mode, inpaint_mask_mode, image_mask):
    """Evaluate a detection-prompt expression against `init_image` and return
    the resulting uint8 mask, or None when nothing matched (or when the
    painted-mask filter leaves no valid mode).
    """
    clear_cache()
    # All-zero greyscale canvas: the "no detection" fallback and the
    # reference for the emptiness check below.
    image_np_zero = np.array(init_image.convert('L'))
    image_np_zero[:,:] = 0
    image_np = np.array(init_image)
    image_np_rgb = image_np[:,:,:3].copy()
    image_set = (init_image, image_np, image_np_rgb, image_np_zero)
    model_set = (detailer_sam_model, detailer_dino_model)
    result = dino_prompt_detector(prompt, model_set, image_set)
    clear_cache()
    # An all-zero result means no region matched the prompt.
    if np.array_equal(result, image_np_zero): return None
    if disable_mask_paint_mode: return result
    if image_mask is None: return result
    # Restrict the detection to inside (Inner) or outside (Outer) the
    # user-painted inpaint mask from the img2img canvas.
    image_mask = np.array(image_mask.resize((result.shape[1],result.shape[0])).convert('L'))
    image_mask = np.resize(image_mask, result.shape)
    if inpaint_mask_mode == 'Inner': return cv2.bitwise_and(result, image_mask)
    if inpaint_mask_mode == 'Outer': return cv2.bitwise_and(result, cv2.bitwise_not(image_mask))
    return None
78
+
79
def dino_prompt_token_file(prompt:str, image_np_zero, image_np_rgb):
    """Resolve a "<type:value:dilation:confidence>" special token into a mask.

    Supported types:
      AREA  -- half-plane masks (left/right/top/bottom) or the full frame (all)
      FILE  -- a user-supplied mask image from models/ddsdmask (see mask_embed)
      MODEL -- a bs_model detector run on the RGB image
    `image_np_zero` is an all-zero greyscale canvas used as base and fallback.
    """
    usage_type, usage, dilation, confidence = prompt_spliter(prompt, ':', 4)
    usage_type = usage_type.upper()
    usage = usage.upper()
    # `confidence` is only consumed by the MODEL branch.
    confidence = try_convert(confidence, float, 0.3, 0, 1)
    if usage_type == 'AREA':
        if usage == 'LEFT':
            image_np_zero[:,:image_np_zero.shape[1] // 2] = 255
            image_np_zero[:,image_np_zero.shape[1] // 2:] = 0
        elif usage == 'RIGHT':
            image_np_zero[:,:image_np_zero.shape[1] // 2] = 0
            image_np_zero[:,image_np_zero.shape[1] // 2:] = 255
        elif usage == 'TOP':
            image_np_zero[:image_np_zero.shape[0] // 2,:] = 255
            image_np_zero[image_np_zero.shape[0] // 2:,:] = 0
        elif usage == 'BOTTOM':
            image_np_zero[:image_np_zero.shape[0] // 2,:] = 0
            image_np_zero[image_np_zero.shape[0] // 2:,:] = 255
        elif usage == 'ALL':
            image_np_zero[:,:] = 255
    if usage_type == 'FILE':
        # mask_embed keys are upper-cased file stems, matching `usage`.
        if usage in mask_embed:
            image = Image.open(mask_embed[usage]).convert('L')
            h, w = image_np_zero.shape[:2]
            # Resize the stored mask to the working image's dimensions.
            image = image.resize((w, h))
            image_np_zero = np.array(image)
    if usage_type == 'MODEL':
        mask = bs_model(image_np_rgb, usage, confidence)
        # Detector found nothing: return the (possibly still empty) base mask.
        if mask is None: return image_np_zero
        image_np_zero = mask
    return dilate_mask(image_np_zero, try_convert(dilation, int, 2, 0, 512))
110
+
111
def dino_prompt_detector(prompt:str, model_set, image_set):
    """Recursively evaluate a detection-prompt expression into a single mask.

    Parenthesized sub-expressions are evaluated first and substituted with
    numbered GROUPMASK placeholders; the remaining operand tokens are then
    folded left-to-right with the AND/OR/XOR/NOR/NAND gates.

    model_set: (sam_model_name, dino_model_name)
    image_set: (pil_image, image_np, image_np_rgb, zero_mask)
    """
    result_group = {}

    def resolve(token):
        # Turn one operand -- an already-computed ndarray, a GROUPMASK
        # placeholder, a "<special>" token, or a
        # "text:sam_level:threshold:dilation" spec -- into a mask ndarray.
        if isinstance(token, np.ndarray):
            return token
        if token_match.match(token.strip()) is not None:
            return result_group[token.strip()]
        match = token_file.match(token)
        if match is not None:
            return dino_prompt_token_file(match.group(1), image_set[3].copy(), image_set[2].copy())
        dino_text, sam_level, dino_box_threshold, dilation = prompt_spliter(token, ':', 4)
        mask = sam_predict(model_set[0], model_set[1], image_set[0], image_set[1], image_set[2], dino_text,
                           try_convert(dino_box_threshold.strip(), float, 0.3, 0, 1.0),
                           try_convert(dilation.strip(), int, 16, 0, 512),
                           try_convert(sam_level.strip(), int, 0, 0, 2))
        # No detection -> empty (all-zero) mask.
        return image_set[3].copy() if mask is None else mask

    # Evaluate innermost parentheses first, replacing each group with a
    # placeholder whose result is stashed in result_group.
    find = token_first.search(prompt)
    result_count = 0
    while find:
        result_group[f'{result_count}GROUPMASK'] = dino_prompt_detector(find.group(1), model_set, image_set)
        prompt = prompt.replace(find.group(), f' {result_count}GROUPMASK ')
        result_count += 1
        find = token_first.search(prompt)

    spliter = token_split.split(prompt)

    # Fold gates left-to-right: [a, OP, b, rest...] -> [OP(a, b), rest...]
    while len(spliter) > 1:
        left, operator, right = spliter[:3]
        spliter[:3] = [combine_masks(resolve(left), operator, resolve(right))]
        gc.collect()
        torch_gc()

    # Bug fix: the original single-token path never checked for GROUPMASK
    # placeholders, so a fully parenthesized prompt like "(face)" was sent
    # to DINO as the literal text "0GROUPMASK". resolve() handles every
    # token kind uniformly (and also removes the triplicated operand logic).
    return resolve(spliter[0])
170
+
171
def mask_spliter_and_remover(mask, area):
    """Split a binary mask into one image per connected component, dropping
    components smaller than `area` pixels.

    Returns a list of uint8 images (0/255), one per surviving component.
    """
    gc.collect()
    torch_gc()
    labels = measure.label(mask)
    regions = measure.regionprops(labels)

    # Erase components below the area threshold directly in the label map.
    for region in regions:
        if region.area < area:
            for coord in region.coords:
                labels[coord[0], coord[1]] = 0

    num_labels = np.max(labels)

    label_images = []
    for index in range(num_labels):
        label_image = np.zeros_like(mask, dtype=np.uint8)
        label_image[labels == (index + 1)] = 255
        # Bug fix: labels erased above kept their numbers allocated, so the
        # original returned all-zero masks for them; skip empty masks so
        # callers never inpaint with an empty selection.
        if label_image.any():
            label_images.append(label_image)
    return label_images
190
+
191
def I2I_Generator_Create(p, i2i_sample, i2i_mask_blur, full_res_inpainting, inpainting_padding, init_image, denoise, cfg, steps, width, height, tiling, scripts, scripts_list, alwaysonscripts_list, script_args, positive, negative, fill = 1):
    """Build an img2img (inpainting) processing object derived from `p`.

    Copies seeds, styles, paths and model from the parent processing `p`,
    installs the given sampler/cfg/steps/size, and attaches copies of the
    provided script lists so the sub-run does not mutate the parent's.
    The mask is left None; callers assign it before processing.
    """
    i2i = StableDiffusionProcessingImg2Img(
        init_images = [init_image],
        resize_mode = 0,
        # Placeholder; the real denoise value is assigned after construction.
        denoising_strength = 0,
        mask = None,
        mask_blur= i2i_mask_blur,
        inpainting_fill = fill,
        inpaint_full_res = full_res_inpainting,
        inpaint_full_res_padding= inpainting_padding,
        inpainting_mask_invert= 0,
        sd_model=p.sd_model,
        outpath_samples=p.outpath_samples,
        outpath_grids=p.outpath_grids,
        restore_faces=p.restore_faces,
        prompt='',
        negative_prompt='',
        styles=p.styles,
        seed=p.seed,
        subseed=p.subseed,
        subseed_strength=p.subseed_strength,
        seed_resize_from_h=p.seed_resize_from_h,
        seed_resize_from_w=p.seed_resize_from_w,
        sampler_name=i2i_sample,
        n_iter=1,
        batch_size=1,
        steps=steps,
        cfg_scale=cfg,
        width=width,
        height=height,
        tiling=tiling,
    )
    i2i.denoising_strength = denoise
    i2i.do_not_save_grid = True
    i2i.do_not_save_samples = True
    i2i.override_settings = {}
    # NOTE(review): upstream webui treats this attribute as a boolean; {} is
    # falsy here, which disables restoring settings afterwards -- confirm
    # that is intended.
    i2i.override_settings_restore_afterwards = {}
    i2i.scripts = scripts
    # Copies so the sub-run can be filtered without touching the parent lists.
    i2i.scripts.scripts = scripts_list.copy()
    i2i.scripts.alwayson_scripts = alwaysonscripts_list.copy()
    i2i.script_args = script_args
    i2i.prompt = positive
    i2i.negative_prompt = negative
    # Flag consumed elsewhere in this extension to mark nested processing.
    i2i.sub_processing = True

    return i2i
237
+
238
def get_fonts_list():
    """Collect system font display names plus a name -> file-path map,
    skipping font files matplotlib cannot parse."""
    names = []
    paths = {}
    for font_file in matplotlib.font_manager.findSystemFonts():
        try:
            display_name = matplotlib.font_manager.FontProperties(fname=font_file).get_name()
        except RuntimeError:
            print(f'Skip font file: {font_file}')
            continue
        names.append(display_name)
        paths[display_name] = font_file
    return names, paths
248
+
249
def image_apply_watermark(image, watermark_type, watermark_position, watermark_image, watermark_image_size_width, watermark_image_size_height, watermark_text, watermark_text_color, watermark_text_font, watermark_text_size, watermark_padding, watermark_alpha):
    """Stamp a text or image watermark onto `image` and return the result.

    `watermark_position` places the stamp on a 3x3 grid inset by
    `watermark_padding`; `watermark_alpha` blends the stamp with the original.
    For the Image type, pure-white pixels of the watermark are treated as
    transparent.
    """
    gc.collect()
    torch_gc()
    if watermark_type == 'Text':
        # NOTE(review): ImageFont.getsize was removed in Pillow 10
        # (use getbbox/getlength there) -- confirm the Pillow version in use.
        font = ImageFont.truetype(watermark_text_font, watermark_text_size)
        copy_image = image.copy()
        draw = ImageDraw.Draw(copy_image)
        text_width, text_height = font.getsize(watermark_text)
        left, right, top, bottom = 0 + watermark_padding, image.size[0] - watermark_padding, 0 + watermark_padding, image.size[1] - watermark_padding
        if watermark_position == 'Left': position = (left, (top + bottom) // 2 - text_height // 2)
        elif watermark_position == 'Left-Top': position = (left, top)
        elif watermark_position == 'Top': position = ((left + right) // 2 - text_width // 2, top)
        elif watermark_position == 'Right-Top': position = (right - text_width,top)
        elif watermark_position == 'Right': position = (right - text_width, (top + bottom) // 2 - text_height // 2)
        elif watermark_position == 'Right-Bottom': position = (right - text_width, bottom - text_height)
        elif watermark_position == 'Bottom': position = ((left + right) // 2 - text_width // 2,bottom - text_height)
        elif watermark_position == 'Left-Bottom': position = (left, bottom - text_height)
        elif watermark_position == 'Center': position = ((left + right) // 2 - text_width // 2, (top + bottom) // 2 - text_height // 2)
        # Parse "#RRGGBB" into an (r, g, b) tuple for the fill color.
        draw.text(position, watermark_text, font=font, fill=tuple(int(watermark_text_color[x:x+2], 16) for x in (1,3,5)))
        result = Image.blend(image, copy_image, watermark_alpha)
    elif watermark_type == 'Image':
        left, right, top, bottom = 0 + watermark_padding, image.size[0] - watermark_padding, 0 + watermark_padding, image.size[1] - watermark_padding
        if watermark_position == 'Left': position = (left, (top + bottom) // 2 - watermark_image_size_height // 2)
        elif watermark_position == 'Left-Top': position = (left, top)
        elif watermark_position == 'Top': position = ((left + right) // 2 - watermark_image_size_width // 2, top)
        elif watermark_position == 'Right-Top': position = (right - watermark_image_size_width,top)
        elif watermark_position == 'Right': position = (right - watermark_image_size_width, (top + bottom) // 2 - watermark_image_size_height // 2)
        elif watermark_position == 'Right-Bottom': position = (right - watermark_image_size_width, bottom - watermark_image_size_height)
        elif watermark_position == 'Bottom': position = ((left + right) // 2 - watermark_image_size_width // 2,bottom - watermark_image_size_height)
        elif watermark_position == 'Left-Bottom': position = (left, bottom - watermark_image_size_height)
        elif watermark_position == 'Center': position = ((left + right) // 2 - watermark_image_size_width // 2, (top + bottom) // 2 - watermark_image_size_height // 2)
        copy_np = np.array(image)
        copy_np_origin = copy_np.copy()
        water_image = cv2.resize(watermark_image.copy(), (watermark_image_size_width, watermark_image_size_height))
        # Pure-white watermark pixels become transparent (mask value 0).
        mask = np.where(np.all(water_image == [255, 255, 255], axis=-1), 0, 255)
        alpha = np.zeros((water_image.shape[0], water_image.shape[1]), dtype=np.uint8)
        alpha[:,:] = mask
        # Paste the non-transparent watermark pixels into the target region.
        copy_np_crop = copy_np[position[1]:position[1]+watermark_image_size_height, position[0]:position[0]+watermark_image_size_width, :]
        copy_np_crop[alpha.nonzero()] = water_image[alpha.nonzero()]
        copy_np[position[1]:position[1]+watermark_image_size_height, position[0]:position[0]+watermark_image_size_width, :] = copy_np_crop
        result = Image.fromarray(cv2.addWeighted(copy_np_origin, 1 - watermark_alpha, copy_np, watermark_alpha, 0))
    gc.collect()
    torch_gc()
    return result
293
+
294
def matched_noise(image_np, mask_np, noise = 1, color_variation = 0.05):
    """Fill the masked region of `image_np` with random noise whose frequency
    spectrum matches the unmasked content (spectrum-matched noise for
    outpainting seeds).

    Assumes float arrays in [0, 1] with `mask_np` broadcastable over the
    image -- TODO confirm against callers. Returns the composited image,
    clipped to [0, 1]. Deterministic: uses a fixed RNG seed (0).
    """
    def _fft2(data):
        """Per-channel centered 2D FFT (ortho-normalized)."""
        if data.ndim > 2:
            out_fft = np.zeros((data.shape[0], data.shape[1], data.shape[2]), dtype=np.complex128)
            for c in range(data.shape[2]):
                c_data = data[:,:,c]
                out_fft[:,:,c] = np.fft.fft2(np.fft.fftshift(c_data), norm='ortho')
                out_fft[:,:,c] = np.fft.ifftshift(out_fft[:,:,c])
        else:
            out_fft = np.zeros((data.shape[0], data.shape[1]), dtype=np.complex128)
            out_fft[:,:] = np.fft.fft2(np.fft.fftshift(data), norm='ortho')
            out_fft[:,:] = np.fft.ifftshift(out_fft[:,:])
        return out_fft
    def _ifft2(data):
        """Per-channel centered inverse 2D FFT (ortho-normalized)."""
        if data.ndim > 2:
            out_ifft = np.zeros((data.shape[0], data.shape[1], data.shape[2]), dtype=np.complex128)
            for c in range(data.shape[2]):
                c_data = data[:, :, c]
                out_ifft[:, :, c] = np.fft.ifft2(np.fft.fftshift(c_data), norm="ortho")
                out_ifft[:, :, c] = np.fft.ifftshift(out_ifft[:, :, c])
        else:
            out_ifft = np.zeros((data.shape[0], data.shape[1]), dtype=np.complex128)
            out_ifft[:, :] = np.fft.ifft2(np.fft.fftshift(data), norm="ortho")
            out_ifft[:, :] = np.fft.ifftshift(out_ifft[:, :])
        return out_ifft
    def _get_gaussian_window(width, height, std=3.14, mode=0):
        """Low-pass window over frequency space (Gaussian or rational falloff)."""
        window_scale_x = float(width / min(width, height))
        window_scale_y = float(height / min(width, height))
        window = np.zeros((width, height))
        x = (np.arange(width) / width * 2. - 1.) * window_scale_x
        for y in range(height):
            fy = (y / height * 2. - 1.) * window_scale_y
            if mode == 0:
                window[:, y] = np.exp(-(x ** 2 + fy ** 2) * std)
            else:
                window[:, y] = (1 / ((x ** 2 + 1.) * (fy ** 2 + 1.))) ** (std / 3.14)
        return window
    def _get_masked_window_rgb(np_mask_grey, hardness=1.0):
        """Broadcast the grey mask to 3 channels, optionally hardened by power."""
        np_mask_rgb = np.zeros((np_mask_grey.shape[0], np_mask_grey.shape[1], 3))
        if hardness != 1.0:
            hardened = np_mask_grey[:] ** hardness
        else:
            hardened = np_mask_grey[:]
        for c in range(3):
            np_mask_rgb[:, :, c] = hardened[:]
        return np_mask_rgb

    width = image_np.shape[0]
    height = image_np.shape[1]
    channel = image_np.shape[2]

    # Blank out the masked region so it does not contribute to the spectrum.
    image_np = image_np[:] * (1.0 - mask_np)
    mask_np_grey = (np.sum(mask_np, axis=2) / 3.0)
    img_mask = mask_np_grey > 1e-6   # pixels to be filled
    ref_mask = mask_np_grey < 1e-3   # pixels used as the reference histogram

    # Windowed source: keep the unmasked content, fill the hole with the
    # image's mean brightness to avoid spectral ringing at the mask edge.
    image_windowed = image_np * (1.0 - _get_masked_window_rgb(mask_np_grey))
    image_windowed /= np.max(image_windowed)
    image_windowed += np.average(image_np) * mask_np

    src_fft = _fft2(image_windowed)
    src_dist = np.absolute(src_fft)
    src_phase = src_fft / src_dist

    # Fixed seed: the fill is deterministic across calls.
    rng = np.random.default_rng(0)

    noise_window = _get_gaussian_window(width, height, mode=1)
    noise_rgb = rng.random((width,height, channel))
    noise_grey = (np.sum(noise_rgb, axis=2) / 3.0)
    # Mix per-channel noise with its grey average to limit color variation.
    noise_rgb *= color_variation
    for c in range(channel):
        noise_rgb[:,:,c] += (1.0 - color_variation) * noise_grey

    # Low-pass the noise, then shape its spectrum to match the source's
    # magnitude (raised to `noise`) while keeping the source phase.
    noise_fft = _fft2(noise_rgb)
    for c in range(channel):
        noise_fft[:,:,c] *= noise_window
    noise_rgb = np.real(_ifft2(noise_fft))
    shaped_noise_fft = _fft2(noise_rgb)
    shaped_noise_fft[:,:,:] = np.absolute(shaped_noise_fft[:,:,:]) ** 2 * (src_dist ** noise) * src_phase

    # Brightness variation is currently disabled (kept for tuning).
    brightness_variation = 0
    contrast_adjusted_np = image_np[:] * (brightness_variation + 1.0) - brightness_variation * 2.0

    # Normalize the shaped noise to [0, 1], then match its histogram in the
    # hole to the unmasked reference pixels before compositing.
    shaped_noise = np.real(_ifft2(shaped_noise_fft))
    shaped_noise -= np.min(shaped_noise)
    shaped_noise /= np.max(shaped_noise)
    shaped_noise[img_mask, :] = exposure.match_histograms(shaped_noise[img_mask, :] ** 1.0, contrast_adjusted_np[ref_mask, :], channel_axis = 1)
    shaped_noise = image_np[:] * (1.0 - mask_np) + shaped_noise * mask_np

    return np.clip(shaped_noise[:], 0.0, 1.0)
exhm/detailer/sd-webui-ddsd/.gitignore ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea
161
+ *.pt
162
+ *.pth
163
+ *.ckpt
164
+ *.safetensors
165
+ models/control_sd15_scribble.pth
166
+ detected_maps/
167
+
168
+ # Ignore all .ddcfg files except for Empty.ddcfg
169
+ config/*.ddcfg
170
+ !config/Empty.ddcfg
exhm/detailer/sd-webui-ddsd/README.md ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # sd-webui-ddsd
2
+ 자동으로 동작하는 후보정 작업 확장.
3
+
4
+ ## What is
5
+ ### Outpaint
6
+ #### Outpaint How to use
7
+ 1. 증가시킬 픽셀을 선택
8
+ 2. 증가시킬 방향 선택
9
+ 1. 방향이 None이면 미동작
10
+ 3. 증가시킬때 사용할 프롬프트 작성(전체 인페인팅시 이용)
11
+ 1. 비어있을때 원본 프롬프트 사용
12
+ 4. Denoise, CFG, Step 선택
13
+ 1. Step은 최소 원본 Step 2 ~ 3배 이상 적절한 값 요구
14
+ 5. 생성!
15
+ ### Upscale
16
+ 이미지를 특정 크기로 잘라내어 타일별 업스케일을 하는 도구. 업스케일시 VRAM을 적게 소모.
17
+ #### Upscale How to use
18
+ 1. 크기를 키울때 사용할 upscaler 모델 선택
19
+ 2. 크기를 키울 배수 선택
20
+ 3. 가로, 세로를 내가 단일로 생성할 수 있는 이미지의 최대 크기로 선택(이미지 생성 속도를 최대한 빠르게 하기 위하여)
21
+ 1. 가로 또는 세로중 한개를 0으로 세팅시 업스케일만 동작(세부 구조를 디테일하게하는 인페인팅이 동작하지 않음)
22
+ 4. before running 체크
23
+ 1. 체크시 업스케일을 먼저 돌려서 인페인팅의 퀄리티 상승. 단, 인페인팅시 더 많은 VRAM 요구
24
+ 5. 생성!
25
+ ### Detect Detailer
26
+ 특정 키워드로 이미지를 탐색 후 인페인팅하는 도구.
27
+ #### Detect Detailer How to use
28
+ 0. 인페인팅의 범위 제한(I2I 전용)
29
+ 1. Inner 옵션은 I2I의 인페인팅에서 칠한 범위 내부만 이미지를 탐색
30
+ 2. Outer 옵션은 I2I의 인페인팅에서 칠한 범위 외부만 이미지를 탐색
31
+ 1. 탐색 키워드 작성
32
+ 1. 탐색할 키워드를 작성(face, person 등등)
33
+ 1. 탐색할 키워드는 문장형도 가능(happy face, running dog)
34
+ 2. 탐색할 키워드를 .으로 분할 가능(face. arm, face. chest)
35
+ 2. 탐색할 키워드에 사용 가능한 추가 옵션 존재
36
+ 1. &lt;area:type&gt;을 이용하여 특정 범위 탐색 가능
37
+ 1. 범위 종류는 left, right, top, bottom, all이 존재
38
+ 2. &lt;file:filename&gt;을 이용하여 특정 파일 탐색 가능
39
+ 1. 특정 파일의 위치는 models/ddsdmask
40
+ 3. &lt;model:type&gt;을 이용하여 특정 모델 탐색 가능
41
+ 1. type은 face_media_full, face_media_short와 파일명이 존재
42
+ 2. 파일은 models/yolo에 위치
43
+ 4. &lt;type1:type2:dilation:confidence&gt; 같이 type1과 type2외에 dilation과 confidence도 추가 입력 가능
44
+ 1. confidence는 model 타입에서만 사용되는 값
45
+ 3. 탐색한 범위를 AND, OR, XOR, NAND, NOR 등의 게이트 옵션으로 연산 가능
46
+ 1. face OR (body NAND outfit) -> 괄호안의 body NAND outfit을 먼저 한 후에 face와 OR 연산을 동작
47
+ 2. 괄호는 최대한 적게 이용. 많이 이용시 많은 VRAM 소모.
48
+ 3. 동작은 왼쪽에서 오른쪽으로 순차적 동작.
49
+ 4. 탐색할 키워드에 옵션으로 여러가지 옵션 조절 가능
50
+ 1. face:0:0.4:4 OR outfit:2:0.5:8
51
+ 2. 순서대로 탐색할 프롬프트, SAM 탐색 레벨(0-2), 민감도(0-1), 팽창값(0-512)을 가짐
52
+ 3. 값을 생략하면 초기값으로 세팅
53
+ 2. 긍정 프롬프트 입력
54
+ 1. 인페인팅시 동작시킬 긍정 프롬프트 입력
55
+ 3. 부정 프롬프트 입력
56
+ 1. 인페인팅시 동작시킬 부정 프롬프트 입력
57
+ 4. Denoising, CFG, Steps, Clip skip, Ckpt, Vae 수정
58
+ 1. 인페인팅시 동작에 영향을 주는 옵션
59
+ 5. Split Mask 옵션 체크
60
+ 1. 체크시 마스크가 떨어져 있는것이 존재한다면 따로 인페인팅.
61
+ 1. 따로 인페인팅시 퀄리티 상승. 하지만 더 많은 인페인팅을 요구하여 생성속도 하락.
62
+ 6. Remove Area 옵션 체크
63
+ 1. Split Mask 옵션이 Enable 되어야만 동작
64
+ 2. 분할 인페인팅시 일정 크기 이하의 면적은 인페인팅에서 제외
65
+ 6. 생성!
66
+ ### Postprocessing
67
+ 최종적으로 생성된 이미지에 가하는 후보정
68
+ #### Postprocessing How to use
69
+ 1. 가하고자 하는 후보정을 선택
70
+ 2. 생성!
71
+ ### Watermark
72
+ 이미지 생성 최종본에 자신의 증명을 기입하는 기능
73
+ #### Watermark How to use
74
+ 1. 기입할 증명의 종류 선택(글자, 이미지)
75
+ 2. 선택한 종류를 입력
76
+ 3. 선택한 종류의 크기와 위치를 지정
77
+ 4. Padding으로 해당 위치에서 얼만큼 떨어져 있을지 설정
78
+ 5. Alpha로 얼만큼 투명할지 결정
79
+ 6. 생성!
80
+
81
+ ### Video
82
+ [![Stable Diffusion - DDSD 확장 기능 (No - Talking)](http://img.youtube.com/vi/9wfZyJhPPho/0.jpg)](https://youtu.be/9wfZyJhPPho)
83
+
84
+ ## Installation
85
+ 1. 다운로드 [CUDA](https://developer.nvidia.com/cuda-toolkit-archive)와 [cuDNN](https://developer.nvidia.com/rdp/cudnn-archive)
86
+ 1. 자신이 가진 WebUI와 동일한 버전의 `CUDA`와 `cuDNN`버전으로 설치
87
+ 1. 이것은 다운로드를 편하게 하기위한 구글링크. [CUDA 117](https://drive.google.com/file/d/1HRTOLTB44-pRcrwIw9lQak2OC2ohNle3/view?usp=share_link)와 [cuDNN](https://drive.google.com/file/d/1QcgaxUra0WnCWrCLjsWp_QKw1PKcvqpj/view?usp=share_link)
88
+ 2. `CUDA` 설치 후 해당 폴더에 `cuDNN` 덮어쓰기
89
+ 3. 일정 버전은 Easy Install을 지원. `CUDA`와 `cuDNN` 불필요.
90
+ 1. 지원버전 (torch == 1.13.1+cu117, torch==2.0.0+cu117 , torch==2.0.0+cu118)
91
+ 2. 확장탭에서 설치 `https://github.com/NeoGraph-K/sd-webui-ddsd` 또는 다운로드 후 `extension/` 에 풀어넣기
92
+ 3. WebUI를 완전히 재시작
93
+
94
+ ## Credits
95
+
96
+ dustysys/[ddetailer](https://github.com/dustysys/ddetailer)
97
+
98
+ AUTOMATIC1111/[stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
99
+
100
+ facebookresearch/[Segment Anything](https://github.com/facebookresearch/segment-anything)
101
+
102
+ IDEA-Research/[GroundingDINO](https://github.com/IDEA-Research/GroundingDINO)
103
+
104
+ IDEA-Research/[Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything)
105
+
106
+ continue-revolution/[sd-webui-segment-anything](https://github.com/continue-revolution/sd-webui-segment-anything)
107
+
108
+ Bing-su/[adetailer](https://github.com/Bing-su/adetailer)
exhm/detailer/sd-webui-ddsd/config/Empty.ddcfg ADDED
@@ -0,0 +1 @@
 
 
1
+ {"enable_script_names": "dynamic_thresholding;dynamic_prompting", "disable_watermark": true, "disable_postprocess": true, "disable_upscaler": true, "ddetailer_before_upscaler": false, "scalevalue": 2, "upscaler_sample": "Original", "overlap": 32, "upscaler_index": "SwinIR_4x", "rewidth": 512, "reheight": 512, "denoising_strength": 0.1, "upscaler_ckpt": "Original", "upscaler_vae": "Original", "disable_detailer": true, "disable_mask_paint_mode": true, "inpaint_mask_mode": "Inner", "detailer_sample": "Original", "detailer_sam_model": "sam_vit_b_01ec64.pth", "detailer_dino_model": "groundingdino_swinb_cogcoor.pth", "dino_full_res_inpaint": true, "dino_inpaint_padding": 0, "detailer_mask_blur": 4, "disable_outpaint": true, "outpaint_sample": "Original", "outpaint_mask_blur": 8, "dino_detect_count": 5, "dino_detection_ckpt_1": "Original", "dino_detection_vae_1": "Original", "dino_detection_prompt_1": "", "dino_detection_positive_1": "", "dino_detection_negative_1": "", "dino_detection_denoise_1": 0.4, "dino_detection_cfg_1": 0, "dino_detection_steps_1": 0, "dino_detection_spliter_disable_1": true, "dino_detection_spliter_remove_area_1": 16, "dino_detection_clip_skip_1": 0, "dino_detection_ckpt_2": "Original", "dino_detection_vae_2": "Original", "dino_detection_prompt_2": "", "dino_detection_positive_2": "", "dino_detection_negative_2": "", "dino_detection_denoise_2": 0.4, "dino_detection_cfg_2": 0, "dino_detection_steps_2": 0, "dino_detection_spliter_disable_2": true, "dino_detection_spliter_remove_area_2": 16, "dino_detection_clip_skip_2": 0, "dino_detection_ckpt_3": "Original", "dino_detection_vae_3": "Original", "dino_detection_prompt_3": "", "dino_detection_positive_3": "", "dino_detection_negative_3": "", "dino_detection_denoise_3": 0.4, "dino_detection_cfg_3": 0, "dino_detection_steps_3": 0, "dino_detection_spliter_disable_3": true, "dino_detection_spliter_remove_area_3": 16, "dino_detection_clip_skip_3": 0, "dino_detection_ckpt_4": "Original", 
"dino_detection_vae_4": "Original", "dino_detection_prompt_4": "", "dino_detection_positive_4": "", "dino_detection_negative_4": "", "dino_detection_denoise_4": 0.4, "dino_detection_cfg_4": 0, "dino_detection_steps_4": 0, "dino_detection_spliter_disable_4": true, "dino_detection_spliter_remove_area_4": 16, "dino_detection_clip_skip_4": 0, "dino_detection_ckpt_5": "Original", "dino_detection_vae_5": "Original", "dino_detection_prompt_5": "", "dino_detection_positive_5": "", "dino_detection_negative_5": "", "dino_detection_denoise_5": 0.4, "dino_detection_cfg_5": 0, "dino_detection_steps_5": 0, "dino_detection_spliter_disable_5": true, "dino_detection_spliter_remove_area_5": 16, "dino_detection_clip_skip_5": 0, "watermark_count": 2, "watermark_type_1": "Text", "watermark_position_1": "Center", "watermark_image_1": null, "watermark_image_size_width_1": 100, "watermark_image_size_height_1": 100, "watermark_text_1": "", "watermark_text_color_1": null, "watermark_text_font_1": "Courier New", "watermark_text_size_1": 50, "watermark_padding_1": 10, "watermark_alpha_1": 0.4, "watermark_type_2": "Text", "watermark_position_2": "Center", "watermark_image_2": null, "watermark_image_size_width_2": 100, "watermark_image_size_height_2": 100, "watermark_text_2": "", "watermark_text_color_2": null, "watermark_text_font_2": "Courier New", "watermark_text_size_2": 50, "watermark_padding_2": 10, "watermark_alpha_2": 0.4, "postprocessing_count": 2, "pp_type_1": "none", "pp_saturation_strength_1": 1.1, "pp_sharpening_radius_1": 2, "pp_sharpening_percent_1": 150, "pp_sharpening_threshold_1": 3, "pp_gaussian_radius_1": 2, "pp_brightness_strength_1": 1.1, "pp_color_strength_1": 1.1, "pp_contrast_strength_1": 1.1, "pp_hue_strength_1": 0, "pp_bilateral_sigmaC_1": 10, "pp_bilateral_sigmaS_1": 10, "pp_color_tint_type_name_1": "warm", "pp_color_tint_lut_name_1": "FGCineBasic.cube", "pp_type_2": "none", "pp_saturation_strength_2": 1.1, "pp_sharpening_radius_2": 2, "pp_sharpening_percent_2": 150, 
"pp_sharpening_threshold_2": 3, "pp_gaussian_radius_2": 2, "pp_brightness_strength_2": 1.1, "pp_color_strength_2": 1.1, "pp_contrast_strength_2": 1.1, "pp_hue_strength_2": 0, "pp_bilateral_sigmaC_2": 10, "pp_bilateral_sigmaS_2": 10, "pp_color_tint_type_name_2": "warm", "pp_color_tint_lut_name_2": "FGCineBasic.cube", "outpaint_count": 4, "outpaint_positive_1": "FGCineBasic.cube", "outpaint_negative_1": "", "outpaint_denoise_1": "", "outpaint_cfg_1": 0.8, "outpaint_steps_1": 0, "outpaint_pixels_1": 80, "outpaint_direction_1": 128, "outpaint_positive_2": "FGCineBasic.cube", "outpaint_negative_2": "", "outpaint_denoise_2": "", "outpaint_cfg_2": 0.8, "outpaint_steps_2": 0, "outpaint_pixels_2": 80, "outpaint_direction_2": 128, "outpaint_positive_3": "", "outpaint_negative_3": "", "outpaint_denoise_3": 0.8, "outpaint_cfg_3": 0, "outpaint_steps_3": 80, "outpaint_pixels_3": 128, "outpaint_direction_3": "None", "outpaint_positive_4": "", "outpaint_negative_4": "", "outpaint_denoise_4": 0.8, "outpaint_cfg_4": 0, "outpaint_steps_4": 80, "outpaint_pixels_4": 128, "outpaint_direction_4": "None"}
exhm/detailer/sd-webui-ddsd/install.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import platform
3
+
4
+ import launch
5
+
6
+
7
def check_system_machine():
    """Return True when running on a platform we ship prebuilt wheels for (Windows/AMD64 or Linux/x86_64)."""
    supported = {('Windows', 'AMD64'), ('Linux', 'x86_64')}
    return (platform.system(), platform.machine()) in supported
11
+
12
+
13
def check_python_version(low: int, high: int):
    """Return the CPython tag digits (e.g. '310') if the interpreter is 3.low..3.high, else None."""
    major, minor, _patch = platform.python_version_tuple()
    if int(major) != 3 or not (low <= int(minor) <= high):
        return None
    return major + minor
18
+
19
+
20
def install_pycocotools():
    """Install pycocotools, preferring a prebuilt wheel when platform and Python version match one."""
    message = 'sd-webui-ddsd requirement: pycocotools'
    python_version = check_python_version(8, 11)

    # No matching prebuilt wheel: let pip build from the sdist instead.
    if not (python_version and check_system_machine()):
        launch.run_pip('install pycocotools', message)
        return

    wheel_names = {
        'Windows': 'pycocotools-2.0.6-cp{ver}-cp{ver}-win_amd64.whl',
        'Linux': 'pycocotools-2.0.6-cp{ver}-cp{ver}-manylinux_2_17_x86_64.manylinux2014_x86_64.whl',
    }
    base = 'https://github.com/Bing-su/dddetailer/releases/download/pycocotools/'
    wheel = wheel_names[platform.system()].format(ver=python_version)
    launch.run_pip(f'install {base}{wheel}', message)
34
+
35
+
36
def install_groundingdino():
    """Install GroundingDINO, preferring a prebuilt wheel matching torch/CUDA/Python; otherwise build from source.

    Fixes two failure modes of the previous version:
    - ``torch.version.cuda`` is ``None`` on CPU-only / ROCm torch builds, so calling
      ``.replace`` on it raised ``AttributeError`` instead of falling back to source.
    - The torch-2.1/cu121 fast path never validated the interpreter, producing a
      bogus ``cpNone`` wheel URL on unsupported Python versions.
    """
    import torch
    from packaging.version import parse

    message = 'sd-webui-ddsd requirement: groundingdino'
    source_install = 'install git+https://github.com/IDEA-Research/GroundingDINO'

    # torch_version: '1.13.1' or '2.0.0' or ...
    torch_version = parse(torch.__version__).base_version
    # cuda_version: '117', '118', '121', ... or None on CPU-only/ROCm torch builds.
    cuda_version = torch.version.cuda.replace('.', '') if torch.version.cuda else None
    # Prebuilt wheels are published for CPython 3.9 and 3.10 only.
    python_version = check_python_version(9, 10)

    # Without a supported OS/arch, a CUDA-enabled torch and a supported CPython,
    # no wheel URL can match: build from source.
    if not (check_system_machine() and cuda_version and python_version):
        launch.run_pip(source_install, message)
        return

    system = 'win' if platform.system() == 'Windows' else 'linux'
    machine = 'amd64' if platform.machine() == 'AMD64' else 'x86_64'

    if torch_version in ('2.1.0', '2.1.1', '2.1.2') and cuda_version == '121':
        url = 'https://github.com/Bing-su/GroundingDINO/releases/download/v23.9.27/groundingdino-23.9.27+torch2.1.0.cu121-cp{py}-cp{py}-{system}_{machine}.whl'
        url = url.format(py=python_version, system=system, machine=machine)
        launch.run_pip(f'install {url}', message)
        return

    if (torch_version, cuda_version) not in [
        ('1.13.1', '117'), ('2.0.1', '117'), ('2.0.1', '118'), ('2.1.0', '121')
    ]:
        launch.run_pip(source_install, message)
        return

    url = 'https://github.com/Bing-su/GroundingDINO/releases/download/wheel-0.1.0/groundingdino-0.1.0+torch{torch}.cu{cuda}-cp{py}-cp{py}-{system}_{machine}.whl'
    url = url.format(
        torch=torch_version,
        cuda=cuda_version,
        py=python_version,
        system=system,
        machine=machine,
    )
    launch.run_pip(f'install {url}', message)
78
+
79
+
80
# Walk requirements.txt and install anything the webui environment is missing.
req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt')

with open(req_file) as file:
    for raw_line in file:
        entry = raw_line.strip()
        # scikit-image installs under the import name 'skimage'; probe that name.
        if entry == 'scikit-image':
            entry = 'skimage'
        name, _, pin = entry.partition('==')
        name = name.strip()
        pin = pin.strip()
        if launch.is_installed(name):
            continue
        if name == 'pycocotools':
            install_pycocotools()
        elif name == 'groundingdino':
            install_groundingdino()
        elif name == 'skimage':
            launch.run_pip(
                'install scikit-image',
                'sd-webui-ddsd requirement: scikit-image'
            )
        elif name == 'pillow_lut':
            launch.run_pip(
                'install pillow_lut',
                'sd-webui-ddsd requirement: pillow_lut'
            )
        else:
            spec = name if not pin else f'{name}=={pin}'
            launch.run_pip(
                f'install {spec}',
                f'sd-webui-ddsd requirement: {spec}'
            )
exhm/detailer/sd-webui-ddsd/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pycocotools
2
+ segment_anything
3
+ groundingdino
4
+ scipy
5
+ scikit-image
6
+ pillow_lut
7
+ ultralytics==8.0.87
8
+ mediapipe==0.9.3.0
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd.cpython-310.pyc ADDED
Binary file (53.1 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_bs.cpython-310.pyc ADDED
Binary file (2.59 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_dino.cpython-310.pyc ADDED
Binary file (3.43 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_postprocess.cpython-310.pyc ADDED
Binary file (4.74 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_sam.cpython-310.pyc ADDED
Binary file (3.2 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_utils.cpython-310.pyc ADDED
Binary file (13.2 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/ddsd.py ADDED
The diff for this file is too large to render. See raw diff
 
exhm/detailer/sd-webui-ddsd/scripts/ddsd_bs.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import torch
5
+
6
+ import mediapipe as mp
7
+ import numpy as np
8
+
9
+ from PIL import Image, ImageDraw
10
+ from ultralytics import YOLO
11
+
12
+ from modules import safe
13
+ from modules.shared import cmd_opts
14
+ from modules.paths import models_path
15
+
16
+ # Directory (under the webui models path) scanned for user-provided YOLO weights (*.pt / *.pth).
+ yolo_models_path = os.path.join(models_path, 'yolo')
17
+
18
def mediapipe_face_detect(image, model_type, confidence):
    """Detect faces with MediaPipe and return a uint8 mask of their boxes, or None when no face is found.

    `model_type` is passed through as MediaPipe's `model_selection`
    (0 = short-range, 1 = full-range detector).
    """
    width, height = image.size
    frame = np.array(image)

    face_detection = mp.solutions.face_detection
    with face_detection.FaceDetection(model_selection=model_type, min_detection_confidence=confidence) as detector:
        result = detector.process(frame)

    if result.detections is None:
        return None

    boxes = []
    for detection in result.detections:
        # MediaPipe boxes are relative [0, 1] coordinates; scale to pixels.
        rel = detection.location_data.relative_bounding_box
        left = rel.xmin * width
        top = rel.ymin * height
        boxes.append([left, top, left + rel.width * width, top + rel.height * height])

    return create_mask_from_bbox(image, boxes)
39
+
40
def ultralytics_predict(image, model_type, confidence, device):
    """Run a user-supplied YOLO checkpoint named `model_type` and return a box mask, or None.

    Returns None when the yolo models directory is absent, no weight file with a
    matching (case-insensitive) stem exists, or nothing is detected above
    `confidence`. Previously a missing directory raised FileNotFoundError.
    """
    # The yolo models folder is user-managed; treat a missing folder as "no models".
    if not os.path.isdir(yolo_models_path):
        return None
    models = [
        os.path.join(yolo_models_path, name)
        for name in os.listdir(yolo_models_path)
        if name.endswith(('.pt', '.pth')) and os.path.splitext(name)[0].upper() == model_type
    ]
    if not models:
        return None
    model = YOLO(models[0])
    prediction = model(image, conf=confidence, show_labels=False, device=device)
    bboxes = prediction[0].boxes.xyxy.cpu().numpy()
    if bboxes.size == 0:
        return None
    return create_mask_from_bbox(image, bboxes.tolist())
49
+
50
def create_mask_from_bbox(image, bboxes):
    """Rasterize axis-aligned boxes into a single-channel numpy mask (255 inside a box, 0 elsewhere)."""
    canvas = Image.new('L', image.size, 0)
    painter = ImageDraw.Draw(canvas)
    for box in bboxes:
        painter.rectangle(box, fill=255)
    return np.array(canvas)
56
+
57
def bs_model(image, model_type, confidence):
    """Dispatch `model_type` to MediaPipe face detection or an ultralytics YOLO model; return the mask.

    Temporarily replaces webui's restricted `torch.load` with the unrestricted
    loader so YOLO checkpoints can be deserialized. The restore now runs in a
    `finally` block: previously, an exception during detection left the unsafe
    loader patched in for every later `torch.load` call in the process.
    """
    image = Image.fromarray(image)
    original_load = torch.load
    torch.load = safe.unsafe_torch_load
    try:
        if model_type == 'FACE_MEDIA_FULL':
            mask = mediapipe_face_detect(image, 1, confidence)
        elif model_type == 'FACE_MEDIA_SHORT':
            mask = mediapipe_face_detect(image, 0, confidence)
        else:
            # Keep YOLO off the GPU when the webui runs in low/medium VRAM mode.
            device = 'cpu' if getattr(cmd_opts, 'lowvram', False) or getattr(cmd_opts, 'medvram', False) else ''
            mask = ultralytics_predict(image, model_type, confidence, device)
    finally:
        torch.load = original_load
    return mask