dikdimon commited on
Commit
194b4ef
·
verified ·
1 Parent(s): 2699174

Upload exhm using SD-Hub extension

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +14 -0
  2. exhm/detailer/dddetailer/.gitignore +10 -0
  3. exhm/detailer/dddetailer/README.md +62 -0
  4. exhm/detailer/dddetailer/config/coco_panoptic.py +98 -0
  5. exhm/detailer/dddetailer/config/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py +265 -0
  6. exhm/detailer/dddetailer/config/mmdet_anime-face_yolov3.py +177 -0
  7. exhm/detailer/dddetailer/config/mmdet_dd-person_mask2former.py +105 -0
  8. exhm/detailer/dddetailer/install.py +71 -0
  9. exhm/detailer/dddetailer/misc/ddetailer_example_1.png +0 -0
  10. exhm/detailer/dddetailer/misc/ddetailer_example_2.png +0 -0
  11. exhm/detailer/dddetailer/misc/ddetailer_example_3.gif +0 -0
  12. exhm/detailer/dddetailer/pyproject.toml +29 -0
  13. exhm/detailer/dddetailer/scripts/dddetailer.py +1057 -0
  14. exhm/detailer/ddetailer/.gitignore +8 -0
  15. exhm/detailer/ddetailer/README.md +44 -0
  16. exhm/detailer/ddetailer/misc/ddetailer_example_1.png +0 -0
  17. exhm/detailer/ddetailer/misc/ddetailer_example_2.png +0 -0
  18. exhm/detailer/ddetailer/misc/ddetailer_example_3.gif +0 -0
  19. exhm/detailer/ddetailer/scripts/__pycache__/ddetailer.cpython-310.pyc +0 -0
  20. exhm/detailer/ddetailer/scripts/ddetailer.py +536 -0
  21. exhm/detailer/sd-webui-ddsd-orig/.gitignore +170 -0
  22. exhm/detailer/sd-webui-ddsd-orig/README.md +108 -0
  23. exhm/detailer/sd-webui-ddsd-orig/config/Empty.ddcfg +1 -0
  24. exhm/detailer/sd-webui-ddsd-orig/install.py +100 -0
  25. exhm/detailer/sd-webui-ddsd-orig/requirements.txt +8 -0
  26. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd.cpython-310.pyc +0 -0
  27. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_bs.cpython-310.pyc +0 -0
  28. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_dino.cpython-310.pyc +0 -0
  29. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_postprocess.cpython-310.pyc +0 -0
  30. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_sam.cpython-310.pyc +0 -0
  31. exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_utils.cpython-310.pyc +0 -0
  32. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd.py +0 -0
  33. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_bs.py +71 -0
  34. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_dino.py +99 -0
  35. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_postprocess.py +83 -0
  36. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_sam.py +89 -0
  37. exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_utils.py +383 -0
  38. exhm/detailer/sd-webui-ddsd/.gitignore +170 -0
  39. exhm/detailer/sd-webui-ddsd/README.md +108 -0
  40. exhm/detailer/sd-webui-ddsd/config/Empty.ddcfg +1 -0
  41. exhm/detailer/sd-webui-ddsd/install.py +110 -0
  42. exhm/detailer/sd-webui-ddsd/requirements.txt +8 -0
  43. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd.cpython-310.pyc +0 -0
  44. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_bs.cpython-310.pyc +0 -0
  45. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_dino.cpython-310.pyc +0 -0
  46. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_postprocess.cpython-310.pyc +0 -0
  47. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_sam.cpython-310.pyc +0 -0
  48. exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_utils.cpython-310.pyc +0 -0
  49. exhm/detailer/sd-webui-ddsd/scripts/ddsd.py +0 -0
  50. exhm/detailer/sd-webui-ddsd/scripts/ddsd_bs.py +71 -0
.gitattributes CHANGED
@@ -33,3 +33,17 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ exhm/detailer/stable-diffusion-webui-eyemask/models/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
37
+ exhm/extensions[[:space:]]img2/ComfyUI-nodes-hnmr/examples/workflow_mbw_multi.png filter=lfs diff=lfs merge=lfs -text
38
+ exhm/extensions[[:space:]]img2/ComfyUI-nodes-hnmr/examples/workflow_xyz.png filter=lfs diff=lfs merge=lfs -text
39
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/default.png filter=lfs diff=lfs merge=lfs -text
40
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/img2img_latent_upscale_process.png filter=lfs diff=lfs merge=lfs -text
41
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/nearest-exact-normal1.png filter=lfs diff=lfs merge=lfs -text
42
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/nearest-exact-normal2.png filter=lfs diff=lfs merge=lfs -text
43
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/nearest-exact-simple1.png filter=lfs diff=lfs merge=lfs -text
44
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/nearest-exact-simple2.png filter=lfs diff=lfs merge=lfs -text
45
+ exhm/extensions[[:space:]]img2/latent-upscale/assets/nearest-exact-simple8.png filter=lfs diff=lfs merge=lfs -text
46
+ exhm/extensions[[:space:]]img2/sd-webui-img2txt/sd-webui-img2txt.gif filter=lfs diff=lfs merge=lfs -text
47
+ exhm/extensions[[:space:]]img2/sd-webui-inpaint-anything/images/inpaint_anything_ui_image_1.png filter=lfs diff=lfs merge=lfs -text
48
+ exhm/extensions[[:space:]]img2/sd-webui-manga-inpainting/manga_inpainting/repo/examples/representative.png filter=lfs diff=lfs merge=lfs -text
49
+ exhm/extensions[[:space:]]img2/sd-webui-real-image-artifacts/examples/before.png filter=lfs diff=lfs merge=lfs -text
exhm/detailer/dddetailer/.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.ckpt
3
+ *.pth
4
+ /tmp
5
+ /outputs
6
+ /log
7
+ .vscode
8
+ /test-cases
9
+ .mypy_cache/
10
+ .ruff_cache/
exhm/detailer/dddetailer/README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 돚거 Detection Detailer
2
+
3
+ Dotgeo(hijack) Detection Detailer
4
+
5
+ ddetailer with torch 2.0, mmcv 2.0, mmdet 3.0
6
+
7
+ integrated with [noahge4/ddetailer](https://github.com/noahge4/ddetailer)
8
+
9
+ AI실사채널 ChatGPT23님의 [ddetailer 수정본](https://arca.live/b/aireal/72297207) 병합됨
10
+
11
+ ## Installation
12
+
13
+ 1. remove original ddetailer extension - `stable-diffusion-webui/extensions/ddetailer` folder
14
+ 2. remove original model files - `stable-diffusion-webui/models/mmdet` folder
15
+ 3. install from the extensions tab with url `https://github.com/Bing-su/dddetailer`
16
+
17
+ ## Problem
18
+
19
+ The predictive accuracy of the segmentation model has become very poor.
20
+
21
+ # Detection Detailer
22
+ An object detection and auto-mask extension for [Stable Diffusion web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui). See [Installation](https://github.com/dustysys/ddetailer#installation).
23
+
24
+ ![adoringfan](/misc/ddetailer_example_1.png)
25
+
26
+ ### Segmentation
27
+ Default models enable person and face instance segmentation.
28
+
29
+ ![amgothic](/misc/ddetailer_example_2.png)
30
+
31
+ ### Detailing
32
+ With full-resolution inpainting, the extension is handy for improving faces without the hassle of manual masking.
33
+
34
+ ![zion](/misc/ddetailer_example_3.gif)
35
+
36
+ ## Installation
37
+ 1. Use `git clone https://github.com/dustysys/ddetailer.git` from your SD web UI `/extensions` folder. Alternatively, install from the extensions tab with url `https://github.com/dustysys/ddetailer`
38
+ 2. Start or reload SD web UI.
39
+
40
+ The models and dependencies should download automatically. To install them manually, follow the [official instructions for installing mmdet](https://mmcv.readthedocs.io/en/latest/get_started/installation.html#install-with-mim-recommended). The models can be [downloaded here](https://huggingface.co/dustysys/ddetailer) and should be placed in `/models/mmdet/bbox` for bounding box (`anime-face_yolov3`) or `/models/mmdet/segm` for instance segmentation models (`dd-person_mask2former`). See the [MMDetection docs](https://mmdetection.readthedocs.io/en/latest/1_exist_data_model.html) for guidance on training your own models. For using official MMDetection pretrained models see [here](https://github.com/dustysys/ddetailer/issues/5#issuecomment-1311231989), these are trained for photorealism. See [Troubleshooting](https://github.com/dustysys/ddetailer#troubleshooting) if you encounter issues during installation.
41
+
42
+ ## Usage
43
+ Select Detection Detailer as the script in SD web UI to use the extension. Click 'Generate' to run the script. Here are some tips:
44
+ - `anime-face_yolov3` can detect the bounding box of faces as the primary model while `dd-person_mask2former` isolates the head's silhouette as the secondary model by using the bitwise AND option. Refer to [this example](https://github.com/dustysys/ddetailer/issues/4#issuecomment-1311200268).
45
+ - The dilation factor expands the mask, while the x & y offsets move the mask around.
46
+ - The script is available in txt2img mode as well and can improve the quality of your 10 pulls with moderate settings (low denoise).
47
+
48
+ ## Troubleshooting
49
+ If you get the message ERROR: 'Failed building wheel for pycocotools' follow [these steps](https://github.com/dustysys/ddetailer/issues/1#issuecomment-1309415543).
50
+
51
+ Any other issues installing, open an [issue](https://github.com/dustysys/ddetailer/issues).
52
+
53
+ ## Credits
54
+ hysts/[anime-face-detector](https://github.com/hysts/anime-face-detector) - Creator of `anime-face_yolov3`, which has impressive performance on a variety of art styles.
55
+
56
+ skytnt/[anime-segmentation](https://huggingface.co/datasets/skytnt/anime-segmentation) - Synthetic dataset used to train `dd-person_mask2former`.
57
+
58
+ jerryli27/[AniSeg](https://github.com/jerryli27/AniSeg) - Annotated dataset used to train `dd-person_mask2former`.
59
+
60
+ open-mmlab/[mmdetection](https://github.com/open-mmlab/mmdetection) - Object detection toolset. `dd-person_mask2former` was trained via transfer learning using their [R-50 Mask2Former instance segmentation model](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask2former#instance-segmentation) as a base.
61
+
62
+ AUTOMATIC1111/[stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) - Web UI for Stable Diffusion, base application for this extension.
exhm/detailer/dddetailer/config/coco_panoptic.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = "CocoPanopticDataset"
3
+ data_root = 'data/coco/'
4
+
5
+ # Example to use different file client
6
+ # Method 1: simply set the data root and let the file I/O module
7
+ # automatically infer from prefix (not support LMDB and Memcache yet)
8
+
9
+ # data_root = "s3://openmmlab/datasets/detection/coco/"
10
+
11
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
12
+ # backend_args = dict(
13
+ # backend='petrel',
14
+ # path_mapping=dict({
15
+ # './data/': 's3://openmmlab/datasets/detection/',
16
+ # 'data/': 's3://openmmlab/datasets/detection/'
17
+ # }))
18
+ backend_args = None
19
+
20
+ train_pipeline = [
21
+ dict(type="LoadImageFromFile", backend_args=backend_args),
22
+ dict(type="LoadPanopticAnnotations", backend_args=backend_args),
23
+ dict(type="Resize", scale=(1333, 800), keep_ratio=True),
24
+ dict(type="RandomFlip", prob=0.5),
25
+ dict(type="PackDetInputs"),
26
+ ]
27
+ test_pipeline = [
28
+ dict(type="LoadImageFromFile", backend_args=backend_args),
29
+ dict(type="Resize", scale=(1333, 800), keep_ratio=True),
30
+ dict(type="LoadPanopticAnnotations", backend_args=backend_args),
31
+ dict(
32
+ type="PackDetInputs",
33
+ meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"),
34
+ ),
35
+ ]
36
+
37
+ train_dataloader = dict(
38
+ batch_size=2,
39
+ num_workers=2,
40
+ persistent_workers=True,
41
+ sampler=dict(type="DefaultSampler", shuffle=True),
42
+ batch_sampler=dict(type="AspectRatioBatchSampler"),
43
+ dataset=dict(
44
+ type=dataset_type,
45
+ data_root=data_root,
46
+ ann_file="annotations/panoptic_train2017.json",
47
+ data_prefix=dict(img="train2017/", seg="annotations/panoptic_train2017/"),
48
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
49
+ pipeline=train_pipeline,
50
+ backend_args=backend_args,
51
+ ),
52
+ )
53
+ val_dataloader = dict(
54
+ batch_size=1,
55
+ num_workers=2,
56
+ persistent_workers=True,
57
+ drop_last=False,
58
+ sampler=dict(type="DefaultSampler", shuffle=False),
59
+ dataset=dict(
60
+ type=dataset_type,
61
+ data_root=data_root,
62
+ ann_file="annotations/panoptic_val2017.json",
63
+ data_prefix=dict(img="val2017/", seg="annotations/panoptic_val2017/"),
64
+ test_mode=True,
65
+ pipeline=test_pipeline,
66
+ backend_args=backend_args,
67
+ ),
68
+ )
69
+ test_dataloader = val_dataloader
70
+
71
+ val_evaluator = dict(
72
+ type="CocoPanopticMetric",
73
+ ann_file=data_root + "annotations/panoptic_val2017.json",
74
+ seg_prefix=data_root + "annotations/panoptic_val2017/",
75
+ backend_args=backend_args,
76
+ )
77
+ test_evaluator = val_evaluator
78
+
79
+ # inference on test dataset and
80
+ # format the output results for submission.
81
+ # test_dataloader = dict(
82
+ # batch_size=1,
83
+ # num_workers=1,
84
+ # persistent_workers=True,
85
+ # drop_last=False,
86
+ # sampler=dict(type='DefaultSampler', shuffle=False),
87
+ # dataset=dict(
88
+ # type=dataset_type,
89
+ # data_root=data_root,
90
+ # ann_file='annotations/panoptic_image_info_test-dev2017.json',
91
+ # data_prefix=dict(img='test2017/'),
92
+ # test_mode=True,
93
+ # pipeline=test_pipeline))
94
+ # test_evaluator = dict(
95
+ # type='CocoPanopticMetric',
96
+ # format_only=True,
97
+ # ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
98
+ # outfile_prefix='./work_dirs/coco_panoptic/test')
exhm/detailer/dddetailer/config/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["./coco_panoptic.py"]
2
+ image_size = (1024, 1024)
3
+ batch_augments = [
4
+ dict(
5
+ type="BatchFixedSizePad",
6
+ size=image_size,
7
+ img_pad_value=0,
8
+ pad_mask=True,
9
+ mask_pad_value=0,
10
+ pad_seg=True,
11
+ seg_pad_value=255,
12
+ )
13
+ ]
14
+ data_preprocessor = dict(
15
+ type="DetDataPreprocessor",
16
+ mean=[123.675, 116.28, 103.53],
17
+ std=[58.395, 57.12, 57.375],
18
+ bgr_to_rgb=True,
19
+ pad_size_divisor=32,
20
+ pad_mask=True,
21
+ mask_pad_value=0,
22
+ pad_seg=True,
23
+ seg_pad_value=255,
24
+ batch_augments=batch_augments,
25
+ )
26
+
27
+ num_things_classes = 1
28
+ num_stuff_classes = 0
29
+ num_classes = num_things_classes + num_stuff_classes
30
+ model = dict(
31
+ type="Mask2Former",
32
+ data_preprocessor=data_preprocessor,
33
+ backbone=dict(
34
+ type="ResNet",
35
+ depth=50,
36
+ num_stages=4,
37
+ out_indices=(0, 1, 2, 3),
38
+ frozen_stages=-1,
39
+ norm_cfg=dict(type="BN", requires_grad=False),
40
+ norm_eval=True,
41
+ style="pytorch",
42
+ init_cfg=dict(type="Pretrained", checkpoint="torchvision://resnet50"),
43
+ ),
44
+ panoptic_head=dict(
45
+ type="Mask2FormerHead",
46
+ in_channels=[256, 512, 1024, 2048], # pass to pixel_decoder inside
47
+ strides=[4, 8, 16, 32],
48
+ feat_channels=256,
49
+ out_channels=256,
50
+ num_things_classes=num_things_classes,
51
+ num_stuff_classes=num_stuff_classes,
52
+ num_queries=100,
53
+ num_transformer_feat_level=3,
54
+ pixel_decoder=dict(
55
+ type="MSDeformAttnPixelDecoder",
56
+ num_outs=3,
57
+ norm_cfg=dict(type="GN", num_groups=32),
58
+ act_cfg=dict(type="ReLU"),
59
+ encoder=dict( # DeformableDetrTransformerEncoder
60
+ num_layers=6,
61
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
62
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
63
+ embed_dims=256,
64
+ num_heads=8,
65
+ num_levels=3,
66
+ num_points=4,
67
+ dropout=0.0,
68
+ batch_first=True,
69
+ ),
70
+ ffn_cfg=dict(
71
+ embed_dims=256,
72
+ feedforward_channels=1024,
73
+ num_fcs=2,
74
+ ffn_drop=0.0,
75
+ act_cfg=dict(type="ReLU", inplace=True),
76
+ ),
77
+ ),
78
+ ),
79
+ positional_encoding=dict(num_feats=128, normalize=True),
80
+ ),
81
+ enforce_decoder_input_project=False,
82
+ positional_encoding=dict(num_feats=128, normalize=True),
83
+ transformer_decoder=dict( # Mask2FormerTransformerDecoder
84
+ return_intermediate=True,
85
+ num_layers=9,
86
+ layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
87
+ self_attn_cfg=dict( # MultiheadAttention
88
+ embed_dims=256, num_heads=8, dropout=0.0, batch_first=True
89
+ ),
90
+ cross_attn_cfg=dict( # MultiheadAttention
91
+ embed_dims=256, num_heads=8, dropout=0.0, batch_first=True
92
+ ),
93
+ ffn_cfg=dict(
94
+ embed_dims=256,
95
+ feedforward_channels=2048,
96
+ num_fcs=2,
97
+ ffn_drop=0.0,
98
+ act_cfg=dict(type="ReLU", inplace=True),
99
+ ),
100
+ ),
101
+ init_cfg=None,
102
+ ),
103
+ loss_cls=dict(
104
+ type="CrossEntropyLoss",
105
+ use_sigmoid=False,
106
+ loss_weight=2.0,
107
+ reduction="mean",
108
+ class_weight=[1.0] * num_classes + [0.1],
109
+ ),
110
+ loss_mask=dict(
111
+ type="CrossEntropyLoss", use_sigmoid=True, reduction="mean", loss_weight=5.0
112
+ ),
113
+ loss_dice=dict(
114
+ type="DiceLoss",
115
+ use_sigmoid=True,
116
+ activate=True,
117
+ reduction="mean",
118
+ naive_dice=True,
119
+ eps=1.0,
120
+ loss_weight=5.0,
121
+ ),
122
+ ),
123
+ panoptic_fusion_head=dict(
124
+ type="MaskFormerFusionHead",
125
+ num_things_classes=num_things_classes,
126
+ num_stuff_classes=num_stuff_classes,
127
+ loss_panoptic=None,
128
+ init_cfg=None,
129
+ ),
130
+ train_cfg=dict(
131
+ num_points=12544,
132
+ oversample_ratio=3.0,
133
+ importance_sample_ratio=0.75,
134
+ assigner=dict(
135
+ type="HungarianAssigner",
136
+ match_costs=[
137
+ dict(type="ClassificationCost", weight=2.0),
138
+ dict(type="CrossEntropyLossCost", weight=5.0, use_sigmoid=True),
139
+ dict(type="DiceCost", weight=5.0, pred_act=True, eps=1.0),
140
+ ],
141
+ ),
142
+ sampler=dict(type="MaskPseudoSampler"),
143
+ ),
144
+ test_cfg=dict(
145
+ panoptic_on=True,
146
+ # For now, the dataset does not support
147
+ # evaluating semantic segmentation metric.
148
+ semantic_on=False,
149
+ instance_on=True,
150
+ # max_per_image is for instance segmentation.
151
+ max_per_image=100,
152
+ iou_thr=0.8,
153
+ # In Mask2Former's panoptic postprocessing,
154
+ # it will filter mask area where score is less than 0.5 .
155
+ filter_low_score=True,
156
+ ),
157
+ init_cfg=None,
158
+ )
159
+
160
+ # dataset settings
161
+ data_root = "data/coco/"
162
+ train_pipeline = [
163
+ dict(
164
+ type="LoadImageFromFile", to_float32=True, backend_args={{_base_.backend_args}}
165
+ ),
166
+ dict(
167
+ type="LoadPanopticAnnotations",
168
+ with_bbox=True,
169
+ with_mask=True,
170
+ with_seg=True,
171
+ backend_args={{_base_.backend_args}},
172
+ ),
173
+ dict(type="RandomFlip", prob=0.5),
174
+ # large scale jittering
175
+ dict(
176
+ type="RandomResize", scale=image_size, ratio_range=(0.1, 2.0), keep_ratio=True
177
+ ),
178
+ dict(
179
+ type="RandomCrop",
180
+ crop_size=image_size,
181
+ crop_type="absolute",
182
+ recompute_bbox=True,
183
+ allow_negative_crop=True,
184
+ ),
185
+ dict(type="PackDetInputs"),
186
+ ]
187
+
188
+ train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
189
+
190
+ val_evaluator = [
191
+ dict(
192
+ type="CocoPanopticMetric",
193
+ ann_file=data_root + "annotations/panoptic_val2017.json",
194
+ seg_prefix=data_root + "annotations/panoptic_val2017/",
195
+ backend_args={{_base_.backend_args}},
196
+ ),
197
+ dict(
198
+ type="CocoMetric",
199
+ ann_file=data_root + "annotations/instances_val2017.json",
200
+ metric=["bbox", "segm"],
201
+ backend_args={{_base_.backend_args}},
202
+ ),
203
+ ]
204
+ test_evaluator = val_evaluator
205
+
206
+ # optimizer
207
+ embed_multi = dict(lr_mult=1.0, decay_mult=0.0)
208
+ optim_wrapper = dict(
209
+ type="OptimWrapper",
210
+ optimizer=dict(
211
+ type="AdamW", lr=0.0001, weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999)
212
+ ),
213
+ paramwise_cfg=dict(
214
+ custom_keys={
215
+ "backbone": dict(lr_mult=0.1, decay_mult=1.0),
216
+ "query_embed": embed_multi,
217
+ "query_feat": embed_multi,
218
+ "level_embed": embed_multi,
219
+ },
220
+ norm_decay_mult=0.0,
221
+ ),
222
+ clip_grad=dict(max_norm=0.01, norm_type=2),
223
+ )
224
+
225
+ # learning policy
226
+ max_iters = 368750
227
+ param_scheduler = dict(
228
+ type="MultiStepLR",
229
+ begin=0,
230
+ end=max_iters,
231
+ by_epoch=False,
232
+ milestones=[327778, 355092],
233
+ gamma=0.1,
234
+ )
235
+
236
+ # Before 365001th iteration, we do evaluation every 5000 iterations.
237
+ # After 365000th iteration, we do evaluation every 368750 iterations,
238
+ # which means that we do evaluation at the end of training.
239
+ interval = 5000
240
+ dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
241
+ train_cfg = dict(
242
+ type="IterBasedTrainLoop",
243
+ max_iters=max_iters,
244
+ val_interval=interval,
245
+ dynamic_intervals=dynamic_intervals,
246
+ )
247
+ val_cfg = dict(type="ValLoop")
248
+ test_cfg = dict(type="TestLoop")
249
+
250
+ default_hooks = dict(
251
+ checkpoint=dict(
252
+ type="CheckpointHook",
253
+ by_epoch=False,
254
+ save_last=True,
255
+ max_keep_ckpts=3,
256
+ interval=interval,
257
+ )
258
+ )
259
+ log_processor = dict(type="LogProcessor", window_size=50, by_epoch=False)
260
+
261
+ # Default setting for scaling LR automatically
262
+ # - `enable` means enable scaling LR automatically
263
+ # or not by default.
264
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
265
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
exhm/detailer/dddetailer/config/mmdet_anime-face_yolov3.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # _base_ = ["../_base_/schedules/schedule_1x.py", "../_base_/default_runtime.py"]
2
+ # model settings
3
+ data_preprocessor = dict(
4
+ type="DetDataPreprocessor",
5
+ mean=[0, 0, 0],
6
+ std=[255.0, 255.0, 255.0],
7
+ bgr_to_rgb=True,
8
+ pad_size_divisor=32,
9
+ )
10
+ model = dict(
11
+ type="YOLOV3",
12
+ data_preprocessor=data_preprocessor,
13
+ backbone=dict(
14
+ type="Darknet",
15
+ depth=53,
16
+ out_indices=(3, 4, 5),
17
+ init_cfg=dict(type="Pretrained", checkpoint="open-mmlab://darknet53"),
18
+ ),
19
+ neck=dict(
20
+ type="YOLOV3Neck",
21
+ num_scales=3,
22
+ in_channels=[1024, 512, 256],
23
+ out_channels=[512, 256, 128],
24
+ ),
25
+ bbox_head=dict(
26
+ type="YOLOV3Head",
27
+ num_classes=1,
28
+ in_channels=[512, 256, 128],
29
+ out_channels=[1024, 512, 256],
30
+ anchor_generator=dict(
31
+ type="YOLOAnchorGenerator",
32
+ base_sizes=[
33
+ [(116, 90), (156, 198), (373, 326)],
34
+ [(30, 61), (62, 45), (59, 119)],
35
+ [(10, 13), (16, 30), (33, 23)],
36
+ ],
37
+ strides=[32, 16, 8],
38
+ ),
39
+ bbox_coder=dict(type="YOLOBBoxCoder"),
40
+ featmap_strides=[32, 16, 8],
41
+ loss_cls=dict(
42
+ type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0, reduction="sum"
43
+ ),
44
+ loss_conf=dict(
45
+ type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0, reduction="sum"
46
+ ),
47
+ loss_xy=dict(
48
+ type="CrossEntropyLoss", use_sigmoid=True, loss_weight=2.0, reduction="sum"
49
+ ),
50
+ loss_wh=dict(type="MSELoss", loss_weight=2.0, reduction="sum"),
51
+ ),
52
+ # training and testing settings
53
+ train_cfg=dict(
54
+ assigner=dict(
55
+ type="GridAssigner", pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0
56
+ )
57
+ ),
58
+ test_cfg=dict(
59
+ nms_pre=1000,
60
+ min_bbox_size=0,
61
+ score_thr=0.05,
62
+ conf_thr=0.005,
63
+ nms=dict(type="nms", iou_threshold=0.45),
64
+ max_per_img=100,
65
+ ),
66
+ )
67
+ # dataset settings
68
+ dataset_type = "CocoDataset"
69
+ data_root = "data/coco/"
70
+
71
+ # Example to use different file client
72
+ # Method 1: simply set the data root and let the file I/O module
73
+ # automatically infer from prefix (not support LMDB and Memcache yet)
74
+
75
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
76
+
77
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
78
+ # backend_args = dict(
79
+ # backend='petrel',
80
+ # path_mapping=dict({
81
+ # './data/': 's3://openmmlab/datasets/detection/',
82
+ # 'data/': 's3://openmmlab/datasets/detection/'
83
+ # }))
84
+ backend_args = None
85
+
86
+ train_pipeline = [
87
+ dict(type="LoadImageFromFile", backend_args=backend_args),
88
+ dict(type="LoadAnnotations", with_bbox=True),
89
+ dict(
90
+ type="Expand",
91
+ mean=data_preprocessor["mean"],
92
+ to_rgb=data_preprocessor["bgr_to_rgb"],
93
+ ratio_range=(1, 2),
94
+ ),
95
+ dict(
96
+ type="MinIoURandomCrop",
97
+ min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
98
+ min_crop_size=0.3,
99
+ ),
100
+ dict(type="RandomResize", scale=[(320, 320), (608, 608)], keep_ratio=True),
101
+ dict(type="RandomFlip", prob=0.5),
102
+ dict(type="PhotoMetricDistortion"),
103
+ dict(type="PackDetInputs"),
104
+ ]
105
+ test_pipeline = [
106
+ dict(type="LoadImageFromFile", backend_args=backend_args),
107
+ dict(type="Resize", scale=(608, 608), keep_ratio=True),
108
+ dict(type="LoadAnnotations", with_bbox=True),
109
+ dict(
110
+ type="PackDetInputs",
111
+ meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"),
112
+ ),
113
+ ]
114
+
115
+ train_dataloader = dict(
116
+ batch_size=8,
117
+ num_workers=4,
118
+ persistent_workers=True,
119
+ sampler=dict(type="DefaultSampler", shuffle=True),
120
+ batch_sampler=dict(type="AspectRatioBatchSampler"),
121
+ dataset=dict(
122
+ type=dataset_type,
123
+ data_root=data_root,
124
+ ann_file="annotations/instances_train2017.json",
125
+ data_prefix=dict(img="train2017/"),
126
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
127
+ pipeline=train_pipeline,
128
+ backend_args=backend_args,
129
+ ),
130
+ )
131
+ val_dataloader = dict(
132
+ batch_size=1,
133
+ num_workers=2,
134
+ persistent_workers=True,
135
+ drop_last=False,
136
+ sampler=dict(type="DefaultSampler", shuffle=False),
137
+ dataset=dict(
138
+ type=dataset_type,
139
+ data_root=data_root,
140
+ ann_file="annotations/instances_val2017.json",
141
+ data_prefix=dict(img="val2017/"),
142
+ test_mode=True,
143
+ pipeline=test_pipeline,
144
+ backend_args=backend_args,
145
+ ),
146
+ )
147
+ test_dataloader = val_dataloader
148
+
149
+ val_evaluator = dict(
150
+ type="CocoMetric",
151
+ ann_file=data_root + "annotations/instances_val2017.json",
152
+ metric="bbox",
153
+ backend_args=backend_args,
154
+ )
155
+ test_evaluator = val_evaluator
156
+
157
+ train_cfg = dict(max_epochs=273, val_interval=7)
158
+
159
+ # optimizer
160
+ optim_wrapper = dict(
161
+ type="OptimWrapper",
162
+ optimizer=dict(type="SGD", lr=0.001, momentum=0.9, weight_decay=0.0005),
163
+ clip_grad=dict(max_norm=35, norm_type=2),
164
+ )
165
+
166
+ # learning policy
167
+ param_scheduler = [
168
+ dict(type="LinearLR", start_factor=0.1, by_epoch=False, begin=0, end=2000),
169
+ dict(type="MultiStepLR", by_epoch=True, milestones=[218, 246], gamma=0.1),
170
+ ]
171
+
172
+ default_hooks = dict(checkpoint=dict(type="CheckpointHook", interval=7))
173
+
174
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
175
+ # USER SHOULD NOT CHANGE ITS VALUES.
176
+ # base_batch_size = (8 GPUs) x (8 samples per GPU)
177
+ auto_scale_lr = dict(base_batch_size=64)
exhm/detailer/dddetailer/config/mmdet_dd-person_mask2former.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["./mask2former_r50_8xb2-lsj-50e_coco-panoptic.py"]
2
+
3
+ num_things_classes = 1
4
+ num_stuff_classes = 0
5
+ num_classes = num_things_classes + num_stuff_classes
6
+ image_size = (1024, 1024)
7
+ batch_augments = [
8
+ dict(
9
+ type="BatchFixedSizePad",
10
+ size=image_size,
11
+ img_pad_value=0,
12
+ pad_mask=True,
13
+ mask_pad_value=0,
14
+ pad_seg=False,
15
+ )
16
+ ]
17
+ data_preprocessor = dict(
18
+ type="DetDataPreprocessor",
19
+ mean=[123.675, 116.28, 103.53],
20
+ std=[58.395, 57.12, 57.375],
21
+ bgr_to_rgb=True,
22
+ pad_size_divisor=32,
23
+ pad_mask=True,
24
+ mask_pad_value=0,
25
+ pad_seg=False,
26
+ batch_augments=batch_augments,
27
+ )
28
+ model = dict(
29
+ data_preprocessor=data_preprocessor,
30
+ panoptic_head=dict(
31
+ num_things_classes=num_things_classes,
32
+ num_stuff_classes=num_stuff_classes,
33
+ loss_cls=dict(class_weight=[1.0] * num_classes + [0.1]),
34
+ ),
35
+ panoptic_fusion_head=dict(
36
+ num_things_classes=num_things_classes, num_stuff_classes=num_stuff_classes
37
+ ),
38
+ test_cfg=dict(panoptic_on=False),
39
+ )
40
+
41
+ # dataset settings
42
+ train_pipeline = [
43
+ dict(type="LoadImageFromFile", to_float32=True, backend_args=None),
44
+ dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
45
+ dict(type="RandomFlip", prob=0.5),
46
+ # large scale jittering
47
+ dict(
48
+ type="RandomResize",
49
+ scale=image_size,
50
+ ratio_range=(0.1, 2.0),
51
+ resize_type="Resize",
52
+ keep_ratio=True,
53
+ ),
54
+ dict(
55
+ type="RandomCrop",
56
+ crop_size=image_size,
57
+ crop_type="absolute",
58
+ recompute_bbox=True,
59
+ allow_negative_crop=True,
60
+ ),
61
+ dict(type="FilterAnnotations", min_gt_bbox_wh=(1e-5, 1e-5), by_mask=True),
62
+ dict(type="PackDetInputs"),
63
+ ]
64
+
65
+ test_pipeline = [
66
+ dict(type="LoadImageFromFile", to_float32=True, backend_args=None),
67
+ dict(type="Resize", scale=(1333, 800), keep_ratio=True),
68
+ # If you don't have a gt annotation, delete the pipeline
69
+ dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
70
+ dict(
71
+ type="PackDetInputs",
72
+ meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"),
73
+ ),
74
+ ]
75
+
76
+ dataset_type = "CocoDataset"
77
+ data_root = "data/coco/"
78
+
79
+ train_dataloader = dict(
80
+ dataset=dict(
81
+ type=dataset_type,
82
+ ann_file="annotations/instances_train2017.json",
83
+ data_prefix=dict(img="train2017/"),
84
+ pipeline=train_pipeline,
85
+ )
86
+ )
87
+ val_dataloader = dict(
88
+ dataset=dict(
89
+ type=dataset_type,
90
+ ann_file="annotations/instances_val2017.json",
91
+ data_prefix=dict(img="val2017/"),
92
+ pipeline=test_pipeline,
93
+ )
94
+ )
95
+ test_dataloader = val_dataloader
96
+
97
+ val_evaluator = dict(
98
+ _delete_=True,
99
+ type="CocoMetric",
100
+ ann_file=data_root + "annotations/instances_val2017.json",
101
+ metric=["bbox", "segm"],
102
+ format_only=False,
103
+ backend_args=None,
104
+ )
105
+ test_evaluator = val_evaluator
exhm/detailer/dddetailer/install.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+ from textwrap import dedent
4
+
5
+ from packaging import version
6
+
7
+ import launch
8
+ from launch import is_installed, run, run_pip
9
+
10
# Honor the webui's --skip-install flag when the running webui exposes it.
try:
    skip_install = launch.args.skip_install
except Exception:
    skip_install = False

# Interpreter used for the pip/mim subprocess calls below.
python = sys.executable
16
+
17
def check_ddetailer() -> bool:
    """Return True when the legacy 'ddetailer' extension is NOT installed.

    dddetailer cannot coexist with the original extension, so the caller
    aborts when this returns False.
    """
    try:
        from modules.paths import extensions_dir

        ext_root = Path(extensions_dir)
    except ImportError:
        # Older webui versions only expose data_path.
        from modules.paths import data_path

        ext_root = Path(data_path, "extensions")

    for entry in ext_root.iterdir():
        if entry.is_dir() and entry.name.startswith("ddetailer"):
            return False
    return True
29
+
30
+
31
def check_install() -> bool:
    """Return True when working mmcv>=2.0 and mmdet>=3.0 installs are present."""
    try:
        import mmcv
        import mmdet
        # Probing the mmdet 3.x API surface; import failure means a 2.x install.
        from mmdet.evaluation import get_classes  # noqa: F401
    except Exception:
        return False

    if not (hasattr(mmcv, "__version__") and hasattr(mmdet, "__version__")):
        return False

    return (
        version.parse(mmcv.__version__) >= version.parse("2.0.0")
        and version.parse(mmdet.__version__) >= version.parse("3.0.0")
    )
45
+
46
+
47
def install():
    """Install the extension's dependencies (pycocotools, openmim, mmcv/mmdet).

    Commands run in a subprocess through the webui's `run` helper, which
    executes via a shell — so both the interpreter path and any version
    specifiers must be quoted. mmcv/mmdet are reinstalled only when
    check_install() reports them missing or outdated.
    """
    if not is_installed("pycocotools"):
        # Quote the interpreter path: it may contain spaces (consistent with
        # the other run() calls below).
        run(f'"{python}" -m pip install pycocotools', live=True)

    if not is_installed("mim"):
        run_pip("install openmim", desc="openmim")

    if not check_install():
        print("Uninstalling mmcv mmdet... (if installed)")
        run(f'"{python}" -m pip uninstall -y mmcv mmcv-full mmdet mmengine', live=True)
        print("Installing mmcv mmdet...")
        # The version specifiers must be quoted: the command goes through a
        # shell, where an unquoted ">" is output redirection and would strip
        # the ">=2.0.0"/">=3.0.0" pins.
        run(f'"{python}" -m mim install -U "mmcv>=2.0.0" "mmdet>=3.0.0"', live=True)
59
+
60
+
61
# Refuse to proceed while the original ddetailer extension (or its models)
# are still installed — the two extensions conflict.
if not check_ddetailer():
    message = """
    [-] dddetailer: Please remove the following:
    1. the original ddetailer extension - "stable-diffusion-webui/extensions/ddetailer" folder.
    2. original model files - "stable-diffusion-webui/models/mmdet" folder.
    """
    message = dedent(message)
    raise RuntimeError(message)

if not skip_install:
    install()
exhm/detailer/dddetailer/misc/ddetailer_example_1.png ADDED
exhm/detailer/dddetailer/misc/ddetailer_example_2.png ADDED
exhm/detailer/dddetailer/misc/ddetailer_example_3.gif ADDED
exhm/detailer/dddetailer/pyproject.toml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Project metadata for the dddetailer Stable Diffusion webui extension.
[project]
name = "dddetailer"
version = "23.8.0"
description = "An object detection and auto-mask extension for Stable Diffusion web UI."
authors = [
    {name = "dowon", email = "ks2515@naver.com"},
]
requires-python = ">=3.8,<3.12"
readme = "README.md"
license = {text = "MIT"}

[project.urls]
repository = "https://github.com/Bing-su/dddetailer"

# Lint/format settings; "modules" and "launch" are provided by the host webui,
# hence treated as first-party by isort/ruff.
[tool.isort]
profile = "black"
known_first_party = ["modules", "launch"]

[tool.black]
line-length = 120

[tool.ruff]
select = ["A", "B", "C4", "E", "F", "I001", "ISC", "N", "PIE", "PT", "RET", "SIM", "UP", "W"]
ignore = ["B008", "B905", "E501"]
unfixable = ["F401"]
line-length = 120

[tool.ruff.isort]
known-first-party = ["modules", "launch"]
exhm/detailer/dddetailer/scripts/dddetailer.py ADDED
@@ -0,0 +1,1057 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from copy import copy
4
+ from pathlib import Path
5
+ from textwrap import dedent
6
+
7
+ import cv2
8
+ import gradio as gr
9
+ import numpy as np
10
+ from basicsr.utils.download_util import load_file_from_url
11
+ from packaging.version import parse
12
+ from PIL import Image
13
+
14
+ from launch import run
15
+ from modules import (
16
+ devices,
17
+ images,
18
+ modelloader,
19
+ processing,
20
+ script_callbacks,
21
+ scripts,
22
+ shared,
23
+ )
24
+ from modules.paths import data_path, models_path
25
+ from modules.processing import (
26
+ Processed,
27
+ StableDiffusionProcessingImg2Img,
28
+ StableDiffusionProcessingTxt2Img,
29
+ )
30
+ from modules.sd_models import model_hash
31
+ from modules.shared import cmd_opts, opts, state
32
+
33
# Script title shown in the webui "Script" dropdown and written to infotext.
DETECTION_DETAILER = "Detection Detailer"
# Detection models live under <webui>/models/mmdet (bbox/ and segm/ subfolders).
dd_models_path = os.path.join(models_path, "mmdet")
# Interpreter used for the pip/mim subprocess calls in startup().
python = sys.executable
36
+
37
+
38
def check_ddetailer() -> bool:
    """Return True when the legacy 'ddetailer' extension is NOT installed."""
    try:
        from modules.paths import extensions_dir

        ext_root = Path(extensions_dir)
    except ImportError:
        # Older webui versions only expose data_path.
        from modules.paths import data_path

        ext_root = Path(data_path, "extensions")

    for entry in ext_root.iterdir():
        if entry.is_dir() and entry.name.startswith("ddetailer"):
            return False
    return True
50
+
51
+
52
def check_install() -> bool:
    """Return True when working mmcv>=2.0 and mmdet>=3.0 installs are present."""
    try:
        import mmcv
        import mmdet
        # Probing the mmdet 3.x API surface; import failure means a 2.x install.
        from mmdet.evaluation import get_classes  # noqa: F401
    except Exception:
        return False

    if not (hasattr(mmcv, "__version__") and hasattr(mmdet, "__version__")):
        return False

    return (
        parse(mmcv.__version__) >= parse("2.0.0")
        and parse(mmdet.__version__) >= parse("3.0.0")
    )
66
+
67
+
68
def list_models(model_path):
    """Return dropdown titles ("relative/name.pth [hash]") for every .pth model
    found under model_path."""
    found = modelloader.load_models(model_path=model_path, ext_filter=[".pth"])

    def make_title(path, shorthash):
        # Prefer the path relative to model_path; fall back to the bare filename.
        abspath = os.path.abspath(path)
        if abspath.startswith(model_path):
            name = abspath.replace(model_path, "")
        else:
            name = os.path.basename(path)

        if name.startswith(("\\", "/")):
            name = name[1:]

        shortname = os.path.splitext(name.replace("/", "_").replace("\\", "_"))[0]
        return f"{name} [{shorthash}]", shortname

    return [make_title(filename, model_hash(filename))[0] for filename in found]
93
+
94
+
95
def startup():
    """Validate the environment and fetch the default detection models.

    Aborts when the legacy ddetailer extension is present, (re)installs
    mmcv/mmdet when check_install() fails, and on first run downloads the
    default bbox (anime-face yolov3) and segm (mask2former) models together
    with their mmdet config files.
    """
    if not check_ddetailer():
        message = """
        [-] dddetailer: dddetailer doesn't work with the original ddetailer extension.
        dddetailer는 원본 ddetailer 확장이 있을 때 동작하지 않습니다.
        """
        raise RuntimeError(dedent(message))

    if not check_install():
        run(f'"{python}" -m pip uninstall -y mmcv mmcv-full mmdet mmengine')
        run(f'"{python}" -m pip install openmim', desc="Installing openmim", errdesc="Couldn't install openmim")
        run(
            # The version specifiers must be quoted: run() executes through a
            # shell, where an unquoted ">" is output redirection and would
            # strip the ">=2.0.0"/">=3.0.0" pins.
            f'"{python}" -m mim install "mmcv>=2.0.0" "mmdet>=3.0.0"',
            desc="Installing mmdet",
            errdesc="Couldn't install mmdet",
        )

    if len(list_models(dd_models_path)) == 0:
        print("No detection models found, downloading...")
        bbox_path = os.path.join(dd_models_path, "bbox")
        segm_path = os.path.join(dd_models_path, "segm")
        # bbox
        load_file_from_url(
            "https://huggingface.co/dustysys/ddetailer/resolve/main/mmdet/bbox/mmdet_anime-face_yolov3.pth",
            bbox_path,
        )
        load_file_from_url(
            "https://raw.githubusercontent.com/Bing-su/dddetailer/master/config/mmdet_anime-face_yolov3.py",
            bbox_path,
        )
        # segm
        load_file_from_url(
            "https://github.com/Bing-su/dddetailer/releases/download/segm/mmdet_dd-person_mask2former.pth",
            segm_path,
        )
        load_file_from_url(
            "https://raw.githubusercontent.com/Bing-su/dddetailer/master/config/mmdet_dd-person_mask2former.py",
            segm_path,
        )
        load_file_from_url(
            "https://raw.githubusercontent.com/Bing-su/dddetailer/master/config/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py",
            segm_path,
        )
        load_file_from_url(
            "https://raw.githubusercontent.com/Bing-su/dddetailer/master/config/coco_panoptic.py",
            segm_path,
        )


startup()
145
+
146
+
147
def gr_show(visible=True):
    """Gradio update payload that toggles a component's visibility."""
    return dict(visible=visible, __type__="update")
149
+
150
+
151
def ddetailer_extra_generation_params(
    dd_prompt,
    dd_neg_prompt,
    dd_model_a,
    dd_conf_a,
    dd_dilation_factor_a,
    dd_offset_x_a,
    dd_offset_y_a,
    dd_preprocess_b,
    dd_bitwise_op,
    dd_model_b,
    dd_conf_b,
    dd_dilation_factor_b,
    dd_offset_x_b,
    dd_offset_y_b,
    dd_mask_blur,
    dd_denoising_strength,
    dd_inpaint_full_res,
    dd_inpaint_full_res_padding,
    dd_cfg_scale,
):
    """Build the infotext key/value pairs recorded for a DDetailer run.

    Key order is preserved in the generated infotext; empty prompt overrides
    are omitted entirely.
    """
    params = {
        "DDetailer prompt": dd_prompt,
        "DDetailer neg prompt": dd_neg_prompt,
        "DDetailer model a": dd_model_a,
        "DDetailer conf a": dd_conf_a,
        "DDetailer dilation a": dd_dilation_factor_a,
        "DDetailer offset x a": dd_offset_x_a,
        "DDetailer offset y a": dd_offset_y_a,
        "DDetailer preprocess b": dd_preprocess_b,
        "DDetailer bitwise": dd_bitwise_op,
        "DDetailer model b": dd_model_b,
        "DDetailer conf b": dd_conf_b,
        "DDetailer dilation b": dd_dilation_factor_b,
        "DDetailer offset x b": dd_offset_x_b,
        "DDetailer offset y b": dd_offset_y_b,
        "DDetailer mask blur": dd_mask_blur,
        "DDetailer denoising": dd_denoising_strength,
        "DDetailer inpaint full": dd_inpaint_full_res,
        "DDetailer inpaint padding": dd_inpaint_full_res_padding,
        "DDetailer cfg": dd_cfg_scale,
        "Script": DETECTION_DETAILER,
    }
    # Drop the prompt overrides when they were left empty in the UI.
    for key, value in (("DDetailer prompt", dd_prompt), ("DDetailer neg prompt", dd_neg_prompt)):
        if not value:
            del params[key]
    return params
199
+
200
+
201
class DetectionDetailerScript(scripts.Script):
    """Detect regions with mmdet models and inpaint each detection."""

    def title(self):
        # Name listed in the webui "Script" dropdown.
        return DETECTION_DETAILER
204
+
205
+ def show(self, is_img2img):
206
+ return True
207
+
208
    def ui(self, is_img2img):
        """Build the script's gradio controls.

        Returns the component list whose values are forwarded positionally to
        run(). txt2img additionally gets prompt/negative-prompt overrides for
        the inpaint passes.
        NOTE(review): nesting reconstructed from a diff dump — verify against
        upstream dddetailer.
        """
        import modules.ui

        model_list = list_models(dd_models_path)
        model_list.insert(0, "None")
        if is_img2img:
            info = gr.HTML(
                '<p style="margin-bottom:0.75em">Recommended settings: Use from inpaint tab, inpaint at full res ON, denoise < 0.5</p>'
            )
        else:
            info = gr.HTML("")
        dd_prompt = None
        with gr.Group():
            if not is_img2img:
                # txt2img only: optional prompt overrides for the inpaint passes.
                with gr.Row():
                    dd_prompt = gr.Textbox(
                        label="dd_prompt",
                        elem_id="t2i_dd_prompt",
                        show_label=False,
                        lines=3,
                        placeholder="Ddetailer Prompt",
                    )

                with gr.Row():
                    dd_neg_prompt = gr.Textbox(
                        label="dd_neg_prompt",
                        elem_id="t2i_dd_neg_prompt",
                        show_label=False,
                        lines=2,
                        placeholder="Ddetailer Negative prompt",
                    )

            # Primary (A) detection model and its mask post-processing knobs.
            with gr.Row():
                dd_model_a = gr.Dropdown(
                    label="Primary detection model (A)",
                    choices=model_list,
                    value="None",
                    visible=True,
                    type="value",
                )

            with gr.Row():
                dd_conf_a = gr.Slider(
                    label="Detection confidence threshold % (A)",
                    minimum=0,
                    maximum=100,
                    step=1,
                    value=30,
                    visible=True,
                )
                dd_dilation_factor_a = gr.Slider(
                    label="Dilation factor (A)",
                    minimum=0,
                    maximum=255,
                    step=1,
                    value=4,
                    visible=True,
                )

            with gr.Row():
                dd_offset_x_a = gr.Slider(
                    label="X offset (A)",
                    minimum=-200,
                    maximum=200,
                    step=1,
                    value=0,
                    visible=True,
                )
                dd_offset_y_a = gr.Slider(
                    label="Y offset (A)",
                    minimum=-200,
                    maximum=200,
                    step=1,
                    value=0,
                    visible=True,
                )

            with gr.Row():
                dd_preprocess_b = gr.Checkbox(
                    label="Inpaint model B detections before model A runs",
                    value=False,
                    visible=True,
                )
                dd_bitwise_op = gr.Radio(
                    label="Bitwise operation",
                    choices=["None", "A&B", "A-B"],
                    value="None",
                    visible=True,
                )

        br = gr.HTML("<br>")

        # Secondary (B) detection model, used for preprocessing or mask algebra.
        with gr.Group():
            with gr.Row():
                dd_model_b = gr.Dropdown(
                    label="Secondary detection model (B) (optional)",
                    choices=model_list,
                    value="None",
                    visible=True,
                    type="value",
                )

            with gr.Row():
                dd_conf_b = gr.Slider(
                    label="Detection confidence threshold % (B)",
                    minimum=0,
                    maximum=100,
                    step=1,
                    value=30,
                    visible=True,
                )
                dd_dilation_factor_b = gr.Slider(
                    label="Dilation factor (B)",
                    minimum=0,
                    maximum=255,
                    step=1,
                    value=4,
                    visible=True,
                )

            with gr.Row():
                dd_offset_x_b = gr.Slider(
                    label="X offset (B)",
                    minimum=-200,
                    maximum=200,
                    step=1,
                    value=0,
                    visible=True,
                )
                dd_offset_y_b = gr.Slider(
                    label="Y offset (B)",
                    minimum=-200,
                    maximum=200,
                    step=1,
                    value=0,
                    visible=True,
                )

        # Inpainting parameters; several are txt2img-only (img2img supplies its own).
        with gr.Group():
            with gr.Row():
                dd_mask_blur = gr.Slider(
                    label="Mask blur ",
                    minimum=0,
                    maximum=64,
                    step=1,
                    value=4,
                    visible=(not is_img2img),
                )
                dd_denoising_strength = gr.Slider(
                    label="Denoising strength (Inpaint)",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=0.4,
                    visible=(not is_img2img),
                )

            with gr.Row():
                dd_inpaint_full_res = gr.Checkbox(
                    label="Inpaint at full resolution ",
                    value=True,
                    visible=(not is_img2img),
                )
                dd_inpaint_full_res_padding = gr.Slider(
                    label="Inpaint at full resolution padding, pixels ",
                    minimum=0,
                    maximum=256,
                    step=4,
                    value=32,
                    visible=(not is_img2img),
                )

            with gr.Row():
                dd_cfg_scale = gr.Slider(
                    label="CFG Scale",
                    minimum=0,
                    maximum=30,
                    step=0.5,
                    value=7,
                    visible=True,
                )

        # Hide the A-dependent controls while no primary model is selected.
        dd_model_a.change(
            lambda modelname: {
                dd_model_b: gr_show(modelname != "None"),
                dd_conf_a: gr_show(modelname != "None"),
                dd_dilation_factor_a: gr_show(modelname != "None"),
                dd_offset_x_a: gr_show(modelname != "None"),
                dd_offset_y_a: gr_show(modelname != "None"),
            },
            inputs=[dd_model_a],
            outputs=[
                dd_model_b,
                dd_conf_a,
                dd_dilation_factor_a,
                dd_offset_x_a,
                dd_offset_y_a,
            ],
        )

        # Same for the B-dependent controls.
        dd_model_b.change(
            lambda modelname: {
                dd_preprocess_b: gr_show(modelname != "None"),
                dd_bitwise_op: gr_show(modelname != "None"),
                dd_conf_b: gr_show(modelname != "None"),
                dd_dilation_factor_b: gr_show(modelname != "None"),
                dd_offset_x_b: gr_show(modelname != "None"),
                dd_offset_y_b: gr_show(modelname != "None"),
            },
            inputs=[dd_model_b],
            outputs=[
                dd_preprocess_b,
                dd_bitwise_op,
                dd_conf_b,
                dd_dilation_factor_b,
                dd_offset_x_b,
                dd_offset_y_b,
            ],
        )
        # Register infotext fields so "Send to ..." restores these settings
        # (only possible on txt2img, where the prompt boxes exist).
        if dd_prompt:
            self.infotext_fields = (
                (dd_prompt, "DDetailer prompt"),
                (dd_neg_prompt, "DDetailer neg prompt"),
                (dd_model_a, "DDetailer model a"),
                (dd_conf_a, "DDetailer conf a"),
                (dd_dilation_factor_a, "DDetailer dilation a"),
                (dd_offset_x_a, "DDetailer offset x a"),
                (dd_offset_y_a, "DDetailer offset y a"),
                (dd_preprocess_b, "DDetailer preprocess b"),
                (dd_bitwise_op, "DDetailer bitwise"),
                (dd_model_b, "DDetailer model b"),
                (dd_conf_b, "DDetailer conf b"),
                (dd_dilation_factor_b, "DDetailer dilation b"),
                (dd_offset_x_b, "DDetailer offset x b"),
                (dd_offset_y_b, "DDetailer offset y b"),
                (dd_mask_blur, "DDetailer mask blur"),
                (dd_denoising_strength, "DDetailer denoising"),
                (dd_inpaint_full_res, "DDetailer inpaint full"),
                (dd_inpaint_full_res_padding, "DDetailer inpaint padding"),
                (dd_cfg_scale, "DDetailer cfg"),
            )

        # Order must match run()'s positional parameters.
        ret = [
            info,
            dd_model_a,
            dd_conf_a,
            dd_dilation_factor_a,
            dd_offset_x_a,
            dd_offset_y_a,
            dd_preprocess_b,
            dd_bitwise_op,
            br,
            dd_model_b,
            dd_conf_b,
            dd_dilation_factor_b,
            dd_offset_x_b,
            dd_offset_y_b,
            dd_mask_blur,
            dd_denoising_strength,
            dd_inpaint_full_res,
            dd_inpaint_full_res_padding,
            dd_cfg_scale,
        ]
        if not is_img2img:
            ret += [dd_prompt, dd_neg_prompt]
        return ret
474
+
475
    def run(
        self,
        p,
        info,
        dd_model_a,
        dd_conf_a,
        dd_dilation_factor_a,
        dd_offset_x_a,
        dd_offset_y_a,
        dd_preprocess_b,
        dd_bitwise_op,
        br,
        dd_model_b,
        dd_conf_b,
        dd_dilation_factor_b,
        dd_offset_x_b,
        dd_offset_y_b,
        dd_mask_blur,
        dd_denoising_strength,
        dd_inpaint_full_res,
        dd_inpaint_full_res_padding,
        dd_cfg_scale,
        dd_prompt=None,
        dd_neg_prompt=None,
    ):
        """Generate, detect, and inpaint.

        For each of n_iter generations: produce (txt2img) or take (img2img)
        an initial image, optionally inpaint model B detections first, then
        detect with model A (optionally combined with B via A&B / A-B mask
        algebra) and inpaint every remaining detection in turn.
        NOTE(review): statement nesting reconstructed from a diff dump —
        verify against upstream dddetailer before relying on branch details.
        """
        processing.fix_seed(p)
        seed = p.seed
        subseed = p.subseed
        # One image at a time; the original batch count drives the outer loop.
        p.batch_size = 1
        ddetail_count = p.n_iter
        p.n_iter = 1
        p.do_not_save_grid = True
        p.do_not_save_samples = True
        is_txt2img = isinstance(p, StableDiffusionProcessingTxt2Img)
        info = ""

        # ddetailer info
        extra_generation_params = ddetailer_extra_generation_params(
            dd_prompt,
            dd_neg_prompt,
            dd_model_a,
            dd_conf_a,
            dd_dilation_factor_a,
            dd_offset_x_a,
            dd_offset_y_a,
            dd_preprocess_b,
            dd_bitwise_op,
            dd_model_b,
            dd_conf_b,
            dd_dilation_factor_b,
            dd_offset_x_b,
            dd_offset_y_b,
            dd_mask_blur,
            dd_denoising_strength,
            dd_inpaint_full_res,
            dd_inpaint_full_res_padding,
            dd_cfg_scale,
        )
        p.extra_generation_params.update(extra_generation_params)

        p_txt = copy(p)
        if not is_txt2img:
            orig_image = p.init_images[0]
        else:
            img2img_sampler_name = p_txt.sampler_name
            # PLMS/UniPC do not support img2img so we just silently switch to DDIM
            if p_txt.sampler_name in ["PLMS", "UniPC"]:
                img2img_sampler_name = "DDIM"
            p_txt_prompt = dd_prompt if dd_prompt else p_txt.prompt
            p_txt_neg_prompt = dd_neg_prompt if dd_neg_prompt else p_txt.negative_prompt
            # Build the img2img pipeline used for the inpaint passes from the
            # txt2img settings.
            p = StableDiffusionProcessingImg2Img(
                init_images=None,
                resize_mode=0,
                denoising_strength=dd_denoising_strength,
                mask=None,
                mask_blur=dd_mask_blur,
                inpainting_fill=1,
                inpaint_full_res=dd_inpaint_full_res,
                inpaint_full_res_padding=dd_inpaint_full_res_padding,
                inpainting_mask_invert=0,
                sd_model=p_txt.sd_model,
                outpath_samples=p_txt.outpath_samples,
                outpath_grids=p_txt.outpath_grids,
                prompt=p_txt_prompt,
                negative_prompt=p_txt_neg_prompt,
                styles=p_txt.styles,
                seed=p_txt.seed,
                subseed=p_txt.subseed,
                subseed_strength=p_txt.subseed_strength,
                seed_resize_from_h=p_txt.seed_resize_from_h,
                seed_resize_from_w=p_txt.seed_resize_from_w,
                sampler_name=img2img_sampler_name,
                n_iter=p_txt.n_iter,
                steps=p_txt.steps,
                cfg_scale=p_txt.cfg_scale,
                width=p_txt.width,
                height=p_txt.height,
                tiling=p_txt.tiling,
                extra_generation_params=p_txt.extra_generation_params,
            )
            p.do_not_save_grid = True
            p.do_not_save_samples = True
            p.cached_c = [None, None]
            p.cached_uc = [None, None]

        p.scripts = p_txt.scripts
        p.script_args = p_txt.script_args

        # output info
        all_prompts = []
        all_negative_prompts = []
        all_seeds = []
        all_subseeds = []
        infotexts = []
        output_images = []

        state.job_count = ddetail_count
        for n in range(ddetail_count):
            devices.torch_gc()
            start_seed = seed + n

            all_prompts.append(p_txt.prompt)
            all_negative_prompts.append(p_txt.negative_prompt)
            all_seeds.append(start_seed)
            all_subseeds.append(subseed + n)

            if is_txt2img:
                print(f"Processing initial image for output generation {n + 1}.")
                p_txt.seed = start_seed
                processed = processing.process_images(p_txt)
                init_image = processed.images[0]
                info = processed.info
                # Without explicit overrides, inpaint with the resolved prompts.
                if not dd_prompt:
                    p.prompt = processed.all_prompts[0]
                if not dd_neg_prompt:
                    p.negative_prompt = processed.all_negative_prompts[0]
                all_prompts[n] = processed.all_prompts[0]
                all_negative_prompts[n] = processed.all_negative_prompts[0]
            else:
                init_image = orig_image
                p.prompt = p_txt.prompt
                p.negative_prompt = p_txt.negative_prompt
                p.cfg_scale = dd_cfg_scale

            if opts.enable_pnginfo:
                init_image.info["parameters"] = info

            infotexts.append(info)
            output_images.append(init_image)

            masks_a = []
            masks_b_pre = []

            # Optional secondary pre-processing run
            if dd_model_b != "None" and dd_preprocess_b:
                label_b_pre = "B"
                results_b_pre = inference(init_image, dd_model_b, dd_conf_b / 100.0, label_b_pre)
                masks_b_pre = create_segmasks(results_b_pre)
                masks_b_pre = dilate_masks(masks_b_pre, dd_dilation_factor_b, 1)
                masks_b_pre = offset_masks(masks_b_pre, dd_offset_x_b, dd_offset_y_b)
                if len(masks_b_pre) > 0:
                    results_b_pre = update_result_masks(results_b_pre, masks_b_pre)
                    segmask_preview_b = create_segmask_preview(results_b_pre, init_image)
                    shared.state.current_image = segmask_preview_b
                    if opts.dd_save_previews:
                        images.save_image(
                            segmask_preview_b,
                            opts.outdir_ddetailer_previews,
                            "",
                            start_seed,
                            p.prompt,
                            opts.samples_format,
                            p=p,
                        )
                    gen_count = len(masks_b_pre)
                    state.job_count += gen_count
                    print(f"Processing {gen_count} model {label_b_pre} detections for output generation {n + 1}.")
                    p.seed = start_seed
                    p.init_images = [init_image]

                    # Inpaint each B detection, feeding the result into the next.
                    for i in range(gen_count):
                        p.image_mask = masks_b_pre[i]
                        if opts.dd_save_masks:
                            images.save_image(
                                masks_b_pre[i],
                                opts.outdir_ddetailer_masks,
                                "",
                                start_seed,
                                p.prompt,
                                opts.samples_format,
                                p=p,
                            )
                        processed = processing.process_images(p)
                        if not is_txt2img:
                            p.prompt = processed.all_prompts[0]
                            p.negative_prompt = processed.all_negative_prompts[0]
                        p.seed = processed.seed + 1
                        p.subseed = processed.subseed + 1
                        p.init_images = [processed.images[0]]

                    if gen_count > 0:
                        output_images[n] = processed.images[0]
                        init_image = processed.images[0]

                else:
                    print(f"No model B detections for output generation {n} with current settings.")

            # Primary run
            if dd_model_a != "None":
                label_a = "A"
                if dd_model_b != "None" and dd_bitwise_op != "None":
                    label_a = dd_bitwise_op
                results_a = inference(init_image, dd_model_a, dd_conf_a / 100.0, label_a)
                masks_a = create_segmasks(results_a)
                masks_a = dilate_masks(masks_a, dd_dilation_factor_a, 1)
                masks_a = offset_masks(masks_a, dd_offset_x_a, dd_offset_y_a)
                if dd_model_b != "None" and dd_bitwise_op != "None":
                    # Combine A masks with the union of B masks (A&B or A-B).
                    label_b = "B"
                    results_b = inference(init_image, dd_model_b, dd_conf_b / 100.0, label_b)
                    masks_b = create_segmasks(results_b)
                    masks_b = dilate_masks(masks_b, dd_dilation_factor_b, 1)
                    masks_b = offset_masks(masks_b, dd_offset_x_b, dd_offset_y_b)
                    if len(masks_b) > 0:
                        combined_mask_b = combine_masks(masks_b)
                        # Reversed so deletions don't shift pending indices.
                        for i in reversed(range(len(masks_a))):
                            if dd_bitwise_op == "A&B":
                                masks_a[i] = bitwise_and_masks(masks_a[i], combined_mask_b)
                            elif dd_bitwise_op == "A-B":
                                masks_a[i] = subtract_masks(masks_a[i], combined_mask_b)
                            if is_allblack(masks_a[i]):
                                del masks_a[i]
                                for result in results_a:
                                    del result[i]

                    else:
                        print("No model B detections to overlap with model A masks")
                        results_a = []
                        masks_a = []

                if len(masks_a) > 0:
                    results_a = update_result_masks(results_a, masks_a)
                    segmask_preview_a = create_segmask_preview(results_a, init_image)
                    shared.state.current_image = segmask_preview_a
                    if opts.dd_save_previews:
                        images.save_image(
                            segmask_preview_a,
                            opts.outdir_ddetailer_previews,
                            "",
                            start_seed,
                            p.prompt,
                            opts.samples_format,
                            p=p,
                        )
                    gen_count = len(masks_a)
                    state.job_count += gen_count
                    print(f"Processing {gen_count} model {label_a} detections for output generation {n + 1}.")
                    p.seed = start_seed
                    p.init_images = [init_image]

                    for i in range(gen_count):
                        p.image_mask = masks_a[i]
                        if opts.dd_save_masks:
                            images.save_image(
                                masks_a[i],
                                opts.outdir_ddetailer_masks,
                                "",
                                start_seed,
                                p.prompt,
                                opts.samples_format,
                                p=p,
                            )

                        processed = processing.process_images(p)
                        if not is_txt2img:
                            p.prompt = processed.all_prompts[0]
                            p.negative_prompt = processed.all_negative_prompts[0]
                        info = processed.info
                        all_prompts[n] = processed.all_prompts[0]
                        all_negative_prompts[n] = processed.all_negative_prompts[0]
                        p.seed = processed.seed + 1
                        p.subseed = processed.subseed + 1
                        p.init_images = [processed.images[0]]

                    if gen_count > 0:
                        final_image = processed.images[0]

                        if opts.enable_pnginfo:
                            final_image.info["parameters"] = info
                        output_images[n] = final_image
                        infotexts[n] = info

                        if opts.samples_save:
                            images.save_image(
                                final_image,
                                p.outpath_samples,
                                "",
                                start_seed,
                                p.prompt,
                                opts.samples_format,
                                info=info,
                                p=p,
                            )

                else:
                    print(f"No model {label_a} detections for output generation {n} with current settings.")

                    # Nothing to inpaint: save the untouched initial image.
                    if opts.samples_save:
                        images.save_image(
                            init_image,
                            p.outpath_samples,
                            "",
                            start_seed,
                            p.prompt,
                            opts.samples_format,
                            info=info,
                            p=p,
                        )

            state.job = f"Generation {n + 1} out of {state.job_count}"

        # Keep params.txt in sync so "paste last parameters" reflects overrides.
        if dd_prompt or dd_neg_prompt:
            params_txt = os.path.join(data_path, "params.txt")
            with open(params_txt, "w", encoding="utf-8") as file:
                file.write(infotexts[0])

        return Processed(
            p,
            output_images,
            seed,
            infotexts[0],
            all_prompts=all_prompts,
            all_negative_prompts=all_negative_prompts,
            all_seeds=all_seeds,
            all_subseeds=all_subseeds,
            infotexts=infotexts,
        )
811
+
812
+
813
def modeldataset(model_shortname):
    """Return the label dataset key for a model: "coco" for segm models
    stored under mmdet/segm, otherwise "bbox"."""
    path = modelpath(model_shortname)
    if "mmdet" in path and "segm" in path:
        return "coco"
    return "bbox"
817
+
818
+
819
def modelpath(model_shortname):
    """Resolve a dropdown entry of the form "name [hash]" back to the model
    file path, or None when no model matches the hash."""
    candidates = modelloader.load_models(model_path=dd_models_path, ext_filter=[".pth"])
    wanted = model_shortname.split("[")[-1].split("]")[0]
    return next((path for path in candidates if model_hash(path) == wanted), None)
826
+
827
+
828
def update_result_masks(results, masks):
    """Overwrite the segmentation entries (results[2]) with boolean versions
    of the post-processed masks; returns the mutated results."""
    for idx, mask in enumerate(masks):
        results[2][idx] = np.array(mask, dtype=bool)
    return results
833
+
834
+
835
def create_segmask_preview(results, image):
    """Render a labeled overlay preview of the detections on `image`.

    `results` is the inference tuple (labels, bboxes, segms, scores).
    Returns a new PIL image, or `image` unchanged when there are no masks.
    """
    labels = results[0]
    bboxes = results[1]
    segms = results[2]
    scores = results[3]

    # PIL (RGB) -> OpenCV (BGR) working copy.
    cv2_image = np.array(image)
    cv2_image = cv2_image[:, :, ::-1].copy()

    for i in range(len(segms)):
        # Random bright color per detection, alpha-blended over the image.
        color = np.full_like(cv2_image, np.random.randint(100, 256, (1, 3), dtype=np.uint8))
        alpha = 0.2
        color_image = cv2.addWeighted(cv2_image, alpha, color, 1 - alpha, 0)
        cv2_mask = segms[i].astype(np.uint8) * 255
        cv2_mask_bool = np.array(segms[i], dtype=bool)
        # The mask centroid anchors the "label:score" text.
        centroid = np.mean(np.argwhere(cv2_mask_bool), axis=0)
        centroid_x, centroid_y = int(centroid[1]), int(centroid[0])

        cv2_mask_rgb = cv2.merge((cv2_mask, cv2_mask, cv2_mask))
        cv2_image = np.where(cv2_mask_rgb == 255, color_image, cv2_image)
        # Darker shade of the overlay color keeps the text readable
        # (color channels are in [100, 255], so subtracting 100 cannot wrap).
        text_color = tuple([int(x) for x in (color[0][0] - 100)])
        name = labels[i]
        score = scores[i]
        score = str(score)[:4]
        text = name + ":" + score
        cv2.putText(
            cv2_image,
            text,
            (centroid_x - 30, centroid_y),
            cv2.FONT_HERSHEY_DUPLEX,
            0.4,
            text_color,
            1,
            cv2.LINE_AA,
        )

    if len(segms) > 0:
        preview_image = Image.fromarray(cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB))
    else:
        preview_image = image

    return preview_image
877
+
878
+
879
def is_allblack(mask):
    """True when the mask contains no nonzero pixel."""
    return cv2.countNonZero(np.array(mask)) == 0
882
+
883
+
884
def bitwise_and_masks(mask1, mask2):
    """Pixelwise intersection of two masks, returned as a PIL image."""
    combined = cv2.bitwise_and(np.array(mask1), np.array(mask2))
    return Image.fromarray(combined)
890
+
891
+
892
def subtract_masks(mask1, mask2):
    """Saturating subtraction mask1 - mask2, returned as a PIL image."""
    difference = cv2.subtract(np.array(mask1), np.array(mask2))
    return Image.fromarray(difference)
898
+
899
+
900
def dilate_masks(masks, dilation_factor, iter=1):
    """Grow every mask with a square kernel of size dilation_factor.

    Returns the input list unchanged when dilation_factor is 0; otherwise a
    new list of dilated PIL masks. `iter` is the cv2.dilate iteration count.
    """
    if dilation_factor == 0:
        return masks
    dilated_masks = []
    kernel = np.ones((dilation_factor, dilation_factor), np.uint8)
    for mask in masks:
        cv2_mask = np.array(mask)
        # Bug fix: cv2.dilate's third positional parameter is `dst`, not the
        # iteration count — pass it by keyword so `iter` actually takes effect.
        dilated = cv2.dilate(cv2_mask, kernel, iterations=iter)
        dilated_masks.append(Image.fromarray(dilated))
    return dilated_masks
910
+
911
+
912
def offset_masks(masks, offset_x, offset_y):
    """Shift every mask by (offset_x, offset_y) pixels with wrap-around.

    A positive offset_y rolls the mask upward (rows shift by -offset_y);
    a positive offset_x rolls it to the right. No-op when both offsets are 0.
    """
    if offset_x == 0 and offset_y == 0:
        return masks
    shifted = []
    for mask in masks:
        rolled = np.roll(np.array(mask), (-offset_y, offset_x), axis=(0, 1))
        shifted.append(Image.fromarray(rolled))
    return shifted
924
+
925
+
926
def combine_masks(masks):
    """OR-combine a list of masks into one PIL mask.

    Returns None for an empty input instead of raising IndexError on
    masks[0]; every call site in this script already checks for detections
    first, so this only hardens the edge case.
    """
    if not masks:
        return None
    combined_cv2_mask = np.array(masks[0])
    for mask in masks[1:]:
        combined_cv2_mask = cv2.bitwise_or(combined_cv2_mask, np.array(mask))
    return Image.fromarray(combined_cv2_mask)
935
+
936
+
937
def on_ui_settings():
    """Register dddetailer's mask-saving options in the WebUI settings panel."""
    section = ("ddetailer", DETECTION_DETAILER)
    option_specs = (
        ("dd_save_previews", False, "Save mask previews"),
        (
            "outdir_ddetailer_previews",
            "extensions/dddetailer/outputs/masks-previews",
            "Output directory for mask previews",
        ),
        ("dd_save_masks", False, "Save masks"),
        (
            "outdir_ddetailer_masks",
            "extensions/dddetailer/outputs/masks",
            "Output directory for masks",
        ),
    )
    for key, default, label in option_specs:
        shared.opts.add_option(key, shared.OptionInfo(default, label, section=section))
962
+
963
+
964
def create_segmasks(results):
    """Convert the boolean segmentation arrays in results[2] into 8-bit
    (0/255) PIL mask images."""
    return [Image.fromarray(seg.astype(np.uint8) * 255) for seg in results[2]]
973
+
974
+
975
+ from mmdet.apis import inference_detector, init_detector
976
+ from mmdet.evaluation import get_classes
977
+
978
+
979
def get_device():
    """Choose the torch device for mmdet inference.

    Returns the WebUI's optimal device, except that low/medium-VRAM modes
    force CPU (mps is always kept as-is).
    """
    name = devices.get_optimal_device_name()
    if name != "mps":
        for flag in ("lowvram", "medvram"):
            if getattr(cmd_opts, flag, False):
                return "cpu"
    return name
986
+
987
+
988
def inference(image, modelname, conf_thres, label):
    """Dispatch detection to the bbox or segm mmdet pipeline.

    The choice is made from the resolved checkpoint path: paths under
    mmdet/bbox use rectangle detection, paths under mmdet/segm use instance
    segmentation.

    Raises:
        ValueError: when the model path matches neither pipeline.
            (Previously this fell through and raised NameError on the
            unbound local `results`.)
    """
    path = modelpath(modelname)
    if "mmdet" in path and "bbox" in path:
        return inference_mmdet_bbox(image, modelname, conf_thres, label)
    if "mmdet" in path and "segm" in path:
        return inference_mmdet_segm(image, modelname, conf_thres, label)
    raise ValueError(f"Unsupported detection model path: {path}")
995
+
996
+
997
def inference_mmdet_segm(image, modelname, conf_thres, label):
    """Run mmdet (v3 API) instance segmentation on a PIL image.

    Returns a 4-list [names, bboxes, masks, scores] for detections whose
    score exceeds conf_thres; names are prefixed with `label` ("A"/"B"/op).
    NOTE(review): the detector is re-initialized on every call — confirm
    whether caching the model would be safe here.
    """
    model_checkpoint = modelpath(modelname)
    # mmdet convention: the config file sits next to the checkpoint.
    model_config = os.path.splitext(model_checkpoint)[0] + ".py"
    model_device = get_device()
    model = init_detector(model_config, model_checkpoint, device=model_device)
    # mmdet v3 returns a DetDataSample; pred_instances holds tensors.
    mmdet_results = inference_detector(model, np.array(image)).pred_instances
    bboxes = mmdet_results.bboxes.cpu().numpy()
    segms = mmdet_results.masks.cpu().numpy()
    scores = mmdet_results.scores.cpu().numpy()
    dataset = modeldataset(modelname)
    classes = get_classes(dataset)

    n, m = bboxes.shape
    if n == 0:
        # No detections at all: empty [names, bboxes, masks, scores].
        return [[], [], [], []]
    labels = mmdet_results.labels
    # Keep only detections above the confidence threshold.
    filter_inds = np.where(scores > conf_thres)[0]
    results = [[], [], [], []]
    for i in filter_inds:
        results[0].append(label + "-" + classes[labels[i]])
        results[1].append(bboxes[i])
        results[2].append(segms[i])
        results[3].append(scores[i])

    return results
1022
+
1023
+
1024
def inference_mmdet_bbox(image, modelname, conf_thres, label):
    """Run mmdet (v3 API) bounding-box detection on a PIL image.

    Each detection's box is rasterized into a full-image boolean mask so the
    return shape matches inference_mmdet_segm: [names, bboxes, masks, scores].
    """
    model_checkpoint = modelpath(modelname)
    # mmdet convention: the config file sits next to the checkpoint.
    model_config = os.path.splitext(model_checkpoint)[0] + ".py"
    model_device = get_device()
    model = init_detector(model_config, model_checkpoint, device=model_device)
    output = inference_detector(model, np.array(image)).pred_instances
    cv2_image = np.array(image)
    # PIL is RGB; OpenCV expects BGR.
    cv2_image = cv2_image[:, :, ::-1].copy()
    # Grayscale only provides the (H, W) shape for the mask canvases.
    cv2_gray = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2GRAY)

    segms = []
    for x0, y0, x1, y1 in output.bboxes:
        cv2_mask = np.zeros((cv2_gray.shape), np.uint8)
        # Filled (-1 thickness) white rectangle over the detection area.
        cv2.rectangle(cv2_mask, (int(x0), int(y0)), (int(x1), int(y1)), 255, -1)
        cv2_mask_bool = cv2_mask.astype(bool)
        segms.append(cv2_mask_bool)

    n, m = output.bboxes.shape
    if n == 0:
        # No detections at all: empty [names, bboxes, masks, scores].
        return [[], [], [], []]
    bboxes = output.bboxes.cpu().numpy()
    scores = output.scores.cpu().numpy()
    # Keep only detections above the confidence threshold.
    filter_inds = np.where(scores > conf_thres)[0]
    results = [[], [], [], []]
    for i in filter_inds:
        results[0].append(label)
        results[1].append(bboxes[i])
        results[2].append(segms[i])
        results[3].append(scores[i])

    return results
1055
+
1056
+
1057
+ script_callbacks.on_ui_settings(on_ui_settings)
exhm/detailer/ddetailer/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.ckpt
3
+ *.pth
4
+ /tmp
5
+ /outputs
6
+ /log
7
+ .vscode
8
+ /test-cases
exhm/detailer/ddetailer/README.md ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Detection and img2img have come a long way. This project is no longer maintained and there are now several alternatives for this function. See [μ Detection Detailer](https://github.com/wkpark/uddetailer) or [adetailer](https://github.com/Bing-su/adetailer) implementations.
2
+
3
+ # Detection Detailer
4
+ An object detection and auto-mask extension for [Stable Diffusion web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui). See [Installation](https://github.com/dustysys/ddetailer#installation).
5
+
6
+ ![adoringfan](/misc/ddetailer_example_1.png)
7
+
8
+ ### Segmentation
9
+ Default models enable person and face instance segmentation.
10
+
11
+ ![amgothic](/misc/ddetailer_example_2.png)
12
+
13
+ ### Detailing
14
+ With full-resolution inpainting, the extension is handy for improving faces without the hassle of manual masking.
15
+
16
+ ![zion](/misc/ddetailer_example_3.gif)
17
+
18
+ ## Installation
19
+ 1. Use `git clone https://github.com/dustysys/ddetailer.git` from your SD web UI `/extensions` folder. Alternatively, install from the extensions tab with url `https://github.com/dustysys/ddetailer`
20
+ 2. Start or reload SD web UI.
21
+
22
+ The models and dependencies should download automatically. To install them manually, follow the [official instructions for installing mmdet](https://mmcv.readthedocs.io/en/latest/get_started/installation.html#install-with-mim-recommended). The models can be [downloaded here](https://huggingface.co/dustysys/ddetailer) and should be placed in `/models/mmdet/bbox` for bounding box (`anime-face_yolov3`) or `/models/mmdet/segm` for instance segmentation models (`dd-person_mask2former`). See the [MMDetection docs](https://mmdetection.readthedocs.io/en/latest/1_exist_data_model.html) for guidance on training your own models. For using official MMDetection pretrained models see [here](https://github.com/dustysys/ddetailer/issues/5#issuecomment-1311231989), these are trained for photorealism. See [Troubleshooting](https://github.com/dustysys/ddetailer#troubleshooting) if you encounter issues during installation.
23
+
24
+ ## Usage
25
+ Select Detection Detailer as the script in SD web UI to use the extension. Click 'Generate' to run the script. Here are some tips:
26
+ - `anime-face_yolov3` can detect the bounding box of faces as the primary model while `dd-person_mask2former` isolates the head's silhouette as the secondary model by using the bitwise AND option. Refer to [this example](https://github.com/dustysys/ddetailer/issues/4#issuecomment-1311200268).
27
+ - The dilation factor expands the mask, while the x & y offsets move the mask around.
28
+ - The script is available in txt2img mode as well and can improve the quality of your 10 pulls with moderate settings (low denoise).
29
+
30
+ ## Troubleshooting
31
+ If you get the message ERROR: 'Failed building wheel for pycocotools' follow [these steps](https://github.com/dustysys/ddetailer/issues/1#issuecomment-1309415543).
32
+
33
+ Any other issues installing, open an [issue](https://github.com/dustysys/ddetailer/issues).
34
+
35
+ ## Credits
36
+ hysts/[anime-face-detector](https://github.com/hysts/anime-face-detector) - Creator of `anime-face_yolov3`, which has impressive performance on a variety of art styles.
37
+
38
+ skytnt/[anime-segmentation](https://huggingface.co/datasets/skytnt/anime-segmentation) - Synthetic dataset used to train `dd-person_mask2former`.
39
+
40
+ jerryli27/[AniSeg](https://github.com/jerryli27/AniSeg) - Annotated dataset used to train `dd-person_mask2former`.
41
+
42
+ open-mmlab/[mmdetection](https://github.com/open-mmlab/mmdetection) - Object detection toolset. `dd-person_mask2former` was trained via transfer learning using their [R-50 Mask2Former instance segmentation model](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask2former#instance-segmentation) as a base.
43
+
44
+ AUTOMATIC1111/[stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) - Web UI for Stable Diffusion, base application for this extension.
exhm/detailer/ddetailer/misc/ddetailer_example_1.png ADDED
exhm/detailer/ddetailer/misc/ddetailer_example_2.png ADDED
exhm/detailer/ddetailer/misc/ddetailer_example_3.gif ADDED
exhm/detailer/ddetailer/scripts/__pycache__/ddetailer.cpython-310.pyc ADDED
Binary file (16.5 kB). View file
 
exhm/detailer/ddetailer/scripts/ddetailer.py ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import cv2
4
+ from PIL import Image
5
+ import numpy as np
6
+ import gradio as gr
7
+
8
+ from modules import processing, images
9
+ from modules import scripts, script_callbacks, shared, devices, modelloader
10
+ from modules.processing import Processed, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img
11
+ from modules.shared import opts, cmd_opts, state
12
+ from modules.sd_models import model_hash
13
+ from modules.paths import models_path
14
+ from basicsr.utils.download_util import load_file_from_url
15
+
16
+ dd_models_path = os.path.join(models_path, "mmdet")
17
+
18
def list_models(model_path):
    """Return dropdown titles ('relative/name.pth [hash]') for every .pth
    checkpoint found under model_path."""
    found = modelloader.load_models(model_path=model_path, ext_filter=[".pth"])

    def modeltitle(path, shorthash):
        # Prefer a path relative to the models directory when possible.
        abspath = os.path.abspath(path)
        if abspath.startswith(model_path):
            name = abspath.replace(model_path, '')
        else:
            name = os.path.basename(path)

        if name[:1] in ("\\", "/"):
            name = name[1:]

        shortname = os.path.splitext(name.replace("/", "_").replace("\\", "_"))[0]

        return f'{name} [{shorthash}]', shortname

    titles = []
    for filename in found:
        title, _shortname = modeltitle(filename, model_hash(filename))
        titles.append(title)

    return titles
43
+
44
def startup():
    """One-time extension bootstrap: install mmdet if missing and download
    the default detection models when none are present.

    Runs at import time (called right below the definition), so any failure
    here surfaces during WebUI startup.
    """
    from launch import is_installed, run
    if not is_installed("mmdet"):
        python = sys.executable
        # openmim provides `mim`, which resolves the correct mmcv-full wheel.
        run(f'"{python}" -m pip install -U openmim', desc="Installing openmim", errdesc="Couldn't install openmim")
        run(f'"{python}" -m mim install mmcv-full', desc=f"Installing mmcv-full", errdesc=f"Couldn't install mmcv-full")
        run(f'"{python}" -m pip install mmdet', desc=f"Installing mmdet", errdesc=f"Couldn't install mmdet")

    if (len(list_models(dd_models_path)) == 0):
        print("No detection models found, downloading...")
        # Each model needs its checkpoint (.pth) plus the mmdet config (.py)
        # with the same basename in the same directory.
        bbox_path = os.path.join(dd_models_path, "bbox")
        segm_path = os.path.join(dd_models_path, "segm")
        load_file_from_url("https://huggingface.co/dustysys/ddetailer/resolve/main/mmdet/bbox/mmdet_anime-face_yolov3.pth", bbox_path)
        load_file_from_url("https://huggingface.co/dustysys/ddetailer/raw/main/mmdet/bbox/mmdet_anime-face_yolov3.py", bbox_path)
        load_file_from_url("https://huggingface.co/dustysys/ddetailer/resolve/main/mmdet/segm/mmdet_dd-person_mask2former.pth", segm_path)
        load_file_from_url("https://huggingface.co/dustysys/ddetailer/raw/main/mmdet/segm/mmdet_dd-person_mask2former.py", segm_path)
60
+
61
+ startup()
62
+
63
def gr_show(visible=True):
    """Build a Gradio update payload that toggles a component's visibility."""
    return {"__type__": "update", "visible": visible}
65
+
66
class DetectionDetailerScript(scripts.Script):
    """WebUI script: detect regions (faces/people) with mmdet models and
    re-inpaint each detection at full resolution.

    Supports a primary model (A), an optional secondary model (B) used either
    as a pre-processing inpaint pass or combined with A via a bitwise mask
    operation (A&B / A-B).
    """

    def title(self):
        """Name shown in the WebUI script dropdown."""
        return "Detection Detailer"

    def show(self, is_img2img):
        """Available in both txt2img and img2img tabs."""
        return True

    def ui(self, is_img2img):
        """Build the Gradio controls; B-model controls stay hidden until a
        model is selected (see the .change handlers below)."""
        import modules.ui

        model_list = list_models(dd_models_path)
        model_list.insert(0, "None")
        if is_img2img:
            info = gr.HTML("<p style=\"margin-bottom:0.75em\">Recommended settings: Use from inpaint tab, inpaint at full res ON, denoise <0.5</p>")
        else:
            info = gr.HTML("")
        with gr.Group():
            with gr.Row():
                dd_model_a = gr.Dropdown(label="Primary detection model (A)", choices=model_list,value = "None", visible=True, type="value")

            with gr.Row():
                dd_conf_a = gr.Slider(label='Detection confidence threshold % (A)', minimum=0, maximum=100, step=1, value=30, visible=False)
                dd_dilation_factor_a = gr.Slider(label='Dilation factor (A)', minimum=0, maximum=255, step=1, value=4, visible=False)

            with gr.Row():
                dd_offset_x_a = gr.Slider(label='X offset (A)', minimum=-200, maximum=200, step=1, value=0, visible=False)
                dd_offset_y_a = gr.Slider(label='Y offset (A)', minimum=-200, maximum=200, step=1, value=0, visible=False)

            with gr.Row():
                dd_preprocess_b = gr.Checkbox(label='Inpaint model B detections before model A runs', value=False, visible=False)
                dd_bitwise_op = gr.Radio(label='Bitwise operation', choices=['None', 'A&B', 'A-B'], value="None", visible=False)

        br = gr.HTML("<br>")

        with gr.Group():
            with gr.Row():
                dd_model_b = gr.Dropdown(label="Secondary detection model (B) (optional)", choices=model_list,value = "None", visible =False, type="value")

            with gr.Row():
                dd_conf_b = gr.Slider(label='Detection confidence threshold % (B)', minimum=0, maximum=100, step=1, value=30, visible=False)
                dd_dilation_factor_b = gr.Slider(label='Dilation factor (B)', minimum=0, maximum=255, step=1, value=4, visible=False)

            with gr.Row():
                dd_offset_x_b = gr.Slider(label='X offset (B)', minimum=-200, maximum=200, step=1, value=0, visible=False)
                dd_offset_y_b = gr.Slider(label='Y offset (B)', minimum=-200, maximum=200, step=1, value=0, visible=False)

        with gr.Group():
            with gr.Row():
                dd_mask_blur = gr.Slider(label='Mask blur ', minimum=0, maximum=64, step=1, value=4, visible=(not is_img2img))
                dd_denoising_strength = gr.Slider(label='Denoising strength (Inpaint)', minimum=0.0, maximum=1.0, step=0.01, value=0.4, visible=(not is_img2img))

            with gr.Row():
                dd_inpaint_full_res = gr.Checkbox(label='Inpaint at full resolution ', value=True, visible = (not is_img2img))
                dd_inpaint_full_res_padding = gr.Slider(label='Inpaint at full resolution padding, pixels ', minimum=0, maximum=256, step=4, value=32, visible=(not is_img2img))

        # Reveal model-A options (and the B dropdown) once a model is picked.
        dd_model_a.change(
            lambda modelname: {
                dd_model_b:gr_show( modelname != "None" ),
                dd_conf_a:gr_show( modelname != "None" ),
                dd_dilation_factor_a:gr_show( modelname != "None"),
                dd_offset_x_a:gr_show( modelname != "None" ),
                dd_offset_y_a:gr_show( modelname != "None" )

            },
            inputs= [dd_model_a],
            outputs =[dd_model_b, dd_conf_a, dd_dilation_factor_a, dd_offset_x_a, dd_offset_y_a]
        )

        # Reveal model-B options once a secondary model is picked.
        dd_model_b.change(
            lambda modelname: {
                dd_preprocess_b:gr_show( modelname != "None" ),
                dd_bitwise_op:gr_show( modelname != "None" ),
                dd_conf_b:gr_show( modelname != "None" ),
                dd_dilation_factor_b:gr_show( modelname != "None"),
                dd_offset_x_b:gr_show( modelname != "None" ),
                dd_offset_y_b:gr_show( modelname != "None" )
            },
            inputs= [dd_model_b],
            outputs =[dd_preprocess_b, dd_bitwise_op, dd_conf_b, dd_dilation_factor_b, dd_offset_x_b, dd_offset_y_b]
        )

        return [info,
                dd_model_a,
                dd_conf_a, dd_dilation_factor_a,
                dd_offset_x_a, dd_offset_y_a,
                dd_preprocess_b, dd_bitwise_op,
                br,
                dd_model_b,
                dd_conf_b, dd_dilation_factor_b,
                dd_offset_x_b, dd_offset_y_b,
                dd_mask_blur, dd_denoising_strength,
                dd_inpaint_full_res, dd_inpaint_full_res_padding
        ]

    def run(self, p, info,
            dd_model_a,
            dd_conf_a, dd_dilation_factor_a,
            dd_offset_x_a, dd_offset_y_a,
            dd_preprocess_b, dd_bitwise_op,
            br,
            dd_model_b,
            dd_conf_b, dd_dilation_factor_b,
            dd_offset_x_b, dd_offset_y_b,
            dd_mask_blur, dd_denoising_strength,
            dd_inpaint_full_res, dd_inpaint_full_res_padding):
        """Generate images and inpaint each detection.

        txt2img: generates the base image with the original parameters, then
        builds a fresh img2img/inpaint pipeline for the detection passes.
        img2img: reuses p directly on its init image.

        Returns a Processed with one output image per original n_iter.
        """

        processing.fix_seed(p)
        initial_info = None
        seed = p.seed
        # Batch/iteration control is taken over: one image at a time, and
        # the original n_iter becomes the outer loop count.
        p.batch_size = 1
        ddetail_count = p.n_iter
        p.n_iter = 1
        p.do_not_save_grid = True
        p.do_not_save_samples = True
        is_txt2img = isinstance(p, StableDiffusionProcessingTxt2Img)
        if (not is_txt2img):
            orig_image = p.init_images[0]
        else:
            # Keep the txt2img pipeline and clone its settings into an
            # inpainting pipeline for the detection passes.
            p_txt = p
            p = StableDiffusionProcessingImg2Img(
                    init_images = None,
                    resize_mode = 0,
                    denoising_strength = dd_denoising_strength,
                    mask = None,
                    mask_blur= dd_mask_blur,
                    inpainting_fill = 1,
                    inpaint_full_res = dd_inpaint_full_res,
                    inpaint_full_res_padding= dd_inpaint_full_res_padding,
                    inpainting_mask_invert= 0,
                    sd_model=p_txt.sd_model,
                    outpath_samples=p_txt.outpath_samples,
                    outpath_grids=p_txt.outpath_grids,
                    prompt=p_txt.prompt,
                    negative_prompt=p_txt.negative_prompt,
                    styles=p_txt.styles,
                    seed=p_txt.seed,
                    subseed=p_txt.subseed,
                    subseed_strength=p_txt.subseed_strength,
                    seed_resize_from_h=p_txt.seed_resize_from_h,
                    seed_resize_from_w=p_txt.seed_resize_from_w,
                    sampler_name=p_txt.sampler_name,
                    n_iter=p_txt.n_iter,
                    steps=p_txt.steps,
                    cfg_scale=p_txt.cfg_scale,
                    width=p_txt.width,
                    height=p_txt.height,
                    tiling=p_txt.tiling,
                )
            p.do_not_save_grid = True
            p.do_not_save_samples = True
        output_images = []
        state.job_count = ddetail_count
        for n in range(ddetail_count):
            devices.torch_gc()
            start_seed = seed + n
            if ( is_txt2img ):
                print(f"Processing initial image for output generation {n + 1}.")
                p_txt.seed = start_seed
                processed = processing.process_images(p_txt)
                init_image = processed.images[0]
            else:
                init_image = orig_image

            output_images.append(init_image)
            masks_a = []
            masks_b_pre = []

            # Optional secondary pre-processing run
            if (dd_model_b != "None" and dd_preprocess_b):
                label_b_pre = "B"
                results_b_pre = inference(init_image, dd_model_b, dd_conf_b/100.0, label_b_pre)
                masks_b_pre = create_segmasks(results_b_pre)
                masks_b_pre = dilate_masks(masks_b_pre, dd_dilation_factor_b, 1)
                masks_b_pre = offset_masks(masks_b_pre,dd_offset_x_b, dd_offset_y_b)
                if (len(masks_b_pre) > 0):
                    results_b_pre = update_result_masks(results_b_pre, masks_b_pre)
                    segmask_preview_b = create_segmask_preview(results_b_pre, init_image)
                    shared.state.current_image = segmask_preview_b
                    if ( opts.dd_save_previews):
                        images.save_image(segmask_preview_b, opts.outdir_ddetailer_previews, "", start_seed, p.prompt, opts.samples_format, p=p)
                    gen_count = len(masks_b_pre)
                    state.job_count += gen_count
                    print(f"Processing {gen_count} model {label_b_pre} detections for output generation {n + 1}.")
                    p.seed = start_seed
                    p.init_images = [init_image]

                    # Inpaint each B detection in sequence, feeding the
                    # previous result into the next pass.
                    for i in range(gen_count):
                        p.image_mask = masks_b_pre[i]
                        if ( opts.dd_save_masks):
                            images.save_image(masks_b_pre[i], opts.outdir_ddetailer_masks, "", start_seed, p.prompt, opts.samples_format, p=p)
                        processed = processing.process_images(p)
                        p.seed = processed.seed + 1
                        p.init_images = processed.images

                    if (gen_count > 0):
                        output_images[n] = processed.images[0]
                        init_image = processed.images[0]

                else:
                    print(f"No model B detections for output generation {n} with current settings.")

            # Primary run
            if (dd_model_a != "None"):
                label_a = "A"
                if (dd_model_b != "None" and dd_bitwise_op != "None"):
                    label_a = dd_bitwise_op
                results_a = inference(init_image, dd_model_a, dd_conf_a/100.0, label_a)
                masks_a = create_segmasks(results_a)
                masks_a = dilate_masks(masks_a, dd_dilation_factor_a, 1)
                masks_a = offset_masks(masks_a,dd_offset_x_a, dd_offset_y_a)
                if (dd_model_b != "None" and dd_bitwise_op != "None"):
                    label_b = "B"
                    results_b = inference(init_image, dd_model_b, dd_conf_b/100.0, label_b)
                    masks_b = create_segmasks(results_b)
                    masks_b = dilate_masks(masks_b, dd_dilation_factor_b, 1)
                    masks_b = offset_masks(masks_b,dd_offset_x_b, dd_offset_y_b)
                    if (len(masks_b) > 0):
                        combined_mask_b = combine_masks(masks_b)
                        # Iterate in reverse so deleting emptied masks keeps
                        # the remaining indices valid.
                        for i in reversed(range(len(masks_a))):
                            if (dd_bitwise_op == "A&B"):
                                masks_a[i] = bitwise_and_masks(masks_a[i], combined_mask_b)
                            elif (dd_bitwise_op == "A-B"):
                                masks_a[i] = subtract_masks(masks_a[i], combined_mask_b)
                            if (is_allblack(masks_a[i])):
                                del masks_a[i]
                                for result in results_a:
                                    del result[i]

                    else:
                        print("No model B detections to overlap with model A masks")
                        results_a = []
                        masks_a = []

                if (len(masks_a) > 0):
                    results_a = update_result_masks(results_a, masks_a)
                    segmask_preview_a = create_segmask_preview(results_a, init_image)
                    shared.state.current_image = segmask_preview_a
                    if ( opts.dd_save_previews):
                        images.save_image(segmask_preview_a, opts.outdir_ddetailer_previews, "", start_seed, p.prompt, opts.samples_format, p=p)
                    gen_count = len(masks_a)
                    state.job_count += gen_count
                    print(f"Processing {gen_count} model {label_a} detections for output generation {n + 1}.")
                    p.seed = start_seed
                    p.init_images = [init_image]

                    for i in range(gen_count):
                        p.image_mask = masks_a[i]
                        if ( opts.dd_save_masks):
                            images.save_image(masks_a[i], opts.outdir_ddetailer_masks, "", start_seed, p.prompt, opts.samples_format, p=p)

                        processed = processing.process_images(p)
                        if initial_info is None:
                            initial_info = processed.info
                        p.seed = processed.seed + 1
                        p.init_images = processed.images

                    if (gen_count > 0):
                        output_images[n] = processed.images[0]
                        if ( opts.samples_save ):
                            images.save_image(processed.images[0], p.outpath_samples, "", start_seed, p.prompt, opts.samples_format, info=initial_info, p=p)

                else:
                    print(f"No model {label_a} detections for output generation {n} with current settings.")
            state.job = f"Generation {n + 1} out of {state.job_count}"
        if (initial_info is None):
            initial_info = "No detections found."

        return Processed(p, output_images, seed, initial_info)
334
+
335
def modeldataset(model_shortname):
    """Return the class-label dataset key for a model: 'coco' for mmdet
    segmentation checkpoints, 'bbox' otherwise."""
    path = modelpath(model_shortname)
    is_segm = "mmdet" in path and "segm" in path
    return 'coco' if is_segm else 'bbox'
342
+
343
def modelpath(model_shortname):
    """Resolve a dropdown title like 'name [hash]' back to its checkpoint
    path by matching the bracketed short hash (returns None if no model
    matches)."""
    model_list = modelloader.load_models(model_path=dd_models_path, ext_filter=[".pth"])
    target_hash = model_shortname.split("[")[-1].split("]")[0]
    for candidate in model_list:
        if model_hash(candidate) == target_hash:
            return candidate
349
+
350
def update_result_masks(results, masks):
    """Overwrite the segmentation entries of `results` (results[2]) with
    boolean versions of `masks`, in place, and return `results`."""
    for idx, mask in enumerate(masks):
        results[2][idx] = np.array(mask, dtype=bool)
    return results
355
+
356
def create_segmask_preview(results, image):
    """Render a preview image with each detection tinted in a random color
    and labeled with "name:score" near its centroid.

    `results` is the [names, bboxes, masks] list produced by inference;
    returns the original image unchanged when there are no detections.
    """
    labels = results[0]
    bboxes = results[1]
    segms = results[2]

    cv2_image = np.array(image)
    # PIL is RGB; OpenCV drawing expects BGR.
    cv2_image = cv2_image[:, :, ::-1].copy()

    for i in range(len(segms)):
        # One random bright color per detection, blended over the image.
        color = np.full_like(cv2_image, np.random.randint(100, 256, (1, 3), dtype=np.uint8))
        alpha = 0.2
        color_image = cv2.addWeighted(cv2_image, alpha, color, 1-alpha, 0)
        cv2_mask = segms[i].astype(np.uint8) * 255
        cv2_mask_bool = np.array(segms[i], dtype=bool)
        # Centroid of the mask pixels; argwhere yields (row, col) pairs.
        centroid = np.mean(np.argwhere(cv2_mask_bool),axis=0)
        centroid_x, centroid_y = int(centroid[1]), int(centroid[0])

        cv2_mask_rgb = cv2.merge((cv2_mask, cv2_mask, cv2_mask))
        cv2_image = np.where(cv2_mask_rgb == 255, color_image, cv2_image)
        # NOTE(review): color components start at 100, so -100 can reach 0
        # but relies on uint8 values >= 100 to avoid wrap-around — confirm.
        text_color = tuple([int(x) for x in ( color[0][0] - 100 )])
        name = labels[i]
        # mmdet v2 bboxes carry the confidence as the 5th element.
        score = bboxes[i][4]
        score = str(score)[:4]
        text = name + ":" + score
        cv2.putText(cv2_image, text, (centroid_x - 30, centroid_y), cv2.FONT_HERSHEY_DUPLEX, 0.4, text_color, 1, cv2.LINE_AA)

    if ( len(segms) > 0):
        preview_image = Image.fromarray(cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB))
    else:
        preview_image = image

    return preview_image
388
+
389
def is_allblack(mask):
    """Return True when *mask* has no nonzero (white) pixels at all."""
    return not np.count_nonzero(np.array(mask))
392
+
393
def bitwise_and_masks(mask1, mask2):
    """Intersect two masks pixel-wise (bitwise AND) and return a PIL image."""
    a = np.array(mask1)
    b = np.array(mask2)
    return Image.fromarray(cv2.bitwise_and(a, b))
399
+
400
def subtract_masks(mask1, mask2):
    """Remove mask2's area from mask1 (saturating subtraction) and return a
    PIL image."""
    minuend = np.array(mask1)
    subtrahend = np.array(mask2)
    return Image.fromarray(cv2.subtract(minuend, subtrahend))
406
+
407
def dilate_masks(masks, dilation_factor, iter=1):
    """Grow each mask with a square morphological dilation kernel.

    Args:
        masks: sequence of 8-bit masks (PIL images or arrays).
        dilation_factor: side length of the square kernel; 0 returns the
            input unchanged.
        iter: number of dilation passes (name kept for interface
            compatibility even though it shadows the builtin).

    Returns:
        List of dilated masks as PIL images (or the original sequence when
        dilation_factor is 0).
    """
    if dilation_factor == 0:
        return masks
    kernel = np.ones((dilation_factor, dilation_factor), np.uint8)
    dilated_masks = []
    for mask in masks:
        cv2_mask = np.array(mask)
        # BUG FIX: cv2.dilate's third positional parameter is `dst`, not
        # `iterations`, so the iteration count was previously ignored.
        dilated_mask = cv2.dilate(cv2_mask, kernel, iterations=iter)
        dilated_masks.append(Image.fromarray(dilated_mask))
    return dilated_masks
417
+
418
def offset_masks(masks, offset_x, offset_y):
    """Translate every mask by (offset_x, offset_y).

    Rows roll by -offset_y (positive y moves mask content up) and columns
    roll by offset_x; content wraps around the image edges.
    """
    if offset_x == 0 and offset_y == 0:
        return masks
    shifted = []
    for mask in masks:
        arr = np.array(mask)
        arr = np.roll(arr, -offset_y, axis=0)
        arr = np.roll(arr, offset_x, axis=1)
        shifted.append(Image.fromarray(arr))
    return shifted
430
+
431
def combine_masks(masks):
    """OR-combine a list of masks into one PIL mask.

    Returns None for an empty input instead of raising IndexError on
    masks[0]; every call site in this script already checks for detections
    first, so this only hardens the edge case.
    """
    if not masks:
        return None
    combined_cv2_mask = np.array(masks[0])
    for mask in masks[1:]:
        combined_cv2_mask = cv2.bitwise_or(combined_cv2_mask, np.array(mask))
    return Image.fromarray(combined_cv2_mask)
440
+
441
def on_ui_settings():
    """Register ddetailer's mask/preview saving options in the settings UI."""
    section = ("ddetailer", "Detection Detailer")
    shared.opts.add_option("dd_save_previews", shared.OptionInfo(False, "Save mask previews", section=section))
    shared.opts.add_option("outdir_ddetailer_previews", shared.OptionInfo("extensions/ddetailer/outputs/masks-previews", 'Output directory for mask previews', section=section))
    shared.opts.add_option("dd_save_masks", shared.OptionInfo(False, "Save masks", section=section))
    shared.opts.add_option("outdir_ddetailer_masks", shared.OptionInfo("extensions/ddetailer/outputs/masks", 'Output directory for masks', section=section))
446
+
447
def create_segmasks(results):
    """Convert the boolean segmentation arrays in results[2] into 8-bit
    (0/255) PIL mask images."""
    return [Image.fromarray(seg.astype(np.uint8) * 255) for seg in results[2]]
456
+
457
+ import mmcv
458
+ from mmdet.core import get_classes
459
+ from mmdet.apis import (inference_detector,
460
+ init_detector)
461
+
462
def get_device():
    """Return the torch device string used for mmdet inference.

    "cuda:<id>" when --device-id was passed on the command line, otherwise
    "cpu".
    """
    # NOTE(review): without --device-id this always falls back to CPU even
    # when CUDA is available — confirm this is intended (the newer
    # dddetailer fork queries devices.get_optimal_device_name() instead).
    device_id = shared.cmd_opts.device_id
    if device_id is not None:
        cuda_device = f"cuda:{device_id}"
    else:
        cuda_device = "cpu"
    return cuda_device
469
+
470
def inference(image, modelname, conf_thres, label):
    """Dispatch detection to the bbox or segm mmdet pipeline.

    The choice is made from the resolved checkpoint path: paths under
    mmdet/bbox use rectangle detection, paths under mmdet/segm use instance
    segmentation.

    Raises:
        ValueError: when the model path matches neither pipeline.
            (Previously this fell through and raised NameError on the
            unbound local `results`.)
    """
    path = modelpath(modelname)
    if "mmdet" in path and "bbox" in path:
        return inference_mmdet_bbox(image, modelname, conf_thres, label)
    if "mmdet" in path and "segm" in path:
        return inference_mmdet_segm(image, modelname, conf_thres, label)
    raise ValueError(f"Unsupported detection model path: {path}")
477
+
478
def inference_mmdet_segm(image, modelname, conf_thres, label):
    """Run mmdet (v2 API) instance segmentation on a PIL image.

    Returns a 3-list [names, bboxes, masks] for detections whose bbox score
    exceeds conf_thres; names are prefixed with `label`.
    """
    model_checkpoint = modelpath(modelname)
    # mmdet convention: the config file sits next to the checkpoint.
    model_config = os.path.splitext(model_checkpoint)[0] + ".py"
    model_device = get_device()
    model = init_detector(model_config, model_checkpoint, device=model_device)
    # mmdet v2 returns (per-class bbox arrays, per-class mask lists).
    mmdet_results = inference_detector(model, np.array(image))
    bbox_results, segm_results = mmdet_results
    dataset = modeldataset(modelname)
    classes = get_classes(dataset)
    # Expand per-class arrays into one class index per detection.
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_results)
    ]
    # NOTE(review): emptiness is judged from class 0 only; detections in
    # other classes alone would be discarded — confirm intended.
    n,m = bbox_results[0].shape
    if (n == 0):
        return [[],[],[]]
    labels = np.concatenate(labels)
    bboxes = np.vstack(bbox_results)
    segms = mmcv.concat_list(segm_results)
    # Last bbox column is the confidence score.
    filter_inds = np.where(bboxes[:,-1] > conf_thres)[0]
    results = [[],[],[]]
    for i in filter_inds:
        results[0].append(label + "-" + classes[labels[i]])
        results[1].append(bboxes[i])
        results[2].append(segms[i])

    return results
505
+
506
def inference_mmdet_bbox(image, modelname, conf_thres, label):
    """Run mmdet (v2 API) bounding-box detection on a PIL image.

    Each detection's box is rasterized into a full-image boolean mask so the
    return shape matches inference_mmdet_segm: [names, bboxes, masks].
    """
    model_checkpoint = modelpath(modelname)
    # mmdet convention: the config file sits next to the checkpoint.
    model_config = os.path.splitext(model_checkpoint)[0] + ".py"
    model_device = get_device()
    model = init_detector(model_config, model_checkpoint, device=model_device)
    results = inference_detector(model, np.array(image))
    cv2_image = np.array(image)
    # PIL is RGB; OpenCV expects BGR.
    cv2_image = cv2_image[:, :, ::-1].copy()
    # Grayscale only provides the (H, W) shape for the mask canvases.
    cv2_gray = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2GRAY)

    segms = []
    # results[0] holds class-0 boxes as (x0, y0, x1, y1, confidence).
    for (x0, y0, x1, y1, conf) in results[0]:
        cv2_mask = np.zeros((cv2_gray.shape), np.uint8)
        # Filled (-1 thickness) white rectangle over the detection area.
        cv2.rectangle(cv2_mask, (int(x0), int(y0)), (int(x1), int(y1)), 255, -1)
        cv2_mask_bool = cv2_mask.astype(bool)
        segms.append(cv2_mask_bool)

    n,m = results[0].shape
    if (n == 0):
        return [[],[],[]]
    bboxes = np.vstack(results[0])
    # Last bbox column is the confidence score.
    filter_inds = np.where(bboxes[:,-1] > conf_thres)[0]
    results = [[],[],[]]
    for i in filter_inds:
        results[0].append(label)
        results[1].append(bboxes[i])
        results[2].append(segms[i])

    return results
535
+
536
+ script_callbacks.on_ui_settings(on_ui_settings)
exhm/detailer/sd-webui-ddsd-orig/.gitignore ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea
161
+ *.pt
162
+ *.pth
163
+ *.ckpt
164
+ *.safetensors
165
+ models/control_sd15_scribble.pth
166
+ detected_maps/
167
+
168
+ # Ignore all .ddcfg files except for Empty.ddcfg
169
+ config/*.ddcfg
170
+ !config/Empty.ddcfg
exhm/detailer/sd-webui-ddsd-orig/README.md ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # sd-webui-ddsd
2
+ 자동으로 동작하는 후보정 작업 확장.
3
+
4
+ ## What is
5
+ ### Outpaint
6
+ #### Outpaint How to use
7
+ 1. 증가시킬 픽셀을 선택
8
+ 2. 증가시킬 방향 선택
9
+ 1. 방향이 None이면 미동작
10
+ 3. 증가시킬때 사용할 프롬프트 작성(전체 인페인팅시 이용)
11
+ 1. 비어있을때 원본 프롬프트 사용
12
+ 4. Denoise, CFG, Step 선택
13
+ 1. Step은 최소 원본 Step 2 ~ 3배 이상 적절한 값 요구
14
+ 5. 생성!
15
+ ### Upscale
16
+ 이미지를 특정 크기로 잘라내어 타일별 업스케일을 하는 도구. 업스케일시 VRAM을 적게 소모.
17
+ #### Upscale How to use
18
+ 1. 크기를 키울때 사용할 upscaler 모델 선택
19
+ 2. 크기를 키울 배수 선택
20
+ 3. 가로, 세로를 내가 단일로 생성할 수 있는 이미지의 최대 크기로 선택(이미지 생성 속도를 최대한 빠르게 하기 위하여)
21
+ 1. 가로 또는 세로중 한개를 0으로 세팅시 업스케일만 동작(세부 구조를 디테일하게하는 인페인팅이 동작하지 않음)
22
+ 4. before running 체크
23
+ 1. 체크시 업스케일을 먼저 돌려서 인페인팅의 퀄리티 상승. 단, 인페인팅시 더 많은 VRAM 요구
24
+ 5. 생성!
25
+ ### Detect Detailer
26
+ 특정 키워드로 이미지를 탐색 후 인페인팅하는 도구.
27
+ #### Detect Detailer How to use
28
+ 0. 인페인팅의 범위 제한(I2I 전용)
29
+ 1. Inner 옵션은 I2I의 인페인팅에서 칠한 범위 내부만 이미지를 탐색
30
+ 2. Outer 옵션은 I2I의 인페인팅에서 칠한 범위 외부만 이미지를 탐색
31
+ 1. 탐색 키워드 작성
32
+ 1. 탐색할 키워드를 작성(face, person 등등)
33
+ 1. 탐색할 키워드는 문장형도 가능(happy face, running dog)
34
+ 2. 탐색할 키워드를 .으로 분할 가능(face. arm, face. chest)
35
+ 2. 탐색할 키워드에 사용 가능한 추가 옵션 존재
36
+ 1. &lt;area:type&gt;을 이용하여 특정 범위 탐색 가능
37
+ 1. 범위 종류는 left, right, top, bottom, all이 존재
38
+ 2. &lt;file:filename&gt;을 이용하여 특정 파일 탐색 가능
39
+ 1. 특정 파일의 위치는 models/ddsdmask
40
+ 3. &lt;model:type&gt;을 이용하여 특정 모델 탐색 가능
41
+ 1. type은 face_media_full, face_media_short와 파일명이 존재
42
+ 2. 파일은 models/yolo에 위치
43
+ 4. &lt;type1:type2:dilation:confidence&gt; 같이 type1과 type2외에 dilation과 confidence도 추가 입력 가능
44
+ 1. confidence는 model 타입에서만 사용되는 값
45
+ 3. 탐색한 범위를 AND, OR, XOR, NAND, NOR 등의 게이트 옵션으로 연산 가능
46
+ 1. face OR (body NAND outfit) -> 괄호안의 body NAND outfit을 먼저 한 후에 face와 OR 연산을 동작
47
+ 2. 괄호는 최대한 적게 이용. 많이 이용시 많은 VRAM 소모.
48
+ 3. 동작은 왼쪽에서 오른쪽으로 순차적 동작.
49
+ 4. 탐색할 키워드에 옵션으로 여러가지 옵션 조절 가능
50
+ 1. face:0:0.4:4 OR outfit:2:0.5:8
51
+ 2. 순서대로 탐색할 프롬프트, SAM 탐색 레벨(0-2), 민감도(0-1), 팽창값(0-512)을 가짐
52
+ 3. 값을 생략하면 초기값으로 세팅
53
+ 2. 긍정 프롬프트 입력
54
+ 1. 인페인팅시 동작시킬 긍정 프롬프트 입력
55
+ 3. 부정 프롬프트 입력
56
+ 1. 인페인팅시 동작시킬 부정 프롬프트 입력
57
+ 4. Denoising, CFG, Steps, Clip skip, Ckpt, Vae 수정
58
+ 1. 인페인팅시 동작에 영향을 주는 옵션
59
+ 5. Split Mask 옵션 체크
60
+ 1. 체크시 마스크가 떨어져 있는것이 존재한다면 따로 인페인팅.
61
+ 1. 따로 인페인팅시 퀄리티 상승. 하지만 더 많은 인페인팅을 요구하여 생성속도 하락.
62
+ 6. Remove Area 옵션 체크
63
+ 1. Split Mask 옵션이 Enable 되어야만 동작
64
+ 2. 분할 인페인팅시 일정 크기 이하의 면적은 인페인팅에서 제외
65
+ 7. 생성!
66
+ ### Postprocessing
67
+ 최종적으로 생성된 이미지에 가하는 후보정
68
+ #### Postprocessing How to use
69
+ 1. 가하고자 하는 후보정을 선택
70
+ 2. 생성!
71
+ ### Watermark
72
+ 이미지 생성 최종본에 자신의 증명을 기입하는 기능
73
+ #### Watermark How to use
74
+ 1. 기입할 증명의 종류 선택(글자, 이미지)
75
+ 2. 선택한 종류를 입력
76
+ 3. 선택한 종류의 크기와 위치를 지정
77
+ 4. Padding으로 해당 위치에서 얼만큼 떨어져 있을지 설정
78
+ 5. Alpha로 얼만큼 투명할지 결정
79
+ 6. 생성!
80
+
81
+ ### Video
82
+ [![Stable Diffusion - DDSD 확장 기능 (No - Talking)](http://img.youtube.com/vi/9wfZyJhPPho/0.jpg)](https://youtu.be/9wfZyJhPPho)
83
+
84
+ ## Installation
85
+ 1. 다운로드 [CUDA](https://developer.nvidia.com/cuda-toolkit-archive)와 [cuDNN](https://developer.nvidia.com/rdp/cudnn-archive)
86
+ 1. 자신이 가진 WebUI와 동일한 버전의 `CUDA`와 `cuDNN`버전으로 설치
87
+ 1. 이것은 다운로드를 편하게 하기위한 구글링크. [CUDA 117](https://drive.google.com/file/d/1HRTOLTB44-pRcrwIw9lQak2OC2ohNle3/view?usp=share_link)와 [cuDNN](https://drive.google.com/file/d/1QcgaxUra0WnCWrCLjsWp_QKw1PKcvqpj/view?usp=share_link)
88
+ 2. `CUDA` 설치 후 해당 폴더에 `cuDNN` 덮어쓰기
89
+ 3. 일정 버전은 Easy Install을 지원. `CUDA`와 `cuDNN` 불필요.
90
+ 1. 지원버전 (torch == 1.13.1+cu117, torch==2.0.0+cu117 , torch==2.0.0+cu118)
91
+ 2. 확장탭에서 설치 `https://github.com/NeoGraph-K/sd-webui-ddsd` 또는 다운로드 후 `extension/` 에 풀어넣기
92
+ 3. WebUI를 완전히 재시작
93
+
94
+ ## Credits
95
+
96
+ dustysys/[ddetailer](https://github.com/dustysys/ddetailer)
97
+
98
+ AUTOMATIC1111/[stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
99
+
100
+ facebookresearch/[Segment Anything](https://github.com/facebookresearch/segment-anything)
101
+
102
+ IDEA-Research/[GroundingDINO](https://github.com/IDEA-Research/GroundingDINO)
103
+
104
+ IDEA-Research/[Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything)
105
+
106
+ continue-revolution/[sd-webui-segment-anything](https://github.com/continue-revolution/sd-webui-segment-anything)
107
+
108
+ Bing-su/[adetailer](https://github.com/Bing-su/adetailer)
exhm/detailer/sd-webui-ddsd-orig/config/Empty.ddcfg ADDED
@@ -0,0 +1 @@
 
 
1
+ {"enable_script_names": "dynamic_thresholding;dynamic_prompting", "disable_watermark": true, "disable_postprocess": true, "disable_upscaler": true, "ddetailer_before_upscaler": false, "scalevalue": 2, "upscaler_sample": "Original", "overlap": 32, "upscaler_index": "SwinIR_4x", "rewidth": 512, "reheight": 512, "denoising_strength": 0.1, "upscaler_ckpt": "Original", "upscaler_vae": "Original", "disable_detailer": true, "disable_mask_paint_mode": true, "inpaint_mask_mode": "Inner", "detailer_sample": "Original", "detailer_sam_model": "sam_vit_b_01ec64.pth", "detailer_dino_model": "groundingdino_swinb_cogcoor.pth", "dino_full_res_inpaint": true, "dino_inpaint_padding": 0, "detailer_mask_blur": 4, "disable_outpaint": true, "outpaint_sample": "Original", "outpaint_mask_blur": 8, "dino_detect_count": 5, "dino_detection_ckpt_1": "Original", "dino_detection_vae_1": "Original", "dino_detection_prompt_1": "", "dino_detection_positive_1": "", "dino_detection_negative_1": "", "dino_detection_denoise_1": 0.4, "dino_detection_cfg_1": 0, "dino_detection_steps_1": 0, "dino_detection_spliter_disable_1": true, "dino_detection_spliter_remove_area_1": 16, "dino_detection_clip_skip_1": 0, "dino_detection_ckpt_2": "Original", "dino_detection_vae_2": "Original", "dino_detection_prompt_2": "", "dino_detection_positive_2": "", "dino_detection_negative_2": "", "dino_detection_denoise_2": 0.4, "dino_detection_cfg_2": 0, "dino_detection_steps_2": 0, "dino_detection_spliter_disable_2": true, "dino_detection_spliter_remove_area_2": 16, "dino_detection_clip_skip_2": 0, "dino_detection_ckpt_3": "Original", "dino_detection_vae_3": "Original", "dino_detection_prompt_3": "", "dino_detection_positive_3": "", "dino_detection_negative_3": "", "dino_detection_denoise_3": 0.4, "dino_detection_cfg_3": 0, "dino_detection_steps_3": 0, "dino_detection_spliter_disable_3": true, "dino_detection_spliter_remove_area_3": 16, "dino_detection_clip_skip_3": 0, "dino_detection_ckpt_4": "Original", 
"dino_detection_vae_4": "Original", "dino_detection_prompt_4": "", "dino_detection_positive_4": "", "dino_detection_negative_4": "", "dino_detection_denoise_4": 0.4, "dino_detection_cfg_4": 0, "dino_detection_steps_4": 0, "dino_detection_spliter_disable_4": true, "dino_detection_spliter_remove_area_4": 16, "dino_detection_clip_skip_4": 0, "dino_detection_ckpt_5": "Original", "dino_detection_vae_5": "Original", "dino_detection_prompt_5": "", "dino_detection_positive_5": "", "dino_detection_negative_5": "", "dino_detection_denoise_5": 0.4, "dino_detection_cfg_5": 0, "dino_detection_steps_5": 0, "dino_detection_spliter_disable_5": true, "dino_detection_spliter_remove_area_5": 16, "dino_detection_clip_skip_5": 0, "watermark_count": 2, "watermark_type_1": "Text", "watermark_position_1": "Center", "watermark_image_1": null, "watermark_image_size_width_1": 100, "watermark_image_size_height_1": 100, "watermark_text_1": "", "watermark_text_color_1": null, "watermark_text_font_1": "Courier New", "watermark_text_size_1": 50, "watermark_padding_1": 10, "watermark_alpha_1": 0.4, "watermark_type_2": "Text", "watermark_position_2": "Center", "watermark_image_2": null, "watermark_image_size_width_2": 100, "watermark_image_size_height_2": 100, "watermark_text_2": "", "watermark_text_color_2": null, "watermark_text_font_2": "Courier New", "watermark_text_size_2": 50, "watermark_padding_2": 10, "watermark_alpha_2": 0.4, "postprocessing_count": 2, "pp_type_1": "none", "pp_saturation_strength_1": 1.1, "pp_sharpening_radius_1": 2, "pp_sharpening_percent_1": 150, "pp_sharpening_threshold_1": 3, "pp_gaussian_radius_1": 2, "pp_brightness_strength_1": 1.1, "pp_color_strength_1": 1.1, "pp_contrast_strength_1": 1.1, "pp_hue_strength_1": 0, "pp_bilateral_sigmaC_1": 10, "pp_bilateral_sigmaS_1": 10, "pp_color_tint_type_name_1": "warm", "pp_color_tint_lut_name_1": "FGCineBasic.cube", "pp_type_2": "none", "pp_saturation_strength_2": 1.1, "pp_sharpening_radius_2": 2, "pp_sharpening_percent_2": 150, 
"pp_sharpening_threshold_2": 3, "pp_gaussian_radius_2": 2, "pp_brightness_strength_2": 1.1, "pp_color_strength_2": 1.1, "pp_contrast_strength_2": 1.1, "pp_hue_strength_2": 0, "pp_bilateral_sigmaC_2": 10, "pp_bilateral_sigmaS_2": 10, "pp_color_tint_type_name_2": "warm", "pp_color_tint_lut_name_2": "FGCineBasic.cube", "outpaint_count": 4, "outpaint_positive_1": "FGCineBasic.cube", "outpaint_negative_1": "", "outpaint_denoise_1": "", "outpaint_cfg_1": 0.8, "outpaint_steps_1": 0, "outpaint_pixels_1": 80, "outpaint_direction_1": 128, "outpaint_positive_2": "FGCineBasic.cube", "outpaint_negative_2": "", "outpaint_denoise_2": "", "outpaint_cfg_2": 0.8, "outpaint_steps_2": 0, "outpaint_pixels_2": 80, "outpaint_direction_2": 128, "outpaint_positive_3": "", "outpaint_negative_3": "", "outpaint_denoise_3": 0.8, "outpaint_cfg_3": 0, "outpaint_steps_3": 80, "outpaint_pixels_3": 128, "outpaint_direction_3": "None", "outpaint_positive_4": "", "outpaint_negative_4": "", "outpaint_denoise_4": 0.8, "outpaint_cfg_4": 0, "outpaint_steps_4": 80, "outpaint_pixels_4": 128, "outpaint_direction_4": "None"}
exhm/detailer/sd-webui-ddsd-orig/install.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import platform
3
+
4
+ import launch
5
+
6
+
7
+ def check_system_machine():
8
+ system = platform.system()
9
+ machine = platform.machine()
10
+ return (system, machine) in [('Windows', 'AMD64'), ('Linux', 'x86_64')]
11
+
12
+
13
+ def check_python_version(low: int, high: int):
14
+ ver = platform.python_version_tuple()
15
+ if int(ver[0]) == 3 and low <= int(ver[1]) <= high:
16
+ return ver[0] + ver[1]
17
+ return None
18
+
19
+
20
+ def install_pycocotools():
21
+ base = 'https://github.com/Bing-su/dddetailer/releases/download/pycocotools/'
22
+ urls = {
23
+ 'Windows': 'pycocotools-2.0.6-cp{ver}-cp{ver}-win_amd64.whl',
24
+ 'Linux': 'pycocotools-2.0.6-cp{ver}-cp{ver}-manylinux_2_17_x86_64.manylinux2014_x86_64.whl',
25
+ }
26
+
27
+ python_version = check_python_version(8, 11)
28
+ if not check_system_machine() or not python_version:
29
+ launch.run_pip('install pycocotools', 'sd-webui-ddsd requirement: pycocotools')
30
+ return
31
+
32
+ url = urls[platform.system()].format(ver=python_version)
33
+ launch.run_pip(f'install {base + url}', 'sd-webui-ddsd requirement: pycocotools')
34
+
35
+
36
+ def install_groundingdino():
37
+ import torch
38
+ from packaging.version import parse
39
+
40
+ # torch_version: '1.13.1' or '2.0.0' or ...
41
+ torch_version = parse(torch.__version__).base_version
42
+ # cuda_version: '117' or '118' or 'None'
43
+ cuda_version = torch.version.cuda.replace('.', '')
44
+ python_version = check_python_version(9, 10)
45
+
46
+ if (
47
+ not check_system_machine()
48
+ or (torch_version, cuda_version)
49
+ not in [('1.13.1', '117'), ('2.0.0', '117'), ('2.0.0', '118')]
50
+ or not python_version
51
+ ):
52
+ launch.run_pip('install git+https://github.com/IDEA-Research/GroundingDINO', 'sd-webui-ddsd requirement: groundingdino')
53
+ return
54
+
55
+ system = 'win' if platform.system() == 'Windows' else 'linux'
56
+ machine = 'amd64' if platform.machine() == 'AMD64' else 'x86_64'
57
+
58
+ url = 'https://github.com/Bing-su/GroundingDINO/releases/download/wheel-0.1.0/groundingdino-0.1.0+torch{torch}.cu{cuda}-cp{py}-cp{py}-{system}_{machine}.whl'
59
+ url = url.format(
60
+ torch=torch_version,
61
+ cuda=cuda_version,
62
+ py=python_version,
63
+ system=system,
64
+ machine=machine,
65
+ )
66
+
67
+ launch.run_pip(f'install {url}', 'sd-webui-ddsd requirement: groundingdino')
68
+
69
+
70
+ current_dir = os.path.dirname(os.path.realpath(__file__))
71
+ req_file = os.path.join(current_dir, 'requirements.txt')
72
+
73
+ with open(req_file) as file:
74
+ for lib in file:
75
+ version = None
76
+ lib = lib.strip()
77
+ lib = 'skimage' if lib == 'scikit-image' else lib
78
+ if '==' in lib:
79
+ lib, version = [x.strip() for x in lib.split('==')]
80
+ if not launch.is_installed(lib):
81
+ if lib == 'pycocotools':
82
+ install_pycocotools()
83
+ elif lib == 'groundingdino':
84
+ install_groundingdino()
85
+ elif lib == 'skimage':
86
+ launch.run_pip(
87
+ f'install scikit-image',
88
+ f'sd-webui-ddsd requirement: scikit-image'
89
+ )
90
+ elif lib == 'pillow_lut':
91
+ launch.run_pip(
92
+ f'install pillow_lut',
93
+ f'sd-webui-ddsd requirement: pillow_lut'
94
+ )
95
+ else:
96
+ lib = lib if version is None else lib + '==' + version
97
+ launch.run_pip(
98
+ f'install {lib}',
99
+ f'sd-webui-ddsd requirement: {lib}'
100
+ )
exhm/detailer/sd-webui-ddsd-orig/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pycocotools
2
+ segment_anything
3
+ groundingdino
4
+ scipy
5
+ scikit-image
6
+ pillow_lut
7
+ ultralytics==8.0.87
8
+ mediapipe==0.9.3.0
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd.cpython-310.pyc ADDED
Binary file (53.1 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_bs.cpython-310.pyc ADDED
Binary file (2.6 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_dino.cpython-310.pyc ADDED
Binary file (3.43 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_postprocess.cpython-310.pyc ADDED
Binary file (4.74 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_sam.cpython-310.pyc ADDED
Binary file (3.2 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/__pycache__/ddsd_utils.cpython-310.pyc ADDED
Binary file (13.2 kB). View file
 
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd.py ADDED
The diff for this file is too large to render. See raw diff
 
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_bs.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import torch
5
+
6
+ import mediapipe as mp
7
+ import numpy as np
8
+
9
+ from PIL import Image, ImageDraw
10
+ from ultralytics import YOLO
11
+
12
+ from modules import safe
13
+ from modules.shared import cmd_opts
14
+ from modules.paths import models_path
15
+
16
+ yolo_models_path = os.path.join(models_path, 'yolo')
17
+
18
+ def mediapipe_face_detect(image, model_type, confidence):
19
+ width, height = image.size
20
+ image_np = np.array(image)
21
+
22
+ mp_face_detection = mp.solutions.face_detection
23
+ with mp_face_detection.FaceDetection(model_selection=model_type, min_detection_confidence=confidence) as face_detector:
24
+ predictor = face_detector.process(image_np)
25
+
26
+ if predictor.detections is None: return None
27
+
28
+ bboxes = []
29
+ for detection in predictor.detections:
30
+
31
+ bbox = detection.location_data.relative_bounding_box
32
+ x1 = bbox.xmin * width
33
+ y1 = bbox.ymin * height
34
+ x2 = x1 + bbox.width * width
35
+ y2 = y1 + bbox.height * height
36
+ bboxes.append([x1,y1,x2,y2])
37
+
38
+ return create_mask_from_bbox(image, bboxes)
39
+
40
+ def ultralytics_predict(image, model_type, confidence, device):
41
+ models = [os.path.join(yolo_models_path,x) for x in os.listdir(yolo_models_path) if (x.endswith('.pt') or x.endswith('.pth')) and os.path.splitext(os.path.basename(x))[0].upper() == model_type]
42
+ if len(models) == 0: return None
43
+ model = YOLO(models[0])
44
+ predictor = model(image, conf=confidence, show_labels=False, device=device)
45
+ bboxes = predictor[0].boxes.xyxy.cpu().numpy()
46
+ if bboxes.size == 0: return None
47
+ bboxes = bboxes.tolist()
48
+ return create_mask_from_bbox(image, bboxes)
49
+
50
+ def create_mask_from_bbox(image, bboxes):
51
+ mask = Image.new('L', image.size, 0)
52
+ draw = ImageDraw.Draw(mask)
53
+ for bbox in bboxes:
54
+ draw.rectangle(bbox, fill=255)
55
+ return np.array(mask)
56
+
57
+ def bs_model(image, model_type, confidence):
58
+ image = Image.fromarray(image)
59
+ orig = torch.load
60
+ torch.load = safe.unsafe_torch_load
61
+ if model_type == 'FACE_MEDIA_FULL':
62
+ mask = mediapipe_face_detect(image, 1, confidence)
63
+ elif model_type == 'FACE_MEDIA_SHORT':
64
+ mask = mediapipe_face_detect(image, 0, confidence)
65
+ else:
66
+ device = ''
67
+ if getattr(cmd_opts, 'lowvram', False) or getattr(cmd_opts, 'medvram', False):
68
+ device = 'cpu'
69
+ mask = ultralytics_predict(image, model_type, confidence, device)
70
+ torch.load = orig
71
+ return mask
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_dino.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import torch
4
+ import copy
5
+ import cv2
6
+ from collections import OrderedDict
7
+
8
+ from modules import shared
9
+ from modules.devices import device, torch_gc, cpu
10
+
11
+ import groundingdino.datasets.transforms as T
12
+ from groundingdino.models import build_model
13
+ from groundingdino.util.slconfig import SLConfig
14
+ from modules.paths import models_path
15
+ from groundingdino.util.utils import clean_state_dict
16
+
17
+ dino_model_cache = OrderedDict()
18
+ grounding_models_dir = os.path.join(models_path, "grounding")
19
+
20
+ def dino_model_list():
21
+ return [x for x in os.listdir(grounding_models_dir) if x.endswith('.pth')]
22
+
23
+ def dino_config_file_name(dino_model_name:str):
24
+ return dino_model_name.replace('.pth','.py')
25
+
26
+ def clear_dino_cache():
27
+ dino_model_cache.clear()
28
+ gc.collect()
29
+ torch_gc()
30
+
31
+ def load_dino_model(dino_checkpoint):
32
+ print(f"Initializing GroundingDINO {dino_checkpoint}")
33
+ if dino_checkpoint in dino_model_cache:
34
+ dino = dino_model_cache[dino_checkpoint]
35
+ if shared.cmd_opts.lowvram:
36
+ dino.to(device=device)
37
+ else:
38
+ clear_dino_cache()
39
+ args = SLConfig.fromfile(os.path.join(grounding_models_dir,dino_config_file_name(dino_checkpoint)))
40
+ dino = build_model(args)
41
+ checkpoint = torch.load(os.path.join(grounding_models_dir,dino_checkpoint),map_location='cpu')
42
+ dino.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
43
+ dino.to(device=device)
44
+ dino_model_cache[dino_checkpoint] = dino
45
+ dino.eval()
46
+ return dino
47
+
48
+
49
+ def load_dino_image(image_pil):
50
+ transform = T.Compose(
51
+ [
52
+ T.RandomResize([800], max_size=1333),
53
+ T.ToTensor(),
54
+ T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
55
+ ]
56
+ )
57
+ image, _ = transform(image_pil, None) # 3, h, w
58
+ return image
59
+
60
+
61
+ def get_grounding_output(model, image, caption, box_threshold):
62
+ caption = caption.lower()
63
+ caption = caption.strip()
64
+ if not caption.endswith("."):
65
+ caption = caption + "."
66
+ image = image.to(device)
67
+ with torch.no_grad():
68
+ outputs = model(image[None], captions=[caption])
69
+ if shared.cmd_opts.lowvram:
70
+ model.to(cpu)
71
+ logits = outputs["pred_logits"].sigmoid()[0] # (nq, 256)
72
+ boxes = outputs["pred_boxes"][0] # (nq, 4)
73
+
74
+ # filter output
75
+ logits_filt = logits.clone()
76
+ boxes_filt = boxes.clone()
77
+ filt_mask = logits_filt.max(dim=1)[0] > box_threshold
78
+ logits_filt = logits_filt[filt_mask] # num_filt, 256
79
+ boxes_filt = boxes_filt[filt_mask] # num_filt, 4
80
+
81
+ return boxes_filt.cpu()
82
+
83
+
84
+ def dino_predict_internal(input_image, dino_model_name, text_prompt, box_threshold):
85
+ print("Running GroundingDINO Inference")
86
+ dino_image = load_dino_image(input_image.convert("RGB"))
87
+ dino_model = load_dino_model(dino_model_name)
88
+
89
+ boxes_filt = get_grounding_output(
90
+ dino_model, dino_image, text_prompt, box_threshold
91
+ )
92
+
93
+ H, W = input_image.size[1], input_image.size[0]
94
+ for i in range(boxes_filt.size(0)):
95
+ boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
96
+ boxes_filt[i][:2] -= boxes_filt[i][2:] / 2
97
+ boxes_filt[i][2:] += boxes_filt[i][:2]
98
+ clear_dino_cache()
99
+ return boxes_filt
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_postprocess.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import cv2
4
+ from PIL import Image, ImageEnhance, ImageFilter, ImageOps
5
+ from pillow_lut import load_cube_file
6
+ from scipy.interpolate import UnivariateSpline
7
+
8
+ from modules.paths import models_path
9
+
10
+ lut_model_dir = os.path.join(models_path, "lut")
11
+
12
+ def lut_model_list():
13
+ return [x for x in os.listdir(lut_model_dir) if x.lower().endswith('.cube')]
14
+
15
+ def saturation_image(image:Image.Image, strength:float) -> Image.Image: # 채도 조절
16
+ return ImageEnhance.Color(image).enhance(strength)
17
+ def sharpening_image(image:Image.Image, radius:float, percent:int, threshold:float) -> Image.Image: # 선명도 조절
18
+ return image.filter(ImageFilter.UnsharpMask(radius=radius, percent=percent, threshold=threshold))
19
+ def gaussian_blur_image(image:Image.Image, radius:float) -> Image.Image: # 흐림도 조절
20
+ return image.filter(ImageFilter.GaussianBlur(radius=radius))
21
+ def brightness_image(image:Image.Image, strength:float) -> Image.Image: # 밝기 조절
22
+ return ImageEnhance.Brightness(image).enhance(strength)
23
+ def color_image(image:Image.Image, strength:float) -> Image.Image: # 색조 조절
24
+ return ImageEnhance.Color(image).enhance(strength)
25
+ def contrast_image(image:Image.Image, strength:float) -> Image.Image: # 대비 조절
26
+ return ImageEnhance.Contrast(image).enhance(strength)
27
+ def color_extraction_image(image:Image.Image, lower:tuple[int,int,int], upper:tuple[int,int,int], strength:float) -> Image.Image: # 색상 추출 및 변화
28
+ image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2HSV)
29
+ mask = cv2.inRange(image_np, lower, upper)
30
+ image_np = image_np.astype(np.float64)
31
+ image_np[mask != 0] *= strength
32
+ image_np = image_np.astype(np.uint8)
33
+ return Image.fromarray(cv2.cvtColor(image_np, cv2.COLOR_HSV2RGB))
34
+ def hue_image(image:Image.Image, strength:float) -> Image.Image: # Hue 조절
35
+ image_np = np.array(image)
36
+ image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2HSV)
37
+ image_np[..., 0] = (image_np[..., 0] + strength * 180) % 180
38
+ return Image.fromarray(cv2.cvtColor(image_np, cv2.COLOR_HSV2RGB))
39
+ def inversion_image(image:Image.Image) -> Image.Image: # 반전
40
+ return ImageOps.invert(image)
41
+ def bilateral_image(image:Image.Image, sigmaC:int, sigmaS:int) -> Image.Image: # 양방향 필터
42
+ image_np = np.array(image)
43
+ return Image.fromarray(cv2.bilateralFilter(image_np, -1, sigmaC, sigmaS))
44
+ def color_tint_lut_image(image:Image.Image, lut_file:str) -> Image.Image: # 색상 조절
45
+ lut = load_cube_file(os.path.join(lut_model_dir, lut_file))
46
+ return image.filter(lut)
47
+ def color_tint_type_image(image:Image.Image, type:str) -> Image.Image: # 색온도 조절(Warm, Cool)
48
+ increase = UnivariateSpline([0,64,128,192,256],[0,70,140,210,256])(range(256))
49
+ decrease = UnivariateSpline([0,64,128,192,256],[0,30,80,120,192])(range(256))
50
+ image_np = np.array(image)
51
+ r, g, b = cv2.split(image_np)
52
+ r = cv2.LUT(r, increase if type == 'warm' else decrease).astype(np.uint8)
53
+ b = cv2.LUT(b, decrease if type == 'warm' else increase).astype(np.uint8)
54
+ image_np = cv2.merge((r, g, b))
55
+ h, s, v = cv2.split(cv2.cvtColor(image_np, cv2.COLOR_RGB2HSV))
56
+ s = cv2.LUT(s, increase if type == 'warm' else decrease).astype(np.uint8)
57
+ return Image.fromarray(cv2.cvtColor(cv2.merge((h, s, v)), cv2.COLOR_HSV2RGB))
58
+
59
+ def ddsd_postprocess(image:Image.Image, pptype:str,
60
+ saturation_strength:float,
61
+ sharpening_radius:float, sharpening_percent:int, sharpening_threshold:float,
62
+ gaussian_blur_radius:float,
63
+ brightness_strength:float,
64
+ color_strength:float,
65
+ contrast_strength:float,
66
+ #color_extraction_lower:tuple[int,int,int], color_extraction_upper:tuple[int,int,int], color_extraction_strength:float,
67
+ hue_strength:float,
68
+ bilateral_sigmaC:int, bilateral_sigmaS:int,
69
+ color_tint_lut_file:str,
70
+ color_tint_type_name:str) -> Image.Image:
71
+ if pptype == 'saturation': return saturation_image(image, saturation_strength)
72
+ if pptype == 'sharpening': return sharpening_image(image, sharpening_radius, sharpening_percent, sharpening_threshold)
73
+ if pptype == 'gaussian blur': return gaussian_blur_image(image, gaussian_blur_radius)
74
+ if pptype == 'brightness': return brightness_image(image, brightness_strength)
75
+ if pptype == 'color': return color_image(image, color_strength)
76
+ if pptype == 'contrast': return contrast_image(image, contrast_strength)
77
+ #if pptype == 'color extraction': return color_extraction_image(image, color_extraction_lower, color_extraction_upper, color_extraction_strength)
78
+ if pptype == 'hue': return hue_image(hue_strength)
79
+ if pptype == 'inversion': return inversion_image(image)
80
+ if pptype == 'bilateral': return bilateral_image(image, bilateral_sigmaC, bilateral_sigmaS)
81
+ if pptype == 'color tint(type)': return color_tint_type_image(image, color_tint_type_name)
82
+ if pptype == 'color tint(lut)': return color_tint_lut_image(image, color_tint_lut_file)
83
+ return image
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_sam.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import gc
5
+ import cv2
6
+
7
+ from modules import shared
8
+ from modules.paths import models_path
9
+ from modules.safe import unsafe_torch_load, load
10
+ from modules.devices import device, torch_gc, cpu
11
+
12
+ from PIL import Image
13
+ from collections import OrderedDict
14
+ from scipy.ndimage import binary_dilation
15
+ from segment_anything import SamPredictor, sam_model_registry
16
+ from scripts.ddsd_dino import dino_predict_internal, clear_dino_cache
17
+
18
+ sam_model_cache = OrderedDict()
19
+ sam_model_dir = os.path.join(models_path, "sam")
20
+
21
+ def sam_model_list():
22
+ return [x for x in os.listdir(sam_model_dir) if x.endswith('.pth')]
23
+
24
+ def load_sam_model(sam_checkpoint):
25
+ model_type = '_'.join(sam_checkpoint.split('_')[1:-1])
26
+ sam_checkpoint = os.path.join(sam_model_dir, sam_checkpoint)
27
+ torch.load = unsafe_torch_load
28
+ sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
29
+ sam.to(device=device)
30
+ sam.eval()
31
+ torch.load = load
32
+ return sam
33
+
34
+ def clear_sam_cache():
35
+ sam_model_cache.clear()
36
+ gc.collect()
37
+ torch_gc()
38
+
39
+ def clear_cache():
40
+ clear_sam_cache()
41
+ clear_dino_cache()
42
+
43
+ def dilate_mask(mask, dilation):
44
+ dilation_kernel = np.ones((dilation, dilation), np.uint8)
45
+ return cv2.dilate(mask, dilation_kernel)
46
+
47
+ def init_sam_model(sam_model_name):
48
+ print('Initializing SAM')
49
+ if sam_model_name in sam_model_cache:
50
+ sam = sam_model_cache[sam_model_name]
51
+ if(shared.cmd_opts.lowvram):
52
+ sam.to(device=device)
53
+ return sam
54
+ elif sam_model_name in sam_model_list():
55
+ clear_sam_cache()
56
+ sam_model_cache[sam_model_name] = load_sam_model(sam_model_name)
57
+ return sam_model_cache[sam_model_name]
58
+ else:
59
+ Exception(f'{sam_model_name} not found, please download model to models/sam')
60
+
61
+ def sam_predict(sam_model_name, dino_model_name, image, image_np, image_np_rgb, dino_text, dino_box_threshold, dilation, sam_level):
62
+ print('Start SAM Processing')
63
+
64
+ assert dino_text, 'Please input dino text'
65
+
66
+ boxes = dino_predict_internal(image, dino_model_name, dino_text, dino_box_threshold)
67
+
68
+ if boxes.shape[0] < 1: return None
69
+
70
+ sam = init_sam_model(sam_model_name)
71
+
72
+ print(f'Running SAM Inference {image_np_rgb.shape}')
73
+ predictor = SamPredictor(sam)
74
+ predictor.set_image(image_np_rgb)
75
+ transformed_boxes = predictor.transform.apply_boxes_torch(boxes, image_np.shape[:2])
76
+ masks, _, _ = predictor.predict_torch(
77
+ point_coords = None,
78
+ point_labels = None,
79
+ boxes = transformed_boxes.to(device),
80
+ multimask_output = True
81
+ )
82
+
83
+ masks = masks.permute(1,0,2,3).cpu().numpy()
84
+
85
+ if shared.cmd_opts.lowvram:
86
+ sam.to(cpu)
87
+ clear_sam_cache()
88
+
89
+ return dilate_mask(np.any(masks[sam_level], axis=0).astype(np.uint8) * 255,dilation)
exhm/detailer/sd-webui-ddsd-orig/scripts/ddsd_utils.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import numpy as np
4
+ import cv2
5
+ import gc
6
+ import matplotlib.font_manager
7
+ from glob import glob
8
+ from PIL import Image, ImageDraw, ImageFont
9
+ from scripts.ddsd_sam import sam_predict, clear_cache, dilate_mask
10
+ from scripts.ddsd_bs import bs_model
11
+ from modules.devices import torch_gc
12
+ from skimage import measure, exposure
13
+
14
+ from modules.paths import models_path
15
+ from modules.processing import StableDiffusionProcessingImg2Img
16
+
17
# Gate keywords used to combine detection masks (e.g. "face OR outfit").
token_split = re.compile(r"(AND|OR|NOR|XOR|NAND)")
# Innermost parenthesized sub-expression; evaluated before the gates.
token_first = re.compile(r'\(([^()]+)\)')
# Placeholder injected in place of an already-evaluated parenthesized group.
token_match = re.compile(r'(\d+)GROUPMASK')
# "<type:value:...>" special tokens (area / file / model selectors).
token_file = re.compile(r'\s*<(.*)>\s*')

# Directory holding user-supplied mask images.
ddsd_mask_path = os.path.join(models_path, "ddsdmask")
# UPPERCASED file stem -> absolute path, populated by startup().
mask_embed = {}
24
+
25
def startup():
    """Build the mask-file registry from models/ddsdmask.

    Populates the module-level `mask_embed` dict with
    UPPERCASED-file-stem -> path entries for every image file found under
    the mask directory, creating the directory on first run.
    """
    global mask_embed
    if not os.path.exists(ddsd_mask_path):
        os.makedirs(ddsd_mask_path)
        # Marker file so users can locate the directory in the models tree.
        with open(os.path.join(ddsd_mask_path, 'put_in_mask_here.txt'), 'w') as f: pass

    # Bug fix: the original pattern '**\\*' hard-coded a Windows path
    # separator (broken on Linux) and never enabled recursive globbing,
    # so '**' only matched a single directory level.
    masks = glob(os.path.join(ddsd_mask_path, '**', '*'), recursive=True)
    masks = [(x, *os.path.splitext(os.path.basename(x))) for x in masks if os.path.isfile(x)]
    masks = [(x, y) for x, y, z in masks if z in ['.png', '.jpg', '.jpeg', '.webp']]
    mask_embed = {y.upper(): x for x, y in masks}

startup()
37
+
38
def try_convert(data, type, default, min, max):
    """Convert `data` with the callable `type`, clamping the result to
    [min, max]. Falls back to `default` when the conversion raises."""
    try:
        value = type(data)
    except (ValueError, TypeError):
        return default
    if value < min:
        return min
    if value > max:
        return max
    return value
46
+
47
def prompt_spliter(prompt:str, split_token:str, count:int):
    """Split `prompt` on `split_token`, padding with '' so the result has
    exactly `count` parts (extras are truncated)."""
    parts = prompt.split(split_token)
    if len(parts) < count:
        parts.extend([''] * (count - len(parts)))
    return parts[:count]
52
+
53
def combine_masks(mask, combine_masks_option, mask2):
    """Combine two uint8 masks with a boolean gate.

    Supported gates: AND, OR, XOR, NOR, NAND. Returns None (implicitly)
    for an unknown gate name, matching the original behavior.
    """
    gates = {
        'AND': lambda a, b: cv2.bitwise_and(a, b),
        'OR': lambda a, b: cv2.bitwise_or(a, b),
        'XOR': lambda a, b: cv2.bitwise_xor(a, b),
        'NOR': lambda a, b: cv2.bitwise_not(cv2.bitwise_or(a, b)),
        'NAND': lambda a, b: cv2.bitwise_not(cv2.bitwise_and(a, b)),
    }
    gate = gates.get(combine_masks_option)
    if gate is not None:
        return gate(mask, mask2)
59
+
60
def dino_detect_from_prompt(prompt:str, detailer_sam_model, detailer_dino_model, init_image, disable_mask_paint_mode, inpaint_mask_mode, image_mask):
    """Evaluate a detection-prompt expression against `init_image` and return
    the resulting uint8 mask, or None when nothing matched (or when the
    painted-mask filter leaves no valid mode).
    """
    clear_cache()
    # All-zero greyscale canvas: the "no detection" fallback and the
    # reference for the emptiness check below.
    image_np_zero = np.array(init_image.convert('L'))
    image_np_zero[:,:] = 0
    image_np = np.array(init_image)
    image_np_rgb = image_np[:,:,:3].copy()
    image_set = (init_image, image_np, image_np_rgb, image_np_zero)
    model_set = (detailer_sam_model, detailer_dino_model)
    result = dino_prompt_detector(prompt, model_set, image_set)
    clear_cache()
    # An all-zero result means no region matched the prompt.
    if np.array_equal(result, image_np_zero): return None
    if disable_mask_paint_mode: return result
    if image_mask is None: return result
    # Restrict the detection to inside (Inner) or outside (Outer) the
    # user-painted inpaint mask from the img2img canvas.
    image_mask = np.array(image_mask.resize((result.shape[1],result.shape[0])).convert('L'))
    image_mask = np.resize(image_mask, result.shape)
    if inpaint_mask_mode == 'Inner': return cv2.bitwise_and(result, image_mask)
    if inpaint_mask_mode == 'Outer': return cv2.bitwise_and(result, cv2.bitwise_not(image_mask))
    return None
78
+
79
def dino_prompt_token_file(prompt:str, image_np_zero, image_np_rgb):
    """Resolve a "<type:value:dilation:confidence>" special token into a mask.

    Supported types:
      AREA  -- half-plane masks (left/right/top/bottom) or the full frame (all)
      FILE  -- a user-supplied mask image from models/ddsdmask (see mask_embed)
      MODEL -- a bs_model detector run on the RGB image
    `image_np_zero` is an all-zero greyscale canvas used as base and fallback.
    """
    usage_type, usage, dilation, confidence = prompt_spliter(prompt, ':', 4)
    usage_type = usage_type.upper()
    usage = usage.upper()
    # `confidence` is only consumed by the MODEL branch.
    confidence = try_convert(confidence, float, 0.3, 0, 1)
    if usage_type == 'AREA':
        if usage == 'LEFT':
            image_np_zero[:,:image_np_zero.shape[1] // 2] = 255
            image_np_zero[:,image_np_zero.shape[1] // 2:] = 0
        elif usage == 'RIGHT':
            image_np_zero[:,:image_np_zero.shape[1] // 2] = 0
            image_np_zero[:,image_np_zero.shape[1] // 2:] = 255
        elif usage == 'TOP':
            image_np_zero[:image_np_zero.shape[0] // 2,:] = 255
            image_np_zero[image_np_zero.shape[0] // 2:,:] = 0
        elif usage == 'BOTTOM':
            image_np_zero[:image_np_zero.shape[0] // 2,:] = 0
            image_np_zero[image_np_zero.shape[0] // 2:,:] = 255
        elif usage == 'ALL':
            image_np_zero[:,:] = 255
    if usage_type == 'FILE':
        # mask_embed keys are upper-cased file stems, matching `usage`.
        if usage in mask_embed:
            image = Image.open(mask_embed[usage]).convert('L')
            h, w = image_np_zero.shape[:2]
            # Resize the stored mask to the working image's dimensions.
            image = image.resize((w, h))
            image_np_zero = np.array(image)
    if usage_type == 'MODEL':
        mask = bs_model(image_np_rgb, usage, confidence)
        # Detector found nothing: return the (possibly still empty) base mask.
        if mask is None: return image_np_zero
        image_np_zero = mask
    return dilate_mask(image_np_zero, try_convert(dilation, int, 2, 0, 512))
110
+
111
def dino_prompt_detector(prompt:str, model_set, image_set):
    """Recursively evaluate a detection-prompt expression into a single mask.

    Parenthesized sub-expressions are evaluated first and substituted with
    numbered GROUPMASK placeholders; the remaining operand tokens are then
    folded left-to-right with the AND/OR/XOR/NOR/NAND gates.

    model_set: (sam_model_name, dino_model_name)
    image_set: (pil_image, image_np, image_np_rgb, zero_mask)
    """
    result_group = {}

    def resolve(token):
        # Turn one operand -- an already-computed ndarray, a GROUPMASK
        # placeholder, a "<special>" token, or a
        # "text:sam_level:threshold:dilation" spec -- into a mask ndarray.
        if isinstance(token, np.ndarray):
            return token
        if token_match.match(token.strip()) is not None:
            return result_group[token.strip()]
        match = token_file.match(token)
        if match is not None:
            return dino_prompt_token_file(match.group(1), image_set[3].copy(), image_set[2].copy())
        dino_text, sam_level, dino_box_threshold, dilation = prompt_spliter(token, ':', 4)
        mask = sam_predict(model_set[0], model_set[1], image_set[0], image_set[1], image_set[2], dino_text,
                           try_convert(dino_box_threshold.strip(), float, 0.3, 0, 1.0),
                           try_convert(dilation.strip(), int, 16, 0, 512),
                           try_convert(sam_level.strip(), int, 0, 0, 2))
        # No detection -> empty (all-zero) mask.
        return image_set[3].copy() if mask is None else mask

    # Evaluate innermost parentheses first, replacing each group with a
    # placeholder whose result is stashed in result_group.
    find = token_first.search(prompt)
    result_count = 0
    while find:
        result_group[f'{result_count}GROUPMASK'] = dino_prompt_detector(find.group(1), model_set, image_set)
        prompt = prompt.replace(find.group(), f' {result_count}GROUPMASK ')
        result_count += 1
        find = token_first.search(prompt)

    spliter = token_split.split(prompt)

    # Fold gates left-to-right: [a, OP, b, rest...] -> [OP(a, b), rest...]
    while len(spliter) > 1:
        left, operator, right = spliter[:3]
        spliter[:3] = [combine_masks(resolve(left), operator, resolve(right))]
        gc.collect()
        torch_gc()

    # Bug fix: the original single-token path never checked for GROUPMASK
    # placeholders, so a fully parenthesized prompt like "(face)" was sent
    # to DINO as the literal text "0GROUPMASK". resolve() handles every
    # token kind uniformly (and also removes the triplicated operand logic).
    return resolve(spliter[0])
170
+
171
def mask_spliter_and_remover(mask, area):
    """Split a binary mask into one image per connected component, dropping
    components smaller than `area` pixels.

    Returns a list of uint8 images (0/255), one per surviving component.
    """
    gc.collect()
    torch_gc()
    labels = measure.label(mask)
    regions = measure.regionprops(labels)

    # Erase components below the area threshold directly in the label map.
    for region in regions:
        if region.area < area:
            for coord in region.coords:
                labels[coord[0], coord[1]] = 0

    num_labels = np.max(labels)

    label_images = []
    for index in range(num_labels):
        label_image = np.zeros_like(mask, dtype=np.uint8)
        label_image[labels == (index + 1)] = 255
        # Bug fix: labels erased above kept their numbers allocated, so the
        # original returned all-zero masks for them; skip empty masks so
        # callers never inpaint with an empty selection.
        if label_image.any():
            label_images.append(label_image)
    return label_images
190
+
191
def I2I_Generator_Create(p, i2i_sample, i2i_mask_blur, full_res_inpainting, inpainting_padding, init_image, denoise, cfg, steps, width, height, tiling, scripts, scripts_list, alwaysonscripts_list, script_args, positive, negative, fill = 1):
    """Build an img2img (inpainting) processing object derived from `p`.

    Copies seeds, styles, paths and model from the parent processing `p`,
    installs the given sampler/cfg/steps/size, and attaches copies of the
    provided script lists so the sub-run does not mutate the parent's.
    The mask is left None; callers assign it before processing.
    """
    i2i = StableDiffusionProcessingImg2Img(
        init_images = [init_image],
        resize_mode = 0,
        # Placeholder; the real denoise value is assigned after construction.
        denoising_strength = 0,
        mask = None,
        mask_blur= i2i_mask_blur,
        inpainting_fill = fill,
        inpaint_full_res = full_res_inpainting,
        inpaint_full_res_padding= inpainting_padding,
        inpainting_mask_invert= 0,
        sd_model=p.sd_model,
        outpath_samples=p.outpath_samples,
        outpath_grids=p.outpath_grids,
        restore_faces=p.restore_faces,
        prompt='',
        negative_prompt='',
        styles=p.styles,
        seed=p.seed,
        subseed=p.subseed,
        subseed_strength=p.subseed_strength,
        seed_resize_from_h=p.seed_resize_from_h,
        seed_resize_from_w=p.seed_resize_from_w,
        sampler_name=i2i_sample,
        n_iter=1,
        batch_size=1,
        steps=steps,
        cfg_scale=cfg,
        width=width,
        height=height,
        tiling=tiling,
    )
    i2i.denoising_strength = denoise
    i2i.do_not_save_grid = True
    i2i.do_not_save_samples = True
    i2i.override_settings = {}
    # NOTE(review): upstream webui treats this attribute as a boolean; {} is
    # falsy here, which disables restoring settings afterwards -- confirm
    # that is intended.
    i2i.override_settings_restore_afterwards = {}
    i2i.scripts = scripts
    # Copies so the sub-run can be filtered without touching the parent lists.
    i2i.scripts.scripts = scripts_list.copy()
    i2i.scripts.alwayson_scripts = alwaysonscripts_list.copy()
    i2i.script_args = script_args
    i2i.prompt = positive
    i2i.negative_prompt = negative
    # Flag consumed elsewhere in this extension to mark nested processing.
    i2i.sub_processing = True

    return i2i
237
+
238
def get_fonts_list():
    """Collect system font display names plus a name -> file-path map,
    skipping font files matplotlib cannot parse."""
    names = []
    paths = {}
    for font_file in matplotlib.font_manager.findSystemFonts():
        try:
            display_name = matplotlib.font_manager.FontProperties(fname=font_file).get_name()
        except RuntimeError:
            print(f'Skip font file: {font_file}')
            continue
        names.append(display_name)
        paths[display_name] = font_file
    return names, paths
248
+
249
def image_apply_watermark(image, watermark_type, watermark_position, watermark_image, watermark_image_size_width, watermark_image_size_height, watermark_text, watermark_text_color, watermark_text_font, watermark_text_size, watermark_padding, watermark_alpha):
    """Stamp a text or image watermark onto `image` and return the result.

    `watermark_position` places the stamp on a 3x3 grid inset by
    `watermark_padding`; `watermark_alpha` blends the stamp with the original.
    For the Image type, pure-white pixels of the watermark are treated as
    transparent.
    """
    gc.collect()
    torch_gc()
    if watermark_type == 'Text':
        # NOTE(review): ImageFont.getsize was removed in Pillow 10
        # (use getbbox/getlength there) -- confirm the Pillow version in use.
        font = ImageFont.truetype(watermark_text_font, watermark_text_size)
        copy_image = image.copy()
        draw = ImageDraw.Draw(copy_image)
        text_width, text_height = font.getsize(watermark_text)
        left, right, top, bottom = 0 + watermark_padding, image.size[0] - watermark_padding, 0 + watermark_padding, image.size[1] - watermark_padding
        if watermark_position == 'Left': position = (left, (top + bottom) // 2 - text_height // 2)
        elif watermark_position == 'Left-Top': position = (left, top)
        elif watermark_position == 'Top': position = ((left + right) // 2 - text_width // 2, top)
        elif watermark_position == 'Right-Top': position = (right - text_width,top)
        elif watermark_position == 'Right': position = (right - text_width, (top + bottom) // 2 - text_height // 2)
        elif watermark_position == 'Right-Bottom': position = (right - text_width, bottom - text_height)
        elif watermark_position == 'Bottom': position = ((left + right) // 2 - text_width // 2,bottom - text_height)
        elif watermark_position == 'Left-Bottom': position = (left, bottom - text_height)
        elif watermark_position == 'Center': position = ((left + right) // 2 - text_width // 2, (top + bottom) // 2 - text_height // 2)
        # Parse "#RRGGBB" into an (r, g, b) tuple for the fill color.
        draw.text(position, watermark_text, font=font, fill=tuple(int(watermark_text_color[x:x+2], 16) for x in (1,3,5)))
        result = Image.blend(image, copy_image, watermark_alpha)
    elif watermark_type == 'Image':
        left, right, top, bottom = 0 + watermark_padding, image.size[0] - watermark_padding, 0 + watermark_padding, image.size[1] - watermark_padding
        if watermark_position == 'Left': position = (left, (top + bottom) // 2 - watermark_image_size_height // 2)
        elif watermark_position == 'Left-Top': position = (left, top)
        elif watermark_position == 'Top': position = ((left + right) // 2 - watermark_image_size_width // 2, top)
        elif watermark_position == 'Right-Top': position = (right - watermark_image_size_width,top)
        elif watermark_position == 'Right': position = (right - watermark_image_size_width, (top + bottom) // 2 - watermark_image_size_height // 2)
        elif watermark_position == 'Right-Bottom': position = (right - watermark_image_size_width, bottom - watermark_image_size_height)
        elif watermark_position == 'Bottom': position = ((left + right) // 2 - watermark_image_size_width // 2,bottom - watermark_image_size_height)
        elif watermark_position == 'Left-Bottom': position = (left, bottom - watermark_image_size_height)
        elif watermark_position == 'Center': position = ((left + right) // 2 - watermark_image_size_width // 2, (top + bottom) // 2 - watermark_image_size_height // 2)
        copy_np = np.array(image)
        copy_np_origin = copy_np.copy()
        water_image = cv2.resize(watermark_image.copy(), (watermark_image_size_width, watermark_image_size_height))
        # Pure-white watermark pixels become transparent (mask value 0).
        mask = np.where(np.all(water_image == [255, 255, 255], axis=-1), 0, 255)
        alpha = np.zeros((water_image.shape[0], water_image.shape[1]), dtype=np.uint8)
        alpha[:,:] = mask
        # Paste the non-transparent watermark pixels into the target region.
        copy_np_crop = copy_np[position[1]:position[1]+watermark_image_size_height, position[0]:position[0]+watermark_image_size_width, :]
        copy_np_crop[alpha.nonzero()] = water_image[alpha.nonzero()]
        copy_np[position[1]:position[1]+watermark_image_size_height, position[0]:position[0]+watermark_image_size_width, :] = copy_np_crop
        result = Image.fromarray(cv2.addWeighted(copy_np_origin, 1 - watermark_alpha, copy_np, watermark_alpha, 0))
    gc.collect()
    torch_gc()
    return result
293
+
294
def matched_noise(image_np, mask_np, noise = 1, color_variation = 0.05):
    """Fill the masked region of `image_np` with random noise whose frequency
    spectrum matches the unmasked content (spectrum-matched noise for
    outpainting seeds).

    Assumes float arrays in [0, 1] with `mask_np` broadcastable over the
    image -- TODO confirm against callers. Returns the composited image,
    clipped to [0, 1]. Deterministic: uses a fixed RNG seed (0).
    """
    def _fft2(data):
        """Per-channel centered 2D FFT (ortho-normalized)."""
        if data.ndim > 2:
            out_fft = np.zeros((data.shape[0], data.shape[1], data.shape[2]), dtype=np.complex128)
            for c in range(data.shape[2]):
                c_data = data[:,:,c]
                out_fft[:,:,c] = np.fft.fft2(np.fft.fftshift(c_data), norm='ortho')
                out_fft[:,:,c] = np.fft.ifftshift(out_fft[:,:,c])
        else:
            out_fft = np.zeros((data.shape[0], data.shape[1]), dtype=np.complex128)
            out_fft[:,:] = np.fft.fft2(np.fft.fftshift(data), norm='ortho')
            out_fft[:,:] = np.fft.ifftshift(out_fft[:,:])
        return out_fft
    def _ifft2(data):
        """Per-channel centered inverse 2D FFT (ortho-normalized)."""
        if data.ndim > 2:
            out_ifft = np.zeros((data.shape[0], data.shape[1], data.shape[2]), dtype=np.complex128)
            for c in range(data.shape[2]):
                c_data = data[:, :, c]
                out_ifft[:, :, c] = np.fft.ifft2(np.fft.fftshift(c_data), norm="ortho")
                out_ifft[:, :, c] = np.fft.ifftshift(out_ifft[:, :, c])
        else:
            out_ifft = np.zeros((data.shape[0], data.shape[1]), dtype=np.complex128)
            out_ifft[:, :] = np.fft.ifft2(np.fft.fftshift(data), norm="ortho")
            out_ifft[:, :] = np.fft.ifftshift(out_ifft[:, :])
        return out_ifft
    def _get_gaussian_window(width, height, std=3.14, mode=0):
        """Low-pass window over frequency space (Gaussian or rational falloff)."""
        window_scale_x = float(width / min(width, height))
        window_scale_y = float(height / min(width, height))
        window = np.zeros((width, height))
        x = (np.arange(width) / width * 2. - 1.) * window_scale_x
        for y in range(height):
            fy = (y / height * 2. - 1.) * window_scale_y
            if mode == 0:
                window[:, y] = np.exp(-(x ** 2 + fy ** 2) * std)
            else:
                window[:, y] = (1 / ((x ** 2 + 1.) * (fy ** 2 + 1.))) ** (std / 3.14)
        return window
    def _get_masked_window_rgb(np_mask_grey, hardness=1.0):
        """Broadcast the grey mask to 3 channels, optionally hardened by power."""
        np_mask_rgb = np.zeros((np_mask_grey.shape[0], np_mask_grey.shape[1], 3))
        if hardness != 1.0:
            hardened = np_mask_grey[:] ** hardness
        else:
            hardened = np_mask_grey[:]
        for c in range(3):
            np_mask_rgb[:, :, c] = hardened[:]
        return np_mask_rgb

    width = image_np.shape[0]
    height = image_np.shape[1]
    channel = image_np.shape[2]

    # Blank out the masked region so it does not contribute to the spectrum.
    image_np = image_np[:] * (1.0 - mask_np)
    mask_np_grey = (np.sum(mask_np, axis=2) / 3.0)
    img_mask = mask_np_grey > 1e-6   # pixels to be filled
    ref_mask = mask_np_grey < 1e-3   # pixels used as the reference histogram

    # Windowed source: keep the unmasked content, fill the hole with the
    # image's mean brightness to avoid spectral ringing at the mask edge.
    image_windowed = image_np * (1.0 - _get_masked_window_rgb(mask_np_grey))
    image_windowed /= np.max(image_windowed)
    image_windowed += np.average(image_np) * mask_np

    src_fft = _fft2(image_windowed)
    src_dist = np.absolute(src_fft)
    src_phase = src_fft / src_dist

    # Fixed seed: the fill is deterministic across calls.
    rng = np.random.default_rng(0)

    noise_window = _get_gaussian_window(width, height, mode=1)
    noise_rgb = rng.random((width,height, channel))
    noise_grey = (np.sum(noise_rgb, axis=2) / 3.0)
    # Mix per-channel noise with its grey average to limit color variation.
    noise_rgb *= color_variation
    for c in range(channel):
        noise_rgb[:,:,c] += (1.0 - color_variation) * noise_grey

    # Low-pass the noise, then shape its spectrum to match the source's
    # magnitude (raised to `noise`) while keeping the source phase.
    noise_fft = _fft2(noise_rgb)
    for c in range(channel):
        noise_fft[:,:,c] *= noise_window
    noise_rgb = np.real(_ifft2(noise_fft))
    shaped_noise_fft = _fft2(noise_rgb)
    shaped_noise_fft[:,:,:] = np.absolute(shaped_noise_fft[:,:,:]) ** 2 * (src_dist ** noise) * src_phase

    # Brightness variation is currently disabled (kept for tuning).
    brightness_variation = 0
    contrast_adjusted_np = image_np[:] * (brightness_variation + 1.0) - brightness_variation * 2.0

    # Normalize the shaped noise to [0, 1], then match its histogram in the
    # hole to the unmasked reference pixels before compositing.
    shaped_noise = np.real(_ifft2(shaped_noise_fft))
    shaped_noise -= np.min(shaped_noise)
    shaped_noise /= np.max(shaped_noise)
    shaped_noise[img_mask, :] = exposure.match_histograms(shaped_noise[img_mask, :] ** 1.0, contrast_adjusted_np[ref_mask, :], channel_axis = 1)
    shaped_noise = image_np[:] * (1.0 - mask_np) + shaped_noise * mask_np

    return np.clip(shaped_noise[:], 0.0, 1.0)
exhm/detailer/sd-webui-ddsd/.gitignore ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea
161
+ *.pt
162
+ *.pth
163
+ *.ckpt
164
+ *.safetensors
165
+ models/control_sd15_scribble.pth
166
+ detected_maps/
167
+
168
+ # Ignore all .ddcfg files except for Empty.ddcfg
169
+ config/*.ddcfg
170
+ !config/Empty.ddcfg
exhm/detailer/sd-webui-ddsd/README.md ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # sd-webui-ddsd
2
+ 자동으로 동작하는 후보정 작업 확장.
3
+
4
+ ## What is
5
+ ### Outpaint
6
+ #### Outpaint How to use
7
+ 1. 증가시킬 픽셀을 선택
8
+ 2. 증가시킬 방향 선택
9
+ 1. 방향이 None이면 미동작
10
+ 3. 증가시킬때 사용할 프롬프트 작성(전체 인페인팅시 이용)
11
+ 1. 비어있을때 원본 프롬프트 사용
12
+ 4. Denoise, CFG, Step 선택
13
+ 1. Step은 최소 원본 Step 2 ~ 3배 이상 적절한 값 요구
14
+ 5. 생성!
15
+ ### Upscale
16
+ 이미지를 특정 크기로 잘라내어 타일별 업스케일을 하는 도구. 업스케일시 VRAM을 적게 소모.
17
+ #### Upscale How to use
18
+ 1. 크기를 키울때 사용할 upscaler 모델 선택
19
+ 2. 크기를 키울 배수 선택
20
+ 3. 가로, 세로를 내가 단일로 생성할 수 있는 이미지의 최대 크기로 선택(이미지 생성 속도를 최대한 빠르게 하기 위하여)
21
+ 1. 가로 또는 세로중 한개를 0으로 세팅시 업스케일만 동작(세부 구조를 디테일하게하는 인페인팅이 동작하지 않음)
22
+ 4. before running 체크
23
+ 1. 체크시 업스케일을 먼저 돌려서 인페인팅의 퀄리티 상승. 단, 인페인팅시 더 많은 VRAM 요구
24
+ 5. 생성!
25
+ ### Detect Detailer
26
+ 특정 키워드로 이미지를 탐색 후 인페인팅하는 도구.
27
+ #### Detect Detailer How to use
28
+ 0. 인페인팅의 범위 제한(I2I 전용)
29
+ 1. Inner 옵션은 I2I의 인페인팅에서 칠한 범위 내부만 이미지를 탐색
30
+ 2. Outer 옵션은 I2I의 인페인팅에서 칠한 범위 외부만 이미지를 탐색
31
+ 1. 탐색 키워드 작성
32
+ 1. 탐색할 키워드를 작성(face, person 등등)
33
+ 1. 탐색할 키워드는 문장형도 가능(happy face, running dog)
34
+ 2. 탐색할 키워드를 .으로 분할 가능(face. arm, face. chest)
35
+ 2. 탐색할 키워드에 사용 가능한 추가 옵션 존재
36
+ 1. &lt;area:type&gt;을 이용하여 특정 범위 탐색 가능
37
+ 1. 범위 종류는 left, right, top, bottom, all이 존재
38
+ 2. &lt;file:filename&gt;을 이용하여 특정 파일 탐색 가능
39
+ 1. 특정 파일의 위치는 models/ddsdmask
40
+ 3. &lt;model:type&gt;을 이용하여 특정 모델 탐색 가능
41
+ 1. type은 face_media_full, face_media_short와 파일명이 존재
42
+ 2. 파일은 models/yolo에 위치
43
+ 4. &lt;type1:type2:dilation:confidence&gt; 같이 type1과 type2외에 dilation과 confidence도 추가 입력 가능
44
+ 1. confidence는 model 타입에서만 사용되는 값
45
+ 3. 탐색한 범위를 AND, OR, XOR, NAND, NOR 등의 게이트 옵션으로 연산 가능
46
+ 1. face OR (body NAND outfit) -> 괄호안의 body NAND outfit을 먼저 한 후에 face와 OR 연산을 동작
47
+ 2. 괄호는 최대한 적게 이용. 많이 이용시 많은 VRAM 소모.
48
+ 3. 동작은 왼쪽에서 오른쪽으로 순차적 동작.
49
+ 4. 탐색할 키워드에 옵션으로 여러가지 옵션 조절 가능
50
+ 1. face:0:0.4:4 OR outfit:2:0.5:8
51
+ 2. 순서대로 탐색할 프롬프트, SAM 탐색 레벨(0-2), 민감도(0-1), 팽창값(0-512)을 가짐
52
+ 3. 값을 생략하면 초기값으로 세팅
53
+ 2. 긍정 프롬프트 입력
54
+ 1. 인페인팅시 동작시킬 긍정 프롬프트 입력
55
+ 3. 부정 프롬프트 입력
56
+ 1. 인페인팅시 동작시킬 부정 프롬프트 입력
57
+ 4. Denoising, CFG, Steps, Clip skip, Ckpt, Vae 수정
58
+ 1. 인페인팅시 동작에 영향을 주는 옵션
59
+ 5. Split Mask 옵션 체크
60
+ 1. 체크시 마스크가 떨어져 있는것이 존재한다면 따로 인페인팅.
61
+ 1. 따로 인페인팅시 퀄리티 상승. 하지만 더 많은 인페인팅을 요구하여 생성속도 하락.
62
+ 6. Remove Area 옵션 체크
63
+ 1. Split Mask 옵션이 Enable 되어야만 동작
64
+ 2. 분할 인페인팅시 일정 크기 이하의 면적은 인페인팅에서 제외
65
+ 6. 생성!
66
+ ### Postprocessing
67
+ 최종적으로 생성된 이미지에 가하는 후보정
68
+ #### Postprocessing How to use
69
+ 1. 가하고자 하는 후보정을 선택
70
+ 2. 생성!
71
+ ### Watermark
72
+ 이미지 생성 최종본에 자신의 증명을 기입하는 기능
73
+ #### Watermark How to use
74
+ 1. 기입할 증명의 종류 선택(글자, 이미지)
75
+ 2. 선택한 종류를 입력
76
+ 3. 선택한 종류의 크기와 위치를 지정
77
+ 4. Padding으로 해당 위치에서 얼만큼 떨어져 있을지 설정
78
+ 5. Alpha로 얼만큼 투명할지 결정
79
+ 6. 생성!
80
+
81
+ ### Video
82
+ [![Stable Diffusion - DDSD 확장 기능 (No - Talking)](http://img.youtube.com/vi/9wfZyJhPPho/0.jpg)](https://youtu.be/9wfZyJhPPho)
83
+
84
+ ## Installation
85
+ 1. 다운로드 [CUDA](https://developer.nvidia.com/cuda-toolkit-archive)와 [cuDNN](https://developer.nvidia.com/rdp/cudnn-archive)
86
+ 1. 자신이 가진 WebUI와 동일한 버전의 `CUDA`와 `cuDNN`버전으로 설치
87
+ 1. 이것은 다운로드를 편하게 하기위한 구글링크. [CUDA 117](https://drive.google.com/file/d/1HRTOLTB44-pRcrwIw9lQak2OC2ohNle3/view?usp=share_link)와 [cuDNN](https://drive.google.com/file/d/1QcgaxUra0WnCWrCLjsWp_QKw1PKcvqpj/view?usp=share_link)
88
+ 2. `CUDA` 설치 후 해당 폴더에 `cuDNN` 덮어쓰기
89
+ 3. 일정 버전은 Easy Install을 지원. `CUDA`와 `cuDNN` 불필요.
90
+ 1. 지원버전 (torch == 1.13.1+cu117, torch==2.0.0+cu117 , torch==2.0.0+cu118)
91
+ 2. 확장탭에서 설치 `https://github.com/NeoGraph-K/sd-webui-ddsd` 또는 다운로드 후 `extension/` 에 풀어넣기
92
+ 3. WebUI를 완전히 재시작
93
+
94
+ ## Credits
95
+
96
+ dustysys/[ddetailer](https://github.com/dustysys/ddetailer)
97
+
98
+ AUTOMATIC1111/[stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
99
+
100
+ facebookresearch/[Segment Anything](https://github.com/facebookresearch/segment-anything)
101
+
102
+ IDEA-Research/[GroundingDINO](https://github.com/IDEA-Research/GroundingDINO)
103
+
104
+ IDEA-Research/[Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything)
105
+
106
+ continue-revolution/[sd-webui-segment-anything](https://github.com/continue-revolution/sd-webui-segment-anything)
107
+
108
+ Bing-su/[adetailer](https://github.com/Bing-su/adetailer)
exhm/detailer/sd-webui-ddsd/config/Empty.ddcfg ADDED
@@ -0,0 +1 @@
 
 
1
+ {"enable_script_names": "dynamic_thresholding;dynamic_prompting", "disable_watermark": true, "disable_postprocess": true, "disable_upscaler": true, "ddetailer_before_upscaler": false, "scalevalue": 2, "upscaler_sample": "Original", "overlap": 32, "upscaler_index": "SwinIR_4x", "rewidth": 512, "reheight": 512, "denoising_strength": 0.1, "upscaler_ckpt": "Original", "upscaler_vae": "Original", "disable_detailer": true, "disable_mask_paint_mode": true, "inpaint_mask_mode": "Inner", "detailer_sample": "Original", "detailer_sam_model": "sam_vit_b_01ec64.pth", "detailer_dino_model": "groundingdino_swinb_cogcoor.pth", "dino_full_res_inpaint": true, "dino_inpaint_padding": 0, "detailer_mask_blur": 4, "disable_outpaint": true, "outpaint_sample": "Original", "outpaint_mask_blur": 8, "dino_detect_count": 5, "dino_detection_ckpt_1": "Original", "dino_detection_vae_1": "Original", "dino_detection_prompt_1": "", "dino_detection_positive_1": "", "dino_detection_negative_1": "", "dino_detection_denoise_1": 0.4, "dino_detection_cfg_1": 0, "dino_detection_steps_1": 0, "dino_detection_spliter_disable_1": true, "dino_detection_spliter_remove_area_1": 16, "dino_detection_clip_skip_1": 0, "dino_detection_ckpt_2": "Original", "dino_detection_vae_2": "Original", "dino_detection_prompt_2": "", "dino_detection_positive_2": "", "dino_detection_negative_2": "", "dino_detection_denoise_2": 0.4, "dino_detection_cfg_2": 0, "dino_detection_steps_2": 0, "dino_detection_spliter_disable_2": true, "dino_detection_spliter_remove_area_2": 16, "dino_detection_clip_skip_2": 0, "dino_detection_ckpt_3": "Original", "dino_detection_vae_3": "Original", "dino_detection_prompt_3": "", "dino_detection_positive_3": "", "dino_detection_negative_3": "", "dino_detection_denoise_3": 0.4, "dino_detection_cfg_3": 0, "dino_detection_steps_3": 0, "dino_detection_spliter_disable_3": true, "dino_detection_spliter_remove_area_3": 16, "dino_detection_clip_skip_3": 0, "dino_detection_ckpt_4": "Original", 
"dino_detection_vae_4": "Original", "dino_detection_prompt_4": "", "dino_detection_positive_4": "", "dino_detection_negative_4": "", "dino_detection_denoise_4": 0.4, "dino_detection_cfg_4": 0, "dino_detection_steps_4": 0, "dino_detection_spliter_disable_4": true, "dino_detection_spliter_remove_area_4": 16, "dino_detection_clip_skip_4": 0, "dino_detection_ckpt_5": "Original", "dino_detection_vae_5": "Original", "dino_detection_prompt_5": "", "dino_detection_positive_5": "", "dino_detection_negative_5": "", "dino_detection_denoise_5": 0.4, "dino_detection_cfg_5": 0, "dino_detection_steps_5": 0, "dino_detection_spliter_disable_5": true, "dino_detection_spliter_remove_area_5": 16, "dino_detection_clip_skip_5": 0, "watermark_count": 2, "watermark_type_1": "Text", "watermark_position_1": "Center", "watermark_image_1": null, "watermark_image_size_width_1": 100, "watermark_image_size_height_1": 100, "watermark_text_1": "", "watermark_text_color_1": null, "watermark_text_font_1": "Courier New", "watermark_text_size_1": 50, "watermark_padding_1": 10, "watermark_alpha_1": 0.4, "watermark_type_2": "Text", "watermark_position_2": "Center", "watermark_image_2": null, "watermark_image_size_width_2": 100, "watermark_image_size_height_2": 100, "watermark_text_2": "", "watermark_text_color_2": null, "watermark_text_font_2": "Courier New", "watermark_text_size_2": 50, "watermark_padding_2": 10, "watermark_alpha_2": 0.4, "postprocessing_count": 2, "pp_type_1": "none", "pp_saturation_strength_1": 1.1, "pp_sharpening_radius_1": 2, "pp_sharpening_percent_1": 150, "pp_sharpening_threshold_1": 3, "pp_gaussian_radius_1": 2, "pp_brightness_strength_1": 1.1, "pp_color_strength_1": 1.1, "pp_contrast_strength_1": 1.1, "pp_hue_strength_1": 0, "pp_bilateral_sigmaC_1": 10, "pp_bilateral_sigmaS_1": 10, "pp_color_tint_type_name_1": "warm", "pp_color_tint_lut_name_1": "FGCineBasic.cube", "pp_type_2": "none", "pp_saturation_strength_2": 1.1, "pp_sharpening_radius_2": 2, "pp_sharpening_percent_2": 150, 
"pp_sharpening_threshold_2": 3, "pp_gaussian_radius_2": 2, "pp_brightness_strength_2": 1.1, "pp_color_strength_2": 1.1, "pp_contrast_strength_2": 1.1, "pp_hue_strength_2": 0, "pp_bilateral_sigmaC_2": 10, "pp_bilateral_sigmaS_2": 10, "pp_color_tint_type_name_2": "warm", "pp_color_tint_lut_name_2": "FGCineBasic.cube", "outpaint_count": 4, "outpaint_positive_1": "FGCineBasic.cube", "outpaint_negative_1": "", "outpaint_denoise_1": "", "outpaint_cfg_1": 0.8, "outpaint_steps_1": 0, "outpaint_pixels_1": 80, "outpaint_direction_1": 128, "outpaint_positive_2": "FGCineBasic.cube", "outpaint_negative_2": "", "outpaint_denoise_2": "", "outpaint_cfg_2": 0.8, "outpaint_steps_2": 0, "outpaint_pixels_2": 80, "outpaint_direction_2": 128, "outpaint_positive_3": "", "outpaint_negative_3": "", "outpaint_denoise_3": 0.8, "outpaint_cfg_3": 0, "outpaint_steps_3": 80, "outpaint_pixels_3": 128, "outpaint_direction_3": "None", "outpaint_positive_4": "", "outpaint_negative_4": "", "outpaint_denoise_4": 0.8, "outpaint_cfg_4": 0, "outpaint_steps_4": 80, "outpaint_pixels_4": 128, "outpaint_direction_4": "None"}
exhm/detailer/sd-webui-ddsd/install.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import platform
3
+
4
+ import launch
5
+
6
+
7
def check_system_machine():
    """Return True when running on a platform we ship prebuilt wheels for (Windows/AMD64 or Linux/x86_64)."""
    supported = {('Windows', 'AMD64'), ('Linux', 'x86_64')}
    return (platform.system(), platform.machine()) in supported
11
+
12
+
13
def check_python_version(low: int, high: int):
    """Return the CPython tag digits (e.g. '310') if the interpreter is 3.low..3.high, else None."""
    major, minor, _patch = platform.python_version_tuple()
    if int(major) != 3 or not (low <= int(minor) <= high):
        return None
    return major + minor
18
+
19
+
20
def install_pycocotools():
    """Install pycocotools, preferring a prebuilt wheel when platform and Python version match one."""
    message = 'sd-webui-ddsd requirement: pycocotools'
    python_version = check_python_version(8, 11)

    # No matching prebuilt wheel: let pip build from the sdist instead.
    if not (python_version and check_system_machine()):
        launch.run_pip('install pycocotools', message)
        return

    wheel_names = {
        'Windows': 'pycocotools-2.0.6-cp{ver}-cp{ver}-win_amd64.whl',
        'Linux': 'pycocotools-2.0.6-cp{ver}-cp{ver}-manylinux_2_17_x86_64.manylinux2014_x86_64.whl',
    }
    base = 'https://github.com/Bing-su/dddetailer/releases/download/pycocotools/'
    wheel = wheel_names[platform.system()].format(ver=python_version)
    launch.run_pip(f'install {base}{wheel}', message)
34
+
35
+
36
def install_groundingdino():
    """Install GroundingDINO, preferring a prebuilt wheel matching torch/CUDA/Python; otherwise build from source.

    Fixes two failure modes of the previous version:
    - ``torch.version.cuda`` is ``None`` on CPU-only / ROCm torch builds, so calling
      ``.replace`` on it raised ``AttributeError`` instead of falling back to source.
    - The torch-2.1/cu121 fast path never validated the interpreter, producing a
      bogus ``cpNone`` wheel URL on unsupported Python versions.
    """
    import torch
    from packaging.version import parse

    message = 'sd-webui-ddsd requirement: groundingdino'
    source_install = 'install git+https://github.com/IDEA-Research/GroundingDINO'

    # torch_version: '1.13.1' or '2.0.0' or ...
    torch_version = parse(torch.__version__).base_version
    # cuda_version: '117', '118', '121', ... or None on CPU-only/ROCm torch builds.
    cuda_version = torch.version.cuda.replace('.', '') if torch.version.cuda else None
    # Prebuilt wheels are published for CPython 3.9 and 3.10 only.
    python_version = check_python_version(9, 10)

    # Without a supported OS/arch, a CUDA-enabled torch and a supported CPython,
    # no wheel URL can match: build from source.
    if not (check_system_machine() and cuda_version and python_version):
        launch.run_pip(source_install, message)
        return

    system = 'win' if platform.system() == 'Windows' else 'linux'
    machine = 'amd64' if platform.machine() == 'AMD64' else 'x86_64'

    if torch_version in ('2.1.0', '2.1.1', '2.1.2') and cuda_version == '121':
        url = 'https://github.com/Bing-su/GroundingDINO/releases/download/v23.9.27/groundingdino-23.9.27+torch2.1.0.cu121-cp{py}-cp{py}-{system}_{machine}.whl'
        url = url.format(py=python_version, system=system, machine=machine)
        launch.run_pip(f'install {url}', message)
        return

    if (torch_version, cuda_version) not in [
        ('1.13.1', '117'), ('2.0.1', '117'), ('2.0.1', '118'), ('2.1.0', '121')
    ]:
        launch.run_pip(source_install, message)
        return

    url = 'https://github.com/Bing-su/GroundingDINO/releases/download/wheel-0.1.0/groundingdino-0.1.0+torch{torch}.cu{cuda}-cp{py}-cp{py}-{system}_{machine}.whl'
    url = url.format(
        torch=torch_version,
        cuda=cuda_version,
        py=python_version,
        system=system,
        machine=machine,
    )
    launch.run_pip(f'install {url}', message)
78
+
79
+
80
# Walk requirements.txt and install anything the webui environment is missing.
req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt')

with open(req_file) as file:
    for raw_line in file:
        entry = raw_line.strip()
        # scikit-image installs under the import name 'skimage'; probe that name.
        if entry == 'scikit-image':
            entry = 'skimage'
        name, _, pin = entry.partition('==')
        name = name.strip()
        pin = pin.strip()
        if launch.is_installed(name):
            continue
        if name == 'pycocotools':
            install_pycocotools()
        elif name == 'groundingdino':
            install_groundingdino()
        elif name == 'skimage':
            launch.run_pip(
                'install scikit-image',
                'sd-webui-ddsd requirement: scikit-image'
            )
        elif name == 'pillow_lut':
            launch.run_pip(
                'install pillow_lut',
                'sd-webui-ddsd requirement: pillow_lut'
            )
        else:
            spec = name if not pin else f'{name}=={pin}'
            launch.run_pip(
                f'install {spec}',
                f'sd-webui-ddsd requirement: {spec}'
            )
exhm/detailer/sd-webui-ddsd/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pycocotools
2
+ segment_anything
3
+ groundingdino
4
+ scipy
5
+ scikit-image
6
+ pillow_lut
7
+ ultralytics==8.0.87
8
+ mediapipe==0.9.3.0
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd.cpython-310.pyc ADDED
Binary file (53.1 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_bs.cpython-310.pyc ADDED
Binary file (2.59 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_dino.cpython-310.pyc ADDED
Binary file (3.43 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_postprocess.cpython-310.pyc ADDED
Binary file (4.74 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_sam.cpython-310.pyc ADDED
Binary file (3.2 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/__pycache__/ddsd_utils.cpython-310.pyc ADDED
Binary file (13.2 kB). View file
 
exhm/detailer/sd-webui-ddsd/scripts/ddsd.py ADDED
The diff for this file is too large to render. See raw diff
 
exhm/detailer/sd-webui-ddsd/scripts/ddsd_bs.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import torch
5
+
6
+ import mediapipe as mp
7
+ import numpy as np
8
+
9
+ from PIL import Image, ImageDraw
10
+ from ultralytics import YOLO
11
+
12
+ from modules import safe
13
+ from modules.shared import cmd_opts
14
+ from modules.paths import models_path
15
+
16
+ # Directory (under the webui models path) scanned for user-provided YOLO weights (*.pt / *.pth).
+ yolo_models_path = os.path.join(models_path, 'yolo')
17
+
18
def mediapipe_face_detect(image, model_type, confidence):
    """Detect faces with MediaPipe and return a uint8 mask of their boxes, or None when no face is found.

    `model_type` is passed through as MediaPipe's `model_selection`
    (0 = short-range, 1 = full-range detector).
    """
    width, height = image.size
    frame = np.array(image)

    face_detection = mp.solutions.face_detection
    with face_detection.FaceDetection(model_selection=model_type, min_detection_confidence=confidence) as detector:
        result = detector.process(frame)

    if result.detections is None:
        return None

    boxes = []
    for detection in result.detections:
        # MediaPipe boxes are relative [0, 1] coordinates; scale to pixels.
        rel = detection.location_data.relative_bounding_box
        left = rel.xmin * width
        top = rel.ymin * height
        boxes.append([left, top, left + rel.width * width, top + rel.height * height])

    return create_mask_from_bbox(image, boxes)
39
+
40
def ultralytics_predict(image, model_type, confidence, device):
    """Run a user-supplied YOLO checkpoint named `model_type` and return a box mask, or None.

    Returns None when the yolo models directory is absent, no weight file with a
    matching (case-insensitive) stem exists, or nothing is detected above
    `confidence`. Previously a missing directory raised FileNotFoundError.
    """
    # The yolo models folder is user-managed; treat a missing folder as "no models".
    if not os.path.isdir(yolo_models_path):
        return None
    models = [
        os.path.join(yolo_models_path, name)
        for name in os.listdir(yolo_models_path)
        if name.endswith(('.pt', '.pth')) and os.path.splitext(name)[0].upper() == model_type
    ]
    if not models:
        return None
    model = YOLO(models[0])
    prediction = model(image, conf=confidence, show_labels=False, device=device)
    bboxes = prediction[0].boxes.xyxy.cpu().numpy()
    if bboxes.size == 0:
        return None
    return create_mask_from_bbox(image, bboxes.tolist())
49
+
50
def create_mask_from_bbox(image, bboxes):
    """Rasterize axis-aligned boxes into a single-channel numpy mask (255 inside a box, 0 elsewhere)."""
    canvas = Image.new('L', image.size, 0)
    painter = ImageDraw.Draw(canvas)
    for box in bboxes:
        painter.rectangle(box, fill=255)
    return np.array(canvas)
56
+
57
def bs_model(image, model_type, confidence):
    """Dispatch `model_type` to MediaPipe face detection or an ultralytics YOLO model; return the mask.

    Temporarily replaces webui's restricted `torch.load` with the unrestricted
    loader so YOLO checkpoints can be deserialized. The restore now runs in a
    `finally` block: previously, an exception during detection left the unsafe
    loader patched in for every later `torch.load` call in the process.
    """
    image = Image.fromarray(image)
    original_load = torch.load
    torch.load = safe.unsafe_torch_load
    try:
        if model_type == 'FACE_MEDIA_FULL':
            mask = mediapipe_face_detect(image, 1, confidence)
        elif model_type == 'FACE_MEDIA_SHORT':
            mask = mediapipe_face_detect(image, 0, confidence)
        else:
            # Keep YOLO off the GPU when the webui runs in low/medium VRAM mode.
            device = 'cpu' if getattr(cmd_opts, 'lowvram', False) or getattr(cmd_opts, 'medvram', False) else ''
            mask = ultralytics_predict(image, model_type, confidence, device)
    finally:
        torch.load = original_load
    return mask