sdfafdfsdf kelseye committed on
Commit
f5d7eba
·
0 Parent(s):

Duplicate from DiffSynth-Studio/Qwen-Image-Edit-F2P

Browse files

Co-authored-by: kelseye.xh <kelseye@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/qwen_woman_0.jpg filter=lfs diff=lfs merge=lfs -text
37
+ assets/qwen_woman_2.jpg filter=lfs diff=lfs merge=lfs -text
38
+ assets/qwen_woman_3.jpg filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ pipeline_tag: image-to-image
4
+ library_name: diffusers
5
+ tags:
6
+ - lora
7
+ ---
8
+ # Qwen-Image-Edit Face-to-Image Generation Model
9
+ ## Model Introduction
10
+
11
+ This model is a face-controlled image generation model built on [Qwen-Image-Edit](https://www.modelscope.cn/models/Qwen/Qwen-Image-Edit). Given a cropped facial image as input, it generates full portrait images of the same person.
12
+
13
+ ## Result Demonstration
14
+
15
+ |Face|Generated Image 1|Generated Image 2|Generated Image 3|Generated Image 4|
16
+ |-|-|-|-|-|
17
+ |![](./assets/qwen_woman_face_crop.png)|![](./assets/qwen_woman_0.jpg)|![](./assets/qwen_woman_1.jpg)|![](./assets/qwen_woman_2.jpg)|![](./assets/qwen_woman_3.jpg)|
18
+
19
+
20
+
21
+ ## Inference Code
22
+ ```
23
+ git clone https://github.com/modelscope/DiffSynth-Studio.git
24
+ cd DiffSynth-Studio
25
+ pip install -e .
26
+ ```
27
+
28
+ ```python
29
+ from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
30
+ import torch
31
+ from modelscope import snapshot_download, dataset_snapshot_download
32
+ from PIL import Image
33
+
34
+ pipe = QwenImagePipeline.from_pretrained(
35
+ torch_dtype=torch.bfloat16,
36
+ device="cuda",
37
+ model_configs=[
38
+ ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors"),
39
+ ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="text_encoder/model*.safetensors"),
40
+ ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
41
+ ],
42
+ tokenizer_config=None,
43
+ processor_config=ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="processor/"),
44
+ )
45
+ snapshot_download("DiffSynth-Studio/Qwen-Image-Edit-F2P", local_dir="models/DiffSynth-Studio/Qwen-Image-Edit-F2P", allow_file_pattern="model.safetensors")
46
+ pipe.load_lora(pipe.dit, "models/DiffSynth-Studio/Qwen-Image-Edit-F2P/model.safetensors")
47
+ dataset_snapshot_download(
48
+ dataset_id="DiffSynth-Studio/example_image_dataset",
49
+ local_dir="./data/example_image_dataset",
50
+ allow_file_pattern="f2p/qwen_woman_face_crop.png"
51
+ )
52
+ face_image = Image.open("data/example_image_dataset/f2p/qwen_woman_face_crop.png").convert("RGB")
53
+ ```
54
+
55
+ ```python
56
+ prompt = "Photography. A young woman wearing a yellow dress stands in a flower field, with a background of colorful flowers and green grass."
57
+ image = pipe(prompt, edit_image=face_image, seed=42, num_inference_steps=40, height=1152, width=864)
58
+ image.save(f"image.jpg")
59
+ ```
60
+
61
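+ Face Auto-Cropping: the snippet below crops the largest detected face from a photo so it can be used as the face input. Note that `crop_face` returns `None` when no face is detected.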
62
+ ```python
63
+ import torch
64
+ from PIL import Image
65
+ import numpy as np
66
+ from insightface.app import FaceAnalysis
67
+ import cv2
68
+
69
+ class FaceDetector(torch.nn.Module):
70
+
71
+ def __init__(self):
72
+ super().__init__()
73
+ providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
74
+ provider_options = [{"device_id": 0}, {}]
75
+ self.app_640 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
76
+ self.app_640.prepare(ctx_id=0, det_size=(640, 640))
77
+ self.app_320 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
78
+ self.app_320.prepare(ctx_id=0, det_size=(320, 320))
79
+ self.app_160 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
80
+ self.app_160.prepare(ctx_id=0, det_size=(160, 160))
81
+
82
+ def _detect_face(self, id_image_cv2):
83
+ face_info = self.app_640.get(id_image_cv2)
84
+ if len(face_info) > 0:
85
+ return face_info
86
+ face_info = self.app_320.get(id_image_cv2)
87
+ if len(face_info) > 0:
88
+ return face_info
89
+ face_info = self.app_160.get(id_image_cv2)
90
+ return face_info
91
+
92
+ def crop_face(self, id_image):
93
+ face_info = self._detect_face(cv2.cvtColor(np.array(id_image), cv2.COLOR_RGB2BGR))
94
+ if len(face_info) == 0:
95
+ return None
96
+ else:
97
+ bbox = sorted(face_info, key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]))[-1]['bbox']
98
+ return id_image.crop(list(map(int, bbox)))
99
+
100
+
101
+ face_detector = FaceDetector()
102
+ face_image = face_detector.crop_face(Image.open("image_2.jpg"))
103
+ face_image.save("face_crop.jpg")
104
+ ```
README_from_modelscope.md ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ frameworks:
3
+ - Pytorch
4
+ license: Apache License 2.0
5
+ tags: []
6
+ tasks:
7
+ - image-to-image
8
+ base_model:
9
+ - Qwen/Qwen-Image-Edit
10
+ base_model_relation: adapter
11
+ ---
12
+
13
+ # Qwen-Image-Edit 人脸生成图像模型
14
+ ## 模型介绍
15
+
16
+ 本模型是基于 [Qwen-Image-Edit](https://www.modelscope.cn/models/Qwen/Qwen-Image-Edit) 人脸控制图像生成模型。输入裁剪下的人脸图像,输出该人的人像图片。
17
+
18
+ ## 效果展示
19
+
20
+ |人脸|生成图1|生成图2|生成图3|生成图4|
21
+ |-|-|-|-|-|
22
+ |![](./assets/qwen_woman_face_crop.png)|![](./assets/qwen_woman_0.jpg)|![](./assets/qwen_woman_1.jpg)|![](./assets/qwen_woman_2.jpg)|![](./assets/qwen_woman_3.jpg)|
23
+
24
+
25
+
26
+ ## 推理代码
27
+ ```
28
+ git clone https://github.com/modelscope/DiffSynth-Studio.git
29
+ cd DiffSynth-Studio
30
+ pip install -e .
31
+ ```
32
+
33
+ ```python
34
+ from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
35
+ import torch
36
+ from modelscope import snapshot_download, dataset_snapshot_download
37
+ from PIL import Image
38
+
39
+ pipe = QwenImagePipeline.from_pretrained(
40
+ torch_dtype=torch.bfloat16,
41
+ device="cuda",
42
+ model_configs=[
43
+ ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors"),
44
+ ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="text_encoder/model*.safetensors"),
45
+ ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
46
+ ],
47
+ tokenizer_config=None,
48
+ processor_config=ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="processor/"),
49
+ )
50
+ snapshot_download("DiffSynth-Studio/Qwen-Image-Edit-F2P", local_dir="models/DiffSynth-Studio/Qwen-Image-Edit-F2P", allow_file_pattern="model.safetensors")
51
+ pipe.load_lora(pipe.dit, "models/DiffSynth-Studio/Qwen-Image-Edit-F2P/model.safetensors")
52
+ dataset_snapshot_download(
53
+ dataset_id="DiffSynth-Studio/example_image_dataset",
54
+ local_dir="./data/example_image_dataset",
55
+ allow_file_pattern="f2p/qwen_woman_face_crop.png"
56
+ )
57
+ face_image = Image.open("data/example_image_dataset/f2p/qwen_woman_face_crop.png").convert("RGB")
58
+
59
+ prompt = "摄影。一个年轻女性穿着黄色连衣裙,站在花田中,背景是五颜六色的花朵和绿色的草地。"
60
+ image = pipe(prompt, edit_image=face_image, seed=42, num_inference_steps=40, height=1152, width=864)
61
+ image.save(f"image.jpg")
62
+ ```
63
+ 人脸自动裁剪
64
+ ```python
65
+ import torch
66
+ from PIL import Image
67
+ import numpy as np
68
+ from insightface.app import FaceAnalysis
69
+ import cv2
70
+
71
+ class FaceDetector(torch.nn.Module):
72
+
73
+ def __init__(self):
74
+ super().__init__()
75
+ providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
76
+ provider_options = [{"device_id": 0}, {}]
77
+ self.app_640 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
78
+ self.app_640.prepare(ctx_id=0, det_size=(640, 640))
79
+ self.app_320 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
80
+ self.app_320.prepare(ctx_id=0, det_size=(320, 320))
81
+ self.app_160 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
82
+ self.app_160.prepare(ctx_id=0, det_size=(160, 160))
83
+
84
+ def _detect_face(self, id_image_cv2):
85
+ face_info = self.app_640.get(id_image_cv2)
86
+ if len(face_info) > 0:
87
+ return face_info
88
+ face_info = self.app_320.get(id_image_cv2)
89
+ if len(face_info) > 0:
90
+ return face_info
91
+ face_info = self.app_160.get(id_image_cv2)
92
+ return face_info
93
+
94
+ def crop_face(self, id_image):
95
+ face_info = self._detect_face(cv2.cvtColor(np.array(id_image), cv2.COLOR_RGB2BGR))
96
+ if len(face_info) == 0:
97
+ return None
98
+ else:
99
+ bbox = sorted(face_info, key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]))[-1]['bbox']
100
+ return id_image.crop(list(map(int, bbox)))
101
+
102
+
103
+ face_detector = FaceDetector()
104
+ face_image = face_detector.crop_face(Image.open("image_2.jpg"))
105
+ face_image.save("face_crop.jpg")
106
+
107
+ ```
assets/qwen_woman_0.jpg ADDED

Git LFS Details

  • SHA256: 5d306379835b67bfa75221c911f4b825f7fafa33177ba4386cec527f5eb165e9
  • Pointer size: 131 Bytes
  • Size of remote file: 134 kB
assets/qwen_woman_1.jpg ADDED
assets/qwen_woman_2.jpg ADDED

Git LFS Details

  • SHA256: 9634cdbb70afae33abbd7d5e3d98e903bde6af431e39ba1cfdf44fb54bd6ca2a
  • Pointer size: 131 Bytes
  • Size of remote file: 111 kB
assets/qwen_woman_3.jpg ADDED

Git LFS Details

  • SHA256: 2a0c34106b89569563a9c6c2b3e0530095bcedb8b54c32b3dfeab5425591b9f2
  • Pointer size: 131 Bytes
  • Size of remote file: 157 kB
assets/qwen_woman_face_crop.png ADDED
configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework":"Pytorch","task":"image-to-image"}
edit_0917.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eaec9705c770f3453a802ff317f5095c806cc27b069b760015f71bd26179f2e
3
+ size 472047152
edit_0922_lora_step13000.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da99840137731e62a6cd74f9b98e42da2d62ec7011927f199b8d9bb2ba7ed23f
3
+ size 472047184
edit_0928_lora_step40000.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2766a84f5d01b14b172c0307ef69c077fe9c399d304359eadb5fa5819594a175
3
+ size 472047184