kelseye commited on
Commit
c06e0c5
·
verified ·
1 Parent(s): e470f73

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/qwen_woman_0.jpg filter=lfs diff=lfs merge=lfs -text
37
+ assets/qwen_woman_2.jpg filter=lfs diff=lfs merge=lfs -text
38
+ assets/qwen_woman_3.jpg filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ # Qwen-Image-Edit Face-to-Portrait Image Generation Model
5
+ ## Model Introduction
6
+
7
+ This is a face-controlled image generation model based on [Qwen-Image-Edit](https://www.modelscope.cn/models/Qwen/Qwen-Image-Edit). Given a cropped facial image as input, it generates full portrait images of the same person.
8
+
9
+ ## Result Demonstration
10
+
11
+ |Face|Generated Image 1|Generated Image 2|Generated Image 3|Generated Image 4|
12
+ |-|-|-|-|-|
13
+ |![](./assets/qwen_woman_face_crop.png)|![](./assets/qwen_woman_0.jpg)|![](./assets/qwen_woman_1.jpg)|![](./assets/qwen_woman_2.jpg)|![](./assets/qwen_woman_3.jpg)|
14
+
15
+
16
+
17
+ ## Inference Code
18
+ ```
19
+ git clone https://github.com/modelscope/DiffSynth-Studio.git
20
+ cd DiffSynth-Studio
21
+ pip install -e .
22
+ ```
23
+
24
+ ```python
25
+ from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
26
+ import torch
27
+ from modelscope import snapshot_download, dataset_snapshot_download
28
+ from PIL import Image
29
+
30
+ pipe = QwenImagePipeline.from_pretrained(
31
+ torch_dtype=torch.bfloat16,
32
+ device="cuda",
33
+ model_configs=[
34
+ ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors"),
35
+ ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="text_encoder/model*.safetensors"),
36
+ ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
37
+ ],
38
+ tokenizer_config=None,
39
+ processor_config=ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="processor/"),
40
+ )
41
+ snapshot_download("DiffSynth-Studio/Qwen-Image-Edit-F2P", local_dir="models/DiffSynth-Studio/Qwen-Image-Edit-F2P", allow_file_pattern="model.safetensors")
42
+ pipe.load_lora(pipe.dit, "models/DiffSynth-Studio/Qwen-Image-Edit-F2P/model.safetensors")
43
+ dataset_snapshot_download(
44
+ dataset_id="DiffSynth-Studio/example_image_dataset",
45
+ local_dir="./data/example_image_dataset",
46
+ allow_file_pattern="f2p/qwen_woman_face_crop.png"
47
+ )
48
+ face_image = Image.open("data/example_image_dataset/f2p/qwen_woman_face_crop.png").convert("RGB")
49
+ ```
50
+
51
+ ```python
52
+ prompt = "Photography. A young woman wearing a yellow dress stands in a flower field, with a background of colorful flowers and green grass."
53
+ image = pipe(prompt, edit_image=face_image, seed=42, num_inference_steps=40, height=1152, width=864)
54
+ image.save("image.jpg")
55
+ ```
56
+
57
+ Face Auto-Cropping
58
+ ```python
59
+ import torch
60
+ from PIL import Image
61
+ import numpy as np
62
+ from insightface.app import FaceAnalysis
63
+ import cv2
64
+
65
+ class FaceDetector(torch.nn.Module):
66
+
67
+ def __init__(self):
68
+ super().__init__()
69
+ providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
70
+ provider_options = [{"device_id": 0}, {}]
71
+ self.app_640 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
72
+ self.app_640.prepare(ctx_id=0, det_size=(640, 640))
73
+ self.app_320 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
74
+ self.app_320.prepare(ctx_id=0, det_size=(320, 320))
75
+ self.app_160 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
76
+ self.app_160.prepare(ctx_id=0, det_size=(160, 160))
77
+
78
+ def _detect_face(self, id_image_cv2):
79
+ face_info = self.app_640.get(id_image_cv2)
80
+ if len(face_info) > 0:
81
+ return face_info
82
+ face_info = self.app_320.get(id_image_cv2)
83
+ if len(face_info) > 0:
84
+ return face_info
85
+ face_info = self.app_160.get(id_image_cv2)
86
+ return face_info
87
+
88
+ def crop_face(self, id_image):
89
+ face_info = self._detect_face(cv2.cvtColor(np.array(id_image), cv2.COLOR_RGB2BGR))
90
+ if len(face_info) == 0:
91
+ return None
92
+ else:
93
+ bbox = sorted(face_info, key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]))[-1]['bbox']
94
+ return id_image.crop(list(map(int, bbox)))
95
+
96
+
97
+ face_detector = FaceDetector()
98
+ face_image = face_detector.crop_face(Image.open("image_2.jpg"))
99
+ face_image.save("face_crop.jpg")
100
+ ```
README_from_modelscope.md ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ frameworks:
3
+ - Pytorch
4
+ license: Apache License 2.0
5
+ tags: []
6
+ tasks:
7
+ - image-to-image
8
+ base_model:
9
+ - Qwen/Qwen-Image-Edit
10
+ base_model_relation: adapter
11
+ ---
12
+
13
+ # Qwen-Image-Edit 人脸生成图像模型
14
+ ## 模型介绍
15
+
16
+ 本模型是基于 [Qwen-Image-Edit](https://www.modelscope.cn/models/Qwen/Qwen-Image-Edit) 人脸控制图像生成模型。输入裁剪下的人脸图像,输出该人的人像图片。
17
+
18
+ ## 效果展示
19
+
20
+ |人脸|生成图1|生成图2|生成图3|生成图4|
21
+ |-|-|-|-|-|
22
+ |![](./assets/qwen_woman_face_crop.png)|![](./assets/qwen_woman_0.jpg)|![](./assets/qwen_woman_1.jpg)|![](./assets/qwen_woman_2.jpg)|![](./assets/qwen_woman_3.jpg)|
23
+
24
+
25
+
26
+ ## 推理代码
27
+ ```
28
+ git clone https://github.com/modelscope/DiffSynth-Studio.git
29
+ cd DiffSynth-Studio
30
+ pip install -e .
31
+ ```
32
+
33
+ ```python
34
+ from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
35
+ import torch
36
+ from modelscope import snapshot_download, dataset_snapshot_download
37
+ from PIL import Image
38
+
39
+ pipe = QwenImagePipeline.from_pretrained(
40
+ torch_dtype=torch.bfloat16,
41
+ device="cuda",
42
+ model_configs=[
43
+ ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors"),
44
+ ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="text_encoder/model*.safetensors"),
45
+ ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
46
+ ],
47
+ tokenizer_config=None,
48
+ processor_config=ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="processor/"),
49
+ )
50
+ snapshot_download("DiffSynth-Studio/Qwen-Image-Edit-F2P", local_dir="models/DiffSynth-Studio/Qwen-Image-Edit-F2P", allow_file_pattern="model.safetensors")
51
+ pipe.load_lora(pipe.dit, "models/DiffSynth-Studio/Qwen-Image-Edit-F2P/model.safetensors")
52
+ dataset_snapshot_download(
53
+ dataset_id="DiffSynth-Studio/example_image_dataset",
54
+ local_dir="./data/example_image_dataset",
55
+ allow_file_pattern="f2p/qwen_woman_face_crop.png"
56
+ )
57
+ face_image = Image.open("data/example_image_dataset/f2p/qwen_woman_face_crop.png").convert("RGB")
58
+
59
+ prompt = "摄影。一个年轻女性穿着黄色连衣裙,站在花田中,背景是五颜六色的花朵和绿色的草地。"
60
+ image = pipe(prompt, edit_image=face_image, seed=42, num_inference_steps=40, height=1152, width=864)
61
+ image.save("image.jpg")
62
+ ```
63
+ 人脸自动裁剪
64
+ ```python
65
+ import torch
66
+ from PIL import Image
67
+ import numpy as np
68
+ from insightface.app import FaceAnalysis
69
+ import cv2
70
+
71
+ class FaceDetector(torch.nn.Module):
72
+
73
+ def __init__(self):
74
+ super().__init__()
75
+ providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
76
+ provider_options = [{"device_id": 0}, {}]
77
+ self.app_640 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
78
+ self.app_640.prepare(ctx_id=0, det_size=(640, 640))
79
+ self.app_320 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
80
+ self.app_320.prepare(ctx_id=0, det_size=(320, 320))
81
+ self.app_160 = FaceAnalysis(name='antelopev2', providers=providers, provider_options=provider_options)
82
+ self.app_160.prepare(ctx_id=0, det_size=(160, 160))
83
+
84
+ def _detect_face(self, id_image_cv2):
85
+ face_info = self.app_640.get(id_image_cv2)
86
+ if len(face_info) > 0:
87
+ return face_info
88
+ face_info = self.app_320.get(id_image_cv2)
89
+ if len(face_info) > 0:
90
+ return face_info
91
+ face_info = self.app_160.get(id_image_cv2)
92
+ return face_info
93
+
94
+ def crop_face(self, id_image):
95
+ face_info = self._detect_face(cv2.cvtColor(np.array(id_image), cv2.COLOR_RGB2BGR))
96
+ if len(face_info) == 0:
97
+ return None
98
+ else:
99
+ bbox = sorted(face_info, key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]))[-1]['bbox']
100
+ return id_image.crop(list(map(int, bbox)))
101
+
102
+
103
+ face_detector = FaceDetector()
104
+ face_image = face_detector.crop_face(Image.open("image_2.jpg"))
105
+ face_image.save("face_crop.jpg")
106
+
107
+ ```
assets/qwen_woman_0.jpg ADDED

Git LFS Details

  • SHA256: 5d306379835b67bfa75221c911f4b825f7fafa33177ba4386cec527f5eb165e9
  • Pointer size: 131 Bytes
  • Size of remote file: 134 kB
assets/qwen_woman_1.jpg ADDED
assets/qwen_woman_2.jpg ADDED

Git LFS Details

  • SHA256: 9634cdbb70afae33abbd7d5e3d98e903bde6af431e39ba1cfdf44fb54bd6ca2a
  • Pointer size: 131 Bytes
  • Size of remote file: 111 kB
assets/qwen_woman_3.jpg ADDED

Git LFS Details

  • SHA256: 2a0c34106b89569563a9c6c2b3e0530095bcedb8b54c32b3dfeab5425591b9f2
  • Pointer size: 131 Bytes
  • Size of remote file: 157 kB
assets/qwen_woman_face_crop.png ADDED
configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework":"Pytorch","task":"image-to-image"}
edit_0917.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eaec9705c770f3453a802ff317f5095c806cc27b069b760015f71bd26179f2e
3
+ size 472047152
edit_0922_lora_step13000.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da99840137731e62a6cd74f9b98e42da2d62ec7011927f199b8d9bb2ba7ed23f
3
+ size 472047184
edit_0928_lora_step40000.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2766a84f5d01b14b172c0307ef69c077fe9c399d304359eadb5fa5819594a175
3
+ size 472047184