Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -53,7 +53,7 @@ pipe.load_ip_adapter(
|
|
| 53 |
# cache_dir=CACHE_DIR
|
| 54 |
# )
|
| 55 |
|
| 56 |
-
def generate_sticker(input_image: Image.Image,
|
| 57 |
"""
|
| 58 |
Given a user image and a prompt, generates a sticker/emoji-style portrait.
|
| 59 |
"""
|
|
@@ -66,7 +66,7 @@ def generate_sticker(input_image: Image.Image, prompt: str):
|
|
| 66 |
# ).to(DEVICE)
|
| 67 |
|
| 68 |
# Preprocess the image (resize, etc)
|
| 69 |
-
face_img = input_image.convert("RGB").resize((
|
| 70 |
# inputs = image_processor(images=face_img, return_tensors="pt").to(DEVICE)
|
| 71 |
# with torch.no_grad():
|
| 72 |
# image_embeds = vision_encoder(**inputs).image_embeds
|
|
@@ -77,15 +77,19 @@ def generate_sticker(input_image: Image.Image, prompt: str):
|
|
| 77 |
# IP-Adapter expects the reference image via image_embeds, which is produced by this function:
|
| 78 |
# image_embeds = pipe.prepare_ip_adapter_image_embeds(face_img)
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
# Run inference (low strength for identity preservation)
|
| 81 |
result = pipe(
|
| 82 |
prompt=prompt,
|
| 83 |
image=init_image,
|
| 84 |
# image_embeds=image_embeds,
|
| 85 |
ip_adapter_image=face_img,
|
| 86 |
-
strength=0.
|
| 87 |
-
guidance_scale=
|
| 88 |
-
num_inference_steps=
|
| 89 |
)
|
| 90 |
# Return the generated image (as PIL)
|
| 91 |
return result.images[0]
|
|
|
|
| 53 |
# cache_dir=CACHE_DIR
|
| 54 |
# )
|
| 55 |
|
| 56 |
+
def generate_sticker(input_image: Image.Image, style: str = Form("chibi")):
|
| 57 |
"""
|
| 58 |
Given a user image and a style name, generates a sticker/emoji-style portrait.
|
| 59 |
"""
|
|
|
|
| 66 |
# ).to(DEVICE)
|
| 67 |
|
| 68 |
# Preprocess the image (resize, etc)
|
| 69 |
+
face_img = input_image.convert("RGB").resize((512, 512))
|
| 70 |
# inputs = image_processor(images=face_img, return_tensors="pt").to(DEVICE)
|
| 71 |
# with torch.no_grad():
|
| 72 |
# image_embeds = vision_encoder(**inputs).image_embeds
|
|
|
|
| 77 |
# IP-Adapter expects the reference image via image_embeds, which is produced by this function:
|
| 78 |
# image_embeds = pipe.prepare_ip_adapter_image_embeds(face_img)
|
| 79 |
|
| 80 |
+
prompt = (f"A set of twelve {style}-style digital stickers, "
|
| 81 |
+
"each with a different expression: laughing, angry, crying, sulking, thinking, sleepy, blowing a kiss, winking, surprised, happy, sad, and confused. "
|
| 82 |
+
"Each sticker has a bold black outline and a transparent background, in a playful, close-up cartoon style."
|
| 83 |
+
)
|
| 84 |
# Run inference (low strength for identity preservation)
|
| 85 |
result = pipe(
|
| 86 |
prompt=prompt,
|
| 87 |
image=init_image,
|
| 88 |
# image_embeds=image_embeds,
|
| 89 |
ip_adapter_image=face_img,
|
| 90 |
+
strength=0.6,
|
| 91 |
+
guidance_scale=8,
|
| 92 |
+
num_inference_steps=40
|
| 93 |
)
|
| 94 |
# Return the generated image (as PIL)
|
| 95 |
return result.images[0]
|