Spaces:

KarthikAI
/

Sticker_Diffusion

Sleeping

App Files Files Community

Sticker_Diffusion / utils.py

KarthikAI

Update utils.py

8ecbb74 verified 6 months ago

raw

history blame

2.96 kB

	import os
	# Redirect the Hugging Face caches to /data/huggingface. This runs ahead of
	# the diffusers/transformers imports that follow — presumably so the
	# libraries see these values when they are first imported.
	os.environ.update(
	    {
	        "HF_HOME": "/data/huggingface",
	        "TRANSFORMERS_CACHE": "/data/huggingface",
	    }
	)
	# Create the hub cache directory up front; a no-op when it already exists.
	os.makedirs("/data/huggingface/hub", exist_ok=True)

	import torch
	from diffusers import StableDiffusionImg2ImgPipeline
	from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
	from PIL import Image


	# --- Legacy download/path setup, kept commented out for reference (superseded by SETTINGS below) ---
	# MODEL_ID ="runwayml/stable-diffusion-v1-5" # Can swap for custom path if using IP-Adapter
	# ADAPTER_PATH = "/workspace/.cache/huggingface/ip_adapter/ip-adapter_sd15.bin"
	# ADAPTER_DIR = "/workspace/.cache/huggingface/ip_adapter"
	# DEVICE = "cpu"
	# MODEL_CACHE = "/workspace/.cache/huggingface"

	# ---- SETTINGS ----
	# Device the diffusion pipeline is moved to.
	DEVICE = "cpu"  # Change to "cuda" if you have GPU

	# Cache root handed to from_pretrained(); taken from HF_HOME when it is set.
	CACHE_DIR = os.getenv("HF_HOME", "/data/huggingface")

	# Base Stable Diffusion checkpoint for the img2img pipeline.
	MODEL_ID = "runwayml/stable-diffusion-v1-5"

	# IP-Adapter repository and the weight file loaded from it.
	IPADAPTER_REPO = "h94/IP-Adapter"
	IPADAPTER_WEIGHT_NAME = "ip-adapter_sd15.bin"

	# The IP-Adapter weights are downloaded and attached to the pipeline below.

	# Build the img2img pipeline a single time at import so that every request
	# reuses the same loaded weights instead of reloading them.
	pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
	    MODEL_ID,
	    cache_dir=CACHE_DIR,
	    torch_dtype=torch.float32,
	    # safety_checker=None, # Disable for demo/testing; enable in prod
	)
	pipe = pipe.to(DEVICE)

	# Attach the IP-Adapter weight file from the "models" subfolder of the
	# adapter repository to the already-loaded pipeline.
	pipe.load_ip_adapter(
	    IPADAPTER_REPO,
	    weight_name=IPADAPTER_WEIGHT_NAME,
	    subfolder="models",
	)

	# Load the CLIP vision encoder and image processor that generate_sticker()
	# uses to pre-compute IP-Adapter image embeddings.
	#
	# The encoder is read from "models/image_encoder", the subfolder the
	# diffusers IP-Adapter guide uses for the SD 1.5 adapters.
	# NOTE(review): the previous "clip_vision_model" subfolder does not appear
	# to exist in h94/IP-Adapter — confirm against the repo file listing.
	#
	# The encoder is moved to DEVICE so it sits on the same device as the inputs
	# generate_sticker() sends to it (matters once DEVICE is switched to "cuda").
	vision_encoder = CLIPVisionModelWithProjection.from_pretrained(
	    IPADAPTER_REPO,
	    subfolder="models/image_encoder",
	    cache_dir=CACHE_DIR,
	).to(DEVICE)

	# Use the stock CLIP preprocessing (resize/crop to 224, CLIP mean/std).
	# NOTE(review): the encoder folder appears to ship no preprocessor config,
	# so from_pretrained() has nothing to load there — confirm.
	image_processor = CLIPImageProcessor()

	def generate_sticker(input_image: Image.Image, prompt: str):
	    """Generate a sticker/emoji-style portrait from a user image and a prompt.

	    The image is encoded with the module-level ``vision_encoder`` to obtain
	    IP-Adapter image embeddings, and is also resized to 512x512 to serve as
	    the img2img starting image for the module-level ``pipe``.

	    Args:
	        input_image: Source picture; converted to RGB before use.
	        prompt: Text prompt forwarded to the pipeline.

	    Returns:
	        The first image of the pipeline result (``result.images[0]``).

	    Raises:
	        ValueError: If ``input_image`` is ``None``.
	    """
	    if input_image is None:
	        raise ValueError("input_image is required")

	    # Convert once and derive both working sizes from the same RGB image.
	    rgb_image = input_image.convert("RGB")

	    # 1. Encode the reference image for the IP-Adapter. Inputs follow the
	    #    encoder's own device so the two can never end up on different devices.
	    face_img = rgb_image.resize((224, 224))
	    inputs = image_processor(images=face_img, return_tensors="pt").to(
	        vision_encoder.device
	    )
	    with torch.no_grad():
	        image_embeds = vision_encoder(**inputs).image_embeds

	    # The pipeline call has no `image_embeds` parameter; IP-Adapter embeddings
	    # go in through `ip_adapter_image_embeds`, a list with one tensor per
	    # loaded adapter. guidance_scale below is > 1, so classifier-free guidance
	    # is active and the tensor must carry the negative (all-zero) embedding
	    # first, then the positive one: shape (2, num_images=1, emb_dim).
	    # NOTE(review): layout follows the documented `ip_adapter_image_embeds`
	    # contract — confirm against the installed diffusers version.
	    ip_adapter_embeds = torch.cat(
	        [torch.zeros_like(image_embeds), image_embeds], dim=0
	    ).unsqueeze(1)

	    # 2. Prepare the starting image for the SD img2img pipeline.
	    init_image = rgb_image.resize((512, 512))

	    # 3. Run inference with the fixed strength / guidance / step settings.
	    result = pipe(
	        prompt=prompt,
	        image=init_image,
	        ip_adapter_image_embeds=[ip_adapter_embeds],
	        strength=0.65,
	        guidance_scale=7.5,
	        num_inference_steps=30,
	    )
	    # Return the generated image (as PIL).
	    return result.images[0]