# aisketch / app.py
# Hugging Face Space by umerfarooq29 — commit 5a67aab ("Create app.py", verified)
# app.py
import os
import io
import random
from PIL import Image, ImageOps
import numpy as np
import streamlit as st
# ---- ML libs ----
import torch
from diffusers import StableDiffusionControlNetImg2ImgPipeline, ControlNetModel, UniPCMultistepScheduler
from huggingface_hub import login
# ---- OpenCV for simple preproc (Canny) ----
import cv2
st.set_page_config(page_title="Sketch2Face (Streamlit + ControlNet)", layout="centered")
st.title("Sketch2Face — turn your face sketches into stylized images")
st.write("Upload a face sketch (line drawing). Use the prompt to guide style, pose & mood.")

# Hugging Face auth: accept either conventional env-var name. On Spaces the
# token comes from Settings → Secrets; locally `huggingface-cli login` works.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
if HF_TOKEN:
    try:
        login(token=HF_TOKEN)
    except Exception:
        # Best-effort login: a stale/invalid token should not crash the UI;
        # any auth problem will surface later when the model is downloaded.
        pass
else:
    st.warning("No Hugging Face token found. On Spaces, add HF_TOKEN in Settings → Secrets for model download. Locally use 'huggingface-cli login'.")
# --- Sidebar: every generation knob in one place ---
with st.sidebar:
    st.header("Generation settings")
    model_id = st.text_input("Stable Diffusion model (hf repo)", value="runwayml/stable-diffusion-v1-5")
    controlnet_id = st.text_input("ControlNet (canny) repo", value="lllyasviel/sd-controlnet-canny")
    prompt = st.text_area("Prompt", value="A realistic portrait of a young man, soft lighting, cinematic")
    negative_prompt = st.text_area("Negative prompt (optional)", value="lowres, deformed, extra fingers, watermark")
    guidance_scale = st.slider("Guidance scale", 1.0, 20.0, 7.5)
    strength = st.slider("Strength (how much to change sketch)", 0.1, 1.0, 0.7)
    num_inference_steps = st.slider("Steps", 10, 60, 28)
    # seed == 0 is treated downstream as "pick a random seed".
    seed = st.number_input("Seed (0 for random)", min_value=0, max_value=999999999, value=0, step=1)
    use_gpu = st.checkbox("Use GPU (if available)", value=True)
    run_btn = st.button("Generate")
# Sketch upload plus two quick usage tips side by side.
uploaded = st.file_uploader("Upload your sketch (png/jpg). Prefer simple line art.", type=["png","jpg","jpeg"])
example_col1, example_col2 = st.columns(2)
with example_col1:
    # BUG FIX: was "**Tip:****" — the stray ** broke the markdown bold markup.
    st.markdown("**Tip:** clear black lines on white background work best.")
with example_col2:
    st.markdown("**Tip:** crop to face / 1:1 or 3:4 ratio.")
@st.cache_resource(show_spinner=False)
def load_models(sd_model_id, cn_model_id, device):
    """Load the ControlNet + Stable Diffusion img2img pipeline (cached).

    Cached by st.cache_resource so the heavyweight download/instantiation
    happens once per (model, controlnet, device) combination across reruns.

    Args:
        sd_model_id: Hugging Face repo id of the base Stable Diffusion model.
        cn_model_id: Hugging Face repo id of the (canny) ControlNet.
        device: "cuda" or "cpu"; selects both placement and dtype.

    Returns:
        A StableDiffusionControlNetImg2ImgPipeline moved to `device`.
    """
    # fp16 only makes sense on GPU; CPU inference needs fp32.
    dtype = torch.float16 if device == "cuda" else torch.float32
    controlnet = ControlNetModel.from_pretrained(cn_model_id, torch_dtype=dtype)
    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        sd_model_id,
        controlnet=controlnet,
        safety_checker=None,  # skip NSFW checker to save VRAM/time
        torch_dtype=dtype,
    )
    # UniPC converges in fewer steps than the default scheduler.
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    if device == "cuda":
        # BUG FIX: xformers is an optional dependency — the unguarded call
        # crashed on any GPU host without it. Fall back to default attention.
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            pass
        pipe.to("cuda")
    else:
        pipe.to("cpu")
    return pipe
def prepare_control_image_pil(pil_img, target_size=512, low_threshold=100, high_threshold=200):
    """Build the 3-channel Canny edge map that ControlNet conditions on.

    Args:
        pil_img: Input sketch as a PIL image (any mode; converted to RGB).
        target_size: Output is resized to (target_size, target_size).
        low_threshold: Lower hysteresis threshold for cv2.Canny.
        high_threshold: Upper hysteresis threshold for cv2.Canny.

    Returns:
        An RGB PIL image of the edge map (edges replicated across channels).
    """
    # RGB2GRAY on an RGB array equals the original BGR flip + BGR2GRAY,
    # so the channel reversal is unnecessary.
    rgb = np.array(pil_img.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    # Thresholds are now parameters (backward-compatible defaults) so the UI
    # can expose them later, as the original inline note suggested.
    edges = cv2.Canny(gray, low_threshold, high_threshold)
    edges = cv2.resize(edges, (target_size, target_size))
    # ControlNet expects a 3-channel conditioning image.
    return Image.fromarray(cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB))
def prepare_init_image(pil_img, target_size=512):
    """Center-crop and resize the uploaded sketch into a square RGB init image."""
    square = (target_size, target_size)
    # ImageOps.fit crops to the target aspect ratio before resizing.
    return ImageOps.fit(pil_img.convert("RGB"), square, Image.LANCZOS)
# --- Main action: run the sketch → image generation when the button is hit ---
if run_btn:
    if not uploaded:
        st.error("Please upload a sketch first.")
    else:
        device = "cuda" if (torch.cuda.is_available() and use_gpu) else "cpu"
        with st.spinner("Loading models (first run may take ~1-2 minutes)..."):
            pipe = load_models(model_id, controlnet_id, device)
        # Derive both the init image and the Canny control image from the upload.
        img = Image.open(uploaded)
        control_image = prepare_control_image_pil(img, target_size=512)
        init_image = prepare_init_image(img, target_size=512)
        # Seed 0 means "random": pass generator=None and let diffusers seed itself.
        generator = None
        if seed != 0:
            generator = torch.Generator(device=device).manual_seed(int(seed))
        with st.spinner("Generating..."):
            try:
                # BUG FIX: the pipeline has no `img2img` method — it is invoked
                # directly via __call__. The original raised AttributeError on
                # every run.
                output = pipe(
                    prompt=prompt,
                    image=init_image,
                    control_image=control_image,
                    negative_prompt=negative_prompt or None,
                    strength=float(strength),
                    guidance_scale=float(guidance_scale),
                    num_inference_steps=int(num_inference_steps),
                    generator=generator,
                )
            except Exception as e:
                # BUG FIX: st.exception expects the exception object, not a
                # formatted string; re-raise so the run visibly fails.
                st.exception(e)
                raise
        result = output.images[0]
        st.image(result, caption="Generated image", use_column_width=True)
        # Offer the result as a PNG download.
        buf = io.BytesIO()
        result.save(buf, format="PNG")
        buf.seek(0)
        st.download_button("Download image (PNG)", data=buf, file_name="sketch2face.png", mime="image/png")
# Debug preview: show the Canny map the ControlNet will actually be fed.
if uploaded:
    try:
        preview_src = Image.open(uploaded)
        preview_edges = prepare_control_image_pil(preview_src, target_size=256)
        st.caption("Preview: internal Canny/control image (what ControlNet sees)")
        st.image(preview_edges)
    except Exception:
        # The preview is purely cosmetic — never let it break the page.
        pass

st.markdown("---")
st.markdown("Made for sketch-to-face. Adjust prompt & strength. For best results, upload clear line sketches and try style prompts like 'photorealistic', 'studio lighting', or artists' names (check licenses).")