# app.py
"""Sketch2Face — Streamlit app that turns face sketches into stylized images.

Flow: the uploaded sketch is converted to a Canny edge map (what ControlNet
conditions on) and is also used, resized, as the img2img init image. A
ControlNet-conditioned Stable Diffusion pipeline then generates the result,
guided by the user's prompt / negative prompt / strength settings.
"""

import io
import os
import random

import cv2  # OpenCV for simple preprocessing (Canny edge extraction)
import numpy as np
import streamlit as st
import torch
from diffusers import (
    ControlNetModel,
    StableDiffusionControlNetImg2ImgPipeline,
    UniPCMultistepScheduler,
)
from huggingface_hub import login
from PIL import Image, ImageOps

st.set_page_config(page_title="Sketch2Face (Streamlit + ControlNet)", layout="centered")
st.title("Sketch2Face — turn your face sketches into stylized images")
st.write("Upload a face sketch (line drawing). Use the prompt to guide style, pose & mood.")

# Get HF token (recommended to set as secret on HF Spaces or as env var locally).
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
if HF_TOKEN:
    try:
        login(token=HF_TOKEN)
    except Exception:
        # Best-effort login: a bad/expired token must not crash the UI;
        # gated-model downloads will surface their own error later.
        pass
else:
    st.warning("No Hugging Face token found. On Spaces, add HF_TOKEN in Settings → Secrets for model download. Locally use 'huggingface-cli login'.")

# Sidebar controls
with st.sidebar:
    st.header("Generation settings")
    model_id = st.text_input("Stable Diffusion model (hf repo)", value="runwayml/stable-diffusion-v1-5")
    controlnet_id = st.text_input("ControlNet (canny) repo", value="lllyasviel/sd-controlnet-canny")
    prompt = st.text_area("Prompt", value="A realistic portrait of a young man, soft lighting, cinematic")
    negative_prompt = st.text_area("Negative prompt (optional)", value="lowres, deformed, extra fingers, watermark")
    guidance_scale = st.slider("Guidance scale", 1.0, 20.0, 7.5)
    strength = st.slider("Strength (how much to change sketch)", 0.1, 1.0, 0.7)
    num_inference_steps = st.slider("Steps", 10, 60, 28)
    seed = st.number_input("Seed (0 for random)", min_value=0, max_value=999999999, value=0, step=1)
    use_gpu = st.checkbox("Use GPU (if available)", value=True)
    run_btn = st.button("Generate")

# Upload sketch
uploaded = st.file_uploader(
    "Upload your sketch (png/jpg). Prefer simple line art.",
    type=["png", "jpg", "jpeg"],
)
example_col1, example_col2 = st.columns(2)
with example_col1:
    # FIX: was "**Tip:****" — stray asterisks broke the bold markdown.
    st.markdown("**Tip:** clear black lines on white background work best.")
with example_col2:
    st.markdown("**Tip:** crop to face / 1:1 or 3:4 ratio.")


@st.cache_resource(show_spinner=False)
def load_models(sd_model_id, cn_model_id, device):
    """Load the ControlNet + Stable Diffusion img2img pipeline (cached).

    Cached via st.cache_resource so reruns with the same (sd_model_id,
    cn_model_id, device) reuse the loaded weights.

    Args:
        sd_model_id: HF repo id of the base Stable Diffusion model.
        cn_model_id: HF repo id of the (canny) ControlNet.
        device: "cuda" or "cpu" — selects dtype (fp16 on GPU) and placement.

    Returns:
        A ready-to-call StableDiffusionControlNetImg2ImgPipeline.
    """
    dtype = torch.float16 if device == "cuda" else torch.float32
    controlnet = ControlNetModel.from_pretrained(cn_model_id, torch_dtype=dtype)
    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        sd_model_id,
        controlnet=controlnet,
        safety_checker=None,
        torch_dtype=dtype,
    )
    # Scheduler & device
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    if device == "cuda":
        try:
            # FIX: xformers is optional and frequently absent (e.g. on Spaces);
            # the original call raised and killed model loading when missing.
            pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            pass  # fall back to default attention
        pipe.to("cuda")
    else:
        pipe.to("cpu")
    return pipe


def prepare_control_image_pil(pil_img, target_size=512):
    """Turn the sketch into the 3-channel Canny edge map ControlNet expects.

    Args:
        pil_img: user-uploaded PIL image (any mode).
        target_size: output is resized to (target_size, target_size).

    Returns:
        RGB PIL.Image containing the edge map.
    """
    img = pil_img.convert("RGB")
    open_cv_image = np.array(img)[:, :, ::-1]  # RGB -> BGR for OpenCV
    gray = cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2GRAY)
    # Auto-threshold can be useful; fixed thresholds here, but they could be
    # exposed as sliders in the sidebar.
    edges = cv2.Canny(gray, 100, 200)
    edges = cv2.resize(edges, (target_size, target_size))
    # Single-channel edge map -> 3-channel PIL image.
    edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
    return Image.fromarray(edges_rgb)


def prepare_init_image(pil_img, target_size=512):
    """Center-crop/resize the sketch to a square RGB init image."""
    img = pil_img.convert("RGB")
    img = ImageOps.fit(img, (target_size, target_size), Image.LANCZOS)
    return img


if run_btn:
    if not uploaded:
        st.error("Please upload a sketch first.")
    else:
        device = "cuda" if (torch.cuda.is_available() and use_gpu) else "cpu"
        with st.spinner("Loading models (first run may take ~1-2 minutes)..."):
            pipe = load_models(model_id, controlnet_id, device)

        # Load user image and derive both conditioning inputs from it.
        img = Image.open(uploaded)
        control_image = prepare_control_image_pil(img, target_size=512)
        init_image = prepare_init_image(img, target_size=512)

        # Seed: 0 means "random" (no explicit generator).
        generator = None
        if seed != 0:
            generator = torch.Generator(device=device).manual_seed(int(seed))

        with st.spinner("Generating..."):
            try:
                # FIX: the pipeline has no .img2img() method — it is invoked
                # directly; the original call always raised AttributeError.
                output = pipe(
                    prompt=prompt,
                    image=init_image,
                    control_image=control_image,
                    negative_prompt=negative_prompt or None,
                    strength=float(strength),
                    guidance_scale=float(guidance_scale),
                    num_inference_steps=int(num_inference_steps),
                    generator=generator,
                )
            except Exception as e:
                # FIX: st.exception expects an exception object, not a string.
                st.error(f"Generation failed: {e}")
                raise

        result = output.images[0]
        st.image(result, caption="Generated image", use_column_width=True)

        # Offer download
        buf = io.BytesIO()
        result.save(buf, format="PNG")
        buf.seek(0)
        st.download_button("Download image (PNG)", data=buf, file_name="sketch2face.png", mime="image/png")

# Show sample control image / debug
if uploaded:
    try:
        img = Image.open(uploaded)
        control_img = prepare_control_image_pil(img, target_size=256)
        st.caption("Preview: internal Canny/control image (what ControlNet sees)")
        st.image(control_img)
    except Exception:
        # Preview is purely informational; never let it break the page.
        pass

st.markdown("---")
st.markdown("Made for sketch-to-face. Adjust prompt & strength. For best results, upload clear line sketches and try style prompts like 'photorealistic', 'studio lighting', or artists' names (check licenses).")