File size: 6,066 Bytes
5a67aab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# app.py
import os
import io
import random
from PIL import Image, ImageOps
import numpy as np
import streamlit as st

# ---- ML libs ----
import torch
from diffusers import StableDiffusionControlNetImg2ImgPipeline, ControlNetModel, UniPCMultistepScheduler
from huggingface_hub import login

# ---- OpenCV for simple preproc (Canny) ----
import cv2

st.set_page_config(page_title="Sketch2Face (Streamlit + ControlNet)", layout="centered")

st.title("Sketch2Face — turn your face sketches into stylized images")
st.write("Upload a face sketch (line drawing). Use the prompt to guide style, pose & mood.")

# Get HF token (recommended to set as secret on HF Spaces or as env var locally).
# Gated model downloads need an authenticated hub session; login failures are
# non-fatal because public models still work anonymously.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
if HF_TOKEN:
    try:
        login(token=HF_TOKEN)
    except Exception:
        pass
else:
    st.warning("No Hugging Face token found. On Spaces, add HF_TOKEN in Settings → Secrets for model download. Locally use 'huggingface-cli login'.")

# Sidebar controls: every generation knob lives here so the main column stays
# focused on upload + result.
with st.sidebar:
    st.header("Generation settings")
    model_id = st.text_input("Stable Diffusion model (hf repo)", value="runwayml/stable-diffusion-v1-5")
    controlnet_id = st.text_input("ControlNet (canny) repo", value="lllyasviel/sd-controlnet-canny")
    prompt = st.text_area("Prompt", value="A realistic portrait of a young man, soft lighting, cinematic")
    negative_prompt = st.text_area("Negative prompt (optional)", value="lowres, deformed, extra fingers, watermark")
    guidance_scale = st.slider("Guidance scale", 1.0, 20.0, 7.5)
    strength = st.slider("Strength (how much to change sketch)", 0.1, 1.0, 0.7)
    num_inference_steps = st.slider("Steps", 10, 60, 28)
    seed = st.number_input("Seed (0 for random)", min_value=0, max_value=999999999, value=0, step=1)
    use_gpu = st.checkbox("Use GPU (if available)", value=True)
    run_btn = st.button("Generate")

# Upload sketch
uploaded = st.file_uploader("Upload your sketch (png/jpg). Prefer simple line art.", type=["png","jpg","jpeg"])
example_col1, example_col2 = st.columns(2)
with example_col1:
    # Fixed broken markdown: "**Tip:****" left an unbalanced bold marker.
    st.markdown("**Tip:** clear black lines on white background work best.")
with example_col2:
    st.markdown("**Tip:** crop to face / 1:1 or 3:4 ratio.")

@st.cache_resource(show_spinner=False)
def load_models(sd_model_id, cn_model_id, device):
    """Load the ControlNet img2img pipeline, cached across Streamlit reruns.

    Parameters
    ----------
    sd_model_id : str
        Hugging Face repo id of the base Stable Diffusion model.
    cn_model_id : str
        Hugging Face repo id of the (canny) ControlNet.
    device : str
        "cuda" or "cpu"; also selects fp16 vs fp32 weights.

    Returns
    -------
    StableDiffusionControlNetImg2ImgPipeline
        Pipeline moved to `device` with a UniPC scheduler.
    """
    # fp16 only makes sense on GPU; CPU inference needs fp32.
    dtype = torch.float16 if device == "cuda" else torch.float32
    controlnet = ControlNetModel.from_pretrained(cn_model_id, torch_dtype=dtype)
    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        sd_model_id,
        controlnet=controlnet,
        safety_checker=None,
        torch_dtype=dtype,
    )
    # UniPC converges in fewer steps than the default scheduler.
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    if device == "cuda":
        # xformers is an optional dependency: fall back to the default
        # attention implementation instead of crashing when it is absent.
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            pass
    pipe.to(device)
    return pipe

def prepare_control_image_pil(pil_img, target_size=512):
    """Build the Canny edge map that ControlNet conditions on.

    The sketch is first center-cropped/resized with ImageOps.fit — exactly
    like prepare_init_image — so the control image stays pixel-aligned with
    the init image. (The previous cv2.resize stretched non-square uploads,
    misaligning the edges relative to the init latents.)

    Parameters
    ----------
    pil_img : PIL.Image.Image
        The uploaded sketch.
    target_size : int
        Output edge length in pixels (square).

    Returns
    -------
    PIL.Image.Image
        3-channel RGB image whose pixels are the Canny edges.
    """
    # Same geometry as prepare_init_image so both images line up.
    img = ImageOps.fit(pil_img.convert("RGB"), (target_size, target_size), Image.LANCZOS)
    gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
    # Fixed thresholds work well for clean line art; expose sliders if needed.
    edges = cv2.Canny(gray, 100, 200)
    # ControlNet expects a 3-channel image, so replicate the single channel.
    edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
    return Image.fromarray(edges_rgb)

def prepare_init_image(pil_img, target_size=512):
    """Return the sketch as an RGB image center-cropped to a square."""
    return ImageOps.fit(pil_img.convert("RGB"), (target_size, target_size), Image.LANCZOS)

if run_btn:
    if not uploaded:
        st.error("Please upload a sketch first.")
    else:
        device = "cuda" if (torch.cuda.is_available() and use_gpu) else "cpu"
        with st.spinner("Loading models (first run may take ~1-2 minutes)..."):
            pipe = load_models(model_id, controlnet_id, device)

        # Rewind the upload stream: Streamlit reruns the whole script on each
        # interaction, so the file pointer may already sit at EOF.
        uploaded.seek(0)
        img = Image.open(uploaded)
        control_image = prepare_control_image_pil(img, target_size=512)
        init_image = prepare_init_image(img, target_size=512)

        # Seed 0 means "random": let diffusers pick; otherwise seed a
        # device-local generator for reproducible output.
        if seed == 0:
            generator = None
        else:
            generator = torch.Generator(device=device).manual_seed(int(seed))

        with st.spinner("Generating..."):
            try:
                # Diffusers pipelines are invoked by calling them directly;
                # StableDiffusionControlNetImg2ImgPipeline has no .img2img()
                # method, so the previous pipe.img2img(...) always raised.
                output = pipe(
                    prompt=prompt,
                    image=init_image,
                    control_image=control_image,
                    negative_prompt=negative_prompt or None,
                    strength=float(strength),
                    guidance_scale=float(guidance_scale),
                    num_inference_steps=int(num_inference_steps),
                    generator=generator,
                )
            except Exception as e:
                # st.exception expects the exception object, not a string.
                st.exception(e)
                raise

        result = output.images[0]
        st.image(result, caption="Generated image", use_column_width=True)
        # Offer the result as a PNG download.
        buf = io.BytesIO()
        result.save(buf, format="PNG")
        buf.seek(0)
        st.download_button("Download image (PNG)", data=buf, file_name="sketch2face.png", mime="image/png")

# Show sample control image / debug
if uploaded:
    try:
        # Rewind first — the generation branch may already have consumed the
        # upload stream, which previously made this preview fail silently.
        uploaded.seek(0)
        img = Image.open(uploaded)
        control_img = prepare_control_image_pil(img, target_size=256)
        st.caption("Preview: internal Canny/control image (what ControlNet sees)")
        st.image(control_img)
    except Exception:
        # Best-effort preview: tell the user instead of swallowing the error.
        st.caption("Could not render the control-image preview for this upload.")

st.markdown("---")
st.markdown("Made for sketch-to-face. Adjust prompt & strength. For best results, upload clear line sketches and try style prompts like 'photorealistic', 'studio lighting', or artists' names (check licenses).")