from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers.utils import load_image
import torch
from io import BytesIO
from PIL import Image
import base64

app = FastAPI()

# Check if CUDA (GPU) is available; fp16 is only safe/supported on GPU,
# so fall back to fp32 when running on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
print(f"Running on device: {device}")

# Load ControlNet model (canny-edge conditioning, trained for SD 1.5).
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny", torch_dtype=dtype
)

# The canny ControlNet is an SD 1.5 adapter, so it must be paired with an
# SD 1.5 base checkpoint. (The SDXL base model previously used here is
# architecturally incompatible with this ControlNet and this pipeline class;
# SDXL would require StableDiffusionXLControlNetPipeline + an SDXL ControlNet.)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=dtype,
).to(device)


@app.post("/generate")
async def generate(
    prompt: str = Form(...),
    negative_prompt: str = Form(""),
    image: UploadFile = File(...),
):
    """Generate an image guided by the uploaded conditioning image.

    Form fields:
        prompt: text prompt describing the desired output.
        negative_prompt: optional text of features to avoid.
        image: conditioning image upload.

    Returns:
        JSON object with a base64-encoded PNG data URL under "image".

    NOTE(review): the canny ControlNet expects an *edge map* as conditioning;
    this endpoint passes the raw upload through unchanged, so callers are
    presumably expected to send a pre-computed canny image — confirm, or add
    an edge-detection step (e.g. cv2.Canny) server-side.
    """
    # Decode the upload into an RGB PIL image at the SD 1.5 native resolution.
    input_image = (
        Image.open(BytesIO(await image.read())).convert("RGB").resize((512, 512))
    )

    # Run the diffusion pipeline; .images[0] is the single generated PIL image.
    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=input_image,
        num_inference_steps=30,
        guidance_scale=7.5,
    ).images[0]

    # Serialize the result as a base64 PNG data URL for the JSON response.
    buffered = BytesIO()
    result.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return JSONResponse(content={"image": f"data:image/png;base64,{img_str}"})