from io import BytesIO
import base64
from typing import Any, Dict

import torch
from diffusers import DiffusionPipeline


class EndpointHandler:
    """Hugging Face Inference Endpoints handler for FLUX.1-dev + Cityscape Studio LoRA.

    Accepts ``{"inputs": {"prompt": "..."}}`` (or ``{"inputs": "..."}``) and
    returns ``{"image": <base64-encoded PNG>}``, or ``{"error": message}``
    when no prompt is supplied.
    """

    def __init__(self, path: str = "") -> None:
        """Load the base pipeline and LoRA weights, then place on GPU or CPU.

        Args:
            path: Model directory supplied by the Endpoints runtime (unused;
                the base model is pulled from the Hub by repo id).
        """
        print(f"Initializing model from: {path}")
        # bfloat16 is the dtype FLUX.1 is published/validated in; float16 can
        # overflow in the T5 text encoder and yield black images.
        self.pipe = DiffusionPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            torch_dtype=torch.bfloat16,
            token=True,  # gated base model; `use_auth_token` is deprecated in favor of `token`
        )

        # Apply the custom style LoRA on top of the base model.
        print("Loading LoRA weights from: Texttra/Cityscape_Studio")
        self.pipe.load_lora_weights(
            "Texttra/Cityscape_Studio", weight_name="c1t3_v1.safetensors"
        )

        if torch.cuda.is_available():
            self.pipe.to("cuda")
        else:
            # BUGFIX: the previous code called enable_model_cpu_offload()
            # right after .to("cpu"); offload needs an accelerator device to
            # shuttle sub-models onto and is invalid on a CPU-only host.
            # Plain CPU placement is the correct fallback.
            self.pipe.to("cpu")

        # NOTE(review): Compel was removed. It only produces CLIP embeddings,
        # but FluxPipeline requires T5-derived `prompt_embeds` alongside CLIP
        # `pooled_prompt_embeds`, so the Compel path could not work for FLUX.
        # The pipeline encodes prompts with both of its text encoders itself.
        print("Model initialized successfully.")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """Generate one image from the request payload.

        Args:
            data: Request body; the prompt is read from ``data["inputs"]["prompt"]``
                or from ``data["inputs"]`` directly when it is a plain string.

        Returns:
            ``{"image": <base64 PNG>}`` on success, ``{"error": ...}`` when the
            prompt is missing or empty.
        """
        print("Received data:", data)
        inputs = data.get("inputs", {})
        # Endpoints clients sometimes send "inputs" as a bare string rather
        # than a dict — accept both shapes.
        prompt = inputs if isinstance(inputs, str) else inputs.get("prompt", "")
        print("Extracted prompt:", prompt)

        if not prompt:
            return {"error": "No prompt provided"}

        # BUGFIX: previously CLIP-only Compel embeddings were passed as
        # `prompt_embeds`, which FluxPipeline rejects (it expects T5
        # sequence embeddings there). Passing the raw prompt lets the
        # pipeline build both embedding sets correctly.
        image = self.pipe(prompt=prompt).images[0]
        print("Image generated.")

        # Serialize the PIL image to base64 PNG for the JSON response.
        buffer = BytesIO()
        image.save(buffer, format="PNG")
        base64_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
        print("Returning image.")
        return {"image": base64_image}