# Prediction interface for Cog ⚙️
# https://cog.run/python

import os
import subprocess
import time

import torch
from transformers import (
    CLIPTextModelWithProjection,
    CLIPTokenizer,
)
from diffusers import VQModel
from cog import BasePredictor, Input, Path

from src.transformer import Transformer2DModel
from src.pipeline import Pipeline
from src.scheduler import Scheduler

MODEL_CACHE = "model_cache"
MODEL_URL = (
    f"https://weights.replicate.delivery/default/viiika/Meissonic/{MODEL_CACHE}.tar"
)

# Force offline mode and point every HF/Torch cache at the local weights directory
os.environ.update(
    {
        "HF_DATASETS_OFFLINE": "1",
        "TRANSFORMERS_OFFLINE": "1",
        "HF_HOME": MODEL_CACHE,
        "TORCH_HOME": MODEL_CACHE,
        "HF_DATASETS_CACHE": MODEL_CACHE,
        "TRANSFORMERS_CACHE": MODEL_CACHE,
        "HUGGINGFACE_HUB_CACHE": MODEL_CACHE,
    }
)


def download_weights(url, dest):
    """Download and extract a weights tarball to dest using pget."""
    start = time.time()
    print("downloading url: ", url)
    print("downloading to: ", dest)
    subprocess.check_call(["pget", "-x", url, dest], close_fds=False)
    print("downloading took: ", time.time() - start)


class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        if not os.path.exists(MODEL_CACHE):
            download_weights(MODEL_URL, MODEL_CACHE)

        model_path = f"{MODEL_CACHE}/MeissonFlow/Meissonic"
        model = Transformer2DModel.from_pretrained(model_path, subfolder="transformer")
        vq_model = VQModel.from_pretrained(model_path, subfolder="vqvae")
        # The larger CLIP text encoder gives more stable sampling in some cases
        text_encoder = CLIPTextModelWithProjection.from_pretrained(
            f"{MODEL_CACHE}/laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
        )
        tokenizer = CLIPTokenizer.from_pretrained(model_path, subfolder="tokenizer")
        scheduler = Scheduler.from_pretrained(model_path, subfolder="scheduler")
        self.pipe = Pipeline(
            vq_model,
            tokenizer=tokenizer,
            text_encoder=text_encoder,
            transformer=model,
            scheduler=scheduler,
        ).to("cuda")

    def predict(
        self,
        prompt: str = Input(
            description="Input prompt",
            default="a photo of an astronaut riding a horse on mars",
        ),
        negative_prompt: str = Input(
            description="Specify things to not see in the output",
            default="worst quality, low quality, low res, blurry, distortion, watermark, logo, signature, text, jpeg artifacts, sketch, duplicate, ugly, identifying mark",
        ),
        num_inference_steps: int = Input(
            description="Number of denoising steps", ge=1, le=100, default=64
        ),
        guidance_scale: float = Input(
            description="Scale for classifier-free guidance", ge=0, le=20, default=9
        ),
        seed: int = Input(
            description="Random seed. Leave blank to randomize the seed", default=None
        ),
    ) -> Path:
        """Run a single prediction on the model"""
        if seed is None:
            seed = int.from_bytes(os.urandom(2), "big")
        print(f"Using seed: {seed}")
        torch.manual_seed(seed)

        image = self.pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            height=1024,
            width=1024,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
        ).images[0]

        output_path = "/tmp/out.png"
        image.save(output_path)
        return Path(output_path)
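

# --- Local smoke test (illustrative sketch, not part of the Cog interface) ---
# Cog normally constructs Predictor and calls setup()/predict() itself; this
# block is only a hedged example of exercising the class directly. It assumes
# a CUDA-capable GPU, that pget is installed, and that the weights tarball is
# reachable (or already extracted under model_cache/).
if __name__ == "__main__":
    predictor = Predictor()
    predictor.setup()
    out = predictor.predict(
        prompt="a photo of an astronaut riding a horse on mars",
        negative_prompt="worst quality, low quality, blurry, watermark",
        num_inference_steps=64,
        guidance_scale=9.0,
        seed=42,  # fixed seed for a reproducible test image
    )
    print(f"Saved image to: {out}")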