|
|
import os |
|
|
import sys |
|
|
sys.path.append("./") |
|
|
|
|
|
import torch |
|
|
from torchvision import transforms |
|
|
from src.transformer import Transformer2DModel |
|
|
from src.pipeline import Pipeline |
|
|
from src.scheduler import Scheduler |
|
|
from transformers import ( |
|
|
CLIPTextModelWithProjection, |
|
|
CLIPTokenizer, |
|
|
) |
|
|
from diffusers import VQModel |
|
|
|
|
|
# Select compute device: prefer CUDA, fall back to CPU so the script can
# still run (slowly) on machines without a GPU. Original hard-coded 'cuda';
# on a CUDA machine behavior is unchanged.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Hugging Face repo that hosts most of the pipeline components.
model_path = "MeissonFlow/Meissonic"

# Load each component separately so they can be assembled into the
# project-local Pipeline below.
model = Transformer2DModel.from_pretrained(model_path, subfolder="transformer")
vq_model = VQModel.from_pretrained(model_path, subfolder="vqvae")

# The text encoder comes from a separate OpenCLIP checkpoint, not model_path.
text_encoder = CLIPTextModelWithProjection.from_pretrained(
    "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
)
tokenizer = CLIPTokenizer.from_pretrained(model_path, subfolder="tokenizer")
scheduler = Scheduler.from_pretrained(model_path, subfolder="scheduler")

pipe = Pipeline(
    vq_model,
    tokenizer=tokenizer,
    text_encoder=text_encoder,
    transformer=model,
    scheduler=scheduler,
)

# Move every component of the pipeline to the chosen device.
pipe = pipe.to(device)

# Sampling hyper-parameters.
steps = 64         # number of inference (denoising) steps
CFG = 9            # classifier-free guidance scale
resolution = 1024  # output image height and width in pixels
negative_prompt = "worst quality, low quality, low res, blurry, distortion, watermark, logo, signature, text, jpeg artifacts, signature, sketch, duplicate, ugly, identifying mark"
|
|
|
|
|
# Evaluation prompts, kept verbatim ("pictur" typo included) — the exact
# strings are runtime data: they condition generation and name the outputs.
prompts = [
    "Two actors are posing for a pictur with one wearing a black and white face paint.",
    "A large body of water with a rock in the middle and mountains in the background.",
    "A white and blue coffee mug with a picture of a man on it.",
    "A statue of a man with a crown on his head.",
    "A man in a yellow wet suit is holding a big black dog in the water.",
    "A white table with a vase of flowers and a cup of coffee on top of it.",
    "A woman stands on a dock in the fog.",
    "A woman is standing next to a picture of another woman.",
]

# When True, every prompt is generated in a single batched pipeline call;
# when False, only the first prompt is used.
batched_generation = False
if batched_generation:
    num_images = len(prompts)
else:
    num_images = 1
|
|
|
|
|
# Run the pipeline once over the selected prompts; the same negative prompt
# is paired with every positive prompt in the batch.
selected_prompts = prompts[:num_images]
images = pipe(
    prompt=selected_prompts,
    negative_prompt=[negative_prompt] * num_images,
    height=resolution,
    width=resolution,
    guidance_scale=CFG,
    num_inference_steps=steps,
).images

# Save each image under a filename derived from its prompt plus the
# resolution / step-count / guidance settings used to generate it.
output_dir = "./output"
os.makedirs(output_dir, exist_ok=True)
for idx, (prompt, image) in enumerate(zip(selected_prompts, images), start=1):
    file_name = f"{prompt.replace(' ', '_')}_{resolution}_{steps}_{CFG}.png"
    file_path = os.path.join(output_dir, file_name)
    image.save(file_path)
    print(f"The {idx}/{num_images} image is saved to {file_path}")
|
|
|