| from diffusers import StableDiffusion3Pipeline | |
| import torch | |
| from PIL import Image | |
| import os | |
| import json | |
| import argparse | |
# Command-line interface: the JSON file listing the samples to render and the
# integer index of the CUDA device this process is asked to use.
parser = argparse.ArgumentParser(description="Diffusion Pipeline with Arguments")
parser.add_argument(
    "--json_filename",
    required=True,
    type=str,
    help="Path to the JSON file containing text data",
)
parser.add_argument(
    "--cuda",
    required=True,
    type=int,
    help="CUDA device to use for processing",
)
args = parser.parse_args()

# Build the torch-style device string ("cuda:<index>") from the integer index.
json_filename, cuda_device = args.json_filename, f"cuda:{args.cuda}"
print(json_filename, cuda_device)
# Root directory that each entry's relative "image" path is joined onto.
image_dir = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"

# Read the sample list. The encoding is pinned to UTF-8 so that non-ASCII
# text decodes the same way regardless of the machine's locale.
with open(json_filename, "r", encoding="utf-8") as f:
    json_data = json.load(f)

# Load the Stable Diffusion 3 (medium) pipeline with half-precision weights.
pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    torch_dtype=torch.float16,
)
# Place the pipeline on the device chosen via --cuda. A hard-coded "cuda"
# would ignore that argument and always select the default GPU.
pipe.to(cuda_device)
# Render one image per entry of json_data and save it below image_dir at the
# relative path stored under the entry's "image" key.
for text in json_data:
    # The same text feeds both `prompt` and `prompt_3`; look it up once.
    # NOTE(review): presumably conversations[1] is the caption turn - confirm
    # against the dataset schema.
    prompt = text["conversations"][1]["value"]
    image = pipe(
        prompt=prompt,
        prompt_3=prompt,
        negative_prompt="",
        num_inference_steps=100,
        height=1024,
        width=1024,
        guidance_scale=10.0,
        max_sequence_length=512,
    ).images[0]

    image_path = os.path.join(image_dir, text["image"])
    # Create the file's actual parent directory rather than only the first
    # path component: a bare filename must not be turned into a directory,
    # and nested paths need every intermediate level. exist_ok=True avoids a
    # check-then-create race when several processes write to the same tree.
    os.makedirs(os.path.dirname(image_path), exist_ok=True)
    image.save(image_path)

print("所有图像已成功生成并保存。")