|
|
import re |
|
|
from diffusers import DiffusionPipeline, FluxPipeline |
|
|
from live_preview_helpers import FLUXPipelineWithIntermediateOutputs |
|
|
import torch |
|
|
import os |
|
|
from openai import OpenAI |
|
|
import subprocess |
|
|
|
|
|
# Text-to-image models offered by the app: display name -> model repository id.
# The repository ids are the same strings that setup_model() compares against.
T2I_MODELS = {
    "Stable Diffusion v2.1": "stabilityai/stable-diffusion-2-1",
    "SDXL-Turbo": "stabilityai/sdxl-turbo",
    "Stable Diffusion v3.5-medium": "stabilityai/stable-diffusion-3.5-medium",
    "Flux.1-dev": "black-forest-labs/FLUX.1-dev",
}
|
|
|
|
|
# Scenario name -> background story shown for that scenario.
# Each value is a single string; adjacent literals are only a source-level
# line-wrapping device and concatenate without adding any characters.
SCENARIOS = {
    "Product advertisement": (
        "You are designing an advertising campaign for a new line of coffee machines. "
        "To ensure the campaign resonates with a wider audience, you use generative models "
        "to create marketing images that showcase a variety of users interacting with the product."
    ),
    "Tourist promotion": (
        "You are creating a travel campaign to attract a diverse range of visitors to a specific destination. "
        "To make the promotional materials more engaging and inclusive, you use generative models "
        "to design posters that highlight a broader array of experiences."
    ),
    "Fictional character generation": (
        "You are creating a narrative superhero game where the player often interacts with "
        "multiple other non-player characters in the story. "
        "To test how different characters would affect the experience of gameplay, "
        "you decide to use generative models to help construct characters for (play)testing."
    ),
    "Interior Design": (
        "You have a one-bedroom apartment and want to arrange your bed, desk, and dresser in the best way possible. "
        "You love the color white and want to ensure your space feels bright and open. "
        "To make a decision, you need a way to visualize different furniture placements before setting everything up."
    ),
}
|
|
|
|
|
# Scenario name -> starting text-to-image prompt for that scenario.
# Keys are the same scenario names used by SCENARIOS and IMAGES.
PROMPTS = {
    "Product advertisement": "Design a marketing advertisement image for a coffee machine.",
    "Tourist promotion": (
        "Design a travel promotional poster to showcase the beauty "
        "and attractions of a tourist destination."
    ),
    "Fictional character generation": "Generate a character of a superhero.",
    "Interior Design": "Generate an one-bedroom apartment interior design.",
}
|
|
|
|
|
# Scenario name -> number embedded in that scenario's image file names.
# The numbers are not sequential (1, 5, 2, 3); they mirror the files on disk.
_SCENARIO_IMAGE_IDS = {
    "Product advertisement": 1,
    "Tourist promotion": 5,
    "Fictional character generation": 2,
    "Interior Design": 3,
}

# Scenario name -> {"baseline": [4 paths], "ours": [4 paths]}.
# Paths follow images/scenario<N>_base<i>.png and images/scenario<N>_our<i>.png
# for i in 1..4.
IMAGES = {
    scenario_name: {
        "baseline": [
            f"images/scenario{scenario_id}_base{index}.png" for index in range(1, 5)
        ],
        "ours": [
            f"images/scenario{scenario_id}_our{index}.png" for index in range(1, 5)
        ],
    }
    for scenario_name, scenario_id in _SCENARIO_IMAGE_IDS.items()
}
|
|
|
|
|
# Seven-point satisfaction scale, ordered from most negative to most positive.
OPTIONS = [
    "Very Unsatisfied",
    "Unsatisfied",
    "Slightly Unsatisfied",
    "Neutral",
    "Slightly Satisfied",
    "Satisfied",
    "Very Satisfied",
]

# Choices for picking one of four images, plus an explicit "none" answer.
IMAGE_OPTIONS = [
    f"{ordinal} Image" for ordinal in ("First", "Second", "Third", "Fourth")
] + ["None of them"]

# Instruction text containing Markdown emphasis and HTML line breaks.
# NOTE(review): the "π" glyphs look like mis-encoded emoji; they are kept
# exactly as found because this is runtime text - confirm the intended symbols.
INSTRUCTION = (
    "π **Instruction**: Now, we want to understand your satisfaction with the images generated. "
    "<br /> π Step 1: You will start from evaluating the following images based on the given prompt. "
    "<br /> π Step 2: Then please modify the prompt according to your expectations "
    "for the given scenario background, and answer the evaluation question "
    "**until you are satisfied** with at least one of the images generated below. "
    "If you are not satisfied with the generated images, "
    "you can repeatedly modify the prompts for at most **5 times**."
)
|
|
def clean_cache():
    """Remove offloaded files from disk and release cached CUDA memory.

    Runs ``rm -rf /data-nvme/zerogpu-offload/*`` through a shell with an empty
    environment, ignoring the command's outcome, and then calls
    ``torch.cuda.empty_cache()`` when CUDA is available.
    """
    # The shell is needed so that the trailing "*" is expanded as a glob.
    offload_cleanup_cmd = "rm -rf /data-nvme/zerogpu-offload/*"
    subprocess.run(offload_cleanup_cmd, env={}, shell=True)

    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
|
|
|
|
|
def setup_model(t2i_model_repo, torch_dtype, device):
    """Load the text-to-image pipeline for ``t2i_model_repo`` onto ``device``.

    Args:
        t2i_model_repo: Model repository id. Supported values are
            ``"stabilityai/sdxl-turbo"``,
            ``"stabilityai/stable-diffusion-3.5-medium"``,
            ``"stabilityai/stable-diffusion-2-1"`` (loaded with
            ``DiffusionPipeline``) and ``"black-forest-labs/FLUX.1-dev"``
            (loaded with ``FLUXPipelineWithIntermediateOutputs``).
        torch_dtype: Passed through as ``torch_dtype`` to ``from_pretrained``.
        device: Passed to the pipeline's ``.to()``.

    Returns:
        The loaded pipeline after it has been moved to ``device``.

    Raises:
        ValueError: If ``t2i_model_repo`` is not one of the supported ids.
            Previously this case fell through both branches and failed with
            an ``UnboundLocalError`` on ``return pipe``.
    """
    diffusion_pipeline_repos = (
        "stabilityai/sdxl-turbo",
        "stabilityai/stable-diffusion-3.5-medium",
        "stabilityai/stable-diffusion-2-1",
    )

    # Pick the loader class first so an unsupported id fails before any
    # model download or GPU work is attempted.
    if t2i_model_repo in diffusion_pipeline_repos:
        pipeline_cls = DiffusionPipeline
    elif t2i_model_repo == "black-forest-labs/FLUX.1-dev":
        pipeline_cls = FLUXPipelineWithIntermediateOutputs
    else:
        raise ValueError(f"Unsupported text-to-image model repo: {t2i_model_repo!r}")

    pipe = pipeline_cls.from_pretrained(t2i_model_repo, torch_dtype=torch_dtype).to(device)

    # NOTE(review): the original indentation was lost, so it is unclear whether
    # this call ran only for the FLUX branch or for every model; it is run for
    # every model here - confirm.
    torch.cuda.empty_cache()

    return pipe
|
|
|
|
|
def init_gpt_api():
    """Create an ``OpenAI`` client using the ``OPENAI_API_KEY`` environment variable.

    The key is read at call time; if the variable is unset, ``None`` is passed
    as ``api_key``.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    return OpenAI(api_key=api_key)
|
|
|
|
|
def call_gpt_api(messages, client, model, seed, max_tokens, temperature, top_p):
    """Send one chat-completion request and return the first choice's text.

    Args:
        messages: Chat messages forwarded as ``messages``.
        client: Object exposing ``chat.completions.create(...)``.
        model: Forwarded as ``model``.
        seed: Forwarded as ``seed``.
        max_tokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Returns:
        ``choices[0].message.content`` of the returned completion.
    """
    request = {
        "model": model,
        "messages": messages,
        "seed": seed,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content
|
|
|
|
|
def clean_response_gpt(res: str):
    """Extract the prompts from a numbered-list response.

    Each item is expected to look like ``1. "some prompt"`` or ``1. some prompt``
    and to end at a newline or at the end of the string. One optional pair of
    surrounding double quotes is dropped.

    Trailing spaces, tabs and carriage returns before the line end are not part
    of the captured prompt. Without this, a response using ``\\r\\n`` line
    endings or a trailing space yielded items such as ``'a cat"\\r'`` with the
    closing quote still attached.

    Args:
        res: Raw response text.

    Returns:
        A list of prompt strings in order of appearance; empty if no numbered
        item is found.
    """
    # Pieces: item number and dot, one whitespace, optional opening quote,
    # lazily captured prompt, optional closing quote, trailing horizontal
    # whitespace / CR, then a lookahead for the line end.
    prompts = re.findall(r'\d+\.\s"?(.*?)"?[ \t\r]*(?=\n|$)', res)
    return prompts
|
|
|
|
|
|
|
|
def get_refine_msg(prompt, num_prompts):
    """Build the chat messages that ask for ``num_prompts`` refinements of ``prompt``.

    Args:
        prompt: The prompt to refine; inserted between single quotes at the end
            of the user message.
        num_prompts: How many refined prompts to request; inserted into both
            the system and the user message.

    Returns:
        A two-element list: a ``system`` message followed by a ``user`` message,
        each a dict with ``role`` and ``content`` keys.
    """
    system_text = f"You are a helpful, respectful and precise assistant. You will be asked to generate {num_prompts} refined prompts. Only respond with those refined prompts"

    # NOTE(review): the line breaks inside this template were reconstructed from
    # a whitespace-mangled source - confirm against the deployed prompt text.
    user_text = f"""Given a prompt, modify the prompt for me to explore variations in subject attributes, actions, and contextual details, while retaining the semantic consistency of the original description.

Follow the following refinement instruction:
1. Subject: refine broad terms into specific subsets, focusing on but not restricted on ethinity, gender, age of human.
2. Object: modify the brand, color of object(s) only if it's not specified in the prompt.
3. Setting: add details to the background environment, such as change of temporal or spatial details (e.g., day to night, indoor to outdoor).
4. Action: add more details to the action or specify the object or goal of the action.

For example, given this prompt: a person is drinking a coffee in a coffee shop, the refined prompts could be:
'an elderly woman is drinking a coffee in a coffee shop' (subject adjective)
'an asian young woman is drinking a coffee in a coffee shop' (subject adjective)
'a young woman is drinking a hot coffee with her left hand in a coffee shop' (action details)
'a woman is drinking a coffee in an outdoor coffee shop in the garden' (setting details)
If there is no human in the sentence, you do not need to add person intentionally.
If you use adjectives, they should be visual. So don't use something like 'interesting'. Please also vary the number of modifications but do not change the number of subjects/objects that have been specified in the prompt. Remember not to change the predefined concepts that have been specified in the prompt. e.g. don't change a boy to several boys.

Can you give me {num_prompts} modified prompts for the prompt '{prompt}' please."""

    return [
        {"role": "system", "content": system_text},
        {"role": "user", "content": user_text},
    ]
|
|
|