File size: 12,124 Bytes
9cf98ec 64dd181 9cf98ec 64dd181 9cf98ec 64dd181 9cf98ec 64dd181 73570ff 64dd181 9cf98ec 64dd181 9cf98ec a65a087 9cf98ec 1a23e90 9cf98ec 64dd181 3cb5d4c 64dd181 1a23e90 d751ed8 1a23e90 64dd181 1a23e90 64dd181 1a23e90 64dd181 1a23e90 64dd181 a65a087 64dd181 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
import re
from diffusers import DiffusionPipeline, FluxPipeline
from live_preview_helpers import FLUXPipelineWithIntermediateOutputs
import torch
import os
from openai import OpenAI
import subprocess
import spaces #[uncomment to use ZeroGPU]
import base64
from io import BytesIO
# Selectable text-to-image models: display name -> Hugging Face Hub repo id.
T2I_MODELS = {
    "Stable Diffusion v2.1": "stabilityai/stable-diffusion-2-1",
    "SDXL-Turbo": "stabilityai/sdxl-turbo",
    # Default
    "Stable Diffusion v3.5-medium": "stabilityai/stable-diffusion-3.5-medium",
    "Flux.1-dev": "black-forest-labs/FLUX.1-dev",
}
# Background description for each study scenario, keyed by scenario name.
# Long texts are split with implicit string concatenation; the resulting
# values are single-line strings.
SCENARIOS = {
    "Product advertisement": (
        "You are designing an advertising campaign for a new line of coffee machines. "
        "To ensure the campaign resonates with a wider audience, you use generative models to create marketing images "
        "that showcase a variety of users interacting with the product."
    ),
    "Tourist promotion": (
        "You are creating a travel campaign to attract a variety of visitors to a specific destination. "
        "To make the promotional materials more engaging, you use generative models to design posters "
        "that highlight a broader array of experiences."
    ),
    "Fictional character generation": (
        "You are creating a superhero video game that’s fun and relatable to a range of users. "
        "You decide to use generative models to help visualize a new character."
    ),
    "Interior Design": (
        "You are helping design the furniture layout for a model one-bedroom rental apartment. "
        "To make the apartment appealing to different potential tenants, "
        "you try to visualize different furniture placements before setting everything up."
    ),
}
# Starting text-to-image prompt for each scenario; keys mirror SCENARIOS.
PROMPTS = {
    "Product advertisement": (
        "Design an advertisement image showcasing a range of users operating coffee machines."
    ),
    "Tourist promotion": (
        "Design a promotional poster to attract a variety of visitors to a tourist destination."
    ),
    # The trailing space in this prompt is part of the original value.
    "Fictional character generation": (
        "Design a video game superhero character that is relatable. "
    ),
    "Interior Design": (
        "Design an apartment that’s appealing to potential tenants."
    ),
}
# Pre-generated example images per scenario. Every scenario has four
# "baseline" and four "ours" images whose paths follow the pattern
# images/scenario<N>_<base|ours><K>.png with N, K counted from 1.
IMAGES = {
    scenario_name: {
        "baseline": [
            f"images/scenario{scenario_no}_base{image_no}.png"
            for image_no in range(1, 5)
        ],
        "ours": [
            f"images/scenario{scenario_no}_ours{image_no}.png"
            for image_no in range(1, 5)
        ],
    }
    for scenario_no, scenario_name in enumerate(
        (
            "Product advertisement",
            "Tourist promotion",
            "Fictional character generation",
            "Interior Design",
        ),
        start=1,
    )
}
# Seven-point satisfaction scale, ordered from most negative to most positive.
OPTIONS = [
    "Very Unsatisfied",
    "Unsatisfied",
    "Slightly Unsatisfied",
    "Neutral",
    "Slightly Satisfied",
    "Satisfied",
    "Very Satisfied",
]

# Labels used to refer to each of the four generated images.
IMAGE_OPTIONS = [
    "First Image",
    "Second Image",
    "Third Image",
    "Fourth Image",
]

# Instruction text (Markdown with inline <br /> breaks); a single-line string
# assembled here with implicit concatenation.
INSTRUCTION = (
    "📌 **Instruction**: Now, we want to understand your satisfaction with the images generated. <br /> "
    "📌 Step 1: You will start from evaluating the following images based on the given prompt. <br /> "
    "📌 Step 2: Then please modify the prompt according to your expectations for the given scenario background, "
    "and answer the evaluation question **until you are satisfied** with at least one of the images generated below. "
    "If you are not satisfied with the generated images, you can repeatedly modify the prompts for at most **5 times**."
)
def clean_cache():
    """Delete the ZeroGPU offload files and release cached CUDA memory.

    Runs ``rm -rf /data-nvme/zerogpu-offload/*`` through the shell (the glob
    needs shell expansion) with an empty environment, then empties the
    PyTorch CUDA cache when CUDA is available.
    """
    offload_cleanup_cmd = "rm -rf /data-nvme/zerogpu-offload/*"
    subprocess.run(offload_cleanup_cmd, shell=True, env={})

    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
def setup_model(t2i_model_repo, torch_dtype, device):
    """Load the text-to-image pipeline for a Hub repo id and move it to a device.

    Args:
        t2i_model_repo: Hugging Face Hub repo id of the model to load.
        torch_dtype: Value forwarded as ``torch_dtype`` to ``from_pretrained``.
        device: Target passed to the pipeline's ``.to()``.

    Returns:
        The loaded pipeline, already moved to ``device``.

    Raises:
        ValueError: If ``t2i_model_repo`` is not one of the supported repos.
    """
    diffusion_repos = (
        "stabilityai/sdxl-turbo",
        "stabilityai/stable-diffusion-3.5-medium",
        "stabilityai/stable-diffusion-2-1",
    )
    flux_repos = (
        "black-forest-labs/FLUX.1-dev",
        "black-forest-labs/FLUX.1-schnell",
    )

    # Membership tests replace the previous `x == a or "b"` condition, which
    # was always truthy and routed every unknown repo to the FLUX pipeline.
    if t2i_model_repo in diffusion_repos:
        pipeline_cls = DiffusionPipeline
    elif t2i_model_repo in flux_repos:
        pipeline_cls = FLUXPipelineWithIntermediateOutputs
    else:
        supported = ", ".join(diffusion_repos + flux_repos)
        raise ValueError(
            f"Unsupported text-to-image model repo: {t2i_model_repo!r}. "
            f"Supported repos: {supported}"
        )

    pipe = pipeline_cls.from_pretrained(t2i_model_repo, torch_dtype=torch_dtype).to(device)
    torch.cuda.empty_cache()
    return pipe
def init_gpt_api():
    """Create an OpenAI client using the ``OPENAI_API_KEY`` environment variable.

    The key is read with ``os.getenv``, so ``None`` is passed to the client
    when the variable is unset.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    return OpenAI(api_key=api_key)
def call_gpt_api(messages, client, model, seed, max_tokens, temperature, top_p):
    """Request one chat completion and return the text of its first choice.

    Args:
        messages: Chat messages forwarded unchanged to the API.
        client: Object exposing ``chat.completions.create``.
        model: Model name to query.
        seed: Sampling seed forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.

    Returns:
        ``choices[0].message.content`` of the completion.
    """
    request = {
        "model": model,
        "messages": messages,
        "seed": seed,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    response = client.chat.completions.create(**request)
    first_choice = response.choices[0]
    return first_choice.message.content
def clean_response_gpt(res: str):
    """Extract the entries of a numbered list from a model response.

    An entry is the text following ``<digits>.`` and one whitespace character,
    up to the end of that line; one optional double quote on either side of
    the entry is dropped.

    Returns:
        The entries in order of appearance (empty list when none are found).
    """
    numbered_entry = re.compile(r'\d+\.\s"?(.*?)"?(?=\n|$)')
    return [hit.group(1) for hit in numbered_entry.finditer(res)]
def clean_refined_prompt_response_gpt(res: str):
    """Return the refined prompt contained in a model response.

    When the response has a ``**Refined Prompt:**`` header followed by a blank
    line, everything after it is returned; otherwise the whole response is
    used. Surrounding whitespace is stripped in both cases.
    """
    header_match = re.search(r"\*\*Refined Prompt:\*\*\n\n(.+)", res, re.DOTALL)
    # Fall back to the full text when the header is missing.
    extracted = header_match.group(1) if header_match else res
    return extracted.strip()
def get_refine_msg(prompt, num_prompts):
    """Build the chat messages asking an LLM for diversified prompt variants.

    Args:
        prompt: The original prompt to vary.
        num_prompts: How many refined prompts to request.

    Returns:
        A two-element list: a system message and a user message carrying the
        refinement instructions, an example, and the request itself.
    """
    system_text = f"You are a helpful, respectful and precise assistant. You will be asked to generate {num_prompts} refined prompts. Only respond with those refined prompts"
    # The instruction text is runtime data sent to the model; keep it verbatim.
    user_text = f"""Given a prompt, modify the prompt for me to explore variations in subject attributes, actions, and contextual details, while retaining the semantic consistency of the original description.
Follow the following refinement instruction:
1. Subject: refine broad terms into specific subsets, focusing on but not restricted on ethinity, gender, age of human.
2. Object: modify the brand, color of object(s) only if it's not specified in the prompt.
3. Setting: add details to the background environment, such as change of temporal or spatial details (e.g., day to night, indoor to outdoor).
4. Action: add more details to the action or specify the object or goal of the action.
For example, given this prompt: a person is drinking a coffee in a coffee shop, the refined prompts could be:
'an elderly woman is drinking a coffee in a coffee shop' (subject adjective)
'an asian young woman is drinking a coffee in a coffee shop' (subject adjective)
'a young woman is drinking a hot coffee with her left hand in a coffee shop' (action details)
'a woman is drinking a coffee in an outdoor coffee shop in the garden' (setting details)
If there is no human in the sentence, you do not need to add person intentionally.
If you use adjectives, they should be visual. So don't use something like 'interesting'. Please also vary the number of modifications but do not change the number of subjects/objects that have been specified in the prompt. Remember not to change the predefined concepts that have been specified in the prompt. e.g. don't change a boy to several boys.
Can you give me {num_prompts} modified prompts for the prompt '{prompt}' please."""
    return [
        {"role": "system", "content": system_text},
        {"role": "user", "content": user_text},
    ]
def encode_image(image_path):
    """Read a file in binary mode and return its contents as a base64 string.

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes, decoded as UTF-8 text.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def get_personalize_message(prompt, history_prompts, history_feedback, like_image, dislike_image):
    """Build the chat messages asking an LLM to personalize the current prompt.

    Args:
        prompt: The user's current prompt to refine.
        history_prompts: Previously used prompts.
        history_feedback: Satisfaction rating for each history prompt; paired
            with ``history_prompts`` via ``zip``, so extra items in the longer
            sequence are ignored.
        like_image: Path of the preferred image, or a falsy value to omit it.
        dislike_image: Path of the disliked image, or a falsy value to omit it.

    Returns:
        A two-element list: a system message and a user message whose
        ``content`` is a list with one text part followed by up to two
        ``image_url`` parts (preferred image first, then disliked image), each
        embedded as a base64 ``data:image/png`` URL.
    """
    system_text = "You are a prompt refinement assistant. Your task is to improve a user’s text prompt based on their prompt revision history, satisfaction ratings, and preferences inferred from selected images. Your goal is to refine the prompt while maintaining its original meaning, improving clarity, specificity, and alignment with user preferences."
    # The instruction text is runtime data sent to the model; keep it verbatim.
    instructions = """The refinement should preserve the core meaning of the current prompt while improving its clarity, specificity, and style based on user feedback.
### **Input Format:**
1. **Prompt History**: A list of previously revised prompts and their corresponding satisfaction ratings.
2. **Rating Scale**: Very Unsatisfied, Unsatisfied, Slightly Unsatisfied, Neutral, Slightly Satisfied, Satisfied, Very Satisfied
3. **User-Selected Image Preferences**:
- **Preferred Image**: The image the user found most satisfactory.
- **Disliked Image**: The image the user found least satisfactory.
*Note: These images are for reference only and should be used to infer stylistic preferences rather than directly modifying prompt content.*
4. **Current Prompt**: The latest prompt from the user, which requires refinement.
### **Refinement Guidelines:**
- Identify and retain/expand patterns/elements in past revisions and correlate them with satisfaction ratings.
- You may expand current prompt in details and incorporate information from retained pattens in past revisions.
- Avoid or adjust features that led to lower ratings.
- Improve clarity, specificity, and descriptive quality while ensuring the prompt remains faithful to its current prompt's meaning.
- The preferred image reflects desirable attributes; the disliked image indicates elements to avoid. Use these for reference but **do not describe them.**
- Output only the refined prompt, no explanations, disclaimers, or formatting.
The first provided image is the user's preferred image, and the second is the disliked image.
Now, refine the following current prompt based on the given user history and preferences:\n"""

    history_lines = [
        f"{past_prompt}: {feedback}\n"
        for past_prompt, feedback in zip(history_prompts, history_feedback)
    ]
    user_text = "".join(
        [
            instructions,
            "Prompt History\n",
            *history_lines,
            f"Current Prompt: '{prompt}'\n Refined Prompt:",
        ]
    )

    content = [{"type": "text", "text": user_text}]
    # Order matters: the instructions tell the model the first image is the
    # preferred one and the second the disliked one.
    # NOTE(review): when only `dislike_image` is given it becomes the first
    # image, which contradicts that instruction - confirm callers always pass
    # both images or neither.
    for image_path in (like_image, dislike_image):
        if image_path:
            content.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{encode_image(image_path)}",
                },
            })

    # The earlier debug `print(messages)` was removed on purpose: it wrote the
    # full base64 image payloads and the user's history to stdout on each call.
    return [
        {"role": "system", "content": system_text},
        {"role": "user", "content": content},
    ]
@spaces.GPU
def call_llm_refine_prompt(prompt, num_prompts=5, max_tokens=1000, temperature=0.7, top_p=0.9):
    """Generate refined prompt variants with a local text-generation pipeline.

    The pipeline is created on first use and kept in the module-level
    ``llm_pipe`` global for later calls.

    Args:
        prompt: The prompt to diversify.
        num_prompts: Number of refined prompts to request.
        max_tokens: Passed to the pipeline as ``max_new_tokens``.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling parameter.

    Returns:
        The list of prompts parsed from the model's numbered-list reply.
    """
    # NOTE(review): `transformers`, `default_llm_model`, `torch_dtype` and
    # `llm_pipe` are not imported or defined in the visible part of this file;
    # confirm they exist at module level before calling this function.
    global llm_pipe
    print(f"loading {default_llm_model}")
    if not llm_pipe:
        llm_pipe = transformers.pipeline(
            "text-generation",
            model=default_llm_model,
            model_kwargs={"torch_dtype": torch_dtype},
            device_map="auto",
        )

    # Fixed: this previously referenced the undefined name `prmpt`.
    messages = get_refine_msg(prompt, num_prompts)

    # Stop on the tokenizer's EOS token or on the "<|eot_id|>" token.
    terminators = [
        llm_pipe.tokenizer.eos_token_id,
        llm_pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = llm_pipe(
        messages,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # The last entry of "generated_text" is read as the reply to parse.
    prompt_list = clean_response_gpt(outputs[0]["generated_text"][-1]["content"])
    return prompt_list