evalstate HF Staff committed on
Commit
27f8979
·
verified ·
1 Parent(s): 9fb9582

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -59
app.py CHANGED
@@ -24,32 +24,32 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
24
  MAX_SEED = np.iinfo(np.int32).max
25
  MAX_IMAGE_SIZE = 1024
26
 
27
- hf_client = InferenceClient(
28
- api_key=os.environ.get("HF_TOKEN"),
29
- )
30
- VLM_MODEL = "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"
31
 
32
- SYSTEM_PROMPT_TEXT_ONLY = """You are an expert prompt engineer for FLUX.2 by Black Forest Labs. Rewrite user prompts to be more descriptive while strictly preserving their core subject and intent.
33
 
34
- Guidelines:
35
- 1. Structure: Keep structured inputs structured (enhance within fields). Convert natural language to detailed paragraphs.
36
- 2. Details: Add concrete visual specifics - form, scale, textures, materials, lighting (quality, direction, color), shadows, spatial relationships, and environmental context.
37
- 3. Text in Images: Put ALL text in quotation marks, matching the prompt's language. Always provide explicit quoted text for objects that would contain text in reality (signs, labels, screens, etc.) - without it, the model generates gibberish.
38
 
39
- Output only the revised prompt and nothing else."""
40
 
41
- SYSTEM_PROMPT_WITH_IMAGES = """You are FLUX.2 by Black Forest Labs, an image-editing expert. You convert editing requests into one concise instruction (50-80 words, ~30 for brief requests).
42
 
43
- Rules:
44
- - Single instruction only, no commentary
45
- - Use clear, analytical language (avoid "whimsical," "cascading," etc.)
46
- - Specify what changes AND what stays the same (face, lighting, composition)
47
- - Reference actual image elements
48
- - Turn negatives into positives ("don't change X" → "keep X")
49
- - Make abstractions concrete ("futuristic" → "glowing cyan neon, metallic panels")
50
- - Keep content PG-13
51
 
52
- Output only the final instruction in plain text and nothing else."""
53
 
54
  def remote_text_encoder(prompts):
55
  from gradio_client import Client
@@ -100,44 +100,44 @@ def image_to_data_uri(img):
100
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
101
  return f"data:image/png;base64,{img_str}"
102
 
103
- def upsample_prompt_logic(prompt, image_list):
104
- try:
105
- if image_list and len(image_list) > 0:
106
- # Image + Text Editing Mode
107
- system_content = SYSTEM_PROMPT_WITH_IMAGES
108
 
109
- # Construct user message with text and images
110
- user_content = [{"type": "text", "text": prompt}]
111
 
112
- for img in image_list:
113
- data_uri = image_to_data_uri(img)
114
- user_content.append({
115
- "type": "image_url",
116
- "image_url": {"url": data_uri}
117
- })
118
 
119
- messages = [
120
- {"role": "system", "content": system_content},
121
- {"role": "user", "content": user_content}
122
- ]
123
- else:
124
- # Text Only Mode
125
- system_content = SYSTEM_PROMPT_TEXT_ONLY
126
- messages = [
127
- {"role": "system", "content": system_content},
128
- {"role": "user", "content": prompt}
129
- ]
130
-
131
- completion = hf_client.chat.completions.create(
132
- model=VLM_MODEL,
133
- messages=messages,
134
- max_tokens=1024
135
- )
136
 
137
- return completion.choices[0].message.content
138
- except Exception as e:
139
- print(f"Upsampling failed: {e}")
140
- return prompt
141
 
142
  # Updated duration function to match generate_image arguments (including progress)
143
  def get_duration(prompt_embeds, image_list, width, height, num_inference_steps, guidance_scale, seed, force_dimensions, progress=gr.Progress(track_tqdm=True)):
@@ -185,11 +185,11 @@ def infer(prompt, input_images=None, seed=42, randomize_seed=False, width=1024,
185
 
186
  # 1. Upsampling (Network bound - No GPU needed)
187
  final_prompt = prompt
188
- if prompt_upsampling:
189
- progress(0.05, desc="Upsampling prompt...")
190
- final_prompt = upsample_prompt_logic(prompt, image_list)
191
- print(f"Original Prompt: {prompt}")
192
- print(f"Upsampled Prompt: {final_prompt}")
193
 
194
  # 2. Text Encoding (Network bound - No GPU needed)
195
  progress(0.1, desc="Encoding prompt...")
 
24
  MAX_SEED = np.iinfo(np.int32).max
25
  MAX_IMAGE_SIZE = 1024
26
 
27
+ # hf_client = InferenceClient(
28
+ # api_key=os.environ.get("HF_TOKEN"),
29
+ # )
30
+ # VLM_MODEL = "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"
31
 
32
+ # SYSTEM_PROMPT_TEXT_ONLY = """You are an expert prompt engineer for FLUX.2 by Black Forest Labs. Rewrite user prompts to be more descriptive while strictly preserving their core subject and intent.
33
 
34
+ # Guidelines:
35
+ # 1. Structure: Keep structured inputs structured (enhance within fields). Convert natural language to detailed paragraphs.
36
+ # 2. Details: Add concrete visual specifics - form, scale, textures, materials, lighting (quality, direction, color), shadows, spatial relationships, and environmental context.
37
+ # 3. Text in Images: Put ALL text in quotation marks, matching the prompt's language. Always provide explicit quoted text for objects that would contain text in reality (signs, labels, screens, etc.) - without it, the model generates gibberish.
38
 
39
+ # Output only the revised prompt and nothing else."""
40
 
41
+ # SYSTEM_PROMPT_WITH_IMAGES = """You are FLUX.2 by Black Forest Labs, an image-editing expert. You convert editing requests into one concise instruction (50-80 words, ~30 for brief requests).
42
 
43
+ # Rules:
44
+ # - Single instruction only, no commentary
45
+ # - Use clear, analytical language (avoid "whimsical," "cascading," etc.)
46
+ # - Specify what changes AND what stays the same (face, lighting, composition)
47
+ # - Reference actual image elements
48
+ # - Turn negatives into positives ("don't change X" → "keep X")
49
+ # - Make abstractions concrete ("futuristic" → "glowing cyan neon, metallic panels")
50
+ # - Keep content PG-13
51
 
52
+ # Output only the final instruction in plain text and nothing else."""
53
 
54
  def remote_text_encoder(prompts):
55
  from gradio_client import Client
 
100
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
101
  return f"data:image/png;base64,{img_str}"
102
 
103
+ # def upsample_prompt_logic(prompt, image_list):
104
+ # try:
105
+ # if image_list and len(image_list) > 0:
106
+ # # Image + Text Editing Mode
107
+ # system_content = SYSTEM_PROMPT_WITH_IMAGES
108
 
109
+ # # Construct user message with text and images
110
+ # user_content = [{"type": "text", "text": prompt}]
111
 
112
+ # for img in image_list:
113
+ # data_uri = image_to_data_uri(img)
114
+ # user_content.append({
115
+ # "type": "image_url",
116
+ # "image_url": {"url": data_uri}
117
+ # })
118
 
119
+ # messages = [
120
+ # {"role": "system", "content": system_content},
121
+ # {"role": "user", "content": user_content}
122
+ # ]
123
+ # else:
124
+ # # Text Only Mode
125
+ # system_content = SYSTEM_PROMPT_TEXT_ONLY
126
+ # messages = [
127
+ # {"role": "system", "content": system_content},
128
+ # {"role": "user", "content": prompt}
129
+ # ]
130
+
131
+ # completion = hf_client.chat.completions.create(
132
+ # model=VLM_MODEL,
133
+ # messages=messages,
134
+ # max_tokens=1024
135
+ # )
136
 
137
+ # return completion.choices[0].message.content
138
+ # except Exception as e:
139
+ # print(f"Upsampling failed: {e}")
140
+ # return prompt
141
 
142
  # Updated duration function to match generate_image arguments (including progress)
143
  def get_duration(prompt_embeds, image_list, width, height, num_inference_steps, guidance_scale, seed, force_dimensions, progress=gr.Progress(track_tqdm=True)):
 
185
 
186
  # 1. Upsampling (Network bound - No GPU needed)
187
  final_prompt = prompt
188
+ # if prompt_upsampling:
189
+ # progress(0.05, desc="Upsampling prompt...")
190
+ # final_prompt = upsample_prompt_logic(prompt, image_list)
191
+ # print(f"Original Prompt: {prompt}")
192
+ # print(f"Upsampled Prompt: {final_prompt}")
193
 
194
  # 2. Text Encoding (Network bound - No GPU needed)
195
  progress(0.1, desc="Encoding prompt...")