FLUX.2-dev

Running on Zero

App Files Community

evalstate HF Staff committed 16 days ago

Commit

27f8979

verified ·

1 Parent(s): 9fb9582

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -59

app.py CHANGED Viewed

@@ -24,32 +24,32 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
-hf_client = InferenceClient(
-    api_key=os.environ.get("HF_TOKEN"),
-)
-VLM_MODEL = "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"
-SYSTEM_PROMPT_TEXT_ONLY = """You are an expert prompt engineer for FLUX.2 by Black Forest Labs. Rewrite user prompts to be more descriptive while strictly preserving their core subject and intent.
-Guidelines:
-1. Structure: Keep structured inputs structured (enhance within fields). Convert natural language to detailed paragraphs.
-2. Details: Add concrete visual specifics - form, scale, textures, materials, lighting (quality, direction, color), shadows, spatial relationships, and environmental context.
-3. Text in Images: Put ALL text in quotation marks, matching the prompt's language. Always provide explicit quoted text for objects that would contain text in reality (signs, labels, screens, etc.) - without it, the model generates gibberish.
-Output only the revised prompt and nothing else."""
-SYSTEM_PROMPT_WITH_IMAGES = """You are FLUX.2 by Black Forest Labs, an image-editing expert. You convert editing requests into one concise instruction (50-80 words, ~30 for brief requests).
-Rules:
-- Single instruction only, no commentary
-- Use clear, analytical language (avoid "whimsical," "cascading," etc.)
-- Specify what changes AND what stays the same (face, lighting, composition)
-- Reference actual image elements
-- Turn negatives into positives ("don't change X" → "keep X")
-- Make abstractions concrete ("futuristic" → "glowing cyan neon, metallic panels")
-- Keep content PG-13
-Output only the final instruction in plain text and nothing else."""
 def remote_text_encoder(prompts):
     from gradio_client import Client
@@ -100,44 +100,44 @@ def image_to_data_uri(img):
     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return f"data:image/png;base64,{img_str}"
-def upsample_prompt_logic(prompt, image_list):
-    try:
-        if image_list and len(image_list) > 0:
-            # Image + Text Editing Mode
-            system_content = SYSTEM_PROMPT_WITH_IMAGES
-            # Construct user message with text and images
-            user_content = [{"type": "text", "text": prompt}]
-            for img in image_list:
-                data_uri = image_to_data_uri(img)
-                user_content.append({
-                    "type": "image_url",
-                    "image_url": {"url": data_uri}
-                })
-            messages = [
-                {"role": "system", "content": system_content},
-                {"role": "user", "content": user_content}
-            ]
-        else:
-            # Text Only Mode
-            system_content = SYSTEM_PROMPT_TEXT_ONLY
-            messages = [
-                {"role": "system", "content": system_content},
-                {"role": "user", "content": prompt}
-            ]
-        completion = hf_client.chat.completions.create(
-            model=VLM_MODEL,
-            messages=messages,
-            max_tokens=1024
-        )
-        return completion.choices[0].message.content
-    except Exception as e:
-        print(f"Upsampling failed: {e}")
-        return prompt
 # Updated duration function to match generate_image arguments (including progress)
 def get_duration(prompt_embeds, image_list, width, height, num_inference_steps, guidance_scale, seed, force_dimensions, progress=gr.Progress(track_tqdm=True)):
@@ -185,11 +185,11 @@ def infer(prompt, input_images=None, seed=42, randomize_seed=False, width=1024,
     # 1. Upsampling (Network bound - No GPU needed)
     final_prompt = prompt
-    if prompt_upsampling:
-        progress(0.05, desc="Upsampling prompt...")
-        final_prompt = upsample_prompt_logic(prompt, image_list)
-        print(f"Original Prompt: {prompt}")
-        print(f"Upsampled Prompt: {final_prompt}")
     # 2. Text Encoding (Network bound - No GPU needed)
     progress(0.1, desc="Encoding prompt...")

 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
+# hf_client = InferenceClient(
+#     api_key=os.environ.get("HF_TOKEN"),
+# )
+# VLM_MODEL = "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"
+# SYSTEM_PROMPT_TEXT_ONLY = """You are an expert prompt engineer for FLUX.2 by Black Forest Labs. Rewrite user prompts to be more descriptive while strictly preserving their core subject and intent.
+# Guidelines:
+# 1. Structure: Keep structured inputs structured (enhance within fields). Convert natural language to detailed paragraphs.
+# 2. Details: Add concrete visual specifics - form, scale, textures, materials, lighting (quality, direction, color), shadows, spatial relationships, and environmental context.
+# 3. Text in Images: Put ALL text in quotation marks, matching the prompt's language. Always provide explicit quoted text for objects that would contain text in reality (signs, labels, screens, etc.) - without it, the model generates gibberish.
+# Output only the revised prompt and nothing else."""
+# SYSTEM_PROMPT_WITH_IMAGES = """You are FLUX.2 by Black Forest Labs, an image-editing expert. You convert editing requests into one concise instruction (50-80 words, ~30 for brief requests).
+# Rules:
+# - Single instruction only, no commentary
+# - Use clear, analytical language (avoid "whimsical," "cascading," etc.)
+# - Specify what changes AND what stays the same (face, lighting, composition)
+# - Reference actual image elements
+# - Turn negatives into positives ("don't change X" → "keep X")
+# - Make abstractions concrete ("futuristic" → "glowing cyan neon, metallic panels")
+# - Keep content PG-13
+# Output only the final instruction in plain text and nothing else."""
 def remote_text_encoder(prompts):
     from gradio_client import Client
     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return f"data:image/png;base64,{img_str}"
+# def upsample_prompt_logic(prompt, image_list):
+#     try:
+#         if image_list and len(image_list) > 0:
+#             # Image + Text Editing Mode
+#             system_content = SYSTEM_PROMPT_WITH_IMAGES
+#             # Construct user message with text and images
+#             user_content = [{"type": "text", "text": prompt}]
+#             for img in image_list:
+#                 data_uri = image_to_data_uri(img)
+#                 user_content.append({
+#                     "type": "image_url",
+#                     "image_url": {"url": data_uri}
+#                 })
+#             messages = [
+#                 {"role": "system", "content": system_content},
+#                 {"role": "user", "content": user_content}
+#             ]
+#         else:
+#             # Text Only Mode
+#             system_content = SYSTEM_PROMPT_TEXT_ONLY
+#             messages = [
+#                 {"role": "system", "content": system_content},
+#                 {"role": "user", "content": prompt}
+#             ]
+#         completion = hf_client.chat.completions.create(
+#             model=VLM_MODEL,
+#             messages=messages,
+#             max_tokens=1024
+#         )
+#         return completion.choices[0].message.content
+#     except Exception as e:
+#         print(f"Upsampling failed: {e}")
+#         return prompt
 # Updated duration function to match generate_image arguments (including progress)
 def get_duration(prompt_embeds, image_list, width, height, num_inference_steps, guidance_scale, seed, force_dimensions, progress=gr.Progress(track_tqdm=True)):
     # 1. Upsampling (Network bound - No GPU needed)
     final_prompt = prompt
+    # if prompt_upsampling:
+    #     progress(0.05, desc="Upsampling prompt...")
+    #     final_prompt = upsample_prompt_logic(prompt, image_list)
+    #     print(f"Original Prompt: {prompt}")
+    #     print(f"Upsampled Prompt: {final_prompt}")
     # 2. Text Encoding (Network bound - No GPU needed)
     progress(0.1, desc="Encoding prompt...")