Spaces:

concauu
/

image_generator

Runtime error

App Files Community

concauu committed on Mar 23, 2025

Commit

772461e

verified ·

1 Parent(s): 2b6f6e8

Update app.py

Browse files

Files changed (1) hide show

app.py +137 -205

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ os.environ['HF_HUB_DOWNLOAD_TIMEOUT'] = '120'
 import numpy as np
 import random
 import spaces
-from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL,UNet2DConditionModel
 from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast, T5Tokenizer, T5EncoderModel
 from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
 from io import BytesIO
@@ -26,16 +26,12 @@ def get_hf_token(encrypted_token):
     key = "K4FlQbffvTcDxT2FIhrOPV1eue6ia45FFR3kqp2hHbM="
     if not key:
         raise ValueError("Missing decryption key! Set the DECRYPTION_KEY environment variable.")
-    # Convert key from string to bytes if necessary
     if isinstance(key, str):
         key = key.encode()
     f = Fernet(key)
-    # Decrypt and decode the token
     decrypted_token = f.decrypt(encrypted_token).decode()
     return decrypted_token
 groq_client = Groq(api_key="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")
 decrypted_token = get_hf_token("gAAAAABn3GfShExoJd50nau3B5ZJNiQ9dRD1ACO3XXMwVaIQMkmi59cL-MKGr6SYnsB0E2gGITJG2j29Ar9yjaZP-EC6hHsCBmwKSj4aFtTor9_n0_NdMBv1GtlxZRmwnQwriB-Xr94e")
 login(token=decrypted_token)
@@ -59,17 +55,17 @@ t5_text_encoder = T5EncoderModel.from_pretrained(
 class TextProjection(torch.nn.Module):
     def __init__(self):
         super().__init__()
-        self.proj = torch.nn.Linear(768, 3072)  # Project from 768 to 3072 to match the transformer's expectation
         torch.nn.init.normal_(self.proj.weight, std=0.02)
     def forward(self, x):
         return self.proj(x.to(dtype))
-# Add this override to your existing pipeline setup
 class T5FluxPipeline(FluxPipeline):
     def _get_clip_prompt_embeds(self, prompt, num_images_per_prompt, device):
         """Modified to work with T5 outputs (without classifier-free guidance handling)"""
-        # Get T5 embeddings
         text_inputs = self.tokenizer(
             prompt,
             padding="max_length",
@@ -77,24 +73,16 @@ class T5FluxPipeline(FluxPipeline):
             truncation=True,
             return_tensors="pt",
         ).to(device)
         text_outputs = self.text_encoder(**text_inputs)
         prompt_embeds = text_outputs.last_hidden_state
-        # Use mean pooling instead of CLIP's pooler_output
         pooled_prompt_embeds = prompt_embeds.mean(dim=1)
-        # Expand for batch
         prompt_embeds = prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
         pooled_prompt_embeds = pooled_prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
         return prompt_embeds, pooled_prompt_embeds
 # Initialize pipeline components
 taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
 good_vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=dtype).to(device)
-# Custom pipeline with T5 support
 pipe = T5FluxPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     text_encoder=t5_text_encoder,
@@ -104,14 +92,14 @@ pipe = T5FluxPipeline.from_pretrained(
     safety_checker=None
 ).to(device)
-# Add projection layer to pipeline
 pipe.text_projection = TextProjection().to(device, dtype=dtype)
 torch.cuda.empty_cache()
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
-# Custom low-level CLIP prompt embedder override (returns exactly two tensors)
 def custom_get_clip_prompt_embeds(self, prompt, num_images_per_prompt, device):
     text_inputs = self.tokenizer(
         prompt,
@@ -122,24 +110,14 @@ def custom_get_clip_prompt_embeds(self, prompt, num_images_per_prompt, device):
     ).to(device)
     text_outputs = self.text_encoder(**text_inputs)
     prompt_embeds = text_outputs.last_hidden_state
-    # Use mean pooling along the sequence dimension for pooled embeddings
     pooled_prompt_embeds = prompt_embeds.mean(dim=1)
-    # Repeat for each image in the batch
     prompt_embeds = prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
     pooled_prompt_embeds = pooled_prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
     return prompt_embeds, pooled_prompt_embeds
-# Override the high-level encode_prompt to use T5 encoding and return three outputs:
-def custom_encode_prompt(    self,
-    prompt,
-    device,
-    num_images_per_prompt,
-    do_classifier_free_guidance=False,
-    negative_prompt=None,
-    prompt_embeds=None,
-    prompt_2=None,
-    **kwargs):
-    # Encode the prompt using the T5 components
     text_inputs = self.tokenizer(
         prompt,
         padding="max_length",
@@ -148,150 +126,120 @@ def custom_encode_prompt(    self,
         return_tensors="pt",
     ).to(device)
     text_outputs = self.text_encoder(**text_inputs)
-    # Project T5 embeddings into CLIP space
     text_embeddings = self.text_projection(text_outputs.last_hidden_state)
-    # Compute pooled embeddings via mean pooling
     pooled_text_embeddings = text_embeddings.mean(dim=1)
     if do_classifier_free_guidance:
-        # For classifier-free guidance, get negative prompt embeddings:
-        uncond_input = self.tokenizer(
-            [negative_prompt] if negative_prompt else [""],
-            padding="max_length",
-            max_length=512,
-            truncation=True,
-            return_tensors="pt",
-        ).to(device)
-        uncond_outputs = self.text_encoder(**uncond_input)
-        uncond_embeddings = self.text_projection(uncond_outputs.last_hidden_state)
-        pooled_uncond_embeddings = uncond_embeddings.mean(dim=1)
-        # Concatenate unconditional and conditional embeddings
-        text_embeddings = torch.cat([uncond_embeddings, text_embeddings], dim=0)
-        pooled_text_embeddings = torch.cat([pooled_uncond_embeddings, pooled_text_embeddings], dim=0)
-        token_ids = text_inputs.input_ids  # use the conditional tokens as placeholder
     else:
-        token_ids = text_inputs.input_ids
-    # Repeat for the number of images per prompt
     text_embeddings = text_embeddings.repeat_interleave(num_images_per_prompt, dim=0)
     pooled_text_embeddings = pooled_text_embeddings.repeat_interleave(num_images_per_prompt, dim=0)
     token_ids = token_ids.repeat_interleave(num_images_per_prompt, dim=0)
-    # IMPORTANT: Return pooled_text_embeddings as a tensor (not a tuple)
     return text_embeddings, pooled_text_embeddings, token_ids
-# Patch both methods in your pipeline instance:
 pipe._get_clip_prompt_embeds = custom_get_clip_prompt_embeds.__get__(pipe)
 pipe._encode_prompt = custom_encode_prompt.__get__(pipe)
 pipe.encode_prompt = custom_encode_prompt.__get__(pipe)
 pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
 pipe.transformer.time_text_embed.fixed_text_proj = nn.Linear(3072, 256).to(device, dtype=dtype)
 def patched_time_embed(self, timestep, guidance, pooled_projections):
-    # Compute the timestep embedding (expected shape: (B,256))
     time_out = self.time_proj(timestep)
-    # Ensure fixed_text_proj is set to map from 3072 to 256.
-    # If it doesn't exist or its output dimension is not 256, recreate it.
-    if (not hasattr(self, "fixed_text_proj")) or (self.fixed_text_proj.out_features != 256):
-        self.fixed_text_proj = nn.Linear(3072, 256).to(
-            device=pooled_projections.device, dtype=pooled_projections.dtype
-        )
-    text_out = self.fixed_text_proj(pooled_projections)  # Should produce shape (B,256)
     return time_out + text_out
-# Apply the patch after the pipeline is created and patched with your custom encode methods:
 pipe.transformer.time_text_embed.forward = patched_time_embed.__get__(pipe.transformer.time_text_embed)
-# History functions
 def append_to_history(image, prompt, seed, width, height, guidance_scale, steps, history):
-    """Store only the final generated image"""
     if image is None:
         return history
-    # Convert numpy array to PIL Image if needed
     from PIL import Image
     import numpy as np
     if isinstance(image, np.ndarray):
-        # Convert from [0-255] to PIL Image
         if image.dtype == np.uint8:
             image = Image.fromarray(image)
-        # Convert from float [0-1] to PIL Image
         else:
             image = Image.fromarray((image * 255).astype(np.uint8))
-    # Convert final image to bytes
     buffered = BytesIO()
     image.save(buffered, format="PNG")
     img_bytes = buffered.getvalue()
     return history + [{
-        "image": img_bytes,
-        "prompt": prompt,
-        "seed": seed,
-        "width": width,
-        "height": height,
-        "guidance_scale": guidance_scale,
-        "steps": steps,
     }]
 def create_history_html(history):
     html = "<div style='display: flex; flex-direction: column; gap: 20px; margin: 20px;'>"
     for i, entry in enumerate(reversed(history)):
-        img_str = base64.b64encode(entry["image"]).decode()
-        html += f"""
-        <div style='display: flex; gap: 20px; padding: 20px; background: #f5f5f5; border-radius: 10px;'>
-            <img src="data:image/png;base64,{img_str}" style="width: 150px; height: 150px; object-fit: cover; border-radius: 5px;"/>
-            <div style='flex: 1;'>
-                <h3 style='margin: 0;'>Generation #{len(history)-i}</h3>
-                <p><strong>Prompt:</strong> {entry["prompt"]}</p>
-                <p><strong>Seed:</strong> {entry["seed"]}</p>
-                <p><strong>Size:</strong> {entry["width"]}x{entry["height"]}</p>
-                <p><strong>Guidance:</strong> {entry["guidance_scale"]}</p>
-                <p><strong>Steps:</strong> {entry["steps"]}</p>
-            </div>
-        </div>
-        """
     return html + "</div>" if history else "<p style='margin: 20px;'>No generations yet</p>"
 @spaces.GPU(duration=75)
-def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024,
-         guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
     if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
-    # Truncate prompt to 512 tokens if needed
     tokens = t5_tokenizer.encode(prompt)[:512]
     processed_prompt = t5_tokenizer.decode(tokens, skip_special_tokens=True)
     for img in pipe.flux_pipe_call_that_returns_an_iterable_of_images(
-            prompt=processed_prompt,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-            width=width,
-            height=height,
-            generator=generator,
-            output_type="pil",
-            good_vae=good_vae,
-        ):
-            yield img, seed
 def enhance_prompt(user_prompt):
-    """Enhances the given prompt using Groq and returns the refined prompt."""
     try:
-        chat_completion = groq_client.chat.completions.create(
-            messages=[
-                {
-                    "role": "system",
-                    "content": (
-                        """Enhance user input into prompts that paint a clear picture for image generation. Be precise, detailed and direct, describe not only the content of the image but also such details as tone, style, color palette, and point of view, for photorealistic images, include the name of the device used (e.g., “shot on iPhone 16”), aperture, lens, and shot type. Use precise, visual descriptions (rather than metaphorical concepts).
 Try to keep prompts to contain only keywords, yet precise, and awe-inspiring.
 Medium:
 Consider what form of art this image should be simulating.
@@ -312,23 +260,22 @@ Technique: For paintings, how was the brush manipulated? For digital art, any sp
 Photo: Describe type of photography, camera gear, and camera settings. Any specific shot technique? (Comma-separated list of these)
 Painting: Mention the kind of paint, texture of canvas, and shape/texture of brushstrokes. (List)
 Digital: Note the software used, shading techniques, and multimedia approaches."""
-                    ),
-                },
-                {"role": "user", "content": user_prompt}
-            ],
-            model="llama-3.3-70b-versatile",
-            temperature=0.5,
-            max_completion_tokens=1024,
-            top_p=1,
-            stop=None,
-            stream=False,
-        )
-        enhanced = chat_completion.choices[0].message.content
     except Exception as e:
-        enhanced = f"Error enhancing prompt: {str(e)}"
     return enhanced
-# --- Gradio Interface ---
 css = """
 #col-container {
     margin: 0 auto;
@@ -338,79 +285,64 @@ css = """
 with gr.Blocks(css=css) as demo:
     history_state = gr.State([])
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("# FLUX.1 [dev] with History Tracking")
-        # Prompt section
-        gr.Markdown("### Step 1: Enhance Your Prompt")
-        original_prompt = gr.Textbox(label="Original Prompt", lines=2)
-        enhance_button = gr.Button("Enhance Prompt")
-        enhanced_prompt = gr.Textbox(label="Enhanced Prompt (Editable)", lines=2)
-        enhance_button.click(enhance_prompt, original_prompt, enhanced_prompt)
-        # Generation section
-        gr.Markdown("### Step 2: Generate Image")
-        with gr.Row():
-            run_button = gr.Button("Generate Image", variant="primary")
-        result = gr.Image(label="Result", show_label=False)
-        # Advanced settings
-        with gr.Accordion("Advanced Settings"):
-            seed = gr.Slider(0, MAX_SEED, value=0, label="Seed")
-            randomize_seed = gr.Checkbox(True, label="Randomize seed")
-            with gr.Row():
-                width = gr.Slider(256, MAX_IMAGE_SIZE, 1024, step=32, label="Width")
-                height = gr.Slider(256, MAX_IMAGE_SIZE, 1024, step=32, label="Height")
-            with gr.Row():
-                guidance_scale = gr.Slider(1, 15, 3.5, step=0.1, label="Guidance Scale")
-                num_inference_steps = gr.Slider(1, 50, 28, step=1, label="Inference Steps")
-        # History section
-        with gr.Accordion("Generation History", open=False):
-            history_display = gr.HTML("<p style='margin: 20px;'>No generations yet</p>")
-        # Examples
-        gr.Examples(
-            examples=[
-                "a tiny astronaut hatching from an egg on the moon",
-                "a cat holding a sign that says hello world",
-                "an anime illustration of a wiener schnitzel",
-            ],
-            inputs=enhanced_prompt,
-            outputs=[result, seed],
-            fn=infer,
-            cache_examples="lazy"
-        )
-    # Event handling
     generation_event = run_button.click(
-        fn=infer,
-        inputs=[enhanced_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-        outputs=[result, seed]
     )
-        # This will execute AFTER the generator completes
     generation_event.then(
-        fn=append_to_history,
-        inputs=[result, enhanced_prompt, seed, width, height, guidance_scale, num_inference_steps, history_state],
-        outputs=history_state
     ).then(
-        fn=create_history_html,
-        inputs=history_state,
-        outputs=history_display
     )
     enhanced_prompt.submit(
-        fn=infer,
-        inputs=[enhanced_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-        outputs=[result, seed]
     ).then(
-        fn=append_to_history,
-        inputs=[result, enhanced_prompt, seed, width, height, guidance_scale, num_inference_steps, history_state],
-        outputs=history_state
     ).then(
-        fn=create_history_html,
-        inputs=history_state,
-        outputs=history_display
     )
-demo.launch(share=True)

 import numpy as np
 import random
 import spaces
+from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL, UNet2DConditionModel
 from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast, T5Tokenizer, T5EncoderModel
 from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
 from io import BytesIO
     key = "K4FlQbffvTcDxT2FIhrOPV1eue6ia45FFR3kqp2hHbM="
     if not key:
         raise ValueError("Missing decryption key! Set the DECRYPTION_KEY environment variable.")
     if isinstance(key, str):
         key = key.encode()
     f = Fernet(key)
     decrypted_token = f.decrypt(encrypted_token).decode()
     return decrypted_token
 groq_client = Groq(api_key="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")
 decrypted_token = get_hf_token("gAAAAABn3GfShExoJd50nau3B5ZJNiQ9dRD1ACO3XXMwVaIQMkmi59cL-MKGr6SYnsB0E2gGITJG2j29Ar9yjaZP-EC6hHsCBmwKSj4aFtTor9_n0_NdMBv1GtlxZRmwnQwriB-Xr94e")
 login(token=decrypted_token)
 class TextProjection(torch.nn.Module):
     def __init__(self):
         super().__init__()
+        # Project from 768 to 3072 (T5 output to our combined text space)
+        self.proj = torch.nn.Linear(768, 3072)
         torch.nn.init.normal_(self.proj.weight, std=0.02)
     def forward(self, x):
         return self.proj(x.to(dtype))
+# Custom pipeline with T5 support
 class T5FluxPipeline(FluxPipeline):
     def _get_clip_prompt_embeds(self, prompt, num_images_per_prompt, device):
         """Modified to work with T5 outputs (without classifier-free guidance handling)"""
         text_inputs = self.tokenizer(
             prompt,
             padding="max_length",
             truncation=True,
             return_tensors="pt",
         ).to(device)
         text_outputs = self.text_encoder(**text_inputs)
         prompt_embeds = text_outputs.last_hidden_state
         pooled_prompt_embeds = prompt_embeds.mean(dim=1)
         prompt_embeds = prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
         pooled_prompt_embeds = pooled_prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
         return prompt_embeds, pooled_prompt_embeds
 # Initialize pipeline components
 taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
 good_vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=dtype).to(device)
 pipe = T5FluxPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     text_encoder=t5_text_encoder,
     safety_checker=None
 ).to(device)
+# Add our projection layer to the pipeline
 pipe.text_projection = TextProjection().to(device, dtype=dtype)
 torch.cuda.empty_cache()
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
+# Custom low-level CLIP prompt embedder override
 def custom_get_clip_prompt_embeds(self, prompt, num_images_per_prompt, device):
     text_inputs = self.tokenizer(
         prompt,
     ).to(device)
     text_outputs = self.text_encoder(**text_inputs)
     prompt_embeds = text_outputs.last_hidden_state
     pooled_prompt_embeds = prompt_embeds.mean(dim=1)
     prompt_embeds = prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
     pooled_prompt_embeds = pooled_prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
     return prompt_embeds, pooled_prompt_embeds
+# Override the high-level encode_prompt to use T5 encoding and return three outputs.
+def custom_encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance=False,
+                         negative_prompt=None, prompt_embeds=None, prompt_2=None, **kwargs):
     text_inputs = self.tokenizer(
         prompt,
         padding="max_length",
         return_tensors="pt",
     ).to(device)
     text_outputs = self.text_encoder(**text_inputs)
+    # Project T5 embeddings into CLIP space using our projection layer.
     text_embeddings = self.text_projection(text_outputs.last_hidden_state)
     pooled_text_embeddings = text_embeddings.mean(dim=1)
     if do_classifier_free_guidance:
+         uncond_input = self.tokenizer(
+             [negative_prompt] if negative_prompt else [""],
+             padding="max_length",
+             max_length=512,
+             truncation=True,
+             return_tensors="pt",
+         ).to(device)
+         uncond_outputs = self.text_encoder(**uncond_input)
+         uncond_embeddings = self.text_projection(uncond_outputs.last_hidden_state)
+         pooled_uncond_embeddings = uncond_embeddings.mean(dim=1)
+         text_embeddings = torch.cat([uncond_embeddings, text_embeddings], dim=0)
+         pooled_text_embeddings = torch.cat([pooled_uncond_embeddings, pooled_text_embeddings], dim=0)
+         token_ids = text_inputs.input_ids
     else:
+         token_ids = text_inputs.input_ids
     text_embeddings = text_embeddings.repeat_interleave(num_images_per_prompt, dim=0)
     pooled_text_embeddings = pooled_text_embeddings.repeat_interleave(num_images_per_prompt, dim=0)
     token_ids = token_ids.repeat_interleave(num_images_per_prompt, dim=0)
     return text_embeddings, pooled_text_embeddings, token_ids
 pipe._get_clip_prompt_embeds = custom_get_clip_prompt_embeds.__get__(pipe)
 pipe._encode_prompt = custom_encode_prompt.__get__(pipe)
 pipe.encode_prompt = custom_encode_prompt.__get__(pipe)
 pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
+# ----- PATCH THE TRANSFORMER'S TIME EMBEDDING LAYER -----
+# Force-override the fixed_text_proj attribute so that it maps from 3072 to 256.
 pipe.transformer.time_text_embed.fixed_text_proj = nn.Linear(3072, 256).to(device, dtype=dtype)
 def patched_time_embed(self, timestep, guidance, pooled_projections):
+    # Compute timestep embedding (expected shape: (B,256))
     time_out = self.time_proj(timestep)
+    # Use the fixed_text_proj we just set.
+    text_out = self.fixed_text_proj(pooled_projections)
     return time_out + text_out
+# Patch the forward method.
 pipe.transformer.time_text_embed.forward = patched_time_embed.__get__(pipe.transformer.time_text_embed)
+# ----- HISTORY FUNCTIONS & GRADIO INTERFACE -----
 def append_to_history(image, prompt, seed, width, height, guidance_scale, steps, history):
     if image is None:
         return history
     from PIL import Image
     import numpy as np
     if isinstance(image, np.ndarray):
         if image.dtype == np.uint8:
             image = Image.fromarray(image)
         else:
             image = Image.fromarray((image * 255).astype(np.uint8))
     buffered = BytesIO()
     image.save(buffered, format="PNG")
     img_bytes = buffered.getvalue()
     return history + [{
+         "image": img_bytes,
+         "prompt": prompt,
+         "seed": seed,
+         "width": width,
+         "height": height,
+         "guidance_scale": guidance_scale,
+         "steps": steps,
     }]
 def create_history_html(history):
     html = "<div style='display: flex; flex-direction: column; gap: 20px; margin: 20px;'>"
     for i, entry in enumerate(reversed(history)):
+         img_str = base64.b64encode(entry["image"]).decode()
+         html += f"""
+         <div style='display: flex; gap: 20px; padding: 20px; background: #f5f5f5; border-radius: 10px;'>
+              <img src="data:image/png;base64,{img_str}" style="width: 150px; height: 150px; object-fit: cover; border-radius: 5px;"/>
+              <div style='flex: 1;'>
+                   <h3 style='margin: 0;'>Generation #{len(history)-i}</h3>
+                   <p><strong>Prompt:</strong> {entry["prompt"]}</p>
+                   <p><strong>Seed:</strong> {entry["seed"]}</p>
+                   <p><strong>Size:</strong> {entry["width"]}x{entry["height"]}</p>
+                   <p><strong>Guidance:</strong> {entry["guidance_scale"]}</p>
+                   <p><strong>Steps:</strong> {entry["steps"]}</p>
+              </div>
+         </div>
+         """
     return html + "</div>" if history else "<p style='margin: 20px;'>No generations yet</p>"
 @spaces.GPU(duration=75)
+def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024,
+          guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
     tokens = t5_tokenizer.encode(prompt)[:512]
     processed_prompt = t5_tokenizer.decode(tokens, skip_special_tokens=True)
     for img in pipe.flux_pipe_call_that_returns_an_iterable_of_images(
+         prompt=processed_prompt,
+         guidance_scale=guidance_scale,
+         num_inference_steps=num_inference_steps,
+         width=width,
+         height=height,
+         generator=generator,
+         output_type="pil",
+         good_vae=good_vae,
+    ):
+         yield img, seed
 def enhance_prompt(user_prompt):
     try:
+         chat_completion = groq_client.chat.completions.create(
+             messages=[
+                 {
+                     "role": "system",
+                     "content": (
+                         """Enhance user input into prompts that paint a clear picture for image generation. Be precise, detailed and direct, describe not only the content of the image but also such details as tone, style, color palette, and point of view, for photorealistic images, include the name of the device used (e.g., “shot on iPhone 16”), aperture, lens, and shot type. Use precise, visual descriptions (rather than metaphorical concepts).
 Try to keep prompts to contain only keywords, yet precise, and awe-inspiring.
 Medium:
 Consider what form of art this image should be simulating.
 Photo: Describe type of photography, camera gear, and camera settings. Any specific shot technique? (Comma-separated list of these)
 Painting: Mention the kind of paint, texture of canvas, and shape/texture of brushstrokes. (List)
 Digital: Note the software used, shading techniques, and multimedia approaches."""
+                     ),
+                 },
+                 {"role": "user", "content": user_prompt}
+             ],
+             model="llama-3.3-70b-versatile",
+             temperature=0.5,
+             max_completion_tokens=1024,
+             top_p=1,
+             stop=None,
+             stream=False,
+         )
+         enhanced = chat_completion.choices[0].message.content
     except Exception as e:
+         enhanced = f"Error enhancing prompt: {str(e)}"
     return enhanced
 css = """
 #col-container {
     margin: 0 auto;
 with gr.Blocks(css=css) as demo:
     history_state = gr.State([])
     with gr.Column(elem_id="col-container"):
+         gr.Markdown("# FLUX.1 [dev] with History Tracking")
+         gr.Markdown("### Step 1: Enhance Your Prompt")
+         original_prompt = gr.Textbox(label="Original Prompt", lines=2)
+         enhance_button = gr.Button("Enhance Prompt")
+         enhanced_prompt = gr.Textbox(label="Enhanced Prompt (Editable)", lines=2)
+         enhance_button.click(enhance_prompt, original_prompt, enhanced_prompt)
+         gr.Markdown("### Step 2: Generate Image")
+         with gr.Row():
+              run_button = gr.Button("Generate Image", variant="primary")
+         result = gr.Image(label="Result", show_label=False)
+         with gr.Accordion("Advanced Settings"):
+              seed = gr.Slider(0, MAX_SEED, value=0, label="Seed")
+              randomize_seed = gr.Checkbox(True, label="Randomize seed")
+              with gr.Row():
+                   width = gr.Slider(256, MAX_IMAGE_SIZE, 1024, step=32, label="Width")
+                   height = gr.Slider(256, MAX_IMAGE_SIZE, 1024, step=32, label="Height")
+              with gr.Row():
+                   guidance_scale = gr.Slider(1, 15, 3.5, step=0.1, label="Guidance Scale")
+                   num_inference_steps = gr.Slider(1, 50, 28, step=1, label="Inference Steps")
+         with gr.Accordion("Generation History", open=False):
+              history_display = gr.HTML("<p style='margin: 20px;'>No generations yet</p>")
+         gr.Examples(
+              examples=[
+                   "a tiny astronaut hatching from an egg on the moon",
+                   "a cat holding a sign that says hello world",
+                   "an anime illustration of a wiener schnitzel",
+              ],
+              inputs=enhanced_prompt,
+              outputs=[result, seed],
+              fn=infer,
+              cache_examples="lazy"
+         )
     generation_event = run_button.click(
+         fn=infer,
+         inputs=[enhanced_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
+         outputs=[result, seed]
     )
     generation_event.then(
+         fn=append_to_history,
+         inputs=[result, enhanced_prompt, seed, width, height, guidance_scale, num_inference_steps, history_state],
+         outputs=history_state
     ).then(
+         fn=create_history_html,
+         inputs=history_state,
+         outputs=history_display
     )
     enhanced_prompt.submit(
+         fn=infer,
+         inputs=[enhanced_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
+         outputs=[result, seed]
     ).then(
+         fn=append_to_history,
+         inputs=[result, enhanced_prompt, seed, width, height, guidance_scale, num_inference_steps, history_state],
+         outputs=history_state
     ).then(
+         fn=create_history_html,
+         inputs=history_state,
+         outputs=history_display
     )
+demo.launch(share=True)