Gemini-Image-Edit

Running

App Files Community

Varhal committed on May 2, 2025

Commit

e230053

verified ·

1 Parent(s): 2c1f8ae

removed useless comments

Browse files

Files changed (1) hide show

app.py +7 -37

app.py CHANGED Viewed

@@ -52,8 +52,8 @@ def get_image_tags(file_name, text_prompt, model="gemini-2.0-flash-exp"):
             temperature=0.5, # Lower temperature might give more focused tags
             top_p=0.95,
             top_k=40,
-            max_output_tokens=1024, # Tags shouldn't need many tokens
-            response_modalities=["text"], # Explicitly ask for text
             response_mime_type="text/plain", # Expect plain text
         )
@@ -77,10 +77,8 @@ def get_image_tags(file_name, text_prompt, model="gemini-2.0-flash-exp"):
     except Exception as e:
         print(f"Error during tagging API call: {e}")
-        # Return an error message if tagging fails
         return f"Error generating tags: {e}"
     finally:
-        # Clean up uploaded files from the tagging call
         for file in uploaded_files:
             try:
                 client.files.delete(name=file.name)
@@ -91,20 +89,15 @@ def get_image_tags(file_name, text_prompt, model="gemini-2.0-flash-exp"):
 # Function for the main image processing call
 def generate(text, file_name, model="gemini-2.0-flash-exp"):
-    """
-    Sends the image and prompt to Gemini and processes the streamed response.
-    This function is used for the main user request (editing, analysis, etc.).
-    """
     api_key = os.environ.get("geminigoogle")
     if not api_key:
         raise ValueError("GEMINI_API_KEY environment variable (geminigoogle) not set.")
     client = genai.Client(api_key=api_key)
-    uploaded_files = [] # Keep track of uploaded files for cleanup
-    temp_output_image_path = None # Keep track of generated temp image for cleanup
     try:
-        # Upload the file for the main generation call
         uploaded_files = [client.files.upload(file=file_name)]
         print(f"Uploaded file for generation: {uploaded_files[0].uri}")
@@ -134,8 +127,6 @@ def generate(text, file_name, model="gemini-2.0-flash-exp"):
         text_response = ""
         image_path = None
-        # Use NamedTemporaryFile with delete=False because we need to return the path
-        # We will handle deletion explicitly later.
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
              temp_output_image_path = tmp.name
@@ -149,44 +140,32 @@ def generate(text, file_name, model="gemini-2.0-flash-exp"):
             if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                 continue
-            # Process each part in the chunk
             for part in chunk.candidates[0].content.parts:
                  # Check for text parts
                  text_part = getattr(part, "text", "")
                  if text_part:
                      text_response += text_part
-                 # Check for inline image data
                  if part.inline_data:
                      print(f"Received image data with mime type {part.inline_data.mime_type}. Saving to {temp_output_image_path}")
                      save_binary_file(temp_output_image_path, part.inline_data.data)
                      image_path = temp_output_image_path # Set the output image path
-                     # Note: If the model sends multiple images, this will only save the last one received in a part.
-                     # For typical use cases where one image is expected, this is fine.
-                     # If multiple images could be in different parts of the *same* chunk,
-                     # you'd need more complex handling (e.g., saving each to a separate file).
-                     # If the model sends an image and *then* more text, the loop continues.
-                     # We set image_path here and let the loop finish collecting text.
         print("Generation stream finished.")
-        # The loop finishes after processing all parts of all chunks.
-        # Check if an image was actually saved, otherwise set image_path to None
         if not image_path or not os.path.exists(image_path) or os.path.getsize(image_path) == 0:
              print("No valid image data was received or saved.")
-             image_path = None # Ensure image_path is None if no image data was received/saved
-        return image_path, text_response.strip() # Return the path to the saved image (or None) and the collected text
     except Exception as e:
         print(f"Error during main generation API call: {e}")
-        # Ensure temporary files created before the error are cleaned up
         if temp_output_image_path and os.path.exists(temp_output_image_path):
              os.remove(temp_output_image_path)
         raise e # Re-raise the exception after cleanup
     finally:
-        # Clean up uploaded files from the generation call
         for file in uploaded_files:
             try:
                 client.files.delete(name=file.name)
@@ -202,12 +181,7 @@ def process_image_and_prompt(composite_pil, prompt):
         # 1. Save the input PIL image to a temporary file
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
             composite_path = tmp.name
-            # Ensure image is saved in a format compatible with Gemini, convert if necessary
             if composite_pil.mode == "RGBA":
-                # Convert RGBA to RGB if necessary, as some models prefer RGB
-                # Or handle alpha channel depending on model capabilities.
-                # For simplicity here, saving as PNG should preserve alpha,
-                # but Gemini might interpret it differently. Let's save as PNG.
                  composite_pil.save(composite_path, format="PNG")
             else:
                  composite_pil.save(composite_path, format="PNG") # Save as PNG by default
@@ -221,12 +195,11 @@ def process_image_and_prompt(composite_pil, prompt):
         tag_json_string = get_image_tags(file_name, tagging_prompt, model=model)
         # 3. Call generate for the main image processing based on the user prompt
-        # This function returns the path to a generated image (if any) and text response
         output_image_path, main_text_response = generate(text=prompt, file_name=file_name, model=model)
         # 4. Combine the tag JSON string and the main text response
         # Format the output clearly
-        final_text_output = f"Original Image Tags (JSON): {tag_json_string}\n\n---\n\nGemini Response:\n{main_text_response}"
         # 5. Prepare the image output for the Gradio gallery
         result_img = None
@@ -265,9 +238,6 @@ def process_image_and_prompt(composite_pil, prompt):
             except Exception as cleanup_e:
                 print(f"Error deleting input temporary file {composite_path}: {cleanup_e}")
-        # Clean up the temporary output image file created by generate()
-        # Note: generate() might have already deleted the *uploaded* file via API,
-        # but this handles the local file saved from inline_data.
         if output_image_path and os.path.exists(output_image_path):
              try:
                  os.remove(output_image_path)

             temperature=0.5, # Lower temperature might give more focused tags
             top_p=0.95,
             top_k=40,
+            max_output_tokens=1024,
+            response_modalities=["text"],
             response_mime_type="text/plain", # Expect plain text
         )
     except Exception as e:
         print(f"Error during tagging API call: {e}")
         return f"Error generating tags: {e}"
     finally:
         for file in uploaded_files:
             try:
                 client.files.delete(name=file.name)
 # Function for the main image processing call
 def generate(text, file_name, model="gemini-2.0-flash-exp"):
     api_key = os.environ.get("geminigoogle")
     if not api_key:
         raise ValueError("GEMINI_API_KEY environment variable (geminigoogle) not set.")
     client = genai.Client(api_key=api_key)
+    uploaded_files = []
+    temp_output_image_path = None
     try:
         uploaded_files = [client.files.upload(file=file_name)]
         print(f"Uploaded file for generation: {uploaded_files[0].uri}")
         text_response = ""
         image_path = None
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
              temp_output_image_path = tmp.name
             if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                 continue
             for part in chunk.candidates[0].content.parts:
                  # Check for text parts
                  text_part = getattr(part, "text", "")
                  if text_part:
                      text_response += text_part
                  if part.inline_data:
                      print(f"Received image data with mime type {part.inline_data.mime_type}. Saving to {temp_output_image_path}")
                      save_binary_file(temp_output_image_path, part.inline_data.data)
                      image_path = temp_output_image_path # Set the output image path
         print("Generation stream finished.")
         if not image_path or not os.path.exists(image_path) or os.path.getsize(image_path) == 0:
              print("No valid image data was received or saved.")
+             image_path = None
+        return image_path, text_response.strip()
     except Exception as e:
         print(f"Error during main generation API call: {e}")
         if temp_output_image_path and os.path.exists(temp_output_image_path):
              os.remove(temp_output_image_path)
         raise e # Re-raise the exception after cleanup
     finally:
         for file in uploaded_files:
             try:
                 client.files.delete(name=file.name)
         # 1. Save the input PIL image to a temporary file
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
             composite_path = tmp.name
             if composite_pil.mode == "RGBA":
                  composite_pil.save(composite_path, format="PNG")
             else:
                  composite_pil.save(composite_path, format="PNG") # Save as PNG by default
         tag_json_string = get_image_tags(file_name, tagging_prompt, model=model)
         # 3. Call generate for the main image processing based on the user prompt
         output_image_path, main_text_response = generate(text=prompt, file_name=file_name, model=model)
         # 4. Combine the tag JSON string and the main text response
         # Format the output clearly
+        final_text_output = f"{tag_json_string},{main_text_response}"
         # 5. Prepare the image output for the Gradio gallery
         result_img = None
             except Exception as cleanup_e:
                 print(f"Error deleting input temporary file {composite_path}: {cleanup_e}")
         if output_image_path and os.path.exists(output_image_path):
              try:
                  os.remove(output_image_path)