# Gemini for Image Editing

import os
import tempfile
from io import BytesIO

import google.generativeai as genai
import gradio as gr
from PIL import Image

# Read the Gemini API key from the environment so the secret never lives in
# source control. Set GEMINI_API_KEY in the deployment's secret settings.
# NOTE: any key that was previously committed in this file should be treated
# as compromised and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

def configure_api_key():
    """Hand the module-level API key to the Gemini client library.

    Raises:
        gr.Error: If ``GEMINI_API_KEY`` is empty or otherwise falsy.
    """
    key = GEMINI_API_KEY
    if key:
        genai.configure(api_key=key)
        return
    raise gr.Error("Gemini API key is not set.")

def generate(text, images, model="gemini-2.5-flash"):
    """Send the images and prompt to a Gemini model and collect its reply.

    Args:
        text: Prompt string; it is appended after the images in the request.
        images: Iterable of PIL images. Images in RGBA mode are converted to
            RGB before being sent.
        model: Name of the Gemini model to call.

    Returns:
        A ``(image_path, text_response)`` tuple. ``image_path`` is the path
        of a temporary ``.png`` file holding the last image part of the
        reply, or ``None`` when the reply contains no image part; the file
        is not deleted automatically. ``text_response`` is the concatenation
        of every text part, each followed by a newline.

    Raises:
        gr.Error: If the API key is not set, or if the request or the
            processing of the response fails.
    """
    configure_api_key()

    # Images first, prompt last.
    contents = [
        img.convert("RGB") if img.mode == "RGBA" else img
        for img in images
    ]
    contents.append(text)

    try:
        response = genai.GenerativeModel(model).generate_content(contents)

        # Fail with a readable message instead of an opaque IndexError below.
        if not response.candidates:
            raise ValueError("the model returned no candidates")

        text_chunks = []
        image_path = None

        for part in response.candidates[0].content.parts:
            if getattr(part, "text", None):
                text_chunks.append(part.text + "\n")
            elif getattr(part, "inline_data", None):
                # Reserve a persistent temp path; only the name is needed, so
                # the handle is released before PIL writes to that path.
                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                    image_path = tmp.name
                with Image.open(BytesIO(part.inline_data.data)) as generated_image:
                    generated_image.save(image_path)
                print(f"Generated image saved to: {image_path} with prompt: {text}")

        return image_path, "".join(text_chunks)
    except Exception as e:
        # Surface every failure to the UI, keeping the original cause chained.
        raise gr.Error(f"Error generating content: {str(e)}") from e

def load_uploaded_images(uploaded_files):
    """Open every supported image among the uploaded files.

    Args:
        uploaded_files: Iterable of uploads, or a falsy value when nothing
            has been uploaded. Each entry may be an object exposing its
            path as ``.name``, a plain path string, or an ``os.PathLike``.

    Returns:
        A list of opened PIL images in upload order. Entries whose path does
        not end in .png, .jpg, .jpeg or .webp (case-insensitive) are skipped.
    """
    supported_suffixes = ('.png', '.jpg', '.jpeg', '.webp')
    uploaded_images = []
    for file in uploaded_files or ():
        if isinstance(file, os.PathLike):
            # A pathlib.Path's ``.name`` is only the basename, so take the
            # full path instead.
            path = os.fspath(file)
        else:
            # Upload objects carry their path in ``.name``; a plain string
            # is already the path.
            path = getattr(file, "name", file)
        if path.lower().endswith(supported_suffixes):
            uploaded_images.append(Image.open(path))
    return uploaded_images

def process_image_and_prompt(uploaded_files, prompt):
    """Validate the inputs, run the Gemini request and shape the UI outputs.

    Args:
        uploaded_files: Uploaded file entries as accepted by
            ``load_uploaded_images``.
        prompt: Prompt text to send with the images.

    Returns:
        A ``(images, output_images, text_response)`` tuple: the loaded input
        images, a one-element list with the generated image (or ``None``
        when no image was produced), and the model's text reply.

    Raises:
        gr.Error: If no file or no prompt was given, if none of the uploaded
            files has a supported image extension, or if loading or
            generation fails.
    """
    if not uploaded_files:
        raise gr.Error("Please upload at least one image.")
    if not prompt:
        raise gr.Error("Please provide a prompt.")

    try:
        images = load_uploaded_images(uploaded_files)
        if not images:
            # Every upload was filtered out by extension; do not silently
            # send a text-only request.
            raise gr.Error(
                "None of the uploaded files is a supported image "
                "(PNG, JPG, JPEG, WEBP)."
            )

        image_path, text_response = generate(text=prompt, images=images)

        output_images = [Image.open(image_path)] if image_path else None
        return images, output_images, text_response
    except gr.Error:
        # Already a user-facing error; re-raise without wrapping it again.
        raise
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}") from e

# Gradio interface: page header, two help accordions, an input column and an
# output column, event wiring, and example prompts.
# NOTE(review): css is passed as the string "style.css" — confirm the installed
# Gradio version treats this as a stylesheet path rather than literal CSS.
with gr.Blocks(css="style.css") as demo:
    # Static page header: logo, title and reference links.
    gr.HTML("""
    <div class="header-container">
      <div>
          <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
      </div>
      <div>
          <h1>Gemini for Image Editing</h1>
          <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️ | 
          <a href="https://huggingface.co/spaces">Duplicate this Space</a> |
          <a href="https://aistudio.google.com/apikey">Learn about Gemini API</a></p>
      </div>
    </div>
    """)
    
    # Help text about the API key; the accordion is created with open=False.
    with gr.Accordion("⚠️ API Configuration ⚠️", open=False):
        gr.Markdown("""
        - **Note:** The Gemini API is pre-configured for this Space.
        - **Issue:** ❗ Sometimes the model returns text instead of an image.
        ### 🔧 Steps to Address:
        1. **🛠️ Duplicate the Space**  
           - Create a copy on Hugging Face Spaces for modifications.
        2. **🔑 API Key Info**  
           - The API key is already set up for this demo. For custom deployments, get your own key from Google AI Studio.
        """)

    # Usage notes; the accordion is created with open=False.
    with gr.Accordion("📌 Usage Instructions", open=False):
        gr.Markdown("""
        ### 📌 Usage  
        - Upload an image and enter a prompt to generate outputs.
        - If text is returned instead of an image, it will appear in the text output.
        - Supported formats: PNG, JPG, JPEG, WEBP
        - ❌ **Do not use NSFW images!**
        """)

    with gr.Row():
        # First column: inputs (multi-file image upload, prompt, submit button).
        with gr.Column():
            image_input = gr.File(
                file_types=["image"],
                file_count="multiple",
                label="Upload Images"
            )
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="Enter prompt here...",
                label="Prompt"
            )
            submit_btn = gr.Button("Generate")
        
        # Second column: outputs (uploaded-image gallery, generated-image
        # gallery, and the model's text reply).
        with gr.Column():
            uploaded_gallery = gr.Gallery(label="Uploaded Images")
            output_gallery = gr.Gallery(label="Generated Outputs")
            output_text = gr.Textbox(
                label="Gemini Output",
                placeholder="Text response will appear here if no image is generated."
            )

    # Clicking the button runs process_image_and_prompt; its three return
    # values fill the two galleries and the text box, in that order.
    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[image_input, prompt_input],
        outputs=[uploaded_gallery, output_gallery, output_text]
    )
    
    # On upload, load the files with load_uploaded_images and show them in the
    # uploaded-images gallery without waiting for the button.
    image_input.upload(
        fn=load_uploaded_images,
        inputs=[image_input],
        outputs=[uploaded_gallery]
    )

    gr.Markdown("## Try these examples")
    # Each example pairs an image path with a prompt.
    # NOTE(review): image_input uses file_count="multiple" but each example
    # gives a single path string — confirm Gradio accepts this shape.
    examples = [
        ["data/1.webp", "change text to 'HUGGINGFACE'"],
        ["data/2.webp", "remove the spoon from hand only"],
        ["data/3.webp", "change text to 'AI POWERED'"],
        ["data/1.jpg", "add futuristic style to background"],
    ]
    gr.Examples(
        examples=examples,
        inputs=[image_input, prompt_input]
    )

# Start the app; this runs at module level, so it also runs on import.
demo.launch()