File size: 5,820 Bytes
1b3c35b
 
15652b4
1b3c35b
 
 
 
 
279af2f
1b3c35b
 
 
 
 
b8c2f1b
1b3c35b
 
 
 
 
 
 
 
 
 
 
 
15652b4
1b3c35b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19d53bf
1b3c35b
 
 
 
 
 
 
 
 
9fca7c3
1b3c35b
 
 
 
 
 
 
15652b4
1b3c35b
 
 
 
 
 
 
 
 
 
 
 
d12f2bb
1b3c35b
9fca7c3
1b3c35b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
efbbf0f
1b3c35b
 
 
 
 
 
 
 
efbbf0f
 
1b3c35b
 
 
 
 
 
efbbf0f
1b3c35b
 
 
 
 
 
 
 
 
 
 
 
efbbf0f
 
1b3c35b
 
efbbf0f
1b3c35b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
efbbf0f
427cb66
1b3c35b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import os
import tempfile
from io import BytesIO

import google.generativeai as genai
import gradio as gr
from PIL import Image

# Gemini API key. A key supplied through the GEMINI_API_KEY environment
# variable (e.g. a deployment secret) takes precedence, so a deployment can
# use its own credentials without editing this file.
# SECURITY: the literal fallback below is committed to source control and
# should be treated as exposed. Rotate it and drop the fallback once the
# environment variable is provisioned wherever this app runs.
GEMINI_API_KEY = (
    os.environ.get("GEMINI_API_KEY")
    or "AIzaSyDL5Rilo7ptJpUOZdY6wy8PJYUcVcnDADs"
)

def configure_api_key():
    """Hand the module-level Gemini API key to the ``genai`` client.

    Raises:
        gr.Error: If ``GEMINI_API_KEY`` is empty or otherwise falsy.
    """
    if GEMINI_API_KEY:
        genai.configure(api_key=GEMINI_API_KEY)
        return
    raise gr.Error("Gemini API key is not set.")

def generate(text, images, model="gemini-2.5-flash"):
    """Send images plus a text prompt to a Gemini model and collect the reply.

    Args:
        text: Prompt string; it is appended after the images in the request.
        images: Iterable of PIL images (``None`` is treated as empty). Images
            in RGBA mode are converted to RGB before being sent.
        model: Model name passed to ``genai.GenerativeModel``.

    Returns:
        A ``(image_path, text_response)`` tuple. ``image_path`` is the path of
        a temporary PNG file holding the last inline-data part of the first
        candidate, or ``None`` when the reply carried no such part.
        ``text_response`` is every text part of that candidate, each followed
        by a newline (empty string when there is no text).

    Raises:
        gr.Error: If the API key is not configured, or if the request or the
            handling of the response fails for any reason.
    """
    configure_api_key()

    # Build the request payload: images first, then the prompt.
    contents = [
        img.convert("RGB") if img.mode == "RGBA" else img
        for img in (images or [])
    ]
    contents.append(text)

    try:
        response = genai.GenerativeModel(model).generate_content(contents)

        text_chunks = []
        image_path = None

        for part in response.candidates[0].content.parts:
            if getattr(part, "text", None):
                text_chunks.append(part.text)
            elif getattr(part, "inline_data", None):
                # Decode first so a corrupt payload does not leave an empty
                # temporary file behind.
                generated_image = Image.open(BytesIO(part.inline_data.data))
                # delete=False: the caller reads the file after we return.
                # Writing through the open handle avoids re-opening the path
                # while it is still held open (which fails on Windows).
                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                    image_path = tmp.name
                    generated_image.save(tmp, format="PNG")
                print(f"Generated image saved to: {image_path} with prompt: {text}")

        text_response = "".join(chunk + "\n" for chunk in text_chunks)
        return image_path, text_response
    except Exception as e:
        raise gr.Error(f"Error generating content: {str(e)}") from e

def load_uploaded_images(uploaded_files):
    """Open every supported image among the uploaded files.

    Args:
        uploaded_files: Iterable of uploads, or ``None``/empty. Each entry may
            be a filesystem path (``str`` or ``os.PathLike``) or an object
            whose ``name`` attribute holds the path.

    Returns:
        A list of PIL images, in upload order, for the entries whose path ends
        in .png, .jpg, .jpeg or .webp (case-insensitive). Entries with any
        other extension are skipped; the list is empty when nothing was
        uploaded.
    """
    supported_suffixes = ('.png', '.jpg', '.jpeg', '.webp')
    uploaded_images = []
    for entry in uploaded_files or ():
        # Plain paths are used as-is; file-like upload objects expose the
        # path through ``.name``.
        if isinstance(entry, (str, os.PathLike)):
            path = os.fspath(entry)
        else:
            path = entry.name
        if path.lower().endswith(supported_suffixes):
            uploaded_images.append(Image.open(path))
    return uploaded_images

def process_image_and_prompt(uploaded_files, prompt):
    """Validate the inputs, run generation, and build the UI outputs.

    Args:
        uploaded_files: Uploaded files as accepted by
            ``load_uploaded_images``.
        prompt: Prompt text entered by the user.

    Returns:
        A ``(images, output_images, text_response)`` tuple: the opened input
        images, a one-element list with the generated image (or ``None`` when
        no image was produced), and the text returned by ``generate``.

    Raises:
        gr.Error: If no file or no prompt was provided, if none of the files
            is a supported image, or if loading or generation fails.
    """
    # Validate outside the try block so these messages reach the UI verbatim
    # instead of being re-wrapped by the generic handler below.
    if not uploaded_files:
        raise gr.Error("Please upload at least one image.")
    if not prompt or not prompt.strip():
        raise gr.Error("Please provide a prompt.")

    try:
        # Load images
        images = load_uploaded_images(uploaded_files)
        if not images:
            raise gr.Error(
                "No supported images found. Supported formats: PNG, JPG, JPEG, WEBP."
            )

        # Generate content
        image_path, text_response = generate(text=prompt, images=images)

        # Prepare outputs
        output_images = None
        if image_path:
            # Copy the pixels into memory so the file handle can be closed and
            # the temporary file removed instead of accumulating on disk.
            with Image.open(image_path) as generated:
                output_images = [generated.copy()]
            try:
                os.remove(image_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is not fatal.
                pass
        return images, output_images, text_response
    except gr.Error:
        # Already user-facing (raised above or by generate); do not re-wrap.
        raise
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}") from e

# Gradio interface: a header, two collapsible help panels, an input column
# (file upload, prompt, button) and an output column (galleries, text).
with gr.Blocks(css="style.css") as demo:
    gr.HTML("""
    <div class="header-container">
      <div>
          <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
      </div>
      <div>
          <h1>Gemini for Image Editing</h1>
          <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️ | 
          <a href="https://huggingface.co/spaces">Duplicate this Space</a> |
          <a href="https://aistudio.google.com/apikey">Learn about Gemini API</a></p>
      </div>
    </div>
    """)

    with gr.Accordion("⚠️ API Configuration ⚠️", open=False):
        gr.Markdown("""
        - **Note:** The Gemini API is pre-configured for this Space.
        - **Issue:** ❗ Sometimes the model returns text instead of an image.
        ### 🔧 Steps to Address:
        1. **🛠️ Duplicate the Space**  
           - Create a copy on Hugging Face Spaces for modifications.
        2. **🔑 API Key Info**  
           - The API key is already set up for this demo. For custom deployments, get your own key from Google AI Studio.
        """)

    with gr.Accordion("📌 Usage Instructions", open=False):
        gr.Markdown("""
        ### 📌 Usage  
        - Upload an image and enter a prompt to generate outputs.
        - If text is returned instead of an image, it will appear in the text output.
        - Supported formats: PNG, JPG, JPEG, WEBP
        - ❌ **Do not use NSFW images!**
        """)

    with gr.Row():
        with gr.Column():
            image_input = gr.File(
                file_types=["image"],
                file_count="multiple",
                label="Upload Images"
            )
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="Enter prompt here...",
                label="Prompt"
            )
            submit_btn = gr.Button("Generate")

        with gr.Column():
            uploaded_gallery = gr.Gallery(label="Uploaded Images")
            output_gallery = gr.Gallery(label="Generated Outputs")
            output_text = gr.Textbox(
                label="Gemini Output",
                placeholder="Text response will appear here if no image is generated."
            )

    # Run the full pipeline when the button is clicked.
    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[image_input, prompt_input],
        outputs=[uploaded_gallery, output_gallery, output_text]
    )

    # Preview the uploaded images as soon as they arrive, before generating.
    image_input.upload(
        fn=load_uploaded_images,
        inputs=[image_input],
        outputs=[uploaded_gallery]
    )

    gr.Markdown("## Try these examples")
    examples = [
        ["data/1.webp", "change text to 'HUGGINGFACE'"],
        ["data/2.webp", "remove the spoon from hand only"],
        ["data/3.webp", "change text to 'AI POWERED'"],
        ["data/1.jpg", "add futuristic style to background"],
    ]
    gr.Examples(
        examples=examples,
        inputs=[image_input, prompt_input]
    )

demo.launch()