File size: 14,282 Bytes
43bc49d
 
 
 
 
e60b597
43bc49d
 
 
 
2c1f8ae
43bc49d
2c1f8ae
43bc49d
2c1f8ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b35c170
2c1f8ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e230053
 
2c1f8ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b35c170
2c1f8ae
 
 
 
 
 
 
 
 
b35c170
43bc49d
2c1f8ae
cba75f5
 
 
2c1f8ae
cba75f5
2c1f8ae
e230053
 
b35c170
 
2c1f8ae
 
 
b35c170
 
 
 
 
 
2c1f8ae
 
b35c170
2c1f8ae
b35c170
 
 
 
 
 
 
 
 
2c1f8ae
 
b35c170
 
 
2c1f8ae
43bc49d
2c1f8ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e230053
2c1f8ae
e230053
cba75f5
2c1f8ae
 
 
 
 
b35c170
 
2c1f8ae
 
 
 
 
 
 
 
63e554e
2c1f8ae
 
43fd88f
2c1f8ae
43fd88f
 
2c1f8ae
 
 
 
 
b35c170
2c1f8ae
 
b35c170
2c1f8ae
63e554e
 
 
 
b35c170
2c1f8ae
 
b35c170
2c1f8ae
 
63e554e
 
 
 
b35c170
2c1f8ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cba75f5
43fd88f
2c1f8ae
 
 
 
b35c170
 
2c1f8ae
 
b35c170
2c1f8ae
 
 
 
 
 
 
b35c170
2c1f8ae
 
 
 
b35c170
43bc49d
2c1f8ae
9dde84f
 
43bc49d
 
bb65a12
 
2c1f8ae
e60b597
 
2c1f8ae
 
 
 
 
e60b597
43bc49d
 
 
cba75f5
2c1f8ae
bb65a12
 
2c1f8ae
 
bb65a12
 
 
 
cba75f5
2c1f8ae
 
 
 
cba75f5
bb65a12
43bc49d
bb65a12
 
43bc49d
 
 
2c1f8ae
bb65a12
 
43bc49d
 
 
2c1f8ae
 
bb65a12
 
63e554e
2c1f8ae
cba75f5
bb65a12
2c1f8ae
bb65a12
2c1f8ae
 
b35c170
2c1f8ae
43bc49d
 
2c1f8ae
43bc49d
 
63e554e
e60b597
43bc49d
cba75f5
bb65a12
cba75f5
e60b597
cba75f5
 
 
 
 
 
 
 
e60b597
cba75f5
43bc49d
 
b35c170
bb65a12
43bc49d
 
63e554e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
import json
import os
import time
import uuid
import tempfile
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
import base64
import mimetypes
from google import genai
from google.genai import types

def save_binary_file(file_name, data):
    """Write raw bytes to *file_name*, overwriting any existing content."""
    with open(file_name, "wb") as out:
        out.write(data)

# Function to get tags from an image using Gemini
def get_image_tags(file_name, text_prompt, model="gemini-2.0-flash-exp"):
    """
    Analyze an image with a text prompt using Gemini and return the text response.

    Used specifically for generating a JSON tag list for the uploaded image.

    Args:
        file_name: Path to the image file to upload and analyze.
        text_prompt: Instruction sent alongside the image (here, a tagging prompt).
        model: Gemini model identifier to query.

    Returns:
        str: The model's text response, or a human-readable error message if the
        API key is missing or the request fails (this function never raises).
    """
    api_key = os.environ.get("geminigoogle")
    if not api_key:
        # Fail soft: tagging is optional, so report instead of raising.
        return "Error: GEMINI_API_KEY environment variable (geminigoogle) not set for tagging."

    client = genai.Client(api_key=api_key)
    uploaded_files = []  # Track server-side uploads so they are deleted in `finally`

    try:
        # Upload the image so it can be referenced by URI in the request
        uploaded_files = [client.files.upload(file=file_name)]
        print(f"Uploaded file for tagging: {uploaded_files[0].uri}")

        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_files[0].uri,
                        mime_type=uploaded_files[0].mime_type,
                    ),
                    types.Part.from_text(text=text_prompt),
                ],
            ),
        ]

        # Configure for a text-only response (we want plain JSON text back)
        generate_content_config = types.GenerateContentConfig(
            temperature=0.5,  # Lower temperature gives more focused tags
            top_p=0.95,
            top_k=40,
            max_output_tokens=1024,
            response_modalities=["text"],
            response_mime_type="text/plain",
        )

        # A single non-streaming request is enough for a short text answer
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )

        tag_response = ""
        if response and response.candidates and response.candidates[0].content and response.candidates[0].content.parts:
            # Concatenate all text parts. Guard against parts whose `text`
            # attribute is None — the original `tag_response += part.text`
            # raised TypeError in that case (hasattr alone is not enough).
            for part in response.candidates[0].content.parts:
                text_part = getattr(part, "text", None)
                if text_part:
                    tag_response += text_part
        if not tag_response:
            # Covers both "no candidates" and "candidates with no usable text"
            tag_response = "Could not generate tags."

        return tag_response

    except Exception as e:
        print(f"Error during tagging API call: {e}")
        return f"Error generating tags: {e}"
    finally:
        # Best-effort cleanup of the server-side upload
        for file in uploaded_files:
            try:
                client.files.delete(name=file.name)
                print(f"Deleted uploaded file after tagging: {file.name}")
            except Exception as cleanup_e:
                print(f"Error deleting uploaded file {file.name}: {cleanup_e}")


# Function for the main image processing call
def generate(text, file_name, model="gemini-2.0-flash-exp"):
    """
    Run the main image-editing request against Gemini and collect the results.

    Streams the model response, writing any returned inline image bytes to a
    temporary PNG file and accumulating any text parts.

    Args:
        text: The user's editing prompt.
        file_name: Path to the input image file to upload.
        model: Gemini model identifier to query.

    Returns:
        tuple: (path to the generated PNG, or None if no image was returned;
        the stripped concatenated text response).

    Raises:
        ValueError: If the API key environment variable is not set.
        Exception: Any error from the Gemini client is re-raised after the
        temporary output file has been cleaned up.
    """
    api_key = os.environ.get("geminigoogle")
    if not api_key:
        raise ValueError("GEMINI_API_KEY environment variable (geminigoogle) not set.")

    client = genai.Client(api_key=api_key)
    uploaded_files = []            # Server-side uploads to delete in `finally`
    temp_output_image_path = None  # Local temp file that receives image bytes

    try:
        uploaded_files = [client.files.upload(file=file_name)]
        print(f"Uploaded file for generation: {uploaded_files[0].uri}")

        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_files[0].uri,
                        mime_type=uploaded_files[0].mime_type,
                    ),
                    types.Part.from_text(text=text),
                ],
            ),
        ]

        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=["image", "text"],  # May return image and/or text
            response_mime_type="text/plain",
        )

        text_response = ""
        image_path = None

        # Reserve a temp file path for any image bytes the stream delivers
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_output_image_path = tmp.name

        print("Starting generation stream...")
        # Stream the response chunk by chunk
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                continue

            for part in chunk.candidates[0].content.parts:
                # Accumulate text parts (getattr guards parts without `text`)
                text_part = getattr(part, "text", "")
                if text_part:
                    text_response += text_part

                # Save inline image bytes; getattr guards parts that lack an
                # `inline_data` attribute entirely (the original assumed it).
                inline = getattr(part, "inline_data", None)
                if inline:
                    print(f"Received image data with mime type {inline.mime_type}. Saving to {temp_output_image_path}")
                    save_binary_file(temp_output_image_path, inline.data)
                    image_path = temp_output_image_path  # Set the output image path

        print("Generation stream finished.")

        if not image_path or not os.path.exists(image_path) or os.path.getsize(image_path) == 0:
            print("No valid image data was received or saved.")
            image_path = None
            # Fix: the original leaked the (empty) temp file on this path —
            # remove it so it does not accumulate on disk.
            if temp_output_image_path and os.path.exists(temp_output_image_path):
                os.remove(temp_output_image_path)

        return image_path, text_response.strip()

    except Exception as e:
        print(f"Error during main generation API call: {e}")
        if temp_output_image_path and os.path.exists(temp_output_image_path):
            os.remove(temp_output_image_path)
        raise  # Bare raise preserves the original traceback (not `raise e`)

    finally:
        # Best-effort cleanup of the server-side upload
        for file in uploaded_files:
            try:
                client.files.delete(name=file.name)
                print(f"Deleted uploaded file after generation: {file.name}")
            except Exception as cleanup_e:
                print(f"Error deleting uploaded file {file.name}: {cleanup_e}")

# Main processing function for Gradio
def process_image_and_prompt(composite_pil, prompt, enable_tagging=True):
    """
    Gradio handler: optionally tag the uploaded image, then run the edit prompt.

    Args:
        composite_pil: The uploaded image as a PIL Image.
        prompt: The user's editing prompt for Gemini.
        enable_tagging: When True, also request a JSON tag list for the image
            and prepend it to the text output.

    Returns:
        tuple: (list holding the generated PIL image for the gallery — empty if
        no image was produced; combined tags + model text response).

    Raises:
        gr.Error: Wraps any processing failure for display in the UI.
    """
    composite_path = None     # Temp file holding the input image
    output_image_path = None  # Temp file produced by generate()
    try:
        # 1. Save the input PIL image to a temporary PNG file.
        #    (The original had RGBA/else branches that did the same thing;
        #    PNG supports both RGB and RGBA, so a single save call suffices.)
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            composite_path = tmp.name
            composite_pil.save(composite_path, format="PNG")

        file_name = composite_path  # Path to the saved input image file
        model = "gemini-2.0-flash-exp"  # Model used for both tagging and editing

        # 2. Optionally get a JSON tag list for the original image
        tag_json_string = ""
        if enable_tagging:
            tagging_prompt = "Analyze this image. Provide a JSON object containing a single key, 'tags', whose value is a JSON array of strings, representing relevant keywords or tags for the image content. Example: {\"tags\": [\"apple\", \"fruit\", \"red\"]}. Provide ONLY the JSON object and nothing else."
            tag_json_string = get_image_tags(file_name, tagging_prompt, model=model)

        # 3. Main image processing based on the user prompt
        output_image_path, main_text_response = generate(text=prompt, file_name=file_name, model=model)

        # 4. Combine the tag JSON string and the main text response
        if tag_json_string:
            final_text_output = f"{tag_json_string},{main_text_response}"
        else:
            final_text_output = main_text_response

        # 5. Prepare the image output for the Gradio gallery
        image_output_list = []
        if output_image_path and os.path.exists(output_image_path):
            try:
                result_img = Image.open(output_image_path)
                # Gradio galleries generally expect RGB
                if result_img.mode == "RGBA":
                    result_img = result_img.convert("RGB")
                # Fix: PIL opens images lazily — force the pixel data to be
                # read NOW, because the `finally` block below deletes the
                # backing file before Gradio renders the image.
                result_img.load()
                image_output_list = [result_img]
            except Exception as img_e:
                print(f"Error opening generated image {output_image_path}: {img_e}")
                # If image opening fails, return no image and report in text
                image_output_list = []
                final_text_output += f"\n\n---\n\nError loading generated image: {img_e}"

        # 6. Return results to Gradio
        return image_output_list, final_text_output

    except Exception as e:
        print(f"An error occurred during processing: {e}")
        # Surface the failure in the Gradio UI
        raise gr.Error(f"Processing failed: {e}", duration=5)

    finally:
        # 7. Clean up both temporary files (best-effort; never raises)
        if composite_path and os.path.exists(composite_path):
            try:
                os.remove(composite_path)
                print(f"Deleted input temporary file: {composite_path}")
            except Exception as cleanup_e:
                print(f"Error deleting input temporary file {composite_path}: {cleanup_e}")

        if output_image_path and os.path.exists(output_image_path):
            try:
                os.remove(output_image_path)
                print(f"Deleted output temporary file: {output_image_path}")
            except Exception as cleanup_e:
                print(f"Error deleting output temporary file {output_image_path}: {cleanup_e}")


# Gradio interface (layout unchanged from the original code, except the handler connection)
with gr.Blocks( # css_paths="style.css",  # NOTE: custom CSS temporarily disabled
    ) as demo:
    # Static header with logo and project links
    gr.HTML(
    """
    <div class="header-container">
      <div>
        <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
      </div>
      <div>
        <h1>Gemini for Image Editing</h1>
        <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️|
         <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a> this Repo |
         <a href="https://aistudio.google.com/apikey">Get an API Key</a> |
         Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
      </div>
    </div>
    """
    )

    # API Configuration section only documents the env-var setup, since the
    # key is no longer entered through the UI (it is read from `geminigoogle`)
    with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
        gr.Markdown("""
    - **Ваш Gemini API ключ має бути збережений у змінній оточення `geminigoogle` в налаштуваннях Hugging Face Space.**
    - ❗ Іноді модель повертає текст замість зображення.
    """)

    with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
        gr.Markdown("""
    ### 📌 Usage
      - Upload an image and enter a prompt to generate outputs.
      - The response will include generated tags for the original image (in JSON format) and Gemini's text output.
      - If an edited image is returned, it will appear in the gallery. If not, only text will appear.
      - Upload Only PNG Image
      - ❌ **Do not use NSFW images!**
    """)

    # Input column (left) and output column (right)
    with gr.Row(elem_classes="main-content"):
        with gr.Column(elem_classes="input-column"):
            image_input = gr.Image(
                type="pil",
                label="Upload Image",
                image_mode="RGBA",
                elem_id="image-input",
                elem_classes="upload-box"
            )
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="Enter prompt here (e.g., 'change text to \"HELLO\"', 'remove the background')",
                label="Prompt for Gemini",
                elem_classes="prompt-input"
            )
            # Toggles the optional tagging pass in process_image_and_prompt
            with_tags = gr.Checkbox(label="Enable Tagging", value=True)
            submit_btn = gr.Button("Generate", elem_classes="generate-btn")

        with gr.Column(elem_classes="output-column"):
            output_gallery = gr.Gallery(label="Generated Image Output", elem_classes="output-gallery", allow_preview=True)
            output_text = gr.Textbox(
                label="Gemini Output (Tags + Response)",
                placeholder="Original image tags (JSON) and Gemini's response will appear here.",
                elem_classes="output-text",
                lines=10 # Give more space for combined output
            )

    # Connect the button click to the updated processing function
    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[image_input, prompt_input, with_tags],
        outputs=[output_gallery, output_text],
    )

    gr.Markdown("## Try these examples", elem_classes="gr-examples-header")

    # Example (image path, prompt) pairs shown below the interface;
    # files under data/ are expected to ship with the Space
    examples = [
        ["data/1.webp", 'change text to "AMEER"'],
        ["data/2.webp", "remove the spoon from hand only"],
        ["data/3.webp", 'change text to "Make it "'],
        ["data/1.jpg", "add joker style only on face"],
        ["data/1777043.jpg", "add joker style only on face"],
        ["data/2807615.jpg", "add lipstick on lip only"],
        ["data/76860.jpg", "add lipstick on lip only"],
        ["data/2807615.jpg", "make it happy looking face only"],
    ]

    gr.Examples(
        examples=examples,
        inputs=[image_input, prompt_input],
        elem_id="examples-grid"
    )

# Queue requests (max 50 pending) and start the app
demo.queue(max_size=50).launch()