File size: 6,027 Bytes
bb0b951
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9b914b
bb0b951
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9b914b
bb0b951
e9b914b
bb0b951
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9b914b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import os
import gradio as gr
from google import genai
from google.genai import types
import tempfile
import uuid
from pathlib import Path

# Shared Gemini client; stays None when GOOGLE_API_KEY is unset or empty so
# that importing this module never fails for lack of credentials.
client = (
    genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
    if os.environ.get("GOOGLE_API_KEY")
    else None
)


def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode and return ``file_name``.

    The file is opened with mode ``"wb"``, so an existing file at that path
    is overwritten.
    """
    with open(file_name, mode="wb") as sink:
        sink.write(data)
    return file_name


def process_image_with_gemini(image, instruction) -> "tuple[str | None, str, str]":
    """Edit an image with Gemini and save the result under ``output_gemini/``.

    Each call gets its own ``output_gemini/request_<8 hex chars>/`` folder.
    The input is written there as ``input.jpg`` and, when the model returns
    image data, the returned bytes are written as ``edited.jpg``.

    Args:
        image: Input PIL image.
        instruction: Text instructions for editing.

    Returns:
        Tuple ``(output_image_path, response_text, status_message)``.
        ``output_image_path`` is ``None`` when the model produced no image or
        when the Gemini call failed; ``status_message`` then says why.

    Raises:
        Exception: Failures while creating the request folder or saving the
            input image propagate to the caller. Failures of the Gemini
            upload/generation calls are caught and reported through
            ``status_message`` instead.
    """
    output_dir = Path("output_gemini")
    output_dir.mkdir(exist_ok=True)

    request_id = f"request_{uuid.uuid4().hex[:8]}"
    request_folder = output_dir / request_id
    request_folder.mkdir(exist_ok=True)

    # JPEG cannot store alpha or palette data, so saving e.g. an RGBA PNG
    # upload as ``input.jpg`` would raise. Convert such images to RGB first;
    # modes the JPEG writer already accepts are left untouched.
    if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        image = image.convert("RGB")

    input_image_path = request_folder / "input.jpg"
    image.save(input_image_path)

    try:
        # The copy in the request folder is uploaded directly; a second
        # temporary copy of the same image is not needed.
        uploaded = client.files.upload(file=str(input_image_path))

        model = "gemini-2.0-flash-exp-image-generation"
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded.uri,
                        mime_type="image/jpeg",
                    ),
                    types.Part.from_text(text=instruction),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=[
                "image",
                "text",
            ],
            safety_settings=[
                types.SafetySetting(
                    category="HARM_CATEGORY_CIVIC_INTEGRITY",
                    threshold="OFF",
                ),
            ],
            response_mime_type="text/plain",
        )

        text_fragments = []
        edited_image_path = None

        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            if (
                not chunk.candidates
                or not chunk.candidates[0].content
                or not chunk.candidates[0].content.parts
            ):
                continue

            # A chunk may carry several parts, so inspect all of them.
            for part in chunk.candidates[0].content.parts:
                # Test the attribute *values*, not their existence: a part
                # exposes both ``inline_data`` and ``text`` and leaves the
                # unused one as None, so ``hasattr`` cannot tell them apart.
                inline_data = getattr(part, "inline_data", None)
                if inline_data is not None and inline_data.data:
                    # NOTE(review): the payload's mime type is not inspected;
                    # the file is always named edited.jpg, and a later image
                    # part overwrites an earlier one.
                    edited_image_path = request_folder / "edited.jpg"
                    save_binary_file(str(edited_image_path), inline_data.data)
                elif getattr(part, "text", None):
                    text_fragments.append(part.text)

        response_text = "".join(text_fragments)

        if edited_image_path and edited_image_path.exists():
            return str(edited_image_path), response_text, "Success"
        return None, response_text, "No image generated"

    except Exception as e:
        error_message = str(e)
        # Quota/rate-limit failures get a friendlier message than raw errors.
        if (
            "RESOURCE_EXHAUSTED" in error_message
            or "rate limit" in error_message.lower()
        ):
            return None, "", "Rate limit exceeded. Please try again later."
        return None, "", f"Error: {error_message}"


def process_image(image, instruction):
    """Validate the request and hand it to Gemini for editing.

    Args:
        image: Input PIL image, or None when nothing was uploaded.
        instruction: Text instructions for editing.

    Returns:
        Tuple containing (output_image_path, response_text, status_message).
        When validation fails, the API key is missing, or the Gemini helper
        raises, the first two items are ``None`` and ``""`` and the status
        message describes the problem.
    """
    if image is None:
        problem = "Please upload an image."
    elif not (instruction and instruction.strip()):
        problem = "Please provide an instruction."
    elif client is None:
        problem = "Error: Google API key not found. Please set the GOOGLE_API_KEY environment variable."
    else:
        try:
            return process_image_with_gemini(image, instruction)
        except Exception as exc:
            problem = f"Unexpected error: {exc!s}"

    return None, "", problem


# Build the Gradio UI. Components are created inside nested layout contexts,
# and the resulting Blocks object is bound to the module-level name ``app``.
with gr.Blocks(title="Image Editor", theme='Jonny001/GreenEarth_Theme') as app:
    with gr.Column():
        # Page heading and short usage description.
        gr.Markdown("# 🖼️ Image Editor")
        gr.Markdown(
            "Upload an image and provide instructions for Gemini to edit it. The AI will generate a new image based on your instructions."
        )

        with gr.Row():
            # First column: user inputs.
            with gr.Column():
                # type="pil" makes Gradio pass the upload to the handler as a
                # PIL image, which is what process_image documents as input.
                input_image = gr.Image(type="pil", label="Upload Image")
                instruction = gr.Textbox(
                    label="Editing Instructions",
                    placeholder="Describe the edits you want to make...",
                    lines=3,
                )
                submit_btn = gr.Button("✨ Process Image", variant="primary")

            # Second column: read-only results.
            with gr.Column():
                output_image = gr.Image(label="Edited Image")
                response_text = gr.Textbox(
                    label="Gemini's Response", lines=3, interactive=False
                )
                status = gr.Textbox(label="Status", interactive=False)

        # The three outputs line up with the 3-tuple returned by
        # process_image: (output_image_path, response_text, status_message).
        submit_btn.click(
            fn=process_image,
            inputs=[input_image, instruction],
            outputs=[output_image, response_text, status],
        )

        # Footer notes shown below the editor.
        gr.Markdown(
            """
            ### Notes
            - Processing may take up to 30 seconds
            - If you need to duplicate this space, just remember to set the Google API key as an environment variable
            """,
            elem_classes="footer",
        )


# Start the web app only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    print("Starting Gemini Image Editor...")
    # NOTE(review): ssr_mode=True is forwarded to Gradio's launch(); presumably
    # it enables server-side rendering — confirm against the installed version.
    app.launch(ssr_mode=True)