# NOTE(review): removed non-Python residue scraped from the Hugging Face
# Spaces page (status text, git commit hashes, line-number gutter) that made
# this file invalid Python.
import json
import os
import time
import uuid
import tempfile
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
import base64
import mimetypes
from google import genai
from google.genai import types
# Helper function to save binary data
def save_binary_file(file_name, data):
    """Write raw binary *data* to *file_name*, replacing any existing file."""
    with open(file_name, mode="wb") as out_fh:
        out_fh.write(data)
# Function to get tags from an image using Gemini
def get_image_tags(file_name, text_prompt, model="gemini-2.0-flash-exp"):
    """Analyze the image at *file_name* with *text_prompt* and return Gemini's text reply.

    Used here specifically to obtain a JSON tag list for the uploaded image.

    Args:
        file_name: path to a local image file to upload to Gemini.
        text_prompt: the instruction sent alongside the image.
        model: Gemini model identifier.

    Returns:
        A plain string: the model's concatenated text response, a fallback
        message when no candidates came back, or an ``Error ...`` string when
        the API key is missing or the call fails. This function never raises
        for API errors.
    """
    api_key = os.environ.get("geminigoogle")
    if not api_key:
        # Return a clear message if the API key is missing.
        return "Error: GEMINI_API_KEY environment variable (geminigoogle) not set for tagging."
    client = genai.Client(api_key=api_key)
    uploaded_files = []  # Keep track of uploaded files for cleanup
    try:
        # Upload the file so the model can reference it by URI.
        uploaded_files = [client.files.upload(file=file_name)]
        print(f"Uploaded file for tagging: {uploaded_files[0].uri}")
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_files[0].uri,
                        mime_type=uploaded_files[0].mime_type,
                    ),
                    types.Part.from_text(text=text_prompt),
                ],
            ),
        ]
        # Configure for a text-only response (focus on getting JSON).
        generate_content_config = types.GenerateContentConfig(
            temperature=0.5,  # Lower temperature might give more focused tags
            top_p=0.95,
            top_k=40,
            max_output_tokens=1024,
            response_modalities=["text"],
            response_mime_type="text/plain",  # Expect plain text
        )
        # Single (non-streamed) request is enough for a short tag list.
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )
        tag_response = ""
        if response and response.candidates and response.candidates[0].content and response.candidates[0].content.parts:
            # Concatenate all text parts from the response.
            # FIX: part.text can be None for non-text parts; the previous
            # hasattr(part, 'text') check let None through and
            # `tag_response += None` raised TypeError. A truthy getattr()
            # matches the streaming handler in generate().
            for part in response.candidates[0].content.parts:
                text_part = getattr(part, "text", None)
                if text_part:
                    tag_response += text_part
        else:
            tag_response = "Could not generate tags."
        return tag_response
    except Exception as e:
        print(f"Error during tagging API call: {e}")
        return f"Error generating tags: {e}"
    finally:
        # Best-effort remote cleanup of the uploaded file.
        for file in uploaded_files:
            try:
                client.files.delete(name=file.name)
                print(f"Deleted uploaded file after tagging: {file.name}")
            except Exception as cleanup_e:
                print(f"Error deleting uploaded file {file.name}: {cleanup_e}")
# Function for the main image processing call
def generate(text, file_name, model="gemini-2.0-flash-exp"):
    """Run the main image-editing request against Gemini, streaming the reply.

    Uploads the image at *file_name*, streams the model's response to the
    *text* prompt, and collects both text and image output.

    Args:
        text: the user's editing instruction.
        file_name: path to the local input image file to upload.
        model: Gemini model identifier.

    Returns:
        Tuple ``(image_path, text_response)``: ``image_path`` is the path of
        a temporary PNG holding the generated image, or ``None`` when the
        model returned no image data; ``text_response`` is the concatenated,
        stripped text output.

    Raises:
        ValueError: if the ``geminigoogle`` environment variable is not set.
        Exception: any API error is re-raised after temp-file cleanup.
    """
    api_key = os.environ.get("geminigoogle")
    if not api_key:
        raise ValueError("GEMINI_API_KEY environment variable (geminigoogle) not set.")
    client = genai.Client(api_key=api_key)
    uploaded_files = []
    temp_output_image_path = None
    try:
        uploaded_files = [client.files.upload(file=file_name)]
        print(f"Uploaded file for generation: {uploaded_files[0].uri}")
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_files[0].uri,
                        mime_type=uploaded_files[0].mime_type,
                    ),
                    types.Part.from_text(text=text),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=["image", "text"],  # Expecting potentially image and text
            response_mime_type="text/plain",
        )
        text_response = ""
        image_path = None
        # Pre-create a temp file to receive any image bytes from the stream.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_output_image_path = tmp.name
        print("Starting generation stream...")
        # Stream the response chunk by chunk.
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                continue
            for part in chunk.candidates[0].content.parts:
                # Accumulate text parts (part.text may be None/empty).
                text_part = getattr(part, "text", "")
                if text_part:
                    text_response += text_part
                if part.inline_data:
                    print(f"Received image data with mime type {part.inline_data.mime_type}. Saving to {temp_output_image_path}")
                    save_binary_file(temp_output_image_path, part.inline_data.data)
                    image_path = temp_output_image_path  # Set the output image path
        print("Generation stream finished.")
        if not image_path or not os.path.exists(image_path) or os.path.getsize(image_path) == 0:
            print("No valid image data was received or saved.")
            image_path = None
            # FIX: delete the pre-created temp file so it does not leak on
            # runs where the model returns only text (previously the empty
            # file stayed on disk forever).
            if temp_output_image_path and os.path.exists(temp_output_image_path):
                os.remove(temp_output_image_path)
        return image_path, text_response.strip()
    except Exception as e:
        print(f"Error during main generation API call: {e}")
        if temp_output_image_path and os.path.exists(temp_output_image_path):
            os.remove(temp_output_image_path)
        raise e  # Re-raise the exception after cleanup
    finally:
        # Best-effort remote cleanup of the uploaded input file.
        for file in uploaded_files:
            try:
                client.files.delete(name=file.name)
                print(f"Deleted uploaded file after generation: {file.name}")
            except Exception as cleanup_e:
                print(f"Error deleting uploaded file {file.name}: {cleanup_e}")
# Main processing function for Gradio
def process_image_and_prompt(composite_pil, prompt, enable_tagging=True):
    """Gradio click handler: tag the uploaded image, then run the edit prompt.

    Args:
        composite_pil: the uploaded ``PIL.Image`` (``None`` when the user
            clicked Generate without uploading anything).
        prompt: the editing instruction forwarded to Gemini.
        enable_tagging: when True, first request a JSON tag list for the
            original image and prepend it to the text output.

    Returns:
        ``(image_list, text)`` matching the gallery and textbox outputs:
        ``image_list`` holds the generated PIL image (or is empty), ``text``
        is the combined tags + model response.

    Raises:
        gr.Error: on missing input or any processing failure, so the message
            surfaces in the Gradio UI.
    """
    # FIX: fail fast with a readable UI error instead of the AttributeError
    # the original raised when no image was uploaded.
    if composite_pil is None:
        raise gr.Error("Please upload an image before clicking Generate.", duration=5)
    composite_path = None  # temp file holding the uploaded input image
    output_image_path = None  # temp file produced by generate()
    try:
        # 1. Save the input PIL image to a temporary PNG file. The handle is
        # closed before saving so the write is safe on all platforms.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            composite_path = tmp.name
        # PNG preserves the alpha channel, so RGBA and RGB images take the
        # same path (the original had an if/else with identical branches).
        composite_pil.save(composite_path, format="PNG")
        file_name = composite_path  # Path to the saved input image file
        model = "gemini-2.0-flash-exp"  # Specify the model here
        # 2. Ask Gemini for tags describing the original image.
        tag_json_string = ""
        if enable_tagging:
            tagging_prompt = "Analyze this image. Provide a JSON object containing a single key, 'tags', whose value is a JSON array of strings, representing relevant keywords or tags for the image content. Example: {\"tags\": [\"apple\", \"fruit\", \"red\"]}. Provide ONLY the JSON object and nothing else."
            tag_json_string = get_image_tags(file_name, tagging_prompt, model=model)
        # 3. Main image processing based on the user prompt.
        output_image_path, main_text_response = generate(text=prompt, file_name=file_name, model=model)
        # 4. Combine the tag JSON string and the main text response.
        if tag_json_string:
            final_text_output = f"{tag_json_string},{main_text_response}"
        else:
            final_text_output = main_text_response
        # 5. Prepare the image output for the Gradio gallery.
        image_output_list = []
        if output_image_path and os.path.exists(output_image_path):
            try:
                result_img = Image.open(output_image_path)
                # Convert to RGB for display (Gradio Gallery often expects RGB).
                if result_img.mode == "RGBA":
                    result_img = result_img.convert("RGB")
                image_output_list = [result_img]
            except Exception as img_e:
                print(f"Error opening generated image {output_image_path}: {img_e}")
                # If image opening fails, return no image and report it.
                image_output_list = []
                final_text_output += f"\n\n---\n\nError loading generated image: {img_e}"
        # 6. Return results to Gradio.
        return image_output_list, final_text_output
    except Exception as e:
        print(f"An error occurred during processing: {e}")
        # Surface the failure in the Gradio UI.
        raise gr.Error(f"Processing failed: {e}", duration=5)
    finally:
        # 7. Clean up local temporary files (input, then output).
        if composite_path and os.path.exists(composite_path):
            try:
                os.remove(composite_path)
                print(f"Deleted input temporary file: {composite_path}")
            except Exception as cleanup_e:
                print(f"Error deleting input temporary file {composite_path}: {cleanup_e}")
        if output_image_path and os.path.exists(output_image_path):
            try:
                os.remove(output_image_path)
                print(f"Deleted output temporary file: {output_image_path}")
            except Exception as cleanup_e:
                print(f"Error deleting output temporary file {output_image_path}: {cleanup_e}")
# Gradio interface (unchanged from the original code, except the button wiring).
with gr.Blocks(  # css_paths="style.css",  # temporarily commented this line out
) as demo:
    # Header banner: Gemini logo, title, and external links.
    gr.HTML(
        """
        <div class="header-container">
            <div>
                <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
            </div>
            <div>
                <h1>Gemini for Image Editing</h1>
                <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️|
                <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a> this Repo |
                <a href="https://aistudio.google.com/apikey">Get an API Key</a> |
                Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
            </div>
        </div>
        """
    )
    # The API Configuration section now only documents the env-var setup,
    # since the key is no longer entered in the UI. (Markdown text below is
    # user-facing Ukrainian copy and is deliberately left unchanged.)
    with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
        gr.Markdown("""
        - **Ваш Gemini API ключ має бути збережений у змінній оточення `geminigoogle` в налаштуваннях Hugging Face Space.**
        - ❗ Іноді модель повертає текст замість зображення.
        """)
    with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
        gr.Markdown("""
        ### 📌 Usage
        - Upload an image and enter a prompt to generate outputs.
        - The response will include generated tags for the original image (in JSON format) and Gemini's text output.
        - If an edited image is returned, it will appear in the gallery. If not, only text will appear.
        - Upload Only PNG Image
        - ❌ **Do not use NSFW images!**
        """)
    # Main two-column layout: inputs on the left, outputs on the right.
    with gr.Row(elem_classes="main-content"):
        with gr.Column(elem_classes="input-column"):
            image_input = gr.Image(
                type="pil",  # hand the handler a PIL.Image, not a file path
                label="Upload Image",
                image_mode="RGBA",
                elem_id="image-input",
                elem_classes="upload-box"
            )
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="Enter prompt here (e.g., 'change text to \"HELLO\"', 'remove the background')",
                label="Prompt for Gemini",
                elem_classes="prompt-input"
            )
            with_tags = gr.Checkbox(label="Enable Tagging", value=True)
            submit_btn = gr.Button("Generate", elem_classes="generate-btn")
        with gr.Column(elem_classes="output-column"):
            output_gallery = gr.Gallery(label="Generated Image Output", elem_classes="output-gallery", allow_preview=True)
            output_text = gr.Textbox(
                label="Gemini Output (Tags + Response)",
                placeholder="Original image tags (JSON) and Gemini's response will appear here.",
                elem_classes="output-text",
                lines=10  # Give more space for combined output
            )
    # Connect the button click to the processing function.
    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[image_input, prompt_input, with_tags],
        outputs=[output_gallery, output_text],
    )
    gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
    # Example (image, prompt) pairs; paths are relative to the Space repo.
    examples = [
        ["data/1.webp", 'change text to "AMEER"'],
        ["data/2.webp", "remove the spoon from hand only"],
        ["data/3.webp", 'change text to "Make it "'],
        ["data/1.jpg", "add joker style only on face"],
        ["data/1777043.jpg", "add joker style only on face"],
        ["data/2807615.jpg", "add lipstick on lip only"],
        ["data/76860.jpg", "add lipstick on lip only"],
        ["data/2807615.jpg", "make it happy looking face only"],
    ]
    gr.Examples(
        examples=examples,
        inputs=[image_input, prompt_input],
        elem_id="examples-grid"
    )
# Queue requests (up to 50 waiting) and start the server.
demo.queue(max_size=50).launch()