Spaces:
Running
Running
| import json | |
| import os | |
| import time | |
| import uuid | |
| import tempfile | |
| from PIL import Image, ImageDraw, ImageFont | |
| import gradio as gr | |
| import base64 | |
| import mimetypes | |
| from google import genai | |
| from google.genai import types | |
# Helper to persist raw bytes to disk (used for generated image payloads).
def save_binary_file(file_name, data):
    """Write the binary *data* to *file_name*, overwriting any existing file."""
    with open(file_name, "wb") as out_stream:
        out_stream.write(data)
# Function to get tags from an image using Gemini
def get_image_tags(file_name, text_prompt, model="gemini-2.0-flash-exp"):
    """
    Analyze the image at *file_name* with *text_prompt* via Gemini and return
    the model's text response (used here to produce a JSON tag list).

    Returns:
        str: the concatenated text response, a fallback message when no text
        came back, or an error string — errors are returned rather than
        raised so the caller can surface them directly in the UI.
    """
    api_key = os.environ.get("geminigoogle")
    if not api_key:
        # Return a clear message if the API key is missing
        return "Error: GEMINI_API_KEY environment variable (geminigoogle) not set for tagging."

    client = genai.Client(api_key=api_key)
    uploaded_files = []  # Track uploads so they can be deleted in `finally`
    try:
        # Upload the file
        uploaded_files = [client.files.upload(file=file_name)]
        print(f"Uploaded file for tagging: {uploaded_files[0].uri}")
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_files[0].uri,
                        mime_type=uploaded_files[0].mime_type,
                    ),
                    types.Part.from_text(text=text_prompt),
                ],
            ),
        ]
        # Configure for a text-only response (focus on getting JSON)
        generate_content_config = types.GenerateContentConfig(
            temperature=0.5,  # Lower temperature tends to give more focused tags
            top_p=0.95,
            top_k=40,
            max_output_tokens=1024,
            response_modalities=["text"],
            response_mime_type="text/plain",  # Expect plain text
        )
        # Use generate_content for a single text response
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )
        tag_response = ""
        if response and response.candidates and response.candidates[0].content and response.candidates[0].content.parts:
            # Concatenate all text parts. `part.text` can be None even when
            # the attribute exists (the original `hasattr` check passed in
            # that case and `+=` raised TypeError), so guard on truthiness —
            # same pattern `generate` uses for its text parts.
            for part in response.candidates[0].content.parts:
                text_part = getattr(part, "text", None)
                if text_part:
                    tag_response += text_part
        if not tag_response:
            tag_response = "Could not generate tags."
        return tag_response
    except Exception as e:
        print(f"Error during tagging API call: {e}")
        return f"Error generating tags: {e}"
    finally:
        # Best-effort cleanup of the uploaded file; failures are only logged.
        for file in uploaded_files:
            try:
                client.files.delete(name=file.name)
                print(f"Deleted uploaded file after tagging: {file.name}")
            except Exception as cleanup_e:
                print(f"Error deleting uploaded file {file.name}: {cleanup_e}")
# Function for the main image processing call
def generate(text, file_name, model="gemini-2.0-flash-exp"):
    """
    Send the image at *file_name* plus the user prompt *text* to Gemini and
    stream back the edited result.

    Returns:
        tuple[str | None, str]: ``(image_path, text_response)`` where
        *image_path* is a temporary PNG holding the generated image (``None``
        when no usable image was returned) and *text_response* is the
        concatenated text output.

    Raises:
        ValueError: if the `geminigoogle` environment variable is not set.
        Exception: any API error is re-raised after temp-file cleanup.
    """
    api_key = os.environ.get("geminigoogle")
    if not api_key:
        raise ValueError("GEMINI_API_KEY environment variable (geminigoogle) not set.")

    client = genai.Client(api_key=api_key)
    uploaded_files = []
    temp_output_image_path = None
    try:
        uploaded_files = [client.files.upload(file=file_name)]
        print(f"Uploaded file for generation: {uploaded_files[0].uri}")
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_files[0].uri,
                        mime_type=uploaded_files[0].mime_type,
                    ),
                    types.Part.from_text(text=text),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=["image", "text"],  # Expecting potentially image and text
            response_mime_type="text/plain",
        )
        text_response = ""
        image_path = None
        # Pre-create the output file so the stream can write into it.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_output_image_path = tmp.name

        print("Starting generation stream...")
        # Stream the response
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                continue
            for part in chunk.candidates[0].content.parts:
                # Check for text parts
                text_part = getattr(part, "text", "")
                if text_part:
                    text_response += text_part
                if part.inline_data:
                    print(f"Received image data with mime type {part.inline_data.mime_type}. Saving to {temp_output_image_path}")
                    save_binary_file(temp_output_image_path, part.inline_data.data)
                    image_path = temp_output_image_path  # Set the output image path
        print("Generation stream finished.")

        if not image_path or not os.path.exists(image_path) or os.path.getsize(image_path) == 0:
            print("No valid image data was received or saved.")
            # Remove the pre-created temp file so it does not leak on disk
            # (the original left a zero-byte file behind in this case).
            if temp_output_image_path and os.path.exists(temp_output_image_path):
                os.remove(temp_output_image_path)
            image_path = None
        return image_path, text_response.strip()
    except Exception as e:
        print(f"Error during main generation API call: {e}")
        if temp_output_image_path and os.path.exists(temp_output_image_path):
            os.remove(temp_output_image_path)
        raise e  # Re-raise the exception after cleanup
    finally:
        # Best-effort cleanup of the uploaded input file; failures only logged.
        for file in uploaded_files:
            try:
                client.files.delete(name=file.name)
                print(f"Deleted uploaded file after generation: {file.name}")
            except Exception as cleanup_e:
                print(f"Error deleting uploaded file {file.name}: {cleanup_e}")
# Main processing function for Gradio
def process_image_and_prompt(composite_pil, prompt, enable_tagging=True):
    """
    Gradio click handler: save the uploaded PIL image to a temp file,
    optionally ask Gemini for JSON tags, run the main edit via `generate`,
    and return ``(gallery_images, combined_text)`` for the UI.

    Raises:
        gr.Error: wraps any processing failure so Gradio shows it in the UI.
    """
    composite_path = None   # input temp file path, cleaned up in `finally`
    output_image_path = None  # output temp file path, cleaned up in `finally`
    try:
        # 1. Save the input PIL image to a temporary PNG file. PNG preserves
        #    alpha, so RGBA and RGB inputs take the same path (the original
        #    had an if/else with two identical branches here).
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            composite_path = tmp.name
        composite_pil.save(composite_path, format="PNG")
        file_name = composite_path  # path to the saved input image file
        model = "gemini-2.0-flash-exp"  # Specify the model here

        # 2. Optionally get JSON tags for the original image.
        tag_json_string = ""
        if enable_tagging:
            tagging_prompt = "Analyze this image. Provide a JSON object containing a single key, 'tags', whose value is a JSON array of strings, representing relevant keywords or tags for the image content. Example: {\"tags\": [\"apple\", \"fruit\", \"red\"]}. Provide ONLY the JSON object and nothing else."
            tag_json_string = get_image_tags(file_name, tagging_prompt, model=model)

        # 3. Main image processing based on the user prompt.
        output_image_path, main_text_response = generate(text=prompt, file_name=file_name, model=model)

        # 4. Combine the tag JSON string and the main text response.
        if tag_json_string:
            final_text_output = f"{tag_json_string},{main_text_response}"
        else:
            final_text_output = main_text_response

        # 5. Prepare the image output for the Gradio gallery.
        image_output_list = []
        if output_image_path and os.path.exists(output_image_path):
            try:
                result_img = Image.open(output_image_path)
                # Image.open is lazy; force pixel data into memory NOW,
                # because the backing temp file is deleted in `finally`
                # before Gradio actually renders the gallery.
                result_img.load()
                # Convert to RGB for display (Gradio Gallery often expects RGB)
                if result_img.mode == "RGBA":
                    result_img = result_img.convert("RGB")
                image_output_list = [result_img]
            except Exception as img_e:
                print(f"Error opening generated image {output_image_path}: {img_e}")
                # If image opening fails, don't return an image; append the
                # error to the text response instead.
                image_output_list = []
                final_text_output += f"\n\n---\n\nError loading generated image: {img_e}"

        # 6. Return results to Gradio.
        return image_output_list, final_text_output
    except Exception as e:
        print(f"An error occurred during processing: {e}")
        # Use Gradio's error handling to display a message in the UI
        raise gr.Error(f"Processing failed: {e}", duration=5)
    finally:
        # 7. Clean up temporary files (best-effort; failures only logged).
        if composite_path and os.path.exists(composite_path):
            try:
                os.remove(composite_path)
                print(f"Deleted input temporary file: {composite_path}")
            except Exception as cleanup_e:
                print(f"Error deleting input temporary file {composite_path}: {cleanup_e}")
        if output_image_path and os.path.exists(output_image_path):
            try:
                os.remove(output_image_path)
                print(f"Deleted output temporary file: {output_image_path}")
            except Exception as cleanup_e:
                print(f"Error deleting output temporary file {output_image_path}: {cleanup_e}")
# Gradio interface (unchanged from the original code, except the button is
# now wired to `process_image_and_prompt`).
with gr.Blocks(  # css_paths="style.css",  # temporarily commented out
) as demo:
    # Static header: Gemini logo, title, and attribution links.
    gr.HTML(
        """
    <div class="header-container">
        <div>
            <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
        </div>
        <div>
            <h1>Gemini for Image Editing</h1>
            <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️|
            <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a> this Repo |
            <a href="https://aistudio.google.com/apikey">Get an API Key</a> |
            Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
        </div>
    </div>
    """
    )
    # The API Configuration section no longer takes a key from the user; the
    # key is read from the `geminigoogle` environment variable instead.
    # (Accordion text below is user-facing Ukrainian and left as-is.)
    with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
        gr.Markdown("""
        - **Ваш Gemini API ключ має бути збережений у змінній оточення `geminigoogle` в налаштуваннях Hugging Face Space.**
        - ❗ Іноді модель повертає текст замість зображення.
        """)
    with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
        gr.Markdown("""
        ### 📌 Usage
        - Upload an image and enter a prompt to generate outputs.
        - The response will include generated tags for the original image (in JSON format) and Gemini's text output.
        - If an edited image is returned, it will appear in the gallery. If not, only text will appear.
        - Upload Only PNG Image
        - ❌ **Do not use NSFW images!**
        """)
    # Left column: inputs (image, prompt, tagging toggle, submit button).
    # Right column: outputs (image gallery plus combined tag/text response).
    with gr.Row(elem_classes="main-content"):
        with gr.Column(elem_classes="input-column"):
            image_input = gr.Image(
                type="pil",
                label="Upload Image",
                image_mode="RGBA",
                elem_id="image-input",
                elem_classes="upload-box"
            )
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="Enter prompt here (e.g., 'change text to \"HELLO\"', 'remove the background')",
                label="Prompt for Gemini",
                elem_classes="prompt-input"
            )
            with_tags = gr.Checkbox(label="Enable Tagging", value=True)
            submit_btn = gr.Button("Generate", elem_classes="generate-btn")
        with gr.Column(elem_classes="output-column"):
            output_gallery = gr.Gallery(label="Generated Image Output", elem_classes="output-gallery", allow_preview=True)
            output_text = gr.Textbox(
                label="Gemini Output (Tags + Response)",
                placeholder="Original image tags (JSON) and Gemini's response will appear here.",
                elem_classes="output-text",
                lines=10  # Give more space for combined output
            )
    # Connect the button click to the updated processing function
    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[image_input, prompt_input, with_tags],
        outputs=[output_gallery, output_text],
    )
    # Clickable example inputs (image path + prompt pairs from the repo's
    # `data/` directory — the tagging checkbox keeps its default).
    gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
    examples = [
        ["data/1.webp", 'change text to "AMEER"'],
        ["data/2.webp", "remove the spoon from hand only"],
        ["data/3.webp", 'change text to "Make it "'],
        ["data/1.jpg", "add joker style only on face"],
        ["data/1777043.jpg", "add joker style only on face"],
        ["data/2807615.jpg", "add lipstick on lip only"],
        ["data/76860.jpg", "add lipstick on lip only"],
        ["data/2807615.jpg", "make it happy looking face only"],
    ]
    gr.Examples(
        examples=examples,
        inputs=[image_input, prompt_input],
        elem_id="examples-grid"
    )
# Launch with a request queue (max 50 pending) — standard for HF Spaces.
demo.queue(max_size=50).launch()