Spaces:

abhinav0231
/

SparrowTale

Sleeping

App Files Files Community

abhinav0231 committed on Sep 6, 2025

Commit

177b6e5

verified ·

1 Parent(s): 6340efd

Update image_generation.py

Browse files

Files changed (1) hide show

image_generation.py +14 -28

image_generation.py CHANGED Viewed

@@ -1,25 +1,19 @@
-# image_generation.py
 import os
 import mimetypes
 import json
-import base64
 import streamlit as st
 import io
 import time
 import traceback
 from PIL import Image
 from typing import List, Dict, Optional
-# CORRECT: Import from the modern 'google-genai' SDK
 from google import genai
-from google.genai import types
 from google.api_core import exceptions
 # --- Client Initialization ---
 client = None
 try:
-    # Ensure the API key is set in Hugging Face Space secrets
     api_key = st.secrets.get("GEMINI_API_KEY")
     if api_key:
         client = genai.Client(api_key=api_key)
@@ -34,7 +28,6 @@ except Exception as e:
     st.stop()
 # --- Helper Functions ---
 def save_binary_file(file_name: str, data: bytes):
     """Saves binary data to a file."""
     try:
@@ -47,10 +40,9 @@ def save_binary_file(file_name: str, data: bytes):
 def pil_image_to_part(image: Image.Image) -> types.Part:
     """Converts a PIL Image to a genai.types.Part object."""
     img_byte_arr = io.BytesIO()
-    # Save image to an in-memory byte stream
     image.save(img_byte_arr, format='JPEG')
     img_bytes = img_byte_arr.getvalue()
-    # CORRECT: This works with 'google-genai'
     return types.Part.from_bytes(
         data=img_bytes,
         mime_type='image/jpeg'
@@ -62,8 +54,7 @@ def generate_image_with_gemini(
     context_image: Optional[Image.Image] = None
 ) -> Optional[str]:
     """
-    Generates an image using the Gemini API, optionally with a context image.
-    This function is now fully compatible with the 'google-genai' SDK.
     """
     if not client:
         print("❌ Gemini client not initialized.")
@@ -84,7 +75,7 @@ def generate_image_with_gemini(
             - Lighting and atmosphere
             Style: Cinematic, epic fantasy digital painting with rich details and dramatic lighting.
             Generate an image that illustrates the following scene:"""
-            print(" -> Using previous image as context for consistent styling.")
         else:
             system_prompt = """You are a master storyboard artist creating the opening scene of a visual story.
             IMPORTANT: You MUST generate an image for this request. Create a stunning, cinematic image in an epic fantasy digital painting style with:
@@ -93,8 +84,8 @@ def generate_image_with_gemini(
             - High-quality digital painting aesthetic
             This is the first scene of the story. Generate an image that illustrates:"""
-        # Build the request content
         content_parts.append(types.Part.from_text(system_prompt))
         if context_image:
             content_parts.append(pil_image_to_part(context_image))
@@ -103,12 +94,10 @@ def generate_image_with_gemini(
         contents = [types.Content(role="user", parts=content_parts)]
-        # The model requires both IMAGE and TEXT modalities.
         generate_content_config = types.GenerateContentConfig(
             response_modalities=["IMAGE", "TEXT"],
         )
-        # Generate content using the streaming API
         stream = client.models.generate_content_stream(
             model=model_name,
             contents=contents,
@@ -121,23 +110,18 @@ def generate_image_with_gemini(
             if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                 continue
-            # Iterate through all parts in the chunk
             for part in chunk.candidates[0].content.parts:
                 if part.inline_data and part.inline_data.data:
-                    # This part is an image
                     inline_data = part.inline_data
                     file_extension = mimetypes.guess_extension(inline_data.mime_type) or ".jpg"
                     full_file_name = f"{output_file_base}{file_extension}"
                     save_binary_file(full_file_name, inline_data.data)
                     saved_file_path = full_file_name
                 elif part.text:
-                    # This part is text
                     text_responses.append(part.text)
         if saved_file_path:
             print(f"✅ Successfully generated and saved image: {saved_file_path}")
-            if text_responses:
-                print(f"  -> Accompanied text: {''.join(text_responses)}")
         elif text_responses:
             print(f"⚠️ No image generated. API returned text only: {' '.join(text_responses)}")
         else:
@@ -147,11 +131,15 @@ def generate_image_with_gemini(
     except exceptions.InvalidArgument as e:
         print(f"❌ API Invalid Argument Error: {e}")
-        print("  -> This often means the model or request parameters are wrong. Check model name and modalities.")
         traceback.print_exc()
         return None
     except Exception as e:
-        print(f"❌ An unexpected error occurred during the Gemini API call: {e}")
         traceback.print_exc()
         return None
@@ -160,9 +148,6 @@ def generate_all_images_from_file(
     output_dir: str = "generated_images",
     output_json_path: str = "multimedia_data_with_images.json"
 ) -> List[Dict]:
-    """
-    Reads data from a JSON, generates images sequentially, and saves a new JSON with image paths.
-    """
     try:
         with open(json_path, 'r', encoding='utf-8') as f:
             multimedia_data = json.load(f)
@@ -202,12 +187,13 @@ def generate_all_images_from_file(
                 print(f"✅ Loaded image {saved_image_path} as context for the next generation.")
             except Exception as e:
                 print(f"⚠️ Could not load image {saved_image_path} for context. Error: {e}")
-                previous_image = None # Don't use a corrupted image as context
         else:
             print("❌ No image was generated for this item. Context will be reset.")
             previous_image = None
-    # Save the final JSON with all image paths
     try:
         with open(output_json_path, 'w', encoding='utf-8') as f:
             json.dump(multimedia_data, f, indent=2, ensure_ascii=False)

 import os
 import mimetypes
 import json
 import streamlit as st
 import io
 import time
 import traceback
 from PIL import Image
 from typing import List, Dict, Optional
 from google import genai
+from google.generativeai import types
 from google.api_core import exceptions
 # --- Client Initialization ---
 client = None
 try:
     api_key = st.secrets.get("GEMINI_API_KEY")
     if api_key:
         client = genai.Client(api_key=api_key)
     st.stop()
 # --- Helper Functions ---
 def save_binary_file(file_name: str, data: bytes):
     """Saves binary data to a file."""
     try:
 def pil_image_to_part(image: Image.Image) -> types.Part:
     """Converts a PIL Image to a genai.types.Part object."""
     img_byte_arr = io.BytesIO()
     image.save(img_byte_arr, format='JPEG')
     img_bytes = img_byte_arr.getvalue()
+    # This call was correct and remains the same.
     return types.Part.from_bytes(
         data=img_bytes,
         mime_type='image/jpeg'
     context_image: Optional[Image.Image] = None
 ) -> Optional[str]:
     """
+    Generates an image using the Gemini API, now with all corrections.
     """
     if not client:
         print("❌ Gemini client not initialized.")
             - Lighting and atmosphere
             Style: Cinematic, epic fantasy digital painting with rich details and dramatic lighting.
             Generate an image that illustrates the following scene:"""
+            print(" -> Using previous image as context.")
         else:
             system_prompt = """You are a master storyboard artist creating the opening scene of a visual story.
             IMPORTANT: You MUST generate an image for this request. Create a stunning, cinematic image in an epic fantasy digital painting style with:
             - High-quality digital painting aesthetic
             This is the first scene of the story. Generate an image that illustrates:"""
         content_parts.append(types.Part.from_text(system_prompt))
         if context_image:
             content_parts.append(pil_image_to_part(context_image))
         contents = [types.Content(role="user", parts=content_parts)]
         generate_content_config = types.GenerateContentConfig(
             response_modalities=["IMAGE", "TEXT"],
         )
         stream = client.models.generate_content_stream(
             model=model_name,
             contents=contents,
             if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                 continue
             for part in chunk.candidates[0].content.parts:
                 if part.inline_data and part.inline_data.data:
                     inline_data = part.inline_data
                     file_extension = mimetypes.guess_extension(inline_data.mime_type) or ".jpg"
                     full_file_name = f"{output_file_base}{file_extension}"
                     save_binary_file(full_file_name, inline_data.data)
                     saved_file_path = full_file_name
                 elif part.text:
                     text_responses.append(part.text)
         if saved_file_path:
             print(f"✅ Successfully generated and saved image: {saved_file_path}")
         elif text_responses:
             print(f"⚠️ No image generated. API returned text only: {' '.join(text_responses)}")
         else:
     except exceptions.InvalidArgument as e:
         print(f"❌ API Invalid Argument Error: {e}")
+        traceback.print_exc()
+        return None
+    except TypeError as e:
+        print(f"❌ TypeError during API call: {e}")
+        print("  -> This often indicates an incorrect way of creating API objects like 'Part'. Please double-check SDK documentation.")
         traceback.print_exc()
         return None
     except Exception as e:
+        print(f"❌ An unexpected error occurred: {e}")
         traceback.print_exc()
         return None
     output_dir: str = "generated_images",
     output_json_path: str = "multimedia_data_with_images.json"
 ) -> List[Dict]:
     try:
         with open(json_path, 'r', encoding='utf-8') as f:
             multimedia_data = json.load(f)
                 print(f"✅ Loaded image {saved_image_path} as context for the next generation.")
             except Exception as e:
                 print(f"⚠️ Could not load image {saved_image_path} for context. Error: {e}")
+                previous_image = None
         else:
             print("❌ No image was generated for this item. Context will be reset.")
             previous_image = None
+        time.sleep(2)
     try:
         with open(output_json_path, 'w', encoding='utf-8') as f:
             json.dump(multimedia_data, f, indent=2, ensure_ascii=False)