Spaces:

abhinav0231
/

SparrowTale

Sleeping

App Files Files Community

abhinav0231 committed on Sep 5, 2025

Commit

a296a03

verified ·

1 Parent(s): 94047f1

Update image_generation.py

Browse files

Files changed (1) hide show

image_generation.py +57 -64

image_generation.py CHANGED Viewed

@@ -3,13 +3,13 @@ import mimetypes
 import json
 import streamlit as st
 import google.generativeai as genai
 from typing import List, Dict, Optional
 from PIL import Image
 import io
-import time
 import traceback
-# Configure the client once with the API key
 try:
     api_key = st.secrets.get("GEMINI_API_KEY") or os.getenv("GEMINI_API_KEY")
     if api_key:
@@ -17,15 +17,11 @@ try:
         print("✅ Google AI client for Gemini configured successfully.")
     else:
         print("⚠️ Warning: GEMINI_API_KEY not found.")
-        # Exit if running in a context where it should be mandatory
-        # exit(1)
 except Exception as e:
     print(f"❌ Error configuring Google AI client: {e}")
-    # exit(1)
-# --- Helper Function ---
 def save_binary_file(file_name: str, data: bytes):
-    """Saves binary data to a file."""
     try:
         with open(file_name, "wb") as f:
             f.write(data)
@@ -33,72 +29,73 @@ def save_binary_file(file_name: str, data: bytes):
     except Exception as e:
         print(f"❌ Error saving file {file_name}: {e}")
-# --- IMAGE GENERATION FUNCTION ---
 def generate_image_with_gemini(
     prompt: str,
     output_file_base: str,
     context_image: Optional[Image.Image] = None
 ) -> Optional[str]:
     """
-    Generates an image using the updated Gemini API syntax, optionally using a
-    previous image as context.
     """
     print(f"--- 🎨 Generating image for prompt: '{prompt[:70]}...' ---")
     try:
-        # Define the specific image generation model
-        model = genai.GenerativeModel(model_name="gemini-1.5-flash") # Use a current, valid model
-        # --- UPDATED CONTENT STRUCTURE ---
-        # Build a simple list of content parts. The SDK handles the type.
         content_parts = []
         if context_image:
-            system_prompt = """You are a master storyboard artist creating a visual story sequence.
-IMPORTANT: You MUST generate an image for every request.
-Create a visually consistent image that follows the art style and character design of the provided reference image. Maintain consistency in:
-- Character appearance and clothing
-- Art style and color palette
-- Lighting and atmosphere
-- Overall visual tone
-Style: Cinematic, epic fantasy digital painting with rich details and dramatic lighting.
-Generate an image that illustrates the following scene:"""
             print(" -> Using previous image as context for consistent styling.")
-            # Add the system prompt, context image, and user prompt
-            content_parts = [system_prompt, context_image, f"CREATE AN IMAGE NOW: {prompt}"]
         else:
-            system_prompt = """You are a master storyboard artist creating the opening scene of a visual story.
-IMPORTANT: You MUST generate an image for this request.
-Create a stunning, cinematic image in an epic fantasy digital painting style with:
-- Rich, detailed artwork
-- Dramatic lighting and atmosphere
-- High-quality digital painting aesthetic
-- Vivid colors and intricate details
-This is the first scene of the story. Generate an image that illustrates:"""
-            # Add the system prompt and user prompt
-            content_parts = [system_prompt, f"CREATE AN IMAGE NOW: {prompt}"]
-        # --- UPDATED API CALL ---
-        # The generate_content method now takes a simple list
-        response = model.generate_content(content_parts, stream=True)
         saved_file_path = None
         text_responses = []
-        for chunk in response:
-            # The modern SDK provides image data directly in parts
-            if chunk.parts and chunk.parts[0].file_data:
-                file_data = chunk.parts[0].file_data
-                data_buffer = file_data.data
-                file_extension = mimetypes.guess_extension(file_data.mime_type) or ".jpg"
-                full_file_name = f"{output_file_base}{file_extension}"
-                save_binary_file(full_file_name, data_buffer)
-                saved_file_path = full_file_name
-                print(f"✅ Successfully generated and saved image: {full_file_name}")
-            # Collect any text responses for debugging
-            if chunk.text:
-                text_responses.append(chunk.text)
         if not saved_file_path and text_responses:
             print(f"⚠️ No image generated. API returned text: {' '.join(text_responses)}")
@@ -110,16 +107,13 @@ This is the first scene of the story. Generate an image that illustrates:"""
         traceback.print_exc()
         return None
-# The rest of your script (generate_all_images_from_file) does not need changes
-# as it correctly calls the function above.
 def generate_all_images_from_file(
     json_path: str,
     output_dir: str = "generated_images",
     output_json_path: str = "multimedia_data_with_images.json"
 ) -> List[Dict[str, str]]:
-    """
-    Reads data from a JSON, generates images sequentially, and saves a new JSON with image paths.
-    """
     try:
         with open(json_path, 'r', encoding='utf-8') as f:
             multimedia_data = json.load(f)
@@ -183,5 +177,4 @@ def generate_all_images_from_file(
     except Exception as e:
         print(f"❌ Error saving updated JSON: {e}")
-    return multimedia_data

 import json
 import streamlit as st
 import google.generativeai as genai
+from google.generativeai import types
 from typing import List, Dict, Optional
 from PIL import Image
 import io
 import traceback
+import time
 try:
     api_key = st.secrets.get("GEMINI_API_KEY") or os.getenv("GEMINI_API_KEY")
     if api_key:
         print("✅ Google AI client for Gemini configured successfully.")
     else:
         print("⚠️ Warning: GEMINI_API_KEY not found.")
 except Exception as e:
     print(f"❌ Error configuring Google AI client: {e}")
+# --- Helper Functions ---
 def save_binary_file(file_name: str, data: bytes):
     try:
         with open(file_name, "wb") as f:
             f.write(data)
     except Exception as e:
         print(f"❌ Error saving file {file_name}: {e}")
+def pil_image_to_part(image: Image.Image) -> types.Part:
+    img_byte_arr = io.BytesIO()
+    image.save(img_byte_arr, format='JPEG')
+    img_bytes = img_byte_arr.getvalue()
+    return types.Part.from_bytes(
+        data=img_bytes,
+        mime_type='image/jpeg'
+    )
 def generate_image_with_gemini(
     prompt: str,
     output_file_base: str,
     context_image: Optional[Image.Image] = None
 ) -> Optional[str]:
     """
+    Generates an image, using your original SDK logic and model.
     """
     print(f"--- 🎨 Generating image for prompt: '{prompt[:70]}...' ---")
     try:
+        # Your model and content structure are preserved
+        model_name = "gemini-2.5-flash-image-preview"
+        model = genai.GenerativeModel(model_name)
         content_parts = []
         if context_image:
+            system_prompt = """You are a master storyboard artist creating a visual story sequence... (rest of your prompt)"""
             print(" -> Using previous image as context for consistent styling.")
+            content_parts.append(types.Part.from_text(text=system_prompt))
+            content_parts.append(pil_image_to_part(context_image))
         else:
+            system_prompt = """You are a master storyboard artist creating the opening scene... (rest of your prompt)"""
+            content_parts.append(types.Part.from_text(text=system_prompt))
+        image_instruction = f"""CREATE AN IMAGE NOW:\n{prompt}\nRemember: You must generate a visual image..."""
+        content_parts.append(types.Part.from_text(text=image_instruction))
+        contents = [types.Content(role="user", parts=content_parts)]
+        generate_content_config = types.GenerateContentConfig(
+            response_modalities=["IMAGE", "TEXT"],
+        )
+        # --- CORRECTED API CALL ---
+        # The streaming call is made directly on the model object, not a separate client.
+        stream = model.generate_content(
+            contents=contents,
+            generation_config=generate_content_config, # Corrected parameter name
+            stream=True
+        )
         saved_file_path = None
         text_responses = []
+        # Your original response handling logic
+        for chunk in stream:
+            if (chunk.candidates and chunk.candidates[0].content and chunk.candidates[0].content.parts):
+                part = chunk.candidates[0].content.parts[0]
+                if part.inline_data and part.inline_data.data:
+                    inline_data = part.inline_data
+                    data_buffer = inline_data.data
+                    file_extension = mimetypes.guess_extension(inline_data.mime_type) or ".jpg"
+                    full_file_name = f"{output_file_base}{file_extension}"
+                    save_binary_file(full_file_name, data_buffer)
+                    saved_file_path = full_file_name
+                    print(f"✅ Successfully generated and saved image: {full_file_name}")
+                elif hasattr(chunk, 'text') and chunk.text:
+                    text_responses.append(chunk.text)
         if not saved_file_path and text_responses:
             print(f"⚠️ No image generated. API returned text: {' '.join(text_responses)}")
         traceback.print_exc()
         return None
 def generate_all_images_from_file(
     json_path: str,
     output_dir: str = "generated_images",
     output_json_path: str = "multimedia_data_with_images.json"
 ) -> List[Dict[str, str]]:
     try:
         with open(json_path, 'r', encoding='utf-8') as f:
             multimedia_data = json.load(f)
     except Exception as e:
         print(f"❌ Error saving updated JSON: {e}")
+    return multimedia_data