Spaces:

AkashKumarave
/

my3

Running

App Files Community

AkashKumarave committed on Sep 3

Commit

84a350d

verified ·

1 Parent(s): 6950f14

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -24

app.py CHANGED Viewed

@@ -81,13 +81,15 @@ def validate_image(image_content: bytes):
         img = Image.open(io.BytesIO(image_content))
         if img.format not in ["PNG", "JPEG"]:
             raise HTTPException(status_code=400, detail="Only PNG or JPEG images are supported")
         return True, img.format.lower()
     except Exception as e:
         raise HTTPException(status_code=400, detail=f"Image validation error: {str(e)}")
 # ===== API FUNCTIONS =====
 def create_multi_image_task(subject_images: List[bytes], prompt: str):
-    """Create image generation task with Gemini API (exactly two images)"""
     headers = {
         "Content-Type": "application/json"
     }
@@ -103,31 +105,62 @@ def create_multi_image_task(subject_images: List[bytes], prompt: str):
                 }
             })
     payload = {
         "contents": [
             {
                 "parts": [
-                    {"text": prompt},
                     *subject_image_list
                 ]
             }
         ]
     }
-    try:
-        logger.info(f"Sending request to Gemini API with payload: {payload}")
-        response = requests.post(CREATE_TASK_ENDPOINT, json=payload, headers=headers)
-        response.raise_for_status()
-        data = response.json()
-        logger.info(f"API response: {data}")
-        if not data.get("candidates") or not data["candidates"][0].get("content"):
-            raise HTTPException(status_code=500, detail="No valid content returned from API")
-        return data
-    except requests.exceptions.RequestException as e:
-        logger.error(f"API request failed: {str(e)}")
-        if hasattr(e, 'response') and e.response:
-            logger.error(f"API response: {e.response.text}")
-        raise HTTPException(status_code=500, detail=f"API Error: {str(e)}")
 # ===== MAIN PROCESSING =====
 async def generate_image(subject_images: List[bytes], prompt: str):
@@ -159,21 +192,43 @@ async def generate_image(subject_images: List[bytes], prompt: str):
         parts = candidate["content"]["parts"]
         logger.info(f"Response parts: {parts}")
-        # Find the part with inline_data
         image_base64 = None
         for part in parts:
             if "inline_data" in part and "data" in part["inline_data"]:
                 image_base64 = part["inline_data"]["data"]
-                break
             elif "text" in part:
-                logger.info(f"Text part found: {part['text']}")
-        if not image_base64:
-            logger.error(f"No inline_data found in response parts: {parts}")
-            raise HTTPException(status_code=500, detail="No inline_data found in API response")
-        # Decode and save the image
-        image_data = base64.b64decode(image_base64)
         output_dir = Path("/tmp")
         output_dir.mkdir(exist_ok=True)
         output_path = output_dir / f"gemini_output_{int(time.time())}.png"

         img = Image.open(io.BytesIO(image_content))
         if img.format not in ["PNG", "JPEG"]:
             raise HTTPException(status_code=400, detail="Only PNG or JPEG images are supported")
+        logger.info(f"Validated image: format={img.format}, size={size_mb:.2f}MB")
         return True, img.format.lower()
     except Exception as e:
+        logger.error(f"Image validation error: {str(e)}")
         raise HTTPException(status_code=400, detail=f"Image validation error: {str(e)}")
 # ===== API FUNCTIONS =====
 def create_multi_image_task(subject_images: List[bytes], prompt: str):
+    """Create image generation task with Gemini API (up to two images)"""
     headers = {
         "Content-Type": "application/json"
     }
                 }
             })
+    # Use a more descriptive prompt structure as per documentation
+    enhanced_prompt = f"A photorealistic composition combining elements from the provided images: {prompt}. Ensure the scene is cohesive, with soft, natural lighting and a balanced aspect ratio of 16:9."
     payload = {
         "contents": [
             {
                 "parts": [
+                    {"text": enhanced_prompt},
                     *subject_image_list
                 ]
             }
+        ],
+        "generationConfig": {
+            "response_mime_type": "image/png"
+        },
+        "safetySettings": [
+            {
+                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                "threshold": "BLOCK_NONE"
+            },
+            {
+                "category": "HARM_CATEGORY_HATE_SPEECH",
+                "threshold": "BLOCK_NONE"
+            },
+            {
+                "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                "threshold": "BLOCK_NONE"
+            },
+            {
+                "category": "HARM_CATEGORY_HARASSMENT",
+                "threshold": "BLOCK_NONE"
+            }
         ]
     }
+    max_retries = 1
+    for attempt in range(max_retries + 1):
+        try:
+            logger.info(f"Sending request to Gemini API (attempt {attempt + 1}): {payload}")
+            response = requests.post(CREATE_TASK_ENDPOINT, json=payload, headers=headers)
+            response.raise_for_status()
+            data = response.json()
+            logger.info(f"API response: {data}")
+            if "safetyRatings" in data:
+                logger.info(f"Safety ratings: {data['safetyRatings']}")
+            if not data.get("candidates") or not data["candidates"][0].get("content"):
+                raise HTTPException(status_code=500, detail="No valid content returned from API")
+            return data
+        except requests.exceptions.RequestException as e:
+            logger.error(f"API request failed: {str(e)}")
+            if hasattr(e, 'response') and e.response:
+                logger.error(f"API response: {e.response.text}")
+                if e.response.status_code in [429, 500] and attempt < max_retries:
+                    time.sleep(2 ** attempt)  # Exponential backoff
+                    continue
+            raise HTTPException(status_code=500, detail=f"API Error: {str(e)}")
 # ===== MAIN PROCESSING =====
 async def generate_image(subject_images: List[bytes], prompt: str):
         parts = candidate["content"]["parts"]
         logger.info(f"Response parts: {parts}")
+        # Find the part with inline_data or file_uri
         image_base64 = None
+        file_uri = None
+        text_response = None
         for part in parts:
             if "inline_data" in part and "data" in part["inline_data"]:
                 image_base64 = part["inline_data"]["data"]
+                if not image_base64:
+                    logger.warning("Empty inline_data.data received")
+            elif "fileUri" in part:
+                file_uri = part["fileUri"]
+                logger.info(f"File URI found: {file_uri}")
             elif "text" in part:
+                text_response = part["text"]
+                logger.info(f"Text part found: {text_response}")
+        if not image_base64 and not file_uri:
+            error_detail = text_response or "No image data (inline_data or fileUri) found in API response"
+            if image_base64 == "":
+                error_detail = f"Empty inline_data.data returned by API: {text_response or 'No additional details'}"
+            logger.error(f"No image data in response parts: {parts}")
+            raise HTTPException(status_code=500, detail=f"API error: {error_detail}")
+        if file_uri:
+            # Download image from file_uri
+            logger.info(f"Downloading image from {file_uri}")
+            response = requests.get(file_uri)
+            response.raise_for_status()
+            image_data = response.content
+        else:
+            # Decode base64 image
+            try:
+                image_data = base64.b64decode(image_base64)
+            except Exception as e:
+                logger.error(f"Failed to decode base64 image: {str(e)}")
+                raise HTTPException(status_code=500, detail=f"Failed to decode image data: {str(e)}")
         output_dir = Path("/tmp")
         output_dir.mkdir(exist_ok=True)
         output_path = output_dir / f"gemini_output_{int(time.time())}.png"