Spaces:
Sleeping
Sleeping
File size: 12,535 Bytes
a6314a0 817a0bc a6314a0 15216cf a6314a0 3203c15 a6314a0 7dc8075 a6314a0 7dc8075 a6314a0 7dc8075 a6314a0 2756581 a6314a0 7dc8075 a6314a0 2756581 a6314a0 7dc8075 a6314a0 7dc8075 a6314a0 7dc8075 a6314a0 7dc8075 a6314a0 7dc8075 a6314a0 2756581 a6314a0 7dc8075 a6314a0 7dc8075 a6314a0 7dc8075 a6314a0 2756581 a6314a0 7dc8075 13324a6 7dc8075 f49b26c a180781 3203c15 13324a6 7dc8075 13324a6 7dc8075 a6314a0 2756581 a6314a0 7dc8075 a6314a0 7dc8075 a6314a0 2756581 a6314a0 7dc8075 a6314a0 b8d3065 817a0bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 |
# -----------------------
# Image Generation
# -----------------------
import os
import re
import time
import tempfile
import requests
import json
from google import genai
from google.genai import types
import io
import base64
import numpy as np
import cv2
import logging
import uuid
import subprocess
from pathlib import Path
import urllib.parse
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import base64
import os
import uuid
import matplotlib
import matplotlib.pyplot as plt
from io import BytesIO
import dataframe_image as dfi
import uuid
from PIL import ImageFont, ImageDraw, Image
import seaborn as sns
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
token = os.getenv('HF_API')
headers = {"Authorization": f"Bearer {token}"}
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
def is_valid_png(file_path):
    """Return True if `file_path` points to a structurally valid PNG image."""
    png_signature = b'\x89PNG\r\n\x1a\n'
    try:
        # A real PNG always starts with the 8-byte magic signature.
        with open(file_path, "rb") as fh:
            if fh.read(8) != png_signature:
                return False
        # Let Pillow walk the chunks and confirm internal integrity.
        with Image.open(file_path) as img:
            img.verify()
        return True
    except Exception as exc:
        print(f"Invalid PNG file at {file_path}: {exc}")
        return False
def standardize_and_validate_image(file_path):
    """Validate, standardize, and overwrite the image at `file_path`."""
    try:
        # First pass: confirm the file is a readable image at all.
        with Image.open(file_path) as probe:
            probe.verify()
        # verify() consumes the handle, so reopen for the actual conversion.
        with Image.open(file_path) as img:
            rgb = img.convert("RGB")  # strip any alpha channel
            scratch = io.BytesIO()
            rgb.save(scratch, format="PNG")
        # Overwrite the original file with the standardized PNG bytes.
        with open(file_path, "wb") as out:
            out.write(scratch.getvalue())
        return True
    except Exception as exc:
        print(f"Failed to standardize/validate {file_path}: {exc}")
        return False
def generate_image(prompt_text, style, model="hf"):
    """
    Generate an image from a text prompt using one of several backends.

    Backends:
        "hf"                 - Hugging Face FLUX.1-dev (default)
        "pollinations_turbo" - Pollinations Turbo
        "gemini"             - Google Gemini image generation
        "pexels"             - Pexels photo search (real photo, not AI)

    Args:
        prompt_text (str): The text prompt for image generation or search.
        style (str or None): The style of the image (used for HF and Gemini
            models, ignored for Pollinations and Pexels).
        model (str): Which backend to use
            ("hf", "pollinations_turbo", "gemini", or "pexels").

    Returns:
        tuple: (PIL.Image, base64_string) on success, (None, None) on a
        handled API failure, or (grey placeholder PIL.Image, None) when an
        unexpected exception occurs.
    """
    # Explicit timeout so a hung endpoint cannot block the caller forever;
    # a timeout surfaces via the generic exception path (placeholder image).
    request_timeout = 60
    try:
        if model == "pollinations_turbo":
            # URL-encode the prompt and select the "turbo" model via query param.
            prompt_encoded = urllib.parse.quote(prompt_text)
            api_url = f"https://image.pollinations.ai/prompt/{prompt_encoded}?model=turbo"
            response = requests.get(api_url, timeout=request_timeout)
            if response.status_code != 200:
                logger.error(f"Pollinations API error: {response.status_code}, {response.text}")
                print(f"Error from image generation API: {response.status_code}")
                return None, None
            image_bytes = response.content
        elif model == "gemini":
            # For Google's Gemini model
            try:
                g_api_key = os.getenv("GEMINI")
                if not g_api_key:
                    logger.error("GEMINI_API_KEY not found in environment variables")
                    print("Google Gemini API key is missing. Please set the GEMINI_API_KEY environment variable.")
                    return None, None
                # Initialize Gemini client
                client = genai.Client(api_key=g_api_key)
                # Enhance prompt with style
                enhanced_prompt = f"image of {prompt_text} in {style} style, high quality, detailed illustration"
                response = client.models.generate_content(
                    model="models/gemini-2.0-flash-exp",
                    contents=enhanced_prompt,
                    config=types.GenerateContentConfig(response_modalities=['Text', 'Image'])
                )
                # Gemini interleaves text and image parts; use the first image.
                for part in response.candidates[0].content.parts:
                    if part.inline_data is not None:
                        # Convert to RGB: the inline data may be a PNG with an
                        # alpha channel, and JPEG encoding cannot handle alpha.
                        image = Image.open(BytesIO(part.inline_data.data)).convert("RGB")
                        buffered = io.BytesIO()
                        image.save(buffered, format="JPEG")
                        img_str = base64.b64encode(buffered.getvalue()).decode()
                        return image, img_str
                # If no image was found in the response
                logger.error("No image was found in the Gemini API response")
                print("Gemini API didn't return an image")
                return None, None
            except ImportError:
                logger.error("Google Gemini libraries not installed")
                print("Google Gemini libraries not installed. Install with 'pip install google-genai'")
                return None, None
            except Exception as e:
                logger.error(f"Gemini API error: {str(e)}")
                print(f"Error from Gemini image generation: {str(e)}")
                return None, None
        elif model == "pexels":
            pexels_api_key = os.getenv("PEXELS_API_KEY")
            if not pexels_api_key:
                logger.error("PEXELS_API_KEY not found in environment variables")
                print("Pexels API key is missing. Please set the PEXELS_API_KEY environment variable.")
                return None, None
            headers_pexels = {
                "Authorization": pexels_api_key
            }
            # Distill the prompt into short keywords via Gemini, because
            # Pexels is a keyword search engine, not a generative model.
            g_api_key = os.getenv("GEMINI")
            client = genai.Client(api_key=g_api_key)
            new_prompt = client.models.generate_content(model="models/gemini-2.0-flash-lite", contents=f"Generate a keywords or a phrase, (maximum 3 words) to search for an appropriate image on the pexels API based on this prompt: {prompt_text} return only the keywords and nothing else")
            pexel_prompt = str(new_prompt.text)
            search_url = f"https://api.pexels.com/v1/search?query={pexel_prompt}&per_page=1"
            response = requests.get(search_url, headers=headers_pexels, timeout=request_timeout)
            if response.status_code != 200:
                logger.error(f"Pexels API error: {response.status_code}, {response.text} from {pexel_prompt}")
                print(f"Error from Pexels API: {response.status_code}")
                return None, None
            data = response.json()
            photos = data.get("photos", [])
            if not photos:
                logger.error("No photos found for the given prompt on Pexels")
                print("No photos found on Pexels for this prompt.")
                return None, None
            # Take the first photo; prefer "large2x", fall back to "original".
            photo = photos[0]
            image_url = photo["src"].get("large2x") or photo["src"].get("original")
            if not image_url:
                logger.error("No suitable image URL found in Pexels photo object")
                return None, None
            img_resp = requests.get(image_url, timeout=request_timeout)
            if img_resp.status_code != 200:
                logger.error(f"Failed to download Pexels image from {image_url}")
                return None, None
            image_bytes = img_resp.content
        else:
            # Default to Hugging Face inference API.
            enhanced_prompt = f"{prompt_text} in {style} style, high quality, detailed illustration"
            model_id = "black-forest-labs/FLUX.1-dev"
            api_url = f"https://api-inference.huggingface.co/models/{model_id}"
            payload = {"inputs": enhanced_prompt}
            response = requests.post(api_url, headers=headers, json=payload, timeout=request_timeout)
            if response.status_code != 200:
                logger.error(f"Hugging Face API error: {response.status_code}, {response.text}")
                print(f"Error from image generation API: {response.status_code}")
                return None, None
            image_bytes = response.content
        # HF, Pollinations, and Pexels all land here with raw image bytes
        # (the Gemini branch returns from inside its own block).
        if model != "gemini":
            # Convert to RGB before JPEG encoding: PNGs with an alpha channel
            # would otherwise make Image.save(..., format="JPEG") raise.
            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
            buffered = io.BytesIO()
            image.save(buffered, format="JPEG")
            img_str = base64.b64encode(buffered.getvalue()).decode()
            return image, img_str
    except Exception as e:
        print(f"Error generating image: {e}")
        logger.error(f"Image generation error: {str(e)}")
        # Last-resort fallback: grey placeholder so callers always get an image.
        return Image.new('RGB', (1024, 1024), color=(200,200,200)), None
def generate_image_with_retry(prompt_text, style, model="hf", max_retries=3):
    """
    Attempt to generate an image via generate_image, retrying up to
    max_retries times with exponential backoff.

    NOTE: generate_image traps its own exceptions and signals failure by
    returning (None, None) or (placeholder, None), so the original
    exception-driven retry never actually retried. Retries are therefore
    driven by inspecting the returned tuple; the except branch remains as a
    safety net in case generate_image ever raises.

    Args:
        prompt_text (str): The text prompt for image generation.
        style (str or None): The style of the image (ignored by some backends).
        model (str): Which model to use
            ("hf", "pollinations_turbo", "gemini", or "pexels").
        max_retries (int): Maximum number of attempts.

    Returns:
        tuple: (PIL.Image, base64_string) on success; otherwise the last
        attempt's result (possibly (None, None)).
    """
    result = (None, None)
    for attempt in range(max_retries):
        try:
            if attempt > 0:
                time.sleep(2 ** attempt)  # exponential backoff: 2s, 4s, ...
            result = generate_image(prompt_text, style, model=model)
            image, img_str = result
            # A successful generation yields both the image and its base64
            # string; anything else is a failure worth retrying.
            if image is not None and img_str is not None:
                return result
            logger.error(f"Attempt {attempt+1} produced no usable image")
        except Exception as e:
            logger.error(f"Attempt {attempt+1} failed: {e}")
            if attempt == max_retries - 1:
                raise
    return result
# edit image function
def edit_section_image(image_url: str, gemini_prompt: str):
    """
    Downloads the existing image from image_url, uses Google Gemini to edit it
    according to gemini_prompt, and returns the new edited image (as a PIL.Image).
    Returns None on any failure.
    """
    try:
        # Fetch the source image to be edited.
        download = requests.get(image_url)
        if download.status_code != 200:
            logger.error(f"Failed to download image from {image_url}")
            return None
        source_image = Image.open(io.BytesIO(download.content))
        # A Gemini API key is required for the edit call.
        api_key = os.getenv("GEMINI")
        if not api_key:
            logger.error("GEMINI_API_KEY not found in environment variables")
            print("Google Gemini API key is missing. Please set the GEMINI_API_KEY environment variable.")
            return None
        gemini_client = genai.Client(api_key=api_key)
        # Gemini accepts a mixed contents list: [instruction text, PIL image].
        reply = gemini_client.models.generate_content(
            model="models/gemini-2.0-flash-exp",
            contents=[gemini_prompt, source_image],
            config=types.GenerateContentConfig(
                response_modalities=['Text', 'Image']
            )
        )
        # The reply may interleave text and image parts; return the first image.
        for part in reply.candidates[0].content.parts:
            if part.inline_data is not None:
                return Image.open(io.BytesIO(part.inline_data.data))
        logger.error("No edited image found in Gemini response")
        return None
    except Exception as e:
        logger.error(f"Error editing section image: {e}")
        return None