Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files
- helpers.py +8 -0
- image_functions.py +82 -0
- image_processor.py +130 -0
- mongo_logger.py +35 -0
- r2_uploader.py +13 -0
- requirements.txt +3 -0
helpers.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64, os
|
| 2 |
+
|
| 3 |
+
def encode_image_to_base64(image_path: str) -> str:
    """Return the contents of the file at ``image_path`` as Base64 text.

    The file is opened in binary mode and read in full; the Base64 bytes are
    decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
|
| 6 |
+
|
| 7 |
+
def is_valid_image(file_name: str) -> bool:
    """Tell whether ``file_name`` ends with a supported image extension.

    The comparison is case-insensitive and looks only at the end of the name;
    the file itself is never opened.
    """
    supported_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".webp")
    lowered = file_name.lower()
    return any(lowered.endswith(suffix) for suffix in supported_suffixes)
|
image_functions.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import zipfile
|
| 3 |
+
import logging
|
| 4 |
+
from typing import List, Optional
|
| 5 |
+
import base64
|
| 6 |
+
from openai import OpenAI
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
load_dotenv()
|
| 9 |
+
|
| 10 |
+
def unzip_images(zip_path: str, extract_to: Optional[str] = None) -> List[str]:
    """Extract the image members of a zip archive and return them Base64-encoded.

    Only members whose extension (case-insensitive) is one of .jpg, .jpeg,
    .png, .gif, .bmp, .tiff or .webp are extracted; every other member is
    ignored. Note that, despite the function name, the returned list holds the
    Base64 *contents* of the images, not their paths.

    Args:
        zip_path: Path of the zip archive to read.
        extract_to: Directory to extract into; created if it does not exist.
            Defaults to the archive's base name without its extension, which
            is a path relative to the current working directory.

    Returns:
        One Base64 string per image that was extracted and encoded, in archive
        order. A member that fails is logged and skipped. An empty list is
        returned when the archive does not exist, is not a valid zip file, or
        an unexpected error occurs; those failures are logged, not raised.
    """
    logger = logging.getLogger(__name__)
    logger.debug("Starting to unzip images from %r", zip_path)

    # Determine extraction directory
    if extract_to is None:
        extract_to = os.path.splitext(os.path.basename(zip_path))[0]
    logger.debug("Extraction directory set to %r", extract_to)

    # Define supported image extensions
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}

    encoded_images: List[str] = []

    try:
        # Ensure the zip file exists
        if not os.path.isfile(zip_path):
            logger.error("Zip file does not exist: %s", zip_path)
            return []

        # Create extraction directory if needed
        os.makedirs(extract_to, exist_ok=True)
        logger.debug("Ensured extraction directory exists: %r", extract_to)

        with zipfile.ZipFile(zip_path, 'r') as zf:
            for member in zf.infolist():
                filename = member.filename
                _, ext = os.path.splitext(filename.lower())

                # Only extract images
                if ext not in image_extensions:
                    continue

                try:
                    # ZipFile.extract() sanitises the member name (leading
                    # slashes and ".." components are dropped) and returns the
                    # path it actually wrote. Re-joining extract_to with the
                    # raw member name would point at a file that does not
                    # exist for such members.
                    file_path = zf.extract(member, extract_to)
                    encoded_images.append(_encode_image_to_base64(file_path))
                    logger.info("Extracted image: %s", file_path)
                except Exception as e:
                    # Best effort: one bad member must not abort the archive.
                    logger.error("Failed to extract %s: %s", filename, e)

        if not encoded_images:
            logger.warning("No images found in archive: %s", zip_path)

        logger.debug(
            "Finished unzipping. Total images extracted: %d", len(encoded_images)
        )
        return encoded_images

    except zipfile.BadZipFile:
        logger.exception("Bad zip file or corrupt archive: %s", zip_path)
        return []
    except Exception:
        logger.exception("Unexpected error while unzipping %s", zip_path)
        return []


def _encode_image_to_base64(image_path: str) -> str:
    """Read the file at ``image_path`` in binary mode and return it as Base64 text."""
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def generate_image(prompt, size, quality):
    """Generate one image with the OpenAI Images API and return its raw bytes.

    A new client is built on every call from the ``OPENAI_API_KEY``
    environment variable. The request uses the ``gpt-image-1`` model with the
    given ``prompt``, ``size`` and ``quality``. The ``b64_json`` field of the
    first returned item is Base64-decoded and returned as ``bytes``.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    openai_client = OpenAI(api_key=api_key)

    response = openai_client.images.generate(
        model="gpt-image-1",
        prompt=prompt,
        size=size,
        quality=quality,
    )

    first_item = response.data[0]
    return base64.b64decode(first_item.b64_json)
|
image_processor.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import zipfile
|
| 3 |
+
import tempfile
|
| 4 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 5 |
+
|
| 6 |
+
from image_functions import generate_image
|
| 7 |
+
from prompt_service import get_prompts
|
| 8 |
+
from r2_uploader import upload_image_to_r2
|
| 9 |
+
from mongo_logger import create_log, update_log_status
|
| 10 |
+
from helpers import encode_image_to_base64, is_valid_image
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def process_zip_and_generate_images(zip_path, category, size, quality, user_prompt):
    """Run the full pipeline for one uploaded zip and return public image URLs.

    The archive is extracted to a temporary directory, its valid image files
    are collected, and each is processed via ``process_image_files``. Every
    resulting URL is rewritten to the public static host by keeping only its
    final path segment.

    Args:
        zip_path: Path to the uploaded zip archive.
        category: Passed through to prompt generation and logging.
        size: Passed through to image generation.
        quality: Passed through to image generation.
        user_prompt: Passed through to prompt generation.

    Returns:
        On success, a list of rewritten URLs (possibly empty).
        On any exception, the 2-tuple ``([], ["Global error: ..."])``.
        NOTE(review): the failure shape differs from the success shape; it is
        kept unchanged here so existing callers are not broken.
    """
    temp_dir = None
    try:
        temp_dir = extract_zip_file(zip_path)

        image_files = get_valid_image_files(temp_dir)
        results = process_image_files(image_files, category, size, quality, user_prompt)

        new_base = "https://adgen.statics.lookfinity.net/"
        return [
            new_base + url.rsplit('/', 1)[-1]
            for entry in results
            for url in entry["urls"]
        ]

    except Exception as e:
        log_error(f"Global error in zip processing: {e}")
        return [], [f"Global error: {e}"]

    finally:
        # The extracted files are only needed while the images are being
        # processed; remove them explicitly instead of waiting for garbage
        # collection of the TemporaryDirectory object.
        if temp_dir is not None:
            temp_dir.cleanup()
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def extract_zip_file(zip_path):
    """Extract every member of ``zip_path`` into a fresh temporary directory.

    Returns:
        The ``tempfile.TemporaryDirectory`` object itself (use ``.name`` for
        the path). The caller owns it and is responsible for ``cleanup()``.

    Raises:
        Whatever ``zipfile.ZipFile`` or ``extractall`` raise (for example
        ``zipfile.BadZipFile`` or ``FileNotFoundError``). The temporary
        directory is removed before the exception propagates.
    """
    temp_dir = tempfile.TemporaryDirectory()
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir.name)
    except BaseException:
        # Nobody receives the directory on failure, so remove it here rather
        # than leaving it behind until the object is garbage-collected.
        temp_dir.cleanup()
        raise

    return temp_dir
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def get_valid_image_files(temp_dir):
    """Collect the image files found anywhere under an extracted archive.

    The directory tree rooted at ``temp_dir.name`` is walked recursively so
    that images stored inside a folder in the zip are found as well. Folders
    whose name contains ``__MACOSX`` are not descended into, and file names
    containing ``__MACOSX`` are skipped.

    Args:
        temp_dir: Object exposing the extraction directory as ``.name``
            (``extract_zip_file`` returns a ``tempfile.TemporaryDirectory``).

    Returns:
        A list of ``(file_name, file_path)`` tuples, where ``file_name`` is
        the base name and ``file_path`` the full path of each file accepted by
        ``is_valid_image``. Directories and files are visited in sorted order.
    """
    valid_files = []

    for root, dirs, files in os.walk(temp_dir.name):
        # Prune in place so os.walk never enters the metadata folders.
        dirs[:] = sorted(d for d in dirs if "__MACOSX" not in d)

        for file in sorted(files):
            if is_valid_image(file) and "__MACOSX" not in file:
                valid_files.append((file, os.path.join(root, file)))

    return valid_files
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def process_image_files(image_files, category, size, quality, user_prompt):
    """Process every image on a pool of up to five worker threads.

    For each ``(file_name, file_path)`` pair a log record is created via
    ``create_log`` and a ``process_single_image`` task is submitted. Results
    are gathered in completion order, and falsy results (failed images) are
    left out of the returned list.
    """
    progress_messages = []
    pending = []

    with ThreadPoolExecutor(max_workers=5) as pool:
        for name, path in image_files:
            record_id = create_log(category, name)
            progress_messages.append(f"Processing {name}")

            task = pool.submit(
                process_single_image,
                name,
                path,
                category,
                size,
                quality,
                user_prompt,
                record_id,
                progress_messages,
            )
            pending.append(task)

        outcomes = [task.result() for task in as_completed(pending)]

    return [outcome for outcome in outcomes if outcome]
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def process_single_image(file_name, file_path, category, size, quality, user_prompt, log_id, logs):
    """Generate and upload images for one source image, updating its log record.

    Steps: encode the file as Base64, obtain prompts via ``get_prompts``,
    generate and upload images via ``generate_images_from_prompts``, then
    record the outcome with ``update_log_status``.

    Args:
        file_name: Base name of the image, used in messages and the result.
        file_path: Path of the image file to read.
        category: Passed to ``get_prompts``.
        size: Passed to image generation.
        quality: Passed to image generation.
        user_prompt: Passed to ``get_prompts``.
        log_id: Identifier of the log record to update.
        logs: List that progress messages are appended to.

    Returns:
        ``{"file_name": ..., "urls": [...]}`` when at least one image URL was
        produced; ``None`` when none was produced or an exception occurred.
        Exceptions raised inside the ``try`` body are caught and recorded as a
        "failed" status.
    """
    try:
        base64_img = encode_image_to_base64(file_path)

        prompts = get_prompts(base64_img, category, user_prompt)
        # Record this only once the prompts actually exist.
        logs.append(f"Prompts generated for {file_name}")

        logs.append(f"Generating images for {file_name}")
        image_urls = generate_images_from_prompts(prompts, size, quality)

        if not image_urls:
            update_log_status(log_id, "failed", urls=image_urls, message="No images generated")
            return None

        update_log_status(log_id, "completed", urls=image_urls, message="Completed successfully")
        return {
            "file_name": file_name,
            "urls": image_urls
        }

    except Exception as e:
        logs.append(f"Processing failed for {file_name}: {e}")
        update_log_status(log_id, "failed", urls=[], message=str(e))
        return None
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def generate_images_from_prompts(prompts, size, quality):
    """Generate one image per prompt, upload each, and return the upload URLs.

    Each prompt is passed to ``generate_image`` and the result is handed to
    ``upload_image_to_r2``. A prompt whose generation or upload raises is
    reported through ``log_error`` and skipped, so the returned list may be
    shorter than ``prompts``.
    """
    uploaded_urls = []

    for prompt_text in prompts:
        try:
            image_data = generate_image(prompt_text, size, quality)
            location = upload_image_to_r2(image_data)
        except Exception as err:
            log_error(f"Image generation failed: {err}")
        else:
            uploaded_urls.append(location)

    return uploaded_urls
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def log_error(message):
    """Print ``message`` to standard output with an ``ERROR: `` prefix."""
    line = "ERROR: {}".format(message)
    print(line)
|
mongo_logger.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, uuid
|
| 2 |
+
from pymongo import MongoClient
|
| 3 |
+
from datetime import datetime, timezone
|
| 4 |
+
|
| 5 |
+
client = MongoClient(os.getenv("MONGO_URI"))
|
| 6 |
+
collection = client.adgenesis.records
|
| 7 |
+
|
| 8 |
+
def create_log(category, file_name):
    """Insert a new "in_progress" record for one image and return its id.

    The record is written to the module-level ``collection`` with a freshly
    generated UUID4 string as ``_id``, an empty ``urls`` list, the message
    "Processing started" and ``lob`` set to "search_arb".

    Args:
        category: Stored in the record's ``category`` field.
        file_name: Stored in the record's ``file_name`` field.

    Returns:
        The generated id as a string.
    """
    log_id = str(uuid.uuid4())
    # Read the clock once so created_at and updated_at are identical on a
    # brand-new record.
    now = datetime.now(timezone.utc)
    collection.insert_one({
        "_id": log_id,
        "category": category,
        "file_name": file_name,
        "status": "in_progress",
        "urls": [],
        "message": "Processing started",
        "created_at": now,
        "updated_at": now,
        "lob": "search_arb"
    })
    return log_id
|
| 22 |
+
|
| 23 |
+
def update_log_status(log_id, status, urls=None, message=None):
    """Update the status of an existing log record in ``collection``.

    ``status`` and ``updated_at`` (current UTC time) are always written.

    Args:
        log_id: ``_id`` of the record to update.
        status: New value for the ``status`` field.
        urls: New value for ``urls``; left untouched when ``None``. An
            explicit empty list is written, so callers can clear the field.
        message: New value for ``message``; left untouched when ``None``.
    """
    fields = {
        "status": status,
        "updated_at": datetime.now(timezone.utc)
    }
    # Compare against None rather than truthiness: an explicit [] or "" is a
    # value the caller asked to store, not "argument omitted".
    if urls is not None:
        fields["urls"] = urls
    if message is not None:
        fields["message"] = message

    collection.update_one({"_id": log_id}, {"$set": fields})
|
r2_uploader.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import boto3, os
|
| 2 |
+
from uuid import uuid4
|
| 3 |
+
|
| 4 |
+
def upload_image_to_r2(image_bytes):
    """Upload ``image_bytes`` as a PNG object and return its URL.

    A boto3 S3 client is created on each call from the ``R2_ENDPOINT``,
    ``R2_ACCESS_KEY`` and ``R2_SECRET_KEY`` environment variables. The object
    is stored in the bucket named by ``R2_BUCKET_NAME`` under a random
    ``<uuid4 hex>.png`` key with content type ``image/png``.

    Returns:
        The string ``"<R2_ENDPOINT>/<R2_BUCKET_NAME>/<key>"``.
    """
    endpoint = os.getenv("R2_ENDPOINT")
    bucket = os.getenv("R2_BUCKET_NAME")

    r2_client = boto3.client(
        "s3",
        endpoint_url=endpoint,
        aws_access_key_id=os.getenv("R2_ACCESS_KEY"),
        aws_secret_access_key=os.getenv("R2_SECRET_KEY"),
    )

    object_key = f"{uuid4().hex}.png"
    r2_client.put_object(
        Bucket=bucket,
        Key=object_key,
        Body=image_bytes,
        ContentType="image/png",
    )
    return f"{endpoint}/{bucket}/{object_key}"
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai
|
| 2 |
+
pymongo
|
| 3 |
+
boto3
|