userIdc2024 committed on
Commit
333e771
·
verified ·
1 Parent(s): 143d80b

Upload 6 files

Browse files
Files changed (6) hide show
  1. helpers.py +8 -0
  2. image_functions.py +82 -0
  3. image_processor.py +130 -0
  4. mongo_logger.py +35 -0
  5. r2_uploader.py +13 -0
  6. requirements.txt +3 -0
helpers.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import base64, os
2
+
3
def encode_image_to_base64(image_path: str) -> str:
    """Read the file at ``image_path`` and return its bytes as base64 text.

    The file is opened in binary mode and the base64 output is decoded as
    UTF-8 so the result is a plain ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
6
+
7
def is_valid_image(file_name: str) -> bool:
    """Tell whether ``file_name`` ends with a recognised image suffix.

    The comparison is case-insensitive and is a plain suffix match on the
    whole name.
    """
    recognised_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".webp")
    lowered = file_name.lower()
    return lowered.endswith(recognised_suffixes)
image_functions.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import zipfile
3
+ import logging
4
+ from typing import List, Optional
5
+ import base64
6
+ from openai import OpenAI
7
+ from dotenv import load_dotenv
8
+ load_dotenv()
9
+
10
def unzip_images(zip_path: str, extract_to: Optional[str] = None) -> List[str]:
    """Extract the image members of a zip archive and return them base64-encoded.

    Args:
        zip_path: Path of the zip archive to read.
        extract_to: Directory to extract into. When omitted, the archive's
            base name without its extension is used (relative to the
            current working directory).

    Returns:
        One base64 string per image member that was extracted and read
        successfully, in archive order. An empty list is returned when the
        archive does not exist, is not a valid zip file, contains no
        images, or an unexpected error occurs (errors are logged, not
        raised).
    """
    logger = logging.getLogger(__name__)
    logger.debug(f"Starting to unzip images from {zip_path!r}")

    # Determine extraction directory
    if extract_to is None:
        extract_to = os.path.splitext(os.path.basename(zip_path))[0]
        logger.debug(f"Extraction directory set to {extract_to!r}")

    # Define supported image extensions
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}

    extracted_files: List[str] = []

    try:
        # Ensure the zip file exists
        if not os.path.isfile(zip_path):
            logger.error(f"Zip file does not exist: {zip_path}")
            return []

        # Create extraction directory if needed
        os.makedirs(extract_to, exist_ok=True)
        logger.debug(f"Ensured extraction directory exists: {extract_to!r}")

        # Open and iterate through the zip
        with zipfile.ZipFile(zip_path, 'r') as zf:
            for member in zf.infolist():
                filename = member.filename
                _, ext = os.path.splitext(filename.lower())

                # Only extract images
                if ext not in image_extensions:
                    continue

                try:
                    # ZipFile.extract() sanitises the member name (drops
                    # leading slashes and ".." components), so the file may
                    # not land at join(extract_to, filename). Use the path
                    # it reports instead of rebuilding it by hand.
                    file_path = zf.extract(member, extract_to)
                    extracted_files.append(_encode_image_to_base64(file_path))
                    logger.info(f"Extracted image: {file_path}")
                except Exception as e:
                    # Best effort: one bad member must not abort the rest.
                    logger.error(f"Failed to extract {filename}: {e}")

        if not extracted_files:
            logger.warning(f"No images found in archive: {zip_path}")

        logger.debug(f"Finished unzipping. Total images extracted: {len(extracted_files)}")
        return extracted_files

    except zipfile.BadZipFile:
        logger.exception(f"Bad zip file or corrupt archive: {zip_path}")
        return []
    except Exception:
        logger.exception(f"Unexpected error while unzipping {zip_path}")
        return []


def _encode_image_to_base64(image_path):
    """Return the contents of ``image_path`` as a base64-encoded ``str``."""
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")
67
+
68
+
69
def generate_image(prompt, size, quality):
    """Request one image from OpenAI's ``gpt-image-1`` model and return its bytes.

    The API key is read from the ``OPENAI_API_KEY`` environment variable.
    The first entry of the response carries the image as base64 text,
    which is decoded before being returned.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    openai_client = OpenAI(api_key=api_key)

    request = {
        "model": "gpt-image-1",
        "prompt": prompt,
        "size": size,
        "quality": quality,
    }
    response = openai_client.images.generate(**request)

    first_image = response.data[0]
    return base64.b64decode(first_image.b64_json)
image_processor.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import zipfile
3
+ import tempfile
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
+
6
+ from image_functions import generate_image
7
+ from prompt_service import get_prompts
8
+ from r2_uploader import upload_image_to_r2
9
+ from mongo_logger import create_log, update_log_status
10
+ from helpers import encode_image_to_base64, is_valid_image
11
+
12
+
13
def process_zip_and_generate_images(zip_path, category, size, quality, user_prompt):
    """Run the full pipeline for one uploaded zip and return public image URLs.

    The archive is extracted to a temporary directory, its image files are
    processed, and every resulting URL is rewritten so that its last path
    segment is served from ``https://adgen.statics.lookfinity.net/``.

    Returns:
        On success, a list of rewritten URLs. On any exception, the tuple
        ``([], ["Global error: ..."])``.

    NOTE(review): the success and failure return shapes differ (list vs.
    2-tuple); kept as-is because callers are not visible here.
    """
    temp_dir = None
    try:
        temp_dir = extract_zip_file(zip_path)

        image_files = get_valid_image_files(temp_dir)
        results = process_image_files(image_files, category, size, quality, user_prompt)
        all_urls = [url for entry in results for url in entry["urls"]]

        new_base = "https://adgen.statics.lookfinity.net/"

        # Keep only the object key (last path segment) of each uploaded URL.
        modified_urls = [new_base + url.rsplit('/', 1)[-1] for url in all_urls]
        return modified_urls

    except Exception as e:
        log_error(f"Global error in zip processing: {e}")
        return [], [f"Global error: {e}"]

    finally:
        # Remove the extracted files deterministically instead of relying
        # on garbage collection of the TemporaryDirectory object.
        if temp_dir is not None:
            temp_dir.cleanup()
29
+
30
+
31
def extract_zip_file(zip_path):
    """Extract a zip archive into a fresh temporary directory.

    Args:
        zip_path: Path of the archive to extract.

    Returns:
        The ``tempfile.TemporaryDirectory`` holding the extracted files;
        its path is available as ``.name``. The caller owns it and should
        call ``.cleanup()`` when done.

    Raises:
        Whatever ``zipfile.ZipFile`` / ``extractall`` raise (for example
        ``zipfile.BadZipFile``). The temporary directory is removed before
        the exception propagates.
    """
    temp_dir = tempfile.TemporaryDirectory()
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir.name)
    except BaseException:
        # Nothing useful was produced; do not leave the directory behind.
        temp_dir.cleanup()
        raise

    return temp_dir
38
+
39
+
40
def get_valid_image_files(temp_dir):
    """Collect the image files found anywhere under the extraction directory.

    Archives often wrap their contents in a top-level folder, so the tree
    is walked recursively rather than listing only the first level.
    ``__MACOSX`` folders are skipped.

    Args:
        temp_dir: Object exposing the extraction directory path as
            ``.name`` (the value returned by ``extract_zip_file``).

    Returns:
        A list of ``(file_name, file_path)`` tuples, where ``file_name``
        is the bare file name and ``file_path`` its full path.
    """
    valid_files = []

    for root, dirs, files in os.walk(temp_dir.name):
        # Prune in place so os.walk never descends into macOS metadata
        # folders; sorting keeps the traversal order deterministic.
        dirs[:] = sorted(d for d in dirs if "__MACOSX" not in d)

        for file in sorted(files):
            if is_valid_image(file) and "__MACOSX" not in file:
                valid_files.append((file, os.path.join(root, file)))

    return valid_files
51
+
52
+
53
def process_image_files(image_files, category, size, quality, user_prompt):
    """Process every ``(file_name, file_path)`` pair on a pool of five threads.

    A log record is created for each file before its job is submitted.
    Truthy results from ``process_single_image`` are gathered in completion
    order and returned; falsy results are dropped.
    """
    collected = []
    logs = []
    pending = []

    with ThreadPoolExecutor(max_workers=5) as pool:
        for name, path in image_files:
            record_id = create_log(category, name)
            logs.append(f"Processing {name}")

            job = pool.submit(
                process_single_image,
                name,
                path,
                category,
                size,
                quality,
                user_prompt,
                record_id,
                logs,
            )
            pending.append(job)

        for finished in as_completed(pending):
            outcome = finished.result()
            if not outcome:
                continue
            collected.append(outcome)

    return collected
84
+
85
+
86
def process_single_image(file_name, file_path, category, size, quality, user_prompt, log_id, logs):
    """Generate images for one input file and record the outcome.

    The file is base64-encoded, prompts are obtained from ``get_prompts``,
    images are generated from them, and the log record identified by
    ``log_id`` is updated with the final status.

    Args:
        file_name: Name of the input file, used in log messages and the result.
        file_path: Path of the input file on disk.
        category, user_prompt: Passed through to ``get_prompts``.
        size, quality: Passed through to ``generate_images_from_prompts``.
        log_id: Identifier of the log record to update.
        logs: List that progress messages are appended to.

    Returns:
        ``{"file_name": ..., "urls": [...]}`` when at least one image URL
        was produced, otherwise ``None`` (including when an exception is
        raised during processing).
    """
    try:
        base64_img = encode_image_to_base64(file_path)

        prompts = get_prompts(base64_img, category, user_prompt)
        # Record the milestone only after the prompts actually exist, so a
        # failure in get_prompts is not preceded by a success message.
        logs.append(f"Prompts generated for {file_name}")

        logs.append(f"Generating images for {file_name}")
        image_urls = generate_images_from_prompts(prompts, size, quality)

        status = "completed" if image_urls else "failed"
        message = "Completed successfully" if image_urls else "No images generated"
        update_log_status(log_id, status, urls=image_urls, message=message)

        if image_urls:
            return {
                "file_name": file_name,
                "urls": image_urls
            }
        return None

    except Exception as e:
        logs.append(f"Processing failed for {file_name}: {e}")
        update_log_status(log_id, "failed", urls=[], message=str(e))
        return None
111
+
112
+
113
def generate_images_from_prompts(prompts, size, quality):
    """Generate and upload one image per prompt, returning the uploaded URLs.

    Each prompt is handled independently: if generation or upload raises,
    the error is reported through ``log_error`` and the remaining prompts
    are still processed.
    """
    uploaded = []

    for text in prompts:
        try:
            generated = generate_image(text, size, quality)
            uploaded.append(upload_image_to_r2(generated))
        except Exception as exc:
            log_error(f"Image generation failed: {exc}")

    return uploaded
127
+
128
+
129
def log_error(message):
    """Print ``message`` to standard output, prefixed with ``ERROR: ``."""
    print("ERROR: {}".format(message))
mongo_logger.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, uuid
2
+ from pymongo import MongoClient
3
+ from datetime import datetime, timezone
4
+
5
# Shared MongoDB client; the connection string comes from the MONGO_URI
# environment variable.
client = MongoClient(os.getenv("MONGO_URI"))

# Log documents are stored in the "records" collection of the "adgenesis"
# database.
collection = client["adgenesis"]["records"]
7
+
8
def create_log(category, file_name):
    """Insert an ``in_progress`` log record for one input file.

    Args:
        category: Category value stored on the record.
        file_name: Name of the file being processed.

    Returns:
        The new record's ``_id``, a random UUID4 rendered as a string.
    """
    log_id = str(uuid.uuid4())
    # Take the timestamp once so created_at and updated_at are identical
    # on a freshly created record.
    now = datetime.now(timezone.utc)
    collection.insert_one({
        "_id": log_id,
        "category": category,
        "file_name": file_name,
        "status": "in_progress",
        "urls": [],
        "message": "Processing started",
        "created_at": now,
        "updated_at": now,
        "lob": "search_arb"
    })
    return log_id
22
+
23
def update_log_status(log_id, status, urls=None, message=None):
    """Update the status of an existing log record.

    ``status`` and ``updated_at`` are always written. ``urls`` and
    ``message`` are written whenever they are passed (anything other than
    ``None``), so an explicit empty list or empty string is stored rather
    than silently ignored.

    Args:
        log_id: ``_id`` of the record to update.
        status: New status value.
        urls: Optional list of URLs to store on the record.
        message: Optional message to store on the record.
    """
    update = {
        "$set": {
            "status": status,
            "updated_at": datetime.now(timezone.utc)
        }
    }
    # Compare against None rather than truthiness: a failure with an empty
    # message or an empty URL list is still a value the caller chose to set.
    if urls is not None:
        update["$set"]["urls"] = urls
    if message is not None:
        update["$set"]["message"] = message

    collection.update_one({"_id": log_id}, update)
r2_uploader.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import boto3, os
2
+ from uuid import uuid4
3
+
4
def upload_image_to_r2(image_bytes):
    """Store ``image_bytes`` as a PNG object and return the object's URL.

    Connection settings are read from the ``R2_ENDPOINT``,
    ``R2_ACCESS_KEY``, ``R2_SECRET_KEY`` and ``R2_BUCKET_NAME`` environment
    variables. The object key is a random hex UUID with a ``.png`` suffix,
    and the returned URL has the form ``<endpoint>/<bucket>/<key>``.
    """
    endpoint = os.getenv("R2_ENDPOINT")
    bucket = os.getenv("R2_BUCKET_NAME")
    credentials = {
        "aws_access_key_id": os.getenv("R2_ACCESS_KEY"),
        "aws_secret_access_key": os.getenv("R2_SECRET_KEY"),
    }

    storage = boto3.client("s3", endpoint_url=endpoint, **credentials)

    object_key = uuid4().hex + ".png"
    storage.put_object(
        Bucket=bucket,
        Key=object_key,
        Body=image_bytes,
        ContentType="image/png",
    )
    return "/".join((str(endpoint), str(bucket), object_key))
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ openai
2
+ pymongo
3
+ boto3