Hammad712 committed on
Commit
0120494
·
verified ·
1 Parent(s): be34ebf

Update app/services/image_service.py

Browse files
Files changed (1) hide show
  1. app/services/image_service.py +175 -90
app/services/image_service.py CHANGED
@@ -2,16 +2,17 @@ import base64
2
  import logging
3
  import json
4
  from io import BytesIO
 
 
5
  from PIL import Image
6
  from google.genai import types
7
- from typing import Optional, Dict, Any
8
 
9
- # Import our initialized clients
10
- from app.core.clients import llm, prompt_template, genai_client
11
 
12
  logger = logging.getLogger(__name__)
13
 
14
- # --- New Constant for Virtual Try-On Prompt ---
15
  VIRTUAL_TRY_ON_PROMPT = """
16
  You are an expert image-editing agent. Perform a high-fidelity virtual try-on using two inputs:
17
 
@@ -37,14 +38,17 @@ Output requirements:
37
  - Return a single photorealistic image (same orientation as the person image) and a short text JSON summary.
38
  - No collages, no multiple thumbnail images, no visible watermarks.
39
  - If the try-on cannot be done reliably, set "success": false and explain why in "notes".
40
-
41
- When uncertain, err on the side of preserving the person's identity and the dress fidelity.
42
  """
43
 
 
 
 
 
44
  def enhance_user_prompt(raw_prompt: str) -> str:
 
45
  logger.info(f"Enhancing prompt: {raw_prompt[:50]}...")
46
  formatted_prompt = prompt_template.invoke({"Raw_Prompt": raw_prompt})
47
-
48
  try:
49
  response = llm.invoke(formatted_prompt)
50
  logger.info("Prompt enhancement successful.")
@@ -53,129 +57,210 @@ def enhance_user_prompt(raw_prompt: str) -> str:
53
  logger.error(f"Error during prompt enhancement: {e}", exc_info=True)
54
  raise
55
 
56
- # Returns BytesIO object, not base64 string
 
 
 
57
  def generate_image_from_text(image_prompt: str) -> tuple[Optional[str], Optional[BytesIO]]:
58
- """Generates an image using the GenAI client, returning text and BytesIO."""
59
  logger.info(f"Generating image with prompt: {image_prompt[:50]}...")
60
-
61
- generation_config = types.GenerateContentConfig(
62
- response_modalities=['Text', 'Image']
63
- )
64
-
65
  try:
 
66
  response = genai_client.models.generate_content(
67
- model="gemini-2.0-flash-exp-image-generation",
68
  contents=image_prompt,
69
  config=generation_config
70
  )
71
-
72
- logger.info("Image generation call successful.")
73
-
74
- generated_text = None
75
- generated_image_bytes = None
76
-
77
  for part in response.candidates[0].content.parts:
78
- if part.text is not None:
79
  generated_text = part.text
80
- elif part.inline_data is not None:
81
- logger.info("Processing generated image data...")
82
- # Image is directly from model, convert to BytesIO
83
  generated_image_bytes = BytesIO(part.inline_data.data)
84
-
85
  return generated_text, generated_image_bytes
 
86
  except Exception as e:
87
- logger.error(f"Error during image generation: {e}", exc_info=True)
88
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
- # Returns BytesIO object, not base64 string
 
 
91
  def update_image_with_text(text_instruction: str, image_bytes: bytes) -> tuple[Optional[str], Optional[BytesIO]]:
92
- """Updates an existing image based on a text instruction, returning text and BytesIO."""
93
- logger.info("Opening image from bytes for update...")
94
-
95
  try:
96
- # MODIFICATION: Open image directly from bytes, not base64
97
  image = Image.open(BytesIO(image_bytes))
98
- except Exception as e:
99
- logger.error(f"Failed to open image from bytes: {e}", exc_info=True)
100
- raise ValueError("Invalid image data. The uploaded file is corrupt or not an image.")
101
-
102
- logger.info(f"Updating image with instruction: {text_instruction[:50]}...")
103
-
104
- generation_config = types.GenerateContentConfig(
105
- response_modalities=['Text', 'Image']
106
- )
107
-
108
  try:
109
  response = genai_client.models.generate_content(
110
- model="gemini-2.0-flash-exp-image-generation",
111
  contents=[text_instruction, image],
112
  config=generation_config
113
  )
114
-
115
- logger.info("Image update call successful.")
116
-
117
- updated_text = None
118
- updated_image_bytes = None
119
-
120
  for part in response.candidates[0].content.parts:
121
- if part.text is not None:
122
  updated_text = part.text
123
- elif part.inline_data is not None:
124
- logger.info("Processing updated image data...")
125
  updated_image_bytes = BytesIO(part.inline_data.data)
126
-
127
  return updated_text, updated_image_bytes
 
128
  except Exception as e:
129
- logger.error(f"Error during image update: {e}", exc_info=True)
130
- raise
131
- # --- New Virtual Try-On Service ---
132
- # Accepts raw bytes for dress and person images
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  def virtual_try_on(dress_image_bytes: bytes, person_image_bytes: bytes) -> tuple[Dict[str, Any], Optional[BytesIO]]:
134
- """Performs virtual try-on using raw image bytes, returning summary and BytesIO."""
135
-
136
- logger.info("Opening images for virtual try-on.")
137
  try:
138
  dress_image = Image.open(BytesIO(dress_image_bytes))
139
  person_image = Image.open(BytesIO(person_image_bytes))
140
- except Exception as e:
141
- logger.error(f"Failed to open image from bytes: {e}", exc_info=True)
142
- raise ValueError("Invalid image data. One or both uploaded files are corrupt or not images.")
143
 
144
  try:
145
- logger.info("Calling GenAI model for virtual try-on...")
146
- config = types.GenerateContentConfig(
147
- response_modalities=["Text", "Image"]
148
- )
149
-
150
- response = genai_client.models.generate_content(
151
- model="gemini-2.0-flash-exp",
152
  contents=[VIRTUAL_TRY_ON_PROMPT, dress_image, person_image],
153
  config=config
154
  )
155
-
156
- logger.info("Virtual try-on call successful.")
157
-
158
- result_summary = {"success": False, "notes": "No text response from model."}
159
  result_image_bytes = None
160
-
161
  for part in response.candidates[0].content.parts:
162
- if part.text is not None:
163
  try:
164
  result_summary = json.loads(part.text)
165
- logger.info("Parsed JSON summary from model.")
166
  except json.JSONDecodeError:
167
- logger.warning(f"Model returned non-JSON text: {part.text}")
168
- result_summary = {
169
- "success": False,
170
- "notes": "Model returned non-JSON text.",
171
- "raw_text": part.text
172
- }
173
- elif part.inline_data is not None:
174
- logger.info("Processing generated try-on image data...")
175
  result_image_bytes = BytesIO(part.inline_data.data)
176
-
177
  return result_summary, result_image_bytes
178
-
179
  except Exception as e:
180
- logger.error(f"Error during virtual try-on model call: {e}", exc_info=True)
181
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import logging
3
  import json
4
  from io import BytesIO
5
+ from typing import Optional, Dict, Any, Tuple, List
6
+
7
  from PIL import Image
8
  from google.genai import types
 
9
 
10
+ # Import initialized clients (ensure flux_client is defined in app/core/clients.py)
11
+ from app.core.clients import llm, prompt_template, genai_client, flux_client, genai_client1
12
 
13
  logger = logging.getLogger(__name__)
14
 
15
+ # --- Virtual Try-On Instruction Template ---
16
  VIRTUAL_TRY_ON_PROMPT = """
17
  You are an expert image-editing agent. Perform a high-fidelity virtual try-on using two inputs:
18
 
 
38
  - Return a single photorealistic image (same orientation as the person image) and a short text JSON summary.
39
  - No collages, no multiple thumbnail images, no visible watermarks.
40
  - If the try-on cannot be done reliably, set "success": false and explain why in "notes".
 
 
41
  """
42
 
43
+
44
+ # ===============================================================
45
+ # 🔹 PROMPT ENHANCEMENT
46
+ # ===============================================================
47
  def enhance_user_prompt(raw_prompt: str) -> str:
48
+ """Enhance user prompt using LLM"""
49
  logger.info(f"Enhancing prompt: {raw_prompt[:50]}...")
50
  formatted_prompt = prompt_template.invoke({"Raw_Prompt": raw_prompt})
51
+
52
  try:
53
  response = llm.invoke(formatted_prompt)
54
  logger.info("Prompt enhancement successful.")
 
57
  logger.error(f"Error during prompt enhancement: {e}", exc_info=True)
58
  raise
59
 
60
+
61
+ # ===============================================================
62
+ # 🔹 IMAGE GENERATION (TEXT → IMAGE) WITH FALLBACK
63
+ # ===============================================================
64
def generate_image_from_text(image_prompt: str) -> tuple[Optional[str], Optional[BytesIO]]:
    """Generate an image from a text prompt.

    Tries the Gemini image-generation model first and falls back to the
    Flux model when Gemini raises *or* when it returns no image data
    (e.g. a text-only/safety response) — previously the no-image case
    returned ``(text, None)`` even though the fallback was available.

    Args:
        image_prompt: Natural-language description of the desired image.

    Returns:
        (generated_text, image_buffer): either element may be None. The
        Flux fallback produces no accompanying text of its own.

    Raises:
        Exception: If both Gemini and the Flux fallback fail.
    """
    logger.info(f"Generating image with prompt: {image_prompt[:50]}...")

    generation_config = types.GenerateContentConfig(response_modalities=['Text', 'Image'])

    generated_text: Optional[str] = None
    try:
        # Try Gemini first.
        response = genai_client.models.generate_content(
            model="gemini-2.0-flash-preview-image-generation",
            contents=image_prompt,
            config=generation_config
        )
        logger.info("Gemini image generation successful.")

        generated_image_bytes: Optional[BytesIO] = None
        for part in response.candidates[0].content.parts:
            if part.text:
                generated_text = part.text
            elif getattr(part, "inline_data", None):
                generated_image_bytes = BytesIO(part.inline_data.data)

        # Only accept the Gemini result if it actually contains an image;
        # otherwise fall through to the Flux fallback below.
        if generated_image_bytes is not None:
            return generated_text, generated_image_bytes
        logger.warning("Gemini returned no image data. Falling back to Flux...")
    except Exception as e:
        logger.warning(f"Gemini image generation failed: {e}. Falling back to Flux...")

    try:
        image = flux_client.text_to_image(
            image_prompt,
            model="black-forest-labs/FLUX.1-dev"
        )
        buf = BytesIO()
        image.save(buf, format="PNG")
        buf.seek(0)
        return generated_text, buf
    except Exception as flux_error:
        logger.error(f"Flux fallback failed: {flux_error}", exc_info=True)
        raise
104
+
105
 
106
+ # ===============================================================
107
+ # 🔹 IMAGE UPDATE (EDIT EXISTING IMAGE WITH TEXT)
108
+ # ===============================================================
109
def update_image_with_text(text_instruction: str, image_bytes: bytes) -> tuple[Optional[str], Optional[BytesIO]]:
    """Edit an existing image according to a text instruction.

    Uses the Gemini image-generation model, falling back to a Flux
    text-to-image call when Gemini fails.

    Args:
        text_instruction: Natural-language description of the desired edit.
        image_bytes: Raw bytes of the image to edit.

    Returns:
        (updated_text, image_buffer): either element may be None.

    Raises:
        ValueError: If ``image_bytes`` cannot be decoded as an image.
        Exception: If both Gemini and the Flux fallback fail.
    """
    logger.info("Opening image for update...")

    try:
        image = Image.open(BytesIO(image_bytes))
    except Exception as e:
        # Chain and log the original decode error so the root cause is
        # not silently discarded.
        logger.error(f"Failed to open image from bytes: {e}", exc_info=True)
        raise ValueError("Invalid image data. Upload a valid image file.") from e

    generation_config = types.GenerateContentConfig(response_modalities=['Text', 'Image'])

    try:
        response = genai_client.models.generate_content(
            model="gemini-2.0-flash-preview-image-generation",
            contents=[text_instruction, image],
            config=generation_config
        )
        logger.info("Gemini image update successful.")

        updated_text, updated_image_bytes = None, None
        for part in response.candidates[0].content.parts:
            # `is not None` rather than truthiness, so an empty text part
            # is still captured instead of being dropped.
            if part.text is not None:
                updated_text = part.text
            elif getattr(part, "inline_data", None):
                updated_image_bytes = BytesIO(part.inline_data.data)

        return updated_text, updated_image_bytes

    except Exception as e:
        logger.warning(f"Gemini update failed: {e}. Falling back to Flux edit...")

        try:
            # NOTE(review): this fallback is text-to-image only — it ignores
            # the original image entirely and synthesizes a new one from the
            # instruction text. Consider a Flux image-to-image endpoint if
            # the client supports it — confirm with the Flux client API.
            image = flux_client.text_to_image(
                f"Edit image based on instruction: {text_instruction}",
                model="black-forest-labs/FLUX.1-dev"
            )
            buf = BytesIO()
            image.save(buf, format="PNG")
            buf.seek(0)
            return None, buf
        except Exception as flux_error:
            logger.error(f"Flux fallback failed: {flux_error}", exc_info=True)
            raise
153
+
154
+
155
+ # ===============================================================
156
+ # 🔹 VIRTUAL TRY-ON (DRESS + PERSON)
157
+ # ===============================================================
158
def virtual_try_on(dress_image_bytes: bytes, person_image_bytes: bytes) -> tuple[Dict[str, Any], Optional[BytesIO]]:
    """Perform a virtual try-on of a dress on a person image.

    Args:
        dress_image_bytes: Raw bytes of the dress/garment image.
        person_image_bytes: Raw bytes of the person image.

    Returns:
        (summary, image_buffer): ``summary`` is a dict with at least a
        ``success`` flag and ``notes``; ``image_buffer`` is the generated
        try-on image, or None when generation failed.

    Raises:
        ValueError: If either input cannot be decoded as an image.
    """
    logger.info("Opening images for virtual try-on...")

    try:
        dress_image = Image.open(BytesIO(dress_image_bytes))
        person_image = Image.open(BytesIO(person_image_bytes))
    except Exception as e:
        raise ValueError("Invalid image data provided.") from e

    try:
        config = types.GenerateContentConfig(response_modalities=["Text", "Image"])
        response = genai_client1.models.generate_content(
            model="gemini-2.0-flash-preview-image-generation",
            contents=[VIRTUAL_TRY_ON_PROMPT, dress_image, person_image],
            config=config
        )
        logger.info("Gemini virtual try-on successful.")

        result_summary: Dict[str, Any] = {"success": False, "notes": "No text response."}
        result_image_bytes = None

        for part in response.candidates[0].content.parts:
            if part.text:
                try:
                    result_summary = json.loads(part.text)
                except json.JSONDecodeError:
                    # Model returned plain text instead of the requested JSON
                    # summary — surface it verbatim in the notes.
                    result_summary = {"success": False, "notes": part.text}
            elif getattr(part, "inline_data", None):
                result_image_bytes = BytesIO(part.inline_data.data)

        return result_summary, result_image_bytes

    except Exception as e:
        # BUG FIX: the previous version logged "Falling back to Flux..." but
        # the entire fallback block was commented out, so the function fell
        # off the end and implicitly returned None — callers unpacking the
        # declared 2-tuple then crashed with a confusing TypeError. Return an
        # explicit failure summary matching the documented return shape.
        logger.error(f"Virtual try-on failed: {e}", exc_info=True)
        return {"success": False, "notes": f"Virtual try-on failed: {e}"}, None
208
+
209
+
210
+ # ===============================================================
211
+ # 🔹 SHOE IMAGE GENERATION
212
+ # ===============================================================
213
def generate_shoe_images(
    style: str,
    color: Optional[str] = None,
    perspective: Optional[str] = None,
    variations: int = 1,
) -> Tuple[Optional[str], List[BytesIO]]:
    """Generate photorealistic shoe product images with Gemini → Flux fallback.

    Args:
        style: Shoe style description (e.g. "running sneaker").
        color: Optional colour description; defaults to natural tones.
        perspective: Optional camera perspective; defaults to three-quarter.
        variations: Number of images to request (clamped to a minimum of 1).
            BUG FIX: previously accepted but completely ignored — exactly one
            generation call was ever made.

    Returns:
        (generated_text, images): text from the last Gemini response (if any)
        and the list of generated image buffers.

    Raises:
        Exception: If both Gemini and the Flux fallback fail.
    """
    logger.info("Generating shoe images...")

    shoe_prompt = f"""
    Produce a photorealistic product image of a shoe.

    Details:
    - Style: {style}
    - Color: {color or 'natural tones'}
    - Perspective: {perspective or 'three-quarter'}
    - Background: plain white / studio.
    - High detail, realistic textures, clean lighting.
    """

    config = types.GenerateContentConfig(response_modalities=["Text", "Image"])
    # Honor the variations parameter; default of 1 preserves old behavior.
    num_variations = max(1, variations)

    try:
        generated_text: Optional[str] = None
        images: List[BytesIO] = []
        for _ in range(num_variations):
            response = genai_client.models.generate_content(
                model="gemini-2.0-flash-preview-image-generation",
                contents=shoe_prompt,
                config=config
            )
            for part in response.candidates[0].content.parts:
                if part.text:
                    generated_text = part.text
                elif getattr(part, "inline_data", None):
                    images.append(BytesIO(part.inline_data.data))

        return generated_text, images

    except Exception as e:
        logger.warning(f"Gemini shoe generation failed: {e}. Falling back to Flux...")

        try:
            images = []
            for _ in range(num_variations):
                image = flux_client.text_to_image(
                    shoe_prompt,
                    model="black-forest-labs/FLUX.1-dev"
                )
                buf = BytesIO()
                image.save(buf, format="PNG")
                buf.seek(0)
                images.append(buf)
            return None, images
        except Exception as flux_error:
            logger.error(f"Flux fallback failed: {flux_error}", exc_info=True)
            raise