davidlee831117 committed on
Commit
86eb4e0
·
verified ·
1 Parent(s): 67a9521

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -31
app.py CHANGED
@@ -1,11 +1,15 @@
1
  import gradio as gr
 
2
  import os
3
  import json
4
  import base64
 
5
  from io import BytesIO
6
  from PIL import Image
 
7
  import numpy as np
8
  from google.generativeai import GenerativeModel, configure
 
9
 
10
  # Global variable to store API key
11
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
@@ -37,6 +41,15 @@ class NanoBananaImageGenerator:
37
  img_bytes = img_byte_arr.getvalue()
38
  return {"inline_data": {"mime_type": "image/png", "data": base64.b64encode(img_bytes).decode('utf-8')}}
39
 
 
 
 
 
 
 
 
 
 
40
  def build_prompt_for_operation(self, prompt, operation="generate", has_references=False, aspect_ratio="1:1", character_consistency=True):
41
  aspect_instructions = {
42
  "1:1": "square format",
@@ -90,8 +103,8 @@ class NanoBananaImageGenerator:
90
  for i in range(batch_count):
91
  try:
92
  print(f"Debug: Batch {i+1} - Attempting to call Gemini API.")
93
- # Changed the model to a more stable 'gemini-pro-vision'
94
- response = GenerativeModel("gemini-pro-vision").generate_content(
95
  contents=content_parts,
96
  generation_config=generation_config
97
  )
@@ -102,11 +115,13 @@ class NanoBananaImageGenerator:
102
 
103
  batch_images = []
104
 
 
105
  if hasattr(response, 'prompt_feedback') and response.prompt_feedback.safety_ratings:
106
  operation_log += f"提示詞因安全政策被拒絕: {response.prompt_feedback.safety_ratings}\n"
107
  continue
108
 
109
  if not hasattr(response, 'candidates') or not response.candidates:
 
110
  operation_log += f"批次 {i+1}: 在回應中未找到任何候選者。完整回應: {str(response)}\n"
111
  continue
112
 
@@ -130,13 +145,11 @@ class NanoBananaImageGenerator:
130
  else:
131
  operation_log += f"批次 {i+1}: 未找到圖像。請檢視日誌了解詳細資訊。\n"
132
 
133
- except KeyError as e:
134
- if str(e) == "'Text'":
135
- operation_log += f"批次 {i+1} 錯誤: Gemini API 返回了意外的回應結構。影像生成可能因安全政策、無效輸入或內部 API 問題而失敗。原始錯誤: {type(e).__name__} - {str(e)}\n"
136
- else:
137
- raise e
138
  except Exception as e:
139
- operation_log += f"批次 {i+1} 意外錯誤: {type(e).__name__} - {str(e)}\n"
 
 
 
140
 
141
  return all_generated_images, operation_log
142
 
@@ -144,23 +157,23 @@ class NanoBananaImageGenerator:
144
  operation_log = f"API 呼叫錯誤: {type(e).__name__} - {str(e)}\n"
145
  return [], operation_log
146
 
147
- def generate_image_from_uploads(white_background_img, reference_img, prompt):
148
  image_generator = NanoBananaImageGenerator(api_key=GEMINI_API_KEY)
149
 
150
  if not GEMINI_API_KEY:
151
  return None, "錯誤: GEMINI_API_KEY 環境變數未設定。"
152
 
153
  encoded_images = []
154
-
155
- # Correctly check for valid uploaded images
156
- if white_background_img is not None and isinstance(white_background_img, np.ndarray):
157
- encoded_images.append(image_generator._image_to_base64(Image.fromarray(white_background_img).convert("RGB")))
158
 
159
- if reference_img is not None and isinstance(reference_img, np.ndarray):
160
- encoded_images.append(image_generator._image_to_base64(Image.fromarray(reference_img).convert("RGB")))
 
161
 
162
  if not encoded_images:
163
- return None, "錯誤: 請上傳至少一張圖片。"
164
 
165
  has_references = len(encoded_images) > 0
166
  final_prompt = image_generator.build_prompt_for_operation(
@@ -181,25 +194,92 @@ def generate_image_from_uploads(white_background_img, reference_img, prompt):
181
  else:
182
  return None, operation_log
183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  if __name__ == "__main__":
185
  with gr.Blocks() as demo:
186
  gr.Markdown("# AutoLS Gradio Image Generator")
187
- gr.Markdown("請上傳圖片並輸入提示詞,進行影像生成。")
188
-
 
 
 
 
 
 
 
189
  with gr.Row():
190
- white_background_input = gr.Image(type="numpy", label="上傳白背圖", show_label=True)
191
- reference_image_input = gr.Image(type="numpy", label="上傳參考圖", show_label=True)
192
-
193
- prompt_input = gr.Textbox(label="提示詞", placeholder="例如:加上一個舒適的木製椅子。")
194
- generate_button = gr.Button("生成圖片")
195
-
196
- generated_image_output = gr.Image(label="生成的圖片", show_label=True)
197
- operation_log_output = gr.Textbox(label="操作日誌", lines=5, show_label=True)
198
-
199
- generate_button.click(
200
- fn=generate_image_from_uploads,
201
- inputs=[white_background_input, reference_image_input, prompt_input],
 
 
 
 
202
  outputs=[generated_image_output, operation_log_output]
203
  )
204
-
205
  demo.launch()
 
1
  import gradio as gr
2
+ import pandas as pd
3
  import os
4
  import json
5
  import base64
6
+ import requests
7
  from io import BytesIO
8
  from PIL import Image
9
+ import torch
10
  import numpy as np
11
  from google.generativeai import GenerativeModel, configure
12
+ from urllib.parse import urlparse, parse_qs
13
 
14
  # Global variable to store API key
15
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 
41
  img_bytes = img_byte_arr.getvalue()
42
  return {"inline_data": {"mime_type": "image/png", "data": base64.b64encode(img_bytes).decode('utf-8')}}
43
 
44
+ def _load_image_from_url(self, url):
45
+ try:
46
+ response = requests.get(url)
47
+ response.raise_for_status()
48
+ return Image.open(BytesIO(response.content)).convert("RGB")
49
+ except Exception as e:
50
+ print(f"Error loading image from URL {url}: {e}")
51
+ return None
52
+
53
  def build_prompt_for_operation(self, prompt, operation="generate", has_references=False, aspect_ratio="1:1", character_consistency=True):
54
  aspect_instructions = {
55
  "1:1": "square format",
 
103
  for i in range(batch_count):
104
  try:
105
  print(f"Debug: Batch {i+1} - Attempting to call Gemini API.")
106
+ print(f"Debug: Batch {i+1} - Contents (first part): {content_parts[0]['parts'][0]['text'][:50]}...")
107
+ response = GenerativeModel("gemini-2.5-flash-image-preview").generate_content(
108
  contents=content_parts,
109
  generation_config=generation_config
110
  )
 
115
 
116
  batch_images = []
117
 
118
+ # 優先檢查提示詞或候選者是否因安全政策被拒絕
119
  if hasattr(response, 'prompt_feedback') and response.prompt_feedback.safety_ratings:
120
  operation_log += f"提示詞因安全政策被拒絕: {response.prompt_feedback.safety_ratings}\n"
121
  continue
122
 
123
  if not hasattr(response, 'candidates') or not response.candidates:
124
+ # 如果沒有候選者,但有其他錯誤資訊,記錄下來
125
  operation_log += f"批次 {i+1}: 在回應中未找到任何候選者。完整回應: {str(response)}\n"
126
  continue
127
 
 
145
  else:
146
  operation_log += f"批次 {i+1}: 未找到圖像。請檢視日誌了解詳細資訊。\n"
147
 
 
 
 
 
 
148
  except Exception as e:
149
+ operation_log += f"批次 {i+1} 發生意外錯誤: {type(e).__name__} - {str(e)}\n"
150
+ if 'response' in locals() and response is not None:
151
+ operation_log += f"除錯: 錯誤時的完整回應 (嘗試轉換為字串): {str(response)}\n"
152
+ operation_log += f"除錯: 回應物件類型: {type(response)}\n"
153
 
154
  return all_generated_images, operation_log
155
 
 
157
  operation_log = f"API 呼叫錯誤: {type(e).__name__} - {str(e)}\n"
158
  return [], operation_log
159
 
160
+ def generate_image(white_background_url, reference_image_url, prompt):
161
  image_generator = NanoBananaImageGenerator(api_key=GEMINI_API_KEY)
162
 
163
  if not GEMINI_API_KEY:
164
  return None, "錯誤: GEMINI_API_KEY 環境變數未設定。"
165
 
166
  encoded_images = []
167
+ wb_image = image_generator._load_image_from_url(white_background_url)
168
+ if wb_image:
169
+ encoded_images.append(image_generator._image_to_base64(wb_image))
 
170
 
171
+ ref_image = image_generator._load_image_from_url(reference_image_url)
172
+ if ref_image:
173
+ encoded_images.append(image_generator._image_to_base64(ref_image))
174
 
175
  if not encoded_images:
176
+ return None, "錯誤: 無法從提供的 URL 載入任何圖片。"
177
 
178
  has_references = len(encoded_images) > 0
179
  final_prompt = image_generator.build_prompt_for_operation(
 
194
  else:
195
  return None, operation_log
196
 
197
def _build_csv_url(url: str) -> str:
    """Turn a Google Sheets URL into the matching CSV export URL.

    The ``gid`` (tab id) is taken from the query string first, then from
    the fragment, and defaults to ``"0"``. URLs that are not of the form
    ``/spreadsheets/d/<doc_id>/...`` are returned unchanged when they are
    already CSV export links; otherwise only the literal ``/edit#gid=0``
    suffix is rewritten.
    """
    parsed = urlparse(url)
    path_parts = parsed.path.strip("/").split("/")

    doc_id = None
    if len(path_parts) >= 3 and path_parts[:2] == ["spreadsheets", "d"]:
        doc_id = path_parts[2]

    if doc_id:
        # parse_qs returns {} for an empty string, so no fragment guard needed.
        query_gid = parse_qs(parsed.query).get("gid", [None])[0]
        fragment_gid = parse_qs(parsed.fragment).get("gid", [None])[0]
        gid = query_gid or fragment_gid or "0"
        return f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={gid}"

    if "/export" in parsed.path and "format=csv" in parsed.query:
        return url
    return url.replace("/edit#gid=0", "/export?format=csv&gid=0")


def read_google_sheet(sheet_url):
    """Load a Google Sheet as a pandas DataFrame via its CSV export URL.

    Args:
        sheet_url: URL of the sheet as copied from the browser.

    Returns:
        The DataFrame parsed from the exported CSV.

    Raises:
        gr.Error: If no URL is given or the CSV cannot be read. The
            underlying exception is chained as ``__cause__``.
    """
    if not isinstance(sheet_url, str) or not sheet_url.strip():
        print("Error reading Google Sheet: no URL provided.")
        raise gr.Error("Error reading Google Sheet: no URL provided.")

    try:
        # Pasted URLs often carry stray whitespace that would end up in gid.
        csv_url = _build_csv_url(sheet_url.strip())
        print(f"Attempting to read CSV from: {csv_url}")
        df = pd.read_csv(csv_url, engine='python', on_bad_lines='warn')
    except Exception as e:
        print(f"Error reading Google Sheet: {e}")
        raise gr.Error(f"Error reading Google Sheet: {e}") from e

    print("Successfully read Google Sheet.")
    return df
224
+
225
def process_sheet_data(sheet_url):
    """Read the sheet and return its complete rows for display.

    The first three columns are interpreted as white-background image URL,
    reference image URL and prompt. Rows with a missing value in any of the
    three are dropped.

    Args:
        sheet_url: URL of the Google Sheet to read.

    Returns:
        A list of ``[index, white_background_url, reference_url, prompt]``
        rows, where ``index`` is the row's position in the sheet data
        (counted before incomplete rows are dropped).

    Raises:
        gr.Error: If the sheet cannot be read, has fewer than three
            columns, or processing fails for another reason.
    """
    try:
        df = read_google_sheet(sheet_url)
        if df.shape[1] < 3:
            error_msg = f"Error: Google Sheet has only {df.shape[1]} columns, but 3 are expected (White Background URL, Reference Image URL, Prompt)."
            print(error_msg)
            raise gr.Error(error_msg)

        columns = (df.iloc[:, col].tolist() for col in range(3))
        data = [
            [i, wb, ref, p]
            for i, (wb, ref, p) in enumerate(zip(*columns))
            if pd.notna(wb) and pd.notna(ref) and pd.notna(p)
        ]
        print(f"Processed {len(data)} valid rows.")
        return data
    except gr.Error:
        # Already a user-facing error (raised above or by read_google_sheet);
        # re-wrapping it would duplicate the message prefix and the log line.
        raise
    except Exception as e:
        print(f"Error processing sheet data: {e}")
        raise gr.Error(f"Error processing sheet data: {e}") from e
244
+
245
def generate_image_for_row(row_index, dataframe_data):
    """Generate an image for the table row whose Index value is *row_index*.

    The row is located by the value in the first ("Index") column rather
    than by position, so the number the user types matches the number shown
    in the table even when some sheet rows were filtered out.

    Args:
        row_index: Index value of the row to generate (int-like).
        dataframe_data: DataFrame with columns
            ``[index, white_background_url, reference_url, prompt]``.

    Returns:
        Whatever ``generate_image`` returns for the selected row, or
        ``(None, <error message>)`` when the index or the data is invalid.
    """
    invalid = (None, "Error: Invalid row index or dataframe data not loaded.")

    if not isinstance(dataframe_data, pd.DataFrame) or dataframe_data.shape[1] < 4:
        return invalid

    try:
        wanted = int(row_index)
    except (TypeError, ValueError, OverflowError):
        # Covers a cleared number field (None), NaN and non-numeric input.
        return invalid

    # The Index column may come back from the UI as strings; coerce before
    # comparing so "3" and 3 both match.
    index_column = pd.to_numeric(dataframe_data.iloc[:, 0], errors="coerce")
    matches = dataframe_data.loc[(index_column == wanted).to_numpy()]
    if matches.empty:
        return invalid

    row = matches.iloc[0]
    white_background_url = row.iloc[1]
    reference_image_url = row.iloc[2]
    prompt = row.iloc[3]
    return generate_image(white_background_url, reference_image_url, prompt)
253
+
254
  if __name__ == "__main__":
255
  with gr.Blocks() as demo:
256
  gr.Markdown("# AutoLS Gradio Image Generator")
257
+ gr.Markdown("Enter the Google Sheet URL to process image generation requests.")
258
+ sheet_url_input = gr.Textbox(label="Google Sheet URL", value="https://docs.google.com/spreadsheets/d/1G3olHxydDIbnyXdh5nnw5TG0akZFeMeYm-25JmCGDLg/edit?gid=0#gid=0")
259
+ process_button = gr.Button("Process Sheet")
260
+ processed_df_state = gr.State()
261
+ output_dataframe = gr.DataFrame(
262
+ headers=["Index", "白背圖URL", "參考圖URL", "提示詞"],
263
+ col_count=(4, "fixed"),
264
+ interactive=False
265
+ )
266
  with gr.Row():
267
+ row_index_input = gr.Number(label="Row Index to Generate", precision=0, value=0)
268
+ generate_selected_button = gr.Button("Generate Image for Selected Row")
269
+ generated_image_output = gr.Image(label="Generated Image")
270
+ operation_log_output = gr.Textbox(label="Operation Log", lines=5)
271
+ process_button.click(
272
+ fn=process_sheet_data,
273
+ inputs=sheet_url_input,
274
+ outputs=output_dataframe
275
+ ).success(
276
+ fn=lambda x: x,
277
+ inputs=output_dataframe,
278
+ outputs=processed_df_state
279
+ )
280
+ generate_selected_button.click(
281
+ fn=generate_image_for_row,
282
+ inputs=[row_index_input, processed_df_state],
283
  outputs=[generated_image_output, operation_log_output]
284
  )
 
285
  demo.launch()