davidlee831117 committed on
Commit
e06bc2a
·
verified ·
1 Parent(s): 52c912b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -189
app.py CHANGED
@@ -7,6 +7,10 @@ from PIL import Image
7
  from urllib.parse import urlparse, parse_qs
8
  import google.generativeai as genai
9
  import time
 
 
 
 
10
 
11
  # 全域變數來儲存 API 金鑰
12
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
@@ -22,207 +26,187 @@ print(f"Debug: Top-level Loaded GEMINI_API_KEY (first 5 chars): {GEMINI_API_KEY[
22
  if not GEMINI_API_KEY:
23
  raise ValueError("ERROR: GEMINI_API_KEY environment variable is not set. Please set it correctly.")
24
 
25
- # 配置 Gemini API
26
  genai.configure(api_key=GEMINI_API_KEY)
27
 
28
def load_image_from_url(url: str):
    """Download an image from a URL and return it as an RGB PIL image.

    Args:
        url: HTTP(S) address of the image.

    Returns:
        The decoded image converted to RGB, or ``None`` when ``url`` is empty
        or not a string, the request fails, or the payload cannot be decoded.
    """
    # Spreadsheet cells can arrive empty or as non-string values (e.g. NaN);
    # reject them up front instead of handing them to requests.
    if not isinstance(url, str) or not url.strip():
        print(f"Error downloading image: invalid URL {url!r}")
        return None

    url = url.strip()
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, timeout=20, headers=headers)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError.
        image = Image.open(BytesIO(response.content)).convert("RGB")
        print(f"Debug: Successfully loaded image from URL: {url}")
        return image
    except requests.exceptions.HTTPError as e:
        print(f"Error downloading image from {url}: HTTP Error {e.response.status_code}")
        return None
    except Exception as e:
        # Best effort: any other failure (network, decoding) means "no image".
        print(f"An unexpected error occurred while loading {url}: {e}")
        return None
43
-
44
def build_prompt_for_operation(prompt, has_references=False, aspect_ratio="1:1"):
    """Build the full generation prompt from the user's prompt.

    Args:
        prompt: User-supplied description of the desired image.
        has_references: When true, the prompt asks the model to follow the
            style and elements of the reference images.
        aspect_ratio: One of "1:1", "16:9", "9:16", "4:3", "3:4"; any other
            value falls back to the square format.

    Returns:
        The complete prompt string sent to the model.
    """
    aspect_instructions = {
        "1:1": "square format",
        "16:9": "widescreen landscape format",
        "9:16": "portrait format",
        "4:3": "standard landscape format",
        "3:4": "standard portrait format",
    }
    base_quality = "Generate a high-quality, photorealistic image"
    format_instruction = f"in {aspect_instructions.get(aspect_ratio, 'square format')}"

    # Trim whitespace and trailing periods so the template's own "." does not
    # produce ".." in the final prompt.
    subject = str(prompt).strip().rstrip(".").rstrip()

    if has_references:
        return (
            f"{base_quality} inspired by the style and elements of the "
            f"reference images. {subject}. {format_instruction}."
        )
    return f"{base_quality} of: {subject}. {format_instruction}."
61
-
62
def call_gemini_api(prompt, images):
    """Call the Gemini API through the official SDK.

    Args:
        prompt: Prompt text sent as the first element of the request.
        images: Images sent after the prompt.

    Returns:
        A ``(image_parts, operation_log)`` tuple. ``image_parts`` is a list of
        raw image byte strings, or ``None`` when the reply holds no image, the
        prompt was blocked, or the call failed. ``operation_log`` describes
        what happened.
    """
    operation_log = ""
    try:
        # generate_content() is a method of GenerativeModel; the SDK has no
        # module-level genai.generate_content().
        model = genai.GenerativeModel("gemini-1.5-pro-latest")
        response = model.generate_content([prompt] + list(images or []))

        # The response is an object, not a dict; the block reason is an
        # attribute of prompt_feedback.
        feedback = getattr(response, "prompt_feedback", None)
        block_reason = getattr(feedback, "block_reason", None)
        if block_reason:
            reason = getattr(block_reason, "name", block_reason)
            operation_log += f"API 被安全政策阻止。原因:{reason}\n"
            return None, operation_log

        image_parts = []
        text_chunks = []
        candidates = response.candidates
        parts = candidates[0].content.parts if candidates else []
        for part in parts:
            inline_data = getattr(part, "inline_data", None)
            image_bytes = getattr(inline_data, "data", None)
            if image_bytes:
                image_parts.append(image_bytes)
            elif getattr(part, "text", None):
                text_chunks.append(part.text)

        # Reading parts directly avoids response.text, which raises when the
        # reply has no text part.
        reply_text = "".join(text_chunks)
        print(f"Debug: Full API Response: {reply_text}")

        # Fallback kept from the original: an image embedded in the text as a
        # base64 data URI.
        if not image_parts and "data:image" in reply_text and "," in reply_text:
            image_parts.append(base64.b64decode(reply_text.split(',')[1]))

        if image_parts:
            operation_log += f"成功生成 {len(image_parts)} 張圖片。\n"
            return image_parts, operation_log
        if reply_text:
            operation_log += f"API 回應文字:{reply_text}\n"
            return None, operation_log

        operation_log += "API 回應沒有包含任何圖像或文字數據。\n"
        return None, operation_log

    except Exception as e:
        # API boundary: report every failure to the caller as a log message.
        operation_log = f"意外錯誤: {type(e).__name__} - {str(e)}\n"
        return None, operation_log
 
 
 
 
102
 
103
def generate_image(white_background_url, reference_image_url, prompt):
    """Generate one image from two source image URLs and a prompt.

    Args:
        white_background_url: URL of the white-background product image.
        reference_image_url: URL of the reference image.
        prompt: User prompt describing the desired result.

    Returns:
        A ``(output_path, operation_log)`` tuple. ``output_path`` is the PNG
        written under ``generated_images/``, or ``None`` on failure.
    """
    if not GEMINI_API_KEY:
        return None, "Error: GEMINI_API_KEY is not set."

    wb_image = load_image_from_url(white_background_url)
    ref_image = load_image_from_url(reference_image_url)
    if wb_image is None or ref_image is None:
        return None, "Error: One or more images failed to load from URL."

    final_prompt = build_prompt_for_operation(prompt, has_references=True)
    generated_images_binary, operation_log = call_gemini_api(
        final_prompt, [wb_image, ref_image]
    )
    if not generated_images_binary:
        return None, operation_log

    output_dir = "generated_images"
    os.makedirs(output_dir, exist_ok=True)
    # Nanosecond resolution: a whole-second timestamp lets two generations in
    # the same second overwrite each other's file.
    output_path = os.path.join(output_dir, f"generated_{time.time_ns()}.png")
    with open(output_path, "wb") as f:
        f.write(generated_images_binary[0])
    return output_path, operation_log
129
-
130
def _sheet_csv_export_url(url: str) -> str:
    """Translate a Google Sheets link into the matching CSV export link."""
    parsed = urlparse(url)
    segments = parsed.path.strip("/").split("/")

    doc_id = None
    if len(segments) >= 3 and segments[:2] == ["spreadsheets", "d"]:
        doc_id = segments[2]

    # The sheet id (gid) may sit in the query string or in the fragment.
    gid = parse_qs(parsed.query).get("gid", [None])[0]
    if not gid and parsed.fragment:
        gid = parse_qs(parsed.fragment).get("gid", [None])[0]
    gid = gid or "0"

    if doc_id:
        return f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={gid}"
    if "/export" in parsed.path and "format=csv" in parsed.query:
        # Already an export link; use it unchanged.
        return url
    return url.replace("/edit#gid=0", "/export?format=csv&gid=0")


def read_google_sheet(sheet_url):
    """Read a Google Sheet into a DataFrame through its CSV export.

    Raises:
        gr.Error: If the URL cannot be converted or the CSV cannot be read.
    """
    try:
        return pd.read_csv(
            _sheet_csv_export_url(sheet_url),
            engine='python',
            on_bad_lines='warn',
        )
    except Exception as e:
        raise gr.Error(f"Error reading Google Sheet: {e}")
 
156
 
157
def process_sheet_data(sheet_url):
    """Load the sheet and shape its rows for the Gradio DataFrame.

    Args:
        sheet_url: Google Sheet URL entered by the user.

    Returns:
        A list of ``[index, white_background_url, reference_url, prompt]``
        rows, keeping only sheet rows whose first three columns are all
        non-empty. ``index`` is the row's index in the loaded sheet.

    Raises:
        gr.Error: If the sheet cannot be read or has fewer than three columns.
    """
    try:
        df = read_google_sheet(sheet_url)

        if df.shape[1] < 3:
            error_msg = f"Error: Google Sheet has only {df.shape[1]} columns, but 3 are expected (White Background URL, Reference Image URL, Prompt)."
            raise gr.Error(error_msg)

        # Keep rows where all three required cells are present.
        complete = df.iloc[:, :3].dropna()
        return [
            [idx, *values]
            for idx, values in zip(
                complete.index, complete.itertuples(index=False, name=None)
            )
        ]
    except gr.Error:
        # Already a user-facing error; re-wrapping it would double the prefix.
        raise
    except Exception as e:
        raise gr.Error(f"Error processing sheet data: {e}") from e
174
-
175
def generate_image_for_row(row_index, dataframe_data):
    """Generate an image for one row of the displayed table.

    Args:
        row_index: Zero-based position of the row in the displayed table.
            NOTE(review): this is a position, not the value shown in the
            "Index" column; the two differ when sheet rows were skipped.
        dataframe_data: Table contents with the four displayed columns.

    Returns:
        The ``(image_path, operation_log)`` tuple from ``generate_image``, or
        ``(None, "Error: Invalid row index.")`` when the index is missing,
        not a number, or out of range.
    """
    invalid = (None, "Error: Invalid row index.")

    # A cleared number field arrives as None; treat it like any bad index
    # instead of failing on the comparison below.
    try:
        position = int(row_index)
    except (TypeError, ValueError):
        return invalid

    df = pd.DataFrame(dataframe_data, columns=["Index", "白背圖URL", "參考圖URL", "提示詞"])
    if not 0 <= position < len(df):
        return invalid

    row = df.iloc[position]
    return generate_image(row['白背圖URL'], row['參考圖URL'], row['提示詞'])
188
-
189
if __name__ == "__main__":
    # Column headers of the table that lists the sheet rows.
    table_headers = ["Index", "白背圖URL", "參考圖URL", "提示詞"]
    default_sheet_url = "https://docs.google.com/spreadsheets/d/1G3olHxydDIbnyXdh5nnw5TG0akZFeMeYm-25JmCGDLg/edit?gid=0#gid=0"

    with gr.Blocks() as demo:
        gr.Markdown("# AutoLS Gradio Image Generator (Powered by Gemini API)")
        gr.Markdown("輸入 Google Sheet 網址來處理圖像生成請求。")

        # Inputs: sheet URL plus the button that loads it.
        sheet_url_input = gr.Textbox(value=default_sheet_url, label="Google Sheet URL")
        process_button = gr.Button("處理試算表")

        processed_df_state = gr.State()

        output_dataframe = gr.DataFrame(
            headers=table_headers,
            col_count=(4, "fixed"),
            interactive=False,
        )

        with gr.Row():
            row_index_input = gr.Number(label="要生成的行數", precision=0, value=0)
            generate_selected_button = gr.Button("生成所選行的圖片")

        # Outputs: generated image and the textual operation log.
        generated_image_output = gr.Image(label="生成的圖片")
        operation_log_output = gr.Textbox(label="操作日誌", lines=10)

        # Load the sheet into the table, then copy the table into the state.
        sheet_loaded = process_button.click(
            fn=process_sheet_data,
            inputs=sheet_url_input,
            outputs=output_dataframe,
        )
        sheet_loaded.success(
            fn=lambda x: x,
            inputs=output_dataframe,
            outputs=processed_df_state,
        )

        # Generate an image for the row selected by position.
        generate_selected_button.click(
            fn=generate_image_for_row,
            inputs=[row_index_input, output_dataframe],
            outputs=[generated_image_output, operation_log_output],
        )

    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from urllib.parse import urlparse, parse_qs
8
  import google.generativeai as genai
9
  import time
10
+ import tempfile
11
+ import uuid
12
+
13
+ # --- 修正後的 API 設定與函式 ---
14
 
15
  # 全域變數來儲存 API 金鑰
16
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 
26
  if not GEMINI_API_KEY:
27
  raise ValueError("ERROR: GEMINI_API_KEY environment variable is not set. Please set it correctly.")
28
 
29
+ # 配置 Gemini API,使用 genai.configure() 替代 genai.Client()
30
  genai.configure(api_key=GEMINI_API_KEY)
31
 
32
def generate_content(text, images, model="gemini-1.5-pro-latest"):
    """Send a prompt and images to a Gemini model and collect the reply.

    Args:
        text: Prompt text; placed after the images in the request.
        images: Images to send with the prompt (may be empty or ``None``).
        model: Name of the Gemini model to query.

    Returns:
        A ``(image_path, text_response)`` tuple. ``image_path`` is the path of
        a temporary PNG written from the last image part of the reply, or
        ``None`` when the reply has no image or the call failed.
        ``text_response`` holds the reply's text parts, one per line, or a
        message explaining why nothing was returned.
    """
    try:
        contents = list(images or []) + [text]

        # generate_content() is a method of GenerativeModel; the SDK has no
        # module-level genai.generate_content().
        response = genai.GenerativeModel(model).generate_content(contents)

        if not response.candidates:
            # The block reason is an attribute of prompt_feedback itself,
            # not an entry of the individual safety ratings.
            feedback = getattr(response, "prompt_feedback", None)
            block_reason = getattr(feedback, "block_reason", None)
            if block_reason:
                reason = getattr(block_reason, "name", block_reason)
                return None, f"API 被安全政策阻止。原因:{reason}\n"
            return None, "API 回應中未找到內容,可能的原因是內部錯誤或無效請求。\n"

        text_response = ""
        image_path = None
        for part in response.candidates[0].content.parts:
            # Look for image data first: a part without text reports an empty
            # string rather than None, so testing text first would send every
            # part down the text branch.
            inline_data = getattr(part, "inline_data", None)
            image_bytes = getattr(inline_data, "data", None)
            if image_bytes:
                generated_image = Image.open(BytesIO(image_bytes))
                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                    temp_path = tmp.name
                # Save after the handle is closed so the file can be reopened
                # for writing on every platform.
                generated_image.save(temp_path)
                image_path = temp_path
                print(f"Generated image saved to: {temp_path} with prompt: {text}")
            elif getattr(part, "text", None):
                text_response += part.text + "\n"

        return image_path, text_response

    except Exception as e:
        # API boundary: report the failure to the caller instead of crashing.
        print(f"Error calling Gemini API: {e}")
        return None, f"API 呼叫失敗: {e}"
78
 
79
def load_uploaded_images(uploaded_files):
    """Open the uploaded files as PIL images.

    Args:
        uploaded_files: Uploaded items, either path strings or objects that
            expose the path as ``.name``; may be empty or ``None``.

    Returns:
        A list of the images that could be opened. Files that fail to load
        are logged and skipped.
    """
    if not uploaded_files:
        return []

    loaded_images = []
    for file in uploaded_files:
        # Resolve the path once, outside the try, so the error handler can
        # log it without touching an attribute that may not exist.
        path = getattr(file, "name", file)
        try:
            img = Image.open(path)
            # Decode now so a corrupt file fails here, not later in the UI.
            img.load()
            loaded_images.append(img)
        except Exception as e:
            # Best effort: one unreadable upload must not discard the rest.
            print(f"Failed to load image {path}: {e}")
    return loaded_images
94
+
95
def process_image_and_prompt(uploaded_files, prompt, gemini_api_key):
    """Run the uploaded images and the prompt through the Gemini API.

    Args:
        uploaded_files: Files from the upload component.
        prompt: Prompt text entered by the user.
        gemini_api_key: Key typed into the UI. NOTE(review): currently
            unused; the key configured at import time is always used.
            Applying it with ``genai.configure`` would change the key for
            every user of the process, so it is deliberately not wired in.

    Returns:
        ``(uploaded_images, generated_images, text)``. ``generated_images``
        is a one-element list when the model returned an image, else
        ``None``; ``text`` is the model's text reply when no image came back.

    Raises:
        gr.Error: If no file was uploaded or processing fails.
    """
    # Validate outside the try so this message reaches the user unchanged.
    if not uploaded_files:
        raise gr.Error("請至少上傳一張圖片。", duration=5)

    try:
        images = load_uploaded_images(uploaded_files)

        # gemini-1.5-pro-latest accepts image input.
        model = "gemini-1.5-pro-latest"
        image_path, text_response = generate_content(text=prompt, images=images, model=model)

        # The loaded uploads are echoed back for the "Uploaded Images" gallery.
        if not image_path:
            return images, None, text_response

        result_img = Image.open(image_path)
        result_img.load()
        return images, [result_img], ""

    except gr.Error:
        # Already user-facing; do not wrap it in a second message.
        raise
    except Exception as e:
        raise gr.Error(f"處理錯誤: {e}", duration=5) from e
121
+
122
+ # --- Gradio 介面設定(這部分與你的程式碼完全相同)---
123
+ with gr.Blocks(css_paths="style.css",) as demo:
124
+ gr.HTML(
125
+ """
126
+ <div class="header-container">
127
+ <div>
128
+ <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
129
+ </div>
130
+ <div>
131
+ <h1>Gemini for Image Editing</h1>
132
+ <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️|
133
+ <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a> this Repo |
134
+ <a href="https://aistudio.google.com/apikey">Get an API Key</a> |
135
+ Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
136
+ </div>
137
+ </div>
138
+ """
139
+ )
140
+ with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
141
+ gr.Markdown("""
142
+ - **Issue:** ❗ Sometimes the model returns text instead of an image.
143
+ ### 🔧 Steps to Address:
144
+ 1. **🛠️ Duplicate the Repository**
145
+ - Create a separate copy for modifications.
146
+ 2. **🔑 Use Your Own Gemini API Key**
147
+ - You **must** configure your own Gemini key for generation!
148
+ """)
149
+ with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
150
+ gr.Markdown("""
151
+ ### 📌 Usage
152
+ - Upload an image and enter a prompt to generate outputs.
153
+ - If text is returned instead of an image, it will appear in the text output.
154
+ - Upload Only PNG Image
155
+ - ❌ **Do not use NSFW images!**
156
+ """)
157
+ with gr.Row(elem_classes="main-content"):
158
+ with gr.Column(elem_classes="input-column"):
159
+ image_input = gr.File(
160
+ file_types=["image"],
161
+ file_count="multiple",
162
+ label="Upload Images ",
163
+ elem_id="image-input",
164
+ elem_classes="upload-box"
165
+ )
166
+ gemini_api_key = gr.Textbox(
167
+ lines=1,
168
+ placeholder="Enter Gemini API Key (optional)",
169
+ label="Gemini API Key (optional)",
170
+ elem_classes="api-key-input"
171
+ )
172
+ prompt_input = gr.Textbox(
173
+ lines=2,
174
+ placeholder="Enter prompt here...",
175
+ label="Prompt",
176
+ elem_classes="prompt-input"
177
+ )
178
+ submit_btn = gr.Button("Generate", elem_classes="generate-btn")
179
+ with gr.Column(elem_classes="output-column"):
180
+ uploaded_gallery = gr.Gallery(label="Uploaded Images", elem_classes="uploaded-gallery")
181
+ output_gallery = gr.Gallery(label="Generated Outputs", elem_classes="output-gallery")
182
+ output_text = gr.Textbox(
183
+ label="Gemini Output",
184
+ placeholder="Text response will appear here if no image is generated.",
185
+ elem_classes="output-text"
186
+ )
187
+ submit_btn.click(
188
+ fn=process_image_and_prompt,
189
+ inputs=[image_input, prompt_input, gemini_api_key],
190
+ outputs=[uploaded_gallery, output_gallery, output_text],
191
+ )
192
+ image_input.upload(
193
+ fn=load_uploaded_images,
194
+ inputs=[image_input],
195
+ outputs=[uploaded_gallery],
196
+ )
197
+ gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
198
+ examples = [
199
+ ["data/1.webp", 'change text to "AMEER"'],
200
+ ["data/2.webp", "remove the spoon from hand only"],
201
+ ["data/3.webp", 'change text to "Make it "'],
202
+ ["data/1.jpg", "add joker style only on face"],
203
+ ["data/1777043.jpg", "add lipstick on lip only"],
204
+ ["data/76860.jpg", "add lipstick on lip only"],
205
+ ["data/2807615.jpg", "make it happy looking face only"],
206
+ ]
207
+ gr.Examples(
208
+ examples=examples,
209
+ inputs=[image_input, prompt_input,],
210
+ elem_id="examples-grid"
211
+ )
212
+ demo.queue(max_size=50).launch(mcp_server=True, share=True)