Spaces:

davidlee831117
/

autoLS

Sleeping

App Files Files Community

davidlee831117 committed on Sep 1, 2025

Commit

e06bc2a

verified ·

1 Parent(s): 52c912b

Update app.py

Browse files

Files changed (1) hide show

app.py +173 -189

app.py CHANGED Viewed

@@ -7,6 +7,10 @@ from PIL import Image
 from urllib.parse import urlparse, parse_qs
 import google.generativeai as genai
 import time
 # Global variable that stores the API key (read from the environment; None when unset)
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
@@ -22,207 +26,187 @@ print(f"Debug: Top-level Loaded GEMINI_API_KEY (first 5 chars): {GEMINI_API_KEY[
 # Fail at import time when no API key is available.
 if not GEMINI_API_KEY:
     raise ValueError("ERROR: GEMINI_API_KEY environment variable is not set. Please set it correctly.")
-# Configure the Gemini API
 genai.configure(api_key=GEMINI_API_KEY)
-def load_image_from_url(url: str):
-    """Download an image from a URL and return it as a PIL Image.
-
-    Returns the image converted to RGB, or None when the request or the
-    decoding fails (the error is printed instead of raised).
-    """
     try:
-        headers = {'User-Agent': 'Mozilla/5.0'}
-        response = requests.get(url, timeout=20, headers=headers)
-        response.raise_for_status()  # Check whether the request succeeded
-        image = Image.open(BytesIO(response.content)).convert("RGB")
-        print(f"Debug: Successfully loaded image from URL: {url}")
-        return image
-    except requests.exceptions.HTTPError as e:
-        print(f"Error downloading image from {url}: HTTP Error {e.response.status_code}")
-        return None
-    except Exception as e:
-        # Any other failure (network error, undecodable image, ...) also yields None.
-        print(f"An unexpected error occurred: {e}")
-        return None
-def build_prompt_for_operation(prompt, has_references=False, aspect_ratio="1:1"):
-    """Build the complete prompt text from the given inputs.
-
-    Args:
-        prompt: User-supplied description embedded in the final prompt.
-        has_references: When true, the prompt refers to the reference images.
-        aspect_ratio: Key into the aspect-ratio phrases below; an unknown
-            value falls back to 'square format'.
-
-    Returns:
-        The assembled prompt string.
-    """
-    aspect_instructions = {
-        "1:1": "square format",
-        "16:9": "widescreen landscape format",
-        "9:16": "portrait format",
-        "4:3": "standard landscape format",
-        "3:4": "standard portrait format"
-    }
-    base_quality = "Generate a high-quality, photorealistic image"
-    format_instruction = f"in {aspect_instructions.get(aspect_ratio, 'square format')}"
-    # Start with the reference-image wording, then replace it when there are no references.
-    final_prompt = f"{base_quality} inspired by the style and elements of the reference images. {prompt}. {format_instruction}."
-    if not has_references:
-        final_prompt = f"{base_quality} of: {prompt}. {format_instruction}."
-    return final_prompt
-def call_gemini_api(prompt, images):
-    """Call the Gemini API using the official library.
-
-    Returns a (image_parts, operation_log) tuple: image_parts is a list with
-    the decoded image bytes, or None when no image was produced;
-    operation_log is a human-readable status string.
-    """
-    operation_log = ""
-    try:
-        # Use the official genai.generate_content function
-        # NOTE(review): the google.generativeai SDK documents generate_content
-        # as a GenerativeModel method - confirm this module-level call resolves.
         response = genai.generate_content(
-            contents=[prompt] + images, # The image list is passed directly as input
-            model="gemini-1.5-pro-latest" # Use the officially recommended multimodal model
         )
-        print(f"Debug: Full API Response: {response.text}")
-        # Check for safety-policy problems
-        # NOTE(review): this treats the response as a dict - confirm the
-        # response object supports `in` and subscripting.
-        if 'prompt_feedback' in response:
-            if 'safety_ratings' in response['prompt_feedback']:
-                for rating in response['prompt_feedback']['safety_ratings']:
-                    if rating['block_reason'] != 'NONE':
-                        operation_log += f"API 被安全政策阻止。原因：{rating['block_reason']}\n"
-                        return None, operation_log
-        # Handle the response
-        if response.text is not None and "data:image" in response.text:
-            # This is an inline Base64 image string that needs to be decoded
-            base64_string = response.text.split(',')[1]
-            image_data = base64.b64decode(base64_string)
-            image_parts = [image_data]
-            operation_log += f"成功生成 {len(image_parts)} 張圖片。\n"
-            return image_parts, operation_log
-        elif response.text is not None:
-            # Text was returned instead of an image
-            operation_log += f"API 回應文字：{response.text}\n"
-            return None, operation_log
-        else:
-            # Nothing was returned
-            operation_log += "API 回應沒有包含任何圖像或文字數據。\n"
-            return None, operation_log
-    except Exception as e:
-        # Any failure is reported through the log string rather than raised.
-        operation_log = f"意外錯誤: {type(e).__name__} - {str(e)}\n"
-        return None, operation_log
-def generate_image(white_background_url, reference_image_url, prompt):
-    """Main function called by the Gradio interface.
-
-    Downloads both images, builds the prompt, calls the Gemini API and writes
-    the first returned image to the generated_images directory.
-
-    Returns a (output_path, operation_log) tuple; output_path is None when
-    no image was produced.
-    """
-    if not GEMINI_API_KEY:
-        return None, "Error: GEMINI_API_KEY is not set."
-    wb_image = load_image_from_url(white_background_url)
-    ref_image = load_image_from_url(reference_image_url)
-    if wb_image is None or ref_image is None:
-        return None, "Error: One or more images failed to load from URL."
-    images = [wb_image, ref_image]
-    final_prompt = build_prompt_for_operation(prompt, has_references=True)
-    generated_images_binary, operation_log = call_gemini_api(final_prompt, images)
-    if generated_images_binary:
-        output_dir = "generated_images"
-        os.makedirs(output_dir, exist_ok=True)
-        # Use a timestamp or unique ID to keep the filename unique
-        output_path = os.path.join(output_dir, f"generated_{int(time.time())}.png")
-        with open(output_path, "wb") as f:
-            f.write(generated_images_binary[0])
-        return output_path, operation_log
-    else:
-        return None, operation_log
-def read_google_sheet(sheet_url):
-    """Read data from a Google Sheet.
-
-    Rewrites the sheet URL into a CSV export URL and loads it with pandas.
-    Returns the resulting DataFrame; any failure is re-raised as gr.Error.
-    """
-    try:
-        def build_csv_url(url: str) -> str:
-            # Extract the document id from a /spreadsheets/d/<id>/... path.
-            parsed = urlparse(url)
-            path_parts = parsed.path.strip("/").split("/")
-            doc_id = None
-            if len(path_parts) >= 3 and path_parts[0] == "spreadsheets" and path_parts[1] == "d":
-                doc_id = path_parts[2]
-            # The gid may appear in the query string or in the fragment.
-            qs_gid = parse_qs(parsed.query).get("gid", [None])[0]
-            frag_gid = None
-            if parsed.fragment:
-                frag_qs = parse_qs(parsed.fragment)
-                frag_gid = frag_qs.get("gid", [None])[0]
-            gid = qs_gid or frag_gid or "0"
-            if doc_id:
-                return f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={gid}"
-            # Already a CSV export URL: use it unchanged.
-            if "/export" in parsed.path and "format=csv" in parsed.query:
-                return url
-            return url.replace("/edit#gid=0", "/export?format=csv&gid=0")
-        csv_url = build_csv_url(sheet_url)
-        df = pd.read_csv(csv_url, engine='python', on_bad_lines='warn')
-        return df
     except Exception as e:
-        raise gr.Error(f"Error reading Google Sheet: {e}")
-def process_sheet_data(sheet_url):
-    """Process the spreadsheet data and prepare it for the Gradio DataFrame.
-
-    Returns a list of [index, col0, col1, col2] rows, keeping only rows whose
-    first three columns are all non-null. Any failure is raised as gr.Error.
-    """
     try:
-        df = read_google_sheet(sheet_url)
-        if df.shape[1] < 3:
-            error_msg = f"Error: Google Sheet has only {df.shape[1]} columns, but 3 are expected (White Background URL, Reference Image URL, Prompt)."
-            raise gr.Error(error_msg)
-        data_list = []
-        for i, row in df.iterrows():
-            if pd.notna(row.iloc[0]) and pd.notna(row.iloc[1]) and pd.notna(row.iloc[2]):
-                data_list.append([i, row.iloc[0], row.iloc[1], row.iloc[2]])
-        return data_list
     except Exception as e:
-        # Note: this also catches the gr.Error raised above and wraps it again.
-        raise gr.Error(f"Error processing sheet data: {e}")
-def generate_image_for_row(row_index, dataframe_data):
-    """Generate an image for the given row index of the Gradio DataFrame.
-
-    Returns whatever generate_image returns, or (None, error message) when
-    row_index is outside the table.
-    """
-    df = pd.DataFrame(dataframe_data, columns=["Index", "白背圖URL", "參考圖URL", "提示詞"])
-    if not (0 <= row_index < len(df)):
-        return None, "Error: Invalid row index."
-    # row_index is positional (iloc), not the value stored in the "Index" column.
-    row = df.iloc[int(row_index)]
-    white_background_url = row['白背圖URL']
-    reference_image_url = row['參考圖URL']
-    prompt = row['提示詞']
-    return generate_image(white_background_url, reference_image_url, prompt)
-# Build and launch the Gradio UI when the file is run as a script.
-if __name__ == "__main__":
-    with gr.Blocks() as demo:
-        gr.Markdown("# AutoLS Gradio Image Generator (Powered by Gemini API)")
-        gr.Markdown("輸入 Google Sheet 網址來處理圖像生成請求。")
-        sheet_url_input = gr.Textbox(label="Google Sheet URL", value="https://docs.google.com/spreadsheets/d/1G3olHxydDIbnyXdh5nnw5TG0akZFeMeYm-25JmCGDLg/edit?gid=0#gid=0")
-        process_button = gr.Button("處理試算表")
-        processed_df_state = gr.State()
-        output_dataframe = gr.DataFrame(
-            headers=["Index", "白背圖URL", "參考圖URL", "提示詞"],
-            col_count=(4, "fixed"),
-            interactive=False
-        )
-        with gr.Row():
-            row_index_input = gr.Number(label="要生成的行數", precision=0, value=0)
-            generate_selected_button = gr.Button("生成所選行的圖片")
-        generated_image_output = gr.Image(label="生成的圖片")
-        operation_log_output = gr.Textbox(label="操作日誌", lines=10)
-        # Load the sheet into the table, then copy the table into the state on success.
-        process_button.click(
-            fn=process_sheet_data,
-            inputs=sheet_url_input,
-            outputs=output_dataframe
-        ).success(
-            fn=lambda x: x,
-            inputs=output_dataframe,
-            outputs=processed_df_state
-        )
-        # Generate an image for the selected row; the table itself is the data source.
-        generate_selected_button.click(
-            fn=generate_image_for_row,
-            inputs=[row_index_input, output_dataframe],
-            outputs=[generated_image_output, operation_log_output]
-        )
-    demo.launch()

 from urllib.parse import urlparse, parse_qs
 import google.generativeai as genai
 import time
+import tempfile
+import uuid
+# --- Revised API configuration and functions ---
 # Global variable that stores the API key (read from the environment; None when unset)
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
     raise ValueError("ERROR: GEMINI_API_KEY environment variable is not set. Please set it correctly.")
+# Configure the Gemini API, using genai.configure() instead of genai.Client()
 genai.configure(api_key=GEMINI_API_KEY)
+def generate_content(text, images, model="gemini-1.5-pro-latest"):
+    """Send a prompt and images to the Gemini API and collect the reply.
+
+    Args:
+        text: Prompt text; it is placed after the images in the request.
+        images: List of PIL images to send (None or empty sends text only).
+        model: Name of the Gemini model to call.
+
+    Returns:
+        A (image_path, text_response) tuple. image_path is the path of a
+        temporary PNG file holding the last image part of the reply, or None
+        when the reply has no image. text_response is the concatenated text
+        parts, or a diagnostic message when the call fails or is blocked.
+    """
     try:
+        # contents is the list of images followed by the prompt text
+        contents = list(images or []) + [text]
+        # generate_content is a method of GenerativeModel; the
+        # google.generativeai module has no function of that name.
+        response = genai.GenerativeModel(model).generate_content(contents)
+        # No candidates: report the block reason when the prompt was blocked.
+        if not response.candidates:
+            feedback = getattr(response, "prompt_feedback", None)
+            # block_reason belongs to prompt_feedback, not to the individual
+            # safety ratings; an unset reason is falsy.
+            block_reason = getattr(feedback, "block_reason", None)
+            if block_reason:
+                reason = getattr(block_reason, "name", block_reason)
+                return None, f"API 被安全政策阻止。原因：{reason}\n"
+            return None, "API 回應中未找到內容，可能的原因是內部錯誤或無效請求。\n"
+        text_response = ""
+        image_path = None
+        for part in response.candidates[0].content.parts:
+            # Unset fields on a part are empty rather than None, so test for
+            # actual image bytes first; "text is not None" would match every part.
+            inline_data = getattr(part, "inline_data", None)
+            image_bytes = getattr(inline_data, "data", None)
+            if image_bytes:
+                generated_image = Image.open(BytesIO(image_bytes))
+                # Reserve a temp file name, close the handle, then save to it.
+                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+                    image_path = tmp.name
+                generated_image.save(image_path)
+                print(f"Generated image saved to: {image_path} with prompt: {text}")
+            elif getattr(part, "text", None):
+                text_response += part.text + "\n"
+        return image_path, text_response
     except Exception as e:
+        # Best effort: report the failure to the caller as text instead of raising.
+        print(f"Error calling Gemini API: {e}")
+        return None, f"API 呼叫失敗: {e}"
+def load_uploaded_images(uploaded_files):
+    """Load the uploaded files as PIL images for display and for the API call.
+
+    Args:
+        uploaded_files: Iterable of uploads; each item is either a filesystem
+            path (str / os.PathLike) or an object whose ``name`` attribute is
+            the path. None or an empty list is allowed.
+
+    Returns:
+        A list of fully loaded PIL images in upload order. Files that cannot
+        be opened are skipped and the failure is printed.
+    """
+    if not uploaded_files:
+        return []
+    loaded_images = []
+    for file in uploaded_files:
+        # Accept plain paths as well as upload objects that expose .name.
+        path = file if isinstance(file, (str, os.PathLike)) else file.name
+        try:
+            # Image.open is lazy; copy() reads the pixel data so the file
+            # handle can be closed as soon as the with-block ends.
+            # NOTE(review): the earlier code called convert("RGBA") only on
+            # images that were already RGBA, which changed nothing, so it was
+            # dropped - confirm whether an RGB conversion was intended.
+            with Image.open(path) as img:
+                loaded_images.append(img.copy())
+        except Exception as e:
+            print(f"Failed to load image {path}: {e}")
+    return loaded_images
+def process_image_and_prompt(uploaded_files, prompt, gemini_api_key):
+    """Process the uploaded images and the prompt, then call the API.
+
+    Args:
+        uploaded_files: Uploads from the file input; at least one is required.
+        prompt: Prompt text sent together with the images.
+        gemini_api_key: Value of the API-key textbox. It is not read here;
+            the API is configured once at import time from the
+            GEMINI_API_KEY environment variable.
+
+    Returns:
+        A (uploaded_images, generated_images, text) tuple for the two
+        galleries and the text box: generated_images is a one-element list
+        when an image came back, otherwise None with the text response.
+
+    Raises:
+        gr.Error: When no file was uploaded, or when processing fails.
+    """
+    # Validate outside the try block so this message reaches the user as is
+    # instead of being wrapped by the generic handler below.
+    if not uploaded_files:
+        raise gr.Error("請至少上傳一張圖片。", duration=5)
     try:
+        images = load_uploaded_images(uploaded_files)
+        # gemini-1.5-pro-latest is a multimodal model that accepts image input
+        model = "gemini-1.5-pro-latest"
+        image_path, text_response = generate_content(text=prompt, images=images, model=model)
+        if image_path:
+            # copy() loads the pixels so the temp file handle can be closed.
+            with Image.open(image_path) as opened:
+                result_img = opened.copy()
+            # The uploaded images are returned too so the gallery keeps showing them.
+            return images, [result_img], ""
+        return images, None, text_response
+    except gr.Error:
+        raise
     except Exception as e:
+        raise gr.Error(f"處理錯誤: {e}", duration=5) from e
+# --- Gradio interface setup (this part is identical to your code) ---
+# Components are created in layout order, so statement order here defines the page.
+with gr.Blocks(css_paths="style.css",) as demo:
+    # Page header: logo, title and links.
+    gr.HTML(
+    """
+    <div class="header-container">
+      <div>
+          <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
+      </div>
+      <div>
+          <h1>Gemini for Image Editing</h1>
+          <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️|
+           <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a> this Repo |
+          <a href="https://aistudio.google.com/apikey">Get an API Key</a> |
+           Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
+      </div>
+    </div>
+    """
+    )
+    # Collapsed help sections.
+    with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
+        gr.Markdown("""
+    - **Issue:** ❗ Sometimes the model returns text instead of an image.
+      ### 🔧 Steps to Address:
+    1. **🛠️ Duplicate the Repository**
+         - Create a separate copy for modifications.
+      2. **🔑 Use Your Own Gemini API Key**
+         - You **must** configure your own Gemini key for generation!
+      """)
+    with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
+        gr.Markdown("""
+    ### 📌 Usage
+      - Upload an image and enter a prompt to generate outputs.
+    - If text is returned instead of an image, it will appear in the text output.
+    - Upload Only PNG Image
+    - ❌ **Do not use NSFW images!**
+    """)
+    # Main area: inputs in the first column, outputs in the second.
+    with gr.Row(elem_classes="main-content"):
+        with gr.Column(elem_classes="input-column"):
+            image_input = gr.File(
+                file_types=["image"],
+                file_count="multiple",
+                label="Upload Images ",
+                elem_id="image-input",
+                elem_classes="upload-box"
+            )
+            # NOTE(review): this value is passed to process_image_and_prompt,
+            # which does not read it - the environment key is what gets used.
+            gemini_api_key = gr.Textbox(
+                lines=1,
+                placeholder="Enter Gemini API Key (optional)",
+                label="Gemini API Key (optional)",
+                elem_classes="api-key-input"
+            )
+            prompt_input = gr.Textbox(
+                lines=2,
+                placeholder="Enter prompt here...",
+                label="Prompt",
+                elem_classes="prompt-input"
+            )
+            submit_btn = gr.Button("Generate", elem_classes="generate-btn")
+        with gr.Column(elem_classes="output-column"):
+            uploaded_gallery = gr.Gallery(label="Uploaded Images", elem_classes="uploaded-gallery")
+            output_gallery = gr.Gallery(label="Generated Outputs", elem_classes="output-gallery")
+            output_text = gr.Textbox(
+                label="Gemini Output",
+                placeholder="Text response will appear here if no image is generated.",
+                elem_classes="output-text"
+            )
+    # Generate button: run the model and fill both galleries and the text box.
+    submit_btn.click(
+        fn=process_image_and_prompt,
+        inputs=[image_input, prompt_input, gemini_api_key],
+        outputs=[uploaded_gallery, output_gallery, output_text],
+    )
+    # Show the uploaded files in the gallery as soon as they are uploaded.
+    image_input.upload(
+        fn=load_uploaded_images,
+        inputs=[image_input],
+        outputs=[uploaded_gallery],
+    )
+    gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
+    # Each example is [image path, prompt], matching the inputs list below.
+    examples = [
+        ["data/1.webp", 'change text to "AMEER"'],
+        ["data/2.webp", "remove the spoon from hand only"],
+        ["data/3.webp", 'change text to "Make it "'],
+        ["data/1.jpg", "add joker style only on face"],
+        ["data/1777043.jpg", "add lipstick on lip only"],
+        ["data/76860.jpg", "add lipstick on lip only"],
+        ["data/2807615.jpg", "make it happy looking face only"],
+    ]
+    gr.Examples(
+        examples=examples,
+        inputs=[image_input, prompt_input,],
+        elem_id="examples-grid"
+    )
+# Runs at import time (no __main__ guard): enable the queue with max_size=50
+# and launch with the mcp_server and share options turned on.
+demo.queue(max_size=50).launch(mcp_server=True, share=True)