# Source: Hugging Face Space jerrychen428/groq_API_gradio
# (commit fc15c86, file size 5,264 bytes; Space status when captured: Sleeping)

import gradio as gr
from groq import Groq
import base64
import io
from PIL import Image

def encode_image(image):
    """Encode a PIL image as a base64 JPEG string.

    Args:
        image: A PIL ``Image`` (any object exposing ``mode``, ``convert``
            and ``save`` in the PIL style works).

    Returns:
        The JPEG-encoded image bytes, base64-encoded and returned as ``str``.
    """
    # Pillow's JPEG writer only accepts these modes; anything else (RGBA, LA,
    # P, I, F, ...) makes ``save`` raise OSError, so convert those to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")

def analyze_image(api_key, image, prompt, temperature=1, max_tokens=512):
    """Send an image plus a text prompt to the Groq chat API and return the reply.

    Args:
        api_key: Groq API key; surrounding whitespace is stripped before use.
        image: PIL image to analyze, or ``None`` when nothing was uploaded.
        prompt: Instruction text sent alongside the image.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on completion tokens; coerced to ``int``
            because UI sliders may deliver the value as a float.

    Returns:
        The model's reply text, or a user-facing error message (prefixed with
        "錯誤：") when validation fails or the request raises. This function
        never raises: it is the UI boundary, so every failure is reported as
        text in the output box.
    """
    try:
        # Reject a missing or whitespace-only API key.
        if not api_key or not api_key.strip():
            return "錯誤：請輸入有效的 Groq API Key"

        # Reject a missing image.
        if image is None:
            return "錯誤：請上傳一張圖片"

        # Reject a missing or whitespace-only prompt.
        if not prompt or not prompt.strip():
            return "錯誤：請輸入分析提示文字"

        # Create the Groq client.
        client = Groq(api_key=api_key.strip())

        # Embed the image inline as a base64 JPEG data URL.
        base64_image = encode_image(image)
        image_content = {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
        }

        # Send a single user message carrying both the text and the image.
        completion = client.chat.completions.create(
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=[{
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    image_content
                ]
            }],
            temperature=temperature,
            # The API field is an integer; a slider may hand over e.g. 512.0.
            max_completion_tokens=int(max_tokens),
            top_p=1,
            stream=False,
            stop=None,
        )

        # The message content may be None; always hand a string back to the UI.
        content = completion.choices[0].message.content
        return content if content is not None else ""

    except Exception as e:
        return f"錯誤：{str(e)}"

# Build the Gradio interface.
# Components are laid out in the order they are created inside the nested
# context managers: a two-column row (inputs on the left, result on the
# right), followed by a block of example prompts.
with gr.Blocks(title="Groq 圖片分析器", theme=gr.themes.Soft()) as demo:
    # Page header and usage instructions.
    gr.Markdown(
        """
        # 🔍 Groq 圖片分析器

        使用 Groq API 和 Llama-4-Scout 模型來分析圖片內容

        **使用說明：**
        1. 輸入您的 Groq API Key
        2. 上傳要分析的圖片
        3. 輸入分析提示文字
        4. 調整參數（可選）
        5. 點擊「分析圖片」按鈕
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            # API key input (password-type textbox).
            api_key = gr.Textbox(
                label="Groq API Key",
                placeholder="輸入您的 Groq API Key (例: gsk_...)",
                type="password",
                info="您的 API Key 將被安全處理，不會被儲存"
            )

            # Image upload; type="pil" so the handler receives a PIL image,
            # which is what encode_image() expects.
            image = gr.Image(
                label="上傳圖片",
                type="pil",
                format="jpeg"
            )

            # Prompt input, pre-filled with a sample multi-line question.
            prompt = gr.Textbox(
                label="分析提示",
                placeholder="請描述您想要分析的內容...",
                lines=5,
                value="""幫我算出有幾個人和大象，同時說明
這可能是什麼儀式？
天氣和季節在某個時段？
在什麼國家？"""
            )

            # Advanced parameters; slider defaults mirror the defaults of
            # analyze_image() (temperature 1, max_tokens 512).
            with gr.Accordion("進階設定", open=False):
                temperature = gr.Slider(
                    label="Temperature (創意度)",
                    minimum=0.1,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    info="數值越高，回答越有創意但可能不太準確"
                )

                max_tokens = gr.Slider(
                    label="最大回應長度",
                    minimum=100,
                    maximum=2048,
                    value=512,
                    step=50,
                    info="限制回應的最大字數"
                )

            # Analyze button.
            analyze_btn = gr.Button(
                "🔍 分析圖片",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            # Analysis result output.
            output = gr.Textbox(
                label="分析結果",
                lines=20,
                max_lines=30,
                show_copy_button=True,
                placeholder="分析結果將顯示在這裡..."
            )

    # Bind events: the inputs list maps positionally onto
    # analyze_image(api_key, image, prompt, temperature, max_tokens), and the
    # returned string (result or error message) is written to the output box.
    analyze_btn.click(
        fn=analyze_image,
        inputs=[api_key, image, prompt, temperature, max_tokens],
        outputs=output
    )

    # Examples section: static prompt suggestions (display only, not wired to
    # any event).
    gr.Markdown(
        """
        ## 💡 提示範例

        **分析人物和動物：**
        - "請計算圖片中有多少人和動物，並描述他們在做什麼"

        **場景描述：**
        - "描述這張圖片的場景，包括時間、地點、天氣狀況"

        **文化分析：**
        - "這看起來像什麼文化活動或儀式？可能在哪個國家或地區？"

        **物體識別：**
        - "識別圖片中的所有物體並說明它們的用途"
        """
    )

# Start the app when this file is executed as a script.
if __name__ == "__main__":
    # Collect the launch settings in one place, then hand them to Gradio.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "debug": False,
    }
    demo.launch(**launch_options)