Spaces:

AIDemoProject
/

DeblurGANV2Demo

Running

File size: 12,785 Bytes

import os  # 匯入 os 模組以處理環境變數和檔案路徑
import io  # 匯入 io 模組以處理二進位資料流
import PIL.Image  # 匯入 PIL 的 Image 模組以處理圖片
import requests  # 匯入 requests 模組以進行 HTTP 請求
from dotenv import load_dotenv # 匯入 dotenv 以載入 .env 環境變數檔案
import json # 匯入 json 庫用於序列化
from urllib.parse import urlparse

# LangChain 相關匯入
from langchain.agents import create_agent
from langchain.tools import tool
from langchain.messages import AIMessage, HumanMessage, ToolMessage

from langchain_google_genai import ChatGoogleGenerativeAI

from google import genai  # 匯入 Google GenAI 函式庫
from google.genai import types  # 匯入 GenAI 的類型定義
from services.deblur import deblur_image_tiled  # 從本地服務匯入去模糊函式

# ==========================
#  Environment setup
# ==========================
# Load variables from a local .env file into the process environment.
load_dotenv()

# Google AI API key, read from the environment. Indexing os.environ (instead
# of calling os.getenv) raises KeyError at import time when the key is missing.
google_api = os.environ["GOOGLE_API_KEY"]

# Google GenAI client shared by the image-generation and image-analysis tools.
genai_client = genai.Client(api_key=google_api)

# ==========================
#  Shared helper functions (image loading)
# ==========================
def load_image(file_url: str) -> "PIL.Image.Image":
    """Load an image from an HTTP(S) URL or a local path and return it as RGB.

    Args:
        file_url: Either an ``http``/``https`` URL or a local filesystem path.

    Returns:
        A fully loaded image converted to ``RGB`` mode. The returned object is
        independent of the underlying file/stream, which is closed before
        returning.

    Raises:
        ValueError: If ``file_url`` is empty or not a string, the download
            fails, the local path does not exist, or the data cannot be
            decoded as an image. The underlying exception, when there is one,
            is attached as ``__cause__``.
    """
    # Reject empty / non-string input up front so callers always get the
    # documented ValueError instead of an AttributeError from urlparse.
    if not isinstance(file_url, str) or not file_url:
        raise ValueError("圖片路徑無效，無法進行分析。")

    scheme = urlparse(file_url).scheme

    if scheme in ("http", "https"):
        # Remote image: download the whole body, then decode it from memory.
        try:
            print(f"Agent 正在下載圖片: {file_url}")
            resp = requests.get(file_url, timeout=15)
            resp.raise_for_status()
            # convert() loads the pixel data and returns a new image, so the
            # source image can be closed as soon as the conversion is done.
            with PIL.Image.open(io.BytesIO(resp.content)) as src:
                return src.convert("RGB")
        except Exception as e:
            # Keep the original exception as the cause for easier debugging.
            raise ValueError(f"下載圖片失敗: {e}") from e

    # Anything that is not http(s) is treated as a local file path.
    if not os.path.exists(file_url):
        raise ValueError("圖片路徑無效，無法進行分析。")
    try:
        # The context manager guarantees the file handle is released even if
        # decoding fails half-way.
        with PIL.Image.open(file_url) as src:
            return src.convert("RGB")
    except Exception as e:
        raise ValueError(f"開啟本地圖片失敗: {e}") from e


# ==========================
#  LangChain 工具定義
# ==========================

# Tool: generate an image from a text prompt with Google GenAI, save it under
# static/ as a JPEG and return a JSON string with its public URL.
#
# NOTE: LangChain's @tool decorator uses the function docstring as the tool
# description shown to the model, so the docstring is runtime text. It is kept
# in its original wording on purpose; maintainer notes live in # comments.
@tool
def generate_and_upload_image(prompt: str) -> str:
    """
    這個工具可以根據文字提示生成圖片，並將其上傳到伺服器。
    
    Args:
        prompt: 用於生成圖片的文字提示。
        
    Returns:
        一個 JSON 格式的字串，包含圖片 URL 和描述，或錯誤訊息。
    """
    try:
        # Fail fast: without the public base URL the saved file cannot be
        # linked, so do not spend a model call (or leave an orphan file).
        base_url = os.getenv("HF_SPACE")
        if not base_url:
            return json.dumps({
                "error": "圖片生成與上傳失敗: 未設定環境變數 HF_SPACE，無法產生圖片網址。"
            })

        # Ask the image-capable model for both text and image output.
        # (Alternative model noted by the original author: "gemini-2.5-flash-image".)
        response = genai_client.models.generate_content(
            model="gemini-2.0-flash-preview-image-generation",
            contents=prompt,
            config=types.GenerateContentConfig(response_modalities=['Text', 'Image']),
        )

        # candidates / content / parts may each be absent in the response
        # (the SDK types them as optional), so walk them defensively.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts or []) if content is not None else []

        # First part that actually carries inline binary data is the image.
        image_binary = next(
            (
                part.inline_data.data
                for part in parts
                if part.inline_data is not None and part.inline_data.data
            ),
            None,
        )

        if not image_binary:
            # The API answered without error but produced no image.
            return json.dumps({
                "error": "圖片生成失敗。API 回應中未包含圖片數據，請嘗試修改提示詞。"
            })

        image = PIL.Image.open(io.BytesIO(image_binary))
        # JPEG cannot store alpha or palette modes (e.g. RGBA / P from a PNG
        # payload); normalise to RGB so save() does not fail.
        if image.mode not in ("RGB", "L"):
            image = image.convert("RGB")

        # Random file name avoids collisions; make sure the target directory
        # exists before writing.
        os.makedirs("static", exist_ok=True)
        file_name = f"static/{os.urandom(16).hex()}.jpg"
        image.save(file_name, format="JPEG")

        # Build the URL with "/" explicitly: os.path.join is for filesystem
        # paths and would use "\\" on Windows.
        image_url = f"{base_url.rstrip('/')}/{file_name}"

        # Uniform JSON success payload.
        return json.dumps({
            "image_url": image_url,
            "text_result": f"圖片已成功生成並上傳。這是根據提示 '{prompt[:50]}...' 生成的圖片。"
        })
    except Exception as e:
        # Tool boundary: report every failure to the agent as JSON instead of
        # raising.
        error_msg = f"圖片生成與上傳失敗: {e}"
        return json.dumps({
            "error": error_msg
        })

# Tool: multimodal question answering. Loads the image through load_image()
# (which accepts local paths as well as http/https URLs) and sends it together
# with the question to the "gemini-2.5-flash" model.
#
# NOTE: LangChain's @tool decorator uses the function docstring as the tool
# description shown to the model, so the docstring is runtime text and is kept
# in its original wording.
@tool
def analyze_image_with_text(image_path: str, user_text: str) -> str:
    """
    這個工具可以根據圖片和文字提示來回答問題 (多模態分析)。
    
    Args:
        image_path: 圖片在本地端儲存的路徑。
        user_text: 針對圖片提出的文字問題。
        
    Returns:
        一個 JSON 格式的字串，包含模型回應或錯誤訊息。
    """
    try:
        # load_image validates the source and raises ValueError on failure,
        # which the handler below turns into the JSON error payload.
        img_user = load_image(image_path)

        # Image object and question are passed together as the contents list.
        response = genai_client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[img_user, user_text],
        )

        # Read the text once; treat both None and an empty string as
        # "no answer" so the agent never receives a blank success result.
        answer = response.text
        out = answer if answer else "Gemini沒答案!請換個說法！"

        # Uniform JSON success payload (text only).
        return json.dumps({
            "text_result": out
        })

    except Exception as e:
        # Tool boundary: report every failure to the agent as JSON.
        return json.dumps({
            "error": f"Gemini執行出錯: {e}"
        })
        

# Tool: load an image (URL or local path), deblur it tile by tile, save the
# result under static/ as a JPEG and return a JSON string with its URL.
#
# NOTE: LangChain's @tool decorator uses the function docstring as the tool
# description shown to the model, so the docstring is runtime text and is kept
# in its original wording.
@tool
def deblur_image_from_url(
    file_url: str,
    user_text: str
) -> str:
    """
    這個工具可以從提供的圖片來源載入影像（支援 HTTP/HTTPS 網址與本地檔案路徑），
    並使用分塊處理（Tiled Processing）進行去模糊（deblur）。處理完成後，
    會將結果儲存於伺服器的 static/ 目錄，並回傳去模糊後圖片的 **絕對 URL 路徑**
    以及根據 user_text 生成的額外文字結果。

    Args:
        file_url:  
            圖片來源，可為：
            - HTTP/HTTPS 網路圖片網址（例如：https://example.com/img.png）  
            - 本地檔案路徑（例如：/tmp/xxx.png）

        user_text:  
            使用者針對圖片提出的處理需求或描述文字。

    Returns: 
            JSON 格式的字串，包含：
            - "image_url": 去模糊後圖片的絕對 URL 路徑  
            - "text_result": 根據 user_text 產生的額外文字說明
    """
    try:
        # Fail fast: without the public base URL the result cannot be linked,
        # so skip the (expensive) processing and avoid an orphan file.
        base_url = os.getenv("HF_SPACE")
        if not base_url:
            return json.dumps({
                "error": "圖片處理失敗。錯誤訊息: 未設定環境變數 HF_SPACE，無法產生圖片網址。"
            })

        # Tiling parameters handed to deblur_image_tiled.
        tile_size = 512
        overlap = 32

        # Load the source as an RGB PIL image (raises ValueError on failure).
        img_input = load_image(file_url)

        # Run the tiled deblurring.
        img_deblurred = deblur_image_tiled(
            img_input,
            tile_size=tile_size,
            overlap=overlap
        )

        # Random file name avoids collisions; make sure static/ exists.
        os.makedirs("static", exist_ok=True)
        file_name = f"static/{os.urandom(16).hex()}.jpg"

        # Always write JPEG. The previous code picked the format from
        # img_input.format, but load_image returns the result of .convert(),
        # whose `format` is None, so JPEG was always used anyway; hard-coding
        # it also guarantees the bytes match the ".jpg" file name.
        img_deblurred.save(file_name, format="JPEG")

        # Build the URL with "/" explicitly: os.path.join is for filesystem
        # paths and would use "\\" on Windows.
        image_url = f"{base_url.rstrip('/')}/{file_name}"

        # NOTE(review): user_text is only echoed back in this message; it does
        # not influence the processing above, despite what the text says.
        analysis_result = f"圖片已成功去模糊。用戶請求的描述為：'{user_text}'。模型已根據此要求調整參數進行處理。"

        return json.dumps({
            "image_url": image_url,
            "text_result": analysis_result
        })

    except requests.exceptions.RequestException as e:
        # load_image wraps its own download errors in ValueError, so this
        # branch only fires if a requests error comes from elsewhere (e.g.
        # from deblur_image_tiled — not visible here). Kept for safety.
        return json.dumps({
            "error": f"下載圖片失敗或 URL 無效: {e}"
        })
    except Exception as e:
        # Tool boundary: report every other failure to the agent as JSON.
        return json.dumps({
            "error": f"圖片處理失敗。錯誤訊息: {e}"
        })
    
# ------------------------------
# 1️⃣ 意圖分類工具
# ------------------------------
# Tool: keyword-based intent classifier. Returns "deblur" when the input
# mentions deblurring / restoration / enhancement, otherwise "qa".
#
# NOTE: LangChain's @tool decorator uses the function docstring as the tool
# description shown to the model, so the docstring is runtime text and is kept
# in its original wording.
@tool
def classify_intent(user_input: str) -> str:
    """
    判斷使用者輸入意圖：
    - "deblur" -> 去模糊 / 修復 / 影像清晰化
    - "qa" -> 一般問題或圖片分析
    """
    # Function-scope import: `re` is not imported at file level.
    import re

    # Keywords matched as plain substrings (so inflected forms such as
    # "deblurring" or "enhanced" still hit).
    deblur_keywords = (
        # Chinese
        "去模糊", "清晰", "清楚", "修復", "模糊", "變清楚", "提高清晰度",
        "還原", "去噪", "降噪", "去霧", "增強", "超解析", "超分辨",
        # English
        "deblur", "restore", "restoration", "denoise", "noise", "enhance",
        "enhancement", "super resolution", "defog", "dehaze",
        "sharpen", "blurry", "blurred", "fix blur",
    )

    # Tolerate None / empty input instead of raising AttributeError.
    text = (user_input or "").lower()

    if any(keyword in text for keyword in deblur_keywords):
        return "deblur"

    # "sr" (super resolution) is too short for substring matching: it occurs
    # inside ordinary words such as "classroom" or "disrupt". Require it to
    # stand alone, i.e. not be adjacent to another ASCII letter or digit
    # (CJK neighbours are fine, e.g. "幫我做sr").
    if re.search(r"(?<![a-z0-9])sr(?![a-z0-9])", text):
        return "deblur"

    return "qa"

# ==========================
#  LangChain agent setup
# ==========================

# All tools exposed to the agent
tools = [
        classify_intent,              # intent classification
        generate_and_upload_image,    # image generation
        analyze_image_with_text,      # image analysis / Q&A
        deblur_image_from_url         # image deblurring
    ]

# Chat model instance (LangChain's ChatGoogleGenerativeAI) that drives the agent
llm = ChatGoogleGenerativeAI(
        google_api_key=google_api,
        model="gemini-2.5-flash", 
        temperature=0.2
    )

# System prompt for the agent (passed as `system_prompt` to create_agent).
# It lists the four tools, the required call order (classify_intent first) and
# the reply rules. This is runtime text sent to the model; keep it in sync
# with the tool names and signatures defined in this file.
sys_prompt = """
你是一個圖像生成、去模糊與圖片問答助理，請依流程使用工具。

【可用工具】
1. classify_intent(user_input) → 回傳 "deblur" 或 "qa"
2. deblur_image_from_url(file_url, user_text) → 圖片去模糊/修復
3. analyze_image_with_text(image_path, user_text) → 圖片理解與問答
4. generate_and_upload_image(prompt) → 生成圖像

【流程】
- 先呼叫 classify_intent 判斷意圖
- 若為 "deblur" → 呼叫 deblur_image_from_url
- 若為 "qa"：
    - 若與圖片內容有關 → analyze_image_with_text
    - 若需生成新圖 → generate_and_upload_image

【回覆規則】
- 若工具成功輸出圖片 → 回覆必須包含：
    - 圖片完整 URL
    - 簡要說明（如：已完成去模糊/生成圖片）
- 若工具失敗 → 用自然語言說明錯誤，不輸出技術錯誤碼或 traceback

【判斷原則】
- 有「去模糊、清晰、修復」等語意 → deblur
- 有提問或描述圖片 → qa
- 有「生成、畫、幫我做一張圖」→ generate_and_upload_image

請嚴格遵循流程，不要跳步。
"""

# --- 4. Build the agent ---
# Tool-calling agent assembled from the chat model, the tool list and the
# system prompt defined above.
agent = create_agent(
        model=llm,
        tools=tools,
        system_prompt=sys_prompt
    )

def _first_text(content):
    """Return the first textual block of a message ``content`` value.

    ``content`` is returned unchanged when it is not a list. For a list, the
    first element that is a plain string, or a dict carrying a ``"text"`` key,
    supplies the result. When no such element exists the list is stringified.
    """
    if not isinstance(content, list):
        return content
    for block in content:
        if isinstance(block, str):
            return block
        # Only dicts are indexed by key; `"text" in some_str` would be a
        # substring test and indexing the str with ["text"] would raise.
        if isinstance(block, dict) and "text" in block:
            return block["text"]
    return str(content)


def format_agent_result(result):
    """Condense an agent result into a flat summary dict.

    Args:
        result: Mapping with a ``"messages"`` list, as returned by
            ``agent.invoke``.

    Returns:
        A dict with four keys, each ``None`` when no matching message exists.
        When several messages match, the last one wins:

        - ``"user"``: content of the human message.
        - ``"tool_call"``: ``{"name": ..., "arguments": ...}`` of the tool
          call requested by the model.
        - ``"tool_result"``: tool output, JSON-decoded when possible,
          otherwise the raw content.
        - ``"final_response"``: text of the AI message that carries no tool
          call.
    """
    output = {
        "user": None,
        "tool_call": None,
        "tool_result": None,
        "final_response": None
    }

    for msg in result["messages"]:
        if isinstance(msg, HumanMessage):
            output["user"] = msg.content

        elif isinstance(msg, ToolMessage):
            try:
                output["tool_result"] = json.loads(msg.content)
            except (TypeError, ValueError):
                # Not JSON (or not a string at all): keep the raw content.
                output["tool_result"] = msg.content

        elif isinstance(msg, AIMessage):
            legacy_call = msg.additional_kwargs.get("function_call")
            # Standard LangChain tool-call list; used as a fallback when the
            # provider did not populate additional_kwargs["function_call"].
            tool_calls = getattr(msg, "tool_calls", None)

            if legacy_call:
                output["tool_call"] = {
                    "name": legacy_call["name"],
                    "arguments": legacy_call["arguments"]
                }
            elif tool_calls:
                last_call = tool_calls[-1]
                # Serialise args to a JSON string — presumably the same shape
                # as the legacy "arguments" value; verify against consumers.
                output["tool_call"] = {
                    "name": last_call.get("name"),
                    "arguments": json.dumps(
                        last_call.get("args", {}), ensure_ascii=False
                    )
                }
            else:
                # No tool call: this is the model's textual answer.
                output["final_response"] = _first_text(msg.content)

    return output

def run_agent(user_input: str):
    """Run the agent on a single user message.

    The input and the formatted result are printed for tracing. The raw agent
    result is reduced with ``format_agent_result`` and returned under the
    ``"output"`` key.
    """
    print(f"UserInput:{user_input}")

    # One-turn conversation: the user text is the only message.
    request = {"messages": [{"role": "user", "content": user_input}]}
    raw_result = agent.invoke(request)

    summary = format_agent_result(raw_result)
    print(f"output_format:{summary}")

    return {"output": summary}