Spaces:

AIDemoProject
/

DeblurGANV2Demo

Sleeping

App Files Files Community

DeblurGANV2Demo / services /agents.py

JasonFinley0821

feat : upgrade ai agent readme

1fa1fdf 5 months ago

raw

history blame contribute delete

12.8 kB

	import os # 匯入 os 模組以處理環境變數和檔案路徑
	import io # 匯入 io 模組以處理二進位資料流
	import PIL.Image # 匯入 PIL 的 Image 模組以處理圖片
	import requests # 匯入 requests 模組以進行 HTTP 請求
	from dotenv import load_dotenv # 匯入 dotenv 以載入 .env 環境變數檔案
	import json # 匯入 json 庫用於序列化
	from urllib.parse import urlparse

	# LangChain 相關匯入
	from langchain.agents import create_agent
	from langchain.tools import tool
	from langchain.messages import AIMessage, HumanMessage, ToolMessage

	from langchain_google_genai import ChatGoogleGenerativeAI

	from google import genai # 匯入 Google GenAI 函式庫
	from google.genai import types # 匯入 GenAI 的類型定義
	from services.deblur import deblur_image_tiled # 從本地服務匯入去模糊函式

	# ==========================
	# Environment setup
	# ==========================
	# Load variables from a local .env file into the process environment.
	load_dotenv()

	# Google AI API key, read from the environment.
	# The subscript lookup raises KeyError at import time if GOOGLE_API_KEY is unset.
	google_api = os.environ["GOOGLE_API_KEY"]

	# Google GenAI client used by the image-generation and image-analysis tools below.
	genai_client = genai.Client(api_key=google_api)

	# ==========================
	# Helper functions
	# ==========================
	def load_image(file_url: str) -> PIL.Image.Image:
	    """Load an image from a local path or an HTTP(S) URL and return it in RGB mode.

	    Args:
	        file_url: Either an ``http``/``https`` URL or a local filesystem path.
	            Any other (or missing) URL scheme is treated as a local path.

	    Returns:
	        The decoded image, converted to RGB.

	    Raises:
	        ValueError: If the download fails, the local path does not exist, or
	            the data cannot be decoded as an image. The underlying exception
	            is chained as ``__cause__`` so the root cause stays visible.
	    """
	    parsed = urlparse(file_url)

	    if parsed.scheme in ("http", "https"):
	        # Remote image: download fully into memory, then decode.
	        try:
	            print(f"Agent 正在下載圖片: {file_url}")
	            resp = requests.get(file_url, timeout=15)
	            resp.raise_for_status()
	            return PIL.Image.open(io.BytesIO(resp.content)).convert("RGB")
	        except Exception as e:
	            raise ValueError(f"下載圖片失敗: {e}") from e

	    # Local file
	    if not os.path.exists(file_url):
	        raise ValueError("圖片路徑無效，無法進行分析。")
	    try:
	        # convert() returns an independent copy, so the source file handle
	        # can be released as soon as the conversion is done.
	        with PIL.Image.open(file_url) as img:
	            return img.convert("RGB")
	    except Exception as e:
	        raise ValueError(f"開啟本地圖片失敗: {e}") from e


	# ==========================
	# LangChain tool definitions
	# ==========================

	# Tool: generate an image from a text prompt, save it under static/ and
	# return a JSON string.
	#   success -> {"image_url": <public URL>, "text_result": <short message>}
	#   failure -> {"error": <message>}
	# The function never raises; every failure is reported through the JSON payload.
	# NOTE: the docstring below is kept verbatim because LangChain's @tool uses the
	# docstring as the tool description shown to the model, so it is runtime text.
	@tool
	def generate_and_upload_image(prompt: str) -> str:
	    """
	    這個工具可以根據文字提示生成圖片，並將其上傳到伺服器。

	    Args:
	        prompt: 用於生成圖片的文字提示。

	    Returns:
	        一個 JSON 格式的字串，包含圖片 URL 和描述，或錯誤訊息。
	    """
	    try:
	        # Resolve the public base URL first so that a misconfigured deployment
	        # fails before the model is called and before anything is written to disk.
	        base_url = os.getenv("HF_SPACE")
	        if not base_url:
	            raise RuntimeError("環境變數 HF_SPACE 未設定")

	        # Ask the Google GenAI model for text + image output.
	        # (Alternative model name previously noted here: "gemini-2.5-flash-image".)
	        response = genai_client.models.generate_content(
	            model="gemini-2.0-flash-preview-image-generation",
	            contents=prompt,
	            config=types.GenerateContentConfig(response_modalities=['Text', 'Image']),
	        )

	        # Take the first part that carries inline binary data (the image).
	        # Guard against responses with no candidates / no content / no parts.
	        candidates = response.candidates or []
	        content = candidates[0].content if candidates else None
	        parts = (content.parts if content is not None else None) or []
	        image_binary = next(
	            (part.inline_data.data for part in parts if part.inline_data is not None),
	            None,
	        )

	        if not image_binary:
	            # The API answered without error but returned no image data.
	            return json.dumps({
	                "error": "圖片生成失敗。API 回應中未包含圖片數據，請嘗試修改提示詞。"
	            })

	        image = PIL.Image.open(io.BytesIO(image_binary))
	        # JPEG cannot store an alpha channel or palette; normalise to RGB first.
	        if image.mode != "RGB":
	            image = image.convert("RGB")

	        # Random file name to avoid collisions; make sure the target folder exists.
	        os.makedirs("static", exist_ok=True)
	        file_name = f"static/{os.urandom(16).hex()}.jpg"
	        image.save(file_name, format="JPEG")

	        # Build the URL with "/" explicitly: os.path.join is for filesystem
	        # paths and would use the OS separator.
	        image_url = f"{base_url.rstrip('/')}/{file_name}"
	        return json.dumps({
	            "image_url": image_url,
	            "text_result": f"圖片已成功生成並上傳。這是根據提示 '{prompt[:50]}...' 生成的圖片。"
	        })
	    except Exception as e:
	        # Tool boundary: convert any failure into the unified JSON error format.
	        return json.dumps({
	            "error": f"圖片生成與上傳失敗: {e}"
	        })

	# Tool: multimodal question answering over one image.
	# Loads the image through load_image(), sends image + question to the
	# "gemini-2.5-flash" model and returns a JSON string.
	#   success -> {"text_result": <model answer or fallback message>}
	#   failure -> {"error": <message>}
	# The function never raises; every failure is reported through the JSON payload.
	# NOTE: the docstring below is kept verbatim because LangChain's @tool uses the
	# docstring as the tool description shown to the model, so it is runtime text.
	@tool
	def analyze_image_with_text(image_path: str, user_text: str) -> str:
	    """
	    這個工具可以根據圖片和文字提示來回答問題 (多模態分析)。

	    Args:
	        image_path: 圖片在本地端儲存的路徑。
	        user_text: 針對圖片提出的文字問題。

	    Returns:
	        一個 JSON 格式的字串，包含模型回應或錯誤訊息。
	    """
	    try:
	        # load_image validates the source and raises ValueError on a bad
	        # path/URL, which is reported by the handler below.
	        img_user = load_image(image_path)

	        # Send the image object and the question together.
	        response = genai_client.models.generate_content(
	            model="gemini-2.5-flash",
	            contents=[img_user, user_text],
	        )

	        # Read the text once; fall back to a fixed message when there is none.
	        answer = response.text
	        if answer is None:
	            answer = "Gemini沒答案!請換個說法！"

	        return json.dumps({
	            "text_result": answer
	        })
	    except Exception as e:
	        # Tool boundary: convert any failure into the unified JSON error format.
	        return json.dumps({
	            "error": f"Gemini執行出錯: {e}"
	        })


	# Tool: load an image (URL or local path), deblur it with tiled processing,
	# save the result under static/ and return a JSON string.
	#   success -> {"image_url": <public URL>, "text_result": <message>}
	#   failure -> {"error": <message>}
	# The function never raises; every failure is reported through the JSON payload.
	# NOTE: the docstring below is kept verbatim because LangChain's @tool uses the
	# docstring as the tool description shown to the model, so it is runtime text.
	@tool
	def deblur_image_from_url(
	    file_url: str,
	    user_text: str
	) -> str:
	    """
	    這個工具可以從提供的圖片來源載入影像（支援 HTTP/HTTPS 網址與本地檔案路徑），
	    並使用分塊處理（Tiled Processing）進行去模糊（deblur）。處理完成後，
	    會將結果儲存於伺服器的 static/ 目錄，並回傳去模糊後圖片的絕對 URL 路徑
	    以及根據 user_text 生成的額外文字結果。

	    Args:
	        file_url:
	            圖片來源，可為：
	            - HTTP/HTTPS 網路圖片網址（例如：https://example.com/img.png）
	            - 本地檔案路徑（例如：/tmp/xxx.png）

	        user_text:
	            使用者針對圖片提出的處理需求或描述文字。

	    Returns:
	        JSON 格式的字串，包含：
	        - "image_url": 去模糊後圖片的絕對 URL 路徑
	        - "text_result": 根據 user_text 產生的額外文字說明
	    """
	    try:
	        # Resolve the public base URL first so that a misconfigured deployment
	        # fails before the image is processed and written to disk.
	        base_url = os.getenv("HF_SPACE")
	        if not base_url:
	            raise RuntimeError("環境變數 HF_SPACE 未設定")

	        tile_size = 512
	        overlap = 32

	        # 1. Load the source as an RGB PIL image.
	        img_input = load_image(file_url)

	        # 2. Run tiled deblurring.
	        img_deblurred = deblur_image_tiled(
	            img_input,
	            tile_size=tile_size,
	            overlap=overlap
	        )

	        # 3. Save under a random name. The file name always ends in ".jpg",
	        # so the encoder is pinned to JPEG instead of being derived from the
	        # input image's `format` attribute (which could disagree with the
	        # extension).
	        os.makedirs("static", exist_ok=True)
	        file_name = f"static/{os.urandom(16).hex()}.jpg"
	        img_deblurred.save(file_name, format="JPEG")

	        # 4. Build the URL with "/" explicitly: os.path.join is for filesystem
	        # paths and would use the OS separator.
	        image_url = f"{base_url.rstrip('/')}/{file_name}"

	        analysis_result = f"圖片已成功去模糊。用戶請求的描述為：'{user_text}'。模型已根據此要求調整參數進行處理。"

	        # 5. Unified JSON success format.
	        return json.dumps({
	            "image_url": image_url,
	            "text_result": analysis_result
	        })

	    except requests.exceptions.RequestException as e:
	        # load_image wraps its own download errors in ValueError, so this
	        # branch only fires if a later step raises a requests error directly.
	        return json.dumps({
	            "error": f"下載圖片失敗或 URL 無效: {e}"
	        })
	    except Exception as e:
	        # Tool boundary: convert any failure into the unified JSON error format.
	        return json.dumps({
	            "error": f"圖片處理失敗。錯誤訊息: {e}"
	        })

	# ------------------------------
	# 1️⃣ Intent classification tool
	# ------------------------------
	# Tool: keyword-based intent classifier.
	# Returns "deblur" when the (lower-cased) input contains any restoration
	# keyword, otherwise "qa".
	# NOTE: the docstring below is kept verbatim because LangChain's @tool uses the
	# docstring as the tool description shown to the model, so it is runtime text.
	@tool
	def classify_intent(user_input: str) -> str:
	    """
	    判斷使用者輸入意圖：
	    - "deblur" -> 去模糊 / 修復 / 影像清晰化
	    - "qa" -> 一般問題或圖片分析
	    """
	    import re  # function-scope import: `re` is not imported at module level

	    # Keywords that are safe to match as plain substrings.
	    substring_keywords = (
	        # Chinese
	        "去模糊", "清晰", "清楚", "修復", "模糊", "變清楚", "提高清晰度",
	        "還原", "去噪", "降噪", "去霧", "增強", "超解析", "超分辨",
	        # English
	        "deblur", "restore", "restoration", "denoise", "noise", "enhance",
	        "enhancement", "super resolution", "defog", "dehaze",
	        "sharpen", "blurry", "blurred", "fix blur",
	    )

	    text = user_input.lower()
	    if any(keyword in text for keyword in substring_keywords):
	        return "deblur"

	    # "sr" (super resolution) is too short for substring matching: it would
	    # fire on any word that merely contains those two letters. Require it to
	    # stand alone, i.e. not be adjacent to another ASCII letter or digit.
	    # ASCII lookarounds are used instead of \b so that "sr" directly next to
	    # CJK characters still counts as a standalone token.
	    if re.search(r"(?<![a-z0-9])sr(?![a-z0-9])", text):
	        return "deblur"

	    return "qa"

	# ==========================
	# LangChain agent setup
	# ==========================

	# All tools exposed to the agent
	tools = [
	classify_intent, # intent classification
	generate_and_upload_image, # image generation
	analyze_image_with_text, # image analysis / Q&A
	deblur_image_from_url # image deblurring
	]

	# Chat model instance that drives the agent (LangChain's ChatGoogleGenerativeAI),
	# authenticated with the same API key as the GenAI client.
	llm = ChatGoogleGenerativeAI(
	google_api_key=google_api,
	model="gemini-2.5-flash",
	temperature=0.2
	)

	# System prompt for the agent (new-style syntax).
	# It lists the four tools, the required call order (classify_intent first),
	# the reply rules and the intent heuristics. The text is sent to the model
	# as-is, so it is runtime data and must not be edited casually.
	sys_prompt = """
	你是一個圖像生成、去模糊與圖片問答助理，請依流程使用工具。

	【可用工具】
	1. classify_intent(user_input) → 回傳 "deblur" 或 "qa"
	2. deblur_image_from_url(file_url, user_text) → 圖片去模糊/修復
	3. analyze_image_with_text(image_path, user_text) → 圖片理解與問答
	4. generate_and_upload_image(prompt) → 生成圖像

	【流程】
	- 先呼叫 classify_intent 判斷意圖
	- 若為 "deblur" → 呼叫 deblur_image_from_url
	- 若為 "qa"：
	- 若與圖片內容有關 → analyze_image_with_text
	- 若需生成新圖 → generate_and_upload_image

	【回覆規則】
	- 若工具成功輸出圖片 → 回覆必須包含：
	- 圖片完整 URL
	- 簡要說明（如：已完成去模糊/生成圖片）
	- 若工具失敗 → 用自然語言說明錯誤，不輸出技術錯誤碼或 traceback

	【判斷原則】
	- 有「去模糊、清晰、修復」等語意 → deblur
	- 有提問或描述圖片 → qa
	- 有「生成、畫、幫我做一張圖」→ generate_and_upload_image

	請嚴格遵循流程，不要跳步。
	"""

	# --- 4. Build the agent ---
	# Tool-calling agent assembled from the chat model, the tool list and the
	# system prompt defined above.
	agent = create_agent(
	model=llm,
	tools=tools,
	system_prompt=sys_prompt
	)

	def _first_text_block(content):
	    """Return the text of the first textual block in a list-form message content.

	    Blocks may be plain strings or dicts carrying a "text" key. Returns None
	    when no block carries text.
	    """
	    for block in content:
	        if isinstance(block, str):
	            return block
	        if isinstance(block, dict) and "text" in block:
	            return block["text"]
	    return None


	def format_agent_result(result):
	    """Condense an agent invocation result into a flat summary dict.

	    Args:
	        result: Mapping with a "messages" sequence of LangChain message objects.

	    Returns:
	        A dict with four keys, each ``None`` when no matching message was seen:
	          - "user": content of the last HumanMessage.
	          - "tool_call": ``{"name", "arguments"}`` of the last AIMessage that
	            carries a "function_call" entry in ``additional_kwargs``.
	          - "tool_result": content of the last ToolMessage, JSON-decoded when
	            possible, otherwise the raw content.
	          - "final_response": text of the last AIMessage without a function call.

	    Later messages overwrite earlier ones, so only the last of each kind is kept.
	    """
	    output = {
	        "user": None,
	        "tool_call": None,
	        "tool_result": None,
	        "final_response": None
	    }

	    for msg in result["messages"]:
	        if isinstance(msg, HumanMessage):
	            output["user"] = msg.content

	        elif isinstance(msg, ToolMessage):
	            try:
	                output["tool_result"] = json.loads(msg.content)
	            except (TypeError, ValueError):
	                # Not JSON (or not a string): keep the raw content.
	                output["tool_result"] = msg.content

	        elif isinstance(msg, AIMessage):
	            # NOTE(review): only the "function_call" entry of additional_kwargs
	            # is inspected; confirm the model integration in use populates it.
	            fn = msg.additional_kwargs.get("function_call")
	            if fn:
	                output["tool_call"] = {
	                    "name": fn["name"],
	                    "arguments": fn["arguments"]
	                }
	            elif isinstance(msg.content, list):
	                # List-form content, e.g. [{'type': 'text', 'text': ...}] or
	                # plain strings. Take the first block that carries text; a
	                # plain-string block must not be indexed with ["text"].
	                text = _first_text_block(msg.content)
	                output["final_response"] = text if text is not None else str(msg.content)
	            else:
	                output["final_response"] = msg.content

	    return output

	def run_agent(user_input: str):
	    """Run the agent on one user message and return its formatted summary.

	    Args:
	        user_input: The raw text sent by the user.

	    Returns:
	        ``{"output": <dict produced by format_agent_result>}``.
	    """
	    print(f"UserInput:{user_input}")

	    # Single-turn conversation: one user message, no prior history.
	    request = {"messages": [{"role": "user", "content": user_input}]}
	    summary = format_agent_result(agent.invoke(request))

	    print(f"output_format:{summary}")
	    return {"output": summary}