Spaces:

cjian2025
/

MultiPhotoOCR

Sleeping

App Files Files Community

MultiPhotoOCR / app.py

cjian2025

Create app.py

c6d0584 verified 5 months ago

raw

history blame contribute delete

24.9 kB

	import os
	import csv
	import base64
	from datetime import datetime
	from pathlib import Path
	import gradio as gr
	import google.generativeai as genai
	from PIL import Image
	import resend
	import tempfile

	class GeminiImageAnalyzer:
	    """Image analysis helper built on a Gemini generative model.

	    Every public ``analyze_*`` method returns a string: either the model's
	    text answer or a human-readable failure message. Failures are reported
	    as strings (not raised) because callers detect them by message prefix.
	    """

	    # File extensions accepted by validate_image (compared lower-cased).
	    SUPPORTED_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.webp')

	    # Prompt used whenever the caller supplies no prompt of their own.
	    DEFAULT_PROMPT = "請詳細描述這張圖片的內容，並提取圖片中的文字，使用繁體中文"

	    def __init__(self, api_key):
	        """Configure the Gemini client and create the model instance.

	        Args:
	            api_key: Gemini API key, passed to ``genai.configure``.
	        """
	        self.api_key = api_key
	        # NOTE(review): genai.configure is a module-level call, so the key
	        # is presumably shared by every analyzer instance -- confirm.
	        genai.configure(api_key=api_key)
	        self.model = genai.GenerativeModel('gemini-2.0-flash-exp')

	    def validate_image(self, image_path):
	        """Check that the image exists and has a supported extension.

	        Args:
	            image_path: Path of the image file to check.

	        Returns:
	            True when the file exists and its extension is supported.

	        Raises:
	            FileNotFoundError: The path does not exist.
	            ValueError: The extension is not in SUPPORTED_EXTENSIONS.
	        """
	        if not os.path.exists(image_path):
	            raise FileNotFoundError(f"圖片檔案不存在: {image_path}")

	        if Path(image_path).suffix.lower() not in self.SUPPORTED_EXTENSIONS:
	            raise ValueError("支援 .jpg, .jpeg, .png, .gif, .webp 格式的圖片")

	        return True

	    def analyze_single_image(self, image_path, prompt=DEFAULT_PROMPT):
	        """Analyze one image with the model.

	        Args:
	            image_path: Path of the image to analyze.
	            prompt: Instruction sent with the image. An empty or None
	                prompt falls back to DEFAULT_PROMPT.

	        Returns:
	            The model's text answer, or a message starting with
	            "分析失敗" when anything goes wrong.
	        """
	        try:
	            self.validate_image(image_path)

	            # The context manager closes the file handle once the request
	            # has been made, instead of leaking it.
	            with Image.open(image_path) as image:
	                response = self.model.generate_content(
	                    [prompt or self.DEFAULT_PROMPT, image]
	                )
	                return response.text

	        except Exception as e:
	            # Boundary handler: callers rely on the failure-prefix string.
	            return f"分析失敗: {str(e)}"

	    def analyze_multiple_images(self, image_paths, comparison_prompt=None):
	        """Analyze several images together in one comparison request.

	        A single path is delegated to analyze_single_image.

	        Args:
	            image_paths: Sequence of image file paths.
	            comparison_prompt: Optional instruction; a built-in comparison
	                prompt is used when it is empty or None.

	        Returns:
	            The model's text answer, a "❌" message when no image was
	            given, or a message starting with "多圖片分析失敗" on error
	            ("分析失敗" when the single-image path fails).
	        """
	        try:
	            if not image_paths:
	                return "❌ 請至少上傳一張圖片"

	            if len(image_paths) == 1:
	                return self.analyze_single_image(
	                    image_paths[0],
	                    comparison_prompt or self.DEFAULT_PROMPT
	                )

	            images = []
	            try:
	                for image_path in image_paths:
	                    self.validate_image(image_path)
	                    images.append(Image.open(image_path))

	                if not comparison_prompt:
	                    comparison_prompt = self._default_comparison_prompt(len(images))

	                # Prompt first, then every image, in upload order.
	                response = self.model.generate_content([comparison_prompt, *images])
	                return response.text
	            finally:
	                # Close every image that was opened, even after a failure
	                # part-way through the list.
	                for image in images:
	                    image.close()

	        except Exception as e:
	            return f"多圖片分析失敗: {str(e)}"

	    @staticmethod
	    def _default_comparison_prompt(image_count):
	        """Build the built-in comparison prompt for ``image_count`` images."""
	        return f"""
	        請詳細分析和比較這 {image_count} 張圖片。請從以下方面進行分析：

	        1. 個別圖片描述：
	        - 分別描述每張圖片的主要內容
	        - 識別每張圖片中的文字、物品、人物等

	        2. 相似之處：
	        - 找出圖片之間的共同點
	        - 相同的元素、主題、風格等

	        3. 差異比較：
	        - 指出圖片之間的明顯差異
	        - 不同的顏色、構圖、內容、角度等

	        4. 整體總結：
	        - 這些圖片可能的關聯性
	        - 整體給人的印象或傳達的訊息

	        請使用繁體中文回答，條理清晰地組織你的分析。
	        """

	    def batch_analyze_images(self, image_paths, individual_prompt=None):
	        """Analyze each image separately and join the results in one report.

	        Args:
	            image_paths: Sequence of image file paths.
	            individual_prompt: Optional instruction applied to every image;
	                DEFAULT_PROMPT is used when it is empty or None.

	        Returns:
	            A Markdown-style report with one section per image, a "❌"
	            message when no image was given, or a message starting with
	            "批量分析失敗" on unexpected errors. A failure on one image
	            appears inside that image's section and does not stop the batch.
	        """
	        try:
	            if not image_paths:
	                return "❌ 請至少上傳一張圖片"

	            individual_prompt = individual_prompt or self.DEFAULT_PROMPT
	            total = len(image_paths)
	            sections = ["📊 批量圖片分析結果\n\n"]

	            for index, image_path in enumerate(image_paths, start=1):
	                image_name = os.path.basename(image_path)
	                print(f"正在分析圖片 {index}/{total}: {image_name}")

	                result = self.analyze_single_image(image_path, individual_prompt)
	                sections.append(
	                    f"## 圖片 {index} ({image_name})\n\n{result}\n\n---\n\n"
	                )

	            return "".join(sections)

	        except Exception as e:
	            return f"批量分析失敗: {str(e)}"

	class EmailSender:
	    """Sends analysis results as an HTML email through Resend."""

	    def __init__(self, api_key):
	        """Store the Resend API key on the ``resend`` module.

	        Args:
	            api_key: Resend API key.
	        """
	        resend.api_key = api_key

	    @staticmethod
	    def _render_html(analysis_result, image_count, analysis_type):
	        """Build the HTML body of the result email.

	        The analysis text comes from the model and may contain ``<``, ``>``
	        or ``&``; it is HTML-escaped so it is shown verbatim instead of
	        being interpreted as markup.
	        """
	        from html import escape

	        safe_result = escape(str(analysis_result))
	        safe_type = escape(str(analysis_type))
	        generated_at = datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")

	        return f"""
	        <html>
	        <head>
	        <style>
	        body {{
	            font-family: 'Microsoft JhengHei', Arial, sans-serif;
	            line-height: 1.6;
	            color: #333;
	            max-width: 900px;
	            margin: 0 auto;
	            padding: 20px;
	        }}
	        .header {{
	            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	            color: white;
	            padding: 20px;
	            text-align: center;
	            border-radius: 10px;
	            margin-bottom: 20px;
	        }}
	        .content {{
	            background-color: #f9f9f9;
	            padding: 25px;
	            border-radius: 10px;
	            border-left: 5px solid #667eea;
	            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
	        }}
	        .analysis-type {{
	            background-color: #e3f2fd;
	            padding: 10px;
	            border-radius: 5px;
	            margin-bottom: 15px;
	            font-weight: bold;
	            color: #1976d2;
	        }}
	        .timestamp {{
	            color: #666;
	            font-size: 0.9em;
	            margin-top: 20px;
	            text-align: right;
	            border-top: 1px solid #ddd;
	            padding-top: 10px;
	        }}
	        pre {{
	            white-space: pre-wrap;
	            background: white;
	            padding: 20px;
	            border-radius: 8px;
	            border: 1px solid #e0e0e0;
	            font-family: 'Microsoft JhengHei', Arial, sans-serif;
	            font-size: 14px;
	            line-height: 1.5;
	        }}
	        </style>
	        </head>
	        <body>
	        <div class="header">
	        <h1>🤖 Gemini AI 圖片分析結果</h1>
	        <p>智能圖片分析與比較系統</p>
	        </div>

	        <div class="content">
	        <div class="analysis-type">
	        📋 分析類型：{safe_type} | 📸 圖片數量：{image_count} 張
	        </div>

	        <h2>🔍 分析結果</h2>
	        <pre>{safe_result}</pre>
	        </div>

	        <div class="timestamp">
	        🕐 產生時間：{generated_at}
	        <br>
	        🚀 Powered by Google Gemini 2.0 & Resend
	        </div>
	        </body>
	        </html>
	        """

	    def send_analysis_email(self, to_email, analysis_result, image_count=1, analysis_type="單圖片分析"):
	        """Email the analysis result to one recipient.

	        Args:
	            to_email: Recipient address.
	            analysis_result: Analysis text shown in the email body.
	            image_count: Number of images covered by the analysis.
	            analysis_type: Label of the analysis mode, shown in the subject
	                and body.

	        Returns:
	            A message starting with "郵件發送成功" (including the id from
	            the send response, or "N/A") or with "郵件發送失敗" plus the
	            error text. Errors are returned, not raised.
	        """
	        try:
	            params = {
	                "from": "Gemini AI <onboarding@resend.dev>",
	                "to": [to_email],
	                "subject": f"🤖 AI圖片分析結果 - {analysis_type} ({image_count}張圖片)",
	                "html": self._render_html(analysis_result, image_count, analysis_type),
	            }

	            email = resend.Emails.send(params)
	            return f"郵件發送成功！郵件ID: {email.get('id', 'N/A')}"

	        except Exception as e:
	            # Boundary handler: the UI shows this string to the user.
	            return f"郵件發送失敗: {str(e)}"

	def analyze_images_and_send_email(images, analysis_mode, custom_prompt, recipient_email, gemini_key, resend_key):
	    """Analyze the uploaded images and email the result.

	    Args:
	        images: Uploaded files; a single item or a list. Items may be path
	            strings or objects exposing the path as ``.name``.
	        analysis_mode: "比較分析" (compare), "批量分析" (batch); any other
	            value is treated as single-image analysis of the first image.
	        custom_prompt: Optional prompt; blank means "use the default".
	        recipient_email: Address that receives the result.
	        gemini_key: Gemini API key.
	        resend_key: Resend API key.

	    Returns:
	        A ``(status, analysis_result, email_result)`` tuple of strings.
	        Validation problems and failures are reported in ``status`` with a
	        leading "❌"; nothing is raised.
	    """
	    if not images:
	        return "❌ 請至少上傳一張圖片", "", ""

	    recipient_email = (recipient_email or "").strip()
	    if not recipient_email:
	        return "❌ 請輸入收件人信箱", "", ""

	    if not gemini_key:
	        return "❌ 請輸入Gemini API Key", "", ""

	    if not resend_key:
	        return "❌ 請輸入Resend API Key", "", ""

	    try:
	        analyzer = GeminiImageAnalyzer(gemini_key)
	        email_sender = EmailSender(resend_key)

	        # Normalize uploads to plain path values: upload items may be
	        # objects carrying the path in ``.name`` rather than strings.
	        uploads = images if isinstance(images, (list, tuple)) else [images]
	        image_paths = [getattr(item, "name", item) for item in uploads]
	        image_count = len(image_paths)

	        # An empty textbox arrives as ""; treat blank as "no custom prompt"
	        # so the analyzer's defaults apply.
	        prompt = (custom_prompt or "").strip() or None

	        if analysis_mode == "比較分析":
	            analysis_result = analyzer.analyze_multiple_images(image_paths, prompt)
	            analysis_type = "多圖片比較分析"
	        elif analysis_mode == "批量分析":
	            analysis_result = analyzer.batch_analyze_images(image_paths, prompt)
	            analysis_type = "批量個別分析"
	        else:
	            # Single-image mode only ever looks at the first upload.
	            if prompt:
	                analysis_result = analyzer.analyze_single_image(image_paths[0], prompt)
	            else:
	                analysis_result = analyzer.analyze_single_image(image_paths[0])
	            if image_count > 1:
	                analysis_type = "單圖片分析（僅分析第一張）"
	                image_count = 1
	            else:
	                analysis_type = "單圖片分析"

	        # The analyzer reports failures as prefixed strings, not exceptions.
	        if analysis_result.startswith(("分析失敗", "多圖片分析失敗", "批量分析失敗", "❌")):
	            return f"❌ {analysis_result}", analysis_result, ""

	        email_result = email_sender.send_analysis_email(
	            recipient_email,
	            analysis_result,
	            image_count,
	            analysis_type
	        )

	        # Do not claim full success when the email could not be sent.
	        if email_result.startswith("郵件發送失敗"):
	            headline = "⚠️ 分析完成，但郵件發送失敗"
	        else:
	            headline = "✅ 處理完成！"

	        status = f"{headline}\n📊 分析模式：{analysis_type}\n📸 處理圖片：{image_count} 張\n📧 {email_result}"

	        return status, analysis_result, email_result

	    except Exception as e:
	        # Boundary handler: the UI shows the message instead of crashing.
	        error_msg = f"❌ 處理失敗: {str(e)}"
	        return error_msg, "", ""

	def save_results_to_csv(images, analysis_mode, custom_prompt, analysis_result):
	    """Write one analysis result to a new CSV file in the temp directory.

	    Args:
	        images: Uploaded files (single item, list, or None). Items may be
	            path strings or objects exposing the path as ``.name``.
	        analysis_mode: Analysis mode label stored in the row.
	        custom_prompt: Prompt used; stored as "預設提示" when empty.
	        analysis_result: Analysis text stored in the row.

	    Returns:
	        The path of the CSV file, or a message starting with "儲存失敗"
	        when the file could not be written (callers check this prefix).
	    """
	    try:
	        now = datetime.now()

	        # mkstemp guarantees a unique file: a timestamp-only name collides
	        # when two exports happen within the same second.
	        fd, csv_filename = tempfile.mkstemp(
	            prefix=f"multi_image_analysis_{now.strftime('%Y%m%d_%H%M%S')}_",
	            suffix=".csv",
	        )

	        if images:
	            uploads = images if isinstance(images, (list, tuple)) else [images]
	            image_list = []
	            for index, img in enumerate(uploads, start=1):
	                path = getattr(img, "name", img)
	                if isinstance(path, str):
	                    image_list.append(os.path.basename(path))
	                else:
	                    # No usable path: fall back to a positional label.
	                    image_list.append(f"圖片_{index}")
	            image_names = "; ".join(image_list)
	            image_count = len(uploads)
	        else:
	            image_names = "無"
	            image_count = 0

	        # utf-8-sig writes a BOM at the start of the file.
	        with os.fdopen(fd, 'w', newline='', encoding='utf-8-sig') as csvfile:
	            fieldnames = ['時間戳記', '分析模式', '圖片數量', '圖片列表', '自訂提示', '分析結果']
	            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
	            writer.writeheader()
	            writer.writerow({
	                '時間戳記': now.strftime("%Y-%m-%d %H:%M:%S"),
	                '分析模式': analysis_mode,
	                '圖片數量': image_count,
	                '圖片列表': image_names,
	                '自訂提示': custom_prompt or "預設提示",
	                '分析結果': analysis_result
	            })

	        return csv_filename
	    except Exception as e:
	        # Boundary handler: the caller checks for the "儲存失敗" prefix.
	        return f"儲存失敗: {str(e)}"

	# Build the Gradio interface
	def create_interface():
	    """Build the Gradio Blocks UI and wire its events.

	    The layout has an upload/settings column, an API-key column with the
	    run button, a results area, a CSV download row, and help accordions.

	    Returns:
	        The ``gr.Blocks`` app, not yet launched.
	    """
	    with gr.Blocks(
	        title="🤖 Gemini 多圖片分析比較助手",
	        theme=gr.themes.Soft(),
	        css="""
	        .gradio-container {
	            max-width: 1400px !important;
	            margin: auto !important;
	        }
	        .main-header {
	            text-align: center;
	            margin-bottom: 2rem;
	            padding: 1.5rem;
	            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	            color: white;
	            border-radius: 15px;
	            box-shadow: 0 4px 15px rgba(0,0,0,0.2);
	        }
	        .api-warning {
	            background: linear-gradient(135deg, #fff3cd 0%, #ffeaa7 100%);
	            border: 1px solid #ffeaa7;
	            border-radius: 10px;
	            padding: 15px;
	            margin: 15px 0;
	            color: #856404;
	            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
	        }
	        .feature-highlight {
	            background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
	            border: 2px solid #2196f3;
	            border-radius: 10px;
	            padding: 15px;
	            margin: 15px 0;
	            color: #1976d2;
	        }
	        """
	    ) as demo:

	        # Page header. The separator is a plain "|": the previous "\|" was
	        # an invalid escape sequence that rendered a stray backslash.
	        gr.HTML("""
	        <div class="main-header">
	        <h1>🤖 Gemini 多圖片分析比較助手</h1>
	        <p>支援單張分析、多圖比較、批量處理 - 一站式AI圖片分析解決方案</p>
	        <p><small>🚀 部署於 Hugging Face Spaces | 🔥 支援最新 Gemini 2.0</small></p>
	        </div>
	        """)

	        # Feature highlights
	        gr.HTML("""
	        <div class="feature-highlight">
	        <h3>🎯 新增功能亮點：</h3>
	        <ul>
	        <li>📸 <strong>多圖片上傳</strong>：一次上傳多張圖片進行分析</li>
	        <li>🔍 <strong>智能比較</strong>：AI自動比較多張圖片的差異和相似點</li>
	        <li>📊 <strong>批量處理</strong>：同時分析多張圖片並生成個別報告</li>
	        <li>🎨 <strong>靈活模式</strong>：支援單張分析、比較分析、批量分析三種模式</li>
	        </ul>
	        </div>
	        """)

	        # API key safety notice
	        gr.HTML("""
	        <div class="api-warning">
	        <strong>⚠️ 重要提醒：</strong><br>
	        • 請勿在公共環境中分享您的 API 密鑰<br>
	        • 建議在本地環境或私人部署中使用敏感功能<br>
	        • 此應用程式不會儲存您的 API 密鑰<br>
	        • 多圖片分析會消耗更多 API 配額，請注意使用量
	        </div>
	        """)

	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.Markdown("### 📁 圖片上傳")
	                images_input = gr.File(
	                    label="選擇圖片 (支援多張上傳)",
	                    file_count="multiple",
	                    file_types=["image"],
	                    height=200
	                )

	                # Preview of the uploaded images
	                images_gallery = gr.Gallery(
	                    label="圖片預覽",
	                    show_label=True,
	                    elem_id="gallery",
	                    columns=3,
	                    rows=2,
	                    height=300,
	                    object_fit="contain"
	                )

	                gr.Markdown("### 🎯 分析模式")
	                analysis_mode = gr.Radio(
	                    choices=["單張分析", "比較分析", "批量分析"],
	                    value="比較分析",
	                    label="選擇分析模式",
	                    info="""
	                    • 單張分析：分析單張圖片（如上傳多張僅分析第一張）
	                    • 比較分析：比較多張圖片的差異和相似點（推薦）
	                    • 批量分析：對每張圖片進行單獨分析
	                    """
	                )

	                gr.Markdown("### ✏️ 分析設定")
	                prompt_input = gr.Textbox(
	                    label="自訂提示詞 (可選)",
	                    placeholder="留空將使用智能預設提示詞...",
	                    lines=3,
	                    info="不同分析模式會自動使用相應的最佳化提示詞"
	                )

	                recipient_email = gr.Textbox(
	                    label="📧 收件人信箱",
	                    placeholder="your-email@example.com"
	                )

	            with gr.Column(scale=1):
	                gr.Markdown("### 🔑 API 設定")

	                with gr.Accordion("如何獲取 API Keys", open=False):
	                    gr.Markdown("""
	                    Gemini API Key:
	                    1. 前往 [Google AI Studio](https://aistudio.google.com/app/apikey)
	                    2. 創建新的 API Key
	                    3. 複製 API Key（格式：AIza...）

	                    Resend API Key:
	                    1. 前往 [Resend](https://resend.com/api-keys)
	                    2. 註冊帳號並創建 API Key
	                    3. 複製 API Key（格式：re_...）

	                    注意事項:
	                    - 多圖片分析會消耗更多 API 配額
	                    - 建議先用少量圖片測試
	                    - 大型圖片會增加處理時間
	                    """)

	                gemini_key = gr.Textbox(
	                    label="Gemini API Key",
	                    placeholder="AIza...",
	                    type="password"
	                )

	                resend_key = gr.Textbox(
	                    label="Resend API Key",
	                    placeholder="re_...",
	                    type="password"
	                )

	                gr.Markdown("### 🚀 執行操作")
	                analyze_btn = gr.Button(
	                    "🔍 開始AI分析並發送郵件",
	                    variant="primary",
	                    size="lg"
	                )

	        gr.Markdown("### 📊 處理結果")

	        with gr.Row():
	            status_output = gr.Textbox(
	                label="📈 處理狀態",
	                lines=4,
	                interactive=False
	            )

	        with gr.Row():
	            with gr.Column():
	                analysis_output = gr.Textbox(
	                    label="🤖 AI分析結果",
	                    lines=15,
	                    interactive=False
	                )

	            with gr.Column():
	                email_output = gr.Textbox(
	                    label="📧 郵件發送結果",
	                    lines=15,
	                    interactive=False
	                )

	        with gr.Row():
	            download_btn = gr.Button("💾 下載詳細CSV結果", variant="secondary")
	            csv_file = gr.File(label="CSV檔案", visible=False)

	        # Event wiring
	        def update_gallery(files):
	            """Return the paths of the uploaded files for the preview."""
	            if not files:
	                return []
	            # Accept both path strings and objects exposing ``.name``.
	            return [getattr(file, "name", file) for file in files]

	        images_input.change(
	            fn=update_gallery,
	            inputs=[images_input],
	            outputs=[images_gallery]
	        )

	        analyze_btn.click(
	            fn=analyze_images_and_send_email,
	            inputs=[images_input, analysis_mode, prompt_input, recipient_email, gemini_key, resend_key],
	            outputs=[status_output, analysis_output, email_output]
	        )

	        def download_csv(images, analysis_mode, custom_prompt, analysis_result):
	            """Export the shown result to CSV and reveal the file widget.

	            The widget stays hidden when there is no result, the result
	            starts with "❌", or saving failed.
	            """
	            if analysis_result and not analysis_result.startswith("❌"):
	                filename = save_results_to_csv(images, analysis_mode, custom_prompt, analysis_result)
	                if not filename.startswith("儲存失敗"):
	                    return gr.update(value=filename, visible=True)
	            return gr.update(visible=False)

	        download_btn.click(
	            fn=download_csv,
	            inputs=[images_input, analysis_mode, prompt_input, analysis_output],
	            outputs=csv_file
	        )

	        # Usage instructions
	        with gr.Accordion("📝 詳細使用說明", open=False):
	            gr.Markdown("""
	            ### 🎯 分析模式說明

	            1. 單張分析
	            - 適用：單張圖片的詳細分析
	            - 特點：深入分析圖片內容、文字識別
	            - 建議：需要詳細了解單張圖片內容時使用

	            2. 比較分析（推薦）
	            - 適用：2-10張圖片的對比分析
	            - 特點：找出圖片間的差異、相似點、關聯性
	            - 建議：需要對比多張圖片時使用，最適合2-5張圖片

	            3. 批量分析
	            - 適用：多張圖片需要分別分析
	            - 特點：每張圖片獨立分析，生成個別報告
	            - 建議：需要處理大量圖片且每張都需單獨報告

	            ### 📝 使用步驟
	            1. 上傳圖片：支援 JPG、PNG、GIF、WebP 格式
	            2. 選擇模式：根據需求選擇分析模式
	            3. 設定提示：可自訂提示詞或使用預設
	            4. 輸入信箱：設定接收結果的郵箱
	            5. 配置API：輸入必要的API密鑰
	            6. 開始分析：點擊按鈕開始處理
	            7. 查看結果：在介面查看結果或等待郵件

	            ### ⚡ 效能建議
	            - 圖片數量：建議一次處理2-10張圖片
	            - 圖片大小：單張圖片建議不超過10MB
	            - 處理時間：多圖片分析需要更長時間，請耐心等待
	            - API配額：注意API使用量，避免超出限制

	            ### 🛠️ 故障排除
	            - 如果分析失敗，檢查API密鑰是否正確
	            - 確保圖片格式支援且未損壞
	            - 網路不穩定時可能需要重試
	            - 大量圖片處理時間較長，請耐心等待
	            """)

	        # Prompt examples
	        with gr.Accordion("🎯 提示詞範例", open=False):
	            gr.Markdown("### 📸 單張分析提示詞")
	            gr.Examples(
	                examples=[
	                    ["請詳細描述這張圖片，包括主要物件、顏色、構圖和可能的背景故事"],
	                    ["提取並分析圖片中的所有文字內容，並解釋其含義"],
	                    ["分析圖片的藝術風格、攝影技法和視覺效果"],
	                    ["識別圖片中的品牌、標誌和商業元素"],
	                    ["描述圖片中人物的情緒、動作和互動關係"]
	                ],
	                inputs=prompt_input,
	                label="點擊使用單張分析範例"
	            )

	            gr.Markdown("### 🔄 比較分析提示詞")
	            gr.Examples(
	                examples=[
	                    ["比較這些圖片的相似點和差異，分析它們的關聯性"],
	                    ["分析這些圖片在構圖、色彩和風格上的異同"],
	                    ["比較圖片中產品或物件的特徵差異"],
	                    ["分析這些圖片的時間順序和變化過程"],
	                    ["比較不同角度或場景下同一主題的表現差異"]
	                ],
	                inputs=prompt_input,
	                label="點擊使用比較分析範例"
	            )

	        # Version footer (plain "|" separator, see the header note)
	        gr.HTML("""
	        <div style="text-align: center; margin-top: 2rem; padding: 1.5rem; background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); border-radius: 10px; box-shadow: 0 2px 10px rgba(0,0,0,0.1);">
	        <p><small>
	        🚀 Powered by <strong>Google Gemini 2.0</strong> & <strong>Resend</strong><br>
	        📍 Deployed on <strong>Hugging Face Spaces</strong><br>
	        🔥 <strong>多圖片比較分析版本 2.0</strong> | Made with ❤️ using Gradio<br>
	        ⭐ 支援最多10張圖片同時分析比較
	        </small></p>
	        </div>
	        """)

	    return demo

	# Script entry point
	if __name__ == "__main__":
	    # Launch settings for the Hugging Face Spaces deployment.
	    launch_options = {
	        "share": False,
	        "show_error": True,
	        "show_api": False,
	        "quiet": False,
	    }

	    # Build the interface, then start serving it.
	    demo = create_interface()
	    demo.launch(**launch_options)