# motion_analyze/modules/image_analyzer.py
"""
圖片內容分析模組
"""
import cv2
import numpy as np
from typing import Dict, List, Optional, Tuple
import logging
import os
logger = logging.getLogger(__name__)
class ImageAnalyzer:
    """Image content analyzer.

    Provides heuristic, OpenCV-based analysis of an image file: basic info,
    simple object cues, scene classification, color clustering, sentiment
    estimation, face detection, and (stubbed) OCR.
    """

    def __init__(self):
        """Initialize the analyzer and attempt to load the detection model."""
        # Placeholders for an OpenCV DNN object-detection model.
        self.net = None
        self.classes = []
        self._load_object_detection_model()
        # Scene classification labels (runtime values; kept verbatim).
        self.scene_labels = [
            "室內", "戶外", "建築", "自然", "人物", "動物", "食物", "交通工具",
            "運動", "藝術", "科技", "時尚", "風景", "城市", "海邊", "山區"
        ]
        # Visual features associated with each sentiment class.
        self.emotion_colors = {
            "正面": ["明亮", "鮮豔", "溫暖"],
            "負面": ["昏暗", "冷色調", "陰鬱"],
            "中性": ["平衡", "自然", "柔和"]
        }

    def _load_object_detection_model(self):
        """Load the object-detection model (currently a no-op placeholder)."""
        try:
            # A pretrained model (YOLO, SSD, R-CNN, ...) could be loaded here.
            logger.info("物件檢測模型載入完成")
        except Exception as e:
            logger.warning(f"物件檢測模型載入失敗: {e}")

    def analyze(self, image_path: str, analysis_type: str = "comprehensive") -> Dict:
        """Analyze an image file.

        Args:
            image_path: Path to the image file.
            analysis_type: One of "comprehensive", "object_detection",
                "scene_analysis", "sentiment".

        Returns:
            A dict of analysis results, or ``{"error": ...}`` on failure.
        """
        try:
            if not os.path.exists(image_path):
                return {"error": "圖片檔案不存在"}
            image = cv2.imread(image_path)
            if image is None:
                return {"error": "無法讀取圖片"}
            results = {
                "image_path": image_path,
                "analysis_type": analysis_type,
                "image_info": self._get_image_info(image),
                "objects": self._detect_objects(image),
                "scene": self._analyze_scene(image),
                "sentiment": self._analyze_image_sentiment(image),
                "colors": self._analyze_colors(image),
                "faces": self._detect_faces(image),
                "text": self._extract_text(image),
                "summary": ""
            }
            # Add extra sections depending on the requested analysis type.
            if analysis_type in ["comprehensive", "object_detection"]:
                results["object_details"] = self._get_object_details(image)
            if analysis_type in ["comprehensive", "scene_analysis"]:
                results["scene_details"] = self._get_scene_details(image)
            if analysis_type in ["comprehensive", "sentiment"]:
                results["sentiment_score"] = self._calculate_sentiment_score(image)
            results["summary"] = self._generate_summary(results)
            logger.info(f"圖片分析完成: {analysis_type}")
            return results
        except Exception as e:
            logger.error(f"圖片分析失敗: {e}")
            return {"error": str(e)}

    def _get_image_info(self, image: np.ndarray) -> Dict:
        """Return basic image metadata (size, channels, aspect ratio)."""
        height, width = image.shape[:2]
        channels = image.shape[2] if len(image.shape) > 2 else 1
        return {
            "width": width,
            "height": height,
            "channels": channels,
            "aspect_ratio": width / height if height > 0 else 0,
            "total_pixels": width * height
        }

    def _detect_objects(self, image: np.ndarray) -> List[str]:
        """Detect coarse object cues via classical CV heuristics.

        Simplified stand-in for a deep-learning detector: looks for circles,
        lines, and prominent blue/green regions.
        """
        objects = []
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Circular objects via Hough transform.
        circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 20)
        if circles is not None:
            objects.append("圓形物件")
        # Straight lines via Canny edges + Hough transform.
        edges = cv2.Canny(gray, 50, 150)
        lines = cv2.HoughLines(edges, 1, np.pi / 180, threshold=100)
        if lines is not None:
            objects.append("線性結構")
        # Color-based region cues in HSV space.
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        blue_mask = cv2.inRange(hsv, np.array([100, 50, 50]), np.array([130, 255, 255]))
        if np.sum(blue_mask) > 1000:
            objects.append("藍色區域")
        green_mask = cv2.inRange(hsv, np.array([40, 50, 50]), np.array([80, 255, 255]))
        if np.sum(green_mask) > 1000:
            objects.append("綠色區域")
        return objects

    def _analyze_scene(self, image: np.ndarray) -> str:
        """Classify the scene type from brightness, contrast and edge density."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        brightness = np.mean(gray)
        contrast = np.std(gray)
        edges = cv2.Canny(gray, 50, 150)
        edge_density = np.sum(edges > 0) / edges.size
        # Threshold-based heuristic classification.
        if brightness > 150 and contrast > 50:
            return "明亮戶外場景"
        elif brightness < 100 and edge_density > 0.1:
            return "室內場景"
        elif edge_density > 0.15:
            return "複雜場景"
        else:
            return "簡單場景"

    def _analyze_image_sentiment(self, image: np.ndarray) -> str:
        """Estimate image sentiment from average HSV statistics."""
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        mean_hue = np.mean(hsv[:, :, 0])
        mean_saturation = np.mean(hsv[:, :, 1])
        mean_value = np.mean(hsv[:, :, 2])
        # Bright and saturated -> positive; dark or washed-out -> negative.
        if mean_value > 150 and mean_saturation > 100:
            return "正面"
        elif mean_value < 100 or mean_saturation < 50:
            return "負面"
        else:
            return "中性"

    def _analyze_colors(self, image: np.ndarray) -> Dict:
        """Cluster pixel colors with K-means and report dominant colors.

        Returns a dict with ``dominant_colors`` (list of {color, percentage})
        and ``color_diversity``. Falls back to an empty result if scikit-learn
        is unavailable or clustering fails.
        """
        pixels = image.reshape(-1, 3)
        try:
            # Import inside the try so a missing scikit-learn is caught and
            # logged instead of crashing the whole analysis (the original
            # imported it before the try, defeating the fallback).
            from sklearn.cluster import KMeans

            # n_init pinned explicitly: newer scikit-learn changed the default
            # and warns when it is left unspecified.
            kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
            kmeans.fit(pixels)
            centers = kmeans.cluster_centers_.astype(int)
            labels = kmeans.labels_
            # minlength guards against an IndexError when a cluster is empty
            # (plain bincount would return fewer than n_clusters bins).
            counts = np.bincount(labels, minlength=len(centers))
            percentages = counts / len(labels) * 100
            dominant_colors = [
                {
                    "color": center.tolist(),
                    # Plain Python float so the result is JSON-serializable.
                    "percentage": float(percentages[i]),
                }
                for i, center in enumerate(centers)
            ]
            return {
                "dominant_colors": dominant_colors,
                "color_diversity": len(np.unique(labels))
            }
        except Exception as e:
            logger.warning(f"顏色分析失敗: {e}")
            return {"dominant_colors": [], "color_diversity": 0}

    def _detect_faces(self, image: np.ndarray) -> List[Dict]:
        """Detect frontal faces with a Haar cascade.

        Returns a list of dicts with the bounding box and a fixed placeholder
        confidence.
        """
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        if face_cascade.empty():
            # Cascade file missing or unreadable: report no faces rather than
            # letting detectMultiScale raise.
            logger.warning("無法載入人臉檢測模型")
            return []
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)
        face_info = []
        for (x, y, w, h) in faces:
            face_info.append({
                "x": int(x),
                "y": int(y),
                "width": int(w),
                "height": int(h),
                # Haar cascades give no score; fixed placeholder confidence.
                "confidence": 0.8
            })
        return face_info

    def _extract_text(self, image: np.ndarray) -> str:
        """Extract text from the image (OCR).

        Stub: an OCR engine such as Tesseract could be integrated here; for
        now always returns an empty string.
        """
        return ""

    def _get_object_details(self, image: np.ndarray) -> Dict:
        """Return object-detection details with a placeholder confidence."""
        objects = self._detect_objects(image)
        return {
            "detected_objects": objects,
            "object_count": len(objects),
            "detection_confidence": 0.7  # placeholder; no real model yet
        }

    def _get_scene_details(self, image: np.ndarray) -> Dict:
        """Return scene classification details plus raw feature values."""
        scene = self._analyze_scene(image)
        # Convert once; the original converted to grayscale twice.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return {
            "scene_type": scene,
            "scene_confidence": 0.6,  # placeholder; heuristic classifier
            "scene_features": {
                "brightness": float(np.mean(gray)),
                "contrast": float(np.std(gray))
            }
        }

    def _calculate_sentiment_score(self, image: np.ndarray) -> float:
        """Map the categorical sentiment to a score in [-1, 1]."""
        sentiment = self._analyze_image_sentiment(image)
        if sentiment == "正面":
            return 0.7
        elif sentiment == "負面":
            return -0.7
        else:
            return 0.0

    def _generate_summary(self, results: Dict) -> str:
        """Build a one-line, pipe-separated summary from the result dict."""
        summary_parts = []
        if results["image_info"]:
            info = results["image_info"]
            summary_parts.append(f"圖片尺寸: {info['width']}x{info['height']}")
        if results["objects"]:
            summary_parts.append(f"偵測物件: {', '.join(results['objects'])}")
        if results["scene"]:
            summary_parts.append(f"場景類型: {results['scene']}")
        if results["sentiment"]:
            summary_parts.append(f"情感傾向: {results['sentiment']}")
        if results["faces"]:
            summary_parts.append(f"人臉數量: {len(results['faces'])}")
        return " | ".join(summary_parts)