""" 圖片內容分析模組 """ import cv2 import numpy as np from typing import Dict, List, Optional, Tuple import logging import os logger = logging.getLogger(__name__) class ImageAnalyzer: """圖片內容分析器""" def __init__(self): """初始化圖片分析器""" # 初始化OpenCV的DNN模組 self.net = None self.classes = [] self._load_object_detection_model() # 場景分類標籤 self.scene_labels = [ "室內", "戶外", "建築", "自然", "人物", "動物", "食物", "交通工具", "運動", "藝術", "科技", "時尚", "風景", "城市", "海邊", "山區" ] # 情感相關的視覺特徵 self.emotion_colors = { "正面": ["明亮", "鮮豔", "溫暖"], "負面": ["昏暗", "冷色調", "陰鬱"], "中性": ["平衡", "自然", "柔和"] } def _load_object_detection_model(self): """載入物件檢測模型""" try: # 這裡可以載入預訓練的模型 # 例如: YOLO, SSD, R-CNN等 logger.info("物件檢測模型載入完成") except Exception as e: logger.warning(f"物件檢測模型載入失敗: {e}") def analyze(self, image_path: str, analysis_type: str = "comprehensive") -> Dict: """ 分析圖片內容 Args: image_path: 圖片檔案路徑 analysis_type: 分析類型 Returns: 分析結果字典 """ try: if not os.path.exists(image_path): return {"error": "圖片檔案不存在"} # 讀取圖片 image = cv2.imread(image_path) if image is None: return {"error": "無法讀取圖片"} results = { "image_path": image_path, "analysis_type": analysis_type, "image_info": self._get_image_info(image), "objects": self._detect_objects(image), "scene": self._analyze_scene(image), "sentiment": self._analyze_image_sentiment(image), "colors": self._analyze_colors(image), "faces": self._detect_faces(image), "text": self._extract_text(image), "summary": "" } # 根據分析類型添加特定分析 if analysis_type in ["comprehensive", "object_detection"]: results["object_details"] = self._get_object_details(image) if analysis_type in ["comprehensive", "scene_analysis"]: results["scene_details"] = self._get_scene_details(image) if analysis_type in ["comprehensive", "sentiment"]: results["sentiment_score"] = self._calculate_sentiment_score(image) # 生成總結 results["summary"] = self._generate_summary(results) logger.info(f"圖片分析完成: {analysis_type}") return results except Exception as e: logger.error(f"圖片分析失敗: {e}") return {"error": str(e)} def _get_image_info(self, image: np.ndarray) -> Dict: """獲取圖片基本資訊""" height, width = image.shape[:2] channels = image.shape[2] if len(image.shape) > 2 else 1 return { "width": width, "height": height, "channels": channels, "aspect_ratio": width / height if height > 0 else 0, "total_pixels": width * height } def _detect_objects(self, image: np.ndarray) -> List[str]: """檢測圖片中的物件""" # 簡化的物件檢測(實際應用中會使用深度學習模型) objects = [] # 基於顏色和形狀的簡單檢測 gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # 檢測圓形物件 circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 20) if circles is not None: objects.append("圓形物件") # 檢測直線 edges = cv2.Canny(gray, 50, 150) lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=100) if lines is not None: objects.append("線性結構") # 基於顏色的檢測 hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) # 檢測藍色區域 blue_mask = cv2.inRange(hsv, np.array([100, 50, 50]), np.array([130, 255, 255])) if np.sum(blue_mask) > 1000: objects.append("藍色區域") # 檢測綠色區域 green_mask = cv2.inRange(hsv, np.array([40, 50, 50]), np.array([80, 255, 255])) if np.sum(green_mask) > 1000: objects.append("綠色區域") return objects def _analyze_scene(self, image: np.ndarray) -> str: """分析場景類型""" # 簡化的場景分析 gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # 計算亮度 brightness = np.mean(gray) # 計算對比度 contrast = np.std(gray) # 計算邊緣密度 edges = cv2.Canny(gray, 50, 150) edge_density = np.sum(edges > 0) / edges.size # 基於特徵進行場景分類 if brightness > 150 and contrast > 50: return "明亮戶外場景" elif brightness < 100 and edge_density > 0.1: return "室內場景" elif edge_density > 0.15: return "複雜場景" else: return "簡單場景" def _analyze_image_sentiment(self, image: np.ndarray) -> str: """分析圖片情感""" # 基於顏色和亮度分析情感 hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) # 計算平均色調 mean_hue = np.mean(hsv[:, :, 0]) mean_saturation = np.mean(hsv[:, :, 1]) mean_value = np.mean(hsv[:, :, 2]) # 基於HSV值判斷情感 if mean_value > 150 and mean_saturation > 100: return "正面" elif mean_value < 100 or mean_saturation < 50: return "負面" else: return "中性" def _analyze_colors(self, image: np.ndarray) -> Dict: """分析圖片顏色""" # 計算主要顏色 pixels = image.reshape(-1, 3) # 使用K-means聚類找到主要顏色 from sklearn.cluster import KMeans try: kmeans = KMeans(n_clusters=5, random_state=42) kmeans.fit(pixels) colors = kmeans.cluster_centers_.astype(int) labels = kmeans.labels_ # 計算每種顏色的比例 color_counts = np.bincount(labels) color_percentages = color_counts / len(labels) * 100 dominant_colors = [] for i, color in enumerate(colors): dominant_colors.append({ "color": color.tolist(), "percentage": color_percentages[i] }) return { "dominant_colors": dominant_colors, "color_diversity": len(np.unique(labels)) } except Exception as e: logger.warning(f"顏色分析失敗: {e}") return {"dominant_colors": [], "color_diversity": 0} def _detect_faces(self, image: np.ndarray) -> List[Dict]: """檢測人臉""" # 載入人臉檢測器 face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml') gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) faces = face_cascade.detectMultiScale(gray, 1.1, 4) face_info = [] for (x, y, w, h) in faces: face_info.append({ "x": int(x), "y": int(y), "width": int(w), "height": int(h), "confidence": 0.8 # 簡化版,實際會計算置信度 }) return face_info def _extract_text(self, image: np.ndarray) -> str: """提取圖片中的文字(OCR)""" # 這裡可以整合OCR庫如Tesseract # 簡化版返回空字串 return "" def _get_object_details(self, image: np.ndarray) -> Dict: """獲取物件檢測詳細資訊""" objects = self._detect_objects(image) return { "detected_objects": objects, "object_count": len(objects), "detection_confidence": 0.7 # 簡化版 } def _get_scene_details(self, image: np.ndarray) -> Dict: """獲取場景分析詳細資訊""" scene = self._analyze_scene(image) return { "scene_type": scene, "scene_confidence": 0.6, # 簡化版 "scene_features": { "brightness": float(np.mean(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))), "contrast": float(np.std(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))) } } def _calculate_sentiment_score(self, image: np.ndarray) -> float: """計算圖片情感分數""" sentiment = self._analyze_image_sentiment(image) if sentiment == "正面": return 0.7 elif sentiment == "負面": return -0.7 else: return 0.0 def _generate_summary(self, results: Dict) -> str: """生成分析總結""" summary_parts = [] if results["image_info"]: info = results["image_info"] summary_parts.append(f"圖片尺寸: {info['width']}x{info['height']}") if results["objects"]: summary_parts.append(f"偵測物件: {', '.join(results['objects'])}") if results["scene"]: summary_parts.append(f"場景類型: {results['scene']}") if results["sentiment"]: summary_parts.append(f"情感傾向: {results['sentiment']}") if results["faces"]: summary_parts.append(f"人臉數量: {len(results['faces'])}") return " | ".join(summary_parts)