Spaces:
Sleeping
Sleeping
| """ | |
| 多模態融合分析模組 | |
| """ | |
| import numpy as np | |
| from typing import Dict, List, Optional, Tuple | |
| import logging | |
| logger = logging.getLogger(__name__) | |
class MultimodalFusion:
    """Fuse text, image and video analysis results into one combined report.

    Each modality is supplied as an optional dict. A modality takes part in
    the fusion only when its dict is non-empty and has no truthy ``"error"``
    entry. Sentiment and confidence are combined as weighted averages using
    ``self.weights``; the content category is chosen by majority vote.
    """

    def __init__(self):
        """Set up the per-modality weights and the sentiment label scores."""
        # Relative weight of each modality in the weighted averages.
        self.weights = {
            "text": 0.4,
            "image": 0.35,
            "video": 0.25,
        }
        # Numeric score for each sentiment label (positive / neutral / negative).
        self.emotion_mapping = {
            "正面": 1.0,
            "中性": 0.0,
            "負面": -1.0,
        }

    def fuse_analysis(self,
                      text_analysis: Optional[Dict] = None,
                      image_analysis: Optional[Dict] = None,
                      video_analysis: Optional[Dict] = None) -> Dict:
        """Fuse the per-modality analysis results into a single result dict.

        Args:
            text_analysis: Text analysis result, or None when unavailable.
            image_analysis: Image analysis result, or None when unavailable.
            video_analysis: Video analysis result, or None when unavailable.

        Returns:
            A dict with the keys ``modalities``, ``fused_sentiment``,
            ``fused_sentiment_score``, ``content_category``, ``confidence``,
            ``key_insights`` and ``summary``. If anything goes wrong during
            fusion, a dict of the form ``{"error": "<message>"}`` is returned
            instead of raising.
        """
        try:
            results = {
                "modalities": [],
                "fused_sentiment": "中性",
                "fused_sentiment_score": 0.0,
                "content_category": "一般",
                "confidence": 0.0,
                "key_insights": [],
                "summary": "",
            }

            # A modality is usable when it is non-empty and reports no error.
            candidates = (
                ("text", text_analysis),
                ("image", image_analysis),
                ("video", video_analysis),
            )
            available_modalities = [
                name for name, analysis in candidates
                if analysis and not analysis.get("error")
            ]
            results["modalities"] = available_modalities

            if not available_modalities:
                results["summary"] = "無可用的分析模態"
                return results

            analyses = (text_analysis, image_analysis, video_analysis)

            sentiment, sentiment_score = self._fuse_sentiment(
                *analyses, available_modalities
            )
            results["fused_sentiment"] = sentiment
            results["fused_sentiment_score"] = sentiment_score

            results["content_category"] = self._fuse_content_category(
                *analyses, available_modalities
            )
            results["confidence"] = self._calculate_confidence(
                *analyses, available_modalities
            )
            results["key_insights"] = self._extract_key_insights(
                *analyses, available_modalities
            )

            # The summary is built last because it reads every field above.
            results["summary"] = self._generate_fusion_summary(results)

            logger.info("多模態融合分析完成,使用模態: %s", available_modalities)
            return results
        except Exception as e:
            # Top-level boundary: report the failure to the caller as data,
            # but keep the traceback in the log for debugging.
            logger.exception("多模態融合分析失敗: %s", e)
            return {"error": str(e)}

    def _resolve_sentiment_score(self, analysis: Dict, label_key: str,
                                 score_key: Optional[str] = None) -> float:
        """Return one modality's sentiment score.

        The label stored under ``label_key`` is mapped through
        ``self.emotion_mapping`` (unknown or non-string labels count as 0.0).
        When ``score_key`` is given and present in ``analysis``, its value is
        used instead, provided it converts to a finite float; otherwise the
        label-derived score is kept.
        """
        label = analysis.get(label_key, "中性")
        # Guard against unhashable labels, which would break the dict lookup.
        if isinstance(label, str):
            fallback = self.emotion_mapping.get(label, 0.0)
        else:
            fallback = 0.0

        if score_key is None or score_key not in analysis:
            return fallback

        try:
            score = float(analysis[score_key])
        except (TypeError, ValueError):
            return fallback
        # NaN/inf would poison the weighted average, so fall back as well.
        return score if np.isfinite(score) else fallback

    def _fuse_sentiment(self, text_analysis: Optional[Dict],
                        image_analysis: Optional[Dict],
                        video_analysis: Optional[Dict],
                        modalities: List[str]) -> Tuple[str, float]:
        """Combine the per-modality sentiments into one label and score.

        Text and image read the ``sentiment`` label and prefer an explicit
        ``sentiment_score`` when one is present; video reads only the
        ``audio_sentiment`` label.

        Returns:
            ``(label, score)`` where ``score`` is the weighted average and
            ``label`` is positive above 0.3, negative below -0.3 and neutral
            in between. ``("中性", 0.0)`` when no modality contributes.
        """
        sources = (
            ("text", text_analysis, "sentiment", "sentiment_score"),
            ("image", image_analysis, "sentiment", "sentiment_score"),
            ("video", video_analysis, "audio_sentiment", None),
        )

        sentiment_scores = []
        weights = []
        for name, analysis, label_key, score_key in sources:
            if name in modalities and analysis:
                sentiment_scores.append(
                    self._resolve_sentiment_score(analysis, label_key, score_key)
                )
                weights.append(self.weights[name])

        if not sentiment_scores:
            return "中性", 0.0

        weighted_score = float(np.average(sentiment_scores, weights=weights))

        if weighted_score > 0.3:
            sentiment_label = "正面"
        elif weighted_score < -0.3:
            sentiment_label = "負面"
        else:
            sentiment_label = "中性"
        return sentiment_label, weighted_score

    def _fuse_content_category(self, text_analysis: Optional[Dict],
                               image_analysis: Optional[Dict],
                               video_analysis: Optional[Dict],
                               modalities: List[str]) -> str:
        """Pick the content category by majority vote across modalities.

        Text votes with its ``content_category``; image votes based on its
        ``scene`` (outdoor / indoor / general); video votes based on its
        ``actions`` (sport / static / general). Ties go to the category that
        was voted first (text, then image, then video).
        """
        # Function-scope import: the module's import block does not provide it.
        from collections import Counter

        votes = []

        if "text" in modalities and text_analysis:
            # A missing or empty category counts as the general category.
            votes.append(text_analysis.get("content_category") or "一般")

        if "image" in modalities and image_analysis:
            # ``or ""`` keeps a present-but-None scene from raising below.
            scene = image_analysis.get("scene") or ""
            if "戶外" in scene:
                votes.append("戶外")
            elif "室內" in scene:
                votes.append("室內")
            else:
                votes.append("一般")

        if "video" in modalities and video_analysis:
            # ``or []`` keeps a present-but-None action list from raising.
            actions = video_analysis.get("actions") or []
            if "運動" in actions:
                votes.append("運動")
            elif "靜止" in actions:
                votes.append("靜態")
            else:
                votes.append("一般")

        if not votes:
            return "一般"
        return Counter(votes).most_common(1)[0][0]

    def _calculate_confidence(self, text_analysis: Optional[Dict],
                              image_analysis: Optional[Dict],
                              video_analysis: Optional[Dict],
                              modalities: List[str]) -> float:
        """Return the weighted average confidence of the usable modalities.

        The per-modality confidences are fixed placeholders (text 0.8,
        image 0.7, video 0.6); they are not derived from the analysis
        content. Returns 0.0 when no modality contributes.
        """
        sources = (
            ("text", text_analysis, 0.8),
            ("image", image_analysis, 0.7),
            ("video", video_analysis, 0.6),
        )

        confidences = []
        weights = []
        for name, analysis, placeholder_confidence in sources:
            if name in modalities and analysis:
                confidences.append(placeholder_confidence)
                weights.append(self.weights[name])

        if not confidences:
            return 0.0
        return float(np.average(confidences, weights=weights))

    @staticmethod
    def _join_items(items, limit: Optional[int] = None) -> str:
        """Join ``items`` with ``", "``, keeping at most ``limit`` of them.

        Every item is converted with ``str`` so that non-string entries
        (tuples, dicts, numbers) do not make the join fail.
        """
        selected = list(items)
        if limit is not None:
            selected = selected[:limit]
        return ", ".join(str(item) for item in selected)

    def _extract_key_insights(self, text_analysis: Optional[Dict],
                              image_analysis: Optional[Dict],
                              video_analysis: Optional[Dict],
                              modalities: List[str]) -> List[str]:
        """Collect short human-readable highlights from each usable modality.

        Text contributes up to three ``keywords`` and two ``topics``; image
        contributes up to three ``objects`` and its ``scene``; video
        contributes all ``actions`` and ``motion["motion_type"]``. Entries
        that are missing or empty are skipped.
        """
        insights = []

        if "text" in modalities and text_analysis:
            keywords = text_analysis.get("keywords", [])
            if keywords:
                insights.append(f"文字關鍵詞: {self._join_items(keywords, 3)}")
            topics = text_analysis.get("topics", [])
            if topics:
                insights.append(f"文字主題: {self._join_items(topics, 2)}")

        if "image" in modalities and image_analysis:
            objects = image_analysis.get("objects", [])
            if objects:
                insights.append(f"圖片物件: {self._join_items(objects, 3)}")
            scene = image_analysis.get("scene", "")
            if scene:
                insights.append(f"圖片場景: {scene}")

        if "video" in modalities and video_analysis:
            actions = video_analysis.get("actions", [])
            if actions:
                insights.append(f"影片動作: {self._join_items(actions)}")
            motion = video_analysis.get("motion") or {}
            # Only a dict-shaped ``motion`` entry can carry a motion type.
            motion_type = (
                motion.get("motion_type") if isinstance(motion, dict) else None
            )
            if motion_type:
                insights.append(f"運動類型: {motion_type}")

        return insights

    def _generate_fusion_summary(self, results: Dict) -> str:
        """Render the fused result as a single ``" | "``-separated line.

        The line lists the modalities, the fused sentiment with its score,
        the content category, the confidence and, when present, up to three
        key insights.
        """
        modalities = results.get("modalities", [])
        sentiment = results.get("fused_sentiment", "未知")
        sentiment_score = results.get("fused_sentiment_score", 0.0)
        category = results.get("content_category", "一般")
        confidence = results.get("confidence", 0.0)
        insights = results.get("key_insights", [])

        summary_parts = [
            f"分析模態: {', '.join(modalities)}",
            f"綜合情感: {sentiment} ({sentiment_score:.2f})",
            f"內容類型: {category}",
            f"分析置信度: {confidence:.2f}",
        ]
        if insights:
            summary_parts.append(f"關鍵洞察: {'; '.join(insights[:3])}")
        return " | ".join(summary_parts)