mikao007 commited on
Commit
e92e423
·
verified ·
1 Parent(s): cf2c2a8

Upload 12 files

Browse files
README.md CHANGED
@@ -1,12 +1,28 @@
1
- ---
2
- title: Motion Analyze
3
- emoji: 🐠
4
- colorFrom: yellow
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.48.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 社交媒體多模態內容分析系統
2
+
3
+ 這是一個基於Gradio的社交媒體多模態內容分析系統。
4
+
5
+ ## 快速開始
6
+
7
+ 1. 將此repository fork到您的GitHub帳戶
8
+ 2. 前往 [Hugging Face Spaces](https://huggingface.co/spaces)
9
+ 3. 創建新的Space,選擇Gradio SDK
10
+ 4. 連接您的GitHub repository
11
+ 5. 等待自動部署完成
12
+
13
+ ## 功能特色
14
+
15
+ - 📝 文字情感分析和關鍵詞提取
16
+ - 🖼️ 圖片物件檢測和場景識別
17
+ - 🎬 影片動作識別和音頻分析
18
+ - 🔗 多模態融合分析
19
+
20
+ ## 使用方式
21
+
22
+ 1. 在文字框中輸入要分析的文字
23
+ 2. 上傳圖片檔案(支援jpg, png等格式)
24
+ 3. 上傳影片檔案(支援mp4, avi等格式)
25
+ 4. 選擇分析類型
26
+ 5. 點擊"開始分析"按鈕
27
+
28
+ 系統會自動分析內容並提供詳細的分析結果。
app.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Deployment script for Gradio Spaces.

Wires the analysis modules into a Gradio web UI. When the real modules are
unavailable (e.g. on a fresh Space before dependencies install), lightweight
stub analyzers are defined instead so the app still starts.
"""

import gradio as gr
import os
import sys
import logging
from typing import Dict, Optional
import tempfile
import shutil

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import the analysis modules; on failure fall back to stub implementations
# that return canned results, so the UI remains usable for smoke testing.
try:
    from modules.text_analyzer import TextAnalyzer
    from modules.image_analyzer import ImageAnalyzer
    from modules.video_analyzer import VideoAnalyzer
    from modules.multimodal_fusion import MultimodalFusion
    from utils.file_handler import FileHandler
    from utils.config import Config
except ImportError as e:
    logger.error(f"模組導入失敗: {e}")

    # Simplified stand-in analyzers used only when the real modules fail to load.
    class TextAnalyzer:
        # Returns a fixed neutral result regardless of input.
        def analyze(self, text, analysis_type="comprehensive"):
            return {"sentiment": "中性", "keywords": ["測試"], "summary": "測試分析"}

    class ImageAnalyzer:
        # Returns a fixed placeholder image result.
        def analyze(self, image_path, analysis_type="comprehensive"):
            return {"objects": ["測試物件"], "scene": "測試場景", "summary": "測試分析"}

    class VideoAnalyzer:
        # Returns a fixed placeholder video result.
        def analyze(self, video_path, analysis_type="comprehensive"):
            return {"actions": ["測試動作"], "audio_sentiment": "中性", "summary": "測試分析"}

    class MultimodalFusion:
        # Returns a fixed neutral fusion result.
        def fuse_analysis(self, text_analysis, image_analysis, video_analysis):
            return {"fused_sentiment": "中性", "summary": "測試融合分析"}

    class FileHandler:
        # No-op stand-in; the real handler manages uploads.
        pass

    class Config:
        # Always yields the supplied default value.
        def get(self, key, default=None):
            return default
class GradioSocialMediaAnalyzer:
    """Coordinates the per-modality analyzers behind the Gradio interface."""

    def __init__(self):
        """Instantiate every analysis module; on failure, retry with whatever
        analyzer classes are currently bound (the stub fallbacks)."""
        try:
            self.config = Config()
            # Instantiate in a fixed order so partial failures are predictable.
            for attr, factory in (
                ("text_analyzer", TextAnalyzer),
                ("image_analyzer", ImageAnalyzer),
                ("video_analyzer", VideoAnalyzer),
                ("multimodal_fusion", MultimodalFusion),
                ("file_handler", FileHandler),
            ):
                setattr(self, attr, factory())
            logger.info("所有分析模組載入成功")
        except Exception as e:
            logger.error(f"分析器初始化失敗: {e}")
            # Retry with the simplified implementations.
            self.text_analyzer = TextAnalyzer()
            self.image_analyzer = ImageAnalyzer()
            self.video_analyzer = VideoAnalyzer()
            self.multimodal_fusion = MultimodalFusion()

    def analyze_content(self,
                        text_input: Optional[str] = None,
                        image_input: Optional[str] = None,
                        video_input: Optional[str] = None,
                        analysis_type: str = "comprehensive") -> Dict:
        """Run every applicable analyzer and return the combined report.

        Args:
            text_input: text to analyze, or None/blank to skip.
            image_input: path to an image file, or None to skip.
            video_input: path to a video file, or None to skip.
            analysis_type: analysis mode forwarded to each analyzer.

        Returns:
            Dict with per-modality results (None where skipped), a fused
            "multimodal_analysis" entry, and a "summary" string — or
            {"error": ...} when anything raises.
        """
        try:
            report: Dict = {
                "text_analysis": None,
                "image_analysis": None,
                "video_analysis": None,
                "multimodal_analysis": None,
                "summary": "",
            }

            if text_input and text_input.strip():
                logger.info("開始文字分析...")
                report["text_analysis"] = self.text_analyzer.analyze(text_input, analysis_type)

            if image_input:
                logger.info("開始圖片分析...")
                report["image_analysis"] = self.image_analyzer.analyze(image_input, analysis_type)

            if video_input:
                logger.info("開始影片分析...")
                report["video_analysis"] = self.video_analyzer.analyze(video_input, analysis_type)

            # Fuse whenever at least one modality was supplied.
            if any((text_input, image_input, video_input)):
                logger.info("開始多模態融合分析...")
                report["multimodal_analysis"] = self.multimodal_fusion.fuse_analysis(
                    report["text_analysis"],
                    report["image_analysis"],
                    report["video_analysis"],
                )

            report["summary"] = self._generate_summary(report)
            logger.info("分析完成")
            return report

        except Exception as e:
            logger.error(f"分析過程中發生錯誤: {str(e)}")
            return {"error": str(e)}

    def _generate_summary(self, results: Dict) -> str:
        """Build a newline-joined digest of every analysis that produced output."""
        sections = (
            ("text_analysis", "文字分析"),
            ("image_analysis", "圖片分析"),
            ("video_analysis", "影片分析"),
            ("multimodal_analysis", "綜合分析"),
        )
        # Falsy entries (None / empty dict) are silently skipped.
        return "\n".join(
            f"{label}: {results[key].get('summary', 'N/A')}"
            for key, label in sections
            if results[key]
        )
# Module-level analyzer instance shared by every Gradio request.
analyzer = GradioSocialMediaAnalyzer()

def analyze_interface(text: str, image, video, analysis_type: str):
    """Gradio callback: run the multimodal analysis and format the outputs.

    Args:
        text: textbox contents (Gradio may pass None for a cleared box).
        image: uploaded image file object or path, or None.
        video: uploaded video file object or path, or None.
        analysis_type: value of the analysis-type dropdown.

    Returns:
        Tuple of four strings: text, image, video and summary panels.
        On failure the error message goes to the first panel.
    """
    try:
        # Resolve file inputs to filesystem paths (gr.File yields objects
        # with a .name attribute; plain strings pass through).
        image_path = None
        video_path = None

        if image:
            image_path = image.name if hasattr(image, 'name') else str(image)

        if video:
            video_path = video.name if hasattr(video, 'name') else str(video)

        # BUG FIX: the original called text.strip() unconditionally, raising
        # AttributeError when Gradio supplies None for an empty textbox.
        results = analyzer.analyze_content(
            text_input=text if text and text.strip() else None,
            image_input=image_path,
            video_input=video_path,
            analysis_type=analysis_type
        )

        if "error" in results:
            return f"分析錯誤: {results['error']}", "", "", ""

        # Format each panel; the format_* helpers tolerate None/empty dicts.
        text_output = format_text_analysis(results.get("text_analysis", {}))
        image_output = format_image_analysis(results.get("image_analysis", {}))
        video_output = format_video_analysis(results.get("video_analysis", {}))
        summary_output = results.get("summary", "無分析結果")

        return text_output, image_output, video_output, summary_output

    except Exception as e:
        error_msg = f"處理過程中發生錯誤: {str(e)}"
        logger.error(error_msg)
        return error_msg, "", "", ""
def format_text_analysis(analysis: Dict) -> str:
    """Render a text-analysis result dict as human-readable lines.

    Returns a placeholder message when the dict is empty or None.
    """
    if not analysis:
        return "無文字分析結果"

    # (key, renderer) pairs — order fixes the display order.
    renderers = (
        ("sentiment", lambda v: f"情感分析: {v}"),
        ("keywords", lambda v: f"關鍵詞: {', '.join(v)}"),
        ("topics", lambda v: f"主題: {', '.join(v)}"),
        ("summary", lambda v: f"總結: {v}"),
    )
    return "\n".join(
        render(analysis[key]) for key, render in renderers if key in analysis
    )
def format_image_analysis(analysis: Dict) -> str:
    """Render an image-analysis result dict as human-readable lines.

    Returns a placeholder message when the dict is empty or None.
    """
    if not analysis:
        return "無圖片分析結果"

    # (key, renderer) pairs — order fixes the display order.
    renderers = (
        ("objects", lambda v: f"偵測物件: {', '.join(v)}"),
        ("scene", lambda v: f"場景描述: {v}"),
        ("sentiment", lambda v: f"圖片情感: {v}"),
        ("summary", lambda v: f"總結: {v}"),
    )
    return "\n".join(
        render(analysis[key]) for key, render in renderers if key in analysis
    )
def format_video_analysis(analysis: Dict) -> str:
    """Render a video-analysis result dict as human-readable lines.

    Returns a placeholder message when the dict is empty or None.
    """
    if not analysis:
        return "無影片分析結果"

    # (key, renderer) pairs — order fixes the display order.
    renderers = (
        ("objects", lambda v: f"偵測物件: {', '.join(v)}"),
        ("actions", lambda v: f"動作識別: {', '.join(v)}"),
        ("audio_sentiment", lambda v: f"音頻情感: {v}"),
        ("summary", lambda v: f"總結: {v}"),
    )
    return "\n".join(
        render(analysis[key]) for key, render in renderers if key in analysis
    )
def create_gradio_app():
    """Assemble and return the Gradio Blocks application.

    Layout: header row, an input column (text / image / video / analysis
    type / run button) beside an output column (four result panels), an
    examples section, and the run/clear event bindings.
    """
    with gr.Blocks(
        title="社交媒體多模態內容分析系統",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
            margin: auto !important;
        }
        .main-header {
            text-align: center;
            margin-bottom: 2rem;
        }
        """
    ) as app:

        # Header banner.
        with gr.Row():
            gr.HTML("""
            <div class="main-header">
                <h1>🔍 社交媒體多模態內容分析系統</h1>
                <p>支援文字、圖片、影片的智能分析與多模態融合</p>
            </div>
            """)

        with gr.Row():
            # Left column: inputs.
            with gr.Column(scale=1):
                gr.Markdown("### 📝 輸入內容")

                text_input = gr.Textbox(
                    label="文字內容",
                    placeholder="請輸入要分析的文字內容...",
                    lines=5,
                    max_lines=10
                )

                image_input = gr.File(
                    label="圖片檔案",
                    file_types=["image"],
                    file_count="single"
                )

                video_input = gr.File(
                    label="影片檔案",
                    file_types=["video"],
                    file_count="single"
                )

                # Display-label / internal-value pairs for the dropdown.
                analysis_type = gr.Dropdown(
                    choices=[
                        ("綜合分析", "comprehensive"),
                        ("情感分析", "sentiment"),
                        ("內容分類", "content_classification"),
                        ("物件檢測", "object_detection")
                    ],
                    value="comprehensive",
                    label="分析類型"
                )

                analyze_btn = gr.Button(
                    "🚀 開始分析",
                    variant="primary",
                    size="lg"
                )

            # Right column: results.
            with gr.Column(scale=1):
                gr.Markdown("### 📊 分析結果")

                text_output = gr.Textbox(
                    label="📝 文字分析結果",
                    lines=8,
                    interactive=False,
                    show_copy_button=True
                )

                image_output = gr.Textbox(
                    label="🖼️ 圖片分析結果",
                    lines=8,
                    interactive=False,
                    show_copy_button=True
                )

                video_output = gr.Textbox(
                    label="🎬 影片分析結果",
                    lines=8,
                    interactive=False,
                    show_copy_button=True
                )

                summary_output = gr.Textbox(
                    label="🎯 綜合分析總結",
                    lines=6,
                    interactive=False,
                    show_copy_button=True
                )

        # Usage examples.
        with gr.Row():
            gr.Markdown("""
            ### 💡 使用範例

            **文字分析範例:**
            - 輸入:「這個新產品真的很棒,我強烈推薦給大家!」
            - 分析:情感分析、關鍵詞提取、主題識別

            **圖片分析範例:**
            - 上傳:風景照片、人物照片、產品圖片
            - 分析:物件檢測、場景識別、情感分析

            **影片分析範例:**
            - 上傳:短影片、廣告影片、教學影片
            - 分析:動作識別、音頻分析、場景變化

            **多模態分析:**
            - 同時上傳多種內容類型
            - 系統會進行綜合分析並提供融合結果
            """)

        # Run the analysis pipeline on click.
        analyze_btn.click(
            fn=analyze_interface,
            inputs=[text_input, image_input, video_input, analysis_type],
            outputs=[text_output, image_output, video_output, summary_output]
        )

        # Reset every input and output panel to its initial state.
        clear_btn = gr.Button("🗑️ 清除所有", variant="secondary")
        clear_btn.click(
            fn=lambda: ("", None, None, "comprehensive", "", "", "", ""),
            outputs=[text_input, image_input, video_input, analysis_type,
                     text_output, image_output, video_output, summary_output]
        )

    return app
# Entry point when run directly (Gradio Spaces executes app.py this way).
if __name__ == "__main__":
    demo = create_gradio_app()

    # Launch bound to all interfaces on the port Spaces expects.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,      # Spaces provides its own public URL
        debug=False,      # keep debug off for production
        show_error=True,
        quiet=False,
    )
config_spaces.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "models": {
3
+ "text_model": "distilbert-base-chinese",
4
+ "image_model": "mobilenet_v2",
5
+ "video_model": "slowfast",
6
+ "multimodal_model": "clip"
7
+ },
8
+ "analysis": {
9
+ "max_text_length": 256,
10
+ "max_image_size": 224,
11
+ "max_video_duration": 15,
12
+ "confidence_threshold": 0.5
13
+ },
14
+ "api": {
15
+ "openai_api_key": "",
16
+ "huggingface_token": "",
17
+ "google_api_key": ""
18
+ },
19
+ "storage": {
20
+ "temp_dir": "/tmp",
21
+ "output_dir": "/tmp/output",
22
+ "max_file_size": 10485760
23
+ },
24
+ "logging": {
25
+ "level": "INFO",
26
+ "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
27
+ },
28
+ "gradio": {
29
+ "server_name": "0.0.0.0",
30
+ "server_port": 7860,
31
+ "share": false,
32
+ "debug": false,
33
+ "show_error": true,
34
+ "quiet": false
35
+ }
36
+ }
modules/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # 初始化檔案
modules/image_analyzer.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 圖片內容分析模組
3
+ """
4
+
5
+ import cv2
6
+ import numpy as np
7
+ from typing import Dict, List, Optional, Tuple
8
+ import logging
9
+ import os
10
+
11
+ logger = logging.getLogger(__name__)
12
+
class ImageAnalyzer:
    """Heuristic image analyzer built on OpenCV primitives.

    Provides object/scene/sentiment/color/face analysis. Detection is
    deliberately lightweight (color masks, Hough transforms, Haar cascade)
    rather than a deep-learning pipeline.
    """

    def __init__(self):
        """Initialize detector state and label/feature tables."""
        # Placeholder for a DNN detector (e.g. YOLO/SSD) — not loaded yet.
        self.net = None
        self.classes = []
        self._load_object_detection_model()

        # Candidate scene-classification labels.
        self.scene_labels = [
            "室內", "戶外", "建築", "自然", "人物", "動物", "食物", "交通工具",
            "運動", "藝術", "科技", "時尚", "風景", "城市", "海邊", "山區"
        ]

        # Visual traits loosely associated with each sentiment class.
        self.emotion_colors = {
            "正面": ["明亮", "鮮豔", "溫暖"],
            "負面": ["昏暗", "冷色調", "陰鬱"],
            "中性": ["平衡", "自然", "柔和"]
        }

    def _load_object_detection_model(self):
        """Load the object-detection model (currently a stub; logs only)."""
        try:
            # A pretrained model (YOLO, SSD, R-CNN, ...) could be loaded here.
            logger.info("物件檢測模型載入完成")
        except Exception as e:
            logger.warning(f"物件檢測模型載入失敗: {e}")

    def analyze(self, image_path: str, analysis_type: str = "comprehensive") -> Dict:
        """Analyze the image at *image_path*.

        Args:
            image_path: filesystem path to the image.
            analysis_type: one of "comprehensive", "object_detection",
                "scene_analysis", "sentiment" (others run the base set only).

        Returns:
            Result dict with per-aspect entries and a "summary" string,
            or {"error": ...} on failure.
        """
        try:
            if not os.path.exists(image_path):
                return {"error": "圖片檔案不存在"}

            image = cv2.imread(image_path)
            if image is None:
                return {"error": "無法讀取圖片"}

            # Base analyses run regardless of analysis_type.
            results = {
                "image_path": image_path,
                "analysis_type": analysis_type,
                "image_info": self._get_image_info(image),
                "objects": self._detect_objects(image),
                "scene": self._analyze_scene(image),
                "sentiment": self._analyze_image_sentiment(image),
                "colors": self._analyze_colors(image),
                "faces": self._detect_faces(image),
                "text": self._extract_text(image),
                "summary": ""
            }

            # Type-specific extras.
            if analysis_type in ["comprehensive", "object_detection"]:
                results["object_details"] = self._get_object_details(image)

            if analysis_type in ["comprehensive", "scene_analysis"]:
                results["scene_details"] = self._get_scene_details(image)

            if analysis_type in ["comprehensive", "sentiment"]:
                results["sentiment_score"] = self._calculate_sentiment_score(image)

            results["summary"] = self._generate_summary(results)

            logger.info(f"圖片分析完成: {analysis_type}")
            return results

        except Exception as e:
            logger.error(f"圖片分析失敗: {e}")
            return {"error": str(e)}

    def _get_image_info(self, image: np.ndarray) -> Dict:
        """Return basic geometry info (size, channels, aspect ratio)."""
        height, width = image.shape[:2]
        channels = image.shape[2] if len(image.shape) > 2 else 1

        return {
            "width": width,
            "height": height,
            "channels": channels,
            "aspect_ratio": width / height if height > 0 else 0,
            "total_pixels": width * height
        }

    def _detect_objects(self, image: np.ndarray) -> List[str]:
        """Detect coarse object categories via shape and color heuristics."""
        objects = []

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Circular shapes via Hough circle transform.
        circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 20)
        if circles is not None:
            objects.append("圓形物件")

        # Linear structures via Canny edges + Hough line transform.
        edges = cv2.Canny(gray, 50, 150)
        lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=100)
        if lines is not None:
            objects.append("線性結構")

        # Color-region detection in HSV space.
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        # Blue region (hue ~100-130).
        blue_mask = cv2.inRange(hsv, np.array([100, 50, 50]), np.array([130, 255, 255]))
        if np.sum(blue_mask) > 1000:
            objects.append("藍色區域")

        # Green region (hue ~40-80).
        green_mask = cv2.inRange(hsv, np.array([40, 50, 50]), np.array([80, 255, 255]))
        if np.sum(green_mask) > 1000:
            objects.append("綠色區域")

        return objects

    def _analyze_scene(self, image: np.ndarray) -> str:
        """Classify the scene from brightness, contrast and edge density."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        brightness = np.mean(gray)
        contrast = np.std(gray)

        edges = cv2.Canny(gray, 50, 150)
        edge_density = np.sum(edges > 0) / edges.size

        # Threshold-based classification (heuristic).
        if brightness > 150 and contrast > 50:
            return "明亮戶外場景"
        elif brightness < 100 and edge_density > 0.1:
            return "室內場景"
        elif edge_density > 0.15:
            return "複雜場景"
        else:
            return "簡單場景"

    def _analyze_image_sentiment(self, image: np.ndarray) -> str:
        """Infer a coarse sentiment label from HSV brightness/saturation."""
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        mean_hue = np.mean(hsv[:, :, 0])
        mean_saturation = np.mean(hsv[:, :, 1])
        mean_value = np.mean(hsv[:, :, 2])

        # Bright + saturated → positive; dark or washed-out → negative.
        if mean_value > 150 and mean_saturation > 100:
            return "正面"
        elif mean_value < 100 or mean_saturation < 50:
            return "負面"
        else:
            return "中性"

    def _analyze_colors(self, image: np.ndarray) -> Dict:
        """Find dominant colors via K-means over the pixel array.

        Returns empty results when clustering (or sklearn itself) is
        unavailable, instead of raising.
        """
        pixels = image.reshape(-1, 3)

        try:
            # BUG FIX: the import now lives inside the try block so a missing
            # sklearn is caught by the fallback instead of crashing analyze().
            from sklearn.cluster import KMeans

            kmeans = KMeans(n_clusters=5, random_state=42)
            kmeans.fit(pixels)

            colors = kmeans.cluster_centers_.astype(int)
            labels = kmeans.labels_

            # Share of pixels assigned to each cluster.
            color_counts = np.bincount(labels)
            color_percentages = color_counts / len(labels) * 100

            dominant_colors = []
            for i, color in enumerate(colors):
                dominant_colors.append({
                    "color": color.tolist(),
                    "percentage": color_percentages[i]
                })

            return {
                "dominant_colors": dominant_colors,
                "color_diversity": len(np.unique(labels))
            }

        except Exception as e:
            logger.warning(f"顏色分析失敗: {e}")
            return {"dominant_colors": [], "color_diversity": 0}

    def _detect_faces(self, image: np.ndarray) -> List[Dict]:
        """Detect frontal faces with OpenCV's bundled Haar cascade."""
        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)

        face_info = []
        for (x, y, w, h) in faces:
            face_info.append({
                "x": int(x),
                "y": int(y),
                "width": int(w),
                "height": int(h),
                "confidence": 0.8  # placeholder; Haar cascades yield no score
            })

        return face_info

    def _extract_text(self, image: np.ndarray) -> str:
        """OCR hook (e.g. Tesseract) — currently returns an empty string."""
        return ""

    def _get_object_details(self, image: np.ndarray) -> Dict:
        """Return object-detection details (count + fixed placeholder confidence)."""
        objects = self._detect_objects(image)
        return {
            "detected_objects": objects,
            "object_count": len(objects),
            "detection_confidence": 0.7  # placeholder
        }

    def _get_scene_details(self, image: np.ndarray) -> Dict:
        """Return scene label plus the raw brightness/contrast features."""
        scene = self._analyze_scene(image)
        # Convert to grayscale once instead of twice (original recomputed it).
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return {
            "scene_type": scene,
            "scene_confidence": 0.6,  # placeholder
            "scene_features": {
                "brightness": float(np.mean(gray)),
                "contrast": float(np.std(gray))
            }
        }

    def _calculate_sentiment_score(self, image: np.ndarray) -> float:
        """Map the sentiment label to a fixed score in [-0.7, 0.7]."""
        sentiment = self._analyze_image_sentiment(image)

        if sentiment == "正面":
            return 0.7
        elif sentiment == "負面":
            return -0.7
        else:
            return 0.0

    def _generate_summary(self, results: Dict) -> str:
        """Join the headline findings into a single ' | '-separated line."""
        summary_parts = []

        if results["image_info"]:
            info = results["image_info"]
            summary_parts.append(f"圖片尺寸: {info['width']}x{info['height']}")

        if results["objects"]:
            summary_parts.append(f"偵測物件: {', '.join(results['objects'])}")

        if results["scene"]:
            summary_parts.append(f"場景類型: {results['scene']}")

        if results["sentiment"]:
            summary_parts.append(f"情感傾向: {results['sentiment']}")

        if results["faces"]:
            summary_parts.append(f"人臉數量: {len(results['faces'])}")

        return " | ".join(summary_parts)
modules/multimodal_fusion.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 多模態融合分析模組
3
+ """
4
+
5
+ import numpy as np
6
+ from typing import Dict, List, Optional, Tuple
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ class MultimodalFusion:
12
+ """多模態融合分析器"""
13
+
14
+ def __init__(self):
15
+ """初始化多模態融合分析器"""
16
+ # 權重設定
17
+ self.weights = {
18
+ "text": 0.4,
19
+ "image": 0.35,
20
+ "video": 0.25
21
+ }
22
+
23
+ # 情感映射
24
+ self.emotion_mapping = {
25
+ "正面": 1.0,
26
+ "中性": 0.0,
27
+ "負面": -1.0
28
+ }
29
+
30
+ def fuse_analysis(self,
31
+ text_analysis: Optional[Dict] = None,
32
+ image_analysis: Optional[Dict] = None,
33
+ video_analysis: Optional[Dict] = None) -> Dict:
34
+ """
35
+ 融合多模態分析結果
36
+
37
+ Args:
38
+ text_analysis: 文字分析結果
39
+ image_analysis: 圖片分析結果
40
+ video_analysis: 影片分析結果
41
+
42
+ Returns:
43
+ 融合後的分析結果
44
+ """
45
+ try:
46
+ results = {
47
+ "modalities": [],
48
+ "fused_sentiment": "中性",
49
+ "fused_sentiment_score": 0.0,
50
+ "content_category": "一般",
51
+ "confidence": 0.0,
52
+ "key_insights": [],
53
+ "summary": ""
54
+ }
55
+
56
+ # 收集可用的模態
57
+ available_modalities = []
58
+ if text_analysis and not text_analysis.get("error"):
59
+ available_modalities.append("text")
60
+ if image_analysis and not image_analysis.get("error"):
61
+ available_modalities.append("image")
62
+ if video_analysis and not video_analysis.get("error"):
63
+ available_modalities.append("video")
64
+
65
+ results["modalities"] = available_modalities
66
+
67
+ if not available_modalities:
68
+ results["summary"] = "無可用的分析模態"
69
+ return results
70
+
71
+ # 融合情感分析
72
+ results["fused_sentiment"], results["fused_sentiment_score"] = self._fuse_sentiment(
73
+ text_analysis, image_analysis, video_analysis, available_modalities
74
+ )
75
+
76
+ # 融合內容分類
77
+ results["content_category"] = self._fuse_content_category(
78
+ text_analysis, image_analysis, video_analysis, available_modalities
79
+ )
80
+
81
+ # 計算整體置信度
82
+ results["confidence"] = self._calculate_confidence(
83
+ text_analysis, image_analysis, video_analysis, available_modalities
84
+ )
85
+
86
+ # 提取關鍵洞察
87
+ results["key_insights"] = self._extract_key_insights(
88
+ text_analysis, image_analysis, video_analysis, available_modalities
89
+ )
90
+
91
+ # 生成總結
92
+ results["summary"] = self._generate_fusion_summary(results)
93
+
94
+ logger.info(f"多模態融合分析完成,使用模態: {available_modalities}")
95
+ return results
96
+
97
+ except Exception as e:
98
+ logger.error(f"多模態融合分析失敗: {e}")
99
+ return {"error": str(e)}
100
+
101
+ def _fuse_sentiment(self, text_analysis: Optional[Dict],
102
+ image_analysis: Optional[Dict],
103
+ video_analysis: Optional[Dict],
104
+ modalities: List[str]) -> Tuple[str, float]:
105
+ """融合情感分析結果"""
106
+ sentiment_scores = []
107
+ weights = []
108
+
109
+ # 文字情感
110
+ if "text" in modalities and text_analysis:
111
+ text_sentiment = text_analysis.get("sentiment", "中性")
112
+ text_score = self.emotion_mapping.get(text_sentiment, 0.0)
113
+
114
+ # 如果有sentiment_score,使用它
115
+ if "sentiment_score" in text_analysis:
116
+ text_score = text_analysis["sentiment_score"]
117
+
118
+ sentiment_scores.append(text_score)
119
+ weights.append(self.weights["text"])
120
+
121
+ # 圖片情感
122
+ if "image" in modalities and image_analysis:
123
+ image_sentiment = image_analysis.get("sentiment", "中性")
124
+ image_score = self.emotion_mapping.get(image_sentiment, 0.0)
125
+
126
+ # 如果有sentiment_score,使用它
127
+ if "sentiment_score" in image_analysis:
128
+ image_score = image_analysis["sentiment_score"]
129
+
130
+ sentiment_scores.append(image_score)
131
+ weights.append(self.weights["image"])
132
+
133
+ # 影片情感
134
+ if "video" in modalities and video_analysis:
135
+ video_sentiment = video_analysis.get("audio_sentiment", "中性")
136
+ video_score = self.emotion_mapping.get(video_sentiment, 0.0)
137
+
138
+ sentiment_scores.append(video_score)
139
+ weights.append(self.weights["video"])
140
+
141
+ if not sentiment_scores:
142
+ return "中性", 0.0
143
+
144
+ # 加權平均
145
+ weighted_score = np.average(sentiment_scores, weights=weights)
146
+
147
+ # 轉換為情感標籤
148
+ if weighted_score > 0.3:
149
+ sentiment_label = "正面"
150
+ elif weighted_score < -0.3:
151
+ sentiment_label = "負面"
152
+ else:
153
+ sentiment_label = "中性"
154
+
155
+ return sentiment_label, float(weighted_score)
156
+
157
+ def _fuse_content_category(self, text_analysis: Optional[Dict],
158
+ image_analysis: Optional[Dict],
159
+ video_analysis: Optional[Dict],
160
+ modalities: List[str]) -> str:
161
+ """融合內容分類結果"""
162
+ categories = []
163
+
164
+ # 文字分類
165
+ if "text" in modalities and text_analysis:
166
+ text_category = text_analysis.get("content_category", "一般")
167
+ categories.append(text_category)
168
+
169
+ # 圖片分類(基於場景)
170
+ if "image" in modalities and image_analysis:
171
+ image_scene = image_analysis.get("scene", "一般場景")
172
+ if "戶外" in image_scene:
173
+ categories.append("戶外")
174
+ elif "室內" in image_scene:
175
+ categories.append("室內")
176
+ else:
177
+ categories.append("一般")
178
+
179
+ # 影片分類(基於動作)
180
+ if "video" in modalities and video_analysis:
181
+ video_actions = video_analysis.get("actions", [])
182
+ if "運動" in video_actions:
183
+ categories.append("運動")
184
+ elif "靜止" in video_actions:
185
+ categories.append("靜態")
186
+ else:
187
+ categories.append("一般")
188
+
189
+ if not categories:
190
+ return "一般"
191
+
192
+ # 選擇最常見的分類
193
+ from collections import Counter
194
+ category_counts = Counter(categories)
195
+ return category_counts.most_common(1)[0][0]
196
+
197
+ def _calculate_confidence(self, text_analysis: Optional[Dict],
198
+ image_analysis: Optional[Dict],
199
+ video_analysis: Optional[Dict],
200
+ modalities: List[str]) -> float:
201
+ """計算整體置信度"""
202
+ confidences = []
203
+ weights = []
204
+
205
+ # 文字置信度
206
+ if "text" in modalities and text_analysis:
207
+ text_conf = 0.8 # 簡化版,實際會根據分析品質計算
208
+ confidences.append(text_conf)
209
+ weights.append(self.weights["text"])
210
+
211
+ # 圖片置信度
212
+ if "image" in modalities and image_analysis:
213
+ image_conf = 0.7 # 簡化版
214
+ confidences.append(image_conf)
215
+ weights.append(self.weights["image"])
216
+
217
+ # 影片置信度
218
+ if "video" in modalities and video_analysis:
219
+ video_conf = 0.6 # 簡化版
220
+ confidences.append(video_conf)
221
+ weights.append(self.weights["video"])
222
+
223
+ if not confidences:
224
+ return 0.0
225
+
226
+ # 加權平均
227
+ return float(np.average(confidences, weights=weights))
228
+
229
+ def _extract_key_insights(self, text_analysis: Optional[Dict],
230
+ image_analysis: Optional[Dict],
231
+ video_analysis: Optional[Dict],
232
+ modalities: List[str]) -> List[str]:
233
+ """提取關鍵洞察"""
234
+ insights = []
235
+
236
+ # 文字洞察
237
+ if "text" in modalities and text_analysis:
238
+ keywords = text_analysis.get("keywords", [])
239
+ if keywords:
240
+ insights.append(f"文字關鍵詞: {', '.join(keywords[:3])}")
241
+
242
+ topics = text_analysis.get("topics", [])
243
+ if topics:
244
+ insights.append(f"文字主題: {', '.join(topics[:2])}")
245
+
246
+ # 圖片洞察
247
+ if "image" in modalities and image_analysis:
248
+ objects = image_analysis.get("objects", [])
249
+ if objects:
250
+ insights.append(f"圖片物件: {', '.join(objects[:3])}")
251
+
252
+ scene = image_analysis.get("scene", "")
253
+ if scene:
254
+ insights.append(f"圖片場景: {scene}")
255
+
256
+ # 影片洞察
257
+ if "video" in modalities and video_analysis:
258
+ actions = video_analysis.get("actions", [])
259
+ if actions:
260
+ insights.append(f"影片動作: {', '.join(actions)}")
261
+
262
+ motion = video_analysis.get("motion", {})
263
+ if motion and motion.get("motion_type"):
264
+ insights.append(f"運動類型: {motion['motion_type']}")
265
+
266
+ return insights
267
+
268
+ def _generate_fusion_summary(self, results: Dict) -> str:
269
+ """生成融合分析總結"""
270
+ summary_parts = []
271
+
272
+ # 模態資訊
273
+ modalities = results.get("modalities", [])
274
+ summary_parts.append(f"分析模態: {', '.join(modalities)}")
275
+
276
+ # 融合情感
277
+ sentiment = results.get("fused_sentiment", "未知")
278
+ sentiment_score = results.get("fused_sentiment_score", 0.0)
279
+ summary_parts.append(f"綜合情感: {sentiment} ({sentiment_score:.2f})")
280
+
281
+ # 內容分類
282
+ category = results.get("content_category", "一般")
283
+ summary_parts.append(f"內容類型: {category}")
284
+
285
+ # 置信度
286
+ confidence = results.get("confidence", 0.0)
287
+ summary_parts.append(f"分析置信度: {confidence:.2f}")
288
+
289
+ # 關鍵洞察
290
+ insights = results.get("key_insights", [])
291
+ if insights:
292
+ summary_parts.append(f"關鍵洞察: {'; '.join(insights[:3])}")
293
+
294
+ return " | ".join(summary_parts)
modules/text_analyzer.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 文字內容分析模組
3
+ """
4
+
5
+ import re
6
+ import jieba
7
+ from typing import Dict, List, Optional
8
+ import logging
9
+ from collections import Counter
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class TextAnalyzer:
    """Dictionary-based analyzer for (mainly Chinese) social-media text.

    Provides sentiment labelling, keyword/topic extraction, rule-based
    content classification, language detection and a rough named-entity
    pass — no ML models required.
    """

    def __init__(self):
        """Initialize the jieba tokenizer and the built-in word lists."""
        # Warm up jieba so the first analyze() call does not pay the
        # dictionary-loading cost.
        jieba.initialize()

        # Simplified positive-sentiment lexicon (duplicates removed).
        self.positive_words = {
            "好", "棒", "讚", "優秀", "完美", "喜歡", "愛", "開心", "快樂", "高興",
            "滿意", "驚喜", "感動", "溫暖", "美好", "精彩", "出色", "傑出",
        }

        # Simplified negative-sentiment lexicon (duplicates removed).
        self.negative_words = {
            "壞", "差", "爛", "討厭", "恨", "生氣", "憤怒", "失望", "難過", "痛苦",
            "糟糕", "惡劣", "可惡", "煩人", "無聊", "噁心", "恐怖",
        }

        # Stop words ignored during keyword extraction.
        self.stop_words = {
            "的", "了", "在", "是", "我", "有", "和", "就", "不", "人", "都", "一",
            "一個", "上", "也", "很", "到", "說", "要", "去", "你", "會", "著", "沒有",
            "看", "好", "自己", "這", "那", "他", "她", "它", "們", "我們", "你們",
        }

    def analyze(self, text: str, analysis_type: str = "comprehensive") -> Dict:
        """
        Analyze a piece of text.

        Args:
            text: the text to analyze
            analysis_type: "comprehensive", "sentiment",
                "content_classification" or "keyword_extraction"

        Returns:
            Result dict, or {"error": ...} on failure.
        """
        try:
            results = {
                "original_text": text,
                "analysis_type": analysis_type,
                # Bug fix: word_count used to be len(text) — a character
                # count mislabeled as a word count. Report the tokenizer's
                # word count instead; char_count keeps the old semantics.
                "word_count": len(jieba.lcut(text)),
                "char_count": len(text.replace(" ", "")),
                "sentiment": self._analyze_sentiment(text),
                "keywords": self._extract_keywords(text),
                "topics": self._extract_topics(text),
                "summary": ""
            }

            # Type-specific extras.
            if analysis_type in ["comprehensive", "sentiment"]:
                results["sentiment_score"] = self._calculate_sentiment_score(text)

            if analysis_type in ["comprehensive", "content_classification"]:
                results["content_category"] = self._classify_content(text)
                results["language"] = self._detect_language(text)

            if analysis_type in ["comprehensive", "keyword_extraction"]:
                results["named_entities"] = self._extract_named_entities(text)

            results["summary"] = self._generate_summary(results)

            logger.info(f"文字分析完成: {analysis_type}")
            return results

        except Exception as e:
            logger.error(f"文字分析失敗: {e}")
            return {"error": str(e)}

    def _analyze_sentiment(self, text: str) -> str:
        """Label the text 正面/負面/中性 by lexicon hit counts."""
        words = jieba.lcut(text)

        positive_count = sum(1 for word in words if word in self.positive_words)
        negative_count = sum(1 for word in words if word in self.negative_words)

        if positive_count > negative_count:
            return "正面"
        elif negative_count > positive_count:
            return "負面"
        else:
            return "中性"

    def _calculate_sentiment_score(self, text: str) -> float:
        """Return a sentiment score in [-1, 1] (net hits / token count)."""
        words = jieba.lcut(text)

        positive_count = sum(1 for word in words if word in self.positive_words)
        negative_count = sum(1 for word in words if word in self.negative_words)
        total_words = len(words)

        if total_words == 0:
            return 0.0

        score = (positive_count - negative_count) / total_words
        return max(-1.0, min(1.0, score))

    def _extract_keywords(self, text: str, top_k: int = 10) -> List[str]:
        """Return the top_k most frequent non-stop-word tokens."""
        words = jieba.lcut(text)

        # Drop stop words and single-character tokens.
        filtered_words = [
            word for word in words
            if len(word) > 1 and word not in self.stop_words
        ]

        word_freq = Counter(filtered_words)
        return [word for word, freq in word_freq.most_common(top_k)]

    def _extract_topics(self, text: str) -> List[str]:
        """Approximate topics with the top keywords (no topic model yet)."""
        # Consistency: delegate instead of duplicating the keyword logic.
        return self._extract_keywords(text, top_k=5)

    def _classify_content(self, text: str) -> str:
        """Rule-based content classification by trigger words."""
        if any(word in text for word in ["新聞", "報導", "消息", "事件"]):
            return "新聞"
        elif any(word in text for word in ["評論", "觀點", "看法", "認為"]):
            return "評論"
        elif any(word in text for word in ["問題", "求助", "請教", "怎麼辦"]):
            return "問答"
        elif any(word in text for word in ["分享", "推薦", "介紹"]):
            return "分享"
        else:
            return "一般"

    def _detect_language(self, text: str) -> str:
        """Classify as 中文 when >50% of non-space characters are CJK."""
        chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))
        total_chars = len(text.replace(" ", ""))

        if total_chars == 0:
            return "未知"

        chinese_ratio = chinese_chars / total_chars
        return "中文" if chinese_ratio > 0.5 else "其他"

    def _extract_named_entities(self, text: str) -> List[str]:
        """Very rough NER: generic 2-4 char runs plus org-suffix patterns.

        NOTE(review): the 2-4 character pattern matches almost any Chinese
        word, so the output is noisy by design ("simplified" placeholder).
        """
        entities = []

        # Candidate person names: any 2-4 character CJK run.
        entities.extend(re.findall(r'[\u4e00-\u9fff]{2,4}', text))

        # Candidate organizations: CJK run followed by a known suffix.
        org_patterns = [
            r'[\u4e00-\u9fff]+公司',
            r'[\u4e00-\u9fff]+大學',
            r'[\u4e00-\u9fff]+政府',
            r'[\u4e00-\u9fff]+協會'
        ]
        for pattern in org_patterns:
            entities.extend(re.findall(pattern, text))

        return list(set(entities))

    def _generate_summary(self, results: Dict) -> str:
        """Build a one-line, pipe-separated summary of the results."""
        summary_parts = []

        summary_parts.append(f"文字長度: {results['char_count']} 字符")
        summary_parts.append(f"情感傾向: {results['sentiment']}")

        if 'sentiment_score' in results:
            score = results['sentiment_score']
            summary_parts.append(f"情感分數: {score:.2f}")

        if results['keywords']:
            summary_parts.append(f"主要關鍵詞: {', '.join(results['keywords'][:5])}")

        if 'content_category' in results:
            summary_parts.append(f"內容類型: {results['content_category']}")

        return " | ".join(summary_parts)
modules/video_analyzer.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 影片內容分析模組
3
+ """
4
+
5
+ import cv2
6
+ import numpy as np
7
+ from typing import Dict, List, Optional, Tuple
8
+ import logging
9
+ import os
10
+ import librosa
11
+ import tempfile
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class VideoAnalyzer:
    """Video content analyzer.

    Uses classical OpenCV techniques (Haar cascades, frame differencing,
    histogram comparison, dense optical flow) plus librosa audio features.
    No deep-learning models are required.
    """

    def __init__(self):
        """Initialize the analyzer and its reusable resources."""
        self.frame_analyzer = None  # placeholder: could reuse an ImageAnalyzer
        self.audio_analyzer = None  # placeholder: dedicated audio analyzer

        # Performance fix: build the Haar face detector once instead of
        # once per sampled frame (the original re-created it in the loops).
        self._face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

        # Candidate action labels (currently informational only).
        self.action_labels = [
            "走路", "跑步", "跳躍", "坐下", "站立", "揮手", "點頭", "搖頭",
            "拍手", "擁抱", "握手", "指向", "寫字", "打字", "開車", "騎車"
        ]

        # Candidate audio-emotion labels (currently informational only).
        self.audio_emotion_labels = [
            "快樂", "悲傷", "憤怒", "恐懼", "驚訝", "厭惡", "中性"
        ]

    def analyze(self, video_path: str, analysis_type: str = "comprehensive") -> Dict:
        """
        Analyze a video file.

        Args:
            video_path: path to the video file
            analysis_type: "comprehensive", "object_detection",
                "action_recognition" or "audio_analysis"

        Returns:
            Result dict, or {"error": ...} on failure.
        """
        try:
            if not os.path.exists(video_path):
                return {"error": "影片檔案不存在"}

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                return {"error": "無法讀取影片"}

            results = {
                "video_path": video_path,
                "analysis_type": analysis_type,
                "video_info": self._get_video_info(cap),
                "objects": self._detect_objects_in_video(cap),
                "actions": self._detect_actions(cap),
                "scenes": self._detect_scenes(cap),
                "audio_sentiment": self._analyze_audio_sentiment(video_path),
                "motion": self._analyze_motion(cap),
                "faces": self._detect_faces_in_video(cap),
                "summary": ""
            }

            # Bug fix: these passes re-read `cap`, but the original released
            # the capture *before* running them, handing them a closed handle.
            if analysis_type in ["comprehensive", "object_detection"]:
                results["object_tracking"] = self._track_objects(cap)

            if analysis_type in ["comprehensive", "action_recognition"]:
                results["action_details"] = self._get_action_details(cap)

            cap.release()

            # Audio work re-opens the file itself; safe after release.
            if analysis_type in ["comprehensive", "audio_analysis"]:
                results["audio_features"] = self._extract_audio_features(video_path)

            results["summary"] = self._generate_summary(results)

            logger.info(f"影片分析完成: {analysis_type}")
            return results

        except Exception as e:
            logger.error(f"影片分析失敗: {e}")
            return {"error": str(e)}

    def _get_video_info(self, cap: cv2.VideoCapture) -> Dict:
        """Read basic properties (resolution, fps, frame count, duration)."""
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        duration = frame_count / fps if fps > 0 else 0

        return {
            "width": width,
            "height": height,
            "fps": fps,
            "frame_count": frame_count,
            "duration": duration,
            "aspect_ratio": width / height if height > 0 else 0
        }

    def _detect_objects_in_video(self, cap: cv2.VideoCapture) -> List[str]:
        """Sample frames and collect the union of per-frame object hints."""
        objects = set()
        frame_count = 0
        sample_rate = 30  # analyze one frame out of every 30

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % sample_rate == 0:
                objects.update(self._detect_objects_in_frame(frame))

            frame_count += 1

            if frame_count > 300:  # cap the work on long videos
                break

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for subsequent passes
        return list(objects)

    def _detect_objects_in_frame(self, frame: np.ndarray) -> List[str]:
        """Heuristic per-frame object hints (faces, edge density)."""
        objects = []

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Reuse the cascade built in __init__.
        faces = self._face_cascade.detectMultiScale(gray, 1.1, 4)
        if len(faces) > 0:
            objects.append("人臉")

        # High edge density is used as a proxy for "complex structure".
        edges = cv2.Canny(gray, 50, 150)
        edge_density = np.sum(edges > 0) / edges.size
        if edge_density > 0.1:
            objects.append("複雜結構")

        return objects

    def _detect_actions(self, cap: cv2.VideoCapture) -> List[str]:
        """Coarse motion/no-motion labelling from inter-frame differences."""
        actions = []
        frame_count = 0
        prev_frame = None

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if prev_frame is not None and frame_count % 10 == 0:
                # Mean absolute per-pixel difference — value range 0..255.
                diff = cv2.absdiff(prev_frame, frame)
                motion_score = np.sum(diff) / diff.size

                # Bug fix: the original thresholds (>1000 / <100) are
                # impossible for a mean 8-bit difference, so "運動" could
                # never fire; use thresholds inside the 0..255 range.
                if motion_score > 20:
                    actions.append("運動")
                elif motion_score < 2:
                    actions.append("靜止")

            prev_frame = frame.copy()
            frame_count += 1

            if frame_count > 100:  # cap the work on long videos
                break

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for subsequent passes
        return list(set(actions))

    def _detect_scenes(self, cap: cv2.VideoCapture) -> List[str]:
        """Detect scene cuts via color-histogram correlation once a second."""
        scenes = []
        frame_count = 0
        prev_hist = None

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % 30 == 0:  # roughly one sample per second
                hist = cv2.calcHist([frame], [0, 1, 2], None, [8, 8, 8],
                                    [0, 256, 0, 256, 0, 256])

                if prev_hist is not None:
                    correlation = cv2.compareHist(prev_hist, hist, cv2.HISTCMP_CORREL)
                    # Low correlation between consecutive samples => cut.
                    if correlation < 0.7:
                        scenes.append("場景變化")

                prev_hist = hist

            frame_count += 1

            if frame_count > 300:  # cap the work on long videos
                break

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for subsequent passes
        return scenes

    def _analyze_audio_sentiment(self, video_path: str) -> str:
        """Classify audio sentiment from MFCC / spectral-centroid means.

        NOTE(review): the thresholds are heuristic placeholders, not a
        validated emotion model.
        """
        try:
            audio_path = self._extract_audio(video_path)
            if not audio_path:
                return "無法分析"

            try:
                y, sr = librosa.load(audio_path, duration=30)  # first 30 s only

                mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
                spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)

                mean_mfcc = np.mean(mfccs)
                mean_spectral = np.mean(spectral_centroids)
            finally:
                # Bug fix: the extracted temp WAV was never deleted.
                try:
                    os.remove(audio_path)
                except OSError:
                    pass

            if mean_spectral > 2000 and mean_mfcc > 0:
                return "正面"
            elif mean_spectral < 1000 and mean_mfcc < 0:
                return "負面"
            else:
                return "中性"

        except Exception as e:
            logger.warning(f"音頻分析失敗: {e}")
            return "無法分析"

    def _extract_audio(self, video_path: str) -> Optional[str]:
        """Extract the audio track to a temp WAV via ffmpeg.

        Returns:
            Path of the temp WAV (caller must delete it), or None on failure.
        """
        try:
            temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
            temp_audio.close()

            import subprocess
            cmd = [
                'ffmpeg', '-i', video_path,
                '-vn', '-acodec', 'pcm_s16le',
                '-ar', '44100', '-ac', '2',
                temp_audio.name, '-y'
            ]

            result = subprocess.run(cmd, capture_output=True, text=True)

            if result.returncode == 0:
                return temp_audio.name

            logger.warning(f"音頻提取失敗: {result.stderr}")
            # Don't leak the empty temp file on failure.
            try:
                os.remove(temp_audio.name)
            except OSError:
                pass
            return None

        except Exception as e:
            logger.warning(f"音頻提取失敗: {e}")
            return None

    def _analyze_motion(self, cap: cv2.VideoCapture) -> Dict:
        """Estimate motion intensity/type via dense optical flow."""
        motion_data = {
            "motion_intensity": 0.0,
            "motion_direction": "未知",
            "motion_type": "靜止"
        }

        frame_count = 0
        motion_scores = []
        prev_gray = None

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if prev_gray is not None:
                # Bug fix: the original called cv2.calcOpticalFlowPyrLK with
                # prevPts=None, which raises — PyrLK is a *sparse* tracker
                # that needs seed points. Use Farneback dense flow instead
                # and average the per-pixel flow magnitude.
                flow = cv2.calcOpticalFlowFarneback(
                    prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
                magnitude = np.linalg.norm(flow, axis=2)
                motion_scores.append(float(np.mean(magnitude)))

            prev_gray = gray
            frame_count += 1

            if frame_count > 50:  # cap the work on long videos
                break

        if motion_scores:
            motion_data["motion_intensity"] = float(np.mean(motion_scores))

            # Thresholds in pixels of mean displacement per frame.
            if motion_data["motion_intensity"] > 5:
                motion_data["motion_type"] = "快速運動"
            elif motion_data["motion_intensity"] > 1:
                motion_data["motion_type"] = "慢速運動"
            else:
                motion_data["motion_type"] = "靜止"

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for subsequent passes
        return motion_data

    def _detect_faces_in_video(self, cap: cv2.VideoCapture) -> List[Dict]:
        """Sample frames and return face bounding boxes per sampled frame."""
        faces = []
        frame_count = 0
        sample_rate = 30  # analyze one frame out of every 30

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % sample_rate == 0:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # Reuse the cascade built in __init__.
                detected_faces = self._face_cascade.detectMultiScale(gray, 1.1, 4)

                for (x, y, w, h) in detected_faces:
                    faces.append({
                        "frame": frame_count,
                        "x": int(x),
                        "y": int(y),
                        "width": int(w),
                        "height": int(h)
                    })

            frame_count += 1

            if frame_count > 300:  # cap the work on long videos
                break

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for subsequent passes
        return faces

    def _track_objects(self, cap: cv2.VideoCapture) -> Dict:
        """Object tracking — not yet implemented; returns an empty result."""
        return {
            "tracked_objects": [],
            "tracking_confidence": 0.0
        }

    def _get_action_details(self, cap: cv2.VideoCapture) -> Dict:
        """Return detected actions with a fixed placeholder confidence."""
        actions = self._detect_actions(cap)
        return {
            "detected_actions": actions,
            "action_count": len(actions),
            "action_confidence": 0.6  # placeholder value (no real model)
        }

    def _extract_audio_features(self, video_path: str) -> Dict:
        """Extract global audio features (tempo, centroid, ZCR, MFCC mean)."""
        try:
            audio_path = self._extract_audio(video_path)
            if not audio_path:
                return {}

            try:
                y, sr = librosa.load(audio_path, duration=30)

                features = {
                    # NOTE(review): librosa.beat.tempo is deprecated in
                    # librosa >= 0.10 (moved to librosa.feature.rhythm.tempo);
                    # kept for compatibility with the pinned requirement.
                    "tempo": float(librosa.beat.tempo(y=y, sr=sr)[0]),
                    "spectral_centroid": float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))),
                    "zero_crossing_rate": float(np.mean(librosa.feature.zero_crossing_rate(y))),
                    "mfcc_mean": float(np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)))
                }
                return features
            finally:
                # Bug fix: the extracted temp WAV was never deleted.
                try:
                    os.remove(audio_path)
                except OSError:
                    pass

        except Exception as e:
            logger.warning(f"音頻特徵提取失敗: {e}")
            return {}

    def _generate_summary(self, results: Dict) -> str:
        """Build a one-line, pipe-separated summary of the results."""
        summary_parts = []

        if results["video_info"]:
            info = results["video_info"]
            summary_parts.append(f"影片長度: {info['duration']:.1f}秒")
            summary_parts.append(f"解析度: {info['width']}x{info['height']}")

        if results["objects"]:
            summary_parts.append(f"偵測物件: {', '.join(results['objects'])}")

        if results["actions"]:
            summary_parts.append(f"動作: {', '.join(results['actions'])}")

        if results["audio_sentiment"]:
            summary_parts.append(f"音頻情感: {results['audio_sentiment']}")

        if results["motion"]:
            motion = results["motion"]
            summary_parts.append(f"運動類型: {motion['motion_type']}")

        if results["faces"]:
            summary_parts.append(f"人臉數量: {len(results['faces'])}")

        return " | ".join(summary_parts)
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gradio Spaces 部署專用依賴套件
2
+ gradio>=4.0.0
3
+ opencv-python-headless>=4.8.0
4
+ numpy>=1.24.0
5
+ jieba>=0.42.1
6
+ librosa>=0.10.0
7
+ scikit-learn>=1.3.0
8
+ Pillow>=10.0.0
9
+ matplotlib>=3.7.0
10
+ pandas>=2.0.0
11
+ requests>=2.31.0
12
+ tqdm>=4.65.0
13
+
14
+ # 可選的深度學習套件(如果需要更進階的分析)
15
+ # torch>=2.0.0
16
+ # transformers>=4.30.0
17
+ # torchvision>=0.15.0
utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # 工具模組初始化檔案
utils/config.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 配置管理模組
3
+ """
4
+
5
+ import os
6
+ import json
7
+ from typing import Dict, Any
8
+
9
class Config:
    """Configuration manager backed by an optional JSON file.

    Defaults are defined in code; a user-supplied JSON file (when present)
    is deep-merged on top of them.  Values are addressed with dotted paths,
    e.g. ``cfg.get("models.text_model")``.
    """

    def __init__(self, config_file: str = "config.json"):
        self.config_file = config_file       # path of the overlay JSON file
        self.config = self._load_config()    # merged configuration tree

    def _load_config(self) -> Dict[str, Any]:
        """Build the in-code defaults and overlay the JSON file, if any."""
        merged = {
            "models": {
                "text_model": "bert-base-chinese",
                "image_model": "resnet50",
                "video_model": "slowfast",
                "multimodal_model": "clip",
            },
            "analysis": {
                "max_text_length": 512,
                "max_image_size": 224,
                "max_video_duration": 30,
                "confidence_threshold": 0.5,
            },
            "api": {
                "openai_api_key": "",
                "huggingface_token": "",
                "google_api_key": "",
            },
            "storage": {
                "temp_dir": "temp",
                "output_dir": "output",
                "max_file_size": 100 * 1024 * 1024,  # 100MB
            },
            "logging": {
                "level": "INFO",
                "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            },
        }

        if os.path.exists(self.config_file):
            try:
                with open(self.config_file, 'r', encoding='utf-8') as fh:
                    self._merge_config(merged, json.load(fh))
            except Exception as exc:
                # Fall back to defaults if the file is unreadable/invalid.
                print(f"載入配置檔案失敗,使用預設配置: {exc}")

        return merged

    def _merge_config(self, default: Dict, user: Dict):
        """Recursively overlay ``user`` onto ``default`` in place."""
        for key, incoming in user.items():
            existing = default.get(key)
            if isinstance(existing, dict) and isinstance(incoming, dict):
                self._merge_config(existing, incoming)
            else:
                default[key] = incoming

    def get(self, key: str, default=None):
        """Look up a dotted-path key; return ``default`` when missing."""
        node = self.config
        for part in key.split('.'):
            try:
                node = node[part]
            except (KeyError, TypeError):
                return default
        return node

    def set(self, key: str, value: Any):
        """Set a dotted-path key, creating intermediate dicts as needed."""
        *parents, leaf = key.split('.')
        node = self.config
        for part in parents:
            if part not in node:
                node[part] = {}
            node = node[part]
        node[leaf] = value

    def save(self):
        """Persist the current configuration back to the JSON file."""
        try:
            with open(self.config_file, 'w', encoding='utf-8') as fh:
                json.dump(self.config, fh, indent=2, ensure_ascii=False)
        except Exception as exc:
            print(f"儲存配置檔案失敗: {exc}")
utils/file_handler.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 檔案處理工具模組
3
+ """
4
+
5
+ import os
6
+ import shutil
7
+ import tempfile
8
+ from typing import Optional, List
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class FileHandler:
    """Utility for staging uploaded files and validating them.

    Uploads are copied into a temp directory under a unique, type-prefixed
    name so concurrent uploads never collide; helper methods expose file
    metadata plus extension and size checks.
    """

    def __init__(self, temp_dir: str = "temp", output_dir: str = "output"):
        self.temp_dir = temp_dir        # staging area for uploads
        self.output_dir = output_dir    # destination for analysis artifacts
        self._ensure_directories()

    def _ensure_directories(self):
        """Create the temp and output directories if they do not exist."""
        for path in (self.temp_dir, self.output_dir):
            os.makedirs(path, exist_ok=True)

    def save_uploaded_file(self, file_path: str, file_type: str = "unknown") -> str:
        """
        Copy an uploaded file into the temp directory.

        Args:
            file_path: source file path
            file_type: category tag used as the filename prefix
                (image, video, audio, ...)

        Returns:
            Path of the staged copy.

        Raises:
            Exception: re-raised from the underlying copy on failure.
        """
        try:
            ext = os.path.splitext(os.path.basename(file_path))[1]

            # NamedTemporaryFile guarantees a unique name; we only need the
            # name, so close the handle immediately and copy over it.
            handle = tempfile.NamedTemporaryFile(
                delete=False,
                suffix=ext,
                dir=self.temp_dir,
                prefix=f"{file_type}_"
            )
            staged_path = handle.name
            handle.close()

            shutil.copy2(file_path, staged_path)

            logger.info(f"檔案已儲存到: {staged_path}")
            return staged_path

        except Exception as e:
            logger.error(f"儲存檔案失敗: {e}")
            raise

    def cleanup_temp_files(self, file_paths: List[str]):
        """Best-effort removal of previously staged files."""
        for path in file_paths:
            try:
                if os.path.exists(path):
                    os.remove(path)
                    logger.info(f"已刪除臨時檔案: {path}")
            except Exception as e:
                logger.error(f"刪除檔案失敗: {e}")

    def get_file_info(self, file_path: str) -> dict:
        """Return size/mtime/extension/basename, or {} on error."""
        try:
            stat = os.stat(file_path)
        except Exception as e:
            logger.error(f"獲取檔案資訊失敗: {e}")
            return {}
        return {
            "size": stat.st_size,
            "modified": stat.st_mtime,
            "extension": os.path.splitext(file_path)[1].lower(),
            "basename": os.path.basename(file_path)
        }

    def is_valid_file_type(self, file_path: str, allowed_types: List[str]) -> bool:
        """True when the file's lowercased extension is in allowed_types."""
        return self.get_file_info(file_path).get("extension", "") in allowed_types

    def validate_file_size(self, file_path: str, max_size: int) -> bool:
        """True when the file size is <= max_size.

        NOTE(review): a missing/unreadable file reports size 0 and thus
        passes the check — behavior preserved from the original.
        """
        return self.get_file_info(file_path).get("size", 0) <= max_size