mikao007 commited on
Commit
e92e423
·
verified ·
1 Parent(s): cf2c2a8

Upload 12 files

Browse files
README.md CHANGED
@@ -1,12 +1,28 @@
1
- ---
2
- title: Motion Analyze
3
- emoji: 🐠
4
- colorFrom: yellow
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.48.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 社交媒體多模態內容分析系統
2
+
3
+ 這是一個基於Gradio的社交媒體多模態內容分析系統。
4
+
5
+ ## 快速開始
6
+
7
+ 1. 將此repository fork到您的GitHub帳戶
8
+ 2. 前往 [Hugging Face Spaces](https://huggingface.co/spaces)
9
+ 3. 創建新的Space,選擇Gradio SDK
10
+ 4. 連接您的GitHub repository
11
+ 5. 等待自動部署完成
12
+
13
+ ## 功能特色
14
+
15
+ - 📝 文字情感分析和關鍵詞提取
16
+ - 🖼️ 圖片物件檢測和場景識別
17
+ - 🎬 影片動作識別和音頻分析
18
+ - 🔗 多模態融合分析
19
+
20
+ ## 使用方式
21
+
22
+ 1. 在文字框中輸入要分析的文字
23
+ 2. 上傳圖片檔案(支援jpg, png等格式)
24
+ 3. 上傳影片檔案(支援mp4, avi等格式)
25
+ 4. 選擇分析類型
26
+ 5. 點擊"開始分析"按鈕
27
+
28
+ 系統會自動分析內容並提供詳細的分析結果。
app.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Deployment script for Gradio Spaces.

Wires the analysis modules into a Gradio web UI. When the real modules are
unavailable (e.g. on a fresh Space before dependencies install), lightweight
stub analyzers are defined instead so the app still starts.
"""

import gradio as gr
import os
import sys
import logging
from typing import Dict, Optional
import tempfile
import shutil

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import the analysis modules; on failure fall back to stub implementations
# that return canned results, so the UI remains usable for smoke testing.
try:
    from modules.text_analyzer import TextAnalyzer
    from modules.image_analyzer import ImageAnalyzer
    from modules.video_analyzer import VideoAnalyzer
    from modules.multimodal_fusion import MultimodalFusion
    from utils.file_handler import FileHandler
    from utils.config import Config
except ImportError as e:
    logger.error(f"模組導入失敗: {e}")

    # Simplified stand-in analyzers used only when the real modules fail to load.
    class TextAnalyzer:
        # Returns a fixed neutral result regardless of input.
        def analyze(self, text, analysis_type="comprehensive"):
            return {"sentiment": "中性", "keywords": ["測試"], "summary": "測試分析"}

    class ImageAnalyzer:
        # Returns a fixed placeholder image result.
        def analyze(self, image_path, analysis_type="comprehensive"):
            return {"objects": ["測試物件"], "scene": "測試場景", "summary": "測試分析"}

    class VideoAnalyzer:
        # Returns a fixed placeholder video result.
        def analyze(self, video_path, analysis_type="comprehensive"):
            return {"actions": ["測試動作"], "audio_sentiment": "中性", "summary": "測試分析"}

    class MultimodalFusion:
        # Returns a fixed neutral fusion result.
        def fuse_analysis(self, text_analysis, image_analysis, video_analysis):
            return {"fused_sentiment": "中性", "summary": "測試融合分析"}

    class FileHandler:
        # No-op stand-in; the real handler manages uploads.
        pass

    class Config:
        # Always yields the supplied default value.
        def get(self, key, default=None):
            return default
class GradioSocialMediaAnalyzer:
    """Coordinates the per-modality analyzers behind the Gradio interface."""

    def __init__(self):
        """Instantiate every analysis module; on failure, retry with whatever
        analyzer classes are currently bound (the stub fallbacks)."""
        try:
            self.config = Config()
            # Instantiate in a fixed order so partial failures are predictable.
            for attr, factory in (
                ("text_analyzer", TextAnalyzer),
                ("image_analyzer", ImageAnalyzer),
                ("video_analyzer", VideoAnalyzer),
                ("multimodal_fusion", MultimodalFusion),
                ("file_handler", FileHandler),
            ):
                setattr(self, attr, factory())
            logger.info("所有分析模組載入成功")
        except Exception as e:
            logger.error(f"分析器初始化失敗: {e}")
            # Retry with the simplified implementations.
            self.text_analyzer = TextAnalyzer()
            self.image_analyzer = ImageAnalyzer()
            self.video_analyzer = VideoAnalyzer()
            self.multimodal_fusion = MultimodalFusion()

    def analyze_content(self,
                        text_input: Optional[str] = None,
                        image_input: Optional[str] = None,
                        video_input: Optional[str] = None,
                        analysis_type: str = "comprehensive") -> Dict:
        """Run every applicable analyzer and return the combined report.

        Args:
            text_input: text to analyze, or None/blank to skip.
            image_input: path to an image file, or None to skip.
            video_input: path to a video file, or None to skip.
            analysis_type: analysis mode forwarded to each analyzer.

        Returns:
            Dict with per-modality results (None where skipped), a fused
            "multimodal_analysis" entry, and a "summary" string — or
            {"error": ...} when anything raises.
        """
        try:
            report: Dict = {
                "text_analysis": None,
                "image_analysis": None,
                "video_analysis": None,
                "multimodal_analysis": None,
                "summary": "",
            }

            if text_input and text_input.strip():
                logger.info("開始文字分析...")
                report["text_analysis"] = self.text_analyzer.analyze(text_input, analysis_type)

            if image_input:
                logger.info("開始圖片分析...")
                report["image_analysis"] = self.image_analyzer.analyze(image_input, analysis_type)

            if video_input:
                logger.info("開始影片分析...")
                report["video_analysis"] = self.video_analyzer.analyze(video_input, analysis_type)

            # Fuse whenever at least one modality was supplied.
            if any((text_input, image_input, video_input)):
                logger.info("開始多模態融合分析...")
                report["multimodal_analysis"] = self.multimodal_fusion.fuse_analysis(
                    report["text_analysis"],
                    report["image_analysis"],
                    report["video_analysis"],
                )

            report["summary"] = self._generate_summary(report)
            logger.info("分析完成")
            return report

        except Exception as e:
            logger.error(f"分析過程中發生錯誤: {str(e)}")
            return {"error": str(e)}

    def _generate_summary(self, results: Dict) -> str:
        """Build a newline-joined digest of every analysis that produced output."""
        sections = (
            ("text_analysis", "文字分析"),
            ("image_analysis", "圖片分析"),
            ("video_analysis", "影片分析"),
            ("multimodal_analysis", "綜合分析"),
        )
        # Falsy entries (None / empty dict) are silently skipped.
        return "\n".join(
            f"{label}: {results[key].get('summary', 'N/A')}"
            for key, label in sections
            if results[key]
        )
# Module-level analyzer instance shared by every Gradio request.
analyzer = GradioSocialMediaAnalyzer()

def analyze_interface(text: str, image, video, analysis_type: str):
    """Gradio callback: run the multimodal analysis and format the outputs.

    Args:
        text: textbox contents (Gradio may pass None for a cleared box).
        image: uploaded image file object or path, or None.
        video: uploaded video file object or path, or None.
        analysis_type: value of the analysis-type dropdown.

    Returns:
        Tuple of four strings: text, image, video and summary panels.
        On failure the error message goes to the first panel.
    """
    try:
        # Resolve file inputs to filesystem paths (gr.File yields objects
        # with a .name attribute; plain strings pass through).
        image_path = None
        video_path = None

        if image:
            image_path = image.name if hasattr(image, 'name') else str(image)

        if video:
            video_path = video.name if hasattr(video, 'name') else str(video)

        # BUG FIX: the original called text.strip() unconditionally, raising
        # AttributeError when Gradio supplies None for an empty textbox.
        results = analyzer.analyze_content(
            text_input=text if text and text.strip() else None,
            image_input=image_path,
            video_input=video_path,
            analysis_type=analysis_type
        )

        if "error" in results:
            return f"分析錯誤: {results['error']}", "", "", ""

        # Format each panel; the format_* helpers tolerate None/empty dicts.
        text_output = format_text_analysis(results.get("text_analysis", {}))
        image_output = format_image_analysis(results.get("image_analysis", {}))
        video_output = format_video_analysis(results.get("video_analysis", {}))
        summary_output = results.get("summary", "無分析結果")

        return text_output, image_output, video_output, summary_output

    except Exception as e:
        error_msg = f"處理過程中發生錯誤: {str(e)}"
        logger.error(error_msg)
        return error_msg, "", "", ""
def format_text_analysis(analysis: Dict) -> str:
    """Render a text-analysis result dict as human-readable lines.

    Returns a placeholder message when the dict is empty or None.
    """
    if not analysis:
        return "無文字分析結果"

    # (key, renderer) pairs — order fixes the display order.
    renderers = (
        ("sentiment", lambda v: f"情感分析: {v}"),
        ("keywords", lambda v: f"關鍵詞: {', '.join(v)}"),
        ("topics", lambda v: f"主題: {', '.join(v)}"),
        ("summary", lambda v: f"總結: {v}"),
    )
    return "\n".join(
        render(analysis[key]) for key, render in renderers if key in analysis
    )
def format_image_analysis(analysis: Dict) -> str:
    """Render an image-analysis result dict as human-readable lines.

    Returns a placeholder message when the dict is empty or None.
    """
    if not analysis:
        return "無圖片分析結果"

    # (key, renderer) pairs — order fixes the display order.
    renderers = (
        ("objects", lambda v: f"偵測物件: {', '.join(v)}"),
        ("scene", lambda v: f"場景描述: {v}"),
        ("sentiment", lambda v: f"圖片情感: {v}"),
        ("summary", lambda v: f"總結: {v}"),
    )
    return "\n".join(
        render(analysis[key]) for key, render in renderers if key in analysis
    )
def format_video_analysis(analysis: Dict) -> str:
    """Render a video-analysis result dict as human-readable lines.

    Returns a placeholder message when the dict is empty or None.
    """
    if not analysis:
        return "無影片分析結果"

    # (key, renderer) pairs — order fixes the display order.
    renderers = (
        ("objects", lambda v: f"偵測物件: {', '.join(v)}"),
        ("actions", lambda v: f"動作識別: {', '.join(v)}"),
        ("audio_sentiment", lambda v: f"音頻情感: {v}"),
        ("summary", lambda v: f"總結: {v}"),
    )
    return "\n".join(
        render(analysis[key]) for key, render in renderers if key in analysis
    )
def create_gradio_app():
    """Assemble and return the Gradio Blocks application.

    Layout: header row, an input column (text / image / video / analysis
    type / run button) beside an output column (four result panels), an
    examples section, and the run/clear event bindings.
    """
    with gr.Blocks(
        title="社交媒體多模態內容分析系統",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
            margin: auto !important;
        }
        .main-header {
            text-align: center;
            margin-bottom: 2rem;
        }
        """
    ) as app:

        # Header banner.
        with gr.Row():
            gr.HTML("""
            <div class="main-header">
                <h1>🔍 社交媒體多模態內容分析系統</h1>
                <p>支援文字、圖片、影片的智能分析與多模態融合</p>
            </div>
            """)

        with gr.Row():
            # Left column: inputs.
            with gr.Column(scale=1):
                gr.Markdown("### 📝 輸入內容")

                text_input = gr.Textbox(
                    label="文字內容",
                    placeholder="請輸入要分析的文字內容...",
                    lines=5,
                    max_lines=10
                )

                image_input = gr.File(
                    label="圖片檔案",
                    file_types=["image"],
                    file_count="single"
                )

                video_input = gr.File(
                    label="影片檔案",
                    file_types=["video"],
                    file_count="single"
                )

                # Display-label / internal-value pairs for the dropdown.
                analysis_type = gr.Dropdown(
                    choices=[
                        ("綜合分析", "comprehensive"),
                        ("情感分析", "sentiment"),
                        ("內容分類", "content_classification"),
                        ("物件檢測", "object_detection")
                    ],
                    value="comprehensive",
                    label="分析類型"
                )

                analyze_btn = gr.Button(
                    "🚀 開始分析",
                    variant="primary",
                    size="lg"
                )

            # Right column: results.
            with gr.Column(scale=1):
                gr.Markdown("### 📊 分析結果")

                text_output = gr.Textbox(
                    label="📝 文字分析結果",
                    lines=8,
                    interactive=False,
                    show_copy_button=True
                )

                image_output = gr.Textbox(
                    label="🖼️ 圖片分析結果",
                    lines=8,
                    interactive=False,
                    show_copy_button=True
                )

                video_output = gr.Textbox(
                    label="🎬 影片分析結果",
                    lines=8,
                    interactive=False,
                    show_copy_button=True
                )

                summary_output = gr.Textbox(
                    label="🎯 綜合分析總結",
                    lines=6,
                    interactive=False,
                    show_copy_button=True
                )

        # Usage examples.
        with gr.Row():
            gr.Markdown("""
            ### 💡 使用範例

            **文字分析範例:**
            - 輸入:「這個新產品真的很棒,我強烈推薦給大家!」
            - 分析:情感分析、關鍵詞提取、主題識別

            **圖片分析範例:**
            - 上傳:風景照片、人物照片、產品圖片
            - 分析:物件檢測、場景識別、情感分析

            **影片分析範例:**
            - 上傳:短影片、廣告影片、教學影片
            - 分析:動作識別、音頻分析、場景變化

            **多模態分析:**
            - 同時上傳多種內容類型
            - 系統會進行綜合分析並提供融合結果
            """)

        # Run the analysis pipeline on click.
        analyze_btn.click(
            fn=analyze_interface,
            inputs=[text_input, image_input, video_input, analysis_type],
            outputs=[text_output, image_output, video_output, summary_output]
        )

        # Reset every input and output panel to its initial state.
        clear_btn = gr.Button("🗑️ 清除所有", variant="secondary")
        clear_btn.click(
            fn=lambda: ("", None, None, "comprehensive", "", "", "", ""),
            outputs=[text_input, image_input, video_input, analysis_type,
                     text_output, image_output, video_output, summary_output]
        )

    return app
# Entry point when run directly (Gradio Spaces executes app.py this way).
if __name__ == "__main__":
    demo = create_gradio_app()

    # Launch bound to all interfaces on the port Spaces expects.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,      # Spaces provides its own public URL
        debug=False,      # keep debug off for production
        show_error=True,
        quiet=False,
    )
config_spaces.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "models": {
3
+ "text_model": "distilbert-base-chinese",
4
+ "image_model": "mobilenet_v2",
5
+ "video_model": "slowfast",
6
+ "multimodal_model": "clip"
7
+ },
8
+ "analysis": {
9
+ "max_text_length": 256,
10
+ "max_image_size": 224,
11
+ "max_video_duration": 15,
12
+ "confidence_threshold": 0.5
13
+ },
14
+ "api": {
15
+ "openai_api_key": "",
16
+ "huggingface_token": "",
17
+ "google_api_key": ""
18
+ },
19
+ "storage": {
20
+ "temp_dir": "/tmp",
21
+ "output_dir": "/tmp/output",
22
+ "max_file_size": 10485760
23
+ },
24
+ "logging": {
25
+ "level": "INFO",
26
+ "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
27
+ },
28
+ "gradio": {
29
+ "server_name": "0.0.0.0",
30
+ "server_port": 7860,
31
+ "share": false,
32
+ "debug": false,
33
+ "show_error": true,
34
+ "quiet": false
35
+ }
36
+ }
modules/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # 初始化檔案
modules/image_analyzer.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 圖片內容分析模組
3
+ """
4
+
5
+ import cv2
6
+ import numpy as np
7
+ from typing import Dict, List, Optional, Tuple
8
+ import logging
9
+ import os
10
+
11
+ logger = logging.getLogger(__name__)
12
+
class ImageAnalyzer:
    """Heuristic image analyzer built on OpenCV primitives.

    Provides object/scene/sentiment/color/face analysis. Detection is
    deliberately lightweight (color masks, Hough transforms, Haar cascade)
    rather than a deep-learning pipeline.
    """

    def __init__(self):
        """Initialize detector state and label/feature tables."""
        # Placeholder for a DNN detector (e.g. YOLO/SSD) — not loaded yet.
        self.net = None
        self.classes = []
        self._load_object_detection_model()

        # Candidate scene-classification labels.
        self.scene_labels = [
            "室內", "戶外", "建築", "自然", "人物", "動物", "食物", "交通工具",
            "運動", "藝術", "科技", "時尚", "風景", "城市", "海邊", "山區"
        ]

        # Visual traits loosely associated with each sentiment class.
        self.emotion_colors = {
            "正面": ["明亮", "鮮豔", "溫暖"],
            "負面": ["昏暗", "冷色調", "陰鬱"],
            "中性": ["平衡", "自然", "柔和"]
        }

    def _load_object_detection_model(self):
        """Load the object-detection model (currently a stub; logs only)."""
        try:
            # A pretrained model (YOLO, SSD, R-CNN, ...) could be loaded here.
            logger.info("物件檢測模型載入完成")
        except Exception as e:
            logger.warning(f"物件檢測模型載入失敗: {e}")

    def analyze(self, image_path: str, analysis_type: str = "comprehensive") -> Dict:
        """Analyze the image at *image_path*.

        Args:
            image_path: filesystem path to the image.
            analysis_type: one of "comprehensive", "object_detection",
                "scene_analysis", "sentiment" (others run the base set only).

        Returns:
            Result dict with per-aspect entries and a "summary" string,
            or {"error": ...} on failure.
        """
        try:
            if not os.path.exists(image_path):
                return {"error": "圖片檔案不存在"}

            image = cv2.imread(image_path)
            if image is None:
                return {"error": "無法讀取圖片"}

            # Base analyses run regardless of analysis_type.
            results = {
                "image_path": image_path,
                "analysis_type": analysis_type,
                "image_info": self._get_image_info(image),
                "objects": self._detect_objects(image),
                "scene": self._analyze_scene(image),
                "sentiment": self._analyze_image_sentiment(image),
                "colors": self._analyze_colors(image),
                "faces": self._detect_faces(image),
                "text": self._extract_text(image),
                "summary": ""
            }

            # Type-specific extras.
            if analysis_type in ["comprehensive", "object_detection"]:
                results["object_details"] = self._get_object_details(image)

            if analysis_type in ["comprehensive", "scene_analysis"]:
                results["scene_details"] = self._get_scene_details(image)

            if analysis_type in ["comprehensive", "sentiment"]:
                results["sentiment_score"] = self._calculate_sentiment_score(image)

            results["summary"] = self._generate_summary(results)

            logger.info(f"圖片分析完成: {analysis_type}")
            return results

        except Exception as e:
            logger.error(f"圖片分析失敗: {e}")
            return {"error": str(e)}

    def _get_image_info(self, image: np.ndarray) -> Dict:
        """Return basic geometry info (size, channels, aspect ratio)."""
        height, width = image.shape[:2]
        channels = image.shape[2] if len(image.shape) > 2 else 1

        return {
            "width": width,
            "height": height,
            "channels": channels,
            "aspect_ratio": width / height if height > 0 else 0,
            "total_pixels": width * height
        }

    def _detect_objects(self, image: np.ndarray) -> List[str]:
        """Detect coarse object categories via shape and color heuristics."""
        objects = []

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Circular shapes via Hough circle transform.
        circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 20)
        if circles is not None:
            objects.append("圓形物件")

        # Linear structures via Canny edges + Hough line transform.
        edges = cv2.Canny(gray, 50, 150)
        lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=100)
        if lines is not None:
            objects.append("線性結構")

        # Color-region detection in HSV space.
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        # Blue region (hue ~100-130).
        blue_mask = cv2.inRange(hsv, np.array([100, 50, 50]), np.array([130, 255, 255]))
        if np.sum(blue_mask) > 1000:
            objects.append("藍色區域")

        # Green region (hue ~40-80).
        green_mask = cv2.inRange(hsv, np.array([40, 50, 50]), np.array([80, 255, 255]))
        if np.sum(green_mask) > 1000:
            objects.append("綠色區域")

        return objects

    def _analyze_scene(self, image: np.ndarray) -> str:
        """Classify the scene from brightness, contrast and edge density."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        brightness = np.mean(gray)
        contrast = np.std(gray)

        edges = cv2.Canny(gray, 50, 150)
        edge_density = np.sum(edges > 0) / edges.size

        # Threshold-based classification (heuristic).
        if brightness > 150 and contrast > 50:
            return "明亮戶外場景"
        elif brightness < 100 and edge_density > 0.1:
            return "室內場景"
        elif edge_density > 0.15:
            return "複雜場景"
        else:
            return "簡單場景"

    def _analyze_image_sentiment(self, image: np.ndarray) -> str:
        """Infer a coarse sentiment label from HSV brightness/saturation."""
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        mean_hue = np.mean(hsv[:, :, 0])
        mean_saturation = np.mean(hsv[:, :, 1])
        mean_value = np.mean(hsv[:, :, 2])

        # Bright + saturated → positive; dark or washed-out → negative.
        if mean_value > 150 and mean_saturation > 100:
            return "正面"
        elif mean_value < 100 or mean_saturation < 50:
            return "負面"
        else:
            return "中性"

    def _analyze_colors(self, image: np.ndarray) -> Dict:
        """Find dominant colors via K-means over the pixel array.

        Returns empty results when clustering (or sklearn itself) is
        unavailable, instead of raising.
        """
        pixels = image.reshape(-1, 3)

        try:
            # BUG FIX: the import now lives inside the try block so a missing
            # sklearn is caught by the fallback instead of crashing analyze().
            from sklearn.cluster import KMeans

            kmeans = KMeans(n_clusters=5, random_state=42)
            kmeans.fit(pixels)

            colors = kmeans.cluster_centers_.astype(int)
            labels = kmeans.labels_

            # Share of pixels assigned to each cluster.
            color_counts = np.bincount(labels)
            color_percentages = color_counts / len(labels) * 100

            dominant_colors = []
            for i, color in enumerate(colors):
                dominant_colors.append({
                    "color": color.tolist(),
                    "percentage": color_percentages[i]
                })

            return {
                "dominant_colors": dominant_colors,
                "color_diversity": len(np.unique(labels))
            }

        except Exception as e:
            logger.warning(f"顏色分析失敗: {e}")
            return {"dominant_colors": [], "color_diversity": 0}

    def _detect_faces(self, image: np.ndarray) -> List[Dict]:
        """Detect frontal faces with OpenCV's bundled Haar cascade."""
        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)

        face_info = []
        for (x, y, w, h) in faces:
            face_info.append({
                "x": int(x),
                "y": int(y),
                "width": int(w),
                "height": int(h),
                "confidence": 0.8  # placeholder; Haar cascades yield no score
            })

        return face_info

    def _extract_text(self, image: np.ndarray) -> str:
        """OCR hook (e.g. Tesseract) — currently returns an empty string."""
        return ""

    def _get_object_details(self, image: np.ndarray) -> Dict:
        """Return object-detection details (count + fixed placeholder confidence)."""
        objects = self._detect_objects(image)
        return {
            "detected_objects": objects,
            "object_count": len(objects),
            "detection_confidence": 0.7  # placeholder
        }

    def _get_scene_details(self, image: np.ndarray) -> Dict:
        """Return scene label plus the raw brightness/contrast features."""
        scene = self._analyze_scene(image)
        # Convert to grayscale once instead of twice (original recomputed it).
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return {
            "scene_type": scene,
            "scene_confidence": 0.6,  # placeholder
            "scene_features": {
                "brightness": float(np.mean(gray)),
                "contrast": float(np.std(gray))
            }
        }

    def _calculate_sentiment_score(self, image: np.ndarray) -> float:
        """Map the sentiment label to a fixed score in [-0.7, 0.7]."""
        sentiment = self._analyze_image_sentiment(image)

        if sentiment == "正面":
            return 0.7
        elif sentiment == "負面":
            return -0.7
        else:
            return 0.0

    def _generate_summary(self, results: Dict) -> str:
        """Join the headline findings into a single ' | '-separated line."""
        summary_parts = []

        if results["image_info"]:
            info = results["image_info"]
            summary_parts.append(f"圖片尺寸: {info['width']}x{info['height']}")

        if results["objects"]:
            summary_parts.append(f"偵測物件: {', '.join(results['objects'])}")

        if results["scene"]:
            summary_parts.append(f"場景類型: {results['scene']}")

        if results["sentiment"]:
            summary_parts.append(f"情感傾向: {results['sentiment']}")

        if results["faces"]:
            summary_parts.append(f"人臉數量: {len(results['faces'])}")

        return " | ".join(summary_parts)
modules/multimodal_fusion.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 多模態融合分析模組
3
+ """
4
+
5
+ import numpy as np
6
+ from typing import Dict, List, Optional, Tuple
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ class MultimodalFusion:
12
+ """多模態融合分析器"""
13
+
14
+ def __init__(self):
15
+ """初始化多模態融合分析器"""
16
+ # 權重設定
17
+ self.weights = {
18
+ "text": 0.4,
19
+ "image": 0.35,
20
+ "video": 0.25
21
+ }
22
+
23
+ # 情感映射
24
+ self.emotion_mapping = {
25
+ "正面": 1.0,
26
+ "中性": 0.0,
27
+ "負面": -1.0
28
+ }
29
+
30
+ def fuse_analysis(self,
31
+ text_analysis: Optional[Dict] = None,
32
+ image_analysis: Optional[Dict] = None,
33
+ video_analysis: Optional[Dict] = None) -> Dict:
34
+ """
35
+ 融合多模態分析結果
36
+
37
+ Args:
38
+ text_analysis: 文字分析結果
39
+ image_analysis: 圖片分析結果
40
+ video_analysis: 影片分析結果
41
+
42
+ Returns:
43
+ 融合後的分析結果
44
+ """
45
+ try:
46
+ results = {
47
+ "modalities": [],
48
+ "fused_sentiment": "中性",
49
+ "fused_sentiment_score": 0.0,
50
+ "content_category": "一般",
51
+ "confidence": 0.0,
52
+ "key_insights": [],
53
+ "summary": ""
54
+ }
55
+
56
+ # 收集可用的模態
57
+ available_modalities = []
58
+ if text_analysis and not text_analysis.get("error"):
59
+ available_modalities.append("text")
60
+ if image_analysis and not image_analysis.get("error"):
61
+ available_modalities.append("image")
62
+ if video_analysis and not video_analysis.get("error"):
63
+ available_modalities.append("video")
64
+
65
+ results["modalities"] = available_modalities
66
+
67
+ if not available_modalities:
68
+ results["summary"] = "無可用的分析模態"
69
+ return results
70
+
71
+ # 融合情感分析
72
+ results["fused_sentiment"], results["fused_sentiment_score"] = self._fuse_sentiment(
73
+ text_analysis, image_analysis, video_analysis, available_modalities
74
+ )
75
+
76
+ # 融合內容分類
77
+ results["content_category"] = self._fuse_content_category(
78
+ text_analysis, image_analysis, video_analysis, available_modalities
79
+ )
80
+
81
+ # 計算整體置信度
82
+ results["confidence"] = self._calculate_confidence(
83
+ text_analysis, image_analysis, video_analysis, available_modalities
84
+ )
85
+
86
+ # 提取關鍵洞察
87
+ results["key_insights"] = self._extract_key_insights(
88
+ text_analysis, image_analysis, video_analysis, available_modalities
89
+ )
90
+
91
+ # 生成總結
92
+ results["summary"] = self._generate_fusion_summary(results)
93
+
94
+ logger.info(f"多模態融合分析完成,使用模態: {available_modalities}")
95
+ return results
96
+
97
+ except Exception as e:
98
+ logger.error(f"多模態融合分析失敗: {e}")
99
+ return {"error": str(e)}
100
+
101
+ def _fuse_sentiment(self, text_analysis: Optional[Dict],
102
+ image_analysis: Optional[Dict],
103
+ video_analysis: Optional[Dict],
104
+ modalities: List[str]) -> Tuple[str, float]:
105
+ """融合情感分析結果"""
106
+ sentiment_scores = []
107
+ weights = []
108
+
109
+ # 文字情感
110
+ if "text" in modalities and text_analysis:
111
+ text_sentiment = text_analysis.get("sentiment", "中性")
112
+ text_score = self.emotion_mapping.get(text_sentiment, 0.0)
113
+
114
+ # 如果有sentiment_score,使用它
115
+ if "sentiment_score" in text_analysis:
116
+ text_score = text_analysis["sentiment_score"]
117
+
118
+ sentiment_scores.append(text_score)
119
+ weights.append(self.weights["text"])
120
+
121
+ # 圖片情感
122
+ if "image" in modalities and image_analysis:
123
+ image_sentiment = image_analysis.get("sentiment", "中性")
124
+ image_score = self.emotion_mapping.get(image_sentiment, 0.0)
125
+
126
+ # 如果有sentiment_score,使用它
127
+ if "sentiment_score" in image_analysis:
128
+ image_score = image_analysis["sentiment_score"]
129
+
130
+ sentiment_scores.append(image_score)
131
+ weights.append(self.weights["image"])
132
+
133
+ # 影片情感
134
+ if "video" in modalities and video_analysis:
135
+ video_sentiment = video_analysis.get("audio_sentiment", "中性")
136
+ video_score = self.emotion_mapping.get(video_sentiment, 0.0)
137
+
138
+ sentiment_scores.append(video_score)
139
+ weights.append(self.weights["video"])
140
+
141
+ if not sentiment_scores:
142
+ return "中性", 0.0
143
+
144
+ # 加權平均
145
+ weighted_score = np.average(sentiment_scores, weights=weights)
146
+
147
+ # 轉換為情感標籤
148
+ if weighted_score > 0.3:
149
+ sentiment_label = "正面"
150
+ elif weighted_score < -0.3:
151
+ sentiment_label = "負面"
152
+ else:
153
+ sentiment_label = "中性"
154
+
155
+ return sentiment_label, float(weighted_score)
156
+
157
+ def _fuse_content_category(self, text_analysis: Optional[Dict],
158
+ image_analysis: Optional[Dict],
159
+ video_analysis: Optional[Dict],
160
+ modalities: List[str]) -> str:
161
+ """融合內容分類結果"""
162
+ categories = []
163
+
164
+ # 文字分類
165
+ if "text" in modalities and text_analysis:
166
+ text_category = text_analysis.get("content_category", "一般")
167
+ categories.append(text_category)
168
+
169
+ # 圖片分類(基於場景)
170
+ if "image" in modalities and image_analysis:
171
+ image_scene = image_analysis.get("scene", "一般場景")
172
+ if "戶外" in image_scene:
173
+ categories.append("戶外")
174
+ elif "室內" in image_scene:
175
+ categories.append("室內")
176
+ else:
177
+ categories.append("一般")
178
+
179
+ # 影片分類(基於動作)
180
+ if "video" in modalities and video_analysis:
181
+ video_actions = video_analysis.get("actions", [])
182
+ if "運動" in video_actions:
183
+ categories.append("運動")
184
+ elif "靜止" in video_actions:
185
+ categories.append("靜態")
186
+ else:
187
+ categories.append("一般")
188
+
189
+ if not categories:
190
+ return "一般"
191
+
192
+ # 選擇最常見的分類
193
+ from collections import Counter
194
+ category_counts = Counter(categories)
195
+ return category_counts.most_common(1)[0][0]
196
+
197
+ def _calculate_confidence(self, text_analysis: Optional[Dict],
198
+ image_analysis: Optional[Dict],
199
+ video_analysis: Optional[Dict],
200
+ modalities: List[str]) -> float:
201
+ """計算整體置信度"""
202
+ confidences = []
203
+ weights = []
204
+
205
+ # 文字置信度
206
+ if "text" in modalities and text_analysis:
207
+ text_conf = 0.8 # 簡化版,實際會根據分析品質計算
208
+ confidences.append(text_conf)
209
+ weights.append(self.weights["text"])
210
+
211
+ # 圖片置信度
212
+ if "image" in modalities and image_analysis:
213
+ image_conf = 0.7 # 簡化版
214
+ confidences.append(image_conf)
215
+ weights.append(self.weights["image"])
216
+
217
+ # 影片置信度
218
+ if "video" in modalities and video_analysis:
219
+ video_conf = 0.6 # 簡化版
220
+ confidences.append(video_conf)
221
+ weights.append(self.weights["video"])
222
+
223
+ if not confidences:
224
+ return 0.0
225
+
226
+ # 加權平均
227
+ return float(np.average(confidences, weights=weights))
228
+
229
+ def _extract_key_insights(self, text_analysis: Optional[Dict],
230
+ image_analysis: Optional[Dict],
231
+ video_analysis: Optional[Dict],
232
+ modalities: List[str]) -> List[str]:
233
+ """提取關鍵洞察"""
234
+ insights = []
235
+
236
+ # 文字洞察
237
+ if "text" in modalities and text_analysis:
238
+ keywords = text_analysis.get("keywords", [])
239
+ if keywords:
240
+ insights.append(f"文字關鍵詞: {', '.join(keywords[:3])}")
241
+
242
+ topics = text_analysis.get("topics", [])
243
+ if topics:
244
+ insights.append(f"文字主題: {', '.join(topics[:2])}")
245
+
246
+ # 圖片洞察
247
+ if "image" in modalities and image_analysis:
248
+ objects = image_analysis.get("objects", [])
249
+ if objects:
250
+ insights.append(f"圖片物件: {', '.join(objects[:3])}")
251
+
252
+ scene = image_analysis.get("scene", "")
253
+ if scene:
254
+ insights.append(f"圖片場景: {scene}")
255
+
256
+ # 影片洞察
257
+ if "video" in modalities and video_analysis:
258
+ actions = video_analysis.get("actions", [])
259
+ if actions:
260
+ insights.append(f"影片動作: {', '.join(actions)}")
261
+
262
+ motion = video_analysis.get("motion", {})
263
+ if motion and motion.get("motion_type"):
264
+ insights.append(f"運動類型: {motion['motion_type']}")
265
+
266
+ return insights
267
+
268
+ def _generate_fusion_summary(self, results: Dict) -> str:
269
+ """生成融合分析總結"""
270
+ summary_parts = []
271
+
272
+ # 模態資訊
273
+ modalities = results.get("modalities", [])
274
+ summary_parts.append(f"分析模態: {', '.join(modalities)}")
275
+
276
+ # 融合情感
277
+ sentiment = results.get("fused_sentiment", "未知")
278
+ sentiment_score = results.get("fused_sentiment_score", 0.0)
279
+ summary_parts.append(f"綜合情感: {sentiment} ({sentiment_score:.2f})")
280
+
281
+ # 內容分類
282
+ category = results.get("content_category", "一般")
283
+ summary_parts.append(f"內容類型: {category}")
284
+
285
+ # 置信度
286
+ confidence = results.get("confidence", 0.0)
287
+ summary_parts.append(f"分析置信度: {confidence:.2f}")
288
+
289
+ # 關鍵洞察
290
+ insights = results.get("key_insights", [])
291
+ if insights:
292
+ summary_parts.append(f"關鍵洞察: {'; '.join(insights[:3])}")
293
+
294
+ return " | ".join(summary_parts)
modules/text_analyzer.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 文字內容分析模組
3
+ """
4
+
5
+ import re
6
+ import jieba
7
+ from typing import Dict, List, Optional
8
+ import logging
9
+ from collections import Counter
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class TextAnalyzer:
    """Dictionary-based analyzer for (mainly Chinese) social-media text.

    Provides sentiment labelling, keyword/topic extraction, rule-based
    content classification, language detection and a rough named-entity
    pass — no ML models required.
    """

    def __init__(self):
        """Initialize the jieba tokenizer and the built-in word lists."""
        # Warm up jieba so the first analyze() call does not pay the
        # dictionary-loading cost.
        jieba.initialize()

        # Simplified positive-sentiment lexicon (duplicates removed).
        self.positive_words = {
            "好", "棒", "讚", "優秀", "完美", "喜歡", "愛", "開心", "快樂", "高興",
            "滿意", "驚喜", "感動", "溫暖", "美好", "精彩", "出色", "傑出",
        }

        # Simplified negative-sentiment lexicon (duplicates removed).
        self.negative_words = {
            "壞", "差", "爛", "討厭", "恨", "生氣", "憤怒", "失望", "難過", "痛苦",
            "糟糕", "惡劣", "可惡", "煩人", "無聊", "噁心", "恐怖",
        }

        # Stop words ignored during keyword extraction.
        self.stop_words = {
            "的", "了", "在", "是", "我", "有", "和", "就", "不", "人", "都", "一",
            "一個", "上", "也", "很", "到", "說", "要", "去", "你", "會", "著", "沒有",
            "看", "好", "自己", "這", "那", "他", "她", "它", "們", "我們", "你們",
        }

    def analyze(self, text: str, analysis_type: str = "comprehensive") -> Dict:
        """
        Analyze a piece of text.

        Args:
            text: the text to analyze
            analysis_type: "comprehensive", "sentiment",
                "content_classification" or "keyword_extraction"

        Returns:
            Result dict, or {"error": ...} on failure.
        """
        try:
            results = {
                "original_text": text,
                "analysis_type": analysis_type,
                # Bug fix: word_count used to be len(text) — a character
                # count mislabeled as a word count. Report the tokenizer's
                # word count instead; char_count keeps the old semantics.
                "word_count": len(jieba.lcut(text)),
                "char_count": len(text.replace(" ", "")),
                "sentiment": self._analyze_sentiment(text),
                "keywords": self._extract_keywords(text),
                "topics": self._extract_topics(text),
                "summary": ""
            }

            # Type-specific extras.
            if analysis_type in ["comprehensive", "sentiment"]:
                results["sentiment_score"] = self._calculate_sentiment_score(text)

            if analysis_type in ["comprehensive", "content_classification"]:
                results["content_category"] = self._classify_content(text)
                results["language"] = self._detect_language(text)

            if analysis_type in ["comprehensive", "keyword_extraction"]:
                results["named_entities"] = self._extract_named_entities(text)

            results["summary"] = self._generate_summary(results)

            logger.info(f"文字分析完成: {analysis_type}")
            return results

        except Exception as e:
            logger.error(f"文字分析失敗: {e}")
            return {"error": str(e)}

    def _analyze_sentiment(self, text: str) -> str:
        """Label the text 正面/負面/中性 by lexicon hit counts."""
        words = jieba.lcut(text)

        positive_count = sum(1 for word in words if word in self.positive_words)
        negative_count = sum(1 for word in words if word in self.negative_words)

        if positive_count > negative_count:
            return "正面"
        elif negative_count > positive_count:
            return "負面"
        else:
            return "中性"

    def _calculate_sentiment_score(self, text: str) -> float:
        """Return a sentiment score in [-1, 1] (net hits / token count)."""
        words = jieba.lcut(text)

        positive_count = sum(1 for word in words if word in self.positive_words)
        negative_count = sum(1 for word in words if word in self.negative_words)
        total_words = len(words)

        if total_words == 0:
            return 0.0

        score = (positive_count - negative_count) / total_words
        return max(-1.0, min(1.0, score))

    def _extract_keywords(self, text: str, top_k: int = 10) -> List[str]:
        """Return the top_k most frequent non-stop-word tokens."""
        words = jieba.lcut(text)

        # Drop stop words and single-character tokens.
        filtered_words = [
            word for word in words
            if len(word) > 1 and word not in self.stop_words
        ]

        word_freq = Counter(filtered_words)
        return [word for word, freq in word_freq.most_common(top_k)]

    def _extract_topics(self, text: str) -> List[str]:
        """Approximate topics with the top keywords (no topic model yet)."""
        # Consistency: delegate instead of duplicating the keyword logic.
        return self._extract_keywords(text, top_k=5)

    def _classify_content(self, text: str) -> str:
        """Rule-based content classification by trigger words."""
        if any(word in text for word in ["新聞", "報導", "消息", "事件"]):
            return "新聞"
        elif any(word in text for word in ["評論", "觀點", "看法", "認為"]):
            return "評論"
        elif any(word in text for word in ["問題", "求助", "請教", "怎麼辦"]):
            return "問答"
        elif any(word in text for word in ["分享", "推薦", "介紹"]):
            return "分享"
        else:
            return "一般"

    def _detect_language(self, text: str) -> str:
        """Classify as 中文 when >50% of non-space characters are CJK."""
        chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))
        total_chars = len(text.replace(" ", ""))

        if total_chars == 0:
            return "未知"

        chinese_ratio = chinese_chars / total_chars
        return "中文" if chinese_ratio > 0.5 else "其他"

    def _extract_named_entities(self, text: str) -> List[str]:
        """Very rough NER: generic 2-4 char runs plus org-suffix patterns.

        NOTE(review): the 2-4 character pattern matches almost any Chinese
        word, so the output is noisy by design ("simplified" placeholder).
        """
        entities = []

        # Candidate person names: any 2-4 character CJK run.
        entities.extend(re.findall(r'[\u4e00-\u9fff]{2,4}', text))

        # Candidate organizations: CJK run followed by a known suffix.
        org_patterns = [
            r'[\u4e00-\u9fff]+公司',
            r'[\u4e00-\u9fff]+大學',
            r'[\u4e00-\u9fff]+政府',
            r'[\u4e00-\u9fff]+協會'
        ]
        for pattern in org_patterns:
            entities.extend(re.findall(pattern, text))

        return list(set(entities))

    def _generate_summary(self, results: Dict) -> str:
        """Build a one-line, pipe-separated summary of the results."""
        summary_parts = []

        summary_parts.append(f"文字長度: {results['char_count']} 字符")
        summary_parts.append(f"情感傾向: {results['sentiment']}")

        if 'sentiment_score' in results:
            score = results['sentiment_score']
            summary_parts.append(f"情感分數: {score:.2f}")

        if results['keywords']:
            summary_parts.append(f"主要關鍵詞: {', '.join(results['keywords'][:5])}")

        if 'content_category' in results:
            summary_parts.append(f"內容類型: {results['content_category']}")

        return " | ".join(summary_parts)
modules/video_analyzer.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 影片內容分析模組
3
+ """
4
+
5
+ import cv2
6
+ import numpy as np
7
+ from typing import Dict, List, Optional, Tuple
8
+ import logging
9
+ import os
10
+ import librosa
11
+ import tempfile
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class VideoAnalyzer:
    """Video content analyzer.

    Uses classical OpenCV techniques (Haar cascades, frame differencing,
    histogram comparison, dense optical flow) plus librosa audio features.
    No deep-learning models are required.
    """

    def __init__(self):
        """Initialize the analyzer and its reusable resources."""
        self.frame_analyzer = None  # placeholder: could reuse an ImageAnalyzer
        self.audio_analyzer = None  # placeholder: dedicated audio analyzer

        # Performance fix: build the Haar face detector once instead of
        # once per sampled frame (the original re-created it in the loops).
        self._face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

        # Candidate action labels (currently informational only).
        self.action_labels = [
            "走路", "跑步", "跳躍", "坐下", "站立", "揮手", "點頭", "搖頭",
            "拍手", "擁抱", "握手", "指向", "寫字", "打字", "開車", "騎車"
        ]

        # Candidate audio-emotion labels (currently informational only).
        self.audio_emotion_labels = [
            "快樂", "悲傷", "憤怒", "恐懼", "驚訝", "厭惡", "中性"
        ]

    def analyze(self, video_path: str, analysis_type: str = "comprehensive") -> Dict:
        """
        Analyze a video file.

        Args:
            video_path: path to the video file
            analysis_type: "comprehensive", "object_detection",
                "action_recognition" or "audio_analysis"

        Returns:
            Result dict, or {"error": ...} on failure.
        """
        try:
            if not os.path.exists(video_path):
                return {"error": "影片檔案不存在"}

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                return {"error": "無法讀取影片"}

            results = {
                "video_path": video_path,
                "analysis_type": analysis_type,
                "video_info": self._get_video_info(cap),
                "objects": self._detect_objects_in_video(cap),
                "actions": self._detect_actions(cap),
                "scenes": self._detect_scenes(cap),
                "audio_sentiment": self._analyze_audio_sentiment(video_path),
                "motion": self._analyze_motion(cap),
                "faces": self._detect_faces_in_video(cap),
                "summary": ""
            }

            # Bug fix: these passes re-read `cap`, but the original released
            # the capture *before* running them, handing them a closed handle.
            if analysis_type in ["comprehensive", "object_detection"]:
                results["object_tracking"] = self._track_objects(cap)

            if analysis_type in ["comprehensive", "action_recognition"]:
                results["action_details"] = self._get_action_details(cap)

            cap.release()

            # Audio work re-opens the file itself; safe after release.
            if analysis_type in ["comprehensive", "audio_analysis"]:
                results["audio_features"] = self._extract_audio_features(video_path)

            results["summary"] = self._generate_summary(results)

            logger.info(f"影片分析完成: {analysis_type}")
            return results

        except Exception as e:
            logger.error(f"影片分析失敗: {e}")
            return {"error": str(e)}

    def _get_video_info(self, cap: cv2.VideoCapture) -> Dict:
        """Read basic properties (resolution, fps, frame count, duration)."""
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        duration = frame_count / fps if fps > 0 else 0

        return {
            "width": width,
            "height": height,
            "fps": fps,
            "frame_count": frame_count,
            "duration": duration,
            "aspect_ratio": width / height if height > 0 else 0
        }

    def _detect_objects_in_video(self, cap: cv2.VideoCapture) -> List[str]:
        """Sample frames and collect the union of per-frame object hints."""
        objects = set()
        frame_count = 0
        sample_rate = 30  # analyze one frame out of every 30

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % sample_rate == 0:
                objects.update(self._detect_objects_in_frame(frame))

            frame_count += 1

            if frame_count > 300:  # cap the work on long videos
                break

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for subsequent passes
        return list(objects)

    def _detect_objects_in_frame(self, frame: np.ndarray) -> List[str]:
        """Heuristic per-frame object hints (faces, edge density)."""
        objects = []

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Reuse the cascade built in __init__.
        faces = self._face_cascade.detectMultiScale(gray, 1.1, 4)
        if len(faces) > 0:
            objects.append("人臉")

        # High edge density is used as a proxy for "complex structure".
        edges = cv2.Canny(gray, 50, 150)
        edge_density = np.sum(edges > 0) / edges.size
        if edge_density > 0.1:
            objects.append("複雜結構")

        return objects

    def _detect_actions(self, cap: cv2.VideoCapture) -> List[str]:
        """Coarse motion/no-motion labelling from inter-frame differences."""
        actions = []
        frame_count = 0
        prev_frame = None

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if prev_frame is not None and frame_count % 10 == 0:
                # Mean absolute per-pixel difference — value range 0..255.
                diff = cv2.absdiff(prev_frame, frame)
                motion_score = np.sum(diff) / diff.size

                # Bug fix: the original thresholds (>1000 / <100) are
                # impossible for a mean 8-bit difference, so "運動" could
                # never fire; use thresholds inside the 0..255 range.
                if motion_score > 20:
                    actions.append("運動")
                elif motion_score < 2:
                    actions.append("靜止")

            prev_frame = frame.copy()
            frame_count += 1

            if frame_count > 100:  # cap the work on long videos
                break

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for subsequent passes
        return list(set(actions))

    def _detect_scenes(self, cap: cv2.VideoCapture) -> List[str]:
        """Detect scene cuts via color-histogram correlation once a second."""
        scenes = []
        frame_count = 0
        prev_hist = None

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % 30 == 0:  # roughly one sample per second
                hist = cv2.calcHist([frame], [0, 1, 2], None, [8, 8, 8],
                                    [0, 256, 0, 256, 0, 256])

                if prev_hist is not None:
                    correlation = cv2.compareHist(prev_hist, hist, cv2.HISTCMP_CORREL)
                    # Low correlation between consecutive samples => cut.
                    if correlation < 0.7:
                        scenes.append("場景變化")

                prev_hist = hist

            frame_count += 1

            if frame_count > 300:  # cap the work on long videos
                break

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for subsequent passes
        return scenes

    def _analyze_audio_sentiment(self, video_path: str) -> str:
        """Classify audio sentiment from MFCC / spectral-centroid means.

        NOTE(review): the thresholds are heuristic placeholders, not a
        validated emotion model.
        """
        try:
            audio_path = self._extract_audio(video_path)
            if not audio_path:
                return "無法分析"

            try:
                y, sr = librosa.load(audio_path, duration=30)  # first 30 s only

                mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
                spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)

                mean_mfcc = np.mean(mfccs)
                mean_spectral = np.mean(spectral_centroids)
            finally:
                # Bug fix: the extracted temp WAV was never deleted.
                try:
                    os.remove(audio_path)
                except OSError:
                    pass

            if mean_spectral > 2000 and mean_mfcc > 0:
                return "正面"
            elif mean_spectral < 1000 and mean_mfcc < 0:
                return "負面"
            else:
                return "中性"

        except Exception as e:
            logger.warning(f"音頻分析失敗: {e}")
            return "無法分析"

    def _extract_audio(self, video_path: str) -> Optional[str]:
        """Extract the audio track to a temp WAV via ffmpeg.

        Returns:
            Path of the temp WAV (caller must delete it), or None on failure.
        """
        try:
            temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
            temp_audio.close()

            import subprocess
            cmd = [
                'ffmpeg', '-i', video_path,
                '-vn', '-acodec', 'pcm_s16le',
                '-ar', '44100', '-ac', '2',
                temp_audio.name, '-y'
            ]

            result = subprocess.run(cmd, capture_output=True, text=True)

            if result.returncode == 0:
                return temp_audio.name

            logger.warning(f"音頻提取失敗: {result.stderr}")
            # Don't leak the empty temp file on failure.
            try:
                os.remove(temp_audio.name)
            except OSError:
                pass
            return None

        except Exception as e:
            logger.warning(f"音頻提取失敗: {e}")
            return None

    def _analyze_motion(self, cap: cv2.VideoCapture) -> Dict:
        """Estimate motion intensity/type via dense optical flow."""
        motion_data = {
            "motion_intensity": 0.0,
            "motion_direction": "未知",
            "motion_type": "靜止"
        }

        frame_count = 0
        motion_scores = []
        prev_gray = None

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if prev_gray is not None:
                # Bug fix: the original called cv2.calcOpticalFlowPyrLK with
                # prevPts=None, which raises — PyrLK is a *sparse* tracker
                # that needs seed points. Use Farneback dense flow instead
                # and average the per-pixel flow magnitude.
                flow = cv2.calcOpticalFlowFarneback(
                    prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
                magnitude = np.linalg.norm(flow, axis=2)
                motion_scores.append(float(np.mean(magnitude)))

            prev_gray = gray
            frame_count += 1

            if frame_count > 50:  # cap the work on long videos
                break

        if motion_scores:
            motion_data["motion_intensity"] = float(np.mean(motion_scores))

            # Thresholds in pixels of mean displacement per frame.
            if motion_data["motion_intensity"] > 5:
                motion_data["motion_type"] = "快速運動"
            elif motion_data["motion_intensity"] > 1:
                motion_data["motion_type"] = "慢速運動"
            else:
                motion_data["motion_type"] = "靜止"

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for subsequent passes
        return motion_data

    def _detect_faces_in_video(self, cap: cv2.VideoCapture) -> List[Dict]:
        """Sample frames and return face bounding boxes per sampled frame."""
        faces = []
        frame_count = 0
        sample_rate = 30  # analyze one frame out of every 30

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % sample_rate == 0:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # Reuse the cascade built in __init__.
                detected_faces = self._face_cascade.detectMultiScale(gray, 1.1, 4)

                for (x, y, w, h) in detected_faces:
                    faces.append({
                        "frame": frame_count,
                        "x": int(x),
                        "y": int(y),
                        "width": int(w),
                        "height": int(h)
                    })

            frame_count += 1

            if frame_count > 300:  # cap the work on long videos
                break

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for subsequent passes
        return faces

    def _track_objects(self, cap: cv2.VideoCapture) -> Dict:
        """Object tracking — not yet implemented; returns an empty result."""
        return {
            "tracked_objects": [],
            "tracking_confidence": 0.0
        }

    def _get_action_details(self, cap: cv2.VideoCapture) -> Dict:
        """Return detected actions with a fixed placeholder confidence."""
        actions = self._detect_actions(cap)
        return {
            "detected_actions": actions,
            "action_count": len(actions),
            "action_confidence": 0.6  # placeholder value (no real model)
        }

    def _extract_audio_features(self, video_path: str) -> Dict:
        """Extract global audio features (tempo, centroid, ZCR, MFCC mean)."""
        try:
            audio_path = self._extract_audio(video_path)
            if not audio_path:
                return {}

            try:
                y, sr = librosa.load(audio_path, duration=30)

                features = {
                    # NOTE(review): librosa.beat.tempo is deprecated in
                    # librosa >= 0.10 (moved to librosa.feature.rhythm.tempo);
                    # kept for compatibility with the pinned requirement.
                    "tempo": float(librosa.beat.tempo(y=y, sr=sr)[0]),
                    "spectral_centroid": float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))),
                    "zero_crossing_rate": float(np.mean(librosa.feature.zero_crossing_rate(y))),
                    "mfcc_mean": float(np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)))
                }
                return features
            finally:
                # Bug fix: the extracted temp WAV was never deleted.
                try:
                    os.remove(audio_path)
                except OSError:
                    pass

        except Exception as e:
            logger.warning(f"音頻特徵提取失敗: {e}")
            return {}

    def _generate_summary(self, results: Dict) -> str:
        """Build a one-line, pipe-separated summary of the results."""
        summary_parts = []

        if results["video_info"]:
            info = results["video_info"]
            summary_parts.append(f"影片長度: {info['duration']:.1f}秒")
            summary_parts.append(f"解析度: {info['width']}x{info['height']}")

        if results["objects"]:
            summary_parts.append(f"偵測物件: {', '.join(results['objects'])}")

        if results["actions"]:
            summary_parts.append(f"動作: {', '.join(results['actions'])}")

        if results["audio_sentiment"]:
            summary_parts.append(f"音頻情感: {results['audio_sentiment']}")

        if results["motion"]:
            motion = results["motion"]
            summary_parts.append(f"運動類型: {motion['motion_type']}")

        if results["faces"]:
            summary_parts.append(f"人臉數量: {len(results['faces'])}")

        return " | ".join(summary_parts)
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gradio Spaces 部署專用依賴套件
2
+ gradio>=4.0.0
3
+ opencv-python-headless>=4.8.0
4
+ numpy>=1.24.0
5
+ jieba>=0.42.1
6
+ librosa>=0.10.0
7
+ scikit-learn>=1.3.0
8
+ Pillow>=10.0.0
9
+ matplotlib>=3.7.0
10
+ pandas>=2.0.0
11
+ requests>=2.31.0
12
+ tqdm>=4.65.0
13
+
14
+ # 可選的深度學習套件(如果需要更進階的分析)
15
+ # torch>=2.0.0
16
+ # transformers>=4.30.0
17
+ # torchvision>=0.15.0
utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # 工具模組初始化檔案
utils/config.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 配置管理模組
3
+ """
4
+
5
+ import os
6
+ import json
7
+ from typing import Dict, Any
8
+
9
class Config:
    """Configuration manager backed by an optional JSON file.

    Defaults are defined in code; a user-supplied JSON file (when present)
    is deep-merged on top of them.  Values are addressed with dotted paths,
    e.g. ``cfg.get("models.text_model")``.
    """

    def __init__(self, config_file: str = "config.json"):
        self.config_file = config_file       # path of the overlay JSON file
        self.config = self._load_config()    # merged configuration tree

    def _load_config(self) -> Dict[str, Any]:
        """Build the in-code defaults and overlay the JSON file, if any."""
        merged = {
            "models": {
                "text_model": "bert-base-chinese",
                "image_model": "resnet50",
                "video_model": "slowfast",
                "multimodal_model": "clip",
            },
            "analysis": {
                "max_text_length": 512,
                "max_image_size": 224,
                "max_video_duration": 30,
                "confidence_threshold": 0.5,
            },
            "api": {
                "openai_api_key": "",
                "huggingface_token": "",
                "google_api_key": "",
            },
            "storage": {
                "temp_dir": "temp",
                "output_dir": "output",
                "max_file_size": 100 * 1024 * 1024,  # 100MB
            },
            "logging": {
                "level": "INFO",
                "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            },
        }

        if os.path.exists(self.config_file):
            try:
                with open(self.config_file, 'r', encoding='utf-8') as fh:
                    self._merge_config(merged, json.load(fh))
            except Exception as exc:
                # Fall back to defaults if the file is unreadable/invalid.
                print(f"載入配置檔案失敗,使用預設配置: {exc}")

        return merged

    def _merge_config(self, default: Dict, user: Dict):
        """Recursively overlay ``user`` onto ``default`` in place."""
        for key, incoming in user.items():
            existing = default.get(key)
            if isinstance(existing, dict) and isinstance(incoming, dict):
                self._merge_config(existing, incoming)
            else:
                default[key] = incoming

    def get(self, key: str, default=None):
        """Look up a dotted-path key; return ``default`` when missing."""
        node = self.config
        for part in key.split('.'):
            try:
                node = node[part]
            except (KeyError, TypeError):
                return default
        return node

    def set(self, key: str, value: Any):
        """Set a dotted-path key, creating intermediate dicts as needed."""
        *parents, leaf = key.split('.')
        node = self.config
        for part in parents:
            if part not in node:
                node[part] = {}
            node = node[part]
        node[leaf] = value

    def save(self):
        """Persist the current configuration back to the JSON file."""
        try:
            with open(self.config_file, 'w', encoding='utf-8') as fh:
                json.dump(self.config, fh, indent=2, ensure_ascii=False)
        except Exception as exc:
            print(f"儲存配置檔案失敗: {exc}")
utils/file_handler.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 檔案處理工具模組
3
+ """
4
+
5
+ import os
6
+ import shutil
7
+ import tempfile
8
+ from typing import Optional, List
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class FileHandler:
    """Utility for staging uploaded files and validating them.

    Uploads are copied into a temp directory under a unique, type-prefixed
    name so concurrent uploads never collide; helper methods expose file
    metadata plus extension and size checks.
    """

    def __init__(self, temp_dir: str = "temp", output_dir: str = "output"):
        self.temp_dir = temp_dir        # staging area for uploads
        self.output_dir = output_dir    # destination for analysis artifacts
        self._ensure_directories()

    def _ensure_directories(self):
        """Create the temp and output directories if they do not exist."""
        for path in (self.temp_dir, self.output_dir):
            os.makedirs(path, exist_ok=True)

    def save_uploaded_file(self, file_path: str, file_type: str = "unknown") -> str:
        """
        Copy an uploaded file into the temp directory.

        Args:
            file_path: source file path
            file_type: category tag used as the filename prefix
                (image, video, audio, ...)

        Returns:
            Path of the staged copy.

        Raises:
            Exception: re-raised from the underlying copy on failure.
        """
        try:
            ext = os.path.splitext(os.path.basename(file_path))[1]

            # NamedTemporaryFile guarantees a unique name; we only need the
            # name, so close the handle immediately and copy over it.
            handle = tempfile.NamedTemporaryFile(
                delete=False,
                suffix=ext,
                dir=self.temp_dir,
                prefix=f"{file_type}_"
            )
            staged_path = handle.name
            handle.close()

            shutil.copy2(file_path, staged_path)

            logger.info(f"檔案已儲存到: {staged_path}")
            return staged_path

        except Exception as e:
            logger.error(f"儲存檔案失敗: {e}")
            raise

    def cleanup_temp_files(self, file_paths: List[str]):
        """Best-effort removal of previously staged files."""
        for path in file_paths:
            try:
                if os.path.exists(path):
                    os.remove(path)
                    logger.info(f"已刪除臨時檔案: {path}")
            except Exception as e:
                logger.error(f"刪除檔案失敗: {e}")

    def get_file_info(self, file_path: str) -> dict:
        """Return size/mtime/extension/basename, or {} on error."""
        try:
            stat = os.stat(file_path)
        except Exception as e:
            logger.error(f"獲取檔案資訊失敗: {e}")
            return {}
        return {
            "size": stat.st_size,
            "modified": stat.st_mtime,
            "extension": os.path.splitext(file_path)[1].lower(),
            "basename": os.path.basename(file_path)
        }

    def is_valid_file_type(self, file_path: str, allowed_types: List[str]) -> bool:
        """True when the file's lowercased extension is in allowed_types."""
        return self.get_file_info(file_path).get("extension", "") in allowed_types

    def validate_file_size(self, file_path: str, max_size: int) -> bool:
        """True when the file size is <= max_size.

        NOTE(review): a missing/unreadable file reports size 0 and thus
        passes the check — behavior preserved from the original.
        """
        return self.get_file_info(file_path).get("size", 0) <= max_size