Spaces:
Sleeping
Sleeping
| """ | |
| 社交媒體多模態內容分析系統 - Gradio Spaces 版本 | |
| 簡化版應用程式檔案 | |
| """ | |
| import gradio as gr | |
| import os | |
| import sys | |
| import logging | |
| import requests | |
| import json | |
| from typing import Dict, Optional | |
| import base64 | |
| from PIL import Image | |
| import io | |
| import google.generativeai as genai | |
# Configure logging at import time (INFO level) and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class SimpleAnalyzer:
    """Simplified multimodal analyzer with optional Gemini API support.

    Text and image analysis can be delegated to Gemini; when Gemini is not
    requested (or the call fails) a local fallback is used instead: a small
    sentiment word list for text and basic PIL metadata for images.

    NOTE(review): an API key passed to ``analyze_text`` / ``analyze_image`` is
    stored on the instance and reused by later calls. When one instance is
    shared between users this means one user's key outlives their request —
    confirm this is intended.
    """

    # Model identifiers handed to ``genai.GenerativeModel``. They are class
    # attributes so a subclass or an instance can override them.
    # NOTE(review): these are legacy model names and may no longer be served
    # by the API — confirm against the current model list.
    TEXT_MODEL = "gemini-pro"
    VISION_MODEL = "gemini-pro-vision"

    def __init__(self):
        """Read the Gemini API key from the ``GEMINI_API_KEY`` environment variable."""
        self.gemini_api_key = os.getenv("GEMINI_API_KEY", "")
        logger.info("簡化版分析器初始化完成")
        if self.gemini_api_key:
            logger.info("找到 Gemini API 金鑰 - API 分析功能可用")
        else:
            logger.info("未找到 Gemini API 金鑰 - 使用備用分析")

    def set_api_key(self, api_key: str):
        """Store ``api_key`` (stripped) on the instance.

        Returns:
            True when a non-blank key was stored, False when the key was empty.
        """
        if api_key and api_key.strip():
            self.gemini_api_key = api_key.strip()
            logger.info("API 金鑰已設定")
            return True
        logger.warning("API 金鑰為空")
        return False

    @staticmethod
    def _extract_json_object(content: str) -> Dict:
        """Parse the outermost ``{...}`` span found in ``content``.

        Raises:
            ValueError: when no brace-delimited span exists, the span is not
                valid JSON (``json.JSONDecodeError`` is a ``ValueError``), or
                the parsed value is not a JSON object.
        """
        start = content.find("{")
        end = content.rfind("}")
        if start == -1 or end < start:
            raise ValueError("no JSON object found in response")
        parsed = json.loads(content[start:end + 1])
        if not isinstance(parsed, dict):
            raise ValueError("JSON response is not an object")
        return parsed

    @staticmethod
    def _coerce_score(value) -> float:
        """Convert ``value`` to a float clamped to [-1, 1]; 0.0 if not numeric."""
        try:
            score = float(value)
        except (TypeError, ValueError):
            return 0.0
        if score != score:  # NaN never equals itself
            return 0.0
        return max(-1.0, min(1.0, score))

    @staticmethod
    def _guess_image_mime(image_path: str) -> str:
        """Guess the image MIME type from the file name, defaulting to JPEG."""
        import mimetypes  # local import: only this helper needs it

        guessed = mimetypes.guess_type(image_path)[0]
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/jpeg"

    def analyze_text(self, text: str, use_gemini: bool = False, api_key: str = "") -> Dict:
        """Analyze ``text``, optionally through the Gemini API.

        Args:
            text: Text to analyze; ``None`` or blank text yields a "no content" result.
            use_gemini: When True the Gemini API is used (a key is required).
            api_key: Optional key; a non-blank value replaces the stored key.

        Returns:
            A result dict. Gemini and fallback results carry ``sentiment``,
            ``sentiment_score``, ``keywords``, ``emotions``, ``summary`` and
            ``method``; the "no content" result carries only ``sentiment``,
            ``keywords`` and ``summary``.
        """
        if not text or not text.strip():
            return {"sentiment": "無內容", "keywords": [], "summary": "無文字內容"}

        # A freshly supplied key replaces the stored one. The key itself is
        # deliberately never written to the log.
        if api_key and api_key.strip():
            self.gemini_api_key = api_key.strip()
            logger.info("API 金鑰已更新")

        if not use_gemini:
            logger.info("使用備用文字分析")
            return self._fallback_analysis(text)

        if not (self.gemini_api_key and self.gemini_api_key.strip()):
            logger.warning("未提供有效的 Gemini API 金鑰,使用備用分析")
            return {
                "sentiment": "分析失敗",
                "sentiment_score": 0.0,
                "keywords": [],
                "emotions": [],
                "summary": "未提供有效的 Gemini API 金鑰",
                "method": "API 金鑰缺失",
            }

        logger.info("使用 Gemini API 進行文字分析")
        try:
            return self.analyze_text_with_gemini(text)
        except Exception as e:
            # Safety net only: analyze_text_with_gemini handles its own
            # failures by falling back to the word-list analysis.
            logger.error(f"Gemini API 調用失敗: {e}")
            return {
                "sentiment": "API 調用失敗",
                "sentiment_score": 0.0,
                "keywords": [],
                "emotions": [],
                "summary": f"Gemini API 調用失敗: {str(e)}",
                "method": "API 調用失敗",
            }

    def analyze_text_with_gemini(self, text: str) -> Dict:
        """Ask Gemini for a JSON sentiment analysis of ``text``.

        Returns the parsed JSON object when the reply contains one, a
        structured wrapper around the raw reply when it does not, and the
        word-list fallback result when the API call itself fails.
        """
        try:
            genai.configure(api_key=self.gemini_api_key)
            model = genai.GenerativeModel(self.TEXT_MODEL)

            prompt = (
                "請分析以下文字的情感,並以JSON格式返回結果:\n"
                f"文字內容:{text}\n"
                "請返回以下格式的JSON:\n"
                "{\n"
                '"sentiment": "正面/負面/中性",\n'
                '"sentiment_score": -1到1之間的分數,\n'
                '"keywords": ["關鍵詞1", "關鍵詞2", "關鍵詞3"],\n'
                '"emotions": ["具體情感1", "具體情感2"],\n'
                '"summary": "分析總結",\n'
                '"method": "Gemini API"\n'
                "}"
            )

            logger.info(f"正在調用 Gemini API,文字長度: {len(text)}")

            response = model.generate_content(
                prompt,
                generation_config=genai.types.GenerationConfig(
                    temperature=0.3,
                    max_output_tokens=500,
                ),
            )
            content = response.text
            logger.info(f"Gemini API 回應內容: {content[:100]}...")

            try:
                result = self._extract_json_object(content)
            except (json.JSONDecodeError, ValueError) as e:
                logger.warning(f"JSON 解析失敗: {e},使用原始回應")
                # No usable JSON: wrap the (truncated) raw reply instead.
                return {
                    "sentiment": "分析完成",
                    "sentiment_score": 0.0,
                    "keywords": [],
                    "emotions": [],
                    "summary": content[:200] + "..." if len(content) > 200 else content,
                    "method": "Gemini API",
                }

            # The model may return the score as a string or out of range.
            if "sentiment_score" in result:
                result["sentiment_score"] = self._coerce_score(result["sentiment_score"])
            logger.info("成功解析 Gemini API JSON 回應")
            return result
        except Exception as e:
            logger.error(f"Gemini API 調用失敗: {e}")
            return self._fallback_analysis(text)

    def _fallback_analysis(self, text: str) -> Dict:
        """Word-list sentiment analysis used when Gemini is unavailable.

        Counts how many entries of a positive and a negative word list occur
        in ``text`` and labels the text by whichever count is larger.
        """
        positive_words = ["好", "棒", "讚", "優秀", "完美", "喜歡", "愛", "開心", "快樂", "滿意", "精彩", "出色"]
        negative_words = ["壞", "差", "爛", "討厭", "恨", "生氣", "憤怒", "失望", "難過", "糟糕", "惡劣", "可惡"]

        text_lower = text.lower()
        pos_count = sum(1 for word in positive_words if word in text_lower)
        neg_count = sum(1 for word in negative_words if word in text_lower)

        if pos_count > neg_count:
            sentiment = "正面"
        elif neg_count > pos_count:
            sentiment = "負面"
        else:
            sentiment = "中性"

        tokens = text.split()
        # Whitespace tokens longer than one character, at most five.
        keywords = [word for word in tokens if len(word) > 1][:5]

        # Text without spaces is a single token, so the raw ratio can leave
        # [-1, 1]; clamp it to the documented score range.
        raw_score = (pos_count - neg_count) / max(len(tokens), 1)

        return {
            "sentiment": sentiment,
            "sentiment_score": self._coerce_score(raw_score),
            "keywords": keywords,
            "emotions": [sentiment],
            "summary": f"情感: {sentiment}, 關鍵詞: {', '.join(keywords[:3])}",
            "method": "詞典匹配",
        }

    def analyze_image(self, image_path: str, use_gemini: bool = False, api_key: str = "") -> Dict:
        """Analyze the image at ``image_path``, optionally through Gemini.

        Args:
            image_path: Path of the image file; a falsy path yields a "no image" result.
            use_gemini: When True the Gemini API is used (a key is required).
            api_key: Optional key; a non-blank value replaces the stored key.

        Returns:
            A result dict; the Gemini, fallback and missing-key results carry
            ``objects``, ``scene``, ``ocr_text``, ``sentiment``, ``summary``
            and ``method``.
        """
        if not image_path:
            return {"objects": [], "scene": "無圖片", "summary": "無圖片內容", "ocr_text": "", "method": "無圖片"}

        # A freshly supplied key replaces the stored one; never log the key.
        if api_key and api_key.strip():
            self.gemini_api_key = api_key.strip()
            logger.info("圖片分析 API 金鑰已更新")

        if not use_gemini:
            logger.info("使用備用圖片分析")
            return self._fallback_image_analysis(image_path)

        if self.gemini_api_key and self.gemini_api_key.strip():
            logger.info("使用 Gemini Vision API 進行圖片分析")
            return self.analyze_image_with_gemini(image_path)

        logger.warning("未提供有效的 Gemini API 金鑰,使用備用圖片分析")
        return {
            "objects": ["API 金鑰缺失"],
            "scene": "無法分析",
            "ocr_text": "需要有效的 Gemini API 金鑰",
            "sentiment": "中性",
            "summary": "未提供有效的 Gemini API 金鑰",
            "method": "API 金鑰缺失",
        }

    def analyze_image_with_gemini(self, image_path: str) -> Dict:
        """Ask Gemini for a JSON description of the image at ``image_path``.

        Returns the parsed JSON object when the reply contains one, a
        structured wrapper around the raw reply when it does not, and the
        basic PIL-based fallback result when the API call fails.
        """
        try:
            genai.configure(api_key=self.gemini_api_key)
            model = genai.GenerativeModel(self.VISION_MODEL)

            with open(image_path, "rb") as image_file:
                image_data = image_file.read()

            prompt = (
                "請詳細分析這張圖片,並以JSON格式返回結果。請包含:\n"
                "1. 圖片中的物件檢測\n"
                "2. 場景描述\n"
                "3. OCR文字識別(如果圖片中有文字)\n"
                "4. 圖片內容總結\n"
                "5. 情感分析(如果適用)\n"
                "請返回以下格式的JSON:\n"
                "{\n"
                '"objects": ["物件1", "物件2", "物件3"],\n'
                '"scene": "場景描述",\n'
                '"ocr_text": "識別到的文字內容",\n'
                '"sentiment": "正面/負面/中性",\n'
                '"summary": "詳細分析總結",\n'
                '"method": "Gemini Vision API"\n'
                "}"
            )

            logger.info(f"正在調用 Gemini Vision API,圖片路徑: {image_path}")

            response = model.generate_content([
                prompt,
                {
                    # Label the payload with the file's real type rather than
                    # always claiming JPEG.
                    "mime_type": self._guess_image_mime(image_path),
                    "data": image_data,
                },
            ])
            content = response.text
            logger.info(f"Gemini Vision API 回應內容: {content[:100]}...")

            try:
                result = self._extract_json_object(content)
            except (json.JSONDecodeError, ValueError) as e:
                logger.warning(f"JSON 解析失敗: {e},使用原始回應")
                return {
                    "objects": ["圖片分析完成"],
                    "scene": "場景識別完成",
                    "ocr_text": "文字識別完成",
                    "sentiment": "中性",
                    "summary": content[:300] + "..." if len(content) > 300 else content,
                    "method": "Gemini Vision API",
                }

            logger.info("成功解析 Gemini Vision API JSON 回應")
            return result
        except Exception as e:
            logger.error(f"Gemini Vision API 調用失敗: {e}")
            return self._fallback_image_analysis(image_path)

    def _fallback_image_analysis(self, image_path: str) -> Dict:
        """Describe an image from its size, colour mode and file name only."""
        try:
            with Image.open(image_path) as img:
                width, height = img.size
                mode = img.mode

            objects = []
            ocr_text = "無OCR功能(需要Gemini API)"

            # Orientation from the aspect ratio; a square image takes the
            # second branch.
            scene = "橫向圖片" if width > height else "縱向圖片"

            if mode == "RGB":
                objects.append("彩色圖片")
            elif mode == "L":
                objects.append("黑白圖片")

            # Guess content categories from words in the file name.
            filename = os.path.basename(image_path).lower()
            filename_hints = (
                (["person", "people", "face", "人", "臉"], "人物"),
                (["car", "vehicle", "車", "交通工具"], "交通工具"),
                (["food", "meal", "食物", "餐"], "食物"),
                (["nature", "landscape", "風景", "自然"], "自然風景"),
            )
            for hint_words, label in filename_hints:
                if any(word in filename for word in hint_words):
                    objects.append(label)

            return {
                "objects": objects if objects else ["圖片"],
                "scene": scene,
                "ocr_text": ocr_text,
                "sentiment": "中性",
                "summary": f"圖片尺寸: {width}x{height}, 模式: {mode}, 場景: {scene}",
                "method": "基本圖片分析",
            }
        except Exception as e:
            logger.error(f"圖片分析失敗: {e}")
            return {
                "objects": ["分析失敗"],
                "scene": "無法識別",
                "ocr_text": "無法識別",
                "sentiment": "中性",
                "summary": f"圖片分析失敗: {str(e)}",
                "method": "分析失敗",
            }

    def analyze_video(self, video_path: str) -> Dict:
        """Return a placeholder video analysis; the video is not inspected."""
        if not video_path:
            return {"actions": [], "audio_sentiment": "無音頻", "summary": "無影片內容"}

        # Simulated result: no real video processing is performed.
        return {
            "actions": ["影片動作"],
            "audio_sentiment": "中性",
            "summary": "影片分析完成",
        }

    def fuse_analysis(self, text_analysis: Dict, image_analysis: Dict, video_analysis: Dict) -> Dict:
        """Combine the per-modality sentiments into one label by majority vote.

        Each argument may be ``None``. Text and image contribute their
        ``sentiment`` value, video its ``audio_sentiment``. The fused label is
        "正面" or "負面" when that label has strictly more votes than the
        other, otherwise "中性"; labels other than those two do not vote.
        """
        sources = (
            (text_analysis, "sentiment"),
            (image_analysis, "sentiment"),
            (video_analysis, "audio_sentiment"),
        )
        sentiments = [
            analysis[key]
            for analysis, key in sources
            if analysis and analysis.get(key)
        ]

        positive_votes = sentiments.count("正面")
        negative_votes = sentiments.count("負面")

        # Majority vote, so a single positive modality can no longer
        # override several negative ones; ties resolve to neutral.
        if positive_votes > negative_votes:
            fused_sentiment = "正面"
        elif negative_votes > positive_votes:
            fused_sentiment = "負面"
        else:
            fused_sentiment = "中性"

        return {
            "fused_sentiment": fused_sentiment,
            "summary": f"綜合情感: {fused_sentiment}",
            "method": "多模態融合",
        }
# Create the module-level analyzer instance
analyzer = SimpleAnalyzer()
def toggle_api_key_visibility(use_gemini):
    """Build the Gradio update that shows or hides the API-key input.

    The component is made visible exactly when ``use_gemini`` is truthy.
    """
    visibility_update = gr.update(visible=use_gemini)
    return visibility_update
def analyze_interface(text: str, image, video, analysis_type: str, use_gemini: bool, api_key: str):
    """Gradio callback: run every requested analysis and format the results.

    Args:
        text: Text to analyze; may be empty or ``None``.
        image: Uploaded image (an object with a ``name`` attribute, or a path), or ``None``.
        video: Uploaded video (same conventions as ``image``), or ``None``.
        analysis_type: Selected analysis type; accepted but not used here.
        use_gemini: Whether text and image analysis should use the Gemini API.
        api_key: Gemini API key typed by the user; may be empty.

    Returns:
        A 4-tuple of strings: text result, image result, video result and the
        fused summary. On an unexpected error the first element carries the
        error message and the other three are empty.
    """
    try:
        # Debug information. Only the key length is logged; the key itself
        # must never reach the logs.
        logger.info("analyze_interface 被調用:")
        logger.info(f" - use_gemini: {use_gemini}")
        logger.info(f" - api_key 長度: {len(api_key) if api_key else 0}")

        # Resolve uploaded files to filesystem paths.
        image_path = None
        video_path = None
        if image:
            image_path = image.name if hasattr(image, 'name') else str(image)
            logger.info(f"圖片路徑: {image_path}")
        if video:
            video_path = video.name if hasattr(video, 'name') else str(video)

        # A missing text value must not abort the image/video analysis.
        has_text = bool(text and text.strip())

        text_analysis = analyzer.analyze_text(text, use_gemini, api_key) if has_text else None
        image_analysis = analyzer.analyze_image(image_path, use_gemini, api_key) if image_path else None
        video_analysis = analyzer.analyze_video(video_path) if video_path else None

        # Fuse only when at least one modality produced a result.
        multimodal_analysis = None
        if any([text_analysis, image_analysis, video_analysis]):
            multimodal_analysis = analyzer.fuse_analysis(text_analysis, image_analysis, video_analysis)

        text_output = format_text_analysis(text_analysis)
        image_output = format_image_analysis(image_analysis)
        video_output = format_video_analysis(video_analysis)
        summary_output = multimodal_analysis.get("summary", "無分析結果") if multimodal_analysis else "無分析結果"

        return text_output, image_output, video_output, summary_output
    except Exception as e:
        # Top-level UI boundary: report the failure to the user and keep the
        # traceback in the log.
        error_msg = f"處理過程中發生錯誤: {str(e)}"
        logger.exception(error_msg)
        return error_msg, "", "", ""
def format_text_analysis(analysis: Dict) -> str:
    """Render a text-analysis result as one ``label: value`` line per field.

    Only keys present in ``analysis`` are rendered, in a fixed order. Values
    may come from a model's JSON reply, so they are rendered defensively: a
    non-numeric score is shown as-is, and list fields accept non-string items
    or a bare string.

    Returns:
        The formatted lines joined by newlines, or a placeholder message when
        ``analysis`` is empty or ``None``.
    """
    if not analysis:
        return "無文字分析結果"

    def join_items(value) -> str:
        # A bare string is shown whole rather than split into characters.
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        try:
            return ", ".join(str(item) for item in value)
        except TypeError:  # not iterable, e.g. a number
            return str(value)

    formatted = []
    if "sentiment" in analysis:
        formatted.append(f"情感分析: {analysis['sentiment']}")
    if "sentiment_score" in analysis:
        score = analysis["sentiment_score"]
        try:
            formatted.append(f"情感分數: {float(score):.2f}")
        except (TypeError, ValueError):
            # The score is not numeric; show the raw value instead of failing.
            formatted.append(f"情感分數: {score}")
    if "keywords" in analysis:
        formatted.append(f"關鍵詞: {join_items(analysis['keywords'])}")
    if "emotions" in analysis:
        formatted.append(f"情感: {join_items(analysis['emotions'])}")
    if "method" in analysis:
        formatted.append(f"分析方法: {analysis['method']}")
    if "summary" in analysis:
        formatted.append(f"總結: {analysis['summary']}")
    return "\n".join(formatted)
def format_image_analysis(analysis: Dict) -> str:
    """Render an image-analysis result as one ``label: value`` line per field.

    Only keys present in ``analysis`` are rendered, in a fixed order; the OCR
    line is skipped when its text is empty. ``objects`` may come from a
    model's JSON reply, so non-string items and a bare string are accepted.

    Returns:
        The formatted lines joined by newlines, or a placeholder message when
        ``analysis`` is empty or ``None``.
    """
    if not analysis:
        return "無圖片分析結果"

    def join_items(value) -> str:
        # A bare string is shown whole rather than split into characters.
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        try:
            return ", ".join(str(item) for item in value)
        except TypeError:  # not iterable, e.g. a number
            return str(value)

    formatted = []
    if "objects" in analysis:
        formatted.append(f"偵測物件: {join_items(analysis['objects'])}")
    if "scene" in analysis:
        formatted.append(f"場景描述: {analysis['scene']}")
    if "ocr_text" in analysis and analysis['ocr_text']:
        formatted.append(f"OCR文字: {analysis['ocr_text']}")
    if "sentiment" in analysis:
        formatted.append(f"圖片情感: {analysis['sentiment']}")
    if "method" in analysis:
        formatted.append(f"分析方法: {analysis['method']}")
    if "summary" in analysis:
        formatted.append(f"總結: {analysis['summary']}")
    return "\n".join(formatted)
def format_video_analysis(analysis: Dict) -> str:
    """Render a video-analysis result as newline-separated ``label: value`` lines.

    Fields are emitted in a fixed order (actions, audio sentiment, summary)
    and only when their key is present. An empty or missing result yields a
    placeholder message.
    """
    if not analysis:
        return "無影片分析結果"

    # (key, renderer) pairs, listed in output order.
    renderers = (
        ("actions", lambda value: f"動作識別: {', '.join(value)}"),
        ("audio_sentiment", lambda value: f"音頻情感: {value}"),
        ("summary", lambda value: f"總結: {value}"),
    )
    return "\n".join(
        render(analysis[key]) for key, render in renderers if key in analysis
    )
# Usage guide rendered at the bottom of the page. Kept flush-left so the
# Markdown never picks up code-block indentation, with blank lines between
# sections so headings are not absorbed into the preceding list item.
_USAGE_GUIDE_MD = """
### 💡 使用範例

**使用 Gemini API 的文字分析:**
- 勾選「使用 Gemini API」複選框
- 在「Gemini API 金鑰」欄位輸入您的 API 金鑰
- 輸入:「這個新產品真的很棒,我強烈推薦給大家!」
- 分析:進階情感分析、情感檢測、關鍵詞提取

**使用 Gemini API 的圖片分析:**
- 勾選「使用 Gemini API」複選框
- 上傳包含文字的圖片(如海報、標誌、文件)
- 分析:OCR文字識別、物件檢測、場景分析、情感分析

**文字分析(備用方案):**
- 取消勾選「使用 Gemini API」複選框
- 分析:基於詞典的情感分析

**圖片分析(備用方案):**
- 取消勾選「使用 Gemini API」複選框
- 分析:基本圖片資訊分析(尺寸、模式、檔案名稱推測)

**影片分析範例:**
- 上傳:短影片、廣告影片、教學影片
- 分析:動作識別、音頻分析

**多模態分析:**
- 同時上傳多種內容類型
- 系統會進行綜合分析並提供融合結果

### 🔧 Gemini API 設定:
1. 從 [Google AI Studio](https://makersuite.google.com/app/apikey) 獲取 API 金鑰
2. 勾選「使用 Gemini API」複選框
3. 在「Gemini API 金鑰」欄位輸入您的 API 金鑰
4. 點擊「開始分析」以啟用進階分析

### 📸 圖片分析功能:
- **OCR文字識別**: 識別圖片中的文字內容
- **物件檢測**: 識別圖片中的物件
- **場景分析**: 分析圖片場景和內容
- **情感分析**: 分析圖片的情感色彩
- **詳細描述**: 提供完整的圖片內容描述
"""


def create_gradio_app():
    """Build the Gradio Blocks UI and wire its events.

    Layout: inputs (text, image file, video file, analysis type, Gemini
    toggle, API key) in the left column, four read-only result boxes in the
    right column, the usage guide below, then a clear button.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(
        title="社交媒體多模態內容分析系統",
        theme=gr.themes.Soft()
    ) as app:
        # Page header
        gr.Markdown("# 社交媒體多模態內容分析系統")
        gr.Markdown("支援文字、圖片、影片的智能分析與多模態融合")
        gr.Markdown("**新功能**: Gemini API 整合,提供更精確的文字和圖片分析!🚀")

        # Main content area
        with gr.Row():
            # Left column: inputs
            with gr.Column(scale=1):
                gr.Markdown("### 📝 輸入內容")
                text_input = gr.Textbox(
                    label="文字內容",
                    placeholder="請輸入要分析的文字內容...",
                    lines=5
                )
                image_input = gr.File(
                    label="圖片檔案",
                    file_types=["image"],
                    file_count="single"
                )
                video_input = gr.File(
                    label="影片檔案",
                    file_types=["video"],
                    file_count="single"
                )
                analysis_type = gr.Dropdown(
                    choices=[
                        ("綜合分析", "comprehensive"),
                        ("情感分析", "sentiment"),
                        ("內容分類", "content_classification"),
                        ("物件檢測", "object_detection")
                    ],
                    value="comprehensive",
                    label="分析類型"
                )
                use_gemini = gr.Checkbox(
                    label="使用 Gemini API",
                    value=False,
                    info="啟用以使用 Google 的 Gemini AI 進行更精確的分析(包括圖片OCR和物件檢測)"
                )
                # Hidden until the Gemini checkbox is ticked (see the
                # use_gemini.change binding below).
                api_key_input = gr.Textbox(
                    label="Gemini API 金鑰",
                    placeholder="請輸入您的 Gemini API 金鑰...",
                    type="password",
                    visible=False,
                    info="從 Google AI Studio 獲取 API 金鑰"
                )
                analyze_btn = gr.Button(
                    "🔍 開始分析",
                    variant="primary",
                    size="lg"
                )

            # Right column: results
            with gr.Column(scale=1):
                gr.Markdown("### 📊 分析結果")
                text_output = gr.Textbox(
                    label="📝 文字分析結果",
                    lines=8,
                    interactive=False
                )
                image_output = gr.Textbox(
                    label="🖼️ 圖片分析結果",
                    lines=8,
                    interactive=False
                )
                video_output = gr.Textbox(
                    label="🎬 影片分析結果",
                    lines=8,
                    interactive=False
                )
                summary_output = gr.Textbox(
                    label="🎯 綜合分析總結",
                    lines=6,
                    interactive=False
                )

        # Usage examples
        with gr.Row():
            gr.Markdown(_USAGE_GUIDE_MD)

        # Run the analysis when the button is clicked
        analyze_btn.click(
            fn=analyze_interface,
            inputs=[text_input, image_input, video_input, analysis_type, use_gemini, api_key_input],
            outputs=[text_output, image_output, video_output, summary_output]
        )

        # Show or hide the API-key box together with the Gemini checkbox
        use_gemini.change(
            fn=toggle_api_key_visibility,
            inputs=[use_gemini],
            outputs=[api_key_input]
        )

        # Clear button: reset every input and output to its initial value.
        # The tuple order must match the ``outputs`` list below.
        clear_btn = gr.Button("🗑️ 清除所有", variant="secondary")
        clear_btn.click(
            fn=lambda: ("", None, None, "comprehensive", False, "", "", "", "", ""),
            outputs=[text_input, image_input, video_input, analysis_type, use_gemini, api_key_input, text_output, image_output, video_output, summary_output]
        )

    return app
# Build the app and start the server when run as a script
if __name__ == "__main__":
    # Listen on all interfaces on port 7860, without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
        "show_error": True,
        "quiet": False,
    }
    app = create_gradio_app()
    app.launch(**launch_options)