Spaces:
Sleeping
Sleeping
| from datetime import datetime | |
| from typing import Optional, List | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from ..models.schemas import VideoAnalysis, TranscriptSegment | |
| from ..utils.cache import CacheManager | |
| from .youtube_api import YouTubeAPI | |
class ContentAnalyzer:
    """Build keyword-based hate-speech analyses of YouTube video transcripts.

    The analyzer reads video details through ``youtube_api``, downloads the
    transcript with ``YouTubeTranscriptApi`` and reads/writes finished
    analyses through ``cache_manager``.
    """

    def __init__(self, youtube_api: YouTubeAPI, cache_manager: CacheManager):
        """Store the collaborators and set up the keyword table.

        Args:
            youtube_api: Object providing ``get_video_details(video_id)``.
            cache_manager: Object providing ``get_video_analysis(video_id)``
                and ``save_video_analysis(analysis)``.
        """
        self.youtube_api = youtube_api
        self.cache_manager = cache_manager
        # Hate-speech keywords, grouped by category name.
        # NOTE(review): these literals look mis-encoded (Thai code points,
        # presumably Korean UTF-8 text decoded with the wrong codec) -- restore
        # them from the canonical source file. They are kept exactly as found.
        self.hate_speech_keywords = {
            "์ฐจ๋ณ": ["์งฑ๊นจ", "ํํ", "ํ๋ฑ", "๊น์น๋ "],
            "๋นํ": ["๋ณ์ ", "์จ๋ฐ", "๋ฏธ์น"],
            "์ ์น": ["ํญ๋", "ํญ๋", "์ข ๋ถ"],
        }

    def analyze_video(self, video_id: str, channel_id: str) -> Optional[VideoAnalysis]:
        """Analyze one video, preferring a cached result.

        Args:
            video_id: ID passed to the cache, the YouTube API wrapper and the
                transcript API.
            channel_id: Stored unchanged on the resulting analysis.

        Returns:
            The cached analysis when the cache returns a truthy value,
            otherwise a freshly built ``VideoAnalysis``. ``None`` when no video
            details are available or when any step of the analysis raises.
        """
        # A truthy cache entry short-circuits all remote calls.
        cached_analysis = self.cache_manager.get_video_analysis(video_id)
        if cached_analysis:
            return cached_analysis

        # Without video details there is no title / publish date to record.
        video_info = self.youtube_api.get_video_details(video_id)
        if not video_info:
            return None

        try:
            # Fetch the transcript, requesting 'ko' first and then 'en'.
            transcript_list = YouTubeTranscriptApi.get_transcript(
                video_id,
                languages=['ko', 'en'],
            )
            transcript_segments = [
                TranscriptSegment(
                    start=entry['start'],
                    text=entry['text'],
                    duration=entry['duration'],
                )
                for entry in transcript_list
            ]

            # Scan every segment for the configured keywords.
            hate_speech_instances = self._detect_hate_speech(transcript_segments)

            # 'Z' is rewritten to an explicit '+00:00' offset before the
            # timestamp is handed to datetime.fromisoformat.
            publish_date = datetime.fromisoformat(
                video_info['publishedAt'].replace('Z', '+00:00')
            )
            analysis = VideoAnalysis(
                video_id=video_id,
                title=video_info['title'],
                channel_id=channel_id,
                publish_date=publish_date,
                # NOTE(review): naive local time, while publish_date carries an
                # offset whenever 'publishedAt' has one -- confirm how callers
                # compare the two before changing this.
                last_analyzed=datetime.now(),
                transcript_segments=transcript_segments,
                hate_speech_instances=hate_speech_instances,
            )

            # Persist the result so the next call is served from the cache.
            self.cache_manager.save_video_analysis(analysis)
            return analysis
        except Exception as e:
            # Deliberate best-effort: any failure (transcript fetch, missing
            # keys, model construction, cache write) is reported and mapped to
            # None instead of propagating to the caller.
            print(f"Error analyzing video {video_id}: {e}")
            return None

    def _detect_hate_speech(self, segments: List[TranscriptSegment]) -> List[dict]:
        """Find keyword occurrences in transcript segments.

        Matching is a plain substring test of each keyword against
        ``segment.text``. A keyword listed more than once within a category is
        checked only once, so it cannot produce duplicate hits for a segment.

        Args:
            segments: Objects exposing ``start`` and ``text`` attributes.

        Returns:
            One dict per (segment, category, keyword) match with the keys
            ``timestamp``, ``text``, ``category`` and ``keyword``, ordered by
            segment, then category, then keyword.
        """
        instances = []
        for segment in segments:
            for category, keywords in self.hate_speech_keywords.items():
                # dict.fromkeys drops repeated keywords but keeps their
                # first-seen order, so the output order stays stable.
                for keyword in dict.fromkeys(keywords):
                    if keyword in segment.text:
                        instances.append({
                            'timestamp': segment.start,
                            'text': segment.text,
                            'category': category,
                            'keyword': keyword,
                        })
        return instances