Spaces:
Sleeping
Sleeping
| from dataclasses import dataclass, field | |
| from datetime import datetime | |
| from typing import List | |
@dataclass
class YoutubeTranscript:
    """Transcript record collected for a single YouTube video.

    Attributes:
        video_id: Identifier of the video the transcript belongs to.
        channel_id: Identifier of the channel that owns the video.
        channel_handle: Handle of the channel that owns the video.
        title: Title of the video.
        transcript_segments: Transcript pieces as plain dicts.
            NOTE(review): the dict schema is not defined in this module;
            confirm the expected keys against the producer.
        collected_at: When this record was collected.
    """

    video_id: str
    channel_id: str
    channel_handle: str
    title: str
    transcript_segments: List[dict]
    collected_at: datetime
@dataclass
class ChunkedSegment:
    """One chunk of a transcript together with its toxicity score."""

    start: float  # chunk start time
    end: float  # chunk end time
    transcript: str  # text of this chunk
    toxicity_score: float = 0.0  # defaults to 0.0 until a score is assigned
@dataclass
class AnalyzedTranscript:
    """Analysis result for one video's transcript.

    Attributes:
        video_id: Identifier of the analyzed video.
        chunk_count: Number of chunks; defaults to 0. It is a plain field and
            is not derived from ``chunked_segments`` by this class.
        chunked_segments: The analyzed chunks. Each instance gets its own
            fresh list via ``default_factory``.
        is_toxic: Overall toxicity flag; defaults to ``False``.
    """

    video_id: str
    chunk_count: int = 0
    chunked_segments: List[ChunkedSegment] = field(default_factory=list)
    is_toxic: bool = False
| class YoutubeVideo: | |
| channel_id: str | |
| channel_handle: str | |
| video_id: str | |
| title: str | |
| description: str | |
| thumbnail_url: str | |
| published_at: datetime | |
| collected_at: datetime | |
| thumbnail_url_highres: str | None = None | |
| def __post_init__(self): | |
| self.published_at = datetime.fromisoformat(self.published_at) | |
| self.collected_at = datetime.fromisoformat(self.collected_at) | |
| if self.thumbnail_url_highres is None: | |
| self.thumbnail_url_highres = self.thumbnail_url.replace( | |
| "default", "maxresdefault" | |
| ) | |
| class YoutubeChannel: | |
| channel_id: str | |
| channel_handle: str | |
| channel_name: str | |
| channel_description: str | |
| channel_thumbnail: str | |
| subscriber_count: int | |
| video_count: int | |
| view_count: int | |
| channel_url: str | |
| created_at: datetime | |
| collected_at: datetime | |
| channel_thumbnail_highres: str | None = None | |
| def __post_init__(self): | |
| self.created_at = datetime.fromisoformat(self.created_at) | |
| self.collected_at = datetime.fromisoformat(self.collected_at) | |
| if self.channel_thumbnail_highres is None: | |
| self.channel_thumbnail_highres = self.channel_thumbnail.replace( | |
| "s88", "s400" | |
| ) | |