518_yt_monitor / src /models /schemas.py
atoye1's picture
major commit with data files
b9cc1a2
from dataclasses import dataclass, field
from datetime import datetime
from typing import List
@dataclass
class YoutubeTranscript:
    """Record holding the transcript collected for one YouTube video.

    All six fields are required; there are no defaults and no
    post-processing of the supplied values.
    """

    # Identifier of the video.
    video_id: str
    # Identifier and handle of the channel associated with the video.
    channel_id: str
    channel_handle: str
    # Title of the video.
    title: str
    # Transcript pieces, one dict per segment.
    # NOTE(review): the dict keys are not defined in this module -- confirm
    # the schema against whatever produces these segments.
    transcript_segments: List[dict]
    # When this transcript was collected.
    collected_at: datetime
@dataclass
class ChunkedSegment:
    """One chunk of a transcript together with its toxicity score."""

    # Chunk start time.
    start: float
    # Chunk end time.
    end: float
    # Text of this span of the transcript.
    transcript: str
    # Toxicity score for the chunk; defaults to 0.0.
    toxicity_score: float = field(default=0.0)
@dataclass
class AnalyzedTranscript:
    """Analysis result for one video's transcript.

    Only ``video_id`` is required; the remaining fields start out as an
    empty, non-toxic result.
    """

    # Identifier of the analysed video.
    video_id: str
    # Number of chunks; defaults to 0.
    # NOTE(review): nothing here keeps this in sync with
    # ``len(chunked_segments)`` -- callers must set it themselves.
    chunk_count: int = field(default=0)
    # The chunks themselves; each instance gets its own fresh list.
    chunked_segments: List[ChunkedSegment] = field(default_factory=list)
    # Overall toxicity flag; defaults to False.
    is_toxic: bool = field(default=False)
@dataclass
class YoutubeVideo:
channel_id: str
channel_handle: str
video_id: str
title: str
description: str
thumbnail_url: str
published_at: datetime
collected_at: datetime
thumbnail_url_highres: str | None = None
def __post_init__(self):
self.published_at = datetime.fromisoformat(self.published_at)
self.collected_at = datetime.fromisoformat(self.collected_at)
if self.thumbnail_url_highres is None:
self.thumbnail_url_highres = self.thumbnail_url.replace(
"default", "maxresdefault"
)
@dataclass
class YoutubeChannel:
channel_id: str
channel_handle: str
channel_name: str
channel_description: str
channel_thumbnail: str
subscriber_count: int
video_count: int
view_count: int
channel_url: str
created_at: datetime
collected_at: datetime
channel_thumbnail_highres: str | None = None
def __post_init__(self):
self.created_at = datetime.fromisoformat(self.created_at)
self.collected_at = datetime.fromisoformat(self.collected_at)
if self.channel_thumbnail_highres is None:
self.channel_thumbnail_highres = self.channel_thumbnail.replace(
"s88", "s400"
)