Spaces:
Sleeping
Sleeping
File size: 8,473 Bytes
db4f540 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 |
"""章节验证器"""
from dataclasses import dataclass
from chapterbar.chapter_extractor import Chapter
@dataclass
class ValidationError:
    """A single finding produced while validating a chapter list.

    The same record type is used for hard errors and for advisory warnings;
    ``is_warning`` tells the two apart.
    """

    # Zero-based position of the chapter the finding refers to. The validator
    # uses -1 for findings about the list as a whole (e.g. an empty list).
    chapter_index: int

    # Short machine-readable category, e.g. "time_order" or "time_gap".
    error_type: str

    # Human-readable description of the problem.
    message: str

    # True for advisory findings that do not make the chapter list invalid.
    is_warning: bool = False
class ChapterValidator:
    """Validate a chapter list against the total duration of a video.

    Findings that make the list invalid are collected in ``errors``; advisory
    findings (gaps, unusual durations, long titles) are collected in
    ``warnings``. Both lists hold ``ValidationError`` records and are rebuilt
    on every call to ``validate``.
    """

    # Advisory thresholds: crossing them produces warnings, never errors.
    MIN_CHAPTER_DURATION = 5  # seconds
    MAX_CHAPTER_DURATION = 600  # seconds (10 minutes)
    MAX_TITLE_LENGTH = 100  # characters

    def __init__(self, chapters: list[Chapter], total_duration: float):
        """Store the input to validate.

        Args:
            chapters: Chapters to validate. Adjacent entries are compared with
                each other, so they are expected to be in playback order.
            total_duration: Total length of the video, in seconds.
        """
        self.chapters = chapters
        self.total_duration = total_duration
        self.errors: list[ValidationError] = []
        self.warnings: list[ValidationError] = []

    def validate(self) -> tuple[bool, list[ValidationError], list[ValidationError]]:
        """Run every check and report the findings.

        A chapter with a missing start time, end time or title is reported
        once by the basic-field check; the later checks skip the missing
        value instead of failing on it.

        Returns:
            ``(is_valid, errors, warnings)`` where ``is_valid`` is True when
            no error was recorded (warnings do not affect it).
        """
        self.errors = []
        self.warnings = []

        checks = (
            # Basic checks
            self._check_empty,
            self._check_basic_fields,
            # Timing checks
            self._check_time_order,
            self._check_time_overlap,
            self._check_time_range,
            self._check_time_gaps,
            # Content checks
            self._check_titles,
            self._check_duration,
        )
        for check in checks:
            check()

        return not self.errors, self.errors, self.warnings

    @staticmethod
    def _start_of(chapter):
        """Return the chapter's start time, or None when it is missing."""
        return getattr(chapter, "start_time", None)

    @staticmethod
    def _end_of(chapter):
        """Return the chapter's end time, or None when it is missing."""
        return getattr(chapter, "end_time", None)

    def _add_error(self, index: int, error_type: str, message: str) -> None:
        """Record a finding that makes the chapter list invalid."""
        self.errors.append(
            ValidationError(chapter_index=index, error_type=error_type, message=message)
        )

    def _add_warning(self, index: int, error_type: str, message: str) -> None:
        """Record an advisory finding."""
        self.warnings.append(
            ValidationError(
                chapter_index=index,
                error_type=error_type,
                message=message,
                is_warning=True,
            )
        )

    def _check_empty(self):
        """Report an error when there are no chapters at all."""
        if not self.chapters:
            self._add_error(-1, "empty", "章节列表为空")

    def _check_basic_fields(self):
        """Report chapters that lack a start time, an end time or a title."""
        for i, chapter in enumerate(self.chapters):
            if self._start_of(chapter) is None:
                self._add_error(i, "missing_field", f"章节 {i + 1} 缺少开始时间")
            if self._end_of(chapter) is None:
                self._add_error(i, "missing_field", f"章节 {i + 1} 缺少结束时间")
            if not getattr(chapter, "title", None):
                self._add_error(i, "missing_field", f"章节 {i + 1} 缺少标题")

    def _check_time_order(self):
        """Report chapters whose start time is not before their end time."""
        for i, chapter in enumerate(self.chapters):
            start = self._start_of(chapter)
            end = self._end_of(chapter)
            if start is None or end is None:
                # Already reported as a missing field; comparing would raise.
                continue
            if start >= end:
                self._add_error(
                    i,
                    "time_order",
                    f"章节 {i + 1} 开始时间 ({start}s) >= 结束时间 ({end}s)",
                )

    def _check_time_overlap(self):
        """Report adjacent chapters where the first ends after the next starts."""
        for i, (current, following) in enumerate(zip(self.chapters, self.chapters[1:])):
            end = self._end_of(current)
            next_start = self._start_of(following)
            if end is None or next_start is None:
                continue
            if end > next_start:
                self._add_error(
                    i,
                    "time_overlap",
                    f"章节 {i + 1} 和章节 {i + 2} 时间重叠:章节 {i + 1} 结束于 "
                    f"{end}s,但章节 {i + 2} 开始于 {next_start}s",
                )

    def _check_time_range(self):
        """Report times that are negative or beyond the end of the video."""
        for i, chapter in enumerate(self.chapters):
            start = self._start_of(chapter)
            end = self._end_of(chapter)
            if start is not None and start < 0:
                self._add_error(
                    i,
                    "time_range",
                    f"章节 {i + 1} 开始时间 ({start}s) 不能为负数",
                )
            if end is not None and end > self.total_duration:
                self._add_error(
                    i,
                    "time_range",
                    f"章节 {i + 1} 结束时间 ({end}s) 超出视频总时长 ({self.total_duration}s)",
                )

    def _check_time_gaps(self):
        """Warn about uncovered time before, between and after chapters."""
        if not self.chapters:
            return

        # Does the first chapter start at 0?
        first_start = self._start_of(self.chapters[0])
        if first_start is not None and first_start > 0:
            self._add_warning(
                0,
                "time_gap",
                f"第一个章节从 {first_start}s 开始,前面有 {first_start}s 的间隙",
            )

        # Gaps between consecutive chapters.
        for i, (current, following) in enumerate(zip(self.chapters, self.chapters[1:])):
            end = self._end_of(current)
            next_start = self._start_of(following)
            if end is None or next_start is None:
                continue
            gap = next_start - end
            if gap > 0:
                self._add_warning(
                    i,
                    "time_gap",
                    f"章节 {i + 1} 和章节 {i + 2} 之间有 {gap}s 的间隙",
                )

        # Does the last chapter reach the end of the video?
        last_end = self._end_of(self.chapters[-1])
        if last_end is not None and last_end < self.total_duration:
            gap = self.total_duration - last_end
            self._add_warning(
                len(self.chapters) - 1,
                "time_gap",
                f"最后一个章节结束于 {last_end}s,距离视频结尾还有 {gap}s",
            )

    def _check_titles(self):
        """Report whitespace-only titles and warn about overly long ones."""
        for i, chapter in enumerate(self.chapters):
            title = getattr(chapter, "title", None)
            if not title:
                # Missing or empty titles are reported by _check_basic_fields.
                continue
            if not title.strip():
                self._add_error(i, "empty_title", f"章节 {i + 1} 标题为空")
            if len(title) > self.MAX_TITLE_LENGTH:
                self._add_warning(
                    i,
                    "long_title",
                    f"章节 {i + 1} 标题过长 ({len(title)} 字符),"
                    f"建议不超过 {self.MAX_TITLE_LENGTH} 字符",
                )

    def _check_duration(self):
        """Warn about chapters that are unusually short or unusually long."""
        for i, chapter in enumerate(self.chapters):
            start = self._start_of(chapter)
            end = self._end_of(chapter)
            if start is None or end is None:
                continue
            duration = end - start
            if duration < self.MIN_CHAPTER_DURATION:
                self._add_warning(
                    i,
                    "short_duration",
                    f"章节 {i + 1} 时长过短 ({duration}s),"
                    f"建议至少 {self.MIN_CHAPTER_DURATION} 秒",
                )
            if duration > self.MAX_CHAPTER_DURATION:
                self._add_warning(
                    i,
                    "long_duration",
                    # ":g" renders 600 / 60 as "10", keeping the default text unchanged.
                    f"章节 {i + 1} 时长过长 ({duration}s),"
                    f"建议不超过 {self.MAX_CHAPTER_DURATION / 60:g} 分钟",
                )
|