AniFileBERT / data /dmhy /dmhy_weak.manifest.json
ModerRAS's picture
完成整个数据集的整理
f4f4e0e
{
"created_at": "2026-05-14T00:01:38.686220+00:00",
"source_db": "D:\\WorkSpace\\Python\\dmhy-parser\\dmhy_anime.db",
"output": "data\\dmhy\\dmhy_weak_v3.jsonl",
"min_file_id": 1,
"last_file_id": 1675184,
"db_max_file_id_at_export_start": 1675184,
"limit": null,
"stats": {
"scanned_rows": 1675184,
"video_rows": 920699,
"duplicate_basenames": 162707,
"labeled_samples": 632002,
"skipped_no_episode": 125346,
"skipped_no_title": 0,
"skipped_too_short": 643,
"skipped_too_long": 1
},
"label_counts": {
"B-TITLE": 656614,
"I-TITLE": 3786494,
"O": 4302284,
"B-SEASON": 66497,
"B-EPISODE": 632002,
"B-RESOLUTION": 305724,
"B-SOURCE": 432921,
"B-GROUP": 521259,
"I-GROUP": 748796,
"B-SPECIAL": 42960
},
"vocab_size": 3000,
"notes": [
"Rows are a snapshot of files.id <= last_file_id.",
"Future incremental export can use --min-id last_file_id+1.",
"Weak labels target GROUP, TITLE, SEASON, and EPISODE; media tags are boundary labels/noise."
],
"examples": [
{
"file_id": 1,
"filename": "Witch.Hat.Atelier.S01E07.1080p.NF.WEB-DL.JPN.AAC2.0.H.264.MSubs-ToonsHub",
"tokens": [
"Witch",
".",
"Hat",
".",
"Atelier",
".",
"S01",
"E07",
".",
"1080p",
".",
"NF",
".",
"WEB-DL",
".",
"JP",
"N",
".",
"AAC",
"2",
".",
"0",
".",
"H.264",
".",
"MSubs",
"-",
"ToonsHub"
],
"labels": [
"B-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"O",
"B-SEASON",
"B-EPISODE",
"O",
"B-RESOLUTION",
"O",
"B-SOURCE",
"O",
"B-SOURCE",
"O",
"B-SOURCE",
"O",
"O",
"B-SOURCE",
"O",
"O",
"O",
"O",
"B-SOURCE",
"O",
"B-SOURCE",
"O",
"O"
]
},
{
"file_id": 2,
"filename": "[LoliHouse] Maid-san wa Taberu Dake - 07 [WebRip 1080p HEVC-10bit AAC ASSx2]",
"tokens": [
"[",
"LoliHouse",
"]",
" ",
"Maid",
"-",
"san",
" ",
"wa",
" ",
"Taberu",
" ",
"Dake",
" ",
"-",
" ",
"07",
" ",
"[WebRip 1080p HEVC-10bit AAC ASSx2]"
],
"labels": [
"O",
"B-GROUP",
"O",
"O",
"B-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"O",
"O",
"O",
"B-EPISODE",
"O",
"O"
]
},
{
"file_id": 3,
"filename": "[ANi] 異世界悠閒農家 2 - 06 [1080P][Baha][WEB-DL][AAC AVC][CHT]",
"tokens": [
"[",
"ANi",
"]",
" ",
"異",
"世",
"界",
"悠",
"閒",
"農",
"家",
" ",
"2",
" ",
"-",
" ",
"06",
" ",
"[1080P]",
"[Baha]",
"[WEB-DL]",
"[AAC AVC]",
"[CHT]"
],
"labels": [
"O",
"B-GROUP",
"O",
"O",
"B-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"O",
"B-SEASON",
"O",
"O",
"O",
"B-EPISODE",
"O",
"B-RESOLUTION",
"B-SOURCE",
"B-SOURCE",
"O",
"B-SOURCE"
]
},
{
"file_id": 4,
"filename": "[ANi] 木頭風紀委員和迷你裙 JK 的故事 - 06 [1080P][Baha][WEB-DL][AAC AVC][CHT]",
"tokens": [
"[",
"ANi",
"]",
" ",
"木",
"頭",
"風",
"紀",
"委",
"員",
"和",
"迷",
"你",
"裙",
" ",
"JK",
" ",
"的",
"故",
"事",
" ",
"-",
" ",
"06",
" ",
"[1080P]",
"[Baha]",
"[WEB-DL]",
"[AAC AVC]",
"[CHT]"
],
"labels": [
"O",
"B-GROUP",
"O",
"O",
"B-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"O",
"O",
"O",
"B-EPISODE",
"O",
"B-RESOLUTION",
"B-SOURCE",
"B-SOURCE",
"O",
"B-SOURCE"
]
},
{
"file_id": 5,
"filename": "[KissSub][Shunkashuutou Daikousha - Haru no Mai][05][1080P][GB][MP4]",
"tokens": [
"[",
"KissSub",
"]",
"[",
"Shunkashuutou",
" ",
"Daikousha",
" ",
"-",
" ",
"Haru",
" ",
"no",
" ",
"Mai",
"]",
"[05]",
"[1080P]",
"[GB]",
"[MP4]"
],
"labels": [
"O",
"B-GROUP",
"O",
"O",
"B-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"O",
"B-EPISODE",
"B-RESOLUTION",
"B-SOURCE",
"O"
]
},
{
"file_id": 6,
"filename": "[KissSub][Shunkashuutou Daikousha - Haru no Mai][06][1080P][GB][MP4]",
"tokens": [
"[",
"KissSub",
"]",
"[",
"Shunkashuutou",
" ",
"Daikousha",
" ",
"-",
" ",
"Haru",
" ",
"no",
" ",
"Mai",
"]",
"[06]",
"[1080P]",
"[GB]",
"[MP4]"
],
"labels": [
"O",
"B-GROUP",
"O",
"O",
"B-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"O",
"B-EPISODE",
"B-RESOLUTION",
"B-SOURCE",
"O"
]
},
{
"file_id": 7,
"filename": "[KissSub][Shunkashuutou Daikousha - Haru no Mai][06][1080P][BIG5][MP4]",
"tokens": [
"[",
"KissSub",
"]",
"[",
"Shunkashuutou",
" ",
"Daikousha",
" ",
"-",
" ",
"Haru",
" ",
"no",
" ",
"Mai",
"]",
"[06]",
"[1080P]",
"[BIG5]",
"[MP4]"
],
"labels": [
"O",
"B-GROUP",
"O",
"O",
"B-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"O",
"B-EPISODE",
"B-RESOLUTION",
"B-SOURCE",
"O"
]
},
{
"file_id": 8,
"filename": "[KissSub][Shunkashuutou Daikousha - Haru no Mai][05][1080P][BIG5][MP4]",
"tokens": [
"[",
"KissSub",
"]",
"[",
"Shunkashuutou",
" ",
"Daikousha",
" ",
"-",
" ",
"Haru",
" ",
"no",
" ",
"Mai",
"]",
"[05]",
"[1080P]",
"[BIG5]",
"[MP4]"
],
"labels": [
"O",
"B-GROUP",
"O",
"O",
"B-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"O",
"B-EPISODE",
"B-RESOLUTION",
"B-SOURCE",
"O"
]
},
{
"file_id": 9,
"filename": "[Airota][Sousou no Frieren][29][1080p AVC AAC][CHT]",
"tokens": [
"[",
"Airota",
"]",
"[",
"Sousou",
" ",
"no",
" ",
"Frieren",
"]",
"[29]",
"[1080p AVC AAC]",
"[CHT]"
],
"labels": [
"O",
"B-GROUP",
"O",
"O",
"B-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"O",
"B-EPISODE",
"O",
"B-SOURCE"
]
},
{
"file_id": 10,
"filename": "[Airota][Sousou no Frieren][30][1080p AVC AAC][CHT]",
"tokens": [
"[",
"Airota",
"]",
"[",
"Sousou",
" ",
"no",
" ",
"Frieren",
"]",
"[30]",
"[1080p AVC AAC]",
"[CHT]"
],
"labels": [
"O",
"B-GROUP",
"O",
"O",
"B-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"I-TITLE",
"O",
"B-EPISODE",
"O",
"B-SOURCE"
]
}
]
}