AniFileBERT / data /dmhy /dmhy_weak_new.manifest.json

完成整个数据集的整理

f4f4e0e 9 days ago

1.07 kB

	{
	"created_at": "2026-05-13T15:26:19.767707+00:00",
	"source_db": "D:\\WorkSpace\\Python\\dmhy-parser\\dmhy_anime.db",
	"output": "data\\dmhy\\dmhy_weak_new.jsonl",
	"min_file_id": 689305,
	"last_file_id": 1675184,
	"db_max_file_id_at_export_start": 1675184,
	"limit": null,
	"stats": {
	"scanned_rows": 985880,
	"video_rows": 556778,
	"duplicate_basenames": 95422,
	"labeled_samples": 378327,
	"skipped_no_episode": 82422,
	"skipped_no_title": 0,
	"skipped_too_short": 606,
	"skipped_too_long": 1
	},
	"label_counts": {
	"B-GROUP": 306878,
	"B-TITLE": 390543,
	"B-EPISODE": 378327,
	"B-RESOLUTION": 156089,
	"B-SOURCE": 180428,
	"O": 1587219,
	"I-TITLE": 1401899,
	"B-SPECIAL": 29468,
	"B-SEASON": 18792,
	"I-GROUP": 517
	},
	"vocab_size": 3000,
	"notes": [
	"Rows are a snapshot of files.id <= last_file_id.",
	"Future incremental export can use --min-id last_file_id+1.",
	"Weak labels target GROUP, TITLE, SEASON, and EPISODE; media tags are boundary labels/noise."
	],
	"examples": []
	}