koltrust-simulator / data /dataset_stats.json
liamxdev's picture
Upload folder using huggingface_hub
270e108 verified
Raw
History Blame Contribute Delete
1.58 kB
{
"dataset_name": "Vietnam KOL Trustworthiness Dataset",
"built_at": "2026-06-15T10:29:04Z",
"source": {
"youtube": "YouTube Data API v3 public data",
"tiktok": "TikTokApi public data"
},
"raw": {
"youtube_channels": 207,
"youtube_videos": 570,
"youtube_comments": 2993,
"tiktok_creators": 29,
"tiktok_videos": 585
},
"youtube": {
"channels": 100,
"videos": 450
},
"tiktok": {
"creators": 15,
"videos": 305
},
"unified": {
"feature_rows": 755,
"platforms": [
"youtube",
"tiktok"
]
},
"splits": {
"strategy": "creator_id_hash_60_20_20",
"train_rows": 431,
"eval_rows": 231,
"analysis_profile_rows": 93,
"train_comment_rows": 1398,
"eval_comment_rows": 760,
"analysis_profile_comment_rows": 835,
"simulator_profile_creators": 16
},
"comments": {
"sentiment_rows": 2993,
"anonymized": true
},
"sample": {
"kafka_events": 755,
"fixture_rows": 0,
"zero_view_rows": 0
},
"features": {
"engagement_rows": 755,
"suspicious_rows": 755,
"trust_score_rows": 120
},
"quality": {
"removed_zero_view_youtube_videos": 3,
"removed_zero_view_tiktok_videos": 0,
"duplicate_youtube_channels_removed": 107,
"duplicate_youtube_videos_removed": 117,
"duplicate_tiktok_creators_removed": 14,
"duplicate_tiktok_videos_removed": 280,
"possible_mojibake_output_rows": 0
},
"label_notice": "Rule-generated labels are not ground truth."
}