File size: 1,024 Bytes
b0c0df0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import json

from datasets import Dataset, load_dataset

# Prefix prepended to every record's "videoID" to form its "url" field.
YOUTUBE_WATCH_PREFIX = "https://www.youtube.com/watch?v="

# Hub repository that receives every split pushed by this script.
hub_dataset_path = "lmms-lab/vatex_from_url"


def _load_annotations(json_path):
    """Load an annotation JSON file and add a ``url`` field to each record.

    Args:
        json_path: Path to a JSON file holding a list of dict records, each
            with a ``"videoID"`` string.

    Returns:
        The decoded list of records; every record is updated in place with
        ``record["url"]``.

    Raises:
        KeyError: If a record has no ``"videoID"`` entry.
    """
    # Decode explicitly as UTF-8 instead of the platform default so that
    # non-ASCII captions load identically on every OS/locale.
    with open(json_path, "r", encoding="utf-8") as f:
        records = json.load(f)

    # NOTE(review): the ID is appended verbatim. If the IDs carry anything
    # beyond the bare YouTube id (e.g. a clip-time suffix) the resulting URL
    # will not resolve -- confirm against the annotation files.
    for record in records:
        record["url"] = YOUTUBE_WATCH_PREFIX + record["videoID"]
    return records


def _push_split(json_path, split, config_name):
    """Build a ``Dataset`` from one annotation file and push it to the Hub.

    Args:
        json_path: Annotation file to load (see ``_load_annotations``).
        split: Split name forwarded to ``push_to_hub``.
        config_name: Config name forwarded to ``push_to_hub``.

    Returns:
        The ``Dataset`` that was pushed.
    """
    dataset = Dataset.from_list(_load_annotations(json_path))
    # If different column names are needed, rename before pushing and keep the
    # returned dataset (rename_columns does not modify the dataset in place):
    #     dataset = dataset.rename_columns({"videoID": "video_name", "enCap": "caption"})
    # token=True is forwarded as-is; presumably it selects the locally stored
    # Hugging Face login -- confirm against the datasets documentation.
    dataset.push_to_hub(repo_id=hub_dataset_path, split=split, config_name=config_name, token=True)
    return dataset


# Public test annotations -> "test" split of the "vatex_test" config.
vatex_dataset = _push_split(
    "data/vatex_public_test_english_v1.1.json",
    split="test",
    config_name="vatex_test",
)

# Validation annotations -> "validation" split of the "vatex_val_zh" config.
vatex_dataset = _push_split(
    "data/vatex_validation_v1.0.json",
    split="validation",
    config_name="vatex_val_zh",
)