File size: 1,557 Bytes
d670799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
import re
from pathlib import Path

from utils import replace_link

DATASETS_ROOT = Path('dataset_zoo')  # Directory the generated dataset pages are written to.
# Markdown skeleton of the summary page. It is filled in with ``str.format``
# using two placeholders: ``num_datasets`` (the number of datasets) and
# ``dataset_msg`` (the pre-rendered list of links, one per dataset).
MODELZOO_TEMPLATE = """\

# Dataset Zoo Summary



In this page, we list [all datasets](#all-supported-datasets) we support. You can click the link to jump to the corresponding dataset pages.



## All supported datasets



* Number of datasets: {num_datasets}

{dataset_msg}



"""  # noqa: E501


def generate_datasets_pages():
    """Generate one markdown page per dataset plus a summary page.

    Every ``../../tools/data/*/README.md`` (relative to the current working
    directory) is read, its inline links (``[text](target)``) and
    reference-style link definitions (``[label]: target``) are passed
    through ``replace_link``, and the result is written to
    ``DATASETS_ROOT/<dataset directory name>.md``. ``DATASETS_ROOT`` must
    already exist.

    Afterwards ``MODELZOO_TEMPLATE`` is rendered with the number of datasets
    and a list of links to the generated pages, and written to
    ``datasetzoo_statistics.md`` in the current working directory.

    The link text is taken from a leading ``# Preparing <title>`` heading;
    when a README does not start with such a heading, the dataset directory
    name is used instead of failing.
    """
    # ``Path.glob`` yields paths in arbitrary order; sort so the summary list
    # is identical across runs and file systems.
    readme_files = sorted(Path('../../tools/data').glob('*/README.md'))
    dataset_msgs = []

    for file in readme_files:
        dataset_name = file.parent.name
        # Append the extension instead of using ``with_suffix`` so that a
        # directory name containing a dot is not truncated.
        copy = DATASETS_ROOT / f'{dataset_name}.md'

        # Explicit UTF-8 keeps the result independent of the platform's
        # default locale encoding.
        with open(file, 'r', encoding='utf-8') as f:
            content = f.read()

        title_match = re.match(r'^# Preparing (.*)', content)
        # Fall back to the directory name rather than crashing with an
        # AttributeError on ``None`` when the heading is missing.
        title = title_match.group(1) if title_match else dataset_name

        content = replace_link(r'\[([^\]]+)\]\(([^)]+)\)', '[{}]({})', content,
                               file)
        content = replace_link(r'\[([^\]]+)\]: (.*)', '[{}]: {}', content,
                               file)
        # Markdown link targets need forward slashes on every platform.
        dataset_msgs.append(f'\t - [{title}]({copy.as_posix()})')

        with open(copy, 'w', encoding='utf-8') as f:
            f.write(content)

    modelzoo = MODELZOO_TEMPLATE.format(
        num_datasets=len(dataset_msgs),
        dataset_msg='\n'.join(dataset_msgs),
    )

    with open('datasetzoo_statistics.md', 'w', encoding='utf-8') as f:
        f.write(modelzoo)


# Runs whenever this module is executed or imported. The output directory is
# created first because generate_datasets_pages() writes its pages into it.
DATASETS_ROOT.mkdir(exist_ok=True)
generate_datasets_pages()