File size: 1,557 Bytes
d670799 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
#!/usr/bin/env python
import re
from pathlib import Path
from utils import replace_link
# Output directory for the generated per-dataset pages; the path is relative,
# so it is resolved against the current working directory.
DATASETS_ROOT = Path('dataset_zoo') # Path to save generated dataset pages.
# Markdown skeleton of the summary page. The ``{num_datasets}`` and
# ``{dataset_msg}`` placeholders are filled in with ``str.format`` by
# ``generate_datasets_pages``. The backslash after the opening quotes keeps
# the rendered text from starting with an empty line.
MODELZOO_TEMPLATE = """\
# Dataset Zoo Summary
In this page, we list [all datasets](#all-supported-datasets) we support. You can click the link to jump to the corresponding dataset pages.
## All supported datasets
* Number of datasets: {num_datasets}
{dataset_msg}
""" # noqa: E501
def generate_datasets_pages():
    """Generate one page per dataset plus the dataset zoo summary page.

    Every ``README.md`` located one directory level below
    ``../../tools/data`` is read, its Markdown links (inline
    ``[text](target)`` and reference-style ``[text]: target``) are passed
    through ``replace_link``, and the result is written to
    ``DATASETS_ROOT/<dataset directory name>.md``.

    A bullet list linking to every generated page is then rendered with
    ``MODELZOO_TEMPLATE`` and written to ``datasetzoo_statistics.md`` in the
    current working directory.

    ``DATASETS_ROOT`` must already exist; this function does not create it.
    """
    # ``Path.glob`` yields entries in arbitrary order; sorting keeps the
    # generated summary identical across runs and platforms.
    readme_paths = sorted(Path('../../tools/data').glob('*/README.md'))
    dataset_msgs = []

    for readme in readme_paths:
        dataset_dir = readme.parent
        # NOTE: ``with_suffix`` replaces whatever follows the last dot of the
        # directory name, so ``foo.bar`` maps to ``foo.md``.
        page = DATASETS_ROOT / dataset_dir.with_suffix('.md').name

        # Explicit UTF-8 so non-ASCII READMEs are read the same way
        # regardless of the locale's default encoding.
        with open(readme, 'r', encoding='utf-8') as f:
            content = f.read()

        # The title is taken from a leading "# Preparing <name>" heading.
        # Fall back to the directory name instead of failing with an opaque
        # AttributeError when a README does not start with that heading.
        title_match = re.match(r'^# Preparing (.*)', content)
        title = title_match.group(1) if title_match else dataset_dir.name

        content = replace_link(r'\[([^\]]+)\]\(([^)]+)\)', '[{}]({})',
                               content, readme)
        content = replace_link(r'\[([^\]]+)\]: (.*)', '[{}]: {}', content,
                               readme)

        # Markdown link targets need forward slashes even on Windows.
        dataset_msgs.append(f'\t - [{title}]({page.as_posix()})')

        with open(page, 'w', encoding='utf-8') as f:
            f.write(content)

    modelzoo = MODELZOO_TEMPLATE.format(
        num_datasets=len(dataset_msgs),
        dataset_msg='\n'.join(dataset_msgs),
    )

    with open('datasetzoo_statistics.md', 'w', encoding='utf-8') as f:
        f.write(modelzoo)
# Script entry point: these statements run at module level.
# generate_datasets_pages() writes its pages into DATASETS_ROOT without
# creating it, so the directory is created first; exist_ok=True makes
# repeated runs succeed when the directory is already there.
DATASETS_ROOT.mkdir(exist_ok=True)
generate_datasets_pages()
|