| import re |
|
|
| from pathlib import Path |
|
|
| |
| |
| |
|
|
# Relative root directories: `json_root` holds the annotation JSON files and
# `image_root` holds the images they refer to. Plain strings are used because
# nothing is interpolated into them.
json_root = "./playground/Datasets/LLaVA-OneVision-COCO/llava_jsons"
image_root = "./playground/Datasets/LLaVA-OneVision-COCO/images"


# Configuration of the ShareGPT4V COCO entry: where its annotation file lives
# and which directory is used as the data root.
SHAREGPT4V_COCO = {
    "annotation_path": f"{json_root}/sharegpt4v_coco.json",
    "data_path": f"{image_root}/",
}


# Registry mapping a dataset name (as accepted by `data_list`) to its
# configuration dict.
data_dict = {
    "sharegpt4v_coco": SHAREGPT4V_COCO,
}
|
|
def parse_sampling_rate(dataset_name):
    """Return the sampling rate encoded at the end of *dataset_name*.

    A name may end in ``%<digits>``; the digits are read as a percentage
    and converted to a fraction (``"name%50"`` -> ``0.5``). A name without
    such a suffix yields ``1.0``.
    """
    suffix = re.search(r"%(\d+)$", dataset_name)
    if suffix is None:
        # No percentage suffix present: use the full rate.
        return 1.0
    percent = int(suffix.group(1))
    return percent / 100.0
|
|
def data_list(dataset_names):
    """Resolve dataset names into a list of configuration dicts.

    Each name may carry a trailing ``%<digits>`` suffix, which is parsed by
    ``parse_sampling_rate`` and stripped before the lookup in ``data_dict``.
    Passing exactly ``["all"]`` selects every name registered in
    ``data_dict``.

    Returns:
        One shallow copy of the registered config per requested name, in
        request order, each extended with a ``"sampling_rate"`` entry.

    Raises:
        ValueError: If a name (after stripping the suffix) is not registered.
    """
    requested = list(data_dict.keys()) if dataset_names == ["all"] else dataset_names

    configs = []
    for raw_name in requested:
        rate = parse_sampling_rate(raw_name)
        base_name = re.sub(r"%(\d+)$", "", raw_name)
        if base_name not in data_dict:
            raise ValueError(f"do not find {base_name}")
        # Copy so the added key never leaks into the shared registry entry.
        entry = data_dict[base_name].copy()
        entry["sampling_rate"] = rate
        configs.append(entry)
    return configs
|
|
if __name__ == "__main__":
    # Smoke check: resolve every registered dataset and print each resulting
    # config. Printing `data_list` itself would only show the function
    # object's repr, not any configuration.
    for config in data_list(["all"]):
        print(config)
| |
|
|