| """ | |
| Please refer to data/README.md for data download and preparation. | |
| Then run this script to preprocess data. | |
| """ | |
| import os, sys | |
| from data_io.process_raw_script import ( | |
| convert_janelia_json, | |
| normalize_root_and_check, | |
| filter_axon_and_check, | |
| summarize_branch, | |
| split_sample_10fold_cv_and_merge, | |
| ) | |
| import pandas as pd | |
| data_path = "./data" | |
| # convert JML json file | |
| convert_janelia_json( | |
| os.path.join(data_path, "raw/janelia_mouselight/json30/*.json"), | |
| os.path.join(data_path, "raw/janelia_mouselight/swc"), | |
| os.path.join(data_path, "info/JML_info_swc.csv"), | |
| ) | |
| # preprocess data | |
| for source in ["janelia_mouselight", "allen_cell_type", "bil"]: | |
| print(f"Processing:{source}") | |
| # normalize neuron's center, orientation and size | |
| print(f"Normalize neuron") | |
| folder_in = f"{data_path}/raw/{source}/swc/" | |
| folder_out = f"{data_path}/raw/{source}/swc_soma0/" | |
| if source == "bil": | |
| normalize_root_and_check(folder_in + "*reg.swc", folder_out) | |
| # some BIL reconstructions are not correctly scaled. this will fix them | |
| normalize_root_and_check( | |
| folder_in + "*__reg.swc", folder_out, scale=[0.114, 0.114, 0.28] | |
| ) | |
| else: | |
| normalize_root_and_check(folder_in + "*.swc", folder_out) | |
| # remove axon file | |
| print(f"Remove axons") | |
| folder_in = f"{data_path}/raw/{source}/swc_soma0/*.swc" | |
| folder_out = f"{data_path}/dendrite/{source}/swc_soma0/" | |
| filter_axon_and_check(folder_in, folder_out) | |
| print(f"Calculate features") | |
| folder_in = f"{data_path}/dendrite/{source}/swc_soma0/*.swc" | |
| folder_out = f"{data_path}/dendrite/{source}/eswc_soma0/" | |
| summarize_branch(folder_in, folder_out) | |
| # split data into 10 folds | |
| split_sample_10fold_cv_and_merge(data_path) | |
| # hack folder creation | |
| folder_names = ["allen_cell_type", "bil", "janelia_mouselight"] | |
| all_wo_others = { | |
| "VPM": 0, | |
| "Isocortex_layer23": 1, | |
| "Isocortex_layer4": 2, | |
| "PRE": 3, | |
| "SUB": 4, | |
| "CP": 5, | |
| "VPL": 6, | |
| "Isocortex_layer6": 7, | |
| "MG": 8, | |
| "Isocortex_layer5": 9, | |
| } | |
| for i, split_csv in enumerate( | |
| [ | |
| f"{data_path}/info/ACT_info_swc_10folds.csv", | |
| f"{data_path}/info/BIL_info_swc_10folds.csv", | |
| f"{data_path}/info/JML_info_swc_10folds.csv", | |
| ] | |
| ): | |
| csv = pd.read_csv(split_csv) | |
| folder_name = folder_names[i] | |
| for split in range(10): | |
| for fname in csv[csv["model__fold"] == split]["swc__fname"]: | |
| # get acronym from "structure_merge__acronym" | |
| acronym = csv[csv["swc__fname"] == fname][ | |
| "structure_merge__acronym" | |
| ].values[0] | |
| if acronym not in all_wo_others: | |
| if acronym == "Isocortex_layer2/3": | |
| acronym = "Isocortex_layer23" | |
| else: | |
| continue | |
| os.makedirs( | |
| f"{data_path}/dendrite/all_eswc_soma0_ssl/{acronym}/{folder_name}-{split}/", | |
| exist_ok=True, | |
| ) | |
| source_path = os.path.abspath( | |
| f"{data_path}/dendrite/{folder_name}/eswc_soma0/{fname}" | |
| ) | |
| target_path = f"{data_path}/dendrite/all_eswc_soma0_ssl/{acronym}/{folder_name}-{split}/{fname}" | |
| # Check if the source path is a valid file | |
| if os.path.isfile(source_path): | |
| os.symlink(source_path, target_path) | |
| else: | |
| print(f"{source_path} is not a valid file!") | |
| # # hack folder creation | |
| # folder_names = ["allen_cell_type"] | |
| # all_wo_others = { | |
| # "Isocortex_layer23": 0, | |
| # "Isocortex_layer4": 1, | |
| # "Isocortex_layer5": 2, | |
| # "Isocortex_layer6": 3, | |
| # } | |
| # for i, split_csv in enumerate( | |
| # [ | |
| # f"{data_path}/info/ACT_info_swc_10folds.csv", | |
| # # f"{data_path}/info/BIL_info_swc_10folds.csv", | |
| # # f"{data_path}/info/JML_info_swc_10folds.csv", | |
| # ] | |
| # ): | |
| # csv = pd.read_csv(split_csv) | |
| # folder_name = folder_names[i] | |
| # for split in range(10): | |
| # for fname in csv[csv["model__fold"] == split]["swc__fname"]: | |
| # # get acronym from "structure_merge__acronym" | |
| # acronym = csv[csv["swc__fname"] == fname][ | |
| # "structure_merge__acronym" | |
| # ].values[0] | |
| # if acronym not in all_wo_others: | |
| # if acronym == "Isocortex_layer2/3": | |
| # acronym = "Isocortex_layer23" | |
| # else: | |
| # continue | |
| # os.makedirs( | |
| # f"{data_path}/dendrite/ACT/{acronym}/{folder_name}-{split}/", | |
| # exist_ok=True, | |
| # ) | |
| # source_path = os.path.abspath( | |
| # f"{data_path}/dendrite/{folder_name}/eswc_soma0/{fname}" | |
| # ) | |
| # target_path = f"{data_path}/dendrite/ACT/{acronym}/{folder_name}-{split}/{fname}" | |
| # # Check if the source path is a valid file | |
| # if os.path.isfile(source_path): | |
| # os.symlink(source_path, target_path) | |
| # else: | |
| # print(f"{source_path} is not a valid file!") | |
| # # hack folder creation | |
| # folder_names = ["janelia_mouselight"] | |
| # all_wo_others = { | |
| # "Isocortex_layer23": 0, | |
| # "Isocortex_layer5": 1, | |
| # "Isocortex_layer6": 2, | |
| # "VPM": 3, | |
| # } | |
| # for i, split_csv in enumerate( | |
| # [ | |
| # # f"{data_path}/info/ACT_info_swc_10folds.csv", | |
| # # f"{data_path}/info/BIL_info_swc_10folds.csv", | |
| # f"{data_path}/info/JML_info_swc_10folds.csv", | |
| # ] | |
| # ): | |
| # csv = pd.read_csv(split_csv) | |
| # folder_name = folder_names[i] | |
| # for split in range(10): | |
| # for fname in csv[csv["model__fold"] == split]["swc__fname"]: | |
| # # get acronym from "structure_merge__acronym" | |
| # acronym = csv[csv["swc__fname"] == fname][ | |
| # "structure_merge__acronym" | |
| # ].values[0] | |
| # if acronym not in all_wo_others: | |
| # if acronym == "Isocortex_layer2/3": | |
| # acronym = "Isocortex_layer23" | |
| # else: | |
| # continue | |
| # os.makedirs( | |
| # f"{data_path}/dendrite/JML/{acronym}/{folder_name}-{split}/", | |
| # exist_ok=True, | |
| # ) | |
| # source_path = os.path.abspath( | |
| # f"{data_path}/dendrite/{folder_name}/eswc_soma0/{fname}" | |
| # ) | |
| # target_path = f"{data_path}/dendrite/JML/{acronym}/{folder_name}-{split}/{fname}" | |
| # # Check if the source path is a valid file | |
| # if os.path.isfile(source_path): | |
| # os.symlink(source_path, target_path) | |
| # else: | |
| # print(f"{source_path} is not a valid file!") | |
| # # hack folder creation | |
| # folder_names = ["bil"] | |
| # all_wo_others = { | |
| # "CP": 0, | |
| # "Isocortex_layer23": 1, | |
| # "Isocortex_layer4": 2, | |
| # "Isocortex_layer5": 3, | |
| # "Isocortex_layer6": 4, | |
| # "VPM": 5, | |
| # } | |
| # for i, split_csv in enumerate( | |
| # [ | |
| # # f"{data_path}/info/ACT_info_swc_10folds.csv", | |
| # f"{data_path}/info/BIL_info_swc_10folds.csv", | |
| # # f"{data_path}/info/JML_info_swc_10folds.csv", | |
| # ] | |
| # ): | |
| # csv = pd.read_csv(split_csv) | |
| # folder_name = folder_names[i] | |
| # for split in range(10): | |
| # for fname in csv[csv["model__fold"] == split]["swc__fname"]: | |
| # # get acronym from "structure_merge__acronym" | |
| # acronym = csv[csv["swc__fname"] == fname][ | |
| # "structure_merge__acronym" | |
| # ].values[0] | |
| # if acronym not in all_wo_others: | |
| # if acronym == "Isocortex_layer2/3": | |
| # acronym = "Isocortex_layer23" | |
| # else: | |
| # continue | |
| # os.makedirs( | |
| # f"{data_path}/dendrite/BIL/{acronym}/{folder_name}-{split}/", | |
| # exist_ok=True, | |
| # ) | |
| # source_path = os.path.abspath( | |
| # f"{data_path}/dendrite/{folder_name}/eswc_soma0/{fname}" | |
| # ) | |
| # target_path = f"{data_path}/dendrite/BIL/{acronym}/{folder_name}-{split}/{fname}" | |
| # # Check if the source path is a valid file | |
| # if os.path.isfile(source_path): | |
| # os.symlink(source_path, target_path) | |
| # else: | |
| # print(f"{source_path} is not a valid file!") |