|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
import os
|
|
|
|
|
|
def filter_npz_by_filenames(npz_path, txt_path, output_path):
    """Copy an ``.npz`` dataset, dropping entries whose uuid is blacklisted.

    The input archive must contain an array under the key ``'arr_0'`` whose
    elements support ``item['uuid']``. Every element whose uuid appears in
    the text file is printed and omitted; the rest are written, in their
    original order, to a new compressed archive (again under ``'arr_0'``).

    Args:
        npz_path: Path of the ``.npz`` archive to read.
        txt_path: Path of a text file with one uuid per line. Surrounding
            whitespace is stripped and blank lines are ignored.
        output_path: Destination path. NumPy appends ``.npz`` to the name
            if it does not already end with it.

    Returns:
        None. Progress and summary counts are printed to stdout.
    """
    # SECURITY: allow_pickle=True unpickles arbitrary objects stored in the
    # archive; only use this on trusted files.
    # The context manager closes the underlying zip file handle once the
    # array has been read into memory.
    with np.load(npz_path, allow_pickle=True) as archive:
        data_list = archive['arr_0']

    with open(txt_path, 'r') as f:
        exclude_filenames = {line.strip() for line in f if line.strip()}

    # Mark entries to keep instead of rebuilding a Python list, so the saved
    # array keeps the dtype of the input even when nothing survives.
    keep_mask = np.ones(len(data_list), dtype=bool)
    for index, item in enumerate(data_list):
        filename = item['uuid']
        if filename in exclude_filenames:
            keep_mask[index] = False
            print(filename)

    filtered_data = data_list[keep_mask]

    total_count = len(data_list)
    kept_count = len(filtered_data)
    excluded_count = total_count - kept_count
    print(f"Original items: {total_count}")
    print(f"Kept items: {kept_count}")
    print(f"Removed items: {excluded_count}")

    print("Saving filtered data")
    # No allow_pickle keyword here: on NumPy versions where
    # savez_compressed does not define that parameter, it would be stored
    # as an extra array named 'allow_pickle' inside the archive.
    np.savez_compressed(output_path, filtered_data)
|
|
|
|
|
|
def main():
    """Filter the train and test archives against the shared issue list.

    Each input archive is processed in turn (train first, then test) and
    written to its ``*_update.npz`` counterpart.
    """
    issue_list = "data_utils/issue_data_list.txt"

    # (input archive, output archive) pairs, processed in this order.
    jobs = (
        ("articulation_xlv2_train.npz", "articulation_xlv2_train_update.npz"),
        ("articulation_xlv2_test.npz", "articulation_xlv2_test_update.npz"),
    )

    for source_npz, destination_npz in jobs:
        filter_npz_by_filenames(source_npz, issue_list, destination_npz)
|
|
|
|
|
|
# Run the filtering only when executed as a script, not when imported.
if __name__ == "__main__":
    main()