|
|
import filecmp |
|
|
import os |
|
|
|
|
|
import fire |
|
|
|
|
|
|
|
|
def compare_results(folder1, folder2, results_ignore_list=None):
    '''
    Compare the 'predictions' and 'results' sub-trees of two output folders.

    For each of the two folders, the first sub-path reported by
    get_all_subpaths() is taken as the run folder; its 'predictions' and
    'results' directories are then compared with compare_folders().

    :param folder1: Path to the first output folder
    :param folder2: Path to the second output folder
    :param results_ignore_list: Extra filenames to ignore, in addition to
                        the built-in ignore lists used for each comparison.
                        A single filename may be given as a plain string.
    :raises: AssertionError if a folder does not exist or is empty, or if
             compare_folders() finds any difference
    '''
    # Normalise the caller-supplied ignore list. A bare string is treated as
    # one filename rather than being iterated character by character.
    if results_ignore_list is None:
        extra_ignored = []
    elif isinstance(results_ignore_list, str):
        extra_ignored = [results_ignore_list]
    else:
        extra_ignored = list(results_ignore_list)

    # Raise explicitly instead of using `assert`, so the validation still
    # runs when Python is started with -O.
    for folder in (folder1, folder2):
        if not os.path.isdir(folder):
            raise AssertionError(f'Folder does not exist: {folder}')

    run_folders = []
    for folder in (folder1, folder2):
        subpaths = get_all_subpaths(folder)
        # Without this check an empty folder surfaces as a bare IndexError.
        if not subpaths:
            raise AssertionError(f'Folder is empty: {folder}')
        run_folders.append(subpaths[0])
    sub_folder1, sub_folder2 = run_folders

    print('compare predicitons')
    compare_folders(os.path.join(sub_folder1, 'predictions'),
                    os.path.join(sub_folder2, 'predictions'),
                    results_ignore_list=['srbench.json'] + extra_ignored)

    print('compare results')
    compare_folders(os.path.join(sub_folder1, 'results'),
                    os.path.join(sub_folder2, 'results'),
                    results_ignore_list=[
                        'dingo_en_192.json', 'dingo_zh_170.json',
                        'qa_dingo_cn.json', 'srbench.json'
                    ] + extra_ignored)
|
|
|
|
|
|
|
|
def compare_folders(folder1, folder2, results_ignore_list=None):
    '''
    Compare the contents of files with the same name in two folders
    and their subfolders,
    ignoring files specified in results_ignore_list.

    :param folder1: Path to the first folder
    :param folder2: Path to the second folder
    :param results_ignore_list: List of filenames to ignore
                        (e.g., ['temp.txt', '.DS_Store']). Names are matched
                        against the bare filename at any depth. A single
                        filename may be given as a plain string.
    :raises: AssertionError if either folder does not exist, or if any
             non-ignored files are missing or have different content
    '''
    # Normalise to a set for O(1) lookups. A bare string is treated as one
    # filename instead of being used for substring matching.
    if results_ignore_list is None:
        ignored = frozenset()
    elif isinstance(results_ignore_list, str):
        ignored = frozenset([results_ignore_list])
    else:
        ignored = frozenset(results_ignore_list)

    # Raise explicitly instead of using `assert`, so the validation still
    # runs when Python is started with -O.
    for folder in (folder1, folder2):
        if not os.path.isdir(folder):
            raise AssertionError(f'Folder does not exist: {folder}')

    diff_files = []

    # Pass 1: every file of folder1 must exist in folder2 with equal content.
    for root, dirs, files in os.walk(folder1):
        # Sorting dirs in place fixes the traversal order, so the report is
        # deterministic across file systems.
        dirs.sort()
        for file in sorted(files):
            if file in ignored:
                print('ignore case: ' + file)
                continue

            file1 = os.path.join(root, file)
            rel_path = os.path.relpath(file1, folder1)
            file2 = os.path.join(folder2, rel_path)

            if not os.path.exists(file2):
                diff_files.append((rel_path, 'File missing in second folder'))
            elif not filecmp.cmp(file1, file2, shallow=False):
                # shallow=False forces a byte-by-byte content comparison.
                diff_files.append((rel_path, 'Content differs'))

    # Pass 2: report files that exist only in folder2. Content was already
    # compared in pass 1 for files present on both sides.
    for root, dirs, files in os.walk(folder2):
        dirs.sort()
        for file in sorted(files):
            if file in ignored:
                continue

            rel_path = os.path.relpath(os.path.join(root, file), folder2)
            if not os.path.exists(os.path.join(folder1, rel_path)):
                diff_files.append((rel_path, 'File missing in first folder'))

    if diff_files:
        error_msg = 'Found differences in files:\n'
        error_msg += '\n'.join(f'{path}: {reason}'
                               for path, reason in diff_files)
        raise AssertionError(error_msg)
|
|
|
|
|
|
|
|
def get_all_subpaths(directory):
    '''
    Collect every path found beneath a directory.

    The tree is walked with os.walk; for each visited directory its
    sub-directories are listed first, followed by its files.

    Args:
        directory (str): The root directory path to search

    Returns:
        list: Full paths of all directories and files below ``directory``

    Raises:
        ValueError: If ``directory`` is not an existing directory
    '''
    if not os.path.isdir(directory):
        raise ValueError(f'Directory does not exist: {directory}')

    collected = []
    for parent, folder_names, file_names in os.walk(directory):
        # Directories first, then files, each joined onto the current parent.
        collected.extend(
            os.path.join(parent, entry) for entry in folder_names + file_names)
    return collected
|
|
|
|
|
|
|
|
# Script entry point: hand control to python-fire only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    fire.Fire()
|
|
|