Spaces:
Build error
Build error
| import argparse | |
| import copy | |
| import difflib | |
| import json | |
| import os | |
| import traceback | |
def insert_line_in_string(input_string, new_str, insert_line):
    """
    Splice ``new_str`` into ``input_string`` at a line boundary.

    Both strings have their tabs expanded to spaces and are split on newline
    characters before being recombined.

    :param input_string: The text to insert into.
    :param new_str: The text to insert; it may span several lines.
    :param insert_line: How many leading lines of ``input_string`` stay in
        front of the inserted text (0 places it at the very top; a value past
        the end appends it).
    :return: The recombined text, joined with newline characters.
    """
    existing_lines = input_string.expandtabs().split('\n')
    inserted_lines = new_str.expandtabs().split('\n')

    # The same slice bound is used on both sides so no line is lost or repeated.
    head = existing_lines[:insert_line]
    tail = existing_lines[insert_line:]
    return '\n'.join([*head, *inserted_lines, *tail])
def print_string_diff(original, modified):
    """
    Print a unified diff between two strings to standard output.

    Each diff line (file headers, hunk headers and content lines) is printed
    on its own line. When the two strings have identical lines the diff is
    empty and a single blank line is printed.

    :param original: The original string.
    :param modified: The modified string.
    """
    # Split without line endings and ask difflib for terminator-free output,
    # then join with '\n' ourselves. Mixing keepends=True input with
    # lineterm='' leaves the '---'/'+++'/'@@' control lines without a newline,
    # and a last line lacking a trailing newline would fuse with its neighbour.
    diff_lines = difflib.unified_diff(
        original.splitlines(),
        modified.splitlines(),
        fromfile='original',
        tofile='modified',
        lineterm='',
    )
    print('\n'.join(diff_lines))
def _parse_tool_arguments(raw_arguments):
    """Decode one tool call's ``arguments`` payload without executing it."""
    # Local import so this block does not depend on the file-level imports.
    import ast

    if isinstance(raw_arguments, dict):
        return raw_arguments
    # SECURITY: the arguments are model-produced text, so they are decoded as
    # data rather than passed to eval(). JSON is tried first because eval()
    # cannot handle JSON's true/false/null; literal_eval covers payloads that
    # were stored as Python literals instead.
    try:
        return json.loads(raw_arguments)
    except ValueError:
        return ast.literal_eval(raw_arguments)


def _apply_tool_call(test_suite, tool_call_dict):
    """Return ``test_suite`` after applying one decoded editor command to it."""
    command = tool_call_dict['command']
    if command == 'create':
        test_suite = tool_call_dict['file_text']
    if (
        command != 'str_replace'
        and command != 'insert'
        and 'coverage' not in command
    ):
        # Surface every command that is not an in-place edit or coverage run.
        print(command)
    if command == 'insert':
        test_suite = insert_line_in_string(
            test_suite,
            tool_call_dict['new_str'],
            tool_call_dict['insert_line'],
        )
    if command == 'str_replace':
        # Only an unambiguous (exactly one occurrence) replacement is applied.
        if test_suite.count(tool_call_dict['old_str']) == 1:
            test_suite = test_suite.replace(
                tool_call_dict['old_str'],
                tool_call_dict['new_str'],
            )
    return test_suite


def parse_json_files(root_dir, output_dir, metadata_objs, preds_objs, num_outputs=25):
    """
    Replay recorded tool calls per instance and write per-step JSONL snapshots.

    Every sub-directory of ``root_dir`` is treated as one instance. Its
    ``.json`` files are read in sorted order; the tool calls found under
    ``response.choices[0].message.tool_calls`` (``create``, ``insert``,
    ``str_replace``) are applied to a running test-suite string. After each
    successfully processed file a deep copy of the instance metadata, with
    ``test_result.test_suite`` set to the running string, is stored in the
    slot for that step.

    Slots ``0 .. num_outputs - 2`` hold the per-step snapshots. Instances with
    fewer steps are padded with their last recorded snapshot (or with the
    unmodified metadata when no step was recorded); steps beyond the last
    snapshot slot are dropped. The final slot always holds the metadata with
    its original test suite. Slot ``i`` is written to
    ``<output_dir>/output_<i>.jsonl``, one JSON object per line.

    :param root_dir: Directory whose sub-directories contain the step files.
        Entries that are not directories are ignored.
    :param output_dir: Directory that receives the ``output_<i>.jsonl`` files;
        it is created if missing.
    :param metadata_objs: Mapping from sub-directory name to a metadata dict
        containing ``test_result.test_suite``.
    :param preds_objs: Mapping from sub-directory name to the starting test
        suite; instances without an entry start from an empty string.
    :param num_outputs: Total number of output files (snapshot slots plus the
        final slot for the original suite).
    :raises KeyError: If a sub-directory has no entry in ``metadata_objs``.
    :raises ValueError: If ``num_outputs`` is smaller than 1.
    """
    if num_outputs < 1:
        raise ValueError('num_outputs must be at least 1')
    # The last slot is reserved for the original suite, so only the slots
    # before it may receive step snapshots.
    max_steps = num_outputs - 1
    final_output = {slot: [] for slot in range(num_outputs)}

    for subdir in sorted(os.listdir(root_dir)):  # Sorting ensures consistent order
        subdir_path = os.path.join(root_dir, subdir)
        if not os.path.isdir(subdir_path):
            # Stray files have no metadata entry; skip them before the lookup.
            continue
        metadata = metadata_objs[subdir]
        orig_test_suite = metadata['test_result']['test_suite']
        print(f'Processing subdirectory: {subdir}')

        test_suite = preds_objs.get(subdir, '')
        step = 0
        last_snapshot = None
        for file in sorted(os.listdir(subdir_path)):  # Sorting ensures consistent order
            if not file.endswith('.json'):
                continue
            file_path = os.path.join(subdir_path, file)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except Exception as e:
                # Best effort: an unreadable step file is reported and skipped.
                print(traceback.format_exc())
                print(f' Error loading {file_path}: {e}')
                continue

            try:
                tool_calls = data['response']['choices'][0]['message']['tool_calls']
                if tool_calls is not None:
                    for tool_call in tool_calls:
                        tool_call_dict = _parse_tool_arguments(
                            tool_call['function']['arguments']
                        )
                        if tool_call_dict is None or tool_call_dict == {}:
                            continue
                        test_suite = _apply_tool_call(test_suite, tool_call_dict)
            except Exception:
                # A malformed step does not count as a step; edits applied
                # before the failure stay in the running test suite.
                print(traceback.format_exc())
                continue

            if step < max_steps:
                snapshot = copy.deepcopy(metadata)
                snapshot['test_result']['test_suite'] = test_suite
                final_output[step].append(snapshot)
                last_snapshot = snapshot
            step += 1

        # Pad the unused step slots with the last recorded snapshot so every
        # output file has one row per instance.
        padding = last_snapshot if last_snapshot is not None else copy.deepcopy(metadata)
        for slot in range(step, max_steps):
            final_output[slot].append(padding)

        metadata_orig = copy.deepcopy(metadata)
        metadata_orig['test_result']['test_suite'] = orig_test_suite
        final_output[max_steps].append(metadata_orig)

    os.makedirs(output_dir, exist_ok=True)
    for slot in range(num_outputs):
        output_file = os.path.join(output_dir, f'output_{slot}.jsonl')
        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(json.dumps(entry) + '\n' for entry in final_output[slot])
def load_jsonl(path):
    """
    Read a JSON Lines file and return its decoded records.

    :param path: Path of the UTF-8 encoded file to read.
    :return: A list with one decoded object per non-blank line, in file order.
    """
    records = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (for example a trailing newline at the end of the
            # file) carry no record and would make json.loads fail.
            if line.strip():
                records.append(json.loads(line))
    return records


def main():
    """
    Command-line entry point.

    Reads the instance metadata from ``<output_dir>/output.jsonl`` (keyed by
    ``instance_id``), optionally reads starting predictions (keyed by ``id``,
    using the first entry of ``preds.full``), and hands both to
    ``parse_json_files``.
    """
    parser = argparse.ArgumentParser(description='Parse JSON file')
    parser.add_argument('--root_dir', type=str, help='Root directory', required=True)
    parser.add_argument(
        '--output_dir', type=str, help='Output directory', required=True
    )
    parser.add_argument(
        '--starting_preds_file', type=str, help='Starting predictions', default=None
    )
    args = parser.parse_args()

    # The metadata is read from the same directory the results are written to.
    metadata_path = os.path.join(args.output_dir, 'output.jsonl')
    metadata_objs = {
        metadata['instance_id']: metadata for metadata in load_jsonl(metadata_path)
    }

    preds_objs = {}
    if args.starting_preds_file is not None:
        preds_objs = {
            pred['id']: pred['preds']['full'][0]
            for pred in load_jsonl(args.starting_preds_file)
        }

    parse_json_files(args.root_dir, args.output_dir, metadata_objs, preds_objs)


if __name__ == '__main__':
    main()