| import os |
| import jsonlines |
| import json |
|
|
def get_function_scores(dir):
    """Collect the weighted score of every scored function under ``dir``.

    Every immediate subdirectory of ``dir`` is read in sorted name order. Each
    one must contain ``readme_summary.json`` (a JSON object with a ``score``
    entry) and ``functions.jsonl``. A record in ``functions.jsonl`` that has a
    ``score`` key contributes ``record score * readme score`` to the result;
    records without that key are skipped.

    Args:
        dir: Path of the directory whose subdirectories are scanned.

    Returns:
        A flat list with one weighted score per scored record, in the order
        the records were read.
    """
    weighted = []
    for entry in sorted(os.listdir(dir)):
        entry_dir = os.path.join(dir, entry)
        if not os.path.isdir(entry_dir):
            continue

        readme_path = os.path.join(entry_dir, 'readme_summary.json')
        with open(readme_path, 'r', encoding='utf-8', errors='ignore') as handle:
            readme_score = json.load(handle)['score']

        functions_path = os.path.join(entry_dir, 'functions.jsonl')
        with jsonlines.open(functions_path) as records:
            weighted.extend(
                record['score'] * readme_score
                for record in records
                if 'score' in record
            )
    return weighted
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
def output_scores(dir, output_path, score):
    """Append every function whose weighted score exceeds ``score`` to a JSONL file.

    Every immediate subdirectory of ``dir`` is processed in sorted name order.
    Each one must contain ``readme_summary.json`` (a JSON object with
    ``readme_summary`` and ``score`` entries) and ``functions.jsonl``.

    A record from ``functions.jsonl`` is kept when it has a ``score`` key and
    ``record score * readme score`` is strictly greater than ``score``. Kept
    records are extended with:

    * ``md_summary``   - the ``readme_summary`` value of the subdirectory,
    * ``md_score``     - the ``score`` value of the subdirectory,
    * ``final_score``  - ``record score * readme score``,
    * ``code_content`` - lines ``start_line``..``end_line`` of the file named
      by the record's ``file`` entry (``start_line`` is treated as 1-based and
      ``end_line`` as inclusive).

    Args:
        dir: Path of the directory whose subdirectories are scanned.
        output_path: JSONL file that the kept records are appended to. It is
            opened once per subdirectory, in append mode.
        score: Exclusive lower bound for the weighted score.

    Returns:
        None. The result is the data appended to ``output_path``.
    """
    subdirs = sorted(d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d)))
    for subdir in subdirs:
        md_path = os.path.join(dir, subdir, 'readme_summary.json')
        with open(md_path, 'r', encoding='utf-8', errors='ignore') as f:
            data = json.load(f)
        md_summary = data['readme_summary']
        md_score = data['score']

        # One-entry cache: when consecutive records point at the same source
        # file, its lines are read once instead of once per record.
        cached_file = None
        cached_lines = []

        json_path = os.path.join(dir, subdir, 'functions.jsonl')
        contents = []
        with jsonlines.open(json_path) as reader:
            for obj in reader:
                if 'score' not in obj:
                    continue
                final_score = obj['score'] * md_score
                if final_score > score:
                    obj['md_summary'] = md_summary
                    obj['md_score'] = md_score
                    obj['final_score'] = final_score
                    source_file = obj['file']
                    if source_file != cached_file:
                        with open(source_file, 'r', encoding='utf-8', errors='ignore') as src:
                            cached_lines = src.readlines()
                        cached_file = source_file
                    obj['code_content'] = ''.join(
                        cached_lines[obj['start_line'] - 1:obj['end_line']]
                    )
                    contents.append(obj)

        # Append mode keeps the records written for earlier subdirectories.
        with jsonlines.open(output_path, 'a', flush=True) as writer:
            writer.write_all(contents)
|
|
# Script step: filter the dataset and append the functions whose weighted
# score exceeds the cutoff to the output JSONL file.
output_scores(
    '/home/weifengsun/tangou1/step2/step22/dataset',
    '/home/weifengsun/tangou1/step2/step22/output/function_filtered_scores.jsonl',
    0.1282891692796717,
)
| |
| |
| |
|
|
|
|
| |
| |
|
|
| |
| |
|
|
| |
| |
|
|