| | import os |
| | import json |
| | from glob import glob |
| | import pandas as pd |
| | from collections import defaultdict |
| |
|
def process_json_files(base_path):
    """Collect per-dataset accuracies from the result files under *base_path*.

    Every immediate sub-directory of ``base_path`` is treated as one model,
    and the directory name becomes the model name.  Inside each directory the
    first file matching ``results*.json`` (in lexicographic order) is loaded.
    One record is emitted for every entry of its top-level ``"results"``
    mapping that carries an ``"acc,none"`` value.

    Args:
        base_path: Directory whose sub-directories hold the result files.

    Returns:
        A list of ``{'model', 'dataset', 'accuracy'}`` dicts, where
        ``accuracy`` is the ``"acc,none"`` value multiplied by 100 and rounded
        to two decimals.  Directories without a matching file, and files
        without a ``"results"`` key, contribute nothing.
    """
    results = []
    # glob() returns entries in arbitrary filesystem order; sorting keeps both
    # the row order and the choice of result file reproducible across runs.
    for folder_path in sorted(glob(os.path.join(base_path, "*/"))):
        json_files = sorted(glob(os.path.join(folder_path, "results*.json")))
        if not json_files:
            continue

        # The glob pattern ends in a separator, so dirname() strips it and
        # basename() then yields the directory's own name.
        model_name = os.path.basename(os.path.dirname(folder_path))

        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(json_files[0], 'r', encoding='utf-8') as f:
            data = json.load(f)
        if 'results' not in data:
            continue

        results.extend(
            {
                'model': model_name,
                'dataset': dataset,
                'accuracy': round(values['acc,none'] * 100, 2),
            }
            for dataset, values in data['results'].items()
            if 'acc,none' in values
        )
    return results
| |
|
def process_results(results):
    """Group records by model and append a per-model ``average`` record.

    Records whose dataset is ``'multimedqa'`` are dropped before grouping, so
    they neither appear in the output nor count towards the average.

    Args:
        results: Iterable of ``{'model', 'dataset', 'accuracy'}`` dicts.

    Returns:
        A new list in which each model's retained records (in input order)
        are followed by one ``{'model', 'dataset': 'average', 'accuracy'}``
        record holding their mean accuracy rounded to two decimals.  Models
        are ordered by first appearance.
    """
    by_model = defaultdict(list)
    for entry in results:
        if entry['dataset'] == 'multimedqa':
            continue
        by_model[entry['model']].append(entry)

    combined = []
    for name, entries in by_model.items():
        mean_acc = round(sum(e['accuracy'] for e in entries) / len(entries), 2)
        combined += entries
        combined.append({'model': name, 'dataset': 'average', 'accuracy': mean_acc})
    return combined
| |
|
def format_results(results):
    """Pivot long-format accuracy records into one row per model.

    Args:
        results: Records with ``'model'``, ``'dataset'`` and ``'accuracy'``
            keys (anything ``pd.DataFrame`` accepts), including one
            ``'average'`` dataset record per model.

    Returns:
        A DataFrame whose first column is ``file_name`` (the model name),
        followed by ``average`` and then every other dataset column in the
        order the pivot produced them.  Empty input yields an empty frame
        with just the ``file_name`` and ``average`` columns.

    Raises:
        KeyError: If the records contain no ``'average'`` dataset.
        ValueError: If a (model, dataset) pair occurs more than once, since
            ``DataFrame.pivot`` cannot reshape duplicate entries.
    """
    df = pd.DataFrame(results)
    if df.empty:
        # Without rows there are no 'model'/'dataset' columns to pivot on;
        # return the fixed leading columns instead of raising KeyError.
        return pd.DataFrame(columns=['file_name', 'average'])

    pivot_df = (
        df.pivot(index='model', columns='dataset', values='accuracy')
        .reset_index()
        .rename(columns={'model': 'file_name'})
    )

    # Put the identifying column and the average first; keep the rest as-is.
    leading = ['file_name', 'average']
    trailing = [col for col in pivot_df.columns if col not in leading]
    return pivot_df[leading + trailing]
| |
|
def create_summary_text(df):
    """Render the pivoted results as tab-separated text.

    The header is derived from the same column list that drives the data
    cells, so header and rows stay aligned whatever datasets are present.

    Args:
        df: DataFrame with a ``file_name`` column plus one numeric column per
            dataset, as returned by ``format_results``.

    Returns:
        A string whose first line is the header (``file_name`` followed by
        the other column names) and whose remaining lines hold one model
        each: the model name, then each value formatted with two decimals.
        Missing values are rendered as empty cells.
    """
    value_columns = [col for col in df.columns if col != 'file_name']
    header = "\t".join(['file_name'] + [str(col) for col in value_columns])

    data_lines = []
    for _, row in df.iterrows():
        cells = [
            f"{row[col]:.2f}" if pd.notna(row[col]) else ""
            for col in value_columns
        ]
        # NOTE(review): the underscore appended to the model name is kept
        # from the existing output format -- confirm it is intentional.
        data_lines.append(f"{row['file_name']}_\t" + "\t".join(cells))

    return header + "\n" + "\n".join(data_lines)
| |
|
| | |
# Gather the raw accuracies from every model directory under the current
# working directory, add the per-model average, and pivot to one row per model.
results = process_json_files('.')
processed_results = process_results(results)
pivot_df = format_results(processed_results)

# Render the pivoted table as tab-separated text.
summary_text = create_summary_text(pivot_df)

# Model names come from directory names and may contain non-ASCII characters.
# Write UTF-8 explicitly (the encoding to_csv uses by default) rather than
# relying on the locale's default encoding.
with open('results_summary.txt', 'w', encoding='utf-8') as f:
    f.write(summary_text)

print("Summary has been written to results_summary.txt")

# Also save the pivoted table as CSV, without the index column.
pivot_df.to_csv('llama31_finalresult.csv', index=False)