|
|
import os |
|
|
import json |
|
|
from glob import glob |
|
|
import pandas as pd |
|
|
from collections import defaultdict |
|
|
|
|
|
def process_json_files(base_path):
    """Collect per-dataset accuracies from the results file of each subfolder.

    Every immediate subdirectory of ``base_path`` is treated as one model; the
    directory name becomes the model name. Within each directory the first
    file matching ``results*.json`` (in sorted order) is loaded, and every
    entry of its top-level ``'results'`` mapping that has an ``'acc,none'``
    key contributes one record.

    Args:
        base_path: Directory whose subfolders are scanned.

    Returns:
        A list of dicts with keys ``'model'``, ``'dataset'`` and
        ``'accuracy'`` (the ``'acc,none'`` value as a percentage rounded to
        two decimals). Folders without a results file, or whose file has no
        ``'results'`` key, contribute nothing.
    """
    results = []

    # glob() returns matches in arbitrary order; sorting keeps both the
    # record order and the choice of results file reproducible across runs.
    for folder_path in sorted(glob(os.path.join(base_path, "*/"))):
        # folder_path ends with a separator, so dirname() strips it and
        # basename() then yields the folder's own name.
        model_name = os.path.basename(os.path.dirname(folder_path))

        json_files = sorted(glob(os.path.join(folder_path, "results*.json")))
        if not json_files:
            continue

        # Only one results file per folder is read.
        with open(json_files[0], 'r', encoding='utf-8') as f:
            data = json.load(f)

        if 'results' not in data:
            continue

        for dataset, values in data['results'].items():
            if 'acc,none' in values:
                results.append({
                    'model': model_name,
                    'dataset': dataset,
                    'accuracy': round(values['acc,none'] * 100, 2),
                })

    return results
|
|
|
|
|
def process_results(results, excluded_datasets=('multimedqa',)):
    """Drop excluded datasets and append a per-model ``'average'`` record.

    Args:
        results: Iterable of dicts with ``'model'``, ``'dataset'`` and
            ``'accuracy'`` keys.
        excluded_datasets: Dataset names to leave out of both the output and
            the average. Defaults to ``('multimedqa',)``.

    Returns:
        A new list: for each model (in first-seen order) its kept records,
        followed by one record whose ``'dataset'`` is ``'average'`` and whose
        ``'accuracy'`` is the mean of the kept accuracies rounded to two
        decimals. A model whose records are all excluded does not appear.
    """
    excluded = set(excluded_datasets)

    # Group the kept records by model; a model only gets an entry once it has
    # at least one kept record, so the division below never sees an empty list.
    model_data = defaultdict(list)
    for item in results:
        if item['dataset'] not in excluded:
            model_data[item['model']].append(item)

    new_results = []
    for model, items in model_data.items():
        total_acc = sum(item['accuracy'] for item in items)
        avg_acc = round(total_acc / len(items), 2)

        new_results.extend(items)
        new_results.append({
            'model': model,
            'dataset': 'average',
            'accuracy': avg_acc,
        })

    return new_results
|
|
|
|
|
def format_results(results):
    """Pivot long-form records into one row per model, one column per dataset.

    Args:
        results: List of dicts with ``'model'``, ``'dataset'`` and
            ``'accuracy'`` keys; must include an ``'average'`` dataset record
            for the column reordering to succeed.

    Returns:
        A ``pandas.DataFrame`` whose first two columns are ``'file_name'``
        (the model) and ``'average'``, followed by the remaining dataset
        columns. For empty input, an empty frame with just those two columns.

    Raises:
        ValueError: From ``DataFrame.pivot`` if a (model, dataset) pair
            occurs more than once.
    """
    leading = ['file_name', 'average']

    # An empty record list yields a DataFrame with no 'model' column, which
    # would make pivot() fail; return a well-formed empty table instead.
    if not results:
        return pd.DataFrame(columns=leading)

    df = pd.DataFrame(results)
    pivot_df = df.pivot(index='model', columns='dataset', values='accuracy')
    pivot_df.reset_index(inplace=True)
    pivot_df.rename(columns={'model': 'file_name'}, inplace=True)

    # Move the identifying column and the average to the front.
    columns = leading + [col for col in pivot_df.columns if col not in leading]
    pivot_df = pivot_df[columns]

    return pivot_df
|
|
|
|
|
def create_summary_text(df):
    """Render the pivoted results table as tab-separated text.

    Args:
        df: DataFrame with a ``'file_name'`` column plus numeric columns.

    Returns:
        A string whose first line is the header (``file_name`` followed by
        the other column names in ``df`` order) and whose remaining lines
        hold one model each: the file name with a trailing underscore, then
        each value formatted to two decimals (blank where the value is
        missing), all tab-separated.
    """
    value_columns = [col for col in df.columns if col != 'file_name']

    # Build the header from the same columns the rows use, so header and data
    # cannot drift apart when the set of datasets changes.
    header = "\t".join(['file_name'] + [str(col) for col in value_columns])

    data_lines = []
    for _, row in df.iterrows():
        model_name = row['file_name']
        accuracies = [
            f"{row[col]:.2f}" if pd.notna(row[col]) else ""
            for col in value_columns
        ]
        # NOTE(review): the underscore appended to the model name is kept
        # from the original output format — confirm it is still wanted.
        data_line = f"{model_name}_\t" + "\t".join(accuracies)
        data_lines.append(data_line)

    return header + "\n" + "\n".join(data_lines)
|
|
|
|
|
|
|
|
# Gather accuracies from every model folder under the current directory,
# add per-model averages, and pivot into one row per model.
results = process_json_files('.')
processed_results = process_results(results)
pivot_df = format_results(processed_results)

# Tab-separated rendering of the same table.
summary_text = create_summary_text(pivot_df)

# Explicit encoding so the file is written identically on every platform.
with open('results_summary.txt', 'w', encoding='utf-8') as f:
    f.write(summary_text)

print("Summary has been written to results_summary.txt")

# Also keep the table as CSV, without the positional index column.
pivot_df.to_csv('llama31_finalresult.csv', index=False)