"""Collect per-model accuracy results from JSON files and summarise them.

Each immediate sub-folder of a base directory is treated as one model. The
script reads a ``results*.json`` file from every such folder, extracts the
``acc,none`` metric of each dataset, adds a per-model average and writes the
table both as a tab-separated text file and as a CSV file.
"""
import json
import os
from collections import defaultdict
from glob import glob

import pandas as pd


def process_json_files(base_path):
    """Read one results file per model folder and collect its accuracies.

    Every immediate sub-folder of ``base_path`` is one model; the folder name
    is used as the model name. Folders without a ``results*.json`` file, files
    without a top-level ``'results'`` key and datasets without an
    ``'acc,none'`` entry are skipped.

    Args:
        base_path: Directory whose sub-folders hold the result files.

    Returns:
        A list of ``{'model', 'dataset', 'accuracy'}`` dicts, where
        ``accuracy`` is the ``acc,none`` value times 100, rounded to two
        decimals.
    """
    results = []
    # glob() returns matches in arbitrary order; sorting keeps the output
    # (and the choice of file below) reproducible across runs and machines.
    for folder_path in sorted(glob(os.path.join(base_path, "*/"))):
        # Matches end with a path separator, so dirname() strips it and
        # basename() then yields the folder name itself.
        model_name = os.path.basename(os.path.dirname(folder_path))
        json_files = sorted(glob(os.path.join(folder_path, "results*.json")))
        if not json_files:
            continue

        # Only one file per folder is used: the lexicographically first.
        with open(json_files[0], 'r', encoding='utf-8') as f:
            data = json.load(f)
        if 'results' not in data:
            continue

        for dataset, values in data['results'].items():
            if 'acc,none' in values:
                results.append({
                    'model': model_name,
                    'dataset': dataset,
                    'accuracy': round(values['acc,none'] * 100, 2),
                })
    return results


def process_results(results):
    """Drop the ``multimedqa`` rows and append an average row per model.

    Args:
        results: Row dicts as produced by ``process_json_files``.

    Returns:
        A new list with, for each model, its remaining rows followed by one
        row whose dataset is ``'average'`` and whose accuracy is the mean of
        those rows, rounded to two decimals.
    """
    model_data = defaultdict(list)
    for item in results:
        # NOTE(review): 'multimedqa' is presumably an aggregate entry that
        # would skew the mean -- confirm with the evaluation setup.
        if item['dataset'] != 'multimedqa':
            model_data[item['model']].append(item)

    new_results = []
    for model, items in model_data.items():
        # A model only gets an entry once a row was appended, so
        # len(items) is never zero here.
        total_acc = sum(item['accuracy'] for item in items)
        new_results.extend(items)
        new_results.append({
            'model': model,
            'dataset': 'average',
            'accuracy': round(total_acc / len(items), 2),
        })
    return new_results


def format_results(results):
    """Pivot row dicts into one table row per model.

    Args:
        results: Row dicts as produced by ``process_results``.

    Returns:
        A DataFrame with a ``file_name`` column (the model name), an
        ``average`` column and then one column per remaining dataset. When
        ``results`` is empty, an empty DataFrame with only the ``file_name``
        and ``average`` columns is returned.
    """
    df = pd.DataFrame(results)
    if df.empty:
        # Without rows there is no 'model' column to pivot on; return an
        # empty table with the fixed leading columns instead of failing.
        return pd.DataFrame(columns=['file_name', 'average'])

    pivot_df = (
        df.pivot(index='model', columns='dataset', values='accuracy')
        .reset_index()
        .rename(columns={'model': 'file_name'})
    )

    # Reorder columns to have 'average' first after 'file_name'.
    leading = ['file_name', 'average']
    columns = leading + [col for col in pivot_df.columns if col not in leading]
    return pivot_df[columns]


def create_summary_text(df):
    """Render the pivoted table as tab-separated text.

    The header is built from the table's own columns so that it always lines
    up with the values below it, whichever datasets are present.

    Args:
        df: Table as produced by ``format_results``.

    Returns:
        The header line followed by one line per model, joined with
        newlines. Missing accuracies are rendered as empty cells.
    """
    header = "\t".join(str(col) for col in df.columns)
    value_columns = [col for col in df.columns if col != 'file_name']

    data_lines = []
    for _, row in df.iterrows():
        accuracies = [
            f"{row[col]:.2f}" if pd.notna(row[col]) else ""
            for col in value_columns
        ]
        # NOTE(review): the underscore appended to the model name is kept
        # from the original output format -- confirm it is intentional.
        data_lines.append(f"{row['file_name']}_\t" + "\t".join(accuracies))

    return header + "\n" + "\n".join(data_lines)


def main():
    """Summarise the results under the current directory and save them."""
    results = process_json_files('.')
    processed_results = process_results(results)
    pivot_df = format_results(processed_results)

    summary_text = create_summary_text(pivot_df)
    with open('results_summary.txt', 'w', encoding='utf-8') as f:
        f.write(summary_text)
    print("Summary has been written to results_summary.txt")

    # Optionally, still save the CSV if needed
    pivot_df.to_csv('llama31_finalresult.csv', index=False)


# Guarded so that importing this module does not touch the filesystem.
if __name__ == "__main__":
    main()