File size: 2,745 Bytes
22df562 7939a4f 22df562 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
"""
Utility functions for MUSEval Leaderboard
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional
def norm_sNavie(df: pd.DataFrame) -> pd.DataFrame:
    """Placeholder for naive normalization.

    No normalization is applied yet: the input frame is handed back as-is
    (the very same object, not a copy).

    Args:
        df: Frame to normalize.

    Returns:
        The ``df`` object that was passed in, unmodified.
    """
    # Intentionally a pass-through until a real normalization is defined.
    return df
def pivot_df(file_path: str, tab_name: str) -> pd.DataFrame:
    """Load a CSV file into a dataframe.

    No pivoting is performed yet; the parsed CSV is returned unchanged.

    Args:
        file_path: Path handed to ``pd.read_csv``.
        tab_name: Accepted for interface compatibility; currently unused.

    Returns:
        The parsed dataframe, or an empty dataframe when reading fails.
    """
    try:
        loaded = pd.read_csv(file_path)
    except Exception as e:
        # Best-effort load: report the problem and fall back to an empty frame.
        print(f"Error reading {file_path}: {e}")
        return pd.DataFrame()
    else:
        # Simple pivot placeholder - the data is passed through untouched.
        return loaded
def get_grouped_dfs() -> Dict[str, pd.DataFrame]:
    """Build the dataframe shown for each leaderboard view.

    Returns:
        A dict keyed by view name ('domain', 'frequency', 'term_length',
        'univariate', 'overall'). When no results are loaded every value is
        an empty dataframe; otherwise every value is an independent copy of
        the overall table.
    """
    from .load_results import load_results_with_metadata, create_overall_table

    view_names = ('domain', 'frequency', 'term_length', 'univariate', 'overall')

    # Nothing loaded: hand back an empty frame per view.
    if not load_results_with_metadata():
        return {view: pd.DataFrame() for view in view_names}

    overall_df = create_overall_table()

    # For now every view shares the same content; a real implementation
    # would compute a different aggregation per view.
    return {view: overall_df.copy() for view in view_names}
def pivot_existed_df(df: pd.DataFrame, tab_name: str) -> pd.DataFrame:
    """Tag an already-loaded dataframe with the tab it belongs to.

    Args:
        df: Source dataframe; it is never modified.
        tab_name: Value written into the 'tab' column of every row.

    Returns:
        ``df`` itself when it is empty, otherwise a copy of ``df`` carrying
        a 'tab' column set to ``tab_name``.
    """
    if not df.empty:
        # Work on a copy so the caller's frame keeps its original columns.
        tagged = df.copy()
        tagged['tab'] = tab_name
        return tagged
    return df
def rename_metrics(df: pd.DataFrame) -> pd.DataFrame:
    """Attach a rank column derived from the 'MAE' column.

    Despite the function name, no column is renamed. When 'MAE' is present,
    a copy of the frame gains a 'MASE_Rank' column holding the rank of each
    'MAE' value, with ties sharing the lowest rank (``method='min'``).

    Args:
        df: Metrics dataframe; it is never modified.

    Returns:
        ``df`` itself when it is empty or has no 'MAE' column, otherwise a
        ranked copy.
    """
    if df.empty or 'MAE' not in df.columns:
        return df

    ranked = df.copy()
    # NOTE: the column is named 'MASE_Rank' although it is computed from MAE.
    ranked['MASE_Rank'] = ranked['MAE'].rank(method='min')
    return ranked
def format_df(df: pd.DataFrame) -> pd.DataFrame:
    """Render the known metric columns as display strings.

    'MAPE' and 'SMAPE' are formatted with one decimal and a trailing '%';
    'MAE', 'Uni-MAE', 'RMSE', 'R²' and 'Uni-Multi' with three decimals.
    Missing values become empty strings. Other columns are left untouched.

    Args:
        df: Metrics dataframe; it is never modified.

    Returns:
        ``df`` itself when it is empty, otherwise a formatted copy.
    """
    if df.empty:
        return df

    def as_percent(value):
        return f"{value:.1f}%" if pd.notna(value) else ""

    def as_decimal(value):
        return f"{value:.3f}" if pd.notna(value) else ""

    percent_columns = {'MAPE', 'SMAPE'}
    formatted = df.copy()

    for name in ('MAE', 'Uni-MAE', 'RMSE', 'MAPE', 'R²', 'SMAPE', 'Uni-Multi'):
        if name not in formatted.columns:
            continue
        formatter = as_percent if name in percent_columns else as_decimal
        formatted[name] = formatted[name].apply(formatter)

    return formatted
|