# Spaces: Running
# File size: 8,131 Bytes
from dataclasses import dataclass, make_dataclass
from enum import Enum
import pandas as pd
from src.about import Tasks
def fields(raw_class):
    """Return the values of the non-dunder attributes defined on a class.

    Walks ``raw_class.__dict__`` in definition order and keeps the value of
    every attribute whose name neither starts nor ends with a double
    underscore. Only the class's own namespace is inspected, so inherited
    attributes are not returned.
    """
    kept = []
    for attr_name, attr_value in raw_class.__dict__.items():
        # Skip anything that looks like a dunder on either side.
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        kept.append(attr_value)
    return kept
# These classes hold the user-facing column names in one place, so that a
# rename only has to be made here instead of all around the code.
@dataclass(frozen=True)
class ColumnContent:
    """Describe one displayed column.

    Instances are used as default values of dynamically generated dataclass
    fields (see the ``make_dataclass`` call in this module). The class is
    frozen so that its instances are hashable: since Python 3.11 the
    ``dataclasses`` module rejects unhashable field defaults with
    ``ValueError: mutable default ... use default_factory``.
    """

    name: str  # user-facing column name
    type: str  # column datatype label, e.g. "str", "number", "markdown", "bool"
    displayed_by_default: bool
    hidden: bool = False  # hidden columns are left out of COLS
    never_hidden: bool = False
## Leaderboard columns
# Every entry is [attribute name, field type, default value], which is the
# three-item field spec accepted by make_dataclass.
auto_eval_column_dict = [
    # Init
    ["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)],
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    # Scores
    ["average", ColumnContent, ColumnContent("Média Geral ⬆️", "number", True)],
    # Average specific to the PLUE group
    ["plue_avg", ColumnContent, ColumnContent("PLUE", "number", True)],
    # Per-area averages (kept for calculations and other tabs);
    # none of them is displayed by default.
    ["area_medica_avg", ColumnContent, ColumnContent("Área Médica", "number", False)],
    ["area_direito_avg", ColumnContent, ColumnContent("Área do Direito", "number", False)],
    ["provas_militares_avg", ColumnContent, ColumnContent("Provas Militares", "number", False)],
    ["computacao_avg", ColumnContent, ColumnContent("Computação", "number", False)],
    ["discurso_odio_avg", ColumnContent, ColumnContent("Discurso de Ódio", "number", False)],
    ["economia_contabilidade_avg", ColumnContent, ColumnContent("Economia e Contabilidade", "number", False)],
    ["semantica_inferencia_avg", ColumnContent, ColumnContent("Semântica e Inferência", "number", False)],
    ["multidisciplinar_avg", ColumnContent, ColumnContent("Multidisciplinar", "number", False)],
    # Averages for the new areas
    ["energy_avg", ColumnContent, ColumnContent("Energy", "number", False)],
    ["reasoning_avg", ColumnContent, ColumnContent("Reasoning", "number", False)],
]

# One score column per task, not displayed by default.
for task in Tasks:
    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", False)])

# Model information
auto_eval_column_dict.extend(
    [
        ["model_type", ColumnContent, ColumnContent("Type", "str", False)],
        ["architecture", ColumnContent, ColumnContent("Architecture", "str", False)],
        ["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, hidden=True)],
        ["precision", ColumnContent, ColumnContent("Precision", "str", False)],
        ["license", ColumnContent, ColumnContent("Hub License", "str", False)],
        ["params", ColumnContent, ColumnContent("#Params (B)", "number", False)],
        ["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)],
        ["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)],
        ["revision", ColumnContent, ColumnContent("Model sha", "str", False, hidden=False)],
    ]
)

# make_dataclass is used so that the score columns can be filled in
# dynamically from Tasks.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
# Mapping from each knowledge area to its corresponding Tasks
AREA_DEFINITIONS = {
    "Área Médica": [
        Tasks.REVALIDA,
        Tasks.MREX,
    ],
    "Área do Direito": [
        Tasks.OAB,
        Tasks.ENAM,
    ],
    "Provas Militares": [
        Tasks.AFA,
        Tasks.ITA,
        Tasks.IME,
    ],
    "Computação": [
        Tasks.POSCOMP,
        Tasks.OBI,
    ],
    "Discurso de Ódio": [
        Tasks.HATEBR,
        Tasks.PT_HATE_SPEECH,
        Tasks.TWEETSENTBR,
    ],
    "Economia e Contabilidade": [
        Tasks.BCB,
        Tasks.CFCES,
    ],
    "Semântica e Inferência": [
        Tasks.FAQUAD_NLI,
        Tasks.ASSIN2_RTE,
        Tasks.ASSIN2_STS,
    ],
    "Multidisciplinar": [
        Tasks.ENEM,
        Tasks.BLUEX,
        Tasks.CNPU,
        Tasks.ENADE,
        Tasks.BNDES,
        Tasks.CACD_1,
        Tasks.CACD_2,
    ],
    # New areas
    "Energy": [Tasks.ENERGY_DATASET],
    "Reasoning": [Tasks.REASONING_DATASET],
}

# Mapping from each area name to the display name of its average column
# (keep all of them). Each pair is (area label, AutoEvalColumn attribute).
AREA_AVG_COLUMN_MAP = {
    area_label: getattr(AutoEvalColumn, column_attr).name
    for area_label, column_attr in (
        ("Área Médica", "area_medica_avg"),
        ("Área do Direito", "area_direito_avg"),
        ("Provas Militares", "provas_militares_avg"),
        ("Computação", "computacao_avg"),
        ("Discurso de Ódio", "discurso_odio_avg"),
        ("Economia e Contabilidade", "economia_contabilidade_avg"),
        ("Semântica e Inferência", "semantica_inferencia_avg"),
        ("Multidisciplinar", "multidisciplinar_avg"),
        # New areas
        ("Energy", "energy_avg"),
        ("Reasoning", "reasoning_avg"),
    )
}

# --- Updated PLUE group definition ---
# Area labels that belong to the PLUE group; "Energy" and "Reasoning" are
# not listed here.
PLUE_GROUP_AREAS = [
    "Área Médica",
    "Área do Direito",
    "Provas Militares",
    "Computação",
    "Discurso de Ódio",
    "Economia e Contabilidade",
    "Semântica e Inferência",
    "Multidisciplinar",
]
# -------
## For the queue columns in the submission tab
@dataclass(frozen=True)
class EvalQueueColumn:  # Queue column
    """Columns of the evaluation queue tables.

    The attributes carry no annotations, so they are plain class attributes
    rather than dataclass fields; ``fields(EvalQueueColumn)`` collects them
    from the class namespace in the order written here.
    """

    model = ColumnContent("model", "markdown", True)
    revision = ColumnContent("revision", "str", True)
    private = ColumnContent("private", "bool", True)
    precision = ColumnContent("precision", "str", True)
    # The third argument is the displayed_by_default bool. This column used
    # to pass the string "Original" there, which only behaved like True
    # because a non-empty string is truthy.
    weight_type = ColumnContent("weight_type", "str", True)
    status = ColumnContent("status", "str", True)
## All the model information that we might need
@dataclass
class ModelDetails:
    """Descriptive record used as the value of the enum members in this module.

    Only ``name`` is required; ``display_name`` and ``symbol`` default to the
    empty string.
    """

    name: str
    display_name: str = ""
    symbol: str = ""  # emoji
class ModelType(Enum):
    """Model categories, each carrying a descriptive name and a symbol."""

    PT = ModelDetails(name="Pre trained", symbol="🟢")
    SFT = ModelDetails(name="Supervised Finetuning", symbol="🔶")
    RL = ModelDetails(name="Reinforcement Learning", symbol="🟦")
    Unknown = ModelDetails(name="", symbol="?")

    def to_str(self, separator=" : "):
        """Return the member name and descriptive name joined by *separator*.

        For example ``ModelType.PT.to_str()`` gives ``"PT : Pre trained"``.
        """
        return f"{self.name}{separator}{self.value.name}"

    @staticmethod
    def from_str(type_str):
        """Map a free-form model-type label to a ``ModelType`` member.

        Descriptive labels are matched as case-insensitive substrings, the
        emoji symbols as substrings, and the short codes ("SFT", "PT", ...)
        by exact, case-sensitive equality. Categories are tried in the order
        SFT, PT, RL, so a label matching several of them resolves to the
        first.

        Returns ``ModelType.Unknown`` when nothing matches, including when
        *type_str* is ``None`` or any other non-string value (the same
        tolerance ``Precision.from_str`` has for such input).
        """
        if not isinstance(type_str, str):
            return ModelType.Unknown

        # Lower-case once instead of once per comparison.
        lowered = type_str.lower()

        sft_labels = ("fine-tuned", "instruction-tuned", "supervised finetuning")
        if (
            any(label in lowered for label in sft_labels)
            or "🔶" in type_str
            or type_str in ("SFT", "FT", "IFT")
        ):
            return ModelType.SFT

        pt_labels = ("pretrained", "pre trained", "pré-treinado")
        if any(label in lowered for label in pt_labels) or "🟢" in type_str or type_str == "PT":
            return ModelType.PT

        rl_labels = ("rl-tuned", "reinforcement learning")
        if any(label in lowered for label in rl_labels) or "🟦" in type_str or type_str == "RL":
            return ModelType.RL

        return ModelType.Unknown
class WeightType(Enum):
    """Weight-type labels; each member wraps a ``ModelDetails`` named like the member."""

    Adapter = ModelDetails(name="Adapter")
    Original = ModelDetails(name="Original")
    Delta = ModelDetails(name="Delta")
class Precision(Enum):
    """Numeric precisions recognised by this module, plus an ``Unknown`` fallback."""

    float16 = ModelDetails("float16")
    bfloat16 = ModelDetails("bfloat16")
    Unknown = ModelDetails("?")

    # Declared static because the function takes no ``self``. Without the
    # decorator it only worked when called on the class and failed when
    # called on a member.
    @staticmethod
    def from_str(precision):
        """Map a precision label to a ``Precision`` member.

        Accepts the bare name ("float16", "bfloat16") or its "torch."-prefixed
        form. Any other value, including ``None``, yields
        ``Precision.Unknown``.
        """
        if precision in ("torch.float16", "float16"):
            return Precision.float16
        if precision in ("torch.bfloat16", "bfloat16"):
            return Precision.bfloat16
        return Precision.Unknown
# Column selection
# Names of the leaderboard columns that are not flagged as hidden.
COLS = [column.name for column in fields(AutoEvalColumn) if not column.hidden]

# Names and datatype labels of the queue columns, in declaration order.
_queue_columns = fields(EvalQueueColumn)
EVAL_COLS = [column.name for column in _queue_columns]
EVAL_TYPES = [column.type for column in _queue_columns]

# Column name of every task in Tasks.
BENCHMARK_COLS = [task_entry.value.col_name for task_entry in Tasks]
|