Spaces:
Running
Running
| import pandas as pd | |
| from datetime import datetime, timezone | |
def verify_df_intergrity(df):
    """Check that the compared columns of *df* share no values with each other.

    Columns are compared pairwise by converting each one to a ``set`` and
    intersecting. Two exemptions apply:

    * the last column is never part of any comparison;
    * the column at position 7 is never used as the left-hand side of a pair
      (it is still checked against the columns that precede it).

    NOTE(review): both exemptions look tailored to one specific column
    layout -- confirm against the caller's schema before changing them.

    Returns:
        ``False`` as soon as one compared pair shares a value, ``True``
        otherwise (including for frames with fewer than three columns).
    """
    names = df.columns.tolist()
    # Every column except the last one takes part in the comparison.
    compared = names[:-1]
    for left_pos, left_name in enumerate(compared):
        if left_pos == 7:
            continue
        for right_name in compared[left_pos + 1:]:
            if set(df[left_name]) & set(df[right_name]):
                return False
    return True
class ModelsData:
    """Filterable wrapper around a DataFrame of per-model questionnaire scores.

    Indexing with a value (``obj[key]``) keeps only the rows whose
    ``model_id``, ``model_version_id``, ``questionnaire_name`` or
    ``questionnaire_task`` equals that value and returns a new ``ModelsData``,
    so lookups can be chained until a single row is left.
    """

    # Columns probed by __getitem__, in priority order.
    _FILTER_COLUMNS = (
        "model_id",
        "model_version_id",
        "questionnaire_name",
        "questionnaire_task",
    )

    class KeyErrorInCache(Exception):
        """Exception raised when a key is not found in the DataFrame cache."""

    def __init__(self, df):
        """Initialize the ModelsData object.

        Args:
            df: The pandas DataFrame to wrap. It is stored with a fresh
                0..n-1 index; the caller's frame is not modified.

        Raises:
            ValueError: If *df* is not a pandas DataFrame, or if
                ``verify_df_intergrity`` rejects it.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Data must be a pandas DataFrame.")
        # reset_index returns a new frame, so the caller's DataFrame is untouched.
        self.df = df.reset_index(drop=True)
        if not verify_df_intergrity(self.df):
            raise ValueError(f"The {self.df.columns.tolist()} must not have any common values.")

    def __getitem__(self, key):
        """Filter the rows by a column value.

        The key is looked up in ``model_id``, ``model_version_id``,
        ``questionnaire_name`` and ``questionnaire_task`` (in that order); the
        first column containing it is used for the filter.

        Example:
            obj['MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7']['ASI']['QMNLI'].get_mean_score()

        Returns:
            A new ``ModelsData`` holding only the matching rows.

        Raises:
            ModelsData.KeyErrorInCache: If no filter column contains *key*.
        """
        for column in self._FILTER_COLUMNS:
            if key in self.df[column].values:
                # __init__ re-indexes the result, so no reset is needed here.
                return ModelsData(self.df[self.df[column] == key])
        raise ModelsData.KeyErrorInCache("These specific parameters are not found in cache.")

    def get_model_version_id(self):
        """Return the ``model_version_id`` of the single remaining row.

        Raises:
            ValueError: If the DataFrame does not hold exactly one row.
        """
        # An empty frame must fail the same way as an ambiguous one instead of
        # surfacing an IndexError from the element access below.
        if len(self.df) != 1:
            raise ValueError("To get model_version_id the DF must be exactly of length 1, use indexing to filter the desired model and questionnaire and then call `get_model_version_id`")
        return self.df["model_version_id"].to_list()[0]

    def get_evaluation_results(self):
        """Render the rows as a Hugging Face ``model-index`` YAML string.

        Expects a DataFrame with columns:
            - model_id (all rows must have the same model_id)
            - questionnaire_task
            - questionnaire_name
            - mean_score

        Returns a YAML string in the format:

            model-index:
              - name: <model_id>
                results:
                  - task:
                      type: <questionnaire_task>
                    dataset:
                      name: <questionnaire_name>
                      type: Qpsychometric
                    metrics:
                      - name: Mean-Score
                        type: Mean-Score
                        value: <mean_score>
                    source:
                      name: Qpsychometric Space
                      url: https://huggingface.co/spaces/cnai-lab/Qpsychometric
                  - ...

        Raises:
            ValueError: If the DataFrame is empty or contains more than one
                unique model_id.
        """
        unique_model_ids = self.df["model_id"].unique()
        if len(unique_model_ids) == 0:
            raise ValueError("No rows found. Expected exactly one unique model_id.")
        if len(unique_model_ids) > 1:
            raise ValueError(
                f"Multiple model_ids found: {unique_model_ids}. "
                "Expected only one unique model_id."
            )
        model_id = unique_model_ids[0]

        # The leading spaces are significant: they encode the YAML nesting.
        lines = [
            "model-index:",
            f"  - name: {model_id}",
            "    results:",
        ]
        # One 'results' entry per row.
        for _, row in self.df.iterrows():
            lines.extend(
                (
                    "      - task:",
                    f"          type: {row['questionnaire_task']}",
                    "        dataset:",
                    f"          name: {row['questionnaire_name']}",
                    "          type: Qpsychometric",
                    "        metrics:",
                    "          - name: Mean-Score",
                    "            type: Mean-Score",
                    f"            value: {row['mean_score']}",
                    "        source:",
                    "          name: Qpsychometric Space",
                    "          url: https://huggingface.co/spaces/cnai-lab/Qpsychometric",
                )
            )
        return "\n".join(lines)

    def get_mean_score(self):
        """Return ``{"avg_mean_score": <mean_score>}`` for the single remaining row.

        Raises:
            ValueError: If the DataFrame does not hold exactly one row.
        """
        if len(self.df) != 1:
            raise ValueError("To get mean score the DF must be exactly of length 1, use indexing to filter the desired model and then call `get_mean_score`")
        return {"avg_mean_score": self.df["mean_score"].to_list()[0]}

    def __str__(self):
        """String representation of the DataFrame."""
        return self.df.to_string()

    def __len__(self):
        """Return the number of rows currently held."""
        return len(self.df)