Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| import json | |
| from pathlib import Path | |
| import sys | |
| sys.path.insert(0, str(Path(__file__).parent)) | |
| from src.leaderboard.eval_processor import process_eval_file | |
def preprocess_eval_file(eval_path: Path, output_dir: Path, experiment_name: str):
    """Convert one eval log into a JSON data file and return its index entry.

    The log's ``tag`` (taken from the eval metadata, ``'unknown'`` when the
    metadata is empty or has no ``tag`` key) and model id are combined with
    the per-sample records produced by ``process_eval_file`` and written to
    ``<output_dir>/<experiment name>_<tag>.json``.

    Args:
        eval_path: Path of the eval log to process.
        output_dir: Directory that receives the JSON file; it is not created
            here, so it must already exist.
        experiment_name: Name of the experiment. It is stored in the output
            and, lower-cased with spaces turned into underscores, used as the
            first part of the output file name.

    Returns:
        A dict with the keys ``name``, ``data_file`` (file name without
        directory), ``tag`` and ``model_id`` describing the written file.
    """
    # Function-scope import: inspect_ai is only needed once a log is read.
    from inspect_ai.log import read_eval_log

    # Only eval-level fields (metadata, model) are used from this object, so
    # read just the header instead of loading every sample; the samples are
    # loaded separately by process_eval_file below.
    header = read_eval_log(eval_path, header_only=True)
    metadata = header.eval.metadata
    tag = metadata.get('tag', 'unknown') if metadata else 'unknown'
    model_id = header.eval.model

    # NOTE(review): the result is presumably a pandas DataFrame (it is only
    # used through ``to_dict(orient='records')``) -- confirm in eval_processor.
    samples = process_eval_file(eval_path, experiment_name)

    experiment_data = {
        "name": experiment_name,
        "model_id": model_id,
        "tag": tag,
        "eval_filename": eval_path.name,
        "samples": samples.to_dict(orient='records'),
    }

    # A path separator inside the name or tag would make the write target a
    # (usually non-existent) subdirectory and make the returned ``data_file``
    # disagree with the real location, so neutralise separators first.
    stem = f"{experiment_name.replace(' ', '_').lower()}_{tag}"
    safe_stem = stem.replace('/', '_').replace('\\', '_')
    output_file = output_dir / f"{safe_stem}.json"

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(experiment_data, f, indent=2)

    return {
        "name": experiment_name,
        "data_file": output_file.name,
        "tag": tag,
        "model_id": model_id,
    }
def main():
    """Preprocess every experiment listed in ``data/leaderboard_config.json``.

    All paths are resolved relative to the directory containing this file.
    For each entry of the config's ``experiments`` list (each entry must have
    the keys ``eval_path`` and ``name``), the eval file is converted with
    ``preprocess_eval_file`` into ``data/experiments/``. The collected index
    entries are then written to ``data/processed_config.json`` and a summary
    line is printed.

    Entries whose eval file does not exist are skipped with a warning on
    stderr rather than aborting the run.

    Exits with status 1 when the config file is missing or its
    ``experiments`` list is missing or empty.
    """
    repo_root = Path(__file__).parent
    data_dir = repo_root / "data"
    output_dir = data_dir / "experiments"
    config_path = data_dir / "leaderboard_config.json"
    output_dir.mkdir(parents=True, exist_ok=True)

    if not config_path.exists():
        print(f"Error: {config_path} does not exist")
        sys.exit(1)

    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    if not config.get("experiments"):
        print("Error: No experiments in config")
        sys.exit(1)

    experiments = []
    for exp_config in config["experiments"]:
        eval_path = repo_root / exp_config["eval_path"]
        experiment_name = exp_config["name"]
        if not eval_path.exists():
            # Still best-effort, but say so: a silently dropped experiment is
            # indistinguishable from a typo in the config.
            print(
                f"Warning: skipping {experiment_name!r}: "
                f"{eval_path} does not exist",
                file=sys.stderr,
            )
            continue
        experiments.append(
            preprocess_eval_file(eval_path, output_dir, experiment_name)
        )

    processed_config_path = data_dir / "processed_config.json"
    with open(processed_config_path, 'w', encoding="utf-8") as f:
        json.dump({"experiments": experiments}, f, indent=2)

    print(f"Preprocessed {len(experiments)} experiments")
# Run the preprocessing only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()