| import json |
| import logging |
|
|
| import polars as pl |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
# Schema of the benchmark features, organised by display group.
# Each group maps a dotted feature name to its value type, which is one of
# "continuous", "categorical", "boolean" or "text".
FEATURES = {
    "Performance": {
        "metrics.result": "continuous",
        "metrics.result_per_accelerator": "continuous",
        "metrics.accuracy": "continuous",
    },
    "Model": {
        "model.name": "categorical",
        "model.mlperf_name": "categorical",
        "model.architecture": "categorical",
        "model.number_of_parameters": "continuous",
        "model.weight_data_types": "categorical",
    },
    "Accelerator": {
        "system.accelerator.vendor": "categorical",
        "system.accelerator.name": "categorical",
        "system.accelerator.count_per_node": "continuous",
        "system.accelerator.total_count": "continuous",
        "system.accelerator.memory_capacity": "continuous",
        "system.accelerator.memory_config": "text",
        "system.interconnect.accelerator": "categorical",
    },
    "CPU": {
        "system.cpu.vendor": "categorical",
        "system.cpu.model": "categorical",
        "system.cpu.core_count": "continuous",
        "system.cpu.count_per_node": "continuous",
        "system.cpu.frequency": "continuous",
        "system.cpu.caches": "text",
        "system.cpu.vcpu_count": "continuous",
    },
    "System": {
        "system.name": "text",
        "system.type": "categorical",
        "system.cooling": "categorical",
        "system.number_of_nodes": "continuous",
        "system.memory.capacity": "continuous",
        "system.memory.configuration": "text",
        "system.interconnect.accelerator_host": "categorical",
    },
    "Software": {
        "software.framework": "categorical",
        "software.version": "categorical",
        "software.operating_system": "categorical",
    },
    "Submission": {
        "submission.organization": "categorical",
        "submission.division": "categorical",
        "submission.scenario": "categorical",
        "submission.availability": "boolean",
    },
}
|
|
|
|
def get_features_by_type(feature_type: str) -> list[str]:
    """Return the names of all features declared with ``feature_type``.

    Groups of ``FEATURES`` are scanned in declaration order, so the result
    lists matching features group by group. A type that no feature uses
    yields an empty list.
    """
    return [
        name
        for members in FEATURES.values()
        for name, declared_type in members.items()
        if declared_type == feature_type
    ]
|
|
|
|
# Feature names indexed by value type, computed once at import time.
FEATURE_TYPES = {
    kind: get_features_by_type(kind)
    for kind in ("continuous", "categorical", "boolean", "text")
}
|
|
# Group name -> ordered list of the feature names declared in that group.
UI_FEATURE_GROUPS = {name: list(members) for name, members in FEATURES.items()}
|
|
|
|
def get_feature_type(feature_name: str) -> str:
    """Look up the declared value type of ``feature_name`` in ``FEATURES``.

    The first group containing the feature wins. A name that appears in no
    group is reported as "categorical".
    """
    declared_types = (
        members[feature_name]
        for members in FEATURES.values()
        if feature_name in members
    )
    return next(declared_types, "categorical")
|
|
|
|
def _coerce_numeric_string(value: str) -> "int | float | str":
    """Convert a numeric-looking string to a number, else return it unchanged.

    Strings made only of digits become ``int``; strings that are digits plus
    a single ``.`` become ``float``. Signs and exponents are not recognised,
    so values such as "-5" or "1e3" stay strings.
    """
    try:
        if value.isdigit():
            return int(value)
        if value.replace(".", "", 1).isdigit():
            return float(value)
    except ValueError:
        # str.isdigit() also accepts characters such as "²" that int() and
        # float() reject; keep those values as text instead of failing.
        return value
    return value


def load_data(file_path: str = "data.json") -> pl.DataFrame:
    """Load processed benchmark data from a JSON file into a DataFrame.

    The file is expected to hold a sequence of records (dicts). String
    values that look like unsigned integers or decimals are converted to
    ``int`` / ``float`` before the frame is built.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A DataFrame with one row per record. If anything goes wrong while
        reading, parsing or building the frame, the error is logged and an
        empty DataFrame is returned instead of raising.
    """
    logger.info("Loading processed data from %s", file_path)

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        for item in data:
            # Only existing keys are reassigned, so the dict size is stable
            # and mutating it while iterating is safe.
            for key, value in item.items():
                if isinstance(value, str):
                    item[key] = _coerce_numeric_string(value)

        # infer_schema_length=None: presumably makes polars inspect every
        # row when inferring column types -- confirm against the polars docs.
        df = pl.DataFrame(data, infer_schema_length=None)
        logger.info("Loaded %d benchmark results", len(df))
        return df

    except Exception as e:
        # Best-effort loader: callers get an empty frame rather than an
        # exception. The traceback is logged so the failure can be traced.
        logger.exception("Error loading data: %s", e)
        return pl.DataFrame()
|
|