File size: 3,654 Bytes
4d2d78e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | import json
import logging
import polars as pl
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Feature registry: group name -> {feature name -> type tag}.
# The type tags used below are "continuous", "categorical", "text" and
# "boolean". Insertion order of groups and of features within a group is
# significant, because the derived lookups in this module preserve it.
FEATURES = {
    "Performance": {
        "metrics.result": "continuous",
        "metrics.result_per_accelerator": "continuous",
        "metrics.accuracy": "continuous",
    },
    "Model": {
        "model.name": "categorical",
        "model.mlperf_name": "categorical",
        "model.architecture": "categorical",
        "model.number_of_parameters": "continuous",
        "model.weight_data_types": "categorical",
    },
    "Accelerator": {
        "system.accelerator.vendor": "categorical",
        "system.accelerator.name": "categorical",
        "system.accelerator.count_per_node": "continuous",
        "system.accelerator.total_count": "continuous",
        "system.accelerator.memory_capacity": "continuous",
        "system.accelerator.memory_config": "text",
        "system.interconnect.accelerator": "categorical",
    },
    "CPU": {
        "system.cpu.vendor": "categorical",
        "system.cpu.model": "categorical",
        "system.cpu.core_count": "continuous",
        "system.cpu.count_per_node": "continuous",
        "system.cpu.frequency": "continuous",
        "system.cpu.caches": "text",
        "system.cpu.vcpu_count": "continuous",
    },
    "System": {
        "system.name": "text",
        "system.type": "categorical",
        "system.cooling": "categorical",
        "system.number_of_nodes": "continuous",
        "system.memory.capacity": "continuous",
        "system.memory.configuration": "text",
        "system.interconnect.accelerator_host": "categorical",
    },
    "Software": {
        "software.framework": "categorical",
        "software.version": "categorical",
        "software.operating_system": "categorical",
    },
    "Submission": {
        "submission.organization": "categorical",
        "submission.division": "categorical",
        "submission.scenario": "categorical",
        "submission.availability": "boolean",
    },
}
def get_features_by_type(feature_type: str) -> list[str]:
    """Return the names of every feature in FEATURES tagged ``feature_type``.

    Groups are scanned in the order they appear in FEATURES and features in
    the order they appear inside each group, so the result keeps that order.
    A type tag that matches nothing yields an empty list.
    """
    return [
        name
        for members in FEATURES.values()
        for name, kind in members.items()
        if kind == feature_type
    ]
# Type tag -> list of feature names carrying that tag, built once when the
# module is imported. Key order: continuous, categorical, boolean, text.
FEATURE_TYPES = {
    kind: get_features_by_type(kind)
    for kind in ("continuous", "categorical", "boolean", "text")
}
# Group name -> list of the feature names in that group (type tags dropped),
# in the same order as FEATURES.
UI_FEATURE_GROUPS = {name: list(members) for name, members in FEATURES.items()}
def get_feature_type(feature_name: str) -> str:
    """Look up the type tag registered for ``feature_name`` in FEATURES.

    The first group that contains the feature wins. A feature that is not
    registered in any group is reported as ``"categorical"``.
    """
    registered = (
        members[feature_name]
        for members in FEATURES.values()
        if feature_name in members
    )
    return next(registered, "categorical")
def _coerce_numeric_string(value):
    """Turn a plain numeric string into ``int``/``float``; pass anything else through."""
    if not isinstance(value, str):
        return value
    # isdecimal(), not isdigit(): isdigit() is also true for characters such
    # as superscript digits ("²"), which int()/float() reject with ValueError.
    if value.isdecimal():
        return int(value)
    # Allow exactly one "." so strings like "3.14" become floats.
    if value.replace(".", "", 1).isdecimal():
        return float(value)
    return value


def load_data(file_path: str = "data.json") -> pl.DataFrame:
    """Load processed benchmark data from a JSON file into a polars DataFrame.

    The file is expected to hold a JSON array of objects (one per benchmark
    result). String values made up only of decimal digits, optionally with a
    single ".", are converted to ``int`` / ``float`` before the frame is
    built; signed numbers, exponents and any other strings are left as-is.

    Args:
        file_path: Path of the JSON file to read. Read as UTF-8.

    Returns:
        A DataFrame with one row per JSON object. If anything goes wrong
        (missing file, invalid JSON, unexpected structure, frame construction
        failure) the error is logged and an empty DataFrame is returned
        instead of raising.
    """
    logger.info(f"Loading processed data from {file_path}")
    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        records = [
            {key: _coerce_numeric_string(value) for key, value in item.items()}
            for item in data
        ]
        # infer_schema_length=None: let polars consider every row when
        # inferring column types rather than only a leading sample.
        df = pl.DataFrame(records, infer_schema_length=None)
        logger.info(f"Loaded {len(df)} benchmark results")
        return df
    except Exception as e:
        # Deliberate best-effort boundary: callers get an empty frame, not a crash.
        logger.error(f"Error loading data: {e}")
        return pl.DataFrame()
|