File size: 3,562 Bytes
4d2d78e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""Configuration recommendation module for MLPerf benchmarks."""

import logging

import pandas as pd
from utils import get_feature_type

logger = logging.getLogger(__name__)


class ConfigurationFinder:
    """Filter and rank benchmark configurations against user constraints.

    The finder wraps a benchmark ``DataFrame`` and offers three steps:
    filtering rows by constraints (``find_configurations``), sorting rows by
    a metric column (``rank_configurations``), and both combined and
    truncated to the best rows (``recommend``).
    """

    # Column compared against the "min_accelerators" / "max_accelerators"
    # constraints in ``find_configurations``.
    _ACCELERATOR_COUNT_COLUMN = "system.accelerator.total_count"

    def __init__(self, dataset: pd.DataFrame):
        """Store the benchmark dataset and the metric column names.

        Args:
            dataset: Benchmark results, one configuration per row. The frame
                is stored by reference; filtering works on a copy.
        """
        self.df = dataset
        # Column used by ``recommend`` to rank configurations.
        self.perf_metric = "metrics.result_per_accelerator"
        self.cost_metric = "cost_per_million_tokens"
        self.total_perf_metric = "metrics.result"

    @staticmethod
    def _is_wildcard(value) -> bool:
        """Return True when a constraint value means "no restriction"."""
        return value is None or (isinstance(value, str) and value == "Any")

    def is_within_tolerance(
        self, value1: float, value2: float, tolerance: float = 0.1
    ) -> bool:
        """Check whether two values differ by at most a relative tolerance.

        The difference is measured relative to the larger magnitude of the
        two values. When either value is zero the relative difference is
        undefined, so the values must then be exactly equal.

        Args:
            value1: First value to compare.
            value2: Second value to compare.
            tolerance: Maximum allowed relative difference (0.1 means 10%).

        Returns:
            True if the values are within the tolerance; False otherwise,
            including when either value is None or the values cannot be
            compared numerically.
        """
        if value1 is None or value2 is None:
            return False

        try:
            if value1 == 0 or value2 == 0:
                return value1 == value2
            percentage_diff = abs(value1 - value2) / max(abs(value1), abs(value2))
            return percentage_diff <= tolerance
        except (TypeError, ValueError, ArithmeticError):
            # Non-numeric or otherwise incomparable inputs are treated as
            # "not within tolerance" rather than propagated to the caller.
            return False

    def find_configurations(
        self, constraints: dict, tolerance: float = 0.1
    ) -> pd.DataFrame:
        """Return the rows of the dataset that satisfy the given constraints.

        Each key of ``constraints`` that names a dataset column filters on
        that column: features reported as ``"continuous"`` by
        ``get_feature_type`` match within ``target * (1 +/- tolerance)``,
        all other features must match exactly. Keys that are not columns and
        values of ``None`` or ``"Any"`` are ignored.

        The special keys ``"min_accelerators"`` and ``"max_accelerators"``
        bound the ``system.accelerator.total_count`` column (inclusive).
        Falsy or ``"Any"`` bounds are ignored, and so are both bounds when
        the dataset does not contain that column.

        Args:
            constraints: Mapping of feature name to required value.
            tolerance: Relative width of the match window for continuous
                features (0.1 means +/-10%).

        Returns:
            A filtered frame. An empty dataset yields a new empty
            ``DataFrame`` without columns.
        """
        if self.df.empty:
            return pd.DataFrame()

        # Copy so the caller never receives the stored frame itself, even
        # when no constraint ends up filtering anything.
        filtered_df = self.df.copy()

        for feature, value in constraints.items():
            if feature not in filtered_df.columns or self._is_wildcard(value):
                continue

            if get_feature_type(feature) != "continuous":
                filtered_df = filtered_df[filtered_df[feature] == value]
                continue

            try:
                target_value = float(value)
                lower_bound = target_value * (1 - tolerance)
                upper_bound = target_value * (1 + tolerance)
                if target_value < 0:
                    # Scaling a negative target flips the order of the two
                    # bounds; swap them so the window is not empty.
                    lower_bound, upper_bound = upper_bound, lower_bound
                filtered_df = filtered_df[
                    (filtered_df[feature] >= lower_bound)
                    & (filtered_df[feature] <= upper_bound)
                ]
            except (TypeError, ValueError):
                # The value is not numeric, or the column cannot be compared
                # with numbers: fall back to an exact match.
                filtered_df = filtered_df[filtered_df[feature] == value]

        count_column = self._ACCELERATOR_COUNT_COLUMN
        if count_column in filtered_df.columns:
            min_acc = constraints.get("min_accelerators")
            if min_acc and not self._is_wildcard(min_acc):
                filtered_df = filtered_df[filtered_df[count_column] >= min_acc]

            max_acc = constraints.get("max_accelerators")
            if max_acc and not self._is_wildcard(max_acc):
                filtered_df = filtered_df[filtered_df[count_column] <= max_acc]

        return filtered_df

    def rank_configurations(
        self,
        df: pd.DataFrame,
        metric: str = "metrics.result_per_accelerator",
        ascending: bool = False,
    ) -> pd.DataFrame:
        """Sort configurations by a metric column.

        Args:
            df: Configurations to rank.
            metric: Name of the column to sort by.
            ascending: Sort direction; the default puts the largest metric
                values first.

        Returns:
            A sorted frame, or ``df`` itself unchanged when it is empty or
            does not contain ``metric``.
        """
        if df.empty or metric not in df.columns:
            return df
        return df.sort_values(by=metric, ascending=ascending)

    def recommend(
        self, constraints: dict, top_n: int = 10, tolerance: float = 0.1
    ) -> pd.DataFrame:
        """Return the best configurations that satisfy the constraints.

        Rows are filtered with ``find_configurations`` and ranked by
        ``self.perf_metric`` in descending order.

        Args:
            constraints: Mapping of feature name to required value, as
                accepted by ``find_configurations``.
            top_n: Maximum number of rows to return.
            tolerance: Relative match window for continuous features,
                forwarded to ``find_configurations``.

        Returns:
            At most ``top_n`` rows, highest ``self.perf_metric`` first.
        """
        filtered_configs = self.find_configurations(
            constraints, tolerance=tolerance
        )
        ranked_configs = self.rank_configurations(
            filtered_configs, metric=self.perf_metric, ascending=False
        )
        return ranked_configs.head(top_n)