ror (HF Staff) committed
Commit aa7e786 · 1 Parent(s): 22cf82d

Change data backend

Files changed (2):
  1. bar_plot.py +1 -1
  2. data.py +18 -19
bar_plot.py CHANGED
@@ -86,7 +86,7 @@ def create_matplotlib_bar_plot() -> None:
         fig.suptitle(
             f"Mismatch for batch size, sequence length and number of tokens to generate between configs: {bs} "
             f"!= {batch_size}, {seqlen} != {sequence_length}, {n_tok} != {num_tokens_to_generate}",
-            color='white', fontsize=18, pad=20
+            color='white', fontsize=18
         )
         return None
 
 
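The one-line fix in bar_plot.py drops `pad=20` from the `fig.suptitle(...)` call. That keyword belongs to `Axes.set_title`, not to `Figure.suptitle`, which forwards extra keyword arguments to a `matplotlib.text.Text` and therefore rejects `pad`. A minimal sketch of the distinction (the titles and the `y` value are illustrative, not from the repo):

import matplotlib.pyplot as plt

fig, ax = plt.subplots()

# `pad` is an Axes.set_title() parameter: the offset in points between
# the title and the top of the axes.
ax.set_title("per-axes title", pad=20)

# Figure.suptitle() passes extra kwargs to a Text object, which has no
# `pad` property, so suptitle(..., pad=20) fails. Vertical placement of
# a figure-level title is set with `y` (in figure fraction) instead.
fig.suptitle("figure-level title", fontsize=18, y=0.98)

plt.show()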
data.py CHANGED
@@ -10,25 +10,24 @@ class ModelBenchmarkData:
 
     def __init__(self, json_path: str) -> None:
         with open(json_path, "r") as f:
-            self.data = json.load(f)
+            self.data: dict = json.load(f)
 
-    def compute_e2e_latency(self, measures: dict) -> tuple[float, Optional[float]]:
-        return measures["e2e_latency"]
+    def compute_ttft(self, measures: dict) -> list[float]:
+        return [dts[0] for dts in measures["dt_tokens"]]
+
+    def compute_itl(self, measures: dict) -> list[float]:
+        return [
+            (dts[-1] - dts[0]) / (len(dts) - 1) if len(dts) > 2 else 0
+            for dts in measures["dt_tokens"]
+        ]
 
-    def compute_ttft(self, measures: dict) -> float:
-        return measures["t_tokens"][0] - measures["wall_time_start"]
-
-    def compute_itl(self, measures: dict) -> Optional[float]:
-        if len(measures["t_tokens"]) < 2:
-            return None
-        delta_t = measures["t_tokens"][-1] - measures["t_tokens"][0]
-        num_tokens = len(measures["t_tokens"]) - 1
-        return delta_t / num_tokens
+    def compute_e2e_latency(self, measures: dict) -> list[float]:
+        return measures["e2e_latency"][:]
 
     def ensure_coherence(self) -> tuple[int, int, int]:
         all_hyperparams = set()
-        for cfg_name, data in self.data.items():
-            config = data["metadata"]["config"]
+        for data in self.data.values():
+            config = data["config"]
             hyperparams = (config["batch_size"], config["sequence_length"], config["num_tokens_to_generate"])
             all_hyperparams.add(hyperparams)
             if len(all_hyperparams) > 1:
@@ -40,12 +39,12 @@ class ModelBenchmarkData:
     def get_bar_plot_data(self, collapse_on_cache: bool = True, collapse_on_compile_mode: bool = True) -> dict:
         # Gather data for each scenario
         per_scenario_data = {}
-        for i, (cfg_name, data) in enumerate(self.data.items()):
+        for cfg_name, data in self.data.items():
             per_scenario_data[cfg_name] = {
-                "ttft": [self.compute_ttft(d) for d in data["measures"]],
-                "itl": [self.compute_itl(d) for d in data["measures"]],
-                "e2e": [self.compute_e2e_latency(d) for d in data["measures"]],
-                "config": data["metadata"]["config"],
+                "ttft": self.compute_ttft(data["measures"]),
+                "itl": self.compute_itl(data["measures"]),
+                "e2e": self.compute_e2e_latency(data["measures"]),
+                "config": data["config"],
             }
         # Eventually collapse on cache
         if collapse_on_cache:
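
The data.py rewrite swaps the per-measurement timing backend (absolute `t_tokens` timestamps plus `wall_time_start`, one dict per measurement) for a single columnar `measures` dict, and reads the config from `data["config"]` instead of `data["metadata"]["config"]`. For reference, a sketch of the JSON layout the new code appears to expect; the file contents and scenario name below are illustrative guesses built from the keys the code reads, not taken from the repo:

import json
import tempfile

from data import ModelBenchmarkData  # the class shown in the diff above

# Hypothetical benchmark dump matching the keys the new data.py reads.
example = {
    "eager_static_cache": {  # illustrative scenario name
        "config": {"batch_size": 1, "sequence_length": 128, "num_tokens_to_generate": 3},
        "measures": {
            # One inner list per run: token arrival times relative to the
            # run start, so dts[0] is the time to first token (TTFT).
            "dt_tokens": [[0.05, 0.07, 0.09], [0.06, 0.08, 0.10]],
            # One end-to-end latency per run.
            "e2e_latency": [0.11, 0.12],
        },
    }
}

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(example, f)

bench = ModelBenchmarkData(f.name)
measures = bench.data["eager_static_cache"]["measures"]
print(bench.compute_ttft(measures))  # [0.05, 0.06]
print(bench.compute_itl(measures))   # ≈ [0.02, 0.02], i.e. (dts[-1] - dts[0]) / (len(dts) - 1)

One caveat worth noting: the `len(dts) > 2` guard in `compute_itl` makes a two-token run report an ITL of 0 even though one inter-token gap exists, so `len(dts) > 1` may have been intended.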