Spaces:

ror
/

performative_dashboard

Sleeping

App Files Community

ror HF Staff committed on Sep 30

Commit

22cf82d

1 Parent(s): 9f6e83d

More info in title

Browse files

Files changed (2) hide show

bar_plot.py +25 -16
data.py +10 -8

bar_plot.py CHANGED Viewed

@@ -75,24 +75,35 @@ def create_matplotlib_bar_plot() -> None:
     fig, axs = plt.subplots(2, 1, figsize=(30, 16), sharex=True)
     fig.patch.set_facecolor('#000000')
-    # Load and sanitize data
     per_device_data = load_data()
-    batch_sizes = {name: device_data.get_main_batch_size() for name, device_data in per_device_data.items()}
-    if len(set(batch_sizes.values())) > 1:
-        fig.suptitle(f"Unmatched batch sizes: {batch_sizes}", color='white', fontsize=18, pad=20)
-        return None
     # TTFT Plot (left)
     ttft_bars, ttft_errors, x_ticks = make_bar_kwargs(per_device_data, "ttft")
-    draw_bar_plot(axs[0], ttft_bars, ttft_errors, "Time to first token and inter-token latency (lower is better)", "TTFT (seconds)", x_ticks)
     # # ITL Plot (right)
     itl_bars, itl_errors, x_ticks = make_bar_kwargs(per_device_data, "itl")
-    draw_bar_plot(axs[1], itl_bars, itl_errors, None, "ITL (seconds)", x_ticks)
-    # # E2E Plot (right)
-    # e2e_bars, e2e_errors = make_bar_kwargs("e2e")
-    # draw_bar_plot(axs, e2e_bars, e2e_errors, "End-to-end latency (lower is better)", "E2E (seconds)")
     plt.tight_layout()
     # Add common legend with full text
@@ -102,7 +113,7 @@ def create_matplotlib_bar_plot() -> None:
     # Put a legend to the right of the current axis
     fig.legend(legend_handles, legend_labels, loc='lower center', ncol=4,
-               bbox_to_anchor=(0.515, -0.15), facecolor='black', edgecolor='white',
                labelcolor='white', fontsize=14)
     # Save plot to bytes with high DPI for crisp text
@@ -124,7 +135,7 @@ def create_matplotlib_bar_plot() -> None:
     return html
-def draw_bar_plot(ax: plt.Axes, bar_kwargs: dict, errors: list, title: str, ylabel: str, xticks: list[tuple[float, str]]):
     ax.set_facecolor('#000000')
     ax.grid(True, alpha=0.2, color='white', zorder=0)
     # Draw bars
@@ -134,10 +145,8 @@ def draw_bar_plot(ax: plt.Axes, bar_kwargs: dict, errors: list, title: str, ylab
         bar_kwargs["x"], bar_kwargs["height"], yerr=errors,
         fmt='none', ecolor='white', alpha=0.8, elinewidth=1.5, capthick=1.5, capsize=4, zorder=4,
     )
-    # Set labels and title
     ax.set_ylabel(ylabel, color='white', fontsize=16)
-    ax.set_title(title, color='white', fontsize=18, pad=20)
-    # Set ticks and grid
     ax.set_xticks([])
     ax.tick_params(colors='white', labelsize=13)
     ax.set_xticks([xt[0] for xt in xticks], [xt[1] for xt in xticks], fontsize=16)

     fig, axs = plt.subplots(2, 1, figsize=(30, 16), sharex=True)
     fig.patch.set_facecolor('#000000')
+    # Load data and ensure coherence
     per_device_data = load_data()
+    batch_size, sequence_length, num_tokens_to_generate = None, None, None
+    for device_name, device_data in per_device_data.items():
+        bs, seqlen, n_tok = device_data.ensure_coherence()
+        if batch_size is None:
+            batch_size, sequence_length, num_tokens_to_generate = bs, seqlen, n_tok
+        elif (bs, seqlen, n_tok) != (batch_size, sequence_length, num_tokens_to_generate):
+            fig.suptitle(
+                f"Mismatch for batch size, sequence length and number of tokens to generate between configs: {bs} "
+                f"!= {batch_size}, {seqlen} != {sequence_length}, {n_tok} != {num_tokens_to_generate}",
+                color='white', fontsize=18, pad=20
+            )
+            return None
     # TTFT Plot (left)
     ttft_bars, ttft_errors, x_ticks = make_bar_kwargs(per_device_data, "ttft")
+    draw_bar_plot(axs[0], ttft_bars, ttft_errors, "TTFT (seconds)", x_ticks)
     # # ITL Plot (right)
     itl_bars, itl_errors, x_ticks = make_bar_kwargs(per_device_data, "itl")
+    draw_bar_plot(axs[1], itl_bars, itl_errors, "ITL (seconds)", x_ticks)
+    # Title and tight layout
+    title = "\n".join([
+        "Time to first token and inter-token latency (lower is better)",
+        f"Batch size: {batch_size},  sequence length: {sequence_length},  new tokens: {num_tokens_to_generate}",
+    ])
+    fig.suptitle(title, color='white', fontsize=20, y=1.005)
     plt.tight_layout()
     # Add common legend with full text
     # Put a legend to the right of the current axis
     fig.legend(legend_handles, legend_labels, loc='lower center', ncol=4,
+               bbox_to_anchor=(0.515, -0.11), facecolor='black', edgecolor='white',
                labelcolor='white', fontsize=14)
     # Save plot to bytes with high DPI for crisp text
     return html
+def draw_bar_plot(ax: plt.Axes, bar_kwargs: dict, errors: list, ylabel: str, xticks: list[tuple[float, str]]):
     ax.set_facecolor('#000000')
     ax.grid(True, alpha=0.2, color='white', zorder=0)
     # Draw bars
         bar_kwargs["x"], bar_kwargs["height"], yerr=errors,
         fmt='none', ecolor='white', alpha=0.8, elinewidth=1.5, capthick=1.5, capsize=4, zorder=4,
     )
+    # Set labels, ticks and grid
     ax.set_ylabel(ylabel, color='white', fontsize=16)
     ax.set_xticks([])
     ax.tick_params(colors='white', labelsize=13)
     ax.set_xticks([xt[0] for xt in xticks], [xt[1] for xt in xticks], fontsize=16)

data.py CHANGED Viewed

@@ -25,15 +25,17 @@ class ModelBenchmarkData:
         num_tokens = len(measures["t_tokens"]) - 1
         return delta_t / num_tokens
-    def get_main_batch_size(self) -> int:
-        batch_sizes = {}
         for cfg_name, data in self.data.items():
-            for measure in data["measures"]:
-                bs = measure["batch_size"]
-                if bs not in batch_sizes:
-                    batch_sizes[bs] = 0
-                batch_sizes[bs] += 1
-        return max(batch_sizes, key=batch_sizes.get)
     def get_bar_plot_data(self, collapse_on_cache: bool = True, collapse_on_compile_mode: bool = True) -> dict:
         # Gather data for each scenario

         num_tokens = len(measures["t_tokens"]) - 1
         return delta_t / num_tokens
+    def ensure_coherence(self) -> tuple[int, int, int]:
+        all_hyperparams = set()
         for cfg_name, data in self.data.items():
+            config = data["metadata"]["config"]
+            hyperparams = (config["batch_size"], config["sequence_length"], config["num_tokens_to_generate"])
+            all_hyperparams.add(hyperparams)
+        if len(all_hyperparams) > 1:
+            raise ValueError(
+                f"Different batch size, sequence length or nb of tokens to generate between configs: {all_hyperparams}"
+            )
+        return all_hyperparams.pop()
     def get_bar_plot_data(self, collapse_on_cache: bool = True, collapse_on_compile_mode: bool = True) -> dict:
         # Gather data for each scenario