nouamanetazi HF Staff committed on
Commit
c68510e
·
1 Parent(s): d435c48

timeline view

Browse files
Files changed (1) hide show
  1. utils.py +83 -23
utils.py CHANGED
@@ -1,5 +1,6 @@
1
-
2
  import matplotlib.pyplot as plt
 
 
3
  def get_num_hidden_layers_in_pp(hidden_size, num_layers, vocab_size, intermediate_size, num_attention_heads, pp_size):
4
  # Get list of pipeline blocks and their costs
5
  pipeline_blocks = []
@@ -169,33 +170,92 @@ def plot_memory_breakdown(
169
 
170
  plt.tight_layout()
171
 
172
- # Create figure for aggregates plot
173
- fig2 = plt.figure(figsize=(10, 6))
174
  ax2 = fig2.add_subplot(1, 1, 1)
175
 
176
- # Plot aggregate metrics
177
- aggregates = results["Aggregates"]
178
- names = list(aggregates.keys())
179
- values = list(aggregates.values())
180
-
181
- bars2 = ax2.bar(range(len(aggregates)), values, color='orange')
182
-
183
- # Add value labels
184
- for bar in bars2:
185
- height = bar.get_height()
186
- ax2.text(bar.get_x() + bar.get_width()/2., height,
187
- f'{height:.1f} MiB',
188
- ha='center', va='bottom')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
- # Customize the second plot
191
- ax2.set_xticks(range(len(aggregates)))
192
- ax2.set_xticklabels(names, rotation=45, ha='right')
 
 
 
 
 
 
 
 
 
 
 
 
193
  ax2.set_ylabel('Memory (MiB)')
194
- ax2.set_title('Aggregate Memory Metrics', pad=20)
195
- max_y_value = max(values)
196
- ax2.set_ylim(0, max(80000, max_y_value))
197
 
198
- # Adjust layout to prevent text overlap
 
 
 
 
199
  plt.tight_layout()
200
 
 
 
 
 
201
  return fig1, fig2
 
 
1
  import matplotlib.pyplot as plt
2
+ import numpy as np
3
+
4
  def get_num_hidden_layers_in_pp(hidden_size, num_layers, vocab_size, intermediate_size, num_attention_heads, pp_size):
5
  # Get list of pipeline blocks and their costs
6
  pipeline_blocks = []
 
170
 
171
  plt.tight_layout()
172
 
173
+ # Create figure for timeline plot
174
+ fig2 = plt.figure(figsize=(12, 6))
175
  ax2 = fig2.add_subplot(1, 1, 1)
176
 
177
+ # Define timeline steps and their components
178
+ c = results["Components"]
179
+ timeline_steps = {
180
+ "Model Init": [
181
+ ("Model BF16", c["Model BF16"]),
182
+ ("DDP Gradient Buffers", c["DDP Gradient Buffers"]),
183
+ ],
184
+ "Gradient Accumulator Init": [
185
+ ("Model BF16", c["Model BF16"]),
186
+ ("DDP Gradient Buffers", c["DDP Gradient Buffers"]),
187
+ ("FP32 Parameters", c["FP32 Parameters"]),
188
+ ("FP32 Gradients", c["FP32 Gradients"])
189
+ ],
190
+ "Fwd-Bwd Peak": [
191
+ ("Model BF16", c["Model BF16"]),
192
+ ("DDP Gradient Buffers", c["DDP Gradient Buffers"]),
193
+ ("FP32 Parameters", c["FP32 Parameters"]),
194
+ ("FP32 Gradients", c["FP32 Gradients"]),
195
+ ("Activations", c["Activations"])
196
+ ],
197
+ "After Fwd-Bwd": [
198
+ ("Model BF16", c["Model BF16"]),
199
+ ("DDP Gradient Buffers", c["DDP Gradient Buffers"]),
200
+ ("FP32 Parameters", c["FP32 Parameters"]),
201
+ ("FP32 Gradients", c["FP32 Gradients"])
202
+ ],
203
+ "Optimizer Step": [
204
+ ("Model BF16", c["Model BF16"]),
205
+ ("FP32 Parameters", c["FP32 Parameters"]),
206
+ ("FP32 Gradients", c["FP32 Gradients"]),
207
+ ("Optimizer States", c["Optimizer States"])
208
+ ],
209
+ "2nd Fwd-Bwd Peak": [
210
+ ("Model BF16", c["Model BF16"]),
211
+ ("FP32 Parameters", c["FP32 Parameters"]),
212
+ ("FP32 Gradients", c["FP32 Gradients"]),
213
+ ("Optimizer States", c["Optimizer States"]),
214
+ ("DDP Gradient Buffers", c["DDP Gradient Buffers"]),
215
+ ("Activations", c["Activations"])
216
+ ],
217
+ "2nd Optimizer Step": [
218
+ ("Model BF16", c["Model BF16"]),
219
+ ("FP32 Parameters", c["FP32 Parameters"]),
220
+ ("FP32 Gradients", c["FP32 Gradients"]),
221
+ ("Optimizer States", c["Optimizer States"]),
222
+ ("DDP Gradient Buffers", c["DDP Gradient Buffers"])
223
+ ]
224
+ }
225
+ # Plot timeline
226
+ x = range(len(timeline_steps))
227
+ bottom = np.zeros(len(timeline_steps))
228
+ colors = plt.cm.Set3(np.linspace(0, 1, len(c)))
229
+ color_map = dict(zip(c.keys(), colors))
230
 
231
+ for component in c.keys():
232
+ heights = []
233
+ for step_components in timeline_steps.values():
234
+ height = 0
235
+ for comp_name, comp_value in step_components:
236
+ if comp_name == component:
237
+ height = comp_value
238
+ heights.append(height)
239
+
240
+ ax2.bar(x, heights, bottom=bottom, label=component, color=color_map[component])
241
+ bottom += heights
242
+
243
+ # Customize the timeline plot
244
+ ax2.set_xticks(x)
245
+ ax2.set_xticklabels(timeline_steps.keys(), rotation=45, ha='right')
246
  ax2.set_ylabel('Memory (MiB)')
247
+ ax2.set_title('Memory Timeline', pad=20)
248
+ ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
 
249
 
250
+ # Add total memory labels on top of each bar
251
+ for i, total in enumerate(bottom):
252
+ ax2.text(i, total, f'{total:.1f} MiB', ha='center', va='bottom')
253
+
254
+ # Adjust layout
255
  plt.tight_layout()
256
 
257
+ # Set y-axis limit
258
+ max_y_value = max(bottom)
259
+ ax2.set_ylim(0, max(80000, max_y_value))
260
+
261
  return fig1, fig2