lemonteaa
/

nanogpt-speedrun

Model card Files Files and versions

lemonteaa committed on Nov 10, 2024

Commit

d7ce189

·

verified ·

1 Parent(s): cf88959

Create analysis/plot_log.py

Files changed (1) hide show

baseline/analysis/plot_log.py +68 -0

baseline/analysis/plot_log.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.optimize import curve_fit
+def parse_file(file_path):
+    data = []
+    with open(file_path, 'r') as file:
+        for line in file:
+            parts = line.strip().split()
+            step = int(parts[0].split(':')[1].split('/')[0])
+            is_train = 'val' not in parts[1]
+            if is_train:
+                loss_key = 'train_loss'
+            else:
+                loss_key = 'val_loss'
+            loss = float(parts[1].split(':')[1])
+            step_avg = float(parts[3].split(':')[1].replace('ms', ''))
+            data.append({
+                'step': step,
+                'loss': loss,
+                'step_avg': step_avg,
+                'is_train': is_train
+            })
+    return data
+# Usage
+file_path = 'baseline_log.txt'
+data = parse_file(file_path)
+# Extract the steps and losses into separate lists
+steps = np.array([d['step'] for d in filter(lambda item: item['is_train'],data)])
+losses = np.array([d['loss'] for d in filter(lambda item: item['is_train'],data)])
+# Take the logarithm of the data
+log_steps = np.log10(steps)
+log_losses = np.log10(losses)
+# Define a linear function
+def linear_func(x, a, b):
+    return a * x + b
+# Fit the linear function to the logarithmic data
+popt, pcov = curve_fit(linear_func, log_steps, log_losses)
+# Create the plot
+plt.loglog(steps, losses, label='Data')
+# Plot the fitted line
+x_fit = np.logspace(np.log10(np.min(steps)), np.log10(np.max(steps)), 100)
+y_fit = 10 ** (popt[0] * np.log10(x_fit) + popt[1])
+plt.loglog(x_fit, y_fit, label='Fitted line', color='red')
+# Add title and labels
+plt.title('Loss as a function of step')
+plt.xlabel('Step')
+plt.ylabel('Loss')
+plt.legend()
+# Print the fitted parameters
+print('Fitted parameters: a = {:.2f}, b = {:.2f}'.format(popt[0], popt[1]))
+# Save the plot to a file
+plt.savefig('loss_plot2.png')
+# Show the plot
+plt.show()