import json

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# --- Script configuration -------------------------------------------------
infile = "output.jsonl"  # JSON Lines input: one JSON record per line
date = "2024-03"  # label used in the plot title and the output filename

# Durations above `cutoff` are collapsed into a single extra "overflow" bin
# so that a few very large values do not stretch the x-axis.
cutoff = 200.0
num_bins_inside_cutoff = 20


def parse_durations(lines):
    """Compute one duration per JSON record in *lines*.

    Each non-blank line is decoded as JSON and must provide
    ``["left"]["finish"]``, ``["right"]["finish"]`` and ``["timestamp"]``.
    The duration is ``timestamp - max(left finish, right finish)``.

    Args:
        lines: Iterable of strings (for example an open text file).

    Returns:
        A list of durations, in input order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the required fields.
    """
    durations = []
    for line in lines:
        # Blank or whitespace-only lines would otherwise make json.loads fail.
        if not line.strip():
            continue
        record = json.loads(line)
        last_finish = max(record["left"]["finish"], record["right"]["finish"])
        durations.append(record["timestamp"] - last_finish)
    return durations


def load_durations(path):
    """Read the JSON Lines file at *path* and return its durations."""
    with open(path, encoding="utf-8") as f:
        return parse_durations(f)


def build_bin_edges(cutoff, num_bins):
    """Return histogram edges: *num_bins* equal bins on [0, cutoff] plus one
    overflow bin of the same width directly above *cutoff*."""
    regular_edges = np.linspace(0, cutoff, num_bins + 1)
    overflow_cap = cutoff + cutoff / num_bins
    return np.append(regular_edges, overflow_cap)


def clip_to_overflow_bin(durations, cutoff, num_bins):
    """Return *durations* as a float array with every value strictly greater
    than *cutoff* moved to the centre of the overflow bin."""
    values = np.asarray(durations, dtype=float)
    # Half a bin width above the cutoff == midpoint of the overflow bin.
    overflow_center = cutoff + 0.5 * cutoff / num_bins
    return np.where(values > cutoff, overflow_center, values)


def plot_durations(durations, cutoff, num_bins, date, hidden_tick_label="110"):
    """Draw the clipped duration histogram and save it as a PNG.

    Args:
        durations: Raw (unclipped) durations.
        cutoff: Upper edge of the regular bins; larger values go to overflow.
        num_bins: Number of equal-width bins between 0 and *cutoff*.
        date: Label inserted into the title and the output filename.
        hidden_tick_label: Text of an x tick label to blank out, if present.
            NOTE(review): the default is a fixed string that does not depend
            on *cutoff*; confirm it still names the tick that should be hidden.
    """
    sns.histplot(
        clip_to_overflow_bin(durations, cutoff, num_bins),
        bins=build_bin_edges(cutoff, num_bins),
        kde=False,
    )
    plt.title(f'Distribution of "time to vote" {date}')
    plt.xlabel("Duration (seconds)")
    plt.ylabel("Frequency")

    # Mark where the regular bins end and the overflow bin begins.
    plt.axvline(x=cutoff, color="red", linestyle="--")
    plt.text(
        cutoff + 1, plt.ylim()[1] * 0.9, "Overflow", color="red", ha="left"
    )

    # Blank out the requested tick label; tick labels are only rewritten when
    # that label is actually present.
    ax = plt.gca()
    labels = [item.get_text() for item in ax.get_xticklabels()]
    if hidden_tick_label in labels:
        labels[labels.index(hidden_tick_label)] = ""
        ax.set_xticklabels(labels)

    plt.tight_layout()
    plt.savefig(f"duration_distribution_time_to_vote_{date}.png", dpi=300)


def main():
    """Load durations from `infile`, print summary statistics, save the plot."""
    durations = load_durations(infile)
    if not durations:
        # np.max on an empty sequence raises an opaque ValueError; fail clearly.
        raise SystemExit(f"No records found in {infile!r}; nothing to plot.")

    # Statistics are computed on the raw values, before overflow clipping.
    print(
        f"Avg: {np.mean(durations)}, Median: {np.median(durations)}, Max: {np.max(durations)}"
    )
    plot_durations(durations, cutoff, num_bins_inside_cutoff, date)


if __name__ == "__main__":
    main()