Spaces:
Runtime error
Runtime error
Merge branch 'main' of gitlab.com:tangibleai/mathtext
Browse files
- test_api.py → api_scaling.py +0 -0
- test_api.sh → api_scaling.sh +0 -0
- plot_calls.py +53 -14
- requirements.txt +1 -0
test_api.py → api_scaling.py
RENAMED
|
File without changes
|
test_api.sh → api_scaling.sh
RENAMED
|
File without changes
|
plot_calls.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
from datetime import datetime
|
| 2 |
|
| 3 |
import matplotlib.pyplot as plt
|
|
@@ -14,14 +15,44 @@ log_files = [
|
|
| 14 |
for log_file in log_files:
|
| 15 |
path_ = f"./data/{log_file}"
|
| 16 |
df = pd.read_csv(filepath_or_buffer=path_, sep=";")
|
| 17 |
-
df["
|
| 18 |
-
lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f"))
|
| 19 |
-
df["
|
|
|
|
|
|
|
|
|
|
| 20 |
df["success"] = df["outputs"].apply(lambda x: 0 if "Time-out" in x else 1)
|
| 21 |
-
df.to_csv(f"./data/processed_{log_file}", index=False, sep=";")
|
| 22 |
|
| 23 |
student_numbers = sorted(df['active_students'].unique())
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
result = df.groupby(['active_students', 'success']) \
|
| 26 |
.agg({
|
| 27 |
'elapsed': ['mean', 'median', 'min', 'max'],
|
|
@@ -38,6 +69,7 @@ for log_file in log_files:
|
|
| 38 |
title = "API result for 'text2int' endpoint"
|
| 39 |
|
| 40 |
for student_number in student_numbers:
|
|
|
|
| 41 |
try:
|
| 42 |
failed_calls = result.loc[(student_number, 0), 'success'][0]
|
| 43 |
except:
|
|
@@ -48,25 +80,17 @@ for log_file in log_files:
|
|
| 48 |
|
| 49 |
rows = len(student_numbers)
|
| 50 |
|
| 51 |
-
# plt.figure(figsize=(16, 10))
|
| 52 |
-
# for index, student_number in enumerate(student_numbers, 1):
|
| 53 |
-
# data = df[df["active_students"] == student_number]
|
| 54 |
-
# fig = plt.subplot(rows, 2, 2 * index - 1)
|
| 55 |
-
# plt.title("y=seconds, x=active students", x=0.75, y=0.75)
|
| 56 |
-
# plt.boxplot(x=data["elapsed"], labels=[student_number])
|
| 57 |
-
# plt.subplot(rows, 2, 2 * index)
|
| 58 |
-
# plt.title("y=count of seconds, x=seconds", x=0.75, y=0.75)
|
| 59 |
-
# plt.hist(x=data["elapsed"], bins=25, edgecolor='white')
|
| 60 |
-
|
| 61 |
fig, axs = plt.subplots(rows, 2) # (rows, columns)
|
| 62 |
|
| 63 |
for index, student_number in enumerate(student_numbers):
|
|
|
|
| 64 |
data = df[df["active_students"] == student_number]
|
| 65 |
axs[index][0].boxplot(x=data["elapsed"]) # axs[row][column]
|
| 66 |
# axs[index][0].set_title(f'Boxplot for {student_number} students')
|
| 67 |
axs[index][0].set_xlabel(f'student number {student_number}')
|
| 68 |
axs[index][0].set_ylabel('Elapsed time (s)')
|
| 69 |
|
|
|
|
| 70 |
axs[index][1].hist(x=data["elapsed"], bins=25) # axs[row][column]
|
| 71 |
# axs[index][1].set_title(f'Histogram for {student_number} students')
|
| 72 |
axs[index][1].set_xlabel('seconds')
|
|
@@ -74,4 +98,19 @@ for log_file in log_files:
|
|
| 74 |
|
| 75 |
fig.suptitle(title, fontsize=16)
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
plt.show()
|
|
|
|
| 1 |
+
import math
|
| 2 |
from datetime import datetime
|
| 3 |
|
| 4 |
import matplotlib.pyplot as plt
|
|
|
|
| 15 |
for log_file in log_files:
|
| 16 |
path_ = f"./data/{log_file}"
|
| 17 |
df = pd.read_csv(filepath_or_buffer=path_, sep=";")
|
| 18 |
+
# Convert the textual 'finished' and 'started' columns into POSIX timestamps
# (float seconds) stored as 'finished_ts' and 'started_ts'. The strings carry
# no timezone, so datetime.timestamp() interprets them as local time.
ts_format = "%Y-%m-%d %H:%M:%S.%f"
for column in ("finished", "started"):
    df[f"{column}_ts"] = df[column].apply(
        lambda raw: datetime.strptime(raw, ts_format).timestamp())

# Duration of each call in seconds.
df["elapsed"] = df["finished_ts"] - df["started_ts"]
|
| 23 |
+
|
| 24 |
df["success"] = df["outputs"].apply(lambda x: 0 if "Time-out" in x else 1)
|
|
|
|
| 25 |
|
| 26 |
student_numbers = sorted(df['active_students'].unique())
|
| 27 |
|
| 28 |
+
# Per-group histogram settings, keyed by the group's 'active_students' value.
bins_dict = {}  # number of one-second-wide bins covering the group's run
min_finished_dict = {}  # earliest 'finished_ts' in the group (its zero time)

for student_number in student_numbers:
    # Select the group's finish timestamps once and reuse them for min/max.
    group_finished = df.loc[
        df["active_students"] == student_number, "finished_ts"]
    min_finished = group_finished.min()
    max_finished = group_finished.max()

    # One bin per elapsed second of the run. Clamp to at least one bin:
    # a group with a single call (or identical finish times) spans zero
    # seconds, and a histogram cannot be drawn with zero bins.
    bins = max(1, math.ceil(max_finished - min_finished))

    bins_dict[student_number] = bins
    min_finished_dict[student_number] = min_finished

    print(f"student number: {student_number}")
    print(f"min finished: {min_finished}")
    print(f"max finished: {max_finished}")
    print(f"bins finished seconds: {bins}, minutes: {bins / 60}")
|
| 42 |
+
|
| 43 |
+
# Seconds between each call's finish time and the earliest finish time of its
# own student group. Mapping every row's group to that group's zero time lets
# the subtraction run once over the whole column, instead of re-applying a
# row-wise lambda to the full dataframe for every group.
df["time_line"] = df["finished_ts"] - df["active_students"].map(min_finished_dict)
|
| 52 |
+
|
| 53 |
+
# creates a '.csv' from the dataframe
|
| 54 |
+
df.to_csv(f"./data/processed_{log_file}", index=False, sep=";")
|
| 55 |
+
|
| 56 |
result = df.groupby(['active_students', 'success']) \
|
| 57 |
.agg({
|
| 58 |
'elapsed': ['mean', 'median', 'min', 'max'],
|
|
|
|
| 69 |
title = "API result for 'text2int' endpoint"
|
| 70 |
|
| 71 |
for student_number in student_numbers:
|
| 72 |
+
# Prints percentage of the successful and failed calls
|
| 73 |
try:
|
| 74 |
failed_calls = result.loc[(student_number, 0), 'success'][0]
|
| 75 |
except:
|
|
|
|
| 80 |
|
| 81 |
rows = len(student_numbers)
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
# One row of axes per student group: boxplot on the left, histogram on the
# right. squeeze=False keeps `axs` two-dimensional even when there is only a
# single group; without it a one-row grid is returned as a 1-D array and
# axs[index][0] would try to subscript an Axes object.
fig, axs = plt.subplots(rows, 2, squeeze=False)  # (rows, columns)

for index, student_number in enumerate(student_numbers):
    data = df[df["active_students"] == student_number]

    # Boxplot of the call durations for this test group.
    axs[index][0].boxplot(x=data["elapsed"])
    axs[index][0].set_xlabel(f'student number {student_number}')
    axs[index][0].set_ylabel('Elapsed time (s)')

    # Histogram of the call durations for this test group.
    axs[index][1].hist(x=data["elapsed"], bins=25)
    axs[index][1].set_xlabel('seconds')
|
|
|
|
| 98 |
|
| 99 |
fig.suptitle(title, fontsize=16)
|
| 100 |
|
| 101 |
+
# Second figure: one histogram per student group showing how many API calls
# finished in each second of the group's run. squeeze=False guarantees a 2-D
# array of axes; with a single group plt.subplots would otherwise return a
# bare Axes object that cannot be indexed.
fig, axs = plt.subplots(rows, 1, squeeze=False)  # (rows, columns)
timeline_axes = axs[:, 0]  # the only column, one Axes per group

for index, student_number in enumerate(student_numbers):
    data = df[df["active_students"] == student_number]

    print(data["time_line"].head(10))

    ax = timeline_axes[index]
    # A histogram needs at least one bin, even if the group spans zero seconds.
    ax.hist(x=data["time_line"], bins=max(1, bins_dict[student_number]))
    ax.set_xlabel('seconds')
    ax.set_ylabel('Count of API calls')

fig.suptitle(title, fontsize=16)
|
| 115 |
+
|
| 116 |
plt.show()
|
requirements.txt
CHANGED
|
@@ -6,3 +6,4 @@ python-dotenv
|
|
| 6 |
transformers
|
| 7 |
torch
|
| 8 |
httpx
|
|
|
|
|
|
| 6 |
transformers
|
| 7 |
torch
|
| 8 |
httpx
|
| 9 |
+
matplotlib
|