Spaces:
Runtime error
Runtime error
Commit
·
b31a1d5
1
Parent(s):
dbafc77
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,8 +7,10 @@ GB_S = 1935e9
|
|
| 7 |
# in ms
|
| 8 |
THREAD_OVERHEAD = 0.005
|
| 9 |
|
|
|
|
| 10 |
def calc_exec_time(comp_flop, mem_bytes):
|
| 11 |
-
|
|
|
|
| 12 |
|
| 13 |
def qkv_mha_exec(bs, h, n, d):
|
| 14 |
flop = 2*bs*1*d*3*d
|
|
@@ -100,7 +102,7 @@ n = st.sidebar.number_input('End seq', value=1024)
|
|
| 100 |
st.sidebar.header("GPU parameters")
|
| 101 |
|
| 102 |
|
| 103 |
-
st.header("
|
| 104 |
|
| 105 |
mqa_total_time = 0.
|
| 106 |
mha_total_time = 0.
|
|
@@ -113,8 +115,8 @@ for i in range(n_start, n):
|
|
| 113 |
mqa_time = shared_time + qkv_mqa_exec(bs, h, i, d)[2] + att1_mqa_exec(bs, h, i, d)[2] + att2_mqa_exec(bs, h, i, d)[2]
|
| 114 |
mqa_total_time += l*mqa_time
|
| 115 |
|
| 116 |
-
st.write("
|
| 117 |
-
st.write("
|
| 118 |
|
| 119 |
st.header('Attention layer')
|
| 120 |
|
|
|
|
| 7 |
# in ms
|
| 8 |
THREAD_OVERHEAD = 0.005
|
| 9 |
|
| 10 |
+
# in ms
|
| 11 |
def calc_exec_time(comp_flop, mem_bytes):
|
| 12 |
+
exec_time = comp_flop/TFLOPS + mem_bytes/GB_S
|
| 13 |
+
return max(exec_time*1000, THREAD_OVERHEAD)
|
| 14 |
|
| 15 |
def qkv_mha_exec(bs, h, n, d):
|
| 16 |
flop = 2*bs*1*d*3*d
|
|
|
|
| 102 |
st.sidebar.header("GPU parameters")
|
| 103 |
|
| 104 |
|
| 105 |
+
st.header("Execution time (ms)")
|
| 106 |
|
| 107 |
mqa_total_time = 0.
|
| 108 |
mha_total_time = 0.
|
|
|
|
| 115 |
mqa_time = shared_time + qkv_mqa_exec(bs, h, i, d)[2] + att1_mqa_exec(bs, h, i, d)[2] + att2_mqa_exec(bs, h, i, d)[2]
|
| 116 |
mqa_total_time += l*mqa_time
|
| 117 |
|
| 118 |
+
st.write("Multi-Head Attention: " + str(mha_total_time))
|
| 119 |
+
st.write("Multi-Query Attention: " + str(mqa_total_time))
|
| 120 |
|
| 121 |
st.header('Attention layer')
|
| 122 |
|