Spaces:
Runtime error
Runtime error
Commit
·
f0d14cb
1
Parent(s):
a275f69
Update app.py
Browse files
app.py
CHANGED
|
@@ -67,7 +67,7 @@ mqa_bytes = 2*bs*h*(d/h) + 2*bs*n*(d/h) + 2*bs*h*n
|
|
| 67 |
c1, c2 = st.columns([2, 3])
|
| 68 |
att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
|
| 69 |
|
| 70 |
-
st.
|
| 71 |
st.write("Calculation depends on sequence length. We show numbers for maximum sequence length n.")
|
| 72 |
st.caption("Multi-Head Attention")
|
| 73 |
mha_flop = 2*bs*h*n*(d/h)
|
|
@@ -82,6 +82,13 @@ c1, c2 = st.columns([2, 3])
|
|
| 82 |
att_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
|
| 83 |
|
| 84 |
st.subheader('Output projection')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
st.header('MLP')
|
| 87 |
st.subheader('First Linear')
|
|
|
|
| 67 |
c1, c2 = st.columns([2, 3])
|
| 68 |
att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
|
| 69 |
|
| 70 |
+
st.subheader('Attention-value gemm')
|
| 71 |
st.write("Calculation depends on sequence length. We show numbers for maximum sequence length n.")
|
| 72 |
st.caption("Multi-Head Attention")
|
| 73 |
mha_flop = 2*bs*h*n*(d/h)
|
|
|
|
| 82 |
att_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
|
| 83 |
|
| 84 |
st.subheader('Output projection')
|
| 85 |
+
mlp1_flop = 2*bs*1*d
|
| 86 |
+
mlp1_bytes = 2*bs*1*d + 2*d*4*d + 2*bs*1*4*d
|
| 87 |
+
c1, c2 = st.columns([2, 3])
|
| 88 |
+
mlp1_time = print_kernel_execution(c1, c2, mlp1_flop, mlp1_bytes)
|
| 89 |
+
|
| 90 |
+
st.subheader('Element-wise ops')
|
| 91 |
+
st.write("A couple of layers ")
|
| 92 |
|
| 93 |
st.header('MLP')
|
| 94 |
st.subheader('First Linear')
|