Commit
·
4d46abb
1
Parent(s):
c4f69f6
Exposing user inputs
Browse files
app.py
CHANGED
|
@@ -26,6 +26,11 @@ precision_options = {
|
|
| 26 |
'mixed': 6,
|
| 27 |
'half': 2
|
| 28 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# Taken from "Reducing Activation Recomputation in Large Transformer Models" https://arxiv.org/abs/2205.05198
|
| 30 |
def calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision):
|
| 31 |
# Convert bit size to byte size
|
|
@@ -46,10 +51,8 @@ def calculate_memory_usage(parameter_count, context_length, data_type, batch_siz
|
|
| 46 |
return total_memory_usage_gb
|
| 47 |
|
| 48 |
def calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision):
|
| 49 |
-
#
|
| 50 |
-
|
| 51 |
-
attention_heads = 32 # assuming 32 attention heads
|
| 52 |
-
hidden_dimensions = int(parameter_count ** 0.5) # assuming square root relationship for hidden size
|
| 53 |
|
| 54 |
# Calculate activations based on the formula from the paper
|
| 55 |
activations_per_layer = context_length * batch_size * hidden_dimensions * (34 + ((5 * attention_heads * context_length) / hidden_dimensions))
|
|
@@ -76,3 +79,4 @@ precision = st.selectbox("Precision", options=list(precision_options.keys()))
|
|
| 76 |
if st.button("Calculate Memory Usage"):
|
| 77 |
memory_usage = calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision)
|
| 78 |
st.write(f"Estimated Memory Usage for Inference: {memory_usage:.2f} GB")
|
|
|
|
|
|
| 26 |
'mixed': 6,
|
| 27 |
'half': 2
|
| 28 |
}
|
| 29 |
+
|
| 30 |
+
# Constants from the paper, now exposed as user inputs
|
| 31 |
+
layers = st.number_input("Number of Layers", value=32, step=1)
|
| 32 |
+
attention_heads = st.number_input("Number of Attention Heads", value=32, step=1)
|
| 33 |
+
|
| 34 |
# Taken from "Reducing Activation Recomputation in Large Transformer Models" https://arxiv.org/abs/2205.05198
|
| 35 |
def calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision):
|
| 36 |
# Convert bit size to byte size
|
|
|
|
| 51 |
return total_memory_usage_gb
|
| 52 |
|
| 53 |
def calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision):
|
| 54 |
+
# Heuristic: assume the hidden size is roughly the square root of the parameter count
|
| 55 |
+
hidden_dimensions = int(parameter_count ** 0.5)
|
|
|
|
|
|
|
| 56 |
|
| 57 |
# Calculate activations based on the formula from the paper
|
| 58 |
activations_per_layer = context_length * batch_size * hidden_dimensions * (34 + ((5 * attention_heads * context_length) / hidden_dimensions))
|
|
|
|
| 79 |
if st.button("Calculate Memory Usage"):
|
| 80 |
memory_usage = calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision)
|
| 81 |
st.write(f"Estimated Memory Usage for Inference: {memory_usage:.2f} GB")
|
| 82 |
+
|