Commit
·
475bc5f
1
Parent(s):
86b55e7
removed training calculation functionality
Browse files
app.py
CHANGED
|
@@ -23,11 +23,11 @@ quantization_bit_sizes = {
|
|
| 23 |
# Define precision options
|
| 24 |
precision_options = {
|
| 25 |
'full': 4,
|
| 26 |
-
'mixed': 6,
|
| 27 |
'half': 2
|
| 28 |
}
|
| 29 |
|
| 30 |
-
def calculate_memory_usage(parameter_count, context_length, data_type, is_training, batch_size, vocab_size, precision):
|
| 31 |
# Convert bit size to byte size
|
| 32 |
byte_size = quantization_bit_sizes[data_type] / 8
|
| 33 |
|
|
@@ -35,10 +35,10 @@ def calculate_memory_usage(parameter_count, context_length, data_type, is_traini
|
|
| 35 |
memory_params = parameter_count * byte_size
|
| 36 |
|
| 37 |
# Memory usage for context (activations)
|
| 38 |
-
activations = calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision, is_training)
|
| 39 |
|
| 40 |
# Outputs memory usage
|
| 41 |
-
outputs = 4 * batch_size * context_length * vocab_size
|
| 42 |
|
| 43 |
# Total memory usage
|
| 44 |
total_memory_usage = memory_params + activations + outputs
|
|
@@ -48,7 +48,7 @@ def calculate_memory_usage(parameter_count, context_length, data_type, is_traini
|
|
| 48 |
|
| 49 |
return total_memory_usage_gb
|
| 50 |
|
| 51 |
-
def calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision, is_training):
|
| 52 |
# Simplified activation calculation
|
| 53 |
hidden_size = parameter_count ** 0.5 # assuming a square root relationship for hidden size
|
| 54 |
num_attention_heads = 16 # a typical number of attention heads
|
|
@@ -73,7 +73,7 @@ def calculate_activations(parameter_count, context_length, batch_size, vocab_siz
|
|
| 73 |
|
| 74 |
layer = attention_block + mlp_block + layer_norms
|
| 75 |
|
| 76 |
-
activations = layer
|
| 77 |
|
| 78 |
return activations
|
| 79 |
|
|
@@ -84,12 +84,11 @@ st.title("Memory Usage Calculator for Large Language Models")
|
|
| 84 |
parameter_count = st.number_input("Parameter Count (in billions)", value=1, step=1) * 1e9
|
| 85 |
context_length = st.number_input("Context Length (number of tokens)", value=512, step=1)
|
| 86 |
data_type = st.selectbox("Data Type", options=list(quantization_bit_sizes.keys()))
|
| 87 |
-
is_training = st.checkbox("Training Mode", value=False)
|
| 88 |
batch_size = st.number_input("Batch Size", value=1, step=1)
|
| 89 |
vocab_size = st.number_input("Vocabulary Size", value=30000, step=1000)
|
| 90 |
precision = st.selectbox("Precision", options=list(precision_options.keys()))
|
| 91 |
|
| 92 |
# Calculate memory usage
|
| 93 |
if st.button("Calculate Memory Usage"):
|
| 94 |
-
memory_usage = calculate_memory_usage(parameter_count, context_length, data_type, is_training, batch_size, vocab_size, precision)
|
| 95 |
-
st.write(f"Estimated Memory Usage for {'Training' if is_training else 'Inference'}: {memory_usage:.2f} GB")
|
|
|
|
| 23 |
# Define precision options
|
| 24 |
precision_options = {
|
| 25 |
'full': 4,
|
| 26 |
+
'mixed': 6,
|
| 27 |
'half': 2
|
| 28 |
}
|
| 29 |
|
| 30 |
+
def calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision):
|
| 31 |
# Convert bit size to byte size
|
| 32 |
byte_size = quantization_bit_sizes[data_type] / 8
|
| 33 |
|
|
|
|
| 35 |
memory_params = parameter_count * byte_size
|
| 36 |
|
| 37 |
# Memory usage for context (activations)
|
| 38 |
+
activations = calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision)
|
| 39 |
|
| 40 |
# Outputs memory usage
|
| 41 |
+
outputs = 4 * batch_size * context_length * vocab_size
|
| 42 |
|
| 43 |
# Total memory usage
|
| 44 |
total_memory_usage = memory_params + activations + outputs
|
|
|
|
| 48 |
|
| 49 |
return total_memory_usage_gb
|
| 50 |
|
| 51 |
+
def calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision):
|
| 52 |
# Simplified activation calculation
|
| 53 |
hidden_size = parameter_count ** 0.5 # assuming a square root relationship for hidden size
|
| 54 |
num_attention_heads = 16 # a typical number of attention heads
|
|
|
|
| 73 |
|
| 74 |
layer = attention_block + mlp_block + layer_norms
|
| 75 |
|
| 76 |
+
activations = layer # assuming 12 layers for simplicity
|
| 77 |
|
| 78 |
return activations
|
| 79 |
|
|
|
|
| 84 |
parameter_count = st.number_input("Parameter Count (in billions)", value=1, step=1) * 1e9
|
| 85 |
context_length = st.number_input("Context Length (number of tokens)", value=512, step=1)
|
| 86 |
data_type = st.selectbox("Data Type", options=list(quantization_bit_sizes.keys()))
|
|
|
|
| 87 |
batch_size = st.number_input("Batch Size", value=1, step=1)
|
| 88 |
vocab_size = st.number_input("Vocabulary Size", value=30000, step=1000)
|
| 89 |
precision = st.selectbox("Precision", options=list(precision_options.keys()))
|
| 90 |
|
| 91 |
# Calculate memory usage
|
| 92 |
if st.button("Calculate Memory Usage"):
|
| 93 |
+
memory_usage = calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision)
|
| 94 |
+
st.write(f"Estimated Memory Usage for Inference: {memory_usage:.2f} GB")
|