Spaces:

henryholloway
/

LLM-Inference-Calculator

Sleeping

henryholloway committed on May 17, 2024

Commit

4d46abb

1 Parent(s): c4f69f6

Exposing user inputs

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,6 +26,11 @@ precision_options = {
     'mixed': 6,
     'half': 2
 }
 # Taken from "Reducing Activation Recomputation in Large Transformer Models" https://arxiv.org/abs/2205.05198
 def calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision):
     # Convert bit size to byte size
@@ -46,10 +51,8 @@ def calculate_memory_usage(parameter_count, context_length, data_type, batch_siz
     return total_memory_usage_gb
 def calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision):
-    # Constants from the paper
-    layers = 32  # assuming 32 layers for the model
-    attention_heads = 32  # assuming 32 attention heads
-    hidden_dimensions = int(parameter_count ** 0.5)  # assuming square root relationship for hidden size
     # Calculate activations based on the formula from the paper
     activations_per_layer = context_length * batch_size * hidden_dimensions * (34 + ((5 * attention_heads * context_length) / hidden_dimensions))
@@ -76,3 +79,4 @@ precision = st.selectbox("Precision", options=list(precision_options.keys()))
 if st.button("Calculate Memory Usage"):
     memory_usage = calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision)
     st.write(f"Estimated Memory Usage for Inference: {memory_usage:.2f} GB")

     'mixed': 6,
     'half': 2
 }
+# Constants from the paper, now exposed as user inputs
+layers = st.number_input("Number of Layers", value=32, step=1)
+attention_heads = st.number_input("Number of Attention Heads", value=32, step=1)
 # Taken from "Reducing Activation Recomputation in Large Transformer Models" https://arxiv.org/abs/2205.05198
 def calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision):
     # Convert bit size to byte size
     return total_memory_usage_gb
 def calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision):
+    # Assuming square root relationship for hidden size
+    hidden_dimensions = int(parameter_count ** 0.5)
     # Calculate activations based on the formula from the paper
     activations_per_layer = context_length * batch_size * hidden_dimensions * (34 + ((5 * attention_heads * context_length) / hidden_dimensions))
 if st.button("Calculate Memory Usage"):
     memory_usage = calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision)
     st.write(f"Estimated Memory Usage for Inference: {memory_usage:.2f} GB")