Commit
·
86b55e7
1
Parent(s):
bb43133
Created calculator
Browse files
app.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
# Define bit sizes for different quantization options
# Maps a weight-storage format name to its bit width per parameter.
# NOTE(review): the Q* names look like llama.cpp/GGUF quantization schemes;
# K-quants are mixed-bit in practice, so these are nominal widths — confirm.
quantization_bit_sizes = {
    'float32': 32,
    'float16': 16,
    'Q2_K': 2,
    'Q3_K_L': 3,
    'Q3_K_M': 3,
    'Q3_K_S': 3,
    'Q4_0': 4,
    'Q4_1': 4,
    'Q4_K_M': 4,
    'Q4_K_S': 4,
    'Q5_0': 5,
    'Q5_1': 5,
    'Q5_K_M': 5,
    'Q5_K_S': 5,
    'Q6_K': 6,
    'Q8_0': 8
}
|
| 22 |
+
|
| 23 |
+
# Define precision options
# NOTE(review): these values read as *bytes* per activation element
# (full fp32 = 4, half fp16 = 2, mixed = fp16 working copy + fp32 master = 6),
# but the consumer divides them by 8 as if they were bit counts — the units
# are inconsistent somewhere; confirm which interpretation is intended.
precision_options = {
    'full': 4,
    'mixed': 6,  # for training mixed precision
    'half': 2
}
|
| 29 |
+
|
| 30 |
+
def calculate_memory_usage(parameter_count, context_length, data_type, is_training, batch_size, vocab_size, precision):
    """Estimate the total memory footprint of an LLM, in gibibytes.

    Sums three contributions: weight storage (at the chosen quantization
    width), activations (delegated to ``calculate_activations``), and the
    output-logit buffer. Raises ``KeyError`` if ``data_type`` is not a key
    of ``quantization_bit_sizes``.
    """
    # Weight storage: quantization table gives bits per parameter.
    bytes_per_weight = quantization_bit_sizes[data_type] / 8
    weight_bytes = parameter_count * bytes_per_weight

    # Activation memory for the forward pass (and backward, when training).
    activation_bytes = calculate_activations(
        parameter_count, context_length, batch_size, vocab_size, precision, is_training
    )

    # Output logits assumed stored as float32 (4 bytes per element);
    # training doubles this for the matching gradient buffer.
    grad_factor = 2 if is_training else 1
    logit_bytes = 4 * batch_size * context_length * vocab_size * grad_factor

    # Sum and convert bytes -> GiB (1024**3).
    return (weight_bytes + activation_bytes + logit_bytes) / (1024 ** 3)
|
| 50 |
+
|
| 51 |
+
def calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision, is_training):
    """Estimate activation memory for a transformer, in bytes.

    Rough model: derive a hidden size from the parameter count, cost one
    attention block + one MLP block + two layer norms, then scale by the
    number of layers kept live (all layers when training, one when not).

    ``vocab_size`` is currently unused; it is kept in the signature for
    backward compatibility with existing callers.
    """
    # Heuristic: hidden size ~ sqrt(parameter count) — TODO confirm this
    # matches the architectures being modeled.
    hidden_size = parameter_count ** 0.5
    num_attention_heads = 16  # a typical number of attention heads
    intermediate_size = hidden_size * 4  # common 4x MLP expansion in transformers

    # FIX: precision_options values are byte sizes per element (fp32 = 4,
    # fp16 = 2, mixed = fp16 copy + fp32 master = 6). The original divided
    # by 8 as if they were bit counts, under-counting activations 8x.
    bytes_per_param = precision_options[precision]

    # Attention block: input, Q/K/V projections (per-head splits sum back
    # to hidden_size), softmax scores, and output-projection input.
    attention_input = bytes_per_param * batch_size * context_length * hidden_size
    q = bytes_per_param * batch_size * context_length * hidden_size
    k = bytes_per_param * batch_size * context_length * hidden_size
    v = bytes_per_param * batch_size * context_length * hidden_size
    # Attention score matrix is quadratic in context length.
    softmax_output = bytes_per_param * batch_size * num_attention_heads * (context_length ** 2)
    out_proj_input = bytes_per_param * batch_size * context_length * hidden_size
    attention_block = attention_input + q + k + softmax_output + v + out_proj_input

    # MLP block: up-projection input, activation, and down-projection input.
    mlp_input = bytes_per_param * batch_size * context_length * hidden_size
    activation_input = bytes_per_param * batch_size * context_length * intermediate_size
    down_proj_input = bytes_per_param * batch_size * context_length * intermediate_size
    mlp_block = mlp_input + activation_input + down_proj_input

    # Two layer norms per layer (pre-attention and pre-MLP).
    layer_norms = bytes_per_param * batch_size * context_length * hidden_size * 2

    layer = attention_block + mlp_block + layer_norms

    # Training must retain every layer's activations for backprop (assumed
    # 12 layers); inference is modeled as one layer live at a time —
    # NOTE(review): the fixed 12 should probably be derived or parameterized.
    activations = layer * (12 if is_training else 1)

    return activations
|
| 79 |
+
|
| 80 |
+
# Streamlit app
st.title("Memory Usage Calculator for Large Language Models")

# User inputs
# FIX: add min_value guards — the original accepted zero/negative parameter
# counts, context lengths, batch sizes, and vocab sizes, which yield
# meaningless (or negative) memory estimates. Defaults are unchanged.
# Parameter count is entered in billions and converted to an absolute count.
parameter_count = st.number_input("Parameter Count (in billions)", min_value=1, value=1, step=1) * 1e9
context_length = st.number_input("Context Length (number of tokens)", min_value=1, value=512, step=1)
data_type = st.selectbox("Data Type", options=list(quantization_bit_sizes.keys()))
is_training = st.checkbox("Training Mode", value=False)
batch_size = st.number_input("Batch Size", min_value=1, value=1, step=1)
vocab_size = st.number_input("Vocabulary Size", min_value=1, value=30000, step=1000)
precision = st.selectbox("Precision", options=list(precision_options.keys()))

# Calculate memory usage
if st.button("Calculate Memory Usage"):
    memory_usage = calculate_memory_usage(parameter_count, context_length, data_type, is_training, batch_size, vocab_size, precision)
    st.write(f"Estimated Memory Usage for {'Training' if is_training else 'Inference'}: {memory_usage:.2f} GB")
|