Spaces:

derek-thomas
/

transformer_calculator

Paused

App Files Files Community

derek-thomas committed on Sep 13, 2024

Commit

3951475

verified ·

1 Parent(s): 0847403

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -24

app.py CHANGED Viewed

@@ -3,16 +3,6 @@ from transformers import AutoConfig  # Required for Hugging Face integration
 from calc_params import calc_params  # Import calc_params from the new file
 # ---- Helper Functions ---- #
-def convert_params(params):
-    if params == 0:
-        return "0"
-    size_name = ("", "K", "M", "B", "T", "P", "E", "Z", "Y")
-    i = int(math.floor(math.log(params, 1000)))
-    p = math.pow(1000, i)
-    s = round(params / p, 2)
-    return "%s %s" % (s, size_name[i])
-# Get Hugging Face model configuration and update the parameters
 def get_hf_model_args(hf_model_name_or_path):
     try:
         config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
@@ -34,6 +24,20 @@ def get_hf_model_args(hf_model_name_or_path):
         "sequence_length": sequence_length,
     }, None
 # ---- Memory Calculation ---- #
 def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib):
     model_params, hf_error = get_hf_model_args(hf_model_name_or_path) if hf_model_name_or_path else (None, None)
@@ -62,20 +66,6 @@ def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_par
     return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
-# ---- Update Gradio inputs with Hugging Face model config ---- #
-def update_from_hf_model(hf_model_name_or_path):
-    model_params, hf_error = get_hf_model_args(hf_model_name_or_path)
-    if hf_error:
-        return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), hf_error
-    return (gr.update(value=model_params["num_layers"]),
-            gr.update(value=model_params["hidden_size"]),
-            gr.update(value=model_params["num_attention_heads"]),
-            gr.update(value=model_params["vocab_size"]),
-            gr.update(value=model_params["sequence_length"]),
-            "")
 # ---- Gradio Interface ---- #
 with gr.Blocks() as demo:
     with gr.Tabs():
@@ -107,6 +97,7 @@ with gr.Blocks() as demo:
         # Parameter Calculation Tab
         with gr.TabItem("Parameter Calculation"):
             vocab_size = gr.Number(label="Vocab Size", value=51200)
             tied_embeddings = gr.Checkbox(label="Tied Embeddings", value=False)
             hidden_size = gr.Number(label="Hidden Size", value=6144)
@@ -128,4 +119,8 @@ with gr.Blocks() as demo:
                 inputs=[vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio],
                 outputs=param_result)
 demo.launch()

 from calc_params import calc_params  # Import calc_params from the new file
 # ---- Helper Functions ---- #
 def get_hf_model_args(hf_model_name_or_path):
     try:
         config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
         "sequence_length": sequence_length,
     }, None
+# ---- Update Gradio inputs with Hugging Face model config ---- #
+def update_from_hf_model(hf_model_name_or_path):
+    model_params, hf_error = get_hf_model_args(hf_model_name_or_path)
+    if hf_error:
+        return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), hf_error
+    return (gr.update(value=model_params["num_layers"]),
+            gr.update(value=model_params["hidden_size"]),
+            gr.update(value=model_params["num_attention_heads"]),
+            gr.update(value=model_params["vocab_size"]),
+            gr.update(value=model_params["sequence_length"]),
+            "")
 # ---- Memory Calculation ---- #
 def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib):
     model_params, hf_error = get_hf_model_args(hf_model_name_or_path) if hf_model_name_or_path else (None, None)
     return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
 # ---- Gradio Interface ---- #
 with gr.Blocks() as demo:
     with gr.Tabs():
         # Parameter Calculation Tab
         with gr.TabItem("Parameter Calculation"):
+            hf_model_name_or_path = gr.Textbox(label="HuggingFace Model Name or Path (optional)", value="")
             vocab_size = gr.Number(label="Vocab Size", value=51200)
             tied_embeddings = gr.Checkbox(label="Tied Embeddings", value=False)
             hidden_size = gr.Number(label="Hidden Size", value=6144)
                 inputs=[vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio],
                 outputs=param_result)
+            hf_model_name_or_path.change(fn=update_from_hf_model,
+                inputs=[hf_model_name_or_path],
+                outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length])
 demo.launch()