Spaces:

nanotron
/

predict_memory

Running

App Files Files Community

nouamanetazi HF Staff committed on Jan 21, 2025

Commit

5f67cc3

1 Parent(s): 421f3af

init

Browse files

Files changed (3) hide show

.gitignore +175 -0
app.py +145 -0
utils.py +152 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,175 @@

+mem_viz/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+.vscode
+checkpoints/
+wandb/
+gg/
+lighteval/
+logs/
+snapshots/
+tb_logs*
+.test_cache/
+benchmark/

app.py ADDED Viewed

	@@ -0,0 +1,145 @@

+import gradio as gr
+import matplotlib.pyplot as plt
+import yaml
+from pathlib import Path
+import io
+from utils import calculate_memory_components, plot_memory_breakdown
+def load_config_from_yaml_content(yaml_content):
+    """Parse a YAML training config and extract the memory-model inputs.
+
+    Args:
+        yaml_content: YAML text (anything ``yaml.safe_load`` accepts) whose
+            top level is a mapping with ``model.model_config``,
+            ``parallelism``, ``tokens`` and ``optimizer`` sections.
+
+    Returns:
+        A flat dict whose keys match the keyword arguments of
+        ``plot_memory_breakdown``.
+
+    Raises:
+        gr.Error: If the text is not valid YAML, is not a mapping at the top
+            level, or lacks one of the required keys.
+    """
+    try:
+        config = yaml.safe_load(yaml_content)
+    except Exception as e:
+        # UI boundary: surface any loader failure as a user-visible error.
+        raise gr.Error(f"Error parsing YAML: {str(e)}") from e
+    # Empty or scalar documents load as None/str/list; report that directly
+    # instead of an opaque "not subscriptable" TypeError further down.
+    if not isinstance(config, dict):
+        raise gr.Error("Error parsing YAML: expected a mapping at the top level")
+    try:
+        # Extract relevant parameters from config
+        model_config = config['model']['model_config']
+        parallelism = config['parallelism']
+        tokens = config['tokens']
+        optimizer = config['optimizer']
+        return {
+            'hidden_size': model_config['hidden_size'],
+            'num_layers': model_config['num_hidden_layers'],
+            'vocab_size': model_config['vocab_size'],
+            'intermediate_size': model_config['intermediate_size'],
+            'seq_len': tokens['sequence_length'],
+            'mbs': tokens['micro_batch_size'],
+            'batch_accum': tokens['batch_accumulation_per_replica'],
+            'tp': parallelism['tp'],
+            'pp': parallelism['pp'],
+            'dp': parallelism['dp'],
+            'zero_stage': optimizer['zero_stage'],
+            'tie_word_embeddings': model_config['tie_word_embeddings']
+        }
+    except KeyError as e:
+        # str(KeyError) is only the quoted key name, so say what went wrong.
+        raise gr.Error(f"Error parsing YAML: missing required key {e}") from e
+    except Exception as e:
+        raise gr.Error(f"Error parsing YAML: {str(e)}") from e
+def load_config_from_yaml_file(yaml_path):
+    """Read an uploaded YAML file and extract the memory-model inputs.
+
+    Args:
+        yaml_path: Either a filesystem path (``str`` or ``pathlib.Path``) or
+            an upload wrapper object exposing the on-disk location as
+            ``.name``. Falsy values mean "nothing uploaded".
+
+    Returns:
+        The dict produced by ``load_config_from_yaml_content``, or ``None``
+        when no file was provided.
+    """
+    if not yaml_path:
+        return None
+    # Plain paths are used as-is (``Path.name`` would only be the basename);
+    # anything else is treated as a file wrapper carrying its path in ``.name``.
+    path = yaml_path if isinstance(yaml_path, (str, Path)) else yaml_path.name
+    # Read as UTF-8 explicitly rather than relying on the locale default.
+    with open(path, 'r', encoding='utf-8') as f:
+        return load_config_from_yaml_content(f.read())
+def format_config_display(config):
+    """Render a configuration dict as grouped Markdown bullet lists.
+
+    Args:
+        config: Flat mapping holding every key listed in the layout below,
+            or a falsy value when nothing has been loaded.
+
+    Returns:
+        A Markdown string with one ``###`` heading per group followed by a
+        ``- key: value`` line for each parameter, or the placeholder text
+        ``"No configuration loaded"`` when ``config`` is falsy.
+    """
+    if not config:
+        return "No configuration loaded"
+    # (heading, parameter keys) pairs, in display order.
+    layout = (
+        ("Model Architecture", (
+            "hidden_size", "num_layers", "vocab_size",
+            "intermediate_size", "tie_word_embeddings",
+        )),
+        ("Training Configuration", ("seq_len", "mbs", "batch_accum")),
+        ("Parallelism", ("tp", "pp", "dp", "zero_stage")),
+    )
+    lines = []
+    for heading, keys in layout:
+        lines.append(f"\n### {heading}")
+        lines.extend(f"- {key}: {config[key]}" for key in keys)
+    return "\n".join(lines)
+def process_yaml_and_plot(config):
+    """Produce both memory plots and the Markdown summary for a config.
+
+    Args:
+        config: Dict of keyword arguments for ``plot_memory_breakdown``, or a
+            falsy value when nothing has been loaded.
+
+    Returns:
+        A ``(figure, figure, markdown)`` triple; both figures are ``None``
+        and the text is a placeholder when ``config`` is falsy.
+    """
+    if config:
+        component_fig, aggregate_fig = plot_memory_breakdown(**config)
+        return component_fig, aggregate_fig, format_config_display(config)
+    return None, None, "No configuration loaded"
+# Build the UI: the first column holds the YAML and manual inputs, the second
+# (wider) column holds the configuration summary and the two plots.
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Input path 1: upload a YAML file or paste its content.
+            with gr.Accordion("YAML Configuration", open=True):
+                yaml_file = gr.File(label="Upload YAML Config", file_types=[".yaml", ".yml"])
+                yaml_text = gr.Textbox(
+                    label="Or paste YAML content here",
+                    placeholder="Paste your YAML configuration here...",
+                    lines=10
+                )
+                yaml_submit = gr.Button("Calculate Memory from YAML")
+            # Input path 2: enter every parameter by hand (collapsed by default).
+            with gr.Accordion("Manual Configuration", open=False):
+                with gr.Accordion("Model Architecture", open=True):
+                    hidden_size = gr.Number(4096, label="Hidden Size")
+                    num_layers = gr.Number(32, label="Number of Layers")
+                    vocab_size = gr.Number(50432, label="Vocabulary Size")
+                    intermediate_size = gr.Number(11008, label="Intermediate Size")
+                    tie_word_embeddings = gr.Checkbox(True, label="Tie Word Embeddings")
+                with gr.Accordion("Training Configuration", open=True):
+                    seq_len = gr.Number(2048, label="Sequence Length")
+                    mbs = gr.Number(1, label="Micro Batch Size")
+                    batch_accum = gr.Number(1, label="Gradient Accumulation Steps")
+                with gr.Accordion("Parallelism", open=True):
+                    tp = gr.Number(1, label="Tensor Parallelism")
+                    pp = gr.Number(1, label="Pipeline Parallelism")
+                    dp = gr.Number(1, label="Data Parallelism")
+                    zero_stage = gr.Radio([0, 1, 2, 3], value=0, label="ZeRO Stage")
+                manual_submit = gr.Button("Calculate Memory (Manual Input)")
+        with gr.Column(scale=2):
+            config_display = gr.Markdown(label="Configuration Values")
+            plot1 = gr.Plot(label="Memory Component Breakdown")
+            plot2 = gr.Plot(label="Aggregate Memory Metrics")
+    # Handle YAML file upload: recompute whenever the file input changes; a
+    # cleared input is passed on as None so the placeholder text is shown.
+    yaml_file.change(
+        lambda x: process_yaml_and_plot(load_config_from_yaml_file(x) if x else None),
+        inputs=[yaml_file],
+        outputs=[plot1, plot2, config_display]
+    )
+    # Handle YAML text input: parse the pasted text when the button is clicked.
+    yaml_submit.click(
+        lambda x: process_yaml_and_plot(load_config_from_yaml_content(x) if x else None),
+        inputs=[yaml_text],
+        outputs=[plot1, plot2, config_display]
+    )
+    # Handle manual input
+    def manual_input_to_config(*args):
+        """Zip the positional widget values into a config dict and plot it.
+
+        The key order here must match the ``inputs`` list passed to
+        ``manual_submit.click`` below, since values are paired by position.
+        """
+        config = dict(zip([
+            'hidden_size', 'num_layers', 'vocab_size', 'intermediate_size',
+            'seq_len', 'mbs', 'batch_accum', 'tp', 'pp', 'dp', 'zero_stage',
+            'tie_word_embeddings'
+        ], args))
+        return process_yaml_and_plot(config)
+    manual_submit.click(
+        manual_input_to_config,
+        inputs=[
+            hidden_size, num_layers, vocab_size, intermediate_size,
+            seq_len, mbs, batch_accum, tp, pp, dp, zero_stage,
+            tie_word_embeddings
+        ],
+        outputs=[plot1, plot2, config_display]
+    )
+# Start the app only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch()

utils.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import matplotlib.pyplot as plt
+def calculate_memory_components(
+    hidden_size, num_layers, vocab_size, intermediate_size,
+    seq_len, mbs, batch_accum, tp, pp, dp, zero_stage,
+    tie_word_embeddings
+):
+    """Estimate the memory footprint, in MiB, of one training configuration.
+
+    Args:
+        hidden_size: Model hidden dimension.
+        num_layers: Total number of decoder layers.
+        vocab_size: Vocabulary size.
+        intermediate_size: MLP intermediate dimension.
+        seq_len: Sequence length.
+        mbs: Micro batch size.
+        batch_accum: Number of gradient accumulation steps.
+        tp: Tensor-parallel degree (>= 1).
+        pp: Pipeline-parallel degree (>= 1).
+        dp: Data-parallel degree (>= 1).
+        zero_stage: ZeRO stage; any non-zero value divides the FP32
+            parameters and optimizer states by ``dp``.
+        tie_word_embeddings: When false and ``pp == 1``, the embedding matrix
+            is counted twice.
+
+    Returns:
+        A dict with two sub-dicts: ``"Components"`` (individual terms) and
+        ``"Aggregates"`` (sums before/after optimizer states and at peak).
+
+    Raises:
+        ValueError: If ``tp``, ``pp`` or ``dp`` is missing or smaller than 1.
+    """
+    # The parallel degrees act as divisors below; reject values that would
+    # divide by zero or silently produce negative sizes.
+    for name, degree in (("tp", tp), ("pp", pp), ("dp", dp)):
+        if degree is None or degree < 1:
+            raise ValueError(f"{name} must be >= 1, got {degree!r}")
+    # Two bytes per value, expressed in MiB.
+    bf16_mib = 2 / 1024 / 1024
+    # Calculate base components first
+    num_hidden_layers_in_pp = num_layers // pp
+    # Model BF16 calculation
+    vocab_embeddings = vocab_size * hidden_size * (2 if (not tie_word_embeddings and pp==1) else 1)
+    layer_params = (
+        (hidden_size * 3 * hidden_size)  # qkv_proj
+        + (hidden_size * hidden_size)     # out_proj
+        + (hidden_size * 2 * intermediate_size)  # gate_up_proj
+        + (intermediate_size * hidden_size)      # down_proj
+    )
+    model_bf16 = (vocab_embeddings + num_hidden_layers_in_pp * layer_params) * bf16_mib / tp
+    # Other components
+    dp_if_zero = 1 if zero_stage == 0 else dp
+    fp32_params = 2 * model_bf16 / dp_if_zero
+    fp32_grads = 2 * model_bf16
+    optimstates = 4 * model_bf16 / dp_if_zero
+    use_ddp = zero_stage == 0 and dp > 1
+    ddp_grads_buffers = model_bf16 if use_ddp else 0
+    overhead = 72 + 32 * mbs
+    # Activations
+    decoder_layer_mib = (seq_len * mbs * hidden_size/tp) * bf16_mib * (4*intermediate_size/hidden_size + 10)
+    if pp > 1:
+        activs = min(pp, batch_accum) * num_hidden_layers_in_pp * decoder_layer_mib
+    else:
+        cast_to_fp32 = sharded_cross_entropy = seq_len * mbs * vocab_size * bf16_mib * 2 / tp
+        activs = num_layers * decoder_layer_mib + cast_to_fp32 + sharded_cross_entropy
+    # Calculate aggregate metrics
+    memory_usage_before_optimstates = (
+        model_bf16 +
+        fp32_params +
+        fp32_grads +
+        ddp_grads_buffers
+    )
+    memory_usage_after_optimstates = (
+        model_bf16 +
+        fp32_params +
+        fp32_grads +
+        optimstates +
+        ddp_grads_buffers +
+        overhead
+    )
+    # Peak is everything resident after the optimizer states plus activations.
+    memory_usage_peak_tbi = memory_usage_after_optimstates + activs
+    return {
+        "Components": {
+            "Model BF16": model_bf16,
+            "FP32 Parameters": fp32_params,
+            "FP32 Gradients": fp32_grads,
+            "Optimizer States": optimstates,
+            "DDP Gradient Buffers": ddp_grads_buffers,
+            "Overhead": overhead,
+            "Activations": activs
+        },
+        "Aggregates": {
+            "Memory Before Optimizer States": memory_usage_before_optimstates,
+            "Memory After Optimizer States": memory_usage_after_optimstates,
+            "Peak Memory (TBI)": memory_usage_peak_tbi
+        }
+    }
+def _plot_bar_figure(values_by_name, title, color=None):
+    """Draw one labelled bar chart of MiB values and return its figure."""
+    names = list(values_by_name.keys())
+    values = list(values_by_name.values())
+    fig = plt.figure(figsize=(10, 6))
+    ax = fig.add_subplot(1, 1, 1)
+    # Only pass a colour when one was requested, so the default is kept.
+    bar_kwargs = {} if color is None else {'color': color}
+    bars = ax.bar(range(len(names)), values, **bar_kwargs)
+    # Add value labels centred above each bar
+    for bar in bars:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height,
+                f'{height:.1f} MiB',
+                ha='center', va='bottom')
+    ax.set_xticks(range(len(names)))
+    ax.set_xticklabels(names, rotation=45, ha='right')
+    ax.set_ylabel('Memory (MiB)')
+    ax.set_title(title, pad=20)
+    # Lay out this specific figure rather than pyplot's implicit current one.
+    fig.tight_layout()
+    return fig
+def plot_memory_breakdown(
+    hidden_size, num_layers, vocab_size, intermediate_size,
+    seq_len, mbs, batch_accum, tp, pp, dp, zero_stage,
+    tie_word_embeddings
+):
+    """Plot the estimated memory components and aggregates as bar charts.
+
+    All arguments are forwarded unchanged to ``calculate_memory_components``.
+
+    Returns:
+        A ``(fig1, fig2)`` tuple of matplotlib figures: the per-component
+        breakdown and the aggregate metrics.
+    """
+    results = calculate_memory_components(
+        hidden_size, num_layers, vocab_size, intermediate_size,
+        seq_len, mbs, batch_accum, tp, pp, dp, zero_stage,
+        tie_word_embeddings
+    )
+    # Close figures left over from earlier calls before creating new ones.
+    plt.close('all')
+    fig1 = _plot_bar_figure(results["Components"], 'Memory Component Breakdown')
+    fig2 = _plot_bar_figure(results["Aggregates"], 'Aggregate Memory Metrics', color='orange')
+    return fig1, fig2