"""Gradio demo: parameter input panels plus a "Calculate" button.

The UI collects parallelism, model and training parameters in three
columns. The button currently multiplies the four parallelism degrees
together and shows the product in the output field.
"""

import math

import gradio as gr

from defaults import DEFAULTS


def greet(name, intensity) -> str:
    """Return ``"Hello, <name>"`` followed by ``int(intensity)`` ``"!"`` marks."""
    return "Hello, " + name + "!" * int(intensity)


def create_parallelism_block():
    """Build the "Parallelism Parameters" column.

    Returns:
        The four ``gr.Number`` inputs, in order: tensor, pipeline, context
        and expert parallelism. Each defaults to 1.
    """
    with gr.Column():
        gr.Markdown("# Parallelism Parameters")
        tp = gr.Number(label="Tensor Parallelism", value=1, interactive=True)
        pp = gr.Number(label="Pipeline Parallelism", value=1, interactive=True)
        cp = gr.Number(label="Context Parallelism", value=1, interactive=True)
        ep = gr.Number(label="Expert Parallelism", value=1, interactive=True)
    return tp, pp, cp, ep


def create_model_block():
    """Build the "Model Parameters" column.

    Returns:
        ``(layers, vocab, hidden, intermediate, presets)``: four
        ``gr.Number`` inputs and a ``gr.Dropdown`` whose choices are the
        keys of ``DEFAULTS``.
    """
    with gr.Column():
        gr.Markdown("# Model Parameters")
        layers = gr.Number(label="Number of Layers", value=32, interactive=True)
        vocab = gr.Number(label="Vocab Size", value=32000, interactive=True)
        hidden = gr.Number(label="Hidden Dim", value=4096, interactive=True)
        intermediate = gr.Number(
            label="Intermediate Dim", value=11008, interactive=True
        )
        # NOTE(review): no event handler is attached to this dropdown in the
        # code visible here, so choosing a preset does not update the fields.
        presets = gr.Dropdown(list(DEFAULTS.keys()), label="Presets", interactive=True)
    return layers, vocab, hidden, intermediate, presets


def create_training_block():
    """Build the "Training Parameters" column.

    Returns:
        ``(seq_len, batch_size)`` as ``gr.Number`` inputs.
    """
    with gr.Column():
        gr.Markdown("# Training Parameters")
        seq_len = gr.Number(label="Sequence Length", value=8192, interactive=True)
        batch_size = gr.Number(label="Batch Size", value=8, interactive=True)
    return seq_len, batch_size


def calculate(*args) -> float:
    """Return the product of all positional arguments.

    Previously this returned the last argument instead of the accumulated
    product, and failed when called with no arguments.

    Args:
        *args: Numeric values to multiply together.

    Returns:
        The product of ``args``; 1 when no arguments are given.
    """
    return math.prod(args)


with gr.Blocks() as demo:
    with gr.Column():
        # The three parameter panels sit side by side in one row.
        with gr.Row():
            tp, pp, cp, ep = create_parallelism_block()
            layers, vocab, hidden, intermediate, presets = create_model_block()
            seq_len, batch_size = create_training_block()
        calculate_button = gr.Button("Calculate")
        output = gr.Number(label="Output")

    # Only the four parallelism inputs feed the calculation; the model and
    # training inputs are created above but not passed to `calculate`.
    calculate_button.click(fn=calculate, inputs=[tp, pp, cp, ep], outputs=output)

demo.launch()