File size: 2,029 Bytes
84f0b80
 
 
b79954f
 
84f0b80
 
b79954f
84f0b80
 
 
 
 
 
 
 
 
b79954f
84f0b80
 
 
 
 
 
b79954f
 
 
84f0b80
 
 
b79954f
84f0b80
 
b79954f
84f0b80
 
 
 
b79954f
 
84f0b80
 
 
 
 
 
 
 
 
 
 
 
 
 
b79954f
 
 
84f0b80
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
from defaults import DEFAULTS


def greet(name, intensity) -> str:
    """Return a greeting for ``name`` followed by exclamation marks.

    Args:
        name: Text to greet. It is concatenated directly onto a ``str``,
            so a non-string value raises ``TypeError``.
        intensity: How many ``"!"`` characters to append; passed through
            ``int()`` first, so floats are truncated and numeric strings
            are accepted.

    Returns:
        ``"Hello, <name>"`` followed by ``int(intensity)`` exclamation
        marks (none when that count is zero or negative).
    """
    salutation = "Hello, " + name
    exclamations = "!" * int(intensity)
    return salutation + exclamations


def create_parallelism_block():
    """Build the "Parallelism Parameters" column of the UI.

    Creates a ``gr.Column`` containing a Markdown heading and four
    interactive ``gr.Number`` inputs, each defaulting to 1.

    Returns:
        A 4-tuple ``(tp, pp, cp, ep)`` of the ``gr.Number`` components for
        tensor, pipeline, context and expert parallelism, in that order.
    """
    field_labels = (
        "Tensor Parallelism",
        "Pipeline Parallelism",
        "Context Parallelism",
        "Expert Parallelism",
    )
    with gr.Column():
        gr.Markdown("# Parallelism Parameters")
        # Components are instantiated in label order, which is also the
        # order they are laid out inside the column.
        tp, pp, cp, ep = [
            gr.Number(label=text, value=1, interactive=True)
            for text in field_labels
        ]
    return tp, pp, cp, ep


def create_model_block():
    """Build the "Model Parameters" column of the UI.

    Creates a ``gr.Column`` containing a Markdown heading, four interactive
    ``gr.Number`` inputs and a ``gr.Dropdown`` whose choices are the keys of
    ``DEFAULTS``.

    Returns:
        A 5-tuple ``(layers, vocab, hidden, intermediate, presets)``: the
        four number components followed by the presets dropdown.
    """
    # (label, initial value) for each numeric field, in display order.
    numeric_fields = (
        ("Number of Layers", 32),
        ("Vocab Size", 32000),
        ("Hidden Dim", 4096),
        ("Intermediate Dim", 11008),
    )
    with gr.Column():
        gr.Markdown("# Model Parameters")
        layers, vocab, hidden, intermediate = [
            gr.Number(label=text, value=initial, interactive=True)
            for text, initial in numeric_fields
        ]
        preset_names = list(DEFAULTS.keys())
        presets = gr.Dropdown(preset_names, label="Presets", interactive=True)
    return layers, vocab, hidden, intermediate, presets


def create_training_block():
    """Build the "Training Parameters" column of the UI.

    Creates a ``gr.Column`` containing a Markdown heading and two
    interactive ``gr.Number`` inputs.

    Returns:
        A 2-tuple ``(seq_len, batch_size)`` of the ``gr.Number`` components
        for sequence length (initially 8192) and batch size (initially 8).
    """
    # (label, initial value) for each field, in display order.
    numeric_fields = (
        ("Sequence Length", 8192),
        ("Batch Size", 8),
    )
    with gr.Column():
        gr.Markdown("# Training Parameters")
        seq_len, batch_size = [
            gr.Number(label=text, value=initial, interactive=True)
            for text, initial in numeric_fields
        ]
    return seq_len, batch_size


def calculate(*args) -> int:
    """Return the product of all positional arguments.

    Args:
        *args: Numeric values to multiply together.

    Returns:
        The product of ``args``, or 1 (the empty product) when no
        arguments are given.
    """
    out = 1
    for arg in args:
        out *= arg
    # Return the accumulated product. Returning the loop variable instead
    # would yield only the last argument and fail when ``args`` is empty.
    return out


# Assemble the page: one row holding the three parameter columns, then a
# button and a numeric output field underneath.
with gr.Blocks() as demo, gr.Column():
    with gr.Row():
        tp, pp, cp, ep = create_parallelism_block()
        layers, vocab, hidden, intermediate, presets = create_model_block()
        seq_len, batch_size = create_training_block()
    calculate_button = gr.Button("Calculate")
    output = gr.Number(label="Output")

    # Only the four parallelism inputs are passed to ``calculate``; the
    # model and training components are created but not wired to any event.
    parallelism_inputs = [tp, pp, cp, ep]
    calculate_button.click(
        fn=calculate,
        inputs=parallelism_inputs,
        outputs=output,
    )


demo.launch()