| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>Model Parameter Calculator</title> |
| <style> |
/* ---------- Page layout: input console on the left, results on the right ---------- */
body {
  display: flex;
  flex-direction: row;
  height: 100vh;
  margin: 0;
  padding: 0;
  font-family: Arial, sans-serif;
}

/* Left column holding the hyperparameter inputs. */
.console {
  width: 20%;
  padding: 20px;
  overflow-y: auto;
  border-right: 2px solid #ee4a4f;
  background-color: #f6b5b5;
}

/* MoE-only inputs stay hidden until the script switches them on. */
#additionalFieldsMOE {
  display: none;
}

/* Right column holding the rendered tables. */
.output {
  width: 80%;
  padding: 20px;
  overflow-y: auto;
}

/* ---------- Result tables ---------- */
table {
  width: 100%;
  margin-bottom: 20px;
  border: 1px solid #f6b5b5;
  border-collapse: collapse;
  background-color: #fff7f9;
}

td, th {
  padding: 8px;
  border: 1px solid #f6b5b5;
  text-align: center;
}

th {
  background-color: #f6d4d4;
}

/* Summary boxes showing per-layer and whole-model totals. */
.highlight {
  margin: 20px 0;
  padding: 10px;
  border: 1px solid #f6b5b5;
  border-radius: 4px;
  background-color: #ffe8e8;
  color: #ad0d0d;
  text-align: center;
}

h3 {
  color: #ad0d0d;
}

.section {
  margin-bottom: 20px;
}

/* ---------- Form controls ---------- */
label {
  font-size: 0.9rem;
  font-weight: bold;
}

select, input[type="number"] {
  box-sizing: border-box;
  width: 100%;
  padding: 8px;
  border: 1px solid #ddd;
  border-radius: 5px;
  font-size: 0.9rem;
}

/* The default outline is replaced by a coloured border plus a soft glow. */
select:focus, input[type="number"]:focus {
  outline: none;
  border-color: #ff6666;
  box-shadow: 0 0 5px rgba(255, 102, 102, 0.5);
}

button {
  width: 100%;
  padding: 5px;
  border: none;
  border-radius: 5px;
  background-color: #ff6666;
  color: white;
  font-size: 1rem;
  cursor: pointer;
  transition: background-color 0.3s ease;
}

button:hover {
  background-color: #e60000;
}
| </style> |
| <script> |
/**
 * Toggles the MoE-specific input group.
 *
 * Reads the current value of the `is_moe` select and shows the
 * `additionalFieldsMOE` container when it is "yes"; any other value hides it.
 */
function handleSelectChange() {
  const moeSelect = document.getElementById('is_moe');
  const moeFields = document.getElementById('additionalFieldsMOE');

  // An inline display value is set in both cases: 'block' to show, 'none' to hide.
  moeFields.style.display = moeSelect.value === 'yes' ? 'block' : 'none';
}
| |
/**
 * Computes the parameter count of the transformer described by the console
 * inputs and renders the breakdown into the `#output` element.
 *
 * What is counted: the four attention projections (Query, Key, Value, output
 * Projection), the three matrices of a gated feed-forward block, and one
 * embedding matrix (vocab * hidden). Optional bias vectors are added when
 * "Include bias?" is "yes". When "Is MOE ?" is "yes" the feed-forward block
 * is multiplied by the total expert count (full size) and by the active
 * expert count (active size).
 *
 * Every numeric field must hold a strictly positive whole number. If one does
 * not, a message naming the offending fields is rendered instead of the
 * tables, so that NaN never leaks into the results. The MoE fields are only
 * read and validated when MoE is enabled.
 *
 * Takes no arguments and returns nothing; its only effect is replacing the
 * innerHTML of `#output`.
 */
function calculateParameters() {
  const outputDiv = document.getElementById('output');
  const includeBias = document.getElementById('include_bias').value === 'yes';
  const isMoe = document.getElementById('is_moe').value === 'yes';

  // Number() is used instead of parseInt() because number inputs accept
  // exponent notation: parseInt('1e3') is 1, Number('1e3') is 1000.
  // Fractions and empty fields are rejected instead of being silently truncated.
  const readPositiveInt = (id) => {
    const raw = String(document.getElementById(id).value).trim();
    const parsed = raw === '' ? NaN : Number(raw);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : NaN;
  };

  const requiredFields = [
    ['hidden_size', 'Hidden size'],
    ['intermediate_size', 'Intermediate size'],
    ['vocab_size', 'Vocab size'],
    ['num_key_value_heads', 'Number of key-value heads'],
    ['num_heads', 'Number of attention (query) heads'],
    ['num_hidden_layers', 'Number of hidden layers']
  ];
  if (isMoe) {
    requiredFields.push(
      ['expert_number', 'Total expert number'],
      ['active_expert_number', 'Total active experts']
    );
  }

  const values = {};
  const invalidLabels = [];
  for (const [id, label] of requiredFields) {
    values[id] = readPositiveInt(id);
    if (Number.isNaN(values[id])) {
      invalidLabels.push(label);
    }
  }

  if (invalidLabels.length > 0) {
    // The labels are fixed strings defined above, so they are safe to interpolate.
    outputDiv.innerHTML = `
      <h1>Model Parameter Calculator</h1>
      <div class="highlight">
        <strong>Please enter a positive whole number for:</strong> ${invalidLabels.join(', ')}
      </div>
    `;
    return;
  }

  const hiddenSize = values.hidden_size;
  const intermediateSize = values.intermediate_size;
  const vocabSize = values.vocab_size;
  const numKvHeads = values.num_key_value_heads;
  const numHeads = values.num_heads;
  const numHiddenLayers = values.num_hidden_layers;

  const calcTotal = (input, output) => input * output;
  // Key/Value width under grouped-query attention: head_dim (hidden / heads) * kv heads.
  const kvDim = hiddenSize * numKvHeads / numHeads;

  // Attention projections.
  const attention = [
    { name: 'Query', input: hiddenSize, output: hiddenSize },
    { name: 'Key', input: hiddenSize, output: kvDim },
    { name: 'Value', input: hiddenSize, output: kvDim },
    { name: 'Projection', input: hiddenSize, output: hiddenSize }
  ].map(entry => ({
    ...entry,
    wTotal: calcTotal(entry.input, entry.output),
    bTotal: includeBias ? entry.output : 0
  }));
  const attentionTotal = attention.reduce((sum, entry) => sum + entry.wTotal + entry.bTotal, 0);

  // Gated feed-forward block: two up-projections and one down-projection.
  const swishW = calcTotal(hiddenSize, intermediateSize);
  const swishB = includeBias ? intermediateSize : 0;
  const gluW = calcTotal(hiddenSize, intermediateSize);
  const gluB = includeBias ? intermediateSize : 0;
  const projW = calcTotal(intermediateSize, hiddenSize);
  const projB = includeBias ? hiddenSize : 0;
  const feedForwardPerExpert = swishW + swishB + gluW + gluB + projW + projB;

  // With MoE every expert owns its own feed-forward block; only the active
  // ones contribute to the "active" totals.
  const feedForwardTotal = isMoe
    ? feedForwardPerExpert * values.expert_number
    : feedForwardPerExpert;
  const feedForwardTotalActive = isMoe
    ? feedForwardPerExpert * values.active_expert_number
    : undefined;

  const embeddingTotal = calcTotal(vocabSize, hiddenSize);

  const oneLayerParams = attentionTotal + feedForwardTotal;
  const fullLayersParams = oneLayerParams * numHiddenLayers;
  const fullSize = fullLayersParams + embeddingTotal;

  // The active variants are only meaningful (and only rendered) for MoE models.
  const oneLayerParamsActive = isMoe ? attentionTotal + feedForwardTotalActive : undefined;
  const fullLayersParamsMOEActive = isMoe ? oneLayerParamsActive * numHiddenLayers : undefined;
  const fullSizeActive = isMoe ? fullLayersParamsMOEActive + embeddingTotal : undefined;

  // Rendering helpers.
  const fmt = (n) => n.toLocaleString();
  const weightRow = (label, input, output, total) => `
          <tr>
            <td>${label}</td>
            <td>${fmt(input)}</td>
            <td>${fmt(output)}</td>
            <td>${fmt(total)}</td>
          </tr>`;
  // Bias rows have no input dimension and are omitted entirely without bias.
  const biasRow = (label, size, total) => (includeBias ? `
          <tr>
            <td>${label}</td>
            <td>-</td>
            <td>${fmt(size)}</td>
            <td>${fmt(total)}</td>
          </tr>` : '');

  const attentionRows = attention.map(entry =>
    weightRow(`${entry.name} - W`, entry.input, entry.output, entry.wTotal) +
    biasRow(`${entry.name} - b`, entry.output, entry.bTotal)
  ).join('');

  outputDiv.innerHTML = `
    <h1>Model Parameter Calculator</h1>
    <div class="section">
      <h3>Attention</h3>
      <table>
        <tr>
          <th>Parameter</th>
          <th>Input Size</th>
          <th>Output Size</th>
          <th>Total parameters (input*output)</th>
        </tr>
        ${attentionRows}
        <tr>
          <th colspan="3">Total attention parameters</th>
          <td>${fmt(attentionTotal)}</td>
        </tr>
      </table>
    </div>
    <div class="section">
      <h3>Feed Forward</h3>
      <table>
        <tr>
          <th>Parameter</th>
          <th>Input Size</th>
          <th>Output Size</th>
          <th>Total (input*output)</th>
        </tr>
        ${weightRow('Swish - W', hiddenSize, intermediateSize, swishW)}
        ${biasRow('Swish - b', intermediateSize, swishB)}
        ${weightRow('GLU W', hiddenSize, intermediateSize, gluW)}
        ${biasRow('GLU - b', intermediateSize, gluB)}
        ${weightRow('Projection - W', intermediateSize, hiddenSize, projW)}
        ${biasRow('Projection - b', hiddenSize, projB)}
        <tr>
          <th colspan="3">Total Feed Forward parameters</th>
          <td>${fmt(feedForwardTotal)}</td>
        </tr>
        ${isMoe ? `
        <tr>
          <th colspan="3">Total active Feed Forward parameters</th>
          <td>${fmt(feedForwardTotalActive)}</td>
        </tr>` : ''}
      </table>
    </div>
    <div class="highlight">
      <strong>1 Layer Parameters (Attention + FFN):</strong> ${fmt(oneLayerParams)}<br>
      <strong>Full Layers Parameters (1 layer parameters * num layers):</strong> ${fmt(fullLayersParams)}<br><br>
      ${isMoe ? `
      <strong>1 Layer Parameters Active(Attention + FFN):</strong> ${fmt(oneLayerParamsActive)}<br>
      <strong>Full Layers Parameters Active(1 layer active parameters * num layers):</strong> ${fmt(fullLayersParamsMOEActive)}<br>
      ` : ''}
    </div>
    <div class="section">
      <h3>Embedding</h3>
      <table>
        <tr>
          <th>Parameter</th>
          <th>Vocab Size</th>
          <th>Hidden Size</th>
          <th>Total (vocab * hidden)</th>
        </tr>
        <tr>
          <td>Embedding</td>
          <td>${fmt(vocabSize)}</td>
          <td>${fmt(hiddenSize)}</td>
          <td>${fmt(embeddingTotal)}</td>
        </tr>
      </table>
    </div>
    <div class="highlight">
      <strong>Complete Model Parameters (embedding size + full layers size):</strong> ${fmt(fullSize)}<br><br>
      ${isMoe ? `<strong>Complete Model Parameters Active:</strong> ${fmt(fullSizeActive)}` : ''}
    </div>
  `;
}
| </script> |
| </head> |
| <body> |
|
|
<!-- Input console: the ids below are the ones calculateParameters() and handleSelectChange() look up. -->
<div class="console">
  <h3>Input Parameters</h3>

  <label for="hidden_size">Hidden size:</label><br>
  <input type="number" id="hidden_size" value="896" min="1" step="1"><br><br>

  <label for="intermediate_size">Intermediate size:</label><br>
  <input type="number" id="intermediate_size" value="4864" min="1" step="1"><br><br>

  <label for="vocab_size">Vocab size:</label><br>
  <input type="number" id="vocab_size" value="151646" min="1" step="1"><br><br>

  <label for="num_key_value_heads">Number of key-value heads:</label><br>
  <input type="number" id="num_key_value_heads" value="2" min="1" step="1"><br><br>

  <label for="num_heads">Number of attention (query) heads:</label><br>
  <input type="number" id="num_heads" value="14" min="1" step="1"><br><br>

  <label for="num_hidden_layers">Number of hidden layers:</label><br>
  <input type="number" id="num_hidden_layers" value="24" min="1" step="1"><br><br>

  <label for="include_bias">Include bias?</label><br>
  <select id="include_bias">
    <option value="no">No</option>
    <option value="yes">Yes</option>
  </select><br><br>

  <label for="is_moe">Is MOE ?</label><br>
  <select id="is_moe" onchange="handleSelectChange()">
    <option value="no">No</option>
    <option value="yes">Yes</option>
  </select><br><br>

  <!-- Hidden by the stylesheet until "Is MOE ?" is set to "Yes". The default
       values keep the calculation well-defined if the user enables MoE and
       presses Calculate without filling these fields in. -->
  <div id="additionalFieldsMOE">
    <label for="expert_number">Total expert number :</label><br>
    <input type="number" id="expert_number" name="expert_number" value="8" min="1" step="1"><br><br>

    <label for="active_expert_number">Total active experts (shared + specifics):</label><br>
    <input type="number" id="active_expert_number" name="active_expert_number" value="2" min="1" step="1"><br><br>
  </div>

  <!-- Explicit type: a bare <button> defaults to "submit". -->
  <button type="button" onclick="calculateParameters()">Calculate</button>
</div>
|
|
<!-- Results area: calculateParameters() replaces this placeholder content with the computed tables. -->
<div class="output" id="output">
  <h1>Transformer total number of parameters Calculator</h1>
  <h3>Enter model hyperparameters in the console and press calculate (currently working for classic transformer/LLM architecture with GQA and GLU)</h3>
</div>
|
|
| </body> |
| </html> |
|
|