Text Generation
Transformers
PyTorch
English
experimental
research
bit-level
transformer
reversible
safety
telemetry
language-modeling
Instructions to use WCNegentropy/BitTransformerLM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use WCNegentropy/BitTransformerLM with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="WCNegentropy/BitTransformerLM")
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("WCNegentropy/BitTransformerLM", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use WCNegentropy/BitTransformerLM with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "WCNegentropy/BitTransformerLM"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "WCNegentropy/BitTransformerLM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/WCNegentropy/BitTransformerLM
- SGLang
How to use WCNegentropy/BitTransformerLM with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "WCNegentropy/BitTransformerLM" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "WCNegentropy/BitTransformerLM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "WCNegentropy/BitTransformerLM" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "WCNegentropy/BitTransformerLM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use WCNegentropy/BitTransformerLM with Docker Model Runner:
docker model run hf.co/WCNegentropy/BitTransformerLM
| <!doctype html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <!-- Lay the page out at device width so the dashboard is usable on small screens. --> | |
| <meta name="viewport" content="width=device-width, initial-scale=1"> | |
| <title>Bit Transformer Dashboard</title> | |
| <!-- The stylesheet URL is produced by the template engine through url_for. --> | |
| <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}"> | |
| <!-- Chart.js supplies the global Chart constructor used by the telemetry chart script. --> | |
| <script src="https://cdn.jsdelivr.net/npm/chart.js"></script> | |
| </head> | |
| <body> | |
| <h1>Bit Transformer Dashboard</h1> | |
| <div class="container"> | |
| <section> | |
| <!-- Initialize Model: the initForm submit handler posts the model parameters to /init and the two floors to /config/telemetry. --> | |
| <h2>Initialize Model</h2> | |
| <form id="initForm"> | |
| d_model: <input type="number" name="d_model" value="{{ defaults.d_model }}" title="Model width (default {{ defaults.d_model }})"><br> | |
| nhead: <input type="number" name="nhead" value="{{ defaults.nhead }}" title="Attention heads (default {{ defaults.nhead }})"><br> | |
| num_layers: <input type="number" name="num_layers" value="{{ defaults.num_layers }}" title="Transformer layers (default {{ defaults.num_layers }})"><br> | |
| dim_feedforward: <input type="number" name="dim_feedforward" value="{{ defaults.dim_feedforward }}" title="Feedforward dim (default {{ defaults.dim_feedforward }})"><br> | |
| max_seq_len: <input type="number" name="max_seq_len" value="{{ defaults.max_seq_len }}" title="Max sequence length (default {{ defaults.max_seq_len }})"><br> | |
| <!-- chunk_size has no preset value; the submit handler omits it from the request when it is left blank. --> | |
| chunk_size: <input type="number" name="chunk_size" title="Chunked attention size"><br> | |
| overlap: <input type="number" name="overlap" value="{{ defaults.overlap }}" title="Sliding window overlap"><br> | |
| <!-- Checkbox state is taken from the template defaults so it always agrees with the default quoted in the tooltip. --> | |
| Reversible: <input type="checkbox" name="reversible" id="reversible_box" {% if defaults.reversible %}checked{% endif %} title="Use reversible layers (default {{ defaults.reversible }})"><br> | |
| Gradient Checkpointing: <input type="checkbox" name="use_checkpoint" id="checkpoint_box" {% if defaults.use_checkpoint %}checked{% endif %} title="Enable gradient checkpointing (default {{ defaults.use_checkpoint }})"><br> | |
| act_threshold: <input type="number" step="0.01" name="act_threshold" value="{{ defaults.act_threshold }}" title="ACT halt threshold (default {{ defaults.act_threshold }})"><br> | |
| c_floor: <input type="number" step="0.01" name="c_floor" value="{{ c_floor }}" title="Complexity floor"><br> | |
| s_floor: <input type="number" step="0.01" name="s_floor" value="{{ s_floor }}" title="Symbiosis floor"><br> | |
| <button type="submit">Init</button> | |
| </form> | |
| </section> | |
| <section> | |
| <!-- Train Step: the trainForm submit handler posts bits to /train. An uploaded file or a selected sample dataset, when present, is sent instead of the typed bits. --> | |
| <h2>Train Step</h2> | |
| <form id="trainForm"> | |
| Bits (e.g. 0 1 0 1): <input type="text" name="bits" value="0 1 0 1"><br> | |
| Upload file: <input type="file" id="train_file"><br> | |
| <button type="submit">Train</button> | |
| </form> | |
| <!-- Option values are split on "_" by the script into the name and split query parameters of /dataset. --> | |
| <label>Load sample dataset: | |
| <select id="datasetSelect"> | |
| <option value="">--Select--</option> | |
| <option value="wikitext2_train">Wikitext-2 (train)</option> | |
| <option value="wikitext2_validation">Wikitext-2 (validation)</option> | |
| </select> | |
| </label> | |
| <!-- Receives the progress marker, the reported loss, or "Error". --> | |
| <p id="trainOut"></p> | |
| </section> | |
| <section> | |
| <!-- Scale Up: the scaleBtn click handler posts width_mult to /scale_up and waits for the returned job. --> | |
| <h2>Scale Up</h2> | |
| Width Mult: <input type="number" step="0.1" id="width_mult" value="1.0"><br> | |
| <button id="scaleBtn">Scale Model</button> | |
| </section> | |
| <section> | |
| <!-- Collapse Submodel: both textareas are parsed as JSON by the submit handler and posted to /collapse together with width_scale. --> | |
| <h2>Collapse Submodel</h2> | |
| <form id="collapseForm"> | |
| Cluster Bits (JSON array of arrays):<br> | |
| <textarea name="clusters" rows="3" cols="40">[[0,1,0,1],[1,1,0,0]]</textarea><br> | |
| Target Params (JSON):<br> | |
| <textarea name="params" rows="3" cols="40">{"d_model":32,"nhead":4,"num_layers":1,"dim_feedforward":64,"max_seq_len":16}</textarea><br> | |
| Width Scale: <input type="number" step="0.1" id="width_scale" value="1.0"><br> | |
| <button type="submit">Collapse</button> | |
| </form> | |
| </section> | |
| <section> | |
| <!-- Inference: posts bits to /infer. An uploaded file or the first row of a selected sample dataset is sent instead of the typed bits when present. --> | |
| <h2>Inference</h2> | |
| <form id="inferForm"> | |
| Bits: <input type="text" name="bits" value="0 1 0 1"><br> | |
| Upload file: <input type="file" id="infer_file"><br> | |
| <button type="submit">Infer</button> | |
| </form> | |
| <pre id="inferOut"></pre> | |
| </section> | |
| <section> | |
| <!-- Long Inference: posts the typed bits with ctx_bits and overlap to /infer_long. --> | |
| <h2>Long Inference</h2> | |
| <form id="inferLongForm"> | |
| Bits: <input type="text" name="bits" value="0 1 0 1"><br> | |
| ctx_bits: <input type="number" name="ctx_bits" value="4096"><br> | |
| overlap: <input type="number" name="overlap" value="256"><br> | |
| <button type="submit">Infer Long</button> | |
| </form> | |
| <pre id="inferLongOut"></pre> | |
| </section> | |
| <section> | |
| <!-- Text Inference: posts the raw text to /infer_text. --> | |
| <h2>Text Inference</h2> | |
| <form id="textInferForm"> | |
| Text: <input type="text" name="text" value="hello"><br> | |
| <button type="submit">Infer Text</button> | |
| </form> | |
| <pre id="textInferOut"></pre> | |
| </section> | |
| <section> | |
| <!-- λ Weights: sliders are filled from GET /lambdas at load and posted back to /lambdas on submit. Each inline oninput handler copies the live slider value into the span beside it. --> | |
| <h2>λ Weights</h2> | |
| <form id="lambdaForm"> | |
| λ<sub>K</sub>: <input type="range" min="0" max="2" step="0.1" id="lambda_K" oninput="lambda_K_val.innerText=value"><span id="lambda_K_val"></span><br> | |
| λ<sub>C</sub>: <input type="range" min="0" max="2" step="0.1" id="lambda_C" oninput="lambda_C_val.innerText=value"><span id="lambda_C_val"></span><br> | |
| λ<sub>S</sub>: <input type="range" min="0" max="2" step="0.1" id="lambda_S" oninput="lambda_S_val.innerText=value"><span id="lambda_S_val"></span><br> | |
| <button type="submit">Update</button> | |
| </form> | |
| </section> | |
| <!-- Feature toggles: each checkbox below is wired by restoreToggle, which remembers the state in localStorage and posts it to /diffusion, /gpu, /compression and /qat respectively. --> | |
| <section> | |
| <h2>Diffusion LM</h2> | |
| <label><input type="checkbox" id="diffusion_box"> Enable Diffusion Mode</label> | |
| </section> | |
| <section> | |
| <h2>GPU Acceleration</h2> | |
| <label><input type="checkbox" id="gpu_box"> Enable FSDP & CUDA</label> | |
| </section> | |
| <section> | |
| <h2>Enable Compression</h2> | |
| <label><input type="checkbox" id="compression_box"> Compress I/O</label> | |
| <!-- comp_ratio is overwritten with the ratio field of a train or infer result when that field is present. --> | |
| <p>Ratio: <span id="comp_ratio">1.0</span></p> | |
| </section> | |
| <section> | |
| <h2>Quantization Aware Training</h2> | |
| <label><input type="checkbox" id="qat_box"> Enable 4-bit QAT</label> | |
| </section> | |
| <section> | |
| <!-- Model Status: refreshStatus writes the merged JSON of /status and /model_config here. --> | |
| <h2>Model Status</h2> | |
| <pre id="statusOut"></pre> | |
| </section> | |
| <section> | |
| <!-- Telemetry: Chart.js draws the /metrics series on this canvas; the script refreshes it every 2 seconds. --> | |
| <h2>Telemetry</h2> | |
| <canvas id="metricChart" width="600" height="300"></canvas> | |
| </section> | |
| <section> | |
| <!-- Hugging Face Checkpoints: uploadBtn posts to /save_checkpoint and downloadBtn to /download_checkpoint. A blank token is left out of the request body. --> | |
| <h2>Hugging Face Checkpoints</h2> | |
| Repo ID: <input type="text" id="hf_repo"><br> | |
| Token: <input type="password" id="hf_token" placeholder="optional"><br> | |
| <button id="uploadBtn">Upload weights</button> | |
| <button id="downloadBtn">Download weights</button> | |
| <!-- Shows the status or error field of the last checkpoint response. --> | |
| <p id="hfStatus"></p> | |
| </section> | |
| <script> | |
| async function postJSON(url, data){ | |
| const resp = await fetch(url, {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify(data)}); | |
| return resp.json(); | |
| } | |
| async function pollJob(id){ | |
| while(true){ | |
| const job = await fetch(`/job/${id}`).then(r=>r.json()); | |
| if(job.status === 'completed') return job.result; | |
| if(job.status === 'error') throw job.error || 'Job failed'; | |
| await new Promise(r=>setTimeout(r, 1000)); | |
| } | |
| } | |
| function loadInitParams(){ | |
| const saved = JSON.parse(localStorage.getItem('init_params')||'{}'); | |
| const form = document.getElementById('initForm'); | |
| for(const [k,v] of Object.entries(saved)){ | |
| const el = form.elements[k]; | |
| if(!el) continue; | |
| if(el.type === 'checkbox') el.checked = v; else el.value = v; | |
| } | |
| } | |
| loadInitParams(); | |
| function byteArrayToBits(arr){ | |
| const bits=[]; | |
| for(const b of arr){ | |
| for(let i=7;i>=0;i--) bits.push((b>>i)&1); | |
| } | |
| return bits; | |
| } | |
| let trainFileBits=null, inferFileBits=null, datasetBits=null; | |
| async function fileToBits(file){ | |
| if(file.type.startsWith('text')){ | |
| const text = await file.text(); | |
| const res = await postJSON('/text_to_bits', {text}); | |
| return res.bits; | |
| } | |
| const buf = await file.arrayBuffer(); | |
| return byteArrayToBits(new Uint8Array(buf)); | |
| } | |
| let metricChart; | |
| async function initChart(){ | |
| const data = await fetch('/metrics').then(r=>r.json()); | |
| const labels = data.negentropy.map((_,i)=>i); | |
| const ctx = document.getElementById('metricChart').getContext('2d'); | |
| metricChart = new Chart(ctx, { | |
| type:'line', | |
| data:{ | |
| labels:labels, | |
| datasets:[ | |
| {label:'Negentropy', data:data.negentropy, borderColor:'blue', fill:false}, | |
| {label:'LZ Complexity', data:data.lz_complexity, borderColor:'orange', fill:false}, | |
| {label:'Symbiosis', data:data.symbiosis, borderColor:'green', fill:false} | |
| ] | |
| }, | |
| options:{responsive:false, interaction:{mode:'index', intersect:false}} | |
| }); | |
| } | |
| async function updateChart(){ | |
| const data = await fetch('/metrics').then(r=>r.json()); | |
| const labels = data.negentropy.map((_,i)=>i); | |
| metricChart.data.labels = labels; | |
| metricChart.data.datasets[0].data = data.negentropy; | |
| metricChart.data.datasets[1].data = data.lz_complexity; | |
| metricChart.data.datasets[2].data = data.symbiosis; | |
| metricChart.update(); | |
| } | |
| initChart(); | |
| setInterval(updateChart, 2000); | |
| async function refreshStatus(){ | |
| const [s, c] = await Promise.all([fetch('/status'), fetch('/model_config')]); | |
| const status = await s.json(); | |
| const config = await c.json(); | |
| document.getElementById('statusOut').innerText = JSON.stringify({...status, ...config}, null, 2); | |
| } | |
| document.getElementById('initForm').addEventListener('submit', async (e)=>{ | |
| e.preventDefault(); | |
| const fd = new FormData(e.target); | |
| const obj = Object.fromEntries(fd.entries()); | |
| const ints = ['d_model','nhead','num_layers','dim_feedforward','max_seq_len','chunk_size','overlap']; | |
| ints.forEach(k=>{ if(obj[k]===''){ delete obj[k]; } else obj[k]=parseInt(obj[k]); }); | |
| obj.reversible = document.getElementById('reversible_box').checked; | |
| obj.use_checkpoint = document.getElementById('checkpoint_box').checked; | |
| obj.act_threshold = parseFloat(obj.act_threshold); | |
| const floors = {c_floor: parseFloat(obj.c_floor), s_floor: parseFloat(obj.s_floor)}; | |
| delete obj.c_floor; delete obj.s_floor; | |
| await postJSON('/init', obj); | |
| await postJSON('/config/telemetry', floors); | |
| localStorage.setItem('init_params', JSON.stringify({...obj, ...floors})); | |
| refreshStatus(); | |
| updateChart(); | |
| }); | |
| document.getElementById('trainForm').addEventListener('submit', async (e)=>{ | |
| e.preventDefault(); | |
| const form = e.target; | |
| let payload; | |
| if(trainFileBits){ | |
| payload = trainFileBits; | |
| } else if(datasetBits){ | |
| payload = datasetBits; | |
| } else { | |
| payload = [form.bits.value.trim().split(/\s+/).map(Number)]; | |
| } | |
| for(const el of form.elements) el.disabled = true; | |
| const out = document.getElementById('trainOut'); | |
| out.innerText = '⏳'; | |
| try{ | |
| const job = await postJSON('/train', {bits: payload}); | |
| const res = await pollJob(job.job_id); | |
| out.innerText = 'Loss: '+res.loss.toFixed(4); | |
| if(res.ratio !== undefined){ | |
| document.getElementById('comp_ratio').innerText = res.ratio.toFixed(2); | |
| } | |
| } catch(err){ | |
| out.innerText = 'Error'; | |
| alert(err); | |
| } finally { | |
| for(const el of form.elements) el.disabled = false; | |
| refreshStatus(); | |
| updateChart(); | |
| } | |
| }); | |
| document.getElementById('train_file').addEventListener('change', async (e)=>{ | |
| const f = e.target.files[0]; | |
| if(!f) return; | |
| const bits = await fileToBits(f); | |
| trainFileBits = [bits]; | |
| datasetBits = null; | |
| document.querySelector('#trainForm input[name="bits"]').value = bits.slice(0,64).join(' '); | |
| }); | |
| document.querySelector('#trainForm input[name="bits"]').addEventListener('input', ()=>{ | |
| trainFileBits = null; | |
| datasetBits = null; | |
| }); | |
| document.getElementById('scaleBtn').addEventListener('click', async ()=>{ | |
| const btn = document.getElementById('scaleBtn'); | |
| const input = document.getElementById('width_mult'); | |
| const mult = parseFloat(input.value); | |
| btn.disabled = true; input.disabled = true; | |
| const original = btn.innerText; btn.innerText = '⏳'; | |
| try{ | |
| const job = await postJSON('/scale_up', {width_mult: mult}); | |
| await pollJob(job.job_id); | |
| } catch(err){ | |
| alert(err); | |
| } finally { | |
| btn.innerText = original; | |
| btn.disabled = false; input.disabled = false; | |
| refreshStatus(); | |
| updateChart(); | |
| } | |
| }); | |
| document.getElementById('collapseForm').addEventListener('submit', async (e)=>{ | |
| e.preventDefault(); | |
| const form = e.target; | |
| const btn = form.querySelector('button'); | |
| for(const el of form.elements) el.disabled = true; | |
| const clusters = JSON.parse(form.clusters.value); | |
| const params = JSON.parse(form.params.value); | |
| const w = parseFloat(document.getElementById('width_scale').value); | |
| const original = btn.innerText; btn.innerText = '⏳'; | |
| try{ | |
| const job = await postJSON('/collapse', {clusters: clusters, params: params, width_scale: w}); | |
| await pollJob(job.job_id); | |
| } catch(err){ | |
| alert(err); | |
| } finally { | |
| btn.innerText = original; | |
| for(const el of form.elements) el.disabled = false; | |
| refreshStatus(); | |
| updateChart(); | |
| } | |
| }); | |
| document.getElementById('inferForm').addEventListener('submit', async (e)=>{ | |
| e.preventDefault(); | |
| let bits; | |
| if(inferFileBits){ | |
| bits = inferFileBits; | |
| } else if(datasetBits){ | |
| bits = [datasetBits[0]]; | |
| } else { | |
| bits = [e.target.bits.value.trim().split(/\s+/).map(Number)]; | |
| } | |
| const res = await postJSON('/infer', {bits}); | |
| if(res.error){ | |
| alert(res.error + '\n' + (res.suggestion||'')); | |
| } else { | |
| document.getElementById('inferOut').innerText = JSON.stringify(res, null, 2); | |
| if(res.ratio !== undefined){ | |
| document.getElementById('comp_ratio').innerText = res.ratio.toFixed(2); | |
| } | |
| } | |
| refreshStatus(); | |
| updateChart(); | |
| }); | |
| document.getElementById('infer_file').addEventListener('change', async (e)=>{ | |
| const f = e.target.files[0]; | |
| if(!f) return; | |
| const bits = await fileToBits(f); | |
| inferFileBits = [bits]; | |
| datasetBits = null; | |
| document.querySelector('#inferForm input[name="bits"]').value = bits.slice(0,64).join(' '); | |
| }); | |
| document.querySelector('#inferForm input[name="bits"]').addEventListener('input', ()=>{ | |
| inferFileBits = null; | |
| datasetBits = null; | |
| }); | |
| document.getElementById('datasetSelect').addEventListener('change', async (e)=>{ | |
| const val = e.target.value; | |
| trainFileBits = null; | |
| inferFileBits = null; | |
| if(!val){ datasetBits = null; return; } | |
| const [name, split] = val.split('_'); | |
| const resp = await fetch(`/dataset?name=${name}&split=${split}&size=4&seq_len=64`); | |
| const data = await resp.json(); | |
| datasetBits = data.bits; | |
| const preview = data.bits[0].slice(0,64).join(' '); | |
| document.querySelector('#trainForm input[name="bits"]').value = preview; | |
| document.querySelector('#inferForm input[name="bits"]').value = preview; | |
| }); | |
| document.getElementById('inferLongForm').addEventListener('submit', async (e)=>{ | |
| e.preventDefault(); | |
| const bits = e.target.bits.value.trim().split(/\s+/).map(Number); | |
| const ctx = parseInt(e.target.ctx_bits.value); | |
| const ov = parseInt(e.target.overlap.value); | |
| const res = await postJSON('/infer_long', {bits: bits, ctx_bits: ctx, overlap: ov}); | |
| document.getElementById('inferLongOut').innerText = JSON.stringify(res, null, 2); | |
| refreshStatus(); | |
| updateChart(); | |
| }); | |
| document.getElementById('textInferForm').addEventListener('submit', async (e)=>{ | |
| e.preventDefault(); | |
| const text = e.target.text.value; | |
| const res = await postJSON('/infer_text', {text:text}); | |
| document.getElementById('textInferOut').innerText = JSON.stringify(res, null, 2); | |
| refreshStatus(); | |
| updateChart(); | |
| }); | |
| async function loadLambdas(){ | |
| const resp = await fetch('/lambdas'); | |
| const vals = await resp.json(); | |
| for(const k of ['lambda_K','lambda_C','lambda_S']){ | |
| document.getElementById(k).value = vals[k]; | |
| document.getElementById(k+"_val").innerText = vals[k]; | |
| } | |
| } | |
| document.getElementById('lambdaForm').addEventListener('submit', async (e)=>{ | |
| e.preventDefault(); | |
| const data = { | |
| lambda_K: parseFloat(document.getElementById('lambda_K').value), | |
| lambda_C: parseFloat(document.getElementById('lambda_C').value), | |
| lambda_S: parseFloat(document.getElementById('lambda_S').value), | |
| }; | |
| await postJSON('/lambdas', data); | |
| for(const k in data){ | |
| document.getElementById(k+"_val").innerText = data[k]; | |
| } | |
| refreshStatus(); | |
| }); | |
| loadLambdas(); | |
| function restoreToggle(id,key,endpoint,field){ | |
| const box = document.getElementById(id); | |
| const saved = localStorage.getItem(key); | |
| if(saved !== null){ box.checked = saved === 'true'; postJSON(endpoint,{[field]: box.checked}); } | |
| box.addEventListener('change', async (e)=>{ | |
| await postJSON(endpoint, {[field]: e.target.checked}); | |
| localStorage.setItem(key, e.target.checked); | |
| refreshStatus(); | |
| }); | |
| } | |
| restoreToggle('diffusion_box','diffusion','/diffusion','diffusion'); | |
| restoreToggle('gpu_box','use_gpu','/gpu','use_gpu'); | |
| restoreToggle('compression_box','compression','/compression','compression'); | |
| restoreToggle('qat_box','qat','/qat','qat'); | |
| document.getElementById('uploadBtn').addEventListener('click', async ()=>{ | |
| const repo = document.getElementById('hf_repo').value; | |
| const token = document.getElementById('hf_token').value; | |
| const res = await postJSON('/save_checkpoint', {repo_id: repo, token: token||undefined}); | |
| document.getElementById('hfStatus').innerText = res.status || res.error; | |
| }); | |
| document.getElementById('downloadBtn').addEventListener('click', async ()=>{ | |
| const repo = document.getElementById('hf_repo').value; | |
| const token = document.getElementById('hf_token').value; | |
| const res = await postJSON('/download_checkpoint', {repo_id: repo, token: token||undefined}); | |
| document.getElementById('hfStatus').innerText = res.status || res.error; | |
| refreshStatus(); | |
| updateChart(); | |
| }); | |
| refreshStatus(); | |
| </script> | |
| </div> | |
| </body> | |
| </html> | |