NakliTechie committed on
Commit
79d5b3b
·
1 Parent(s): a923b4b

Sync from GitHub 2026-04-30T05:31:00Z

Browse files
Files changed (1) hide show
  1. index.html +16 -1
index.html CHANGED
@@ -1971,11 +1971,12 @@
1971
  <li><strong>Ternary Bonsai 1.7B</strong> (~470 MB, default) &mdash; text + agent (tool calling). Smallest download with tool calling. 1.58-bit ternary weights, Qwen3 backbone, Apache-2.0.</li>
1972
  <li><strong>Ternary Bonsai 4B</strong> (~1.1 GB) &mdash; same capabilities, better quality.</li>
1973
  <li><strong>Ternary Bonsai 8B</strong> (~2.2 GB) &mdash; best Bonsai quality, 65K context.</li>
 
1974
  <li><strong>Gemma 3 1B</strong> (~760 MB) &mdash; text-only, no tool calling. Fallback option.</li>
1975
  <li><strong>Gemma 4 E2B</strong> (~1.5 GB) &mdash; multimodal (image + audio) + agent.</li>
1976
  <li><strong>Gemma 4 E4B</strong> (~4.9 GB) &mdash; multimodal + agent, best quality.</li>
1977
  </ul>
1978
- <h4>Agent Tools (Ternary Bonsai + Gemma 4)</h4>
1979
  <ul>
1980
  <li><strong>calculate</strong> &mdash; math, percentages, conversions</li>
1981
  <li><strong>get_current_time</strong> &mdash; date/time with timezone</li>
@@ -2389,6 +2390,7 @@
2389
  <option value="bonsai-ternary-1.7b" selected>Ternary Bonsai 1.7B &middot; Agent (~470 MB)</option>
2390
  <option value="bonsai-ternary-4b">Ternary Bonsai 4B &middot; Agent (~1.1 GB)</option>
2391
  <option value="bonsai-ternary-8b">Ternary Bonsai 8B &middot; Agent (~2.2 GB)</option>
 
2392
  <option value="gemma3-1b">Gemma 3 1B &middot; Fast (~760 MB)</option>
2393
  <option value="gemma4-e2b">Gemma 4 E2B &middot; Multimodal (~1.5 GB)</option>
2394
  <option value="gemma4-e4b">Gemma 4 E4B &middot; Multimodal (~4.9 GB)</option>
@@ -2544,6 +2546,19 @@
2544
  contextSize: 65536,
2545
  genConfig: { temperature: 0.7, top_k: 20, top_p: 0.8, max_new_tokens: 2048, repetition_penalty: 1.05 },
2546
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
2547
  'gemma3-1b': {
2548
  id: 'onnx-community/gemma-3-1b-it-ONNX-GQA',
2549
  label: 'Gemma 3 1B',
 
1971
  <li><strong>Ternary Bonsai 1.7B</strong> (~470 MB, default) &mdash; text + agent (tool calling). Smallest download with tool calling. 1.58-bit ternary weights, Qwen3 backbone, Apache-2.0.</li>
1972
  <li><strong>Ternary Bonsai 4B</strong> (~1.1 GB) &mdash; same capabilities, better quality.</li>
1973
  <li><strong>Ternary Bonsai 8B</strong> (~2.2 GB) &mdash; best Bonsai quality, 65K context.</li>
1974
+ <li><strong>Qwen3 4B</strong> (~2.8 GB) &mdash; text + agent (tool calling). Standard Qwen3-4B at q4f16, Apache-2.0, 32K context.</li>
1975
  <li><strong>Gemma 3 1B</strong> (~760 MB) &mdash; text-only, no tool calling. Fallback option.</li>
1976
  <li><strong>Gemma 4 E2B</strong> (~1.5 GB) &mdash; multimodal (image + audio) + agent.</li>
1977
  <li><strong>Gemma 4 E4B</strong> (~4.9 GB) &mdash; multimodal + agent, best quality.</li>
1978
  </ul>
1979
+ <h4>Agent Tools (Ternary Bonsai + Qwen3 + Gemma 4)</h4>
1980
  <ul>
1981
  <li><strong>calculate</strong> &mdash; math, percentages, conversions</li>
1982
  <li><strong>get_current_time</strong> &mdash; date/time with timezone</li>
 
2390
  <option value="bonsai-ternary-1.7b" selected>Ternary Bonsai 1.7B &middot; Agent (~470 MB)</option>
2391
  <option value="bonsai-ternary-4b">Ternary Bonsai 4B &middot; Agent (~1.1 GB)</option>
2392
  <option value="bonsai-ternary-8b">Ternary Bonsai 8B &middot; Agent (~2.2 GB)</option>
2393
+ <option value="qwen3-4b">Qwen3 4B &middot; Agent (~2.8 GB)</option>
2394
  <option value="gemma3-1b">Gemma 3 1B &middot; Fast (~760 MB)</option>
2395
  <option value="gemma4-e2b">Gemma 4 E2B &middot; Multimodal (~1.5 GB)</option>
2396
  <option value="gemma4-e4b">Gemma 4 E4B &middot; Multimodal (~4.9 GB)</option>
 
2546
  contextSize: 65536,
2547
  genConfig: { temperature: 0.7, top_k: 20, top_p: 0.8, max_new_tokens: 2048, repetition_penalty: 1.05 },
2548
  },
2549
+ 'qwen3-4b': {
2550
+ id: 'onnx-community/Qwen3-4B-ONNX',
2551
+ label: 'Qwen3 4B',
2552
+ dtype: 'q4f16',
2553
+ size: '~2.8 GB',
2554
+ type: 'causal',
2555
+ multimodal: false,
2556
+ agentCapable: true,
2557
+ contextSize: 32768,
2558
+ // Qwen3 recommended sampling for non-thinking mode (temperature 0.7,
2559
+ // top_p 0.8, top_k 20). Apache-2.0, native tool calling.
2560
+ genConfig: { temperature: 0.7, top_k: 20, top_p: 0.8, max_new_tokens: 2048, repetition_penalty: 1.05 },
2561
+ },
2562
  'gemma3-1b': {
2563
  id: 'onnx-community/gemma-3-1b-it-ONNX-GQA',
2564
  label: 'Gemma 3 1B',