Upload folder using huggingface_hub
Browse files- README.md +140 -32
- config.json +4 -0
- generation_config.json +12 -0
- index.html +216 -0
- tokenizer_config.json +5 -1
README.md
CHANGED
|
@@ -1,15 +1,25 @@
|
|
| 1 |
# QED-75M Web (ONNX)
|
| 2 |
|
| 3 |
-
QED-75M — языковая модель (384 hidden, 32 слоя
|
| 4 |
|
| 5 |
-
|
| 6 |
|
| 7 |
-
-
|
| 8 |
-
- `tokenizer.json` — токенизатор
|
| 9 |
-
- `tokenizer_config.json` — конфиг токенизатора
|
| 10 |
-
- `config.json` — архитектура модели
|
| 11 |
|
| 12 |
-
## Ф
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
Модель обучена на чат-формате. Для лучших результатов используйте:
|
| 15 |
|
|
@@ -17,60 +27,158 @@ QED-75M — языковая модель (384 hidden, 32 слоя), экспо
|
|
| 17 |
<|user|>ваш вопрос<|assistant|>
|
| 18 |
```
|
| 19 |
|
| 20 |
-
**Пример:**
|
| 21 |
-
- Input: `<|user|>What is 2+2?<|assistant|>`
|
| 22 |
-
- Output: `The answer is 4. This is because...`
|
| 23 |
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
|
|
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
// Генерация
|
| 39 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
max_new_tokens: 128,
|
| 41 |
temperature: 0.7,
|
| 42 |
top_k: 40,
|
| 43 |
-
do_sample: true
|
|
|
|
|
|
|
| 44 |
});
|
| 45 |
|
| 46 |
-
|
|
|
|
| 47 |
```
|
| 48 |
|
| 49 |
-
### ONNX Runtime Web
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
```javascript
|
| 52 |
import * as ort from 'onnxruntime-web';
|
| 53 |
|
|
|
|
| 54 |
const session = await ort.InferenceSession.create('model.onnx');
|
| 55 |
|
| 56 |
-
//
|
| 57 |
-
const inputIds =
|
| 58 |
-
const tensor = new ort.Tensor('int64', inputIds, [1, inputIds.length]);
|
| 59 |
const { logits } = await session.run({ input_ids: tensor });
|
|
|
|
|
|
|
|
|
|
| 60 |
```
|
| 61 |
|
| 62 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
| Параметр | Значение |
|
| 65 |
|----------|----------|
|
| 66 |
-
| Vocabulary | 49,152 |
|
| 67 |
| Hidden dim | 384 |
|
| 68 |
| Layers | 32 |
|
| 69 |
-
|
|
| 70 |
| FFN dim | 1,024 |
|
| 71 |
-
| Max length | 8,192 |
|
| 72 |
| RoPE θ | 10,000 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
-
## Лицензия
|
| 75 |
|
| 76 |
MIT
|
|
|
|
| 1 |
# QED-75M Web (ONNX)
|
| 2 |
|
| 3 |
+
QED-75M — языковая модель (384 hidden, 32 слоя, 75M параметров), оптимизированная для веб-деплоя.
|
| 4 |
|
| 5 |
+
**Репозиторий:** https://huggingface.co/levossadtchi/QED-75M_web
|
| 6 |
|
| 7 |
+
---
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
## 📁 Файлы
|
| 10 |
+
|
| 11 |
+
| Файл | Описание | Размер |
|
| 12 |
+
|------|----------|--------|
|
| 13 |
+
| `model.onnx` | Веса модели (FP32) | ~365 MB |
|
| 14 |
+
| `tokenizer.json` | Словарь токенизатора | ~3 MB |
|
| 15 |
+
| `tokenizer_config.json` | Конфиг токенизатора | <1 KB |
|
| 16 |
+
| `config.json` | Архитектура модели | <1 KB |
|
| 17 |
+
| `generation_config.json` | Параметры генерации | <1 KB |
|
| 18 |
+
| `index.html` | Демо-плейграунд | <10 KB |
|
| 19 |
+
|
| 20 |
+
---
|
| 21 |
+
|
| 22 |
+
## 💬 Формат промптов
|
| 23 |
|
| 24 |
Модель обучена на чат-формате. Для лучших результатов используйте:
|
| 25 |
|
|
|
|
| 27 |
<|user|>ваш вопрос<|assistant|>
|
| 28 |
```
|
| 29 |
|
| 30 |
+
**Примеры:**
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
| Промпт | Ожидаемый ответ |
|
| 33 |
+
|--------|-----------------|
|
| 34 |
+
| `<|user|>What is 2+2?<|assistant|>` | "The answer is 2 + 2 = 4." |
|
| 35 |
+
| `<|user|>Explain gravity in one sentence.<|assistant|>` | "Gravity is a fundamental force..." |
|
| 36 |
+
| `<|user|>Write a haiku about cats.<|assistant|>` | Стихотворение про котов |
|
| 37 |
|
| 38 |
+
---
|
| 39 |
|
| 40 |
+
## 🚀 Использование
|
| 41 |
+
|
| 42 |
+
### Вариант 1: Transformers.js (рекомендуется)
|
| 43 |
|
| 44 |
+
```bash
|
| 45 |
+
npm install @xenova/transformers
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
```javascript
|
| 49 |
+
import { AutoTokenizer, AutoModelForCausalLM } from '@xenova/transformers';
|
| 50 |
+
|
| 51 |
+
// Загрузка модели
|
| 52 |
+
const tokenizer = await AutoTokenizer.from_pretrained('levossadtchi/QED-75M_web');
|
| 53 |
+
const model = await AutoModelForCausalLM.from_pretrained('levossadtchi/QED-75M_web', {
|
| 54 |
+
quantized: true, // Использовать int8 квантование
|
| 55 |
+
dtype: 'q8',
|
| 56 |
+
device: 'webgpu', // или 'wasm' для CPU
|
| 57 |
+
});
|
| 58 |
|
| 59 |
// Генерация
|
| 60 |
+
const prompt = '<|user|>What is 2+2?<|assistant|>';
|
| 61 |
+
const inputs = await tokenizer(prompt, { return_tensors: 'pt' });
|
| 62 |
+
|
| 63 |
+
// @xenova/transformers (2.x) expects the input_ids tensor as the first
// argument and the generation options as the second one.
const outputs = await model.generate(inputs.input_ids, {
  max_new_tokens: 128,
  temperature: 0.7,
  top_k: 40,
  do_sample: true,
  eos_token_id: tokenizer.eos_token_id,
  pad_token_id: tokenizer.pad_token_id,
});
|
| 72 |
|
| 73 |
+
const text = tokenizer.decode(outputs[0], { skip_special_tokens: false });
|
| 74 |
+
console.log(text);
|
| 75 |
```
|
| 76 |
|
| 77 |
+
### Вариант 2: ONNX Runtime Web (низкоуровневый)
|
| 78 |
+
|
| 79 |
+
```bash
|
| 80 |
+
npm install onnxruntime-web
|
| 81 |
+
```
|
| 82 |
|
| 83 |
```javascript
|
| 84 |
import * as ort from 'onnxruntime-web';
|
| 85 |
|
| 86 |
+
// Загрузка
|
| 87 |
const session = await ort.InferenceSession.create('model.onnx');
|
| 88 |
|
| 89 |
+
// Инференс
|
| 90 |
+
const inputIds = [1, 15826, 15, 638]; // токены
|
| 91 |
+
const tensor = new ort.Tensor('int64', BigInt64Array.from(inputIds.map(BigInt)), [1, inputIds.length]);
|
| 92 |
const { logits } = await session.run({ input_ids: tensor });
|
| 93 |
+
|
| 94 |
+
// Greedy decoding
|
| 95 |
+
// Assumes logits has shape [1, seqLen, vocabSize] (TODO confirm against the export).
// Only the last position predicts the next token, so restrict the argmax to
// that slice instead of scanning the whole flattened buffer.
const vocabSize = logits.dims[logits.dims.length - 1];
const lastLogits = logits.data.subarray(logits.data.length - vocabSize);
const nextToken = lastLogits.reduce((maxIdx, val, idx) => (val > lastLogits[maxIdx] ? idx : maxIdx), 0);
|
| 96 |
```
|
| 97 |
|
| 98 |
+
### Вариант 3: Готовый HTML
|
| 99 |
+
|
| 100 |
+
Откройте `index.html` в браузере или задеплойте на Vercel/Netlify.
|
| 101 |
+
|
| 102 |
+
---
|
| 103 |
+
|
| 104 |
+
## ⚙️ Параметры генерации
|
| 105 |
+
|
| 106 |
+
| Параметр | По умолчанию | Описание |
|
| 107 |
+
|----------|--------------|----------|
|
| 108 |
+
| `max_new_tokens` | 128 | Макс. количество новых токенов |
|
| 109 |
+
| `temperature` | 0.7 | Креативность (0 = greedy, >1 = хаос) |
|
| 110 |
+
| `top_k` | 40 | Сэмплирование из top-k токенов |
|
| 111 |
+
| `top_p` | 0.9 | Nucleus sampling (альтернатива top_k) |
|
| 112 |
+
| `repetition_penalty` | 1.1 | Штраф за повторы |
|
| 113 |
+
|
| 114 |
+
**Рекомендации:**
|
| 115 |
+
- Для фактов: `temperature=0.5, top_k=30`
|
| 116 |
+
- Для креатива: `temperature=0.8, top_k=50`
|
| 117 |
+
- Для кода: `temperature=0.2, top_k=20`
|
| 118 |
+
|
| 119 |
+
---
|
| 120 |
+
|
| 121 |
+
## 🏗 Архитектура
|
| 122 |
|
| 123 |
| Параметр | Значение |
|
| 124 |
|----------|----------|
|
| 125 |
+
| Vocabulary | 49,152 токенов |
|
| 126 |
| Hidden dim | 384 |
|
| 127 |
| Layers | 32 |
|
| 128 |
+
| Attention heads | 6 |
|
| 129 |
| FFN dim | 1,024 |
|
| 130 |
+
| Max length | 8,192 токена |
|
| 131 |
| RoPE θ | 10,000 |
|
| 132 |
+
| RMSNorm ε | 1e-5 |
|
| 133 |
+
|
| 134 |
+
---
|
| 135 |
+
|
| 136 |
+
## 📦 Квантование
|
| 137 |
+
|
| 138 |
+
Для уменьшения размера модели используйте int8 квантование:
|
| 139 |
+
|
| 140 |
+
```bash
|
| 141 |
+
pip install onnxruntime onnx
|
| 142 |
+
|
| 143 |
+
python -c "
|
| 144 |
+
from onnxruntime.quantization import quantize_dynamic, QuantType
|
| 145 |
+
quantize_dynamic('model.onnx', 'model_quantized.onnx', weight_type=QuantType.QUInt8)
|
| 146 |
+
"
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
**Размеры:**
|
| 150 |
+
- Оригинал (FP32): ~365 MB
|
| 151 |
+
- Квантованная (INT8): ~95 MB (−74%)
|
| 152 |
+
|
| 153 |
+
---
|
| 154 |
+
|
| 155 |
+
## 🌐 Браузерная поддержка
|
| 156 |
+
|
| 157 |
+
| Технология | Поддержка | Размер | Скорость |
|
| 158 |
+
|------------|-----------|--------|----------|
|
| 159 |
+
| **WebGPU** | Chrome 113+, Edge | ~100 MB | ⚡⚡⚡ Быстро |
|
| 160 |
+
| **WASM** | Все браузеры | ~100 MB | ⚡⚡ Средне |
|
| 161 |
+
| **CPU** | Резервный режим | ~365 MB | ⚡ Медленно |
|
| 162 |
+
|
| 163 |
+
---
|
| 164 |
+
|
| 165 |
+
## 🔧 Локальный запуск
|
| 166 |
+
|
| 167 |
+
```bash
|
| 168 |
+
# Клонировать репозиторий
|
| 169 |
+
git lfs install
|
| 170 |
+
git clone https://huggingface.co/levossadtchi/QED-75M_web
|
| 171 |
+
|
| 172 |
+
# Запустить локальный сервер
|
| 173 |
+
cd QED-75M_web
|
| 174 |
+
python -m http.server 8000
|
| 175 |
+
|
| 176 |
+
# Открыть в браузере
|
| 177 |
+
open http://localhost:8000/index.html
|
| 178 |
+
```
|
| 179 |
+
|
| 180 |
+
---
|
| 181 |
|
| 182 |
+
## 📝 Лицензия
|
| 183 |
|
| 184 |
MIT
|
config.json
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
"pad_token_id": 0,
|
| 15 |
"bos_token_id": 1,
|
| 16 |
"eos_token_id": 2,
|
|
|
|
| 17 |
"model_type": "qed",
|
| 18 |
"architectures": [
|
| 19 |
"QEDForCausalLM"
|
|
@@ -21,5 +22,8 @@
|
|
| 21 |
"auto_map": {
|
| 22 |
"AutoConfig": "modeling_qed.QEDConfig",
|
| 23 |
"AutoModelForCausalLM": "modeling_qed.QEDForCausalLM"
|
|
|
|
|
|
|
|
|
|
| 24 |
}
|
| 25 |
}
|
|
|
|
| 14 |
"pad_token_id": 0,
|
| 15 |
"bos_token_id": 1,
|
| 16 |
"eos_token_id": 2,
|
| 17 |
+
"unk_token_id": 3,
|
| 18 |
"model_type": "qed",
|
| 19 |
"architectures": [
|
| 20 |
"QEDForCausalLM"
|
|
|
|
| 22 |
"auto_map": {
|
| 23 |
"AutoConfig": "modeling_qed.QEDConfig",
|
| 24 |
"AutoModelForCausalLM": "modeling_qed.QEDForCausalLM"
|
| 25 |
+
},
|
| 26 |
+
"onnx": {
|
| 27 |
+
"quantized": true
|
| 28 |
}
|
| 29 |
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_length": 8192,
|
| 3 |
+
"max_new_tokens": 512,
|
| 4 |
+
"do_sample": true,
|
| 5 |
+
"temperature": 0.7,
|
| 6 |
+
"top_k": 40,
|
| 7 |
+
"top_p": 0.9,
|
| 8 |
+
"repetition_penalty": 1.1,
|
| 9 |
+
"pad_token_id": 0,
|
| 10 |
+
"bos_token_id": 1,
|
| 11 |
+
"eos_token_id": 2
|
| 12 |
+
}
|
index.html
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>QED-75M Playground</title>
|
| 7 |
+
<style>
|
| 8 |
+
body {
|
| 9 |
+
font-family: system-ui, -apple-system, sans-serif;
|
| 10 |
+
max-width: 800px;
|
| 11 |
+
margin: 0 auto;
|
| 12 |
+
padding: 20px;
|
| 13 |
+
background: #f5f5f5;
|
| 14 |
+
}
|
| 15 |
+
.container {
|
| 16 |
+
background: white;
|
| 17 |
+
border-radius: 8px;
|
| 18 |
+
padding: 20px;
|
| 19 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 20 |
+
}
|
| 21 |
+
h1 { color: #333; }
|
| 22 |
+
textarea {
|
| 23 |
+
width: 100%;
|
| 24 |
+
min-height: 100px;
|
| 25 |
+
padding: 12px;
|
| 26 |
+
border: 1px solid #ddd;
|
| 27 |
+
border-radius: 4px;
|
| 28 |
+
font-size: 16px;
|
| 29 |
+
resize: vertical;
|
| 30 |
+
}
|
| 31 |
+
button {
|
| 32 |
+
background: #007bff;
|
| 33 |
+
color: white;
|
| 34 |
+
border: none;
|
| 35 |
+
padding: 12px 24px;
|
| 36 |
+
border-radius: 4px;
|
| 37 |
+
font-size: 16px;
|
| 38 |
+
cursor: pointer;
|
| 39 |
+
margin-top: 10px;
|
| 40 |
+
}
|
| 41 |
+
button:hover { background: #0056b3; }
|
| 42 |
+
button:disabled { background: #ccc; cursor: not-allowed; }
|
| 43 |
+
.output {
|
| 44 |
+
margin-top: 20px;
|
| 45 |
+
padding: 15px;
|
| 46 |
+
background: #f8f9fa;
|
| 47 |
+
border-radius: 4px;
|
| 48 |
+
white-space: pre-wrap;
|
| 49 |
+
line-height: 1.6;
|
| 50 |
+
}
|
| 51 |
+
.status {
|
| 52 |
+
margin-top: 10px;
|
| 53 |
+
padding: 10px;
|
| 54 |
+
border-radius: 4px;
|
| 55 |
+
font-size: 14px;
|
| 56 |
+
}
|
| 57 |
+
.loading { background: #fff3cd; color: #856404; }
|
| 58 |
+
.ready { background: #d4edda; color: #155724; }
|
| 59 |
+
.error { background: #f8d7da; color: #721c24; }
|
| 60 |
+
.settings {
|
| 61 |
+
margin-top: 15px;
|
| 62 |
+
display: grid;
|
| 63 |
+
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
|
| 64 |
+
gap: 10px;
|
| 65 |
+
}
|
| 66 |
+
.settings label {
|
| 67 |
+
display: block;
|
| 68 |
+
font-size: 14px;
|
| 69 |
+
margin-bottom: 5px;
|
| 70 |
+
}
|
| 71 |
+
.settings input {
|
| 72 |
+
width: 100%;
|
| 73 |
+
padding: 8px;
|
| 74 |
+
border: 1px solid #ddd;
|
| 75 |
+
border-radius: 4px;
|
| 76 |
+
}
|
| 77 |
+
</style>
|
| 78 |
+
</head>
|
| 79 |
+
<body>
|
| 80 |
+
<div class="container">
|
| 81 |
+
<h1>🧪 QED-75M Playground</h1>
|
| 82 |
+
|
| 83 |
+
<div id="status" class="status loading">Loading model...</div>
|
| 84 |
+
|
| 85 |
+
<div class="settings">
|
| 86 |
+
<div>
|
| 87 |
+
<label>Max tokens: <span id="maxTokensVal">128</span></label>
|
| 88 |
+
<input type="range" id="maxTokens" min="32" max="512" value="128" step="32">
|
| 89 |
+
</div>
|
| 90 |
+
<div>
|
| 91 |
+
<label>Temperature: <span id="tempVal">0.7</span></label>
|
| 92 |
+
<input type="range" id="temperature" min="0.1" max="1.5" value="0.7" step="0.1">
|
| 93 |
+
</div>
|
| 94 |
+
<div>
|
| 95 |
+
<label>Top K: <span id="topKVal">40</span></label>
|
| 96 |
+
<input type="range" id="topK" min="10" max="100" value="40" step="10">
|
| 97 |
+
</div>
|
| 98 |
+
</div>
|
| 99 |
+
|
| 100 |
+
<textarea id="prompt" placeholder="Enter your prompt here...
|
| 101 |
+
|
| 102 |
+
Example: <|user|>What is 2+2?<|assistant|>"></textarea>
|
| 103 |
+
|
| 104 |
+
<button id="generateBtn" disabled>Generate</button>
|
| 105 |
+
|
| 106 |
+
<div class="output" id="output"></div>
|
| 107 |
+
</div>
|
| 108 |
+
|
| 109 |
+
<script type="module">
|
| 110 |
+
// Import Transformers.js
|
| 111 |
+
import { AutoTokenizer, AutoModelForCausalLM, GenerationConfig } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';
|
| 112 |
+
|
| 113 |
+
const MODEL_ID = 'levossadtchi/QED-75M_web';
|
| 114 |
+
|
| 115 |
+
let tokenizer = null;
|
| 116 |
+
let model = null;
|
| 117 |
+
|
| 118 |
+
// UI elements
|
| 119 |
+
const statusEl = document.getElementById('status');
|
| 120 |
+
const promptEl = document.getElementById('prompt');
|
| 121 |
+
const outputEl = document.getElementById('output');
|
| 122 |
+
const generateBtn = document.getElementById('generateBtn');
|
| 123 |
+
const maxTokensEl = document.getElementById('maxTokens');
|
| 124 |
+
const tempEl = document.getElementById('temperature');
|
| 125 |
+
const topKEl = document.getElementById('topK');
|
| 126 |
+
|
| 127 |
+
// Update value displays
|
| 128 |
+
maxTokensEl.addEventListener('input', (e) => {
|
| 129 |
+
document.getElementById('maxTokensVal').textContent = e.target.value;
|
| 130 |
+
});
|
| 131 |
+
tempEl.addEventListener('input', (e) => {
|
| 132 |
+
document.getElementById('tempVal').textContent = e.target.value;
|
| 133 |
+
});
|
| 134 |
+
topKEl.addEventListener('input', (e) => {
|
| 135 |
+
document.getElementById('topKVal').textContent = e.target.value;
|
| 136 |
+
});
|
| 137 |
+
|
| 138 |
+
// Load model
|
| 139 |
+
/**
 * Loads the tokenizer and then the model for MODEL_ID into the module-level
 * `tokenizer` and `model` variables.
 *
 * On success the status banner switches to its "ready" state and the Generate
 * button is enabled. On failure the banner shows the error message, the error
 * is logged to the console, and the button is left untouched. Nothing is
 * rethrown.
 */
async function loadModel() {
  // Sets the banner text first, then replaces its class list.
  const showStatus = (message, classNames) => {
    statusEl.textContent = message;
    statusEl.className = classNames;
  };

  try {
    const tokenizerOptions = { local_files_only: false };
    tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID, tokenizerOptions);

    // NOTE(review): `dtype` and `device` look like Transformers.js v3 options,
    // while this page imports @xenova/transformers 2.14.0. No WebGPU-to-WASM
    // fallback is implemented in this function; confirm these options take effect.
    const modelOptions = {
      quantized: true,
      dtype: 'q8',
      device: 'webgpu',
    };
    model = await AutoModelForCausalLM.from_pretrained(MODEL_ID, modelOptions);

    showStatus('✅ Model ready!', 'status ready');
    generateBtn.disabled = false;
  } catch (error) {
    showStatus(`❌ Error loading model: ${error.message}`, 'status error');
    console.error('Model loading error:', error);
  }
}
|
| 162 |
+
|
| 163 |
+
// Generate text
|
| 164 |
+
/**
 * Runs one generation pass for the text in the prompt box and writes the
 * decoded result (or an error message) into the output panel.
 *
 * Returns without doing anything when the prompt is blank, when the tokenizer
 * or model has not been loaded yet, or when a generation is already running.
 * The Ctrl+Enter shortcut calls this function directly, so the disabled button
 * alone does not prevent those cases.
 *
 * @returns {Promise<void>}
 */
async function generate() {
  // The button is disabled while the model loads and while a generation is in
  // flight, so it doubles as the "busy / not ready" flag.
  if (generateBtn.disabled || !tokenizer || !model) return;

  const prompt = promptEl.value.trim();
  if (!prompt) return;

  generateBtn.disabled = true;
  generateBtn.textContent = 'Generating...';
  outputEl.textContent = '';

  try {
    const temperature = Number.parseFloat(tempEl.value);

    const { input_ids } = await tokenizer(prompt, {
      add_special_tokens: false,
    });

    // The page imports Transformers.js 2.14.0, whose generate() expects the
    // input_ids tensor first and the generation options second.
    const outputs = await model.generate(input_ids, {
      max_new_tokens: Number.parseInt(maxTokensEl.value, 10),
      temperature,
      top_k: Number.parseInt(topKEl.value, 10),
      do_sample: temperature > 0,
      eos_token_id: tokenizer.eos_token_id,
      pad_token_id: tokenizer.pad_token_id,
    });

    // Special tokens are kept so the chat markers stay visible in the output.
    outputEl.textContent = tokenizer.decode(outputs[0], {
      skip_special_tokens: false,
    });
  } catch (error) {
    console.error('Generation error:', error);
    outputEl.textContent = `Error: ${error.message}`;
  } finally {
    // Always restore the button, whether generation succeeded or failed.
    generateBtn.disabled = false;
    generateBtn.textContent = 'Generate';
  }
}
|
| 202 |
+
|
| 203 |
+
generateBtn.addEventListener('click', generate);
|
| 204 |
+
|
| 205 |
+
// Allow Ctrl+Enter to generate
|
| 206 |
+
promptEl.addEventListener('keydown', (e) => {
|
| 207 |
+
if (e.ctrlKey && e.key === 'Enter') {
|
| 208 |
+
generate();
|
| 209 |
+
}
|
| 210 |
+
});
|
| 211 |
+
|
| 212 |
+
// Start loading
|
| 213 |
+
loadModel();
|
| 214 |
+
</script>
|
| 215 |
+
</body>
|
| 216 |
+
</html>
|
tokenizer_config.json
CHANGED
|
@@ -9,5 +9,9 @@
|
|
| 9 |
"pad_token_id": 0,
|
| 10 |
"bos_token_id": 1,
|
| 11 |
"eos_token_id": 2,
|
| 12 |
-
"unk_token_id": 3
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
}
|
|
|
|
| 9 |
"pad_token_id": 0,
|
| 10 |
"bos_token_id": 1,
|
| 11 |
"eos_token_id": 2,
|
| 12 |
+
"unk_token_id": 3,
|
| 13 |
+
"add_bos_token": false,
|
| 14 |
+
"add_eos_token": false,
|
| 15 |
+
"clean_up_tokenization_spaces": true,
|
| 16 |
+
"split_special_tokens": false
|
| 17 |
}
|