Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- .trillim-quantize-complete +1 -0
- README.md +55 -0
- adapter_config.json +46 -0
- chat_template.jinja +1 -0
- qmodel.lora +1 -1
- tokenizer.json +3 -0
- tokenizer_config.json +13 -0
- trillim_config.json +4 -3
.gitattributes
CHANGED
|
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
lora_tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
qmodel.lora filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
lora_tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
qmodel.lora filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
.trillim-quantize-complete
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
ready
|
README.md
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
tags:
|
| 4 |
+
- bitnet
|
| 5 |
+
- lora
|
| 6 |
+
- ternary
|
| 7 |
+
- trillim
|
| 8 |
+
- cpu-inference
|
| 9 |
+
base_model: microsoft/bitnet-b1.58-2B-4T-bf16
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# BitNet-Search-LoRA-TRNQ
|
| 13 |
+
|
| 14 |
+
Ternary-quantized LoRA adapter for [Trillim/BitNet-TRNQ](https://huggingface.co/Trillim/BitNet-TRNQ) that teaches the model to search. It generates search queries in <search>...</search> XML tags. You need to run Trillim (https://trillim.com) with a search harness that responds to the model's search calls in order to get improved results.
|
| 15 |
+
|
| 16 |
+
This adapter runs entirely on CPU — no GPU required.
|
| 17 |
+
|
| 18 |
+
## Adapter Details
|
| 19 |
+
|
| 20 |
+
| | |
|
| 21 |
+
|---|---|
|
| 22 |
+
| **Type** | LoRA adapter |
|
| 23 |
+
| **Style** | Search tool call |
|
| 24 |
+
| **Architecture** | BitNet (BitNetForCausalLM) |
|
| 25 |
+
| **Quantization** | Ternary ({-1, 0, 1}) |
|
| 26 |
+
| **Platforms** | x86_64, aarch64 |
|
| 27 |
+
| **Base model** | [Trillim/BitNet-TRNQ](https://huggingface.co/Trillim/BitNet-TRNQ) |
|
| 28 |
+
| **Source model** | [microsoft/bitnet-b1.58-2B-4T-bf16](https://huggingface.co/microsoft/bitnet-b1.58-2B-4T-bf16) |
|
| 29 |
+
| **License** | MIT |
|
| 30 |
+
|
| 31 |
+
## Usage
|
| 32 |
+
|
| 33 |
+
```bash
|
| 34 |
+
pip install trillim
|
| 35 |
+
trillim pull Trillim/BitNet-TRNQ
|
| 36 |
+
trillim pull Trillim/BitNet-Search-LoRA-TRNQ
|
| 37 |
+
trillim chat Trillim/BitNet-TRNQ Trillim/BitNet-Search-LoRA-TRNQ
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
This starts an interactive CLI chat.
|
| 41 |
+
|
| 42 |
+
## What's in this repo
|
| 43 |
+
|
| 44 |
+
| File | Description |
|
| 45 |
+
|---|---|
|
| 46 |
+
| `qmodel.lora` | Ternary-quantized LoRA weights in Trillim format |
|
| 47 |
+
| `tokenizer.json` | Tokenizer |
|
| 48 |
+
| `tokenizer_config.json` | Tokenizer configuration |
|
| 49 |
+
| `chat_template.jinja` | Chat template |
|
| 50 |
+
| `trillim_config.json` | Trillim metadata |
|
| 51 |
+
|
| 52 |
+
## License
|
| 53 |
+
|
| 54 |
+
This adapter is released under the [MIT License](https://opensource.org/licenses/MIT), following the license of the source model.
|
| 55 |
+
|
adapter_config.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alora_invocation_tokens": null,
|
| 3 |
+
"alpha_pattern": {},
|
| 4 |
+
"arrow_config": null,
|
| 5 |
+
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "microsoft/bitnet-b1.58-2B-4T-bf16",
|
| 7 |
+
"bias": "none",
|
| 8 |
+
"corda_config": null,
|
| 9 |
+
"ensure_weight_tying": false,
|
| 10 |
+
"eva_config": null,
|
| 11 |
+
"exclude_modules": null,
|
| 12 |
+
"fan_in_fan_out": false,
|
| 13 |
+
"inference_mode": true,
|
| 14 |
+
"init_lora_weights": true,
|
| 15 |
+
"layer_replication": null,
|
| 16 |
+
"layers_pattern": null,
|
| 17 |
+
"layers_to_transform": null,
|
| 18 |
+
"loftq_config": {},
|
| 19 |
+
"lora_alpha": 64,
|
| 20 |
+
"lora_bias": false,
|
| 21 |
+
"lora_dropout": 0.05,
|
| 22 |
+
"megatron_config": null,
|
| 23 |
+
"megatron_core": "megatron.core",
|
| 24 |
+
"modules_to_save": null,
|
| 25 |
+
"peft_type": "LORA",
|
| 26 |
+
"peft_version": "0.18.1",
|
| 27 |
+
"qalora_group_size": 16,
|
| 28 |
+
"r": 32,
|
| 29 |
+
"rank_pattern": {},
|
| 30 |
+
"revision": null,
|
| 31 |
+
"target_modules": [
|
| 32 |
+
"v_proj",
|
| 33 |
+
"k_proj",
|
| 34 |
+
"up_proj",
|
| 35 |
+
"gate_proj",
|
| 36 |
+
"q_proj",
|
| 37 |
+
"o_proj",
|
| 38 |
+
"down_proj"
|
| 39 |
+
],
|
| 40 |
+
"target_parameters": null,
|
| 41 |
+
"task_type": "CAUSAL_LM",
|
| 42 |
+
"trainable_token_indices": null,
|
| 43 |
+
"use_dora": false,
|
| 44 |
+
"use_qalora": false,
|
| 45 |
+
"use_rslora": false
|
| 46 |
+
}
|
chat_template.jinja
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{% set has_system = messages | selectattr('role', 'equalto', 'system') | list | length > 0 %}{% if not has_system %}System: You have access to a search tool. To search the web, write <search>your query</search> and you will receive results. Use search for questions about current events, specific people, places, or facts you are unsure about. Answer directly for math, reasoning, coding, or general knowledge you are confident about.<|eot_id|>{% endif %}{% for message in messages %}{% if message['role'] == 'system' %}System: {{ message['content'] | trim }}<|eot_id|>{% elif message['role'] == 'user' %}User: {{ message['content'] | trim }}<|eot_id|>{% elif message['role'] == 'assistant' %}Assistant: {{ message['content'] | trim }}<|eot_id|>{% elif message['role'] == 'search' %}Search: {{ message['content'] | trim }}<|eot_id|>{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant: ' }}{% endif %}
|
qmodel.lora
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 86507754
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b8afda40ec5631ab6ef56e6d62f700e33a1dfe05488f9afb9c9560be8f01329
|
| 3 |
size 86507754
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fc5ed64d17c57f61c0ef996ac8b3a8918e7d406866cc4a0292d362a31a217e4
|
| 3 |
+
size 17210125
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"bos_token": "<|begin_of_text|>",
|
| 4 |
+
"clean_up_tokenization_spaces": true,
|
| 5 |
+
"eos_token": "<|eot_id|>",
|
| 6 |
+
"is_local": false,
|
| 7 |
+
"model_input_names": [
|
| 8 |
+
"input_ids",
|
| 9 |
+
"attention_mask"
|
| 10 |
+
],
|
| 11 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 12 |
+
"pad_token": "<|eot_id|>"
|
| 13 |
+
}
|
trillim_config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
-
"trillim_version": "0.
|
| 3 |
-
"format_version":
|
| 4 |
"type": "lora_adapter",
|
| 5 |
"quantization": "ternary",
|
| 6 |
"source_model": "microsoft/bitnet-b1.58-2B-4T-bf16",
|
|
@@ -9,5 +9,6 @@
|
|
| 9 |
"x86_64",
|
| 10 |
"aarch64"
|
| 11 |
],
|
| 12 |
-
"base_model_config_hash": "
|
|
|
|
| 13 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"trillim_version": "0.6.0",
|
| 3 |
+
"format_version": 4,
|
| 4 |
"type": "lora_adapter",
|
| 5 |
"quantization": "ternary",
|
| 6 |
"source_model": "microsoft/bitnet-b1.58-2B-4T-bf16",
|
|
|
|
| 9 |
"x86_64",
|
| 10 |
"aarch64"
|
| 11 |
],
|
| 12 |
+
"base_model_config_hash": "f70d9d651af0fcf2a7c89c2194160b1139bf77638d0d7bf7f730770984bc2623",
|
| 13 |
+
"remote_code": false
|
| 14 |
}
|