Text Generation
MLX
Safetensors
Rust
qwen3_5
27b
agentic-coding
alloy-backfilled
android
apple-silicon
attested
bash
c
chain-of-custody
chinese
code
code-completion
code-generation
code-infill
coder
coding
compacted
consumer-gpu
cpp
cryptographically-verified
css
edge-inference
efficient
embedded
english
forge-alloy
function-calling
go
head-pruning
html
iphone
java
javascript
kotlin
llama-cpp
lm-studio
local-inference
macbook
mobile
multilingual
ollama
on-device
optimized
php
programming
pruned
python
qwen
qwen3
qwen3.5
raspberry-pi
reproducible
ruby
software-engineering
sql
swift
typescript
conversational
Upload config.json with huggingface_hub
Browse files- config.json +0 -15
config.json
CHANGED
|
@@ -96,21 +96,6 @@
|
|
| 96 |
"num_key_value_heads": 4,
|
| 97 |
"pad_token_id": null,
|
| 98 |
"partial_rotary_factor": 0.25,
|
| 99 |
-
"quantization_config": {
|
| 100 |
-
"_load_in_4bit": true,
|
| 101 |
-
"_load_in_8bit": false,
|
| 102 |
-
"bnb_4bit_compute_dtype": "float16",
|
| 103 |
-
"bnb_4bit_quant_storage": "uint8",
|
| 104 |
-
"bnb_4bit_quant_type": "nf4",
|
| 105 |
-
"bnb_4bit_use_double_quant": true,
|
| 106 |
-
"llm_int8_enable_fp32_cpu_offload": false,
|
| 107 |
-
"llm_int8_has_fp16_weight": false,
|
| 108 |
-
"llm_int8_skip_modules": null,
|
| 109 |
-
"llm_int8_threshold": 6.0,
|
| 110 |
-
"load_in_4bit": true,
|
| 111 |
-
"load_in_8bit": false,
|
| 112 |
-
"quant_method": "bitsandbytes"
|
| 113 |
-
},
|
| 114 |
"rms_norm_eps": 1e-06,
|
| 115 |
"rope_parameters": {
|
| 116 |
"mrope_interleaved": true,
|
|
|
|
| 96 |
"num_key_value_heads": 4,
|
| 97 |
"pad_token_id": null,
|
| 98 |
"partial_rotary_factor": 0.25,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
"rms_norm_eps": 1e-06,
|
| 100 |
"rope_parameters": {
|
| 101 |
"mrope_interleaved": true,
|