bnjmnmarie committed on
Commit
71e0eee
·
verified ·
1 Parent(s): 230033c

Delete .ipynb_checkpoints

Browse files
.ipynb_checkpoints/README-checkpoint.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- license: apache-2.0
3
- base_model:
4
- - allenai/Olmo-3-7B-Think
5
- tags:
6
- - llmcompressor
7
- ---
8
- This is [allenai/Olmo-3-7B-Think](https://huggingface.co/allenai/Olmo-3-7B-Think) quantized with [LLM Compressor](https://github.com/vllm-project/llm-compressor) using SmoothQuant (W8A8). The model is compatible with vLLM (tested with v0.11.2 on an RTX 4090).
9
-
10
-
11
- - **Developed by:** [The Kaitchup](https://kaitchup.substack.com/)
12
- - **License:** Apache 2.0 license
13
-
14
- ## How to Support My Work
15
- Subscribe to [The Kaitchup](https://kaitchup.substack.com/subscribe). This helps me a lot to continue quantizing and evaluating models for free. Alternatively, you can "[buy me a Ko-fi](https://ko-fi.com/bnjmn_marie)".
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/config-checkpoint.json DELETED
@@ -1,114 +0,0 @@
1
- {
2
- "architectures": [
3
- "Olmo3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "dtype": "bfloat16",
8
- "eos_token_id": 100257,
9
- "hidden_act": "silu",
10
- "hidden_size": 4096,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 11008,
13
- "layer_types": [
14
- "sliding_attention",
15
- "sliding_attention",
16
- "sliding_attention",
17
- "full_attention",
18
- "sliding_attention",
19
- "sliding_attention",
20
- "sliding_attention",
21
- "full_attention",
22
- "sliding_attention",
23
- "sliding_attention",
24
- "sliding_attention",
25
- "full_attention",
26
- "sliding_attention",
27
- "sliding_attention",
28
- "sliding_attention",
29
- "full_attention",
30
- "sliding_attention",
31
- "sliding_attention",
32
- "sliding_attention",
33
- "full_attention",
34
- "sliding_attention",
35
- "sliding_attention",
36
- "sliding_attention",
37
- "full_attention",
38
- "sliding_attention",
39
- "sliding_attention",
40
- "sliding_attention",
41
- "full_attention",
42
- "sliding_attention",
43
- "sliding_attention",
44
- "sliding_attention",
45
- "full_attention"
46
- ],
47
- "max_position_embeddings": 65536,
48
- "model_type": "olmo3",
49
- "num_attention_heads": 32,
50
- "num_hidden_layers": 32,
51
- "num_key_value_heads": 32,
52
- "pad_token_id": 100277,
53
- "quantization_config": {
54
- "config_groups": {
55
- "group_0": {
56
- "format": "int-quantized",
57
- "input_activations": {
58
- "actorder": null,
59
- "block_structure": null,
60
- "dynamic": true,
61
- "group_size": null,
62
- "num_bits": 8,
63
- "observer": null,
64
- "observer_kwargs": {},
65
- "strategy": "token",
66
- "symmetric": true,
67
- "type": "int"
68
- },
69
- "output_activations": null,
70
- "targets": [
71
- "Linear"
72
- ],
73
- "weights": {
74
- "actorder": null,
75
- "block_structure": null,
76
- "dynamic": false,
77
- "group_size": null,
78
- "num_bits": 8,
79
- "observer": "minmax",
80
- "observer_kwargs": {},
81
- "strategy": "channel",
82
- "symmetric": true,
83
- "type": "int"
84
- }
85
- }
86
- },
87
- "format": "int-quantized",
88
- "global_compression_ratio": null,
89
- "ignore": [
90
- "lm_head"
91
- ],
92
- "kv_cache_scheme": null,
93
- "quant_method": "compressed-tensors",
94
- "quantization_status": "compressed",
95
- "sparsity_config": {},
96
- "transform_config": {},
97
- "version": "0.12.3.a20251114"
98
- },
99
- "rms_norm_eps": 1e-06,
100
- "rope_scaling": {
101
- "attention_factor": 1.2079441541679836,
102
- "beta_fast": 32,
103
- "beta_slow": 1,
104
- "factor": 8.0,
105
- "original_max_position_embeddings": 8192,
106
- "rope_type": "yarn"
107
- },
108
- "rope_theta": 500000,
109
- "sliding_window": 4096,
110
- "tie_word_embeddings": false,
111
- "transformers_version": "4.57.3",
112
- "use_cache": false,
113
- "vocab_size": 100278
114
- }