versae committed on
Commit
191258e
·
0 Parent(s):

Squash Borealis release history

Browse files
.gitattributes ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ LICENSE_FAQ.pdf filter=lfs diff=lfs merge=lfs -text
38
+ borealis.png filter=lfs diff=lfs merge=lfs -text
39
+ borealis_evals_202605.png filter=lfs diff=lfs merge=lfs -text
40
+ Model_Documentation_Form.pdf filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NB-License
2
+ Version 1.0, June 2026
3
+ https://ai.nb.no/license/
4
+
5
+ This license (henceforth: NB-license) establishes terms and conditions for use
6
+ of Borealis models with the following copyright notice: Borealis Copyright 2026
7
+ Nasjonalbiblioteket.
8
+
9
+ This model is a fine-tuned derivative of Gemma 3, licensed under the Gemma Terms
10
+ of Use. The original model is available from Google at
11
+ https://deepmind.google/models/gemma/gemma-3/. Modifications, fine-tuning, and
12
+ subsequent derivative works are distributed under the NB-license. The original
13
+ Gemma Terms of Use notices, disclaimers, attribution, and use restrictions
14
+ remain fully intact and are provided at https://ai.google.dev/gemma/terms. The
15
+ additional usage restrictions apply only to our derivative work.
16
+
17
+ 1. Definitions.
18
+
19
+ - "License" means the terms and conditions for use, reproduction, and
20
+ Distribution as defined in this document.
21
+
22
+ - "Licensor" means the copyright owner or entity authorized by the copyright
23
+ owner that is granting the License, including the persons or entities that may
24
+ have rights in the Model and/or distributing the Model.
25
+
26
+ - "You" (or "Your") means an individual or Legal Entity exercising permissions
27
+ granted by this License and/or making use of the Model for whichever purpose and
28
+ in any field of use, including usage of the Model in an end-use application -
29
+ e.g. chatbot, translator, image generator.
30
+
31
+ - "Third Parties" means individuals or legal entities that are not under common
32
+ control with Licensor or You.
33
+
34
+ - "Data" means a collection of information and/or content extracted from the
35
+ dataset used with the Model, including to train, pretrain, or otherwise evaluate
36
+ the Model. The Data is not licensed under this License.
37
+
38
+ - "Output" means the results of operating a Model as embodied in informational
39
+ content resulting therefrom.
40
+
41
+ - "Model" means any accompanying machine-learning based assemblies (including
42
+ checkpoints), consisting of learnt weights, parameters (including optimizer
43
+ states), corresponding to the model architecture as embodied in the
44
+ Complementary Material, that have been trained or tuned, in whole or in part on
45
+ the Data, using the Complementary Material.
46
+
47
+ - "Derivatives of the Model" means all modifications to the Model, works based
48
+ on the Model, or any other model which is created or initialized by transfer of
49
+ patterns of the weights, parameters, activations or output of the Model, to the
50
+ other model, in order to cause the other model to perform similarly to the
51
+ Model, including - but not limited to - distillation methods entailing the use
52
+ of intermediate data representations or methods based on the generation of
53
+ synthetic data by the Model for training the other model.
54
+
55
+ - "Complementary Material" means the accompanying source code and scripts used
56
+ to define, run, load, benchmark or evaluate the Model, and used to prepare data
57
+ for training or evaluation, if any. This includes any accompanying
58
+ documentation, tutorials, examples, etc, if any.
59
+
60
+ - "Distribution" means any transmission, reproduction, publication or other
61
+ sharing of the Model or Derivatives of the Model to a third party, including
62
+ providing the Model as a hosted service made available by electronic or other
63
+ remote means - e.g. API-based or web access.
64
+
65
+ - "Use" includes accessing and utilizing a Model or Derivatives of a Model, which
66
+ may also include creating content, fine-tuning, updating, running, training,
67
+ evaluating and/or re-parametrizing such Model or Derivatives of a Model.
68
+
69
+ - "Contribution" means any work of authorship, including the original version of
70
+ the Model and any modifications or additions to that Model or Derivatives of the
71
+ Model thereof, that is intentionally submitted to Licensor for inclusion in the
72
+ Model.
73
+
74
+ - "Contributor" means Licensor and any individual or Legal Entity on behalf of
75
+ whom a Contribution has been received by Licensor and subsequently incorporated
76
+ within the Model.
77
+
78
+ 2. Grant of License.
79
+
80
+ Subject to the terms and conditions of this License, each Contributor hereby
81
+ grants to You a time-unlimited, worldwide, non-exclusive, no-charge, royalty-
82
+ free license to use, make, have made, reproduce, prepare derivatives of the
83
+ model, publicly display, publicly perform, sublicense, offer to sell, sell,
84
+ import and otherwise transfer or distribute the model and derivatives of the
85
+ model.
86
+
87
+ 3. Redistribution
88
+
89
+ You may reproduce and distribute copies of the model or derivatives of the model
90
+ in any medium, with or without modifications, provided that You meet the
91
+ following conditions: (a) You must give any other recipients of the model or
92
+ derivatives of the model a copy of this License; and (b) You must retain all
93
+ copyright notices in any redistribution of the model or derivatives of the
94
+ model, excluding those notices that do not pertain to any part of a derivative
95
+ of the model; and (c) You must include the use-based restrictions in section 4
96
+ as a condition precedent to effect any type of legal agreement (e.g. a license)
97
+ governing the use and/or distribution of the model or its Derivatives, and give
98
+ such notice to any subsequent Third Party recipients.
99
+
100
+ 4. Use-based Restrictions.
101
+
102
+ You can only use the Model in compliance with the following conditions:
103
+
104
+ (a) You must comply with applicable laws and regulations.
105
+
106
+ (b) You must not use the model to intentionally recreate material from the
107
+ training data, whether protected by intellectual property rights or as personal
108
+ data.
109
+
110
+ (c) Neither the name of the National Library nor the names of creators or
111
+ publishers of training data may be used to endorse or promote products derived
112
+ from this model without specific prior written permission.
113
+
114
+ (d) You must not use the Model or its outputs to provide end-user services
115
+ whose primary purpose is to summarize, restate, copy, or otherwise replace
116
+ services for access to the press publications licensed as training data, unless you have a separate agreement with the
117
+ copyright holders. This includes, but is not limited to, services that provide
118
+ users with functional substitutes for access to the original press content.
119
+
120
+ 5. Outputs.
121
+
122
+ Except as set forth herein, Licensor claims no rights in the Output You generate
123
+ with a model. You are accountable for the Output You generate and its subsequent
124
+ uses. No use of the Output may contravene any provision as stated in this
125
+ License, hereunder section 4 on use-based restrictions.
126
+
127
+ 6. Disclaimer of Warranty.
128
+
129
+ Unless required by applicable law or agreed to in writing, Licensor provides the
130
+ Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
131
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
132
+ including, without limitation, any warranties or conditions of TITLE, NON-
133
+ INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
134
+ solely responsible for determining the appropriateness of using or
135
+ redistributing the Work and assume any risks associated with Your exercise of
136
+ permissions under this License. The Licensor makes no representations or
137
+ warranties that the Model or its Output will be free from factual inaccuracies,
138
+ biases, or potential misinterpretation of the underlying training data. You are
139
+ solely responsible for ensuring that any use of the Model adheres to applicable
140
+ legal and ethical standards.
141
+
142
+ 7. Limitation of Liability.
143
+
144
+ In no event and under no legal theory, whether in tort (including negligence),
145
+ contract, or otherwise, unless required by applicable law (such as deliberate
146
+ and grossly negligent acts) or agreed to in writing, shall any Contributor be
147
+ liable to You for damages, including any direct, indirect, special, incidental,
148
+ or consequential damages of any character arising as a result of this License or
149
+ out of the use or inability to use the Work (including but not limited to
150
+ damages for loss of goodwill, work stoppage, computer failure or malfunction, or
151
+ any and all other commercial damages or losses), even if such Contributor has
152
+ been advised of the possibility of such damages.
153
+
154
+ 8. Accepting Warranty or Additional Liability.
155
+
156
+ While redistributing the Work or Derivative Works thereof, You may choose to
157
+ offer, and charge a fee for, acceptance of support, warranty, indemnity, or
158
+ other liability obligations and/or rights consistent with this License. However,
159
+ in accepting such obligations, You may act only on Your own behalf and on Your
160
+ sole responsibility, not on behalf of any other Contributor, and only if You
161
+ agree to indemnify, defend, and hold each Contributor harmless for any liability
162
+ incurred by, or claims asserted against, such Contributor by reason of your
163
+ accepting any such warranty or additional liability.
164
+
165
+ 9. Term and Termination.
166
+
167
+ The term of this Agreement will commence upon your acceptance of this Agreement
168
+ and will continue in full force and effect until terminated in accordance with
169
+ the terms and conditions herein. Licensor may terminate this Agreement if you
170
+ are in breach of any term or condition of this Agreement. Upon termination of
171
+ this Agreement, you shall cease use of the licensed model. Section 7 shall
172
+ survive the termination of this Agreement. Termination of the agreement does not
173
+ affect your continued rights to use parts of the model the licensed model was
174
+ based on (if warm trained) under its original license.
175
+
176
+ 10. Governing Law and Jurisdiction.
177
+
178
+ This Agreement will be governed and construed under the laws of Norway. The Oslo
179
+ District Court shall have exclusive jurisdiction of any dispute arising out of
180
+ this Agreement.
LICENSE_FAQ.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4a87118b0b0c1e2d5a06b27bb4b56517d4d62b2b74860f77b8ac2d601224840
3
+ size 140945
Model_Documentation_Form.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89919c8ec2f2d5c700c9afbdfca21cd3dfaa15aee0a305e13b399ed058766cf3
3
+ size 335301
README.md ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ datasets:
4
+ - NbAiLab/aurora-sft
5
+ language:
6
+ - 'no'
7
+ - nb
8
+ - nn
9
+ base_model:
10
+ - google/gemma-3-1b-it
11
+ pipeline_tag: text-generation
12
+ library_name: transformers
13
+ tags:
14
+ - conversational
15
+ - instruct
16
+ - borealis
17
+ - gemma3_text
18
+ - norwegian
19
+ - norwegian-bokmal
20
+ - norwegian-nynorsk
21
+ - full-release
22
+ ---
23
+
24
+ ![Borealis](./borealis.png)
25
+
26
+ # Borealis 1B
27
+
28
+ ## Model Summary
29
+
30
+ **NbAiLab/borealis-1b** is a **1B-parameter** instruction-tuned **full release** model in the Borealis family from the National Library of Norway (Nasjonalbiblioteket, NB).
31
+
32
+ This is the first Borealis release to incorporate data made available under the agreement between rights-holder organizations in Norway and the Norwegian government. To date, only a limited subset of the material has been used, specifically to teach the model how to generate news article titles and lead paragraphs (ingress texts). Models with `-open` in their name do not include any material from the agreement.
33
+
34
+ All Borealis released models are based on the **Gemma 3** family. This model is based on [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it), and fine-tuned for Norwegian-centric instruction following.
35
+
36
+ ### Sizes
37
+
38
+ | Size | Full release | Open release |
39
+ |---:|---|---|
40
+ | 27B | [NbAiLab/borealis-27b](https://huggingface.co/NbAiLab/borealis-27b) | [NbAiLab/borealis-open-27b](https://huggingface.co/NbAiLab/borealis-open-27b) |
41
+ | 12B | [NbAiLab/borealis-12b](https://huggingface.co/NbAiLab/borealis-12b) | [NbAiLab/borealis-open-12b](https://huggingface.co/NbAiLab/borealis-open-12b) |
42
+ | 4B | [NbAiLab/borealis-4b](https://huggingface.co/NbAiLab/borealis-4b) | [NbAiLab/borealis-open-4b](https://huggingface.co/NbAiLab/borealis-open-4b) |
43
+ | 1B | [NbAiLab/borealis-1b](https://huggingface.co/NbAiLab/borealis-1b) | [NbAiLab/borealis-open-1b](https://huggingface.co/NbAiLab/borealis-open-1b) |
44
+ | 270M | [NbAiLab/borealis-270m](https://huggingface.co/NbAiLab/borealis-270m) | [NbAiLab/borealis-open-270m](https://huggingface.co/NbAiLab/borealis-open-270m) |
45
+
46
+ ## Training Data
47
+
48
+ Supervised fine-tuning (SFT) uses instruction data prepared by the National Library of Norway for Norwegian-centric assistant behavior, writing, summarization, question answering, and related tasks.
49
+
50
+ The SFT dataset for this model is [NbAiLab/aurora-sft](https://huggingface.co/datasets/NbAiLab/aurora-sft). [NbAiLab/aurora-sft-open](https://huggingface.co/datasets/NbAiLab/aurora-sft-open) is the open version of the SFT dataset. The only difference between [NbAiLab/aurora-sft-open](https://huggingface.co/datasets/NbAiLab/aurora-sft-open) and [NbAiLab/aurora-sft](https://huggingface.co/datasets/NbAiLab/aurora-sft) is the addition of 10k tasks derived from copyright-protected newspaper material.
51
+
52
+ The Norwegian government has entered into an agreement, through the National Library of Norway, to access copyrighted press material via Kopinor on behalf of the Norwegian Media Businesses' Association (MBL), enabling the lawful training, development, maintenance, and public release of Norwegian language models. The agreement covers copyright-protected press publications published in Norway up to one year from the date of publication of the model, effectively creating a rolling cutoff date rather than a fixed historical cutoff. For this release, the cutoff date is January 1, 2025.
53
+
54
+ ## Evaluation
55
+
56
+ <figure>
57
+ <img src="./borealis_evals_202605.png" alt="Borealis evaluation results">
58
+ <figcaption>Borealis evaluation results on selected tasks (best score among {0-5}-shot).</figcaption>
59
+ </figure>
60
+
61
+ We evaluate Borealis with NorEval, MMLU-English, and nb-gpt-bench, our own evaluation suite, which will be published and described in an upcoming paper. Despite using only around 10k newspaper-derived tasks from the abovementioned agreement, we observe a slight performance increase in some key metrics. We hope to further increase the difference by incorporating proper pre-training on the newspaper material.
62
+
63
+ ## Safety and Alignment
64
+
65
+ The Borealis family of models is aligned for safety using prompt baking and weighted merging of SFT and aligned models. The goal of this process is to balance model quality, usefulness, and safer behavior.
66
+
67
+ As with all generative models, outputs can still be incorrect, biased, harmful, or inappropriate. Do not use the model for safety-critical or high-stakes applications without additional evaluation and safeguards.
68
+
69
+ ### Prompt Baking
70
+
71
+ To align the Borealis models, we employ *prompt baking*, a procedure that distills the behavior induced by a system prompt directly into the model weights using [`bakery`](https://github.com/marksverdhei/bakery). Specifically, we train a LoRA adapter to minimize the KL-divergence between two model distributions: Borealis conditioned on the system prompt, and the same base model augmented with the LoRA adapter but evaluated without the system prompt in context. This objective encourages the adapter to reproduce the behavioral effects of the prompt without requiring the prompt to be present at inference time.
72
+
73
+ To reduce degradation on downstream tasks and preserve general model utility, we merge the resulting prompt adapter into the base model using a scaling factor of `0.25`, which we found to provide the best empirical trade-off.
74
+
75
+ ## Intended Use
76
+
77
+ - Norwegian-centric assistant-style tasks, including drafting, summarization, Q&A, and light reasoning (this is not a reasoning model).
78
+ - Assessment and improvement of Norwegian writing style and quality.
79
+ - Evaluation of behavior and language coverage for Norwegian, Bokmål, and Nynorsk.
80
+
81
+ ## Usage
82
+
83
+ This repository contains the Transformers/safetensors version of the model. The
84
+ examples below use `NbAiLab/borealis-1b` as the model id; replace it with
85
+ another Borealis repo id if needed.
86
+
87
+ ### Transformers
88
+
89
+ Use the regular causal language-model interface:
90
+
91
+ ```python
92
+ import torch
93
+ from transformers import AutoModelForCausalLM, AutoTokenizer
94
+
95
+ model_id = "NbAiLab/borealis-1b"
96
+
97
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
98
+ model = AutoModelForCausalLM.from_pretrained(
99
+ model_id,
100
+ device_map="auto",
101
+ torch_dtype=torch.bfloat16,
102
+ )
103
+
104
+ messages = [
105
+ {
106
+ "role": "user",
107
+ "content": "Skriv et kort sammendrag av hva Nasjonalbiblioteket gjør.",
108
+ }
109
+ ]
110
+
111
+ inputs = tokenizer.apply_chat_template(
112
+ messages,
113
+ add_generation_prompt=True,
114
+ return_tensors="pt",
115
+ return_dict=True,
116
+ ).to(model.device)
117
+
118
+ outputs = model.generate(**inputs, max_new_tokens=256)
119
+ new_tokens = outputs[0, inputs["input_ids"].shape[-1]:]
120
+ print(tokenizer.decode(new_tokens, skip_special_tokens=True))
121
+ ```
122
+
123
+ ### vLLM
124
+
125
+ Serve the model with vLLM's OpenAI-compatible server:
126
+
127
+ ```bash
128
+ vllm serve NbAiLab/borealis-1b --served-model-name borealis-1b
129
+ ```
130
+
131
+ Then call the local chat completions endpoint:
132
+
133
+ ```bash
134
+ curl http://localhost:8000/v1/chat/completions \
135
+ -H "Content-Type: application/json" \
136
+ -d '{
137
+ "model": "borealis-1b",
138
+ "messages": [
139
+ {
140
+ "role": "user",
141
+ "content": "Skriv tre korte punkter om norsk språkteknologi."
142
+ }
143
+ ],
144
+ "max_tokens": 256
145
+ }'
146
+ ```
147
+
148
+ Large models may require tensor parallelism or reduced memory settings depending
149
+ on the available GPU memory.
150
+
151
+ ### llama.cpp
152
+
153
+ llama.cpp uses GGUF files, not the safetensors files in this repository. When
154
+ the planned GGUF companion repository is available, you can run it directly from
155
+ the Hub:
156
+
157
+ ```bash
158
+ llama-server -hf NbAiLab/borealis-1b-gguf --port 8080
159
+ ```
160
+
161
+ Or download a GGUF file and run it locally:
162
+
163
+ ```bash
164
+ llama-cli -m borealis-1b.gguf \
165
+ -p "Skriv et kort sammendrag av hva Nasjonalbiblioteket gjør." \
166
+ -n 256
167
+ ```
168
+
169
+ ### Ollama
170
+
171
+ Ollama also requires a GGUF model. Once the GGUF companion repository is
172
+ available, you can run it from Hugging Face:
173
+
174
+ ```bash
175
+ ollama run hf.co/NbAiLab/borealis-1b-gguf
176
+ ```
177
+
178
+ For a local GGUF file, create a minimal `Modelfile`:
179
+
180
+ ```text
181
+ FROM ./borealis-1b.gguf
182
+ ```
183
+
184
+ Then create and run the local Ollama model:
185
+
186
+ ```bash
187
+ ollama create borealis-1b -f Modelfile
188
+ ollama run borealis-1b "Skriv tre korte punkter om norsk språkteknologi."
189
+ ```
190
+
191
+ ## Limitations
192
+
193
+ - The model may hallucinate or produce incorrect information.
194
+ - Safety alignment reduces but does not eliminate harmful or inappropriate outputs.
195
+ - Performance outside Norwegian and English use cases has not been fully characterized.
196
+
197
+ ## EU AI Act
198
+
199
+ The model is a fine-tune of Gemma 3. Using Gemma 3 27B as a conservative upper-bound reference, the original Gemma 3 27B training compute is estimated at approximately 2.1-2.3 x 10^24 FLOPs, based on the disclosed 14T training-token budget and the 27B parameter scale. The fine-tuning run used approximately 3.4 x 10^20 FLOPs, or about 0.015% of the estimated original training compute. This is substantially below the European Commission's indicative one-third threshold for treating a downstream modification as a significant modification that would make the modifier the provider of the modified General Purpose AI (GPAI) model.
200
+
201
+ On that basis, the fine-tuning activity is preliminarily assessed as not constituting a substantial modification for the purpose of becoming the provider of a new modified GPAI model under the compute-based criterion. However, the resulting model remains derived from a generative general-purpose AI model and may still be subject to downstream AI-system obligations under the EU AI Act.
202
+
203
+ For additional model-level documentation, see the [Model Documentation Form](./Model_Documentation_Form.pdf).
204
+
205
+ ## License
206
+
207
+ The license of this model is an adaptation of the Apache 2.0 license with additional use-based restrictions. In particular, users of the model are required to refrain from intentionally using the model to recreate data the model has been trained on. The license also requires users not to use the model or its output to provide end-user services whose primary purpose is to give access to licensed press publications in the training data.
208
+
209
+ For more information, see the [LICENSE](./LICENSE) and the [License FAQ](./LICENSE_FAQ.pdf).
210
+
211
+ ## Authenticity
212
+
213
+ This model release is signed by the National Library of Norway. The signed manifest in `signing/SHA256SUMS` covers the model-runtime artifacts, including model weights, configuration, tokenizer files, and chat template.
214
+
215
+ To verify model authenticity and file integrity after downloading the repository, run:
216
+
217
+ ```bash
218
+ bash signing/verify.sh
219
+ ```
220
+
221
+ For more verification instructions, see [ai.nb.no/verify](https://ai.nb.no/verify).
222
+
223
+ ## Weights
224
+
225
+ This repository contains the Transformers (safetensors) release of **NbAiLab/borealis-1b**.
226
+
227
+ Companion formats:
228
+ - GGUF: [NbAiLab/borealis-1b-gguf](https://huggingface.co/NbAiLab/borealis-1b-gguf)
229
+ - MLX: [NbAiLab/borealis-1b-mlx](https://huggingface.co/NbAiLab/borealis-1b-mlx)
230
+ - MLX 8-bit: [NbAiLab/borealis-1b-mlx-8bits](https://huggingface.co/NbAiLab/borealis-1b-mlx-8bits)
231
+
232
+ ## Citation and Contributors
233
+
234
+ The Borealis family of models is a joint effort of multiple teams at the National Library of Norway. Led by Javier de la Rosa ([@versae](https://huggingface.co/versae)), key contributors include (in alphabetical order) Rolv-Arild Braaten, Magnus Breder Birkenes, Lucas Charpentier, Pawel Cyrta, Tita Enstad, Markus Sverdvik Heiervang, Arne Martinus Lindstad, Marthe Løken Midtgaard, Marie Roald, Marie Røsok, Thea Tollersrud, and Angelina Zanardi. Olaus Ingskog Bergstrøm contributed with legal advice. And Yngvil Beyer, Svein Arne Brygfjeld, and Wilfred Østgulen helped with strategic oversight.
235
+
236
+ A technical report will be released soon.
237
+
238
+ ## Acknowledgements
239
+
240
+ Thanks to the Gemma team at Google for releasing Gemma 3, and to everyone contributing to the Norwegian language technology ecosystem.
241
+
242
+ ## Disclaimer
243
+
244
+ The models published in this repository are intended for a generalist purpose and are available to third parties. These models may have bias and/or any other undesirable distortions. When third parties deploy or provide systems and/or services to other parties using any of these models (or using systems based on these models) or become users of the models, they should note that it is their responsibility to mitigate the risks arising from their use and, in any event, to comply with applicable regulations, including regulations regarding the use of artificial intelligence. In no event shall the owner of the models (The National Library of Norway) be liable for any results arising from the use made by third parties of these models.
245
+
246
+ ## Contact
247
+
248
+ For feedback, technical concerns, or collaboration inquiries, please contact <a rel="noopener nofollow" href="mailto:ailab@nb.no">ailab@nb.no</a>.
borealis.png ADDED

Git LFS Details

  • SHA256: 5e6d03db57f4f12b359910620c4ba80b70f4fe0711d8125600434663137eb528
  • Pointer size: 131 Bytes
  • Size of remote file: 778 kB
borealis_evals_202605.png ADDED

Git LFS Details

  • SHA256: 5f2b6e74b343ba10c6ee5e8a7913a70d8b8e5335a4138ef583ee2baeb9e07655
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB
chat_template.jinja ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{'<start_of_turn>model
46
+ '}}
47
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "cache_implementation": "hybrid",
11
+ "dtype": "bfloat16",
12
+ "eos_token_id": 106,
13
+ "final_logit_softcapping": null,
14
+ "head_dim": 256,
15
+ "hidden_activation": "gelu_pytorch_tanh",
16
+ "hidden_size": 1152,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 6912,
19
+ "layer_types": [
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "sliding_attention",
25
+ "full_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "sliding_attention",
31
+ "full_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "sliding_attention",
37
+ "full_attention",
38
+ "sliding_attention",
39
+ "sliding_attention",
40
+ "sliding_attention",
41
+ "sliding_attention",
42
+ "sliding_attention",
43
+ "full_attention",
44
+ "sliding_attention",
45
+ "sliding_attention"
46
+ ],
47
+ "max_position_embeddings": 32768,
48
+ "model_type": "gemma3_text",
49
+ "num_attention_heads": 4,
50
+ "num_hidden_layers": 26,
51
+ "num_key_value_heads": 1,
52
+ "pad_token_id": 0,
53
+ "query_pre_attn_scalar": 256,
54
+ "rms_norm_eps": 1e-06,
55
+ "rope_parameters": {
56
+ "full_attention": {
57
+ "rope_theta": 1000000,
58
+ "rope_type": "default"
59
+ },
60
+ "sliding_attention": {
61
+ "rope_theta": 10000,
62
+ "rope_type": "default"
63
+ }
64
+ },
65
+ "sliding_window": 512,
66
+ "sliding_window_pattern": 6,
67
+ "tie_word_embeddings": true,
68
+ "transformers_version": "5.8.0",
69
+ "use_bidirectional_attention": false,
70
+ "use_cache": false,
71
+ "vocab_size": 262144
72
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "cache_implementation": "hybrid",
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 106,
7
+ 1,
8
+ 106
9
+ ],
10
+ "pad_token_id": 0,
11
+ "top_k": 64,
12
+ "top_p": 0.95,
13
+ "transformers_version": "5.8.0"
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc44cae80af9457f9e6d22346d5c6c9598ff2cbdf2f61ee5aba58659130bf8a6
3
+ size 1999811208
signing/SHA256SUMS ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ 7de1c58e208eda46e9c7f86397df37ec49883aeece39fb961e0a6b24088dd3c4 chat_template.jinja
2
+ f3ec312ba0a893f7adf275d4203edaa4348d74bd6047a43b4812b1ff787b19c6 config.json
3
+ d658664f05444ad13f0b7f1298cea34e5b745583f0c720d74531c49549fa1fa1 generation_config.json
4
+ cc44cae80af9457f9e6d22346d5c6c9598ff2cbdf2f61ee5aba58659130bf8a6 model.safetensors
5
+ 2f7b0adf4fb469770bb1490e3e35df87b1dc578246c5e7e6fc76ecf33213a397 special_tokens_map.json
6
+ daab2354f8a74e70d70b4d1f804939b68a8c9624dd06cb7858e52dd8970e9726 tokenizer.json
7
+ 1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c tokenizer.model
8
+ d9aedd7a33a5aff2d3b8426eb18aed48e701213b0945564537ef6cd8844b9150 tokenizer_config.json
signing/SHA256SUMS.sig ADDED
Binary file (512 Bytes). View file
 
signing/ca-chain.pem ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIG1jCCBL6gAwIBAgIQSIcdqlEaPQ81vpaZKLkkvzANBgkqhkiG9w0BAQsFADB1
3
+ MQswCQYDVQQGEwJHUjE3MDUGA1UECgwuSGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJl
4
+ c2VhcmNoIEluc3RpdHV0aW9ucyBDQTEtMCsGA1UEAwwkSEFSSUNBIENvZGUgU2ln
5
+ bmluZyBSU0EgUm9vdCBDQSAyMDIxMB4XDTIxMDMxOTA5MjEzNFoXDTM2MDMxNTA5
6
+ MjEzM1owaDELMAkGA1UEBhMCR1IxNzA1BgNVBAoMLkhlbGxlbmljIEFjYWRlbWlj
7
+ IGFuZCBSZXNlYXJjaCBJbnN0aXR1dGlvbnMgQ0ExIDAeBgNVBAMMF0hBUklDQSBD
8
+ b2RlIFNpZ25pbmcgUlNBMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA
9
+ 31Aluoq9mggXHg7TLFGOUTVDufEJjAKMhSYM4vh98DxFUPCE0soUcvj4AcbfOq0E
10
+ rXG93MW2o/8byyISWPDTu7CKSYzF2x67GEcjAlOvdQn5UZOvI2fw86727Q0GHZ77
11
+ +EfBRkLcLHPEGeVs4mkchtLydKjv42/Lsqu9/5AcYWro03lYf1q++LOmaJjSscvY
12
+ LdAPPnzm+Aa9FIZc3K3JeZ1dxNhT6fAheWeAO4JxnBT2k7Qa1oJqJ1u6t3RYiadj
13
+ shRVAwDq6/WVCW0hysPB9GuQEFkRKgkpQjcvZqG2yCMIVy9cCRQjIQWG9LSnLezv
14
+ wAan4sUIzzgoABiodCU7Gc6uJsshdjHUBiMxQzsbd5rKPZYPk+uiiXaN6V5LP079
15
+ 9treRIh6h3h111HITXE2rQDEBsdKrandn4oUTUz3VZMO6XLQA6xpPAinx7ISDdkO
16
+ 3VOkEYebMxDF/7lo1GSgrgPvFPRVNWhPxHdVrRkB8/ZKg90HyP0g339NkmDwhVqo
17
+ ZK4EiL5J3033w+U+HNowegNKxn8b1g/vB6ZHlVBGCugsO+77M6Z8/ouk755Jr8TI
18
+ gcyn/CKKadsb748oy/AvqmviFOM+ilORjZVWOFXy82QdjBDapfMOM6JqtSFUTvf5
19
+ OmVXDdIHMvosCWurRYXbVKgTNgOXVgJRAaquQBahtc0CAwEAAaOCAW0wggFpMBIG
20
+ A1UdEwEB/wQIMAYBAf8CAQAwHwYDVR0jBBgwFoAUtGQWSOj8WkszKYnrmUC5ILT2
21
+ YRowXAYIKwYBBQUHAQEEUDBOMEwGCCsGAQUFBzAChkBodHRwOi8vcmVwby5oYXJp
22
+ Y2EuZ3IvY2VydHMvSEFSSUNBLUNvZGVTaWduaW5nLVJvb3QtMjAyMS1SU0EuY2Vy
23
+ MEQGA1UdIAQ9MDswOQYEVR0gADAxMC8GCCsGAQUFBwIBFiNodHRwOi8vcmVwby5o
24
+ YXJpY2EuZ3IvZG9jdW1lbnRzL0NQUzATBgNVHSUEDDAKBggrBgEFBQcDAzBKBgNV
25
+ HR8EQzBBMD+gPaA7hjlodHRwOi8vY3JsLmhhcmljYS5nci9IQVJJQ0EtQ29kZVNp
26
+ Z25pbmctUm9vdC0yMDIxLVJTQS5jcmwwHQYDVR0OBBYEFKnsNRTrbmcFDsRtCQyg
27
+ cgYhtMouMA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAgEAhcU6MtmA
28
+ N9i3luIBF/JqB+CyCqlc9TFuu0d/Gi1PU86Mf6sqIqAObiDcM4J/uL2QWogUk5GZ
29
+ b1bNCRBrmhPv9C8rFsbeNKJYdDEWtslXQrnnd9vpAKkurG9imNW4RE/jsPV28T2I
30
+ iHWEYA0zGiP0qyAyEVvwSntEpFigaslhLFtVqf0uCGIvHRMfdD53WTXq37sInwcG
31
+ /W2C0zcno3PUK8qtCvF7cO1jujGxsLG/h0blm+M6b09doO1iSq3SAO4kwue+7AvB
32
+ 97ppo4XKgcp1Kq6LGl4Rzac23KeMRkucwofNILRbIBwxdaeudozb6XIvgGeNPmFq
33
+ 71HdLzBS0i9Tyxsf4VjlJj1vazncSFxz56RDNyendlYR39bZrQ6FMr6W5/vt1cwe
34
+ e7HlVnnQvdNMiFX/+uWH8kz29rP5q1NEQx8xjV7WnDqADnbTzGoUg9P3GR5Mv4m9
35
+ tSktUu6WqserABWf8e0L3ClvUKcnCeG/lyXbmQWxRIGrWAMmUSP1/BVajITUrEDW
36
+ qrrBmUlCoSpa0OZ7lfmlQ/QtaWvwrGRNgbLx7RgqlxsjeJtZUoBwC3nFNO6j80QB
37
+ CQEdsxUWKYZmXTnSuiG63O/wON+TmWrM7SbxqbiZRIGw90aHT2bK+8CfMgSdxl1k
38
+ JT844f3ByJb2qHA+z2lw0JqwVp07ORkrwwE=
39
+ -----END CERTIFICATE-----
40
+ -----BEGIN CERTIFICATE-----
41
+ MIIFtjCCA56gAwIBAgIQFcKuKk2ZmmOM07oTGXYI9TANBgkqhkiG9w0BAQsFADB1
42
+ MQswCQYDVQQGEwJHUjE3MDUGA1UECgwuSGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJl
43
+ c2VhcmNoIEluc3RpdHV0aW9ucyBDQTEtMCsGA1UEAwwkSEFSSUNBIENvZGUgU2ln
44
+ bmluZyBSU0EgUm9vdCBDQSAyMDIxMB4XDTIxMDIxOTEwNTk1NFoXDTQ1MDIxMzEw
45
+ NTk1M1owdTELMAkGA1UEBhMCR1IxNzA1BgNVBAoMLkhlbGxlbmljIEFjYWRlbWlj
46
+ IGFuZCBSZXNlYXJjaCBJbnN0aXR1dGlvbnMgQ0ExLTArBgNVBAMMJEhBUklDQSBD
47
+ b2RlIFNpZ25pbmcgUlNBIFJvb3QgQ0EgMjAyMTCCAiIwDQYJKoZIhvcNAQEBBQAD
48
+ ggIPADCCAgoCggIBAIpq7qoKI1UMiwZC45VbVHhfxY4GLHw8Mb6vDamh7EogWAWd
49
+ 4miyu+tffyozufJVnG+qpB7tEL6DKRE25p4/+m17UeHVd6W9y2kOOyIglAwxZUAN
50
+ Ca8QNXqb6nkIRKSLZ6krTcHn0Nen9rU6jdmjqXm4pGVcvPM+95+Z9rjDZWgtq4Mu
51
+ 3YWZBKn10VzVUUIBuZ9BtUsisgD0y2cQ72nEEK36lAZ2UBJXgq7FFK08fbud8XPh
52
+ fPCucd5b9xLd94Dx3D7xYLrJGZdvdXSFTP05Q8NkbjSsE8EcZbZSQvFG6y/6XabP
53
+ Dkmd93R8eA0GOiwS++JLJuRviyGBM0I5E+hCq1tV1Bu7N7YSkffzfbzS/+yxw9Wg
54
+ zLIrx49dfkxSQkj7j/1akHMQ1alyVI1J2zhSqkZIDi/6ACzMcww2SyTOE2ympKSj
55
+ 1nyb6TgLxiTbTaxnIUlbQTdk5mBrH+0qL2AZMB7YPJwZQ4ffwAr05MpgiHrWo7nk
56
+ JS554v7Byzy3989OWEz7w62rfarKiPsOOA0fnlw567gHxFAiTfiFf27sjPq2cU3e
57
+ fZZpxN0+Ht4mkAosTRWVqaI+3D0Od3yNQShPuINRvj63j5A6cDGJWvqTU/xgyY11
58
+ kO5aLx2EnwCp5sOGI6Ie3RLjoUZgG2e/UBUiknxKtIyPbpyVwizcOzogtryLAgMB
59
+ AAGjQjBAMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFLRkFkjo/FpLMymJ65lA
60
+ uSC09mEaMA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAgEAK6xZi+Y5
61
+ RNz7rUqI4GT/q8Py0s5wLwPlb7LE9TbWU2uHqx2LmQfIMI5Hcv2huHXWF6EWvWRz
62
+ kTBXrX39BUCGk9FJlrIL1HxkE4vTIUk4u/vozD4x8a57TCrfggajjgWEnH9wzsC7
63
+ RRffXWUM5FBIB0SLpC9W2sZ69mCNpDjxnYcWMLLwX3pCVSBxz8zuALESR79CueSz
64
+ tR0ZhLmYbeBptBVBteJQSMcf6pq3eZHd1dVTGfyuGGxp261ZKJtkmKc8x1WO0TAA
65
+ z9QynmKxwyruNSLGu/B8u4O4AIn13BqX2TiaKVNhGaan9DtH/d1nlQ+OpGYsgpq3
66
+ LXHm3674+GjBuw/qUKBFl7calRJvyLO2BWjaah4ONYWErHQMirT0dZ8ir7BUu7ae
67
+ IvnZ69WKDcfc+JgxX576xpf0QRB104G2MV963YiFCK9wRwI3e+JK7F3y3SkSRMiK
68
+ qt3SVXgXda9xaQ13cEqwH39C28dx3FjWGL1QxblyBMdne3xTYMpJGBW8QHOuK6gr
69
+ rG8RRDnsgkh+Ecr71j+2wLm4BpN1vZMndxcNXKWe6sFbAC0MqDVgx+JvNRs9drXn
70
+ rd10I0+GskfvyC/QhXw5ljehKykCvT6Hz6j2HXUyzzjjc7HuEIG1KrGIBlEeWjpI
71
+ UfM2WWLfQmZZULdYvvV2QJoSFuYOqtdsoNU=
72
+ -----END CERTIFICATE-----
signing/cert.pem ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIG6TCCBNGgAwIBAgIQRH6DOxTQI6Hp8NKCckMm7DANBgkqhkiG9w0BAQsFADBo
3
+ MQswCQYDVQQGEwJHUjE3MDUGA1UECgwuSGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJl
4
+ c2VhcmNoIEluc3RpdHV0aW9ucyBDQTEgMB4GA1UEAwwXSEFSSUNBIENvZGUgU2ln
5
+ bmluZyBSU0EwHhcNMjYwNTA4MDc1NjU1WhcNMjcwNTA4MDc1NjU1WjBlMQswCQYD
6
+ VQQGEwJOTzENMAsGA1UEBwwET3NsbzEcMBoGA1UECgwTTmFzam9uYWxiaWJsaW90
7
+ ZWtldDELMAkGA1UECwwCSVQxHDAaBgNVBAMME05hc2pvbmFsYmlibGlvdGVrZXQw
8
+ ggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCgChY1uSq4zwWzCCPkSMVo
9
+ +nCvBwBvGIuew3AHO1EVDc2wCr0HMI6U0KP9/DrPbtPUMFVRFT7RHn3GN8wY9GKb
10
+ tSBqeiFdHOBTesPqoi5QGutvGJfjFXcODk8LtUcPwfj11rV20rupjK5XOWlRURDd
11
+ rHjPgnWqOe2LuQqhhtD6l/k63cCi3pC4//qC3txaiOhYujTZpCaUi/9jMvPVao5Z
12
+ 1YTfFzIR8F6oAYXACIdyI8raltCVtqh0Fav/gY4oj2WNUFyluPHEuaKkief65JRQ
13
+ NprwC/XpwdZiVTLlAivngJ+HS+HDqtT4R2SQNkwpouM5g/Vdz6Oxi9v5LO4yeF1Z
14
+ zXxk5SM7CIS8dw4RVJH2+H0cc6OQg4G2RUhaFlQQwuGrGT6zWGRmav2lr+sNmhfT
15
+ 9tgD9RVzzxPb9/lyEY3z5t/Aq9DTDp980qtloj4Wn6LUPmUv42tCLOdFU6BESOmI
16
+ G1Wgvxr8qGomxUAIlGABDGwekmaqVRBqpGkxvWvoSHgez/rYophyxBAx+nAdYo+O
17
+ xhk9d1ipzVUihCkQ6GNogRPpQ2N5ga6id0ULX+iDieU6xVA4lD8xoCkL38I64TxS
18
+ /8ixkiC604DAAQvfoP3mqmyCYbXLDnmyGFQ9S8cxbVD+OV+EWTY+GwGyfZ7A1fZF
19
+ o5+5ey8X4EfW3HA5/8SULQIDAQABo4IBkDCCAYwwCQYDVR0TBAIwADAfBgNVHSME
20
+ GDAWgBSp7DUU625nBQ7EbQkMoHIGIbTKLjBxBggrBgEFBQcBAQRlMGMwPgYIKwYB
21
+ BQUHMAKGMmh0dHA6Ly9jcnQuaGFyaWNhLmdyL0hBUklDQS1Db2RlU2lnbmluZy1T
22
+ dWItUjEuY2VyMCEGCCsGAQUFBzABhhVodHRwOi8vb2NzcC5oYXJpY2EuZ3IwYgYD
23
+ VR0gBFswWTAIBgZngQwBBAEwCAYGBACPegECMEMGDSsGAQQBgc8RAQEDAQIwMjAw
24
+ BggrBgEFBQcCARYkaHR0cHM6Ly9yZXBvLmhhcmljYS5nci9kb2N1bWVudHMvQ1BT
25
+ MBMGA1UdJQQMMAoGCCsGAQUFBwMDMEMGA1UdHwQ8MDowOKA2oDSGMmh0dHA6Ly9j
26
+ cmwuaGFyaWNhLmdyL0hBUklDQS1Db2RlU2lnbmluZy1TdWItUjEuY3JsMB0GA1Ud
27
+ DgQWBBRkiwQILl9eSkYw+Nbx5KokmIBoUjAOBgNVHQ8BAf8EBAMCB4AwDQYJKoZI
28
+ hvcNAQELBQADggIBABE2vKd3Q6JMFSxZUXr/kH88h40CsVG5mZ0Hr3tgm9iITjzY
29
+ 6ICVO2YPQXKn7Bt6hddsDWEVjQgYGArAxFV0rS9/Olr1YKdYOE/ANF7TGvbGCcVE
30
+ qNIisEuHqoah406LW6Om+nSlIzizpiKWPcm4RSL0LQrwNj/Z31aNAbQNKu97eI3x
31
+ uy6ZAU1xrRbRy5HNxbmXJGfy84wvogaGqHsWkYtbPL/uqzWJoj/yxLKl7YKiyfZ0
32
+ h604HpGlv8Xq6CjgMWTmHyGgxIIX+EBd3tvPpGHdswGvGhX8HFkBjH2uWAmFdYbI
33
+ DMTyqh4UzV3WLguaHWHsZ9QzFOcETXOFTwmH5NCckeGGaaPVxjuuAu9jqyenU84k
34
+ a8rZ1qmFJzUNbjrWXK9pjsLxY+ePMu0UW9dnNGppsPCh9Ac7gBzHHDj3pQMZvIWN
35
+ UKMmMw/a6hKV6VYnmYRJa3K0nEXObJv53vgEEF0H9WePUEcFAp9VKDeTGULWyT+b
36
+ 6iaWQqgXlkWz3isiaJgmYnyqct//aXOo+HP4TRmM6Azpd3/IlTal9T400rzc2OkQ
37
+ V89AmB9F8G7ZuHduRA/TKhKL/xFq+rQs0G2S41IgqTRX0Ss6wgNv/pBDWr+3kYgV
38
+ 4M7Vr6zpk7ZogqJKcXZpwnegxchSdjMn62WESKM636yJYdsZAikCs8T+sopK
39
+ -----END CERTIFICATE-----
signing/verify.sh ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #
3
+ # Verify the integrity and authenticity of this model release.
4
+ #
5
+ # Usage: bash signing/verify.sh
6
+ #
7
+ # This script verifies:
8
+ # 1. The signing certificate chains to the CA certificates bundled in signing/ca-chain.pem
9
+ # 2. The SHA256SUMS manifest was signed by Nasjonalbiblioteket
10
+ # 3. All file checksums match the manifest
11
+ # Exit status is 0 when every step passes and 1 otherwise.
12
+
13
+ set -euo pipefail
14
+
15
+ RED='\033[0;31m'
16
+ GREEN='\033[0;32m'
17
+ NC='\033[0m'
18
+
19
+ pass() { echo -e "${GREEN}[PASS]${NC} $*"; }
20
+ fail() { echo -e "${RED}[FAIL]${NC} $*" >&2; }
21
+
22
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
23
+ MODEL_DIR="$(dirname "$SCRIPT_DIR")"
24
+ SIGNING_DIR="$SCRIPT_DIR"
25
+
26
+ cd "$MODEL_DIR"
27
+
28
+ errors=0
29
+
30
+ # Check required files exist
31
+ for f in "$SIGNING_DIR/SHA256SUMS" "$SIGNING_DIR/SHA256SUMS.sig" \
32
+          "$SIGNING_DIR/cert.pem" "$SIGNING_DIR/ca-chain.pem"; do
33
+   if [[ ! -f "$f" ]]; then
34
+     fail "Missing file: $f"
35
+     errors=$((errors + 1))
36
+   fi
37
+ done
38
+
39
+ if [[ $errors -gt 0 ]]; then
40
+   echo ""
41
+   fail "Required signing files are missing. Cannot verify."
42
+   exit 1
43
+ fi
44
+
45
+ echo "=== Nasjonalbiblioteket Model Verification ==="
46
+ echo ""
47
+
48
+ # Show certificate info; the "|| fail" guards keep set -e/pipefail from aborting silently
49
+ echo "Certificate subject:"
50
+ openssl x509 -in "$SIGNING_DIR/cert.pem" -subject -noout 2>/dev/null | sed 's/^subject=/ /' || fail "Could not read certificate subject."
51
+ echo "Certificate issuer:"
52
+ openssl x509 -in "$SIGNING_DIR/cert.pem" -issuer -noout 2>/dev/null | sed 's/^issuer=/ /' || fail "Could not read certificate issuer."
53
+ echo "Certificate fingerprint (SHA-256):"
54
+ openssl x509 -in "$SIGNING_DIR/cert.pem" -fingerprint -sha256 -noout 2>/dev/null | sed 's/^.*=/ /' || fail "Could not read certificate fingerprint."
55
+ echo ""
56
+
57
+ # 1. Verify certificate chain
58
+ echo "--- Step 1: Verify certificate chain ---"
59
+ if openssl verify -CAfile "$SIGNING_DIR/ca-chain.pem" "$SIGNING_DIR/cert.pem" > /dev/null 2>&1; then
60
+   pass "Certificate chain is valid."
61
+ else
62
+   fail "Certificate chain verification failed!"
63
+   errors=$((errors + 1))
64
+ fi
65
+
66
+ # 2. Verify signature (a failed public-key extraction counts as a failed signature)
67
+ echo "--- Step 2: Verify manifest signature ---"
68
+ PUBKEY=$(mktemp)
69
+ trap 'rm -f "$PUBKEY"' EXIT  # single quotes: expand $PUBKEY when the trap runs
70
+ if openssl x509 -in "$SIGNING_DIR/cert.pem" -pubkey -noout > "$PUBKEY" 2>/dev/null \
71
+     && openssl dgst -sha256 -verify "$PUBKEY" -signature "$SIGNING_DIR/SHA256SUMS.sig" \
72
+          "$SIGNING_DIR/SHA256SUMS" > /dev/null 2>&1; then
73
+   pass "Manifest signature is valid."
74
+ else
75
+   fail "Manifest signature verification failed!"
76
+   errors=$((errors + 1))
77
+ fi
78
+
79
+ # 3. Verify file checksums (shasum is the fallback where sha256sum is not installed)
80
+ echo "--- Step 3: Verify file checksums ---"
81
+ SHA256_CHECK=(sha256sum -c)
82
+ command -v sha256sum > /dev/null 2>&1 || SHA256_CHECK=(shasum -a 256 -c)
83
+ if "${SHA256_CHECK[@]}" "$SIGNING_DIR/SHA256SUMS" 2>/dev/null; then
84
+   pass "All file checksums match."
85
+ else
86
+   fail "One or more file checksums do not match!"
87
+   errors=$((errors + 1))
88
+ fi
89
+
90
+ # Summary
91
+ echo ""
92
+ if [[ $errors -eq 0 ]]; then
93
+   echo -e "${GREEN}✅ Verification successful. All files are authentic and unmodified.${NC}"
94
+   exit 0
95
+ else
96
+   echo -e "${RED}❌ Verification failed with $errors error(s).${NC}"
97
+   exit 1
98
+ fi
special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daab2354f8a74e70d70b4d1f804939b68a8c9624dd06cb7858e52dd8970e9726
3
+ size 33384567
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
tokenizer_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "boi_token": "<start_of_image>",
4
+ "bos_token": "<bos>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eoi_token": "<end_of_image>",
7
+ "eos_token": "<end_of_turn>",
8
+ "image_token": "<image_soft_token>",
9
+ "is_local": false,
10
+ "local_files_only": false,
11
+ "mask_token": "<mask>",
12
+ "model_max_length": 1000000000000000019884624838656,
13
+ "model_specific_special_tokens": {
14
+ "boi_token": "<start_of_image>",
15
+ "eoi_token": "<end_of_image>",
16
+ "image_token": "<image_soft_token>"
17
+ },
18
+ "pad_token": "<pad>",
19
+ "padding_side": "right",
20
+ "processor_class": "Gemma3Processor",
21
+ "sp_model_kwargs": null,
22
+ "spaces_between_special_tokens": false,
23
+ "split_special_tokens": false,
24
+ "tokenizer_class": "GemmaTokenizer",
25
+ "unk_token": "<unk>",
26
+ "use_default_system_prompt": false
27
+ }