DuarteMRAlves committed on
Commit
06b1700
·
verified ·
1 Parent(s): 47e0a5e

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +161 -0
README.md ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ - de
6
+ - es
7
+ - fr
8
+ - it
9
+ - pt
10
+ - pl
11
+ - nl
12
+ - tr
13
+ - sv
14
+ - cs
15
+ - el
16
+ - hu
17
+ - ro
18
+ - fi
19
+ - uk
20
+ - sl
21
+ - sk
22
+ - da
23
+ - lt
24
+ - lv
25
+ - et
26
+ - bg
27
+ - 'no'
28
+ - ca
29
+ - hr
30
+ - ga
31
+ - mt
32
+ - gl
33
+ - zh
34
+ - ru
35
+ - ko
36
+ - ja
37
+ - ar
38
+ - hi
39
+ library_name: transformers
40
+ base_model:
41
+ - utter-project/EuroLLM-9B-2512
42
+ ---
43
+
44
+ # Model Card for EuroLLM-9B-Instruct-2512
45
+
46
+ This is the model card for EuroLLM-9B-Instruct-2512, an improved version of [utter-project/EuroLLM-9B-Instruct](https://huggingface.co/utter-project/EuroLLM-9B-Instruct).
47
+ In comparison with the previous version, this version includes the long-context extension phase and the revamped post-training recipe from [utter-project/EuroLLM-22B-Instruct](https://huggingface.co/utter-project/EuroLLM-22B-Instruct-2512).
48
+
49
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
50
+ <details><summary>See axolotl config</summary>
51
+
52
+ axolotl version: `0.12.2`
53
+ ```yaml
54
+ auto_resume_from_checkpoints: true
55
+ use_tensorboard: true
56
+
57
+ base_model: utter-project/EuroLLM-9B-2512
58
+ model_type: AutoModelForCausalLM
59
+ tokenizer_type: AutoTokenizer
60
+
61
+ load_in_8bit: false
62
+ load_in_4bit: false
63
+ strict: false
64
+
65
+ dataset_processes: 64
66
+ datasets:
67
+ - path: utter-project/EuroBlocks-SFT-2512
68
+ type: chat_template
69
+ split: train
70
+ conversation: chatml
71
+ field_messages: conversations
72
+ message_field_role: role
73
+ message_field_content: content
74
+ roles_to_train: ["assistant"]
75
+ train_on_eos: all
76
+
77
+
78
+ chat_template_jinja: "{% for message in messages %}{% if message['role'] == 'assistant' %}{% set role = 'assistant' %}{% else %}{% set role = message['role'] %}{% endif %}<|im_start|>{{ role }}\n{{ message['content'] | trim }}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}{{'<|im_start|>assistant\n'}}{% endif %}"
79
+
80
+ output_dir: checkpoints
81
+ val_set_size: 0
82
+
83
+ sequence_len: 32768
84
+ sample_packing: true
85
+ pad_to_sequence_len: true
86
+
87
+ # sequence_parallel_degree: 4
88
+ # heads_k_stride: 1
89
+ # ring_attn_func:
90
+
91
+ plugins:
92
+ - axolotl.integrations.liger.LigerPlugin
93
+ liger_rope: true
94
+ liger_rms_norm: true
95
+ liger_glu_activation: true
96
+ liger_layer_norm: true
97
+ liger_fused_linear_cross_entropy: true
98
+
99
+ # N_GPUS * GRAD_ACC_STEPS * MICRO_BATCH_SIZE * SEQ_LEN = tokens/step ->
100
+ # Assuming 32 gpus (32 * 2 * 2 * 32k = 4 096 000 tokens/step)
101
+ gradient_accumulation_steps: 2
102
+ micro_batch_size: 2
103
+
104
+ eval_batch_size: 1
105
+ num_epochs: 5
106
+ optimizer: adamw_torch
107
+ lr_scheduler: cosine
108
+ learning_rate: 1e-5
109
+
110
+ train_on_inputs: false
111
+ group_by_length: false
112
+ bf16: true
113
+ fp16: false
114
+ tf32: false
115
+
116
+ gradient_checkpointing: true
117
+ logging_steps: 1
118
+ flash_attention: true
119
+ flash_attn_cross_entropy: false
120
+ flash_attn_rms_norm: false
121
+ flash_attn_fuse_qkv: false
122
+ flash_attn_fuse_mlp: false
123
+
124
+ warmup_steps: 125
125
+ eval_sample_packing: False
126
+ save_steps: 500
127
+ save_total_limit: 2
128
+ deepspeed: deepspeed_configs/zero3_bf16.json
129
+ weight_decay: 0.01
130
+
131
+ special_tokens:
132
+ eos_token: "<|im_end|>"
133
+
134
+ ```
135
+ </details><br>
136
+
137
+ ## Run the model
138
+
139
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
140
+
141
+ model_id = "utter-project/EuroLLM-9B-Instruct-2512"
142
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
143
+ model = AutoModelForCausalLM.from_pretrained(model_id)
144
+
145
+ messages = [
146
+     {
147
+         "role": "system",
148
+         "content": "You are EuroLLM --- an AI assistant specialized in European languages that provides safe, educational and helpful answers.",
149
+     },
150
+     {
151
+         "role": "user", "content": "What is the capital of Portugal? How would you describe it?"
152
+     },
153
+ ]
154
+
155
+ inputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
156
+ outputs = model.generate(inputs, max_new_tokens=1024)
157
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```
158
+
159
+ ## Bias, Risks, and Limitations
160
+
161
+ EuroLLM-9B has not been aligned to human preferences, so the model may generate problematic outputs (e.g., hallucinations, harmful content, or false statements).