Prince-1 committed
Commit 96c6bad · verified · 1 parent: b959884

Sarvam-1 ONNX version

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model.onnx.data filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,96 @@
+ ---
+ language:
+ - bn
+ - en
+ - gu
+ - hi
+ - kn
+ - ml
+ - mr
+ - or
+ - pa
+ - ta
+ - te
+ base_model:
+ - sarvamai/sarvam-1
+ tags:
+ - onnx
+ - onnxruntime-genai
+ ---
+
+
+ # Sarvam-1
+
+ Sarvam-1 is a 2-billion-parameter language model specifically optimized for Indian languages. It delivers best-in-class performance in 10 Indic languages (bn, gu, hi, kn, ml, mr, or, pa, ta, te) compared with popular models such as Gemma-2-2B and Llama-3.2-3B, and is competitive with much larger models such as Llama-3.1-8B in these languages. More details can be found in our [release blog](https://www.sarvam.ai/blogs/sarvam-1).
+
+ The model was trained with the [NVIDIA NeMo™ Framework](https://github.com/NVIDIA/NeMo) on the Yotta Shakti Cloud using HGX H100 systems.
+
+ *Note: This is a text-completion model. It is meant to be fine-tuned on downstream tasks and cannot be used directly as a chat or instruction-following model.*
+
+ ## Key Features
+
+ - **Optimized for 10 Indian Languages**: Built from the ground up to support major Indian languages alongside English
+ - **Superior Token Efficiency**: Achieves fertility rates of 1.4-2.1 across all supported languages, 2-4x more efficient than existing multilingual models (a sketch of how fertility can be measured follows this list)
+ - **High-Quality Training Data**: Trained on a curated corpus of ~4 trillion tokens, including 2 trillion high-quality Indic tokens
+ - **Efficient Inference**: 4-6x faster inference than larger models while matching or exceeding their performance on Indic language tasks
+
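+ As a rough illustration of the fertility metric, here taken to be average tokens per whitespace-separated word (an assumption of this sketch, not an official definition), it can be measured directly with the tokenizer; the sample sentence is an arbitrary choice:
+
+ ```python
+ from transformers import AutoTokenizer
+
+ tokenizer = AutoTokenizer.from_pretrained("sarvamai/sarvam-1")
+
+ # Fertility = tokens emitted per word; lower means a more
+ # token-efficient tokenizer for the script in question.
+ text = "कर्नाटक की राजधानी बेंगलुरु है"  # "The capital of Karnataka is Bengaluru"
+ words = text.split()
+ tokens = tokenizer.tokenize(text)
+ print(f"{len(tokens)} tokens / {len(words)} words = fertility {len(tokens) / len(words):.2f}")
+ ```
+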
+ ## Model Architecture
+
+ - Hidden size: 2048
+ - Intermediate size: 11,008
+ - Number of attention heads: 16
+ - Number of hidden layers: 28
+ - Number of key-value heads: 8
+ - Maximum position embeddings: 8,192
+ - Activation function: SwiGLU
+ - Positional embeddings: Rotary (RoPE) with theta=10,000
+ - Training: Grouped-query attention and bfloat16 mixed precision (a KV-cache sizing sketch based on these numbers follows below)
+
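+ From these numbers the derived quantities can be sanity-checked: head size is hidden_size / num_attention_heads = 2048 / 16 = 128, and with 8 key-value heads the grouped-query attention cache is half the size of full multi-head caching. A back-of-the-envelope KV-cache sizing sketch (assuming bfloat16 caches at 2 bytes per element; an estimate, not a measured figure):
+
+ ```python
+ # KV-cache bytes per token = 2 (K and V) x layers x kv_heads x head_size x bytes/elem
+ num_layers, num_kv_heads = 28, 8
+ head_size = 2048 // 16                   # hidden_size / num_attention_heads = 128
+ bytes_per_token = 2 * num_layers * num_kv_heads * head_size * 2
+ print(bytes_per_token)                   # 114688 bytes, i.e. 112 KiB per token
+ print(bytes_per_token * 8192 / 2**30)    # 0.875 GiB at the full 8192-token context
+ ```
+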
+ ## Performance
+
+ ### Translated Academic Benchmarks (Zero-shot)
+
+ - MMLU: 44.44
+ - ARC-Challenge: 58.50
+ - TriviaQA: 90.62
+ - BoolQ: 80.68
+
+ ### IndicGenBench (One-shot)
+
+ - Flores English-to-Indic translation: 39.83 chrF++
+ - CrossSum: 20.48 chrF++
+ - XORQA: 25.27 F1
+ - XQUAD: 41.58 F1
+
+ ## Usage
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # Load model and tokenizer
+ model = AutoModelForCausalLM.from_pretrained("sarvamai/sarvam-1")
+ tokenizer = AutoTokenizer.from_pretrained("sarvamai/sarvam-1")
+
+ # Example usage: complete a Hindi prompt ("The capital of Karnataka is:")
+ text = "कर्नाटक की राजधानी है:"
+ inputs = tokenizer(text, return_tensors="pt")
+ outputs = model.generate(**inputs, max_new_tokens=5)
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ print(result)
+ ```
+
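+ Since this repository ships the ONNX export together with a `genai_config.json`, the model can also be driven with [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai). The following is a minimal sketch, not documented usage from Sarvam; the generator API has shifted between onnxruntime-genai releases, so treat these calls as one plausible version:
+
+ ```python
+ import onnxruntime_genai as og
+
+ # Point this at the folder containing model.onnx, model.onnx.data and genai_config.json.
+ model = og.Model(".")
+ tokenizer = og.Tokenizer(model)
+
+ params = og.GeneratorParams(model)
+ params.set_search_options(max_length=64)  # overrides the search defaults in genai_config.json
+
+ generator = og.Generator(model, params)
+ generator.append_tokens(tokenizer.encode("कर्नाटक की राजधानी है:"))
+ while not generator.is_done():
+     generator.generate_next_token()
+ print(tokenizer.decode(generator.get_sequence(0)))
+ ```
+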
+ ## Training Details
+
+ - Training infrastructure: Yotta's Shakti cluster
+ - Hardware: 1,024 GPUs
+ - Training duration: 5 days
+ - Framework: NVIDIA NeMo
+
+ ## License
+
+ Sarvam non-commercial license: see the [LICENSE](LICENSE.md) file
+
+ ## Acknowledgements
+
+ - NVIDIA: for support with the NeMo codebase
+ - Yotta: for access to the Shakti GPU cluster
+ - AI4Bharat: for their academic partnership and expertise in Indian language technologies
chat_template.jinja ADDED
@@ -0,0 +1,22 @@
+ {% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}
+ {% for message in loop_messages %}
+ {% if message['role'] not in ['user', 'assistant', 'tool_calls'] %}
+ {{ raise_exception('Invalid role: ' + message['role'] + '. Must be user, assistant, or tool_calls.') }}
+ {% endif %}
+ {% if loop.index0 == 0 and system_message != false %}
+ {% set content = '<<SYS>>
+ ' + system_message + '
+ <</SYS>>
+
+ ' + message['content'] %}
+ {% else %}
+ {% set content = message['content'] %}
+ {% endif %}
+ {% if message['role'] == 'user' %}
+ {{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}
+ {% elif message['role'] == 'assistant' %}
+ {{ ' ' + content.strip() + ' ' + eos_token }}
+ {% elif message['role'] == 'tool_calls' %}
+ {{ ' [TOOL_CALLS] ' + content.strip() + ' [/TOOL_CALLS] ' }}
+ {% endif %}
+ {% endfor %}
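Note: although the model card above calls this a text-completion model, the repository ships a Llama-2-style chat template, presumably for fine-tuned variants. A minimal sketch of rendering it with transformers, passing the template file explicitly since this ONNX repo's id is not shown here; the messages are placeholders:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sarvamai/sarvam-1")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Namaste!"},
]
# Render to a string without tokenizing, to inspect the [INST]/<<SYS>> prompt format.
prompt = tokenizer.apply_chat_template(
    messages,
    chat_template=open("chat_template.jinja").read(),
    tokenize=False,
)
print(prompt)
```
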
genai_config.json ADDED
@@ -0,0 +1,50 @@
+ {
+   "model": {
+     "bos_token_id": 1,
+     "context_length": 8192,
+     "decoder": {
+       "session_options": {
+         "log_id": "onnxruntime-genai",
+         "provider_options": []
+       },
+       "filename": "model.onnx",
+       "head_size": 128,
+       "hidden_size": 2048,
+       "inputs": {
+         "input_ids": "input_ids",
+         "attention_mask": "attention_mask",
+         "position_ids": "position_ids",
+         "past_key_names": "past_key_values.%d.key",
+         "past_value_names": "past_key_values.%d.value"
+       },
+       "outputs": {
+         "logits": "logits",
+         "present_key_names": "present.%d.key",
+         "present_value_names": "present.%d.value"
+       },
+       "num_attention_heads": 16,
+       "num_hidden_layers": 28,
+       "num_key_value_heads": 8
+     },
+     "eos_token_id": 2,
+     "pad_token_id": 2,
+     "type": "llama",
+     "vocab_size": 68096
+   },
+   "search": {
+     "diversity_penalty": 0.0,
+     "do_sample": true,
+     "early_stopping": true,
+     "length_penalty": 1.0,
+     "max_length": 8192,
+     "min_length": 0,
+     "no_repeat_ngram_size": 0,
+     "num_beams": 1,
+     "num_return_sequences": 1,
+     "past_present_share_buffer": false,
+     "repetition_penalty": 1.05,
+     "temperature": 0.1,
+     "top_k": 1,
+     "top_p": 0.95
+   }
+ }
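The `inputs`/`outputs` maps above bind onnxruntime-genai's logical tensor names to names in the exported graph, with `%d` expanded per layer (e.g. `past_key_values.0.key`); note also that with `top_k: 1` the shipped search defaults are effectively greedy even though `do_sample` is true. A quick way to check the tensor names against the actual graph, assuming model.onnx and model.onnx.data sit in the working directory:

```python
import onnxruntime as ort

sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
for t in sess.get_inputs():
    print(t.name, t.shape)   # input_ids, attention_mask, position_ids, past_key_values.0.key, ...
for t in sess.get_outputs():
    print(t.name, t.shape)   # logits, present.0.key, present.0.value, ...
```
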
model.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a67a9abca05d823c965fe6220c2377c3da26d588929d427e52937eebac972f3
+ size 655872
model.onnx.data ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b5ac2b750fc9664f5310cf45e91f634bc4216ffb7bfccf2708c7de61c69b77e4
+ size 5052301312
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4cd33409a577e8b416247587b0f5bd7a3eec245a1f18d4ec7793ff299ad3fbe2
+ size 1935856
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff