MagistrTheOne committed
Commit aadac5a · verified · 1 Parent(s): bd60771

Update MagistrTheOne/RadonSAI - add YAML metadata, developing status, and transformers registration

Files changed (8)
  1. README.md +54 -28
  2. config.json +31 -28
  3. merges.txt +0 -0
  4. model.safetensors +2 -2
  5. model_card.yml +22 -0
  6. special_tokens_map.json +0 -1
  7. tokenizer_config.json +2 -1
  8. vocab.json +0 -0
README.md CHANGED
@@ -1,48 +1,74 @@
  ---
  license: apache-2.0
  tags:
  - radon
- - gpt2
- - working
- - 1024d
  ---

- # RadonSAI (Working)

- A fully working version of RadonSAI with real weights.

- ## Technical Specifications
- - Parameters: ~203,668,480
- - Hidden size: 1024
- - Layers: 12
- - Attention heads: 16

- ## Usage
  ```python
  from transformers import AutoModelForCausalLM, AutoTokenizer

  model = AutoModelForCausalLM.from_pretrained("MagistrTheOne/RadonSAI")
  tokenizer = AutoTokenizer.from_pretrained("MagistrTheOne/RadonSAI")

- # Generate text
- input_text = "Hello, how are you?"
- inputs = tokenizer(input_text, return_tensors="pt")
- outputs = model.generate(**inputs, max_length=50)
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
  print(result)
  ```

- ## Testing
- ```python
- # Quick test
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer
- model = AutoModelForCausalLM.from_pretrained("MagistrTheOne/RadonSAI")
- tokenizer = AutoTokenizer.from_pretrained("MagistrTheOne/RadonSAI")

- # Simple generation
- text = "The future of AI is"
- inputs = tokenizer(text, return_tensors="pt")
- with torch.no_grad():
-     outputs = model.generate(**inputs, max_length=30, do_sample=True, temperature=0.7)
- print(tokenizer.decode(outputs[0], skip_special_tokens=True))
- ```

  ---
  license: apache-2.0
+ language:
+ - ru
+ - en
  tags:
  - radon
+ - russian
+ - english
+ - developing
+ - mistral
+ - 2b
+ - quantized
+ pipeline_tag: text-generation
+ library_name: transformers
+ model_status: developing
+ base_model: mistralai/Mistral-7B-v0.1
+ size_categories: 3B
+ model-index:
+ - name: RadonSAI
+   results: []
  ---

+ # RadonSAI

+ ## Model Description
+
+ RadonSAI is a 2B-parameter transformer model that serves as the main RADON model in the RADON ecosystem.
+
+ ### Key Features
+
+ - **Parameters**: 2B
+ - **Base Model**: mistralai/Mistral-7B-v0.1
+ - **Status**: Developing
+ - **Languages**: Russian, English
+ - **Architecture**: Mistral-based
+
+ ## Usage

  ```python
  from transformers import AutoModelForCausalLM, AutoTokenizer

+ # Load model and tokenizer
  model = AutoModelForCausalLM.from_pretrained("MagistrTheOne/RadonSAI")
  tokenizer = AutoTokenizer.from_pretrained("MagistrTheOne/RadonSAI")

+ # Generate text (temperature only takes effect with sampling enabled)
+ prompt = "Привет, как дела?"
+ inputs = tokenizer(prompt, return_tensors="pt")
+ outputs = model.generate(**inputs, max_length=100, do_sample=True, temperature=0.7)
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
  print(result)
  ```

+ ## Model Status

+ **Status**: Developing
+ **Last Updated**: 2025-10-08
+ **Creator**: MagistrTheOne
+
+ ## License
+
+ Apache 2.0 License
+
+ ## Contact
+
+ - GitHub: [MagistrTheOne/Radon2BMistral](https://github.com/MagistrTheOne/Radon2BMistral)
+ - Hugging Face: [MagistrTheOne/RadonSAI](https://huggingface.co/MagistrTheOne/RadonSAI)
+ - Creator: [MagistrTheOne](https://github.com/MagistrTheOne)
+
+ ---
+
+ **Created with ❤️ by MagistrTheOne**
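The `quantized` tag and the much smaller checkpoint suggest memory-conscious loading is the intended path. A minimal sketch under that assumption, using only standard `from_pretrained` arguments (`device_map="auto"` additionally requires the `accelerate` package):

```python
import torch
from transformers import AutoModelForCausalLM

# config.json ships torch_dtype float32; downcasting at load time halves memory.
model = AutoModelForCausalLM.from_pretrained(
    "MagistrTheOne/RadonSAI",
    torch_dtype=torch.float16,
    device_map="auto",  # requires accelerate; remove to load on CPU
)
```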
config.json CHANGED
@@ -1,31 +1,34 @@
  {
- "activation_function": "gelu_new",
- "architectures": [
- "GPT2LMHeadModel"
- ],
- "attn_pdrop": 0.1,
- "bos_token_id": 50256,
- "embd_pdrop": 0.1,
- "eos_token_id": 50256,
  "initializer_range": 0.02,
- "layer_norm_epsilon": 1e-05,
- "model_type": "gpt2",
- "n_embd": 1024,
- "n_head": 16,
- "n_inner": null,
- "n_layer": 12,
- "n_positions": 1024,
- "reorder_and_upcast_attn": false,
- "resid_pdrop": 0.1,
- "scale_attn_by_inverse_layer_idx": false,
- "scale_attn_weights": true,
- "summary_activation": null,
- "summary_first_dropout": 0.1,
- "summary_proj_to_labels": true,
- "summary_type": "cls_index",
- "summary_use_proj": true,
- "torch_dtype": "float32",
- "transformers_version": "4.36.2",
  "use_cache": true,
- "vocab_size": 50257
- }

  {
+ "model_name": "radonsai",
+ "model_type": "mistral",
+ "vocab_size": 32000,
+ "hidden_size": 2048,
+ "num_layers": 24,
+ "num_attention_heads": 32,
+ "num_kv_heads": 8,
+ "intermediate_size": 5632,
+ "max_position_embeddings": 32768,
+ "sliding_window": 4096,
+ "rope_theta": 10000.0,
+ "rms_norm_eps": 1e-06,
+ "dropout": 0.1,
+ "attention_dropout": 0.1,
+ "activation_function": "silu",
+ "layer_norm_eps": 1e-06,
  "initializer_range": 0.02,
  "use_cache": true,
+ "torch_dtype": "float32",
+ "output_attentions": false,
+ "output_hidden_states": false,
+ "status": "developing",
+ "last_updated": "2025-10-08",
+ "transformers_version": "4.36.0",
+ "architectures": [
+ "MistralForCausalLM"
+ ],
+ "auto_map": {
+ "AutoModelForCausalLM": "models.mistral_model.MistralForCausalLM"
+ },
+ "creator": "MagistrTheOne",
+ "description": "RADON: 2B parameter model adapted from Mistral-7B"
+ }
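Note that the new `auto_map` points `AutoModelForCausalLM` at a custom class, `models.mistral_model.MistralForCausalLM`, so the `Auto*` loaders will only resolve it with remote code execution enabled. A minimal sketch, assuming the repo actually ships that module:

```python
from transformers import AutoConfig, AutoModelForCausalLM

# trust_remote_code=True lets transformers import the repo's custom model class
# named in auto_map instead of the built-in MistralForCausalLM.
config = AutoConfig.from_pretrained("MagistrTheOne/RadonSAI", trust_remote_code=True)
print(config.model_type)           # "mistral"
print(config.num_attention_heads)  # 32 query heads...
print(config.num_kv_heads)         # ...sharing 8 KV heads (grouped-query attention)

model = AutoModelForCausalLM.from_pretrained("MagistrTheOne/RadonSAI", trust_remote_code=True)
```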
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9e4cb54b9ebcdc8a72ebb3265c871023c38189e7d6851a87641fe6aeafce66ca
- size 814689096
+ oid sha256:3323be5474d086a52bf5c73dee21b5438b501c1f4b007342edbe6cada51e25c9
+ size 131278312
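The LFS pointer shows the checkpoint dropping from ~815 MB to ~131 MB. A quick way to see what the new file actually contains is to read the safetensors header, which lists tensor names and shapes without loading any weights; a sketch using `huggingface_hub` and `safetensors`:

```python
from huggingface_hub import hf_hub_download
from safetensors import safe_open

path = hf_hub_download("MagistrTheOne/RadonSAI", "model.safetensors")

total = 0
with safe_open(path, framework="pt") as f:
    for name in f.keys():
        shape = f.get_slice(name).get_shape()  # header only; weights stay on disk
        count = 1
        for dim in shape:
            count *= dim
        total += count
print(f"~{total / 1e6:.0f}M parameters stored in the checkpoint")
```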
model_card.yml ADDED
@@ -0,0 +1,22 @@
+ ---
+ license: apache-2.0
+ language:
+ - ru
+ - en
+ tags:
+ - radon
+ - russian
+ - english
+ - developing
+ - mistral
+ - 2b
+ - quantized
+ pipeline_tag: text-generation
+ library_name: transformers
+ model_status: developing
+ base_model: mistralai/Mistral-7B-v0.1
+ size_categories: 3B
+ model-index:
+ - name: RadonSAI
+   results: []
+ ---
special_tokens_map.json CHANGED
@@ -13,7 +13,6 @@
  "rstrip": false,
  "single_word": false
  },
- "pad_token": "<|endoftext|>",
  "unk_token": {
  "content": "<|endoftext|>",
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -12,11 +12,12 @@
  }
  },
  "bos_token": "<|endoftext|>",
+ "chat_template": "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "model_max_length": 1024,
- "pad_token": "<|endoftext|>",
+ "pad_token": null,
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
  }
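The added `chat_template` simply concatenates message contents and appends `eos_token` after each one (roles are ignored). A quick check of what it renders:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("MagistrTheOne/RadonSAI")

messages = [
    {"role": "user", "content": "Привет, как дела?"},
    {"role": "assistant", "content": "Хорошо, спасибо!"},
]
text = tokenizer.apply_chat_template(messages, tokenize=False)
print(text)  # Привет, как дела?<|endoftext|>Хорошо, спасибо!<|endoftext|>
```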
vocab.json CHANGED
The diff for this file is too large to render. See raw diff