RinKana committed on
Commit 2fcf641 · verified · 1 Parent(s): 8e37854

Fix shared memory error and update config to 2026 standard

Files changed (7)
  1. README.md +34 -0
  2. config.json +12 -0
  3. merges.txt +0 -0
  4. model.safetensors +3 -0
  5. special_tokens_map.json +23 -0
  6. tokenizer_config.json +23 -0
  7. vocab.json +0 -0
README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ language: en
+ license: mit
+ library_name: pytorch
+ tags:
+ - text-generation
+ - gpt
+ - slm
+ - scratch
+ - decoder-only
+ datasets:
+ - wikitext
+ pipeline_tag: text-generation
+ ---
+
+ # MiniGPT-30M-Wikipedia-Var1
+
+ A Small Language Model (SLM) with ~29.9M parameters, built from scratch.
+
+ ## Model Details
+ - **Architecture**: GPT-style decoder-only Transformer with RoPE, RMSNorm, and SwiGLU
+ - **Weight Tying**: Enabled
+ - **Parameters**: 29,920,512
+ - **Layers**: 6
+ - **Embedding Dim**: 384
+ - **Context Size**: 512 tokens
+
+ ## Usage
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model = AutoModelForCausalLM.from_pretrained("RinKana/MiniGPT-30M-Wikipedia-Var1")
+ tokenizer = AutoTokenizer.from_pretrained("RinKana/MiniGPT-30M-Wikipedia-Var1")
+ ```
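The README stops at loading the checkpoint. As a minimal follow-up sketch (not part of the committed files), the snippet below shows one way to generate text with the loaded pair. It assumes the `"model_type": "gpt2"` entry in config.json lets the `Auto*` classes resolve the checkpoint; the prompt and sampling settings are purely illustrative.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "RinKana/MiniGPT-30M-Wikipedia-Var1"
model = AutoModelForCausalLM.from_pretrained(repo)
tokenizer = AutoTokenizer.from_pretrained(repo)
model.eval()

# Illustrative prompt; keep inputs well under the 512-token context size.
inputs = tokenizer("The history of Wikipedia", return_tensors="pt")
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=64,
        do_sample=True,
        top_k=50,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id,  # pad_token is null, so reuse EOS
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```

Because the model card describes RoPE/RMSNorm/SwiGLU rather than stock GPT-2, a custom model class may be needed instead of `AutoModelForCausalLM`; this sketch only extends the usage pattern the README itself shows.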
config.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "vocab_size": 50257,
+   "block_size": 512,
+   "n_layer": 6,
+   "n_head": 8,
+   "n_embd": 384,
+   "architectures": [
+     "MiniGPT"
+   ],
+   "dtype": "float32",
+   "model_type": "gpt2"
+ }
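As a rough cross-check of the parameter count quoted in the README, the sketch below (not part of the commit) estimates where the 29,920,512 parameters sit under this config: with weight tying the token embedding is counted once, RoPE adds no learned position table, and the remainder lives in the 6 decoder blocks. The per-block split is an estimate only, since the SwiGLU hidden width is not given in config.json.

```python
# Values taken from config.json above.
vocab_size, n_embd, n_layer = 50257, 384, 6

# Token embedding, shared with the output head via weight tying
# (README: "Weight Tying: Enabled"); RoPE means no learned position table.
embedding_params = vocab_size * n_embd          # 50257 * 384 = 19,298,688

# Total from the model card; the rest is attention + SwiGLU MLP + RMSNorm across 6 layers.
total_params = 29_920_512
block_params = total_params - embedding_params  # 10,621,824 -> roughly 1.77M per layer
print(embedding_params, block_params, block_params // n_layer)
```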
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e063b1e0ad491f72bd6deaa31c82b974577d7442daf8ae1aba3970b633938228
+ size 197078136
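What Git stores here is only the LFS pointer, not the weights themselves. A small sketch (again, not part of the commit) for fetching the real file with `huggingface_hub` and checking it against the pointer's size and SHA-256:

```python
import hashlib
import os

from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="RinKana/MiniGPT-30M-Wikipedia-Var1", filename="model.safetensors")

# The pointer above says: size 197078136, oid sha256:e063b1e0...
print(os.path.getsize(path))

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
print(sha.hexdigest())
```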
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
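All three special-token slots point to GPT-2's `<|endoftext|>`. A quick sketch of how the loaded tokenizer should expose them (the id 50256 comes from the `added_tokens_decoder` entry in tokenizer_config.json below; this check is illustrative, not part of the commit):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("RinKana/MiniGPT-30M-Wikipedia-Var1")

# BOS, EOS and UNK all resolve to the same end-of-text token.
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.unk_token)  # <|endoftext|> three times
print(tokenizer.eos_token_id)                                         # expected: 50256
```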
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 1024,
+   "pad_token": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
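Note that `model_max_length` here is 1024 (the GPT2Tokenizer default), while the model card and config.json declare a 512-token context (`block_size`). A hedged sketch of preparing inputs with an explicit cap at the model's context size, rather than relying on the tokenizer default:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("RinKana/MiniGPT-30M-Wikipedia-Var1")

# tokenizer_config.json allows 1024 tokens, but the model's block_size is 512,
# so truncate explicitly to the smaller context window.
enc = tokenizer(
    "Some long Wikipedia-style passage ...",
    truncation=True,
    max_length=512,
    return_tensors="pt",
)
print(enc["input_ids"].shape)  # (1, <=512)
```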
vocab.json ADDED
The diff for this file is too large to render. See raw diff