apps90 committed on
Commit 28da64f · verified · 1 Parent(s): 5c35123

Upload model

Files changed (4)
  1. README.md +3 -1
  2. config.json +12 -11
  3. generation_config.json +3 -2
  4. model.safetensors +2 -2
README.md CHANGED
@@ -1,6 +1,8 @@
 ---
 library_name: transformers
-tags: []
+tags:
+- trl
+- orpo
 ---
 
 # Model Card for Model ID
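The new trl and orpo tags suggest the checkpoint was produced with TRL's ORPO trainer. A minimal sketch of such a run, assuming a preference dataset with prompt/chosen/rejected columns; the dataset name, base model, and hyperparameters below are placeholders, not taken from this commit:

```python
# Hedged sketch of an ORPO fine-tuning run with TRL; names below are illustrative.
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import ORPOConfig, ORPOTrainer

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token

# ORPO expects preference data with "prompt", "chosen" and "rejected" columns.
dataset = load_dataset("username/preference-dataset", split="train")  # placeholder dataset

args = ORPOConfig(
    output_dir="gpt2-orpo",
    per_device_train_batch_size=4,
    beta=0.1,  # strength of the odds-ratio preference term
)

trainer = ORPOTrainer(model=model, args=args, train_dataset=dataset, tokenizer=tokenizer)
trainer.train()
trainer.push_to_hub()  # pushing via the TRL trainer typically adds the trl/orpo tags
```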
config.json CHANGED
@@ -5,9 +5,9 @@
     "GPT2LMHeadModel"
   ],
   "attn_pdrop": 0.1,
-  "bos_token_id": 50256,
+  "bos_token_id": 50257,
   "embd_pdrop": 0.1,
-  "eos_token_id": 50256,
+  "eos_token_id": 50258,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
@@ -17,19 +17,20 @@
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
+  "pad_token_id": 50258,
   "quantization_config": {
-    "_load_in_4bit": false,
-    "_load_in_8bit": true,
-    "bnb_4bit_compute_dtype": "float32",
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "float16",
     "bnb_4bit_quant_storage": "uint8",
-    "bnb_4bit_quant_type": "fp4",
-    "bnb_4bit_use_double_quant": false,
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": true,
     "llm_int8_enable_fp32_cpu_offload": false,
     "llm_int8_has_fp16_weight": false,
     "llm_int8_skip_modules": null,
     "llm_int8_threshold": 6.0,
-    "load_in_4bit": false,
-    "load_in_8bit": true,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
     "quant_method": "bitsandbytes"
   },
   "reorder_and_upcast_attn": false,
@@ -47,8 +48,8 @@
       "max_length": 50
     }
   },
-  "torch_dtype": "float16",
+  "torch_dtype": "float32",
   "transformers_version": "4.41.0",
   "use_cache": true,
-  "vocab_size": 50257
+  "vocab_size": 50259
 }
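The quantization_config switch from 8-bit to 4-bit NF4 with double quantization and a float16 compute dtype can be reproduced at load time with bitsandbytes. A minimal sketch, assuming a placeholder repository id ("apps90/model-id" is not the actual repo name):

```python
# Hedged sketch: loading with the 4-bit NF4 settings that now appear in config.json.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    "apps90/model-id",          # placeholder; use the actual repository name
    quantization_config=bnb_config,
    device_map="auto",
)
```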
generation_config.json CHANGED
@@ -1,6 +1,7 @@
 {
   "_from_model_config": true,
-  "bos_token_id": 50256,
-  "eos_token_id": 50256,
+  "bos_token_id": 50257,
+  "eos_token_id": 50258,
+  "pad_token_id": 50258,
   "transformers_version": "4.41.0"
 }
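The shifted IDs (bos 50257, eos/pad 50258), together with the vocab_size bump from 50257 to 50259 in config.json, are consistent with two special tokens having been appended to the base GPT-2 vocabulary. A sketch of how such IDs typically come about; the literal token strings are assumptions, not read from this repo:

```python
# Hedged sketch: appending two special tokens to GPT-2 yields IDs 50257 and 50258
# and a vocabulary of 50259 entries; the token strings here are made up.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

tokenizer.add_special_tokens({"bos_token": "<|bos|>", "eos_token": "<|eot|>"})
tokenizer.pad_token = tokenizer.eos_token        # pad shares the eos id (50258)
model.resize_token_embeddings(len(tokenizer))    # embedding matrix grows to 50259 rows

model.config.bos_token_id = tokenizer.bos_token_id   # 50257
model.config.eos_token_id = tokenizer.eos_token_id   # 50258
model.config.pad_token_id = tokenizer.pad_token_id   # 50258
model.generation_config.pad_token_id = tokenizer.pad_token_id
```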
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:169b433e4d424c0dcf2b3c5905e79b1a54791212ab36c95cb5a891b3c344136c
-size 166663760
+oid sha256:ad32865291e3569bed247f4a26c7126789296ff4b14bc47e4209e1cd22cb54e4
+size 204307666