halxj committed on
Commit
8dd7479
·
verified ·
1 Parent(s): 6c1c88d

Upload Qwen3ForCausalLM

Browse files
Files changed (4) hide show
  1. README.md +2 -0
  2. config.json +17 -2
  3. generation_config.json +0 -1
  4. model.safetensors +3 -0
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
3
  tags:
4
  - GRPO
5
  - Reasoning-Course
 
 
6
  ---
7
 
8
  # Model Card for Model ID
 
3
  tags:
4
  - GRPO
5
  - Reasoning-Course
6
+ - trl
7
+ - sft
8
  ---
9
 
10
  # Model Card for Model ID
config.json CHANGED
@@ -4,8 +4,7 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
- "dtype": "bfloat16",
9
  "eos_token_id": 151645,
10
  "head_dim": 128,
11
  "hidden_act": "silu",
@@ -56,6 +55,22 @@
56
  "num_attention_heads": 32,
57
  "num_hidden_layers": 36,
58
  "num_key_value_heads": 8,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  "rms_norm_eps": 1e-06,
60
  "rope_scaling": null,
61
  "rope_theta": 5000000,
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "dtype": "float16",
 
8
  "eos_token_id": 151645,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
 
55
  "num_attention_heads": 32,
56
  "num_hidden_layers": 36,
57
  "num_key_value_heads": 8,
58
+ "pad_token_id": 151643,
59
+ "quantization_config": {
60
+ "_load_in_4bit": true,
61
+ "_load_in_8bit": false,
62
+ "bnb_4bit_compute_dtype": "float32",
63
+ "bnb_4bit_quant_storage": "uint8",
64
+ "bnb_4bit_quant_type": "nf4",
65
+ "bnb_4bit_use_double_quant": true,
66
+ "llm_int8_enable_fp32_cpu_offload": false,
67
+ "llm_int8_has_fp16_weight": false,
68
+ "llm_int8_skip_modules": null,
69
+ "llm_int8_threshold": 6.0,
70
+ "load_in_4bit": true,
71
+ "load_in_8bit": false,
72
+ "quant_method": "bitsandbytes"
73
+ },
74
  "rms_norm_eps": 1e-06,
75
  "rope_scaling": null,
76
  "rope_theta": 5000000,
generation_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "bos_token_id": 151643,
3
  "do_sample": true,
4
  "eos_token_id": [
5
  151645,
 
1
  {
 
2
  "do_sample": true,
3
  "eos_token_id": [
4
  151645,
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70e4aa00ee0daec74f4af4a7ede267db0205d7abdb3fced379ce74e326cc9bf9
3
+ size 2653133484