haoyang-amd committed on
Commit
a1904ec
·
verified ·
1 Parent(s): 6edb1bd
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. chat_template.jinja +101 -0
  2. config.json +785 -3
  3. generation_config.json +1 -1
  4. model-00001-of-00082.safetensors +3 -0
  5. model-00002-of-00082.safetensors +3 -0
  6. model-00003-of-00082.safetensors +3 -0
  7. model-00004-of-00082.safetensors +3 -0
  8. model-00005-of-00082.safetensors +3 -0
  9. model-00006-of-00082.safetensors +3 -0
  10. model-00007-of-00082.safetensors +3 -0
  11. model-00008-of-00082.safetensors +3 -0
  12. model-00009-of-00082.safetensors +3 -0
  13. model-00010-of-00082.safetensors +3 -0
  14. model-00011-of-00082.safetensors +3 -0
  15. model-00012-of-00082.safetensors +3 -0
  16. model-00013-of-00082.safetensors +3 -0
  17. model-00014-of-00082.safetensors +3 -0
  18. model-00015-of-00082.safetensors +3 -0
  19. model-00016-of-00082.safetensors +3 -0
  20. model-00017-of-00082.safetensors +3 -0
  21. model-00018-of-00082.safetensors +3 -0
  22. model-00019-of-00082.safetensors +3 -0
  23. model-00020-of-00082.safetensors +3 -0
  24. model-00021-of-00082.safetensors +3 -0
  25. model-00022-of-00082.safetensors +3 -0
  26. model-00023-of-00082.safetensors +3 -0
  27. model-00024-of-00082.safetensors +3 -0
  28. model-00025-of-00082.safetensors +3 -0
  29. model-00026-of-00082.safetensors +3 -0
  30. model-00027-of-00082.safetensors +3 -0
  31. model-00028-of-00082.safetensors +3 -0
  32. model-00029-of-00082.safetensors +3 -0
  33. model-00030-of-00082.safetensors +3 -0
  34. model-00031-of-00082.safetensors +3 -0
  35. model-00032-of-00082.safetensors +3 -0
  36. model-00033-of-00082.safetensors +3 -0
  37. model-00034-of-00082.safetensors +3 -0
  38. model-00035-of-00082.safetensors +3 -0
  39. model-00036-of-00082.safetensors +3 -0
  40. model-00037-of-00082.safetensors +3 -0
  41. model-00038-of-00082.safetensors +3 -0
  42. model-00039-of-00082.safetensors +3 -0
  43. model-00040-of-00082.safetensors +3 -0
  44. model-00041-of-00082.safetensors +3 -0
  45. model-00042-of-00082.safetensors +3 -0
  46. model-00043-of-00082.safetensors +3 -0
  47. model-00044-of-00082.safetensors +3 -0
  48. model-00045-of-00082.safetensors +3 -0
  49. model-00046-of-00082.safetensors +3 -0
  50. model-00047-of-00082.safetensors +3 -0
chat_template.jinja ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if not add_generation_prompt is defined %}
2
+ {%- set add_generation_prompt = false %}
3
+ {%- endif %}
4
+ {%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}
5
+ {%- for message in messages %}
6
+ {%- if message['role'] == 'system' %}
7
+ {%- if ns.is_first_sp %}
8
+ {%- set ns.system_prompt = ns.system_prompt + message['content'] %}
9
+ {%- set ns.is_first_sp = false %}
10
+ {%- else %}
11
+ {%- set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
12
+ {%- endif %}
13
+ {%- endif %}
14
+ {%- endfor %}
15
+
16
+ {#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja #}
17
+ {%- if tools is defined and tools is not none %}
18
+ {%- set tool_ns = namespace(text='You are a helpful assistant with tool calling capabilities. ' + 'When a tool call is needed, you MUST use the following format to issue the call:\n' + '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>FUNCTION_NAME\n' + '```json\n{"param1": "value1", "param2": "value2"}\n```<|tool▁call▁end|><|tool▁calls▁end|>\n\n' + 'Make sure the JSON is valid.\n\n' + '## Tools\n\n### Function\n\nYou have the following functions available:\n\n') %}
19
+ {%- for tool in tools %}
20
+ {%- set tool_ns.text = tool_ns.text + '\n```json\n' + (tool | tojson) + '\n```\n' %}
21
+ {%- endfor %}
22
+ {%- if ns.system_prompt|length != 0 %}
23
+ {%- set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
24
+ {%- else %}
25
+ {%- set ns.system_prompt = tool_ns.text %}
26
+ {%- endif %}
27
+ {%- endif %}
28
+ {{- bos_token }}
29
+ {{- ns.system_prompt }}
30
+ {%- set last_index = (messages|length - 1) %}
31
+ {%- for message in messages %}
32
+ {%- set content = message['content'] %}
33
+ {%- if message['role'] == 'user' %}
34
+ {%- set ns.is_tool = false -%}
35
+ {%- set ns.is_first = false -%}
36
+ {%- set ns.is_last_user = true -%}
37
+ {%- if loop.index0 == last_index %}
38
+ {{- '<|User|>' + content }}
39
+ {%- else %}
40
+ {{- '<|User|>' + content + '<|Assistant|>'}}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if message['role'] == 'assistant' %}
44
+ {%- if '</think>' in content %}
45
+ {%- set content = (content.split('</think>')|last) %}
46
+ {%- endif %}
47
+ {%- endif %}
48
+ {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
49
+ {%- set ns.is_last_user = false -%}
50
+ {%- if ns.is_tool %}
51
+ {{- '<|tool▁outputs▁end|>'}}
52
+ {%- endif %}
53
+ {%- set ns.is_first = false %}
54
+ {%- set ns.is_tool = false -%}
55
+ {%- set ns.is_output_first = true %}
56
+ {%- for tool in message['tool_calls'] %}
57
+ {%- set arguments = tool['function']['arguments'] %}
58
+ {%- if arguments is not string %}
59
+ {%- set arguments = arguments|tojson %}
60
+ {%- endif %}
61
+ {%- if not ns.is_first %}
62
+ {%- if content is none %}
63
+ {{- '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<|tool▁call▁end|>'}}
64
+ {#- stray '}' removed from this line; it was emitted verbatim into the rendered prompt #}
65
+ {%- else %}
66
+ {{- content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<|tool▁call▁end|>'}}
67
+ {%- endif %}
68
+ {%- set ns.is_first = true -%}
69
+ {%- else %}
70
+ {{- '\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<|tool▁call▁end|>'}}
71
+ {%- endif %}
72
+ {%- endfor %}
73
+ {{- '<|tool▁calls▁end|><|end▁of▁sentence|>'}}
74
+ {%- endif %}
75
+ {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
76
+ {%- set ns.is_last_user = false -%}
77
+ {%- if ns.is_tool %}
78
+ {{- '<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}
79
+ {%- set ns.is_tool = false -%}
80
+ {%- else %}
81
+ {{- content + '<|end▁of▁sentence|>'}}
82
+ {%- endif %}
83
+ {%- endif %}
84
+ {%- if message['role'] == 'tool' %}
85
+ {%- set ns.is_last_user = false -%}
86
+ {%- set ns.is_tool = true -%}
87
+ {%- if ns.is_output_first %}
88
+ {{- '<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
89
+ {%- set ns.is_output_first = false %}
90
+ {%- else %}
91
+ {{- '\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
92
+ {%- endif %}
93
+ {%- endif %}
94
+ {%- endfor -%}
95
+ {%- if ns.is_tool %}
96
+ {{- '<|tool▁outputs▁end|>'}}
97
+ {%- endif %}
98
+ {#- if add_generation_prompt and not ns.is_last_user and not ns.is_tool #}
99
+ {%- if add_generation_prompt and not ns.is_tool %}
100
+ {{- '<|Assistant|>'}}
101
+ {%- endif %}
config.json CHANGED
@@ -10,6 +10,7 @@
10
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
  },
12
  "bos_token_id": 0,
 
13
  "eos_token_id": 1,
14
  "ep_size": 1,
15
  "first_k_dense_replace": 3,
@@ -38,6 +39,7 @@
38
  "quantization_config": {
39
  "algo_config": null,
40
  "exclude": [
 
41
  "model.layers.0.self_attn.q_a_proj",
42
  "model.layers.0.self_attn.q_b_proj",
43
  "model.layers.0.self_attn.kv_a_proj_with_mqa",
@@ -343,6 +345,785 @@
343
  "model.layers.60.self_attn.kv_a_proj_with_mqa",
344
  "model.layers.60.self_attn.kv_b_proj",
345
  "model.layers.60.self_attn.o_proj",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  "lm_head"
347
  ],
348
  "export": {
@@ -387,12 +1168,14 @@
387
  "symmetric": null
388
  }
389
  },
 
 
390
  "layer_quant_config": {},
391
  "layer_type_quant_config": {},
392
  "quant_method": "quark",
393
  "quant_mode": "eager_mode",
394
  "softmax_quant_spec": null,
395
- "version": "0.10+577c94ac99"
396
  },
397
  "rms_norm_eps": 1e-06,
398
  "rope_scaling": {
@@ -410,8 +1193,7 @@
410
  "tie_word_embeddings": false,
411
  "topk_group": 4,
412
  "topk_method": "noaux_tc",
413
- "torch_dtype": "bfloat16",
414
- "transformers_version": "4.51.3",
415
  "unsloth_fixed": true,
416
  "use_cache": true,
417
  "v_head_dim": 128,
 
10
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
  },
12
  "bos_token_id": 0,
13
+ "dtype": "bfloat16",
14
  "eos_token_id": 1,
15
  "ep_size": 1,
16
  "first_k_dense_replace": 3,
 
39
  "quantization_config": {
40
  "algo_config": null,
41
  "exclude": [
42
+ "re:model.layers.61.*",
43
  "model.layers.0.self_attn.q_a_proj",
44
  "model.layers.0.self_attn.q_b_proj",
45
  "model.layers.0.self_attn.kv_a_proj_with_mqa",
 
345
  "model.layers.60.self_attn.kv_a_proj_with_mqa",
346
  "model.layers.60.self_attn.kv_b_proj",
347
  "model.layers.60.self_attn.o_proj",
348
+ "model.layers.61.embed_tokens",
349
+ "model.layers.61.eh_proj",
350
+ "model.layers.61.self_attn.q_a_proj",
351
+ "model.layers.61.self_attn.q_b_proj",
352
+ "model.layers.61.self_attn.kv_a_proj_with_mqa",
353
+ "model.layers.61.self_attn.kv_b_proj",
354
+ "model.layers.61.self_attn.o_proj",
355
+ "model.layers.61.mlp.experts.0.gate_proj",
356
+ "model.layers.61.mlp.experts.0.up_proj",
357
+ "model.layers.61.mlp.experts.0.down_proj",
358
+ "model.layers.61.mlp.experts.1.gate_proj",
359
+ "model.layers.61.mlp.experts.1.up_proj",
360
+ "model.layers.61.mlp.experts.1.down_proj",
361
+ "model.layers.61.mlp.experts.2.gate_proj",
362
+ "model.layers.61.mlp.experts.2.up_proj",
363
+ "model.layers.61.mlp.experts.2.down_proj",
364
+ "model.layers.61.mlp.experts.3.gate_proj",
365
+ "model.layers.61.mlp.experts.3.up_proj",
366
+ "model.layers.61.mlp.experts.3.down_proj",
367
+ "model.layers.61.mlp.experts.4.gate_proj",
368
+ "model.layers.61.mlp.experts.4.up_proj",
369
+ "model.layers.61.mlp.experts.4.down_proj",
370
+ "model.layers.61.mlp.experts.5.gate_proj",
371
+ "model.layers.61.mlp.experts.5.up_proj",
372
+ "model.layers.61.mlp.experts.5.down_proj",
373
+ "model.layers.61.mlp.experts.6.gate_proj",
374
+ "model.layers.61.mlp.experts.6.up_proj",
375
+ "model.layers.61.mlp.experts.6.down_proj",
376
+ "model.layers.61.mlp.experts.7.gate_proj",
377
+ "model.layers.61.mlp.experts.7.up_proj",
378
+ "model.layers.61.mlp.experts.7.down_proj",
379
+ "model.layers.61.mlp.experts.8.gate_proj",
380
+ "model.layers.61.mlp.experts.8.up_proj",
381
+ "model.layers.61.mlp.experts.8.down_proj",
382
+ "model.layers.61.mlp.experts.9.gate_proj",
383
+ "model.layers.61.mlp.experts.9.up_proj",
384
+ "model.layers.61.mlp.experts.9.down_proj",
385
+ "model.layers.61.mlp.experts.10.gate_proj",
386
+ "model.layers.61.mlp.experts.10.up_proj",
387
+ "model.layers.61.mlp.experts.10.down_proj",
388
+ "model.layers.61.mlp.experts.11.gate_proj",
389
+ "model.layers.61.mlp.experts.11.up_proj",
390
+ "model.layers.61.mlp.experts.11.down_proj",
391
+ "model.layers.61.mlp.experts.12.gate_proj",
392
+ "model.layers.61.mlp.experts.12.up_proj",
393
+ "model.layers.61.mlp.experts.12.down_proj",
394
+ "model.layers.61.mlp.experts.13.gate_proj",
395
+ "model.layers.61.mlp.experts.13.up_proj",
396
+ "model.layers.61.mlp.experts.13.down_proj",
397
+ "model.layers.61.mlp.experts.14.gate_proj",
398
+ "model.layers.61.mlp.experts.14.up_proj",
399
+ "model.layers.61.mlp.experts.14.down_proj",
400
+ "model.layers.61.mlp.experts.15.gate_proj",
401
+ "model.layers.61.mlp.experts.15.up_proj",
402
+ "model.layers.61.mlp.experts.15.down_proj",
403
+ "model.layers.61.mlp.experts.16.gate_proj",
404
+ "model.layers.61.mlp.experts.16.up_proj",
405
+ "model.layers.61.mlp.experts.16.down_proj",
406
+ "model.layers.61.mlp.experts.17.gate_proj",
407
+ "model.layers.61.mlp.experts.17.up_proj",
408
+ "model.layers.61.mlp.experts.17.down_proj",
409
+ "model.layers.61.mlp.experts.18.gate_proj",
410
+ "model.layers.61.mlp.experts.18.up_proj",
411
+ "model.layers.61.mlp.experts.18.down_proj",
412
+ "model.layers.61.mlp.experts.19.gate_proj",
413
+ "model.layers.61.mlp.experts.19.up_proj",
414
+ "model.layers.61.mlp.experts.19.down_proj",
415
+ "model.layers.61.mlp.experts.20.gate_proj",
416
+ "model.layers.61.mlp.experts.20.up_proj",
417
+ "model.layers.61.mlp.experts.20.down_proj",
418
+ "model.layers.61.mlp.experts.21.gate_proj",
419
+ "model.layers.61.mlp.experts.21.up_proj",
420
+ "model.layers.61.mlp.experts.21.down_proj",
421
+ "model.layers.61.mlp.experts.22.gate_proj",
422
+ "model.layers.61.mlp.experts.22.up_proj",
423
+ "model.layers.61.mlp.experts.22.down_proj",
424
+ "model.layers.61.mlp.experts.23.gate_proj",
425
+ "model.layers.61.mlp.experts.23.up_proj",
426
+ "model.layers.61.mlp.experts.23.down_proj",
427
+ "model.layers.61.mlp.experts.24.gate_proj",
428
+ "model.layers.61.mlp.experts.24.up_proj",
429
+ "model.layers.61.mlp.experts.24.down_proj",
430
+ "model.layers.61.mlp.experts.25.gate_proj",
431
+ "model.layers.61.mlp.experts.25.up_proj",
432
+ "model.layers.61.mlp.experts.25.down_proj",
433
+ "model.layers.61.mlp.experts.26.gate_proj",
434
+ "model.layers.61.mlp.experts.26.up_proj",
435
+ "model.layers.61.mlp.experts.26.down_proj",
436
+ "model.layers.61.mlp.experts.27.gate_proj",
437
+ "model.layers.61.mlp.experts.27.up_proj",
438
+ "model.layers.61.mlp.experts.27.down_proj",
439
+ "model.layers.61.mlp.experts.28.gate_proj",
440
+ "model.layers.61.mlp.experts.28.up_proj",
441
+ "model.layers.61.mlp.experts.28.down_proj",
442
+ "model.layers.61.mlp.experts.29.gate_proj",
443
+ "model.layers.61.mlp.experts.29.up_proj",
444
+ "model.layers.61.mlp.experts.29.down_proj",
445
+ "model.layers.61.mlp.experts.30.gate_proj",
446
+ "model.layers.61.mlp.experts.30.up_proj",
447
+ "model.layers.61.mlp.experts.30.down_proj",
448
+ "model.layers.61.mlp.experts.31.gate_proj",
449
+ "model.layers.61.mlp.experts.31.up_proj",
450
+ "model.layers.61.mlp.experts.31.down_proj",
451
+ "model.layers.61.mlp.experts.32.gate_proj",
452
+ "model.layers.61.mlp.experts.32.up_proj",
453
+ "model.layers.61.mlp.experts.32.down_proj",
454
+ "model.layers.61.mlp.experts.33.gate_proj",
455
+ "model.layers.61.mlp.experts.33.up_proj",
456
+ "model.layers.61.mlp.experts.33.down_proj",
457
+ "model.layers.61.mlp.experts.34.gate_proj",
458
+ "model.layers.61.mlp.experts.34.up_proj",
459
+ "model.layers.61.mlp.experts.34.down_proj",
460
+ "model.layers.61.mlp.experts.35.gate_proj",
461
+ "model.layers.61.mlp.experts.35.up_proj",
462
+ "model.layers.61.mlp.experts.35.down_proj",
463
+ "model.layers.61.mlp.experts.36.gate_proj",
464
+ "model.layers.61.mlp.experts.36.up_proj",
465
+ "model.layers.61.mlp.experts.36.down_proj",
466
+ "model.layers.61.mlp.experts.37.gate_proj",
467
+ "model.layers.61.mlp.experts.37.up_proj",
468
+ "model.layers.61.mlp.experts.37.down_proj",
469
+ "model.layers.61.mlp.experts.38.gate_proj",
470
+ "model.layers.61.mlp.experts.38.up_proj",
471
+ "model.layers.61.mlp.experts.38.down_proj",
472
+ "model.layers.61.mlp.experts.39.gate_proj",
473
+ "model.layers.61.mlp.experts.39.up_proj",
474
+ "model.layers.61.mlp.experts.39.down_proj",
475
+ "model.layers.61.mlp.experts.40.gate_proj",
476
+ "model.layers.61.mlp.experts.40.up_proj",
477
+ "model.layers.61.mlp.experts.40.down_proj",
478
+ "model.layers.61.mlp.experts.41.gate_proj",
479
+ "model.layers.61.mlp.experts.41.up_proj",
480
+ "model.layers.61.mlp.experts.41.down_proj",
481
+ "model.layers.61.mlp.experts.42.gate_proj",
482
+ "model.layers.61.mlp.experts.42.up_proj",
483
+ "model.layers.61.mlp.experts.42.down_proj",
484
+ "model.layers.61.mlp.experts.43.gate_proj",
485
+ "model.layers.61.mlp.experts.43.up_proj",
486
+ "model.layers.61.mlp.experts.43.down_proj",
487
+ "model.layers.61.mlp.experts.44.gate_proj",
488
+ "model.layers.61.mlp.experts.44.up_proj",
489
+ "model.layers.61.mlp.experts.44.down_proj",
490
+ "model.layers.61.mlp.experts.45.gate_proj",
491
+ "model.layers.61.mlp.experts.45.up_proj",
492
+ "model.layers.61.mlp.experts.45.down_proj",
493
+ "model.layers.61.mlp.experts.46.gate_proj",
494
+ "model.layers.61.mlp.experts.46.up_proj",
495
+ "model.layers.61.mlp.experts.46.down_proj",
496
+ "model.layers.61.mlp.experts.47.gate_proj",
497
+ "model.layers.61.mlp.experts.47.up_proj",
498
+ "model.layers.61.mlp.experts.47.down_proj",
499
+ "model.layers.61.mlp.experts.48.gate_proj",
500
+ "model.layers.61.mlp.experts.48.up_proj",
501
+ "model.layers.61.mlp.experts.48.down_proj",
502
+ "model.layers.61.mlp.experts.49.gate_proj",
503
+ "model.layers.61.mlp.experts.49.up_proj",
504
+ "model.layers.61.mlp.experts.49.down_proj",
505
+ "model.layers.61.mlp.experts.50.gate_proj",
506
+ "model.layers.61.mlp.experts.50.up_proj",
507
+ "model.layers.61.mlp.experts.50.down_proj",
508
+ "model.layers.61.mlp.experts.51.gate_proj",
509
+ "model.layers.61.mlp.experts.51.up_proj",
510
+ "model.layers.61.mlp.experts.51.down_proj",
511
+ "model.layers.61.mlp.experts.52.gate_proj",
512
+ "model.layers.61.mlp.experts.52.up_proj",
513
+ "model.layers.61.mlp.experts.52.down_proj",
514
+ "model.layers.61.mlp.experts.53.gate_proj",
515
+ "model.layers.61.mlp.experts.53.up_proj",
516
+ "model.layers.61.mlp.experts.53.down_proj",
517
+ "model.layers.61.mlp.experts.54.gate_proj",
518
+ "model.layers.61.mlp.experts.54.up_proj",
519
+ "model.layers.61.mlp.experts.54.down_proj",
520
+ "model.layers.61.mlp.experts.55.gate_proj",
521
+ "model.layers.61.mlp.experts.55.up_proj",
522
+ "model.layers.61.mlp.experts.55.down_proj",
523
+ "model.layers.61.mlp.experts.56.gate_proj",
524
+ "model.layers.61.mlp.experts.56.up_proj",
525
+ "model.layers.61.mlp.experts.56.down_proj",
526
+ "model.layers.61.mlp.experts.57.gate_proj",
527
+ "model.layers.61.mlp.experts.57.up_proj",
528
+ "model.layers.61.mlp.experts.57.down_proj",
529
+ "model.layers.61.mlp.experts.58.gate_proj",
530
+ "model.layers.61.mlp.experts.58.up_proj",
531
+ "model.layers.61.mlp.experts.58.down_proj",
532
+ "model.layers.61.mlp.experts.59.gate_proj",
533
+ "model.layers.61.mlp.experts.59.up_proj",
534
+ "model.layers.61.mlp.experts.59.down_proj",
535
+ "model.layers.61.mlp.experts.60.gate_proj",
536
+ "model.layers.61.mlp.experts.60.up_proj",
537
+ "model.layers.61.mlp.experts.60.down_proj",
538
+ "model.layers.61.mlp.experts.61.gate_proj",
539
+ "model.layers.61.mlp.experts.61.up_proj",
540
+ "model.layers.61.mlp.experts.61.down_proj",
541
+ "model.layers.61.mlp.experts.62.gate_proj",
542
+ "model.layers.61.mlp.experts.62.up_proj",
543
+ "model.layers.61.mlp.experts.62.down_proj",
544
+ "model.layers.61.mlp.experts.63.gate_proj",
545
+ "model.layers.61.mlp.experts.63.up_proj",
546
+ "model.layers.61.mlp.experts.63.down_proj",
547
+ "model.layers.61.mlp.experts.64.gate_proj",
548
+ "model.layers.61.mlp.experts.64.up_proj",
549
+ "model.layers.61.mlp.experts.64.down_proj",
550
+ "model.layers.61.mlp.experts.65.gate_proj",
551
+ "model.layers.61.mlp.experts.65.up_proj",
552
+ "model.layers.61.mlp.experts.65.down_proj",
553
+ "model.layers.61.mlp.experts.66.gate_proj",
554
+ "model.layers.61.mlp.experts.66.up_proj",
555
+ "model.layers.61.mlp.experts.66.down_proj",
556
+ "model.layers.61.mlp.experts.67.gate_proj",
557
+ "model.layers.61.mlp.experts.67.up_proj",
558
+ "model.layers.61.mlp.experts.67.down_proj",
559
+ "model.layers.61.mlp.experts.68.gate_proj",
560
+ "model.layers.61.mlp.experts.68.up_proj",
561
+ "model.layers.61.mlp.experts.68.down_proj",
562
+ "model.layers.61.mlp.experts.69.gate_proj",
563
+ "model.layers.61.mlp.experts.69.up_proj",
564
+ "model.layers.61.mlp.experts.69.down_proj",
565
+ "model.layers.61.mlp.experts.70.gate_proj",
566
+ "model.layers.61.mlp.experts.70.up_proj",
567
+ "model.layers.61.mlp.experts.70.down_proj",
568
+ "model.layers.61.mlp.experts.71.gate_proj",
569
+ "model.layers.61.mlp.experts.71.up_proj",
570
+ "model.layers.61.mlp.experts.71.down_proj",
571
+ "model.layers.61.mlp.experts.72.gate_proj",
572
+ "model.layers.61.mlp.experts.72.up_proj",
573
+ "model.layers.61.mlp.experts.72.down_proj",
574
+ "model.layers.61.mlp.experts.73.gate_proj",
575
+ "model.layers.61.mlp.experts.73.up_proj",
576
+ "model.layers.61.mlp.experts.73.down_proj",
577
+ "model.layers.61.mlp.experts.74.gate_proj",
578
+ "model.layers.61.mlp.experts.74.up_proj",
579
+ "model.layers.61.mlp.experts.74.down_proj",
580
+ "model.layers.61.mlp.experts.75.gate_proj",
581
+ "model.layers.61.mlp.experts.75.up_proj",
582
+ "model.layers.61.mlp.experts.75.down_proj",
583
+ "model.layers.61.mlp.experts.76.gate_proj",
584
+ "model.layers.61.mlp.experts.76.up_proj",
585
+ "model.layers.61.mlp.experts.76.down_proj",
586
+ "model.layers.61.mlp.experts.77.gate_proj",
587
+ "model.layers.61.mlp.experts.77.up_proj",
588
+ "model.layers.61.mlp.experts.77.down_proj",
589
+ "model.layers.61.mlp.experts.78.gate_proj",
590
+ "model.layers.61.mlp.experts.78.up_proj",
591
+ "model.layers.61.mlp.experts.78.down_proj",
592
+ "model.layers.61.mlp.experts.79.gate_proj",
593
+ "model.layers.61.mlp.experts.79.up_proj",
594
+ "model.layers.61.mlp.experts.79.down_proj",
595
+ "model.layers.61.mlp.experts.80.gate_proj",
596
+ "model.layers.61.mlp.experts.80.up_proj",
597
+ "model.layers.61.mlp.experts.80.down_proj",
598
+ "model.layers.61.mlp.experts.81.gate_proj",
599
+ "model.layers.61.mlp.experts.81.up_proj",
600
+ "model.layers.61.mlp.experts.81.down_proj",
601
+ "model.layers.61.mlp.experts.82.gate_proj",
602
+ "model.layers.61.mlp.experts.82.up_proj",
603
+ "model.layers.61.mlp.experts.82.down_proj",
604
+ "model.layers.61.mlp.experts.83.gate_proj",
605
+ "model.layers.61.mlp.experts.83.up_proj",
606
+ "model.layers.61.mlp.experts.83.down_proj",
607
+ "model.layers.61.mlp.experts.84.gate_proj",
608
+ "model.layers.61.mlp.experts.84.up_proj",
609
+ "model.layers.61.mlp.experts.84.down_proj",
610
+ "model.layers.61.mlp.experts.85.gate_proj",
611
+ "model.layers.61.mlp.experts.85.up_proj",
612
+ "model.layers.61.mlp.experts.85.down_proj",
613
+ "model.layers.61.mlp.experts.86.gate_proj",
614
+ "model.layers.61.mlp.experts.86.up_proj",
615
+ "model.layers.61.mlp.experts.86.down_proj",
616
+ "model.layers.61.mlp.experts.87.gate_proj",
617
+ "model.layers.61.mlp.experts.87.up_proj",
618
+ "model.layers.61.mlp.experts.87.down_proj",
619
+ "model.layers.61.mlp.experts.88.gate_proj",
620
+ "model.layers.61.mlp.experts.88.up_proj",
621
+ "model.layers.61.mlp.experts.88.down_proj",
622
+ "model.layers.61.mlp.experts.89.gate_proj",
623
+ "model.layers.61.mlp.experts.89.up_proj",
624
+ "model.layers.61.mlp.experts.89.down_proj",
625
+ "model.layers.61.mlp.experts.90.gate_proj",
626
+ "model.layers.61.mlp.experts.90.up_proj",
627
+ "model.layers.61.mlp.experts.90.down_proj",
628
+ "model.layers.61.mlp.experts.91.gate_proj",
629
+ "model.layers.61.mlp.experts.91.up_proj",
630
+ "model.layers.61.mlp.experts.91.down_proj",
631
+ "model.layers.61.mlp.experts.92.gate_proj",
632
+ "model.layers.61.mlp.experts.92.up_proj",
633
+ "model.layers.61.mlp.experts.92.down_proj",
634
+ "model.layers.61.mlp.experts.93.gate_proj",
635
+ "model.layers.61.mlp.experts.93.up_proj",
636
+ "model.layers.61.mlp.experts.93.down_proj",
637
+ "model.layers.61.mlp.experts.94.gate_proj",
638
+ "model.layers.61.mlp.experts.94.up_proj",
639
+ "model.layers.61.mlp.experts.94.down_proj",
640
+ "model.layers.61.mlp.experts.95.gate_proj",
641
+ "model.layers.61.mlp.experts.95.up_proj",
642
+ "model.layers.61.mlp.experts.95.down_proj",
643
+ "model.layers.61.mlp.experts.96.gate_proj",
644
+ "model.layers.61.mlp.experts.96.up_proj",
645
+ "model.layers.61.mlp.experts.96.down_proj",
646
+ "model.layers.61.mlp.experts.97.gate_proj",
647
+ "model.layers.61.mlp.experts.97.up_proj",
648
+ "model.layers.61.mlp.experts.97.down_proj",
649
+ "model.layers.61.mlp.experts.98.gate_proj",
650
+ "model.layers.61.mlp.experts.98.up_proj",
651
+ "model.layers.61.mlp.experts.98.down_proj",
652
+ "model.layers.61.mlp.experts.99.gate_proj",
653
+ "model.layers.61.mlp.experts.99.up_proj",
654
+ "model.layers.61.mlp.experts.99.down_proj",
655
+ "model.layers.61.mlp.experts.100.gate_proj",
656
+ "model.layers.61.mlp.experts.100.up_proj",
657
+ "model.layers.61.mlp.experts.100.down_proj",
658
+ "model.layers.61.mlp.experts.101.gate_proj",
659
+ "model.layers.61.mlp.experts.101.up_proj",
660
+ "model.layers.61.mlp.experts.101.down_proj",
661
+ "model.layers.61.mlp.experts.102.gate_proj",
662
+ "model.layers.61.mlp.experts.102.up_proj",
663
+ "model.layers.61.mlp.experts.102.down_proj",
664
+ "model.layers.61.mlp.experts.103.gate_proj",
665
+ "model.layers.61.mlp.experts.103.up_proj",
666
+ "model.layers.61.mlp.experts.103.down_proj",
667
+ "model.layers.61.mlp.experts.104.gate_proj",
668
+ "model.layers.61.mlp.experts.104.up_proj",
669
+ "model.layers.61.mlp.experts.104.down_proj",
670
+ "model.layers.61.mlp.experts.105.gate_proj",
671
+ "model.layers.61.mlp.experts.105.up_proj",
672
+ "model.layers.61.mlp.experts.105.down_proj",
673
+ "model.layers.61.mlp.experts.106.gate_proj",
674
+ "model.layers.61.mlp.experts.106.up_proj",
675
+ "model.layers.61.mlp.experts.106.down_proj",
676
+ "model.layers.61.mlp.experts.107.gate_proj",
677
+ "model.layers.61.mlp.experts.107.up_proj",
678
+ "model.layers.61.mlp.experts.107.down_proj",
679
+ "model.layers.61.mlp.experts.108.gate_proj",
680
+ "model.layers.61.mlp.experts.108.up_proj",
681
+ "model.layers.61.mlp.experts.108.down_proj",
682
+ "model.layers.61.mlp.experts.109.gate_proj",
683
+ "model.layers.61.mlp.experts.109.up_proj",
684
+ "model.layers.61.mlp.experts.109.down_proj",
685
+ "model.layers.61.mlp.experts.110.gate_proj",
686
+ "model.layers.61.mlp.experts.110.up_proj",
687
+ "model.layers.61.mlp.experts.110.down_proj",
688
+ "model.layers.61.mlp.experts.111.gate_proj",
689
+ "model.layers.61.mlp.experts.111.up_proj",
690
+ "model.layers.61.mlp.experts.111.down_proj",
691
+ "model.layers.61.mlp.experts.112.gate_proj",
692
+ "model.layers.61.mlp.experts.112.up_proj",
693
+ "model.layers.61.mlp.experts.112.down_proj",
694
+ "model.layers.61.mlp.experts.113.gate_proj",
695
+ "model.layers.61.mlp.experts.113.up_proj",
696
+ "model.layers.61.mlp.experts.113.down_proj",
697
+ "model.layers.61.mlp.experts.114.gate_proj",
698
+ "model.layers.61.mlp.experts.114.up_proj",
699
+ "model.layers.61.mlp.experts.114.down_proj",
700
+ "model.layers.61.mlp.experts.115.gate_proj",
701
+ "model.layers.61.mlp.experts.115.up_proj",
702
+ "model.layers.61.mlp.experts.115.down_proj",
703
+ "model.layers.61.mlp.experts.116.gate_proj",
704
+ "model.layers.61.mlp.experts.116.up_proj",
705
+ "model.layers.61.mlp.experts.116.down_proj",
706
+ "model.layers.61.mlp.experts.117.gate_proj",
707
+ "model.layers.61.mlp.experts.117.up_proj",
708
+ "model.layers.61.mlp.experts.117.down_proj",
709
+ "model.layers.61.mlp.experts.118.gate_proj",
710
+ "model.layers.61.mlp.experts.118.up_proj",
711
+ "model.layers.61.mlp.experts.118.down_proj",
712
+ "model.layers.61.mlp.experts.119.gate_proj",
713
+ "model.layers.61.mlp.experts.119.up_proj",
714
+ "model.layers.61.mlp.experts.119.down_proj",
715
+ "model.layers.61.mlp.experts.120.gate_proj",
716
+ "model.layers.61.mlp.experts.120.up_proj",
717
+ "model.layers.61.mlp.experts.120.down_proj",
718
+ "model.layers.61.mlp.experts.121.gate_proj",
719
+ "model.layers.61.mlp.experts.121.up_proj",
720
+ "model.layers.61.mlp.experts.121.down_proj",
721
+ "model.layers.61.mlp.experts.122.gate_proj",
722
+ "model.layers.61.mlp.experts.122.up_proj",
723
+ "model.layers.61.mlp.experts.122.down_proj",
724
+ "model.layers.61.mlp.experts.123.gate_proj",
725
+ "model.layers.61.mlp.experts.123.up_proj",
726
+ "model.layers.61.mlp.experts.123.down_proj",
727
+ "model.layers.61.mlp.experts.124.gate_proj",
728
+ "model.layers.61.mlp.experts.124.up_proj",
729
+ "model.layers.61.mlp.experts.124.down_proj",
730
+ "model.layers.61.mlp.experts.125.gate_proj",
731
+ "model.layers.61.mlp.experts.125.up_proj",
732
+ "model.layers.61.mlp.experts.125.down_proj",
733
+ "model.layers.61.mlp.experts.126.gate_proj",
734
+ "model.layers.61.mlp.experts.126.up_proj",
735
+ "model.layers.61.mlp.experts.126.down_proj",
736
+ "model.layers.61.mlp.experts.127.gate_proj",
737
+ "model.layers.61.mlp.experts.127.up_proj",
738
+ "model.layers.61.mlp.experts.127.down_proj",
739
+ "model.layers.61.mlp.experts.128.gate_proj",
740
+ "model.layers.61.mlp.experts.128.up_proj",
741
+ "model.layers.61.mlp.experts.128.down_proj",
742
+ "model.layers.61.mlp.experts.129.gate_proj",
743
+ "model.layers.61.mlp.experts.129.up_proj",
744
+ "model.layers.61.mlp.experts.129.down_proj",
745
+ "model.layers.61.mlp.experts.130.gate_proj",
746
+ "model.layers.61.mlp.experts.130.up_proj",
747
+ "model.layers.61.mlp.experts.130.down_proj",
748
+ "model.layers.61.mlp.experts.131.gate_proj",
749
+ "model.layers.61.mlp.experts.131.up_proj",
750
+ "model.layers.61.mlp.experts.131.down_proj",
751
+ "model.layers.61.mlp.experts.132.gate_proj",
752
+ "model.layers.61.mlp.experts.132.up_proj",
753
+ "model.layers.61.mlp.experts.132.down_proj",
754
+ "model.layers.61.mlp.experts.133.gate_proj",
755
+ "model.layers.61.mlp.experts.133.up_proj",
756
+ "model.layers.61.mlp.experts.133.down_proj",
757
+ "model.layers.61.mlp.experts.134.gate_proj",
758
+ "model.layers.61.mlp.experts.134.up_proj",
759
+ "model.layers.61.mlp.experts.134.down_proj",
760
+ "model.layers.61.mlp.experts.135.gate_proj",
761
+ "model.layers.61.mlp.experts.135.up_proj",
762
+ "model.layers.61.mlp.experts.135.down_proj",
763
+ "model.layers.61.mlp.experts.136.gate_proj",
764
+ "model.layers.61.mlp.experts.136.up_proj",
765
+ "model.layers.61.mlp.experts.136.down_proj",
766
+ "model.layers.61.mlp.experts.137.gate_proj",
767
+ "model.layers.61.mlp.experts.137.up_proj",
768
+ "model.layers.61.mlp.experts.137.down_proj",
769
+ "model.layers.61.mlp.experts.138.gate_proj",
770
+ "model.layers.61.mlp.experts.138.up_proj",
771
+ "model.layers.61.mlp.experts.138.down_proj",
772
+ "model.layers.61.mlp.experts.139.gate_proj",
773
+ "model.layers.61.mlp.experts.139.up_proj",
774
+ "model.layers.61.mlp.experts.139.down_proj",
775
+ "model.layers.61.mlp.experts.140.gate_proj",
776
+ "model.layers.61.mlp.experts.140.up_proj",
777
+ "model.layers.61.mlp.experts.140.down_proj",
778
+ "model.layers.61.mlp.experts.141.gate_proj",
779
+ "model.layers.61.mlp.experts.141.up_proj",
780
+ "model.layers.61.mlp.experts.141.down_proj",
781
+ "model.layers.61.mlp.experts.142.gate_proj",
782
+ "model.layers.61.mlp.experts.142.up_proj",
783
+ "model.layers.61.mlp.experts.142.down_proj",
784
+ "model.layers.61.mlp.experts.143.gate_proj",
785
+ "model.layers.61.mlp.experts.143.up_proj",
786
+ "model.layers.61.mlp.experts.143.down_proj",
787
+ "model.layers.61.mlp.experts.144.gate_proj",
788
+ "model.layers.61.mlp.experts.144.up_proj",
789
+ "model.layers.61.mlp.experts.144.down_proj",
790
+ "model.layers.61.mlp.experts.145.gate_proj",
791
+ "model.layers.61.mlp.experts.145.up_proj",
792
+ "model.layers.61.mlp.experts.145.down_proj",
793
+ "model.layers.61.mlp.experts.146.gate_proj",
794
+ "model.layers.61.mlp.experts.146.up_proj",
795
+ "model.layers.61.mlp.experts.146.down_proj",
796
+ "model.layers.61.mlp.experts.147.gate_proj",
797
+ "model.layers.61.mlp.experts.147.up_proj",
798
+ "model.layers.61.mlp.experts.147.down_proj",
799
+ "model.layers.61.mlp.experts.148.gate_proj",
800
+ "model.layers.61.mlp.experts.148.up_proj",
801
+ "model.layers.61.mlp.experts.148.down_proj",
802
+ "model.layers.61.mlp.experts.149.gate_proj",
803
+ "model.layers.61.mlp.experts.149.up_proj",
804
+ "model.layers.61.mlp.experts.149.down_proj",
805
+ "model.layers.61.mlp.experts.150.gate_proj",
806
+ "model.layers.61.mlp.experts.150.up_proj",
807
+ "model.layers.61.mlp.experts.150.down_proj",
808
+ "model.layers.61.mlp.experts.151.gate_proj",
809
+ "model.layers.61.mlp.experts.151.up_proj",
810
+ "model.layers.61.mlp.experts.151.down_proj",
811
+ "model.layers.61.mlp.experts.152.gate_proj",
812
+ "model.layers.61.mlp.experts.152.up_proj",
813
+ "model.layers.61.mlp.experts.152.down_proj",
814
+ "model.layers.61.mlp.experts.153.gate_proj",
815
+ "model.layers.61.mlp.experts.153.up_proj",
816
+ "model.layers.61.mlp.experts.153.down_proj",
817
+ "model.layers.61.mlp.experts.154.gate_proj",
818
+ "model.layers.61.mlp.experts.154.up_proj",
819
+ "model.layers.61.mlp.experts.154.down_proj",
820
+ "model.layers.61.mlp.experts.155.gate_proj",
821
+ "model.layers.61.mlp.experts.155.up_proj",
822
+ "model.layers.61.mlp.experts.155.down_proj",
823
+ "model.layers.61.mlp.experts.156.gate_proj",
824
+ "model.layers.61.mlp.experts.156.up_proj",
825
+ "model.layers.61.mlp.experts.156.down_proj",
826
+ "model.layers.61.mlp.experts.157.gate_proj",
827
+ "model.layers.61.mlp.experts.157.up_proj",
828
+ "model.layers.61.mlp.experts.157.down_proj",
829
+ "model.layers.61.mlp.experts.158.gate_proj",
830
+ "model.layers.61.mlp.experts.158.up_proj",
831
+ "model.layers.61.mlp.experts.158.down_proj",
832
+ "model.layers.61.mlp.experts.159.gate_proj",
833
+ "model.layers.61.mlp.experts.159.up_proj",
834
+ "model.layers.61.mlp.experts.159.down_proj",
835
+ "model.layers.61.mlp.experts.160.gate_proj",
836
+ "model.layers.61.mlp.experts.160.up_proj",
837
+ "model.layers.61.mlp.experts.160.down_proj",
838
+ "model.layers.61.mlp.experts.161.gate_proj",
839
+ "model.layers.61.mlp.experts.161.up_proj",
840
+ "model.layers.61.mlp.experts.161.down_proj",
841
+ "model.layers.61.mlp.experts.162.gate_proj",
842
+ "model.layers.61.mlp.experts.162.up_proj",
843
+ "model.layers.61.mlp.experts.162.down_proj",
844
+ "model.layers.61.mlp.experts.163.gate_proj",
845
+ "model.layers.61.mlp.experts.163.up_proj",
846
+ "model.layers.61.mlp.experts.163.down_proj",
847
+ "model.layers.61.mlp.experts.164.gate_proj",
848
+ "model.layers.61.mlp.experts.164.up_proj",
849
+ "model.layers.61.mlp.experts.164.down_proj",
850
+ "model.layers.61.mlp.experts.165.gate_proj",
851
+ "model.layers.61.mlp.experts.165.up_proj",
852
+ "model.layers.61.mlp.experts.165.down_proj",
853
+ "model.layers.61.mlp.experts.166.gate_proj",
854
+ "model.layers.61.mlp.experts.166.up_proj",
855
+ "model.layers.61.mlp.experts.166.down_proj",
856
+ "model.layers.61.mlp.experts.167.gate_proj",
857
+ "model.layers.61.mlp.experts.167.up_proj",
858
+ "model.layers.61.mlp.experts.167.down_proj",
859
+ "model.layers.61.mlp.experts.168.gate_proj",
860
+ "model.layers.61.mlp.experts.168.up_proj",
861
+ "model.layers.61.mlp.experts.168.down_proj",
862
+ "model.layers.61.mlp.experts.169.gate_proj",
863
+ "model.layers.61.mlp.experts.169.up_proj",
864
+ "model.layers.61.mlp.experts.169.down_proj",
865
+ "model.layers.61.mlp.experts.170.gate_proj",
866
+ "model.layers.61.mlp.experts.170.up_proj",
867
+ "model.layers.61.mlp.experts.170.down_proj",
868
+ "model.layers.61.mlp.experts.171.gate_proj",
869
+ "model.layers.61.mlp.experts.171.up_proj",
870
+ "model.layers.61.mlp.experts.171.down_proj",
871
+ "model.layers.61.mlp.experts.172.gate_proj",
872
+ "model.layers.61.mlp.experts.172.up_proj",
873
+ "model.layers.61.mlp.experts.172.down_proj",
874
+ "model.layers.61.mlp.experts.173.gate_proj",
875
+ "model.layers.61.mlp.experts.173.up_proj",
876
+ "model.layers.61.mlp.experts.173.down_proj",
877
+ "model.layers.61.mlp.experts.174.gate_proj",
878
+ "model.layers.61.mlp.experts.174.up_proj",
879
+ "model.layers.61.mlp.experts.174.down_proj",
880
+ "model.layers.61.mlp.experts.175.gate_proj",
881
+ "model.layers.61.mlp.experts.175.up_proj",
882
+ "model.layers.61.mlp.experts.175.down_proj",
883
+ "model.layers.61.mlp.experts.176.gate_proj",
884
+ "model.layers.61.mlp.experts.176.up_proj",
885
+ "model.layers.61.mlp.experts.176.down_proj",
886
+ "model.layers.61.mlp.experts.177.gate_proj",
887
+ "model.layers.61.mlp.experts.177.up_proj",
888
+ "model.layers.61.mlp.experts.177.down_proj",
889
+ "model.layers.61.mlp.experts.178.gate_proj",
890
+ "model.layers.61.mlp.experts.178.up_proj",
891
+ "model.layers.61.mlp.experts.178.down_proj",
892
+ "model.layers.61.mlp.experts.179.gate_proj",
893
+ "model.layers.61.mlp.experts.179.up_proj",
894
+ "model.layers.61.mlp.experts.179.down_proj",
895
+ "model.layers.61.mlp.experts.180.gate_proj",
896
+ "model.layers.61.mlp.experts.180.up_proj",
897
+ "model.layers.61.mlp.experts.180.down_proj",
898
+ "model.layers.61.mlp.experts.181.gate_proj",
899
+ "model.layers.61.mlp.experts.181.up_proj",
900
+ "model.layers.61.mlp.experts.181.down_proj",
901
+ "model.layers.61.mlp.experts.182.gate_proj",
902
+ "model.layers.61.mlp.experts.182.up_proj",
903
+ "model.layers.61.mlp.experts.182.down_proj",
904
+ "model.layers.61.mlp.experts.183.gate_proj",
905
+ "model.layers.61.mlp.experts.183.up_proj",
906
+ "model.layers.61.mlp.experts.183.down_proj",
907
+ "model.layers.61.mlp.experts.184.gate_proj",
908
+ "model.layers.61.mlp.experts.184.up_proj",
909
+ "model.layers.61.mlp.experts.184.down_proj",
910
+ "model.layers.61.mlp.experts.185.gate_proj",
911
+ "model.layers.61.mlp.experts.185.up_proj",
912
+ "model.layers.61.mlp.experts.185.down_proj",
913
+ "model.layers.61.mlp.experts.186.gate_proj",
914
+ "model.layers.61.mlp.experts.186.up_proj",
915
+ "model.layers.61.mlp.experts.186.down_proj",
916
+ "model.layers.61.mlp.experts.187.gate_proj",
917
+ "model.layers.61.mlp.experts.187.up_proj",
918
+ "model.layers.61.mlp.experts.187.down_proj",
919
+ "model.layers.61.mlp.experts.188.gate_proj",
920
+ "model.layers.61.mlp.experts.188.up_proj",
921
+ "model.layers.61.mlp.experts.188.down_proj",
922
+ "model.layers.61.mlp.experts.189.gate_proj",
923
+ "model.layers.61.mlp.experts.189.up_proj",
924
+ "model.layers.61.mlp.experts.189.down_proj",
925
+ "model.layers.61.mlp.experts.190.gate_proj",
926
+ "model.layers.61.mlp.experts.190.up_proj",
927
+ "model.layers.61.mlp.experts.190.down_proj",
928
+ "model.layers.61.mlp.experts.191.gate_proj",
929
+ "model.layers.61.mlp.experts.191.up_proj",
930
+ "model.layers.61.mlp.experts.191.down_proj",
931
+ "model.layers.61.mlp.experts.192.gate_proj",
932
+ "model.layers.61.mlp.experts.192.up_proj",
933
+ "model.layers.61.mlp.experts.192.down_proj",
934
+ "model.layers.61.mlp.experts.193.gate_proj",
935
+ "model.layers.61.mlp.experts.193.up_proj",
936
+ "model.layers.61.mlp.experts.193.down_proj",
937
+ "model.layers.61.mlp.experts.194.gate_proj",
938
+ "model.layers.61.mlp.experts.194.up_proj",
939
+ "model.layers.61.mlp.experts.194.down_proj",
940
+ "model.layers.61.mlp.experts.195.gate_proj",
941
+ "model.layers.61.mlp.experts.195.up_proj",
942
+ "model.layers.61.mlp.experts.195.down_proj",
943
+ "model.layers.61.mlp.experts.196.gate_proj",
944
+ "model.layers.61.mlp.experts.196.up_proj",
945
+ "model.layers.61.mlp.experts.196.down_proj",
946
+ "model.layers.61.mlp.experts.197.gate_proj",
947
+ "model.layers.61.mlp.experts.197.up_proj",
948
+ "model.layers.61.mlp.experts.197.down_proj",
949
+ "model.layers.61.mlp.experts.198.gate_proj",
950
+ "model.layers.61.mlp.experts.198.up_proj",
951
+ "model.layers.61.mlp.experts.198.down_proj",
952
+ "model.layers.61.mlp.experts.199.gate_proj",
953
+ "model.layers.61.mlp.experts.199.up_proj",
954
+ "model.layers.61.mlp.experts.199.down_proj",
955
+ "model.layers.61.mlp.experts.200.gate_proj",
956
+ "model.layers.61.mlp.experts.200.up_proj",
957
+ "model.layers.61.mlp.experts.200.down_proj",
958
+ "model.layers.61.mlp.experts.201.gate_proj",
959
+ "model.layers.61.mlp.experts.201.up_proj",
960
+ "model.layers.61.mlp.experts.201.down_proj",
961
+ "model.layers.61.mlp.experts.202.gate_proj",
962
+ "model.layers.61.mlp.experts.202.up_proj",
963
+ "model.layers.61.mlp.experts.202.down_proj",
964
+ "model.layers.61.mlp.experts.203.gate_proj",
965
+ "model.layers.61.mlp.experts.203.up_proj",
966
+ "model.layers.61.mlp.experts.203.down_proj",
967
+ "model.layers.61.mlp.experts.204.gate_proj",
968
+ "model.layers.61.mlp.experts.204.up_proj",
969
+ "model.layers.61.mlp.experts.204.down_proj",
970
+ "model.layers.61.mlp.experts.205.gate_proj",
971
+ "model.layers.61.mlp.experts.205.up_proj",
972
+ "model.layers.61.mlp.experts.205.down_proj",
973
+ "model.layers.61.mlp.experts.206.gate_proj",
974
+ "model.layers.61.mlp.experts.206.up_proj",
975
+ "model.layers.61.mlp.experts.206.down_proj",
976
+ "model.layers.61.mlp.experts.207.gate_proj",
977
+ "model.layers.61.mlp.experts.207.up_proj",
978
+ "model.layers.61.mlp.experts.207.down_proj",
979
+ "model.layers.61.mlp.experts.208.gate_proj",
980
+ "model.layers.61.mlp.experts.208.up_proj",
981
+ "model.layers.61.mlp.experts.208.down_proj",
982
+ "model.layers.61.mlp.experts.209.gate_proj",
983
+ "model.layers.61.mlp.experts.209.up_proj",
984
+ "model.layers.61.mlp.experts.209.down_proj",
985
+ "model.layers.61.mlp.experts.210.gate_proj",
986
+ "model.layers.61.mlp.experts.210.up_proj",
987
+ "model.layers.61.mlp.experts.210.down_proj",
988
+ "model.layers.61.mlp.experts.211.gate_proj",
989
+ "model.layers.61.mlp.experts.211.up_proj",
990
+ "model.layers.61.mlp.experts.211.down_proj",
991
+ "model.layers.61.mlp.experts.212.gate_proj",
992
+ "model.layers.61.mlp.experts.212.up_proj",
993
+ "model.layers.61.mlp.experts.212.down_proj",
994
+ "model.layers.61.mlp.experts.213.gate_proj",
995
+ "model.layers.61.mlp.experts.213.up_proj",
996
+ "model.layers.61.mlp.experts.213.down_proj",
997
+ "model.layers.61.mlp.experts.214.gate_proj",
998
+ "model.layers.61.mlp.experts.214.up_proj",
999
+ "model.layers.61.mlp.experts.214.down_proj",
1000
+ "model.layers.61.mlp.experts.215.gate_proj",
1001
+ "model.layers.61.mlp.experts.215.up_proj",
1002
+ "model.layers.61.mlp.experts.215.down_proj",
1003
+ "model.layers.61.mlp.experts.216.gate_proj",
1004
+ "model.layers.61.mlp.experts.216.up_proj",
1005
+ "model.layers.61.mlp.experts.216.down_proj",
1006
+ "model.layers.61.mlp.experts.217.gate_proj",
1007
+ "model.layers.61.mlp.experts.217.up_proj",
1008
+ "model.layers.61.mlp.experts.217.down_proj",
1009
+ "model.layers.61.mlp.experts.218.gate_proj",
1010
+ "model.layers.61.mlp.experts.218.up_proj",
1011
+ "model.layers.61.mlp.experts.218.down_proj",
1012
+ "model.layers.61.mlp.experts.219.gate_proj",
1013
+ "model.layers.61.mlp.experts.219.up_proj",
1014
+ "model.layers.61.mlp.experts.219.down_proj",
1015
+ "model.layers.61.mlp.experts.220.gate_proj",
1016
+ "model.layers.61.mlp.experts.220.up_proj",
1017
+ "model.layers.61.mlp.experts.220.down_proj",
1018
+ "model.layers.61.mlp.experts.221.gate_proj",
1019
+ "model.layers.61.mlp.experts.221.up_proj",
1020
+ "model.layers.61.mlp.experts.221.down_proj",
1021
+ "model.layers.61.mlp.experts.222.gate_proj",
1022
+ "model.layers.61.mlp.experts.222.up_proj",
1023
+ "model.layers.61.mlp.experts.222.down_proj",
1024
+ "model.layers.61.mlp.experts.223.gate_proj",
1025
+ "model.layers.61.mlp.experts.223.up_proj",
1026
+ "model.layers.61.mlp.experts.223.down_proj",
1027
+ "model.layers.61.mlp.experts.224.gate_proj",
1028
+ "model.layers.61.mlp.experts.224.up_proj",
1029
+ "model.layers.61.mlp.experts.224.down_proj",
1030
+ "model.layers.61.mlp.experts.225.gate_proj",
1031
+ "model.layers.61.mlp.experts.225.up_proj",
1032
+ "model.layers.61.mlp.experts.225.down_proj",
1033
+ "model.layers.61.mlp.experts.226.gate_proj",
1034
+ "model.layers.61.mlp.experts.226.up_proj",
1035
+ "model.layers.61.mlp.experts.226.down_proj",
1036
+ "model.layers.61.mlp.experts.227.gate_proj",
1037
+ "model.layers.61.mlp.experts.227.up_proj",
1038
+ "model.layers.61.mlp.experts.227.down_proj",
1039
+ "model.layers.61.mlp.experts.228.gate_proj",
1040
+ "model.layers.61.mlp.experts.228.up_proj",
1041
+ "model.layers.61.mlp.experts.228.down_proj",
1042
+ "model.layers.61.mlp.experts.229.gate_proj",
1043
+ "model.layers.61.mlp.experts.229.up_proj",
1044
+ "model.layers.61.mlp.experts.229.down_proj",
1045
+ "model.layers.61.mlp.experts.230.gate_proj",
1046
+ "model.layers.61.mlp.experts.230.up_proj",
1047
+ "model.layers.61.mlp.experts.230.down_proj",
1048
+ "model.layers.61.mlp.experts.231.gate_proj",
1049
+ "model.layers.61.mlp.experts.231.up_proj",
1050
+ "model.layers.61.mlp.experts.231.down_proj",
1051
+ "model.layers.61.mlp.experts.232.gate_proj",
1052
+ "model.layers.61.mlp.experts.232.up_proj",
1053
+ "model.layers.61.mlp.experts.232.down_proj",
1054
+ "model.layers.61.mlp.experts.233.gate_proj",
1055
+ "model.layers.61.mlp.experts.233.up_proj",
1056
+ "model.layers.61.mlp.experts.233.down_proj",
1057
+ "model.layers.61.mlp.experts.234.gate_proj",
1058
+ "model.layers.61.mlp.experts.234.up_proj",
1059
+ "model.layers.61.mlp.experts.234.down_proj",
1060
+ "model.layers.61.mlp.experts.235.gate_proj",
1061
+ "model.layers.61.mlp.experts.235.up_proj",
1062
+ "model.layers.61.mlp.experts.235.down_proj",
1063
+ "model.layers.61.mlp.experts.236.gate_proj",
1064
+ "model.layers.61.mlp.experts.236.up_proj",
1065
+ "model.layers.61.mlp.experts.236.down_proj",
1066
+ "model.layers.61.mlp.experts.237.gate_proj",
1067
+ "model.layers.61.mlp.experts.237.up_proj",
1068
+ "model.layers.61.mlp.experts.237.down_proj",
1069
+ "model.layers.61.mlp.experts.238.gate_proj",
1070
+ "model.layers.61.mlp.experts.238.up_proj",
1071
+ "model.layers.61.mlp.experts.238.down_proj",
1072
+ "model.layers.61.mlp.experts.239.gate_proj",
1073
+ "model.layers.61.mlp.experts.239.up_proj",
1074
+ "model.layers.61.mlp.experts.239.down_proj",
1075
+ "model.layers.61.mlp.experts.240.gate_proj",
1076
+ "model.layers.61.mlp.experts.240.up_proj",
1077
+ "model.layers.61.mlp.experts.240.down_proj",
1078
+ "model.layers.61.mlp.experts.241.gate_proj",
1079
+ "model.layers.61.mlp.experts.241.up_proj",
1080
+ "model.layers.61.mlp.experts.241.down_proj",
1081
+ "model.layers.61.mlp.experts.242.gate_proj",
1082
+ "model.layers.61.mlp.experts.242.up_proj",
1083
+ "model.layers.61.mlp.experts.242.down_proj",
1084
+ "model.layers.61.mlp.experts.243.gate_proj",
1085
+ "model.layers.61.mlp.experts.243.up_proj",
1086
+ "model.layers.61.mlp.experts.243.down_proj",
1087
+ "model.layers.61.mlp.experts.244.gate_proj",
1088
+ "model.layers.61.mlp.experts.244.up_proj",
1089
+ "model.layers.61.mlp.experts.244.down_proj",
1090
+ "model.layers.61.mlp.experts.245.gate_proj",
1091
+ "model.layers.61.mlp.experts.245.up_proj",
1092
+ "model.layers.61.mlp.experts.245.down_proj",
1093
+ "model.layers.61.mlp.experts.246.gate_proj",
1094
+ "model.layers.61.mlp.experts.246.up_proj",
1095
+ "model.layers.61.mlp.experts.246.down_proj",
1096
+ "model.layers.61.mlp.experts.247.gate_proj",
1097
+ "model.layers.61.mlp.experts.247.up_proj",
1098
+ "model.layers.61.mlp.experts.247.down_proj",
1099
+ "model.layers.61.mlp.experts.248.gate_proj",
1100
+ "model.layers.61.mlp.experts.248.up_proj",
1101
+ "model.layers.61.mlp.experts.248.down_proj",
1102
+ "model.layers.61.mlp.experts.249.gate_proj",
1103
+ "model.layers.61.mlp.experts.249.up_proj",
1104
+ "model.layers.61.mlp.experts.249.down_proj",
1105
+ "model.layers.61.mlp.experts.250.gate_proj",
1106
+ "model.layers.61.mlp.experts.250.up_proj",
1107
+ "model.layers.61.mlp.experts.250.down_proj",
1108
+ "model.layers.61.mlp.experts.251.gate_proj",
1109
+ "model.layers.61.mlp.experts.251.up_proj",
1110
+ "model.layers.61.mlp.experts.251.down_proj",
1111
+ "model.layers.61.mlp.experts.252.gate_proj",
1112
+ "model.layers.61.mlp.experts.252.up_proj",
1113
+ "model.layers.61.mlp.experts.252.down_proj",
1114
+ "model.layers.61.mlp.experts.253.gate_proj",
1115
+ "model.layers.61.mlp.experts.253.up_proj",
1116
+ "model.layers.61.mlp.experts.253.down_proj",
1117
+ "model.layers.61.mlp.experts.254.gate_proj",
1118
+ "model.layers.61.mlp.experts.254.up_proj",
1119
+ "model.layers.61.mlp.experts.254.down_proj",
1120
+ "model.layers.61.mlp.experts.255.gate_proj",
1121
+ "model.layers.61.mlp.experts.255.up_proj",
1122
+ "model.layers.61.mlp.experts.255.down_proj",
1123
+ "model.layers.61.mlp.shared_experts.gate_proj",
1124
+ "model.layers.61.mlp.shared_experts.up_proj",
1125
+ "model.layers.61.mlp.shared_experts.down_proj",
1126
+ "model.layers.61.shared_head.head",
1127
  "lm_head"
1128
  ],
1129
  "export": {
 
1168
  "symmetric": null
1169
  }
1170
  },
1171
+ "kv_cache_post_rope": false,
1172
+ "kv_cache_quant_config": {},
1173
  "layer_quant_config": {},
1174
  "layer_type_quant_config": {},
1175
  "quant_method": "quark",
1176
  "quant_mode": "eager_mode",
1177
  "softmax_quant_spec": null,
1178
+ "version": "0.11"
1179
  },
1180
  "rms_norm_eps": 1e-06,
1181
  "rope_scaling": {
 
1193
  "tie_word_embeddings": false,
1194
  "topk_group": 4,
1195
  "topk_method": "noaux_tc",
1196
+ "transformers_version": "4.57.1",
 
1197
  "unsloth_fixed": true,
1198
  "use_cache": true,
1199
  "v_head_dim": 128,
generation_config.json CHANGED
@@ -5,5 +5,5 @@
5
  "eos_token_id": 1,
6
  "temperature": 0.6,
7
  "top_p": 0.95,
8
- "transformers_version": "4.51.3"
9
  }
 
5
  "eos_token_id": 1,
6
  "temperature": 0.6,
7
  "top_p": 0.95,
8
+ "transformers_version": "4.57.1"
9
  }
model-00001-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18dbe98abe48f2c7d84cb28a2b97101b72634a2888e3d6eb7a8d6fbf5281b3ba
3
+ size 4995884768
model-00002-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d0d18aabfc7cce4d182027358d7ffd4504ac6278c9c9ca12bb60dd8975ff86e
3
+ size 4995052496
model-00003-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:673d355712864dbd1f27314c0360f0a963e0ee9452ea36c8ae42bee4a4990756
3
+ size 4999071368
model-00004-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b4f4daa2b95ce73e353a8fef1f049bc07c75125bbc6fa601b554440adadd81
3
+ size 4994943096
model-00005-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:effea69cc5740f7ea8b94d7b4af9311c483a4b5bdd59b5eeb0a36cd47f7e6ae1
3
+ size 4994943232
model-00006-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:683ef342bcc6c2d72c94e35d35b384f9821ef27afb9d78573279fd287aa9e385
3
+ size 4994943456
model-00007-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4837be87f1a2876a0012f4da8b380b06c4a683b53974195ff24b2b7411bcad
3
+ size 4999180624
model-00008-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5fe73fa25a336e6be2707c63474d96b0add20015a4317dde154953ffcc343d2
3
+ size 4994943096
model-00009-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:165cdcb1f16f14ef6a83851960d46375dbaabf7c172336cf38456942c879303c
3
+ size 4994943096
model-00010-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d55d0234e36694eb8575633f04612cbee1be0b055f5f41d8519975a7060eb353
3
+ size 4994943720
model-00011-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08a97b781181af8e0c81d0837a559b13711480de19de251df87c61a2ab2f50f3
3
+ size 4916044152
model-00012-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57b62c10bde8448a29f71160465ba9cf418545e124fa145ca9a6c855745cd42a
3
+ size 5000091936
model-00013-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe8b74198c04e16b7a0301385e2248815338eb0af42aca2efb7e4b100fa9ee5
3
+ size 4994944296
model-00014-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81dc88daf114818e8ad9866ef85c403869b7b4391af2f92bc5092344e4a3df55
3
+ size 4994944392
model-00015-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28a0848194500e9f8daff3e3c3b20ec8b52cea81b8b7869e7a2daad7b0f2b5f9
3
+ size 4994944648
model-00016-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce8407bd0d3ac2397a66e75f86034e7789cb547846b2bea484d293f1222041de
3
+ size 4999181952
model-00017-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37f9bbf0b681bdbb51f09045a4894cc4c91ff86aca6a040a03d959a1b04f86c9
3
+ size 4994944288
model-00018-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c67808e128166d99dd2285e6454668bdbb205a9d9efa88bcc51a4187fce22282
3
+ size 4994944296
model-00019-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4d41bad246c7fca9ea4b052e3200c84642f20e855e3d1caf78f5dd7ec9f7cc8
3
+ size 4994944536
model-00020-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:062665046cb9cb0e49f0ca7a194cd0b11a22e57e2604e6d75231705bb274207c
3
+ size 4767862480
model-00021-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a109448c141bbca3c63ec74444b6bdf07ac997acd9e3d25d935403f6ea57f576
3
+ size 5000091936
model-00022-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba1783ead4f05432085a1de14f0fb7c19de024f8de671fd38881fc046ade127
3
+ size 4994944296
model-00023-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5219488f545c644e71eb3d1eb75032c34dd89601a6249f29533336a0f1a6e4c0
3
+ size 4994944392
model-00024-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9ee834cc694739ddb1b118a3658c600ebc81f0f3b8169f33940bb4501dc5742
3
+ size 4994944648
model-00025-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b96f16d81ccbdbbc7ab261be03bc07070c2bc00e27a6e47cf7f36b87463017e
3
+ size 4999181952
model-00026-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcf31ec6ae326831ccfd684b99e4642193606baf2c116f3903b57c81e1e28fa8
3
+ size 4994944288
model-00027-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:059afaaf6c34ee1d1826fe85f859d4a3a86c404f274eaf9e3e871e168c405fc6
3
+ size 4994944296
model-00028-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4d2b6363a3be0f08947d8f83d71f5b9985ef67acb231db25d1b0d96216c15b1
3
+ size 4994944536
model-00029-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0ae064e782c09e2656598ffd40d71dca996e8fdc6c9a06397e976b91eb83914
3
+ size 4767862480
model-00030-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd6637572be2f16e282a1681e30c3d35ebe60d29f7a43e3efaa69be2cf6e8f6e
3
+ size 5000091936
model-00031-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3601bdcbc7cf88fd65faf66f82b096e58b8efe5753278e914a253e7c9d65af17
3
+ size 4994944296
model-00032-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:282a65080530b3414164124adea2fc874343eec6393010780818a5a581677a86
3
+ size 4994944392
model-00033-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:509b077f714559ea4084aabd88bfaecf3dfd0f93250b9f3b836512b0b29dafb4
3
+ size 4994944648
model-00034-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b564d29b021df8fc56a990de50a0d2c7f0354636ed1ce39eb9109bb89a9962b
3
+ size 4999181952
model-00035-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7a6772db37a06a7033f8dd902a0873eaccb897e4332b72bda489ddaed49ed5e
3
+ size 4994944288
model-00036-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b087e16feb88f56ec6e75e2d2d6d5a40e2300754a7601b23cd4adb6b61ddebb
3
+ size 4994944296
model-00037-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:503e5e5d916120f954937fa31f8a8868e07fb2ae3c0d3851f80add2c864a5f2b
3
+ size 4994944536
model-00038-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cd876c9d8b73a45da3e38fc2245da325d8bd8087c16bde87e972b16ad87d4cb
3
+ size 4767862480
model-00039-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68299e7dd1be2f23bde4b3edd8035f1eb0f27e118f245ebea5c30f7c0d0ba748
3
+ size 5000091936
model-00040-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b18d545c348f00cf557de60f50a77f421c0375c7bcf4efec8ae707df2bc9d294
3
+ size 4994944296
model-00041-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc2ccd0ea0b2ba73074674c8c66b994b94711f3f89452dfc5f527c7b5ec4db84
3
+ size 4994944392
model-00042-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a1d0d5712e4883ec88de234f83bd555c028052b525c4d75aa7cdf164fe252e
3
+ size 4994944648
model-00043-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3777ce084609b712999c04974765608ed8fc53d53dbb4b9a32aa4edc5e31e759
3
+ size 4999181952
model-00044-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c5aace2e6d240cd46e71c64a2b1a6c54a910428d7aacddf290ee4f2283ab36e
3
+ size 4994944288
model-00045-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22300a5cbe541012f4eb354ae2998cb49cb8fdaa2b5471d1c48dd97d297ca56e
3
+ size 4994944296
model-00046-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb7f3ecd38ece6ead75a735b22ad44d9f84fd461cdfbf3f822cbcef70726f66
3
+ size 4994944536
model-00047-of-00082.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e9cea92f2e38b2f5f953838410058530cdb5213efcf2d5413cba6bce067db7d
3
+ size 4767862480