INC4AI committed on
Commit
0b4e5fc
·
verified ·
1 Parent(s): 9e72768

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. chat_template.jinja +86 -0
  3. config.json +3585 -0
  4. generation_config.json +13 -0
  5. model-00001-of-00079.safetensors +3 -0
  6. model-00002-of-00079.safetensors +3 -0
  7. model-00003-of-00079.safetensors +3 -0
  8. model-00004-of-00079.safetensors +3 -0
  9. model-00005-of-00079.safetensors +3 -0
  10. model-00006-of-00079.safetensors +3 -0
  11. model-00007-of-00079.safetensors +3 -0
  12. model-00008-of-00079.safetensors +3 -0
  13. model-00009-of-00079.safetensors +3 -0
  14. model-00010-of-00079.safetensors +3 -0
  15. model-00011-of-00079.safetensors +3 -0
  16. model-00012-of-00079.safetensors +3 -0
  17. model-00013-of-00079.safetensors +3 -0
  18. model-00014-of-00079.safetensors +3 -0
  19. model-00015-of-00079.safetensors +3 -0
  20. model-00016-of-00079.safetensors +3 -0
  21. model-00017-of-00079.safetensors +3 -0
  22. model-00018-of-00079.safetensors +3 -0
  23. model-00019-of-00079.safetensors +3 -0
  24. model-00020-of-00079.safetensors +3 -0
  25. model-00021-of-00079.safetensors +3 -0
  26. model-00022-of-00079.safetensors +3 -0
  27. model-00023-of-00079.safetensors +3 -0
  28. model-00024-of-00079.safetensors +3 -0
  29. model-00025-of-00079.safetensors +3 -0
  30. model-00026-of-00079.safetensors +3 -0
  31. model-00027-of-00079.safetensors +3 -0
  32. model-00028-of-00079.safetensors +3 -0
  33. model-00029-of-00079.safetensors +3 -0
  34. model-00030-of-00079.safetensors +3 -0
  35. model-00031-of-00079.safetensors +3 -0
  36. model-00032-of-00079.safetensors +3 -0
  37. model-00033-of-00079.safetensors +3 -0
  38. model-00034-of-00079.safetensors +3 -0
  39. model-00035-of-00079.safetensors +3 -0
  40. model-00036-of-00079.safetensors +3 -0
  41. model-00037-of-00079.safetensors +3 -0
  42. model-00038-of-00079.safetensors +3 -0
  43. model-00039-of-00079.safetensors +3 -0
  44. model-00040-of-00079.safetensors +3 -0
  45. model-00041-of-00079.safetensors +3 -0
  46. model-00042-of-00079.safetensors +3 -0
  47. model-00043-of-00079.safetensors +3 -0
  48. model-00044-of-00079.safetensors +3 -0
  49. model-00045-of-00079.safetensors +3 -0
  50. model-00046-of-00079.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [gMASK]<sop>
2
+ {%- if tools -%}
3
+ <|system|>
4
+ # Tools
5
+
6
+ You may call one or more functions to assist with the user query.
7
+
8
+ You are provided with function signatures within <tools></tools> XML tags:
9
+ <tools>
10
+ {% for tool in tools %}
11
+ {{ tool | tojson(ensure_ascii=False) }}
12
+ {% endfor %}
13
+ </tools>
14
+
15
+ For each function call, output the function name and arguments within the following XML format:
16
+ <tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
17
+ {%- macro visible_text(content) -%}
18
+ {%- if content is string -%}
19
+ {{- content }}
20
+ {%- elif content is iterable and content is not mapping -%}
21
+ {%- for item in content -%}
22
+ {%- if item is mapping and item.type == 'text' -%}
23
+ {{- item.text }}
24
+ {%- elif item is string -%}
25
+ {{- item }}
26
+ {%- endif -%}
27
+ {%- endfor -%}
28
+ {%- else -%}
29
+ {{- content }}
30
+ {%- endif -%}
31
+ {%- endmacro -%}
32
+ {%- set ns = namespace(last_user_index=-1) %}
33
+ {%- for m in messages %}
34
+ {%- if m.role == 'user' %}
35
+ {% set ns.last_user_index = loop.index0 -%}
36
+ {%- endif %}
37
+ {%- endfor %}
38
+ {% for m in messages %}
39
+ {%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
40
+ {%- elif m.role == 'assistant' -%}
41
+ <|assistant|>
42
+ {%- set reasoning_content = '' %}
43
+ {%- set content = visible_text(m.content) %}
44
+ {%- if m.reasoning_content is string %}
45
+ {%- set reasoning_content = m.reasoning_content %}
46
+ {%- else %}
47
+ {%- if '</think>' in content %}
48
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
49
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
50
+ {%- endif %}
51
+ {%- endif %}
52
+ {%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
53
+ {{ '<think>' + reasoning_content.strip() + '</think>'}}
54
+ {%- else -%}
55
+ {{ '</think>' }}
56
+ {%- endif -%}
57
+ {%- if content.strip() -%}
58
+ {{ content.strip() }}
59
+ {%- endif -%}
60
+ {% if m.tool_calls %}
61
+ {% for tc in m.tool_calls %}
62
+ {%- if tc.function %}
63
+ {%- set tc = tc.function %}
64
+ {%- endif %}
65
+ {{- '<tool_call>' + tc.name -}}
66
+ {% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
67
+ {% endif %}
68
+ {%- elif m.role == 'tool' -%}
69
+ {%- if m.content is string -%}
70
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
71
+ {{- '<|observation|>' }}
72
+ {%- endif %}
73
+ {{- '<tool_response>' }}
74
+ {{- m.content }}
75
+ {{- '</tool_response>' }}
76
+ {%- else -%}
77
+ <|observation|>{% for tr in m.content %}
78
+ <tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
79
+ {% endif -%}
80
+ {%- elif m.role == 'system' -%}
81
+ <|system|>{{ visible_text(m.content) }}
82
+ {%- endif -%}
83
+ {%- endfor -%}
84
+ {%- if add_generation_prompt -%}
85
+ <|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
86
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,3585 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "GlmMoeDsaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": [
10
+ 154820,
11
+ 154827,
12
+ 154829
13
+ ],
14
+ "ep_size": 1,
15
+ "first_k_dense_replace": 3,
16
+ "head_dim": 64,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 6144,
19
+ "index_head_dim": 128,
20
+ "index_n_heads": 32,
21
+ "index_topk": 2048,
22
+ "indexer_rope_interleave": true,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 12288,
25
+ "kv_lora_rank": 512,
26
+ "max_position_embeddings": 202752,
27
+ "mlp_layer_types": [
28
+ "dense",
29
+ "dense",
30
+ "dense",
31
+ "sparse",
32
+ "sparse",
33
+ "sparse",
34
+ "sparse",
35
+ "sparse",
36
+ "sparse",
37
+ "sparse",
38
+ "sparse",
39
+ "sparse",
40
+ "sparse",
41
+ "sparse",
42
+ "sparse",
43
+ "sparse",
44
+ "sparse",
45
+ "sparse",
46
+ "sparse",
47
+ "sparse",
48
+ "sparse",
49
+ "sparse",
50
+ "sparse",
51
+ "sparse",
52
+ "sparse",
53
+ "sparse",
54
+ "sparse",
55
+ "sparse",
56
+ "sparse",
57
+ "sparse",
58
+ "sparse",
59
+ "sparse",
60
+ "sparse",
61
+ "sparse",
62
+ "sparse",
63
+ "sparse",
64
+ "sparse",
65
+ "sparse",
66
+ "sparse",
67
+ "sparse",
68
+ "sparse",
69
+ "sparse",
70
+ "sparse",
71
+ "sparse",
72
+ "sparse",
73
+ "sparse",
74
+ "sparse",
75
+ "sparse",
76
+ "sparse",
77
+ "sparse",
78
+ "sparse",
79
+ "sparse",
80
+ "sparse",
81
+ "sparse",
82
+ "sparse",
83
+ "sparse",
84
+ "sparse",
85
+ "sparse",
86
+ "sparse",
87
+ "sparse",
88
+ "sparse",
89
+ "sparse",
90
+ "sparse",
91
+ "sparse",
92
+ "sparse",
93
+ "sparse",
94
+ "sparse",
95
+ "sparse",
96
+ "sparse",
97
+ "sparse",
98
+ "sparse",
99
+ "sparse",
100
+ "sparse",
101
+ "sparse",
102
+ "sparse",
103
+ "sparse",
104
+ "sparse",
105
+ "sparse"
106
+ ],
107
+ "model_type": "glm_moe_dsa",
108
+ "moe_intermediate_size": 2048,
109
+ "moe_layer_freq": 1,
110
+ "n_group": 1,
111
+ "n_routed_experts": 256,
112
+ "n_shared_experts": 1,
113
+ "norm_topk_prob": true,
114
+ "num_attention_heads": 64,
115
+ "num_experts_per_tok": 8,
116
+ "num_hidden_layers": 78,
117
+ "num_key_value_heads": 64,
118
+ "num_nextn_predict_layers": 1,
119
+ "pad_token_id": 154820,
120
+ "pretraining_tp": 1,
121
+ "q_lora_rank": 2048,
122
+ "qk_head_dim": 256,
123
+ "qk_nope_head_dim": 192,
124
+ "qk_rope_head_dim": 64,
125
+ "quantization_config": {
126
+ "autoround_version": "0.12.0",
127
+ "bits": 4,
128
+ "data_type": "int",
129
+ "extra_config": {
130
+ "model.layers.0.mlp.down_proj": {
131
+ "bits": 16,
132
+ "data_type": "float"
133
+ },
134
+ "model.layers.0.mlp.gate_proj": {
135
+ "bits": 16,
136
+ "data_type": "float"
137
+ },
138
+ "model.layers.0.mlp.up_proj": {
139
+ "bits": 16,
140
+ "data_type": "float"
141
+ },
142
+ "model.layers.0.self_attn.indexer.weights_proj": {
143
+ "bits": 16,
144
+ "data_type": "float"
145
+ },
146
+ "model.layers.0.self_attn.indexer.wk": {
147
+ "bits": 16,
148
+ "data_type": "float"
149
+ },
150
+ "model.layers.0.self_attn.indexer.wq_b": {
151
+ "bits": 16,
152
+ "data_type": "float"
153
+ },
154
+ "model.layers.0.self_attn.kv_a_proj_with_mqa": {
155
+ "bits": 16,
156
+ "data_type": "float"
157
+ },
158
+ "model.layers.0.self_attn.kv_b_proj": {
159
+ "bits": 16,
160
+ "data_type": "float"
161
+ },
162
+ "model.layers.0.self_attn.o_proj": {
163
+ "bits": 16,
164
+ "data_type": "float"
165
+ },
166
+ "model.layers.0.self_attn.q_a_proj": {
167
+ "bits": 16,
168
+ "data_type": "float"
169
+ },
170
+ "model.layers.0.self_attn.q_b_proj": {
171
+ "bits": 16,
172
+ "data_type": "float"
173
+ },
174
+ "model.layers.1.mlp.down_proj": {
175
+ "bits": 16,
176
+ "data_type": "float"
177
+ },
178
+ "model.layers.1.mlp.gate_proj": {
179
+ "bits": 16,
180
+ "data_type": "float"
181
+ },
182
+ "model.layers.1.mlp.up_proj": {
183
+ "bits": 16,
184
+ "data_type": "float"
185
+ },
186
+ "model.layers.1.self_attn.indexer.weights_proj": {
187
+ "bits": 16,
188
+ "data_type": "float"
189
+ },
190
+ "model.layers.1.self_attn.indexer.wk": {
191
+ "bits": 16,
192
+ "data_type": "float"
193
+ },
194
+ "model.layers.1.self_attn.indexer.wq_b": {
195
+ "bits": 16,
196
+ "data_type": "float"
197
+ },
198
+ "model.layers.1.self_attn.kv_a_proj_with_mqa": {
199
+ "bits": 16,
200
+ "data_type": "float"
201
+ },
202
+ "model.layers.1.self_attn.kv_b_proj": {
203
+ "bits": 16,
204
+ "data_type": "float"
205
+ },
206
+ "model.layers.1.self_attn.o_proj": {
207
+ "bits": 16,
208
+ "data_type": "float"
209
+ },
210
+ "model.layers.1.self_attn.q_a_proj": {
211
+ "bits": 16,
212
+ "data_type": "float"
213
+ },
214
+ "model.layers.1.self_attn.q_b_proj": {
215
+ "bits": 16,
216
+ "data_type": "float"
217
+ },
218
+ "model.layers.10.mlp.shared_experts.down_proj": {
219
+ "bits": 16,
220
+ "data_type": "float"
221
+ },
222
+ "model.layers.10.mlp.shared_experts.gate_proj": {
223
+ "bits": 16,
224
+ "data_type": "float"
225
+ },
226
+ "model.layers.10.mlp.shared_experts.up_proj": {
227
+ "bits": 16,
228
+ "data_type": "float"
229
+ },
230
+ "model.layers.10.self_attn.indexer.weights_proj": {
231
+ "bits": 16,
232
+ "data_type": "float"
233
+ },
234
+ "model.layers.10.self_attn.indexer.wk": {
235
+ "bits": 16,
236
+ "data_type": "float"
237
+ },
238
+ "model.layers.10.self_attn.indexer.wq_b": {
239
+ "bits": 16,
240
+ "data_type": "float"
241
+ },
242
+ "model.layers.10.self_attn.kv_a_proj_with_mqa": {
243
+ "bits": 16,
244
+ "data_type": "float"
245
+ },
246
+ "model.layers.10.self_attn.kv_b_proj": {
247
+ "bits": 16,
248
+ "data_type": "float"
249
+ },
250
+ "model.layers.10.self_attn.o_proj": {
251
+ "bits": 16,
252
+ "data_type": "float"
253
+ },
254
+ "model.layers.10.self_attn.q_a_proj": {
255
+ "bits": 16,
256
+ "data_type": "float"
257
+ },
258
+ "model.layers.10.self_attn.q_b_proj": {
259
+ "bits": 16,
260
+ "data_type": "float"
261
+ },
262
+ "model.layers.11.mlp.shared_experts.down_proj": {
263
+ "bits": 16,
264
+ "data_type": "float"
265
+ },
266
+ "model.layers.11.mlp.shared_experts.gate_proj": {
267
+ "bits": 16,
268
+ "data_type": "float"
269
+ },
270
+ "model.layers.11.mlp.shared_experts.up_proj": {
271
+ "bits": 16,
272
+ "data_type": "float"
273
+ },
274
+ "model.layers.11.self_attn.indexer.weights_proj": {
275
+ "bits": 16,
276
+ "data_type": "float"
277
+ },
278
+ "model.layers.11.self_attn.indexer.wk": {
279
+ "bits": 16,
280
+ "data_type": "float"
281
+ },
282
+ "model.layers.11.self_attn.indexer.wq_b": {
283
+ "bits": 16,
284
+ "data_type": "float"
285
+ },
286
+ "model.layers.11.self_attn.kv_a_proj_with_mqa": {
287
+ "bits": 16,
288
+ "data_type": "float"
289
+ },
290
+ "model.layers.11.self_attn.kv_b_proj": {
291
+ "bits": 16,
292
+ "data_type": "float"
293
+ },
294
+ "model.layers.11.self_attn.o_proj": {
295
+ "bits": 16,
296
+ "data_type": "float"
297
+ },
298
+ "model.layers.11.self_attn.q_a_proj": {
299
+ "bits": 16,
300
+ "data_type": "float"
301
+ },
302
+ "model.layers.11.self_attn.q_b_proj": {
303
+ "bits": 16,
304
+ "data_type": "float"
305
+ },
306
+ "model.layers.12.mlp.shared_experts.down_proj": {
307
+ "bits": 16,
308
+ "data_type": "float"
309
+ },
310
+ "model.layers.12.mlp.shared_experts.gate_proj": {
311
+ "bits": 16,
312
+ "data_type": "float"
313
+ },
314
+ "model.layers.12.mlp.shared_experts.up_proj": {
315
+ "bits": 16,
316
+ "data_type": "float"
317
+ },
318
+ "model.layers.12.self_attn.indexer.weights_proj": {
319
+ "bits": 16,
320
+ "data_type": "float"
321
+ },
322
+ "model.layers.12.self_attn.indexer.wk": {
323
+ "bits": 16,
324
+ "data_type": "float"
325
+ },
326
+ "model.layers.12.self_attn.indexer.wq_b": {
327
+ "bits": 16,
328
+ "data_type": "float"
329
+ },
330
+ "model.layers.12.self_attn.kv_a_proj_with_mqa": {
331
+ "bits": 16,
332
+ "data_type": "float"
333
+ },
334
+ "model.layers.12.self_attn.kv_b_proj": {
335
+ "bits": 16,
336
+ "data_type": "float"
337
+ },
338
+ "model.layers.12.self_attn.o_proj": {
339
+ "bits": 16,
340
+ "data_type": "float"
341
+ },
342
+ "model.layers.12.self_attn.q_a_proj": {
343
+ "bits": 16,
344
+ "data_type": "float"
345
+ },
346
+ "model.layers.12.self_attn.q_b_proj": {
347
+ "bits": 16,
348
+ "data_type": "float"
349
+ },
350
+ "model.layers.13.mlp.shared_experts.down_proj": {
351
+ "bits": 16,
352
+ "data_type": "float"
353
+ },
354
+ "model.layers.13.mlp.shared_experts.gate_proj": {
355
+ "bits": 16,
356
+ "data_type": "float"
357
+ },
358
+ "model.layers.13.mlp.shared_experts.up_proj": {
359
+ "bits": 16,
360
+ "data_type": "float"
361
+ },
362
+ "model.layers.13.self_attn.indexer.weights_proj": {
363
+ "bits": 16,
364
+ "data_type": "float"
365
+ },
366
+ "model.layers.13.self_attn.indexer.wk": {
367
+ "bits": 16,
368
+ "data_type": "float"
369
+ },
370
+ "model.layers.13.self_attn.indexer.wq_b": {
371
+ "bits": 16,
372
+ "data_type": "float"
373
+ },
374
+ "model.layers.13.self_attn.kv_a_proj_with_mqa": {
375
+ "bits": 16,
376
+ "data_type": "float"
377
+ },
378
+ "model.layers.13.self_attn.kv_b_proj": {
379
+ "bits": 16,
380
+ "data_type": "float"
381
+ },
382
+ "model.layers.13.self_attn.o_proj": {
383
+ "bits": 16,
384
+ "data_type": "float"
385
+ },
386
+ "model.layers.13.self_attn.q_a_proj": {
387
+ "bits": 16,
388
+ "data_type": "float"
389
+ },
390
+ "model.layers.13.self_attn.q_b_proj": {
391
+ "bits": 16,
392
+ "data_type": "float"
393
+ },
394
+ "model.layers.14.mlp.shared_experts.down_proj": {
395
+ "bits": 16,
396
+ "data_type": "float"
397
+ },
398
+ "model.layers.14.mlp.shared_experts.gate_proj": {
399
+ "bits": 16,
400
+ "data_type": "float"
401
+ },
402
+ "model.layers.14.mlp.shared_experts.up_proj": {
403
+ "bits": 16,
404
+ "data_type": "float"
405
+ },
406
+ "model.layers.14.self_attn.indexer.weights_proj": {
407
+ "bits": 16,
408
+ "data_type": "float"
409
+ },
410
+ "model.layers.14.self_attn.indexer.wk": {
411
+ "bits": 16,
412
+ "data_type": "float"
413
+ },
414
+ "model.layers.14.self_attn.indexer.wq_b": {
415
+ "bits": 16,
416
+ "data_type": "float"
417
+ },
418
+ "model.layers.14.self_attn.kv_a_proj_with_mqa": {
419
+ "bits": 16,
420
+ "data_type": "float"
421
+ },
422
+ "model.layers.14.self_attn.kv_b_proj": {
423
+ "bits": 16,
424
+ "data_type": "float"
425
+ },
426
+ "model.layers.14.self_attn.o_proj": {
427
+ "bits": 16,
428
+ "data_type": "float"
429
+ },
430
+ "model.layers.14.self_attn.q_a_proj": {
431
+ "bits": 16,
432
+ "data_type": "float"
433
+ },
434
+ "model.layers.14.self_attn.q_b_proj": {
435
+ "bits": 16,
436
+ "data_type": "float"
437
+ },
438
+ "model.layers.15.mlp.shared_experts.down_proj": {
439
+ "bits": 16,
440
+ "data_type": "float"
441
+ },
442
+ "model.layers.15.mlp.shared_experts.gate_proj": {
443
+ "bits": 16,
444
+ "data_type": "float"
445
+ },
446
+ "model.layers.15.mlp.shared_experts.up_proj": {
447
+ "bits": 16,
448
+ "data_type": "float"
449
+ },
450
+ "model.layers.15.self_attn.indexer.weights_proj": {
451
+ "bits": 16,
452
+ "data_type": "float"
453
+ },
454
+ "model.layers.15.self_attn.indexer.wk": {
455
+ "bits": 16,
456
+ "data_type": "float"
457
+ },
458
+ "model.layers.15.self_attn.indexer.wq_b": {
459
+ "bits": 16,
460
+ "data_type": "float"
461
+ },
462
+ "model.layers.15.self_attn.kv_a_proj_with_mqa": {
463
+ "bits": 16,
464
+ "data_type": "float"
465
+ },
466
+ "model.layers.15.self_attn.kv_b_proj": {
467
+ "bits": 16,
468
+ "data_type": "float"
469
+ },
470
+ "model.layers.15.self_attn.o_proj": {
471
+ "bits": 16,
472
+ "data_type": "float"
473
+ },
474
+ "model.layers.15.self_attn.q_a_proj": {
475
+ "bits": 16,
476
+ "data_type": "float"
477
+ },
478
+ "model.layers.15.self_attn.q_b_proj": {
479
+ "bits": 16,
480
+ "data_type": "float"
481
+ },
482
+ "model.layers.16.mlp.shared_experts.down_proj": {
483
+ "bits": 16,
484
+ "data_type": "float"
485
+ },
486
+ "model.layers.16.mlp.shared_experts.gate_proj": {
487
+ "bits": 16,
488
+ "data_type": "float"
489
+ },
490
+ "model.layers.16.mlp.shared_experts.up_proj": {
491
+ "bits": 16,
492
+ "data_type": "float"
493
+ },
494
+ "model.layers.16.self_attn.indexer.weights_proj": {
495
+ "bits": 16,
496
+ "data_type": "float"
497
+ },
498
+ "model.layers.16.self_attn.indexer.wk": {
499
+ "bits": 16,
500
+ "data_type": "float"
501
+ },
502
+ "model.layers.16.self_attn.indexer.wq_b": {
503
+ "bits": 16,
504
+ "data_type": "float"
505
+ },
506
+ "model.layers.16.self_attn.kv_a_proj_with_mqa": {
507
+ "bits": 16,
508
+ "data_type": "float"
509
+ },
510
+ "model.layers.16.self_attn.kv_b_proj": {
511
+ "bits": 16,
512
+ "data_type": "float"
513
+ },
514
+ "model.layers.16.self_attn.o_proj": {
515
+ "bits": 16,
516
+ "data_type": "float"
517
+ },
518
+ "model.layers.16.self_attn.q_a_proj": {
519
+ "bits": 16,
520
+ "data_type": "float"
521
+ },
522
+ "model.layers.16.self_attn.q_b_proj": {
523
+ "bits": 16,
524
+ "data_type": "float"
525
+ },
526
+ "model.layers.17.mlp.shared_experts.down_proj": {
527
+ "bits": 16,
528
+ "data_type": "float"
529
+ },
530
+ "model.layers.17.mlp.shared_experts.gate_proj": {
531
+ "bits": 16,
532
+ "data_type": "float"
533
+ },
534
+ "model.layers.17.mlp.shared_experts.up_proj": {
535
+ "bits": 16,
536
+ "data_type": "float"
537
+ },
538
+ "model.layers.17.self_attn.indexer.weights_proj": {
539
+ "bits": 16,
540
+ "data_type": "float"
541
+ },
542
+ "model.layers.17.self_attn.indexer.wk": {
543
+ "bits": 16,
544
+ "data_type": "float"
545
+ },
546
+ "model.layers.17.self_attn.indexer.wq_b": {
547
+ "bits": 16,
548
+ "data_type": "float"
549
+ },
550
+ "model.layers.17.self_attn.kv_a_proj_with_mqa": {
551
+ "bits": 16,
552
+ "data_type": "float"
553
+ },
554
+ "model.layers.17.self_attn.kv_b_proj": {
555
+ "bits": 16,
556
+ "data_type": "float"
557
+ },
558
+ "model.layers.17.self_attn.o_proj": {
559
+ "bits": 16,
560
+ "data_type": "float"
561
+ },
562
+ "model.layers.17.self_attn.q_a_proj": {
563
+ "bits": 16,
564
+ "data_type": "float"
565
+ },
566
+ "model.layers.17.self_attn.q_b_proj": {
567
+ "bits": 16,
568
+ "data_type": "float"
569
+ },
570
+ "model.layers.18.mlp.shared_experts.down_proj": {
571
+ "bits": 16,
572
+ "data_type": "float"
573
+ },
574
+ "model.layers.18.mlp.shared_experts.gate_proj": {
575
+ "bits": 16,
576
+ "data_type": "float"
577
+ },
578
+ "model.layers.18.mlp.shared_experts.up_proj": {
579
+ "bits": 16,
580
+ "data_type": "float"
581
+ },
582
+ "model.layers.18.self_attn.indexer.weights_proj": {
583
+ "bits": 16,
584
+ "data_type": "float"
585
+ },
586
+ "model.layers.18.self_attn.indexer.wk": {
587
+ "bits": 16,
588
+ "data_type": "float"
589
+ },
590
+ "model.layers.18.self_attn.indexer.wq_b": {
591
+ "bits": 16,
592
+ "data_type": "float"
593
+ },
594
+ "model.layers.18.self_attn.kv_a_proj_with_mqa": {
595
+ "bits": 16,
596
+ "data_type": "float"
597
+ },
598
+ "model.layers.18.self_attn.kv_b_proj": {
599
+ "bits": 16,
600
+ "data_type": "float"
601
+ },
602
+ "model.layers.18.self_attn.o_proj": {
603
+ "bits": 16,
604
+ "data_type": "float"
605
+ },
606
+ "model.layers.18.self_attn.q_a_proj": {
607
+ "bits": 16,
608
+ "data_type": "float"
609
+ },
610
+ "model.layers.18.self_attn.q_b_proj": {
611
+ "bits": 16,
612
+ "data_type": "float"
613
+ },
614
+ "model.layers.19.mlp.shared_experts.down_proj": {
615
+ "bits": 16,
616
+ "data_type": "float"
617
+ },
618
+ "model.layers.19.mlp.shared_experts.gate_proj": {
619
+ "bits": 16,
620
+ "data_type": "float"
621
+ },
622
+ "model.layers.19.mlp.shared_experts.up_proj": {
623
+ "bits": 16,
624
+ "data_type": "float"
625
+ },
626
+ "model.layers.19.self_attn.indexer.weights_proj": {
627
+ "bits": 16,
628
+ "data_type": "float"
629
+ },
630
+ "model.layers.19.self_attn.indexer.wk": {
631
+ "bits": 16,
632
+ "data_type": "float"
633
+ },
634
+ "model.layers.19.self_attn.indexer.wq_b": {
635
+ "bits": 16,
636
+ "data_type": "float"
637
+ },
638
+ "model.layers.19.self_attn.kv_a_proj_with_mqa": {
639
+ "bits": 16,
640
+ "data_type": "float"
641
+ },
642
+ "model.layers.19.self_attn.kv_b_proj": {
643
+ "bits": 16,
644
+ "data_type": "float"
645
+ },
646
+ "model.layers.19.self_attn.o_proj": {
647
+ "bits": 16,
648
+ "data_type": "float"
649
+ },
650
+ "model.layers.19.self_attn.q_a_proj": {
651
+ "bits": 16,
652
+ "data_type": "float"
653
+ },
654
+ "model.layers.19.self_attn.q_b_proj": {
655
+ "bits": 16,
656
+ "data_type": "float"
657
+ },
658
+ "model.layers.2.mlp.down_proj": {
659
+ "bits": 16,
660
+ "data_type": "float"
661
+ },
662
+ "model.layers.2.mlp.gate_proj": {
663
+ "bits": 16,
664
+ "data_type": "float"
665
+ },
666
+ "model.layers.2.mlp.up_proj": {
667
+ "bits": 16,
668
+ "data_type": "float"
669
+ },
670
+ "model.layers.2.self_attn.indexer.weights_proj": {
671
+ "bits": 16,
672
+ "data_type": "float"
673
+ },
674
+ "model.layers.2.self_attn.indexer.wk": {
675
+ "bits": 16,
676
+ "data_type": "float"
677
+ },
678
+ "model.layers.2.self_attn.indexer.wq_b": {
679
+ "bits": 16,
680
+ "data_type": "float"
681
+ },
682
+ "model.layers.2.self_attn.kv_a_proj_with_mqa": {
683
+ "bits": 16,
684
+ "data_type": "float"
685
+ },
686
+ "model.layers.2.self_attn.kv_b_proj": {
687
+ "bits": 16,
688
+ "data_type": "float"
689
+ },
690
+ "model.layers.2.self_attn.o_proj": {
691
+ "bits": 16,
692
+ "data_type": "float"
693
+ },
694
+ "model.layers.2.self_attn.q_a_proj": {
695
+ "bits": 16,
696
+ "data_type": "float"
697
+ },
698
+ "model.layers.2.self_attn.q_b_proj": {
699
+ "bits": 16,
700
+ "data_type": "float"
701
+ },
702
+ "model.layers.20.mlp.shared_experts.down_proj": {
703
+ "bits": 16,
704
+ "data_type": "float"
705
+ },
706
+ "model.layers.20.mlp.shared_experts.gate_proj": {
707
+ "bits": 16,
708
+ "data_type": "float"
709
+ },
710
+ "model.layers.20.mlp.shared_experts.up_proj": {
711
+ "bits": 16,
712
+ "data_type": "float"
713
+ },
714
+ "model.layers.20.self_attn.indexer.weights_proj": {
715
+ "bits": 16,
716
+ "data_type": "float"
717
+ },
718
+ "model.layers.20.self_attn.indexer.wk": {
719
+ "bits": 16,
720
+ "data_type": "float"
721
+ },
722
+ "model.layers.20.self_attn.indexer.wq_b": {
723
+ "bits": 16,
724
+ "data_type": "float"
725
+ },
726
+ "model.layers.20.self_attn.kv_a_proj_with_mqa": {
727
+ "bits": 16,
728
+ "data_type": "float"
729
+ },
730
+ "model.layers.20.self_attn.kv_b_proj": {
731
+ "bits": 16,
732
+ "data_type": "float"
733
+ },
734
+ "model.layers.20.self_attn.o_proj": {
735
+ "bits": 16,
736
+ "data_type": "float"
737
+ },
738
+ "model.layers.20.self_attn.q_a_proj": {
739
+ "bits": 16,
740
+ "data_type": "float"
741
+ },
742
+ "model.layers.20.self_attn.q_b_proj": {
743
+ "bits": 16,
744
+ "data_type": "float"
745
+ },
746
+ "model.layers.21.mlp.shared_experts.down_proj": {
747
+ "bits": 16,
748
+ "data_type": "float"
749
+ },
750
+ "model.layers.21.mlp.shared_experts.gate_proj": {
751
+ "bits": 16,
752
+ "data_type": "float"
753
+ },
754
+ "model.layers.21.mlp.shared_experts.up_proj": {
755
+ "bits": 16,
756
+ "data_type": "float"
757
+ },
758
+ "model.layers.21.self_attn.indexer.weights_proj": {
759
+ "bits": 16,
760
+ "data_type": "float"
761
+ },
762
+ "model.layers.21.self_attn.indexer.wk": {
763
+ "bits": 16,
764
+ "data_type": "float"
765
+ },
766
+ "model.layers.21.self_attn.indexer.wq_b": {
767
+ "bits": 16,
768
+ "data_type": "float"
769
+ },
770
+ "model.layers.21.self_attn.kv_a_proj_with_mqa": {
771
+ "bits": 16,
772
+ "data_type": "float"
773
+ },
774
+ "model.layers.21.self_attn.kv_b_proj": {
775
+ "bits": 16,
776
+ "data_type": "float"
777
+ },
778
+ "model.layers.21.self_attn.o_proj": {
779
+ "bits": 16,
780
+ "data_type": "float"
781
+ },
782
+ "model.layers.21.self_attn.q_a_proj": {
783
+ "bits": 16,
784
+ "data_type": "float"
785
+ },
786
+ "model.layers.21.self_attn.q_b_proj": {
787
+ "bits": 16,
788
+ "data_type": "float"
789
+ },
790
+ "model.layers.22.mlp.shared_experts.down_proj": {
791
+ "bits": 16,
792
+ "data_type": "float"
793
+ },
794
+ "model.layers.22.mlp.shared_experts.gate_proj": {
795
+ "bits": 16,
796
+ "data_type": "float"
797
+ },
798
+ "model.layers.22.mlp.shared_experts.up_proj": {
799
+ "bits": 16,
800
+ "data_type": "float"
801
+ },
802
+ "model.layers.22.self_attn.indexer.weights_proj": {
803
+ "bits": 16,
804
+ "data_type": "float"
805
+ },
806
+ "model.layers.22.self_attn.indexer.wk": {
807
+ "bits": 16,
808
+ "data_type": "float"
809
+ },
810
+ "model.layers.22.self_attn.indexer.wq_b": {
811
+ "bits": 16,
812
+ "data_type": "float"
813
+ },
814
+ "model.layers.22.self_attn.kv_a_proj_with_mqa": {
815
+ "bits": 16,
816
+ "data_type": "float"
817
+ },
818
+ "model.layers.22.self_attn.kv_b_proj": {
819
+ "bits": 16,
820
+ "data_type": "float"
821
+ },
822
+ "model.layers.22.self_attn.o_proj": {
823
+ "bits": 16,
824
+ "data_type": "float"
825
+ },
826
+ "model.layers.22.self_attn.q_a_proj": {
827
+ "bits": 16,
828
+ "data_type": "float"
829
+ },
830
+ "model.layers.22.self_attn.q_b_proj": {
831
+ "bits": 16,
832
+ "data_type": "float"
833
+ },
834
+ "model.layers.23.mlp.shared_experts.down_proj": {
835
+ "bits": 16,
836
+ "data_type": "float"
837
+ },
838
+ "model.layers.23.mlp.shared_experts.gate_proj": {
839
+ "bits": 16,
840
+ "data_type": "float"
841
+ },
842
+ "model.layers.23.mlp.shared_experts.up_proj": {
843
+ "bits": 16,
844
+ "data_type": "float"
845
+ },
846
+ "model.layers.23.self_attn.indexer.weights_proj": {
847
+ "bits": 16,
848
+ "data_type": "float"
849
+ },
850
+ "model.layers.23.self_attn.indexer.wk": {
851
+ "bits": 16,
852
+ "data_type": "float"
853
+ },
854
+ "model.layers.23.self_attn.indexer.wq_b": {
855
+ "bits": 16,
856
+ "data_type": "float"
857
+ },
858
+ "model.layers.23.self_attn.kv_a_proj_with_mqa": {
859
+ "bits": 16,
860
+ "data_type": "float"
861
+ },
862
+ "model.layers.23.self_attn.kv_b_proj": {
863
+ "bits": 16,
864
+ "data_type": "float"
865
+ },
866
+ "model.layers.23.self_attn.o_proj": {
867
+ "bits": 16,
868
+ "data_type": "float"
869
+ },
870
+ "model.layers.23.self_attn.q_a_proj": {
871
+ "bits": 16,
872
+ "data_type": "float"
873
+ },
874
+ "model.layers.23.self_attn.q_b_proj": {
875
+ "bits": 16,
876
+ "data_type": "float"
877
+ },
878
+ "model.layers.24.mlp.shared_experts.down_proj": {
879
+ "bits": 16,
880
+ "data_type": "float"
881
+ },
882
+ "model.layers.24.mlp.shared_experts.gate_proj": {
883
+ "bits": 16,
884
+ "data_type": "float"
885
+ },
886
+ "model.layers.24.mlp.shared_experts.up_proj": {
887
+ "bits": 16,
888
+ "data_type": "float"
889
+ },
890
+ "model.layers.24.self_attn.indexer.weights_proj": {
891
+ "bits": 16,
892
+ "data_type": "float"
893
+ },
894
+ "model.layers.24.self_attn.indexer.wk": {
895
+ "bits": 16,
896
+ "data_type": "float"
897
+ },
898
+ "model.layers.24.self_attn.indexer.wq_b": {
899
+ "bits": 16,
900
+ "data_type": "float"
901
+ },
902
+ "model.layers.24.self_attn.kv_a_proj_with_mqa": {
903
+ "bits": 16,
904
+ "data_type": "float"
905
+ },
906
+ "model.layers.24.self_attn.kv_b_proj": {
907
+ "bits": 16,
908
+ "data_type": "float"
909
+ },
910
+ "model.layers.24.self_attn.o_proj": {
911
+ "bits": 16,
912
+ "data_type": "float"
913
+ },
914
+ "model.layers.24.self_attn.q_a_proj": {
915
+ "bits": 16,
916
+ "data_type": "float"
917
+ },
918
+ "model.layers.24.self_attn.q_b_proj": {
919
+ "bits": 16,
920
+ "data_type": "float"
921
+ },
922
+ "model.layers.25.mlp.shared_experts.down_proj": {
923
+ "bits": 16,
924
+ "data_type": "float"
925
+ },
926
+ "model.layers.25.mlp.shared_experts.gate_proj": {
927
+ "bits": 16,
928
+ "data_type": "float"
929
+ },
930
+ "model.layers.25.mlp.shared_experts.up_proj": {
931
+ "bits": 16,
932
+ "data_type": "float"
933
+ },
934
+ "model.layers.25.self_attn.indexer.weights_proj": {
935
+ "bits": 16,
936
+ "data_type": "float"
937
+ },
938
+ "model.layers.25.self_attn.indexer.wk": {
939
+ "bits": 16,
940
+ "data_type": "float"
941
+ },
942
+ "model.layers.25.self_attn.indexer.wq_b": {
943
+ "bits": 16,
944
+ "data_type": "float"
945
+ },
946
+ "model.layers.25.self_attn.kv_a_proj_with_mqa": {
947
+ "bits": 16,
948
+ "data_type": "float"
949
+ },
950
+ "model.layers.25.self_attn.kv_b_proj": {
951
+ "bits": 16,
952
+ "data_type": "float"
953
+ },
954
+ "model.layers.25.self_attn.o_proj": {
955
+ "bits": 16,
956
+ "data_type": "float"
957
+ },
958
+ "model.layers.25.self_attn.q_a_proj": {
959
+ "bits": 16,
960
+ "data_type": "float"
961
+ },
962
+ "model.layers.25.self_attn.q_b_proj": {
963
+ "bits": 16,
964
+ "data_type": "float"
965
+ },
966
+ "model.layers.26.mlp.shared_experts.down_proj": {
967
+ "bits": 16,
968
+ "data_type": "float"
969
+ },
970
+ "model.layers.26.mlp.shared_experts.gate_proj": {
971
+ "bits": 16,
972
+ "data_type": "float"
973
+ },
974
+ "model.layers.26.mlp.shared_experts.up_proj": {
975
+ "bits": 16,
976
+ "data_type": "float"
977
+ },
978
+ "model.layers.26.self_attn.indexer.weights_proj": {
979
+ "bits": 16,
980
+ "data_type": "float"
981
+ },
982
+ "model.layers.26.self_attn.indexer.wk": {
983
+ "bits": 16,
984
+ "data_type": "float"
985
+ },
986
+ "model.layers.26.self_attn.indexer.wq_b": {
987
+ "bits": 16,
988
+ "data_type": "float"
989
+ },
990
+ "model.layers.26.self_attn.kv_a_proj_with_mqa": {
991
+ "bits": 16,
992
+ "data_type": "float"
993
+ },
994
+ "model.layers.26.self_attn.kv_b_proj": {
995
+ "bits": 16,
996
+ "data_type": "float"
997
+ },
998
+ "model.layers.26.self_attn.o_proj": {
999
+ "bits": 16,
1000
+ "data_type": "float"
1001
+ },
1002
+ "model.layers.26.self_attn.q_a_proj": {
1003
+ "bits": 16,
1004
+ "data_type": "float"
1005
+ },
1006
+ "model.layers.26.self_attn.q_b_proj": {
1007
+ "bits": 16,
1008
+ "data_type": "float"
1009
+ },
1010
+ "model.layers.27.mlp.shared_experts.down_proj": {
1011
+ "bits": 16,
1012
+ "data_type": "float"
1013
+ },
1014
+ "model.layers.27.mlp.shared_experts.gate_proj": {
1015
+ "bits": 16,
1016
+ "data_type": "float"
1017
+ },
1018
+ "model.layers.27.mlp.shared_experts.up_proj": {
1019
+ "bits": 16,
1020
+ "data_type": "float"
1021
+ },
1022
+ "model.layers.27.self_attn.indexer.weights_proj": {
1023
+ "bits": 16,
1024
+ "data_type": "float"
1025
+ },
1026
+ "model.layers.27.self_attn.indexer.wk": {
1027
+ "bits": 16,
1028
+ "data_type": "float"
1029
+ },
1030
+ "model.layers.27.self_attn.indexer.wq_b": {
1031
+ "bits": 16,
1032
+ "data_type": "float"
1033
+ },
1034
+ "model.layers.27.self_attn.kv_a_proj_with_mqa": {
1035
+ "bits": 16,
1036
+ "data_type": "float"
1037
+ },
1038
+ "model.layers.27.self_attn.kv_b_proj": {
1039
+ "bits": 16,
1040
+ "data_type": "float"
1041
+ },
1042
+ "model.layers.27.self_attn.o_proj": {
1043
+ "bits": 16,
1044
+ "data_type": "float"
1045
+ },
1046
+ "model.layers.27.self_attn.q_a_proj": {
1047
+ "bits": 16,
1048
+ "data_type": "float"
1049
+ },
1050
+ "model.layers.27.self_attn.q_b_proj": {
1051
+ "bits": 16,
1052
+ "data_type": "float"
1053
+ },
1054
+ "model.layers.28.mlp.shared_experts.down_proj": {
1055
+ "bits": 16,
1056
+ "data_type": "float"
1057
+ },
1058
+ "model.layers.28.mlp.shared_experts.gate_proj": {
1059
+ "bits": 16,
1060
+ "data_type": "float"
1061
+ },
1062
+ "model.layers.28.mlp.shared_experts.up_proj": {
1063
+ "bits": 16,
1064
+ "data_type": "float"
1065
+ },
1066
+ "model.layers.28.self_attn.indexer.weights_proj": {
1067
+ "bits": 16,
1068
+ "data_type": "float"
1069
+ },
1070
+ "model.layers.28.self_attn.indexer.wk": {
1071
+ "bits": 16,
1072
+ "data_type": "float"
1073
+ },
1074
+ "model.layers.28.self_attn.indexer.wq_b": {
1075
+ "bits": 16,
1076
+ "data_type": "float"
1077
+ },
1078
+ "model.layers.28.self_attn.kv_a_proj_with_mqa": {
1079
+ "bits": 16,
1080
+ "data_type": "float"
1081
+ },
1082
+ "model.layers.28.self_attn.kv_b_proj": {
1083
+ "bits": 16,
1084
+ "data_type": "float"
1085
+ },
1086
+ "model.layers.28.self_attn.o_proj": {
1087
+ "bits": 16,
1088
+ "data_type": "float"
1089
+ },
1090
+ "model.layers.28.self_attn.q_a_proj": {
1091
+ "bits": 16,
1092
+ "data_type": "float"
1093
+ },
1094
+ "model.layers.28.self_attn.q_b_proj": {
1095
+ "bits": 16,
1096
+ "data_type": "float"
1097
+ },
1098
+ "model.layers.29.mlp.shared_experts.down_proj": {
1099
+ "bits": 16,
1100
+ "data_type": "float"
1101
+ },
1102
+ "model.layers.29.mlp.shared_experts.gate_proj": {
1103
+ "bits": 16,
1104
+ "data_type": "float"
1105
+ },
1106
+ "model.layers.29.mlp.shared_experts.up_proj": {
1107
+ "bits": 16,
1108
+ "data_type": "float"
1109
+ },
1110
+ "model.layers.29.self_attn.indexer.weights_proj": {
1111
+ "bits": 16,
1112
+ "data_type": "float"
1113
+ },
1114
+ "model.layers.29.self_attn.indexer.wk": {
1115
+ "bits": 16,
1116
+ "data_type": "float"
1117
+ },
1118
+ "model.layers.29.self_attn.indexer.wq_b": {
1119
+ "bits": 16,
1120
+ "data_type": "float"
1121
+ },
1122
+ "model.layers.29.self_attn.kv_a_proj_with_mqa": {
1123
+ "bits": 16,
1124
+ "data_type": "float"
1125
+ },
1126
+ "model.layers.29.self_attn.kv_b_proj": {
1127
+ "bits": 16,
1128
+ "data_type": "float"
1129
+ },
1130
+ "model.layers.29.self_attn.o_proj": {
1131
+ "bits": 16,
1132
+ "data_type": "float"
1133
+ },
1134
+ "model.layers.29.self_attn.q_a_proj": {
1135
+ "bits": 16,
1136
+ "data_type": "float"
1137
+ },
1138
+ "model.layers.29.self_attn.q_b_proj": {
1139
+ "bits": 16,
1140
+ "data_type": "float"
1141
+ },
1142
+ "model.layers.3.mlp.shared_experts.down_proj": {
1143
+ "bits": 16,
1144
+ "data_type": "float"
1145
+ },
1146
+ "model.layers.3.mlp.shared_experts.gate_proj": {
1147
+ "bits": 16,
1148
+ "data_type": "float"
1149
+ },
1150
+ "model.layers.3.mlp.shared_experts.up_proj": {
1151
+ "bits": 16,
1152
+ "data_type": "float"
1153
+ },
1154
+ "model.layers.3.self_attn.indexer.weights_proj": {
1155
+ "bits": 16,
1156
+ "data_type": "float"
1157
+ },
1158
+ "model.layers.3.self_attn.indexer.wk": {
1159
+ "bits": 16,
1160
+ "data_type": "float"
1161
+ },
1162
+ "model.layers.3.self_attn.indexer.wq_b": {
1163
+ "bits": 16,
1164
+ "data_type": "float"
1165
+ },
1166
+ "model.layers.3.self_attn.kv_a_proj_with_mqa": {
1167
+ "bits": 16,
1168
+ "data_type": "float"
1169
+ },
1170
+ "model.layers.3.self_attn.kv_b_proj": {
1171
+ "bits": 16,
1172
+ "data_type": "float"
1173
+ },
1174
+ "model.layers.3.self_attn.o_proj": {
1175
+ "bits": 16,
1176
+ "data_type": "float"
1177
+ },
1178
+ "model.layers.3.self_attn.q_a_proj": {
1179
+ "bits": 16,
1180
+ "data_type": "float"
1181
+ },
1182
+ "model.layers.3.self_attn.q_b_proj": {
1183
+ "bits": 16,
1184
+ "data_type": "float"
1185
+ },
1186
+ "model.layers.30.mlp.shared_experts.down_proj": {
1187
+ "bits": 16,
1188
+ "data_type": "float"
1189
+ },
1190
+ "model.layers.30.mlp.shared_experts.gate_proj": {
1191
+ "bits": 16,
1192
+ "data_type": "float"
1193
+ },
1194
+ "model.layers.30.mlp.shared_experts.up_proj": {
1195
+ "bits": 16,
1196
+ "data_type": "float"
1197
+ },
1198
+ "model.layers.30.self_attn.indexer.weights_proj": {
1199
+ "bits": 16,
1200
+ "data_type": "float"
1201
+ },
1202
+ "model.layers.30.self_attn.indexer.wk": {
1203
+ "bits": 16,
1204
+ "data_type": "float"
1205
+ },
1206
+ "model.layers.30.self_attn.indexer.wq_b": {
1207
+ "bits": 16,
1208
+ "data_type": "float"
1209
+ },
1210
+ "model.layers.30.self_attn.kv_a_proj_with_mqa": {
1211
+ "bits": 16,
1212
+ "data_type": "float"
1213
+ },
1214
+ "model.layers.30.self_attn.kv_b_proj": {
1215
+ "bits": 16,
1216
+ "data_type": "float"
1217
+ },
1218
+ "model.layers.30.self_attn.o_proj": {
1219
+ "bits": 16,
1220
+ "data_type": "float"
1221
+ },
1222
+ "model.layers.30.self_attn.q_a_proj": {
1223
+ "bits": 16,
1224
+ "data_type": "float"
1225
+ },
1226
+ "model.layers.30.self_attn.q_b_proj": {
1227
+ "bits": 16,
1228
+ "data_type": "float"
1229
+ },
1230
+ "model.layers.31.mlp.shared_experts.down_proj": {
1231
+ "bits": 16,
1232
+ "data_type": "float"
1233
+ },
1234
+ "model.layers.31.mlp.shared_experts.gate_proj": {
1235
+ "bits": 16,
1236
+ "data_type": "float"
1237
+ },
1238
+ "model.layers.31.mlp.shared_experts.up_proj": {
1239
+ "bits": 16,
1240
+ "data_type": "float"
1241
+ },
1242
+ "model.layers.31.self_attn.indexer.weights_proj": {
1243
+ "bits": 16,
1244
+ "data_type": "float"
1245
+ },
1246
+ "model.layers.31.self_attn.indexer.wk": {
1247
+ "bits": 16,
1248
+ "data_type": "float"
1249
+ },
1250
+ "model.layers.31.self_attn.indexer.wq_b": {
1251
+ "bits": 16,
1252
+ "data_type": "float"
1253
+ },
1254
+ "model.layers.31.self_attn.kv_a_proj_with_mqa": {
1255
+ "bits": 16,
1256
+ "data_type": "float"
1257
+ },
1258
+ "model.layers.31.self_attn.kv_b_proj": {
1259
+ "bits": 16,
1260
+ "data_type": "float"
1261
+ },
1262
+ "model.layers.31.self_attn.o_proj": {
1263
+ "bits": 16,
1264
+ "data_type": "float"
1265
+ },
1266
+ "model.layers.31.self_attn.q_a_proj": {
1267
+ "bits": 16,
1268
+ "data_type": "float"
1269
+ },
1270
+ "model.layers.31.self_attn.q_b_proj": {
1271
+ "bits": 16,
1272
+ "data_type": "float"
1273
+ },
1274
+ "model.layers.32.mlp.shared_experts.down_proj": {
1275
+ "bits": 16,
1276
+ "data_type": "float"
1277
+ },
1278
+ "model.layers.32.mlp.shared_experts.gate_proj": {
1279
+ "bits": 16,
1280
+ "data_type": "float"
1281
+ },
1282
+ "model.layers.32.mlp.shared_experts.up_proj": {
1283
+ "bits": 16,
1284
+ "data_type": "float"
1285
+ },
1286
+ "model.layers.32.self_attn.indexer.weights_proj": {
1287
+ "bits": 16,
1288
+ "data_type": "float"
1289
+ },
1290
+ "model.layers.32.self_attn.indexer.wk": {
1291
+ "bits": 16,
1292
+ "data_type": "float"
1293
+ },
1294
+ "model.layers.32.self_attn.indexer.wq_b": {
1295
+ "bits": 16,
1296
+ "data_type": "float"
1297
+ },
1298
+ "model.layers.32.self_attn.kv_a_proj_with_mqa": {
1299
+ "bits": 16,
1300
+ "data_type": "float"
1301
+ },
1302
+ "model.layers.32.self_attn.kv_b_proj": {
1303
+ "bits": 16,
1304
+ "data_type": "float"
1305
+ },
1306
+ "model.layers.32.self_attn.o_proj": {
1307
+ "bits": 16,
1308
+ "data_type": "float"
1309
+ },
1310
+ "model.layers.32.self_attn.q_a_proj": {
1311
+ "bits": 16,
1312
+ "data_type": "float"
1313
+ },
1314
+ "model.layers.32.self_attn.q_b_proj": {
1315
+ "bits": 16,
1316
+ "data_type": "float"
1317
+ },
1318
+ "model.layers.33.mlp.shared_experts.down_proj": {
1319
+ "bits": 16,
1320
+ "data_type": "float"
1321
+ },
1322
+ "model.layers.33.mlp.shared_experts.gate_proj": {
1323
+ "bits": 16,
1324
+ "data_type": "float"
1325
+ },
1326
+ "model.layers.33.mlp.shared_experts.up_proj": {
1327
+ "bits": 16,
1328
+ "data_type": "float"
1329
+ },
1330
+ "model.layers.33.self_attn.indexer.weights_proj": {
1331
+ "bits": 16,
1332
+ "data_type": "float"
1333
+ },
1334
+ "model.layers.33.self_attn.indexer.wk": {
1335
+ "bits": 16,
1336
+ "data_type": "float"
1337
+ },
1338
+ "model.layers.33.self_attn.indexer.wq_b": {
1339
+ "bits": 16,
1340
+ "data_type": "float"
1341
+ },
1342
+ "model.layers.33.self_attn.kv_a_proj_with_mqa": {
1343
+ "bits": 16,
1344
+ "data_type": "float"
1345
+ },
1346
+ "model.layers.33.self_attn.kv_b_proj": {
1347
+ "bits": 16,
1348
+ "data_type": "float"
1349
+ },
1350
+ "model.layers.33.self_attn.o_proj": {
1351
+ "bits": 16,
1352
+ "data_type": "float"
1353
+ },
1354
+ "model.layers.33.self_attn.q_a_proj": {
1355
+ "bits": 16,
1356
+ "data_type": "float"
1357
+ },
1358
+ "model.layers.33.self_attn.q_b_proj": {
1359
+ "bits": 16,
1360
+ "data_type": "float"
1361
+ },
1362
+ "model.layers.34.mlp.shared_experts.down_proj": {
1363
+ "bits": 16,
1364
+ "data_type": "float"
1365
+ },
1366
+ "model.layers.34.mlp.shared_experts.gate_proj": {
1367
+ "bits": 16,
1368
+ "data_type": "float"
1369
+ },
1370
+ "model.layers.34.mlp.shared_experts.up_proj": {
1371
+ "bits": 16,
1372
+ "data_type": "float"
1373
+ },
1374
+ "model.layers.34.self_attn.indexer.weights_proj": {
1375
+ "bits": 16,
1376
+ "data_type": "float"
1377
+ },
1378
+ "model.layers.34.self_attn.indexer.wk": {
1379
+ "bits": 16,
1380
+ "data_type": "float"
1381
+ },
1382
+ "model.layers.34.self_attn.indexer.wq_b": {
1383
+ "bits": 16,
1384
+ "data_type": "float"
1385
+ },
1386
+ "model.layers.34.self_attn.kv_a_proj_with_mqa": {
1387
+ "bits": 16,
1388
+ "data_type": "float"
1389
+ },
1390
+ "model.layers.34.self_attn.kv_b_proj": {
1391
+ "bits": 16,
1392
+ "data_type": "float"
1393
+ },
1394
+ "model.layers.34.self_attn.o_proj": {
1395
+ "bits": 16,
1396
+ "data_type": "float"
1397
+ },
1398
+ "model.layers.34.self_attn.q_a_proj": {
1399
+ "bits": 16,
1400
+ "data_type": "float"
1401
+ },
1402
+ "model.layers.34.self_attn.q_b_proj": {
1403
+ "bits": 16,
1404
+ "data_type": "float"
1405
+ },
1406
+ "model.layers.35.mlp.shared_experts.down_proj": {
1407
+ "bits": 16,
1408
+ "data_type": "float"
1409
+ },
1410
+ "model.layers.35.mlp.shared_experts.gate_proj": {
1411
+ "bits": 16,
1412
+ "data_type": "float"
1413
+ },
1414
+ "model.layers.35.mlp.shared_experts.up_proj": {
1415
+ "bits": 16,
1416
+ "data_type": "float"
1417
+ },
1418
+ "model.layers.35.self_attn.indexer.weights_proj": {
1419
+ "bits": 16,
1420
+ "data_type": "float"
1421
+ },
1422
+ "model.layers.35.self_attn.indexer.wk": {
1423
+ "bits": 16,
1424
+ "data_type": "float"
1425
+ },
1426
+ "model.layers.35.self_attn.indexer.wq_b": {
1427
+ "bits": 16,
1428
+ "data_type": "float"
1429
+ },
1430
+ "model.layers.35.self_attn.kv_a_proj_with_mqa": {
1431
+ "bits": 16,
1432
+ "data_type": "float"
1433
+ },
1434
+ "model.layers.35.self_attn.kv_b_proj": {
1435
+ "bits": 16,
1436
+ "data_type": "float"
1437
+ },
1438
+ "model.layers.35.self_attn.o_proj": {
1439
+ "bits": 16,
1440
+ "data_type": "float"
1441
+ },
1442
+ "model.layers.35.self_attn.q_a_proj": {
1443
+ "bits": 16,
1444
+ "data_type": "float"
1445
+ },
1446
+ "model.layers.35.self_attn.q_b_proj": {
1447
+ "bits": 16,
1448
+ "data_type": "float"
1449
+ },
1450
+ "model.layers.36.mlp.shared_experts.down_proj": {
1451
+ "bits": 16,
1452
+ "data_type": "float"
1453
+ },
1454
+ "model.layers.36.mlp.shared_experts.gate_proj": {
1455
+ "bits": 16,
1456
+ "data_type": "float"
1457
+ },
1458
+ "model.layers.36.mlp.shared_experts.up_proj": {
1459
+ "bits": 16,
1460
+ "data_type": "float"
1461
+ },
1462
+ "model.layers.36.self_attn.indexer.weights_proj": {
1463
+ "bits": 16,
1464
+ "data_type": "float"
1465
+ },
1466
+ "model.layers.36.self_attn.indexer.wk": {
1467
+ "bits": 16,
1468
+ "data_type": "float"
1469
+ },
1470
+ "model.layers.36.self_attn.indexer.wq_b": {
1471
+ "bits": 16,
1472
+ "data_type": "float"
1473
+ },
1474
+ "model.layers.36.self_attn.kv_a_proj_with_mqa": {
1475
+ "bits": 16,
1476
+ "data_type": "float"
1477
+ },
1478
+ "model.layers.36.self_attn.kv_b_proj": {
1479
+ "bits": 16,
1480
+ "data_type": "float"
1481
+ },
1482
+ "model.layers.36.self_attn.o_proj": {
1483
+ "bits": 16,
1484
+ "data_type": "float"
1485
+ },
1486
+ "model.layers.36.self_attn.q_a_proj": {
1487
+ "bits": 16,
1488
+ "data_type": "float"
1489
+ },
1490
+ "model.layers.36.self_attn.q_b_proj": {
1491
+ "bits": 16,
1492
+ "data_type": "float"
1493
+ },
1494
+ "model.layers.37.mlp.shared_experts.down_proj": {
1495
+ "bits": 16,
1496
+ "data_type": "float"
1497
+ },
1498
+ "model.layers.37.mlp.shared_experts.gate_proj": {
1499
+ "bits": 16,
1500
+ "data_type": "float"
1501
+ },
1502
+ "model.layers.37.mlp.shared_experts.up_proj": {
1503
+ "bits": 16,
1504
+ "data_type": "float"
1505
+ },
1506
+ "model.layers.37.self_attn.indexer.weights_proj": {
1507
+ "bits": 16,
1508
+ "data_type": "float"
1509
+ },
1510
+ "model.layers.37.self_attn.indexer.wk": {
1511
+ "bits": 16,
1512
+ "data_type": "float"
1513
+ },
1514
+ "model.layers.37.self_attn.indexer.wq_b": {
1515
+ "bits": 16,
1516
+ "data_type": "float"
1517
+ },
1518
+ "model.layers.37.self_attn.kv_a_proj_with_mqa": {
1519
+ "bits": 16,
1520
+ "data_type": "float"
1521
+ },
1522
+ "model.layers.37.self_attn.kv_b_proj": {
1523
+ "bits": 16,
1524
+ "data_type": "float"
1525
+ },
1526
+ "model.layers.37.self_attn.o_proj": {
1527
+ "bits": 16,
1528
+ "data_type": "float"
1529
+ },
1530
+ "model.layers.37.self_attn.q_a_proj": {
1531
+ "bits": 16,
1532
+ "data_type": "float"
1533
+ },
1534
+ "model.layers.37.self_attn.q_b_proj": {
1535
+ "bits": 16,
1536
+ "data_type": "float"
1537
+ },
1538
+ "model.layers.38.mlp.shared_experts.down_proj": {
1539
+ "bits": 16,
1540
+ "data_type": "float"
1541
+ },
1542
+ "model.layers.38.mlp.shared_experts.gate_proj": {
1543
+ "bits": 16,
1544
+ "data_type": "float"
1545
+ },
1546
+ "model.layers.38.mlp.shared_experts.up_proj": {
1547
+ "bits": 16,
1548
+ "data_type": "float"
1549
+ },
1550
+ "model.layers.38.self_attn.indexer.weights_proj": {
1551
+ "bits": 16,
1552
+ "data_type": "float"
1553
+ },
1554
+ "model.layers.38.self_attn.indexer.wk": {
1555
+ "bits": 16,
1556
+ "data_type": "float"
1557
+ },
1558
+ "model.layers.38.self_attn.indexer.wq_b": {
1559
+ "bits": 16,
1560
+ "data_type": "float"
1561
+ },
1562
+ "model.layers.38.self_attn.kv_a_proj_with_mqa": {
1563
+ "bits": 16,
1564
+ "data_type": "float"
1565
+ },
1566
+ "model.layers.38.self_attn.kv_b_proj": {
1567
+ "bits": 16,
1568
+ "data_type": "float"
1569
+ },
1570
+ "model.layers.38.self_attn.o_proj": {
1571
+ "bits": 16,
1572
+ "data_type": "float"
1573
+ },
1574
+ "model.layers.38.self_attn.q_a_proj": {
1575
+ "bits": 16,
1576
+ "data_type": "float"
1577
+ },
1578
+ "model.layers.38.self_attn.q_b_proj": {
1579
+ "bits": 16,
1580
+ "data_type": "float"
1581
+ },
1582
+ "model.layers.39.mlp.shared_experts.down_proj": {
1583
+ "bits": 16,
1584
+ "data_type": "float"
1585
+ },
1586
+ "model.layers.39.mlp.shared_experts.gate_proj": {
1587
+ "bits": 16,
1588
+ "data_type": "float"
1589
+ },
1590
+ "model.layers.39.mlp.shared_experts.up_proj": {
1591
+ "bits": 16,
1592
+ "data_type": "float"
1593
+ },
1594
+ "model.layers.39.self_attn.indexer.weights_proj": {
1595
+ "bits": 16,
1596
+ "data_type": "float"
1597
+ },
1598
+ "model.layers.39.self_attn.indexer.wk": {
1599
+ "bits": 16,
1600
+ "data_type": "float"
1601
+ },
1602
+ "model.layers.39.self_attn.indexer.wq_b": {
1603
+ "bits": 16,
1604
+ "data_type": "float"
1605
+ },
1606
+ "model.layers.39.self_attn.kv_a_proj_with_mqa": {
1607
+ "bits": 16,
1608
+ "data_type": "float"
1609
+ },
1610
+ "model.layers.39.self_attn.kv_b_proj": {
1611
+ "bits": 16,
1612
+ "data_type": "float"
1613
+ },
1614
+ "model.layers.39.self_attn.o_proj": {
1615
+ "bits": 16,
1616
+ "data_type": "float"
1617
+ },
1618
+ "model.layers.39.self_attn.q_a_proj": {
1619
+ "bits": 16,
1620
+ "data_type": "float"
1621
+ },
1622
+ "model.layers.39.self_attn.q_b_proj": {
1623
+ "bits": 16,
1624
+ "data_type": "float"
1625
+ },
1626
+ "model.layers.4.mlp.shared_experts.down_proj": {
1627
+ "bits": 16,
1628
+ "data_type": "float"
1629
+ },
1630
+ "model.layers.4.mlp.shared_experts.gate_proj": {
1631
+ "bits": 16,
1632
+ "data_type": "float"
1633
+ },
1634
+ "model.layers.4.mlp.shared_experts.up_proj": {
1635
+ "bits": 16,
1636
+ "data_type": "float"
1637
+ },
1638
+ "model.layers.4.self_attn.indexer.weights_proj": {
1639
+ "bits": 16,
1640
+ "data_type": "float"
1641
+ },
1642
+ "model.layers.4.self_attn.indexer.wk": {
1643
+ "bits": 16,
1644
+ "data_type": "float"
1645
+ },
1646
+ "model.layers.4.self_attn.indexer.wq_b": {
1647
+ "bits": 16,
1648
+ "data_type": "float"
1649
+ },
1650
+ "model.layers.4.self_attn.kv_a_proj_with_mqa": {
1651
+ "bits": 16,
1652
+ "data_type": "float"
1653
+ },
1654
+ "model.layers.4.self_attn.kv_b_proj": {
1655
+ "bits": 16,
1656
+ "data_type": "float"
1657
+ },
1658
+ "model.layers.4.self_attn.o_proj": {
1659
+ "bits": 16,
1660
+ "data_type": "float"
1661
+ },
1662
+ "model.layers.4.self_attn.q_a_proj": {
1663
+ "bits": 16,
1664
+ "data_type": "float"
1665
+ },
1666
+ "model.layers.4.self_attn.q_b_proj": {
1667
+ "bits": 16,
1668
+ "data_type": "float"
1669
+ },
1670
+ "model.layers.40.mlp.shared_experts.down_proj": {
1671
+ "bits": 16,
1672
+ "data_type": "float"
1673
+ },
1674
+ "model.layers.40.mlp.shared_experts.gate_proj": {
1675
+ "bits": 16,
1676
+ "data_type": "float"
1677
+ },
1678
+ "model.layers.40.mlp.shared_experts.up_proj": {
1679
+ "bits": 16,
1680
+ "data_type": "float"
1681
+ },
1682
+ "model.layers.40.self_attn.indexer.weights_proj": {
1683
+ "bits": 16,
1684
+ "data_type": "float"
1685
+ },
1686
+ "model.layers.40.self_attn.indexer.wk": {
1687
+ "bits": 16,
1688
+ "data_type": "float"
1689
+ },
1690
+ "model.layers.40.self_attn.indexer.wq_b": {
1691
+ "bits": 16,
1692
+ "data_type": "float"
1693
+ },
1694
+ "model.layers.40.self_attn.kv_a_proj_with_mqa": {
1695
+ "bits": 16,
1696
+ "data_type": "float"
1697
+ },
1698
+ "model.layers.40.self_attn.kv_b_proj": {
1699
+ "bits": 16,
1700
+ "data_type": "float"
1701
+ },
1702
+ "model.layers.40.self_attn.o_proj": {
1703
+ "bits": 16,
1704
+ "data_type": "float"
1705
+ },
1706
+ "model.layers.40.self_attn.q_a_proj": {
1707
+ "bits": 16,
1708
+ "data_type": "float"
1709
+ },
1710
+ "model.layers.40.self_attn.q_b_proj": {
1711
+ "bits": 16,
1712
+ "data_type": "float"
1713
+ },
1714
+ "model.layers.41.mlp.shared_experts.down_proj": {
1715
+ "bits": 16,
1716
+ "data_type": "float"
1717
+ },
1718
+ "model.layers.41.mlp.shared_experts.gate_proj": {
1719
+ "bits": 16,
1720
+ "data_type": "float"
1721
+ },
1722
+ "model.layers.41.mlp.shared_experts.up_proj": {
1723
+ "bits": 16,
1724
+ "data_type": "float"
1725
+ },
1726
+ "model.layers.41.self_attn.indexer.weights_proj": {
1727
+ "bits": 16,
1728
+ "data_type": "float"
1729
+ },
1730
+ "model.layers.41.self_attn.indexer.wk": {
1731
+ "bits": 16,
1732
+ "data_type": "float"
1733
+ },
1734
+ "model.layers.41.self_attn.indexer.wq_b": {
1735
+ "bits": 16,
1736
+ "data_type": "float"
1737
+ },
1738
+ "model.layers.41.self_attn.kv_a_proj_with_mqa": {
1739
+ "bits": 16,
1740
+ "data_type": "float"
1741
+ },
1742
+ "model.layers.41.self_attn.kv_b_proj": {
1743
+ "bits": 16,
1744
+ "data_type": "float"
1745
+ },
1746
+ "model.layers.41.self_attn.o_proj": {
1747
+ "bits": 16,
1748
+ "data_type": "float"
1749
+ },
1750
+ "model.layers.41.self_attn.q_a_proj": {
1751
+ "bits": 16,
1752
+ "data_type": "float"
1753
+ },
1754
+ "model.layers.41.self_attn.q_b_proj": {
1755
+ "bits": 16,
1756
+ "data_type": "float"
1757
+ },
1758
+ "model.layers.42.mlp.shared_experts.down_proj": {
1759
+ "bits": 16,
1760
+ "data_type": "float"
1761
+ },
1762
+ "model.layers.42.mlp.shared_experts.gate_proj": {
1763
+ "bits": 16,
1764
+ "data_type": "float"
1765
+ },
1766
+ "model.layers.42.mlp.shared_experts.up_proj": {
1767
+ "bits": 16,
1768
+ "data_type": "float"
1769
+ },
1770
+ "model.layers.42.self_attn.indexer.weights_proj": {
1771
+ "bits": 16,
1772
+ "data_type": "float"
1773
+ },
1774
+ "model.layers.42.self_attn.indexer.wk": {
1775
+ "bits": 16,
1776
+ "data_type": "float"
1777
+ },
1778
+ "model.layers.42.self_attn.indexer.wq_b": {
1779
+ "bits": 16,
1780
+ "data_type": "float"
1781
+ },
1782
+ "model.layers.42.self_attn.kv_a_proj_with_mqa": {
1783
+ "bits": 16,
1784
+ "data_type": "float"
1785
+ },
1786
+ "model.layers.42.self_attn.kv_b_proj": {
1787
+ "bits": 16,
1788
+ "data_type": "float"
1789
+ },
1790
+ "model.layers.42.self_attn.o_proj": {
1791
+ "bits": 16,
1792
+ "data_type": "float"
1793
+ },
1794
+ "model.layers.42.self_attn.q_a_proj": {
1795
+ "bits": 16,
1796
+ "data_type": "float"
1797
+ },
1798
+ "model.layers.42.self_attn.q_b_proj": {
1799
+ "bits": 16,
1800
+ "data_type": "float"
1801
+ },
1802
+ "model.layers.43.mlp.shared_experts.down_proj": {
1803
+ "bits": 16,
1804
+ "data_type": "float"
1805
+ },
1806
+ "model.layers.43.mlp.shared_experts.gate_proj": {
1807
+ "bits": 16,
1808
+ "data_type": "float"
1809
+ },
1810
+ "model.layers.43.mlp.shared_experts.up_proj": {
1811
+ "bits": 16,
1812
+ "data_type": "float"
1813
+ },
1814
+ "model.layers.43.self_attn.indexer.weights_proj": {
1815
+ "bits": 16,
1816
+ "data_type": "float"
1817
+ },
1818
+ "model.layers.43.self_attn.indexer.wk": {
1819
+ "bits": 16,
1820
+ "data_type": "float"
1821
+ },
1822
+ "model.layers.43.self_attn.indexer.wq_b": {
1823
+ "bits": 16,
1824
+ "data_type": "float"
1825
+ },
1826
+ "model.layers.43.self_attn.kv_a_proj_with_mqa": {
1827
+ "bits": 16,
1828
+ "data_type": "float"
1829
+ },
1830
+ "model.layers.43.self_attn.kv_b_proj": {
1831
+ "bits": 16,
1832
+ "data_type": "float"
1833
+ },
1834
+ "model.layers.43.self_attn.o_proj": {
1835
+ "bits": 16,
1836
+ "data_type": "float"
1837
+ },
1838
+ "model.layers.43.self_attn.q_a_proj": {
1839
+ "bits": 16,
1840
+ "data_type": "float"
1841
+ },
1842
+ "model.layers.43.self_attn.q_b_proj": {
1843
+ "bits": 16,
1844
+ "data_type": "float"
1845
+ },
1846
+ "model.layers.44.mlp.shared_experts.down_proj": {
1847
+ "bits": 16,
1848
+ "data_type": "float"
1849
+ },
1850
+ "model.layers.44.mlp.shared_experts.gate_proj": {
1851
+ "bits": 16,
1852
+ "data_type": "float"
1853
+ },
1854
+ "model.layers.44.mlp.shared_experts.up_proj": {
1855
+ "bits": 16,
1856
+ "data_type": "float"
1857
+ },
1858
+ "model.layers.44.self_attn.indexer.weights_proj": {
1859
+ "bits": 16,
1860
+ "data_type": "float"
1861
+ },
1862
+ "model.layers.44.self_attn.indexer.wk": {
1863
+ "bits": 16,
1864
+ "data_type": "float"
1865
+ },
1866
+ "model.layers.44.self_attn.indexer.wq_b": {
1867
+ "bits": 16,
1868
+ "data_type": "float"
1869
+ },
1870
+ "model.layers.44.self_attn.kv_a_proj_with_mqa": {
1871
+ "bits": 16,
1872
+ "data_type": "float"
1873
+ },
1874
+ "model.layers.44.self_attn.kv_b_proj": {
1875
+ "bits": 16,
1876
+ "data_type": "float"
1877
+ },
1878
+ "model.layers.44.self_attn.o_proj": {
1879
+ "bits": 16,
1880
+ "data_type": "float"
1881
+ },
1882
+ "model.layers.44.self_attn.q_a_proj": {
1883
+ "bits": 16,
1884
+ "data_type": "float"
1885
+ },
1886
+ "model.layers.44.self_attn.q_b_proj": {
1887
+ "bits": 16,
1888
+ "data_type": "float"
1889
+ },
1890
+ "model.layers.45.mlp.shared_experts.down_proj": {
1891
+ "bits": 16,
1892
+ "data_type": "float"
1893
+ },
1894
+ "model.layers.45.mlp.shared_experts.gate_proj": {
1895
+ "bits": 16,
1896
+ "data_type": "float"
1897
+ },
1898
+ "model.layers.45.mlp.shared_experts.up_proj": {
1899
+ "bits": 16,
1900
+ "data_type": "float"
1901
+ },
1902
+ "model.layers.45.self_attn.indexer.weights_proj": {
1903
+ "bits": 16,
1904
+ "data_type": "float"
1905
+ },
1906
+ "model.layers.45.self_attn.indexer.wk": {
1907
+ "bits": 16,
1908
+ "data_type": "float"
1909
+ },
1910
+ "model.layers.45.self_attn.indexer.wq_b": {
1911
+ "bits": 16,
1912
+ "data_type": "float"
1913
+ },
1914
+ "model.layers.45.self_attn.kv_a_proj_with_mqa": {
1915
+ "bits": 16,
1916
+ "data_type": "float"
1917
+ },
1918
+ "model.layers.45.self_attn.kv_b_proj": {
1919
+ "bits": 16,
1920
+ "data_type": "float"
1921
+ },
1922
+ "model.layers.45.self_attn.o_proj": {
1923
+ "bits": 16,
1924
+ "data_type": "float"
1925
+ },
1926
+ "model.layers.45.self_attn.q_a_proj": {
1927
+ "bits": 16,
1928
+ "data_type": "float"
1929
+ },
1930
+ "model.layers.45.self_attn.q_b_proj": {
1931
+ "bits": 16,
1932
+ "data_type": "float"
1933
+ },
1934
+ "model.layers.46.mlp.shared_experts.down_proj": {
1935
+ "bits": 16,
1936
+ "data_type": "float"
1937
+ },
1938
+ "model.layers.46.mlp.shared_experts.gate_proj": {
1939
+ "bits": 16,
1940
+ "data_type": "float"
1941
+ },
1942
+ "model.layers.46.mlp.shared_experts.up_proj": {
1943
+ "bits": 16,
1944
+ "data_type": "float"
1945
+ },
1946
+ "model.layers.46.self_attn.indexer.weights_proj": {
1947
+ "bits": 16,
1948
+ "data_type": "float"
1949
+ },
1950
+ "model.layers.46.self_attn.indexer.wk": {
1951
+ "bits": 16,
1952
+ "data_type": "float"
1953
+ },
1954
+ "model.layers.46.self_attn.indexer.wq_b": {
1955
+ "bits": 16,
1956
+ "data_type": "float"
1957
+ },
1958
+ "model.layers.46.self_attn.kv_a_proj_with_mqa": {
1959
+ "bits": 16,
1960
+ "data_type": "float"
1961
+ },
1962
+ "model.layers.46.self_attn.kv_b_proj": {
1963
+ "bits": 16,
1964
+ "data_type": "float"
1965
+ },
1966
+ "model.layers.46.self_attn.o_proj": {
1967
+ "bits": 16,
1968
+ "data_type": "float"
1969
+ },
1970
+ "model.layers.46.self_attn.q_a_proj": {
1971
+ "bits": 16,
1972
+ "data_type": "float"
1973
+ },
1974
+ "model.layers.46.self_attn.q_b_proj": {
1975
+ "bits": 16,
1976
+ "data_type": "float"
1977
+ },
1978
+ "model.layers.47.mlp.shared_experts.down_proj": {
1979
+ "bits": 16,
1980
+ "data_type": "float"
1981
+ },
1982
+ "model.layers.47.mlp.shared_experts.gate_proj": {
1983
+ "bits": 16,
1984
+ "data_type": "float"
1985
+ },
1986
+ "model.layers.47.mlp.shared_experts.up_proj": {
1987
+ "bits": 16,
1988
+ "data_type": "float"
1989
+ },
1990
+ "model.layers.47.self_attn.indexer.weights_proj": {
1991
+ "bits": 16,
1992
+ "data_type": "float"
1993
+ },
1994
+ "model.layers.47.self_attn.indexer.wk": {
1995
+ "bits": 16,
1996
+ "data_type": "float"
1997
+ },
1998
+ "model.layers.47.self_attn.indexer.wq_b": {
1999
+ "bits": 16,
2000
+ "data_type": "float"
2001
+ },
2002
+ "model.layers.47.self_attn.kv_a_proj_with_mqa": {
2003
+ "bits": 16,
2004
+ "data_type": "float"
2005
+ },
2006
+ "model.layers.47.self_attn.kv_b_proj": {
2007
+ "bits": 16,
2008
+ "data_type": "float"
2009
+ },
2010
+ "model.layers.47.self_attn.o_proj": {
2011
+ "bits": 16,
2012
+ "data_type": "float"
2013
+ },
2014
+ "model.layers.47.self_attn.q_a_proj": {
2015
+ "bits": 16,
2016
+ "data_type": "float"
2017
+ },
2018
+ "model.layers.47.self_attn.q_b_proj": {
2019
+ "bits": 16,
2020
+ "data_type": "float"
2021
+ },
2022
+ "model.layers.48.mlp.shared_experts.down_proj": {
2023
+ "bits": 16,
2024
+ "data_type": "float"
2025
+ },
2026
+ "model.layers.48.mlp.shared_experts.gate_proj": {
2027
+ "bits": 16,
2028
+ "data_type": "float"
2029
+ },
2030
+ "model.layers.48.mlp.shared_experts.up_proj": {
2031
+ "bits": 16,
2032
+ "data_type": "float"
2033
+ },
2034
+ "model.layers.48.self_attn.indexer.weights_proj": {
2035
+ "bits": 16,
2036
+ "data_type": "float"
2037
+ },
2038
+ "model.layers.48.self_attn.indexer.wk": {
2039
+ "bits": 16,
2040
+ "data_type": "float"
2041
+ },
2042
+ "model.layers.48.self_attn.indexer.wq_b": {
2043
+ "bits": 16,
2044
+ "data_type": "float"
2045
+ },
2046
+ "model.layers.48.self_attn.kv_a_proj_with_mqa": {
2047
+ "bits": 16,
2048
+ "data_type": "float"
2049
+ },
2050
+ "model.layers.48.self_attn.kv_b_proj": {
2051
+ "bits": 16,
2052
+ "data_type": "float"
2053
+ },
2054
+ "model.layers.48.self_attn.o_proj": {
2055
+ "bits": 16,
2056
+ "data_type": "float"
2057
+ },
2058
+ "model.layers.48.self_attn.q_a_proj": {
2059
+ "bits": 16,
2060
+ "data_type": "float"
2061
+ },
2062
+ "model.layers.48.self_attn.q_b_proj": {
2063
+ "bits": 16,
2064
+ "data_type": "float"
2065
+ },
2066
+ "model.layers.49.mlp.shared_experts.down_proj": {
2067
+ "bits": 16,
2068
+ "data_type": "float"
2069
+ },
2070
+ "model.layers.49.mlp.shared_experts.gate_proj": {
2071
+ "bits": 16,
2072
+ "data_type": "float"
2073
+ },
2074
+ "model.layers.49.mlp.shared_experts.up_proj": {
2075
+ "bits": 16,
2076
+ "data_type": "float"
2077
+ },
2078
+ "model.layers.49.self_attn.indexer.weights_proj": {
2079
+ "bits": 16,
2080
+ "data_type": "float"
2081
+ },
2082
+ "model.layers.49.self_attn.indexer.wk": {
2083
+ "bits": 16,
2084
+ "data_type": "float"
2085
+ },
2086
+ "model.layers.49.self_attn.indexer.wq_b": {
2087
+ "bits": 16,
2088
+ "data_type": "float"
2089
+ },
2090
+ "model.layers.49.self_attn.kv_a_proj_with_mqa": {
2091
+ "bits": 16,
2092
+ "data_type": "float"
2093
+ },
2094
+ "model.layers.49.self_attn.kv_b_proj": {
2095
+ "bits": 16,
2096
+ "data_type": "float"
2097
+ },
2098
+ "model.layers.49.self_attn.o_proj": {
2099
+ "bits": 16,
2100
+ "data_type": "float"
2101
+ },
2102
+ "model.layers.49.self_attn.q_a_proj": {
2103
+ "bits": 16,
2104
+ "data_type": "float"
2105
+ },
2106
+ "model.layers.49.self_attn.q_b_proj": {
2107
+ "bits": 16,
2108
+ "data_type": "float"
2109
+ },
2110
+ "model.layers.5.mlp.shared_experts.down_proj": {
2111
+ "bits": 16,
2112
+ "data_type": "float"
2113
+ },
2114
+ "model.layers.5.mlp.shared_experts.gate_proj": {
2115
+ "bits": 16,
2116
+ "data_type": "float"
2117
+ },
2118
+ "model.layers.5.mlp.shared_experts.up_proj": {
2119
+ "bits": 16,
2120
+ "data_type": "float"
2121
+ },
2122
+ "model.layers.5.self_attn.indexer.weights_proj": {
2123
+ "bits": 16,
2124
+ "data_type": "float"
2125
+ },
2126
+ "model.layers.5.self_attn.indexer.wk": {
2127
+ "bits": 16,
2128
+ "data_type": "float"
2129
+ },
2130
+ "model.layers.5.self_attn.indexer.wq_b": {
2131
+ "bits": 16,
2132
+ "data_type": "float"
2133
+ },
2134
+ "model.layers.5.self_attn.kv_a_proj_with_mqa": {
2135
+ "bits": 16,
2136
+ "data_type": "float"
2137
+ },
2138
+ "model.layers.5.self_attn.kv_b_proj": {
2139
+ "bits": 16,
2140
+ "data_type": "float"
2141
+ },
2142
+ "model.layers.5.self_attn.o_proj": {
2143
+ "bits": 16,
2144
+ "data_type": "float"
2145
+ },
2146
+ "model.layers.5.self_attn.q_a_proj": {
2147
+ "bits": 16,
2148
+ "data_type": "float"
2149
+ },
2150
+ "model.layers.5.self_attn.q_b_proj": {
2151
+ "bits": 16,
2152
+ "data_type": "float"
2153
+ },
2154
+ "model.layers.50.mlp.shared_experts.down_proj": {
2155
+ "bits": 16,
2156
+ "data_type": "float"
2157
+ },
2158
+ "model.layers.50.mlp.shared_experts.gate_proj": {
2159
+ "bits": 16,
2160
+ "data_type": "float"
2161
+ },
2162
+ "model.layers.50.mlp.shared_experts.up_proj": {
2163
+ "bits": 16,
2164
+ "data_type": "float"
2165
+ },
2166
+ "model.layers.50.self_attn.indexer.weights_proj": {
2167
+ "bits": 16,
2168
+ "data_type": "float"
2169
+ },
2170
+ "model.layers.50.self_attn.indexer.wk": {
2171
+ "bits": 16,
2172
+ "data_type": "float"
2173
+ },
2174
+ "model.layers.50.self_attn.indexer.wq_b": {
2175
+ "bits": 16,
2176
+ "data_type": "float"
2177
+ },
2178
+ "model.layers.50.self_attn.kv_a_proj_with_mqa": {
2179
+ "bits": 16,
2180
+ "data_type": "float"
2181
+ },
2182
+ "model.layers.50.self_attn.kv_b_proj": {
2183
+ "bits": 16,
2184
+ "data_type": "float"
2185
+ },
2186
+ "model.layers.50.self_attn.o_proj": {
2187
+ "bits": 16,
2188
+ "data_type": "float"
2189
+ },
2190
+ "model.layers.50.self_attn.q_a_proj": {
2191
+ "bits": 16,
2192
+ "data_type": "float"
2193
+ },
2194
+ "model.layers.50.self_attn.q_b_proj": {
2195
+ "bits": 16,
2196
+ "data_type": "float"
2197
+ },
2198
+ "model.layers.51.mlp.shared_experts.down_proj": {
2199
+ "bits": 16,
2200
+ "data_type": "float"
2201
+ },
2202
+ "model.layers.51.mlp.shared_experts.gate_proj": {
2203
+ "bits": 16,
2204
+ "data_type": "float"
2205
+ },
2206
+ "model.layers.51.mlp.shared_experts.up_proj": {
2207
+ "bits": 16,
2208
+ "data_type": "float"
2209
+ },
2210
+ "model.layers.51.self_attn.indexer.weights_proj": {
2211
+ "bits": 16,
2212
+ "data_type": "float"
2213
+ },
2214
+ "model.layers.51.self_attn.indexer.wk": {
2215
+ "bits": 16,
2216
+ "data_type": "float"
2217
+ },
2218
+ "model.layers.51.self_attn.indexer.wq_b": {
2219
+ "bits": 16,
2220
+ "data_type": "float"
2221
+ },
2222
+ "model.layers.51.self_attn.kv_a_proj_with_mqa": {
2223
+ "bits": 16,
2224
+ "data_type": "float"
2225
+ },
2226
+ "model.layers.51.self_attn.kv_b_proj": {
2227
+ "bits": 16,
2228
+ "data_type": "float"
2229
+ },
2230
+ "model.layers.51.self_attn.o_proj": {
2231
+ "bits": 16,
2232
+ "data_type": "float"
2233
+ },
2234
+ "model.layers.51.self_attn.q_a_proj": {
2235
+ "bits": 16,
2236
+ "data_type": "float"
2237
+ },
2238
+ "model.layers.51.self_attn.q_b_proj": {
2239
+ "bits": 16,
2240
+ "data_type": "float"
2241
+ },
2242
+ "model.layers.52.mlp.shared_experts.down_proj": {
2243
+ "bits": 16,
2244
+ "data_type": "float"
2245
+ },
2246
+ "model.layers.52.mlp.shared_experts.gate_proj": {
2247
+ "bits": 16,
2248
+ "data_type": "float"
2249
+ },
2250
+ "model.layers.52.mlp.shared_experts.up_proj": {
2251
+ "bits": 16,
2252
+ "data_type": "float"
2253
+ },
2254
+ "model.layers.52.self_attn.indexer.weights_proj": {
2255
+ "bits": 16,
2256
+ "data_type": "float"
2257
+ },
2258
+ "model.layers.52.self_attn.indexer.wk": {
2259
+ "bits": 16,
2260
+ "data_type": "float"
2261
+ },
2262
+ "model.layers.52.self_attn.indexer.wq_b": {
2263
+ "bits": 16,
2264
+ "data_type": "float"
2265
+ },
2266
+ "model.layers.52.self_attn.kv_a_proj_with_mqa": {
2267
+ "bits": 16,
2268
+ "data_type": "float"
2269
+ },
2270
+ "model.layers.52.self_attn.kv_b_proj": {
2271
+ "bits": 16,
2272
+ "data_type": "float"
2273
+ },
2274
+ "model.layers.52.self_attn.o_proj": {
2275
+ "bits": 16,
2276
+ "data_type": "float"
2277
+ },
2278
+ "model.layers.52.self_attn.q_a_proj": {
2279
+ "bits": 16,
2280
+ "data_type": "float"
2281
+ },
2282
+ "model.layers.52.self_attn.q_b_proj": {
2283
+ "bits": 16,
2284
+ "data_type": "float"
2285
+ },
2286
+ "model.layers.53.mlp.shared_experts.down_proj": {
2287
+ "bits": 16,
2288
+ "data_type": "float"
2289
+ },
2290
+ "model.layers.53.mlp.shared_experts.gate_proj": {
2291
+ "bits": 16,
2292
+ "data_type": "float"
2293
+ },
2294
+ "model.layers.53.mlp.shared_experts.up_proj": {
2295
+ "bits": 16,
2296
+ "data_type": "float"
2297
+ },
2298
+ "model.layers.53.self_attn.indexer.weights_proj": {
2299
+ "bits": 16,
2300
+ "data_type": "float"
2301
+ },
2302
+ "model.layers.53.self_attn.indexer.wk": {
2303
+ "bits": 16,
2304
+ "data_type": "float"
2305
+ },
2306
+ "model.layers.53.self_attn.indexer.wq_b": {
2307
+ "bits": 16,
2308
+ "data_type": "float"
2309
+ },
2310
+ "model.layers.53.self_attn.kv_a_proj_with_mqa": {
2311
+ "bits": 16,
2312
+ "data_type": "float"
2313
+ },
2314
+ "model.layers.53.self_attn.kv_b_proj": {
2315
+ "bits": 16,
2316
+ "data_type": "float"
2317
+ },
2318
+ "model.layers.53.self_attn.o_proj": {
2319
+ "bits": 16,
2320
+ "data_type": "float"
2321
+ },
2322
+ "model.layers.53.self_attn.q_a_proj": {
2323
+ "bits": 16,
2324
+ "data_type": "float"
2325
+ },
2326
+ "model.layers.53.self_attn.q_b_proj": {
2327
+ "bits": 16,
2328
+ "data_type": "float"
2329
+ },
2330
+ "model.layers.54.mlp.shared_experts.down_proj": {
2331
+ "bits": 16,
2332
+ "data_type": "float"
2333
+ },
2334
+ "model.layers.54.mlp.shared_experts.gate_proj": {
2335
+ "bits": 16,
2336
+ "data_type": "float"
2337
+ },
2338
+ "model.layers.54.mlp.shared_experts.up_proj": {
2339
+ "bits": 16,
2340
+ "data_type": "float"
2341
+ },
2342
+ "model.layers.54.self_attn.indexer.weights_proj": {
2343
+ "bits": 16,
2344
+ "data_type": "float"
2345
+ },
2346
+ "model.layers.54.self_attn.indexer.wk": {
2347
+ "bits": 16,
2348
+ "data_type": "float"
2349
+ },
2350
+ "model.layers.54.self_attn.indexer.wq_b": {
2351
+ "bits": 16,
2352
+ "data_type": "float"
2353
+ },
2354
+ "model.layers.54.self_attn.kv_a_proj_with_mqa": {
2355
+ "bits": 16,
2356
+ "data_type": "float"
2357
+ },
2358
+ "model.layers.54.self_attn.kv_b_proj": {
2359
+ "bits": 16,
2360
+ "data_type": "float"
2361
+ },
2362
+ "model.layers.54.self_attn.o_proj": {
2363
+ "bits": 16,
2364
+ "data_type": "float"
2365
+ },
2366
+ "model.layers.54.self_attn.q_a_proj": {
2367
+ "bits": 16,
2368
+ "data_type": "float"
2369
+ },
2370
+ "model.layers.54.self_attn.q_b_proj": {
2371
+ "bits": 16,
2372
+ "data_type": "float"
2373
+ },
2374
+ "model.layers.55.mlp.shared_experts.down_proj": {
2375
+ "bits": 16,
2376
+ "data_type": "float"
2377
+ },
2378
+ "model.layers.55.mlp.shared_experts.gate_proj": {
2379
+ "bits": 16,
2380
+ "data_type": "float"
2381
+ },
2382
+ "model.layers.55.mlp.shared_experts.up_proj": {
2383
+ "bits": 16,
2384
+ "data_type": "float"
2385
+ },
2386
+ "model.layers.55.self_attn.indexer.weights_proj": {
2387
+ "bits": 16,
2388
+ "data_type": "float"
2389
+ },
2390
+ "model.layers.55.self_attn.indexer.wk": {
2391
+ "bits": 16,
2392
+ "data_type": "float"
2393
+ },
2394
+ "model.layers.55.self_attn.indexer.wq_b": {
2395
+ "bits": 16,
2396
+ "data_type": "float"
2397
+ },
2398
+ "model.layers.55.self_attn.kv_a_proj_with_mqa": {
2399
+ "bits": 16,
2400
+ "data_type": "float"
2401
+ },
2402
+ "model.layers.55.self_attn.kv_b_proj": {
2403
+ "bits": 16,
2404
+ "data_type": "float"
2405
+ },
2406
+ "model.layers.55.self_attn.o_proj": {
2407
+ "bits": 16,
2408
+ "data_type": "float"
2409
+ },
2410
+ "model.layers.55.self_attn.q_a_proj": {
2411
+ "bits": 16,
2412
+ "data_type": "float"
2413
+ },
2414
+ "model.layers.55.self_attn.q_b_proj": {
2415
+ "bits": 16,
2416
+ "data_type": "float"
2417
+ },
2418
+ "model.layers.56.mlp.shared_experts.down_proj": {
2419
+ "bits": 16,
2420
+ "data_type": "float"
2421
+ },
2422
+ "model.layers.56.mlp.shared_experts.gate_proj": {
2423
+ "bits": 16,
2424
+ "data_type": "float"
2425
+ },
2426
+ "model.layers.56.mlp.shared_experts.up_proj": {
2427
+ "bits": 16,
2428
+ "data_type": "float"
2429
+ },
2430
+ "model.layers.56.self_attn.indexer.weights_proj": {
2431
+ "bits": 16,
2432
+ "data_type": "float"
2433
+ },
2434
+ "model.layers.56.self_attn.indexer.wk": {
2435
+ "bits": 16,
2436
+ "data_type": "float"
2437
+ },
2438
+ "model.layers.56.self_attn.indexer.wq_b": {
2439
+ "bits": 16,
2440
+ "data_type": "float"
2441
+ },
2442
+ "model.layers.56.self_attn.kv_a_proj_with_mqa": {
2443
+ "bits": 16,
2444
+ "data_type": "float"
2445
+ },
2446
+ "model.layers.56.self_attn.kv_b_proj": {
2447
+ "bits": 16,
2448
+ "data_type": "float"
2449
+ },
2450
+ "model.layers.56.self_attn.o_proj": {
2451
+ "bits": 16,
2452
+ "data_type": "float"
2453
+ },
2454
+ "model.layers.56.self_attn.q_a_proj": {
2455
+ "bits": 16,
2456
+ "data_type": "float"
2457
+ },
2458
+ "model.layers.56.self_attn.q_b_proj": {
2459
+ "bits": 16,
2460
+ "data_type": "float"
2461
+ },
2462
+ "model.layers.57.mlp.shared_experts.down_proj": {
2463
+ "bits": 16,
2464
+ "data_type": "float"
2465
+ },
2466
+ "model.layers.57.mlp.shared_experts.gate_proj": {
2467
+ "bits": 16,
2468
+ "data_type": "float"
2469
+ },
2470
+ "model.layers.57.mlp.shared_experts.up_proj": {
2471
+ "bits": 16,
2472
+ "data_type": "float"
2473
+ },
2474
+ "model.layers.57.self_attn.indexer.weights_proj": {
2475
+ "bits": 16,
2476
+ "data_type": "float"
2477
+ },
2478
+ "model.layers.57.self_attn.indexer.wk": {
2479
+ "bits": 16,
2480
+ "data_type": "float"
2481
+ },
2482
+ "model.layers.57.self_attn.indexer.wq_b": {
2483
+ "bits": 16,
2484
+ "data_type": "float"
2485
+ },
2486
+ "model.layers.57.self_attn.kv_a_proj_with_mqa": {
2487
+ "bits": 16,
2488
+ "data_type": "float"
2489
+ },
2490
+ "model.layers.57.self_attn.kv_b_proj": {
2491
+ "bits": 16,
2492
+ "data_type": "float"
2493
+ },
2494
+ "model.layers.57.self_attn.o_proj": {
2495
+ "bits": 16,
2496
+ "data_type": "float"
2497
+ },
2498
+ "model.layers.57.self_attn.q_a_proj": {
2499
+ "bits": 16,
2500
+ "data_type": "float"
2501
+ },
2502
+ "model.layers.57.self_attn.q_b_proj": {
2503
+ "bits": 16,
2504
+ "data_type": "float"
2505
+ },
2506
+ "model.layers.58.mlp.shared_experts.down_proj": {
2507
+ "bits": 16,
2508
+ "data_type": "float"
2509
+ },
2510
+ "model.layers.58.mlp.shared_experts.gate_proj": {
2511
+ "bits": 16,
2512
+ "data_type": "float"
2513
+ },
2514
+ "model.layers.58.mlp.shared_experts.up_proj": {
2515
+ "bits": 16,
2516
+ "data_type": "float"
2517
+ },
2518
+ "model.layers.58.self_attn.indexer.weights_proj": {
2519
+ "bits": 16,
2520
+ "data_type": "float"
2521
+ },
2522
+ "model.layers.58.self_attn.indexer.wk": {
2523
+ "bits": 16,
2524
+ "data_type": "float"
2525
+ },
2526
+ "model.layers.58.self_attn.indexer.wq_b": {
2527
+ "bits": 16,
2528
+ "data_type": "float"
2529
+ },
2530
+ "model.layers.58.self_attn.kv_a_proj_with_mqa": {
2531
+ "bits": 16,
2532
+ "data_type": "float"
2533
+ },
2534
+ "model.layers.58.self_attn.kv_b_proj": {
2535
+ "bits": 16,
2536
+ "data_type": "float"
2537
+ },
2538
+ "model.layers.58.self_attn.o_proj": {
2539
+ "bits": 16,
2540
+ "data_type": "float"
2541
+ },
2542
+ "model.layers.58.self_attn.q_a_proj": {
2543
+ "bits": 16,
2544
+ "data_type": "float"
2545
+ },
2546
+ "model.layers.58.self_attn.q_b_proj": {
2547
+ "bits": 16,
2548
+ "data_type": "float"
2549
+ },
2550
+ "model.layers.59.mlp.shared_experts.down_proj": {
2551
+ "bits": 16,
2552
+ "data_type": "float"
2553
+ },
2554
+ "model.layers.59.mlp.shared_experts.gate_proj": {
2555
+ "bits": 16,
2556
+ "data_type": "float"
2557
+ },
2558
+ "model.layers.59.mlp.shared_experts.up_proj": {
2559
+ "bits": 16,
2560
+ "data_type": "float"
2561
+ },
2562
+ "model.layers.59.self_attn.indexer.weights_proj": {
2563
+ "bits": 16,
2564
+ "data_type": "float"
2565
+ },
2566
+ "model.layers.59.self_attn.indexer.wk": {
2567
+ "bits": 16,
2568
+ "data_type": "float"
2569
+ },
2570
+ "model.layers.59.self_attn.indexer.wq_b": {
2571
+ "bits": 16,
2572
+ "data_type": "float"
2573
+ },
2574
+ "model.layers.59.self_attn.kv_a_proj_with_mqa": {
2575
+ "bits": 16,
2576
+ "data_type": "float"
2577
+ },
2578
+ "model.layers.59.self_attn.kv_b_proj": {
2579
+ "bits": 16,
2580
+ "data_type": "float"
2581
+ },
2582
+ "model.layers.59.self_attn.o_proj": {
2583
+ "bits": 16,
2584
+ "data_type": "float"
2585
+ },
2586
+ "model.layers.59.self_attn.q_a_proj": {
2587
+ "bits": 16,
2588
+ "data_type": "float"
2589
+ },
2590
+ "model.layers.59.self_attn.q_b_proj": {
2591
+ "bits": 16,
2592
+ "data_type": "float"
2593
+ },
2594
+ "model.layers.6.mlp.shared_experts.down_proj": {
2595
+ "bits": 16,
2596
+ "data_type": "float"
2597
+ },
2598
+ "model.layers.6.mlp.shared_experts.gate_proj": {
2599
+ "bits": 16,
2600
+ "data_type": "float"
2601
+ },
2602
+ "model.layers.6.mlp.shared_experts.up_proj": {
2603
+ "bits": 16,
2604
+ "data_type": "float"
2605
+ },
2606
+ "model.layers.6.self_attn.indexer.weights_proj": {
2607
+ "bits": 16,
2608
+ "data_type": "float"
2609
+ },
2610
+ "model.layers.6.self_attn.indexer.wk": {
2611
+ "bits": 16,
2612
+ "data_type": "float"
2613
+ },
2614
+ "model.layers.6.self_attn.indexer.wq_b": {
2615
+ "bits": 16,
2616
+ "data_type": "float"
2617
+ },
2618
+ "model.layers.6.self_attn.kv_a_proj_with_mqa": {
2619
+ "bits": 16,
2620
+ "data_type": "float"
2621
+ },
2622
+ "model.layers.6.self_attn.kv_b_proj": {
2623
+ "bits": 16,
2624
+ "data_type": "float"
2625
+ },
2626
+ "model.layers.6.self_attn.o_proj": {
2627
+ "bits": 16,
2628
+ "data_type": "float"
2629
+ },
2630
+ "model.layers.6.self_attn.q_a_proj": {
2631
+ "bits": 16,
2632
+ "data_type": "float"
2633
+ },
2634
+ "model.layers.6.self_attn.q_b_proj": {
2635
+ "bits": 16,
2636
+ "data_type": "float"
2637
+ },
2638
+ "model.layers.60.mlp.shared_experts.down_proj": {
2639
+ "bits": 16,
2640
+ "data_type": "float"
2641
+ },
2642
+ "model.layers.60.mlp.shared_experts.gate_proj": {
2643
+ "bits": 16,
2644
+ "data_type": "float"
2645
+ },
2646
+ "model.layers.60.mlp.shared_experts.up_proj": {
2647
+ "bits": 16,
2648
+ "data_type": "float"
2649
+ },
2650
+ "model.layers.60.self_attn.indexer.weights_proj": {
2651
+ "bits": 16,
2652
+ "data_type": "float"
2653
+ },
2654
+ "model.layers.60.self_attn.indexer.wk": {
2655
+ "bits": 16,
2656
+ "data_type": "float"
2657
+ },
2658
+ "model.layers.60.self_attn.indexer.wq_b": {
2659
+ "bits": 16,
2660
+ "data_type": "float"
2661
+ },
2662
+ "model.layers.60.self_attn.kv_a_proj_with_mqa": {
2663
+ "bits": 16,
2664
+ "data_type": "float"
2665
+ },
2666
+ "model.layers.60.self_attn.kv_b_proj": {
2667
+ "bits": 16,
2668
+ "data_type": "float"
2669
+ },
2670
+ "model.layers.60.self_attn.o_proj": {
2671
+ "bits": 16,
2672
+ "data_type": "float"
2673
+ },
2674
+ "model.layers.60.self_attn.q_a_proj": {
2675
+ "bits": 16,
2676
+ "data_type": "float"
2677
+ },
2678
+ "model.layers.60.self_attn.q_b_proj": {
2679
+ "bits": 16,
2680
+ "data_type": "float"
2681
+ },
2682
+ "model.layers.61.mlp.shared_experts.down_proj": {
2683
+ "bits": 16,
2684
+ "data_type": "float"
2685
+ },
2686
+ "model.layers.61.mlp.shared_experts.gate_proj": {
2687
+ "bits": 16,
2688
+ "data_type": "float"
2689
+ },
2690
+ "model.layers.61.mlp.shared_experts.up_proj": {
2691
+ "bits": 16,
2692
+ "data_type": "float"
2693
+ },
2694
+ "model.layers.61.self_attn.indexer.weights_proj": {
2695
+ "bits": 16,
2696
+ "data_type": "float"
2697
+ },
2698
+ "model.layers.61.self_attn.indexer.wk": {
2699
+ "bits": 16,
2700
+ "data_type": "float"
2701
+ },
2702
+ "model.layers.61.self_attn.indexer.wq_b": {
2703
+ "bits": 16,
2704
+ "data_type": "float"
2705
+ },
2706
+ "model.layers.61.self_attn.kv_a_proj_with_mqa": {
2707
+ "bits": 16,
2708
+ "data_type": "float"
2709
+ },
2710
+ "model.layers.61.self_attn.kv_b_proj": {
2711
+ "bits": 16,
2712
+ "data_type": "float"
2713
+ },
2714
+ "model.layers.61.self_attn.o_proj": {
2715
+ "bits": 16,
2716
+ "data_type": "float"
2717
+ },
2718
+ "model.layers.61.self_attn.q_a_proj": {
2719
+ "bits": 16,
2720
+ "data_type": "float"
2721
+ },
2722
+ "model.layers.61.self_attn.q_b_proj": {
2723
+ "bits": 16,
2724
+ "data_type": "float"
2725
+ },
2726
+ "model.layers.62.mlp.shared_experts.down_proj": {
2727
+ "bits": 16,
2728
+ "data_type": "float"
2729
+ },
2730
+ "model.layers.62.mlp.shared_experts.gate_proj": {
2731
+ "bits": 16,
2732
+ "data_type": "float"
2733
+ },
2734
+ "model.layers.62.mlp.shared_experts.up_proj": {
2735
+ "bits": 16,
2736
+ "data_type": "float"
2737
+ },
2738
+ "model.layers.62.self_attn.indexer.weights_proj": {
2739
+ "bits": 16,
2740
+ "data_type": "float"
2741
+ },
2742
+ "model.layers.62.self_attn.indexer.wk": {
2743
+ "bits": 16,
2744
+ "data_type": "float"
2745
+ },
2746
+ "model.layers.62.self_attn.indexer.wq_b": {
2747
+ "bits": 16,
2748
+ "data_type": "float"
2749
+ },
2750
+ "model.layers.62.self_attn.kv_a_proj_with_mqa": {
2751
+ "bits": 16,
2752
+ "data_type": "float"
2753
+ },
2754
+ "model.layers.62.self_attn.kv_b_proj": {
2755
+ "bits": 16,
2756
+ "data_type": "float"
2757
+ },
2758
+ "model.layers.62.self_attn.o_proj": {
2759
+ "bits": 16,
2760
+ "data_type": "float"
2761
+ },
2762
+ "model.layers.62.self_attn.q_a_proj": {
2763
+ "bits": 16,
2764
+ "data_type": "float"
2765
+ },
2766
+ "model.layers.62.self_attn.q_b_proj": {
2767
+ "bits": 16,
2768
+ "data_type": "float"
2769
+ },
2770
+ "model.layers.63.mlp.shared_experts.down_proj": {
2771
+ "bits": 16,
2772
+ "data_type": "float"
2773
+ },
2774
+ "model.layers.63.mlp.shared_experts.gate_proj": {
2775
+ "bits": 16,
2776
+ "data_type": "float"
2777
+ },
2778
+ "model.layers.63.mlp.shared_experts.up_proj": {
2779
+ "bits": 16,
2780
+ "data_type": "float"
2781
+ },
2782
+ "model.layers.63.self_attn.indexer.weights_proj": {
2783
+ "bits": 16,
2784
+ "data_type": "float"
2785
+ },
2786
+ "model.layers.63.self_attn.indexer.wk": {
2787
+ "bits": 16,
2788
+ "data_type": "float"
2789
+ },
2790
+ "model.layers.63.self_attn.indexer.wq_b": {
2791
+ "bits": 16,
2792
+ "data_type": "float"
2793
+ },
2794
+ "model.layers.63.self_attn.kv_a_proj_with_mqa": {
2795
+ "bits": 16,
2796
+ "data_type": "float"
2797
+ },
2798
+ "model.layers.63.self_attn.kv_b_proj": {
2799
+ "bits": 16,
2800
+ "data_type": "float"
2801
+ },
2802
+ "model.layers.63.self_attn.o_proj": {
2803
+ "bits": 16,
2804
+ "data_type": "float"
2805
+ },
2806
+ "model.layers.63.self_attn.q_a_proj": {
2807
+ "bits": 16,
2808
+ "data_type": "float"
2809
+ },
2810
+ "model.layers.63.self_attn.q_b_proj": {
2811
+ "bits": 16,
2812
+ "data_type": "float"
2813
+ },
2814
+ "model.layers.64.mlp.shared_experts.down_proj": {
2815
+ "bits": 16,
2816
+ "data_type": "float"
2817
+ },
2818
+ "model.layers.64.mlp.shared_experts.gate_proj": {
2819
+ "bits": 16,
2820
+ "data_type": "float"
2821
+ },
2822
+ "model.layers.64.mlp.shared_experts.up_proj": {
2823
+ "bits": 16,
2824
+ "data_type": "float"
2825
+ },
2826
+ "model.layers.64.self_attn.indexer.weights_proj": {
2827
+ "bits": 16,
2828
+ "data_type": "float"
2829
+ },
2830
+ "model.layers.64.self_attn.indexer.wk": {
2831
+ "bits": 16,
2832
+ "data_type": "float"
2833
+ },
2834
+ "model.layers.64.self_attn.indexer.wq_b": {
2835
+ "bits": 16,
2836
+ "data_type": "float"
2837
+ },
2838
+ "model.layers.64.self_attn.kv_a_proj_with_mqa": {
2839
+ "bits": 16,
2840
+ "data_type": "float"
2841
+ },
2842
+ "model.layers.64.self_attn.kv_b_proj": {
2843
+ "bits": 16,
2844
+ "data_type": "float"
2845
+ },
2846
+ "model.layers.64.self_attn.o_proj": {
2847
+ "bits": 16,
2848
+ "data_type": "float"
2849
+ },
2850
+ "model.layers.64.self_attn.q_a_proj": {
2851
+ "bits": 16,
2852
+ "data_type": "float"
2853
+ },
2854
+ "model.layers.64.self_attn.q_b_proj": {
2855
+ "bits": 16,
2856
+ "data_type": "float"
2857
+ },
2858
+ "model.layers.65.mlp.shared_experts.down_proj": {
2859
+ "bits": 16,
2860
+ "data_type": "float"
2861
+ },
2862
+ "model.layers.65.mlp.shared_experts.gate_proj": {
2863
+ "bits": 16,
2864
+ "data_type": "float"
2865
+ },
2866
+ "model.layers.65.mlp.shared_experts.up_proj": {
2867
+ "bits": 16,
2868
+ "data_type": "float"
2869
+ },
2870
+ "model.layers.65.self_attn.indexer.weights_proj": {
2871
+ "bits": 16,
2872
+ "data_type": "float"
2873
+ },
2874
+ "model.layers.65.self_attn.indexer.wk": {
2875
+ "bits": 16,
2876
+ "data_type": "float"
2877
+ },
2878
+ "model.layers.65.self_attn.indexer.wq_b": {
2879
+ "bits": 16,
2880
+ "data_type": "float"
2881
+ },
2882
+ "model.layers.65.self_attn.kv_a_proj_with_mqa": {
2883
+ "bits": 16,
2884
+ "data_type": "float"
2885
+ },
2886
+ "model.layers.65.self_attn.kv_b_proj": {
2887
+ "bits": 16,
2888
+ "data_type": "float"
2889
+ },
2890
+ "model.layers.65.self_attn.o_proj": {
2891
+ "bits": 16,
2892
+ "data_type": "float"
2893
+ },
2894
+ "model.layers.65.self_attn.q_a_proj": {
2895
+ "bits": 16,
2896
+ "data_type": "float"
2897
+ },
2898
+ "model.layers.65.self_attn.q_b_proj": {
2899
+ "bits": 16,
2900
+ "data_type": "float"
2901
+ },
2902
+ "model.layers.66.mlp.shared_experts.down_proj": {
2903
+ "bits": 16,
2904
+ "data_type": "float"
2905
+ },
2906
+ "model.layers.66.mlp.shared_experts.gate_proj": {
2907
+ "bits": 16,
2908
+ "data_type": "float"
2909
+ },
2910
+ "model.layers.66.mlp.shared_experts.up_proj": {
2911
+ "bits": 16,
2912
+ "data_type": "float"
2913
+ },
2914
+ "model.layers.66.self_attn.indexer.weights_proj": {
2915
+ "bits": 16,
2916
+ "data_type": "float"
2917
+ },
2918
+ "model.layers.66.self_attn.indexer.wk": {
2919
+ "bits": 16,
2920
+ "data_type": "float"
2921
+ },
2922
+ "model.layers.66.self_attn.indexer.wq_b": {
2923
+ "bits": 16,
2924
+ "data_type": "float"
2925
+ },
2926
+ "model.layers.66.self_attn.kv_a_proj_with_mqa": {
2927
+ "bits": 16,
2928
+ "data_type": "float"
2929
+ },
2930
+ "model.layers.66.self_attn.kv_b_proj": {
2931
+ "bits": 16,
2932
+ "data_type": "float"
2933
+ },
2934
+ "model.layers.66.self_attn.o_proj": {
2935
+ "bits": 16,
2936
+ "data_type": "float"
2937
+ },
2938
+ "model.layers.66.self_attn.q_a_proj": {
2939
+ "bits": 16,
2940
+ "data_type": "float"
2941
+ },
2942
+ "model.layers.66.self_attn.q_b_proj": {
2943
+ "bits": 16,
2944
+ "data_type": "float"
2945
+ },
2946
+ "model.layers.67.mlp.shared_experts.down_proj": {
2947
+ "bits": 16,
2948
+ "data_type": "float"
2949
+ },
2950
+ "model.layers.67.mlp.shared_experts.gate_proj": {
2951
+ "bits": 16,
2952
+ "data_type": "float"
2953
+ },
2954
+ "model.layers.67.mlp.shared_experts.up_proj": {
2955
+ "bits": 16,
2956
+ "data_type": "float"
2957
+ },
2958
+ "model.layers.67.self_attn.indexer.weights_proj": {
2959
+ "bits": 16,
2960
+ "data_type": "float"
2961
+ },
2962
+ "model.layers.67.self_attn.indexer.wk": {
2963
+ "bits": 16,
2964
+ "data_type": "float"
2965
+ },
2966
+ "model.layers.67.self_attn.indexer.wq_b": {
2967
+ "bits": 16,
2968
+ "data_type": "float"
2969
+ },
2970
+ "model.layers.67.self_attn.kv_a_proj_with_mqa": {
2971
+ "bits": 16,
2972
+ "data_type": "float"
2973
+ },
2974
+ "model.layers.67.self_attn.kv_b_proj": {
2975
+ "bits": 16,
2976
+ "data_type": "float"
2977
+ },
2978
+ "model.layers.67.self_attn.o_proj": {
2979
+ "bits": 16,
2980
+ "data_type": "float"
2981
+ },
2982
+ "model.layers.67.self_attn.q_a_proj": {
2983
+ "bits": 16,
2984
+ "data_type": "float"
2985
+ },
2986
+ "model.layers.67.self_attn.q_b_proj": {
2987
+ "bits": 16,
2988
+ "data_type": "float"
2989
+ },
2990
+ "model.layers.68.mlp.shared_experts.down_proj": {
2991
+ "bits": 16,
2992
+ "data_type": "float"
2993
+ },
2994
+ "model.layers.68.mlp.shared_experts.gate_proj": {
2995
+ "bits": 16,
2996
+ "data_type": "float"
2997
+ },
2998
+ "model.layers.68.mlp.shared_experts.up_proj": {
2999
+ "bits": 16,
3000
+ "data_type": "float"
3001
+ },
3002
+ "model.layers.68.self_attn.indexer.weights_proj": {
3003
+ "bits": 16,
3004
+ "data_type": "float"
3005
+ },
3006
+ "model.layers.68.self_attn.indexer.wk": {
3007
+ "bits": 16,
3008
+ "data_type": "float"
3009
+ },
3010
+ "model.layers.68.self_attn.indexer.wq_b": {
3011
+ "bits": 16,
3012
+ "data_type": "float"
3013
+ },
3014
+ "model.layers.68.self_attn.kv_a_proj_with_mqa": {
3015
+ "bits": 16,
3016
+ "data_type": "float"
3017
+ },
3018
+ "model.layers.68.self_attn.kv_b_proj": {
3019
+ "bits": 16,
3020
+ "data_type": "float"
3021
+ },
3022
+ "model.layers.68.self_attn.o_proj": {
3023
+ "bits": 16,
3024
+ "data_type": "float"
3025
+ },
3026
+ "model.layers.68.self_attn.q_a_proj": {
3027
+ "bits": 16,
3028
+ "data_type": "float"
3029
+ },
3030
+ "model.layers.68.self_attn.q_b_proj": {
3031
+ "bits": 16,
3032
+ "data_type": "float"
3033
+ },
3034
+ "model.layers.69.mlp.shared_experts.down_proj": {
3035
+ "bits": 16,
3036
+ "data_type": "float"
3037
+ },
3038
+ "model.layers.69.mlp.shared_experts.gate_proj": {
3039
+ "bits": 16,
3040
+ "data_type": "float"
3041
+ },
3042
+ "model.layers.69.mlp.shared_experts.up_proj": {
3043
+ "bits": 16,
3044
+ "data_type": "float"
3045
+ },
3046
+ "model.layers.69.self_attn.indexer.weights_proj": {
3047
+ "bits": 16,
3048
+ "data_type": "float"
3049
+ },
3050
+ "model.layers.69.self_attn.indexer.wk": {
3051
+ "bits": 16,
3052
+ "data_type": "float"
3053
+ },
3054
+ "model.layers.69.self_attn.indexer.wq_b": {
3055
+ "bits": 16,
3056
+ "data_type": "float"
3057
+ },
3058
+ "model.layers.69.self_attn.kv_a_proj_with_mqa": {
3059
+ "bits": 16,
3060
+ "data_type": "float"
3061
+ },
3062
+ "model.layers.69.self_attn.kv_b_proj": {
3063
+ "bits": 16,
3064
+ "data_type": "float"
3065
+ },
3066
+ "model.layers.69.self_attn.o_proj": {
3067
+ "bits": 16,
3068
+ "data_type": "float"
3069
+ },
3070
+ "model.layers.69.self_attn.q_a_proj": {
3071
+ "bits": 16,
3072
+ "data_type": "float"
3073
+ },
3074
+ "model.layers.69.self_attn.q_b_proj": {
3075
+ "bits": 16,
3076
+ "data_type": "float"
3077
+ },
3078
+ "model.layers.7.mlp.shared_experts.down_proj": {
3079
+ "bits": 16,
3080
+ "data_type": "float"
3081
+ },
3082
+ "model.layers.7.mlp.shared_experts.gate_proj": {
3083
+ "bits": 16,
3084
+ "data_type": "float"
3085
+ },
3086
+ "model.layers.7.mlp.shared_experts.up_proj": {
3087
+ "bits": 16,
3088
+ "data_type": "float"
3089
+ },
3090
+ "model.layers.7.self_attn.indexer.weights_proj": {
3091
+ "bits": 16,
3092
+ "data_type": "float"
3093
+ },
3094
+ "model.layers.7.self_attn.indexer.wk": {
3095
+ "bits": 16,
3096
+ "data_type": "float"
3097
+ },
3098
+ "model.layers.7.self_attn.indexer.wq_b": {
3099
+ "bits": 16,
3100
+ "data_type": "float"
3101
+ },
3102
+ "model.layers.7.self_attn.kv_a_proj_with_mqa": {
3103
+ "bits": 16,
3104
+ "data_type": "float"
3105
+ },
3106
+ "model.layers.7.self_attn.kv_b_proj": {
3107
+ "bits": 16,
3108
+ "data_type": "float"
3109
+ },
3110
+ "model.layers.7.self_attn.o_proj": {
3111
+ "bits": 16,
3112
+ "data_type": "float"
3113
+ },
3114
+ "model.layers.7.self_attn.q_a_proj": {
3115
+ "bits": 16,
3116
+ "data_type": "float"
3117
+ },
3118
+ "model.layers.7.self_attn.q_b_proj": {
3119
+ "bits": 16,
3120
+ "data_type": "float"
3121
+ },
3122
+ "model.layers.70.mlp.shared_experts.down_proj": {
3123
+ "bits": 16,
3124
+ "data_type": "float"
3125
+ },
3126
+ "model.layers.70.mlp.shared_experts.gate_proj": {
3127
+ "bits": 16,
3128
+ "data_type": "float"
3129
+ },
3130
+ "model.layers.70.mlp.shared_experts.up_proj": {
3131
+ "bits": 16,
3132
+ "data_type": "float"
3133
+ },
3134
+ "model.layers.70.self_attn.indexer.weights_proj": {
3135
+ "bits": 16,
3136
+ "data_type": "float"
3137
+ },
3138
+ "model.layers.70.self_attn.indexer.wk": {
3139
+ "bits": 16,
3140
+ "data_type": "float"
3141
+ },
3142
+ "model.layers.70.self_attn.indexer.wq_b": {
3143
+ "bits": 16,
3144
+ "data_type": "float"
3145
+ },
3146
+ "model.layers.70.self_attn.kv_a_proj_with_mqa": {
3147
+ "bits": 16,
3148
+ "data_type": "float"
3149
+ },
3150
+ "model.layers.70.self_attn.kv_b_proj": {
3151
+ "bits": 16,
3152
+ "data_type": "float"
3153
+ },
3154
+ "model.layers.70.self_attn.o_proj": {
3155
+ "bits": 16,
3156
+ "data_type": "float"
3157
+ },
3158
+ "model.layers.70.self_attn.q_a_proj": {
3159
+ "bits": 16,
3160
+ "data_type": "float"
3161
+ },
3162
+ "model.layers.70.self_attn.q_b_proj": {
3163
+ "bits": 16,
3164
+ "data_type": "float"
3165
+ },
3166
+ "model.layers.71.mlp.shared_experts.down_proj": {
3167
+ "bits": 16,
3168
+ "data_type": "float"
3169
+ },
3170
+ "model.layers.71.mlp.shared_experts.gate_proj": {
3171
+ "bits": 16,
3172
+ "data_type": "float"
3173
+ },
3174
+ "model.layers.71.mlp.shared_experts.up_proj": {
3175
+ "bits": 16,
3176
+ "data_type": "float"
3177
+ },
3178
+ "model.layers.71.self_attn.indexer.weights_proj": {
3179
+ "bits": 16,
3180
+ "data_type": "float"
3181
+ },
3182
+ "model.layers.71.self_attn.indexer.wk": {
3183
+ "bits": 16,
3184
+ "data_type": "float"
3185
+ },
3186
+ "model.layers.71.self_attn.indexer.wq_b": {
3187
+ "bits": 16,
3188
+ "data_type": "float"
3189
+ },
3190
+ "model.layers.71.self_attn.kv_a_proj_with_mqa": {
3191
+ "bits": 16,
3192
+ "data_type": "float"
3193
+ },
3194
+ "model.layers.71.self_attn.kv_b_proj": {
3195
+ "bits": 16,
3196
+ "data_type": "float"
3197
+ },
3198
+ "model.layers.71.self_attn.o_proj": {
3199
+ "bits": 16,
3200
+ "data_type": "float"
3201
+ },
3202
+ "model.layers.71.self_attn.q_a_proj": {
3203
+ "bits": 16,
3204
+ "data_type": "float"
3205
+ },
3206
+ "model.layers.71.self_attn.q_b_proj": {
3207
+ "bits": 16,
3208
+ "data_type": "float"
3209
+ },
3210
+ "model.layers.72.mlp.shared_experts.down_proj": {
3211
+ "bits": 16,
3212
+ "data_type": "float"
3213
+ },
3214
+ "model.layers.72.mlp.shared_experts.gate_proj": {
3215
+ "bits": 16,
3216
+ "data_type": "float"
3217
+ },
3218
+ "model.layers.72.mlp.shared_experts.up_proj": {
3219
+ "bits": 16,
3220
+ "data_type": "float"
3221
+ },
3222
+ "model.layers.72.self_attn.indexer.weights_proj": {
3223
+ "bits": 16,
3224
+ "data_type": "float"
3225
+ },
3226
+ "model.layers.72.self_attn.indexer.wk": {
3227
+ "bits": 16,
3228
+ "data_type": "float"
3229
+ },
3230
+ "model.layers.72.self_attn.indexer.wq_b": {
3231
+ "bits": 16,
3232
+ "data_type": "float"
3233
+ },
3234
+ "model.layers.72.self_attn.kv_a_proj_with_mqa": {
3235
+ "bits": 16,
3236
+ "data_type": "float"
3237
+ },
3238
+ "model.layers.72.self_attn.kv_b_proj": {
3239
+ "bits": 16,
3240
+ "data_type": "float"
3241
+ },
3242
+ "model.layers.72.self_attn.o_proj": {
3243
+ "bits": 16,
3244
+ "data_type": "float"
3245
+ },
3246
+ "model.layers.72.self_attn.q_a_proj": {
3247
+ "bits": 16,
3248
+ "data_type": "float"
3249
+ },
3250
+ "model.layers.72.self_attn.q_b_proj": {
3251
+ "bits": 16,
3252
+ "data_type": "float"
3253
+ },
3254
+ "model.layers.73.mlp.shared_experts.down_proj": {
3255
+ "bits": 16,
3256
+ "data_type": "float"
3257
+ },
3258
+ "model.layers.73.mlp.shared_experts.gate_proj": {
3259
+ "bits": 16,
3260
+ "data_type": "float"
3261
+ },
3262
+ "model.layers.73.mlp.shared_experts.up_proj": {
3263
+ "bits": 16,
3264
+ "data_type": "float"
3265
+ },
3266
+ "model.layers.73.self_attn.indexer.weights_proj": {
3267
+ "bits": 16,
3268
+ "data_type": "float"
3269
+ },
3270
+ "model.layers.73.self_attn.indexer.wk": {
3271
+ "bits": 16,
3272
+ "data_type": "float"
3273
+ },
3274
+ "model.layers.73.self_attn.indexer.wq_b": {
3275
+ "bits": 16,
3276
+ "data_type": "float"
3277
+ },
3278
+ "model.layers.73.self_attn.kv_a_proj_with_mqa": {
3279
+ "bits": 16,
3280
+ "data_type": "float"
3281
+ },
3282
+ "model.layers.73.self_attn.kv_b_proj": {
3283
+ "bits": 16,
3284
+ "data_type": "float"
3285
+ },
3286
+ "model.layers.73.self_attn.o_proj": {
3287
+ "bits": 16,
3288
+ "data_type": "float"
3289
+ },
3290
+ "model.layers.73.self_attn.q_a_proj": {
3291
+ "bits": 16,
3292
+ "data_type": "float"
3293
+ },
3294
+ "model.layers.73.self_attn.q_b_proj": {
3295
+ "bits": 16,
3296
+ "data_type": "float"
3297
+ },
3298
+ "model.layers.74.mlp.shared_experts.down_proj": {
3299
+ "bits": 16,
3300
+ "data_type": "float"
3301
+ },
3302
+ "model.layers.74.mlp.shared_experts.gate_proj": {
3303
+ "bits": 16,
3304
+ "data_type": "float"
3305
+ },
3306
+ "model.layers.74.mlp.shared_experts.up_proj": {
3307
+ "bits": 16,
3308
+ "data_type": "float"
3309
+ },
3310
+ "model.layers.74.self_attn.indexer.weights_proj": {
3311
+ "bits": 16,
3312
+ "data_type": "float"
3313
+ },
3314
+ "model.layers.74.self_attn.indexer.wk": {
3315
+ "bits": 16,
3316
+ "data_type": "float"
3317
+ },
3318
+ "model.layers.74.self_attn.indexer.wq_b": {
3319
+ "bits": 16,
3320
+ "data_type": "float"
3321
+ },
3322
+ "model.layers.74.self_attn.kv_a_proj_with_mqa": {
3323
+ "bits": 16,
3324
+ "data_type": "float"
3325
+ },
3326
+ "model.layers.74.self_attn.kv_b_proj": {
3327
+ "bits": 16,
3328
+ "data_type": "float"
3329
+ },
3330
+ "model.layers.74.self_attn.o_proj": {
3331
+ "bits": 16,
3332
+ "data_type": "float"
3333
+ },
3334
+ "model.layers.74.self_attn.q_a_proj": {
3335
+ "bits": 16,
3336
+ "data_type": "float"
3337
+ },
3338
+ "model.layers.74.self_attn.q_b_proj": {
3339
+ "bits": 16,
3340
+ "data_type": "float"
3341
+ },
3342
+ "model.layers.75.mlp.shared_experts.down_proj": {
3343
+ "bits": 16,
3344
+ "data_type": "float"
3345
+ },
3346
+ "model.layers.75.mlp.shared_experts.gate_proj": {
3347
+ "bits": 16,
3348
+ "data_type": "float"
3349
+ },
3350
+ "model.layers.75.mlp.shared_experts.up_proj": {
3351
+ "bits": 16,
3352
+ "data_type": "float"
3353
+ },
3354
+ "model.layers.75.self_attn.indexer.weights_proj": {
3355
+ "bits": 16,
3356
+ "data_type": "float"
3357
+ },
3358
+ "model.layers.75.self_attn.indexer.wk": {
3359
+ "bits": 16,
3360
+ "data_type": "float"
3361
+ },
3362
+ "model.layers.75.self_attn.indexer.wq_b": {
3363
+ "bits": 16,
3364
+ "data_type": "float"
3365
+ },
3366
+ "model.layers.75.self_attn.kv_a_proj_with_mqa": {
3367
+ "bits": 16,
3368
+ "data_type": "float"
3369
+ },
3370
+ "model.layers.75.self_attn.kv_b_proj": {
3371
+ "bits": 16,
3372
+ "data_type": "float"
3373
+ },
3374
+ "model.layers.75.self_attn.o_proj": {
3375
+ "bits": 16,
3376
+ "data_type": "float"
3377
+ },
3378
+ "model.layers.75.self_attn.q_a_proj": {
3379
+ "bits": 16,
3380
+ "data_type": "float"
3381
+ },
3382
+ "model.layers.75.self_attn.q_b_proj": {
3383
+ "bits": 16,
3384
+ "data_type": "float"
3385
+ },
3386
+ "model.layers.76.mlp.shared_experts.down_proj": {
3387
+ "bits": 16,
3388
+ "data_type": "float"
3389
+ },
3390
+ "model.layers.76.mlp.shared_experts.gate_proj": {
3391
+ "bits": 16,
3392
+ "data_type": "float"
3393
+ },
3394
+ "model.layers.76.mlp.shared_experts.up_proj": {
3395
+ "bits": 16,
3396
+ "data_type": "float"
3397
+ },
3398
+ "model.layers.76.self_attn.indexer.weights_proj": {
3399
+ "bits": 16,
3400
+ "data_type": "float"
3401
+ },
3402
+ "model.layers.76.self_attn.indexer.wk": {
3403
+ "bits": 16,
3404
+ "data_type": "float"
3405
+ },
3406
+ "model.layers.76.self_attn.indexer.wq_b": {
3407
+ "bits": 16,
3408
+ "data_type": "float"
3409
+ },
3410
+ "model.layers.76.self_attn.kv_a_proj_with_mqa": {
3411
+ "bits": 16,
3412
+ "data_type": "float"
3413
+ },
3414
+ "model.layers.76.self_attn.kv_b_proj": {
3415
+ "bits": 16,
3416
+ "data_type": "float"
3417
+ },
3418
+ "model.layers.76.self_attn.o_proj": {
3419
+ "bits": 16,
3420
+ "data_type": "float"
3421
+ },
3422
+ "model.layers.76.self_attn.q_a_proj": {
3423
+ "bits": 16,
3424
+ "data_type": "float"
3425
+ },
3426
+ "model.layers.76.self_attn.q_b_proj": {
3427
+ "bits": 16,
3428
+ "data_type": "float"
3429
+ },
3430
+ "model.layers.77.mlp.shared_experts.down_proj": {
3431
+ "bits": 16,
3432
+ "data_type": "float"
3433
+ },
3434
+ "model.layers.77.mlp.shared_experts.gate_proj": {
3435
+ "bits": 16,
3436
+ "data_type": "float"
3437
+ },
3438
+ "model.layers.77.mlp.shared_experts.up_proj": {
3439
+ "bits": 16,
3440
+ "data_type": "float"
3441
+ },
3442
+ "model.layers.77.self_attn.indexer.weights_proj": {
3443
+ "bits": 16,
3444
+ "data_type": "float"
3445
+ },
3446
+ "model.layers.77.self_attn.indexer.wk": {
3447
+ "bits": 16,
3448
+ "data_type": "float"
3449
+ },
3450
+ "model.layers.77.self_attn.indexer.wq_b": {
3451
+ "bits": 16,
3452
+ "data_type": "float"
3453
+ },
3454
+ "model.layers.77.self_attn.kv_a_proj_with_mqa": {
3455
+ "bits": 16,
3456
+ "data_type": "float"
3457
+ },
3458
+ "model.layers.77.self_attn.kv_b_proj": {
3459
+ "bits": 16,
3460
+ "data_type": "float"
3461
+ },
3462
+ "model.layers.77.self_attn.o_proj": {
3463
+ "bits": 16,
3464
+ "data_type": "float"
3465
+ },
3466
+ "model.layers.77.self_attn.q_a_proj": {
3467
+ "bits": 16,
3468
+ "data_type": "float"
3469
+ },
3470
+ "model.layers.77.self_attn.q_b_proj": {
3471
+ "bits": 16,
3472
+ "data_type": "float"
3473
+ },
3474
+ "model.layers.8.mlp.shared_experts.down_proj": {
3475
+ "bits": 16,
3476
+ "data_type": "float"
3477
+ },
3478
+ "model.layers.8.mlp.shared_experts.gate_proj": {
3479
+ "bits": 16,
3480
+ "data_type": "float"
3481
+ },
3482
+ "model.layers.8.mlp.shared_experts.up_proj": {
3483
+ "bits": 16,
3484
+ "data_type": "float"
3485
+ },
3486
+ "model.layers.8.self_attn.indexer.weights_proj": {
3487
+ "bits": 16,
3488
+ "data_type": "float"
3489
+ },
3490
+ "model.layers.8.self_attn.indexer.wk": {
3491
+ "bits": 16,
3492
+ "data_type": "float"
3493
+ },
3494
+ "model.layers.8.self_attn.indexer.wq_b": {
3495
+ "bits": 16,
3496
+ "data_type": "float"
3497
+ },
3498
+ "model.layers.8.self_attn.kv_a_proj_with_mqa": {
3499
+ "bits": 16,
3500
+ "data_type": "float"
3501
+ },
3502
+ "model.layers.8.self_attn.kv_b_proj": {
3503
+ "bits": 16,
3504
+ "data_type": "float"
3505
+ },
3506
+ "model.layers.8.self_attn.o_proj": {
3507
+ "bits": 16,
3508
+ "data_type": "float"
3509
+ },
3510
+ "model.layers.8.self_attn.q_a_proj": {
3511
+ "bits": 16,
3512
+ "data_type": "float"
3513
+ },
3514
+ "model.layers.8.self_attn.q_b_proj": {
3515
+ "bits": 16,
3516
+ "data_type": "float"
3517
+ },
3518
+ "model.layers.9.mlp.shared_experts.down_proj": {
3519
+ "bits": 16,
3520
+ "data_type": "float"
3521
+ },
3522
+ "model.layers.9.mlp.shared_experts.gate_proj": {
3523
+ "bits": 16,
3524
+ "data_type": "float"
3525
+ },
3526
+ "model.layers.9.mlp.shared_experts.up_proj": {
3527
+ "bits": 16,
3528
+ "data_type": "float"
3529
+ },
3530
+ "model.layers.9.self_attn.indexer.weights_proj": {
3531
+ "bits": 16,
3532
+ "data_type": "float"
3533
+ },
3534
+ "model.layers.9.self_attn.indexer.wk": {
3535
+ "bits": 16,
3536
+ "data_type": "float"
3537
+ },
3538
+ "model.layers.9.self_attn.indexer.wq_b": {
3539
+ "bits": 16,
3540
+ "data_type": "float"
3541
+ },
3542
+ "model.layers.9.self_attn.kv_a_proj_with_mqa": {
3543
+ "bits": 16,
3544
+ "data_type": "float"
3545
+ },
3546
+ "model.layers.9.self_attn.kv_b_proj": {
3547
+ "bits": 16,
3548
+ "data_type": "float"
3549
+ },
3550
+ "model.layers.9.self_attn.o_proj": {
3551
+ "bits": 16,
3552
+ "data_type": "float"
3553
+ },
3554
+ "model.layers.9.self_attn.q_a_proj": {
3555
+ "bits": 16,
3556
+ "data_type": "float"
3557
+ },
3558
+ "model.layers.9.self_attn.q_b_proj": {
3559
+ "bits": 16,
3560
+ "data_type": "float"
3561
+ }
3562
+ },
3563
+ "group_size": 128,
3564
+ "iters": 0,
3565
+ "packing_format": "auto_round:auto_awq",
3566
+ "quant_method": "auto-round",
3567
+ "sym": false
3568
+ },
3569
+ "rms_norm_eps": 1e-05,
3570
+ "rope_interleave": true,
3571
+ "rope_parameters": {
3572
+ "rope_theta": 1000000,
3573
+ "rope_type": "default"
3574
+ },
3575
+ "routed_scaling_factor": 2.5,
3576
+ "scoring_func": "sigmoid",
3577
+ "tie_word_embeddings": false,
3578
+ "topk_group": 1,
3579
+ "topk_method": "noaux_tc",
3580
+ "transformers_version": "5.2.0",
3581
+ "use_cache": true,
3582
+ "v_head_dim": 256,
3583
+ "vocab_size": 154880,
3584
+ "torch_dtype": "float16"
3585
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 154820,
6
+ 154827,
7
+ 154829
8
+ ],
9
+ "pad_token_id": 154820,
10
+ "temperature": 1.0,
11
+ "top_p": 0.95,
12
+ "transformers_version": "5.2.0"
13
+ }
model-00001-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d503276ff6d45d1ac8531afe8941bd603583f4ae71f2f50dadacbeee6d52302a
3
+ size 5362682984
model-00002-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0f9e21e411b68579fc57eab21aeac9232f42a10f9c60886d44a0ff75bb80c2a
3
+ size 5363333544
model-00003-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3acf2210bc30804c39fd742445fbbfda9cb71712065d0d1b1edd96dd4780cc65
3
+ size 5363333552
model-00004-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79acea19f77023040e064267ddddf6f91ba91e5698ff5a4fddeb6341bca0bb73
3
+ size 5363333544
model-00005-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb4b5b3e5908816f9c5e68c7f775ffa41c1e2382e3df9c8b3e7ad9ba14903f98
3
+ size 5363333544
model-00006-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da3c0714c028e9f39d7f514608595c3b954ddc434ad68428a5f598bcbdaa566d
3
+ size 5363333552
model-00007-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e1c0e2097d05f6f1ad986b83c8e311f1be4161532a7d31a0eb5c1d89f271093
3
+ size 5363333544
model-00008-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52dafb35466732440b3d9fdb9b370fea4453a800f7bb72d7d8e8b857897ddf5f
3
+ size 5363334480
model-00009-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6c3c927beee319a7eab7dc8567b40304463dfd70e507dc6ea9f2885c8bbc531
3
+ size 5363335848
model-00010-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:843071249d02eb8d34cd92e2938c957b3c7008aee9211dc8848ccb3ee1440796
3
+ size 5363335872
model-00011-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b5d4a34fb477772a7b4da781b04c1faeacc94ae69a1158bc50fdd90188b2333
3
+ size 5363335872
model-00012-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:685b0f533f070a82aa90870916b260d54ba7f1e2d6fc9d9dc67409890f658091
3
+ size 5363335872
model-00013-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f141d107a01d8cd0d62fa08d35bb8515970807519752c49ac776447963e3a7a
3
+ size 5363335872
model-00014-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ebf2da295ac32ffdcf56c8afc378725544618a9f605697b6bdbf3d88dae59a1
3
+ size 5363335872
model-00015-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c538482066a5c29b9a8bd4a86dc1aceba947af224f69010178416e77dfc3c7cf
3
+ size 5363335872
model-00016-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dfbd2021d2465853a2c70d4a3b2d1b6326ed9f9d0160d929726b2bf490045b0
3
+ size 5363335872
model-00017-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f398f5d436bea896c5182a2fe5cd0b8b524a839f7d46ce7bcf0c04b9ea2bb91a
3
+ size 5363335872
model-00018-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:972ef16e857b0fb9037488496f16e84880f087b5a2e771ff73ea8d476899edc2
3
+ size 5363335872
model-00019-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:553e25b68fb5530b9e317849f94bc4a8e67a4be783e669827b6683c46bf87259
3
+ size 5363335872
model-00020-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f1b7e12a081b9d5839570a87c4efa465aa68035e0942f3132243c270dca862f
3
+ size 5363335872
model-00021-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8f60d7db12e5ec54e57542878d43287be1c25eab707179d7eb78b4c7e34a230
3
+ size 5363335872
model-00022-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74800e0df90e9e5b1612cfea4bf2458881dcd9048670ad8eff840f32a76b3ea7
3
+ size 5363335872
model-00023-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:becfc4f008454aded18e0f84615d7d5ccb07a4f988c086a66ea4839e054ffad1
3
+ size 5363335872
model-00024-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fc9b3b53e75bc9a9c2844f56d48af93105eb19827830cd7156cf52530dc435c
3
+ size 5363335872
model-00025-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd2947bc54e44885dc3e86ec8c58be75ea8f8c3efe17ebf3b08053eff47f68bd
3
+ size 5363335872
model-00026-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57c4f39fd0dca044c3f22b3e6e7fac1796cde376c97cfa3eb26cc677dcd4db8f
3
+ size 5363335872
model-00027-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a47968e296b6e64f246341c443a93562f5469b252389139d8fd4f88c2caf9931
3
+ size 5363335872
model-00028-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74585a47bd7eab51b57c5198b98b84de22aed72170c50ae58dbed845771c16d6
3
+ size 5363335872
model-00029-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf7738171dc771cd84ffa7b2b5b9eb72381da467c4405d051328eaedc1cead7
3
+ size 5363335872
model-00030-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88a678110a9a68ae24c2db00b8d41c44bfb0f50c7540b4361b65d2aa33396c19
3
+ size 5363335896
model-00031-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13766f9d11c10b514e97340105aa5ba216860af30e9e7fbd4f87997cc66a8241
3
+ size 5363335904
model-00032-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75794c04beaa7366dc3a0e6bdaaf21a6e14230bee288a6c4853b407b48dab14f
3
+ size 5169415104
model-00033-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f94835d83bb3b299947a9d79e89190fba42194ff3e3fba3b629818edf04e306
3
+ size 5344776064
model-00034-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b81d0c15834b3f77ade1b3cb122605d4a6f6ad2882918fa52c3661687193bf24
3
+ size 5347631632
model-00035-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:815f6e9c838b9fd8d30f802916d1d9550362cd9c9ddde1b024a8ef590be3f3a5
3
+ size 5366726448
model-00036-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:540b75cac855ef92244eb4c0a1dab40b3db569434ef230a170bc1b4107bf3aa5
3
+ size 5363335832
model-00037-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2c865903988d31975bcf7719f7302860c4dbb1a71055e48e0c00c7d7492ae9
3
+ size 5363335832
model-00038-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0cbf81be335db6b9c717c8c1d9950b5d0a32ca34722b627c31ce7e1f4c93ab1
3
+ size 5363335832
model-00039-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0d103944e19c6f3ef7a260f2c496ab93f55213f57172841424f7f5a6af39aa0
3
+ size 5363335832
model-00040-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3e3049f45b92ae8e5ac35d42bf5b47c04aa158494f236feb38b3aea6699f197
3
+ size 5363335832
model-00041-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f2285c0659e65fb9b193be221fe33a164a8fbf990c1bbe9d7bb1c25593d8664
3
+ size 5363335832
model-00042-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa312393d0185010b765acc2eedb76bcd6a1870cd50d96f7b5dc064fada8d48
3
+ size 5363335832
model-00043-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d97009205ecc46145d9830bc3ec87b48e6e30744054e02fd7fdf7c142863765f
3
+ size 5363335832
model-00044-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2340ef72dfcd971cc857ecb63c9e4d35cf245eecac8dc33cc7e19fe1cf8f6c2
3
+ size 5363335832
model-00045-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc60f161ef88d35022d151fd9dc08257306e2cf597ef00fc97d80bf17532282f
3
+ size 5363335832
model-00046-of-00079.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa875dfb566ad4cdbe9baf2871b3fe9173c0b6b27a33e7acba33cb1601b395ee
3
+ size 5363335832