Upload 9 files

Browse files

Files changed (9) hide show

adapter_config.json +46 -0
adapter_model.safetensors +3 -0
chat_template.jinja +337 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +30 -0
tokenizer_config.json +0 -0
trainer_state.json +124 -0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "swiss-ai/Apertus-8B-Instruct-2509",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.0",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "gate_proj",
+    "k_proj",
+    "q_proj",
+    "o_proj",
+    "up_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8ad81fb75479b68f77a0e5343c7a843689c089b6169568cc6113b0738cb0ee0
+size 637586160

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,337 @@

+{#-
+    render_typescript_type(param_spec, required_params, is_nullable=false)
+
+    Renders one JSON-Schema-style parameter spec as a TypeScript-like type string.
+      - type "array": element type followed by "[]"; falls back to "any[]" when there is
+        no items spec, or when the rendered element type is "object | object" or longer
+        than 50 characters. " | null" is appended when param_spec.nullable is truthy.
+      - type given as a list of names: the names joined with " | ".
+      - oneOf: "any" when there is more than one variant and at least one is an object;
+        otherwise each variant rendered in turn, with its description/default appended
+        as "// ..." text and " | " between variants.
+      - "string": a union of quoted enum literals, or "string" (plus " | null" when nullable).
+      - "number" / "integer": "number".  "boolean": "boolean".
+      - "object": an inline "{ ... }" listing of properties ("?" marks properties that are
+        not in param_spec.required), or "object" when no properties are given.
+      - anything else: "any".
+    required_params is only forwarded to recursive calls and is_nullable is never read
+    by this body; both are kept so existing call sites stay valid.
+-#}
+{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%}
+    {%- if param_spec.type == "array" -%}
+        {%- if param_spec['items'] -%}
+            {%- if param_spec['items']['type'] == "string" -%}
+                {{- "string[]" }}
+            {%- elif param_spec['items']['type'] == "number" -%}
+                {{- "number[]" }}
+            {%- elif param_spec['items']['type'] == "integer" -%}
+                {{- "number[]" }}
+            {%- elif param_spec['items']['type'] == "boolean" -%}
+                {{- "boolean[]" }}
+            {%- else -%}
+                {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%}
+                {%- if inner_type == "object | object" or inner_type|length > 50 -%}
+                    {{- "any[]" }}
+                {%- else -%}
+                    {{- inner_type + "[]" }}
+                {%- endif -%}
+            {%- endif -%}
+            {%- if param_spec.nullable -%}
+                {{- " | null" }}
+            {%- endif -%}
+        {%- else -%}
+            {{- "any[]" }}
+            {%- if param_spec.nullable -%}
+                {{- " | null" }}
+            {%- endif -%}
+        {%- endif -%}
+    {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%}
+        {#- Handle array of types like ["object", "object"] from Union[dict, list] #}
+        {%- if param_spec.type | length > 1 -%}
+            {{- param_spec.type | join(" | ") }}
+        {%- else -%}
+            {{- param_spec.type[0] }}
+        {%- endif -%}
+    {%- elif param_spec.oneOf -%}
+        {#- Handle oneOf schemas - check for complex unions and fallback to any #}
+        {#- A plain set inside a for loop is scoped to the loop body and is lost at endfor, so the flag is kept on a namespace object. -#}
+        {%- set oneof_state = namespace(has_object_variants=false) -%}
+        {%- for variant in param_spec.oneOf -%}
+            {%- if variant.type == "object" -%}
+                {%- set oneof_state.has_object_variants = true -%}
+            {%- endif -%}
+        {%- endfor -%}
+        {%- if oneof_state.has_object_variants and param_spec.oneOf|length > 1 -%}
+            {{- "any" }}
+        {%- else -%}
+            {%- for variant in param_spec.oneOf -%}
+                {{- render_typescript_type(variant, required_params) -}}
+                {%- if variant.description %}
+                    {{- "// " + variant.description }}
+                {%- endif -%}
+                {%- if variant.default is defined %}
+                    {{ "// default: " + variant.default|tojson }}
+                {%- endif -%}
+                {%- if not loop.last %}
+                    {{- " | " }}
+                {% endif -%}
+            {%- endfor -%}
+        {%- endif -%}
+    {%- elif param_spec.type == "string" -%}
+        {%- if param_spec.enum -%}
+            {{- '"' + param_spec.enum|join('" | "') + '"' -}}
+        {%- else -%}
+            {{- "string" }}
+            {%- if param_spec.nullable %}
+                {{- " | null" }}
+            {%- endif -%}
+        {%- endif -%}
+    {%- elif param_spec.type == "number" -%}
+        {{- "number" }}
+    {%- elif param_spec.type == "integer" -%}
+        {{- "number" }}
+    {%- elif param_spec.type == "boolean" -%}
+        {{- "boolean" }}
+    {%- elif param_spec.type == "object" -%}
+        {%- if param_spec.properties -%}
+            {{- "{\n" }}
+            {%- for prop_name, prop_spec in param_spec.properties.items() -%}
+                {{- prop_name -}}
+                {%- if prop_name not in (param_spec.required or []) -%}
+                    {{- "?" }}
+                {%- endif -%}
+                {{- ": " }}
+                {{ render_typescript_type(prop_spec, param_spec.required or []) }}
+                {%- if not loop.last -%}
+                    {{-", " }}
+                {%- endif -%}
+            {%- endfor -%}
+            {{- "}" }}
+        {%- else -%}
+            {{- "object" }}
+        {%- endif -%}
+    {%- else -%}
+        {{- "any" }}
+    {%- endif -%}
+{%- endmacro -%}
+{#-
+    render_tools(tools)
+
+    Renders every tool as a TypeScript-like declaration:
+        // <description>
+        type <name> = (_: {
+        // <parameter description>
+        <param>[?]: <type>[, // default: <value>],
+        ...
+        }) => any;
+    Tools without parameter properties render as "type <name> = () => any;".
+    Declarations are separated by a single newline. "?" marks parameters that are not
+    listed in parameters.required. Types come from render_typescript_type.
+-#}
+{%- macro render_tools(tools) -%}
+    {%- for tool in tools %}
+        {#- Accept a flat tool spec as well as a wrapper that nests the spec under a "function" key. -#}
+        {%- set tool_spec = tool.function if tool.function is defined else tool -%}
+        {{- "// " + tool_spec.description + "\n" }}
+        {{- "type "+ tool_spec.name + " = " }}
+        {%- if tool_spec.parameters and tool_spec.parameters.properties %}
+            {{- "(_: {\n" }}
+            {%- for param_name, param_spec in tool_spec.parameters.properties.items() %}
+                {%- if param_spec.description %}
+                    {{- "// " + param_spec.description + "\n" }}
+                {%- endif %}
+                {{- param_name }}
+                {%- if param_name not in (tool_spec.parameters.required or []) -%}
+                    {{- "?" }}
+                {%- endif -%}
+                {{- ": " }}
+                {{- render_typescript_type(param_spec, tool_spec.parameters.required or []) }}
+                {%- if param_spec.default is defined -%}
+                    {#- String defaults are emitted verbatim; any other default is JSON-encoded so the concatenation cannot fail on numbers, booleans or null. -#}
+                    {%- if param_spec.enum %}
+                        {{- ", // default: " + (param_spec.default if param_spec.default is string else param_spec.default|tojson) }}
+                    {%- elif param_spec.oneOf %}
+                        {{- "// default: " + (param_spec.default if param_spec.default is string else param_spec.default|tojson) }}
+                    {%- else %}
+                        {{- ", // default: " + param_spec.default|tojson }}
+                    {%- endif -%}
+                {%- endif -%}
+                {%- if not loop.last %}
+                    {{- ",\n" }}
+                {%- else %}
+                    {{- "\n" }}
+                {%- endif -%}
+            {%- endfor %}
+            {{- "}) => any;" }}
+        {%- else -%}
+            {{- "() => any;" }}
+        {%- endif -%}
+        {%- if not loop.last -%}
+            {{- "\n" }}
+        {%- endif -%}
+    {%- endfor %}
+{%- endmacro -%}
+{#- Prompt prologue: emit the BOS token, then bind the special-token strings that the rest of the template concatenates around each turn. -#}
+{{ bos_token }}
+{%- set system_token = '<|system_start|>' -%}
+{%- set end_system_token = '<|system_end|>' -%}
+{%- set developer_token = '<|developer_start|>' -%}
+{%- set end_developer_token = '<|developer_end|>' -%}
+{%- set user_token = '<|user_start|>' -%}
+{%- set end_user_token = '<|user_end|>' -%}
+{%- set assistant_token = '<|assistant_start|>' -%}
+{%- set end_assistant_token = '<|assistant_end|>' -%}
+{%- set inner_token = '<|inner_prefix|>' -%}
+{%- set outer_token = '<|inner_suffix|>' -%}
+{%- set tool_calls_token = '<|tools_prefix|>' -%}
+{%- set end_tool_calls_token = '<|tools_suffix|>' -%}
+{%- set image_token = '<|image|>' -%}
+{#-
+    Render state carried across messages; a namespace is used because the message loop
+    mutates these flags from inside its body.
+      in_assistant             - an assistant turn has been opened and not yet closed
+      in_tool                  - a "[" list of role "tool" messages is open
+      in_inner                 - an inner (thoughts) section is open
+      waiting_for_tool_outputs - tool calls were emitted and no output has followed yet
+      assistant_format         - "string" or "mapping" once the first assistant content is seen
+-#}
+{%- set ns = namespace(in_assistant=false, in_tool=false, in_inner=false, waiting_for_tool_outputs=false, assistant_format=none) -%}
+{#- System turn: use a leading system message (string content, or a mapping with a "text" key); otherwise emit the built-in default stamped with today's date. loop_messages is what remains to be rendered. -#}
+{%- if messages and messages[0].role == 'system' -%}
+    {%- if "content" in messages[0] -%}
+        {%- if messages[0].content is string -%}
+            {{ system_token + messages[0].content + end_system_token }}
+        {%- elif messages[0].content is mapping and "text" in messages[0].content -%}
+            {{ system_token + messages[0].content.text + end_system_token }}
+        {%- else -%}
+            {{- raise_exception("Invalid system message") -}}
+        {%- endif -%}
+    {%- else -%}
+        {{- raise_exception("Invalid system message") -}}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {{ system_token + 'You are Apertus, a helpful assistant created by the SwissAI initiative.\nKnowledge cutoff: 2024-04\nCurrent date: ' + strftime_now('%Y-%m-%d') + end_system_token }}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{#- Developer turn: states whether deliberation is enabled (enable_thinking) and lists the tool declarations from render_tools, or marks tools as disabled. -#}
+{{ developer_token + 'Deliberation: ' }}
+{%- if enable_thinking is defined and enable_thinking -%}
+    {{ 'enabled\n' }}
+{%- else -%}
+    {{ 'disabled\n' }}
+{%- endif -%}
+{%- if tools is defined and tools -%}
+    {{ 'Tool Capabilities:\n' + render_tools(tools) }}
+{%- else -%}
+    {{ 'Tool Capabilities: disabled' }}
+{%- endif -%}
+{{ end_developer_token }}
+{#-
+    Message loop: renders every non-system message in order.
+      user      - closes any open tool-output list and assistant turn, then emits the user
+                  text (string content, or a mapping with "parts" of type text/image).
+      assistant - opens an assistant turn if none is open, then emits either string content
+                  or a mapping with "blocks" (thoughts / tool_calls / tool_outputs / response).
+                  The two content shapes cannot be mixed within one conversation.
+                  A message-level tool_calls list is rendered after the content.
+      tool      - appended to a bracketed, comma-separated list inside the assistant turn.
+    Tool-call arguments may be a pre-serialised JSON string or a mapping; mappings are
+    serialised with tojson before being concatenated.
+-#}
+{%- for message in loop_messages -%}
+    {%- if message.role == 'user' -%}
+        {#- A user turn ends whatever the assistant had open. -#}
+        {%- set ns.in_inner = false -%}
+        {%- if ns.in_tool -%}
+            {{ ']' }}
+            {%- set ns.in_tool = false -%}
+        {%- endif -%}
+        {%- if ns.in_assistant -%}
+            {{ end_assistant_token }}
+            {%- set ns.in_assistant = false -%}
+        {%- endif -%}
+        {%- if "content" in message -%}
+            {{ user_token }}
+            {%- if message.content is string -%}
+                {{ message.content }}
+            {%- elif message.content is mapping and "parts" in message.content -%}
+                {%- set parts = message.content.parts -%}
+                {%- for part in parts -%}
+                    {%- if part.type == "text" -%}
+                        {{ part.text }}
+                    {%- elif part.type == "image" -%}
+                        {{ image_token }}
+                    {%- else -%}
+                        {{- raise_exception("Invalid user part: " + part.type) -}}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- else -%}
+                {{- raise_exception("Invalid user message: " + message.role) -}}
+            {%- endif -%}
+            {{ end_user_token }}
+        {%- endif -%}
+    {%- elif message.role == 'assistant' -%}
+        {#- Consecutive assistant and tool messages share a single assistant turn, so it is opened only once. -#}
+        {%- if not ns.in_assistant -%}
+            {{ assistant_token }}
+            {%- set ns.in_assistant = true -%}
+        {%- endif -%}
+        {%- if "content" in message -%}
+            {%- if message.content is string and (ns.assistant_format is none or ns.assistant_format == "string") -%}
+                {%- if ns.in_tool -%}
+                    {{ ']' }}
+                    {%- set ns.in_tool = false -%}
+                {%- endif -%}
+                {%- set ns.assistant_format = "string" -%}
+                {{ message.content }}
+            {%- elif message.content is mapping and "blocks" in message.content and (ns.assistant_format is none or ns.assistant_format == "mapping") -%}
+                {%- set ns.assistant_format = "mapping" -%}
+                {%- set blocks = message.content.blocks -%}
+                {%- for block in blocks -%}
+                    {%- if block.type == 'thoughts' -%}
+                        {%- if ns.in_tool -%}
+                            {{ ']' }}
+                            {%- set ns.in_tool = false -%}
+                        {%- endif -%}
+                        {%- if not ns.in_inner -%}
+                            {%- set ns.in_inner = true -%}
+                            {{ inner_token }}
+                        {%- endif -%}
+                        {{ block.text }}
+                    {%- elif block.type == 'tool_calls' -%}
+                        {%- if ns.in_tool -%}
+                            {{ ']' }}
+                            {%- set ns.in_tool = false -%}
+                        {%- endif -%}
+                        {%- if ns.in_inner and not loop.first and block.calls|length == 1 and block.calls[0].name == 'display_answers' -%}
+                            {%- set ns.in_inner = false -%}
+                            {{ outer_token }}
+                        {%- endif -%}
+                        {{ tool_calls_token + '[' }}
+                        {%- for tool_call in block.calls -%}
+                            {#- Arguments given as a mapping are JSON-encoded; strings are assumed to be JSON already. -#}
+                            {%- set call_arguments = tool_call.arguments if tool_call.arguments is string else tool_call.arguments|tojson -%}
+                            {{- '{"' + tool_call.name + '": ' + call_arguments + '}' }}
+                            {%- if not loop.last -%}
+                                {{- ", " }}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {{ ']' + end_tool_calls_token }}
+                        {%- set ns.waiting_for_tool_outputs = true -%}
+                    {%- elif block.type == 'tool_outputs' -%}
+                        {%- if ns.in_tool -%}
+                            {{- raise_exception("Cannot have both tool outputs as separate messages and tool outputs as blocks") -}}
+                        {%- endif -%}
+                        {{ '[' }}
+                        {%- for tool_output in block.outputs -%}
+                            {{- tool_output.output }}
+                            {%- if not loop.last -%}
+                                {{- ", " }}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {{- ']' }}
+                        {%- set ns.waiting_for_tool_outputs = false -%}
+                    {%- elif block.type == 'response' -%}
+                        {%- if ns.in_tool -%}
+                            {{ ']' }}
+                            {%- set ns.in_tool = false -%}
+                        {%- endif -%}
+                        {%- if (not loop.first and ns.in_inner) or (ns.in_assistant and ns.in_inner) -%}
+                            {%- set ns.in_inner = false -%}
+                            {{ outer_token }}
+                        {%- endif -%}
+                        {{ block.text }}
+                    {%- else -%}
+                        {{- raise_exception("Invalid assistant block type: " + block.type) -}}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- else -%}
+                {{- raise_exception("Invalid assistant content") -}}
+            {%- endif -%}
+        {%- else -%}
+            {{- raise_exception("Invalid assistant message") -}}
+        {%- endif -%}
+        {#- Message-level tool_calls list; only entries of type "function" are accepted. -#}
+        {%- if "tool_calls" in message and message.tool_calls -%}
+            {{ tool_calls_token + '[' }}
+            {%- for tool_call in message.tool_calls -%}
+                {%- if tool_call.type == 'function' -%}
+                    {%- set function = tool_call.function -%}
+                    {#- Arguments given as a mapping are JSON-encoded; strings are assumed to be JSON already. -#}
+                    {%- set function_arguments = function.arguments if function.arguments is string else function.arguments|tojson -%}
+                    {{- '{"' + function.name + '": ' + function_arguments + '}' }}
+                    {%- if not loop.last -%}
+                        {{- ", " }}
+                    {%- endif -%}
+                {%- else -%}
+                    {{- raise_exception("Invalid tool call type: " + tool_call.type) -}}
+                {%- endif -%}
+            {%- endfor -%}
+            {{ ']' + end_tool_calls_token }}
+            {%- set ns.waiting_for_tool_outputs = true -%}
+        {%- endif -%}
+    {%- elif message.role == 'tool' -%}
+        {#- Tool messages are only valid inside an open assistant turn; the first one opens the list and later ones are comma-separated. -#}
+        {%- if not ns.in_assistant -%}
+            {{- raise_exception("Tool message outside of assistant") -}}
+        {%- endif -%}
+        {%- if not ns.in_tool -%}
+            {{ '[' }}
+            {%- set ns.in_tool = true -%}
+        {%- else -%}
+            {{ ", "}}
+        {%- endif -%}
+        {{ message.content }}
+        {%- set ns.waiting_for_tool_outputs = false -%}
+    {%- else -%}
+        {{- raise_exception("Invalid message role") -}}
+    {%- endif -%}
+{%- endfor -%}
+{#-
+    Epilogue: close the tool-output list if one is still open; end the assistant turn
+    unless the caller is continuing it or tool outputs are still pending; finally open a
+    fresh assistant turn when a generation prompt is requested.
+-#}
+{%- if ns.in_tool -%}{{ ']' }}{%- endif -%}
+{%- set continuing_turn = continue_assistant_message is defined and continue_assistant_message -%}
+{%- if ns.in_assistant and not (continuing_turn or ns.waiting_for_tool_outputs) -%}
+    {{ end_assistant_token }}
+{%- endif -%}
+{%- if add_generation_prompt -%}{{ assistant_token }}{%- endif -%}

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b2f7af3fb1b32a7ce631316d56c6c67e91ffa9a94529ff8f5de96fb2967c022
+size 1275392867

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3eba5392c47348ab7d81d8dffdd7d473ffb249a5c66f2f37d70d8035feb46408
+size 14645

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd7a3ff09c0928c1ed20cef557112ba0c3ee8103da6d654553c274b6cc450de8
+size 1465

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|assistant_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json ADDED Viewed

	@@ -0,0 +1,124 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 981,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.6546974384784698,
+      "epoch": 0.3058103975535168,
+      "grad_norm": 0.37161776423454285,
+      "learning_rate": 9.931806517013612e-06,
+      "loss": 2.1612,
+      "mean_token_accuracy": 0.5265661662817002,
+      "num_tokens": 41805.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.7053046183288098,
+      "epoch": 0.6116207951070336,
+      "grad_norm": 0.14000245928764343,
+      "learning_rate": 9.381208231670505e-06,
+      "loss": 0.6959,
+      "mean_token_accuracy": 0.8424646317958832,
+      "num_tokens": 84002.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.27937801614403723,
+      "epoch": 0.9174311926605505,
+      "grad_norm": 0.1330832690000534,
+      "learning_rate": 8.336447938411616e-06,
+      "loss": 0.2956,
+      "mean_token_accuracy": 0.9407258796691894,
+      "num_tokens": 125931.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.2766046752035618,
+      "epoch": 1.2232415902140672,
+      "grad_norm": 0.25764134526252747,
+      "learning_rate": 6.915365469768857e-06,
+      "loss": 0.2872,
+      "mean_token_accuracy": 0.9397406846284866,
+      "num_tokens": 168641.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.24791674077510834,
+      "epoch": 1.529051987767584,
+      "grad_norm": 0.17321506142616272,
+      "learning_rate": 5.2782465115587796e-06,
+      "loss": 0.2481,
+      "mean_token_accuracy": 0.9486746054887771,
+      "num_tokens": 210299.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.24874767586588858,
+      "epoch": 1.834862385321101,
+      "grad_norm": 0.19254006445407867,
+      "learning_rate": 3.609743778011684e-06,
+      "loss": 0.2462,
+      "mean_token_accuracy": 0.9466655850410461,
+      "num_tokens": 252718.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.23701502427458762,
+      "epoch": 2.140672782874618,
+      "grad_norm": 0.1829081028699875,
+      "learning_rate": 2.098049799001508e-06,
+      "loss": 0.2318,
+      "mean_token_accuracy": 0.9517926776409149,
+      "num_tokens": 294521.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.249193025380373,
+      "epoch": 2.4464831804281344,
+      "grad_norm": 0.19727888703346252,
+      "learning_rate": 9.136704470001101e-07,
+      "loss": 0.2398,
+      "mean_token_accuracy": 0.9476780182123185,
+      "num_tokens": 337195.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.22941243454813956,
+      "epoch": 2.7522935779816513,
+      "grad_norm": 0.21921473741531372,
+      "learning_rate": 1.9019336445009918e-07,
+      "loss": 0.222,
+      "mean_token_accuracy": 0.9518460136651993,
+      "num_tokens": 378540.0,
+      "step": 900
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 981,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.8985572221907456e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}