diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4eeacd4a3b7d757bda34333acd400906e8cd89db
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/README.md
@@ -0,0 +1,58 @@
+---
+base_model: google/gemma-4-31B
+library_name: transformers
+model_name: gemma-4-31B_original_features_structural_train_original_features_structural_test1
+tags:
+- generated_from_trainer
+- trl
+- sft
+licence: license
+---
+
+# Model Card for gemma-4-31B_original_features_structural_train_original_features_structural_test1
+
+This model is a fine-tuned version of [google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+
+## Quick start
+
+```python
+from transformers import pipeline
+
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
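+generator = pipeline("text-generation", model="DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1", device="cuda")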
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+
+## Training procedure
+
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/katriin-kukk/Cross_lingual_morphological_generalization/runs/sfblzvnx) 
+
+
+
+This model was trained with SFT.
+
+### Framework versions
+
+- TRL: 0.29.0
+- Transformers: 5.5.4
+- PyTorch: 2.10.0
+- Datasets: 4.6.1
+- Tokenizers: 0.22.2
+
+## Citations
+
+
+
+Cite TRL as:
+    
+```bibtex
+@software{vonwerra2020trl,
+  title   = {{TRL: Transformers Reinforcement Learning}},
+  author  = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
+  license = {Apache-2.0},
+  url     = {https://github.com/huggingface/trl},
+  year    = {2020}
+}
+```
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..920897064cad23ae39b98fad16bd6f3c52a68044
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/README.md
@@ -0,0 +1,58 @@
+---
+base_model: google/gemma-4-31B
+library_name: transformers
+model_name: gemma-4-31B_original_features_structural_train_original_features_structural_test2
+tags:
+- generated_from_trainer
+- trl
+- sft
+licence: license
+---
+
+# Model Card for gemma-4-31B_original_features_structural_train_original_features_structural_test2
+
+This model is a fine-tuned version of [google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+
+## Quick start
+
+```python
+from transformers import pipeline
+
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
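+generator = pipeline("text-generation", model="DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2", device="cuda")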
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+
+## Training procedure
+
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/katriin-kukk/Cross_lingual_morphological_generalization/runs/ncgnoczk) 
+
+
+
+This model was trained with SFT.
+
+### Framework versions
+
+- TRL: 0.29.0
+- Transformers: 5.5.4
+- PyTorch: 2.10.0
+- Datasets: 4.6.1
+- Tokenizers: 0.22.2
+
+## Citations
+
+
+
+Cite TRL as:
+    
+```bibtex
+@software{vonwerra2020trl,
+  title   = {{TRL: Transformers Reinforcement Learning}},
+  author  = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
+  license = {Apache-2.0},
+  url     = {https://github.com/huggingface/trl},
+  year    = {2020}
+}
+```
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for gemma-4-31B_original_features_structural_train_original_features_structural_test2 (checkpoint-1155)
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6144d96a813fa7fd1ee98cb6160f42880081fc05
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c53679e8b51a819a5c7ea83dcb846ef04d0c9fa3
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/trainer_state.json
@@ -0,0 +1,297 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1155,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 9.957948339009064e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for gemma-4-31B_original_features_structural_train_original_features_structural_test2 (checkpoint-1540)
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6144d96a813fa7fd1ee98cb6160f42880081fc05
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..43785a5a9fef645220936257116a6dff036a2eb2
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/trainer_state.json
@@ -0,0 +1,378 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 1540,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    },
+    {
+      "entropy": 0.37655152073457615,
+      "epoch": 3.116959064327485,
+      "grad_norm": 1.504384160041809,
+      "learning_rate": 0.0003313287743759729,
+      "loss": 1.3653451538085937,
+      "mean_token_accuracy": 0.8971295344769655,
+      "num_tokens": 2939773.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.37069276951253416,
+      "epoch": 3.246913580246914,
+      "grad_norm": 1.9665946960449219,
+      "learning_rate": 0.0003253772935629151,
+      "loss": 1.3458108520507812,
+      "mean_token_accuracy": 0.8982205548882485,
+      "num_tokens": 3063617.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.37295883789658546,
+      "epoch": 3.3768680961663415,
+      "grad_norm": 1.7501362562179565,
+      "learning_rate": 0.00031914855214759165,
+      "loss": 1.357562255859375,
+      "mean_token_accuracy": 0.8977113124728203,
+      "num_tokens": 3189800.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.3805788069963455,
+      "epoch": 3.50682261208577,
+      "grad_norm": 1.7277154922485352,
+      "learning_rate": 0.00031265534863374894,
+      "loss": 1.3735618591308594,
+      "mean_token_accuracy": 0.8962143072485924,
+      "num_tokens": 3311908.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.3840580120682716,
+      "epoch": 3.636777128005198,
+      "grad_norm": 2.2338802814483643,
+      "learning_rate": 0.0003059110249285165,
+      "loss": 1.3903216552734374,
+      "mean_token_accuracy": 0.8958476388454437,
+      "num_tokens": 3432934.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.37621145449578763,
+      "epoch": 3.7667316439246266,
+      "grad_norm": 1.9029661417007446,
+      "learning_rate": 0.00029892943892812944,
+      "loss": 1.3776657104492187,
+      "mean_token_accuracy": 0.8964926180243492,
+      "num_tokens": 3561408.0,
+      "step": 1450
+    },
+    {
+      "entropy": 0.3784803995490074,
+      "epoch": 3.8966861598440543,
+      "grad_norm": 2.089708089828491,
+      "learning_rate": 0.00029172493604342163,
+      "loss": 1.3816807556152344,
+      "mean_token_accuracy": 0.8962833172082901,
+      "num_tokens": 3684624.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.4351254403591156,
+      "eval_loss": 0.5814722180366516,
+      "eval_mean_token_accuracy": 0.8530604747625498,
+      "eval_num_tokens": 3779128.0,
+      "eval_runtime": 90.2232,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 1540
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.3259599564032195e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6144d96a813fa7fd1ee98cb6160f42880081fc05
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..045103965b96b67419e62ecd21dff7b58bdf1ab7
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/trainer_state.json
@@ -0,0 +1,469 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 1925,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    },
+    {
+      "entropy": 0.37655152073457615,
+      "epoch": 3.116959064327485,
+      "grad_norm": 1.504384160041809,
+      "learning_rate": 0.0003313287743759729,
+      "loss": 1.3653451538085937,
+      "mean_token_accuracy": 0.8971295344769655,
+      "num_tokens": 2939773.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.37069276951253416,
+      "epoch": 3.246913580246914,
+      "grad_norm": 1.9665946960449219,
+      "learning_rate": 0.0003253772935629151,
+      "loss": 1.3458108520507812,
+      "mean_token_accuracy": 0.8982205548882485,
+      "num_tokens": 3063617.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.37295883789658546,
+      "epoch": 3.3768680961663415,
+      "grad_norm": 1.7501362562179565,
+      "learning_rate": 0.00031914855214759165,
+      "loss": 1.357562255859375,
+      "mean_token_accuracy": 0.8977113124728203,
+      "num_tokens": 3189800.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.3805788069963455,
+      "epoch": 3.50682261208577,
+      "grad_norm": 1.7277154922485352,
+      "learning_rate": 0.00031265534863374894,
+      "loss": 1.3735618591308594,
+      "mean_token_accuracy": 0.8962143072485924,
+      "num_tokens": 3311908.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.3840580120682716,
+      "epoch": 3.636777128005198,
+      "grad_norm": 2.2338802814483643,
+      "learning_rate": 0.0003059110249285165,
+      "loss": 1.3903216552734374,
+      "mean_token_accuracy": 0.8958476388454437,
+      "num_tokens": 3432934.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.37621145449578763,
+      "epoch": 3.7667316439246266,
+      "grad_norm": 1.9029661417007446,
+      "learning_rate": 0.00029892943892812944,
+      "loss": 1.3776657104492187,
+      "mean_token_accuracy": 0.8964926180243492,
+      "num_tokens": 3561408.0,
+      "step": 1450
+    },
+    {
+      "entropy": 0.3784803995490074,
+      "epoch": 3.8966861598440543,
+      "grad_norm": 2.089708089828491,
+      "learning_rate": 0.00029172493604342163,
+      "loss": 1.3816807556152344,
+      "mean_token_accuracy": 0.8962833172082901,
+      "num_tokens": 3684624.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.4351254403591156,
+      "eval_loss": 0.5814722180366516,
+      "eval_mean_token_accuracy": 0.8530604747625498,
+      "eval_num_tokens": 3779128.0,
+      "eval_runtime": 90.2232,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 1540
+    },
+    {
+      "entropy": 0.36326556409423677,
+      "epoch": 4.025990903183885,
+      "grad_norm": 2.1354947090148926,
+      "learning_rate": 0.0002843123197235993,
+      "loss": 1.3295362854003907,
+      "mean_token_accuracy": 0.8993093811686913,
+      "num_tokens": 3804993.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.2879397062957287,
+      "epoch": 4.155945419103314,
+      "grad_norm": 2.201097011566162,
+      "learning_rate": 0.0002767068210388601,
+      "loss": 1.0272974395751953,
+      "mean_token_accuracy": 0.9182627710700035,
+      "num_tokens": 3928162.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.2848948486149311,
+      "epoch": 4.2858999350227425,
+      "grad_norm": 2.01479172706604,
+      "learning_rate": 0.000268924067384358,
+      "loss": 1.0278727722167968,
+      "mean_token_accuracy": 0.9194766515493393,
+      "num_tokens": 4049012.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.2940504560619593,
+      "epoch": 4.41585445094217,
+      "grad_norm": 2.0893027782440186,
+      "learning_rate": 0.00026098005036982003,
+      "loss": 1.0586751556396485,
+      "mean_token_accuracy": 0.9167885810136795,
+      "num_tokens": 4167845.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.293505182415247,
+      "epoch": 4.545808966861598,
+      "grad_norm": 1.6346389055252075,
+      "learning_rate": 0.0002528910929607928,
+      "loss": 1.0669570922851563,
+      "mean_token_accuracy": 0.9160876458883286,
+      "num_tokens": 4287505.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.2898535231500864,
+      "epoch": 4.675763482781027,
+      "grad_norm": 1.6645033359527588,
+      "learning_rate": 0.0002446738159390364,
+      "loss": 1.0582612609863282,
+      "mean_token_accuracy": 0.9177632886171341,
+      "num_tokens": 4412221.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.2842763290554285,
+      "epoch": 4.805717998700455,
+      "grad_norm": 2.4594268798828125,
+      "learning_rate": 0.0002363451037509798,
+      "loss": 1.0467537689208983,
+      "mean_token_accuracy": 0.9177608361840248,
+      "num_tokens": 4537178.0,
+      "step": 1850
+    },
+    {
+      "entropy": 0.284430123642087,
+      "epoch": 4.935672514619883,
+      "grad_norm": 2.1724514961242676,
+      "learning_rate": 0.00022792206981441223,
+      "loss": 1.0753899383544923,
+      "mean_token_accuracy": 0.915192686021328,
+      "num_tokens": 4664196.0,
+      "step": 1900
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.3632780872285366,
+      "eval_loss": 0.6438126564025879,
+      "eval_mean_token_accuracy": 0.8511462942338907,
+      "eval_num_tokens": 4723910.0,
+      "eval_runtime": 90.1846,
+      "eval_samples_per_second": 18.373,
+      "eval_steps_per_second": 2.306,
+      "step": 1925
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.6564080889424607e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6144d96a813fa7fd1ee98cb6160f42880081fc05
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4e91b74233d2370bd5168eda1b78cdedaca5404e
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/trainer_state.json
@@ -0,0 +1,560 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 2310,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    },
+    {
+      "entropy": 0.37655152073457615,
+      "epoch": 3.116959064327485,
+      "grad_norm": 1.504384160041809,
+      "learning_rate": 0.0003313287743759729,
+      "loss": 1.3653451538085937,
+      "mean_token_accuracy": 0.8971295344769655,
+      "num_tokens": 2939773.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.37069276951253416,
+      "epoch": 3.246913580246914,
+      "grad_norm": 1.9665946960449219,
+      "learning_rate": 0.0003253772935629151,
+      "loss": 1.3458108520507812,
+      "mean_token_accuracy": 0.8982205548882485,
+      "num_tokens": 3063617.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.37295883789658546,
+      "epoch": 3.3768680961663415,
+      "grad_norm": 1.7501362562179565,
+      "learning_rate": 0.00031914855214759165,
+      "loss": 1.357562255859375,
+      "mean_token_accuracy": 0.8977113124728203,
+      "num_tokens": 3189800.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.3805788069963455,
+      "epoch": 3.50682261208577,
+      "grad_norm": 1.7277154922485352,
+      "learning_rate": 0.00031265534863374894,
+      "loss": 1.3735618591308594,
+      "mean_token_accuracy": 0.8962143072485924,
+      "num_tokens": 3311908.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.3840580120682716,
+      "epoch": 3.636777128005198,
+      "grad_norm": 2.2338802814483643,
+      "learning_rate": 0.0003059110249285165,
+      "loss": 1.3903216552734374,
+      "mean_token_accuracy": 0.8958476388454437,
+      "num_tokens": 3432934.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.37621145449578763,
+      "epoch": 3.7667316439246266,
+      "grad_norm": 1.9029661417007446,
+      "learning_rate": 0.00029892943892812944,
+      "loss": 1.3776657104492187,
+      "mean_token_accuracy": 0.8964926180243492,
+      "num_tokens": 3561408.0,
+      "step": 1450
+    },
+    {
+      "entropy": 0.3784803995490074,
+      "epoch": 3.8966861598440543,
+      "grad_norm": 2.089708089828491,
+      "learning_rate": 0.00029172493604342163,
+      "loss": 1.3816807556152344,
+      "mean_token_accuracy": 0.8962833172082901,
+      "num_tokens": 3684624.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.4351254403591156,
+      "eval_loss": 0.5814722180366516,
+      "eval_mean_token_accuracy": 0.8530604747625498,
+      "eval_num_tokens": 3779128.0,
+      "eval_runtime": 90.2232,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 1540
+    },
+    {
+      "entropy": 0.36326556409423677,
+      "epoch": 4.025990903183885,
+      "grad_norm": 2.1354947090148926,
+      "learning_rate": 0.0002843123197235993,
+      "loss": 1.3295362854003907,
+      "mean_token_accuracy": 0.8993093811686913,
+      "num_tokens": 3804993.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.2879397062957287,
+      "epoch": 4.155945419103314,
+      "grad_norm": 2.201097011566162,
+      "learning_rate": 0.0002767068210388601,
+      "loss": 1.0272974395751953,
+      "mean_token_accuracy": 0.9182627710700035,
+      "num_tokens": 3928162.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.2848948486149311,
+      "epoch": 4.2858999350227425,
+      "grad_norm": 2.01479172706604,
+      "learning_rate": 0.000268924067384358,
+      "loss": 1.0278727722167968,
+      "mean_token_accuracy": 0.9194766515493393,
+      "num_tokens": 4049012.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.2940504560619593,
+      "epoch": 4.41585445094217,
+      "grad_norm": 2.0893027782440186,
+      "learning_rate": 0.00026098005036982003,
+      "loss": 1.0586751556396485,
+      "mean_token_accuracy": 0.9167885810136795,
+      "num_tokens": 4167845.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.293505182415247,
+      "epoch": 4.545808966861598,
+      "grad_norm": 1.6346389055252075,
+      "learning_rate": 0.0002528910929607928,
+      "loss": 1.0669570922851563,
+      "mean_token_accuracy": 0.9160876458883286,
+      "num_tokens": 4287505.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.2898535231500864,
+      "epoch": 4.675763482781027,
+      "grad_norm": 1.6645033359527588,
+      "learning_rate": 0.0002446738159390364,
+      "loss": 1.0582612609863282,
+      "mean_token_accuracy": 0.9177632886171341,
+      "num_tokens": 4412221.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.2842763290554285,
+      "epoch": 4.805717998700455,
+      "grad_norm": 2.4594268798828125,
+      "learning_rate": 0.0002363451037509798,
+      "loss": 1.0467537689208983,
+      "mean_token_accuracy": 0.9177608361840248,
+      "num_tokens": 4537178.0,
+      "step": 1850
+    },
+    {
+      "entropy": 0.284430123642087,
+      "epoch": 4.935672514619883,
+      "grad_norm": 2.1724514961242676,
+      "learning_rate": 0.00022792206981441223,
+      "loss": 1.0753899383544923,
+      "mean_token_accuracy": 0.915192686021328,
+      "num_tokens": 4664196.0,
+      "step": 1900
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.3632780872285366,
+      "eval_loss": 0.6438126564025879,
+      "eval_mean_token_accuracy": 0.8511462942338907,
+      "eval_num_tokens": 4723910.0,
+      "eval_runtime": 90.1846,
+      "eval_samples_per_second": 18.373,
+      "eval_steps_per_second": 2.306,
+      "step": 1925
+    },
+    {
+      "entropy": 0.23515464736139355,
+      "epoch": 5.064977257959714,
+      "grad_norm": 1.651587724685669,
+      "learning_rate": 0.00021942202135469513,
+      "loss": 0.8597064971923828,
+      "mean_token_accuracy": 0.9324622603517082,
+      "num_tokens": 4789568.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.1958953895419836,
+      "epoch": 5.1949317738791425,
+      "grad_norm": 1.923292636871338,
+      "learning_rate": 0.0002108624238427481,
+      "loss": 0.7188112640380859,
+      "mean_token_accuracy": 0.9416415295004845,
+      "num_tokens": 4913407.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.21068542070686816,
+      "epoch": 5.32488628979857,
+      "grad_norm": 2.299356460571289,
+      "learning_rate": 0.0002022608651078804,
+      "loss": 0.7712985229492187,
+      "mean_token_accuracy": 0.9386440163850784,
+      "num_tokens": 5032951.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.21234643168747425,
+      "epoch": 5.454840805717999,
+      "grad_norm": 2.2119295597076416,
+      "learning_rate": 0.00019363501919920608,
+      "loss": 0.7650181579589844,
+      "mean_token_accuracy": 0.938471505343914,
+      "num_tokens": 5156908.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.21658269092440605,
+      "epoch": 5.584795321637427,
+      "grad_norm": 1.5394288301467896,
+      "learning_rate": 0.00018500261006989887,
+      "loss": 0.7784209442138672,
+      "mean_token_accuracy": 0.9371598136425018,
+      "num_tokens": 5276087.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.2045296123996377,
+      "epoch": 5.714749837556855,
+      "grad_norm": 1.913680076599121,
+      "learning_rate": 0.00017638137515890763,
+      "loss": 0.7638166046142578,
+      "mean_token_accuracy": 0.9378301629424095,
+      "num_tokens": 5398787.0,
+      "step": 2200
+    },
+    {
+      "entropy": 0.20917976945638656,
+      "epoch": 5.844704353476283,
+      "grad_norm": 2.0847299098968506,
+      "learning_rate": 0.00016778902894496063,
+      "loss": 0.7631703186035156,
+      "mean_token_accuracy": 0.9387557968497277,
+      "num_tokens": 5522332.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.22262076318264007,
+      "epoch": 5.974658869395712,
+      "grad_norm": 2.1597352027893066,
+      "learning_rate": 0.0001592432265477485,
+      "loss": 0.798133773803711,
+      "mean_token_accuracy": 0.936034984588623,
+      "num_tokens": 5642361.0,
+      "step": 2300
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.31502799331568754,
+      "eval_loss": 0.7417300343513489,
+      "eval_mean_token_accuracy": 0.8477253922476218,
+      "eval_num_tokens": 5668692.0,
+      "eval_runtime": 90.4252,
+      "eval_samples_per_second": 18.325,
+      "eval_steps_per_second": 2.3,
+      "step": 2310
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.9871331143277489e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6144d96a813fa7fd1ee98cb6160f42880081fc05
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..be7d4ed0fedbbb3e6f780b12a7ff0327e3d8b947
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/trainer_state.json
@@ -0,0 +1,641 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 2695,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    },
+    {
+      "entropy": 0.37655152073457615,
+      "epoch": 3.116959064327485,
+      "grad_norm": 1.504384160041809,
+      "learning_rate": 0.0003313287743759729,
+      "loss": 1.3653451538085937,
+      "mean_token_accuracy": 0.8971295344769655,
+      "num_tokens": 2939773.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.37069276951253416,
+      "epoch": 3.246913580246914,
+      "grad_norm": 1.9665946960449219,
+      "learning_rate": 0.0003253772935629151,
+      "loss": 1.3458108520507812,
+      "mean_token_accuracy": 0.8982205548882485,
+      "num_tokens": 3063617.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.37295883789658546,
+      "epoch": 3.3768680961663415,
+      "grad_norm": 1.7501362562179565,
+      "learning_rate": 0.00031914855214759165,
+      "loss": 1.357562255859375,
+      "mean_token_accuracy": 0.8977113124728203,
+      "num_tokens": 3189800.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.3805788069963455,
+      "epoch": 3.50682261208577,
+      "grad_norm": 1.7277154922485352,
+      "learning_rate": 0.00031265534863374894,
+      "loss": 1.3735618591308594,
+      "mean_token_accuracy": 0.8962143072485924,
+      "num_tokens": 3311908.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.3840580120682716,
+      "epoch": 3.636777128005198,
+      "grad_norm": 2.2338802814483643,
+      "learning_rate": 0.0003059110249285165,
+      "loss": 1.3903216552734374,
+      "mean_token_accuracy": 0.8958476388454437,
+      "num_tokens": 3432934.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.37621145449578763,
+      "epoch": 3.7667316439246266,
+      "grad_norm": 1.9029661417007446,
+      "learning_rate": 0.00029892943892812944,
+      "loss": 1.3776657104492187,
+      "mean_token_accuracy": 0.8964926180243492,
+      "num_tokens": 3561408.0,
+      "step": 1450
+    },
+    {
+      "entropy": 0.3784803995490074,
+      "epoch": 3.8966861598440543,
+      "grad_norm": 2.089708089828491,
+      "learning_rate": 0.00029172493604342163,
+      "loss": 1.3816807556152344,
+      "mean_token_accuracy": 0.8962833172082901,
+      "num_tokens": 3684624.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.4351254403591156,
+      "eval_loss": 0.5814722180366516,
+      "eval_mean_token_accuracy": 0.8530604747625498,
+      "eval_num_tokens": 3779128.0,
+      "eval_runtime": 90.2232,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 1540
+    },
+    {
+      "entropy": 0.36326556409423677,
+      "epoch": 4.025990903183885,
+      "grad_norm": 2.1354947090148926,
+      "learning_rate": 0.0002843123197235993,
+      "loss": 1.3295362854003907,
+      "mean_token_accuracy": 0.8993093811686913,
+      "num_tokens": 3804993.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.2879397062957287,
+      "epoch": 4.155945419103314,
+      "grad_norm": 2.201097011566162,
+      "learning_rate": 0.0002767068210388601,
+      "loss": 1.0272974395751953,
+      "mean_token_accuracy": 0.9182627710700035,
+      "num_tokens": 3928162.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.2848948486149311,
+      "epoch": 4.2858999350227425,
+      "grad_norm": 2.01479172706604,
+      "learning_rate": 0.000268924067384358,
+      "loss": 1.0278727722167968,
+      "mean_token_accuracy": 0.9194766515493393,
+      "num_tokens": 4049012.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.2940504560619593,
+      "epoch": 4.41585445094217,
+      "grad_norm": 2.0893027782440186,
+      "learning_rate": 0.00026098005036982003,
+      "loss": 1.0586751556396485,
+      "mean_token_accuracy": 0.9167885810136795,
+      "num_tokens": 4167845.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.293505182415247,
+      "epoch": 4.545808966861598,
+      "grad_norm": 1.6346389055252075,
+      "learning_rate": 0.0002528910929607928,
+      "loss": 1.0669570922851563,
+      "mean_token_accuracy": 0.9160876458883286,
+      "num_tokens": 4287505.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.2898535231500864,
+      "epoch": 4.675763482781027,
+      "grad_norm": 1.6645033359527588,
+      "learning_rate": 0.0002446738159390364,
+      "loss": 1.0582612609863282,
+      "mean_token_accuracy": 0.9177632886171341,
+      "num_tokens": 4412221.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.2842763290554285,
+      "epoch": 4.805717998700455,
+      "grad_norm": 2.4594268798828125,
+      "learning_rate": 0.0002363451037509798,
+      "loss": 1.0467537689208983,
+      "mean_token_accuracy": 0.9177608361840248,
+      "num_tokens": 4537178.0,
+      "step": 1850
+    },
+    {
+      "entropy": 0.284430123642087,
+      "epoch": 4.935672514619883,
+      "grad_norm": 2.1724514961242676,
+      "learning_rate": 0.00022792206981441223,
+      "loss": 1.0753899383544923,
+      "mean_token_accuracy": 0.915192686021328,
+      "num_tokens": 4664196.0,
+      "step": 1900
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.3632780872285366,
+      "eval_loss": 0.6438126564025879,
+      "eval_mean_token_accuracy": 0.8511462942338907,
+      "eval_num_tokens": 4723910.0,
+      "eval_runtime": 90.1846,
+      "eval_samples_per_second": 18.373,
+      "eval_steps_per_second": 2.306,
+      "step": 1925
+    },
+    {
+      "entropy": 0.23515464736139355,
+      "epoch": 5.064977257959714,
+      "grad_norm": 1.651587724685669,
+      "learning_rate": 0.00021942202135469513,
+      "loss": 0.8597064971923828,
+      "mean_token_accuracy": 0.9324622603517082,
+      "num_tokens": 4789568.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.1958953895419836,
+      "epoch": 5.1949317738791425,
+      "grad_norm": 1.923292636871338,
+      "learning_rate": 0.0002108624238427481,
+      "loss": 0.7188112640380859,
+      "mean_token_accuracy": 0.9416415295004845,
+      "num_tokens": 4913407.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.21068542070686816,
+      "epoch": 5.32488628979857,
+      "grad_norm": 2.299356460571289,
+      "learning_rate": 0.0002022608651078804,
+      "loss": 0.7712985229492187,
+      "mean_token_accuracy": 0.9386440163850784,
+      "num_tokens": 5032951.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.21234643168747425,
+      "epoch": 5.454840805717999,
+      "grad_norm": 2.2119295597076416,
+      "learning_rate": 0.00019363501919920608,
+      "loss": 0.7650181579589844,
+      "mean_token_accuracy": 0.938471505343914,
+      "num_tokens": 5156908.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.21658269092440605,
+      "epoch": 5.584795321637427,
+      "grad_norm": 1.5394288301467896,
+      "learning_rate": 0.00018500261006989887,
+      "loss": 0.7784209442138672,
+      "mean_token_accuracy": 0.9371598136425018,
+      "num_tokens": 5276087.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.2045296123996377,
+      "epoch": 5.714749837556855,
+      "grad_norm": 1.913680076599121,
+      "learning_rate": 0.00017638137515890763,
+      "loss": 0.7638166046142578,
+      "mean_token_accuracy": 0.9378301629424095,
+      "num_tokens": 5398787.0,
+      "step": 2200
+    },
+    {
+      "entropy": 0.20917976945638656,
+      "epoch": 5.844704353476283,
+      "grad_norm": 2.0847299098968506,
+      "learning_rate": 0.00016778902894496063,
+      "loss": 0.7631703186035156,
+      "mean_token_accuracy": 0.9387557968497277,
+      "num_tokens": 5522332.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.22262076318264007,
+      "epoch": 5.974658869395712,
+      "grad_norm": 2.1597352027893066,
+      "learning_rate": 0.0001592432265477485,
+      "loss": 0.798133773803711,
+      "mean_token_accuracy": 0.936034984588623,
+      "num_tokens": 5642361.0,
+      "step": 2300
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.31502799331568754,
+      "eval_loss": 0.7417300343513489,
+      "eval_mean_token_accuracy": 0.8477253922476218,
+      "eval_num_tokens": 5668692.0,
+      "eval_runtime": 90.4252,
+      "eval_samples_per_second": 18.325,
+      "eval_steps_per_second": 2.3,
+      "step": 2310
+    },
+    {
+      "entropy": 0.16796037876725795,
+      "epoch": 6.1039636127355426,
+      "grad_norm": 2.2228569984436035,
+      "learning_rate": 0.00015076152745107442,
+      "loss": 0.5835284805297851,
+      "mean_token_accuracy": 0.9529892874123463,
+      "num_tokens": 5766129.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.14919219192117453,
+      "epoch": 6.23391812865497,
+      "grad_norm": 1.408840298652649,
+      "learning_rate": 0.00014236135942251215,
+      "loss": 0.5310631561279296,
+      "mean_token_accuracy": 0.9586454060673714,
+      "num_tokens": 5888746.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.1499051059409976,
+      "epoch": 6.363872644574399,
+      "grad_norm": 1.8611102104187012,
+      "learning_rate": 0.00013405998270370849,
+      "loss": 0.5127810668945313,
+      "mean_token_accuracy": 0.9591325157880783,
+      "num_tokens": 6014455.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.15334193099290133,
+      "epoch": 6.493827160493828,
+      "grad_norm": 1.6051015853881836,
+      "learning_rate": 0.00012587445454490892,
+      "loss": 0.5349758529663086,
+      "mean_token_accuracy": 0.9574431091547012,
+      "num_tokens": 6141229.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.15982334002852439,
+      "epoch": 6.623781676413255,
+      "grad_norm": 3.7065205574035645,
+      "learning_rate": 0.00011782159415658008,
+      "loss": 0.5602469253540039,
+      "mean_token_accuracy": 0.9555372184515,
+      "num_tokens": 6257983.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.16072992872446776,
+      "epoch": 6.753736192332683,
+      "grad_norm": 2.282320976257324,
+      "learning_rate": 0.00010991794815014401,
+      "loss": 0.5657939910888672,
+      "mean_token_accuracy": 0.9550630164146423,
+      "num_tokens": 6376198.0,
+      "step": 2600
+    },
+    {
+      "entropy": 0.1512781011685729,
+      "epoch": 6.883690708252112,
+      "grad_norm": 1.3716893196105957,
+      "learning_rate": 0.00010217975653883603,
+      "loss": 0.5340792465209961,
+      "mean_token_accuracy": 0.9578188157081604,
+      "num_tokens": 6502526.0,
+      "step": 2650
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.2444461930829745,
+      "eval_loss": 0.8798949718475342,
+      "eval_mean_token_accuracy": 0.8457763839799625,
+      "eval_num_tokens": 6613474.0,
+      "eval_runtime": 90.2868,
+      "eval_samples_per_second": 18.353,
+      "eval_steps_per_second": 2.304,
+      "step": 2695
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.31810912445653e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6144d96a813fa7fd1ee98cb6160f42880081fc05
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..957ae0bacf105fc23db7704b7d2020b1b2b6b335
--- /dev/null
+++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/trainer_state.json
@@ -0,0 +1,732 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 8.0,
+  "eval_steps": 500,
+  "global_step": 3080,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    },
+    {
+      "entropy": 0.37655152073457615,
+      "epoch": 3.116959064327485,
+      "grad_norm": 1.504384160041809,
+      "learning_rate": 0.0003313287743759729,
+      "loss": 1.3653451538085937,
+      "mean_token_accuracy": 0.8971295344769655,
+      "num_tokens": 2939773.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.37069276951253416,
+      "epoch": 3.246913580246914,
+      "grad_norm": 1.9665946960449219,
+      "learning_rate": 0.0003253772935629151,
+      "loss": 1.3458108520507812,
+      "mean_token_accuracy": 0.8982205548882485,
+      "num_tokens": 3063617.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.37295883789658546,
+      "epoch": 3.3768680961663415,
+      "grad_norm": 1.7501362562179565,
+      "learning_rate": 0.00031914855214759165,
+      "loss": 1.357562255859375,
+      "mean_token_accuracy": 0.8977113124728203,
+      "num_tokens": 3189800.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.3805788069963455,
+      "epoch": 3.50682261208577,
+      "grad_norm": 1.7277154922485352,
+      "learning_rate": 0.00031265534863374894,
+      "loss": 1.3735618591308594,
+      "mean_token_accuracy": 0.8962143072485924,
+      "num_tokens": 3311908.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.3840580120682716,
+      "epoch": 3.636777128005198,
+      "grad_norm": 2.2338802814483643,
+      "learning_rate": 0.0003059110249285165,
+      "loss": 1.3903216552734374,
+      "mean_token_accuracy": 0.8958476388454437,
+      "num_tokens": 3432934.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.37621145449578763,
+      "epoch": 3.7667316439246266,
+      "grad_norm": 1.9029661417007446,
+      "learning_rate": 0.00029892943892812944,
+      "loss": 1.3776657104492187,
+      "mean_token_accuracy": 0.8964926180243492,
+      "num_tokens": 3561408.0,
+      "step": 1450
+    },
+    {
+      "entropy": 0.3784803995490074,
+      "epoch": 3.8966861598440543,
+      "grad_norm": 2.089708089828491,
+      "learning_rate": 0.00029172493604342163,
+      "loss": 1.3816807556152344,
+      "mean_token_accuracy": 0.8962833172082901,
+      "num_tokens": 3684624.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.4351254403591156,
+      "eval_loss": 0.5814722180366516,
+      "eval_mean_token_accuracy": 0.8530604747625498,
+      "eval_num_tokens": 3779128.0,
+      "eval_runtime": 90.2232,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 1540
+    },
+    {
+      "entropy": 0.36326556409423677,
+      "epoch": 4.025990903183885,
+      "grad_norm": 2.1354947090148926,
+      "learning_rate": 0.0002843123197235993,
+      "loss": 1.3295362854003907,
+      "mean_token_accuracy": 0.8993093811686913,
+      "num_tokens": 3804993.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.2879397062957287,
+      "epoch": 4.155945419103314,
+      "grad_norm": 2.201097011566162,
+      "learning_rate": 0.0002767068210388601,
+      "loss": 1.0272974395751953,
+      "mean_token_accuracy": 0.9182627710700035,
+      "num_tokens": 3928162.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.2848948486149311,
+      "epoch": 4.2858999350227425,
+      "grad_norm": 2.01479172706604,
+      "learning_rate": 0.000268924067384358,
+      "loss": 1.0278727722167968,
+      "mean_token_accuracy": 0.9194766515493393,
+      "num_tokens": 4049012.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.2940504560619593,
+      "epoch": 4.41585445094217,
+      "grad_norm": 2.0893027782440186,
+      "learning_rate": 0.00026098005036982003,
+      "loss": 1.0586751556396485,
+      "mean_token_accuracy": 0.9167885810136795,
+      "num_tokens": 4167845.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.293505182415247,
+      "epoch": 4.545808966861598,
+      "grad_norm": 1.6346389055252075,
+      "learning_rate": 0.0002528910929607928,
+      "loss": 1.0669570922851563,
+      "mean_token_accuracy": 0.9160876458883286,
+      "num_tokens": 4287505.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.2898535231500864,
+      "epoch": 4.675763482781027,
+      "grad_norm": 1.6645033359527588,
+      "learning_rate": 0.0002446738159390364,
+      "loss": 1.0582612609863282,
+      "mean_token_accuracy": 0.9177632886171341,
+      "num_tokens": 4412221.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.2842763290554285,
+      "epoch": 4.805717998700455,
+      "grad_norm": 2.4594268798828125,
+      "learning_rate": 0.0002363451037509798,
+      "loss": 1.0467537689208983,
+      "mean_token_accuracy": 0.9177608361840248,
+      "num_tokens": 4537178.0,
+      "step": 1850
+    },
+    {
+      "entropy": 0.284430123642087,
+      "epoch": 4.935672514619883,
+      "grad_norm": 2.1724514961242676,
+      "learning_rate": 0.00022792206981441223,
+      "loss": 1.0753899383544923,
+      "mean_token_accuracy": 0.915192686021328,
+      "num_tokens": 4664196.0,
+      "step": 1900
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.3632780872285366,
+      "eval_loss": 0.6438126564025879,
+      "eval_mean_token_accuracy": 0.8511462942338907,
+      "eval_num_tokens": 4723910.0,
+      "eval_runtime": 90.1846,
+      "eval_samples_per_second": 18.373,
+      "eval_steps_per_second": 2.306,
+      "step": 1925
+    },
+    {
+      "entropy": 0.23515464736139355,
+      "epoch": 5.064977257959714,
+      "grad_norm": 1.651587724685669,
+      "learning_rate": 0.00021942202135469513,
+      "loss": 0.8597064971923828,
+      "mean_token_accuracy": 0.9324622603517082,
+      "num_tokens": 4789568.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.1958953895419836,
+      "epoch": 5.1949317738791425,
+      "grad_norm": 1.923292636871338,
+      "learning_rate": 0.0002108624238427481,
+      "loss": 0.7188112640380859,
+      "mean_token_accuracy": 0.9416415295004845,
+      "num_tokens": 4913407.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.21068542070686816,
+      "epoch": 5.32488628979857,
+      "grad_norm": 2.299356460571289,
+      "learning_rate": 0.0002022608651078804,
+      "loss": 0.7712985229492187,
+      "mean_token_accuracy": 0.9386440163850784,
+      "num_tokens": 5032951.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.21234643168747425,
+      "epoch": 5.454840805717999,
+      "grad_norm": 2.2119295597076416,
+      "learning_rate": 0.00019363501919920608,
+      "loss": 0.7650181579589844,
+      "mean_token_accuracy": 0.938471505343914,
+      "num_tokens": 5156908.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.21658269092440605,
+      "epoch": 5.584795321637427,
+      "grad_norm": 1.5394288301467896,
+      "learning_rate": 0.00018500261006989887,
+      "loss": 0.7784209442138672,
+      "mean_token_accuracy": 0.9371598136425018,
+      "num_tokens": 5276087.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.2045296123996377,
+      "epoch": 5.714749837556855,
+      "grad_norm": 1.913680076599121,
+      "learning_rate": 0.00017638137515890763,
+      "loss": 0.7638166046142578,
+      "mean_token_accuracy": 0.9378301629424095,
+      "num_tokens": 5398787.0,
+      "step": 2200
+    },
+    {
+      "entropy": 0.20917976945638656,
+      "epoch": 5.844704353476283,
+      "grad_norm": 2.0847299098968506,
+      "learning_rate": 0.00016778902894496063,
+      "loss": 0.7631703186035156,
+      "mean_token_accuracy": 0.9387557968497277,
+      "num_tokens": 5522332.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.22262076318264007,
+      "epoch": 5.974658869395712,
+      "grad_norm": 2.1597352027893066,
+      "learning_rate": 0.0001592432265477485,
+      "loss": 0.798133773803711,
+      "mean_token_accuracy": 0.936034984588623,
+      "num_tokens": 5642361.0,
+      "step": 2300
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.31502799331568754,
+      "eval_loss": 0.7417300343513489,
+      "eval_mean_token_accuracy": 0.8477253922476218,
+      "eval_num_tokens": 5668692.0,
+      "eval_runtime": 90.4252,
+      "eval_samples_per_second": 18.325,
+      "eval_steps_per_second": 2.3,
+      "step": 2310
+    },
+    {
+      "entropy": 0.16796037876725795,
+      "epoch": 6.1039636127355426,
+      "grad_norm": 2.2228569984436035,
+      "learning_rate": 0.00015076152745107442,
+      "loss": 0.5835284805297851,
+      "mean_token_accuracy": 0.9529892874123463,
+      "num_tokens": 5766129.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.14919219192117453,
+      "epoch": 6.23391812865497,
+      "grad_norm": 1.408840298652649,
+      "learning_rate": 0.00014236135942251215,
+      "loss": 0.5310631561279296,
+      "mean_token_accuracy": 0.9586454060673714,
+      "num_tokens": 5888746.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.1499051059409976,
+      "epoch": 6.363872644574399,
+      "grad_norm": 1.8611102104187012,
+      "learning_rate": 0.00013405998270370849,
+      "loss": 0.5127810668945313,
+      "mean_token_accuracy": 0.9591325157880783,
+      "num_tokens": 6014455.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.15334193099290133,
+      "epoch": 6.493827160493828,
+      "grad_norm": 1.6051015853881836,
+      "learning_rate": 0.00012587445454490892,
+      "loss": 0.5349758529663086,
+      "mean_token_accuracy": 0.9574431091547012,
+      "num_tokens": 6141229.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.15982334002852439,
+      "epoch": 6.623781676413255,
+      "grad_norm": 3.7065205574035645,
+      "learning_rate": 0.00011782159415658008,
+      "loss": 0.5602469253540039,
+      "mean_token_accuracy": 0.9555372184515,
+      "num_tokens": 6257983.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.16072992872446776,
+      "epoch": 6.753736192332683,
+      "grad_norm": 2.282320976257324,
+      "learning_rate": 0.00010991794815014401,
+      "loss": 0.5657939910888672,
+      "mean_token_accuracy": 0.9550630164146423,
+      "num_tokens": 6376198.0,
+      "step": 2600
+    },
+    {
+      "entropy": 0.1512781011685729,
+      "epoch": 6.883690708252112,
+      "grad_norm": 1.3716893196105957,
+      "learning_rate": 0.00010217975653883603,
+      "loss": 0.5340792465209961,
+      "mean_token_accuracy": 0.9578188157081604,
+      "num_tokens": 6502526.0,
+      "step": 2650
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.2444461930829745,
+      "eval_loss": 0.8798949718475342,
+      "eval_mean_token_accuracy": 0.8457763839799625,
+      "eval_num_tokens": 6613474.0,
+      "eval_runtime": 90.2868,
+      "eval_samples_per_second": 18.353,
+      "eval_steps_per_second": 2.304,
+      "step": 2695
+    },
+    {
+      "entropy": 0.1444593005668578,
+      "epoch": 7.012995451591943,
+      "grad_norm": 1.0965569019317627,
+      "learning_rate": 9.462291936854386e-05,
+      "loss": 0.511833839416504,
+      "mean_token_accuracy": 0.9595773016388093,
+      "num_tokens": 6626464.0,
+      "step": 2700
+    },
+    {
+      "entropy": 0.10985541097819805,
+      "epoch": 7.142949967511371,
+      "grad_norm": 1.8079149723052979,
+      "learning_rate": 8.726296404719584e-05,
+      "loss": 0.3876673126220703,
+      "mean_token_accuracy": 0.9704919803142548,
+      "num_tokens": 6746276.0,
+      "step": 2750
+    },
+    {
+      "entropy": 0.11304264679551125,
+      "epoch": 7.272904483430799,
+      "grad_norm": 1.5228444337844849,
+      "learning_rate": 8.01150134398253e-05,
+      "loss": 0.39335052490234373,
+      "mean_token_accuracy": 0.9695766788721084,
+      "num_tokens": 6868131.0,
+      "step": 2800
+    },
+    {
+      "entropy": 0.11066193280741572,
+      "epoch": 7.402858999350228,
+      "grad_norm": 2.265174388885498,
+      "learning_rate": 7.319375479487112e-05,
+      "loss": 0.38289966583251955,
+      "mean_token_accuracy": 0.9707033503055572,
+      "num_tokens": 6993803.0,
+      "step": 2850
+    },
+    {
+      "entropy": 0.12022399662062526,
+      "epoch": 7.532813515269655,
+      "grad_norm": 1.0657345056533813,
+      "learning_rate": 6.65134095655596e-05,
+      "loss": 0.4089087677001953,
+      "mean_token_accuracy": 0.9689779531955719,
+      "num_tokens": 7113063.0,
+      "step": 2900
+    },
+    {
+      "entropy": 0.11429863104596734,
+      "epoch": 7.662768031189084,
+      "grad_norm": 1.3440358638763428,
+      "learning_rate": 6.008770418837973e-05,
+      "loss": 0.3935198593139648,
+      "mean_token_accuracy": 0.9698223957419395,
+      "num_tokens": 7237174.0,
+      "step": 2950
+    },
+    {
+      "entropy": 0.11748226622119545,
+      "epoch": 7.792722547108512,
+      "grad_norm": 1.4607034921646118,
+      "learning_rate": 5.3929841878693804e-05,
+      "loss": 0.40399799346923826,
+      "mean_token_accuracy": 0.9695871344208717,
+      "num_tokens": 7357301.0,
+      "step": 3000
+    },
+    {
+      "entropy": 0.11790506653487683,
+      "epoch": 7.92267706302794,
+      "grad_norm": 1.4574708938598633,
+      "learning_rate": 4.805247550143646e-05,
+      "loss": 0.4049314880371094,
+      "mean_token_accuracy": 0.9693469110131264,
+      "num_tokens": 7482431.0,
+      "step": 3050
+    },
+    {
+      "epoch": 8.0,
+      "eval_entropy": 0.2104659411483086,
+      "eval_loss": 0.9939886927604675,
+      "eval_mean_token_accuracy": 0.8444042455118436,
+      "eval_num_tokens": 7558256.0,
+      "eval_runtime": 90.3118,
+      "eval_samples_per_second": 18.348,
+      "eval_steps_per_second": 2.303,
+      "step": 3080
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.648642717750723e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ef7cbd509e64b6efe77e24be8cbe43639e5af314
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/README.md
@@ -0,0 +1,58 @@
+---
+base_model: google/gemma-4-31B
+library_name: transformers
+model_name: gemma-4-31B_original_features_structural_train_original_features_structural_test1
+tags:
+- generated_from_trainer
+- sft
+- trl
+licence: license
+---
+
+# Model Card for gemma-4-31B_original_features_structural_train_original_features_structural_test1
+
+This model is a fine-tuned version of [google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+
+## Quick start
+
+```python
+from transformers import pipeline
+
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+
+## Training procedure
+
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/katriin-kukk/Cross_lingual_morphological_generalization/runs/rfqns0wc) 
+
+
+
+This model was trained with SFT.
+
+### Framework versions
+
+- TRL: 0.29.0
+- Transformers: 5.5.4
+- Pytorch: 2.10.0
+- Datasets: 4.6.1
+- Tokenizers: 0.22.2
+
+## Citations
+
+
+
+Cite TRL as:
+    
+```bibtex
+@software{vonwerra2020trl,
+  title   = {{TRL: Transformers Reinforcement Learning}},
+  author  = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
+  license = {Apache-2.0},
+  url     = {https://github.com/huggingface/trl},
+  year    = {2020}
+}
+```
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2148faba04b3eea9d8bc79cdd2f52c92b8cda9e7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..5f86489f7a280a3e03cdc012c40d12d1f59c248d
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/trainer_state.json
@@ -0,0 +1,287 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1122,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.979346498185751e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2148faba04b3eea9d8bc79cdd2f52c92b8cda9e7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..0b1dfc79f7cdd73124f909152ced65a01eb82b33
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/trainer_state.json
@@ -0,0 +1,368 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 1496,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.1971161045794035e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2148faba04b3eea9d8bc79cdd2f52c92b8cda9e7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..02c25f2f0a17ce8a75e4dd95cf316c9e758e6736
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/trainer_state.json
@@ -0,0 +1,459 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 1870,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    },
+    {
+      "entropy": 0.27893446536377225,
+      "epoch": 4.010709504685408,
+      "grad_norm": 1.545491337776184,
+      "learning_rate": 9.30742431881587e-05,
+      "loss": 1.0577442169189453,
+      "mean_token_accuracy": 0.9191552999645772,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.19769302535802125,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.10296893119812,
+      "learning_rate": 9.052568051799083e-05,
+      "loss": 0.7461458587646485,
+      "mean_token_accuracy": 0.9415343621373177,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.1981763695180416,
+      "epoch": 4.278447121820616,
+      "grad_norm": 2.067410945892334,
+      "learning_rate": 8.791533352632524e-05,
+      "loss": 0.7580889892578125,
+      "mean_token_accuracy": 0.9396374526619912,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.19850988369435071,
+      "epoch": 4.412315930388219,
+      "grad_norm": 1.9034850597381592,
+      "learning_rate": 8.524888591065258e-05,
+      "loss": 0.7526986694335938,
+      "mean_token_accuracy": 0.9402479353547096,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.19905407220125199,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.1477949619293213,
+      "learning_rate": 8.253214352041379e-05,
+      "loss": 0.7603612518310547,
+      "mean_token_accuracy": 0.9396576225757599,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.20251497332006693,
+      "epoch": 4.680053547523427,
+      "grad_norm": 1.5489246845245361,
+      "learning_rate": 7.97710217155036e-05,
+      "loss": 0.7711930084228515,
+      "mean_token_accuracy": 0.9400961664319039,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.1991352306306362,
+      "epoch": 4.813922356091031,
+      "grad_norm": 1.969994068145752,
+      "learning_rate": 7.697153248632946e-05,
+      "loss": 0.7681967163085938,
+      "mean_token_accuracy": 0.9399621617794037,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.20229352474212647,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.2329719066619873,
+      "learning_rate": 7.41397713634694e-05,
+      "loss": 0.7733911895751953,
+      "mean_token_accuracy": 0.9396535342931748,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.270584502145648,
+      "eval_loss": 0.6255385875701904,
+      "eval_mean_token_accuracy": 0.8687835082411766,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.6331,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.07,
+      "step": 1870
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.4947622783933181e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2148faba04b3eea9d8bc79cdd2f52c92b8cda9e7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..33cc61dc8bb5cddf4d6195fc66edf795c2ce13e8
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/trainer_state.json
@@ -0,0 +1,540 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 2244,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    },
+    {
+      "entropy": 0.27893446536377225,
+      "epoch": 4.010709504685408,
+      "grad_norm": 1.545491337776184,
+      "learning_rate": 9.30742431881587e-05,
+      "loss": 1.0577442169189453,
+      "mean_token_accuracy": 0.9191552999645772,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.19769302535802125,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.10296893119812,
+      "learning_rate": 9.052568051799083e-05,
+      "loss": 0.7461458587646485,
+      "mean_token_accuracy": 0.9415343621373177,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.1981763695180416,
+      "epoch": 4.278447121820616,
+      "grad_norm": 2.067410945892334,
+      "learning_rate": 8.791533352632524e-05,
+      "loss": 0.7580889892578125,
+      "mean_token_accuracy": 0.9396374526619912,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.19850988369435071,
+      "epoch": 4.412315930388219,
+      "grad_norm": 1.9034850597381592,
+      "learning_rate": 8.524888591065258e-05,
+      "loss": 0.7526986694335938,
+      "mean_token_accuracy": 0.9402479353547096,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.19905407220125199,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.1477949619293213,
+      "learning_rate": 8.253214352041379e-05,
+      "loss": 0.7603612518310547,
+      "mean_token_accuracy": 0.9396576225757599,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.20251497332006693,
+      "epoch": 4.680053547523427,
+      "grad_norm": 1.5489246845245361,
+      "learning_rate": 7.97710217155036e-05,
+      "loss": 0.7711930084228515,
+      "mean_token_accuracy": 0.9400961664319039,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.1991352306306362,
+      "epoch": 4.813922356091031,
+      "grad_norm": 1.969994068145752,
+      "learning_rate": 7.697153248632946e-05,
+      "loss": 0.7681967163085938,
+      "mean_token_accuracy": 0.9399621617794037,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.20229352474212647,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.2329719066619873,
+      "learning_rate": 7.41397713634694e-05,
+      "loss": 0.7733911895751953,
+      "mean_token_accuracy": 0.9396535342931748,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.270584502145648,
+      "eval_loss": 0.6255385875701904,
+      "eval_mean_token_accuracy": 0.8687835082411766,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.6331,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.07,
+      "step": 1870
+    },
+    {
+      "entropy": 0.16372355209155517,
+      "epoch": 5.080321285140562,
+      "grad_norm": 8.029130935668945,
+      "learning_rate": 7.128190414543193e-05,
+      "loss": 0.6145073699951172,
+      "mean_token_accuracy": 0.9516371590922578,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14057113960385323,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.23626446723938,
+      "learning_rate": 6.840415347341672e-05,
+      "loss": 0.5295140075683594,
+      "mean_token_accuracy": 0.9593333688378334,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.14139273861423135,
+      "epoch": 5.34805890227577,
+      "grad_norm": 2.0157318115234375,
+      "learning_rate": 6.551278528230729e-05,
+      "loss": 0.5296827697753906,
+      "mean_token_accuracy": 0.9590813705325126,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.14537794288247824,
+      "epoch": 5.481927710843373,
+      "grad_norm": 1.5371013879776,
+      "learning_rate": 6.261409515739736e-05,
+      "loss": 0.5478645706176758,
+      "mean_token_accuracy": 0.9577724316716194,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.14534839443862438,
+      "epoch": 5.615796519410977,
+      "grad_norm": 2.0134589672088623,
+      "learning_rate": 5.971439462655727e-05,
+      "loss": 0.5426230239868164,
+      "mean_token_accuracy": 0.9581041479110718,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.14614912170916797,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.286437749862671,
+      "learning_rate": 5.6819997417687274e-05,
+      "loss": 0.5487421798706055,
+      "mean_token_accuracy": 0.9563529288768768,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.13987606402486563,
+      "epoch": 5.883534136546185,
+      "grad_norm": 1.7586702108383179,
+      "learning_rate": 5.393720571138079e-05,
+      "loss": 0.5254617309570313,
+      "mean_token_accuracy": 0.9590577334165573,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2240281231701374,
+      "eval_loss": 0.7485206723213196,
+      "eval_mean_token_accuracy": 0.8668996468186378,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.4089,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.074,
+      "step": 2244
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.7914914724245857e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2148faba04b3eea9d8bc79cdd2f52c92b8cda9e7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..5433ad385e82359d7b5946b9979d051f55eeeb93
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/trainer_state.json
@@ -0,0 +1,631 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 2618,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    },
+    {
+      "entropy": 0.27893446536377225,
+      "epoch": 4.010709504685408,
+      "grad_norm": 1.545491337776184,
+      "learning_rate": 9.30742431881587e-05,
+      "loss": 1.0577442169189453,
+      "mean_token_accuracy": 0.9191552999645772,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.19769302535802125,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.10296893119812,
+      "learning_rate": 9.052568051799083e-05,
+      "loss": 0.7461458587646485,
+      "mean_token_accuracy": 0.9415343621373177,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.1981763695180416,
+      "epoch": 4.278447121820616,
+      "grad_norm": 2.067410945892334,
+      "learning_rate": 8.791533352632524e-05,
+      "loss": 0.7580889892578125,
+      "mean_token_accuracy": 0.9396374526619912,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.19850988369435071,
+      "epoch": 4.412315930388219,
+      "grad_norm": 1.9034850597381592,
+      "learning_rate": 8.524888591065258e-05,
+      "loss": 0.7526986694335938,
+      "mean_token_accuracy": 0.9402479353547096,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.19905407220125199,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.1477949619293213,
+      "learning_rate": 8.253214352041379e-05,
+      "loss": 0.7603612518310547,
+      "mean_token_accuracy": 0.9396576225757599,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.20251497332006693,
+      "epoch": 4.680053547523427,
+      "grad_norm": 1.5489246845245361,
+      "learning_rate": 7.97710217155036e-05,
+      "loss": 0.7711930084228515,
+      "mean_token_accuracy": 0.9400961664319039,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.1991352306306362,
+      "epoch": 4.813922356091031,
+      "grad_norm": 1.969994068145752,
+      "learning_rate": 7.697153248632946e-05,
+      "loss": 0.7681967163085938,
+      "mean_token_accuracy": 0.9399621617794037,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.20229352474212647,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.2329719066619873,
+      "learning_rate": 7.41397713634694e-05,
+      "loss": 0.7733911895751953,
+      "mean_token_accuracy": 0.9396535342931748,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.270584502145648,
+      "eval_loss": 0.6255385875701904,
+      "eval_mean_token_accuracy": 0.8687835082411766,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.6331,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.07,
+      "step": 1870
+    },
+    {
+      "entropy": 0.16372355209155517,
+      "epoch": 5.080321285140562,
+      "grad_norm": 8.029130935668945,
+      "learning_rate": 7.128190414543193e-05,
+      "loss": 0.6145073699951172,
+      "mean_token_accuracy": 0.9516371590922578,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14057113960385323,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.23626446723938,
+      "learning_rate": 6.840415347341672e-05,
+      "loss": 0.5295140075683594,
+      "mean_token_accuracy": 0.9593333688378334,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.14139273861423135,
+      "epoch": 5.34805890227577,
+      "grad_norm": 2.0157318115234375,
+      "learning_rate": 6.551278528230729e-05,
+      "loss": 0.5296827697753906,
+      "mean_token_accuracy": 0.9590813705325126,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.14537794288247824,
+      "epoch": 5.481927710843373,
+      "grad_norm": 1.5371013879776,
+      "learning_rate": 6.261409515739736e-05,
+      "loss": 0.5478645706176758,
+      "mean_token_accuracy": 0.9577724316716194,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.14534839443862438,
+      "epoch": 5.615796519410977,
+      "grad_norm": 2.0134589672088623,
+      "learning_rate": 5.971439462655727e-05,
+      "loss": 0.5426230239868164,
+      "mean_token_accuracy": 0.9581041479110718,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.14614912170916797,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.286437749862671,
+      "learning_rate": 5.6819997417687274e-05,
+      "loss": 0.5487421798706055,
+      "mean_token_accuracy": 0.9563529288768768,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.13987606402486563,
+      "epoch": 5.883534136546185,
+      "grad_norm": 1.7586702108383179,
+      "learning_rate": 5.393720571138079e-05,
+      "loss": 0.5254617309570313,
+      "mean_token_accuracy": 0.9590577334165573,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2240281231701374,
+      "eval_loss": 0.7485206723213196,
+      "eval_mean_token_accuracy": 0.8668996468186378,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.4089,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.074,
+      "step": 2244
+    },
+    {
+      "entropy": 0.1413771447283451,
+      "epoch": 6.016064257028113,
+      "grad_norm": 1.2926467657089233,
+      "learning_rate": 5.1072296418730254e-05,
+      "loss": 0.5202234649658203,
+      "mean_token_accuracy": 0.9594009392189257,
+      "num_tokens": 5246734.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.1042403375543654,
+      "epoch": 6.149933065595716,
+      "grad_norm": 1.9540276527404785,
+      "learning_rate": 4.8231507514154216e-05,
+      "loss": 0.39597846984863283,
+      "mean_token_accuracy": 0.9706364983320236,
+      "num_tokens": 5366334.0,
+      "step": 2300
+    },
+    {
+      "entropy": 0.10351455600932241,
+      "epoch": 6.28380187416332,
+      "grad_norm": 2.139054775238037,
+      "learning_rate": 4.542102445300397e-05,
+      "loss": 0.38731266021728517,
+      "mean_token_accuracy": 0.9703371664881706,
+      "num_tokens": 5487013.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.11232182893902064,
+      "epoch": 6.417670682730924,
+      "grad_norm": 1.6526401042938232,
+      "learning_rate": 4.264696670352381e-05,
+      "loss": 0.42091716766357423,
+      "mean_token_accuracy": 0.9684987756609916,
+      "num_tokens": 5599415.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.10796859875321388,
+      "epoch": 6.551539491298527,
+      "grad_norm": 1.297956109046936,
+      "learning_rate": 3.9915374422489785e-05,
+      "loss": 0.40640792846679685,
+      "mean_token_accuracy": 0.9703203043341637,
+      "num_tokens": 5718099.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.10999857917428017,
+      "epoch": 6.685408299866131,
+      "grad_norm": 1.5105161666870117,
+      "learning_rate": 3.723219530353909e-05,
+      "loss": 0.4118352508544922,
+      "mean_token_accuracy": 0.9697986772656441,
+      "num_tokens": 5833902.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.11099046738818288,
+      "epoch": 6.8192771084337345,
+      "grad_norm": 1.8809560537338257,
+      "learning_rate": 3.460327162682602e-05,
+      "loss": 0.41624794006347654,
+      "mean_token_accuracy": 0.9690032437443733,
+      "num_tokens": 5948132.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.11062245365232229,
+      "epoch": 6.953145917001339,
+      "grad_norm": 1.0219827890396118,
+      "learning_rate": 3.2034327538202464e-05,
+      "loss": 0.41484325408935546,
+      "mean_token_accuracy": 0.9690453514456749,
+      "num_tokens": 6066224.0,
+      "step": 2600
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.18908375523984433,
+      "eval_loss": 0.8491571545600891,
+      "eval_mean_token_accuracy": 0.8642131051421166,
+      "eval_num_tokens": 6105729.0,
+      "eval_runtime": 96.4633,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.073,
+      "step": 2618
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.0923154774653926e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2148faba04b3eea9d8bc79cdd2f52c92b8cda9e7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..18c30511083a57d4153d95c00daf86622ccbef21
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/trainer_state.json
@@ -0,0 +1,712 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 8.0,
+  "eval_steps": 500,
+  "global_step": 2992,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    },
+    {
+      "entropy": 0.27893446536377225,
+      "epoch": 4.010709504685408,
+      "grad_norm": 1.545491337776184,
+      "learning_rate": 9.30742431881587e-05,
+      "loss": 1.0577442169189453,
+      "mean_token_accuracy": 0.9191552999645772,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.19769302535802125,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.10296893119812,
+      "learning_rate": 9.052568051799083e-05,
+      "loss": 0.7461458587646485,
+      "mean_token_accuracy": 0.9415343621373177,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.1981763695180416,
+      "epoch": 4.278447121820616,
+      "grad_norm": 2.067410945892334,
+      "learning_rate": 8.791533352632524e-05,
+      "loss": 0.7580889892578125,
+      "mean_token_accuracy": 0.9396374526619912,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.19850988369435071,
+      "epoch": 4.412315930388219,
+      "grad_norm": 1.9034850597381592,
+      "learning_rate": 8.524888591065258e-05,
+      "loss": 0.7526986694335938,
+      "mean_token_accuracy": 0.9402479353547096,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.19905407220125199,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.1477949619293213,
+      "learning_rate": 8.253214352041379e-05,
+      "loss": 0.7603612518310547,
+      "mean_token_accuracy": 0.9396576225757599,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.20251497332006693,
+      "epoch": 4.680053547523427,
+      "grad_norm": 1.5489246845245361,
+      "learning_rate": 7.97710217155036e-05,
+      "loss": 0.7711930084228515,
+      "mean_token_accuracy": 0.9400961664319039,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.1991352306306362,
+      "epoch": 4.813922356091031,
+      "grad_norm": 1.969994068145752,
+      "learning_rate": 7.697153248632946e-05,
+      "loss": 0.7681967163085938,
+      "mean_token_accuracy": 0.9399621617794037,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.20229352474212647,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.2329719066619873,
+      "learning_rate": 7.41397713634694e-05,
+      "loss": 0.7733911895751953,
+      "mean_token_accuracy": 0.9396535342931748,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.270584502145648,
+      "eval_loss": 0.6255385875701904,
+      "eval_mean_token_accuracy": 0.8687835082411766,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.6331,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.07,
+      "step": 1870
+    },
+    {
+      "entropy": 0.16372355209155517,
+      "epoch": 5.080321285140562,
+      "grad_norm": 8.029130935668945,
+      "learning_rate": 7.128190414543193e-05,
+      "loss": 0.6145073699951172,
+      "mean_token_accuracy": 0.9516371590922578,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14057113960385323,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.23626446723938,
+      "learning_rate": 6.840415347341672e-05,
+      "loss": 0.5295140075683594,
+      "mean_token_accuracy": 0.9593333688378334,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.14139273861423135,
+      "epoch": 5.34805890227577,
+      "grad_norm": 2.0157318115234375,
+      "learning_rate": 6.551278528230729e-05,
+      "loss": 0.5296827697753906,
+      "mean_token_accuracy": 0.9590813705325126,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.14537794288247824,
+      "epoch": 5.481927710843373,
+      "grad_norm": 1.5371013879776,
+      "learning_rate": 6.261409515739736e-05,
+      "loss": 0.5478645706176758,
+      "mean_token_accuracy": 0.9577724316716194,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.14534839443862438,
+      "epoch": 5.615796519410977,
+      "grad_norm": 2.0134589672088623,
+      "learning_rate": 5.971439462655727e-05,
+      "loss": 0.5426230239868164,
+      "mean_token_accuracy": 0.9581041479110718,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.14614912170916797,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.286437749862671,
+      "learning_rate": 5.6819997417687274e-05,
+      "loss": 0.5487421798706055,
+      "mean_token_accuracy": 0.9563529288768768,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.13987606402486563,
+      "epoch": 5.883534136546185,
+      "grad_norm": 1.7586702108383179,
+      "learning_rate": 5.393720571138079e-05,
+      "loss": 0.5254617309570313,
+      "mean_token_accuracy": 0.9590577334165573,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2240281231701374,
+      "eval_loss": 0.7485206723213196,
+      "eval_mean_token_accuracy": 0.8668996468186378,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.4089,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.074,
+      "step": 2244
+    },
+    {
+      "entropy": 0.1413771447283451,
+      "epoch": 6.016064257028113,
+      "grad_norm": 1.2926467657089233,
+      "learning_rate": 5.1072296418730254e-05,
+      "loss": 0.5202234649658203,
+      "mean_token_accuracy": 0.9594009392189257,
+      "num_tokens": 5246734.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.1042403375543654,
+      "epoch": 6.149933065595716,
+      "grad_norm": 1.9540276527404785,
+      "learning_rate": 4.8231507514154216e-05,
+      "loss": 0.39597846984863283,
+      "mean_token_accuracy": 0.9706364983320236,
+      "num_tokens": 5366334.0,
+      "step": 2300
+    },
+    {
+      "entropy": 0.10351455600932241,
+      "epoch": 6.28380187416332,
+      "grad_norm": 2.139054775238037,
+      "learning_rate": 4.542102445300397e-05,
+      "loss": 0.38731266021728517,
+      "mean_token_accuracy": 0.9703371664881706,
+      "num_tokens": 5487013.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.11232182893902064,
+      "epoch": 6.417670682730924,
+      "grad_norm": 1.6526401042938232,
+      "learning_rate": 4.264696670352381e-05,
+      "loss": 0.42091716766357423,
+      "mean_token_accuracy": 0.9684987756609916,
+      "num_tokens": 5599415.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.10796859875321388,
+      "epoch": 6.551539491298527,
+      "grad_norm": 1.297956109046936,
+      "learning_rate": 3.9915374422489785e-05,
+      "loss": 0.40640792846679685,
+      "mean_token_accuracy": 0.9703203043341637,
+      "num_tokens": 5718099.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.10999857917428017,
+      "epoch": 6.685408299866131,
+      "grad_norm": 1.5105161666870117,
+      "learning_rate": 3.723219530353909e-05,
+      "loss": 0.4118352508544922,
+      "mean_token_accuracy": 0.9697986772656441,
+      "num_tokens": 5833902.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.11099046738818288,
+      "epoch": 6.8192771084337345,
+      "grad_norm": 1.8809560537338257,
+      "learning_rate": 3.460327162682602e-05,
+      "loss": 0.41624794006347654,
+      "mean_token_accuracy": 0.9690032437443733,
+      "num_tokens": 5948132.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.11062245365232229,
+      "epoch": 6.953145917001339,
+      "grad_norm": 1.0219827890396118,
+      "learning_rate": 3.2034327538202464e-05,
+      "loss": 0.41484325408935546,
+      "mean_token_accuracy": 0.9690453514456749,
+      "num_tokens": 6066224.0,
+      "step": 2600
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.18908375523984433,
+      "eval_loss": 0.8491571545600891,
+      "eval_mean_token_accuracy": 0.8642131051421166,
+      "eval_num_tokens": 6105729.0,
+      "eval_runtime": 96.4633,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.073,
+      "step": 2618
+    },
+    {
+      "entropy": 0.09948956533664405,
+      "epoch": 7.085676037483267,
+      "grad_norm": 1.4661338329315186,
+      "learning_rate": 2.9530956585620777e-05,
+      "loss": 0.36354263305664064,
+      "mean_token_accuracy": 0.9727776297415146,
+      "num_tokens": 6183429.0,
+      "step": 2650
+    },
+    {
+      "entropy": 0.08666609892621636,
+      "epoch": 7.21954484605087,
+      "grad_norm": 1.9116477966308594,
+      "learning_rate": 2.7098609539896744e-05,
+      "loss": 0.3243706130981445,
+      "mean_token_accuracy": 0.9765083396434784,
+      "num_tokens": 6303432.0,
+      "step": 2700
+    },
+    {
+      "entropy": 0.09543853564187885,
+      "epoch": 7.353413654618474,
+      "grad_norm": 1.0068918466567993,
+      "learning_rate": 2.4742582526351715e-05,
+      "loss": 0.35761878967285154,
+      "mean_token_accuracy": 0.9740070801973343,
+      "num_tokens": 6414176.0,
+      "step": 2750
+    },
+    {
+      "entropy": 0.08997446410357952,
+      "epoch": 7.4872824631860775,
+      "grad_norm": 1.6730849742889404,
+      "learning_rate": 2.246800549317553e-05,
+      "loss": 0.33653587341308594,
+      "mean_token_accuracy": 0.9758713039755821,
+      "num_tokens": 6531772.0,
+      "step": 2800
+    },
+    {
+      "entropy": 0.08550533290952445,
+      "epoch": 7.621151271753681,
+      "grad_norm": 1.3010321855545044,
+      "learning_rate": 2.027983104161894e-05,
+      "loss": 0.3204774856567383,
+      "mean_token_accuracy": 0.977160106599331,
+      "num_tokens": 6655745.0,
+      "step": 2850
+    },
+    {
+      "entropy": 0.09146139286458492,
+      "epoch": 7.755020080321285,
+      "grad_norm": 2.1133384704589844,
+      "learning_rate": 1.8182823642336212e-05,
+      "loss": 0.3351753234863281,
+      "mean_token_accuracy": 0.9754938682913781,
+      "num_tokens": 6772303.0,
+      "step": 2900
+    },
+    {
+      "entropy": 0.08813748911023139,
+      "epoch": 7.888888888888889,
+      "grad_norm": 0.9765240550041199,
+      "learning_rate": 1.618154926135836e-05,
+      "loss": 0.3303861236572266,
+      "mean_token_accuracy": 0.9758572709560395,
+      "num_tokens": 6887254.0,
+      "step": 2950
+    },
+    {
+      "epoch": 8.0,
+      "eval_entropy": 0.153879771232605,
+      "eval_loss": 1.0034006834030151,
+      "eval_mean_token_accuracy": 0.8645920944213867,
+      "eval_num_tokens": 6977976.0,
+      "eval_runtime": 96.4871,
+      "eval_samples_per_second": 16.572,
+      "eval_steps_per_second": 2.073,
+      "step": 2992
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.393061170429429e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2148faba04b3eea9d8bc79cdd2f52c92b8cda9e7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..2a6e683789c900d4b972f670b04c8658bb6e254b
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3366/trainer_state.json
@@ -0,0 +1,803 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 9.0,
+  "eval_steps": 500,
+  "global_step": 3366,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    },
+    {
+      "entropy": 0.27893446536377225,
+      "epoch": 4.010709504685408,
+      "grad_norm": 1.545491337776184,
+      "learning_rate": 9.30742431881587e-05,
+      "loss": 1.0577442169189453,
+      "mean_token_accuracy": 0.9191552999645772,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.19769302535802125,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.10296893119812,
+      "learning_rate": 9.052568051799083e-05,
+      "loss": 0.7461458587646485,
+      "mean_token_accuracy": 0.9415343621373177,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.1981763695180416,
+      "epoch": 4.278447121820616,
+      "grad_norm": 2.067410945892334,
+      "learning_rate": 8.791533352632524e-05,
+      "loss": 0.7580889892578125,
+      "mean_token_accuracy": 0.9396374526619912,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.19850988369435071,
+      "epoch": 4.412315930388219,
+      "grad_norm": 1.9034850597381592,
+      "learning_rate": 8.524888591065258e-05,
+      "loss": 0.7526986694335938,
+      "mean_token_accuracy": 0.9402479353547096,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.19905407220125199,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.1477949619293213,
+      "learning_rate": 8.253214352041379e-05,
+      "loss": 0.7603612518310547,
+      "mean_token_accuracy": 0.9396576225757599,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.20251497332006693,
+      "epoch": 4.680053547523427,
+      "grad_norm": 1.5489246845245361,
+      "learning_rate": 7.97710217155036e-05,
+      "loss": 0.7711930084228515,
+      "mean_token_accuracy": 0.9400961664319039,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.1991352306306362,
+      "epoch": 4.813922356091031,
+      "grad_norm": 1.969994068145752,
+      "learning_rate": 7.697153248632946e-05,
+      "loss": 0.7681967163085938,
+      "mean_token_accuracy": 0.9399621617794037,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.20229352474212647,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.2329719066619873,
+      "learning_rate": 7.41397713634694e-05,
+      "loss": 0.7733911895751953,
+      "mean_token_accuracy": 0.9396535342931748,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.270584502145648,
+      "eval_loss": 0.6255385875701904,
+      "eval_mean_token_accuracy": 0.8687835082411766,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.6331,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.07,
+      "step": 1870
+    },
+    {
+      "entropy": 0.16372355209155517,
+      "epoch": 5.080321285140562,
+      "grad_norm": 8.029130935668945,
+      "learning_rate": 7.128190414543193e-05,
+      "loss": 0.6145073699951172,
+      "mean_token_accuracy": 0.9516371590922578,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14057113960385323,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.23626446723938,
+      "learning_rate": 6.840415347341672e-05,
+      "loss": 0.5295140075683594,
+      "mean_token_accuracy": 0.9593333688378334,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.14139273861423135,
+      "epoch": 5.34805890227577,
+      "grad_norm": 2.0157318115234375,
+      "learning_rate": 6.551278528230729e-05,
+      "loss": 0.5296827697753906,
+      "mean_token_accuracy": 0.9590813705325126,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.14537794288247824,
+      "epoch": 5.481927710843373,
+      "grad_norm": 1.5371013879776,
+      "learning_rate": 6.261409515739736e-05,
+      "loss": 0.5478645706176758,
+      "mean_token_accuracy": 0.9577724316716194,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.14534839443862438,
+      "epoch": 5.615796519410977,
+      "grad_norm": 2.0134589672088623,
+      "learning_rate": 5.971439462655727e-05,
+      "loss": 0.5426230239868164,
+      "mean_token_accuracy": 0.9581041479110718,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.14614912170916797,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.286437749862671,
+      "learning_rate": 5.6819997417687274e-05,
+      "loss": 0.5487421798706055,
+      "mean_token_accuracy": 0.9563529288768768,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.13987606402486563,
+      "epoch": 5.883534136546185,
+      "grad_norm": 1.7586702108383179,
+      "learning_rate": 5.393720571138079e-05,
+      "loss": 0.5254617309570313,
+      "mean_token_accuracy": 0.9590577334165573,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2240281231701374,
+      "eval_loss": 0.7485206723213196,
+      "eval_mean_token_accuracy": 0.8668996468186378,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.4089,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.074,
+      "step": 2244
+    },
+    {
+      "entropy": 0.1413771447283451,
+      "epoch": 6.016064257028113,
+      "grad_norm": 1.2926467657089233,
+      "learning_rate": 5.1072296418730254e-05,
+      "loss": 0.5202234649658203,
+      "mean_token_accuracy": 0.9594009392189257,
+      "num_tokens": 5246734.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.1042403375543654,
+      "epoch": 6.149933065595716,
+      "grad_norm": 1.9540276527404785,
+      "learning_rate": 4.8231507514154216e-05,
+      "loss": 0.39597846984863283,
+      "mean_token_accuracy": 0.9706364983320236,
+      "num_tokens": 5366334.0,
+      "step": 2300
+    },
+    {
+      "entropy": 0.10351455600932241,
+      "epoch": 6.28380187416332,
+      "grad_norm": 2.139054775238037,
+      "learning_rate": 4.542102445300397e-05,
+      "loss": 0.38731266021728517,
+      "mean_token_accuracy": 0.9703371664881706,
+      "num_tokens": 5487013.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.11232182893902064,
+      "epoch": 6.417670682730924,
+      "grad_norm": 1.6526401042938232,
+      "learning_rate": 4.264696670352381e-05,
+      "loss": 0.42091716766357423,
+      "mean_token_accuracy": 0.9684987756609916,
+      "num_tokens": 5599415.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.10796859875321388,
+      "epoch": 6.551539491298527,
+      "grad_norm": 1.297956109046936,
+      "learning_rate": 3.9915374422489785e-05,
+      "loss": 0.40640792846679685,
+      "mean_token_accuracy": 0.9703203043341637,
+      "num_tokens": 5718099.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.10999857917428017,
+      "epoch": 6.685408299866131,
+      "grad_norm": 1.5105161666870117,
+      "learning_rate": 3.723219530353909e-05,
+      "loss": 0.4118352508544922,
+      "mean_token_accuracy": 0.9697986772656441,
+      "num_tokens": 5833902.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.11099046738818288,
+      "epoch": 6.8192771084337345,
+      "grad_norm": 1.8809560537338257,
+      "learning_rate": 3.460327162682602e-05,
+      "loss": 0.41624794006347654,
+      "mean_token_accuracy": 0.9690032437443733,
+      "num_tokens": 5948132.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.11062245365232229,
+      "epoch": 6.953145917001339,
+      "grad_norm": 1.0219827890396118,
+      "learning_rate": 3.2034327538202464e-05,
+      "loss": 0.41484325408935546,
+      "mean_token_accuracy": 0.9690453514456749,
+      "num_tokens": 6066224.0,
+      "step": 2600
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.18908375523984433,
+      "eval_loss": 0.8491571545600891,
+      "eval_mean_token_accuracy": 0.8642131051421166,
+      "eval_num_tokens": 6105729.0,
+      "eval_runtime": 96.4633,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.073,
+      "step": 2618
+    },
+    {
+      "entropy": 0.09948956533664405,
+      "epoch": 7.085676037483267,
+      "grad_norm": 1.4661338329315186,
+      "learning_rate": 2.9530956585620777e-05,
+      "loss": 0.36354263305664064,
+      "mean_token_accuracy": 0.9727776297415146,
+      "num_tokens": 6183429.0,
+      "step": 2650
+    },
+    {
+      "entropy": 0.08666609892621636,
+      "epoch": 7.21954484605087,
+      "grad_norm": 1.9116477966308594,
+      "learning_rate": 2.7098609539896744e-05,
+      "loss": 0.3243706130981445,
+      "mean_token_accuracy": 0.9765083396434784,
+      "num_tokens": 6303432.0,
+      "step": 2700
+    },
+    {
+      "entropy": 0.09543853564187885,
+      "epoch": 7.353413654618474,
+      "grad_norm": 1.0068918466567993,
+      "learning_rate": 2.4742582526351715e-05,
+      "loss": 0.35761878967285154,
+      "mean_token_accuracy": 0.9740070801973343,
+      "num_tokens": 6414176.0,
+      "step": 2750
+    },
+    {
+      "entropy": 0.08997446410357952,
+      "epoch": 7.4872824631860775,
+      "grad_norm": 1.6730849742889404,
+      "learning_rate": 2.246800549317553e-05,
+      "loss": 0.33653587341308594,
+      "mean_token_accuracy": 0.9758713039755821,
+      "num_tokens": 6531772.0,
+      "step": 2800
+    },
+    {
+      "entropy": 0.08550533290952445,
+      "epoch": 7.621151271753681,
+      "grad_norm": 1.3010321855545044,
+      "learning_rate": 2.027983104161894e-05,
+      "loss": 0.3204774856567383,
+      "mean_token_accuracy": 0.977160106599331,
+      "num_tokens": 6655745.0,
+      "step": 2850
+    },
+    {
+      "entropy": 0.09146139286458492,
+      "epoch": 7.755020080321285,
+      "grad_norm": 2.1133384704589844,
+      "learning_rate": 1.8182823642336212e-05,
+      "loss": 0.3351753234863281,
+      "mean_token_accuracy": 0.9754938682913781,
+      "num_tokens": 6772303.0,
+      "step": 2900
+    },
+    {
+      "entropy": 0.08813748911023139,
+      "epoch": 7.888888888888889,
+      "grad_norm": 0.9765240550041199,
+      "learning_rate": 1.618154926135836e-05,
+      "loss": 0.3303861236572266,
+      "mean_token_accuracy": 0.9758572709560395,
+      "num_tokens": 6887254.0,
+      "step": 2950
+    },
+    {
+      "epoch": 8.0,
+      "eval_entropy": 0.153879771232605,
+      "eval_loss": 1.0034006834030151,
+      "eval_mean_token_accuracy": 0.8645920944213867,
+      "eval_num_tokens": 6977976.0,
+      "eval_runtime": 96.4871,
+      "eval_samples_per_second": 16.572,
+      "eval_steps_per_second": 2.073,
+      "step": 2992
+    },
+    {
+      "entropy": 0.09048398275568027,
+      "epoch": 8.021419009370817,
+      "grad_norm": 0.4018457531929016,
+      "learning_rate": 1.4280365418284746e-05,
+      "loss": 0.3326351547241211,
+      "mean_token_accuracy": 0.9755137812609624,
+      "num_tokens": 6997584.0,
+      "step": 3000
+    },
+    {
+      "entropy": 0.08544229088351131,
+      "epoch": 8.15528781793842,
+      "grad_norm": 0.552768886089325,
+      "learning_rate": 1.2483411698340072e-05,
+      "loss": 0.3177168655395508,
+      "mean_token_accuracy": 0.977306153178215,
+      "num_tokens": 7109661.0,
+      "step": 3050
+    },
+    {
+      "entropy": 0.08211908274330199,
+      "epoch": 8.289156626506024,
+      "grad_norm": 0.7745324373245239,
+      "learning_rate": 1.0794600738955833e-05,
+      "loss": 0.305778751373291,
+      "mean_token_accuracy": 0.9774795493483543,
+      "num_tokens": 7228951.0,
+      "step": 3100
+    },
+    {
+      "entropy": 0.07924632488749922,
+      "epoch": 8.423025435073628,
+      "grad_norm": 0.6892443299293518,
+      "learning_rate": 9.217609710501601e-06,
+      "loss": 0.29681636810302736,
+      "mean_token_accuracy": 0.9784620434045792,
+      "num_tokens": 7345974.0,
+      "step": 3150
+    },
+    {
+      "entropy": 0.07959031270816923,
+      "epoch": 8.556894243641231,
+      "grad_norm": 1.8224815130233765,
+      "learning_rate": 7.755872309715688e-06,
+      "loss": 0.2975615882873535,
+      "mean_token_accuracy": 0.9780591726303101,
+      "num_tokens": 7465280.0,
+      "step": 3200
+    },
+    {
+      "entropy": 0.07911164808087051,
+      "epoch": 8.690763052208835,
+      "grad_norm": 0.7088468074798584,
+      "learning_rate": 6.4125712832686665e-06,
+      "loss": 0.2949537658691406,
+      "mean_token_accuracy": 0.9787314286828042,
+      "num_tokens": 7584144.0,
+      "step": 3250
+    },
+    {
+      "entropy": 0.08394345591776073,
+      "epoch": 8.824631860776439,
+      "grad_norm": 0.7498103976249695,
+      "learning_rate": 5.19063149773867e-06,
+      "loss": 0.3096595764160156,
+      "mean_token_accuracy": 0.9774689373373985,
+      "num_tokens": 7698337.0,
+      "step": 3300
+    },
+    {
+      "entropy": 0.08363399875350297,
+      "epoch": 8.958500669344042,
+      "grad_norm": 0.5460980534553528,
+      "learning_rate": 4.092713571087534e-06,
+      "loss": 0.31484752655029297,
+      "mean_token_accuracy": 0.9767562291026115,
+      "num_tokens": 7815006.0,
+      "step": 3350
+    },
+    {
+      "epoch": 9.0,
+      "eval_entropy": 0.14292236048728227,
+      "eval_loss": 1.0725034475326538,
+      "eval_mean_token_accuracy": 0.8639731431007385,
+      "eval_num_tokens": 7850223.0,
+      "eval_runtime": 96.5743,
+      "eval_samples_per_second": 16.557,
+      "eval_steps_per_second": 2.071,
+      "step": 3366
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.6911646503474606e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2148faba04b3eea9d8bc79cdd2f52c92b8cda9e7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..85594b7a33d4f63b0b612916e22727c8151d4bdd
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-374/trainer_state.json
@@ -0,0 +1,115 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 374,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.9712111865733606e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2148faba04b3eea9d8bc79cdd2f52c92b8cda9e7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..6364c521cb2ab2c74496ee68a0b05029744ec8b7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3740/trainer_state.json
@@ -0,0 +1,884 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 10.0,
+  "eval_steps": 500,
+  "global_step": 3740,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    },
+    {
+      "entropy": 0.27893446536377225,
+      "epoch": 4.010709504685408,
+      "grad_norm": 1.545491337776184,
+      "learning_rate": 9.30742431881587e-05,
+      "loss": 1.0577442169189453,
+      "mean_token_accuracy": 0.9191552999645772,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.19769302535802125,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.10296893119812,
+      "learning_rate": 9.052568051799083e-05,
+      "loss": 0.7461458587646485,
+      "mean_token_accuracy": 0.9415343621373177,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.1981763695180416,
+      "epoch": 4.278447121820616,
+      "grad_norm": 2.067410945892334,
+      "learning_rate": 8.791533352632524e-05,
+      "loss": 0.7580889892578125,
+      "mean_token_accuracy": 0.9396374526619912,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.19850988369435071,
+      "epoch": 4.412315930388219,
+      "grad_norm": 1.9034850597381592,
+      "learning_rate": 8.524888591065258e-05,
+      "loss": 0.7526986694335938,
+      "mean_token_accuracy": 0.9402479353547096,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.19905407220125199,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.1477949619293213,
+      "learning_rate": 8.253214352041379e-05,
+      "loss": 0.7603612518310547,
+      "mean_token_accuracy": 0.9396576225757599,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.20251497332006693,
+      "epoch": 4.680053547523427,
+      "grad_norm": 1.5489246845245361,
+      "learning_rate": 7.97710217155036e-05,
+      "loss": 0.7711930084228515,
+      "mean_token_accuracy": 0.9400961664319039,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.1991352306306362,
+      "epoch": 4.813922356091031,
+      "grad_norm": 1.969994068145752,
+      "learning_rate": 7.697153248632946e-05,
+      "loss": 0.7681967163085938,
+      "mean_token_accuracy": 0.9399621617794037,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.20229352474212647,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.2329719066619873,
+      "learning_rate": 7.41397713634694e-05,
+      "loss": 0.7733911895751953,
+      "mean_token_accuracy": 0.9396535342931748,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.270584502145648,
+      "eval_loss": 0.6255385875701904,
+      "eval_mean_token_accuracy": 0.8687835082411766,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.6331,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.07,
+      "step": 1870
+    },
+    {
+      "entropy": 0.16372355209155517,
+      "epoch": 5.080321285140562,
+      "grad_norm": 8.029130935668945,
+      "learning_rate": 7.128190414543193e-05,
+      "loss": 0.6145073699951172,
+      "mean_token_accuracy": 0.9516371590922578,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14057113960385323,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.23626446723938,
+      "learning_rate": 6.840415347341672e-05,
+      "loss": 0.5295140075683594,
+      "mean_token_accuracy": 0.9593333688378334,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.14139273861423135,
+      "epoch": 5.34805890227577,
+      "grad_norm": 2.0157318115234375,
+      "learning_rate": 6.551278528230729e-05,
+      "loss": 0.5296827697753906,
+      "mean_token_accuracy": 0.9590813705325126,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.14537794288247824,
+      "epoch": 5.481927710843373,
+      "grad_norm": 1.5371013879776,
+      "learning_rate": 6.261409515739736e-05,
+      "loss": 0.5478645706176758,
+      "mean_token_accuracy": 0.9577724316716194,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.14534839443862438,
+      "epoch": 5.615796519410977,
+      "grad_norm": 2.0134589672088623,
+      "learning_rate": 5.971439462655727e-05,
+      "loss": 0.5426230239868164,
+      "mean_token_accuracy": 0.9581041479110718,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.14614912170916797,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.286437749862671,
+      "learning_rate": 5.6819997417687274e-05,
+      "loss": 0.5487421798706055,
+      "mean_token_accuracy": 0.9563529288768768,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.13987606402486563,
+      "epoch": 5.883534136546185,
+      "grad_norm": 1.7586702108383179,
+      "learning_rate": 5.393720571138079e-05,
+      "loss": 0.5254617309570313,
+      "mean_token_accuracy": 0.9590577334165573,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2240281231701374,
+      "eval_loss": 0.7485206723213196,
+      "eval_mean_token_accuracy": 0.8668996468186378,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.4089,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.074,
+      "step": 2244
+    },
+    {
+      "entropy": 0.1413771447283451,
+      "epoch": 6.016064257028113,
+      "grad_norm": 1.2926467657089233,
+      "learning_rate": 5.1072296418730254e-05,
+      "loss": 0.5202234649658203,
+      "mean_token_accuracy": 0.9594009392189257,
+      "num_tokens": 5246734.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.1042403375543654,
+      "epoch": 6.149933065595716,
+      "grad_norm": 1.9540276527404785,
+      "learning_rate": 4.8231507514154216e-05,
+      "loss": 0.39597846984863283,
+      "mean_token_accuracy": 0.9706364983320236,
+      "num_tokens": 5366334.0,
+      "step": 2300
+    },
+    {
+      "entropy": 0.10351455600932241,
+      "epoch": 6.28380187416332,
+      "grad_norm": 2.139054775238037,
+      "learning_rate": 4.542102445300397e-05,
+      "loss": 0.38731266021728517,
+      "mean_token_accuracy": 0.9703371664881706,
+      "num_tokens": 5487013.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.11232182893902064,
+      "epoch": 6.417670682730924,
+      "grad_norm": 1.6526401042938232,
+      "learning_rate": 4.264696670352381e-05,
+      "loss": 0.42091716766357423,
+      "mean_token_accuracy": 0.9684987756609916,
+      "num_tokens": 5599415.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.10796859875321388,
+      "epoch": 6.551539491298527,
+      "grad_norm": 1.297956109046936,
+      "learning_rate": 3.9915374422489785e-05,
+      "loss": 0.40640792846679685,
+      "mean_token_accuracy": 0.9703203043341637,
+      "num_tokens": 5718099.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.10999857917428017,
+      "epoch": 6.685408299866131,
+      "grad_norm": 1.5105161666870117,
+      "learning_rate": 3.723219530353909e-05,
+      "loss": 0.4118352508544922,
+      "mean_token_accuracy": 0.9697986772656441,
+      "num_tokens": 5833902.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.11099046738818288,
+      "epoch": 6.8192771084337345,
+      "grad_norm": 1.8809560537338257,
+      "learning_rate": 3.460327162682602e-05,
+      "loss": 0.41624794006347654,
+      "mean_token_accuracy": 0.9690032437443733,
+      "num_tokens": 5948132.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.11062245365232229,
+      "epoch": 6.953145917001339,
+      "grad_norm": 1.0219827890396118,
+      "learning_rate": 3.2034327538202464e-05,
+      "loss": 0.41484325408935546,
+      "mean_token_accuracy": 0.9690453514456749,
+      "num_tokens": 6066224.0,
+      "step": 2600
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.18908375523984433,
+      "eval_loss": 0.8491571545600891,
+      "eval_mean_token_accuracy": 0.8642131051421166,
+      "eval_num_tokens": 6105729.0,
+      "eval_runtime": 96.4633,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.073,
+      "step": 2618
+    },
+    {
+      "entropy": 0.09948956533664405,
+      "epoch": 7.085676037483267,
+      "grad_norm": 1.4661338329315186,
+      "learning_rate": 2.9530956585620777e-05,
+      "loss": 0.36354263305664064,
+      "mean_token_accuracy": 0.9727776297415146,
+      "num_tokens": 6183429.0,
+      "step": 2650
+    },
+    {
+      "entropy": 0.08666609892621636,
+      "epoch": 7.21954484605087,
+      "grad_norm": 1.9116477966308594,
+      "learning_rate": 2.7098609539896744e-05,
+      "loss": 0.3243706130981445,
+      "mean_token_accuracy": 0.9765083396434784,
+      "num_tokens": 6303432.0,
+      "step": 2700
+    },
+    {
+      "entropy": 0.09543853564187885,
+      "epoch": 7.353413654618474,
+      "grad_norm": 1.0068918466567993,
+      "learning_rate": 2.4742582526351715e-05,
+      "loss": 0.35761878967285154,
+      "mean_token_accuracy": 0.9740070801973343,
+      "num_tokens": 6414176.0,
+      "step": 2750
+    },
+    {
+      "entropy": 0.08997446410357952,
+      "epoch": 7.4872824631860775,
+      "grad_norm": 1.6730849742889404,
+      "learning_rate": 2.246800549317553e-05,
+      "loss": 0.33653587341308594,
+      "mean_token_accuracy": 0.9758713039755821,
+      "num_tokens": 6531772.0,
+      "step": 2800
+    },
+    {
+      "entropy": 0.08550533290952445,
+      "epoch": 7.621151271753681,
+      "grad_norm": 1.3010321855545044,
+      "learning_rate": 2.027983104161894e-05,
+      "loss": 0.3204774856567383,
+      "mean_token_accuracy": 0.977160106599331,
+      "num_tokens": 6655745.0,
+      "step": 2850
+    },
+    {
+      "entropy": 0.09146139286458492,
+      "epoch": 7.755020080321285,
+      "grad_norm": 2.1133384704589844,
+      "learning_rate": 1.8182823642336212e-05,
+      "loss": 0.3351753234863281,
+      "mean_token_accuracy": 0.9754938682913781,
+      "num_tokens": 6772303.0,
+      "step": 2900
+    },
+    {
+      "entropy": 0.08813748911023139,
+      "epoch": 7.888888888888889,
+      "grad_norm": 0.9765240550041199,
+      "learning_rate": 1.618154926135836e-05,
+      "loss": 0.3303861236572266,
+      "mean_token_accuracy": 0.9758572709560395,
+      "num_tokens": 6887254.0,
+      "step": 2950
+    },
+    {
+      "epoch": 8.0,
+      "eval_entropy": 0.153879771232605,
+      "eval_loss": 1.0034006834030151,
+      "eval_mean_token_accuracy": 0.8645920944213867,
+      "eval_num_tokens": 6977976.0,
+      "eval_runtime": 96.4871,
+      "eval_samples_per_second": 16.572,
+      "eval_steps_per_second": 2.073,
+      "step": 2992
+    },
+    {
+      "entropy": 0.09048398275568027,
+      "epoch": 8.021419009370817,
+      "grad_norm": 0.4018457531929016,
+      "learning_rate": 1.4280365418284746e-05,
+      "loss": 0.3326351547241211,
+      "mean_token_accuracy": 0.9755137812609624,
+      "num_tokens": 6997584.0,
+      "step": 3000
+    },
+    {
+      "entropy": 0.08544229088351131,
+      "epoch": 8.15528781793842,
+      "grad_norm": 0.552768886089325,
+      "learning_rate": 1.2483411698340072e-05,
+      "loss": 0.3177168655395508,
+      "mean_token_accuracy": 0.977306153178215,
+      "num_tokens": 7109661.0,
+      "step": 3050
+    },
+    {
+      "entropy": 0.08211908274330199,
+      "epoch": 8.289156626506024,
+      "grad_norm": 0.7745324373245239,
+      "learning_rate": 1.0794600738955833e-05,
+      "loss": 0.305778751373291,
+      "mean_token_accuracy": 0.9774795493483543,
+      "num_tokens": 7228951.0,
+      "step": 3100
+    },
+    {
+      "entropy": 0.07924632488749922,
+      "epoch": 8.423025435073628,
+      "grad_norm": 0.6892443299293518,
+      "learning_rate": 9.217609710501601e-06,
+      "loss": 0.29681636810302736,
+      "mean_token_accuracy": 0.9784620434045792,
+      "num_tokens": 7345974.0,
+      "step": 3150
+    },
+    {
+      "entropy": 0.07959031270816923,
+      "epoch": 8.556894243641231,
+      "grad_norm": 1.8224815130233765,
+      "learning_rate": 7.755872309715688e-06,
+      "loss": 0.2975615882873535,
+      "mean_token_accuracy": 0.9780591726303101,
+      "num_tokens": 7465280.0,
+      "step": 3200
+    },
+    {
+      "entropy": 0.07911164808087051,
+      "epoch": 8.690763052208835,
+      "grad_norm": 0.7088468074798584,
+      "learning_rate": 6.4125712832686665e-06,
+      "loss": 0.2949537658691406,
+      "mean_token_accuracy": 0.9787314286828042,
+      "num_tokens": 7584144.0,
+      "step": 3250
+    },
+    {
+      "entropy": 0.08394345591776073,
+      "epoch": 8.824631860776439,
+      "grad_norm": 0.7498103976249695,
+      "learning_rate": 5.19063149773867e-06,
+      "loss": 0.3096595764160156,
+      "mean_token_accuracy": 0.9774689373373985,
+      "num_tokens": 7698337.0,
+      "step": 3300
+    },
+    {
+      "entropy": 0.08363399875350297,
+      "epoch": 8.958500669344042,
+      "grad_norm": 0.5460980534553528,
+      "learning_rate": 4.092713571087534e-06,
+      "loss": 0.31484752655029297,
+      "mean_token_accuracy": 0.9767562291026115,
+      "num_tokens": 7815006.0,
+      "step": 3350
+    },
+    {
+      "epoch": 9.0,
+      "eval_entropy": 0.14292236048728227,
+      "eval_loss": 1.0725034475326538,
+      "eval_mean_token_accuracy": 0.8639731431007385,
+      "eval_num_tokens": 7850223.0,
+      "eval_runtime": 96.5743,
+      "eval_samples_per_second": 16.557,
+      "eval_steps_per_second": 2.071,
+      "step": 3366
+    },
+    {
+      "entropy": 0.08699184825474565,
+      "epoch": 9.09103078982597,
+      "grad_norm": 0.4920552670955658,
+      "learning_rate": 3.1212080795047673e-06,
+      "loss": 0.3132004165649414,
+      "mean_token_accuracy": 0.9772917390471757,
+      "num_tokens": 7924040.0,
+      "step": 3400
+    },
+    {
+      "entropy": 0.07937462277710437,
+      "epoch": 9.224899598393574,
+      "grad_norm": 0.5245408415794373,
+      "learning_rate": 2.278230352232899e-06,
+      "loss": 0.2948256874084473,
+      "mean_token_accuracy": 0.9782775729894638,
+      "num_tokens": 8035493.0,
+      "step": 3450
+    },
+    {
+      "entropy": 0.08003638771362603,
+      "epoch": 9.358768406961179,
+      "grad_norm": 0.5275429487228394,
+      "learning_rate": 1.5656158657080147e-06,
+      "loss": 0.2957208824157715,
+      "mean_token_accuracy": 0.9780283415317536,
+      "num_tokens": 8150965.0,
+      "step": 3500
+    },
+    {
+      "entropy": 0.07463583857752383,
+      "epoch": 9.492637215528783,
+      "grad_norm": 0.41151365637779236,
+      "learning_rate": 9.849162470439522e-07,
+      "loss": 0.2795832633972168,
+      "mean_token_accuracy": 0.9797070980072021,
+      "num_tokens": 8273746.0,
+      "step": 3550
+    },
+    {
+      "entropy": 0.07939885665662587,
+      "epoch": 9.626506024096386,
+      "grad_norm": 0.6897122859954834,
+      "learning_rate": 5.373958955623113e-07,
+      "loss": 0.29586851119995117,
+      "mean_token_accuracy": 0.9783321896195412,
+      "num_tokens": 8391652.0,
+      "step": 3600
+    },
+    {
+      "entropy": 0.0767002559825778,
+      "epoch": 9.76037483266399,
+      "grad_norm": 0.9138604998588562,
+      "learning_rate": 2.240292297242689e-07,
+      "loss": 0.28384410858154296,
+      "mean_token_accuracy": 0.9795003500580788,
+      "num_tokens": 8509546.0,
+      "step": 3650
+    },
+    {
+      "entropy": 0.07247084728442132,
+      "epoch": 9.894243641231594,
+      "grad_norm": 0.5386644601821899,
+      "learning_rate": 4.549856545868282e-08,
+      "loss": 0.2697745323181152,
+      "mean_token_accuracy": 0.9802604866027832,
+      "num_tokens": 8634626.0,
+      "step": 3700
+    },
+    {
+      "epoch": 10.0,
+      "eval_entropy": 0.1328240069001913,
+      "eval_loss": 1.144450306892395,
+      "eval_mean_token_accuracy": 0.8635567063093186,
+      "eval_num_tokens": 8722470.0,
+      "eval_runtime": 96.7758,
+      "eval_samples_per_second": 16.523,
+      "eval_steps_per_second": 2.067,
+      "step": 3740
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.9909736944491034e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2148faba04b3eea9d8bc79cdd2f52c92b8cda9e7
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d4413e450433ac499cb4144e230d4444e246e487
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-748/trainer_state.json
@@ -0,0 +1,196 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 748,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.968365525090723e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c55b738fe5c4e833530a57ccb4e434a28983796e
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/README.md
@@ -0,0 +1,58 @@
+---
+base_model: google/gemma-4-31B
+library_name: transformers
+model_name: gemma-4-31B_original_features_structural_train_original_features_structural_test2
+tags:
+- generated_from_trainer
+- sft
+- trl
+licence: license
+---
+
+# Model Card for gemma-4-31B_original_features_structural_train_original_features_structural_test2
+
+This model is a fine-tuned version of [google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+
+## Quick start
+
+```python
+from transformers import pipeline
+
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="None", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+
+## Training procedure
+
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/katriin-kukk/Cross_lingual_morphological_generalization/runs/xuz5975p) 
+
+
+
+This model was trained with SFT.
+
+### Framework versions
+
+- TRL: 0.29.0
+- Transformers: 5.5.4
+- Pytorch: 2.10.0
+- Datasets: 4.6.1
+- Tokenizers: 0.22.2
+
+## Citations
+
+
+
+Cite TRL as:
+    
+```bibtex
+@software{vonwerra2020trl,
+  title   = {{TRL: Transformers Reinforcement Learning}},
+  author  = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
+  license = {Apache-2.0},
+  url     = {https://github.com/huggingface/trl},
+  year    = {2020}
+}
+```
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9abe250820de6d55106ad056cc8dddd15cd6bd60
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05026173039334608,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..959d2f8a2c527013315d4760e1f067117eb36984
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1122/trainer_state.json
@@ -0,0 +1,287 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1122,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2582822921872139,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 5.033235549926758,
+      "learning_rate": 2.1466288485778066e-05,
+      "loss": 5.046328735351563,
+      "mean_token_accuracy": 0.7502484863996506,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5685664692521095,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 3.554903030395508,
+      "learning_rate": 4.337066449167405e-05,
+      "loss": 2.209185791015625,
+      "mean_token_accuracy": 0.8581047981977463,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5092987871170044,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 5.598084926605225,
+      "learning_rate": 6.527504049757005e-05,
+      "loss": 2.0146630859375,
+      "mean_token_accuracy": 0.8670356649160386,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4832092320919037,
+      "epoch": 0.535475234270415,
+      "grad_norm": 3.579439163208008,
+      "learning_rate": 8.717941650346603e-05,
+      "loss": 1.9066175842285156,
+      "mean_token_accuracy": 0.8735517236590385,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.49370287612080577,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 22.771854400634766,
+      "learning_rate": 0.00010908379250936202,
+      "loss": 1.9317852783203124,
+      "mean_token_accuracy": 0.8717742815613747,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.475419160425663,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 191.79444885253906,
+      "learning_rate": 0.000130988168515258,
+      "loss": 1.9097901916503905,
+      "mean_token_accuracy": 0.8744278407096863,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5022796393930912,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 3.216644287109375,
+      "learning_rate": 0.00015289254452115398,
+      "loss": 2.147085876464844,
+      "mean_token_accuracy": 0.869993035197258,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5250271247327327,
+      "eval_loss": 0.5184861421585083,
+      "eval_mean_token_accuracy": 0.8612929663062096,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.6309,
+      "eval_samples_per_second": 16.537,
+      "eval_steps_per_second": 2.07,
+      "step": 374
+    },
+    {
+      "entropy": 0.4808884654382263,
+      "epoch": 1.069611780455154,
+      "grad_norm": 2.666057586669922,
+      "learning_rate": 0.00016382243255158818,
+      "loss": 1.8543489074707031,
+      "mean_token_accuracy": 0.8768212256407497,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.45330303743481637,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 2.1171557903289795,
+      "learning_rate": 0.00016364410560779942,
+      "loss": 1.7948956298828125,
+      "mean_token_accuracy": 0.8791207140684127,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.45347678795456886,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 4.511318683624268,
+      "learning_rate": 0.00016328784000438723,
+      "loss": 1.7988812255859374,
+      "mean_token_accuracy": 0.8801145932078361,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.44952490359544756,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 2.961987257003784,
+      "learning_rate": 0.0001627544114642431,
+      "loss": 1.7823495483398437,
+      "mean_token_accuracy": 0.8799195346236229,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4502506497502327,
+      "epoch": 1.605087014725569,
+      "grad_norm": 2.924865484237671,
+      "learning_rate": 0.000162044981459947,
+      "loss": 1.7603852844238281,
+      "mean_token_accuracy": 0.8811277949810028,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.44528300017118455,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 2.928840160369873,
+      "learning_rate": 0.00016116109468480906,
+      "loss": 1.7513160705566406,
+      "mean_token_accuracy": 0.8816375133395195,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.43578719861805437,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 15.26456356048584,
+      "learning_rate": 0.00016010467568949708,
+      "loss": 1.7112632751464845,
+      "mean_token_accuracy": 0.884103564620018,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4700849764049053,
+      "eval_loss": 0.49145790934562683,
+      "eval_mean_token_accuracy": 0.8681637060642242,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5704,
+      "eval_samples_per_second": 16.548,
+      "eval_steps_per_second": 2.071,
+      "step": 748
+    },
+    {
+      "entropy": 0.44389665202058926,
+      "epoch": 2.005354752342704,
+      "grad_norm": 3.126400947570801,
+      "learning_rate": 0.00015887802469157283,
+      "loss": 1.74362060546875,
+      "mean_token_accuracy": 0.882077858604566,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.36593644849956036,
+      "epoch": 2.139223560910308,
+      "grad_norm": 4.825572490692139,
+      "learning_rate": 0.000157483812567062,
+      "loss": 1.4261384582519532,
+      "mean_token_accuracy": 0.897953551709652,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3735161118209362,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 2.341724395751953,
+      "learning_rate": 0.00015592507503496244,
+      "loss": 1.4566732788085937,
+      "mean_token_accuracy": 0.8954837635159493,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3712782260775566,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 2.141064405441284,
+      "learning_rate": 0.00015420520604735334,
+      "loss": 1.4417454528808593,
+      "mean_token_accuracy": 0.8987472346425056,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.3738844521343708,
+      "epoch": 2.540829986613119,
+      "grad_norm": 2.904395818710327,
+      "learning_rate": 0.0001523279503994976,
+      "loss": 1.4441893005371094,
+      "mean_token_accuracy": 0.8981871575117111,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.3702578065544367,
+      "epoch": 2.674698795180723,
+      "grad_norm": 2.941880226135254,
+      "learning_rate": 0.00015029739557602818,
+      "loss": 1.4411444091796874,
+      "mean_token_accuracy": 0.8977779766917229,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.37640198186039925,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 4.814720153808594,
+      "learning_rate": 0.00014811796285097166,
+      "loss": 1.463765869140625,
+      "mean_token_accuracy": 0.8968957820534706,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.3884583811461926,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 2.023144483566284,
+      "learning_rate": 0.0001457943976609884,
+      "loss": 1.4860101318359376,
+      "mean_token_accuracy": 0.8945580047369003,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.41501702144742014,
+      "eval_loss": 0.5103150010108948,
+      "eval_mean_token_accuracy": 0.8657891270518303,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.5282,
+      "eval_samples_per_second": 16.555,
+      "eval_steps_per_second": 2.072,
+      "step": 1122
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.906910982888778e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9abe250820de6d55106ad056cc8dddd15cd6bd60
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05026173039334608,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5a55f4fb938957c63e018aa3264c96b476a6578
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1496/trainer_state.json
@@ -0,0 +1,368 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 1496,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2582822921872139,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 5.033235549926758,
+      "learning_rate": 2.1466288485778066e-05,
+      "loss": 5.046328735351563,
+      "mean_token_accuracy": 0.7502484863996506,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5685664692521095,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 3.554903030395508,
+      "learning_rate": 4.337066449167405e-05,
+      "loss": 2.209185791015625,
+      "mean_token_accuracy": 0.8581047981977463,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5092987871170044,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 5.598084926605225,
+      "learning_rate": 6.527504049757005e-05,
+      "loss": 2.0146630859375,
+      "mean_token_accuracy": 0.8670356649160386,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4832092320919037,
+      "epoch": 0.535475234270415,
+      "grad_norm": 3.579439163208008,
+      "learning_rate": 8.717941650346603e-05,
+      "loss": 1.9066175842285156,
+      "mean_token_accuracy": 0.8735517236590385,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.49370287612080577,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 22.771854400634766,
+      "learning_rate": 0.00010908379250936202,
+      "loss": 1.9317852783203124,
+      "mean_token_accuracy": 0.8717742815613747,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.475419160425663,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 191.79444885253906,
+      "learning_rate": 0.000130988168515258,
+      "loss": 1.9097901916503905,
+      "mean_token_accuracy": 0.8744278407096863,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5022796393930912,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 3.216644287109375,
+      "learning_rate": 0.00015289254452115398,
+      "loss": 2.147085876464844,
+      "mean_token_accuracy": 0.869993035197258,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5250271247327327,
+      "eval_loss": 0.5184861421585083,
+      "eval_mean_token_accuracy": 0.8612929663062096,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.6309,
+      "eval_samples_per_second": 16.537,
+      "eval_steps_per_second": 2.07,
+      "step": 374
+    },
+    {
+      "entropy": 0.4808884654382263,
+      "epoch": 1.069611780455154,
+      "grad_norm": 2.666057586669922,
+      "learning_rate": 0.00016382243255158818,
+      "loss": 1.8543489074707031,
+      "mean_token_accuracy": 0.8768212256407497,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.45330303743481637,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 2.1171557903289795,
+      "learning_rate": 0.00016364410560779942,
+      "loss": 1.7948956298828125,
+      "mean_token_accuracy": 0.8791207140684127,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.45347678795456886,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 4.511318683624268,
+      "learning_rate": 0.00016328784000438723,
+      "loss": 1.7988812255859374,
+      "mean_token_accuracy": 0.8801145932078361,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.44952490359544756,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 2.961987257003784,
+      "learning_rate": 0.0001627544114642431,
+      "loss": 1.7823495483398437,
+      "mean_token_accuracy": 0.8799195346236229,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4502506497502327,
+      "epoch": 1.605087014725569,
+      "grad_norm": 2.924865484237671,
+      "learning_rate": 0.000162044981459947,
+      "loss": 1.7603852844238281,
+      "mean_token_accuracy": 0.8811277949810028,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.44528300017118455,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 2.928840160369873,
+      "learning_rate": 0.00016116109468480906,
+      "loss": 1.7513160705566406,
+      "mean_token_accuracy": 0.8816375133395195,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.43578719861805437,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 15.26456356048584,
+      "learning_rate": 0.00016010467568949708,
+      "loss": 1.7112632751464845,
+      "mean_token_accuracy": 0.884103564620018,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4700849764049053,
+      "eval_loss": 0.49145790934562683,
+      "eval_mean_token_accuracy": 0.8681637060642242,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5704,
+      "eval_samples_per_second": 16.548,
+      "eval_steps_per_second": 2.071,
+      "step": 748
+    },
+    {
+      "entropy": 0.44389665202058926,
+      "epoch": 2.005354752342704,
+      "grad_norm": 3.126400947570801,
+      "learning_rate": 0.00015887802469157283,
+      "loss": 1.74362060546875,
+      "mean_token_accuracy": 0.882077858604566,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.36593644849956036,
+      "epoch": 2.139223560910308,
+      "grad_norm": 4.825572490692139,
+      "learning_rate": 0.000157483812567062,
+      "loss": 1.4261384582519532,
+      "mean_token_accuracy": 0.897953551709652,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3735161118209362,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 2.341724395751953,
+      "learning_rate": 0.00015592507503496244,
+      "loss": 1.4566732788085937,
+      "mean_token_accuracy": 0.8954837635159493,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3712782260775566,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 2.141064405441284,
+      "learning_rate": 0.00015420520604735334,
+      "loss": 1.4417454528808593,
+      "mean_token_accuracy": 0.8987472346425056,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.3738844521343708,
+      "epoch": 2.540829986613119,
+      "grad_norm": 2.904395818710327,
+      "learning_rate": 0.0001523279503994976,
+      "loss": 1.4441893005371094,
+      "mean_token_accuracy": 0.8981871575117111,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.3702578065544367,
+      "epoch": 2.674698795180723,
+      "grad_norm": 2.941880226135254,
+      "learning_rate": 0.00015029739557602818,
+      "loss": 1.4411444091796874,
+      "mean_token_accuracy": 0.8977779766917229,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.37640198186039925,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 4.814720153808594,
+      "learning_rate": 0.00014811796285097166,
+      "loss": 1.463765869140625,
+      "mean_token_accuracy": 0.8968957820534706,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.3884583811461926,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 2.023144483566284,
+      "learning_rate": 0.0001457943976609884,
+      "loss": 1.4860101318359376,
+      "mean_token_accuracy": 0.8945580047369003,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.41501702144742014,
+      "eval_loss": 0.5103150010108948,
+      "eval_mean_token_accuracy": 0.8657891270518303,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.5282,
+      "eval_samples_per_second": 16.555,
+      "eval_steps_per_second": 2.072,
+      "step": 1122
+    },
+    {
+      "entropy": 0.33624990103822766,
+      "epoch": 3.074966532797858,
+      "grad_norm": 2.8879244327545166,
+      "learning_rate": 0.0001433317592727896,
+      "loss": 1.2471446990966797,
+      "mean_token_accuracy": 0.9082047313150733,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.3088844185322523,
+      "epoch": 3.208835341365462,
+      "grad_norm": 2.8509788513183594,
+      "learning_rate": 0.00014073540976722957,
+      "loss": 1.1441875457763673,
+      "mean_token_accuracy": 0.9140481147170066,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.30224390886723995,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 2.5208239555358887,
+      "learning_rate": 0.00013801100236405915,
+      "loss": 1.1275232696533204,
+      "mean_token_accuracy": 0.9146806076169014,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.295175199881196,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 2.4297444820404053,
+      "learning_rate": 0.00013516446911276066,
+      "loss": 1.1239344787597656,
+      "mean_token_accuracy": 0.9151004731655121,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.295776079967618,
+      "epoch": 3.610441767068273,
+      "grad_norm": 2.3972175121307373,
+      "learning_rate": 0.00013220200797626748,
+      "loss": 1.148626480102539,
+      "mean_token_accuracy": 0.9141753858327866,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.2951671688258648,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 2.1329967975616455,
+      "learning_rate": 0.00012913006933569033,
+      "loss": 1.1505547332763673,
+      "mean_token_accuracy": 0.9145446908473969,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.28995474845170977,
+      "epoch": 3.878179384203481,
+      "grad_norm": 2.111231803894043,
+      "learning_rate": 0.0001259553419454356,
+      "loss": 1.12584228515625,
+      "mean_token_accuracy": 0.9153258377313613,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34918407052755357,
+      "eval_loss": 0.5466129183769226,
+      "eval_mean_token_accuracy": 0.8677999797463417,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.4025,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.075,
+      "step": 1496
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.1874590853383002e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9abe250820de6d55106ad056cc8dddd15cd6bd60
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05026173039334608,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..107f041ea655da0ed5c4590dd66b6c2a00c1e982
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1870/trainer_state.json
@@ -0,0 +1,459 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 1870,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2582822921872139,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 5.033235549926758,
+      "learning_rate": 2.1466288485778066e-05,
+      "loss": 5.046328735351563,
+      "mean_token_accuracy": 0.7502484863996506,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5685664692521095,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 3.554903030395508,
+      "learning_rate": 4.337066449167405e-05,
+      "loss": 2.209185791015625,
+      "mean_token_accuracy": 0.8581047981977463,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5092987871170044,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 5.598084926605225,
+      "learning_rate": 6.527504049757005e-05,
+      "loss": 2.0146630859375,
+      "mean_token_accuracy": 0.8670356649160386,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4832092320919037,
+      "epoch": 0.535475234270415,
+      "grad_norm": 3.579439163208008,
+      "learning_rate": 8.717941650346603e-05,
+      "loss": 1.9066175842285156,
+      "mean_token_accuracy": 0.8735517236590385,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.49370287612080577,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 22.771854400634766,
+      "learning_rate": 0.00010908379250936202,
+      "loss": 1.9317852783203124,
+      "mean_token_accuracy": 0.8717742815613747,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.475419160425663,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 191.79444885253906,
+      "learning_rate": 0.000130988168515258,
+      "loss": 1.9097901916503905,
+      "mean_token_accuracy": 0.8744278407096863,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5022796393930912,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 3.216644287109375,
+      "learning_rate": 0.00015289254452115398,
+      "loss": 2.147085876464844,
+      "mean_token_accuracy": 0.869993035197258,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5250271247327327,
+      "eval_loss": 0.5184861421585083,
+      "eval_mean_token_accuracy": 0.8612929663062096,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.6309,
+      "eval_samples_per_second": 16.537,
+      "eval_steps_per_second": 2.07,
+      "step": 374
+    },
+    {
+      "entropy": 0.4808884654382263,
+      "epoch": 1.069611780455154,
+      "grad_norm": 2.666057586669922,
+      "learning_rate": 0.00016382243255158818,
+      "loss": 1.8543489074707031,
+      "mean_token_accuracy": 0.8768212256407497,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.45330303743481637,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 2.1171557903289795,
+      "learning_rate": 0.00016364410560779942,
+      "loss": 1.7948956298828125,
+      "mean_token_accuracy": 0.8791207140684127,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.45347678795456886,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 4.511318683624268,
+      "learning_rate": 0.00016328784000438723,
+      "loss": 1.7988812255859374,
+      "mean_token_accuracy": 0.8801145932078361,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.44952490359544756,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 2.961987257003784,
+      "learning_rate": 0.0001627544114642431,
+      "loss": 1.7823495483398437,
+      "mean_token_accuracy": 0.8799195346236229,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4502506497502327,
+      "epoch": 1.605087014725569,
+      "grad_norm": 2.924865484237671,
+      "learning_rate": 0.000162044981459947,
+      "loss": 1.7603852844238281,
+      "mean_token_accuracy": 0.8811277949810028,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.44528300017118455,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 2.928840160369873,
+      "learning_rate": 0.00016116109468480906,
+      "loss": 1.7513160705566406,
+      "mean_token_accuracy": 0.8816375133395195,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.43578719861805437,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 15.26456356048584,
+      "learning_rate": 0.00016010467568949708,
+      "loss": 1.7112632751464845,
+      "mean_token_accuracy": 0.884103564620018,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4700849764049053,
+      "eval_loss": 0.49145790934562683,
+      "eval_mean_token_accuracy": 0.8681637060642242,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5704,
+      "eval_samples_per_second": 16.548,
+      "eval_steps_per_second": 2.071,
+      "step": 748
+    },
+    {
+      "entropy": 0.44389665202058926,
+      "epoch": 2.005354752342704,
+      "grad_norm": 3.126400947570801,
+      "learning_rate": 0.00015887802469157283,
+      "loss": 1.74362060546875,
+      "mean_token_accuracy": 0.882077858604566,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.36593644849956036,
+      "epoch": 2.139223560910308,
+      "grad_norm": 4.825572490692139,
+      "learning_rate": 0.000157483812567062,
+      "loss": 1.4261384582519532,
+      "mean_token_accuracy": 0.897953551709652,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3735161118209362,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 2.341724395751953,
+      "learning_rate": 0.00015592507503496244,
+      "loss": 1.4566732788085937,
+      "mean_token_accuracy": 0.8954837635159493,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3712782260775566,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 2.141064405441284,
+      "learning_rate": 0.00015420520604735334,
+      "loss": 1.4417454528808593,
+      "mean_token_accuracy": 0.8987472346425056,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.3738844521343708,
+      "epoch": 2.540829986613119,
+      "grad_norm": 2.904395818710327,
+      "learning_rate": 0.0001523279503994976,
+      "loss": 1.4441893005371094,
+      "mean_token_accuracy": 0.8981871575117111,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.3702578065544367,
+      "epoch": 2.674698795180723,
+      "grad_norm": 2.941880226135254,
+      "learning_rate": 0.00015029739557602818,
+      "loss": 1.4411444091796874,
+      "mean_token_accuracy": 0.8977779766917229,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.37640198186039925,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 4.814720153808594,
+      "learning_rate": 0.00014811796285097166,
+      "loss": 1.463765869140625,
+      "mean_token_accuracy": 0.8968957820534706,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.3884583811461926,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 2.023144483566284,
+      "learning_rate": 0.0001457943976609884,
+      "loss": 1.4860101318359376,
+      "mean_token_accuracy": 0.8945580047369003,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.41501702144742014,
+      "eval_loss": 0.5103150010108948,
+      "eval_mean_token_accuracy": 0.8657891270518303,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.5282,
+      "eval_samples_per_second": 16.555,
+      "eval_steps_per_second": 2.072,
+      "step": 1122
+    },
+    {
+      "entropy": 0.33624990103822766,
+      "epoch": 3.074966532797858,
+      "grad_norm": 2.8879244327545166,
+      "learning_rate": 0.0001433317592727896,
+      "loss": 1.2471446990966797,
+      "mean_token_accuracy": 0.9082047313150733,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.3088844185322523,
+      "epoch": 3.208835341365462,
+      "grad_norm": 2.8509788513183594,
+      "learning_rate": 0.00014073540976722957,
+      "loss": 1.1441875457763673,
+      "mean_token_accuracy": 0.9140481147170066,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.30224390886723995,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 2.5208239555358887,
+      "learning_rate": 0.00013801100236405915,
+      "loss": 1.1275232696533204,
+      "mean_token_accuracy": 0.9146806076169014,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.295175199881196,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 2.4297444820404053,
+      "learning_rate": 0.00013516446911276066,
+      "loss": 1.1239344787597656,
+      "mean_token_accuracy": 0.9151004731655121,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.295776079967618,
+      "epoch": 3.610441767068273,
+      "grad_norm": 2.3972175121307373,
+      "learning_rate": 0.00013220200797626748,
+      "loss": 1.148626480102539,
+      "mean_token_accuracy": 0.9141753858327866,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.2951671688258648,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 2.1329967975616455,
+      "learning_rate": 0.00012913006933569033,
+      "loss": 1.1505547332763673,
+      "mean_token_accuracy": 0.9145446908473969,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.28995474845170977,
+      "epoch": 3.878179384203481,
+      "grad_norm": 2.111231803894043,
+      "learning_rate": 0.0001259553419454356,
+      "loss": 1.12584228515625,
+      "mean_token_accuracy": 0.9153258377313613,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34918407052755357,
+      "eval_loss": 0.5466129183769226,
+      "eval_mean_token_accuracy": 0.8677999797463417,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.4025,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.075,
+      "step": 1496
+    },
+    {
+      "entropy": 0.28912228466284395,
+      "epoch": 4.010709504685408,
+      "grad_norm": 2.6088380813598633,
+      "learning_rate": 0.00012268473836929623,
+      "loss": 1.1048170471191405,
+      "mean_token_accuracy": 0.9165902002291246,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.2037667266279459,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.7244584560394287,
+      "learning_rate": 0.00011932537992922588,
+      "loss": 0.7798351287841797,
+      "mean_token_accuracy": 0.9385521411895752,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.21155868768692015,
+      "epoch": 4.278447121820616,
+      "grad_norm": 17.15939712524414,
+      "learning_rate": 0.00011588458119956922,
+      "loss": 0.8124880981445313,
+      "mean_token_accuracy": 0.9354887393116951,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.21142005987465382,
+      "epoch": 4.412315930388219,
+      "grad_norm": 3.1366724967956543,
+      "learning_rate": 0.00011236983408050962,
+      "loss": 0.8087466430664062,
+      "mean_token_accuracy": 0.9360431012511253,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.21077097810804843,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.4884378910064697,
+      "learning_rate": 0.0001087887914854125,
+      "loss": 0.8054198455810547,
+      "mean_token_accuracy": 0.9361811754107475,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.21570609882473946,
+      "epoch": 4.680053547523427,
+      "grad_norm": 2.5278756618499756,
+      "learning_rate": 0.00010514925067758285,
+      "loss": 0.8254692077636718,
+      "mean_token_accuracy": 0.9351590833067894,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.21050533920526504,
+      "epoch": 4.813922356091031,
+      "grad_norm": 2.563352584838867,
+      "learning_rate": 0.00010145913629271953,
+      "loss": 0.8124603271484375,
+      "mean_token_accuracy": 0.9365199673175811,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.21181554518640042,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.7008941173553467,
+      "learning_rate": 9.772648308403213e-05,
+      "loss": 0.8135105895996094,
+      "mean_token_accuracy": 0.9371505591273308,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.2898014415055513,
+      "eval_loss": 0.6175746917724609,
+      "eval_mean_token_accuracy": 0.8674856871366501,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.3162,
+      "eval_samples_per_second": 16.591,
+      "eval_steps_per_second": 2.076,
+      "step": 1870
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.482704176402959e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9abe250820de6d55106ad056cc8dddd15cd6bd60
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05026173039334608,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..eb431803690260ca3f6cba8114749bb3bb5a5531
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2244/trainer_state.json
@@ -0,0 +1,540 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 2244,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2582822921872139,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 5.033235549926758,
+      "learning_rate": 2.1466288485778066e-05,
+      "loss": 5.046328735351563,
+      "mean_token_accuracy": 0.7502484863996506,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5685664692521095,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 3.554903030395508,
+      "learning_rate": 4.337066449167405e-05,
+      "loss": 2.209185791015625,
+      "mean_token_accuracy": 0.8581047981977463,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5092987871170044,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 5.598084926605225,
+      "learning_rate": 6.527504049757005e-05,
+      "loss": 2.0146630859375,
+      "mean_token_accuracy": 0.8670356649160386,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4832092320919037,
+      "epoch": 0.535475234270415,
+      "grad_norm": 3.579439163208008,
+      "learning_rate": 8.717941650346603e-05,
+      "loss": 1.9066175842285156,
+      "mean_token_accuracy": 0.8735517236590385,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.49370287612080577,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 22.771854400634766,
+      "learning_rate": 0.00010908379250936202,
+      "loss": 1.9317852783203124,
+      "mean_token_accuracy": 0.8717742815613747,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.475419160425663,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 191.79444885253906,
+      "learning_rate": 0.000130988168515258,
+      "loss": 1.9097901916503905,
+      "mean_token_accuracy": 0.8744278407096863,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5022796393930912,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 3.216644287109375,
+      "learning_rate": 0.00015289254452115398,
+      "loss": 2.147085876464844,
+      "mean_token_accuracy": 0.869993035197258,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5250271247327327,
+      "eval_loss": 0.5184861421585083,
+      "eval_mean_token_accuracy": 0.8612929663062096,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.6309,
+      "eval_samples_per_second": 16.537,
+      "eval_steps_per_second": 2.07,
+      "step": 374
+    },
+    {
+      "entropy": 0.4808884654382263,
+      "epoch": 1.069611780455154,
+      "grad_norm": 2.666057586669922,
+      "learning_rate": 0.00016382243255158818,
+      "loss": 1.8543489074707031,
+      "mean_token_accuracy": 0.8768212256407497,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.45330303743481637,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 2.1171557903289795,
+      "learning_rate": 0.00016364410560779942,
+      "loss": 1.7948956298828125,
+      "mean_token_accuracy": 0.8791207140684127,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.45347678795456886,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 4.511318683624268,
+      "learning_rate": 0.00016328784000438723,
+      "loss": 1.7988812255859374,
+      "mean_token_accuracy": 0.8801145932078361,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.44952490359544756,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 2.961987257003784,
+      "learning_rate": 0.0001627544114642431,
+      "loss": 1.7823495483398437,
+      "mean_token_accuracy": 0.8799195346236229,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4502506497502327,
+      "epoch": 1.605087014725569,
+      "grad_norm": 2.924865484237671,
+      "learning_rate": 0.000162044981459947,
+      "loss": 1.7603852844238281,
+      "mean_token_accuracy": 0.8811277949810028,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.44528300017118455,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 2.928840160369873,
+      "learning_rate": 0.00016116109468480906,
+      "loss": 1.7513160705566406,
+      "mean_token_accuracy": 0.8816375133395195,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.43578719861805437,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 15.26456356048584,
+      "learning_rate": 0.00016010467568949708,
+      "loss": 1.7112632751464845,
+      "mean_token_accuracy": 0.884103564620018,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4700849764049053,
+      "eval_loss": 0.49145790934562683,
+      "eval_mean_token_accuracy": 0.8681637060642242,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5704,
+      "eval_samples_per_second": 16.548,
+      "eval_steps_per_second": 2.071,
+      "step": 748
+    },
+    {
+      "entropy": 0.44389665202058926,
+      "epoch": 2.005354752342704,
+      "grad_norm": 3.126400947570801,
+      "learning_rate": 0.00015887802469157283,
+      "loss": 1.74362060546875,
+      "mean_token_accuracy": 0.882077858604566,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.36593644849956036,
+      "epoch": 2.139223560910308,
+      "grad_norm": 4.825572490692139,
+      "learning_rate": 0.000157483812567062,
+      "loss": 1.4261384582519532,
+      "mean_token_accuracy": 0.897953551709652,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3735161118209362,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 2.341724395751953,
+      "learning_rate": 0.00015592507503496244,
+      "loss": 1.4566732788085937,
+      "mean_token_accuracy": 0.8954837635159493,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3712782260775566,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 2.141064405441284,
+      "learning_rate": 0.00015420520604735334,
+      "loss": 1.4417454528808593,
+      "mean_token_accuracy": 0.8987472346425056,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.3738844521343708,
+      "epoch": 2.540829986613119,
+      "grad_norm": 2.904395818710327,
+      "learning_rate": 0.0001523279503994976,
+      "loss": 1.4441893005371094,
+      "mean_token_accuracy": 0.8981871575117111,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.3702578065544367,
+      "epoch": 2.674698795180723,
+      "grad_norm": 2.941880226135254,
+      "learning_rate": 0.00015029739557602818,
+      "loss": 1.4411444091796874,
+      "mean_token_accuracy": 0.8977779766917229,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.37640198186039925,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 4.814720153808594,
+      "learning_rate": 0.00014811796285097166,
+      "loss": 1.463765869140625,
+      "mean_token_accuracy": 0.8968957820534706,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.3884583811461926,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 2.023144483566284,
+      "learning_rate": 0.0001457943976609884,
+      "loss": 1.4860101318359376,
+      "mean_token_accuracy": 0.8945580047369003,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.41501702144742014,
+      "eval_loss": 0.5103150010108948,
+      "eval_mean_token_accuracy": 0.8657891270518303,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.5282,
+      "eval_samples_per_second": 16.555,
+      "eval_steps_per_second": 2.072,
+      "step": 1122
+    },
+    {
+      "entropy": 0.33624990103822766,
+      "epoch": 3.074966532797858,
+      "grad_norm": 2.8879244327545166,
+      "learning_rate": 0.0001433317592727896,
+      "loss": 1.2471446990966797,
+      "mean_token_accuracy": 0.9082047313150733,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.3088844185322523,
+      "epoch": 3.208835341365462,
+      "grad_norm": 2.8509788513183594,
+      "learning_rate": 0.00014073540976722957,
+      "loss": 1.1441875457763673,
+      "mean_token_accuracy": 0.9140481147170066,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.30224390886723995,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 2.5208239555358887,
+      "learning_rate": 0.00013801100236405915,
+      "loss": 1.1275232696533204,
+      "mean_token_accuracy": 0.9146806076169014,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.295175199881196,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 2.4297444820404053,
+      "learning_rate": 0.00013516446911276066,
+      "loss": 1.1239344787597656,
+      "mean_token_accuracy": 0.9151004731655121,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.295776079967618,
+      "epoch": 3.610441767068273,
+      "grad_norm": 2.3972175121307373,
+      "learning_rate": 0.00013220200797626748,
+      "loss": 1.148626480102539,
+      "mean_token_accuracy": 0.9141753858327866,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.2951671688258648,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 2.1329967975616455,
+      "learning_rate": 0.00012913006933569033,
+      "loss": 1.1505547332763673,
+      "mean_token_accuracy": 0.9145446908473969,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.28995474845170977,
+      "epoch": 3.878179384203481,
+      "grad_norm": 2.111231803894043,
+      "learning_rate": 0.0001259553419454356,
+      "loss": 1.12584228515625,
+      "mean_token_accuracy": 0.9153258377313613,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34918407052755357,
+      "eval_loss": 0.5466129183769226,
+      "eval_mean_token_accuracy": 0.8677999797463417,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.4025,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.075,
+      "step": 1496
+    },
+    {
+      "entropy": 0.28912228466284395,
+      "epoch": 4.010709504685408,
+      "grad_norm": 2.6088380813598633,
+      "learning_rate": 0.00012268473836929623,
+      "loss": 1.1048170471191405,
+      "mean_token_accuracy": 0.9165902002291246,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.2037667266279459,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.7244584560394287,
+      "learning_rate": 0.00011932537992922588,
+      "loss": 0.7798351287841797,
+      "mean_token_accuracy": 0.9385521411895752,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.21155868768692015,
+      "epoch": 4.278447121820616,
+      "grad_norm": 17.15939712524414,
+      "learning_rate": 0.00011588458119956922,
+      "loss": 0.8124880981445313,
+      "mean_token_accuracy": 0.9354887393116951,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.21142005987465382,
+      "epoch": 4.412315930388219,
+      "grad_norm": 3.1366724967956543,
+      "learning_rate": 0.00011236983408050962,
+      "loss": 0.8087466430664062,
+      "mean_token_accuracy": 0.9360431012511253,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.21077097810804843,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.4884378910064697,
+      "learning_rate": 0.0001087887914854125,
+      "loss": 0.8054198455810547,
+      "mean_token_accuracy": 0.9361811754107475,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.21570609882473946,
+      "epoch": 4.680053547523427,
+      "grad_norm": 2.5278756618499756,
+      "learning_rate": 0.00010514925067758285,
+      "loss": 0.8254692077636718,
+      "mean_token_accuracy": 0.9351590833067894,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.21050533920526504,
+      "epoch": 4.813922356091031,
+      "grad_norm": 2.563352584838867,
+      "learning_rate": 0.00010145913629271953,
+      "loss": 0.8124603271484375,
+      "mean_token_accuracy": 0.9365199673175811,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.21181554518640042,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.7008941173553467,
+      "learning_rate": 9.772648308403213e-05,
+      "loss": 0.8135105895996094,
+      "mean_token_accuracy": 0.9371505591273308,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.2898014415055513,
+      "eval_loss": 0.6175746917724609,
+      "eval_mean_token_accuracy": 0.8674856871366501,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.3162,
+      "eval_samples_per_second": 16.591,
+      "eval_steps_per_second": 2.076,
+      "step": 1870
+    },
+    {
+      "entropy": 0.1729431924871122,
+      "epoch": 5.080321285140562,
+      "grad_norm": 1.9089794158935547,
+      "learning_rate": 9.395941842759104e-05,
+      "loss": 0.6498579406738281,
+      "mean_token_accuracy": 0.948695200561273,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14844272032380104,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.9947986602783203,
+      "learning_rate": 9.016614462600325e-05,
+      "loss": 0.5658287048339844,
+      "mean_token_accuracy": 0.9562490177154541,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.15113764170557262,
+      "epoch": 5.34805890227577,
+      "grad_norm": 3.0459158420562744,
+      "learning_rate": 8.635492104894498e-05,
+      "loss": 0.569720458984375,
+      "mean_token_accuracy": 0.9561498582363128,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.15329553466290236,
+      "epoch": 5.481927710843373,
+      "grad_norm": 2.5919315814971924,
+      "learning_rate": 8.253404614943809e-05,
+      "loss": 0.5799734878540039,
+      "mean_token_accuracy": 0.954962648153305,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.1544624574482441,
+      "epoch": 5.615796519410977,
+      "grad_norm": 3.170863628387451,
+      "learning_rate": 7.871183939502759e-05,
+      "loss": 0.5769558715820312,
+      "mean_token_accuracy": 0.9549962303042412,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.1543046496436,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.843237280845642,
+      "learning_rate": 7.489662315320254e-05,
+      "loss": 0.5841741561889648,
+      "mean_token_accuracy": 0.9532951918244362,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.14787622597068548,
+      "epoch": 5.883534136546185,
+      "grad_norm": 2.890704393386841,
+      "learning_rate": 7.109670457050292e-05,
+      "loss": 0.5526316452026367,
+      "mean_token_accuracy": 0.9569103759527207,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2396429342031479,
+      "eval_loss": 0.7484959959983826,
+      "eval_mean_token_accuracy": 0.8640641874074936,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.6984,
+      "eval_samples_per_second": 16.526,
+      "eval_steps_per_second": 2.068,
+      "step": 2244
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.7770396848717358e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9abe250820de6d55106ad056cc8dddd15cd6bd60
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05026173039334608,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..955d6570bde68e1a78726644c57f06a4aab2c7dd
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2618/trainer_state.json
@@ -0,0 +1,631 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 2618,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2582822921872139,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 5.033235549926758,
+      "learning_rate": 2.1466288485778066e-05,
+      "loss": 5.046328735351563,
+      "mean_token_accuracy": 0.7502484863996506,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5685664692521095,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 3.554903030395508,
+      "learning_rate": 4.337066449167405e-05,
+      "loss": 2.209185791015625,
+      "mean_token_accuracy": 0.8581047981977463,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5092987871170044,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 5.598084926605225,
+      "learning_rate": 6.527504049757005e-05,
+      "loss": 2.0146630859375,
+      "mean_token_accuracy": 0.8670356649160386,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4832092320919037,
+      "epoch": 0.535475234270415,
+      "grad_norm": 3.579439163208008,
+      "learning_rate": 8.717941650346603e-05,
+      "loss": 1.9066175842285156,
+      "mean_token_accuracy": 0.8735517236590385,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.49370287612080577,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 22.771854400634766,
+      "learning_rate": 0.00010908379250936202,
+      "loss": 1.9317852783203124,
+      "mean_token_accuracy": 0.8717742815613747,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.475419160425663,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 191.79444885253906,
+      "learning_rate": 0.000130988168515258,
+      "loss": 1.9097901916503905,
+      "mean_token_accuracy": 0.8744278407096863,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5022796393930912,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 3.216644287109375,
+      "learning_rate": 0.00015289254452115398,
+      "loss": 2.147085876464844,
+      "mean_token_accuracy": 0.869993035197258,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5250271247327327,
+      "eval_loss": 0.5184861421585083,
+      "eval_mean_token_accuracy": 0.8612929663062096,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.6309,
+      "eval_samples_per_second": 16.537,
+      "eval_steps_per_second": 2.07,
+      "step": 374
+    },
+    {
+      "entropy": 0.4808884654382263,
+      "epoch": 1.069611780455154,
+      "grad_norm": 2.666057586669922,
+      "learning_rate": 0.00016382243255158818,
+      "loss": 1.8543489074707031,
+      "mean_token_accuracy": 0.8768212256407497,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.45330303743481637,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 2.1171557903289795,
+      "learning_rate": 0.00016364410560779942,
+      "loss": 1.7948956298828125,
+      "mean_token_accuracy": 0.8791207140684127,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.45347678795456886,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 4.511318683624268,
+      "learning_rate": 0.00016328784000438723,
+      "loss": 1.7988812255859374,
+      "mean_token_accuracy": 0.8801145932078361,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.44952490359544756,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 2.961987257003784,
+      "learning_rate": 0.0001627544114642431,
+      "loss": 1.7823495483398437,
+      "mean_token_accuracy": 0.8799195346236229,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4502506497502327,
+      "epoch": 1.605087014725569,
+      "grad_norm": 2.924865484237671,
+      "learning_rate": 0.000162044981459947,
+      "loss": 1.7603852844238281,
+      "mean_token_accuracy": 0.8811277949810028,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.44528300017118455,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 2.928840160369873,
+      "learning_rate": 0.00016116109468480906,
+      "loss": 1.7513160705566406,
+      "mean_token_accuracy": 0.8816375133395195,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.43578719861805437,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 15.26456356048584,
+      "learning_rate": 0.00016010467568949708,
+      "loss": 1.7112632751464845,
+      "mean_token_accuracy": 0.884103564620018,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4700849764049053,
+      "eval_loss": 0.49145790934562683,
+      "eval_mean_token_accuracy": 0.8681637060642242,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5704,
+      "eval_samples_per_second": 16.548,
+      "eval_steps_per_second": 2.071,
+      "step": 748
+    },
+    {
+      "entropy": 0.44389665202058926,
+      "epoch": 2.005354752342704,
+      "grad_norm": 3.126400947570801,
+      "learning_rate": 0.00015887802469157283,
+      "loss": 1.74362060546875,
+      "mean_token_accuracy": 0.882077858604566,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.36593644849956036,
+      "epoch": 2.139223560910308,
+      "grad_norm": 4.825572490692139,
+      "learning_rate": 0.000157483812567062,
+      "loss": 1.4261384582519532,
+      "mean_token_accuracy": 0.897953551709652,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3735161118209362,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 2.341724395751953,
+      "learning_rate": 0.00015592507503496244,
+      "loss": 1.4566732788085937,
+      "mean_token_accuracy": 0.8954837635159493,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3712782260775566,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 2.141064405441284,
+      "learning_rate": 0.00015420520604735334,
+      "loss": 1.4417454528808593,
+      "mean_token_accuracy": 0.8987472346425056,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.3738844521343708,
+      "epoch": 2.540829986613119,
+      "grad_norm": 2.904395818710327,
+      "learning_rate": 0.0001523279503994976,
+      "loss": 1.4441893005371094,
+      "mean_token_accuracy": 0.8981871575117111,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.3702578065544367,
+      "epoch": 2.674698795180723,
+      "grad_norm": 2.941880226135254,
+      "learning_rate": 0.00015029739557602818,
+      "loss": 1.4411444091796874,
+      "mean_token_accuracy": 0.8977779766917229,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.37640198186039925,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 4.814720153808594,
+      "learning_rate": 0.00014811796285097166,
+      "loss": 1.463765869140625,
+      "mean_token_accuracy": 0.8968957820534706,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.3884583811461926,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 2.023144483566284,
+      "learning_rate": 0.0001457943976609884,
+      "loss": 1.4860101318359376,
+      "mean_token_accuracy": 0.8945580047369003,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.41501702144742014,
+      "eval_loss": 0.5103150010108948,
+      "eval_mean_token_accuracy": 0.8657891270518303,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.5282,
+      "eval_samples_per_second": 16.555,
+      "eval_steps_per_second": 2.072,
+      "step": 1122
+    },
+    {
+      "entropy": 0.33624990103822766,
+      "epoch": 3.074966532797858,
+      "grad_norm": 2.8879244327545166,
+      "learning_rate": 0.0001433317592727896,
+      "loss": 1.2471446990966797,
+      "mean_token_accuracy": 0.9082047313150733,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.3088844185322523,
+      "epoch": 3.208835341365462,
+      "grad_norm": 2.8509788513183594,
+      "learning_rate": 0.00014073540976722957,
+      "loss": 1.1441875457763673,
+      "mean_token_accuracy": 0.9140481147170066,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.30224390886723995,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 2.5208239555358887,
+      "learning_rate": 0.00013801100236405915,
+      "loss": 1.1275232696533204,
+      "mean_token_accuracy": 0.9146806076169014,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.295175199881196,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 2.4297444820404053,
+      "learning_rate": 0.00013516446911276066,
+      "loss": 1.1239344787597656,
+      "mean_token_accuracy": 0.9151004731655121,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.295776079967618,
+      "epoch": 3.610441767068273,
+      "grad_norm": 2.3972175121307373,
+      "learning_rate": 0.00013220200797626748,
+      "loss": 1.148626480102539,
+      "mean_token_accuracy": 0.9141753858327866,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.2951671688258648,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 2.1329967975616455,
+      "learning_rate": 0.00012913006933569033,
+      "loss": 1.1505547332763673,
+      "mean_token_accuracy": 0.9145446908473969,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.28995474845170977,
+      "epoch": 3.878179384203481,
+      "grad_norm": 2.111231803894043,
+      "learning_rate": 0.0001259553419454356,
+      "loss": 1.12584228515625,
+      "mean_token_accuracy": 0.9153258377313613,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34918407052755357,
+      "eval_loss": 0.5466129183769226,
+      "eval_mean_token_accuracy": 0.8677999797463417,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.4025,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.075,
+      "step": 1496
+    },
+    {
+      "entropy": 0.28912228466284395,
+      "epoch": 4.010709504685408,
+      "grad_norm": 2.6088380813598633,
+      "learning_rate": 0.00012268473836929623,
+      "loss": 1.1048170471191405,
+      "mean_token_accuracy": 0.9165902002291246,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.2037667266279459,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.7244584560394287,
+      "learning_rate": 0.00011932537992922588,
+      "loss": 0.7798351287841797,
+      "mean_token_accuracy": 0.9385521411895752,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.21155868768692015,
+      "epoch": 4.278447121820616,
+      "grad_norm": 17.15939712524414,
+      "learning_rate": 0.00011588458119956922,
+      "loss": 0.8124880981445313,
+      "mean_token_accuracy": 0.9354887393116951,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.21142005987465382,
+      "epoch": 4.412315930388219,
+      "grad_norm": 3.1366724967956543,
+      "learning_rate": 0.00011236983408050962,
+      "loss": 0.8087466430664062,
+      "mean_token_accuracy": 0.9360431012511253,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.21077097810804843,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.4884378910064697,
+      "learning_rate": 0.0001087887914854125,
+      "loss": 0.8054198455810547,
+      "mean_token_accuracy": 0.9361811754107475,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.21570609882473946,
+      "epoch": 4.680053547523427,
+      "grad_norm": 2.5278756618499756,
+      "learning_rate": 0.00010514925067758285,
+      "loss": 0.8254692077636718,
+      "mean_token_accuracy": 0.9351590833067894,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.21050533920526504,
+      "epoch": 4.813922356091031,
+      "grad_norm": 2.563352584838867,
+      "learning_rate": 0.00010145913629271953,
+      "loss": 0.8124603271484375,
+      "mean_token_accuracy": 0.9365199673175811,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.21181554518640042,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.7008941173553467,
+      "learning_rate": 9.772648308403213e-05,
+      "loss": 0.8135105895996094,
+      "mean_token_accuracy": 0.9371505591273308,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.2898014415055513,
+      "eval_loss": 0.6175746917724609,
+      "eval_mean_token_accuracy": 0.8674856871366501,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.3162,
+      "eval_samples_per_second": 16.591,
+      "eval_steps_per_second": 2.076,
+      "step": 1870
+    },
+    {
+      "entropy": 0.1729431924871122,
+      "epoch": 5.080321285140562,
+      "grad_norm": 1.9089794158935547,
+      "learning_rate": 9.395941842759104e-05,
+      "loss": 0.6498579406738281,
+      "mean_token_accuracy": 0.948695200561273,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14844272032380104,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.9947986602783203,
+      "learning_rate": 9.016614462600325e-05,
+      "loss": 0.5658287048339844,
+      "mean_token_accuracy": 0.9562490177154541,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.15113764170557262,
+      "epoch": 5.34805890227577,
+      "grad_norm": 3.0459158420562744,
+      "learning_rate": 8.635492104894498e-05,
+      "loss": 0.569720458984375,
+      "mean_token_accuracy": 0.9561498582363128,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.15329553466290236,
+      "epoch": 5.481927710843373,
+      "grad_norm": 2.5919315814971924,
+      "learning_rate": 8.253404614943809e-05,
+      "loss": 0.5799734878540039,
+      "mean_token_accuracy": 0.954962648153305,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.1544624574482441,
+      "epoch": 5.615796519410977,
+      "grad_norm": 3.170863628387451,
+      "learning_rate": 7.871183939502759e-05,
+      "loss": 0.5769558715820312,
+      "mean_token_accuracy": 0.9549962303042412,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.1543046496436,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.843237280845642,
+      "learning_rate": 7.489662315320254e-05,
+      "loss": 0.5841741561889648,
+      "mean_token_accuracy": 0.9532951918244362,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.14787622597068548,
+      "epoch": 5.883534136546185,
+      "grad_norm": 2.890704393386841,
+      "learning_rate": 7.109670457050292e-05,
+      "loss": 0.5526316452026367,
+      "mean_token_accuracy": 0.9569103759527207,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2396429342031479,
+      "eval_loss": 0.7484959959983826,
+      "eval_mean_token_accuracy": 0.8640641874074936,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.6984,
+      "eval_samples_per_second": 16.526,
+      "eval_steps_per_second": 2.068,
+      "step": 2244
+    },
+    {
+      "entropy": 0.14854476036447467,
+      "epoch": 6.016064257028113,
+      "grad_norm": 2.5248372554779053,
+      "learning_rate": 6.732035748476789e-05,
+      "loss": 0.5454582977294922,
+      "mean_token_accuracy": 0.9571841708337417,
+      "num_tokens": 5246734.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.10946711044758559,
+      "epoch": 6.149933065595716,
+      "grad_norm": 2.492736577987671,
+      "learning_rate": 6.357580440990978e-05,
+      "loss": 0.4096903991699219,
+      "mean_token_accuracy": 0.96969064027071,
+      "num_tokens": 5366334.0,
+      "step": 2300
+    },
+    {
+      "entropy": 0.1087673882767558,
+      "epoch": 6.28380187416332,
+      "grad_norm": 2.550652027130127,
+      "learning_rate": 5.9871198632439174e-05,
+      "loss": 0.4065860748291016,
+      "mean_token_accuracy": 0.969154157936573,
+      "num_tokens": 5487013.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.11702938644215465,
+      "epoch": 6.417670682730924,
+      "grad_norm": 2.2913568019866943,
+      "learning_rate": 5.621460645872391e-05,
+      "loss": 0.4343274688720703,
+      "mean_token_accuracy": 0.9671943977475166,
+      "num_tokens": 5599415.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.11134784514084459,
+      "epoch": 6.551539491298527,
+      "grad_norm": 2.7473230361938477,
+      "learning_rate": 5.2613989651636254e-05,
+      "loss": 0.4231544876098633,
+      "mean_token_accuracy": 0.968756687939167,
+      "num_tokens": 5718099.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.11262517396360636,
+      "epoch": 6.685408299866131,
+      "grad_norm": 1.8821665048599243,
+      "learning_rate": 4.90771880948302e-05,
+      "loss": 0.42503364562988283,
+      "mean_token_accuracy": 0.968946928679943,
+      "num_tokens": 5833902.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.11436546228826046,
+      "epoch": 6.8192771084337345,
+      "grad_norm": 2.5898215770721436,
+      "learning_rate": 4.561190272239513e-05,
+      "loss": 0.4263697052001953,
+      "mean_token_accuracy": 0.967577712237835,
+      "num_tokens": 5948132.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.11348343381658196,
+      "epoch": 6.953145917001339,
+      "grad_norm": 1.4793730974197388,
+      "learning_rate": 4.222567875105448e-05,
+      "loss": 0.42732913970947267,
+      "mean_token_accuracy": 0.9683026453852653,
+      "num_tokens": 6066224.0,
+      "step": 2600
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.20438951179385184,
+      "eval_loss": 0.8442238569259644,
+      "eval_mean_token_accuracy": 0.8623786172270775,
+      "eval_num_tokens": 6105729.0,
+      "eval_runtime": 96.5733,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.071,
+      "step": 2618
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.0754369719077043e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9abe250820de6d55106ad056cc8dddd15cd6bd60
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05026173039334608,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..0b2adeb85f853dc74ab13629bfaa8db613d396d9
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2992/trainer_state.json
@@ -0,0 +1,712 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 8.0,
+  "eval_steps": 500,
+  "global_step": 2992,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2582822921872139,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 5.033235549926758,
+      "learning_rate": 2.1466288485778066e-05,
+      "loss": 5.046328735351563,
+      "mean_token_accuracy": 0.7502484863996506,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5685664692521095,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 3.554903030395508,
+      "learning_rate": 4.337066449167405e-05,
+      "loss": 2.209185791015625,
+      "mean_token_accuracy": 0.8581047981977463,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5092987871170044,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 5.598084926605225,
+      "learning_rate": 6.527504049757005e-05,
+      "loss": 2.0146630859375,
+      "mean_token_accuracy": 0.8670356649160386,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4832092320919037,
+      "epoch": 0.535475234270415,
+      "grad_norm": 3.579439163208008,
+      "learning_rate": 8.717941650346603e-05,
+      "loss": 1.9066175842285156,
+      "mean_token_accuracy": 0.8735517236590385,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.49370287612080577,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 22.771854400634766,
+      "learning_rate": 0.00010908379250936202,
+      "loss": 1.9317852783203124,
+      "mean_token_accuracy": 0.8717742815613747,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.475419160425663,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 191.79444885253906,
+      "learning_rate": 0.000130988168515258,
+      "loss": 1.9097901916503905,
+      "mean_token_accuracy": 0.8744278407096863,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5022796393930912,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 3.216644287109375,
+      "learning_rate": 0.00015289254452115398,
+      "loss": 2.147085876464844,
+      "mean_token_accuracy": 0.869993035197258,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5250271247327327,
+      "eval_loss": 0.5184861421585083,
+      "eval_mean_token_accuracy": 0.8612929663062096,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.6309,
+      "eval_samples_per_second": 16.537,
+      "eval_steps_per_second": 2.07,
+      "step": 374
+    },
+    {
+      "entropy": 0.4808884654382263,
+      "epoch": 1.069611780455154,
+      "grad_norm": 2.666057586669922,
+      "learning_rate": 0.00016382243255158818,
+      "loss": 1.8543489074707031,
+      "mean_token_accuracy": 0.8768212256407497,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.45330303743481637,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 2.1171557903289795,
+      "learning_rate": 0.00016364410560779942,
+      "loss": 1.7948956298828125,
+      "mean_token_accuracy": 0.8791207140684127,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.45347678795456886,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 4.511318683624268,
+      "learning_rate": 0.00016328784000438723,
+      "loss": 1.7988812255859374,
+      "mean_token_accuracy": 0.8801145932078361,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.44952490359544756,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 2.961987257003784,
+      "learning_rate": 0.0001627544114642431,
+      "loss": 1.7823495483398437,
+      "mean_token_accuracy": 0.8799195346236229,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4502506497502327,
+      "epoch": 1.605087014725569,
+      "grad_norm": 2.924865484237671,
+      "learning_rate": 0.000162044981459947,
+      "loss": 1.7603852844238281,
+      "mean_token_accuracy": 0.8811277949810028,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.44528300017118455,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 2.928840160369873,
+      "learning_rate": 0.00016116109468480906,
+      "loss": 1.7513160705566406,
+      "mean_token_accuracy": 0.8816375133395195,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.43578719861805437,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 15.26456356048584,
+      "learning_rate": 0.00016010467568949708,
+      "loss": 1.7112632751464845,
+      "mean_token_accuracy": 0.884103564620018,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4700849764049053,
+      "eval_loss": 0.49145790934562683,
+      "eval_mean_token_accuracy": 0.8681637060642242,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5704,
+      "eval_samples_per_second": 16.548,
+      "eval_steps_per_second": 2.071,
+      "step": 748
+    },
+    {
+      "entropy": 0.44389665202058926,
+      "epoch": 2.005354752342704,
+      "grad_norm": 3.126400947570801,
+      "learning_rate": 0.00015887802469157283,
+      "loss": 1.74362060546875,
+      "mean_token_accuracy": 0.882077858604566,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.36593644849956036,
+      "epoch": 2.139223560910308,
+      "grad_norm": 4.825572490692139,
+      "learning_rate": 0.000157483812567062,
+      "loss": 1.4261384582519532,
+      "mean_token_accuracy": 0.897953551709652,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3735161118209362,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 2.341724395751953,
+      "learning_rate": 0.00015592507503496244,
+      "loss": 1.4566732788085937,
+      "mean_token_accuracy": 0.8954837635159493,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3712782260775566,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 2.141064405441284,
+      "learning_rate": 0.00015420520604735334,
+      "loss": 1.4417454528808593,
+      "mean_token_accuracy": 0.8987472346425056,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.3738844521343708,
+      "epoch": 2.540829986613119,
+      "grad_norm": 2.904395818710327,
+      "learning_rate": 0.0001523279503994976,
+      "loss": 1.4441893005371094,
+      "mean_token_accuracy": 0.8981871575117111,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.3702578065544367,
+      "epoch": 2.674698795180723,
+      "grad_norm": 2.941880226135254,
+      "learning_rate": 0.00015029739557602818,
+      "loss": 1.4411444091796874,
+      "mean_token_accuracy": 0.8977779766917229,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.37640198186039925,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 4.814720153808594,
+      "learning_rate": 0.00014811796285097166,
+      "loss": 1.463765869140625,
+      "mean_token_accuracy": 0.8968957820534706,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.3884583811461926,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 2.023144483566284,
+      "learning_rate": 0.0001457943976609884,
+      "loss": 1.4860101318359376,
+      "mean_token_accuracy": 0.8945580047369003,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.41501702144742014,
+      "eval_loss": 0.5103150010108948,
+      "eval_mean_token_accuracy": 0.8657891270518303,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.5282,
+      "eval_samples_per_second": 16.555,
+      "eval_steps_per_second": 2.072,
+      "step": 1122
+    },
+    {
+      "entropy": 0.33624990103822766,
+      "epoch": 3.074966532797858,
+      "grad_norm": 2.8879244327545166,
+      "learning_rate": 0.0001433317592727896,
+      "loss": 1.2471446990966797,
+      "mean_token_accuracy": 0.9082047313150733,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.3088844185322523,
+      "epoch": 3.208835341365462,
+      "grad_norm": 2.8509788513183594,
+      "learning_rate": 0.00014073540976722957,
+      "loss": 1.1441875457763673,
+      "mean_token_accuracy": 0.9140481147170066,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.30224390886723995,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 2.5208239555358887,
+      "learning_rate": 0.00013801100236405915,
+      "loss": 1.1275232696533204,
+      "mean_token_accuracy": 0.9146806076169014,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.295175199881196,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 2.4297444820404053,
+      "learning_rate": 0.00013516446911276066,
+      "loss": 1.1239344787597656,
+      "mean_token_accuracy": 0.9151004731655121,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.295776079967618,
+      "epoch": 3.610441767068273,
+      "grad_norm": 2.3972175121307373,
+      "learning_rate": 0.00013220200797626748,
+      "loss": 1.148626480102539,
+      "mean_token_accuracy": 0.9141753858327866,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.2951671688258648,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 2.1329967975616455,
+      "learning_rate": 0.00012913006933569033,
+      "loss": 1.1505547332763673,
+      "mean_token_accuracy": 0.9145446908473969,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.28995474845170977,
+      "epoch": 3.878179384203481,
+      "grad_norm": 2.111231803894043,
+      "learning_rate": 0.0001259553419454356,
+      "loss": 1.12584228515625,
+      "mean_token_accuracy": 0.9153258377313613,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34918407052755357,
+      "eval_loss": 0.5466129183769226,
+      "eval_mean_token_accuracy": 0.8677999797463417,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.4025,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.075,
+      "step": 1496
+    },
+    {
+      "entropy": 0.28912228466284395,
+      "epoch": 4.010709504685408,
+      "grad_norm": 2.6088380813598633,
+      "learning_rate": 0.00012268473836929623,
+      "loss": 1.1048170471191405,
+      "mean_token_accuracy": 0.9165902002291246,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.2037667266279459,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.7244584560394287,
+      "learning_rate": 0.00011932537992922588,
+      "loss": 0.7798351287841797,
+      "mean_token_accuracy": 0.9385521411895752,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.21155868768692015,
+      "epoch": 4.278447121820616,
+      "grad_norm": 17.15939712524414,
+      "learning_rate": 0.00011588458119956922,
+      "loss": 0.8124880981445313,
+      "mean_token_accuracy": 0.9354887393116951,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.21142005987465382,
+      "epoch": 4.412315930388219,
+      "grad_norm": 3.1366724967956543,
+      "learning_rate": 0.00011236983408050962,
+      "loss": 0.8087466430664062,
+      "mean_token_accuracy": 0.9360431012511253,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.21077097810804843,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.4884378910064697,
+      "learning_rate": 0.0001087887914854125,
+      "loss": 0.8054198455810547,
+      "mean_token_accuracy": 0.9361811754107475,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.21570609882473946,
+      "epoch": 4.680053547523427,
+      "grad_norm": 2.5278756618499756,
+      "learning_rate": 0.00010514925067758285,
+      "loss": 0.8254692077636718,
+      "mean_token_accuracy": 0.9351590833067894,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.21050533920526504,
+      "epoch": 4.813922356091031,
+      "grad_norm": 2.563352584838867,
+      "learning_rate": 0.00010145913629271953,
+      "loss": 0.8124603271484375,
+      "mean_token_accuracy": 0.9365199673175811,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.21181554518640042,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.7008941173553467,
+      "learning_rate": 9.772648308403213e-05,
+      "loss": 0.8135105895996094,
+      "mean_token_accuracy": 0.9371505591273308,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.2898014415055513,
+      "eval_loss": 0.6175746917724609,
+      "eval_mean_token_accuracy": 0.8674856871366501,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.3162,
+      "eval_samples_per_second": 16.591,
+      "eval_steps_per_second": 2.076,
+      "step": 1870
+    },
+    {
+      "entropy": 0.1729431924871122,
+      "epoch": 5.080321285140562,
+      "grad_norm": 1.9089794158935547,
+      "learning_rate": 9.395941842759104e-05,
+      "loss": 0.6498579406738281,
+      "mean_token_accuracy": 0.948695200561273,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14844272032380104,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.9947986602783203,
+      "learning_rate": 9.016614462600325e-05,
+      "loss": 0.5658287048339844,
+      "mean_token_accuracy": 0.9562490177154541,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.15113764170557262,
+      "epoch": 5.34805890227577,
+      "grad_norm": 3.0459158420562744,
+      "learning_rate": 8.635492104894498e-05,
+      "loss": 0.569720458984375,
+      "mean_token_accuracy": 0.9561498582363128,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.15329553466290236,
+      "epoch": 5.481927710843373,
+      "grad_norm": 2.5919315814971924,
+      "learning_rate": 8.253404614943809e-05,
+      "loss": 0.5799734878540039,
+      "mean_token_accuracy": 0.954962648153305,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.1544624574482441,
+      "epoch": 5.615796519410977,
+      "grad_norm": 3.170863628387451,
+      "learning_rate": 7.871183939502759e-05,
+      "loss": 0.5769558715820312,
+      "mean_token_accuracy": 0.9549962303042412,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.1543046496436,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.843237280845642,
+      "learning_rate": 7.489662315320254e-05,
+      "loss": 0.5841741561889648,
+      "mean_token_accuracy": 0.9532951918244362,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.14787622597068548,
+      "epoch": 5.883534136546185,
+      "grad_norm": 2.890704393386841,
+      "learning_rate": 7.109670457050292e-05,
+      "loss": 0.5526316452026367,
+      "mean_token_accuracy": 0.9569103759527207,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2396429342031479,
+      "eval_loss": 0.7484959959983826,
+      "eval_mean_token_accuracy": 0.8640641874074936,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.6984,
+      "eval_samples_per_second": 16.526,
+      "eval_steps_per_second": 2.068,
+      "step": 2244
+    },
+    {
+      "entropy": 0.14854476036447467,
+      "epoch": 6.016064257028113,
+      "grad_norm": 2.5248372554779053,
+      "learning_rate": 6.732035748476789e-05,
+      "loss": 0.5454582977294922,
+      "mean_token_accuracy": 0.9571841708337417,
+      "num_tokens": 5246734.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.10946711044758559,
+      "epoch": 6.149933065595716,
+      "grad_norm": 2.492736577987671,
+      "learning_rate": 6.357580440990978e-05,
+      "loss": 0.4096903991699219,
+      "mean_token_accuracy": 0.96969064027071,
+      "num_tokens": 5366334.0,
+      "step": 2300
+    },
+    {
+      "entropy": 0.1087673882767558,
+      "epoch": 6.28380187416332,
+      "grad_norm": 2.550652027130127,
+      "learning_rate": 5.9871198632439174e-05,
+      "loss": 0.4065860748291016,
+      "mean_token_accuracy": 0.969154157936573,
+      "num_tokens": 5487013.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.11702938644215465,
+      "epoch": 6.417670682730924,
+      "grad_norm": 2.2913568019866943,
+      "learning_rate": 5.621460645872391e-05,
+      "loss": 0.4343274688720703,
+      "mean_token_accuracy": 0.9671943977475166,
+      "num_tokens": 5599415.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.11134784514084459,
+      "epoch": 6.551539491298527,
+      "grad_norm": 2.7473230361938477,
+      "learning_rate": 5.2613989651636254e-05,
+      "loss": 0.4231544876098633,
+      "mean_token_accuracy": 0.968756687939167,
+      "num_tokens": 5718099.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.11262517396360636,
+      "epoch": 6.685408299866131,
+      "grad_norm": 1.8821665048599243,
+      "learning_rate": 4.90771880948302e-05,
+      "loss": 0.42503364562988283,
+      "mean_token_accuracy": 0.968946928679943,
+      "num_tokens": 5833902.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.11436546228826046,
+      "epoch": 6.8192771084337345,
+      "grad_norm": 2.5898215770721436,
+      "learning_rate": 4.561190272239513e-05,
+      "loss": 0.4263697052001953,
+      "mean_token_accuracy": 0.967577712237835,
+      "num_tokens": 5948132.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.11348343381658196,
+      "epoch": 6.953145917001339,
+      "grad_norm": 1.4793730974197388,
+      "learning_rate": 4.222567875105448e-05,
+      "loss": 0.42732913970947267,
+      "mean_token_accuracy": 0.9683026453852653,
+      "num_tokens": 6066224.0,
+      "step": 2600
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.20438951179385184,
+      "eval_loss": 0.8442238569259644,
+      "eval_mean_token_accuracy": 0.8623786172270775,
+      "eval_num_tokens": 6105729.0,
+      "eval_runtime": 96.5733,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.071,
+      "step": 2618
+    },
+    {
+      "entropy": 0.10301848094571721,
+      "epoch": 7.085676037483267,
+      "grad_norm": 1.7398229837417603,
+      "learning_rate": 3.8925889251419277e-05,
+      "loss": 0.3753490447998047,
+      "mean_token_accuracy": 0.9721216511244726,
+      "num_tokens": 6183429.0,
+      "step": 2650
+    },
+    {
+      "entropy": 0.09001849109306931,
+      "epoch": 7.21954484605087,
+      "grad_norm": 1.2295695543289185,
+      "learning_rate": 3.571971909406742e-05,
+      "loss": 0.3318290710449219,
+      "mean_token_accuracy": 0.9757600504159928,
+      "num_tokens": 6303432.0,
+      "step": 2700
+    },
+    {
+      "entropy": 0.09881723931059241,
+      "epoch": 7.353413654618474,
+      "grad_norm": 1.2764956951141357,
+      "learning_rate": 3.2614149305404984e-05,
+      "loss": 0.3623368453979492,
+      "mean_token_accuracy": 0.973207780122757,
+      "num_tokens": 6414176.0,
+      "step": 2750
+    },
+    {
+      "entropy": 0.09360236193984747,
+      "epoch": 7.4872824631860775,
+      "grad_norm": 1.6123838424682617,
+      "learning_rate": 2.961594186737198e-05,
+      "loss": 0.34752960205078126,
+      "mean_token_accuracy": 0.9750900790095329,
+      "num_tokens": 6531772.0,
+      "step": 2800
+    },
+    {
+      "entropy": 0.08683731975033879,
+      "epoch": 7.621151271753681,
+      "grad_norm": 2.689685821533203,
+      "learning_rate": 2.6731624994089548e-05,
+      "loss": 0.3220005798339844,
+      "mean_token_accuracy": 0.9765287268161774,
+      "num_tokens": 6655745.0,
+      "step": 2850
+    },
+    {
+      "entropy": 0.09308060238137841,
+      "epoch": 7.755020080321285,
+      "grad_norm": 2.0739898681640625,
+      "learning_rate": 2.3967478917506556e-05,
+      "loss": 0.34147651672363283,
+      "mean_token_accuracy": 0.975026119351387,
+      "num_tokens": 6772303.0,
+      "step": 2900
+    },
+    {
+      "entropy": 0.09029730424284935,
+      "epoch": 7.888888888888889,
+      "grad_norm": 1.9244085550308228,
+      "learning_rate": 2.1329522212996067e-05,
+      "loss": 0.3376229476928711,
+      "mean_token_accuracy": 0.9750253957509994,
+      "num_tokens": 6887254.0,
+      "step": 2950
+    },
+    {
+      "epoch": 8.0,
+      "eval_entropy": 0.16788041561841965,
+      "eval_loss": 0.9710925817489624,
+      "eval_mean_token_accuracy": 0.8621352380514145,
+      "eval_num_tokens": 6977976.0,
+      "eval_runtime": 96.6722,
+      "eval_samples_per_second": 16.53,
+      "eval_steps_per_second": 2.069,
+      "step": 2992
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.373756578602813e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9abe250820de6d55106ad056cc8dddd15cd6bd60
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05026173039334608,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3cc7f56edafe22843e326c0f56ad2bca98e1d47d
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3366/trainer_state.json
@@ -0,0 +1,803 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 9.0,
+  "eval_steps": 500,
+  "global_step": 3366,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2582822921872139,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 5.033235549926758,
+      "learning_rate": 2.1466288485778066e-05,
+      "loss": 5.046328735351563,
+      "mean_token_accuracy": 0.7502484863996506,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5685664692521095,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 3.554903030395508,
+      "learning_rate": 4.337066449167405e-05,
+      "loss": 2.209185791015625,
+      "mean_token_accuracy": 0.8581047981977463,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5092987871170044,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 5.598084926605225,
+      "learning_rate": 6.527504049757005e-05,
+      "loss": 2.0146630859375,
+      "mean_token_accuracy": 0.8670356649160386,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4832092320919037,
+      "epoch": 0.535475234270415,
+      "grad_norm": 3.579439163208008,
+      "learning_rate": 8.717941650346603e-05,
+      "loss": 1.9066175842285156,
+      "mean_token_accuracy": 0.8735517236590385,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.49370287612080577,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 22.771854400634766,
+      "learning_rate": 0.00010908379250936202,
+      "loss": 1.9317852783203124,
+      "mean_token_accuracy": 0.8717742815613747,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.475419160425663,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 191.79444885253906,
+      "learning_rate": 0.000130988168515258,
+      "loss": 1.9097901916503905,
+      "mean_token_accuracy": 0.8744278407096863,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5022796393930912,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 3.216644287109375,
+      "learning_rate": 0.00015289254452115398,
+      "loss": 2.147085876464844,
+      "mean_token_accuracy": 0.869993035197258,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5250271247327327,
+      "eval_loss": 0.5184861421585083,
+      "eval_mean_token_accuracy": 0.8612929663062096,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.6309,
+      "eval_samples_per_second": 16.537,
+      "eval_steps_per_second": 2.07,
+      "step": 374
+    },
+    {
+      "entropy": 0.4808884654382263,
+      "epoch": 1.069611780455154,
+      "grad_norm": 2.666057586669922,
+      "learning_rate": 0.00016382243255158818,
+      "loss": 1.8543489074707031,
+      "mean_token_accuracy": 0.8768212256407497,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.45330303743481637,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 2.1171557903289795,
+      "learning_rate": 0.00016364410560779942,
+      "loss": 1.7948956298828125,
+      "mean_token_accuracy": 0.8791207140684127,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.45347678795456886,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 4.511318683624268,
+      "learning_rate": 0.00016328784000438723,
+      "loss": 1.7988812255859374,
+      "mean_token_accuracy": 0.8801145932078361,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.44952490359544756,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 2.961987257003784,
+      "learning_rate": 0.0001627544114642431,
+      "loss": 1.7823495483398437,
+      "mean_token_accuracy": 0.8799195346236229,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4502506497502327,
+      "epoch": 1.605087014725569,
+      "grad_norm": 2.924865484237671,
+      "learning_rate": 0.000162044981459947,
+      "loss": 1.7603852844238281,
+      "mean_token_accuracy": 0.8811277949810028,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.44528300017118455,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 2.928840160369873,
+      "learning_rate": 0.00016116109468480906,
+      "loss": 1.7513160705566406,
+      "mean_token_accuracy": 0.8816375133395195,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.43578719861805437,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 15.26456356048584,
+      "learning_rate": 0.00016010467568949708,
+      "loss": 1.7112632751464845,
+      "mean_token_accuracy": 0.884103564620018,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4700849764049053,
+      "eval_loss": 0.49145790934562683,
+      "eval_mean_token_accuracy": 0.8681637060642242,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5704,
+      "eval_samples_per_second": 16.548,
+      "eval_steps_per_second": 2.071,
+      "step": 748
+    },
+    {
+      "entropy": 0.44389665202058926,
+      "epoch": 2.005354752342704,
+      "grad_norm": 3.126400947570801,
+      "learning_rate": 0.00015887802469157283,
+      "loss": 1.74362060546875,
+      "mean_token_accuracy": 0.882077858604566,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.36593644849956036,
+      "epoch": 2.139223560910308,
+      "grad_norm": 4.825572490692139,
+      "learning_rate": 0.000157483812567062,
+      "loss": 1.4261384582519532,
+      "mean_token_accuracy": 0.897953551709652,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3735161118209362,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 2.341724395751953,
+      "learning_rate": 0.00015592507503496244,
+      "loss": 1.4566732788085937,
+      "mean_token_accuracy": 0.8954837635159493,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3712782260775566,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 2.141064405441284,
+      "learning_rate": 0.00015420520604735334,
+      "loss": 1.4417454528808593,
+      "mean_token_accuracy": 0.8987472346425056,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.3738844521343708,
+      "epoch": 2.540829986613119,
+      "grad_norm": 2.904395818710327,
+      "learning_rate": 0.0001523279503994976,
+      "loss": 1.4441893005371094,
+      "mean_token_accuracy": 0.8981871575117111,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.3702578065544367,
+      "epoch": 2.674698795180723,
+      "grad_norm": 2.941880226135254,
+      "learning_rate": 0.00015029739557602818,
+      "loss": 1.4411444091796874,
+      "mean_token_accuracy": 0.8977779766917229,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.37640198186039925,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 4.814720153808594,
+      "learning_rate": 0.00014811796285097166,
+      "loss": 1.463765869140625,
+      "mean_token_accuracy": 0.8968957820534706,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.3884583811461926,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 2.023144483566284,
+      "learning_rate": 0.0001457943976609884,
+      "loss": 1.4860101318359376,
+      "mean_token_accuracy": 0.8945580047369003,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.41501702144742014,
+      "eval_loss": 0.5103150010108948,
+      "eval_mean_token_accuracy": 0.8657891270518303,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.5282,
+      "eval_samples_per_second": 16.555,
+      "eval_steps_per_second": 2.072,
+      "step": 1122
+    },
+    {
+      "entropy": 0.33624990103822766,
+      "epoch": 3.074966532797858,
+      "grad_norm": 2.8879244327545166,
+      "learning_rate": 0.0001433317592727896,
+      "loss": 1.2471446990966797,
+      "mean_token_accuracy": 0.9082047313150733,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.3088844185322523,
+      "epoch": 3.208835341365462,
+      "grad_norm": 2.8509788513183594,
+      "learning_rate": 0.00014073540976722957,
+      "loss": 1.1441875457763673,
+      "mean_token_accuracy": 0.9140481147170066,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.30224390886723995,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 2.5208239555358887,
+      "learning_rate": 0.00013801100236405915,
+      "loss": 1.1275232696533204,
+      "mean_token_accuracy": 0.9146806076169014,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.295175199881196,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 2.4297444820404053,
+      "learning_rate": 0.00013516446911276066,
+      "loss": 1.1239344787597656,
+      "mean_token_accuracy": 0.9151004731655121,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.295776079967618,
+      "epoch": 3.610441767068273,
+      "grad_norm": 2.3972175121307373,
+      "learning_rate": 0.00013220200797626748,
+      "loss": 1.148626480102539,
+      "mean_token_accuracy": 0.9141753858327866,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.2951671688258648,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 2.1329967975616455,
+      "learning_rate": 0.00012913006933569033,
+      "loss": 1.1505547332763673,
+      "mean_token_accuracy": 0.9145446908473969,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.28995474845170977,
+      "epoch": 3.878179384203481,
+      "grad_norm": 2.111231803894043,
+      "learning_rate": 0.0001259553419454356,
+      "loss": 1.12584228515625,
+      "mean_token_accuracy": 0.9153258377313613,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34918407052755357,
+      "eval_loss": 0.5466129183769226,
+      "eval_mean_token_accuracy": 0.8677999797463417,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.4025,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.075,
+      "step": 1496
+    },
+    {
+      "entropy": 0.28912228466284395,
+      "epoch": 4.010709504685408,
+      "grad_norm": 2.6088380813598633,
+      "learning_rate": 0.00012268473836929623,
+      "loss": 1.1048170471191405,
+      "mean_token_accuracy": 0.9165902002291246,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.2037667266279459,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.7244584560394287,
+      "learning_rate": 0.00011932537992922588,
+      "loss": 0.7798351287841797,
+      "mean_token_accuracy": 0.9385521411895752,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.21155868768692015,
+      "epoch": 4.278447121820616,
+      "grad_norm": 17.15939712524414,
+      "learning_rate": 0.00011588458119956922,
+      "loss": 0.8124880981445313,
+      "mean_token_accuracy": 0.9354887393116951,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.21142005987465382,
+      "epoch": 4.412315930388219,
+      "grad_norm": 3.1366724967956543,
+      "learning_rate": 0.00011236983408050962,
+      "loss": 0.8087466430664062,
+      "mean_token_accuracy": 0.9360431012511253,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.21077097810804843,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.4884378910064697,
+      "learning_rate": 0.0001087887914854125,
+      "loss": 0.8054198455810547,
+      "mean_token_accuracy": 0.9361811754107475,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.21570609882473946,
+      "epoch": 4.680053547523427,
+      "grad_norm": 2.5278756618499756,
+      "learning_rate": 0.00010514925067758285,
+      "loss": 0.8254692077636718,
+      "mean_token_accuracy": 0.9351590833067894,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.21050533920526504,
+      "epoch": 4.813922356091031,
+      "grad_norm": 2.563352584838867,
+      "learning_rate": 0.00010145913629271953,
+      "loss": 0.8124603271484375,
+      "mean_token_accuracy": 0.9365199673175811,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.21181554518640042,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.7008941173553467,
+      "learning_rate": 9.772648308403213e-05,
+      "loss": 0.8135105895996094,
+      "mean_token_accuracy": 0.9371505591273308,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.2898014415055513,
+      "eval_loss": 0.6175746917724609,
+      "eval_mean_token_accuracy": 0.8674856871366501,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.3162,
+      "eval_samples_per_second": 16.591,
+      "eval_steps_per_second": 2.076,
+      "step": 1870
+    },
+    {
+      "entropy": 0.1729431924871122,
+      "epoch": 5.080321285140562,
+      "grad_norm": 1.9089794158935547,
+      "learning_rate": 9.395941842759104e-05,
+      "loss": 0.6498579406738281,
+      "mean_token_accuracy": 0.948695200561273,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14844272032380104,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.9947986602783203,
+      "learning_rate": 9.016614462600325e-05,
+      "loss": 0.5658287048339844,
+      "mean_token_accuracy": 0.9562490177154541,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.15113764170557262,
+      "epoch": 5.34805890227577,
+      "grad_norm": 3.0459158420562744,
+      "learning_rate": 8.635492104894498e-05,
+      "loss": 0.569720458984375,
+      "mean_token_accuracy": 0.9561498582363128,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.15329553466290236,
+      "epoch": 5.481927710843373,
+      "grad_norm": 2.5919315814971924,
+      "learning_rate": 8.253404614943809e-05,
+      "loss": 0.5799734878540039,
+      "mean_token_accuracy": 0.954962648153305,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.1544624574482441,
+      "epoch": 5.615796519410977,
+      "grad_norm": 3.170863628387451,
+      "learning_rate": 7.871183939502759e-05,
+      "loss": 0.5769558715820312,
+      "mean_token_accuracy": 0.9549962303042412,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.1543046496436,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.843237280845642,
+      "learning_rate": 7.489662315320254e-05,
+      "loss": 0.5841741561889648,
+      "mean_token_accuracy": 0.9532951918244362,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.14787622597068548,
+      "epoch": 5.883534136546185,
+      "grad_norm": 2.890704393386841,
+      "learning_rate": 7.109670457050292e-05,
+      "loss": 0.5526316452026367,
+      "mean_token_accuracy": 0.9569103759527207,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2396429342031479,
+      "eval_loss": 0.7484959959983826,
+      "eval_mean_token_accuracy": 0.8640641874074936,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.6984,
+      "eval_samples_per_second": 16.526,
+      "eval_steps_per_second": 2.068,
+      "step": 2244
+    },
+    {
+      "entropy": 0.14854476036447467,
+      "epoch": 6.016064257028113,
+      "grad_norm": 2.5248372554779053,
+      "learning_rate": 6.732035748476789e-05,
+      "loss": 0.5454582977294922,
+      "mean_token_accuracy": 0.9571841708337417,
+      "num_tokens": 5246734.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.10946711044758559,
+      "epoch": 6.149933065595716,
+      "grad_norm": 2.492736577987671,
+      "learning_rate": 6.357580440990978e-05,
+      "loss": 0.4096903991699219,
+      "mean_token_accuracy": 0.96969064027071,
+      "num_tokens": 5366334.0,
+      "step": 2300
+    },
+    {
+      "entropy": 0.1087673882767558,
+      "epoch": 6.28380187416332,
+      "grad_norm": 2.550652027130127,
+      "learning_rate": 5.9871198632439174e-05,
+      "loss": 0.4065860748291016,
+      "mean_token_accuracy": 0.969154157936573,
+      "num_tokens": 5487013.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.11702938644215465,
+      "epoch": 6.417670682730924,
+      "grad_norm": 2.2913568019866943,
+      "learning_rate": 5.621460645872391e-05,
+      "loss": 0.4343274688720703,
+      "mean_token_accuracy": 0.9671943977475166,
+      "num_tokens": 5599415.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.11134784514084459,
+      "epoch": 6.551539491298527,
+      "grad_norm": 2.7473230361938477,
+      "learning_rate": 5.2613989651636254e-05,
+      "loss": 0.4231544876098633,
+      "mean_token_accuracy": 0.968756687939167,
+      "num_tokens": 5718099.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.11262517396360636,
+      "epoch": 6.685408299866131,
+      "grad_norm": 1.8821665048599243,
+      "learning_rate": 4.90771880948302e-05,
+      "loss": 0.42503364562988283,
+      "mean_token_accuracy": 0.968946928679943,
+      "num_tokens": 5833902.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.11436546228826046,
+      "epoch": 6.8192771084337345,
+      "grad_norm": 2.5898215770721436,
+      "learning_rate": 4.561190272239513e-05,
+      "loss": 0.4263697052001953,
+      "mean_token_accuracy": 0.967577712237835,
+      "num_tokens": 5948132.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.11348343381658196,
+      "epoch": 6.953145917001339,
+      "grad_norm": 1.4793730974197388,
+      "learning_rate": 4.222567875105448e-05,
+      "loss": 0.42732913970947267,
+      "mean_token_accuracy": 0.9683026453852653,
+      "num_tokens": 6066224.0,
+      "step": 2600
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.20438951179385184,
+      "eval_loss": 0.8442238569259644,
+      "eval_mean_token_accuracy": 0.8623786172270775,
+      "eval_num_tokens": 6105729.0,
+      "eval_runtime": 96.5733,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.071,
+      "step": 2618
+    },
+    {
+      "entropy": 0.10301848094571721,
+      "epoch": 7.085676037483267,
+      "grad_norm": 1.7398229837417603,
+      "learning_rate": 3.8925889251419277e-05,
+      "loss": 0.3753490447998047,
+      "mean_token_accuracy": 0.9721216511244726,
+      "num_tokens": 6183429.0,
+      "step": 2650
+    },
+    {
+      "entropy": 0.09001849109306931,
+      "epoch": 7.21954484605087,
+      "grad_norm": 1.2295695543289185,
+      "learning_rate": 3.571971909406742e-05,
+      "loss": 0.3318290710449219,
+      "mean_token_accuracy": 0.9757600504159928,
+      "num_tokens": 6303432.0,
+      "step": 2700
+    },
+    {
+      "entropy": 0.09881723931059241,
+      "epoch": 7.353413654618474,
+      "grad_norm": 1.2764956951141357,
+      "learning_rate": 3.2614149305404984e-05,
+      "loss": 0.3623368453979492,
+      "mean_token_accuracy": 0.973207780122757,
+      "num_tokens": 6414176.0,
+      "step": 2750
+    },
+    {
+      "entropy": 0.09360236193984747,
+      "epoch": 7.4872824631860775,
+      "grad_norm": 1.6123838424682617,
+      "learning_rate": 2.961594186737198e-05,
+      "loss": 0.34752960205078126,
+      "mean_token_accuracy": 0.9750900790095329,
+      "num_tokens": 6531772.0,
+      "step": 2800
+    },
+    {
+      "entropy": 0.08683731975033879,
+      "epoch": 7.621151271753681,
+      "grad_norm": 2.689685821533203,
+      "learning_rate": 2.6731624994089548e-05,
+      "loss": 0.3220005798339844,
+      "mean_token_accuracy": 0.9765287268161774,
+      "num_tokens": 6655745.0,
+      "step": 2850
+    },
+    {
+      "entropy": 0.09308060238137841,
+      "epoch": 7.755020080321285,
+      "grad_norm": 2.0739898681640625,
+      "learning_rate": 2.3967478917506556e-05,
+      "loss": 0.34147651672363283,
+      "mean_token_accuracy": 0.975026119351387,
+      "num_tokens": 6772303.0,
+      "step": 2900
+    },
+    {
+      "entropy": 0.09029730424284935,
+      "epoch": 7.888888888888889,
+      "grad_norm": 1.9244085550308228,
+      "learning_rate": 2.1329522212996067e-05,
+      "loss": 0.3376229476928711,
+      "mean_token_accuracy": 0.9750253957509994,
+      "num_tokens": 6887254.0,
+      "step": 2950
+    },
+    {
+      "epoch": 8.0,
+      "eval_entropy": 0.16788041561841965,
+      "eval_loss": 0.9710925817489624,
+      "eval_mean_token_accuracy": 0.8621352380514145,
+      "eval_num_tokens": 6977976.0,
+      "eval_runtime": 96.6722,
+      "eval_samples_per_second": 16.53,
+      "eval_steps_per_second": 2.069,
+      "step": 2992
+    },
+    {
+      "entropy": 0.09257136479095378,
+      "epoch": 8.021419009370817,
+      "grad_norm": 0.6420087218284607,
+      "learning_rate": 1.882349869467544e-05,
+      "loss": 0.3422011566162109,
+      "mean_token_accuracy": 0.9748761277608197,
+      "num_tokens": 6997584.0,
+      "step": 3000
+    },
+    {
+      "entropy": 0.08579577693715691,
+      "epoch": 8.15528781793842,
+      "grad_norm": 0.6230213046073914,
+      "learning_rate": 1.6454864908983872e-05,
+      "loss": 0.3201043319702148,
+      "mean_token_accuracy": 0.9772940769791603,
+      "num_tokens": 7109661.0,
+      "step": 3050
+    },
+    {
+      "entropy": 0.08380989912897348,
+      "epoch": 8.289156626506024,
+      "grad_norm": 0.7393160462379456,
+      "learning_rate": 1.4228778253748889e-05,
+      "loss": 0.31029510498046875,
+      "mean_token_accuracy": 0.9774167820811271,
+      "num_tokens": 7228951.0,
+      "step": 3100
+    },
+    {
+      "entropy": 0.08082627209834754,
+      "epoch": 8.423025435073628,
+      "grad_norm": 1.839464783668518,
+      "learning_rate": 1.2150085748610697e-05,
+      "loss": 0.3011078643798828,
+      "mean_token_accuracy": 0.9782456710934639,
+      "num_tokens": 7345974.0,
+      "step": 3150
+    },
+    {
+      "entropy": 0.0822253195475787,
+      "epoch": 8.556894243641231,
+      "grad_norm": 1.0962920188903809,
+      "learning_rate": 1.0223313481255313e-05,
+      "loss": 0.3033403205871582,
+      "mean_token_accuracy": 0.9777043810486794,
+      "num_tokens": 7465280.0,
+      "step": 3200
+    },
+    {
+      "entropy": 0.08191775260493159,
+      "epoch": 8.690763052208835,
+      "grad_norm": 0.9543969035148621,
+      "learning_rate": 8.452656752436198e-06,
+      "loss": 0.30089645385742186,
+      "mean_token_accuracy": 0.9785924726724624,
+      "num_tokens": 7584144.0,
+      "step": 3250
+    },
+    {
+      "entropy": 0.08602304834872485,
+      "epoch": 8.824631860776439,
+      "grad_norm": 1.2917355298995972,
+      "learning_rate": 6.841970941242257e-06,
+      "loss": 0.3139409637451172,
+      "mean_token_accuracy": 0.9771846759319306,
+      "num_tokens": 7698337.0,
+      "step": 3300
+    },
+    {
+      "entropy": 0.08643955274485052,
+      "epoch": 8.958500669344042,
+      "grad_norm": 0.7915636301040649,
+      "learning_rate": 5.394763110501694e-06,
+      "loss": 0.32275047302246096,
+      "mean_token_accuracy": 0.9766862055659294,
+      "num_tokens": 7815006.0,
+      "step": 3350
+    },
+    {
+      "epoch": 9.0,
+      "eval_entropy": 0.1494569706916809,
+      "eval_loss": 1.0805635452270508,
+      "eval_mean_token_accuracy": 0.8623243406414985,
+      "eval_num_tokens": 7850223.0,
+      "eval_runtime": 96.1431,
+      "eval_samples_per_second": 16.621,
+      "eval_steps_per_second": 2.08,
+      "step": 3366
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.6694552867277025e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9abe250820de6d55106ad056cc8dddd15cd6bd60
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05026173039334608,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e820916e2fc5a587d0fdc7d5166b85de94a2eb7f
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-374/trainer_state.json
@@ -0,0 +1,115 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 374,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2582822921872139,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 5.033235549926758,
+      "learning_rate": 2.1466288485778066e-05,
+      "loss": 5.046328735351563,
+      "mean_token_accuracy": 0.7502484863996506,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5685664692521095,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 3.554903030395508,
+      "learning_rate": 4.337066449167405e-05,
+      "loss": 2.209185791015625,
+      "mean_token_accuracy": 0.8581047981977463,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5092987871170044,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 5.598084926605225,
+      "learning_rate": 6.527504049757005e-05,
+      "loss": 2.0146630859375,
+      "mean_token_accuracy": 0.8670356649160386,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4832092320919037,
+      "epoch": 0.535475234270415,
+      "grad_norm": 3.579439163208008,
+      "learning_rate": 8.717941650346603e-05,
+      "loss": 1.9066175842285156,
+      "mean_token_accuracy": 0.8735517236590385,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.49370287612080577,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 22.771854400634766,
+      "learning_rate": 0.00010908379250936202,
+      "loss": 1.9317852783203124,
+      "mean_token_accuracy": 0.8717742815613747,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.475419160425663,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 191.79444885253906,
+      "learning_rate": 0.000130988168515258,
+      "loss": 1.9097901916503905,
+      "mean_token_accuracy": 0.8744278407096863,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5022796393930912,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 3.216644287109375,
+      "learning_rate": 0.00015289254452115398,
+      "loss": 2.147085876464844,
+      "mean_token_accuracy": 0.869993035197258,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5250271247327327,
+      "eval_loss": 0.5184861421585083,
+      "eval_mean_token_accuracy": 0.8612929663062096,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.6309,
+      "eval_samples_per_second": 16.537,
+      "eval_steps_per_second": 2.07,
+      "step": 374
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.947242714769866e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9abe250820de6d55106ad056cc8dddd15cd6bd60
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05026173039334608,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d652737f2a37d1cab0bc904cf9277bdf9e4bc7ff
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3740/trainer_state.json
@@ -0,0 +1,884 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 10.0,
+  "eval_steps": 500,
+  "global_step": 3740,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2582822921872139,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 5.033235549926758,
+      "learning_rate": 2.1466288485778066e-05,
+      "loss": 5.046328735351563,
+      "mean_token_accuracy": 0.7502484863996506,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5685664692521095,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 3.554903030395508,
+      "learning_rate": 4.337066449167405e-05,
+      "loss": 2.209185791015625,
+      "mean_token_accuracy": 0.8581047981977463,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5092987871170044,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 5.598084926605225,
+      "learning_rate": 6.527504049757005e-05,
+      "loss": 2.0146630859375,
+      "mean_token_accuracy": 0.8670356649160386,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4832092320919037,
+      "epoch": 0.535475234270415,
+      "grad_norm": 3.579439163208008,
+      "learning_rate": 8.717941650346603e-05,
+      "loss": 1.9066175842285156,
+      "mean_token_accuracy": 0.8735517236590385,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.49370287612080577,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 22.771854400634766,
+      "learning_rate": 0.00010908379250936202,
+      "loss": 1.9317852783203124,
+      "mean_token_accuracy": 0.8717742815613747,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.475419160425663,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 191.79444885253906,
+      "learning_rate": 0.000130988168515258,
+      "loss": 1.9097901916503905,
+      "mean_token_accuracy": 0.8744278407096863,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5022796393930912,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 3.216644287109375,
+      "learning_rate": 0.00015289254452115398,
+      "loss": 2.147085876464844,
+      "mean_token_accuracy": 0.869993035197258,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5250271247327327,
+      "eval_loss": 0.5184861421585083,
+      "eval_mean_token_accuracy": 0.8612929663062096,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.6309,
+      "eval_samples_per_second": 16.537,
+      "eval_steps_per_second": 2.07,
+      "step": 374
+    },
+    {
+      "entropy": 0.4808884654382263,
+      "epoch": 1.069611780455154,
+      "grad_norm": 2.666057586669922,
+      "learning_rate": 0.00016382243255158818,
+      "loss": 1.8543489074707031,
+      "mean_token_accuracy": 0.8768212256407497,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.45330303743481637,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 2.1171557903289795,
+      "learning_rate": 0.00016364410560779942,
+      "loss": 1.7948956298828125,
+      "mean_token_accuracy": 0.8791207140684127,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.45347678795456886,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 4.511318683624268,
+      "learning_rate": 0.00016328784000438723,
+      "loss": 1.7988812255859374,
+      "mean_token_accuracy": 0.8801145932078361,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.44952490359544756,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 2.961987257003784,
+      "learning_rate": 0.0001627544114642431,
+      "loss": 1.7823495483398437,
+      "mean_token_accuracy": 0.8799195346236229,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4502506497502327,
+      "epoch": 1.605087014725569,
+      "grad_norm": 2.924865484237671,
+      "learning_rate": 0.000162044981459947,
+      "loss": 1.7603852844238281,
+      "mean_token_accuracy": 0.8811277949810028,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.44528300017118455,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 2.928840160369873,
+      "learning_rate": 0.00016116109468480906,
+      "loss": 1.7513160705566406,
+      "mean_token_accuracy": 0.8816375133395195,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.43578719861805437,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 15.26456356048584,
+      "learning_rate": 0.00016010467568949708,
+      "loss": 1.7112632751464845,
+      "mean_token_accuracy": 0.884103564620018,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4700849764049053,
+      "eval_loss": 0.49145790934562683,
+      "eval_mean_token_accuracy": 0.8681637060642242,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5704,
+      "eval_samples_per_second": 16.548,
+      "eval_steps_per_second": 2.071,
+      "step": 748
+    },
+    {
+      "entropy": 0.44389665202058926,
+      "epoch": 2.005354752342704,
+      "grad_norm": 3.126400947570801,
+      "learning_rate": 0.00015887802469157283,
+      "loss": 1.74362060546875,
+      "mean_token_accuracy": 0.882077858604566,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.36593644849956036,
+      "epoch": 2.139223560910308,
+      "grad_norm": 4.825572490692139,
+      "learning_rate": 0.000157483812567062,
+      "loss": 1.4261384582519532,
+      "mean_token_accuracy": 0.897953551709652,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3735161118209362,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 2.341724395751953,
+      "learning_rate": 0.00015592507503496244,
+      "loss": 1.4566732788085937,
+      "mean_token_accuracy": 0.8954837635159493,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3712782260775566,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 2.141064405441284,
+      "learning_rate": 0.00015420520604735334,
+      "loss": 1.4417454528808593,
+      "mean_token_accuracy": 0.8987472346425056,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.3738844521343708,
+      "epoch": 2.540829986613119,
+      "grad_norm": 2.904395818710327,
+      "learning_rate": 0.0001523279503994976,
+      "loss": 1.4441893005371094,
+      "mean_token_accuracy": 0.8981871575117111,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.3702578065544367,
+      "epoch": 2.674698795180723,
+      "grad_norm": 2.941880226135254,
+      "learning_rate": 0.00015029739557602818,
+      "loss": 1.4411444091796874,
+      "mean_token_accuracy": 0.8977779766917229,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.37640198186039925,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 4.814720153808594,
+      "learning_rate": 0.00014811796285097166,
+      "loss": 1.463765869140625,
+      "mean_token_accuracy": 0.8968957820534706,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.3884583811461926,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 2.023144483566284,
+      "learning_rate": 0.0001457943976609884,
+      "loss": 1.4860101318359376,
+      "mean_token_accuracy": 0.8945580047369003,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.41501702144742014,
+      "eval_loss": 0.5103150010108948,
+      "eval_mean_token_accuracy": 0.8657891270518303,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.5282,
+      "eval_samples_per_second": 16.555,
+      "eval_steps_per_second": 2.072,
+      "step": 1122
+    },
+    {
+      "entropy": 0.33624990103822766,
+      "epoch": 3.074966532797858,
+      "grad_norm": 2.8879244327545166,
+      "learning_rate": 0.0001433317592727896,
+      "loss": 1.2471446990966797,
+      "mean_token_accuracy": 0.9082047313150733,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.3088844185322523,
+      "epoch": 3.208835341365462,
+      "grad_norm": 2.8509788513183594,
+      "learning_rate": 0.00014073540976722957,
+      "loss": 1.1441875457763673,
+      "mean_token_accuracy": 0.9140481147170066,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.30224390886723995,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 2.5208239555358887,
+      "learning_rate": 0.00013801100236405915,
+      "loss": 1.1275232696533204,
+      "mean_token_accuracy": 0.9146806076169014,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.295175199881196,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 2.4297444820404053,
+      "learning_rate": 0.00013516446911276066,
+      "loss": 1.1239344787597656,
+      "mean_token_accuracy": 0.9151004731655121,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.295776079967618,
+      "epoch": 3.610441767068273,
+      "grad_norm": 2.3972175121307373,
+      "learning_rate": 0.00013220200797626748,
+      "loss": 1.148626480102539,
+      "mean_token_accuracy": 0.9141753858327866,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.2951671688258648,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 2.1329967975616455,
+      "learning_rate": 0.00012913006933569033,
+      "loss": 1.1505547332763673,
+      "mean_token_accuracy": 0.9145446908473969,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.28995474845170977,
+      "epoch": 3.878179384203481,
+      "grad_norm": 2.111231803894043,
+      "learning_rate": 0.0001259553419454356,
+      "loss": 1.12584228515625,
+      "mean_token_accuracy": 0.9153258377313613,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34918407052755357,
+      "eval_loss": 0.5466129183769226,
+      "eval_mean_token_accuracy": 0.8677999797463417,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.4025,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.075,
+      "step": 1496
+    },
+    {
+      "entropy": 0.28912228466284395,
+      "epoch": 4.010709504685408,
+      "grad_norm": 2.6088380813598633,
+      "learning_rate": 0.00012268473836929623,
+      "loss": 1.1048170471191405,
+      "mean_token_accuracy": 0.9165902002291246,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.2037667266279459,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.7244584560394287,
+      "learning_rate": 0.00011932537992922588,
+      "loss": 0.7798351287841797,
+      "mean_token_accuracy": 0.9385521411895752,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.21155868768692015,
+      "epoch": 4.278447121820616,
+      "grad_norm": 17.15939712524414,
+      "learning_rate": 0.00011588458119956922,
+      "loss": 0.8124880981445313,
+      "mean_token_accuracy": 0.9354887393116951,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.21142005987465382,
+      "epoch": 4.412315930388219,
+      "grad_norm": 3.1366724967956543,
+      "learning_rate": 0.00011236983408050962,
+      "loss": 0.8087466430664062,
+      "mean_token_accuracy": 0.9360431012511253,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.21077097810804843,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.4884378910064697,
+      "learning_rate": 0.0001087887914854125,
+      "loss": 0.8054198455810547,
+      "mean_token_accuracy": 0.9361811754107475,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.21570609882473946,
+      "epoch": 4.680053547523427,
+      "grad_norm": 2.5278756618499756,
+      "learning_rate": 0.00010514925067758285,
+      "loss": 0.8254692077636718,
+      "mean_token_accuracy": 0.9351590833067894,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.21050533920526504,
+      "epoch": 4.813922356091031,
+      "grad_norm": 2.563352584838867,
+      "learning_rate": 0.00010145913629271953,
+      "loss": 0.8124603271484375,
+      "mean_token_accuracy": 0.9365199673175811,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.21181554518640042,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.7008941173553467,
+      "learning_rate": 9.772648308403213e-05,
+      "loss": 0.8135105895996094,
+      "mean_token_accuracy": 0.9371505591273308,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.2898014415055513,
+      "eval_loss": 0.6175746917724609,
+      "eval_mean_token_accuracy": 0.8674856871366501,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.3162,
+      "eval_samples_per_second": 16.591,
+      "eval_steps_per_second": 2.076,
+      "step": 1870
+    },
+    {
+      "entropy": 0.1729431924871122,
+      "epoch": 5.080321285140562,
+      "grad_norm": 1.9089794158935547,
+      "learning_rate": 9.395941842759104e-05,
+      "loss": 0.6498579406738281,
+      "mean_token_accuracy": 0.948695200561273,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14844272032380104,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.9947986602783203,
+      "learning_rate": 9.016614462600325e-05,
+      "loss": 0.5658287048339844,
+      "mean_token_accuracy": 0.9562490177154541,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.15113764170557262,
+      "epoch": 5.34805890227577,
+      "grad_norm": 3.0459158420562744,
+      "learning_rate": 8.635492104894498e-05,
+      "loss": 0.569720458984375,
+      "mean_token_accuracy": 0.9561498582363128,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.15329553466290236,
+      "epoch": 5.481927710843373,
+      "grad_norm": 2.5919315814971924,
+      "learning_rate": 8.253404614943809e-05,
+      "loss": 0.5799734878540039,
+      "mean_token_accuracy": 0.954962648153305,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.1544624574482441,
+      "epoch": 5.615796519410977,
+      "grad_norm": 3.170863628387451,
+      "learning_rate": 7.871183939502759e-05,
+      "loss": 0.5769558715820312,
+      "mean_token_accuracy": 0.9549962303042412,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.1543046496436,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.843237280845642,
+      "learning_rate": 7.489662315320254e-05,
+      "loss": 0.5841741561889648,
+      "mean_token_accuracy": 0.9532951918244362,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.14787622597068548,
+      "epoch": 5.883534136546185,
+      "grad_norm": 2.890704393386841,
+      "learning_rate": 7.109670457050292e-05,
+      "loss": 0.5526316452026367,
+      "mean_token_accuracy": 0.9569103759527207,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2396429342031479,
+      "eval_loss": 0.7484959959983826,
+      "eval_mean_token_accuracy": 0.8640641874074936,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.6984,
+      "eval_samples_per_second": 16.526,
+      "eval_steps_per_second": 2.068,
+      "step": 2244
+    },
+    {
+      "entropy": 0.14854476036447467,
+      "epoch": 6.016064257028113,
+      "grad_norm": 2.5248372554779053,
+      "learning_rate": 6.732035748476789e-05,
+      "loss": 0.5454582977294922,
+      "mean_token_accuracy": 0.9571841708337417,
+      "num_tokens": 5246734.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.10946711044758559,
+      "epoch": 6.149933065595716,
+      "grad_norm": 2.492736577987671,
+      "learning_rate": 6.357580440990978e-05,
+      "loss": 0.4096903991699219,
+      "mean_token_accuracy": 0.96969064027071,
+      "num_tokens": 5366334.0,
+      "step": 2300
+    },
+    {
+      "entropy": 0.1087673882767558,
+      "epoch": 6.28380187416332,
+      "grad_norm": 2.550652027130127,
+      "learning_rate": 5.9871198632439174e-05,
+      "loss": 0.4065860748291016,
+      "mean_token_accuracy": 0.969154157936573,
+      "num_tokens": 5487013.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.11702938644215465,
+      "epoch": 6.417670682730924,
+      "grad_norm": 2.2913568019866943,
+      "learning_rate": 5.621460645872391e-05,
+      "loss": 0.4343274688720703,
+      "mean_token_accuracy": 0.9671943977475166,
+      "num_tokens": 5599415.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.11134784514084459,
+      "epoch": 6.551539491298527,
+      "grad_norm": 2.7473230361938477,
+      "learning_rate": 5.2613989651636254e-05,
+      "loss": 0.4231544876098633,
+      "mean_token_accuracy": 0.968756687939167,
+      "num_tokens": 5718099.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.11262517396360636,
+      "epoch": 6.685408299866131,
+      "grad_norm": 1.8821665048599243,
+      "learning_rate": 4.90771880948302e-05,
+      "loss": 0.42503364562988283,
+      "mean_token_accuracy": 0.968946928679943,
+      "num_tokens": 5833902.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.11436546228826046,
+      "epoch": 6.8192771084337345,
+      "grad_norm": 2.5898215770721436,
+      "learning_rate": 4.561190272239513e-05,
+      "loss": 0.4263697052001953,
+      "mean_token_accuracy": 0.967577712237835,
+      "num_tokens": 5948132.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.11348343381658196,
+      "epoch": 6.953145917001339,
+      "grad_norm": 1.4793730974197388,
+      "learning_rate": 4.222567875105448e-05,
+      "loss": 0.42732913970947267,
+      "mean_token_accuracy": 0.9683026453852653,
+      "num_tokens": 6066224.0,
+      "step": 2600
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.20438951179385184,
+      "eval_loss": 0.8442238569259644,
+      "eval_mean_token_accuracy": 0.8623786172270775,
+      "eval_num_tokens": 6105729.0,
+      "eval_runtime": 96.5733,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.071,
+      "step": 2618
+    },
+    {
+      "entropy": 0.10301848094571721,
+      "epoch": 7.085676037483267,
+      "grad_norm": 1.7398229837417603,
+      "learning_rate": 3.8925889251419277e-05,
+      "loss": 0.3753490447998047,
+      "mean_token_accuracy": 0.9721216511244726,
+      "num_tokens": 6183429.0,
+      "step": 2650
+    },
+    {
+      "entropy": 0.09001849109306931,
+      "epoch": 7.21954484605087,
+      "grad_norm": 1.2295695543289185,
+      "learning_rate": 3.571971909406742e-05,
+      "loss": 0.3318290710449219,
+      "mean_token_accuracy": 0.9757600504159928,
+      "num_tokens": 6303432.0,
+      "step": 2700
+    },
+    {
+      "entropy": 0.09881723931059241,
+      "epoch": 7.353413654618474,
+      "grad_norm": 1.2764956951141357,
+      "learning_rate": 3.2614149305404984e-05,
+      "loss": 0.3623368453979492,
+      "mean_token_accuracy": 0.973207780122757,
+      "num_tokens": 6414176.0,
+      "step": 2750
+    },
+    {
+      "entropy": 0.09360236193984747,
+      "epoch": 7.4872824631860775,
+      "grad_norm": 1.6123838424682617,
+      "learning_rate": 2.961594186737198e-05,
+      "loss": 0.34752960205078126,
+      "mean_token_accuracy": 0.9750900790095329,
+      "num_tokens": 6531772.0,
+      "step": 2800
+    },
+    {
+      "entropy": 0.08683731975033879,
+      "epoch": 7.621151271753681,
+      "grad_norm": 2.689685821533203,
+      "learning_rate": 2.6731624994089548e-05,
+      "loss": 0.3220005798339844,
+      "mean_token_accuracy": 0.9765287268161774,
+      "num_tokens": 6655745.0,
+      "step": 2850
+    },
+    {
+      "entropy": 0.09308060238137841,
+      "epoch": 7.755020080321285,
+      "grad_norm": 2.0739898681640625,
+      "learning_rate": 2.3967478917506556e-05,
+      "loss": 0.34147651672363283,
+      "mean_token_accuracy": 0.975026119351387,
+      "num_tokens": 6772303.0,
+      "step": 2900
+    },
+    {
+      "entropy": 0.09029730424284935,
+      "epoch": 7.888888888888889,
+      "grad_norm": 1.9244085550308228,
+      "learning_rate": 2.1329522212996067e-05,
+      "loss": 0.3376229476928711,
+      "mean_token_accuracy": 0.9750253957509994,
+      "num_tokens": 6887254.0,
+      "step": 2950
+    },
+    {
+      "epoch": 8.0,
+      "eval_entropy": 0.16788041561841965,
+      "eval_loss": 0.9710925817489624,
+      "eval_mean_token_accuracy": 0.8621352380514145,
+      "eval_num_tokens": 6977976.0,
+      "eval_runtime": 96.6722,
+      "eval_samples_per_second": 16.53,
+      "eval_steps_per_second": 2.069,
+      "step": 2992
+    },
+    {
+      "entropy": 0.09257136479095378,
+      "epoch": 8.021419009370817,
+      "grad_norm": 0.6420087218284607,
+      "learning_rate": 1.882349869467544e-05,
+      "loss": 0.3422011566162109,
+      "mean_token_accuracy": 0.9748761277608197,
+      "num_tokens": 6997584.0,
+      "step": 3000
+    },
+    {
+      "entropy": 0.08579577693715691,
+      "epoch": 8.15528781793842,
+      "grad_norm": 0.6230213046073914,
+      "learning_rate": 1.6454864908983872e-05,
+      "loss": 0.3201043319702148,
+      "mean_token_accuracy": 0.9772940769791603,
+      "num_tokens": 7109661.0,
+      "step": 3050
+    },
+    {
+      "entropy": 0.08380989912897348,
+      "epoch": 8.289156626506024,
+      "grad_norm": 0.7393160462379456,
+      "learning_rate": 1.4228778253748889e-05,
+      "loss": 0.31029510498046875,
+      "mean_token_accuracy": 0.9774167820811271,
+      "num_tokens": 7228951.0,
+      "step": 3100
+    },
+    {
+      "entropy": 0.08082627209834754,
+      "epoch": 8.423025435073628,
+      "grad_norm": 1.839464783668518,
+      "learning_rate": 1.2150085748610697e-05,
+      "loss": 0.3011078643798828,
+      "mean_token_accuracy": 0.9782456710934639,
+      "num_tokens": 7345974.0,
+      "step": 3150
+    },
+    {
+      "entropy": 0.0822253195475787,
+      "epoch": 8.556894243641231,
+      "grad_norm": 1.0962920188903809,
+      "learning_rate": 1.0223313481255313e-05,
+      "loss": 0.3033403205871582,
+      "mean_token_accuracy": 0.9777043810486794,
+      "num_tokens": 7465280.0,
+      "step": 3200
+    },
+    {
+      "entropy": 0.08191775260493159,
+      "epoch": 8.690763052208835,
+      "grad_norm": 0.9543969035148621,
+      "learning_rate": 8.452656752436198e-06,
+      "loss": 0.30089645385742186,
+      "mean_token_accuracy": 0.9785924726724624,
+      "num_tokens": 7584144.0,
+      "step": 3250
+    },
+    {
+      "entropy": 0.08602304834872485,
+      "epoch": 8.824631860776439,
+      "grad_norm": 1.2917355298995972,
+      "learning_rate": 6.841970941242257e-06,
+      "loss": 0.3139409637451172,
+      "mean_token_accuracy": 0.9771846759319306,
+      "num_tokens": 7698337.0,
+      "step": 3300
+    },
+    {
+      "entropy": 0.08643955274485052,
+      "epoch": 8.958500669344042,
+      "grad_norm": 0.7915636301040649,
+      "learning_rate": 5.394763110501694e-06,
+      "loss": 0.32275047302246096,
+      "mean_token_accuracy": 0.9766862055659294,
+      "num_tokens": 7815006.0,
+      "step": 3350
+    },
+    {
+      "epoch": 9.0,
+      "eval_entropy": 0.1494569706916809,
+      "eval_loss": 1.0805635452270508,
+      "eval_mean_token_accuracy": 0.8623243406414985,
+      "eval_num_tokens": 7850223.0,
+      "eval_runtime": 96.1431,
+      "eval_samples_per_second": 16.621,
+      "eval_steps_per_second": 2.08,
+      "step": 3366
+    },
+    {
+      "entropy": 0.08869564228437164,
+      "epoch": 9.09103078982597,
+      "grad_norm": 0.7001141309738159,
+      "learning_rate": 4.114184370600321e-06,
+      "loss": 0.31673063278198244,
+      "mean_token_accuracy": 0.9770309651138807,
+      "num_tokens": 7924040.0,
+      "step": 3400
+    },
+    {
+      "entropy": 0.08074495340697467,
+      "epoch": 9.224899598393574,
+      "grad_norm": 0.5722501277923584,
+      "learning_rate": 3.003023018340723e-06,
+      "loss": 0.29645233154296874,
+      "mean_token_accuracy": 0.9780940434336662,
+      "num_tokens": 8035493.0,
+      "step": 3450
+    },
+    {
+      "entropy": 0.08180667289532721,
+      "epoch": 9.358768406961179,
+      "grad_norm": 0.6047417521476746,
+      "learning_rate": 2.0636984657818187e-06,
+      "loss": 0.29764341354370116,
+      "mean_token_accuracy": 0.9783824861049653,
+      "num_tokens": 8150965.0,
+      "step": 3500
+    },
+    {
+      "entropy": 0.07683482679538428,
+      "epoch": 9.492637215528783,
+      "grad_norm": 2.4113316535949707,
+      "learning_rate": 1.298255972277725e-06,
+      "loss": 0.2808952713012695,
+      "mean_token_accuracy": 0.9798626834154129,
+      "num_tokens": 8273746.0,
+      "step": 3550
+    },
+    {
+      "entropy": 0.08143842275254429,
+      "epoch": 9.626506024096386,
+      "grad_norm": 0.8744550347328186,
+      "learning_rate": 7.083621911865702e-07,
+      "loss": 0.29585289001464843,
+      "mean_token_accuracy": 0.9784767204523086,
+      "num_tokens": 8391652.0,
+      "step": 3600
+    },
+    {
+      "entropy": 0.07885600795969366,
+      "epoch": 9.76037483266399,
+      "grad_norm": 1.0551655292510986,
+      "learning_rate": 2.953015409454723e-07,
+      "loss": 0.2857367134094238,
+      "mean_token_accuracy": 0.9793685188889504,
+      "num_tokens": 8509546.0,
+      "step": 3650
+    },
+    {
+      "entropy": 0.07381519969552755,
+      "epoch": 9.894243641231594,
+      "grad_norm": 0.7463138103485107,
+      "learning_rate": 5.997340841324148e-08,
+      "loss": 0.27086233139038085,
+      "mean_token_accuracy": 0.9803544229269028,
+      "num_tokens": 8634626.0,
+      "step": 3700
+    },
+    {
+      "epoch": 10.0,
+      "eval_entropy": 0.1379357658699155,
+      "eval_loss": 1.1639536619186401,
+      "eval_mean_token_accuracy": 0.8618501043319702,
+      "eval_num_tokens": 8722470.0,
+      "eval_runtime": 96.5903,
+      "eval_samples_per_second": 16.544,
+      "eval_steps_per_second": 2.071,
+      "step": 3740
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.9668458004157363e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/README.md b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/adapter_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9abe250820de6d55106ad056cc8dddd15cd6bd60
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05026173039334608,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/tokenizer_config.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/trainer_state.json b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..5bb5a15342afd2aba51b80829251e253ce2ddbe2
--- /dev/null
+++ b/DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-748/trainer_state.json
@@ -0,0 +1,196 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 748,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.2582822921872139,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 5.033235549926758,
+      "learning_rate": 2.1466288485778066e-05,
+      "loss": 5.046328735351563,
+      "mean_token_accuracy": 0.7502484863996506,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5685664692521095,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 3.554903030395508,
+      "learning_rate": 4.337066449167405e-05,
+      "loss": 2.209185791015625,
+      "mean_token_accuracy": 0.8581047981977463,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5092987871170044,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 5.598084926605225,
+      "learning_rate": 6.527504049757005e-05,
+      "loss": 2.0146630859375,
+      "mean_token_accuracy": 0.8670356649160386,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4832092320919037,
+      "epoch": 0.535475234270415,
+      "grad_norm": 3.579439163208008,
+      "learning_rate": 8.717941650346603e-05,
+      "loss": 1.9066175842285156,
+      "mean_token_accuracy": 0.8735517236590385,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.49370287612080577,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 22.771854400634766,
+      "learning_rate": 0.00010908379250936202,
+      "loss": 1.9317852783203124,
+      "mean_token_accuracy": 0.8717742815613747,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.475419160425663,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 191.79444885253906,
+      "learning_rate": 0.000130988168515258,
+      "loss": 1.9097901916503905,
+      "mean_token_accuracy": 0.8744278407096863,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5022796393930912,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 3.216644287109375,
+      "learning_rate": 0.00015289254452115398,
+      "loss": 2.147085876464844,
+      "mean_token_accuracy": 0.869993035197258,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5250271247327327,
+      "eval_loss": 0.5184861421585083,
+      "eval_mean_token_accuracy": 0.8612929663062096,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.6309,
+      "eval_samples_per_second": 16.537,
+      "eval_steps_per_second": 2.07,
+      "step": 374
+    },
+    {
+      "entropy": 0.4808884654382263,
+      "epoch": 1.069611780455154,
+      "grad_norm": 2.666057586669922,
+      "learning_rate": 0.00016382243255158818,
+      "loss": 1.8543489074707031,
+      "mean_token_accuracy": 0.8768212256407497,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.45330303743481637,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 2.1171557903289795,
+      "learning_rate": 0.00016364410560779942,
+      "loss": 1.7948956298828125,
+      "mean_token_accuracy": 0.8791207140684127,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.45347678795456886,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 4.511318683624268,
+      "learning_rate": 0.00016328784000438723,
+      "loss": 1.7988812255859374,
+      "mean_token_accuracy": 0.8801145932078361,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.44952490359544756,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 2.961987257003784,
+      "learning_rate": 0.0001627544114642431,
+      "loss": 1.7823495483398437,
+      "mean_token_accuracy": 0.8799195346236229,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4502506497502327,
+      "epoch": 1.605087014725569,
+      "grad_norm": 2.924865484237671,
+      "learning_rate": 0.000162044981459947,
+      "loss": 1.7603852844238281,
+      "mean_token_accuracy": 0.8811277949810028,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.44528300017118455,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 2.928840160369873,
+      "learning_rate": 0.00016116109468480906,
+      "loss": 1.7513160705566406,
+      "mean_token_accuracy": 0.8816375133395195,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.43578719861805437,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 15.26456356048584,
+      "learning_rate": 0.00016010467568949708,
+      "loss": 1.7112632751464845,
+      "mean_token_accuracy": 0.884103564620018,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4700849764049053,
+      "eval_loss": 0.49145790934562683,
+      "eval_mean_token_accuracy": 0.8681637060642242,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5704,
+      "eval_samples_per_second": 16.548,
+      "eval_steps_per_second": 2.071,
+      "step": 748
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.920219300598998e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..91db40851463ac42dcf1b728985e59b9df3a916e
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/trainer_state.json
@@ -0,0 +1,853 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.8811104405552204,
+  "eval_steps": 20,
+  "global_step": 780,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.519332302187689e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..5b4310a836237d93697a8e1296dc66ee7402753f
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/trainer_state.json
@@ -0,0 +1,118 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.19312009656004828,
+  "eval_steps": 20,
+  "global_step": 80,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.67938648931657e+16,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..af969bbe591de1f41dc419698acbbee08a8ce3e6
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/trainer_state.json
@@ -0,0 +1,874 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.9293904646952322,
+  "eval_steps": 20,
+  "global_step": 800,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    },
+    {
+      "entropy": 0.5075355738401413,
+      "epoch": 1.9293904646952322,
+      "grad_norm": 1.8813495635986328,
+      "learning_rate": 0.00013213952490880468,
+      "loss": 1.9060043334960937,
+      "mean_token_accuracy": 0.8672933347523213,
+      "num_tokens": 1881345.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "eval_entropy": 0.5167921193864908,
+      "eval_loss": 0.5141814947128296,
+      "eval_mean_token_accuracy": 0.8620959691117319,
+      "eval_num_tokens": 1881345.0,
+      "eval_runtime": 90.7632,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 800
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.696819086240467e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..27c339610106ea1b0b907a3c0edc1d1f847efe0b
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/trainer_state.json
@@ -0,0 +1,895 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.9776704888352445,
+  "eval_steps": 20,
+  "global_step": 820,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    },
+    {
+      "entropy": 0.5075355738401413,
+      "epoch": 1.9293904646952322,
+      "grad_norm": 1.8813495635986328,
+      "learning_rate": 0.00013213952490880468,
+      "loss": 1.9060043334960937,
+      "mean_token_accuracy": 0.8672933347523213,
+      "num_tokens": 1881345.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "eval_entropy": 0.5167921193864908,
+      "eval_loss": 0.5141814947128296,
+      "eval_mean_token_accuracy": 0.8620959691117319,
+      "eval_num_tokens": 1881345.0,
+      "eval_runtime": 90.7632,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 800
+    },
+    {
+      "entropy": 0.5104774657636881,
+      "epoch": 1.9776704888352445,
+      "grad_norm": 2.2347588539123535,
+      "learning_rate": 0.0001317682976826996,
+      "loss": 1.9154193878173829,
+      "mean_token_accuracy": 0.8677295126020909,
+      "num_tokens": 1926308.0,
+      "step": 820
+    },
+    {
+      "epoch": 1.9776704888352445,
+      "eval_entropy": 0.4975446199768045,
+      "eval_loss": 0.5171827077865601,
+      "eval_mean_token_accuracy": 0.8614644890420893,
+      "eval_num_tokens": 1926308.0,
+      "eval_runtime": 90.7332,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 820
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.855600064591891e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..27c34ad7a3cb0e784238bb71dd4a4bdbe8b4cb09
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/trainer_state.json
@@ -0,0 +1,916 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.024140012070006,
+  "eval_steps": 20,
+  "global_step": 840,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    },
+    {
+      "entropy": 0.5075355738401413,
+      "epoch": 1.9293904646952322,
+      "grad_norm": 1.8813495635986328,
+      "learning_rate": 0.00013213952490880468,
+      "loss": 1.9060043334960937,
+      "mean_token_accuracy": 0.8672933347523213,
+      "num_tokens": 1881345.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "eval_entropy": 0.5167921193864908,
+      "eval_loss": 0.5141814947128296,
+      "eval_mean_token_accuracy": 0.8620959691117319,
+      "eval_num_tokens": 1881345.0,
+      "eval_runtime": 90.7632,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 800
+    },
+    {
+      "entropy": 0.5104774657636881,
+      "epoch": 1.9776704888352445,
+      "grad_norm": 2.2347588539123535,
+      "learning_rate": 0.0001317682976826996,
+      "loss": 1.9154193878173829,
+      "mean_token_accuracy": 0.8677295126020909,
+      "num_tokens": 1926308.0,
+      "step": 820
+    },
+    {
+      "epoch": 1.9776704888352445,
+      "eval_entropy": 0.4975446199768045,
+      "eval_loss": 0.5171827077865601,
+      "eval_mean_token_accuracy": 0.8614644890420893,
+      "eval_num_tokens": 1926308.0,
+      "eval_runtime": 90.7332,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 820
+    },
+    {
+      "entropy": 0.4617252717544506,
+      "epoch": 2.024140012070006,
+      "grad_norm": 2.3023998737335205,
+      "learning_rate": 0.00013137897478564603,
+      "loss": 1.672403907775879,
+      "mean_token_accuracy": 0.877363781650345,
+      "num_tokens": 1972496.0,
+      "step": 840
+    },
+    {
+      "epoch": 2.024140012070006,
+      "eval_entropy": 0.4930287114020144,
+      "eval_loss": 0.5240046977996826,
+      "eval_mean_token_accuracy": 0.8597234454047814,
+      "eval_num_tokens": 1972496.0,
+      "eval_runtime": 90.7242,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 840
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.031650787581724e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..2d09b09e511b2ce7b883f76c17b9be4070d614c1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/trainer_state.json
@@ -0,0 +1,937 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0724200362100182,
+  "eval_steps": 20,
+  "global_step": 860,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    },
+    {
+      "entropy": 0.5075355738401413,
+      "epoch": 1.9293904646952322,
+      "grad_norm": 1.8813495635986328,
+      "learning_rate": 0.00013213952490880468,
+      "loss": 1.9060043334960937,
+      "mean_token_accuracy": 0.8672933347523213,
+      "num_tokens": 1881345.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "eval_entropy": 0.5167921193864908,
+      "eval_loss": 0.5141814947128296,
+      "eval_mean_token_accuracy": 0.8620959691117319,
+      "eval_num_tokens": 1881345.0,
+      "eval_runtime": 90.7632,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 800
+    },
+    {
+      "entropy": 0.5104774657636881,
+      "epoch": 1.9776704888352445,
+      "grad_norm": 2.2347588539123535,
+      "learning_rate": 0.0001317682976826996,
+      "loss": 1.9154193878173829,
+      "mean_token_accuracy": 0.8677295126020909,
+      "num_tokens": 1926308.0,
+      "step": 820
+    },
+    {
+      "epoch": 1.9776704888352445,
+      "eval_entropy": 0.4975446199768045,
+      "eval_loss": 0.5171827077865601,
+      "eval_mean_token_accuracy": 0.8614644890420893,
+      "eval_num_tokens": 1926308.0,
+      "eval_runtime": 90.7332,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 820
+    },
+    {
+      "entropy": 0.4617252717544506,
+      "epoch": 2.024140012070006,
+      "grad_norm": 2.3023998737335205,
+      "learning_rate": 0.00013137897478564603,
+      "loss": 1.672403907775879,
+      "mean_token_accuracy": 0.877363781650345,
+      "num_tokens": 1972496.0,
+      "step": 840
+    },
+    {
+      "epoch": 2.024140012070006,
+      "eval_entropy": 0.4930287114020144,
+      "eval_loss": 0.5240046977996826,
+      "eval_mean_token_accuracy": 0.8597234454047814,
+      "eval_num_tokens": 1972496.0,
+      "eval_runtime": 90.7242,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 840
+    },
+    {
+      "entropy": 0.41189998425543306,
+      "epoch": 2.0724200362100182,
+      "grad_norm": 2.5921578407287598,
+      "learning_rate": 0.00013097166639141857,
+      "loss": 1.5435317993164062,
+      "mean_token_accuracy": 0.8864825963973999,
+      "num_tokens": 2020733.0,
+      "step": 860
+    },
+    {
+      "epoch": 2.0724200362100182,
+      "eval_entropy": 0.46020560820450945,
+      "eval_loss": 0.5281100869178772,
+      "eval_mean_token_accuracy": 0.8605042665861966,
+      "eval_num_tokens": 2020733.0,
+      "eval_runtime": 90.7546,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 860
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.207153734702186e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..61faa3b25c5b05f8269ca4ca5f724ecda639a30a
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/trainer_state.json
@@ -0,0 +1,958 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.12070006035003,
+  "eval_steps": 20,
+  "global_step": 880,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    },
+    {
+      "entropy": 0.5075355738401413,
+      "epoch": 1.9293904646952322,
+      "grad_norm": 1.8813495635986328,
+      "learning_rate": 0.00013213952490880468,
+      "loss": 1.9060043334960937,
+      "mean_token_accuracy": 0.8672933347523213,
+      "num_tokens": 1881345.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "eval_entropy": 0.5167921193864908,
+      "eval_loss": 0.5141814947128296,
+      "eval_mean_token_accuracy": 0.8620959691117319,
+      "eval_num_tokens": 1881345.0,
+      "eval_runtime": 90.7632,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 800
+    },
+    {
+      "entropy": 0.5104774657636881,
+      "epoch": 1.9776704888352445,
+      "grad_norm": 2.2347588539123535,
+      "learning_rate": 0.0001317682976826996,
+      "loss": 1.9154193878173829,
+      "mean_token_accuracy": 0.8677295126020909,
+      "num_tokens": 1926308.0,
+      "step": 820
+    },
+    {
+      "epoch": 1.9776704888352445,
+      "eval_entropy": 0.4975446199768045,
+      "eval_loss": 0.5171827077865601,
+      "eval_mean_token_accuracy": 0.8614644890420893,
+      "eval_num_tokens": 1926308.0,
+      "eval_runtime": 90.7332,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 820
+    },
+    {
+      "entropy": 0.4617252717544506,
+      "epoch": 2.024140012070006,
+      "grad_norm": 2.3023998737335205,
+      "learning_rate": 0.00013137897478564603,
+      "loss": 1.672403907775879,
+      "mean_token_accuracy": 0.877363781650345,
+      "num_tokens": 1972496.0,
+      "step": 840
+    },
+    {
+      "epoch": 2.024140012070006,
+      "eval_entropy": 0.4930287114020144,
+      "eval_loss": 0.5240046977996826,
+      "eval_mean_token_accuracy": 0.8597234454047814,
+      "eval_num_tokens": 1972496.0,
+      "eval_runtime": 90.7242,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 840
+    },
+    {
+      "entropy": 0.41189998425543306,
+      "epoch": 2.0724200362100182,
+      "grad_norm": 2.5921578407287598,
+      "learning_rate": 0.00013097166639141857,
+      "loss": 1.5435317993164062,
+      "mean_token_accuracy": 0.8864825963973999,
+      "num_tokens": 2020733.0,
+      "step": 860
+    },
+    {
+      "epoch": 2.0724200362100182,
+      "eval_entropy": 0.46020560820450945,
+      "eval_loss": 0.5281100869178772,
+      "eval_mean_token_accuracy": 0.8605042665861966,
+      "eval_num_tokens": 2020733.0,
+      "eval_runtime": 90.7546,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 860
+    },
+    {
+      "entropy": 0.4227153487503529,
+      "epoch": 2.12070006035003,
+      "grad_norm": 2.2209794521331787,
+      "learning_rate": 0.0001305464877634748,
+      "loss": 1.571579933166504,
+      "mean_token_accuracy": 0.8854078397154808,
+      "num_tokens": 2066856.0,
+      "step": 880
+    },
+    {
+      "epoch": 2.12070006035003,
+      "eval_entropy": 0.4408075308866715,
+      "eval_loss": 0.534494161605835,
+      "eval_mean_token_accuracy": 0.8604544247134348,
+      "eval_num_tokens": 2066856.0,
+      "eval_runtime": 90.8502,
+      "eval_samples_per_second": 15.63,
+      "eval_steps_per_second": 1.959,
+      "step": 880
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.368444118725458e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3d2c5fd2ded4eba006e5927e2bf24bbe8fdc8f1d
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/trainer_state.json
@@ -0,0 +1,979 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.1689800844900424,
+  "eval_steps": 20,
+  "global_step": 900,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    },
+    {
+      "entropy": 0.5075355738401413,
+      "epoch": 1.9293904646952322,
+      "grad_norm": 1.8813495635986328,
+      "learning_rate": 0.00013213952490880468,
+      "loss": 1.9060043334960937,
+      "mean_token_accuracy": 0.8672933347523213,
+      "num_tokens": 1881345.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "eval_entropy": 0.5167921193864908,
+      "eval_loss": 0.5141814947128296,
+      "eval_mean_token_accuracy": 0.8620959691117319,
+      "eval_num_tokens": 1881345.0,
+      "eval_runtime": 90.7632,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 800
+    },
+    {
+      "entropy": 0.5104774657636881,
+      "epoch": 1.9776704888352445,
+      "grad_norm": 2.2347588539123535,
+      "learning_rate": 0.0001317682976826996,
+      "loss": 1.9154193878173829,
+      "mean_token_accuracy": 0.8677295126020909,
+      "num_tokens": 1926308.0,
+      "step": 820
+    },
+    {
+      "epoch": 1.9776704888352445,
+      "eval_entropy": 0.4975446199768045,
+      "eval_loss": 0.5171827077865601,
+      "eval_mean_token_accuracy": 0.8614644890420893,
+      "eval_num_tokens": 1926308.0,
+      "eval_runtime": 90.7332,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 820
+    },
+    {
+      "entropy": 0.4617252717544506,
+      "epoch": 2.024140012070006,
+      "grad_norm": 2.3023998737335205,
+      "learning_rate": 0.00013137897478564603,
+      "loss": 1.672403907775879,
+      "mean_token_accuracy": 0.877363781650345,
+      "num_tokens": 1972496.0,
+      "step": 840
+    },
+    {
+      "epoch": 2.024140012070006,
+      "eval_entropy": 0.4930287114020144,
+      "eval_loss": 0.5240046977996826,
+      "eval_mean_token_accuracy": 0.8597234454047814,
+      "eval_num_tokens": 1972496.0,
+      "eval_runtime": 90.7242,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 840
+    },
+    {
+      "entropy": 0.41189998425543306,
+      "epoch": 2.0724200362100182,
+      "grad_norm": 2.5921578407287598,
+      "learning_rate": 0.00013097166639141857,
+      "loss": 1.5435317993164062,
+      "mean_token_accuracy": 0.8864825963973999,
+      "num_tokens": 2020733.0,
+      "step": 860
+    },
+    {
+      "epoch": 2.0724200362100182,
+      "eval_entropy": 0.46020560820450945,
+      "eval_loss": 0.5281100869178772,
+      "eval_mean_token_accuracy": 0.8605042665861966,
+      "eval_num_tokens": 2020733.0,
+      "eval_runtime": 90.7546,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 860
+    },
+    {
+      "entropy": 0.4227153487503529,
+      "epoch": 2.12070006035003,
+      "grad_norm": 2.2209794521331787,
+      "learning_rate": 0.0001305464877634748,
+      "loss": 1.571579933166504,
+      "mean_token_accuracy": 0.8854078397154808,
+      "num_tokens": 2066856.0,
+      "step": 880
+    },
+    {
+      "epoch": 2.12070006035003,
+      "eval_entropy": 0.4408075308866715,
+      "eval_loss": 0.534494161605835,
+      "eval_mean_token_accuracy": 0.8604544247134348,
+      "eval_num_tokens": 2066856.0,
+      "eval_runtime": 90.8502,
+      "eval_samples_per_second": 15.63,
+      "eval_steps_per_second": 1.959,
+      "step": 880
+    },
+    {
+      "entropy": 0.40760905370116235,
+      "epoch": 2.1689800844900424,
+      "grad_norm": 2.571462631225586,
+      "learning_rate": 0.00013010355922233707,
+      "loss": 1.5575182914733887,
+      "mean_token_accuracy": 0.8846474155783653,
+      "num_tokens": 2117470.0,
+      "step": 900
+    },
+    {
+      "epoch": 2.1689800844900424,
+      "eval_entropy": 0.4561347976494371,
+      "eval_loss": 0.5359405875205994,
+      "eval_mean_token_accuracy": 0.8610902686467331,
+      "eval_num_tokens": 2117470.0,
+      "eval_runtime": 90.7395,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 900
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.547248525815372e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4942664a2bdebf007a6236015f858145388ebfc7
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/trainer_state.json
@@ -0,0 +1,1000 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.2172601086300543,
+  "eval_steps": 20,
+  "global_step": 920,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    },
+    {
+      "entropy": 0.5075355738401413,
+      "epoch": 1.9293904646952322,
+      "grad_norm": 1.8813495635986328,
+      "learning_rate": 0.00013213952490880468,
+      "loss": 1.9060043334960937,
+      "mean_token_accuracy": 0.8672933347523213,
+      "num_tokens": 1881345.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "eval_entropy": 0.5167921193864908,
+      "eval_loss": 0.5141814947128296,
+      "eval_mean_token_accuracy": 0.8620959691117319,
+      "eval_num_tokens": 1881345.0,
+      "eval_runtime": 90.7632,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 800
+    },
+    {
+      "entropy": 0.5104774657636881,
+      "epoch": 1.9776704888352445,
+      "grad_norm": 2.2347588539123535,
+      "learning_rate": 0.0001317682976826996,
+      "loss": 1.9154193878173829,
+      "mean_token_accuracy": 0.8677295126020909,
+      "num_tokens": 1926308.0,
+      "step": 820
+    },
+    {
+      "epoch": 1.9776704888352445,
+      "eval_entropy": 0.4975446199768045,
+      "eval_loss": 0.5171827077865601,
+      "eval_mean_token_accuracy": 0.8614644890420893,
+      "eval_num_tokens": 1926308.0,
+      "eval_runtime": 90.7332,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 820
+    },
+    {
+      "entropy": 0.4617252717544506,
+      "epoch": 2.024140012070006,
+      "grad_norm": 2.3023998737335205,
+      "learning_rate": 0.00013137897478564603,
+      "loss": 1.672403907775879,
+      "mean_token_accuracy": 0.877363781650345,
+      "num_tokens": 1972496.0,
+      "step": 840
+    },
+    {
+      "epoch": 2.024140012070006,
+      "eval_entropy": 0.4930287114020144,
+      "eval_loss": 0.5240046977996826,
+      "eval_mean_token_accuracy": 0.8597234454047814,
+      "eval_num_tokens": 1972496.0,
+      "eval_runtime": 90.7242,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 840
+    },
+    {
+      "entropy": 0.41189998425543306,
+      "epoch": 2.0724200362100182,
+      "grad_norm": 2.5921578407287598,
+      "learning_rate": 0.00013097166639141857,
+      "loss": 1.5435317993164062,
+      "mean_token_accuracy": 0.8864825963973999,
+      "num_tokens": 2020733.0,
+      "step": 860
+    },
+    {
+      "epoch": 2.0724200362100182,
+      "eval_entropy": 0.46020560820450945,
+      "eval_loss": 0.5281100869178772,
+      "eval_mean_token_accuracy": 0.8605042665861966,
+      "eval_num_tokens": 2020733.0,
+      "eval_runtime": 90.7546,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 860
+    },
+    {
+      "entropy": 0.4227153487503529,
+      "epoch": 2.12070006035003,
+      "grad_norm": 2.2209794521331787,
+      "learning_rate": 0.0001305464877634748,
+      "loss": 1.571579933166504,
+      "mean_token_accuracy": 0.8854078397154808,
+      "num_tokens": 2066856.0,
+      "step": 880
+    },
+    {
+      "epoch": 2.12070006035003,
+      "eval_entropy": 0.4408075308866715,
+      "eval_loss": 0.534494161605835,
+      "eval_mean_token_accuracy": 0.8604544247134348,
+      "eval_num_tokens": 2066856.0,
+      "eval_runtime": 90.8502,
+      "eval_samples_per_second": 15.63,
+      "eval_steps_per_second": 1.959,
+      "step": 880
+    },
+    {
+      "entropy": 0.40760905370116235,
+      "epoch": 2.1689800844900424,
+      "grad_norm": 2.571462631225586,
+      "learning_rate": 0.00013010355922233707,
+      "loss": 1.5575182914733887,
+      "mean_token_accuracy": 0.8846474155783653,
+      "num_tokens": 2117470.0,
+      "step": 900
+    },
+    {
+      "epoch": 2.1689800844900424,
+      "eval_entropy": 0.4561347976494371,
+      "eval_loss": 0.5359405875205994,
+      "eval_mean_token_accuracy": 0.8610902686467331,
+      "eval_num_tokens": 2117470.0,
+      "eval_runtime": 90.7395,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 900
+    },
+    {
+      "entropy": 0.4111258488148451,
+      "epoch": 2.2172601086300543,
+      "grad_norm": 1.8378095626831055,
+      "learning_rate": 0.00012964300611154316,
+      "loss": 1.538413143157959,
+      "mean_token_accuracy": 0.8867764480412006,
+      "num_tokens": 2169713.0,
+      "step": 920
+    },
+    {
+      "epoch": 2.2172601086300543,
+      "eval_entropy": 0.43873994337039046,
+      "eval_loss": 0.5272142887115479,
+      "eval_mean_token_accuracy": 0.8617157025283642,
+      "eval_num_tokens": 2169713.0,
+      "eval_runtime": 90.7602,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 920
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.733980878257937e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9076ef2215aebc28e777398a243691be95f922de
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/trainer_state.json
@@ -0,0 +1,1021 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.2655401327700666,
+  "eval_steps": 20,
+  "global_step": 940,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    },
+    {
+      "entropy": 0.5075355738401413,
+      "epoch": 1.9293904646952322,
+      "grad_norm": 1.8813495635986328,
+      "learning_rate": 0.00013213952490880468,
+      "loss": 1.9060043334960937,
+      "mean_token_accuracy": 0.8672933347523213,
+      "num_tokens": 1881345.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "eval_entropy": 0.5167921193864908,
+      "eval_loss": 0.5141814947128296,
+      "eval_mean_token_accuracy": 0.8620959691117319,
+      "eval_num_tokens": 1881345.0,
+      "eval_runtime": 90.7632,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 800
+    },
+    {
+      "entropy": 0.5104774657636881,
+      "epoch": 1.9776704888352445,
+      "grad_norm": 2.2347588539123535,
+      "learning_rate": 0.0001317682976826996,
+      "loss": 1.9154193878173829,
+      "mean_token_accuracy": 0.8677295126020909,
+      "num_tokens": 1926308.0,
+      "step": 820
+    },
+    {
+      "epoch": 1.9776704888352445,
+      "eval_entropy": 0.4975446199768045,
+      "eval_loss": 0.5171827077865601,
+      "eval_mean_token_accuracy": 0.8614644890420893,
+      "eval_num_tokens": 1926308.0,
+      "eval_runtime": 90.7332,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 820
+    },
+    {
+      "entropy": 0.4617252717544506,
+      "epoch": 2.024140012070006,
+      "grad_norm": 2.3023998737335205,
+      "learning_rate": 0.00013137897478564603,
+      "loss": 1.672403907775879,
+      "mean_token_accuracy": 0.877363781650345,
+      "num_tokens": 1972496.0,
+      "step": 840
+    },
+    {
+      "epoch": 2.024140012070006,
+      "eval_entropy": 0.4930287114020144,
+      "eval_loss": 0.5240046977996826,
+      "eval_mean_token_accuracy": 0.8597234454047814,
+      "eval_num_tokens": 1972496.0,
+      "eval_runtime": 90.7242,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 840
+    },
+    {
+      "entropy": 0.41189998425543306,
+      "epoch": 2.0724200362100182,
+      "grad_norm": 2.5921578407287598,
+      "learning_rate": 0.00013097166639141857,
+      "loss": 1.5435317993164062,
+      "mean_token_accuracy": 0.8864825963973999,
+      "num_tokens": 2020733.0,
+      "step": 860
+    },
+    {
+      "epoch": 2.0724200362100182,
+      "eval_entropy": 0.46020560820450945,
+      "eval_loss": 0.5281100869178772,
+      "eval_mean_token_accuracy": 0.8605042665861966,
+      "eval_num_tokens": 2020733.0,
+      "eval_runtime": 90.7546,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 860
+    },
+    {
+      "entropy": 0.4227153487503529,
+      "epoch": 2.12070006035003,
+      "grad_norm": 2.2209794521331787,
+      "learning_rate": 0.0001305464877634748,
+      "loss": 1.571579933166504,
+      "mean_token_accuracy": 0.8854078397154808,
+      "num_tokens": 2066856.0,
+      "step": 880
+    },
+    {
+      "epoch": 2.12070006035003,
+      "eval_entropy": 0.4408075308866715,
+      "eval_loss": 0.534494161605835,
+      "eval_mean_token_accuracy": 0.8604544247134348,
+      "eval_num_tokens": 2066856.0,
+      "eval_runtime": 90.8502,
+      "eval_samples_per_second": 15.63,
+      "eval_steps_per_second": 1.959,
+      "step": 880
+    },
+    {
+      "entropy": 0.40760905370116235,
+      "epoch": 2.1689800844900424,
+      "grad_norm": 2.571462631225586,
+      "learning_rate": 0.00013010355922233707,
+      "loss": 1.5575182914733887,
+      "mean_token_accuracy": 0.8846474155783653,
+      "num_tokens": 2117470.0,
+      "step": 900
+    },
+    {
+      "epoch": 2.1689800844900424,
+      "eval_entropy": 0.4561347976494371,
+      "eval_loss": 0.5359405875205994,
+      "eval_mean_token_accuracy": 0.8610902686467331,
+      "eval_num_tokens": 2117470.0,
+      "eval_runtime": 90.7395,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 900
+    },
+    {
+      "entropy": 0.4111258488148451,
+      "epoch": 2.2172601086300543,
+      "grad_norm": 1.8378095626831055,
+      "learning_rate": 0.00012964300611154316,
+      "loss": 1.538413143157959,
+      "mean_token_accuracy": 0.8867764480412006,
+      "num_tokens": 2169713.0,
+      "step": 920
+    },
+    {
+      "epoch": 2.2172601086300543,
+      "eval_entropy": 0.43873994337039046,
+      "eval_loss": 0.5272142887115479,
+      "eval_mean_token_accuracy": 0.8617157025283642,
+      "eval_num_tokens": 2169713.0,
+      "eval_runtime": 90.7602,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 920
+    },
+    {
+      "entropy": 0.4261228807270527,
+      "epoch": 2.2655401327700666,
+      "grad_norm": 2.3900320529937744,
+      "learning_rate": 0.0001291649587621756,
+      "loss": 1.58123836517334,
+      "mean_token_accuracy": 0.8852489396929741,
+      "num_tokens": 2211210.0,
+      "step": 940
+    },
+    {
+      "epoch": 2.2655401327700666,
+      "eval_entropy": 0.40514066028461027,
+      "eval_loss": 0.5390793681144714,
+      "eval_mean_token_accuracy": 0.8620827519491817,
+      "eval_num_tokens": 2211210.0,
+      "eval_runtime": 90.761,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 940
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.885825829722867e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..add9468553ae172eb4000232933e56ce392943bc
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/trainer_state.json
@@ -0,0 +1,1042 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.3138201569100785,
+  "eval_steps": 20,
+  "global_step": 960,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    },
+    {
+      "entropy": 0.5075355738401413,
+      "epoch": 1.9293904646952322,
+      "grad_norm": 1.8813495635986328,
+      "learning_rate": 0.00013213952490880468,
+      "loss": 1.9060043334960937,
+      "mean_token_accuracy": 0.8672933347523213,
+      "num_tokens": 1881345.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "eval_entropy": 0.5167921193864908,
+      "eval_loss": 0.5141814947128296,
+      "eval_mean_token_accuracy": 0.8620959691117319,
+      "eval_num_tokens": 1881345.0,
+      "eval_runtime": 90.7632,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 800
+    },
+    {
+      "entropy": 0.5104774657636881,
+      "epoch": 1.9776704888352445,
+      "grad_norm": 2.2347588539123535,
+      "learning_rate": 0.0001317682976826996,
+      "loss": 1.9154193878173829,
+      "mean_token_accuracy": 0.8677295126020909,
+      "num_tokens": 1926308.0,
+      "step": 820
+    },
+    {
+      "epoch": 1.9776704888352445,
+      "eval_entropy": 0.4975446199768045,
+      "eval_loss": 0.5171827077865601,
+      "eval_mean_token_accuracy": 0.8614644890420893,
+      "eval_num_tokens": 1926308.0,
+      "eval_runtime": 90.7332,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 820
+    },
+    {
+      "entropy": 0.4617252717544506,
+      "epoch": 2.024140012070006,
+      "grad_norm": 2.3023998737335205,
+      "learning_rate": 0.00013137897478564603,
+      "loss": 1.672403907775879,
+      "mean_token_accuracy": 0.877363781650345,
+      "num_tokens": 1972496.0,
+      "step": 840
+    },
+    {
+      "epoch": 2.024140012070006,
+      "eval_entropy": 0.4930287114020144,
+      "eval_loss": 0.5240046977996826,
+      "eval_mean_token_accuracy": 0.8597234454047814,
+      "eval_num_tokens": 1972496.0,
+      "eval_runtime": 90.7242,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 840
+    },
+    {
+      "entropy": 0.41189998425543306,
+      "epoch": 2.0724200362100182,
+      "grad_norm": 2.5921578407287598,
+      "learning_rate": 0.00013097166639141857,
+      "loss": 1.5435317993164062,
+      "mean_token_accuracy": 0.8864825963973999,
+      "num_tokens": 2020733.0,
+      "step": 860
+    },
+    {
+      "epoch": 2.0724200362100182,
+      "eval_entropy": 0.46020560820450945,
+      "eval_loss": 0.5281100869178772,
+      "eval_mean_token_accuracy": 0.8605042665861966,
+      "eval_num_tokens": 2020733.0,
+      "eval_runtime": 90.7546,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 860
+    },
+    {
+      "entropy": 0.4227153487503529,
+      "epoch": 2.12070006035003,
+      "grad_norm": 2.2209794521331787,
+      "learning_rate": 0.0001305464877634748,
+      "loss": 1.571579933166504,
+      "mean_token_accuracy": 0.8854078397154808,
+      "num_tokens": 2066856.0,
+      "step": 880
+    },
+    {
+      "epoch": 2.12070006035003,
+      "eval_entropy": 0.4408075308866715,
+      "eval_loss": 0.534494161605835,
+      "eval_mean_token_accuracy": 0.8604544247134348,
+      "eval_num_tokens": 2066856.0,
+      "eval_runtime": 90.8502,
+      "eval_samples_per_second": 15.63,
+      "eval_steps_per_second": 1.959,
+      "step": 880
+    },
+    {
+      "entropy": 0.40760905370116235,
+      "epoch": 2.1689800844900424,
+      "grad_norm": 2.571462631225586,
+      "learning_rate": 0.00013010355922233707,
+      "loss": 1.5575182914733887,
+      "mean_token_accuracy": 0.8846474155783653,
+      "num_tokens": 2117470.0,
+      "step": 900
+    },
+    {
+      "epoch": 2.1689800844900424,
+      "eval_entropy": 0.4561347976494371,
+      "eval_loss": 0.5359405875205994,
+      "eval_mean_token_accuracy": 0.8610902686467331,
+      "eval_num_tokens": 2117470.0,
+      "eval_runtime": 90.7395,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 900
+    },
+    {
+      "entropy": 0.4111258488148451,
+      "epoch": 2.2172601086300543,
+      "grad_norm": 1.8378095626831055,
+      "learning_rate": 0.00012964300611154316,
+      "loss": 1.538413143157959,
+      "mean_token_accuracy": 0.8867764480412006,
+      "num_tokens": 2169713.0,
+      "step": 920
+    },
+    {
+      "epoch": 2.2172601086300543,
+      "eval_entropy": 0.43873994337039046,
+      "eval_loss": 0.5272142887115479,
+      "eval_mean_token_accuracy": 0.8617157025283642,
+      "eval_num_tokens": 2169713.0,
+      "eval_runtime": 90.7602,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 920
+    },
+    {
+      "entropy": 0.4261228807270527,
+      "epoch": 2.2655401327700666,
+      "grad_norm": 2.3900320529937744,
+      "learning_rate": 0.0001291649587621756,
+      "loss": 1.58123836517334,
+      "mean_token_accuracy": 0.8852489396929741,
+      "num_tokens": 2211210.0,
+      "step": 940
+    },
+    {
+      "epoch": 2.2655401327700666,
+      "eval_entropy": 0.40514066028461027,
+      "eval_loss": 0.5390793681144714,
+      "eval_mean_token_accuracy": 0.8620827519491817,
+      "eval_num_tokens": 2211210.0,
+      "eval_runtime": 90.761,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 940
+    },
+    {
+      "entropy": 0.4236912790685892,
+      "epoch": 2.3138201569100785,
+      "grad_norm": 1.635823130607605,
+      "learning_rate": 0.00012866955245597952,
+      "loss": 1.5851353645324706,
+      "mean_token_accuracy": 0.8841134652495384,
+      "num_tokens": 2256672.0,
+      "step": 960
+    },
+    {
+      "epoch": 2.3138201569100785,
+      "eval_entropy": 0.46641212182768277,
+      "eval_loss": 0.5175439119338989,
+      "eval_mean_token_accuracy": 0.8625401358925895,
+      "eval_num_tokens": 2256672.0,
+      "eval_runtime": 90.7723,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 960
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.048500458172792e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/README.md b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/README.md
@@ -0,0 +1,209 @@
+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/adapter_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43f588183c3a6860ce09a29af1b562bae0504be
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/adapter_config.json
@@ -0,0 +1,40 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.047757012531964065,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/tokenizer_config.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/tokenizer_config.json
@@ -0,0 +1,54 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/trainer_state.json b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a5f71c3c2c0becb17274d9fc613bcd5b1021721c
--- /dev/null
+++ b/overgeneralisation_original_Estonian/gemma-4-31B_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/trainer_state.json
@@ -0,0 +1,1063 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.3621001810500903,
+  "eval_steps": 20,
+  "global_step": 980,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.675290709733963,
+      "epoch": 0.04828002414001207,
+      "grad_norm": 6.707607269287109,
+      "learning_rate": 6.210328529812303e-06,
+      "loss": 7.104328918457031,
+      "mean_token_accuracy": 0.6682514727115632,
+      "num_tokens": 48182.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_entropy": 1.5423412115386363,
+      "eval_loss": 1.416153907775879,
+      "eval_mean_token_accuracy": 0.713003780734673,
+      "eval_num_tokens": 48182.0,
+      "eval_runtime": 90.8818,
+      "eval_samples_per_second": 15.625,
+      "eval_steps_per_second": 1.959,
+      "step": 20
+    },
+    {
+      "entropy": 1.1686139158904552,
+      "epoch": 0.09656004828002414,
+      "grad_norm": 3.5588884353637695,
+      "learning_rate": 1.2747516455930517e-05,
+      "loss": 4.294140243530274,
+      "mean_token_accuracy": 0.7630169309675694,
+      "num_tokens": 97030.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.09656004828002414,
+      "eval_entropy": 0.801704225580344,
+      "eval_loss": 0.7841165661811829,
+      "eval_mean_token_accuracy": 0.8063843169908845,
+      "eval_num_tokens": 97030.0,
+      "eval_runtime": 90.7834,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 40
+    },
+    {
+      "entropy": 0.7488047637045383,
+      "epoch": 0.14484007242003621,
+      "grad_norm": 4.866708755493164,
+      "learning_rate": 1.9284704382048732e-05,
+      "loss": 2.9088117599487306,
+      "mean_token_accuracy": 0.8165101781487465,
+      "num_tokens": 144528.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.14484007242003621,
+      "eval_entropy": 0.6810337137640192,
+      "eval_loss": 0.6656371355056763,
+      "eval_mean_token_accuracy": 0.8306669830606225,
+      "eval_num_tokens": 144528.0,
+      "eval_runtime": 90.8474,
+      "eval_samples_per_second": 15.631,
+      "eval_steps_per_second": 1.959,
+      "step": 60
+    },
+    {
+      "entropy": 0.6792228668928146,
+      "epoch": 0.19312009656004828,
+      "grad_norm": 4.510631084442139,
+      "learning_rate": 2.5821892308166943e-05,
+      "loss": 2.6342445373535157,
+      "mean_token_accuracy": 0.8298680819571018,
+      "num_tokens": 189657.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19312009656004828,
+      "eval_entropy": 0.6384875539983257,
+      "eval_loss": 0.6206316947937012,
+      "eval_mean_token_accuracy": 0.8366272945082589,
+      "eval_num_tokens": 189657.0,
+      "eval_runtime": 90.8078,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 80
+    },
+    {
+      "entropy": 0.6113388158380986,
+      "epoch": 0.24140012070006034,
+      "grad_norm": 2.513516664505005,
+      "learning_rate": 3.235908023428516e-05,
+      "loss": 2.413893127441406,
+      "mean_token_accuracy": 0.8396451488137245,
+      "num_tokens": 238869.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "eval_entropy": 0.6067953471387371,
+      "eval_loss": 0.6021680235862732,
+      "eval_mean_token_accuracy": 0.839132690362716,
+      "eval_num_tokens": 238869.0,
+      "eval_runtime": 90.7994,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 100
+    },
+    {
+      "entropy": 0.6011081866919994,
+      "epoch": 0.28968014484007243,
+      "grad_norm": 3.0723557472229004,
+      "learning_rate": 3.8896268160403376e-05,
+      "loss": 2.3560277938842775,
+      "mean_token_accuracy": 0.8405322283506393,
+      "num_tokens": 286432.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.28968014484007243,
+      "eval_entropy": 0.5886335322696171,
+      "eval_loss": 0.5883614420890808,
+      "eval_mean_token_accuracy": 0.8427048559938923,
+      "eval_num_tokens": 286432.0,
+      "eval_runtime": 90.7823,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 120
+    },
+    {
+      "entropy": 0.5986796505749226,
+      "epoch": 0.33796016898008446,
+      "grad_norm": 2.583876609802246,
+      "learning_rate": 4.543345608652159e-05,
+      "loss": 2.3548404693603517,
+      "mean_token_accuracy": 0.8397360973060131,
+      "num_tokens": 335416.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.33796016898008446,
+      "eval_entropy": 0.5859675710455755,
+      "eval_loss": 0.5772915482521057,
+      "eval_mean_token_accuracy": 0.8440543389722203,
+      "eval_num_tokens": 335416.0,
+      "eval_runtime": 90.755,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 140
+    },
+    {
+      "entropy": 0.5869754277169704,
+      "epoch": 0.38624019312009655,
+      "grad_norm": 2.9026308059692383,
+      "learning_rate": 5.19706440126398e-05,
+      "loss": 2.320369338989258,
+      "mean_token_accuracy": 0.8441123567521572,
+      "num_tokens": 380490.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.38624019312009655,
+      "eval_entropy": 0.5944042242644878,
+      "eval_loss": 0.5694729089736938,
+      "eval_mean_token_accuracy": 0.8468695527382111,
+      "eval_num_tokens": 380490.0,
+      "eval_runtime": 90.7588,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 160
+    },
+    {
+      "entropy": 0.5780520122498274,
+      "epoch": 0.43452021726010864,
+      "grad_norm": 3.3172314167022705,
+      "learning_rate": 5.850783193875801e-05,
+      "loss": 2.280506134033203,
+      "mean_token_accuracy": 0.8448525600135326,
+      "num_tokens": 429118.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.43452021726010864,
+      "eval_entropy": 0.5612959178645959,
+      "eval_loss": 0.5575970411300659,
+      "eval_mean_token_accuracy": 0.8498810844474964,
+      "eval_num_tokens": 429118.0,
+      "eval_runtime": 90.7375,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 180
+    },
+    {
+      "entropy": 0.5705389507114887,
+      "epoch": 0.4828002414001207,
+      "grad_norm": 1.8956339359283447,
+      "learning_rate": 6.504501986487622e-05,
+      "loss": 2.242726516723633,
+      "mean_token_accuracy": 0.848711597174406,
+      "num_tokens": 478235.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "eval_entropy": 0.5524000726389081,
+      "eval_loss": 0.5511140823364258,
+      "eval_mean_token_accuracy": 0.851530607831612,
+      "eval_num_tokens": 478235.0,
+      "eval_runtime": 90.7557,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 200
+    },
+    {
+      "entropy": 0.5800516583025456,
+      "epoch": 0.5310802655401328,
+      "grad_norm": 2.2796475887298584,
+      "learning_rate": 7.158220779099443e-05,
+      "loss": 2.2988216400146486,
+      "mean_token_accuracy": 0.8455737859010697,
+      "num_tokens": 523478.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.5310802655401328,
+      "eval_entropy": 0.5344762455546455,
+      "eval_loss": 0.5491540431976318,
+      "eval_mean_token_accuracy": 0.8520114234324252,
+      "eval_num_tokens": 523478.0,
+      "eval_runtime": 90.7308,
+      "eval_samples_per_second": 15.651,
+      "eval_steps_per_second": 1.962,
+      "step": 220
+    },
+    {
+      "entropy": 0.5515169702470303,
+      "epoch": 0.5793602896801449,
+      "grad_norm": 1.7194722890853882,
+      "learning_rate": 7.811939571711266e-05,
+      "loss": 2.1997905731201173,
+      "mean_token_accuracy": 0.85145553201437,
+      "num_tokens": 569874.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.5793602896801449,
+      "eval_entropy": 0.5982093161411499,
+      "eval_loss": 0.550338625907898,
+      "eval_mean_token_accuracy": 0.852124593565973,
+      "eval_num_tokens": 569874.0,
+      "eval_runtime": 90.7467,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.962,
+      "step": 240
+    },
+    {
+      "entropy": 0.565448484942317,
+      "epoch": 0.627640313820157,
+      "grad_norm": 1.6864795684814453,
+      "learning_rate": 8.465658364323088e-05,
+      "loss": 2.228106880187988,
+      "mean_token_accuracy": 0.85054235085845,
+      "num_tokens": 614229.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.627640313820157,
+      "eval_entropy": 0.5699995079737031,
+      "eval_loss": 0.5463655591011047,
+      "eval_mean_token_accuracy": 0.852450091852231,
+      "eval_num_tokens": 614229.0,
+      "eval_runtime": 90.7728,
+      "eval_samples_per_second": 15.643,
+      "eval_steps_per_second": 1.961,
+      "step": 260
+    },
+    {
+      "entropy": 0.5574715089052915,
+      "epoch": 0.6759203379601689,
+      "grad_norm": 2.7099924087524414,
+      "learning_rate": 9.119377156934908e-05,
+      "loss": 2.173061180114746,
+      "mean_token_accuracy": 0.852943730354309,
+      "num_tokens": 664249.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.6759203379601689,
+      "eval_entropy": 0.5770252673478609,
+      "eval_loss": 0.5421484708786011,
+      "eval_mean_token_accuracy": 0.8533824799435862,
+      "eval_num_tokens": 664249.0,
+      "eval_runtime": 90.764,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 280
+    },
+    {
+      "entropy": 0.5531694382429123,
+      "epoch": 0.724200362100181,
+      "grad_norm": 2.56211519241333,
+      "learning_rate": 9.77309594954673e-05,
+      "loss": 2.1611295700073243,
+      "mean_token_accuracy": 0.8546892657876015,
+      "num_tokens": 711614.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.724200362100181,
+      "eval_entropy": 0.5576409329189344,
+      "eval_loss": 0.5419679284095764,
+      "eval_mean_token_accuracy": 0.8531393000249112,
+      "eval_num_tokens": 711614.0,
+      "eval_runtime": 90.7815,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 300
+    },
+    {
+      "entropy": 0.5627521466463804,
+      "epoch": 0.7724803862401931,
+      "grad_norm": 158.44029235839844,
+      "learning_rate": 0.0001042681474215855,
+      "loss": 2.391754913330078,
+      "mean_token_accuracy": 0.8485012218356133,
+      "num_tokens": 758911.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.7724803862401931,
+      "eval_entropy": 0.6003884867335973,
+      "eval_loss": 0.7040325403213501,
+      "eval_mean_token_accuracy": 0.8316127952564968,
+      "eval_num_tokens": 758911.0,
+      "eval_runtime": 90.7921,
+      "eval_samples_per_second": 15.64,
+      "eval_steps_per_second": 1.961,
+      "step": 320
+    },
+    {
+      "entropy": 0.5796094480901957,
+      "epoch": 0.8207604103802052,
+      "grad_norm": 7.587340354919434,
+      "learning_rate": 0.00011080533534770373,
+      "loss": 2.458403968811035,
+      "mean_token_accuracy": 0.8445835530757904,
+      "num_tokens": 809011.0,
+      "step": 340
+    },
+    {
+      "epoch": 0.8207604103802052,
+      "eval_entropy": 0.5516570319285553,
+      "eval_loss": 0.5431923270225525,
+      "eval_mean_token_accuracy": 0.8532732303222913,
+      "eval_num_tokens": 809011.0,
+      "eval_runtime": 90.7991,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 340
+    },
+    {
+      "entropy": 0.5793778888881207,
+      "epoch": 0.8690404345202173,
+      "grad_norm": 2.124638319015503,
+      "learning_rate": 0.00011734252327382194,
+      "loss": 2.2603307723999024,
+      "mean_token_accuracy": 0.8511219322681427,
+      "num_tokens": 851557.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.8690404345202173,
+      "eval_entropy": 0.560486475570818,
+      "eval_loss": 0.5465312600135803,
+      "eval_mean_token_accuracy": 0.8535054861829522,
+      "eval_num_tokens": 851557.0,
+      "eval_runtime": 90.7552,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 360
+    },
+    {
+      "entropy": 0.567094936594367,
+      "epoch": 0.9173204586602294,
+      "grad_norm": 2.3157570362091064,
+      "learning_rate": 0.00012387971119994014,
+      "loss": 2.233865737915039,
+      "mean_token_accuracy": 0.8490609914064408,
+      "num_tokens": 894234.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.9173204586602294,
+      "eval_entropy": 0.5316838782824828,
+      "eval_loss": 0.5352600812911987,
+      "eval_mean_token_accuracy": 0.8547654972317513,
+      "eval_num_tokens": 894234.0,
+      "eval_runtime": 90.9552,
+      "eval_samples_per_second": 15.612,
+      "eval_steps_per_second": 1.957,
+      "step": 380
+    },
+    {
+      "entropy": 0.5548127952963113,
+      "epoch": 0.9656004828002414,
+      "grad_norm": 3.601078748703003,
+      "learning_rate": 0.00013041689912605836,
+      "loss": 2.2153223037719725,
+      "mean_token_accuracy": 0.8552668362855911,
+      "num_tokens": 939370.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "eval_entropy": 0.5799920406569256,
+      "eval_loss": 0.5496681928634644,
+      "eval_mean_token_accuracy": 0.853103037630574,
+      "eval_num_tokens": 939370.0,
+      "eval_runtime": 90.7969,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 400
+    },
+    {
+      "entropy": 0.5529916116169521,
+      "epoch": 1.012070006035003,
+      "grad_norm": 2.1900832653045654,
+      "learning_rate": 0.00013564626559428973,
+      "loss": 2.0600866317749023,
+      "mean_token_accuracy": 0.856480234629148,
+      "num_tokens": 985127.0,
+      "step": 420
+    },
+    {
+      "epoch": 1.012070006035003,
+      "eval_entropy": 0.5062234095308218,
+      "eval_loss": 0.5424100756645203,
+      "eval_mean_token_accuracy": 0.8541433596878909,
+      "eval_num_tokens": 985127.0,
+      "eval_runtime": 90.8162,
+      "eval_samples_per_second": 15.636,
+      "eval_steps_per_second": 1.96,
+      "step": 420
+    },
+    {
+      "entropy": 0.4908415086567402,
+      "epoch": 1.060350030175015,
+      "grad_norm": 2.2977170944213867,
+      "learning_rate": 0.00013563283050733522,
+      "loss": 1.9583213806152344,
+      "mean_token_accuracy": 0.8643453657627106,
+      "num_tokens": 1035652.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.060350030175015,
+      "eval_entropy": 0.5066900360450316,
+      "eval_loss": 0.5420679450035095,
+      "eval_mean_token_accuracy": 0.8551041915845335,
+      "eval_num_tokens": 1035652.0,
+      "eval_runtime": 90.8096,
+      "eval_samples_per_second": 15.637,
+      "eval_steps_per_second": 1.96,
+      "step": 440
+    },
+    {
+      "entropy": 0.50622633472085,
+      "epoch": 1.1086300543150271,
+      "grad_norm": 2.5061802864074707,
+      "learning_rate": 0.00013560020613235583,
+      "loss": 1.9980314254760743,
+      "mean_token_accuracy": 0.8637742318212986,
+      "num_tokens": 1082852.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.1086300543150271,
+      "eval_entropy": 0.5188879335529348,
+      "eval_loss": 0.5445871949195862,
+      "eval_mean_token_accuracy": 0.8549745655461644,
+      "eval_num_tokens": 1082852.0,
+      "eval_runtime": 90.9655,
+      "eval_samples_per_second": 15.61,
+      "eval_steps_per_second": 1.957,
+      "step": 460
+    },
+    {
+      "entropy": 0.5019329734146595,
+      "epoch": 1.1569100784550392,
+      "grad_norm": 2.253516912460327,
+      "learning_rate": 0.0001355484017016638,
+      "loss": 1.9593570709228516,
+      "mean_token_accuracy": 0.8636295884847641,
+      "num_tokens": 1131836.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.1569100784550392,
+      "eval_entropy": 0.4907115553871969,
+      "eval_loss": 0.5450211763381958,
+      "eval_mean_token_accuracy": 0.8554045839256115,
+      "eval_num_tokens": 1131836.0,
+      "eval_runtime": 91.0455,
+      "eval_samples_per_second": 15.597,
+      "eval_steps_per_second": 1.955,
+      "step": 480
+    },
+    {
+      "entropy": 0.5109445530921221,
+      "epoch": 1.2051901025950513,
+      "grad_norm": 10.47754192352295,
+      "learning_rate": 0.00013547743187530023,
+      "loss": 2.0416118621826174,
+      "mean_token_accuracy": 0.8610585704445839,
+      "num_tokens": 1176544.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "eval_entropy": 0.5329894945862588,
+      "eval_loss": 0.5426890254020691,
+      "eval_mean_token_accuracy": 0.8550159998154372,
+      "eval_num_tokens": 1176544.0,
+      "eval_runtime": 90.7977,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 500
+    },
+    {
+      "entropy": 0.529351257160306,
+      "epoch": 1.2534701267350634,
+      "grad_norm": 2.3251631259918213,
+      "learning_rate": 0.00013538731673688647,
+      "loss": 2.035448455810547,
+      "mean_token_accuracy": 0.8615639433264732,
+      "num_tokens": 1224767.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.2534701267350634,
+      "eval_entropy": 0.5154926207628143,
+      "eval_loss": 0.5380744338035583,
+      "eval_mean_token_accuracy": 0.8570477728093608,
+      "eval_num_tokens": 1224767.0,
+      "eval_runtime": 90.9006,
+      "eval_samples_per_second": 15.621,
+      "eval_steps_per_second": 1.958,
+      "step": 520
+    },
+    {
+      "entropy": 0.5304025936871767,
+      "epoch": 1.3017501508750755,
+      "grad_norm": 2.1253819465637207,
+      "learning_rate": 0.00013527808178794075,
+      "loss": 1.9914405822753907,
+      "mean_token_accuracy": 0.8642974093556404,
+      "num_tokens": 1272629.0,
+      "step": 540
+    },
+    {
+      "epoch": 1.3017501508750755,
+      "eval_entropy": 0.5014389195803846,
+      "eval_loss": 0.5321570038795471,
+      "eval_mean_token_accuracy": 0.8578029737043916,
+      "eval_num_tokens": 1272629.0,
+      "eval_runtime": 90.8317,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 540
+    },
+    {
+      "entropy": 0.5210890706628561,
+      "epoch": 1.3500301750150876,
+      "grad_norm": 2.370936393737793,
+      "learning_rate": 0.00013514975794066148,
+      "loss": 1.9768535614013671,
+      "mean_token_accuracy": 0.8633426748216152,
+      "num_tokens": 1318908.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.3500301750150876,
+      "eval_entropy": 0.527289214428891,
+      "eval_loss": 0.5302034020423889,
+      "eval_mean_token_accuracy": 0.8576852588841085,
+      "eval_num_tokens": 1318908.0,
+      "eval_runtime": 90.9133,
+      "eval_samples_per_second": 15.619,
+      "eval_steps_per_second": 1.958,
+      "step": 560
+    },
+    {
+      "entropy": 0.5380321107804775,
+      "epoch": 1.3983101991550995,
+      "grad_norm": 2.9873898029327393,
+      "learning_rate": 0.00013500238150917956,
+      "loss": 2.024580192565918,
+      "mean_token_accuracy": 0.8618835039436817,
+      "num_tokens": 1360949.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.3983101991550995,
+      "eval_entropy": 0.5204530746749277,
+      "eval_loss": 0.5321171879768372,
+      "eval_mean_token_accuracy": 0.8571079852205984,
+      "eval_num_tokens": 1360949.0,
+      "eval_runtime": 90.8323,
+      "eval_samples_per_second": 15.633,
+      "eval_steps_per_second": 1.96,
+      "step": 580
+    },
+    {
+      "entropy": 0.5245487812906504,
+      "epoch": 1.4465902232951118,
+      "grad_norm": 1.9314465522766113,
+      "learning_rate": 0.00013483599419928177,
+      "loss": 2.007284164428711,
+      "mean_token_accuracy": 0.8627093754708767,
+      "num_tokens": 1407135.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "eval_entropy": 0.536725418453806,
+      "eval_loss": 0.5315413475036621,
+      "eval_mean_token_accuracy": 0.8581455457076598,
+      "eval_num_tokens": 1407135.0,
+      "eval_runtime": 90.7502,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 600
+    },
+    {
+      "entropy": 0.5325499556958675,
+      "epoch": 1.4948702474351236,
+      "grad_norm": 2.1466152667999268,
+      "learning_rate": 0.00013465064309660862,
+      "loss": 2.0100082397460937,
+      "mean_token_accuracy": 0.8619302660226822,
+      "num_tokens": 1454219.0,
+      "step": 620
+    },
+    {
+      "epoch": 1.4948702474351236,
+      "eval_entropy": 0.5285820202546173,
+      "eval_loss": 0.5281327366828918,
+      "eval_mean_token_accuracy": 0.8574312443143866,
+      "eval_num_tokens": 1454219.0,
+      "eval_runtime": 90.7975,
+      "eval_samples_per_second": 15.639,
+      "eval_steps_per_second": 1.96,
+      "step": 620
+    },
+    {
+      "entropy": 0.5270347118377685,
+      "epoch": 1.5431502715751357,
+      "grad_norm": 1.972477912902832,
+      "learning_rate": 0.00013444638065332972,
+      "loss": 2.0097970962524414,
+      "mean_token_accuracy": 0.8616458527743817,
+      "num_tokens": 1500879.0,
+      "step": 640
+    },
+    {
+      "epoch": 1.5431502715751357,
+      "eval_entropy": 0.5531984363379103,
+      "eval_loss": 0.525027871131897,
+      "eval_mean_token_accuracy": 0.8590488440535041,
+      "eval_num_tokens": 1500879.0,
+      "eval_runtime": 90.8289,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 640
+    },
+    {
+      "entropy": 0.5264006167650223,
+      "epoch": 1.5914302957151478,
+      "grad_norm": 2.101114273071289,
+      "learning_rate": 0.00013422326467330028,
+      "loss": 2.003971481323242,
+      "mean_token_accuracy": 0.8630450166761875,
+      "num_tokens": 1547565.0,
+      "step": 660
+    },
+    {
+      "epoch": 1.5914302957151478,
+      "eval_entropy": 0.4910608320758584,
+      "eval_loss": 0.5248087644577026,
+      "eval_mean_token_accuracy": 0.8599436738517847,
+      "eval_num_tokens": 1547565.0,
+      "eval_runtime": 91.0328,
+      "eval_samples_per_second": 15.599,
+      "eval_steps_per_second": 1.955,
+      "step": 660
+    },
+    {
+      "entropy": 0.5071224015206098,
+      "epoch": 1.63971031985516,
+      "grad_norm": 2.1309502124786377,
+      "learning_rate": 0.00013398135829570344,
+      "loss": 1.9901405334472657,
+      "mean_token_accuracy": 0.8636759266257286,
+      "num_tokens": 1593600.0,
+      "step": 680
+    },
+    {
+      "epoch": 1.63971031985516,
+      "eval_entropy": 0.5047111117772842,
+      "eval_loss": 0.5270171165466309,
+      "eval_mean_token_accuracy": 0.8586233539527721,
+      "eval_num_tokens": 1593600.0,
+      "eval_runtime": 90.8264,
+      "eval_samples_per_second": 15.634,
+      "eval_steps_per_second": 1.96,
+      "step": 680
+    },
+    {
+      "entropy": 0.517396530508995,
+      "epoch": 1.687990343995172,
+      "grad_norm": 2.6385438442230225,
+      "learning_rate": 0.00013372072997718266,
+      "loss": 2.0036354064941406,
+      "mean_token_accuracy": 0.8638267777860165,
+      "num_tokens": 1642224.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.687990343995172,
+      "eval_entropy": 0.49953744317708393,
+      "eval_loss": 0.5215877890586853,
+      "eval_mean_token_accuracy": 0.859384286269713,
+      "eval_num_tokens": 1642224.0,
+      "eval_runtime": 90.8569,
+      "eval_samples_per_second": 15.629,
+      "eval_steps_per_second": 1.959,
+      "step": 700
+    },
+    {
+      "entropy": 0.5117329221218825,
+      "epoch": 1.736270368135184,
+      "grad_norm": 1.6593103408813477,
+      "learning_rate": 0.00013344145347246906,
+      "loss": 2.003920555114746,
+      "mean_token_accuracy": 0.8636307917535305,
+      "num_tokens": 1693392.0,
+      "step": 720
+    },
+    {
+      "epoch": 1.736270368135184,
+      "eval_entropy": 0.5288207604644004,
+      "eval_loss": 0.5156714916229248,
+      "eval_mean_token_accuracy": 0.8617460369394067,
+      "eval_num_tokens": 1693392.0,
+      "eval_runtime": 90.7698,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 720
+    },
+    {
+      "entropy": 0.5143411785364151,
+      "epoch": 1.7845503922751962,
+      "grad_norm": 2.080177068710327,
+      "learning_rate": 0.00013314360781350998,
+      "loss": 1.994948959350586,
+      "mean_token_accuracy": 0.8643602155148983,
+      "num_tokens": 1742358.0,
+      "step": 740
+    },
+    {
+      "epoch": 1.7845503922751962,
+      "eval_entropy": 0.5050565709223908,
+      "eval_loss": 0.5188468098640442,
+      "eval_mean_token_accuracy": 0.8601690252845207,
+      "eval_num_tokens": 1742358.0,
+      "eval_runtime": 90.7641,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 740
+    },
+    {
+      "entropy": 0.5174011919647455,
+      "epoch": 1.832830416415208,
+      "grad_norm": 3.259908676147461,
+      "learning_rate": 0.00013282727728710375,
+      "loss": 1.9772701263427734,
+      "mean_token_accuracy": 0.8646314896643161,
+      "num_tokens": 1786930.0,
+      "step": 760
+    },
+    {
+      "epoch": 1.832830416415208,
+      "eval_entropy": 0.4937750380695536,
+      "eval_loss": 0.5224619507789612,
+      "eval_mean_token_accuracy": 0.8592762418007582,
+      "eval_num_tokens": 1786930.0,
+      "eval_runtime": 90.7224,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 760
+    },
+    {
+      "entropy": 0.5243690617382526,
+      "epoch": 1.8811104405552204,
+      "grad_norm": 2.209547519683838,
+      "learning_rate": 0.00013249255141104747,
+      "loss": 2.0030281066894533,
+      "mean_token_accuracy": 0.8628844127058983,
+      "num_tokens": 1833956.0,
+      "step": 780
+    },
+    {
+      "epoch": 1.8811104405552204,
+      "eval_entropy": 0.5570755493774843,
+      "eval_loss": 0.5178046226501465,
+      "eval_mean_token_accuracy": 0.8601498302449001,
+      "eval_num_tokens": 1833956.0,
+      "eval_runtime": 90.7399,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 780
+    },
+    {
+      "entropy": 0.5075355738401413,
+      "epoch": 1.9293904646952322,
+      "grad_norm": 1.8813495635986328,
+      "learning_rate": 0.00013213952490880468,
+      "loss": 1.9060043334960937,
+      "mean_token_accuracy": 0.8672933347523213,
+      "num_tokens": 1881345.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "eval_entropy": 0.5167921193864908,
+      "eval_loss": 0.5141814947128296,
+      "eval_mean_token_accuracy": 0.8620959691117319,
+      "eval_num_tokens": 1881345.0,
+      "eval_runtime": 90.7632,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 800
+    },
+    {
+      "entropy": 0.5104774657636881,
+      "epoch": 1.9776704888352445,
+      "grad_norm": 2.2347588539123535,
+      "learning_rate": 0.0001317682976826996,
+      "loss": 1.9154193878173829,
+      "mean_token_accuracy": 0.8677295126020909,
+      "num_tokens": 1926308.0,
+      "step": 820
+    },
+    {
+      "epoch": 1.9776704888352445,
+      "eval_entropy": 0.4975446199768045,
+      "eval_loss": 0.5171827077865601,
+      "eval_mean_token_accuracy": 0.8614644890420893,
+      "eval_num_tokens": 1926308.0,
+      "eval_runtime": 90.7332,
+      "eval_samples_per_second": 15.65,
+      "eval_steps_per_second": 1.962,
+      "step": 820
+    },
+    {
+      "entropy": 0.4617252717544506,
+      "epoch": 2.024140012070006,
+      "grad_norm": 2.3023998737335205,
+      "learning_rate": 0.00013137897478564603,
+      "loss": 1.672403907775879,
+      "mean_token_accuracy": 0.877363781650345,
+      "num_tokens": 1972496.0,
+      "step": 840
+    },
+    {
+      "epoch": 2.024140012070006,
+      "eval_entropy": 0.4930287114020144,
+      "eval_loss": 0.5240046977996826,
+      "eval_mean_token_accuracy": 0.8597234454047814,
+      "eval_num_tokens": 1972496.0,
+      "eval_runtime": 90.7242,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.962,
+      "step": 840
+    },
+    {
+      "entropy": 0.41189998425543306,
+      "epoch": 2.0724200362100182,
+      "grad_norm": 2.5921578407287598,
+      "learning_rate": 0.00013097166639141857,
+      "loss": 1.5435317993164062,
+      "mean_token_accuracy": 0.8864825963973999,
+      "num_tokens": 2020733.0,
+      "step": 860
+    },
+    {
+      "epoch": 2.0724200362100182,
+      "eval_entropy": 0.46020560820450945,
+      "eval_loss": 0.5281100869178772,
+      "eval_mean_token_accuracy": 0.8605042665861966,
+      "eval_num_tokens": 2020733.0,
+      "eval_runtime": 90.7546,
+      "eval_samples_per_second": 15.647,
+      "eval_steps_per_second": 1.961,
+      "step": 860
+    },
+    {
+      "entropy": 0.4227153487503529,
+      "epoch": 2.12070006035003,
+      "grad_norm": 2.2209794521331787,
+      "learning_rate": 0.0001305464877634748,
+      "loss": 1.571579933166504,
+      "mean_token_accuracy": 0.8854078397154808,
+      "num_tokens": 2066856.0,
+      "step": 880
+    },
+    {
+      "epoch": 2.12070006035003,
+      "eval_entropy": 0.4408075308866715,
+      "eval_loss": 0.534494161605835,
+      "eval_mean_token_accuracy": 0.8604544247134348,
+      "eval_num_tokens": 2066856.0,
+      "eval_runtime": 90.8502,
+      "eval_samples_per_second": 15.63,
+      "eval_steps_per_second": 1.959,
+      "step": 880
+    },
+    {
+      "entropy": 0.40760905370116235,
+      "epoch": 2.1689800844900424,
+      "grad_norm": 2.571462631225586,
+      "learning_rate": 0.00013010355922233707,
+      "loss": 1.5575182914733887,
+      "mean_token_accuracy": 0.8846474155783653,
+      "num_tokens": 2117470.0,
+      "step": 900
+    },
+    {
+      "epoch": 2.1689800844900424,
+      "eval_entropy": 0.4561347976494371,
+      "eval_loss": 0.5359405875205994,
+      "eval_mean_token_accuracy": 0.8610902686467331,
+      "eval_num_tokens": 2117470.0,
+      "eval_runtime": 90.7395,
+      "eval_samples_per_second": 15.649,
+      "eval_steps_per_second": 1.962,
+      "step": 900
+    },
+    {
+      "entropy": 0.4111258488148451,
+      "epoch": 2.2172601086300543,
+      "grad_norm": 1.8378095626831055,
+      "learning_rate": 0.00012964300611154316,
+      "loss": 1.538413143157959,
+      "mean_token_accuracy": 0.8867764480412006,
+      "num_tokens": 2169713.0,
+      "step": 920
+    },
+    {
+      "epoch": 2.2172601086300543,
+      "eval_entropy": 0.43873994337039046,
+      "eval_loss": 0.5272142887115479,
+      "eval_mean_token_accuracy": 0.8617157025283642,
+      "eval_num_tokens": 2169713.0,
+      "eval_runtime": 90.7602,
+      "eval_samples_per_second": 15.646,
+      "eval_steps_per_second": 1.961,
+      "step": 920
+    },
+    {
+      "entropy": 0.4261228807270527,
+      "epoch": 2.2655401327700666,
+      "grad_norm": 2.3900320529937744,
+      "learning_rate": 0.0001291649587621756,
+      "loss": 1.58123836517334,
+      "mean_token_accuracy": 0.8852489396929741,
+      "num_tokens": 2211210.0,
+      "step": 940
+    },
+    {
+      "epoch": 2.2655401327700666,
+      "eval_entropy": 0.40514066028461027,
+      "eval_loss": 0.5390793681144714,
+      "eval_mean_token_accuracy": 0.8620827519491817,
+      "eval_num_tokens": 2211210.0,
+      "eval_runtime": 90.761,
+      "eval_samples_per_second": 15.645,
+      "eval_steps_per_second": 1.961,
+      "step": 940
+    },
+    {
+      "entropy": 0.4236912790685892,
+      "epoch": 2.3138201569100785,
+      "grad_norm": 1.635823130607605,
+      "learning_rate": 0.00012866955245597952,
+      "loss": 1.5851353645324706,
+      "mean_token_accuracy": 0.8841134652495384,
+      "num_tokens": 2256672.0,
+      "step": 960
+    },
+    {
+      "epoch": 2.3138201569100785,
+      "eval_entropy": 0.46641212182768277,
+      "eval_loss": 0.5175439119338989,
+      "eval_mean_token_accuracy": 0.8625401358925895,
+      "eval_num_tokens": 2256672.0,
+      "eval_runtime": 90.7723,
+      "eval_samples_per_second": 15.644,
+      "eval_steps_per_second": 1.961,
+      "step": 960
+    },
+    {
+      "entropy": 0.4201499901711941,
+      "epoch": 2.3621001810500903,
+      "grad_norm": 2.2045373916625977,
+      "learning_rate": 0.0001281569273870795,
+      "loss": 1.603045654296875,
+      "mean_token_accuracy": 0.8843393631279468,
+      "num_tokens": 2302305.0,
+      "step": 980
+    },
+    {
+      "epoch": 2.3621001810500903,
+      "eval_entropy": 0.45320005028435356,
+      "eval_loss": 0.5284702181816101,
+      "eval_mean_token_accuracy": 0.8614657628402281,
+      "eval_num_tokens": 2302305.0,
+      "eval_runtime": 90.784,
+      "eval_samples_per_second": 15.642,
+      "eval_steps_per_second": 1.961,
+      "step": 980
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 4150,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 20,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.214631996501044e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}