K2triinK committed 19 days ago

Commit

d54e212

verified ·

1 Parent(s): 912fc28

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/README.md +58 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/README.md +58 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/README.md +209 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/adapter_config.json +40 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/tokenizer_config.json +54 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/trainer_state.json +297 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/README.md +209 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/adapter_config.json +40 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/tokenizer_config.json +54 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/trainer_state.json +378 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/README.md +209 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/adapter_config.json +40 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/tokenizer_config.json +54 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/trainer_state.json +469 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/README.md +209 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/adapter_config.json +40 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/tokenizer_config.json +54 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/trainer_state.json +560 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/README.md +209 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/adapter_config.json +40 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/tokenizer_config.json +54 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/trainer_state.json +641 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/README.md +209 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/adapter_config.json +40 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/tokenizer_config.json +54 -0
DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/trainer_state.json +732 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/README.md +58 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/README.md +209 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/adapter_config.json +40 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/tokenizer_config.json +54 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/trainer_state.json +287 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/README.md +209 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/adapter_config.json +40 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/tokenizer_config.json +54 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/trainer_state.json +368 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/README.md +209 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/adapter_config.json +40 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/tokenizer_config.json +54 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/trainer_state.json +459 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/README.md +209 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/adapter_config.json +40 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/tokenizer_config.json +54 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/trainer_state.json +540 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/README.md +209 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/adapter_config.json +40 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/tokenizer_config.json +54 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/trainer_state.json +631 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/README.md +209 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/adapter_config.json +40 -0
DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/tokenizer_config.json +54 -0

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+base_model: google/gemma-4-31B
+library_name: transformers
+model_name: gemma-4-31B_original_features_structural_train_original_features_structural_test1
+tags:
+- generated_from_trainer
+- trl
+- sft
+licence: license
+---
+# Model Card for gemma-4-31B_original_features_structural_train_original_features_structural_test1
+This model is a fine-tuned version of [google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="path/to/gemma-4-31B_original_features_structural_train_original_features_structural_test1", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/katriin-kukk/Cross_lingual_morphological_generalization/runs/sfblzvnx)
+This model was trained with SFT.
+### Framework versions
+- TRL: 0.29.0
+- Transformers: 5.5.4
+- Pytorch: 2.10.0
+- Datasets: 4.6.1
+- Tokenizers: 0.22.2
+## Citations
+Cite TRL as:
+```bibtex
+@software{vonwerra2020trl,
+  title   = {{TRL: Transformers Reinforcement Learning}},
+  author  = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
+  license = {Apache-2.0},
+  url     = {https://github.com/huggingface/trl},
+  year    = {2020}
+}
+```

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+base_model: google/gemma-4-31B
+library_name: transformers
+model_name: gemma-4-31B_original_features_structural_train_original_features_structural_test2
+tags:
+- generated_from_trainer
+- trl
+- sft
+licence: license
+---
+# Model Card for gemma-4-31B_original_features_structural_train_original_features_structural_test2
+This model is a fine-tuned version of [google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="path/to/gemma-4-31B_original_features_structural_train_original_features_structural_test2", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/katriin-kukk/Cross_lingual_morphological_generalization/runs/ncgnoczk)
+This model was trained with SFT.
+### Framework versions
+- TRL: 0.29.0
+- Transformers: 5.5.4
+- Pytorch: 2.10.0
+- Datasets: 4.6.1
+- Tokenizers: 0.22.2
+## Citations
+Cite TRL as:
+```bibtex
+@software{vonwerra2020trl,
+  title   = {{TRL: Transformers Reinforcement Learning}},
+  author  = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
+  license = {Apache-2.0},
+  url     = {https://github.com/huggingface/trl},
+  year    = {2020}
+}
+```

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1155/trainer_state.json ADDED Viewed

	@@ -0,0 +1,297 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1155,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 9.957948339009064e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1540/trainer_state.json ADDED Viewed

	@@ -0,0 +1,378 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 1540,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    },
+    {
+      "entropy": 0.37655152073457615,
+      "epoch": 3.116959064327485,
+      "grad_norm": 1.504384160041809,
+      "learning_rate": 0.0003313287743759729,
+      "loss": 1.3653451538085937,
+      "mean_token_accuracy": 0.8971295344769655,
+      "num_tokens": 2939773.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.37069276951253416,
+      "epoch": 3.246913580246914,
+      "grad_norm": 1.9665946960449219,
+      "learning_rate": 0.0003253772935629151,
+      "loss": 1.3458108520507812,
+      "mean_token_accuracy": 0.8982205548882485,
+      "num_tokens": 3063617.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.37295883789658546,
+      "epoch": 3.3768680961663415,
+      "grad_norm": 1.7501362562179565,
+      "learning_rate": 0.00031914855214759165,
+      "loss": 1.357562255859375,
+      "mean_token_accuracy": 0.8977113124728203,
+      "num_tokens": 3189800.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.3805788069963455,
+      "epoch": 3.50682261208577,
+      "grad_norm": 1.7277154922485352,
+      "learning_rate": 0.00031265534863374894,
+      "loss": 1.3735618591308594,
+      "mean_token_accuracy": 0.8962143072485924,
+      "num_tokens": 3311908.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.3840580120682716,
+      "epoch": 3.636777128005198,
+      "grad_norm": 2.2338802814483643,
+      "learning_rate": 0.0003059110249285165,
+      "loss": 1.3903216552734374,
+      "mean_token_accuracy": 0.8958476388454437,
+      "num_tokens": 3432934.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.37621145449578763,
+      "epoch": 3.7667316439246266,
+      "grad_norm": 1.9029661417007446,
+      "learning_rate": 0.00029892943892812944,
+      "loss": 1.3776657104492187,
+      "mean_token_accuracy": 0.8964926180243492,
+      "num_tokens": 3561408.0,
+      "step": 1450
+    },
+    {
+      "entropy": 0.3784803995490074,
+      "epoch": 3.8966861598440543,
+      "grad_norm": 2.089708089828491,
+      "learning_rate": 0.00029172493604342163,
+      "loss": 1.3816807556152344,
+      "mean_token_accuracy": 0.8962833172082901,
+      "num_tokens": 3684624.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.4351254403591156,
+      "eval_loss": 0.5814722180366516,
+      "eval_mean_token_accuracy": 0.8530604747625498,
+      "eval_num_tokens": 3779128.0,
+      "eval_runtime": 90.2232,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 1540
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.3259599564032195e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-1925/trainer_state.json ADDED Viewed

	@@ -0,0 +1,469 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 1925,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    },
+    {
+      "entropy": 0.37655152073457615,
+      "epoch": 3.116959064327485,
+      "grad_norm": 1.504384160041809,
+      "learning_rate": 0.0003313287743759729,
+      "loss": 1.3653451538085937,
+      "mean_token_accuracy": 0.8971295344769655,
+      "num_tokens": 2939773.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.37069276951253416,
+      "epoch": 3.246913580246914,
+      "grad_norm": 1.9665946960449219,
+      "learning_rate": 0.0003253772935629151,
+      "loss": 1.3458108520507812,
+      "mean_token_accuracy": 0.8982205548882485,
+      "num_tokens": 3063617.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.37295883789658546,
+      "epoch": 3.3768680961663415,
+      "grad_norm": 1.7501362562179565,
+      "learning_rate": 0.00031914855214759165,
+      "loss": 1.357562255859375,
+      "mean_token_accuracy": 0.8977113124728203,
+      "num_tokens": 3189800.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.3805788069963455,
+      "epoch": 3.50682261208577,
+      "grad_norm": 1.7277154922485352,
+      "learning_rate": 0.00031265534863374894,
+      "loss": 1.3735618591308594,
+      "mean_token_accuracy": 0.8962143072485924,
+      "num_tokens": 3311908.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.3840580120682716,
+      "epoch": 3.636777128005198,
+      "grad_norm": 2.2338802814483643,
+      "learning_rate": 0.0003059110249285165,
+      "loss": 1.3903216552734374,
+      "mean_token_accuracy": 0.8958476388454437,
+      "num_tokens": 3432934.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.37621145449578763,
+      "epoch": 3.7667316439246266,
+      "grad_norm": 1.9029661417007446,
+      "learning_rate": 0.00029892943892812944,
+      "loss": 1.3776657104492187,
+      "mean_token_accuracy": 0.8964926180243492,
+      "num_tokens": 3561408.0,
+      "step": 1450
+    },
+    {
+      "entropy": 0.3784803995490074,
+      "epoch": 3.8966861598440543,
+      "grad_norm": 2.089708089828491,
+      "learning_rate": 0.00029172493604342163,
+      "loss": 1.3816807556152344,
+      "mean_token_accuracy": 0.8962833172082901,
+      "num_tokens": 3684624.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.4351254403591156,
+      "eval_loss": 0.5814722180366516,
+      "eval_mean_token_accuracy": 0.8530604747625498,
+      "eval_num_tokens": 3779128.0,
+      "eval_runtime": 90.2232,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 1540
+    },
+    {
+      "entropy": 0.36326556409423677,
+      "epoch": 4.025990903183885,
+      "grad_norm": 2.1354947090148926,
+      "learning_rate": 0.0002843123197235993,
+      "loss": 1.3295362854003907,
+      "mean_token_accuracy": 0.8993093811686913,
+      "num_tokens": 3804993.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.2879397062957287,
+      "epoch": 4.155945419103314,
+      "grad_norm": 2.201097011566162,
+      "learning_rate": 0.0002767068210388601,
+      "loss": 1.0272974395751953,
+      "mean_token_accuracy": 0.9182627710700035,
+      "num_tokens": 3928162.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.2848948486149311,
+      "epoch": 4.2858999350227425,
+      "grad_norm": 2.01479172706604,
+      "learning_rate": 0.000268924067384358,
+      "loss": 1.0278727722167968,
+      "mean_token_accuracy": 0.9194766515493393,
+      "num_tokens": 4049012.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.2940504560619593,
+      "epoch": 4.41585445094217,
+      "grad_norm": 2.0893027782440186,
+      "learning_rate": 0.00026098005036982003,
+      "loss": 1.0586751556396485,
+      "mean_token_accuracy": 0.9167885810136795,
+      "num_tokens": 4167845.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.293505182415247,
+      "epoch": 4.545808966861598,
+      "grad_norm": 1.6346389055252075,
+      "learning_rate": 0.0002528910929607928,
+      "loss": 1.0669570922851563,
+      "mean_token_accuracy": 0.9160876458883286,
+      "num_tokens": 4287505.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.2898535231500864,
+      "epoch": 4.675763482781027,
+      "grad_norm": 1.6645033359527588,
+      "learning_rate": 0.0002446738159390364,
+      "loss": 1.0582612609863282,
+      "mean_token_accuracy": 0.9177632886171341,
+      "num_tokens": 4412221.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.2842763290554285,
+      "epoch": 4.805717998700455,
+      "grad_norm": 2.4594268798828125,
+      "learning_rate": 0.0002363451037509798,
+      "loss": 1.0467537689208983,
+      "mean_token_accuracy": 0.9177608361840248,
+      "num_tokens": 4537178.0,
+      "step": 1850
+    },
+    {
+      "entropy": 0.284430123642087,
+      "epoch": 4.935672514619883,
+      "grad_norm": 2.1724514961242676,
+      "learning_rate": 0.00022792206981441223,
+      "loss": 1.0753899383544923,
+      "mean_token_accuracy": 0.915192686021328,
+      "num_tokens": 4664196.0,
+      "step": 1900
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.3632780872285366,
+      "eval_loss": 0.6438126564025879,
+      "eval_mean_token_accuracy": 0.8511462942338907,
+      "eval_num_tokens": 4723910.0,
+      "eval_runtime": 90.1846,
+      "eval_samples_per_second": 18.373,
+      "eval_steps_per_second": 2.306,
+      "step": 1925
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.6564080889424607e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2310/trainer_state.json ADDED Viewed

	@@ -0,0 +1,560 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 2310,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    },
+    {
+      "entropy": 0.37655152073457615,
+      "epoch": 3.116959064327485,
+      "grad_norm": 1.504384160041809,
+      "learning_rate": 0.0003313287743759729,
+      "loss": 1.3653451538085937,
+      "mean_token_accuracy": 0.8971295344769655,
+      "num_tokens": 2939773.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.37069276951253416,
+      "epoch": 3.246913580246914,
+      "grad_norm": 1.9665946960449219,
+      "learning_rate": 0.0003253772935629151,
+      "loss": 1.3458108520507812,
+      "mean_token_accuracy": 0.8982205548882485,
+      "num_tokens": 3063617.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.37295883789658546,
+      "epoch": 3.3768680961663415,
+      "grad_norm": 1.7501362562179565,
+      "learning_rate": 0.00031914855214759165,
+      "loss": 1.357562255859375,
+      "mean_token_accuracy": 0.8977113124728203,
+      "num_tokens": 3189800.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.3805788069963455,
+      "epoch": 3.50682261208577,
+      "grad_norm": 1.7277154922485352,
+      "learning_rate": 0.00031265534863374894,
+      "loss": 1.3735618591308594,
+      "mean_token_accuracy": 0.8962143072485924,
+      "num_tokens": 3311908.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.3840580120682716,
+      "epoch": 3.636777128005198,
+      "grad_norm": 2.2338802814483643,
+      "learning_rate": 0.0003059110249285165,
+      "loss": 1.3903216552734374,
+      "mean_token_accuracy": 0.8958476388454437,
+      "num_tokens": 3432934.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.37621145449578763,
+      "epoch": 3.7667316439246266,
+      "grad_norm": 1.9029661417007446,
+      "learning_rate": 0.00029892943892812944,
+      "loss": 1.3776657104492187,
+      "mean_token_accuracy": 0.8964926180243492,
+      "num_tokens": 3561408.0,
+      "step": 1450
+    },
+    {
+      "entropy": 0.3784803995490074,
+      "epoch": 3.8966861598440543,
+      "grad_norm": 2.089708089828491,
+      "learning_rate": 0.00029172493604342163,
+      "loss": 1.3816807556152344,
+      "mean_token_accuracy": 0.8962833172082901,
+      "num_tokens": 3684624.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.4351254403591156,
+      "eval_loss": 0.5814722180366516,
+      "eval_mean_token_accuracy": 0.8530604747625498,
+      "eval_num_tokens": 3779128.0,
+      "eval_runtime": 90.2232,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 1540
+    },
+    {
+      "entropy": 0.36326556409423677,
+      "epoch": 4.025990903183885,
+      "grad_norm": 2.1354947090148926,
+      "learning_rate": 0.0002843123197235993,
+      "loss": 1.3295362854003907,
+      "mean_token_accuracy": 0.8993093811686913,
+      "num_tokens": 3804993.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.2879397062957287,
+      "epoch": 4.155945419103314,
+      "grad_norm": 2.201097011566162,
+      "learning_rate": 0.0002767068210388601,
+      "loss": 1.0272974395751953,
+      "mean_token_accuracy": 0.9182627710700035,
+      "num_tokens": 3928162.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.2848948486149311,
+      "epoch": 4.2858999350227425,
+      "grad_norm": 2.01479172706604,
+      "learning_rate": 0.000268924067384358,
+      "loss": 1.0278727722167968,
+      "mean_token_accuracy": 0.9194766515493393,
+      "num_tokens": 4049012.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.2940504560619593,
+      "epoch": 4.41585445094217,
+      "grad_norm": 2.0893027782440186,
+      "learning_rate": 0.00026098005036982003,
+      "loss": 1.0586751556396485,
+      "mean_token_accuracy": 0.9167885810136795,
+      "num_tokens": 4167845.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.293505182415247,
+      "epoch": 4.545808966861598,
+      "grad_norm": 1.6346389055252075,
+      "learning_rate": 0.0002528910929607928,
+      "loss": 1.0669570922851563,
+      "mean_token_accuracy": 0.9160876458883286,
+      "num_tokens": 4287505.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.2898535231500864,
+      "epoch": 4.675763482781027,
+      "grad_norm": 1.6645033359527588,
+      "learning_rate": 0.0002446738159390364,
+      "loss": 1.0582612609863282,
+      "mean_token_accuracy": 0.9177632886171341,
+      "num_tokens": 4412221.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.2842763290554285,
+      "epoch": 4.805717998700455,
+      "grad_norm": 2.4594268798828125,
+      "learning_rate": 0.0002363451037509798,
+      "loss": 1.0467537689208983,
+      "mean_token_accuracy": 0.9177608361840248,
+      "num_tokens": 4537178.0,
+      "step": 1850
+    },
+    {
+      "entropy": 0.284430123642087,
+      "epoch": 4.935672514619883,
+      "grad_norm": 2.1724514961242676,
+      "learning_rate": 0.00022792206981441223,
+      "loss": 1.0753899383544923,
+      "mean_token_accuracy": 0.915192686021328,
+      "num_tokens": 4664196.0,
+      "step": 1900
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.3632780872285366,
+      "eval_loss": 0.6438126564025879,
+      "eval_mean_token_accuracy": 0.8511462942338907,
+      "eval_num_tokens": 4723910.0,
+      "eval_runtime": 90.1846,
+      "eval_samples_per_second": 18.373,
+      "eval_steps_per_second": 2.306,
+      "step": 1925
+    },
+    {
+      "entropy": 0.23515464736139355,
+      "epoch": 5.064977257959714,
+      "grad_norm": 1.651587724685669,
+      "learning_rate": 0.00021942202135469513,
+      "loss": 0.8597064971923828,
+      "mean_token_accuracy": 0.9324622603517082,
+      "num_tokens": 4789568.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.1958953895419836,
+      "epoch": 5.1949317738791425,
+      "grad_norm": 1.923292636871338,
+      "learning_rate": 0.0002108624238427481,
+      "loss": 0.7188112640380859,
+      "mean_token_accuracy": 0.9416415295004845,
+      "num_tokens": 4913407.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.21068542070686816,
+      "epoch": 5.32488628979857,
+      "grad_norm": 2.299356460571289,
+      "learning_rate": 0.0002022608651078804,
+      "loss": 0.7712985229492187,
+      "mean_token_accuracy": 0.9386440163850784,
+      "num_tokens": 5032951.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.21234643168747425,
+      "epoch": 5.454840805717999,
+      "grad_norm": 2.2119295597076416,
+      "learning_rate": 0.00019363501919920608,
+      "loss": 0.7650181579589844,
+      "mean_token_accuracy": 0.938471505343914,
+      "num_tokens": 5156908.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.21658269092440605,
+      "epoch": 5.584795321637427,
+      "grad_norm": 1.5394288301467896,
+      "learning_rate": 0.00018500261006989887,
+      "loss": 0.7784209442138672,
+      "mean_token_accuracy": 0.9371598136425018,
+      "num_tokens": 5276087.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.2045296123996377,
+      "epoch": 5.714749837556855,
+      "grad_norm": 1.913680076599121,
+      "learning_rate": 0.00017638137515890763,
+      "loss": 0.7638166046142578,
+      "mean_token_accuracy": 0.9378301629424095,
+      "num_tokens": 5398787.0,
+      "step": 2200
+    },
+    {
+      "entropy": 0.20917976945638656,
+      "epoch": 5.844704353476283,
+      "grad_norm": 2.0847299098968506,
+      "learning_rate": 0.00016778902894496063,
+      "loss": 0.7631703186035156,
+      "mean_token_accuracy": 0.9387557968497277,
+      "num_tokens": 5522332.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.22262076318264007,
+      "epoch": 5.974658869395712,
+      "grad_norm": 2.1597352027893066,
+      "learning_rate": 0.0001592432265477485,
+      "loss": 0.798133773803711,
+      "mean_token_accuracy": 0.936034984588623,
+      "num_tokens": 5642361.0,
+      "step": 2300
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.31502799331568754,
+      "eval_loss": 0.7417300343513489,
+      "eval_mean_token_accuracy": 0.8477253922476218,
+      "eval_num_tokens": 5668692.0,
+      "eval_runtime": 90.4252,
+      "eval_samples_per_second": 18.325,
+      "eval_steps_per_second": 2.3,
+      "step": 2310
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.9871331143277489e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-2695/trainer_state.json ADDED Viewed

	@@ -0,0 +1,641 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 2695,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    },
+    {
+      "entropy": 0.37655152073457615,
+      "epoch": 3.116959064327485,
+      "grad_norm": 1.504384160041809,
+      "learning_rate": 0.0003313287743759729,
+      "loss": 1.3653451538085937,
+      "mean_token_accuracy": 0.8971295344769655,
+      "num_tokens": 2939773.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.37069276951253416,
+      "epoch": 3.246913580246914,
+      "grad_norm": 1.9665946960449219,
+      "learning_rate": 0.0003253772935629151,
+      "loss": 1.3458108520507812,
+      "mean_token_accuracy": 0.8982205548882485,
+      "num_tokens": 3063617.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.37295883789658546,
+      "epoch": 3.3768680961663415,
+      "grad_norm": 1.7501362562179565,
+      "learning_rate": 0.00031914855214759165,
+      "loss": 1.357562255859375,
+      "mean_token_accuracy": 0.8977113124728203,
+      "num_tokens": 3189800.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.3805788069963455,
+      "epoch": 3.50682261208577,
+      "grad_norm": 1.7277154922485352,
+      "learning_rate": 0.00031265534863374894,
+      "loss": 1.3735618591308594,
+      "mean_token_accuracy": 0.8962143072485924,
+      "num_tokens": 3311908.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.3840580120682716,
+      "epoch": 3.636777128005198,
+      "grad_norm": 2.2338802814483643,
+      "learning_rate": 0.0003059110249285165,
+      "loss": 1.3903216552734374,
+      "mean_token_accuracy": 0.8958476388454437,
+      "num_tokens": 3432934.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.37621145449578763,
+      "epoch": 3.7667316439246266,
+      "grad_norm": 1.9029661417007446,
+      "learning_rate": 0.00029892943892812944,
+      "loss": 1.3776657104492187,
+      "mean_token_accuracy": 0.8964926180243492,
+      "num_tokens": 3561408.0,
+      "step": 1450
+    },
+    {
+      "entropy": 0.3784803995490074,
+      "epoch": 3.8966861598440543,
+      "grad_norm": 2.089708089828491,
+      "learning_rate": 0.00029172493604342163,
+      "loss": 1.3816807556152344,
+      "mean_token_accuracy": 0.8962833172082901,
+      "num_tokens": 3684624.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.4351254403591156,
+      "eval_loss": 0.5814722180366516,
+      "eval_mean_token_accuracy": 0.8530604747625498,
+      "eval_num_tokens": 3779128.0,
+      "eval_runtime": 90.2232,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 1540
+    },
+    {
+      "entropy": 0.36326556409423677,
+      "epoch": 4.025990903183885,
+      "grad_norm": 2.1354947090148926,
+      "learning_rate": 0.0002843123197235993,
+      "loss": 1.3295362854003907,
+      "mean_token_accuracy": 0.8993093811686913,
+      "num_tokens": 3804993.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.2879397062957287,
+      "epoch": 4.155945419103314,
+      "grad_norm": 2.201097011566162,
+      "learning_rate": 0.0002767068210388601,
+      "loss": 1.0272974395751953,
+      "mean_token_accuracy": 0.9182627710700035,
+      "num_tokens": 3928162.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.2848948486149311,
+      "epoch": 4.2858999350227425,
+      "grad_norm": 2.01479172706604,
+      "learning_rate": 0.000268924067384358,
+      "loss": 1.0278727722167968,
+      "mean_token_accuracy": 0.9194766515493393,
+      "num_tokens": 4049012.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.2940504560619593,
+      "epoch": 4.41585445094217,
+      "grad_norm": 2.0893027782440186,
+      "learning_rate": 0.00026098005036982003,
+      "loss": 1.0586751556396485,
+      "mean_token_accuracy": 0.9167885810136795,
+      "num_tokens": 4167845.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.293505182415247,
+      "epoch": 4.545808966861598,
+      "grad_norm": 1.6346389055252075,
+      "learning_rate": 0.0002528910929607928,
+      "loss": 1.0669570922851563,
+      "mean_token_accuracy": 0.9160876458883286,
+      "num_tokens": 4287505.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.2898535231500864,
+      "epoch": 4.675763482781027,
+      "grad_norm": 1.6645033359527588,
+      "learning_rate": 0.0002446738159390364,
+      "loss": 1.0582612609863282,
+      "mean_token_accuracy": 0.9177632886171341,
+      "num_tokens": 4412221.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.2842763290554285,
+      "epoch": 4.805717998700455,
+      "grad_norm": 2.4594268798828125,
+      "learning_rate": 0.0002363451037509798,
+      "loss": 1.0467537689208983,
+      "mean_token_accuracy": 0.9177608361840248,
+      "num_tokens": 4537178.0,
+      "step": 1850
+    },
+    {
+      "entropy": 0.284430123642087,
+      "epoch": 4.935672514619883,
+      "grad_norm": 2.1724514961242676,
+      "learning_rate": 0.00022792206981441223,
+      "loss": 1.0753899383544923,
+      "mean_token_accuracy": 0.915192686021328,
+      "num_tokens": 4664196.0,
+      "step": 1900
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.3632780872285366,
+      "eval_loss": 0.6438126564025879,
+      "eval_mean_token_accuracy": 0.8511462942338907,
+      "eval_num_tokens": 4723910.0,
+      "eval_runtime": 90.1846,
+      "eval_samples_per_second": 18.373,
+      "eval_steps_per_second": 2.306,
+      "step": 1925
+    },
+    {
+      "entropy": 0.23515464736139355,
+      "epoch": 5.064977257959714,
+      "grad_norm": 1.651587724685669,
+      "learning_rate": 0.00021942202135469513,
+      "loss": 0.8597064971923828,
+      "mean_token_accuracy": 0.9324622603517082,
+      "num_tokens": 4789568.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.1958953895419836,
+      "epoch": 5.1949317738791425,
+      "grad_norm": 1.923292636871338,
+      "learning_rate": 0.0002108624238427481,
+      "loss": 0.7188112640380859,
+      "mean_token_accuracy": 0.9416415295004845,
+      "num_tokens": 4913407.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.21068542070686816,
+      "epoch": 5.32488628979857,
+      "grad_norm": 2.299356460571289,
+      "learning_rate": 0.0002022608651078804,
+      "loss": 0.7712985229492187,
+      "mean_token_accuracy": 0.9386440163850784,
+      "num_tokens": 5032951.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.21234643168747425,
+      "epoch": 5.454840805717999,
+      "grad_norm": 2.2119295597076416,
+      "learning_rate": 0.00019363501919920608,
+      "loss": 0.7650181579589844,
+      "mean_token_accuracy": 0.938471505343914,
+      "num_tokens": 5156908.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.21658269092440605,
+      "epoch": 5.584795321637427,
+      "grad_norm": 1.5394288301467896,
+      "learning_rate": 0.00018500261006989887,
+      "loss": 0.7784209442138672,
+      "mean_token_accuracy": 0.9371598136425018,
+      "num_tokens": 5276087.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.2045296123996377,
+      "epoch": 5.714749837556855,
+      "grad_norm": 1.913680076599121,
+      "learning_rate": 0.00017638137515890763,
+      "loss": 0.7638166046142578,
+      "mean_token_accuracy": 0.9378301629424095,
+      "num_tokens": 5398787.0,
+      "step": 2200
+    },
+    {
+      "entropy": 0.20917976945638656,
+      "epoch": 5.844704353476283,
+      "grad_norm": 2.0847299098968506,
+      "learning_rate": 0.00016778902894496063,
+      "loss": 0.7631703186035156,
+      "mean_token_accuracy": 0.9387557968497277,
+      "num_tokens": 5522332.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.22262076318264007,
+      "epoch": 5.974658869395712,
+      "grad_norm": 2.1597352027893066,
+      "learning_rate": 0.0001592432265477485,
+      "loss": 0.798133773803711,
+      "mean_token_accuracy": 0.936034984588623,
+      "num_tokens": 5642361.0,
+      "step": 2300
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.31502799331568754,
+      "eval_loss": 0.7417300343513489,
+      "eval_mean_token_accuracy": 0.8477253922476218,
+      "eval_num_tokens": 5668692.0,
+      "eval_runtime": 90.4252,
+      "eval_samples_per_second": 18.325,
+      "eval_steps_per_second": 2.3,
+      "step": 2310
+    },
+    {
+      "entropy": 0.16796037876725795,
+      "epoch": 6.1039636127355426,
+      "grad_norm": 2.2228569984436035,
+      "learning_rate": 0.00015076152745107442,
+      "loss": 0.5835284805297851,
+      "mean_token_accuracy": 0.9529892874123463,
+      "num_tokens": 5766129.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.14919219192117453,
+      "epoch": 6.23391812865497,
+      "grad_norm": 1.408840298652649,
+      "learning_rate": 0.00014236135942251215,
+      "loss": 0.5310631561279296,
+      "mean_token_accuracy": 0.9586454060673714,
+      "num_tokens": 5888746.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.1499051059409976,
+      "epoch": 6.363872644574399,
+      "grad_norm": 1.8611102104187012,
+      "learning_rate": 0.00013405998270370849,
+      "loss": 0.5127810668945313,
+      "mean_token_accuracy": 0.9591325157880783,
+      "num_tokens": 6014455.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.15334193099290133,
+      "epoch": 6.493827160493828,
+      "grad_norm": 1.6051015853881836,
+      "learning_rate": 0.00012587445454490892,
+      "loss": 0.5349758529663086,
+      "mean_token_accuracy": 0.9574431091547012,
+      "num_tokens": 6141229.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.15982334002852439,
+      "epoch": 6.623781676413255,
+      "grad_norm": 3.7065205574035645,
+      "learning_rate": 0.00011782159415658008,
+      "loss": 0.5602469253540039,
+      "mean_token_accuracy": 0.9555372184515,
+      "num_tokens": 6257983.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.16072992872446776,
+      "epoch": 6.753736192332683,
+      "grad_norm": 2.282320976257324,
+      "learning_rate": 0.00010991794815014401,
+      "loss": 0.5657939910888672,
+      "mean_token_accuracy": 0.9550630164146423,
+      "num_tokens": 6376198.0,
+      "step": 2600
+    },
+    {
+      "entropy": 0.1512781011685729,
+      "epoch": 6.883690708252112,
+      "grad_norm": 1.3716893196105957,
+      "learning_rate": 0.00010217975653883603,
+      "loss": 0.5340792465209961,
+      "mean_token_accuracy": 0.9578188157081604,
+      "num_tokens": 6502526.0,
+      "step": 2650
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.2444461930829745,
+      "eval_loss": 0.8798949718475342,
+      "eval_mean_token_accuracy": 0.8457763839799625,
+      "eval_num_tokens": 6613474.0,
+      "eval_runtime": 90.2868,
+      "eval_samples_per_second": 18.353,
+      "eval_steps_per_second": 2.304,
+      "step": 2695
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.31810912445653e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.00985279561940916,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3080/trainer_state.json ADDED Viewed

	@@ -0,0 +1,732 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 8.0,
+  "eval_steps": 500,
+  "global_step": 3080,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.353258643448353,
+      "epoch": 0.1299545159194282,
+      "grad_norm": 3.010725975036621,
+      "learning_rate": 4.8475852375026876e-05,
+      "loss": 5.475971069335937,
+      "mean_token_accuracy": 0.7263440760970116,
+      "num_tokens": 128842.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.649170914888382,
+      "epoch": 0.2599090318388564,
+      "grad_norm": 1.9099390506744385,
+      "learning_rate": 9.794100785974817e-05,
+      "loss": 2.55168701171875,
+      "mean_token_accuracy": 0.8364580717682838,
+      "num_tokens": 255497.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5930788792669773,
+      "epoch": 0.3898635477582846,
+      "grad_norm": 2.1239051818847656,
+      "learning_rate": 0.0001474061633444695,
+      "loss": 2.3440716552734373,
+      "mean_token_accuracy": 0.8452290838956833,
+      "num_tokens": 372014.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.5564522063732147,
+      "epoch": 0.5198180636777128,
+      "grad_norm": 411.71807861328125,
+      "learning_rate": 0.00019687131882919077,
+      "loss": 2.2838446044921876,
+      "mean_token_accuracy": 0.8498487600684166,
+      "num_tokens": 500623.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.5539529167115689,
+      "epoch": 0.649772579597141,
+      "grad_norm": 2.1969902515411377,
+      "learning_rate": 0.0002463364743139121,
+      "loss": 2.675394287109375,
+      "mean_token_accuracy": 0.8430694487690925,
+      "num_tokens": 616223.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.5719467167556286,
+      "epoch": 0.7797270955165692,
+      "grad_norm": 1.98796546459198,
+      "learning_rate": 0.00029580162979863343,
+      "loss": 2.2434300231933593,
+      "mean_token_accuracy": 0.851241897046566,
+      "num_tokens": 737263.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.5502805083990097,
+      "epoch": 0.9096816114359974,
+      "grad_norm": 2.0211398601531982,
+      "learning_rate": 0.0003452667852833547,
+      "loss": 2.1729367065429686,
+      "mean_token_accuracy": 0.8554597494006156,
+      "num_tokens": 861477.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5580813550891784,
+      "eval_loss": 0.5830356478691101,
+      "eval_mean_token_accuracy": 0.8432669037809739,
+      "eval_num_tokens": 944782.0,
+      "eval_runtime": 90.3664,
+      "eval_samples_per_second": 18.336,
+      "eval_steps_per_second": 2.302,
+      "step": 385
+    },
+    {
+      "entropy": 0.5498402091725987,
+      "epoch": 1.0389863547758285,
+      "grad_norm": 3.8034188747406006,
+      "learning_rate": 0.000380866355527619,
+      "loss": 2.113946990966797,
+      "mean_token_accuracy": 0.8578129452676629,
+      "num_tokens": 982803.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.5182110907137394,
+      "epoch": 1.1689408706952567,
+      "grad_norm": 2.7830824851989746,
+      "learning_rate": 0.0003805611725593471,
+      "loss": 1.9833453369140626,
+      "mean_token_accuracy": 0.8656822636723518,
+      "num_tokens": 1105926.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.5260789206624031,
+      "epoch": 1.2988953866146848,
+      "grad_norm": 1.7993361949920654,
+      "learning_rate": 0.0003798653399371568,
+      "loss": 2.006897430419922,
+      "mean_token_accuracy": 0.8631055191159248,
+      "num_tokens": 1229857.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.5327546864748001,
+      "epoch": 1.428849902534113,
+      "grad_norm": 1.7606678009033203,
+      "learning_rate": 0.0003787802874228295,
+      "loss": 2.020283050537109,
+      "mean_token_accuracy": 0.8638329988718033,
+      "num_tokens": 1352330.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.5285360223054886,
+      "epoch": 1.5588044184535412,
+      "grad_norm": 4.76006555557251,
+      "learning_rate": 0.00037730824452755275,
+      "loss": 1.9987391662597656,
+      "mean_token_accuracy": 0.8644696187973022,
+      "num_tokens": 1474790.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.5134804363548756,
+      "epoch": 1.6887589343729694,
+      "grad_norm": 1.8447264432907104,
+      "learning_rate": 0.000375452235930833,
+      "loss": 1.9669386291503905,
+      "mean_token_accuracy": 0.8659948265552521,
+      "num_tokens": 1600381.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.5371069309115409,
+      "epoch": 1.8187134502923976,
+      "grad_norm": 1.6537392139434814,
+      "learning_rate": 0.00037321607526553675,
+      "loss": 2.0411550903320315,
+      "mean_token_accuracy": 0.8624854254722595,
+      "num_tokens": 1716827.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.5270501750707627,
+      "epoch": 1.9486679662118258,
+      "grad_norm": 2.6990911960601807,
+      "learning_rate": 0.00037060435728183,
+      "loss": 2.015792236328125,
+      "mean_token_accuracy": 0.8631013777852058,
+      "num_tokens": 1842798.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.5477195472384875,
+      "eval_loss": 0.5585702657699585,
+      "eval_mean_token_accuracy": 0.8486175815073344,
+      "eval_num_tokens": 1889564.0,
+      "eval_runtime": 90.2194,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 770
+    },
+    {
+      "entropy": 0.4782189565088282,
+      "epoch": 2.077972709551657,
+      "grad_norm": 2.041952610015869,
+      "learning_rate": 0.0003676224484061175,
+      "loss": 1.7843829345703126,
+      "mean_token_accuracy": 0.8739750406250881,
+      "num_tokens": 1959778.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.4443667846918106,
+      "epoch": 2.207927225471085,
+      "grad_norm": 16.27313804626465,
+      "learning_rate": 0.00036427647571437996,
+      "loss": 1.6559255981445313,
+      "mean_token_accuracy": 0.8808386281132699,
+      "num_tokens": 2087384.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.44861202985048293,
+      "epoch": 2.3378817413905133,
+      "grad_norm": 1.648870587348938,
+      "learning_rate": 0.0003605733143425679,
+      "loss": 1.677943878173828,
+      "mean_token_accuracy": 0.879555520415306,
+      "num_tokens": 2211962.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.4568726105988026,
+      "epoch": 2.4678362573099415,
+      "grad_norm": 1.7573126554489136,
+      "learning_rate": 0.00035652057335991866,
+      "loss": 1.6760734558105468,
+      "mean_token_accuracy": 0.8791913360357284,
+      "num_tokens": 2334838.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.44863338857889173,
+      "epoch": 2.5977907732293697,
+      "grad_norm": 1.8639047145843506,
+      "learning_rate": 0.00035212658013422465,
+      "loss": 1.6799411010742187,
+      "mean_token_accuracy": 0.8790675121545791,
+      "num_tokens": 2461732.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.4585830120742321,
+      "epoch": 2.727745289148798,
+      "grad_norm": 1.9825985431671143,
+      "learning_rate": 0.0003474003632211781,
+      "loss": 1.7172026062011718,
+      "mean_token_accuracy": 0.8782495930790901,
+      "num_tokens": 2580026.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.45422692246735097,
+      "epoch": 2.857699805068226,
+      "grad_norm": 1.7149962186813354,
+      "learning_rate": 0.00034235163381294995,
+      "loss": 1.679084014892578,
+      "mean_token_accuracy": 0.8795321774482727,
+      "num_tokens": 2705600.0,
+      "step": 1100
+    },
+    {
+      "entropy": 0.47297614574432373,
+      "epoch": 2.9876543209876543,
+      "grad_norm": 1.7435617446899414,
+      "learning_rate": 0.0003369907657841221,
+      "loss": 1.7386201477050782,
+      "mean_token_accuracy": 0.8779115182161331,
+      "num_tokens": 2822808.0,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.5031588454372607,
+      "eval_loss": 0.5551120638847351,
+      "eval_mean_token_accuracy": 0.8531603300227568,
+      "eval_num_tokens": 2834346.0,
+      "eval_runtime": 90.2397,
+      "eval_samples_per_second": 18.362,
+      "eval_steps_per_second": 2.305,
+      "step": 1155
+    },
+    {
+      "entropy": 0.37655152073457615,
+      "epoch": 3.116959064327485,
+      "grad_norm": 1.504384160041809,
+      "learning_rate": 0.0003313287743759729,
+      "loss": 1.3653451538085937,
+      "mean_token_accuracy": 0.8971295344769655,
+      "num_tokens": 2939773.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.37069276951253416,
+      "epoch": 3.246913580246914,
+      "grad_norm": 1.9665946960449219,
+      "learning_rate": 0.0003253772935629151,
+      "loss": 1.3458108520507812,
+      "mean_token_accuracy": 0.8982205548882485,
+      "num_tokens": 3063617.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.37295883789658546,
+      "epoch": 3.3768680961663415,
+      "grad_norm": 1.7501362562179565,
+      "learning_rate": 0.00031914855214759165,
+      "loss": 1.357562255859375,
+      "mean_token_accuracy": 0.8977113124728203,
+      "num_tokens": 3189800.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.3805788069963455,
+      "epoch": 3.50682261208577,
+      "grad_norm": 1.7277154922485352,
+      "learning_rate": 0.00031265534863374894,
+      "loss": 1.3735618591308594,
+      "mean_token_accuracy": 0.8962143072485924,
+      "num_tokens": 3311908.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.3840580120682716,
+      "epoch": 3.636777128005198,
+      "grad_norm": 2.2338802814483643,
+      "learning_rate": 0.0003059110249285165,
+      "loss": 1.3903216552734374,
+      "mean_token_accuracy": 0.8958476388454437,
+      "num_tokens": 3432934.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.37621145449578763,
+      "epoch": 3.7667316439246266,
+      "grad_norm": 1.9029661417007446,
+      "learning_rate": 0.00029892943892812944,
+      "loss": 1.3776657104492187,
+      "mean_token_accuracy": 0.8964926180243492,
+      "num_tokens": 3561408.0,
+      "step": 1450
+    },
+    {
+      "entropy": 0.3784803995490074,
+      "epoch": 3.8966861598440543,
+      "grad_norm": 2.089708089828491,
+      "learning_rate": 0.00029172493604342163,
+      "loss": 1.3816807556152344,
+      "mean_token_accuracy": 0.8962833172082901,
+      "num_tokens": 3684624.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.4351254403591156,
+      "eval_loss": 0.5814722180366516,
+      "eval_mean_token_accuracy": 0.8530604747625498,
+      "eval_num_tokens": 3779128.0,
+      "eval_runtime": 90.2232,
+      "eval_samples_per_second": 18.366,
+      "eval_steps_per_second": 2.305,
+      "step": 1540
+    },
+    {
+      "entropy": 0.36326556409423677,
+      "epoch": 4.025990903183885,
+      "grad_norm": 2.1354947090148926,
+      "learning_rate": 0.0002843123197235993,
+      "loss": 1.3295362854003907,
+      "mean_token_accuracy": 0.8993093811686913,
+      "num_tokens": 3804993.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.2879397062957287,
+      "epoch": 4.155945419103314,
+      "grad_norm": 2.201097011566162,
+      "learning_rate": 0.0002767068210388601,
+      "loss": 1.0272974395751953,
+      "mean_token_accuracy": 0.9182627710700035,
+      "num_tokens": 3928162.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.2848948486149311,
+      "epoch": 4.2858999350227425,
+      "grad_norm": 2.01479172706604,
+      "learning_rate": 0.000268924067384358,
+      "loss": 1.0278727722167968,
+      "mean_token_accuracy": 0.9194766515493393,
+      "num_tokens": 4049012.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.2940504560619593,
+      "epoch": 4.41585445094217,
+      "grad_norm": 2.0893027782440186,
+      "learning_rate": 0.00026098005036982003,
+      "loss": 1.0586751556396485,
+      "mean_token_accuracy": 0.9167885810136795,
+      "num_tokens": 4167845.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.293505182415247,
+      "epoch": 4.545808966861598,
+      "grad_norm": 1.6346389055252075,
+      "learning_rate": 0.0002528910929607928,
+      "loss": 1.0669570922851563,
+      "mean_token_accuracy": 0.9160876458883286,
+      "num_tokens": 4287505.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.2898535231500864,
+      "epoch": 4.675763482781027,
+      "grad_norm": 1.6645033359527588,
+      "learning_rate": 0.0002446738159390364,
+      "loss": 1.0582612609863282,
+      "mean_token_accuracy": 0.9177632886171341,
+      "num_tokens": 4412221.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.2842763290554285,
+      "epoch": 4.805717998700455,
+      "grad_norm": 2.4594268798828125,
+      "learning_rate": 0.0002363451037509798,
+      "loss": 1.0467537689208983,
+      "mean_token_accuracy": 0.9177608361840248,
+      "num_tokens": 4537178.0,
+      "step": 1850
+    },
+    {
+      "entropy": 0.284430123642087,
+      "epoch": 4.935672514619883,
+      "grad_norm": 2.1724514961242676,
+      "learning_rate": 0.00022792206981441223,
+      "loss": 1.0753899383544923,
+      "mean_token_accuracy": 0.915192686021328,
+      "num_tokens": 4664196.0,
+      "step": 1900
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.3632780872285366,
+      "eval_loss": 0.6438126564025879,
+      "eval_mean_token_accuracy": 0.8511462942338907,
+      "eval_num_tokens": 4723910.0,
+      "eval_runtime": 90.1846,
+      "eval_samples_per_second": 18.373,
+      "eval_steps_per_second": 2.306,
+      "step": 1925
+    },
+    {
+      "entropy": 0.23515464736139355,
+      "epoch": 5.064977257959714,
+      "grad_norm": 1.651587724685669,
+      "learning_rate": 0.00021942202135469513,
+      "loss": 0.8597064971923828,
+      "mean_token_accuracy": 0.9324622603517082,
+      "num_tokens": 4789568.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.1958953895419836,
+      "epoch": 5.1949317738791425,
+      "grad_norm": 1.923292636871338,
+      "learning_rate": 0.0002108624238427481,
+      "loss": 0.7188112640380859,
+      "mean_token_accuracy": 0.9416415295004845,
+      "num_tokens": 4913407.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.21068542070686816,
+      "epoch": 5.32488628979857,
+      "grad_norm": 2.299356460571289,
+      "learning_rate": 0.0002022608651078804,
+      "loss": 0.7712985229492187,
+      "mean_token_accuracy": 0.9386440163850784,
+      "num_tokens": 5032951.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.21234643168747425,
+      "epoch": 5.454840805717999,
+      "grad_norm": 2.2119295597076416,
+      "learning_rate": 0.00019363501919920608,
+      "loss": 0.7650181579589844,
+      "mean_token_accuracy": 0.938471505343914,
+      "num_tokens": 5156908.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.21658269092440605,
+      "epoch": 5.584795321637427,
+      "grad_norm": 1.5394288301467896,
+      "learning_rate": 0.00018500261006989887,
+      "loss": 0.7784209442138672,
+      "mean_token_accuracy": 0.9371598136425018,
+      "num_tokens": 5276087.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.2045296123996377,
+      "epoch": 5.714749837556855,
+      "grad_norm": 1.913680076599121,
+      "learning_rate": 0.00017638137515890763,
+      "loss": 0.7638166046142578,
+      "mean_token_accuracy": 0.9378301629424095,
+      "num_tokens": 5398787.0,
+      "step": 2200
+    },
+    {
+      "entropy": 0.20917976945638656,
+      "epoch": 5.844704353476283,
+      "grad_norm": 2.0847299098968506,
+      "learning_rate": 0.00016778902894496063,
+      "loss": 0.7631703186035156,
+      "mean_token_accuracy": 0.9387557968497277,
+      "num_tokens": 5522332.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.22262076318264007,
+      "epoch": 5.974658869395712,
+      "grad_norm": 2.1597352027893066,
+      "learning_rate": 0.0001592432265477485,
+      "loss": 0.798133773803711,
+      "mean_token_accuracy": 0.936034984588623,
+      "num_tokens": 5642361.0,
+      "step": 2300
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.31502799331568754,
+      "eval_loss": 0.7417300343513489,
+      "eval_mean_token_accuracy": 0.8477253922476218,
+      "eval_num_tokens": 5668692.0,
+      "eval_runtime": 90.4252,
+      "eval_samples_per_second": 18.325,
+      "eval_steps_per_second": 2.3,
+      "step": 2310
+    },
+    {
+      "entropy": 0.16796037876725795,
+      "epoch": 6.1039636127355426,
+      "grad_norm": 2.2228569984436035,
+      "learning_rate": 0.00015076152745107442,
+      "loss": 0.5835284805297851,
+      "mean_token_accuracy": 0.9529892874123463,
+      "num_tokens": 5766129.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.14919219192117453,
+      "epoch": 6.23391812865497,
+      "grad_norm": 1.408840298652649,
+      "learning_rate": 0.00014236135942251215,
+      "loss": 0.5310631561279296,
+      "mean_token_accuracy": 0.9586454060673714,
+      "num_tokens": 5888746.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.1499051059409976,
+      "epoch": 6.363872644574399,
+      "grad_norm": 1.8611102104187012,
+      "learning_rate": 0.00013405998270370849,
+      "loss": 0.5127810668945313,
+      "mean_token_accuracy": 0.9591325157880783,
+      "num_tokens": 6014455.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.15334193099290133,
+      "epoch": 6.493827160493828,
+      "grad_norm": 1.6051015853881836,
+      "learning_rate": 0.00012587445454490892,
+      "loss": 0.5349758529663086,
+      "mean_token_accuracy": 0.9574431091547012,
+      "num_tokens": 6141229.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.15982334002852439,
+      "epoch": 6.623781676413255,
+      "grad_norm": 3.7065205574035645,
+      "learning_rate": 0.00011782159415658008,
+      "loss": 0.5602469253540039,
+      "mean_token_accuracy": 0.9555372184515,
+      "num_tokens": 6257983.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.16072992872446776,
+      "epoch": 6.753736192332683,
+      "grad_norm": 2.282320976257324,
+      "learning_rate": 0.00010991794815014401,
+      "loss": 0.5657939910888672,
+      "mean_token_accuracy": 0.9550630164146423,
+      "num_tokens": 6376198.0,
+      "step": 2600
+    },
+    {
+      "entropy": 0.1512781011685729,
+      "epoch": 6.883690708252112,
+      "grad_norm": 1.3716893196105957,
+      "learning_rate": 0.00010217975653883603,
+      "loss": 0.5340792465209961,
+      "mean_token_accuracy": 0.9578188157081604,
+      "num_tokens": 6502526.0,
+      "step": 2650
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.2444461930829745,
+      "eval_loss": 0.8798949718475342,
+      "eval_mean_token_accuracy": 0.8457763839799625,
+      "eval_num_tokens": 6613474.0,
+      "eval_runtime": 90.2868,
+      "eval_samples_per_second": 18.353,
+      "eval_steps_per_second": 2.304,
+      "step": 2695
+    },
+    {
+      "entropy": 0.1444593005668578,
+      "epoch": 7.012995451591943,
+      "grad_norm": 1.0965569019317627,
+      "learning_rate": 9.462291936854386e-05,
+      "loss": 0.511833839416504,
+      "mean_token_accuracy": 0.9595773016388093,
+      "num_tokens": 6626464.0,
+      "step": 2700
+    },
+    {
+      "entropy": 0.10985541097819805,
+      "epoch": 7.142949967511371,
+      "grad_norm": 1.8079149723052979,
+      "learning_rate": 8.726296404719584e-05,
+      "loss": 0.3876673126220703,
+      "mean_token_accuracy": 0.9704919803142548,
+      "num_tokens": 6746276.0,
+      "step": 2750
+    },
+    {
+      "entropy": 0.11304264679551125,
+      "epoch": 7.272904483430799,
+      "grad_norm": 1.5228444337844849,
+      "learning_rate": 8.01150134398253e-05,
+      "loss": 0.39335052490234373,
+      "mean_token_accuracy": 0.9695766788721084,
+      "num_tokens": 6868131.0,
+      "step": 2800
+    },
+    {
+      "entropy": 0.11066193280741572,
+      "epoch": 7.402858999350228,
+      "grad_norm": 2.265174388885498,
+      "learning_rate": 7.319375479487112e-05,
+      "loss": 0.38289966583251955,
+      "mean_token_accuracy": 0.9707033503055572,
+      "num_tokens": 6993803.0,
+      "step": 2850
+    },
+    {
+      "entropy": 0.12022399662062526,
+      "epoch": 7.532813515269655,
+      "grad_norm": 1.0657345056533813,
+      "learning_rate": 6.65134095655596e-05,
+      "loss": 0.4089087677001953,
+      "mean_token_accuracy": 0.9689779531955719,
+      "num_tokens": 7113063.0,
+      "step": 2900
+    },
+    {
+      "entropy": 0.11429863104596734,
+      "epoch": 7.662768031189084,
+      "grad_norm": 1.3440358638763428,
+      "learning_rate": 6.008770418837973e-05,
+      "loss": 0.3935198593139648,
+      "mean_token_accuracy": 0.9698223957419395,
+      "num_tokens": 7237174.0,
+      "step": 2950
+    },
+    {
+      "entropy": 0.11748226622119545,
+      "epoch": 7.792722547108512,
+      "grad_norm": 1.4607034921646118,
+      "learning_rate": 5.3929841878693804e-05,
+      "loss": 0.40399799346923826,
+      "mean_token_accuracy": 0.9695871344208717,
+      "num_tokens": 7357301.0,
+      "step": 3000
+    },
+    {
+      "entropy": 0.11790506653487683,
+      "epoch": 7.92267706302794,
+      "grad_norm": 1.4574708938598633,
+      "learning_rate": 4.805247550143646e-05,
+      "loss": 0.4049314880371094,
+      "mean_token_accuracy": 0.9693469110131264,
+      "num_tokens": 7482431.0,
+      "step": 3050
+    },
+    {
+      "epoch": 8.0,
+      "eval_entropy": 0.2104659411483086,
+      "eval_loss": 0.9939886927604675,
+      "eval_mean_token_accuracy": 0.8444042455118436,
+      "eval_num_tokens": 7558256.0,
+      "eval_runtime": 90.3118,
+      "eval_samples_per_second": 18.348,
+      "eval_steps_per_second": 2.303,
+      "step": 3080
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.648642717750723e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+base_model: google/gemma-4-31B
+library_name: transformers
+model_name: gemma-4-31B_original_features_structural_train_original_features_structural_test1
+tags:
+- generated_from_trainer
+- sft
+- trl
+licence: license
+---
+# Model Card for gemma-4-31B_original_features_structural_train_original_features_structural_test1
+This model is a fine-tuned version of [google/gemma-4-31B](https://huggingface.co/google/gemma-4-31B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="None", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/katriin-kukk/Cross_lingual_morphological_generalization/runs/rfqns0wc)
+This model was trained with SFT.
+### Framework versions
+- TRL: 0.29.0
+- Transformers: 5.5.4
+- Pytorch: 2.10.0
+- Datasets: 4.6.1
+- Tokenizers: 0.22.2
+## Citations
+Cite TRL as:
+```bibtex
+@software{vonwerra2020trl,
+  title   = {{TRL: Transformers Reinforcement Learning}},
+  author  = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
+  license = {Apache-2.0},
+  url     = {https://github.com/huggingface/trl},
+  year    = {2020}
+}
+```

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1122/trainer_state.json ADDED Viewed

	@@ -0,0 +1,287 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1122,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.979346498185751e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1496/trainer_state.json ADDED Viewed

	@@ -0,0 +1,368 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 1496,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.1971161045794035e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1870/trainer_state.json ADDED Viewed

	@@ -0,0 +1,459 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 1870,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    },
+    {
+      "entropy": 0.27893446536377225,
+      "epoch": 4.010709504685408,
+      "grad_norm": 1.545491337776184,
+      "learning_rate": 9.30742431881587e-05,
+      "loss": 1.0577442169189453,
+      "mean_token_accuracy": 0.9191552999645772,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.19769302535802125,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.10296893119812,
+      "learning_rate": 9.052568051799083e-05,
+      "loss": 0.7461458587646485,
+      "mean_token_accuracy": 0.9415343621373177,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.1981763695180416,
+      "epoch": 4.278447121820616,
+      "grad_norm": 2.067410945892334,
+      "learning_rate": 8.791533352632524e-05,
+      "loss": 0.7580889892578125,
+      "mean_token_accuracy": 0.9396374526619912,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.19850988369435071,
+      "epoch": 4.412315930388219,
+      "grad_norm": 1.9034850597381592,
+      "learning_rate": 8.524888591065258e-05,
+      "loss": 0.7526986694335938,
+      "mean_token_accuracy": 0.9402479353547096,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.19905407220125199,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.1477949619293213,
+      "learning_rate": 8.253214352041379e-05,
+      "loss": 0.7603612518310547,
+      "mean_token_accuracy": 0.9396576225757599,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.20251497332006693,
+      "epoch": 4.680053547523427,
+      "grad_norm": 1.5489246845245361,
+      "learning_rate": 7.97710217155036e-05,
+      "loss": 0.7711930084228515,
+      "mean_token_accuracy": 0.9400961664319039,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.1991352306306362,
+      "epoch": 4.813922356091031,
+      "grad_norm": 1.969994068145752,
+      "learning_rate": 7.697153248632946e-05,
+      "loss": 0.7681967163085938,
+      "mean_token_accuracy": 0.9399621617794037,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.20229352474212647,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.2329719066619873,
+      "learning_rate": 7.41397713634694e-05,
+      "loss": 0.7733911895751953,
+      "mean_token_accuracy": 0.9396535342931748,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.270584502145648,
+      "eval_loss": 0.6255385875701904,
+      "eval_mean_token_accuracy": 0.8687835082411766,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.6331,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.07,
+      "step": 1870
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.4947622783933181e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2244/trainer_state.json ADDED Viewed

	@@ -0,0 +1,540 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 2244,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    },
+    {
+      "entropy": 0.27893446536377225,
+      "epoch": 4.010709504685408,
+      "grad_norm": 1.545491337776184,
+      "learning_rate": 9.30742431881587e-05,
+      "loss": 1.0577442169189453,
+      "mean_token_accuracy": 0.9191552999645772,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.19769302535802125,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.10296893119812,
+      "learning_rate": 9.052568051799083e-05,
+      "loss": 0.7461458587646485,
+      "mean_token_accuracy": 0.9415343621373177,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.1981763695180416,
+      "epoch": 4.278447121820616,
+      "grad_norm": 2.067410945892334,
+      "learning_rate": 8.791533352632524e-05,
+      "loss": 0.7580889892578125,
+      "mean_token_accuracy": 0.9396374526619912,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.19850988369435071,
+      "epoch": 4.412315930388219,
+      "grad_norm": 1.9034850597381592,
+      "learning_rate": 8.524888591065258e-05,
+      "loss": 0.7526986694335938,
+      "mean_token_accuracy": 0.9402479353547096,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.19905407220125199,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.1477949619293213,
+      "learning_rate": 8.253214352041379e-05,
+      "loss": 0.7603612518310547,
+      "mean_token_accuracy": 0.9396576225757599,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.20251497332006693,
+      "epoch": 4.680053547523427,
+      "grad_norm": 1.5489246845245361,
+      "learning_rate": 7.97710217155036e-05,
+      "loss": 0.7711930084228515,
+      "mean_token_accuracy": 0.9400961664319039,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.1991352306306362,
+      "epoch": 4.813922356091031,
+      "grad_norm": 1.969994068145752,
+      "learning_rate": 7.697153248632946e-05,
+      "loss": 0.7681967163085938,
+      "mean_token_accuracy": 0.9399621617794037,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.20229352474212647,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.2329719066619873,
+      "learning_rate": 7.41397713634694e-05,
+      "loss": 0.7733911895751953,
+      "mean_token_accuracy": 0.9396535342931748,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.270584502145648,
+      "eval_loss": 0.6255385875701904,
+      "eval_mean_token_accuracy": 0.8687835082411766,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.6331,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.07,
+      "step": 1870
+    },
+    {
+      "entropy": 0.16372355209155517,
+      "epoch": 5.080321285140562,
+      "grad_norm": 8.029130935668945,
+      "learning_rate": 7.128190414543193e-05,
+      "loss": 0.6145073699951172,
+      "mean_token_accuracy": 0.9516371590922578,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14057113960385323,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.23626446723938,
+      "learning_rate": 6.840415347341672e-05,
+      "loss": 0.5295140075683594,
+      "mean_token_accuracy": 0.9593333688378334,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.14139273861423135,
+      "epoch": 5.34805890227577,
+      "grad_norm": 2.0157318115234375,
+      "learning_rate": 6.551278528230729e-05,
+      "loss": 0.5296827697753906,
+      "mean_token_accuracy": 0.9590813705325126,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.14537794288247824,
+      "epoch": 5.481927710843373,
+      "grad_norm": 1.5371013879776,
+      "learning_rate": 6.261409515739736e-05,
+      "loss": 0.5478645706176758,
+      "mean_token_accuracy": 0.9577724316716194,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.14534839443862438,
+      "epoch": 5.615796519410977,
+      "grad_norm": 2.0134589672088623,
+      "learning_rate": 5.971439462655727e-05,
+      "loss": 0.5426230239868164,
+      "mean_token_accuracy": 0.9581041479110718,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.14614912170916797,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.286437749862671,
+      "learning_rate": 5.6819997417687274e-05,
+      "loss": 0.5487421798706055,
+      "mean_token_accuracy": 0.9563529288768768,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.13987606402486563,
+      "epoch": 5.883534136546185,
+      "grad_norm": 1.7586702108383179,
+      "learning_rate": 5.393720571138079e-05,
+      "loss": 0.5254617309570313,
+      "mean_token_accuracy": 0.9590577334165573,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2240281231701374,
+      "eval_loss": 0.7485206723213196,
+      "eval_mean_token_accuracy": 0.8668996468186378,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.4089,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.074,
+      "step": 2244
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.7914914724245857e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2618/trainer_state.json ADDED Viewed

	@@ -0,0 +1,631 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 2618,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3355020767450332,
+      "epoch": 0.13386880856760375,
+      "grad_norm": 3.2956597805023193,
+      "learning_rate": 1.628530639938585e-05,
+      "loss": 5.349910278320312,
+      "mean_token_accuracy": 0.7383818039298058,
+      "num_tokens": 116199.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.5958842460811138,
+      "epoch": 0.2677376171352075,
+      "grad_norm": 2.5947492122650146,
+      "learning_rate": 3.290296599059591e-05,
+      "loss": 2.312855072021484,
+      "mean_token_accuracy": 0.8520967712998391,
+      "num_tokens": 232864.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.5190362003445625,
+      "epoch": 0.40160642570281124,
+      "grad_norm": 1.5038394927978516,
+      "learning_rate": 4.9520625581805955e-05,
+      "loss": 2.0574468994140624,
+      "mean_token_accuracy": 0.8657039344310761,
+      "num_tokens": 352382.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.4922871346771717,
+      "epoch": 0.535475234270415,
+      "grad_norm": 1.645923137664795,
+      "learning_rate": 6.613828517301602e-05,
+      "loss": 1.916438446044922,
+      "mean_token_accuracy": 0.8717759534716606,
+      "num_tokens": 474532.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.491110111027956,
+      "epoch": 0.6693440428380187,
+      "grad_norm": 1.866817593574524,
+      "learning_rate": 8.275594476422607e-05,
+      "loss": 1.9421713256835937,
+      "mean_token_accuracy": 0.8710730043053627,
+      "num_tokens": 589198.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.47134352535009383,
+      "epoch": 0.8032128514056225,
+      "grad_norm": 117.62409210205078,
+      "learning_rate": 9.937360435543611e-05,
+      "loss": 1.9768324279785157,
+      "mean_token_accuracy": 0.8741078078746796,
+      "num_tokens": 707057.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.4820582258701325,
+      "epoch": 0.9370816599732262,
+      "grad_norm": 2.3274827003479004,
+      "learning_rate": 0.00011599126394664616,
+      "loss": 2.2025875854492187,
+      "mean_token_accuracy": 0.8697148504853248,
+      "num_tokens": 822888.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.5010400542616844,
+      "eval_loss": 0.5114277601242065,
+      "eval_mean_token_accuracy": 0.8587275749444961,
+      "eval_num_tokens": 872247.0,
+      "eval_runtime": 96.5515,
+      "eval_samples_per_second": 16.561,
+      "eval_steps_per_second": 2.071,
+      "step": 374
+    },
+    {
+      "entropy": 0.4708875769918615,
+      "epoch": 1.069611780455154,
+      "grad_norm": 3.3712940216064453,
+      "learning_rate": 0.00012428317596508976,
+      "loss": 1.83294189453125,
+      "mean_token_accuracy": 0.8772370366737096,
+      "num_tokens": 929365.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.44804590195417404,
+      "epoch": 1.2034805890227578,
+      "grad_norm": 1.4833389520645142,
+      "learning_rate": 0.00012414788900475706,
+      "loss": 1.7768891906738282,
+      "mean_token_accuracy": 0.8791097947955131,
+      "num_tokens": 1046629.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.4510513086616993,
+      "epoch": 1.3373493975903614,
+      "grad_norm": 2.814790964126587,
+      "learning_rate": 0.00012387760965418496,
+      "loss": 1.7745071411132813,
+      "mean_token_accuracy": 0.8813075706362724,
+      "num_tokens": 1165744.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.4479117552936077,
+      "epoch": 1.4712182061579653,
+      "grad_norm": 1.855610728263855,
+      "learning_rate": 0.00012347292641217135,
+      "loss": 1.7583291625976563,
+      "mean_token_accuracy": 0.8815277495980263,
+      "num_tokens": 1284843.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.4380264139175415,
+      "epoch": 1.605087014725569,
+      "grad_norm": 1.383190631866455,
+      "learning_rate": 0.00012293472042483757,
+      "loss": 1.7229583740234375,
+      "mean_token_accuracy": 0.8832098203897476,
+      "num_tokens": 1406485.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.4342571949958801,
+      "epoch": 1.7389558232931726,
+      "grad_norm": 1.4977834224700928,
+      "learning_rate": 0.00012226416356704526,
+      "loss": 1.7174737548828125,
+      "mean_token_accuracy": 0.8834967383742333,
+      "num_tokens": 1525460.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.42700962007045745,
+      "epoch": 1.8728246318607764,
+      "grad_norm": 1.6156537532806396,
+      "learning_rate": 0.00012146271589078838,
+      "loss": 1.682061767578125,
+      "mean_token_accuracy": 0.8858474844694137,
+      "num_tokens": 1638984.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 0.4838937771320343,
+      "eval_loss": 0.4826815128326416,
+      "eval_mean_token_accuracy": 0.8682844692468643,
+      "eval_num_tokens": 1744494.0,
+      "eval_runtime": 96.5071,
+      "eval_samples_per_second": 16.569,
+      "eval_steps_per_second": 2.072,
+      "step": 748
+    },
+    {
+      "entropy": 0.4378527848407476,
+      "epoch": 2.005354752342704,
+      "grad_norm": 1.400229573249817,
+      "learning_rate": 0.0001205321224461161,
+      "loss": 1.7096096801757812,
+      "mean_token_accuracy": 0.8838462468349573,
+      "num_tokens": 1749755.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.3559799794852734,
+      "epoch": 2.139223560910308,
+      "grad_norm": 1.7168083190917969,
+      "learning_rate": 0.0001194744094815093,
+      "loss": 1.3893603515625,
+      "mean_token_accuracy": 0.9004731178283691,
+      "num_tokens": 1868231.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.3671448823064566,
+      "epoch": 2.2730923694779115,
+      "grad_norm": 1.9720135927200317,
+      "learning_rate": 0.00011829188003198282,
+      "loss": 1.429988555908203,
+      "mean_token_accuracy": 0.8970818132162094,
+      "num_tokens": 1979116.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.3597494306415319,
+      "epoch": 2.4069611780455156,
+      "grad_norm": 1.4947372674942017,
+      "learning_rate": 0.00011698710890452068,
+      "loss": 1.418173828125,
+      "mean_token_accuracy": 0.8994651186466217,
+      "num_tokens": 2094539.0,
+      "step": 900
+    },
+    {
+      "entropy": 0.36254502907395364,
+      "epoch": 2.540829986613119,
+      "grad_norm": 1.6768454313278198,
+      "learning_rate": 0.00011556293707176242,
+      "loss": 1.4158590698242188,
+      "mean_token_accuracy": 0.8995477721095085,
+      "num_tokens": 2209415.0,
+      "step": 950
+    },
+    {
+      "entropy": 0.36290778368711474,
+      "epoch": 2.674698795180723,
+      "grad_norm": 1.6033697128295898,
+      "learning_rate": 0.00011402246548614765,
+      "loss": 1.4300469970703125,
+      "mean_token_accuracy": 0.8986452376842499,
+      "num_tokens": 2324269.0,
+      "step": 1000
+    },
+    {
+      "entropy": 0.3635872249305248,
+      "epoch": 2.8085676037483265,
+      "grad_norm": 1.546893835067749,
+      "learning_rate": 0.00011236904832798785,
+      "loss": 1.42587646484375,
+      "mean_token_accuracy": 0.9003903394937516,
+      "num_tokens": 2447336.0,
+      "step": 1050
+    },
+    {
+      "entropy": 0.36871150620281695,
+      "epoch": 2.9424364123159306,
+      "grad_norm": 1.2951405048370361,
+      "learning_rate": 0.0001106062857021667,
+      "loss": 1.448046875,
+      "mean_token_accuracy": 0.8967258337140084,
+      "num_tokens": 2565837.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0,
+      "eval_entropy": 0.4225208269059658,
+      "eval_loss": 0.489418089389801,
+      "eval_mean_token_accuracy": 0.8697815361618996,
+      "eval_num_tokens": 2616741.0,
+      "eval_runtime": 96.4058,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.075,
+      "step": 1122
+    },
+    {
+      "entropy": 0.3120347365285411,
+      "epoch": 3.074966532797858,
+      "grad_norm": 1.639520287513733,
+      "learning_rate": 0.00010873801579937106,
+      "loss": 1.1941973876953125,
+      "mean_token_accuracy": 0.9117801315856703,
+      "num_tokens": 2685975.0,
+      "step": 1150
+    },
+    {
+      "entropy": 0.28257040068507194,
+      "epoch": 3.208835341365462,
+      "grad_norm": 1.7459681034088135,
+      "learning_rate": 0.00010676830653892058,
+      "loss": 1.0850601196289062,
+      "mean_token_accuracy": 0.9177472350001336,
+      "num_tokens": 2798277.0,
+      "step": 1200
+    },
+    {
+      "entropy": 0.27802520349621773,
+      "epoch": 3.3427041499330654,
+      "grad_norm": 1.5176103115081787,
+      "learning_rate": 0.00010470144671139238,
+      "loss": 1.0840838623046876,
+      "mean_token_accuracy": 0.9179763168096542,
+      "num_tokens": 2918973.0,
+      "step": 1250
+    },
+    {
+      "entropy": 0.280417420566082,
+      "epoch": 3.4765729585006695,
+      "grad_norm": 1.3774974346160889,
+      "learning_rate": 0.00010254193664032686,
+      "loss": 1.0911756896972655,
+      "mean_token_accuracy": 0.9162956389784813,
+      "num_tokens": 3039073.0,
+      "step": 1300
+    },
+    {
+      "entropy": 0.2834589210152626,
+      "epoch": 3.610441767068273,
+      "grad_norm": 1.5929396152496338,
+      "learning_rate": 0.00010029447838334742,
+      "loss": 1.0985262298583984,
+      "mean_token_accuracy": 0.9174074530601501,
+      "num_tokens": 3153710.0,
+      "step": 1350
+    },
+    {
+      "entropy": 0.282296127229929,
+      "epoch": 3.7443105756358768,
+      "grad_norm": 1.50350022315979,
+      "learning_rate": 9.796396549403e-05,
+      "loss": 1.101386260986328,
+      "mean_token_accuracy": 0.9168545073270797,
+      "num_tokens": 3263594.0,
+      "step": 1400
+    },
+    {
+      "entropy": 0.279728781580925,
+      "epoch": 3.878179384203481,
+      "grad_norm": 1.4728187322616577,
+      "learning_rate": 9.555547236681456e-05,
+      "loss": 1.0859880065917968,
+      "mean_token_accuracy": 0.9178367125988006,
+      "num_tokens": 3386033.0,
+      "step": 1450
+    },
+    {
+      "epoch": 4.0,
+      "eval_entropy": 0.34304031178355215,
+      "eval_loss": 0.5295785665512085,
+      "eval_mean_token_accuracy": 0.8698753178119659,
+      "eval_num_tokens": 3488988.0,
+      "eval_runtime": 96.3616,
+      "eval_samples_per_second": 16.594,
+      "eval_steps_per_second": 2.076,
+      "step": 1496
+    },
+    {
+      "entropy": 0.27893446536377225,
+      "epoch": 4.010709504685408,
+      "grad_norm": 1.545491337776184,
+      "learning_rate": 9.30742431881587e-05,
+      "loss": 1.0577442169189453,
+      "mean_token_accuracy": 0.9191552999645772,
+      "num_tokens": 3498406.0,
+      "step": 1500
+    },
+    {
+      "entropy": 0.19769302535802125,
+      "epoch": 4.144578313253012,
+      "grad_norm": 2.10296893119812,
+      "learning_rate": 9.052568051799083e-05,
+      "loss": 0.7461458587646485,
+      "mean_token_accuracy": 0.9415343621373177,
+      "num_tokens": 3614301.0,
+      "step": 1550
+    },
+    {
+      "entropy": 0.1981763695180416,
+      "epoch": 4.278447121820616,
+      "grad_norm": 2.067410945892334,
+      "learning_rate": 8.791533352632524e-05,
+      "loss": 0.7580889892578125,
+      "mean_token_accuracy": 0.9396374526619912,
+      "num_tokens": 3735705.0,
+      "step": 1600
+    },
+    {
+      "entropy": 0.19850988369435071,
+      "epoch": 4.412315930388219,
+      "grad_norm": 1.9034850597381592,
+      "learning_rate": 8.524888591065258e-05,
+      "loss": 0.7526986694335938,
+      "mean_token_accuracy": 0.9402479353547096,
+      "num_tokens": 3854287.0,
+      "step": 1650
+    },
+    {
+      "entropy": 0.19905407220125199,
+      "epoch": 4.546184738955823,
+      "grad_norm": 2.1477949619293213,
+      "learning_rate": 8.253214352041379e-05,
+      "loss": 0.7603612518310547,
+      "mean_token_accuracy": 0.9396576225757599,
+      "num_tokens": 3967362.0,
+      "step": 1700
+    },
+    {
+      "entropy": 0.20251497332006693,
+      "epoch": 4.680053547523427,
+      "grad_norm": 1.5489246845245361,
+      "learning_rate": 7.97710217155036e-05,
+      "loss": 0.7711930084228515,
+      "mean_token_accuracy": 0.9400961664319039,
+      "num_tokens": 4081441.0,
+      "step": 1750
+    },
+    {
+      "entropy": 0.1991352306306362,
+      "epoch": 4.813922356091031,
+      "grad_norm": 1.969994068145752,
+      "learning_rate": 7.697153248632946e-05,
+      "loss": 0.7681967163085938,
+      "mean_token_accuracy": 0.9399621617794037,
+      "num_tokens": 4197604.0,
+      "step": 1800
+    },
+    {
+      "entropy": 0.20229352474212647,
+      "epoch": 4.947791164658635,
+      "grad_norm": 2.2329719066619873,
+      "learning_rate": 7.41397713634694e-05,
+      "loss": 0.7733911895751953,
+      "mean_token_accuracy": 0.9396535342931748,
+      "num_tokens": 4318894.0,
+      "step": 1850
+    },
+    {
+      "epoch": 5.0,
+      "eval_entropy": 0.270584502145648,
+      "eval_loss": 0.6255385875701904,
+      "eval_mean_token_accuracy": 0.8687835082411766,
+      "eval_num_tokens": 4361235.0,
+      "eval_runtime": 96.6331,
+      "eval_samples_per_second": 16.547,
+      "eval_steps_per_second": 2.07,
+      "step": 1870
+    },
+    {
+      "entropy": 0.16372355209155517,
+      "epoch": 5.080321285140562,
+      "grad_norm": 8.029130935668945,
+      "learning_rate": 7.128190414543193e-05,
+      "loss": 0.6145073699951172,
+      "mean_token_accuracy": 0.9516371590922578,
+      "num_tokens": 4434412.0,
+      "step": 1900
+    },
+    {
+      "entropy": 0.14057113960385323,
+      "epoch": 5.214190093708166,
+      "grad_norm": 2.23626446723938,
+      "learning_rate": 6.840415347341672e-05,
+      "loss": 0.5295140075683594,
+      "mean_token_accuracy": 0.9593333688378334,
+      "num_tokens": 4548703.0,
+      "step": 1950
+    },
+    {
+      "entropy": 0.14139273861423135,
+      "epoch": 5.34805890227577,
+      "grad_norm": 2.0157318115234375,
+      "learning_rate": 6.551278528230729e-05,
+      "loss": 0.5296827697753906,
+      "mean_token_accuracy": 0.9590813705325126,
+      "num_tokens": 4665542.0,
+      "step": 2000
+    },
+    {
+      "entropy": 0.14537794288247824,
+      "epoch": 5.481927710843373,
+      "grad_norm": 1.5371013879776,
+      "learning_rate": 6.261409515739736e-05,
+      "loss": 0.5478645706176758,
+      "mean_token_accuracy": 0.9577724316716194,
+      "num_tokens": 4778075.0,
+      "step": 2050
+    },
+    {
+      "entropy": 0.14534839443862438,
+      "epoch": 5.615796519410977,
+      "grad_norm": 2.0134589672088623,
+      "learning_rate": 5.971439462655727e-05,
+      "loss": 0.5426230239868164,
+      "mean_token_accuracy": 0.9581041479110718,
+      "num_tokens": 4897453.0,
+      "step": 2100
+    },
+    {
+      "entropy": 0.14614912170916797,
+      "epoch": 5.749665327978581,
+      "grad_norm": 1.286437749862671,
+      "learning_rate": 5.6819997417687274e-05,
+      "loss": 0.5487421798706055,
+      "mean_token_accuracy": 0.9563529288768768,
+      "num_tokens": 5012767.0,
+      "step": 2150
+    },
+    {
+      "entropy": 0.13987606402486563,
+      "epoch": 5.883534136546185,
+      "grad_norm": 1.7586702108383179,
+      "learning_rate": 5.393720571138079e-05,
+      "loss": 0.5254617309570313,
+      "mean_token_accuracy": 0.9590577334165573,
+      "num_tokens": 5129878.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.0,
+      "eval_entropy": 0.2240281231701374,
+      "eval_loss": 0.7485206723213196,
+      "eval_mean_token_accuracy": 0.8668996468186378,
+      "eval_num_tokens": 5233482.0,
+      "eval_runtime": 96.4089,
+      "eval_samples_per_second": 16.586,
+      "eval_steps_per_second": 2.074,
+      "step": 2244
+    },
+    {
+      "entropy": 0.1413771447283451,
+      "epoch": 6.016064257028113,
+      "grad_norm": 1.2926467657089233,
+      "learning_rate": 5.1072296418730254e-05,
+      "loss": 0.5202234649658203,
+      "mean_token_accuracy": 0.9594009392189257,
+      "num_tokens": 5246734.0,
+      "step": 2250
+    },
+    {
+      "entropy": 0.1042403375543654,
+      "epoch": 6.149933065595716,
+      "grad_norm": 1.9540276527404785,
+      "learning_rate": 4.8231507514154216e-05,
+      "loss": 0.39597846984863283,
+      "mean_token_accuracy": 0.9706364983320236,
+      "num_tokens": 5366334.0,
+      "step": 2300
+    },
+    {
+      "entropy": 0.10351455600932241,
+      "epoch": 6.28380187416332,
+      "grad_norm": 2.139054775238037,
+      "learning_rate": 4.542102445300397e-05,
+      "loss": 0.38731266021728517,
+      "mean_token_accuracy": 0.9703371664881706,
+      "num_tokens": 5487013.0,
+      "step": 2350
+    },
+    {
+      "entropy": 0.11232182893902064,
+      "epoch": 6.417670682730924,
+      "grad_norm": 1.6526401042938232,
+      "learning_rate": 4.264696670352381e-05,
+      "loss": 0.42091716766357423,
+      "mean_token_accuracy": 0.9684987756609916,
+      "num_tokens": 5599415.0,
+      "step": 2400
+    },
+    {
+      "entropy": 0.10796859875321388,
+      "epoch": 6.551539491298527,
+      "grad_norm": 1.297956109046936,
+      "learning_rate": 3.9915374422489785e-05,
+      "loss": 0.40640792846679685,
+      "mean_token_accuracy": 0.9703203043341637,
+      "num_tokens": 5718099.0,
+      "step": 2450
+    },
+    {
+      "entropy": 0.10999857917428017,
+      "epoch": 6.685408299866131,
+      "grad_norm": 1.5105161666870117,
+      "learning_rate": 3.723219530353909e-05,
+      "loss": 0.4118352508544922,
+      "mean_token_accuracy": 0.9697986772656441,
+      "num_tokens": 5833902.0,
+      "step": 2500
+    },
+    {
+      "entropy": 0.11099046738818288,
+      "epoch": 6.8192771084337345,
+      "grad_norm": 1.8809560537338257,
+      "learning_rate": 3.460327162682602e-05,
+      "loss": 0.41624794006347654,
+      "mean_token_accuracy": 0.9690032437443733,
+      "num_tokens": 5948132.0,
+      "step": 2550
+    },
+    {
+      "entropy": 0.11062245365232229,
+      "epoch": 6.953145917001339,
+      "grad_norm": 1.0219827890396118,
+      "learning_rate": 3.2034327538202464e-05,
+      "loss": 0.41484325408935546,
+      "mean_token_accuracy": 0.9690453514456749,
+      "num_tokens": 6066224.0,
+      "step": 2600
+    },
+    {
+      "epoch": 7.0,
+      "eval_entropy": 0.18908375523984433,
+      "eval_loss": 0.8491571545600891,
+      "eval_mean_token_accuracy": 0.8642131051421166,
+      "eval_num_tokens": 6105729.0,
+      "eval_runtime": 96.4633,
+      "eval_samples_per_second": 16.576,
+      "eval_steps_per_second": 2.073,
+      "step": 2618
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3740,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.0923154774653926e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-4-31B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-4-31B
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.19.1

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/adapter_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-4-31B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.015034304668777832,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$",
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

DBCA_original_Swedish/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2992/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}